From 894ace29638e8d00a962748f7832844533e7e6f7 Mon Sep 17 00:00:00 2001 From: Steve Chen Date: Wed, 17 Jun 2026 23:39:41 -0400 Subject: [PATCH] Fix JSON integer and float serialization. (#519) * Fix JSON integer and float serialization. * Simplify float serialization round-trip fix * fix round trip check * fix lexer EOF OOB read and list.sort comparator signature * revert * fix cases * Update os.c * add `long_v1` --------- Co-authored-by: blueloveTH --- include/pocketpy/common/_generated.h | 1 + python/long_v1.py | 354 +++++++++++++++++++++++++++ src/common/_generated.c | 2 + src/common/sstream.c | 29 ++- src/compiler/lexer.c | 3 +- src/modules/json.c | 2 +- src/modules/os.c | 4 +- src/public/PyList.c | 7 +- tests/250_rfstring.py | 10 +- tests/660_eval.py | 9 +- tests/721_json.py | 3 + 11 files changed, 403 insertions(+), 21 deletions(-) create mode 100644 python/long_v1.py diff --git a/include/pocketpy/common/_generated.h b/include/pocketpy/common/_generated.h index e9da082e..abe471e1 100644 --- a/include/pocketpy/common/_generated.h +++ b/include/pocketpy/common/_generated.h @@ -11,5 +11,6 @@ extern const char kPythonLibs_dataclasses[]; extern const char kPythonLibs_datetime[]; extern const char kPythonLibs_functools[]; extern const char kPythonLibs_heapq[]; +extern const char kPythonLibs_long_v1[]; extern const char kPythonLibs_operator[]; extern const char kPythonLibs_typing[]; diff --git a/python/long_v1.py b/python/long_v1.py new file mode 100644 index 00000000..70d3e26f --- /dev/null +++ b/python/long_v1.py @@ -0,0 +1,354 @@ +# after v1.2.2, int is always 64-bit +PyLong_SHIFT = 60//2 - 1 + +PyLong_BASE = 2 ** PyLong_SHIFT +PyLong_MASK = PyLong_BASE - 1 +PyLong_DECIMAL_SHIFT = 4 +PyLong_DECIMAL_BASE = 10 ** PyLong_DECIMAL_SHIFT + +############################################################## + +def ulong_fromint(x: int): + # return a list of digits and sign + if x == 0: return [0], 1 + sign = 1 if x > 0 else -1 + if sign < 0: x = -x + res = [] + while x: + res.append(x & PyLong_MASK) + x >>= PyLong_SHIFT + return res, sign + +def ulong_cmp(a: list, b: list) -> int: + # return 1 if a>b, -1 if a len(b): return 1 + if len(a) < len(b): return -1 + for i in range(len(a)-1, -1, -1): + if a[i] > b[i]: return 1 + if a[i] < b[i]: return -1 + return 0 + +def ulong_pad_(a: list, size: int): + # pad leading zeros to have `size` digits + delta = size - len(a) + if delta > 0: + a.extend([0] * delta) + +def ulong_unpad_(a: list): + # remove leading zeros + while len(a)>1 and a[-1]==0: + a.pop() + +def ulong_add(a: list, b: list) -> list: + res = [0] * max(len(a), len(b)) + ulong_pad_(a, len(res)) + ulong_pad_(b, len(res)) + carry = 0 + for i in range(len(res)): + carry += a[i] + b[i] + res[i] = carry & PyLong_MASK + carry >>= PyLong_SHIFT + if carry > 0: + res.append(carry) + return res + +def ulong_inc_(a: list): + a[0] += 1 + for i in range(len(a)): + if a[i] < PyLong_BASE: break + a[i] -= PyLong_BASE + if i+1 == len(a): + a.append(1) + else: + a[i+1] += 1 + + +def ulong_sub(a: list, b: list) -> list: + # a >= b + res = [] + borrow = 0 + for i in range(len(b)): + tmp = a[i] - b[i] - borrow + if tmp < 0: + tmp += PyLong_BASE + borrow = 1 + else: + borrow = 0 + res.append(tmp) + for i in range(len(b), len(a)): + tmp = a[i] - borrow + if tmp < 0: + tmp += PyLong_BASE + borrow = 1 + else: + borrow = 0 + res.append(tmp) + ulong_unpad_(res) + return res + +def ulong_divmodi(a: list, b: int): + # b > 0 + res = [] + carry = 0 + for i in range(len(a)-1, -1, -1): + carry <<= PyLong_SHIFT + carry += a[i] + res.append(carry // b) + carry %= b + res.reverse() + ulong_unpad_(res) + return res, carry + + +def ulong_divmod(a: list, b: list): + + if ulong_cmp(a, b) < 0: + return [0], a + + if len(b) == 1: + q, r = ulong_divmodi(a, b[0]) + r, _ = ulong_fromint(r) + return q, r + + max = (len(a) - len(b)) * PyLong_SHIFT + \ + (a[-1].bit_length() - b[-1].bit_length()) + + low = [0] + + high = (max // PyLong_SHIFT) * [0] + \ + [(2**(max % PyLong_SHIFT)) & PyLong_MASK] + + while ulong_cmp(low, high) < 0: + ulong_inc_(high) + mid, r = ulong_divmodi(ulong_add(low, high), 2) + if ulong_cmp(a, ulong_mul(b, mid)) >= 0: + low = mid + else: + high = ulong_sub(mid, [1]) + + q = [0] * (len(a) - len(b) + 1) + while ulong_cmp(a, ulong_mul(b, low)) >= 0: + q = ulong_add(q, low) + a = ulong_sub(a, ulong_mul(b, low)) + ulong_unpad_(q) + return q, a + +def ulong_floordivi(a: list, b: int): + # b > 0 + return ulong_divmodi(a, b)[0] + +def ulong_muli(a: list, b: int): + # b >= 0 + res = [0] * len(a) + carry = 0 + for i in range(len(a)): + carry += a[i] * b + res[i] = carry & PyLong_MASK + carry >>= PyLong_SHIFT + if carry > 0: + res.append(carry) + return res + +def ulong_mul(a: list, b: list): + N = len(a) + len(b) + # use grade-school multiplication + res = [0] * N + for i in range(len(a)): + carry = 0 + for j in range(len(b)): + carry += res[i+j] + a[i] * b[j] + res[i+j] = carry & PyLong_MASK + carry >>= PyLong_SHIFT + res[i+len(b)] = carry + ulong_unpad_(res) + return res + +def ulong_powi(a: list, b: int): + # b >= 0 + if b == 0: return [1] + res = [1] + while b: + if b & 1: + res = ulong_mul(res, a) + a = ulong_mul(a, a) + b >>= 1 + return res + +def ulong_repr(x: list) -> str: + res = [] + while len(x)>1 or x[0]>0: # non-zero + x, r = ulong_divmodi(x, PyLong_DECIMAL_BASE) + res.append(str(r).zfill(PyLong_DECIMAL_SHIFT)) + res.reverse() + s = ''.join(res) + if len(s) == 0: return '0' + if len(s) > 1: s = s.lstrip('0') + return s + +def ulong_fromstr(s: str): + if s[-1] == 'L': + s = s[:-1] + res, base = [0], [1] + if s[0] == '-': + sign = -1 + s = s[1:] + else: + sign = 1 + s = s[::-1] + for c in s: + c = ord(c) - 48 + assert 0 <= c <= 9 + res = ulong_add(res, ulong_muli(base, c)) + base = ulong_muli(base, 10) + return res, sign + +class long: + def __init__(self, x): + if type(x) is tuple: + self.digits, self.sign = x + elif type(x) is int: + self.digits, self.sign = ulong_fromint(x) + elif type(x) is float: + self.digits, self.sign = ulong_fromint(int(x)) + elif type(x) is str: + self.digits, self.sign = ulong_fromstr(x) + elif type(x) is long: + self.digits, self.sign = x.digits.copy(), x.sign + else: + raise TypeError('expected int or str') + + def __len__(self): + return len(self.digits) + + def __add__(self, other): + if type(other) is int: + other = long(other) + elif type(other) is not long: + return NotImplemented + if self.sign == other.sign: + return long((ulong_add(self.digits, other.digits), self.sign)) + else: + cmp = ulong_cmp(self.digits, other.digits) + if cmp == 0: + return long(0) + if cmp > 0: + return long((ulong_sub(self.digits, other.digits), self.sign)) + else: + return long((ulong_sub(other.digits, self.digits), other.sign)) + + def __radd__(self, other): + return self.__add__(other) + + def __sub__(self, other): + if type(other) is int: + other = long(other) + elif type(other) is not long: + return NotImplemented + if self.sign != other.sign: + return long((ulong_add(self.digits, other.digits), self.sign)) + cmp = ulong_cmp(self.digits, other.digits) + if cmp == 0: + return long(0) + if cmp > 0: + return long((ulong_sub(self.digits, other.digits), self.sign)) + else: + return long((ulong_sub(other.digits, self.digits), -other.sign)) + + def __rsub__(self, other): + if type(other) is int: + other = long(other) + elif type(other) is not long: + return NotImplemented + return other.__sub__(self) + + def __mul__(self, other): + if type(other) is int: + return long(( + ulong_muli(self.digits, abs(other)), + self.sign * (1 if other >= 0 else -1) + )) + elif type(other) is long: + return long(( + ulong_mul(self.digits, other.digits), + self.sign * other.sign + )) + return NotImplemented + + def __rmul__(self, other): + return self.__mul__(other) + + ####################################################### + def __divmod__(self, other): + if type(other) is int: + assert self.sign == 1 and other > 0 + q, r = ulong_divmodi(self.digits, other) + return long((q, 1)), r + if type(other) is long: + assert self.sign == 1 and other.sign == 1 + q, r = ulong_divmod(self.digits, other.digits) + assert len(other)>1 or other.digits[0]>0 + return long((q, 1)), long((r, 1)) + raise NotImplementedError + + def __floordiv__(self, other): + return self.__divmod__(other)[0] + + def __mod__(self, other): + return self.__divmod__(other)[1] + + def __pow__(self, other: int): + assert type(other) is int and other >= 0 + if self.sign == -1 and other & 1: + sign = -1 + else: + sign = 1 + return long((ulong_powi(self.digits, other), sign)) + + def __lshift__(self, other: int): + assert type(other) is int and other >= 0 + x = self.digits.copy() + q, r = divmod(other, PyLong_SHIFT) + x = [0]*q + x + for _ in range(r): x = ulong_muli(x, 2) + return long((x, self.sign)) + + def __rshift__(self, other: int): + assert type(other) is int and other >= 0 + x = self.digits.copy() + q, r = divmod(other, PyLong_SHIFT) + x = x[q:] + if not x: return long(0) + for _ in range(r): x = ulong_floordivi(x, 2) + return long((x, self.sign)) + + def __neg__(self): + return long((self.digits, -self.sign)) + + def __cmp__(self, other): + if type(other) is int: + other = long(other) + elif type(other) is not long: + return NotImplemented + if self.sign > other.sign: + return 1 + elif self.sign < other.sign: + return -1 + else: + return ulong_cmp(self.digits, other.digits) + + def __eq__(self, other): + return self.__cmp__(other) == 0 + def __ne__(self, other): + return self.__cmp__(other) != 0 + def __lt__(self, other): + return self.__cmp__(other) < 0 + def __le__(self, other): + return self.__cmp__(other) <= 0 + def __gt__(self, other): + return self.__cmp__(other) > 0 + def __ge__(self, other): + return self.__cmp__(other) >= 0 + + def __repr__(self): + prefix = '-' if self.sign < 0 else '' + return prefix + ulong_repr(self.digits) + 'L' \ No newline at end of file diff --git a/src/common/_generated.c b/src/common/_generated.c index 5bc7d471..080480e2 100644 --- a/src/common/_generated.c +++ b/src/common/_generated.c @@ -9,6 +9,7 @@ const char kPythonLibs_dataclasses[] = "def _get_annotations(cls: type):\n in const char kPythonLibs_datetime[] = "from time import localtime\nimport operator\n\nclass timedelta:\n def __init__(self, days=0, seconds=0):\n self.days = days\n self.seconds = seconds\n\n def __repr__(self):\n return f\"datetime.timedelta(days={self.days}, seconds={self.seconds})\"\n\n def __eq__(self, other) -> bool:\n if not isinstance(other, timedelta):\n return NotImplemented\n return (self.days, self.seconds) == (other.days, other.seconds)\n\n def __ne__(self, other) -> bool:\n if not isinstance(other, timedelta):\n return NotImplemented\n return (self.days, self.seconds) != (other.days, other.seconds)\n\n\nclass date:\n def __init__(self, year: int, month: int, day: int):\n self.year = year\n self.month = month\n self.day = day\n\n @staticmethod\n def today():\n t = localtime()\n return date(t.tm_year, t.tm_mon, t.tm_mday)\n \n def __cmp(self, other, op):\n if not isinstance(other, date):\n return NotImplemented\n if self.year != other.year:\n return op(self.year, other.year)\n if self.month != other.month:\n return op(self.month, other.month)\n return op(self.day, other.day)\n\n def __eq__(self, other) -> bool:\n return self.__cmp(other, operator.eq)\n \n def __ne__(self, other) -> bool:\n return self.__cmp(other, operator.ne)\n\n def __lt__(self, other: 'date') -> bool:\n return self.__cmp(other, operator.lt)\n\n def __le__(self, other: 'date') -> bool:\n return self.__cmp(other, operator.le)\n\n def __gt__(self, other: 'date') -> bool:\n return self.__cmp(other, operator.gt)\n\n def __ge__(self, other: 'date') -> bool:\n return self.__cmp(other, operator.ge)\n\n def __str__(self):\n return f\"{self.year}-{self.month:02}-{self.day:02}\"\n\n def __repr__(self):\n return f\"datetime.date({self.year}, {self.month}, {self.day})\"\n\n\nclass datetime(date):\n def __init__(self, year: int, month: int, day: int, hour: int, minute: int, second: int):\n super().__init__(year, month, day)\n # Validate and set hour, minute, and second\n if not 0 <= hour <= 23:\n raise ValueError(\"Hour must be between 0 and 23\")\n self.hour = hour\n if not 0 <= minute <= 59:\n raise ValueError(\"Minute must be between 0 and 59\")\n self.minute = minute\n if not 0 <= second <= 59:\n raise ValueError(\"Second must be between 0 and 59\")\n self.second = second\n\n def date(self) -> date:\n return date(self.year, self.month, self.day)\n\n @staticmethod\n def now():\n t = localtime()\n tm_sec = t.tm_sec\n if tm_sec == 60:\n tm_sec = 59\n return datetime(t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, tm_sec)\n\n def __str__(self):\n return f\"{self.year}-{self.month:02}-{self.day:02} {self.hour:02}:{self.minute:02}:{self.second:02}\"\n\n def __repr__(self):\n return f\"datetime.datetime({self.year}, {self.month}, {self.day}, {self.hour}, {self.minute}, {self.second})\"\n\n def __cmp(self, other, op):\n if not isinstance(other, datetime):\n return NotImplemented\n if self.year != other.year:\n return op(self.year, other.year)\n if self.month != other.month:\n return op(self.month, other.month)\n if self.day != other.day:\n return op(self.day, other.day)\n if self.hour != other.hour:\n return op(self.hour, other.hour)\n if self.minute != other.minute:\n return op(self.minute, other.minute)\n return op(self.second, other.second)\n\n def __eq__(self, other) -> bool:\n return self.__cmp(other, operator.eq)\n \n def __ne__(self, other) -> bool:\n return self.__cmp(other, operator.ne)\n \n def __lt__(self, other) -> bool:\n return self.__cmp(other, operator.lt)\n \n def __le__(self, other) -> bool:\n return self.__cmp(other, operator.le)\n \n def __gt__(self, other) -> bool:\n return self.__cmp(other, operator.gt)\n \n def __ge__(self, other) -> bool:\n return self.__cmp(other, operator.ge)\n\n\n"; const char kPythonLibs_functools[] = "class cache:\n def __init__(self, f):\n self.f = f\n self.cache = {}\n\n def __call__(self, *args):\n if args not in self.cache:\n self.cache[args] = self.f(*args)\n return self.cache[args]\n \nclass lru_cache:\n def __init__(self, maxsize=128):\n self.maxsize = maxsize\n self.cache = {}\n\n def __call__(self, f):\n def wrapped(*args):\n if args in self.cache:\n res = self.cache.pop(args)\n self.cache[args] = res\n return res\n \n res = f(*args)\n if len(self.cache) >= self.maxsize:\n first_key = next(iter(self.cache))\n self.cache.pop(first_key)\n self.cache[args] = res\n return res\n return wrapped\n \ndef reduce(function, sequence, initial=...):\n it = iter(sequence)\n if initial is ...:\n try:\n value = next(it)\n except StopIteration:\n raise TypeError(\"reduce() of empty sequence with no initial value\")\n else:\n value = initial\n for element in it:\n value = function(value, element)\n return value\n\nclass partial:\n def __init__(self, f, *args, **kwargs):\n self.f = f\n if not callable(f):\n raise TypeError(\"the first argument must be callable\")\n self.args = args\n self.kwargs = kwargs\n\n def __call__(self, *args, **kwargs):\n kwargs.update(self.kwargs)\n return self.f(*self.args, *args, **kwargs)\n\n"; const char kPythonLibs_heapq[] = "# Heap queue algorithm (a.k.a. priority queue)\ndef heappush(heap, item):\n \"\"\"Push item onto heap, maintaining the heap invariant.\"\"\"\n heap.append(item)\n _siftdown(heap, 0, len(heap)-1)\n\ndef heappop(heap):\n \"\"\"Pop the smallest item off the heap, maintaining the heap invariant.\"\"\"\n lastelt = heap.pop() # raises appropriate IndexError if heap is empty\n if heap:\n returnitem = heap[0]\n heap[0] = lastelt\n _siftup(heap, 0)\n return returnitem\n return lastelt\n\ndef heapreplace(heap, item):\n \"\"\"Pop and return the current smallest value, and add the new item.\n\n This is more efficient than heappop() followed by heappush(), and can be\n more appropriate when using a fixed-size heap. Note that the value\n returned may be larger than item! That constrains reasonable uses of\n this routine unless written as part of a conditional replacement:\n\n if item > heap[0]:\n item = heapreplace(heap, item)\n \"\"\"\n returnitem = heap[0] # raises appropriate IndexError if heap is empty\n heap[0] = item\n _siftup(heap, 0)\n return returnitem\n\ndef heappushpop(heap, item):\n \"\"\"Fast version of a heappush followed by a heappop.\"\"\"\n if heap and heap[0] < item:\n item, heap[0] = heap[0], item\n _siftup(heap, 0)\n return item\n\ndef heapify(x):\n \"\"\"Transform list into a heap, in-place, in O(len(x)) time.\"\"\"\n n = len(x)\n # Transform bottom-up. The largest index there's any point to looking at\n # is the largest with a child index in-range, so must have 2*i + 1 < n,\n # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so\n # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is\n # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.\n for i in reversed(range(n//2)):\n _siftup(x, i)\n\n# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos\n# is the index of a leaf with a possibly out-of-order value. Restore the\n# heap invariant.\ndef _siftdown(heap, startpos, pos):\n newitem = heap[pos]\n # Follow the path to the root, moving parents down until finding a place\n # newitem fits.\n while pos > startpos:\n parentpos = (pos - 1) >> 1\n parent = heap[parentpos]\n if newitem < parent:\n heap[pos] = parent\n pos = parentpos\n continue\n break\n heap[pos] = newitem\n\ndef _siftup(heap, pos):\n endpos = len(heap)\n startpos = pos\n newitem = heap[pos]\n # Bubble up the smaller child until hitting a leaf.\n childpos = 2*pos + 1 # leftmost child position\n while childpos < endpos:\n # Set childpos to index of smaller child.\n rightpos = childpos + 1\n if rightpos < endpos and not heap[childpos] < heap[rightpos]:\n childpos = rightpos\n # Move the smaller child up.\n heap[pos] = heap[childpos]\n pos = childpos\n childpos = 2*pos + 1\n # The leaf at pos is empty now. Put newitem there, and bubble it up\n # to its final resting place (by sifting its parents down).\n heap[pos] = newitem\n _siftdown(heap, startpos, pos)"; +const char kPythonLibs_long_v1[] = "# after v1.2.2, int is always 64-bit\nPyLong_SHIFT = 60//2 - 1\n\nPyLong_BASE = 2 ** PyLong_SHIFT\nPyLong_MASK = PyLong_BASE - 1\nPyLong_DECIMAL_SHIFT = 4\nPyLong_DECIMAL_BASE = 10 ** PyLong_DECIMAL_SHIFT\n\n##############################################################\n\ndef ulong_fromint(x: int):\n # return a list of digits and sign\n if x == 0: return [0], 1\n sign = 1 if x > 0 else -1\n if sign < 0: x = -x\n res = []\n while x:\n res.append(x & PyLong_MASK)\n x >>= PyLong_SHIFT\n return res, sign\n\ndef ulong_cmp(a: list, b: list) -> int:\n # return 1 if a>b, -1 if a len(b): return 1\n if len(a) < len(b): return -1\n for i in range(len(a)-1, -1, -1):\n if a[i] > b[i]: return 1\n if a[i] < b[i]: return -1\n return 0\n\ndef ulong_pad_(a: list, size: int):\n # pad leading zeros to have `size` digits\n delta = size - len(a)\n if delta > 0:\n a.extend([0] * delta)\n\ndef ulong_unpad_(a: list):\n # remove leading zeros\n while len(a)>1 and a[-1]==0:\n a.pop()\n\ndef ulong_add(a: list, b: list) -> list:\n res = [0] * max(len(a), len(b))\n ulong_pad_(a, len(res))\n ulong_pad_(b, len(res))\n carry = 0\n for i in range(len(res)):\n carry += a[i] + b[i]\n res[i] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n if carry > 0:\n res.append(carry)\n return res\n\ndef ulong_inc_(a: list):\n a[0] += 1\n for i in range(len(a)):\n if a[i] < PyLong_BASE: break\n a[i] -= PyLong_BASE\n if i+1 == len(a):\n a.append(1)\n else:\n a[i+1] += 1\n \n\ndef ulong_sub(a: list, b: list) -> list:\n # a >= b\n res = []\n borrow = 0\n for i in range(len(b)):\n tmp = a[i] - b[i] - borrow\n if tmp < 0:\n tmp += PyLong_BASE\n borrow = 1\n else:\n borrow = 0\n res.append(tmp)\n for i in range(len(b), len(a)):\n tmp = a[i] - borrow\n if tmp < 0:\n tmp += PyLong_BASE\n borrow = 1\n else:\n borrow = 0\n res.append(tmp)\n ulong_unpad_(res)\n return res\n\ndef ulong_divmodi(a: list, b: int):\n # b > 0\n res = []\n carry = 0\n for i in range(len(a)-1, -1, -1):\n carry <<= PyLong_SHIFT\n carry += a[i]\n res.append(carry // b)\n carry %= b\n res.reverse()\n ulong_unpad_(res)\n return res, carry\n\n\ndef ulong_divmod(a: list, b: list):\n\n if ulong_cmp(a, b) < 0:\n return [0], a\n\n if len(b) == 1:\n q, r = ulong_divmodi(a, b[0])\n r, _ = ulong_fromint(r)\n return q, r\n\n max = (len(a) - len(b)) * PyLong_SHIFT + \x5c\n (a[-1].bit_length() - b[-1].bit_length())\n\n low = [0]\n\n high = (max // PyLong_SHIFT) * [0] + \x5c\n [(2**(max % PyLong_SHIFT)) & PyLong_MASK]\n\n while ulong_cmp(low, high) < 0:\n ulong_inc_(high)\n mid, r = ulong_divmodi(ulong_add(low, high), 2)\n if ulong_cmp(a, ulong_mul(b, mid)) >= 0:\n low = mid\n else:\n high = ulong_sub(mid, [1])\n\n q = [0] * (len(a) - len(b) + 1)\n while ulong_cmp(a, ulong_mul(b, low)) >= 0:\n q = ulong_add(q, low)\n a = ulong_sub(a, ulong_mul(b, low))\n ulong_unpad_(q)\n return q, a\n\ndef ulong_floordivi(a: list, b: int):\n # b > 0\n return ulong_divmodi(a, b)[0]\n\ndef ulong_muli(a: list, b: int):\n # b >= 0\n res = [0] * len(a)\n carry = 0\n for i in range(len(a)):\n carry += a[i] * b\n res[i] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n if carry > 0:\n res.append(carry)\n return res\n\ndef ulong_mul(a: list, b: list):\n N = len(a) + len(b)\n # use grade-school multiplication\n res = [0] * N\n for i in range(len(a)):\n carry = 0\n for j in range(len(b)):\n carry += res[i+j] + a[i] * b[j]\n res[i+j] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n res[i+len(b)] = carry\n ulong_unpad_(res)\n return res\n\ndef ulong_powi(a: list, b: int):\n # b >= 0\n if b == 0: return [1]\n res = [1]\n while b:\n if b & 1:\n res = ulong_mul(res, a)\n a = ulong_mul(a, a)\n b >>= 1\n return res\n\ndef ulong_repr(x: list) -> str:\n res = []\n while len(x)>1 or x[0]>0: # non-zero\n x, r = ulong_divmodi(x, PyLong_DECIMAL_BASE)\n res.append(str(r).zfill(PyLong_DECIMAL_SHIFT))\n res.reverse()\n s = ''.join(res)\n if len(s) == 0: return '0'\n if len(s) > 1: s = s.lstrip('0')\n return s\n\ndef ulong_fromstr(s: str):\n if s[-1] == 'L':\n s = s[:-1]\n res, base = [0], [1]\n if s[0] == '-':\n sign = -1\n s = s[1:]\n else:\n sign = 1\n s = s[::-1]\n for c in s:\n c = ord(c) - 48\n assert 0 <= c <= 9\n res = ulong_add(res, ulong_muli(base, c))\n base = ulong_muli(base, 10)\n return res, sign\n\nclass long:\n def __init__(self, x):\n if type(x) is tuple:\n self.digits, self.sign = x\n elif type(x) is int:\n self.digits, self.sign = ulong_fromint(x)\n elif type(x) is float:\n self.digits, self.sign = ulong_fromint(int(x))\n elif type(x) is str:\n self.digits, self.sign = ulong_fromstr(x)\n elif type(x) is long:\n self.digits, self.sign = x.digits.copy(), x.sign\n else:\n raise TypeError('expected int or str')\n \n def __len__(self):\n return len(self.digits)\n\n def __add__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign == other.sign:\n return long((ulong_add(self.digits, other.digits), self.sign))\n else:\n cmp = ulong_cmp(self.digits, other.digits)\n if cmp == 0:\n return long(0)\n if cmp > 0:\n return long((ulong_sub(self.digits, other.digits), self.sign))\n else:\n return long((ulong_sub(other.digits, self.digits), other.sign))\n \n def __radd__(self, other):\n return self.__add__(other)\n \n def __sub__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign != other.sign:\n return long((ulong_add(self.digits, other.digits), self.sign))\n cmp = ulong_cmp(self.digits, other.digits)\n if cmp == 0:\n return long(0)\n if cmp > 0:\n return long((ulong_sub(self.digits, other.digits), self.sign))\n else:\n return long((ulong_sub(other.digits, self.digits), -other.sign))\n \n def __rsub__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n return other.__sub__(self)\n \n def __mul__(self, other):\n if type(other) is int:\n return long((\n ulong_muli(self.digits, abs(other)),\n self.sign * (1 if other >= 0 else -1)\n ))\n elif type(other) is long:\n return long((\n ulong_mul(self.digits, other.digits),\n self.sign * other.sign\n ))\n return NotImplemented\n \n def __rmul__(self, other):\n return self.__mul__(other)\n \n #######################################################\n def __divmod__(self, other):\n if type(other) is int:\n assert self.sign == 1 and other > 0\n q, r = ulong_divmodi(self.digits, other)\n return long((q, 1)), r\n if type(other) is long:\n assert self.sign == 1 and other.sign == 1\n q, r = ulong_divmod(self.digits, other.digits)\n assert len(other)>1 or other.digits[0]>0\n return long((q, 1)), long((r, 1))\n raise NotImplementedError\n\n def __floordiv__(self, other):\n return self.__divmod__(other)[0]\n\n def __mod__(self, other):\n return self.__divmod__(other)[1]\n\n def __pow__(self, other: int):\n assert type(other) is int and other >= 0\n if self.sign == -1 and other & 1:\n sign = -1\n else:\n sign = 1\n return long((ulong_powi(self.digits, other), sign))\n \n def __lshift__(self, other: int):\n assert type(other) is int and other >= 0\n x = self.digits.copy()\n q, r = divmod(other, PyLong_SHIFT)\n x = [0]*q + x\n for _ in range(r): x = ulong_muli(x, 2)\n return long((x, self.sign))\n \n def __rshift__(self, other: int):\n assert type(other) is int and other >= 0\n x = self.digits.copy()\n q, r = divmod(other, PyLong_SHIFT)\n x = x[q:]\n if not x: return long(0)\n for _ in range(r): x = ulong_floordivi(x, 2)\n return long((x, self.sign))\n \n def __neg__(self):\n return long((self.digits, -self.sign))\n \n def __cmp__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign > other.sign:\n return 1\n elif self.sign < other.sign:\n return -1\n else:\n return ulong_cmp(self.digits, other.digits)\n \n def __eq__(self, other):\n return self.__cmp__(other) == 0\n def __ne__(self, other):\n return self.__cmp__(other) != 0\n def __lt__(self, other):\n return self.__cmp__(other) < 0\n def __le__(self, other):\n return self.__cmp__(other) <= 0\n def __gt__(self, other):\n return self.__cmp__(other) > 0\n def __ge__(self, other):\n return self.__cmp__(other) >= 0\n \n def __repr__(self):\n prefix = '-' if self.sign < 0 else ''\n return prefix + ulong_repr(self.digits) + 'L'"; const char kPythonLibs_operator[] = "# https://docs.python.org/3/library/operator.html#mapping-operators-to-functions\n\ndef le(a, b): return a <= b\ndef lt(a, b): return a < b\ndef ge(a, b): return a >= b\ndef gt(a, b): return a > b\ndef eq(a, b): return a == b\ndef ne(a, b): return a != b\n\ndef and_(a, b): return a & b\ndef or_(a, b): return a | b\ndef xor(a, b): return a ^ b\ndef invert(a): return ~a\ndef lshift(a, b): return a << b\ndef rshift(a, b): return a >> b\n\ndef is_(a, b): return a is b\ndef is_not(a, b): return a is not b\ndef not_(a): return not a\ndef truth(a): return bool(a)\ndef contains(a, b): return b in a\n\ndef add(a, b): return a + b\ndef sub(a, b): return a - b\ndef mul(a, b): return a * b\ndef truediv(a, b): return a / b\ndef floordiv(a, b): return a // b\ndef mod(a, b): return a % b\ndef pow(a, b): return a ** b\ndef neg(a): return -a\ndef matmul(a, b): return a @ b\n\ndef getitem(a, b): return a[b]\ndef setitem(a, b, c): a[b] = c\ndef delitem(a, b): del a[b]\n\ndef iadd(a, b): a += b; return a\ndef isub(a, b): a -= b; return a\ndef imul(a, b): a *= b; return a\ndef itruediv(a, b): a /= b; return a\ndef ifloordiv(a, b): a //= b; return a\ndef imod(a, b): a %= b; return a\n# def ipow(a, b): a **= b; return a\n# def imatmul(a, b): a @= b; return a\ndef iand(a, b): a &= b; return a\ndef ior(a, b): a |= b; return a\ndef ixor(a, b): a ^= b; return a\ndef ilshift(a, b): a <<= b; return a\ndef irshift(a, b): a >>= b; return a\n\nclass attrgetter:\n def __init__(self, attr):\n self.attr = attr\n def __call__(self, obj):\n return getattr(obj, self.attr)\n\nclass itemgetter:\n def __init__(self, item):\n self.item = item\n def __call__(self, obj):\n return obj[self.item]\n"; const char kPythonLibs_typing[] = "class _Placeholder:\n def __init__(self, *args, **kwargs):\n pass\n def __getitem__(self, *args):\n return self\n def __call__(self, *args, **kwargs):\n return self\n def __and__(self, other):\n return self\n def __or__(self, other):\n return self\n def __xor__(self, other):\n return self\n\n\n_PLACEHOLDER = _Placeholder()\n\nSequence = _PLACEHOLDER\nList = _PLACEHOLDER\nDict = _PLACEHOLDER\nTuple = _PLACEHOLDER\nSet = _PLACEHOLDER\nAny = _PLACEHOLDER\nUnion = _PLACEHOLDER\nOptional = _PLACEHOLDER\nCallable = _PLACEHOLDER\nType = _PLACEHOLDER\nTypeAlias = _PLACEHOLDER\nNewType = _PLACEHOLDER\n\nClassVar = _PLACEHOLDER\n\nLiteral = _PLACEHOLDER\nLiteralString = _PLACEHOLDER\n\nIterable = _PLACEHOLDER\nGenerator = _PLACEHOLDER\nIterator = _PLACEHOLDER\n\nHashable = _PLACEHOLDER\n\nTypeVar = _PLACEHOLDER\nSelf = _PLACEHOLDER\n\nProtocol = object\nGeneric = object\nNever = object\n\nTYPE_CHECKING = False\n\n# decorators\noverload = lambda x: x\noverride = lambda x: x\nfinal = lambda x: x\n\n# exhaustiveness checking\nassert_never = lambda x: x\n\nTypedDict = dict\nNotRequired = _PLACEHOLDER\nReadOnly = _PLACEHOLDER\nRequired = _PLACEHOLDER\nTypeIs = _PLACEHOLDER\nTypeGuard = _PLACEHOLDER\n\ncast = lambda _, val: val\n"; @@ -22,6 +23,7 @@ const char* load_kPythonLib(const char* name) { if (strcmp(name, "datetime") == 0) return kPythonLibs_datetime; if (strcmp(name, "functools") == 0) return kPythonLibs_functools; if (strcmp(name, "heapq") == 0) return kPythonLibs_heapq; + if (strcmp(name, "long_v1") == 0) return kPythonLibs_long_v1; if (strcmp(name, "operator") == 0) return kPythonLibs_operator; if (strcmp(name, "typing") == 0) return kPythonLibs_typing; return NULL; diff --git a/src/common/sstream.c b/src/common/sstream.c index aabdd6ab..b3122897 100644 --- a/src/common/sstream.c +++ b/src/common/sstream.c @@ -7,6 +7,7 @@ #include #include #include +#include #include void c11_sbuf__ctor(c11_sbuf* self) { @@ -53,21 +54,23 @@ void c11_sbuf__write_f64(c11_sbuf* self, double val, int precision) { char b[32]; int size; if(precision < 0) { - int prec = 17 - 1; // std::numeric_limits::max_digits10 == 17 - size = snprintf(b, sizeof(b), "%.*g", prec, val); - } else { - int prec = precision; - size = snprintf(b, sizeof(b), "%.*f", prec, val); - } - c11_sbuf__write_cstr(self, b); - bool all_is_digit = true; - for(int i = 1; i < size; i++) { - if(!isdigit(b[i])) { - all_is_digit = false; - break; + for(int g = 15; g <= 17; g++) { + size = snprintf(b, sizeof(b), "%.*g", g, val); + if(strtod(b, NULL) == val) break; } + c11_sbuf__write_cstr(self, b); + bool all_is_digit = true; + for(int i = 1; i < size; i++) { + if(!isdigit(b[i])) { + all_is_digit = false; + break; + } + } + if(all_is_digit) c11_sbuf__write_cstr(self, ".0"); + } else { + size = snprintf(b, sizeof(b), "%.*f", precision, val); + c11_sbuf__write_cstr(self, b); } - if(all_is_digit) c11_sbuf__write_cstr(self, ".0"); } void c11_sbuf__write_sv(c11_sbuf* self, c11_sv sv) { diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index bbedf6fc..34a8343d 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -189,7 +189,8 @@ static Error* LexerError(Lexer* self, const char* fmt, ...) { err->src = self->src; PK_INCREF(self->src); err->lineno = self->current_line; - if(*self->curr_char == '\n') { err->lineno--; } + const char* end = self->src->source->data + self->src->source->size; + if(self->curr_char <= end && *self->curr_char == '\n') { err->lineno--; } va_list args; va_start(args, fmt); vsnprintf(err->msg, sizeof(err->msg), fmt, args); diff --git a/src/modules/json.c b/src/modules/json.c index 181d4aff..fca273f6 100644 --- a/src/modules/json.c +++ b/src/modules/json.c @@ -102,7 +102,7 @@ static bool json__write_namedict_kv(py_Name k, py_Ref v, void* ctx_) { static bool json__write_object(c11_sbuf* buf, py_TValue* obj, int indent, int depth) { switch(obj->type) { case tp_NoneType: c11_sbuf__write_cstr(buf, "null"); return true; - case tp_int: c11_sbuf__write_int(buf, obj->_i64); return true; + case tp_int: c11_sbuf__write_i64(buf, obj->_i64); return true; case tp_float: { if(dmath_isnan(obj->_f64)) { c11_sbuf__write_cstr(buf, "NaN"); diff --git a/src/modules/os.c b/src/modules/os.c index 1109d2cf..7c366730 100644 --- a/src/modules/os.c +++ b/src/modules/os.c @@ -235,8 +235,8 @@ static bool io_FileIO_flush(int argc, py_Ref argv) { void pk__add_module_io() { py_Ref mod = py_newmodule("io"); - - py_Type FileIO = pk_newtype("FileIO", tp_object, mod, NULL, false, true); + py_Type FileIO = py_newtype("FileIO", tp_object, mod, NULL); + py_tpsetfinal(FileIO); py_bindmagic(FileIO, __new__, io_FileIO__new__); py_bindmagic(FileIO, __enter__, io_FileIO__enter__); diff --git a/src/public/PyList.c b/src/public/PyList.c index a30c412d..88f2c843 100644 --- a/src/public/PyList.c +++ b/src/public/PyList.c @@ -384,7 +384,10 @@ static bool list_insert(int argc, py_Ref argv) { return true; } -static int lt_with_key(py_TValue* a, py_TValue* b, py_TValue* key) { +static int lt_with_key(const void* a_, const void* b_, void* extra) { + py_TValue* a = (py_TValue*)a_; + py_TValue* b = (py_TValue*)b_; + py_TValue* key = (py_TValue*)extra; if(!key) return py_less(a, b); VM* vm = pk_current_vm; // project a @@ -416,7 +419,7 @@ static bool list_sort(int argc, py_Ref argv) { bool ok = c11__stable_sort(self->data, self->length, sizeof(py_TValue), - (int (*)(const void*, const void*, void*))lt_with_key, + lt_with_key, key); if(!ok) return false; diff --git a/tests/250_rfstring.py b/tests/250_rfstring.py index 5b687bd6..5381e053 100644 --- a/tests/250_rfstring.py +++ b/tests/250_rfstring.py @@ -64,6 +64,8 @@ assert f'{a:010f}' == '010.000000' assert f'{a:010.2f}' == '0000010.00' assert f'{a:.2f}' == '10.00' assert f'{a:.5f}' == '10.00000' +assert f'{2.5:.0f}' == '2' +assert '{:.0f}'.format(2.5) == '2' b = '123' assert f'{b:10}' == '123 ' @@ -139,4 +141,10 @@ assert f'{A():10}' == 'A ' assert f'{A():10}' == 'A ' a = ['1', '2', '3'] -assert f'a = {'\n'.join(a)}' == 'a = 1\n2\n3' \ No newline at end of file +assert f'a = {'\n'.join(a)}' == 'a = 1\n2\n3' + +assert str(0.1+0.2) == '0.30000000000000004' +assert str(1/2) == '0.5' +assert str(1/3) == '0.3333333333333333' +assert str(1/4) == '0.25' +assert str(1/7) == '0.14285714285714285' diff --git a/tests/660_eval.py b/tests/660_eval.py index 93c3e7a5..f0a88fc0 100644 --- a/tests/660_eval.py +++ b/tests/660_eval.py @@ -75,4 +75,11 @@ assert res == [42, 42] assert x == 33 # test removing trailing newlines -assert eval('[1, 2, 3]\n \n') == [1, 2, 3] \ No newline at end of file +assert eval('[1, 2, 3]\n \n') == [1, 2, 3] + +# lexer doesn't read past NUL on error at end of input +try: + eval('"\\x4') + exit(1) +except SyntaxError: + pass \ No newline at end of file diff --git a/tests/721_json.py b/tests/721_json.py index 29d168bc..8509d02b 100644 --- a/tests/721_json.py +++ b/tests/721_json.py @@ -48,6 +48,9 @@ _j = json.dumps(c) _c = json.loads(_j) assert c == _c +assert json.dumps(9223372036854775807) == '9223372036854775807' +assert json.loads(json.dumps(0.1 + 0.2)) == 0.1 + 0.2 + d = True _j = json.dumps(d) _d = json.loads(_j)