From 5e5f2525b42f91f7e4200859e9aa685d921a0a2d Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 13 Dec 2024 13:39:48 +0800 Subject: [PATCH] add `pickle` module --- include/pocketpy/common/_generated.h | 1 - include/pocketpy/interpreter/modules.h | 1 + include/pocketpy/pocketpy.h | 5 +- python/pickle.py | 182 ----------- src/common/_generated.c | 2 - src/interpreter/vm.c | 1 + src/modules/pickle.c | 428 +++++++++++++++++++++++++ tests/90_pickle.py | 41 +++ 8 files changed, 475 insertions(+), 186 deletions(-) delete mode 100644 python/pickle.py create mode 100644 src/modules/pickle.c diff --git a/include/pocketpy/common/_generated.h b/include/pocketpy/common/_generated.h index ff06a746..4cac6e9b 100644 --- a/include/pocketpy/common/_generated.h +++ b/include/pocketpy/common/_generated.h @@ -12,6 +12,5 @@ extern const char kPythonLibs_datetime[]; extern const char kPythonLibs_functools[]; extern const char kPythonLibs_heapq[]; extern const char kPythonLibs_operator[]; -extern const char kPythonLibs_pickle[]; extern const char kPythonLibs_this[]; extern const char kPythonLibs_typing[]; diff --git a/include/pocketpy/interpreter/modules.h b/include/pocketpy/interpreter/modules.h index 178a5df0..275b85d3 100644 --- a/include/pocketpy/interpreter/modules.h +++ b/include/pocketpy/interpreter/modules.h @@ -12,6 +12,7 @@ void pk__add_module_easing(); void pk__add_module_traceback(); void pk__add_module_enum(); void pk__add_module_inspect(); +void pk__add_module_pickle(); void pk__add_module_linalg(); void pk__add_module_array2d(); diff --git a/include/pocketpy/pocketpy.h b/include/pocketpy/pocketpy.h index 02bdf21f..5a2517d3 100644 --- a/include/pocketpy/pocketpy.h +++ b/include/pocketpy/pocketpy.h @@ -580,7 +580,10 @@ PK_API bool py_len(py_Ref val) PY_RAISE PY_RETURN; PK_API bool py_json_dumps(py_Ref val) PY_RAISE PY_RETURN; /// Python equivalent to `json.loads(val)`. PK_API bool py_json_loads(const char* source) PY_RAISE PY_RETURN; - +/// Python equivalent to `pickle.dumps(val)`. +PK_API bool py_pickle_dumps(py_Ref val) PY_RAISE PY_RETURN; +/// Python equivalent to `pickle.loads(val)`. +PK_API bool py_pickle_loads(const unsigned char* data, int size) PY_RAISE PY_RETURN; /************* Unchecked Functions *************/ PK_API py_ObjectRef py_tuple_data(py_Ref self); diff --git a/python/pickle.py b/python/pickle.py deleted file mode 100644 index d069fe76..00000000 --- a/python/pickle.py +++ /dev/null @@ -1,182 +0,0 @@ -import json -import builtins - -_BASIC_TYPES = [int, float, str, bool, type(None)] -_MOD_T_SEP = "@" - -def _find_class(path: str): - if _MOD_T_SEP not in path: - return builtins.__dict__[path] - modpath, name = path.split(_MOD_T_SEP) - return __import__(modpath).__dict__[name] - -class _Pickler: - def __init__(self, obj) -> None: - self.obj = obj - self.raw_memo = {} # id -> int - self.memo = [] # int -> object - - @staticmethod - def _type_id(t: type): - assert type(t) is type - name = t.__name__ - mod = t.__module__ - if mod is not None: - name = mod + _MOD_T_SEP + name - return name - - def wrap(self, o): - o_t = type(o) - if o_t in _BASIC_TYPES: - return o - if o_t is type: - return ["type", self._type_id(o)] - - index = self.raw_memo.get(id(o), None) - if index is not None: - return [index] - - ret = [] - index = len(self.memo) - self.memo.append(ret) - self.raw_memo[id(o)] = index - - if o_t is tuple: - ret.append("tuple") - ret.append([self.wrap(i) for i in o]) - return [index] - if o_t is bytes: - ret.append("bytes") - ret.append([o[j] for j in range(len(o))]) - return [index] - if o_t is list: - ret.append("list") - ret.append([self.wrap(i) for i in o]) - return [index] - if o_t is dict: - ret.append("dict") - ret.append([[self.wrap(k), self.wrap(v)] for k,v in o.items()]) - return [index] - - _0 = self._type_id(o_t) - - if getattr(o_t, '__struct__', False): - ret.append(_0) - ret.append(o.tostruct().hex()) - return [index] - - if hasattr(o, "__getnewargs__"): - _1 = o.__getnewargs__() # an iterable - _1 = [self.wrap(i) for i in _1] - else: - _1 = None - - if o.__dict__ is None: - _2 = None - else: - _2 = {k: self.wrap(v) for k,v in o.__dict__.items()} - - ret.append(_0) # type id - ret.append(_1) # newargs - ret.append(_2) # state - return [index] - - def run_pipe(self): - o = self.wrap(self.obj) - return [o, self.memo] - - - -class _Unpickler: - def __init__(self, obj, memo: list) -> None: - self.obj = obj - self.memo = memo - self._unwrapped = [None] * len(memo) - - def tag(self, index, o): - assert self._unwrapped[index] is None - self._unwrapped[index] = o - - def unwrap(self, o, index=None): - if type(o) in _BASIC_TYPES: - return o - assert type(o) is list - - if o[0] == "type": - return _find_class(o[1]) - - # reference - if type(o[0]) is int: - assert index is None # index should be None - index = o[0] - if self._unwrapped[index] is None: - o = self.memo[index] - assert type(o) is list - assert type(o[0]) is str - self.unwrap(o, index) - assert self._unwrapped[index] is not None - return self._unwrapped[index] - - # concrete reference type - if o[0] == "tuple": - ret = tuple([self.unwrap(i) for i in o[1]]) - self.tag(index, ret) - return ret - if o[0] == "bytes": - ret = bytes(o[1]) - self.tag(index, ret) - return ret - if o[0] == "list": - ret = [] - self.tag(index, ret) - for i in o[1]: - ret.append(self.unwrap(i)) - return ret - if o[0] == "dict": - ret = {} - self.tag(index, ret) - for k,v in o[1]: - ret[self.unwrap(k)] = self.unwrap(v) - return ret - - # generic object - cls = _find_class(o[0]) - # if getattr(cls, '__struct__', False): - if False: - inst = cls.fromstruct(struct.fromhex(o[1])) - self.tag(index, inst) - return inst - else: - _, newargs, state = o - # create uninitialized instance - new_f = getattr(cls, "__new__") - if newargs is not None: - newargs = [self.unwrap(i) for i in newargs] - inst = new_f(cls, *newargs) - else: - inst = new_f(cls) - self.tag(index, inst) - # restore state - if state is not None: - for k,v in state.items(): - setattr(inst, k, self.unwrap(v)) - return inst - - def run_pipe(self): - return self.unwrap(self.obj) - - -def _wrap(o): - return _Pickler(o).run_pipe() - -def _unwrap(packed: list): - return _Unpickler(*packed).run_pipe() - -def dumps(o) -> bytes: - o = _wrap(o) - return json.dumps(o).encode() - -def loads(b) -> object: - assert type(b) is bytes - o = json.loads(b.decode()) - return _unwrap(o) \ No newline at end of file diff --git a/src/common/_generated.c b/src/common/_generated.c index 1da614f2..4893929b 100644 --- a/src/common/_generated.c +++ b/src/common/_generated.c @@ -10,7 +10,6 @@ const char kPythonLibs_datetime[] = "from time import localtime\nimport operator const char kPythonLibs_functools[] = "class cache:\n def __init__(self, f):\n self.f = f\n self.cache = {}\n\n def __call__(self, *args):\n if args not in self.cache:\n self.cache[args] = self.f(*args)\n return self.cache[args]\n \ndef reduce(function, sequence, initial=...):\n it = iter(sequence)\n if initial is ...:\n try:\n value = next(it)\n except StopIteration:\n raise TypeError(\"reduce() of empty sequence with no initial value\")\n else:\n value = initial\n for element in it:\n value = function(value, element)\n return value\n\nclass partial:\n def __init__(self, f, *args, **kwargs):\n self.f = f\n if not callable(f):\n raise TypeError(\"the first argument must be callable\")\n self.args = args\n self.kwargs = kwargs\n\n def __call__(self, *args, **kwargs):\n kwargs.update(self.kwargs)\n return self.f(*self.args, *args, **kwargs)\n\n"; const char kPythonLibs_heapq[] = "# Heap queue algorithm (a.k.a. priority queue)\ndef heappush(heap, item):\n \"\"\"Push item onto heap, maintaining the heap invariant.\"\"\"\n heap.append(item)\n _siftdown(heap, 0, len(heap)-1)\n\ndef heappop(heap):\n \"\"\"Pop the smallest item off the heap, maintaining the heap invariant.\"\"\"\n lastelt = heap.pop() # raises appropriate IndexError if heap is empty\n if heap:\n returnitem = heap[0]\n heap[0] = lastelt\n _siftup(heap, 0)\n return returnitem\n return lastelt\n\ndef heapreplace(heap, item):\n \"\"\"Pop and return the current smallest value, and add the new item.\n\n This is more efficient than heappop() followed by heappush(), and can be\n more appropriate when using a fixed-size heap. Note that the value\n returned may be larger than item! That constrains reasonable uses of\n this routine unless written as part of a conditional replacement:\n\n if item > heap[0]:\n item = heapreplace(heap, item)\n \"\"\"\n returnitem = heap[0] # raises appropriate IndexError if heap is empty\n heap[0] = item\n _siftup(heap, 0)\n return returnitem\n\ndef heappushpop(heap, item):\n \"\"\"Fast version of a heappush followed by a heappop.\"\"\"\n if heap and heap[0] < item:\n item, heap[0] = heap[0], item\n _siftup(heap, 0)\n return item\n\ndef heapify(x):\n \"\"\"Transform list into a heap, in-place, in O(len(x)) time.\"\"\"\n n = len(x)\n # Transform bottom-up. The largest index there's any point to looking at\n # is the largest with a child index in-range, so must have 2*i + 1 < n,\n # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so\n # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is\n # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.\n for i in reversed(range(n//2)):\n _siftup(x, i)\n\n# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos\n# is the index of a leaf with a possibly out-of-order value. Restore the\n# heap invariant.\ndef _siftdown(heap, startpos, pos):\n newitem = heap[pos]\n # Follow the path to the root, moving parents down until finding a place\n # newitem fits.\n while pos > startpos:\n parentpos = (pos - 1) >> 1\n parent = heap[parentpos]\n if newitem < parent:\n heap[pos] = parent\n pos = parentpos\n continue\n break\n heap[pos] = newitem\n\ndef _siftup(heap, pos):\n endpos = len(heap)\n startpos = pos\n newitem = heap[pos]\n # Bubble up the smaller child until hitting a leaf.\n childpos = 2*pos + 1 # leftmost child position\n while childpos < endpos:\n # Set childpos to index of smaller child.\n rightpos = childpos + 1\n if rightpos < endpos and not heap[childpos] < heap[rightpos]:\n childpos = rightpos\n # Move the smaller child up.\n heap[pos] = heap[childpos]\n pos = childpos\n childpos = 2*pos + 1\n # The leaf at pos is empty now. Put newitem there, and bubble it up\n # to its final resting place (by sifting its parents down).\n heap[pos] = newitem\n _siftdown(heap, startpos, pos)"; const char kPythonLibs_operator[] = "# https://docs.python.org/3/library/operator.html#mapping-operators-to-functions\n\ndef le(a, b): return a <= b\ndef lt(a, b): return a < b\ndef ge(a, b): return a >= b\ndef gt(a, b): return a > b\ndef eq(a, b): return a == b\ndef ne(a, b): return a != b\n\ndef and_(a, b): return a & b\ndef or_(a, b): return a | b\ndef xor(a, b): return a ^ b\ndef invert(a): return ~a\ndef lshift(a, b): return a << b\ndef rshift(a, b): return a >> b\n\ndef is_(a, b): return a is b\ndef is_not(a, b): return a is not b\ndef not_(a): return not a\ndef truth(a): return bool(a)\ndef contains(a, b): return b in a\n\ndef add(a, b): return a + b\ndef sub(a, b): return a - b\ndef mul(a, b): return a * b\ndef truediv(a, b): return a / b\ndef floordiv(a, b): return a // b\ndef mod(a, b): return a % b\ndef pow(a, b): return a ** b\ndef neg(a): return -a\ndef matmul(a, b): return a @ b\n\ndef getitem(a, b): return a[b]\ndef setitem(a, b, c): a[b] = c\ndef delitem(a, b): del a[b]\n\ndef iadd(a, b): a += b; return a\ndef isub(a, b): a -= b; return a\ndef imul(a, b): a *= b; return a\ndef itruediv(a, b): a /= b; return a\ndef ifloordiv(a, b): a //= b; return a\ndef imod(a, b): a %= b; return a\n# def ipow(a, b): a **= b; return a\n# def imatmul(a, b): a @= b; return a\ndef iand(a, b): a &= b; return a\ndef ior(a, b): a |= b; return a\ndef ixor(a, b): a ^= b; return a\ndef ilshift(a, b): a <<= b; return a\ndef irshift(a, b): a >>= b; return a\n"; -const char kPythonLibs_pickle[] = "import json\nimport builtins\n\n_BASIC_TYPES = [int, float, str, bool, type(None)]\n_MOD_T_SEP = \"@\"\n\ndef _find_class(path: str):\n if _MOD_T_SEP not in path:\n return builtins.__dict__[path]\n modpath, name = path.split(_MOD_T_SEP)\n return __import__(modpath).__dict__[name]\n\nclass _Pickler:\n def __init__(self, obj) -> None:\n self.obj = obj\n self.raw_memo = {} # id -> int\n self.memo = [] # int -> object\n\n @staticmethod\n def _type_id(t: type):\n assert type(t) is type\n name = t.__name__\n mod = t.__module__\n if mod is not None:\n name = mod + _MOD_T_SEP + name\n return name\n\n def wrap(self, o):\n o_t = type(o)\n if o_t in _BASIC_TYPES:\n return o\n if o_t is type:\n return [\"type\", self._type_id(o)]\n\n index = self.raw_memo.get(id(o), None)\n if index is not None:\n return [index]\n \n ret = []\n index = len(self.memo)\n self.memo.append(ret)\n self.raw_memo[id(o)] = index\n\n if o_t is tuple:\n ret.append(\"tuple\")\n ret.append([self.wrap(i) for i in o])\n return [index]\n if o_t is bytes:\n ret.append(\"bytes\")\n ret.append([o[j] for j in range(len(o))])\n return [index]\n if o_t is list:\n ret.append(\"list\")\n ret.append([self.wrap(i) for i in o])\n return [index]\n if o_t is dict:\n ret.append(\"dict\")\n ret.append([[self.wrap(k), self.wrap(v)] for k,v in o.items()])\n return [index]\n \n _0 = self._type_id(o_t)\n\n if getattr(o_t, '__struct__', False):\n ret.append(_0)\n ret.append(o.tostruct().hex())\n return [index]\n\n if hasattr(o, \"__getnewargs__\"):\n _1 = o.__getnewargs__() # an iterable\n _1 = [self.wrap(i) for i in _1]\n else:\n _1 = None\n\n if o.__dict__ is None:\n _2 = None\n else:\n _2 = {k: self.wrap(v) for k,v in o.__dict__.items()}\n\n ret.append(_0) # type id\n ret.append(_1) # newargs\n ret.append(_2) # state\n return [index]\n \n def run_pipe(self):\n o = self.wrap(self.obj)\n return [o, self.memo]\n\n\n\nclass _Unpickler:\n def __init__(self, obj, memo: list) -> None:\n self.obj = obj\n self.memo = memo\n self._unwrapped = [None] * len(memo)\n\n def tag(self, index, o):\n assert self._unwrapped[index] is None\n self._unwrapped[index] = o\n\n def unwrap(self, o, index=None):\n if type(o) in _BASIC_TYPES:\n return o\n assert type(o) is list\n\n if o[0] == \"type\":\n return _find_class(o[1])\n\n # reference\n if type(o[0]) is int:\n assert index is None # index should be None\n index = o[0]\n if self._unwrapped[index] is None:\n o = self.memo[index]\n assert type(o) is list\n assert type(o[0]) is str\n self.unwrap(o, index)\n assert self._unwrapped[index] is not None\n return self._unwrapped[index]\n \n # concrete reference type\n if o[0] == \"tuple\":\n ret = tuple([self.unwrap(i) for i in o[1]])\n self.tag(index, ret)\n return ret\n if o[0] == \"bytes\":\n ret = bytes(o[1])\n self.tag(index, ret)\n return ret\n if o[0] == \"list\":\n ret = []\n self.tag(index, ret)\n for i in o[1]:\n ret.append(self.unwrap(i))\n return ret\n if o[0] == \"dict\":\n ret = {}\n self.tag(index, ret)\n for k,v in o[1]:\n ret[self.unwrap(k)] = self.unwrap(v)\n return ret\n \n # generic object\n cls = _find_class(o[0])\n # if getattr(cls, '__struct__', False):\n if False:\n inst = cls.fromstruct(struct.fromhex(o[1]))\n self.tag(index, inst)\n return inst\n else:\n _, newargs, state = o\n # create uninitialized instance\n new_f = getattr(cls, \"__new__\")\n if newargs is not None:\n newargs = [self.unwrap(i) for i in newargs]\n inst = new_f(cls, *newargs)\n else:\n inst = new_f(cls)\n self.tag(index, inst)\n # restore state\n if state is not None:\n for k,v in state.items():\n setattr(inst, k, self.unwrap(v))\n return inst\n\n def run_pipe(self):\n return self.unwrap(self.obj)\n\n\ndef _wrap(o):\n return _Pickler(o).run_pipe()\n\ndef _unwrap(packed: list):\n return _Unpickler(*packed).run_pipe()\n\ndef dumps(o) -> bytes:\n o = _wrap(o)\n return json.dumps(o).encode()\n\ndef loads(b) -> object:\n assert type(b) is bytes\n o = json.loads(b.decode())\n return _unwrap(o)"; const char kPythonLibs_this[] = "print(\"\"\"The Zen of Python, by Tim Peters\n\nBeautiful is better than ugly.\nExplicit is better than implicit.\nSimple is better than complex.\nComplex is better than complicated.\nFlat is better than nested.\nSparse is better than dense.\nReadability counts.\nSpecial cases aren't special enough to break the rules.\nAlthough practicality beats purity.\nErrors should never pass silently.\nUnless explicitly silenced.\nIn the face of ambiguity, refuse the temptation to guess.\nThere should be one-- and preferably only one --obvious way to do it.\nAlthough that way may not be obvious at first unless you're Dutch.\nNow is better than never.\nAlthough never is often better than *right* now.\nIf the implementation is hard to explain, it's a bad idea.\nIf the implementation is easy to explain, it may be a good idea.\nNamespaces are one honking great idea -- let's do more of those!\"\"\")"; const char kPythonLibs_typing[] = "class _Placeholder:\n def __init__(self, *args, **kwargs):\n pass\n def __getitem__(self, *args):\n return self\n def __call__(self, *args, **kwargs):\n return self\n def __and__(self, other):\n return self\n def __or__(self, other):\n return self\n def __xor__(self, other):\n return self\n\n\n_PLACEHOLDER = _Placeholder()\n\nList = _PLACEHOLDER\nDict = _PLACEHOLDER\nTuple = _PLACEHOLDER\nSet = _PLACEHOLDER\nAny = _PLACEHOLDER\nUnion = _PLACEHOLDER\nOptional = _PLACEHOLDER\nCallable = _PLACEHOLDER\nType = _PLACEHOLDER\n\nLiteral = _PLACEHOLDER\nLiteralString = _PLACEHOLDER\n\nIterable = _PLACEHOLDER\nGenerator = _PLACEHOLDER\nIterator = _PLACEHOLDER\n\nHashable = _PLACEHOLDER\n\nTypeVar = _PLACEHOLDER\nSelf = _PLACEHOLDER\n\nProtocol = object\nGeneric = object\n\nTYPE_CHECKING = False\n\n# decorators\noverload = lambda x: x\nfinal = lambda x: x\n"; @@ -25,7 +24,6 @@ const char* load_kPythonLib(const char* name) { if (strcmp(name, "functools") == 0) return kPythonLibs_functools; if (strcmp(name, "heapq") == 0) return kPythonLibs_heapq; if (strcmp(name, "operator") == 0) return kPythonLibs_operator; - if (strcmp(name, "pickle") == 0) return kPythonLibs_pickle; if (strcmp(name, "this") == 0) return kPythonLibs_this; if (strcmp(name, "typing") == 0) return kPythonLibs_typing; return NULL; diff --git a/src/interpreter/vm.c b/src/interpreter/vm.c index 3d948edc..ed072fae 100644 --- a/src/interpreter/vm.c +++ b/src/interpreter/vm.c @@ -216,6 +216,7 @@ void VM__ctor(VM* self) { pk__add_module_traceback(); pk__add_module_enum(); pk__add_module_inspect(); + pk__add_module_pickle(); pk__add_module_conio(); pk__add_module_pkpy(); diff --git a/src/modules/pickle.c b/src/modules/pickle.c new file mode 100644 index 00000000..c95eea78 --- /dev/null +++ b/src/modules/pickle.c @@ -0,0 +1,428 @@ +#include "pocketpy/common/vector.h" +#include "pocketpy/pocketpy.h" + +#include "pocketpy/common/utils.h" +#include "pocketpy/common/sstream.h" +#include "pocketpy/interpreter/vm.h" +#include + +typedef enum { + // clang-format off + PKL_NONE, + PKL_INT8, PKL_INT16, PKL_INT32, PKL_INT64, + PKL_FLOAT32, PKL_FLOAT64, + PKL_TRUE, PKL_FALSE, + PKL_STRING, PKL_BYTES, + PKL_BUILD_LIST, + PKL_BUILD_TUPLE, + PKL_BUILD_DICT, + PKL_VEC2, PKL_VEC3, + PKL_VEC2I, PKL_VEC3I, + PKL_TYPE, + PKL_EOF, + // clang-format on +} PickleOp; + +typedef struct { + c11_vector /*T=char*/ codes; +} PickleObject; + +static void PickleObject__ctor(PickleObject* self) { c11_vector__ctor(&self->codes, sizeof(char)); } + +static void PickleObject__dtor(PickleObject* self) { c11_vector__dtor(&self->codes); } + +static void PickleObject__py_submit(PickleObject* self, py_OutRef out) { + int size; + unsigned char* data = c11_vector__submit(&self->codes, &size); + unsigned char* out_data = py_newbytes(out, size); + memcpy(out_data, data, size); +} + +static void PickleObject__write_bytes(PickleObject* buf, const void* data, int size) { + c11_vector__extend(char, &buf->codes, data, size); +} + +static void pkl__emit_op(PickleObject* buf, PickleOp op) { + c11_vector__push(char, &buf->codes, op); +} + +static void pkl__emit_int(PickleObject* buf, py_i64 val) { + if(val >= INT8_MIN && val <= INT8_MAX) { + pkl__emit_op(buf, PKL_INT8); + PickleObject__write_bytes(buf, &val, 1); + } else if(val >= INT16_MIN && val <= INT16_MAX) { + pkl__emit_op(buf, PKL_INT16); + PickleObject__write_bytes(buf, &val, 2); + } else if(val >= INT32_MIN && val <= INT32_MAX) { + pkl__emit_op(buf, PKL_INT32); + PickleObject__write_bytes(buf, &val, 4); + } else { + pkl__emit_op(buf, PKL_INT64); + PickleObject__write_bytes(buf, &val, 8); + } +} + +#define UNALIGNED_READ(p_val, p_buf) \ + do { \ + memcpy((p_val), (p_buf), sizeof(*(p_val))); \ + (p_buf) += sizeof(*(p_val)); \ + } while(0) + +static py_i64 pkl__read_int(const unsigned char** p) { + PickleOp op = (PickleOp) * *p; + (*p)++; + switch(op) { + case PKL_INT8: { + int8_t val; + UNALIGNED_READ(&val, *p); + return val; + } + case PKL_INT16: { + int16_t val; + UNALIGNED_READ(&val, *p); + return val; + } + case PKL_INT32: { + int32_t val; + UNALIGNED_READ(&val, *p); + return val; + } + case PKL_INT64: { + int64_t val; + UNALIGNED_READ(&val, *p); + return val; + } + default: c11__abort("pkl__read_int(): invalid op: %d", op); + } +} + +const static char* pkl__read_cstr(const unsigned char** p) { + const char* p_str = (const char*)*p; + int length = strlen(p_str); + *p += length + 1; // include '\0' + return p_str; +} + +static bool pickle_loads(int argc, py_Ref argv) { + PY_CHECK_ARGC(1); + PY_CHECK_ARG_TYPE(0, tp_bytes); + int size; + const unsigned char* data = py_tobytes(argv, &size); + return py_pickle_loads(data, size); +} + +static bool pickle_dumps(int argc, py_Ref argv) { + PY_CHECK_ARGC(1); + return py_pickle_dumps(argv); +} + +void pk__add_module_pickle() { + py_Ref mod = py_newmodule("pickle"); + + int x = 1; + bool is_little_endian = *(char*)&x == 1; + if(!is_little_endian) c11__abort("is_little_endian != true"); + + py_bindfunc(mod, "loads", pickle_loads); + py_bindfunc(mod, "dumps", pickle_dumps); +} + +static bool pickle__write_object(PickleObject* buf, py_TValue* obj); + +static bool pickle__write_array(PickleObject* buf, PickleOp op, py_TValue* arr, int length) { + for(int i = 0; i < length; i++) { + bool ok = pickle__write_object(buf, arr + i); + if(!ok) return false; + } + pkl__emit_op(buf, op); + pkl__emit_int(buf, length); + return true; +} + +static bool pickle__write_dict_kv(py_Ref k, py_Ref v, void* ctx) { + PickleObject* buf = (PickleObject*)ctx; + if(!pickle__write_object(buf, k)) return false; + if(!pickle__write_object(buf, v)) return false; + return true; +} + +static bool pickle__write_object(PickleObject* buf, py_TValue* obj) { + switch(obj->type) { + case tp_NoneType: { + pkl__emit_op(buf, PKL_NONE); + return true; + } + case tp_int: { + py_i64 val = obj->_i64; + pkl__emit_int(buf, val); + return true; + } + case tp_float: { + py_f64 val = obj->_f64; + float val32 = (float)val; + if(val == val32) { + pkl__emit_op(buf, PKL_FLOAT32); + PickleObject__write_bytes(buf, &val32, 4); + } else { + pkl__emit_op(buf, PKL_FLOAT64); + PickleObject__write_bytes(buf, &val, 8); + } + return true; + } + case tp_bool: { + bool val = obj->_bool; + pkl__emit_op(buf, val ? PKL_TRUE : PKL_FALSE); + return true; + } + case tp_str: { + pkl__emit_op(buf, PKL_STRING); + c11_sv sv = py_tosv(obj); + pkl__emit_int(buf, sv.size); + PickleObject__write_bytes(buf, sv.data, sv.size); + return true; + } + case tp_bytes: { + pkl__emit_op(buf, PKL_BYTES); + int size; + unsigned char* data = py_tobytes(obj, &size); + pkl__emit_int(buf, size); + PickleObject__write_bytes(buf, data, size); + return true; + } + case tp_list: { + return pickle__write_array(buf, PKL_BUILD_LIST, py_list_data(obj), py_list_len(obj)); + } + case tp_tuple: { + return pickle__write_array(buf, PKL_BUILD_TUPLE, py_tuple_data(obj), py_tuple_len(obj)); + } + case tp_dict: { + bool ok = py_dict_apply(obj, pickle__write_dict_kv, (void*)buf); + if(!ok) return false; + pkl__emit_op(buf, PKL_BUILD_DICT); + pkl__emit_int(buf, py_dict_len(obj)); + return true; + } + case tp_vec2: { + c11_vec2 val = py_tovec2(obj); + pkl__emit_op(buf, PKL_VEC2); + PickleObject__write_bytes(buf, &val, sizeof(c11_vec2)); + return true; + } + case tp_vec3: { + c11_vec3 val = py_tovec3(obj); + pkl__emit_op(buf, PKL_VEC3); + PickleObject__write_bytes(buf, &val, sizeof(c11_vec3)); + return true; + } + case tp_vec2i: { + c11_vec2i val = py_tovec2i(obj); + pkl__emit_op(buf, PKL_VEC2I); + pkl__emit_int(buf, val.x); + pkl__emit_int(buf, val.y); + return true; + } + case tp_vec3i: { + c11_vec3i val = py_tovec3i(obj); + pkl__emit_op(buf, PKL_VEC3I); + pkl__emit_int(buf, val.x); + pkl__emit_int(buf, val.y); + pkl__emit_int(buf, val.z); + return true; + } + case tp_type: { + pkl__emit_op(buf, PKL_TYPE); + py_TypeInfo* ti = pk__type_info(py_totype(obj)); + const char* mod_name = py_tostr(py_getdict(&ti->module, __name__)); + c11_sbuf path_buf; + c11_sbuf__ctor(&path_buf); + c11_sbuf__write_cstr(&path_buf, mod_name); + c11_sbuf__write_cstr(&path_buf, "@"); + c11_sbuf__write_cstr(&path_buf, py_name2str(ti->name)); + c11_string* path = c11_sbuf__submit(&path_buf); + // include '\0' + PickleObject__write_bytes(buf, path->data, path->size + 1); + c11_string__delete(path); + return true; + } + default: return TypeError("'%t' object is not picklable", obj->type); + } +} + +bool py_pickle_dumps(py_Ref val) { + PickleObject buf; + PickleObject__ctor(&buf); + bool ok = pickle__write_object(&buf, val); + if(!ok) { + PickleObject__dtor(&buf); + return false; + } + pkl__emit_op(&buf, PKL_EOF); + PickleObject__py_submit(&buf, py_retval()); + return true; +} + +bool py_pickle_loads(const unsigned char* data, int size) { + py_StackRef p0 = py_peek(0); + const unsigned char* p = data; + while(true) { + PickleOp op = (PickleOp)*p; + p++; + switch(op) { + case PKL_NONE: { + py_pushnone(); + break; + } + case PKL_INT8: { + int8_t val; + UNALIGNED_READ(&val, p); + py_newint(py_pushtmp(), val); + break; + } + case PKL_INT16: { + int16_t val; + UNALIGNED_READ(&val, p); + py_newint(py_pushtmp(), val); + break; + } + case PKL_INT32: { + int32_t val; + UNALIGNED_READ(&val, p); + py_newint(py_pushtmp(), val); + break; + } + case PKL_INT64: { + int64_t val; + UNALIGNED_READ(&val, p); + py_newint(py_pushtmp(), val); + break; + } + case PKL_FLOAT32: { + float val; + UNALIGNED_READ(&val, p); + py_newfloat(py_pushtmp(), val); + break; + } + case PKL_FLOAT64: { + double val; + UNALIGNED_READ(&val, p); + py_newfloat(py_pushtmp(), val); + break; + } + case PKL_TRUE: { + py_newbool(py_pushtmp(), true); + break; + } + case PKL_FALSE: { + py_newbool(py_pushtmp(), false); + break; + } + case PKL_STRING: { + int size = pkl__read_int(&p); + char* dst = py_newstrn(py_pushtmp(), size); + memcpy(dst, p, size); + p += size; + break; + } + case PKL_BYTES: { + int size = pkl__read_int(&p); + unsigned char* dst = py_newbytes(py_pushtmp(), size); + memcpy(dst, p, size); + p += size; + break; + } + case PKL_BUILD_LIST: { + int length = pkl__read_int(&p); + py_OutRef val = py_retval(); + py_newlistn(val, length); + for(int i = length - 1; i >= 0; i--) { + py_StackRef item = py_peek(-1); + py_list_setitem(val, i, item); + py_pop(); + } + py_push(val); + break; + } + case PKL_BUILD_TUPLE: { + int length = pkl__read_int(&p); + py_OutRef val = py_retval(); + py_newtuple(val, length); + for(int i = length - 1; i >= 0; i--) { + py_StackRef item = py_peek(-1); + py_tuple_setitem(val, i, item); + py_pop(); + } + py_push(val); + break; + } + case PKL_BUILD_DICT: { + int length = pkl__read_int(&p); + py_OutRef val = py_pushtmp(); + py_newdict(val); + py_StackRef begin = py_peek(-1) - 2 * length; + py_StackRef end = py_peek(-1); + for(py_StackRef i = begin; i < end; i += 2) { + py_StackRef k = i; + py_StackRef v = i + 1; + bool ok = py_dict_setitem(val, k, v); + if(!ok) return false; + } + py_assign(py_retval(), val); + py_shrink(2 * length + 1); + py_push(py_retval()); + break; + } + case PKL_VEC2: { + c11_vec2 val; + UNALIGNED_READ(&val, p); + py_newvec2(py_pushtmp(), val); + break; + } + case PKL_VEC3: { + c11_vec3 val; + UNALIGNED_READ(&val, p); + py_newvec3(py_pushtmp(), val); + break; + } + case PKL_VEC2I: { + c11_vec2i val; + val.x = pkl__read_int(&p); + val.y = pkl__read_int(&p); + py_newvec2i(py_pushtmp(), val); + break; + } + case PKL_VEC3I: { + c11_vec3i val; + val.x = pkl__read_int(&p); + val.y = pkl__read_int(&p); + val.z = pkl__read_int(&p); + py_newvec3i(py_pushtmp(), val); + break; + } + case PKL_TYPE: { + const char* path = pkl__read_cstr(&p); + char* sep_index = strchr(path, '@'); + assert(sep_index != NULL); + *sep_index = '\0'; + const char* mod_name = path; + const char* type_name = sep_index + 1; + py_Type t = py_gettype(mod_name, py_name(type_name)); + *sep_index = '@'; + if(t == 0) { + return ImportError("cannot import '%s' from '%s'", type_name, mod_name); + } + py_push(py_tpobject(t)); + break; + } + case PKL_EOF: { + if(py_peek(0) - p0 != 1) { return ValueError("invalid pickle data"); } + py_assign(py_retval(), p0); + py_pop(); + return true; + } + default: c11__unreachable(); + } + } + return true; +} + +#undef UNALIGNED_READ \ No newline at end of file diff --git a/tests/90_pickle.py b/tests/90_pickle.py index e751fb77..34fb1bb4 100644 --- a/tests/90_pickle.py +++ b/tests/90_pickle.py @@ -1,3 +1,44 @@ +import pickle as pkl + +def test(data): # type: ignore + print('-'*50) + b = pkl.dumps(data) + print(b) + o = pkl.loads(b) + print(o) + assert data == o + +test(None) # PKL_NONE +test(1) # PKL_INT8 +test(277) # PKL_INT16 +test(-66666) # PKL_INT32 +test(0xffffffffffff) # PKL_INT64 +test(1.0) # PKL_FLOAT32 +test(1.12312434234) # PKL_FLOAT64 +test(True) # PKL_TRUE +test(False) # PKL_FALSE +test("hello") # PKL_STRING +test(b"hello") # PKL_BYTES + +from linalg import vec2, vec3, vec2i, vec3i + +test(vec2(2/3, 1.0)) # PKL_VEC2 +test(vec3(2/3, 1.0, 3.0)) # PKL_VEC3 +test(vec2i(1, 2)) # PKL_VEC2I +test(vec3i(1, 2, 3)) # PKL_VEC3I + +test(vec3i) # PKL_TYPE + +test([1, 2, 3]) # PKL_LIST +test((1, 2, 3)) # PKL_TUPLE +test({1: 2, 3: 4}) # PKL_DICT + +# test complex data +test([1, '2', True]) +test([1, '2', 3.0, True]) +test([1, '2', True, {'key': 4}]) +test([1, '2', 3.0, True, {'k1': 4, 'k2': [b'xxxx']}]) + exit() from pickle import dumps, loads, _wrap, _unwrap