Compare commits

...

11 Commits

Author SHA1 Message Date
BLUELOVETH
335783b3ab
Update json.c 2024-12-16 21:12:19 +08:00
BLUELOVETH
41373b825a
Update json.c 2024-12-16 21:10:40 +08:00
blueloveTH
468c02244e update io 2024-12-16 20:48:08 +08:00
blueloveTH
3eeaeaa65d add io module and open 2024-12-16 20:40:04 +08:00
blueloveTH
4c533b03cb ... 2024-12-16 19:54:08 +08:00
blueloveTH
2ef181f0f4 add benchmarks 2024-12-16 19:00:18 +08:00
blueloveTH
eb35c33dd5 ... 2024-12-16 18:51:25 +08:00
blueloveTH
86dc516791 ... 2024-12-16 18:25:31 +08:00
blueloveTH
58b5455871 support object pickle 2024-12-16 17:53:29 +08:00
blueloveTH
1b4902dbd3 support custom __reduce__ 2024-12-16 17:22:25 +08:00
blueloveTH
d5d7853598 fix a bug 2024-12-16 16:55:12 +08:00
14 changed files with 284 additions and 23 deletions

View File

@ -0,0 +1,24 @@
# Benchmark script: repeatedly round-trips one nested dict through
# json.dumps() / json.loads().
import json
# Payload lists: each 2- or 3-element base list is repeated 100 times.
data1 = [1, 2, 3] * 100
data2 = [1.0, 2.0, 3.0] * 100
data3 = ['abcdefg', 'hijklmn', '_______________1'] * 100
data4 = [True, False, True] * 100
data5 = [None, None] * 100
# All keys are strings; the '45' entry nests a second dict one level down.
original = {
'1': data1,
'2': data2,
'3': data3,
'45': {
'4': data4,
'5': data5,
}
}
# 10000 encode/decode round trips; the decoded value is compared with the
# input only when i == 0.
# NOTE(review): leading indentation is not visible in this view -- the
# statements below are presumably the loop body (with the assert nested under
# the `if`); confirm against the real file.
for i in range(10000):
encoded = json.dumps(original)
decoded = json.loads(encoded)
if i == 0:
assert original == decoded

View File

@ -0,0 +1,24 @@
# Benchmark script: repeatedly round-trips one nested dict through
# pickle.dumps() / pickle.loads().
import pickle
# Payload lists: each 2- or 3-element base list is repeated 100 times.
data1 = [1, 2, 3] * 100
data2 = [1.0, 2.0, 3.0] * 100
data3 = ['abcdefg', 'hijklmn', '_______________1'] * 100
data4 = [True, False, True] * 100
data5 = [None, None] * 100
# All keys are strings; the '45' entry nests a second dict one level down.
original = {
'1': data1,
'2': data2,
'3': data3,
'45': {
'4': data4,
'5': data5,
}
}
# 10000 dump/load round trips; the decoded value is compared with the input
# only when i == 0.
# NOTE(review): leading indentation is not visible in this view -- the
# statements below are presumably the loop body (with the assert nested under
# the `if`); confirm against the real file.
for i in range(10000):
encoded = pickle.dumps(original)
decoded = pickle.loads(encoded)
if i == 0:
assert original == decoded

View File

@ -1,5 +1,4 @@
exit(0)
import os
os.chdir('benchmarks')
import json
@ -19,9 +18,3 @@ dumped: str = json.dumps(data)
loaded: dict = json.loads(dumped)
assert len(data) == len(loaded)
assert data == loaded
#### very very slow!! DO NOT RUN IT
# import pickle
# data_pickled: bytes = pickle.dumps(data)
# assert isinstance(data_pickled, bytes)
# assert pickle.loads(data_pickled) == data

View File

@ -2,6 +2,8 @@
void pk__add_module_os();
void pk__add_module_sys();
void pk__add_module_io();
void pk__add_module_math();
void pk__add_module_dis();
void pk__add_module_random();

View File

@ -63,6 +63,7 @@ MAGIC_METHOD(__float__)
MAGIC_METHOD(__int__)
MAGIC_METHOD(__round__)
MAGIC_METHOD(__getattr__)
MAGIC_METHOD(__reduce__)
MAGIC_METHOD(__missing__)
#endif

View File

@ -947,7 +947,7 @@ FrameResult VM__run_top_frame(VM* self) {
}
py_Type type =
pk_newtype(py_name2str(name), base, frame->module, NULL, true, false);
pk_newtype(py_name2str(name), base, frame->module, NULL, base_ti->is_python, false);
PUSH(py_tpobject(type));
self->__curr_class = TOP();
DISPATCH();

View File

@ -206,6 +206,7 @@ void VM__ctor(VM* self) {
// add modules
pk__add_module_os();
pk__add_module_sys();
pk__add_module_io();
pk__add_module_math();
pk__add_module_dis();
pk__add_module_random();

View File

@ -24,6 +24,11 @@ void pk__add_module_json() {
py_setdict(mod, py_name("null"), py_None());
py_setdict(mod, py_name("true"), py_True());
py_setdict(mod, py_name("false"), py_False());
py_TValue tmp;
py_newfloat(&tmp, NAN);
py_setdict(mod, py_name("NaN"), &tmp);
py_newfloat(&tmp, INFINITY);
py_setdict(mod, py_name("Infinity"), &tmp);
py_bindfunc(mod, "loads", json_loads);
py_bindfunc(mod, "dumps", json_dumps);
@ -53,7 +58,7 @@ static bool json__write_dict_kv(py_Ref k, py_Ref v, void* ctx_) {
ctx->first = false;
if(!py_isstr(k)) return TypeError("keys must be strings");
c11_sbuf__write_quoted(ctx->buf, py_tosv(k), '"');
c11_sbuf__write_char(ctx->buf, ':');
c11_sbuf__write_cstr(ctx->buf, ": ");
return json__write_object(ctx->buf, v);
}

View File

@ -1,9 +1,7 @@
#include "pocketpy/objects/base.h"
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.h"
#include "pocketpy/common/sstream.h"
#include "pocketpy/interpreter/vm.h"
#include <errno.h>
#if PY_SYS_PLATFORM == 0
#include <direct.h>
@ -68,4 +66,98 @@ void pk__add_module_sys() {
py_newstr(py_emplacedict(mod, py_name("platform")), PY_SYS_PLATFORM_STRING);
py_newstr(py_emplacedict(mod, py_name("version")), PK_VERSION);
py_newlist(py_emplacedict(mod, py_name("argv")));
}
}
// Native payload carried by every `io.FileIO` object (the type that is also
// registered as the builtin `open`).
typedef struct {
// Path handed to fopen(); the pointer is the result of py_tostr() on the
// `file` argument of __new__.
// NOTE(review): presumably points into the Python str object rather than a
// private copy -- confirm that object outlives this struct.
const char* path;
// Mode string handed to fopen(); read() and write() test its last character
// for 'b' to choose between bytes and str.
// NOTE(review): same lifetime question as `path`.
const char* mode;
// Stream returned by fopen(); set to NULL once the file has been closed.
FILE* file;
} io_FileIO;
// FileIO.__new__(cls, file, mode)
//
// Creates the FileIO object in py_retval() and opens `file` with fopen() using
// `mode` verbatim. Both arguments must be `str`.
// Returns false with an IOError set ("[Errno N] message: path") when fopen()
// fails, true otherwise.
static bool io_FileIO__new__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(3);
    PY_CHECK_ARG_TYPE(1, tp_str);
    PY_CHECK_ARG_TYPE(2, tp_str);
    py_Type cls = py_totype(argv);

    // `path` and `mode` below are raw pointers obtained from the argument str
    // objects. Store those objects in two slots of the new FileIO so they stay
    // reachable for as long as the FileIO itself is; otherwise the pointers
    // could dangle once the caller drops its temporaries.
    io_FileIO* ud = py_newobject(py_retval(), cls, 2, sizeof(io_FileIO));
    py_setslot(py_retval(), 0, py_arg(1));
    py_setslot(py_retval(), 1, py_arg(2));
    ud->path = py_tostr(py_arg(1));
    ud->mode = py_tostr(py_arg(2));

    ud->file = fopen(ud->path, ud->mode);
    if(ud->file == NULL) {
        // Capture errno first: later library calls are allowed to overwrite it.
        int err = errno;
        const char* msg = strerror(err);
        return IOError("[Errno %d] %s: %s", err, msg, ud->path);
    }
    return true;
}
// FileIO.__enter__(self): the context manager yields the file object itself.
static bool io_FileIO__enter__(int argc, py_Ref argv) {
    (void)argc;  // the argument count is not validated here
    py_Ref self = py_arg(0);
    py_assign(py_retval(), self);
    return true;
}
// FileIO.__exit__(self, ...): closes the underlying stream if it is still open.
// The argument count is deliberately not validated. Always succeeds and
// returns None.
static bool io_FileIO__exit__(int argc, py_Ref argv) {
    (void)argc;
    io_FileIO* ud = py_touserdata(py_arg(0));
    if(ud->file != NULL) {
        fclose(ud->file);
        ud->file = NULL;  // marks the object as closed; makes a second close a no-op
    }
    // Write the return slot explicitly so the caller never observes whatever
    // value a previous call left in py_retval().
    py_newnone(py_retval());
    return true;
}
// FileIO.read(self)
//
// Rewinds the stream and returns the entire file content: `bytes` when the
// mode string ends with 'b', `str` otherwise.
// Raises ValueError when the file has already been closed and IOError when
// the size cannot be determined or the read itself fails.
static bool io_FileIO_read(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    io_FileIO* ud = py_touserdata(py_arg(0));
    // close()/__exit__ reset `file` to NULL; passing NULL to stdio is undefined.
    if(ud->file == NULL) return ValueError("I/O operation on closed file");

    // Determine the size by seeking to the end, then rewind to the start.
    long end_pos = -1;
    if(fseek(ud->file, 0, SEEK_END) == 0) end_pos = ftell(ud->file);
    if(end_pos < 0 || fseek(ud->file, 0, SEEK_SET) != 0) {
        int err = errno;
        return IOError("[Errno %d] %s: %s", err, strerror(err), ud->path);
    }
    int filesize = (int)end_pos;

    void* dst;
    if(ud->mode[strlen(ud->mode) - 1] == 'b') {
        dst = py_newbytes(py_retval(), filesize);
    } else {
        dst = py_newstrn(py_retval(), filesize);
    }

    size_t got = fread(dst, 1, (size_t)filesize, ud->file);
    if(got < (size_t)filesize) {
        if(ferror(ud->file)) {
            int err = errno;
            return IOError("[Errno %d] %s: %s", err, strerror(err), ud->path);
        }
        // Short read without an error (e.g. newline translation on a text
        // stream): zero the tail instead of exposing uninitialised memory.
        memset((char*)dst + got, 0, (size_t)filesize - got);
    }
    return true;
}
// FileIO.close(self): closes the underlying stream if it is still open.
// Calling it again on an already-closed file is a no-op. Returns None.
static bool io_FileIO_close(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    io_FileIO* ud = py_touserdata(py_arg(0));
    if(ud->file != NULL) {
        fclose(ud->file);
        ud->file = NULL;  // marks the object as closed
    }
    // Write the return slot explicitly so the caller never observes whatever
    // value a previous call left in py_retval().
    py_newnone(py_retval());
    return true;
}
// FileIO.write(self, data)
//
// Writes `data` to the stream. `data` must be `bytes` when the mode string
// ends with 'b' and `str` otherwise (TypeError on mismatch).
// Returns the number of bytes written.
// Raises ValueError when the file has already been closed and IOError when
// fewer bytes than requested could be written.
static bool io_FileIO_write(int argc, py_Ref argv) {
    PY_CHECK_ARGC(2);
    io_FileIO* ud = py_touserdata(py_arg(0));
    // close()/__exit__ reset `file` to NULL; passing NULL to stdio is undefined.
    if(ud->file == NULL) return ValueError("I/O operation on closed file");

    size_t wanted;
    size_t written;
    if(ud->mode[strlen(ud->mode) - 1] == 'b') {
        PY_CHECK_ARG_TYPE(1, tp_bytes);
        int filesize;
        unsigned char* data = py_tobytes(py_arg(1), &filesize);
        wanted = (size_t)filesize;
        written = fwrite(data, 1, wanted, ud->file);
    } else {
        PY_CHECK_ARG_TYPE(1, tp_str);
        c11_sv sv = py_tosv(py_arg(1));
        wanted = (size_t)sv.size;
        written = fwrite(sv.data, 1, wanted, ud->file);
    }

    // fwrite() reports failure only through a short count.
    if(written != wanted) {
        int err = errno;
        return IOError("[Errno %d] %s: %s", err, strerror(err), ud->path);
    }
    py_newint(py_retval(), (py_i64)written);
    return true;
}
// Registers the `io` module: defines its `FileIO` type, binds the type's
// magic and regular methods, and publishes the type as the builtin `open`.
void pk__add_module_io() {
    py_Ref io_module = py_newmodule("io");
    py_Type file_type = pk_newtype("FileIO", tp_object, io_module, NULL, false, true);

    // construction and context-manager protocol
    py_bindmagic(file_type, __new__, io_FileIO__new__);
    py_bindmagic(file_type, __enter__, io_FileIO__enter__);
    py_bindmagic(file_type, __exit__, io_FileIO__exit__);

    // regular methods
    py_bindmethod(file_type, "read", io_FileIO_read);
    py_bindmethod(file_type, "write", io_FileIO_write);
    py_bindmethod(file_type, "close", io_FileIO_close);

    // `open` in builtins is the FileIO type object itself
    py_Ref builtins = &pk_current_vm->builtins;
    py_setdict(builtins, py_name("open"), py_tpobject(file_type));
}

View File

@ -1,4 +1,5 @@
#include "pocketpy/common/vector.h"
#include "pocketpy/interpreter/typeinfo.h"
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
@ -10,7 +11,7 @@ typedef enum {
// clang-format off
PKL_MEMO_GET,
PKL_MEMO_SET,
PKL_NONE, PKL_ELLIPSIS,
PKL_NIL, PKL_NONE, PKL_ELLIPSIS,
PKL_INT_0, PKL_INT_1, PKL_INT_2, PKL_INT_3, PKL_INT_4, PKL_INT_5, PKL_INT_6, PKL_INT_7,
PKL_INT_8, PKL_INT_9, PKL_INT_10, PKL_INT_11, PKL_INT_12, PKL_INT_13, PKL_INT_14, PKL_INT_15,
PKL_INT8, PKL_INT16, PKL_INT32, PKL_INT64,
@ -25,6 +26,8 @@ typedef enum {
PKL_TYPE,
PKL_ARRAY2D,
PKL_TVALUE,
PKL_CALL,
PKL_OBJECT,
PKL_EOF,
// clang-format on
} PickleOp;
@ -62,8 +65,8 @@ static void c11_sbuf__write_type_path(c11_sbuf* path_buf, py_Type type) {
c11_sbuf__write_cstr(path_buf, py_name2str(ti->name));
return;
}
const char* mod_name = py_tostr(py_getdict(&ti->module, __name__));
c11_sbuf__write_cstr(path_buf, mod_name);
const char* mod_path = py_tostr(py_getdict(&ti->module, __path__));
c11_sbuf__write_cstr(path_buf, mod_path);
c11_sbuf__write_char(path_buf, '.');
c11_sbuf__write_cstr(path_buf, py_name2str(ti->name));
}
@ -102,7 +105,7 @@ static py_i64 pkl__read_int(const unsigned char** p) {
PickleOp op = (PickleOp) * *p;
(*p)++;
switch(op) {
// clang-format off
// clang-format off
case PKL_INT_0: return 0; case PKL_INT_1: return 1; case PKL_INT_2: return 2; case PKL_INT_3: return 3;
case PKL_INT_4: return 4; case PKL_INT_5: return 5; case PKL_INT_6: return 6; case PKL_INT_7: return 7;
case PKL_INT_8: return 8; case PKL_INT_9: return 9; case PKL_INT_10: return 10; case PKL_INT_11: return 11;
@ -190,6 +193,9 @@ static void pkl__store_memo(PickleObject* buf, PyObject* memo_key) {
static bool pkl__write_object(PickleObject* buf, py_TValue* obj) {
switch(obj->type) {
case tp_nil: {
return ValueError("'nil' object is not picklable");
}
case tp_NoneType: {
pkl__emit_op(buf, PKL_NONE);
return true;
@ -339,6 +345,53 @@ static bool pkl__write_object(PickleObject* buf, py_TValue* obj) {
buf->used_types[obj->type] = true;
return true;
}
// try memo for `is_ptr=true` objects
if(pkl__try_memo(buf, obj->_obj)) return true;
py_TypeInfo* ti = pk__type_info(obj->type);
py_Ref f_reduce = py_tpfindmagic(obj->type, __reduce__);
if(f_reduce != NULL) {
if(!py_call(f_reduce, 1, obj)) return false;
// expected: (callable, args)
py_Ref reduced = py_retval();
if(!py_istuple(reduced)) { return TypeError("__reduce__ must return a tuple"); }
if(py_tuple_len(reduced) != 2) {
return TypeError("__reduce__ must return a tuple of length 2");
}
if(!pkl__write_object(buf, py_tuple_getitem(reduced, 0))) return false;
pkl__emit_op(buf, PKL_NIL);
py_Ref args_tuple = py_tuple_getitem(reduced, 1);
int args_length = py_tuple_len(args_tuple);
for(int i = 0; i < args_length; i++) {
if(!pkl__write_object(buf, py_tuple_getitem(args_tuple, i))) return false;
}
pkl__emit_op(buf, PKL_CALL);
pkl__emit_int(buf, args_length);
// store memo
pkl__store_memo(buf, obj->_obj);
return true;
}
if(ti->is_python) {
NameDict* dict = PyObject__dict(obj->_obj);
for(int i = dict->length - 1; i >= 0; i--) {
NameDict_KV* kv = c11__at(NameDict_KV, dict, i);
if(!pkl__write_object(buf, &kv->value)) return false;
}
pkl__emit_op(buf, PKL_OBJECT);
pkl__emit_int(buf, obj->type);
buf->used_types[obj->type] = true;
pkl__emit_int(buf, dict->length);
for(int i = 0; i < dict->length; i++) {
NameDict_KV* kv = c11__at(NameDict_KV, dict, i);
c11_sv field = py_name2sv(kv->key);
// include '\0'
PickleObject__write_bytes(buf, field.data, field.size + 1);
}
// store memo
pkl__store_memo(buf, obj->_obj);
return true;
}
return TypeError("'%t' object is not picklable", obj->type);
}
}
@ -441,6 +494,10 @@ bool py_pickle_loads_body(const unsigned char* p, int memo_length, c11_smallmap_
py_tuple_setitem(memo, index, py_peek(-1));
break;
}
case PKL_NIL: {
py_pushnil();
break;
}
case PKL_NONE: {
py_pushnone();
break;
@ -449,7 +506,7 @@ bool py_pickle_loads_body(const unsigned char* p, int memo_length, c11_smallmap_
py_newellipsis(py_pushtmp());
break;
}
// clang-format off
// clang-format off
case PKL_INT_0: case PKL_INT_1: case PKL_INT_2: case PKL_INT_3:
case PKL_INT_4: case PKL_INT_5: case PKL_INT_6: case PKL_INT_7:
case PKL_INT_8: case PKL_INT_9: case PKL_INT_10: case PKL_INT_11:
@ -609,6 +666,28 @@ bool py_pickle_loads_body(const unsigned char* p, int memo_length, c11_smallmap_
p += sizeof(py_TValue);
break;
}
case PKL_CALL: {
int argc = pkl__read_int(&p);
if(!py_vectorcall(argc, 0)) return false;
py_push(py_retval());
break;
}
case PKL_OBJECT: {
py_Type type = (py_Type)pkl__read_int(&p);
type = pkl__fix_type(type, type_mapping);
py_newobject(py_retval(), type, -1, 0);
NameDict* dict = PyObject__dict(py_retval()->_obj);
int dict_length = pkl__read_int(&p);
for(int i = 0; i < dict_length; i++) {
py_StackRef value = py_peek(-1);
c11_sv field = {(const char*)p, strlen((const char*)p)};
NameDict__set(dict, py_namev(field), *value);
py_pop();
p += field.size + 1;
}
py_push(py_retval());
break;
}
case PKL_EOF: {
// [memo, obj]
if(py_peek(0) - p0 != 2) return ValueError("invalid pickle data");

View File

@ -1,8 +1,6 @@
#include "pocketpy/common/str.h"
#include "pocketpy/objects/base.h"
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.h"
#include "pocketpy/interpreter/vm.h"

View File

@ -7,7 +7,7 @@ bool pk__object_new(int argc, py_Ref argv) {
py_Type cls = py_totype(py_arg(0));
py_TypeInfo* ti = pk__type_info(cls);
if(!ti->is_python) {
return TypeError("object.__new__(%t) is not safe, use %t.__new__()", cls, cls);
return TypeError("object.__new__(%t) is not safe, use %t.__new__() instead", cls, cls);
}
py_newobject(py_retval(), cls, -1, 0);
return true;

View File

@ -1,3 +1,5 @@
exit()
try:
import os
import io

View File

@ -101,6 +101,46 @@ a = array2d[TVal].fromlist([
[TVal(3), 1]])
test(a)
# test __reduce__
class A:
def __init__(self, seed):
self.seed = seed
self.x = seed
self.y = seed + 1
self.z = seed + 2
def __eq__(self, other):
return (self.x, self.y, self.z) == (other.x, other.y, other.z)
def __ne__(self, other):
return (self.x, self.y, self.z) != (other.x, other.y, other.z)
def __repr__(self):
return f"A({self.seed}, x={self.x}, y={self.y}, z={self.z})"
def __reduce__(self):
print('__reduce__() called')
return A, (self.seed,)
test([A(1)]*10)
class Simple:
def __init__(self, x):
self.field1 = x
self.field2 = [...]
def __eq__(self, other): return self.field1 == other.field1
def __ne__(self, other): return self.field1 != other.field1
test(Simple(1))
test([Simple(2)]*10)
from dataclasses import dataclass
@dataclass
class Data:
a: int
b: str = '2'
c: float = 3.0
test(Data(1))
exit()
from pickle import dumps, loads, _wrap, _unwrap