From cdcdded9a3d491a510aa68868a02bc3526eb471b Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 15 Oct 2023 16:47:53 +0800 Subject: [PATCH] some fix --- 3rd/cjson/src/cJSONw.cpp | 17 +++++----- benchmarks/ldtk_cjson.py | 21 +++++++++--- benchmarks/ldtk_json.py | 11 ++++--- docs/features/basic.md | 1 - include/pocketpy/str.h | 2 +- include/pocketpy/vm.h | 2 -- src/compiler.cpp | 16 ++++----- src/error.cpp | 3 +- src/pocketpy.cpp | 71 ---------------------------------------- src/str.cpp | 8 +++-- src/vm.cpp | 71 ++++++++++++++++++++++++++++++++++++++-- tests/01_int.py | 3 +- 12 files changed, 117 insertions(+), 109 deletions(-) diff --git a/3rd/cjson/src/cJSONw.cpp b/3rd/cjson/src/cJSONw.cpp index 9877962f..3e832248 100644 --- a/3rd/cjson/src/cJSONw.cpp +++ b/3rd/cjson/src/cJSONw.cpp @@ -117,9 +117,7 @@ void add_module_cjson(VM* vm){ if(json == NULL){ const char* start = cJSON_GetErrorPtr(); const char* end = start; - while(*end != '\0' && *end != '\n'){ - end++; - } + while(*end != '\0' && *end != '\n') end++; vm->IOError(fmt("cjson: ", std::string_view(start, end-start))); UNREACHABLE(); } @@ -129,12 +127,13 @@ void add_module_cjson(VM* vm){ }); vm->bind_func<1>(mod, "dumps", [](VM* vm, ArgsView args) { - cJSON* cjson = convert_python_object_to_cjson(args[0], vm); - char* str = cJSON_Print(cjson); - cJSON_Delete(cjson); - PyObject* ret = VAR((const char*)str); - hooks.free_fn(str); - return ret; + return vm->py_json(args[0]); + // cJSON* cjson = convert_python_object_to_cjson(args[0], vm); + // char* str = cJSON_Print(cjson); + // cJSON_Delete(cjson); + // PyObject* ret = VAR((const char*)str); + // hooks.free_fn(str); + // return ret; }); } diff --git a/benchmarks/ldtk_cjson.py b/benchmarks/ldtk_cjson.py index 80873819..6310b0e7 100644 --- a/benchmarks/ldtk_cjson.py +++ b/benchmarks/ldtk_cjson.py @@ -16,7 +16,20 @@ with open(f'res/{_2489KB}', 'r') as f: data: dict = json.loads(json_content) assert isinstance(data, dict) -# dumped: str = json.dumps(data) -# loaded: dict = json.loads(dumped) -# assert len(data) == len(loaded) -# assert data == loaded +# serialize and deserialize +dumped: str = json.dumps(data) +for _ in range(10): + loaded: dict = json.loads(dumped) +assert len(data) == len(loaded) +assert data == loaded + +#### very very slow!! +import pickle + +with open(f'res/{_339KB}', 'r') as f: + json_content = f.read() +data: dict = json.loads(json_content) + +data_pickled: bytes = pickle.dumps(data) +assert isinstance(data_pickled, bytes) +assert pickle.loads(data_pickled) == data \ No newline at end of file diff --git a/benchmarks/ldtk_json.py b/benchmarks/ldtk_json.py index b669ee2d..b2277937 100644 --- a/benchmarks/ldtk_json.py +++ b/benchmarks/ldtk_json.py @@ -10,13 +10,14 @@ with open(f'res/{_2489KB}', 'r') as f: data: dict = json.loads(json_content) assert isinstance(data, dict) -# dumped: str = json.dumps(data) -# loaded: dict = json.loads(dumped) -# assert len(data) == len(loaded) -# assert data == loaded +# serialize and deserialize +dumped: str = json.dumps(data) +loaded: dict = json.loads(dumped) +assert len(data) == len(loaded) +assert data == loaded +#### very very slow!! DO NOT RUN IT # import pickle -##### very very slow!! DO NOT RUN IT # data_pickled: bytes = pickle.dumps(data) # assert isinstance(data_pickled, bytes) # assert pickle.loads(data_pickled) == data \ No newline at end of file diff --git a/docs/features/basic.md b/docs/features/basic.md index d34c71b7..baccf82f 100644 --- a/docs/features/basic.md +++ b/docs/features/basic.md @@ -43,7 +43,6 @@ The features marked with `YES` are supported, and the features marked with `NO` + `__len__` + `__iter__` + `__next__` -+ `__json__` + `__neg__` + `__bool__` (unused) diff --git a/include/pocketpy/str.h b/include/pocketpy/str.h index 17e6b7c9..e0463858 100644 --- a/include/pocketpy/str.h +++ b/include/pocketpy/str.h @@ -70,6 +70,7 @@ struct Str{ Str lower() const; Str upper() const; Str escape(bool single_quote=true) const; + void escape_(std::stringstream& ss, bool single_quote=true) const; int index(const Str& sub, int start=0) const; Str replace(char old, char new_) const; Str replace(const Str& old, const Str& new_, int count=-1) const; @@ -146,7 +147,6 @@ const StrName __hash__ = StrName::get("__hash__"); // unused const StrName __len__ = StrName::get("__len__"); const StrName __iter__ = StrName::get("__iter__"); const StrName __next__ = StrName::get("__next__"); // unused -const StrName __json__ = StrName::get("__json__"); const StrName __neg__ = StrName::get("__neg__"); // unused const StrName __bool__ = StrName::get("__bool__"); // unused // logical operators diff --git a/include/pocketpy/vm.h b/include/pocketpy/vm.h index 0478872a..4989fa69 100644 --- a/include/pocketpy/vm.h +++ b/include/pocketpy/vm.h @@ -63,7 +63,6 @@ struct PyTypeInfo{ i64 (*m__len__)(VM* vm, PyObject*) = nullptr; PyObject* (*m__iter__)(VM* vm, PyObject*) = nullptr; PyObject* (*m__next__)(VM* vm, PyObject*) = nullptr; - PyObject* (*m__json__)(VM* vm, PyObject*) = nullptr; PyObject* (*m__neg__)(VM* vm, PyObject*) = nullptr; PyObject* (*m__bool__)(VM* vm, PyObject*) = nullptr; PyObject* (*m__invert__)(VM* vm, PyObject*) = nullptr; @@ -233,7 +232,6 @@ public: BIND_UNARY_SPECIAL(__str__) BIND_UNARY_SPECIAL(__iter__) BIND_UNARY_SPECIAL(__next__) - BIND_UNARY_SPECIAL(__json__) BIND_UNARY_SPECIAL(__neg__) BIND_UNARY_SPECIAL(__bool__) BIND_UNARY_SPECIAL(__invert__) diff --git a/src/compiler.cpp b/src/compiler.cpp index e9f378c6..7a0a650e 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -38,14 +38,14 @@ namespace pkpy{ SyntaxError("maximum number of local variables exceeded"); } if(ctx()->co->consts.size() > 65535){ - // std::map counts; - // for(PyObject* c: ctx()->co->consts){ - // std::string key = obj_type_name(vm, vm->_tp(c)).str(); - // counts[key] += 1; - // } - // for(auto pair: counts){ - // std::cout << pair.first << ": " << pair.second << std::endl; - // } + std::map counts; + for(PyObject* c: ctx()->co->consts){ + std::string key = obj_type_name(vm, vm->_tp(c)).str(); + counts[key] += 1; + } + for(auto pair: counts){ + std::cout << pair.first << ": " << pair.second << std::endl; + } SyntaxError("maximum number of constants exceeded"); } if(codes.size() > 65535 && ctx()->co->src->mode != JSON_MODE){ diff --git a/src/error.cpp b/src/error.cpp index 4d56caf9..1507b3cb 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -25,7 +25,8 @@ namespace pkpy{ if(lineno < 0) lineno = 0; const char* _start = line_starts.at(lineno); const char* i = _start; - while(*i != '\n' && *i != '\0') i++; + // max 200 chars + while(*i != '\n' && *i != '\0' && i-_start < 200) i++; return {_start, i}; } diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index b21d8712..a14a0895 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -406,7 +406,6 @@ void init_builtins(VM* _vm) { _vm->bind__iter__(_vm->tp_range, [](VM* vm, PyObject* obj) { return VAR_T(RangeIter, PK_OBJ_GET(Range, obj)); }); _vm->bind__repr__(_vm->_type("NoneType"), [](VM* vm, PyObject* obj) { return VAR("None"); }); - _vm->bind__json__(_vm->_type("NoneType"), [](VM* vm, PyObject* obj) { return VAR("null"); }); _vm->bind__truediv__(_vm->tp_float, [](VM* vm, PyObject* lhs, PyObject* rhs) { f64 value = CAST_F(rhs); @@ -486,7 +485,6 @@ void init_builtins(VM* _vm) { }); _vm->bind__repr__(_vm->tp_int, [](VM* vm, PyObject* obj) { return VAR(std::to_string(_CAST(i64, obj))); }); - _vm->bind__json__(_vm->tp_int, [](VM* vm, PyObject* obj) { return VAR(std::to_string(_CAST(i64, obj))); }); _vm->bind__neg__(_vm->tp_int, [](VM* vm, PyObject* obj) { return VAR(-_CAST(i64, obj)); }); @@ -550,11 +548,6 @@ void init_builtins(VM* _vm) { if(std::all_of(s.begin()+1, s.end(), isdigit)) s += ".0"; return VAR(s); }); - _vm->bind__json__(_vm->tp_float, [](VM* vm, PyObject* obj) { - f64 val = _CAST(f64, obj); - if(std::isinf(val) || std::isnan(val)) vm->ValueError("cannot jsonify 'nan' or 'inf'"); - return VAR(std::to_string(val)); - }); /************ str ************/ _vm->bind_constructor<2>("str", PK_LAMBDA(vm->py_str(args[1]))); @@ -595,10 +588,6 @@ void init_builtins(VM* _vm) { const Str& self = _CAST(Str&, obj); return VAR(self.escape(true)); }); - _vm->bind__json__(_vm->tp_str, [](VM* vm, PyObject* obj) { - const Str& self = _CAST(Str&, obj); - return VAR(self.escape(false)); - }); #define BIND_CMP_STR(name, op) \ _vm->bind##name(_vm->tp_str, [](VM* vm, PyObject* lhs, PyObject* rhs) { \ @@ -710,16 +699,6 @@ void init_builtins(VM* _vm) { }); /************ list ************/ - // list.__repr__ = lambda self: '[' + ', '.join([repr(i) for i in self]) + ']' - // list.__json__ = lambda self: '[' + ', '.join([i.__json__() for i in self]) + ']' - // tuple.__json__ = lambda self: '[' + ', '.join([i.__json__() for i in self]) + ']' - - // def __f(self): - // if len(self) == 1: - // return '(' + repr(self[0]) + ',)' - // return '(' + ', '.join([repr(i) for i in self]) + ')' - // tuple.__repr__ = __f - _vm->bind__repr__(_vm->tp_list, [](VM* vm, PyObject* _0){ List& iterable = _CAST(List&, _0); std::stringstream ss; @@ -732,18 +711,6 @@ void init_builtins(VM* _vm) { return VAR(ss.str()); }); - _vm->bind__json__(_vm->tp_list, [](VM* vm, PyObject* _0){ - List& iterable = _CAST(List&, _0); - std::stringstream ss; - ss << '['; - for(int i=0; ipy_json(iterable[i])); - if(i != iterable.size()-1) ss << ", "; - } - ss << ']'; - return VAR(ss.str()); - }); - _vm->bind__repr__(_vm->tp_tuple, [](VM* vm, PyObject* _0){ Tuple& iterable = _CAST(Tuple&, _0); std::stringstream ss; @@ -761,18 +728,6 @@ void init_builtins(VM* _vm) { return VAR(ss.str()); }); - _vm->bind__json__(_vm->tp_tuple, [](VM* vm, PyObject* _0){ - Tuple& iterable = _CAST(Tuple&, _0); - std::stringstream ss; - ss << '['; - for(int i=0; ipy_json(iterable[i])); - if(i != iterable.size()-1) ss << ", "; - } - ss << ']'; - return VAR(ss.str()); - }); - _vm->bind_constructor<-1>("list", [](VM* vm, ArgsView args) { if(args.size() == 1+0) return VAR(List()); if(args.size() == 1+1){ @@ -999,10 +954,6 @@ void init_builtins(VM* _vm) { bool val = _CAST(bool, self); return VAR(val ? "True" : "False"); }); - _vm->bind__json__(_vm->tp_bool, [](VM* vm, PyObject* self) { - bool val = _CAST(bool, self); - return VAR(val ? "true" : "false"); - }); _vm->bind__and__(_vm->tp_bool, [](VM* vm, PyObject* lhs, PyObject* rhs) { return VAR(_CAST(bool, lhs) && CAST(bool, rhs)); @@ -1322,28 +1273,6 @@ void init_builtins(VM* _vm) { return VAR(ss.str()); }); - _vm->bind__json__(_vm->tp_dict, [](VM* vm, PyObject* obj) { - Dict& self = _CAST(Dict&, obj); - std::stringstream ss; - ss << "{"; - bool first = true; - - self.apply([&](PyObject* k, PyObject* v){ - if(!first) ss << ", "; - first = false; - if(!is_non_tagged_type(k, vm->tp_str)){ - vm->TypeError(fmt("json keys must be string, got ", obj_type_name(vm, vm->_tp(k)))); - UNREACHABLE(); - } - Str key = _CAST(Str&, k).escape(false); - Str value = CAST(Str&, vm->py_json(v)); - ss << key << ": " << value; - }); - - ss << "}"; - return VAR(ss.str()); - }); - _vm->bind__eq__(_vm->tp_dict, [](VM* vm, PyObject* a, PyObject* b) { Dict& self = _CAST(Dict&, a); if(!is_non_tagged_type(b, vm->tp_dict)) return vm->NotImplemented; diff --git a/src/str.cpp b/src/str.cpp index 1aa713ef..96ba57d8 100644 --- a/src/str.cpp +++ b/src/str.cpp @@ -224,8 +224,13 @@ int utf8len(unsigned char c, bool suppress){ return Str(copy); } - Str Str::escape(bool single_quote) const { + Str Str::escape(bool single_quote) const{ std::stringstream ss; + escape_(ss, single_quote); + return ss.str(); + } + + void Str::escape_(std::stringstream& ss, bool single_quote) const { ss << (single_quote ? '\'' : '"'); for (int i=0; ioperator[](i); @@ -251,7 +256,6 @@ int utf8len(unsigned char c, bool suppress){ } } ss << (single_quote ? '\'' : '"'); - return ss.str(); } int Str::index(const Str& sub, int start) const { diff --git a/src/vm.cpp b/src/vm.cpp index 5effe577..9538076a 100644 --- a/src/vm.cpp +++ b/src/vm.cpp @@ -2,6 +2,72 @@ namespace pkpy{ + struct JsonSerializer{ + VM* vm; + PyObject* root; + std::stringstream ss; + + JsonSerializer(VM* vm, PyObject* root) : vm(vm), root(root) {} + + template + void write_array(T& arr){ + ss << '['; + for(int i=0; itp_str)){ + vm->TypeError(fmt("json keys must be string, got ", obj_type_name(vm, vm->_tp(k)))); + UNREACHABLE(); + } + ss << _CAST(Str&, k).escape(false) << ": "; + write_object(v); + }); + ss << '}'; + } + + void write_object(PyObject* obj){ + Type obj_t = vm->_tp(obj); + if(obj == vm->None){ + ss << "null"; + }else if(obj_t == vm->tp_int){ + ss << _CAST(i64, obj); + }else if(obj_t == vm->tp_float){ + f64 val = _CAST(f64, obj); + if(std::isinf(val) || std::isnan(val)) vm->ValueError("cannot jsonify 'nan' or 'inf'"); + ss << val; + }else if(obj_t == vm->tp_bool){ + ss << (obj == vm->True ? "true" : "false"); + }else if(obj_t == vm->tp_str){ + _CAST(Str&, obj).escape_(ss, false); + }else if(obj_t == vm->tp_list){ + write_array(_CAST(List&, obj)); + }else if(obj_t == vm->tp_tuple){ + write_array(_CAST(Tuple&, obj)); + }else if(obj_t == vm->tp_dict){ + write_dict(_CAST(Dict&, obj)); + }else{ + vm->TypeError(fmt("unrecognized type ", obj_type_name(vm, obj_t).escape())); + UNREACHABLE(); + } + } + + std::string serialize(){ + auto _lock = vm->heap.gc_scope_lock(); + write_object(root); + return ss.str(); + } + }; + VM::VM(bool enable_os) : heap(this), enable_os(enable_os) { this->vm = this; this->_c.error = nullptr; @@ -39,9 +105,8 @@ namespace pkpy{ } PyObject* VM::py_json(PyObject* obj){ - const PyTypeInfo* ti = _inst_type_info(obj); - if(ti->m__json__) return ti->m__json__(this, obj); - return call_method(obj, __json__); + auto j = JsonSerializer(this, obj); + return VAR(j.serialize()); } PyObject* VM::py_iter(PyObject* obj){ diff --git a/tests/01_int.py b/tests/01_int.py index a5aa7ff3..fd077509 100644 --- a/tests/01_int.py +++ b/tests/01_int.py @@ -48,10 +48,9 @@ assert x == 6 x //= 2 assert x == 3 -# test __str__, __repr__, __json__ +# test __str__, __repr__ assert str(1) == '1' assert repr(1) == '1' -assert (1).__json__() == '1' # test int() assert int(1) == 1