From c2ef720d90f0378f9ec767d0619275895edf0314 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Mon, 27 Mar 2023 22:23:46 +0800 Subject: [PATCH 01/73] update gc --- src/gc.h | 51 +++++++++ src/namedict.h | 7 ++ src/obj.h | 16 ++- src/tuplelist.h | 35 +++--- src/vm.h | 276 ++++++++++++++++++++++++------------------------ 5 files changed, 227 insertions(+), 158 deletions(-) create mode 100644 src/gc.h diff --git a/src/gc.h b/src/gc.h new file mode 100644 index 00000000..3081b259 --- /dev/null +++ b/src/gc.h @@ -0,0 +1,51 @@ +#pragma once + +#include "obj.h" + +namespace pkpy { + using PyVar0 = PyObject*; + + // a generational mark and sweep garbage collector + struct GC{ + using Generation = std::vector; + static const int kTotalGen = 3; + Generation gen[kTotalGen]; + + void add(PyVar0 obj){ + if(!obj->need_gc) return; + gen[0].push_back(obj); + } + + void sweep(int index){ + Generation& g = gen[index]; + if(index < kTotalGen-1){ + for(int i=0; imarked){ + g[i]->marked = false; + gen[index+1].push_back(g[i]); + }else{ + delete g[i]; + } + } + g.clear(); + }else{ + Generation alive; + // the oldest generation + for(int i=0; imarked){ + g[i]->marked = false; + alive.push_back(g[i]); + }else{ + delete g[i]; + } + } + g = std::move(alive); + } + } + + void collect(int index){ + sweep(index); + } + }; + +} // namespace pkpy \ No newline at end of file diff --git a/src/namedict.h b/src/namedict.h index 8b8a3516..90f8d34a 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -230,6 +230,13 @@ while(!_keys[i].empty()) { \ } return v; } + + void apply_v(void(*f)(PyVar)) { + for(uint16_t i=0; i<_capacity; i++){ + if(_keys[i].empty()) continue; + f(value(i)); + } + } #undef HASH_PROBE #undef _hash }; diff --git a/src/obj.h b/src/obj.h index fb2a0e12..46acb280 100644 --- a/src/obj.h +++ b/src/obj.h @@ -88,6 +88,9 @@ public: }; struct PyObject { + bool need_gc; + bool marked; + /**********/ Type type; NameDict* _attr; @@ -96,6 +99,12 @@ struct PyObject { inline const PyVar& 
attr(StrName name) const noexcept { return _attr->get(name); } virtual void* value() = 0; + virtual void mark() { + if(!need_gc || marked) return; + marked = true; + if(is_attr_valid()) attr().apply_v([](PyVar v){ v->mark(); }); + } + PyObject(Type type) : type(type) {} virtual ~PyObject() { delete _attr; } }; @@ -119,9 +128,14 @@ struct Py_ : PyObject { } } void* value() override { return &_value; } + + void mark() override { + PyObject::mark(); + // extra mark for `T` + } }; -#define OBJ_GET(T, obj) (((Py_*)((obj).get()))->_value) +#define OBJ_GET(T, obj) (((Py_*)(obj))->_value) #define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__)) const int kTpIntIndex = 2; diff --git a/src/tuplelist.h b/src/tuplelist.h index 5594a563..a07459e8 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -5,12 +5,12 @@ #include "str.h" namespace pkpy { - using List = std::vector; + using List = std::vector; class Args { - static THREAD_LOCAL SmallArrayPool _pool; + static THREAD_LOCAL SmallArrayPool _pool; - PyVar* _args; + PyObject** _args; int _size; inline void _alloc(int n){ @@ -35,14 +35,14 @@ namespace pkpy { static pkpy::Args from_list(List&& other) noexcept { Args ret(other.size()); - memcpy((void*)ret._args, (void*)other.data(), sizeof(PyVar)*ret.size()); - memset((void*)other.data(), 0, sizeof(PyVar)*ret.size()); + memcpy((void*)ret._args, (void*)other.data(), sizeof(PyObject*)*ret.size()); + memset((void*)other.data(), 0, sizeof(PyObject*)*ret.size()); other.clear(); return ret; } - PyVar& operator[](int i){ return _args[i]; } - const PyVar& operator[](int i) const { return _args[i]; } + PyObject*& operator[](int i){ return _args[i]; } + PyObject* operator[](int i) const { return _args[i]; } Args& operator=(Args&& other) noexcept { _pool.dealloc(_args, _size); @@ -57,29 +57,30 @@ namespace pkpy { List move_to_list() noexcept { List ret(_size); - memcpy((void*)ret.data(), (void*)_args, sizeof(PyVar)*_size); - memset((void*)_args, 0, sizeof(PyVar)*_size); + 
memcpy((void*)ret.data(), (void*)_args, sizeof(PyObject*)*_size); + memset((void*)_args, 0, sizeof(PyObject*)*_size); return ret; } - void extend_self(const PyVar& self){ - static_assert(std::is_standard_layout_v); - PyVar* old_args = _args; + void extend_self(PyObject* self){ + PyObject** old_args = _args; int old_size = _size; _alloc(old_size+1); _args[0] = self; if(old_size == 0) return; - memcpy((void*)(_args+1), (void*)old_args, sizeof(PyVar)*old_size); - memset((void*)old_args, 0, sizeof(PyVar)*old_size); + memcpy((void*)(_args+1), (void*)old_args, sizeof(PyObject*)*old_size); + memset((void*)old_args, 0, sizeof(PyObject*)*old_size); _pool.dealloc(old_args, old_size); } ~Args(){ _pool.dealloc(_args, _size); } }; - static const Args _zero(0); - inline const Args& no_arg() { return _zero; } + inline const Args& no_arg() { + static const Args _zero(0); + return _zero; + } template Args one_arg(T&& a) { @@ -106,5 +107,5 @@ namespace pkpy { } typedef Args Tuple; - THREAD_LOCAL SmallArrayPool Args::_pool; + THREAD_LOCAL SmallArrayPool Args::_pool; } // namespace pkpy \ No newline at end of file diff --git a/src/vm.h b/src/vm.h index 22523e45..e90b1544 100644 --- a/src/vm.h +++ b/src/vm.h @@ -6,22 +6,22 @@ namespace pkpy{ #define DEF_NATIVE_2(ctype, ptype) \ - template<> ctype py_cast(VM* vm, const PyVar& obj) { \ + template<> ctype py_cast(VM* vm, PyObject* obj) { \ vm->check_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype _py_cast(VM* vm, const PyVar& obj) { \ + template<> ctype _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype& py_cast(VM* vm, const PyVar& obj) { \ + template<> ctype& py_cast(VM* vm, PyObject* obj) { \ vm->check_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype& _py_cast(VM* vm, const PyVar& obj) { \ + template<> ctype& _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - PyVar py_var(VM* vm, const ctype& value) { return 
vm->new_object(vm->ptype, value);} \ - PyVar py_var(VM* vm, ctype&& value) { return vm->new_object(vm->ptype, std::move(value));} + PyObject* py_var(VM* vm, const ctype& value) { return vm->new_object(vm->ptype, value);} \ + PyObject* py_var(VM* vm, ctype&& value) { return vm->new_object(vm->ptype, std::move(value));} class Generator: public BaseIter { std::unique_ptr frame; @@ -30,11 +30,11 @@ public: Generator(VM* vm, std::unique_ptr&& frame) : BaseIter(vm, nullptr), frame(std::move(frame)), state(0) {} - PyVar next(); + PyObject* next(); }; struct PyTypeInfo{ - PyVar obj; + PyObject* obj; Type base; Str name; }; @@ -43,23 +43,27 @@ class VM { VM* vm; // self reference for simplify code public: std::stack< std::unique_ptr > callstack; - PyVar _py_op_call; - PyVar _py_op_yield; std::vector _all_types; - PyVar run_frame(Frame* frame); + PyObject* run_frame(Frame* frame); NameDict _modules; // loaded modules std::map _lazy_modules; // lazy loaded modules - PyVar None, True, False, Ellipsis; + + // singleton objects, need_gc=false + PyObject* _py_op_call; + PyObject* _py_op_yield; + PyObject* None; + PyObject* True; + PyObject* False; + PyObject* Ellipsis; + + PyObject* builtins; // builtins module + PyObject* _main; // __main__ module bool use_stdio; std::ostream* _stdout; std::ostream* _stderr; - - PyVar builtins; // builtins module - PyVar _main; // __main__ module - int recursionlimit = 1000; VM(bool use_stdio){ @@ -77,7 +81,7 @@ public: // for(int i=0; i<128; i++) _ascii_str_pool[i] = new_object(tp_str, std::string(1, (char)i)); } - PyVar asStr(const PyVar& obj){ + PyObject* asStr(PyObject* obj){ PyVarOrNull f = getattr(obj, __str__, false, true); if(f != nullptr) return call(f); return asRepr(obj); @@ -90,7 +94,7 @@ public: return callstack.top().get(); } - PyVar asIter(const PyVar& obj){ + PyObject* asIter(PyObject* obj){ if(is_type(obj, tp_native_iterator)) return obj; PyVarOrNull iter_f = getattr(obj, __iter__, false, true); if(iter_f != nullptr) return 
call(iter_f); @@ -98,25 +102,25 @@ public: return nullptr; } - PyVar asList(const PyVar& iterable){ + PyObject* asList(PyObject* iterable){ if(is_type(iterable, tp_list)) return iterable; return call(_t(tp_list), one_arg(iterable)); } - PyVar* find_name_in_mro(PyObject* cls, StrName name){ - PyVar* val; + PyObject** find_name_in_mro(PyObject* cls, StrName name){ + PyObject** val; do{ val = cls->attr().try_get(name); if(val != nullptr) return val; Type cls_t = static_cast*>(cls)->_value; Type base = _all_types[cls_t.index].base; if(base.index == -1) break; - cls = _all_types[base.index].obj.get(); + cls = _all_types[base.index].obj; }while(true); return nullptr; } - bool isinstance(const PyVar& obj, Type cls_t){ + bool isinstance(PyObject* obj, Type cls_t){ Type obj_t = OBJ_GET(Type, _t(obj)); do{ if(obj_t == cls_t) return true; @@ -127,36 +131,36 @@ public: return false; } - PyVar fast_call(StrName name, Args&& args){ - PyVar* val = find_name_in_mro(_t(args[0]).get(), name); + PyObject* fast_call(StrName name, Args&& args){ + PyObject** val = find_name_in_mro(_t(args[0]).get(), name); if(val != nullptr) return call(*val, std::move(args)); AttributeError(args[0], name); return nullptr; } - inline PyVar call(const PyVar& _callable){ + inline PyObject* call(PyObject* _callable){ return call(_callable, no_arg(), no_arg(), false); } template - inline std::enable_if_t, Args>, PyVar> - call(const PyVar& _callable, ArgT&& args){ + inline std::enable_if_t, Args>, PyObject*> + call(PyObject* _callable, ArgT&& args){ return call(_callable, std::forward(args), no_arg(), false); } template - inline std::enable_if_t, Args>, PyVar> - call(const PyVar& obj, const StrName name, ArgT&& args){ + inline std::enable_if_t, Args>, PyObject*> + call(PyObject* obj, const StrName name, ArgT&& args){ return call(getattr(obj, name, true, true), std::forward(args), no_arg(), false); } - inline PyVar call(const PyVar& obj, StrName name){ + inline PyObject* call(PyObject* obj, StrName name){ 
return call(getattr(obj, name, true, true), no_arg(), no_arg(), false); } // repl mode is only for setting `frame->id` to 0 - PyVarOrNull exec(Str source, Str filename, CompileMode mode, PyVar _module=nullptr){ + PyObject* exec(Str source, Str filename, CompileMode mode, PyObject* _module=nullptr){ if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); @@ -180,19 +184,20 @@ public: } template - inline PyVar _exec(Args&&... args){ + inline PyObject* _exec(Args&&... args){ callstack.push(_new_frame(std::forward(args)...)); return _exec(); } - PyVar property(NativeFuncRaw fget){ - PyVar p = builtins->attr("property"); - PyVar method = new_object(tp_native_function, NativeFunc(fget, 1, false)); + PyObject* property(NativeFuncRaw fget){ + PyObject* p = builtins->attr("property"); + PyObject* method = new_object(tp_native_function, NativeFunc(fget, 1, false)); return call(p, one_arg(method)); } - PyVar new_type_object(PyVar mod, StrName name, Type base){ - PyVar obj = make_sp>(tp_type, _all_types.size()); + PyObject* new_type_object(PyObject* mod, StrName name, Type base){ + // use gcnew + PyObject* obj = make_sp>(tp_type, _all_types.size()); PyTypeInfo info{ .obj = obj, .base = base, @@ -204,19 +209,19 @@ public: } Type _new_type_object(StrName name, Type base=0) { - PyVar obj = new_type_object(nullptr, name, base); + PyObject* obj = new_type_object(nullptr, name, base); return OBJ_GET(Type, obj); } template - inline PyVar new_object(const PyVar& type, const T& _value) { + inline PyObject* new_object(PyObject* type, const T& _value) { #if PK_EXTRA_CHECK if(!is_type(type, tp_type)) UNREACHABLE(); #endif return make_sp>>(OBJ_GET(Type, type), _value); } template - inline PyVar new_object(const PyVar& type, T&& _value) { + inline PyObject* new_object(PyObject* type, T&& _value) { #if PK_EXTRA_CHECK if(!is_type(type, tp_type)) UNREACHABLE(); #endif @@ -224,16 +229,16 @@ public: } template - inline PyVar new_object(Type type, const 
T& _value) { + inline PyObject* new_object(Type type, const T& _value) { return make_sp>>(type, _value); } template - inline PyVar new_object(Type type, T&& _value) { + inline PyObject* new_object(Type type, T&& _value) { return make_sp>>(type, std::move(_value)); } - PyVar _find_type(const Str& type){ - PyVar* obj = builtins->attr().try_get(type); + PyObject* _find_type(const Str& type){ + PyObject** obj = builtins->attr().try_get(type); if(!obj){ for(auto& t: _all_types) if(t.name == type) return t.obj; throw std::runtime_error("type not found: " + type); @@ -282,12 +287,12 @@ public: Type tp_super, tp_exception, tp_star_wrapper; template - inline PyVar PyIter(P&& value) { + inline PyObject* PyIter(P&& value) { static_assert(std::is_base_of_v>); return new_object(tp_native_iterator, std::forward

(value)); } - inline BaseIter* PyIter_AS_C(const PyVar& obj) + inline BaseIter* PyIter_AS_C(PyObject* obj) { check_type(obj, tp_native_iterator); return static_cast(obj->value()); @@ -313,22 +318,22 @@ public: void ValueError(const Str& msg){ _error("ValueError", msg); } void NameError(StrName name){ _error("NameError", "name " + name.str().escape(true) + " is not defined"); } - void AttributeError(PyVar obj, StrName name){ + void AttributeError(PyObject* obj, StrName name){ _error("AttributeError", "type " + OBJ_NAME(_t(obj)).escape(true) + " has no attribute " + name.str().escape(true)); } void AttributeError(Str msg){ _error("AttributeError", msg); } - inline void check_type(const PyVar& obj, Type type){ + inline void check_type(PyObject* obj, Type type){ if(is_type(obj, type)) return; TypeError("expected " + OBJ_NAME(_t(type)).escape(true) + ", but got " + OBJ_NAME(_t(obj)).escape(true)); } - inline PyVar& _t(Type t){ + inline PyObject* _t(Type t){ return _all_types[t.index].obj; } - inline PyVar& _t(const PyVar& obj){ + inline PyObject* _t(PyObject* obj){ if(is_int(obj)) return _t(tp_int); if(is_float(obj)) return _t(tp_float); return _all_types[OBJ_GET(Type, _t(obj->type)).index].obj; @@ -341,42 +346,34 @@ public: } } - inline PyVarOrNull getattr(const PyVar& obj, StrName name, bool throw_err=true, bool class_only=false){ - return getattr(&obj, name, throw_err, class_only); - } - template - inline void setattr(PyVar& obj, StrName name, T&& value){ - setattr(&obj, name, std::forward(value)); - } - CodeObject_ compile(Str source, Str filename, CompileMode mode); void post_init(); - PyVar num_negated(const PyVar& obj); - f64 num_to_float(const PyVar& obj); - const PyVar& asBool(const PyVar& obj); - i64 hash(const PyVar& obj); - PyVar asRepr(const PyVar& obj); - PyVar new_module(StrName name); + PyObject* num_negated(PyObject* obj); + f64 num_to_float(PyObject* obj); + PyObject* asBool(PyObject* obj); + i64 hash(PyObject* obj); + PyObject* asRepr(PyObject* obj); 
+ PyObject* new_module(StrName name); Str disassemble(CodeObject_ co); void init_builtin_types(); - PyVar call(const PyVar& _callable, Args args, const Args& kwargs, bool opCall); + PyObject* call(PyObject* _callable, Args args, const Args& kwargs, bool opCall); void unpack_args(Args& args); - PyVarOrNull getattr(const PyVar* obj, StrName name, bool throw_err=true, bool class_only=false); + PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true, bool class_only=false); template - void setattr(PyVar* obj, StrName name, T&& value); + void setattr(PyObject* obj, StrName name, T&& value); template - void bind_method(PyVar obj, Str funcName, NativeFuncRaw fn); + void bind_method(PyObject* obj, Str funcName, NativeFuncRaw fn); template - void bind_func(PyVar obj, Str funcName, NativeFuncRaw fn); + void bind_func(PyObject* obj, Str funcName, NativeFuncRaw fn); void _error(Exception e); - PyVar _exec(); + PyObject* _exec(); template PyVarRef PyRef(P&& value); - const BaseRef* PyRef_AS_C(const PyVar& obj); + const BaseRef* PyRef_AS_C(PyObject* obj); }; -PyVar NativeFunc::operator()(VM* vm, Args& args) const{ +PyObject* NativeFunc::operator()(VM* vm, Args& args) const{ int args_size = args.size() - (int)method; // remove self if(argc != -1 && args_size != argc) { vm->TypeError("expected " + std::to_string(argc) + " arguments, but got " + std::to_string(args_size)); @@ -437,11 +434,11 @@ DEF_NATIVE_2(Exception, tp_exception) DEF_NATIVE_2(StarWrapper, tp_star_wrapper) #define PY_CAST_INT(T) \ -template<> T py_cast(VM* vm, const PyVar& obj){ \ +template<> T py_cast(VM* vm, PyObject* obj){ \ vm->check_type(obj, vm->tp_int); \ return (T)(obj.bits >> 2); \ } \ -template<> T _py_cast(VM* vm, const PyVar& obj){ \ +template<> T _py_cast(VM* vm, PyObject* obj){ \ return (T)(obj.bits >> 2); \ } @@ -457,38 +454,38 @@ PY_CAST_INT(unsigned long) PY_CAST_INT(unsigned long long) -template<> float py_cast(VM* vm, const PyVar& obj){ +template<> float py_cast(VM* vm, PyObject* 
obj){ vm->check_type(obj, vm->tp_float); i64 bits = obj.bits; bits = (bits >> 2) << 2; return __8B(bits)._float; } -template<> float _py_cast(VM* vm, const PyVar& obj){ +template<> float _py_cast(VM* vm, PyObject* obj){ i64 bits = obj.bits; bits = (bits >> 2) << 2; return __8B(bits)._float; } -template<> double py_cast(VM* vm, const PyVar& obj){ +template<> double py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_float); i64 bits = obj.bits; bits = (bits >> 2) << 2; return __8B(bits)._float; } -template<> double _py_cast(VM* vm, const PyVar& obj){ +template<> double _py_cast(VM* vm, PyObject* obj){ i64 bits = obj.bits; bits = (bits >> 2) << 2; return __8B(bits)._float; } -#define PY_VAR_INT(T) \ - PyVar py_var(VM* vm, T _val){ \ - i64 val = static_cast(_val); \ - if(((val << 2) >> 2) != val){ \ +#define PY_VAR_INT(T) \ + PyObject* py_var(VM* vm, T _val){ \ + i64 val = static_cast(_val); \ + if(((val << 2) >> 2) != val){ \ vm->_error("OverflowError", std::to_string(val) + " is out of range"); \ } \ val = (val << 2) | 0b01; \ - return PyVar(reinterpret_cast(val)); \ + return reinterpret_cast(val); \ } PY_VAR_INT(char) @@ -502,44 +499,44 @@ PY_VAR_INT(unsigned int) PY_VAR_INT(unsigned long) PY_VAR_INT(unsigned long long) -#define PY_VAR_FLOAT(T) \ - PyVar py_var(VM* vm, T _val){ \ - f64 val = static_cast(_val); \ - i64 bits = __8B(val)._int; \ - bits = (bits >> 2) << 2; \ - bits |= 0b10; \ - return PyVar(reinterpret_cast(bits)); \ +#define PY_VAR_FLOAT(T) \ + PyObject* py_var(VM* vm, T _val){ \ + f64 val = static_cast(_val); \ + i64 bits = __8B(val)._int; \ + bits = (bits >> 2) << 2; \ + bits |= 0b10; \ + return reinterpret_cast(bits); \ } PY_VAR_FLOAT(float) PY_VAR_FLOAT(double) -const PyVar& py_var(VM* vm, bool val){ +PyObject* py_var(VM* vm, bool val){ return val ? 
vm->True : vm->False; } -template<> bool py_cast(VM* vm, const PyVar& obj){ +template<> bool py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_bool); return obj == vm->True; } -template<> bool _py_cast(VM* vm, const PyVar& obj){ +template<> bool _py_cast(VM* vm, PyObject* obj){ return obj == vm->True; } -PyVar py_var(VM* vm, const char val[]){ +PyObject* py_var(VM* vm, const char val[]){ return VAR(Str(val)); } -PyVar py_var(VM* vm, std::string val){ +PyObject* py_var(VM* vm, std::string val){ return VAR(Str(std::move(val))); } template -void _check_py_class(VM* vm, const PyVar& obj){ +void _check_py_class(VM* vm, PyObject* obj){ vm->check_type(obj, T::_type(vm)); } -PyVar VM::num_negated(const PyVar& obj){ +PyObject* VM::num_negated(PyObject* obj){ if (is_int(obj)){ return VAR(-CAST(i64, obj)); }else if(is_float(obj)){ @@ -549,7 +546,7 @@ PyVar VM::num_negated(const PyVar& obj){ return nullptr; } -f64 VM::num_to_float(const PyVar& obj){ +f64 VM::num_to_float(PyObject* obj){ if(is_float(obj)){ return CAST(f64, obj); } else if (is_int(obj)){ @@ -559,20 +556,20 @@ f64 VM::num_to_float(const PyVar& obj){ return 0; } -const PyVar& VM::asBool(const PyVar& obj){ +PyObject* VM::asBool(PyObject* obj){ if(is_type(obj, tp_bool)) return obj; if(obj == None) return False; if(is_type(obj, tp_int)) return VAR(CAST(i64, obj) != 0); if(is_type(obj, tp_float)) return VAR(CAST(f64, obj) != 0.0); PyVarOrNull len_fn = getattr(obj, __len__, false, true); if(len_fn != nullptr){ - PyVar ret = call(len_fn); + PyObject* ret = call(len_fn); return VAR(CAST(i64, ret) > 0); } return True; } -i64 VM::hash(const PyVar& obj){ +i64 VM::hash(PyObject* obj){ if (is_type(obj, tp_str)) return CAST(Str&, obj).hash(); if (is_int(obj)) return CAST(i64, obj); if (is_type(obj, tp_tuple)) { @@ -594,12 +591,12 @@ i64 VM::hash(const PyVar& obj){ return 0; } -PyVar VM::asRepr(const PyVar& obj){ +PyObject* VM::asRepr(PyObject* obj){ return call(obj, __repr__); } -PyVar VM::new_module(StrName name) { - 
PyVar obj = new_object(tp_module, DummyModule()); +PyObject* VM::new_module(StrName name) { + PyObject* obj = new_object(tp_module, DummyModule()); obj->attr().set(__name__, VAR(name.str())); _modules.set(name, obj); return obj; @@ -665,7 +662,7 @@ Str VM::disassemble(CodeObject_ co){ ss << '\n' << consts.str() << '\n' << names.str() << '\n'; for(int i=0; iconsts.size(); i++){ - PyVar obj = co->consts[i]; + PyObject* obj = co->consts[i]; if(is_type(obj, tp_function)){ const auto& f = CAST(Function&, obj); ss << disassemble(f.code); @@ -731,10 +728,10 @@ void VM::init_builtin_types(){ for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); } -PyVar VM::call(const PyVar& _callable, Args args, const Args& kwargs, bool opCall){ - if(is_type(_callable, tp_type)){ - PyVar* new_f = _callable->attr().try_get(__new__); - PyVar obj; +PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCall){ + if(is_type(callable, tp_type)){ + PyObject** new_f = callable->attr().try_get(__new__); + PyObject* obj; if(new_f != nullptr){ obj = call(*new_f, std::move(args), kwargs, false); }else{ @@ -745,19 +742,18 @@ PyVar VM::call(const PyVar& _callable, Args args, const Args& kwargs, bool opCal return obj; } - const PyVar* callable = &_callable; - if(is_type(*callable, tp_bound_method)){ - auto& bm = CAST(BoundMethod&, *callable); - callable = &bm.method; // get unbound method + if(is_type(callable, tp_bound_method)){ + auto& bm = CAST(BoundMethod&, callable); + callable = bm.method; // get unbound method args.extend_self(bm.obj); } - if(is_type(*callable, tp_native_function)){ - const auto& f = OBJ_GET(NativeFunc, *callable); + if(is_type(callable, tp_native_function)){ + const auto& f = OBJ_GET(NativeFunc, callable); if(kwargs.size() != 0) TypeError("native_function does not accept keyword arguments"); return f(this, args); - } else if(is_type(*callable, tp_function)){ - const Function& fn = CAST(Function&, *callable); + } else if(is_type(callable, 
tp_function)){ + const Function& fn = CAST(Function&, callable); NameDict_ locals = make_sp( fn.code->perfect_locals_capacity, kLocalsLoadFactor, @@ -797,7 +793,7 @@ PyVar VM::call(const PyVar& _callable, Args args, const Args& kwargs, bool opCal } locals->set(key, kwargs[i+1]); } - const PyVar& _module = fn._module != nullptr ? fn._module : top_frame()->_module; + PyObject* _module = fn._module != nullptr ? fn._module : top_frame()->_module; auto _frame = _new_frame(fn.code, _module, locals, fn._closure); if(fn.code->is_generator) return PyIter(Generator(this, std::move(_frame))); callstack.push(std::move(_frame)); @@ -819,7 +815,7 @@ void VM::unpack_args(Args& args){ if(is_type(args[i], tp_star_wrapper)){ auto& star = _CAST(StarWrapper&, args[i]); if(!star.rvalue) UNREACHABLE(); - PyVar list = asList(star.obj); + PyObject* list = asList(star.obj); List& list_c = CAST(List&, list); unpacked.insert(unpacked.end(), list_c.begin(), list_c.end()); }else{ @@ -829,25 +825,25 @@ void VM::unpack_args(Args& args){ args = Args::from_list(std::move(unpacked)); } -using Super = std::pair; +using Super = std::pair; // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance -PyVarOrNull VM::getattr(const PyVar* obj, StrName name, bool throw_err, bool class_only){ +PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){ PyObject* objtype = _t(*obj).get(); if(is_type(*obj, tp_super)){ const Super& super = OBJ_GET(Super, *obj); obj = &super.first; objtype = _t(super.second).get(); } - PyVar* cls_var = find_name_in_mro(objtype, name); + PyObject** cls_var = find_name_in_mro(objtype, name); if(cls_var != nullptr){ // handle descriptor - PyVar* descr_get = _t(*cls_var)->attr().try_get(__get__); + PyObject** descr_get = _t(*cls_var)->attr().try_get(__get__); if(descr_get != nullptr) return call(*descr_get, two_args(*cls_var, *obj)); } // handle instance __dict__ if(!class_only && !(*obj).is_tagged() && (*obj)->is_attr_valid()){ - PyVar* 
val = (*obj)->attr().try_get(name); + PyObject** val = (*obj)->attr().try_get(name); if(val != nullptr) return *val; } if(cls_var != nullptr){ @@ -862,22 +858,22 @@ PyVarOrNull VM::getattr(const PyVar* obj, StrName name, bool throw_err, bool cla } template -void VM::setattr(PyVar* obj, StrName name, T&& value){ +void VM::setattr(PyObject* obj, StrName name, T&& value){ static_assert(std::is_same_v, PyVar>); - PyObject* objtype = _t(*obj).get(); - if(is_type(*obj, tp_super)){ + PyObject* objtype = _t(obj).get(); + if(is_type(obj, tp_super)){ Super& super = OBJ_GET(Super, *obj); - obj = &super.first; + obj = super.first; objtype = _t(super.second).get(); } - PyVar* cls_var = find_name_in_mro(objtype, name); + PyObject** cls_var = find_name_in_mro(objtype, name); if(cls_var != nullptr){ // handle descriptor - const PyVar& cls_var_t = _t(*cls_var); + PyObject* cls_var_t = _t(*cls_var); if(cls_var_t->attr().contains(__get__)){ - PyVar* descr_set = cls_var_t->attr().try_get(__set__); + PyObject** descr_set = cls_var_t->attr().try_get(__set__); if(descr_set != nullptr){ - call(*descr_set, three_args(*cls_var, *obj, std::forward(value))); + call(*descr_set, three_args(*cls_var, obj, std::forward(value))); }else{ TypeError("readonly attribute: " + name.str().escape(true)); } @@ -885,18 +881,18 @@ void VM::setattr(PyVar* obj, StrName name, T&& value){ } } // handle instance __dict__ - if((*obj).is_tagged() || !(*obj)->is_attr_valid()) TypeError("cannot set attribute"); - (*obj)->attr().set(name, std::forward(value)); + if(obj.is_tagged() || !(*obj)->is_attr_valid()) TypeError("cannot set attribute"); + obj->attr().set(name, std::forward(value)); } template -void VM::bind_method(PyVar obj, Str name, NativeFuncRaw fn) { +void VM::bind_method(PyObject* obj, Str name, NativeFuncRaw fn) { check_type(obj, tp_type); obj->attr().set(name, VAR(NativeFunc(fn, ARGC, true))); } template -void VM::bind_func(PyVar obj, Str name, NativeFuncRaw fn) { +void VM::bind_func(PyObject* obj, Str 
name, NativeFuncRaw fn) { obj->attr().set(name, VAR(NativeFunc(fn, ARGC, false))); } @@ -909,10 +905,10 @@ void VM::_error(Exception e){ _raise(); } -PyVar VM::_exec(){ +PyObject* VM::_exec(){ Frame* frame = top_frame(); i64 base_id = frame->id; - PyVar ret = nullptr; + PyObject* ret = nullptr; bool need_raise = false; while(true){ @@ -936,7 +932,7 @@ PyVar VM::_exec(){ }catch(HandledException& e){ continue; }catch(UnhandledException& e){ - PyVar obj = frame->pop(); + PyObject* obj = frame->pop(); Exception& _e = CAST(Exception&, obj); _e.st_push(frame->snapshot()); callstack.pop(); From f57fa16ee27ed468c82331a82bed860e39f5397e Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 28 Mar 2023 17:08:42 +0800 Subject: [PATCH 02/73] update gc --- src/ceval.h | 91 ++++++++++++++++---------------- src/cffi.h | 40 +++++++-------- src/codeobject.h | 2 +- src/common.h | 21 +++++++- src/compiler.h | 12 ++--- src/frame.h | 30 +++++------ src/gc.h | 59 +++++++++------------ src/io.h | 8 +-- src/iter.h | 16 +++--- src/main.cpp | 2 +- src/memory.h | 60 ++-------------------- src/namedict.h | 40 +++++++-------- src/obj.h | 88 ++++++++++++++----------------- src/parser.h | 4 +- src/pocketpy.h | 50 +++++++++--------- src/ref.h | 54 +++++++++---------- src/tuplelist.h | 30 +++-------- src/vm.h | 131 +++++++++++++++++++++-------------------------- 18 files changed, 323 insertions(+), 415 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 2eaed684..643d5042 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -7,7 +7,7 @@ namespace pkpy{ Str _read_file_cwd(const Str& name, bool* ok); -PyVar VM::run_frame(Frame* frame){ +PyObject* VM::run_frame(Frame* frame){ while(frame->has_next_bytecode()){ const Bytecode& byte = frame->next_bytecode(); switch (byte.op) @@ -16,7 +16,7 @@ PyVar VM::run_frame(Frame* frame){ case OP_SETUP_DECORATOR: continue; case OP_LOAD_CONST: frame->push(frame->co->consts[byte.arg]); continue; case OP_LOAD_FUNCTION: { - const PyVar obj = frame->co->consts[byte.arg]; 
+ PyObject* obj = frame->co->consts[byte.arg]; Function f = CAST(Function, obj); // copy f._module = frame->_module; frame->push(VAR(f)); @@ -37,13 +37,13 @@ PyVar VM::run_frame(Frame* frame){ } continue; case OP_BUILD_ATTR_REF: case OP_BUILD_ATTR: { auto& attr = frame->co->names[byte.arg]; - PyVar obj = frame->pop_value(this); + PyObject* obj = frame->pop_value(this); AttrRef ref = AttrRef(obj, NameRef(attr)); if(byte.op == OP_BUILD_ATTR) frame->push(ref.get(this, frame)); else frame->push(PyRef(ref)); } continue; case OP_BUILD_INDEX: { - PyVar index = frame->pop_value(this); + PyObject* index = frame->pop_value(this); auto ref = IndexRef(frame->pop_value(this), index); if(byte.arg > 0) frame->push(ref.get(this, frame)); else frame->push(PyRef(ref)); @@ -57,9 +57,6 @@ PyVar VM::run_frame(Frame* frame){ } continue; case OP_ROT_TWO: ::std::swap(frame->top(), frame->top_1()); continue; case OP_STORE_REF: { - // PyVar obj = frame->pop_value(this); - // PyVarRef r = frame->pop(); - // PyRef_AS_C(r)->set(this, frame, std::move(obj)); PyRef_AS_C(frame->top_1())->set(this, frame, frame->top_value(this)); frame->_pop(); frame->_pop(); } continue; @@ -84,25 +81,25 @@ PyVar VM::run_frame(Frame* frame){ case OP_LOAD_EVAL_FN: frame->push(builtins->attr(m_eval)); continue; case OP_BEGIN_CLASS: { auto& name = frame->co->names[byte.arg]; - PyVar clsBase = frame->pop_value(this); + PyObject* clsBase = frame->pop_value(this); if(clsBase == None) clsBase = _t(tp_object); check_type(clsBase, tp_type); - PyVar cls = new_type_object(frame->_module, name.first, OBJ_GET(Type, clsBase)); + PyObject* cls = new_type_object(frame->_module, name.first, OBJ_GET(Type, clsBase)); frame->push(cls); } continue; case OP_END_CLASS: { - PyVar cls = frame->pop(); + PyObject* cls = frame->pop(); cls->attr()._try_perfect_rehash(); }; continue; case OP_STORE_CLASS_ATTR: { auto& name = frame->co->names[byte.arg]; - PyVar obj = frame->pop_value(this); - PyVar& cls = frame->top(); + PyObject* obj = 
frame->pop_value(this); + PyObject* cls = frame->top(); cls->attr().set(name.first, std::move(obj)); } continue; case OP_RETURN_VALUE: return frame->pop_value(this); case OP_PRINT_EXPR: { - const PyVar expr = frame->top_value(this); + PyObject* expr = frame->top_value(this); if(expr != None) *_stdout << CAST(Str, asRepr(expr)) << '\n'; } continue; case OP_POP_TOP: frame->_pop(); continue; @@ -122,7 +119,7 @@ PyVar VM::run_frame(Frame* frame){ Args args(2); args[1] = frame->pop(); args[0] = frame->top_value(this); - PyVar ret = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); + PyObject* ret = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); PyRef_AS_C(frame->top())->set(this, frame, std::move(ret)); frame->_pop(); } continue; @@ -130,7 +127,7 @@ PyVar VM::run_frame(Frame* frame){ Args args(2); args[1] = frame->pop_value(this); args[0] = frame->top_value(this); - PyVar ret = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); + PyObject* ret = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); PyRef_AS_C(frame->top())->set(this, frame, std::move(ret)); frame->_pop(); } continue; @@ -141,14 +138,14 @@ PyVar VM::run_frame(Frame* frame){ frame->top() = fast_call(CMP_SPECIAL_METHODS[byte.arg], std::move(args)); } continue; case OP_IS_OP: { - PyVar rhs = frame->pop_value(this); + PyObject* rhs = frame->pop_value(this); bool ret_c = rhs == frame->top_value(this); if(byte.arg == 1) ret_c = !ret_c; frame->top() = VAR(ret_c); } continue; case OP_CONTAINS_OP: { - PyVar rhs = frame->pop_value(this); - bool ret_c = CAST(bool, call(rhs, __contains__, one_arg(frame->pop_value(this)))); + PyObject* rhs = frame->pop_value(this); + bool ret_c = CAST(bool, call(rhs, __contains__, Args{frame->pop_value(this)})); if(byte.arg == 1) ret_c = !ret_c; frame->push(VAR(ret_c)); } continue; @@ -156,8 +153,8 @@ PyVar VM::run_frame(Frame* frame){ frame->top() = num_negated(frame->top_value(this)); continue; case OP_UNARY_NOT: { - PyVar obj = 
frame->pop_value(this); - const PyVar& obj_bool = asBool(obj); + PyObject* obj = frame->pop_value(this); + PyObject* obj_bool = asBool(obj); frame->push(VAR(!_CAST(bool, obj_bool))); } continue; case OP_POP_JUMP_IF_FALSE: @@ -168,9 +165,9 @@ PyVar VM::run_frame(Frame* frame){ case OP_LOAD_FALSE: frame->push(False); continue; case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); continue; case OP_ASSERT: { - PyVar _msg = frame->pop_value(this); + PyObject* _msg = frame->pop_value(this); Str msg = CAST(Str, asStr(_msg)); - PyVar expr = frame->pop_value(this); + PyObject* expr = frame->pop_value(this); if(asBool(expr) != True) _error("AssertionError", msg); } continue; case OP_EXCEPTION_MATCH: { @@ -179,7 +176,7 @@ PyVar VM::run_frame(Frame* frame){ frame->push(VAR(e.match_type(name))); } continue; case OP_RAISE: { - PyVar obj = frame->pop_value(this); + PyObject* obj = frame->pop_value(this); Str msg = obj == None ? "" : CAST(Str, asStr(obj)); StrName type = frame->co->names[byte.arg].first; _error(type, msg); @@ -190,32 +187,32 @@ PyVar VM::run_frame(Frame* frame){ continue; case OP_BUILD_MAP: { Args items = frame->pop_n_values_reversed(this, byte.arg*2); - PyVar obj = call(builtins->attr("dict")); + PyObject* obj = call(builtins->attr("dict")); for(int i=0; ipush(obj); } continue; case OP_BUILD_SET: { - PyVar list = VAR( + PyObject* list = VAR( frame->pop_n_values_reversed(this, byte.arg).move_to_list() ); - PyVar obj = call(builtins->attr("set"), one_arg(list)); + PyObject* obj = call(builtins->attr("set"), Args{list}); frame->push(obj); } continue; case OP_LIST_APPEND: { - PyVar obj = frame->pop_value(this); + PyObject* obj = frame->pop_value(this); List& list = CAST(List&, frame->top_1()); list.push_back(std::move(obj)); } continue; case OP_MAP_ADD: { - PyVar value = frame->pop_value(this); - PyVar key = frame->pop_value(this); - call(frame->top_1(), __setitem__, two_args(key, value)); + PyObject* value = frame->pop_value(this); + PyObject* key = frame->pop_value(this); 
+ call(frame->top_1(), __setitem__, Args{key, value}); } continue; case OP_SET_ADD: { - PyVar obj = frame->pop_value(this); - call(frame->top_1(), "add", one_arg(obj)); + PyObject* obj = frame->pop_value(this); + call(frame->top_1(), "add", Args{obj}); } continue; case OP_DUP_TOP_VALUE: frame->push(frame->top_value(this)); continue; case OP_UNARY_STAR: { @@ -232,16 +229,16 @@ PyVar VM::run_frame(Frame* frame){ Args kwargs = frame->pop_n_values_reversed(this, KWARGC*2); Args args = frame->pop_n_values_reversed(this, ARGC); if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args); - PyVar callable = frame->pop_value(this); - PyVar ret = call(callable, std::move(args), kwargs, true); + PyObject* callable = frame->pop_value(this); + PyObject* ret = call(callable, std::move(args), kwargs, true); if(ret == _py_op_call) return ret; frame->push(std::move(ret)); } continue; case OP_CALL_UNPACK: case OP_CALL: { Args args = frame->pop_n_values_reversed(this, byte.arg); if(byte.op == OP_CALL_UNPACK) unpack_args(args); - PyVar callable = frame->pop_value(this); - PyVar ret = call(callable, std::move(args), no_arg(), true); + PyObject* callable = frame->pop_value(this); + PyObject* ret = call(callable, std::move(args), no_arg(), true); if(ret == _py_op_call) return ret; frame->push(std::move(ret)); } continue; @@ -254,15 +251,15 @@ PyVar VM::run_frame(Frame* frame){ frame->jump_abs_safe(it->second); } continue; case OP_GET_ITER: { - PyVar obj = frame->pop_value(this); - PyVar iter = asIter(obj); + PyObject* obj = frame->pop_value(this); + PyObject* iter = asIter(obj); check_type(frame->top(), tp_ref); PyIter_AS_C(iter)->loop_var = frame->pop(); frame->push(std::move(iter)); } continue; case OP_FOR_ITER: { BaseIter* it = PyIter_AS_C(frame->top()); - PyVar obj = it->next(); + PyObject* obj = it->next(); if(obj != nullptr){ PyRef_AS_C(it->loop_var)->set(this, frame, std::move(obj)); }else{ @@ -279,18 +276,18 @@ PyVar VM::run_frame(Frame* frame){ frame->jump_abs_safe(blockEnd); } 
continue; case OP_JUMP_IF_FALSE_OR_POP: { - const PyVar expr = frame->top_value(this); + PyObject* expr = frame->top_value(this); if(asBool(expr)==False) frame->jump_abs(byte.arg); else frame->pop_value(this); } continue; case OP_JUMP_IF_TRUE_OR_POP: { - const PyVar expr = frame->top_value(this); + PyObject* expr = frame->top_value(this); if(asBool(expr)==True) frame->jump_abs(byte.arg); else frame->pop_value(this); } continue; case OP_BUILD_SLICE: { - PyVar stop = frame->pop_value(this); - PyVar start = frame->pop_value(this); + PyObject* stop = frame->pop_value(this); + PyObject* start = frame->pop_value(this); Slice s; if(start != None) { s.start = CAST(int, start);} if(stop != None) { s.stop = CAST(int, stop);} @@ -298,7 +295,7 @@ PyVar VM::run_frame(Frame* frame){ } continue; case OP_IMPORT_NAME: { StrName name = frame->co->names[byte.arg].first; - PyVar* ext_mod = _modules.try_get(name); + PyObject** ext_mod = _modules.try_get(name); if(ext_mod == nullptr){ Str source; auto it2 = _lazy_modules.find(name); @@ -311,7 +308,7 @@ PyVar VM::run_frame(Frame* frame){ _lazy_modules.erase(it2); } CodeObject_ code = compile(source, name.str(), EXEC_MODE); - PyVar new_mod = new_module(name); + PyObject* new_mod = new_module(name); _exec(code, new_mod); frame->push(new_mod); new_mod->attr()._try_perfect_rehash(); @@ -320,7 +317,7 @@ PyVar VM::run_frame(Frame* frame){ } } continue; case OP_STORE_ALL_NAMES: { - PyVar obj = frame->pop_value(this); + PyObject* obj = frame->pop_value(this); for(auto& [name, value]: obj->attr().items()){ Str s = name.str(); if(s.empty() || s[0] == '_') continue; diff --git a/src/cffi.h b/src/cffi.h index de589893..3039ce9a 100644 --- a/src/cffi.h +++ b/src/cffi.h @@ -14,7 +14,7 @@ struct NativeProxyFunc { _Fp func; NativeProxyFunc(_Fp func) : func(func) {} - PyVar operator()(VM* vm, Args& args) { + PyObject* operator()(VM* vm, Args& args) { if (args.size() != N) { vm->TypeError("expected " + std::to_string(N) + " arguments, but got " + 
std::to_string(args.size())); } @@ -22,13 +22,13 @@ struct NativeProxyFunc { } template - std::enable_if_t, PyVar> call(VM* vm, Args& args, std::index_sequence) { + std::enable_if_t, PyObject*> call(VM* vm, Args& args, std::index_sequence) { func(py_cast(vm, args[Is])...); return vm->None; } template - std::enable_if_t, PyVar> call(VM* vm, Args& args, std::index_sequence) { + std::enable_if_t, PyObject*> call(VM* vm, Args& args, std::index_sequence) { __Ret ret = func(py_cast(vm, args[Is])...); return VAR(std::move(ret)); } @@ -41,7 +41,7 @@ struct NativeProxyMethod { _Fp func; NativeProxyMethod(_Fp func) : func(func) {} - PyVar operator()(VM* vm, Args& args) { + PyObject* operator()(VM* vm, Args& args) { int actual_size = args.size() - 1; if (actual_size != N) { vm->TypeError("expected " + std::to_string(N) + " arguments, but got " + std::to_string(actual_size)); @@ -50,14 +50,14 @@ struct NativeProxyMethod { } template - std::enable_if_t, PyVar> call(VM* vm, Args& args, std::index_sequence) { + std::enable_if_t, PyObject*> call(VM* vm, Args& args, std::index_sequence) { T& self = py_cast(vm, args[0]); (self.*func)(py_cast(vm, args[Is+1])...); return vm->None; } template - std::enable_if_t, PyVar> call(VM* vm, Args& args, std::index_sequence) { + std::enable_if_t, PyObject*> call(VM* vm, Args& args, std::index_sequence) { T& self = py_cast(vm, args[0]); __Ret ret = (self.*func)(py_cast(vm, args[Is+1])...); return VAR(std::move(ret)); @@ -200,7 +200,7 @@ struct Pointer{ return Pointer(ctype, level, ptr-offset*unit_size()); } - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<-1>(type, "__new__", CPP_NOT_IMPLEMENTED()); vm->bind_method<0>(type, "__repr__", [](VM* vm, Args& args) { @@ -268,7 +268,7 @@ struct Pointer{ template inline T& ref() noexcept { return *reinterpret_cast(ptr); } - PyVar get(VM* vm){ + PyObject* get(VM* vm){ if(level > 1) return VAR_T(Pointer, ctype, 
level-1, ref()); switch(ctype->index){ #define CASE(T) case type_index(): return VAR(ref()) @@ -291,7 +291,7 @@ struct Pointer{ return VAR_T(Pointer, *this); } - void set(VM* vm, const PyVar& val){ + void set(VM* vm, PyObject* val){ if(level > 1) { Pointer& p = CAST(Pointer&, val); ref() = p.ptr; // We don't check the type, just copy the underlying address @@ -359,7 +359,7 @@ struct Value { Value& operator=(const Value& other) = delete; Value(const Value& other) = delete; - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<-1>(type, "__new__", CPP_NOT_IMPLEMENTED()); vm->bind_method<0>(type, "ptr", [](VM* vm, Args& args) { @@ -388,7 +388,7 @@ struct CType{ CType() : type(_type_db.get()) {} CType(const TypeInfo* type) : type(type) {} - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<1>(type, "__new__", [](VM* vm, Args& args) { const Str& name = CAST(Str&, args[0]); const TypeInfo* type = _type_db.get(name); @@ -404,8 +404,8 @@ struct CType{ }; void add_module_c(VM* vm){ - PyVar mod = vm->new_module("c"); - PyVar ptr_t = Pointer::register_class(vm, mod); + PyObject* mod = vm->new_module("c"); + PyObject* ptr_t = Pointer::register_class(vm, mod); Value::register_class(vm, mod); CType::register_class(vm, mod); @@ -462,11 +462,11 @@ void add_module_c(VM* vm){ }); } -PyVar py_var(VM* vm, void* p){ +PyObject* py_var(VM* vm, void* p){ return VAR_T(Pointer, _type_db.get(), (char*)p); } -PyVar py_var(VM* vm, char* p){ +PyObject* py_var(VM* vm, char* p){ return VAR_T(Pointer, _type_db.get(), (char*)p); } @@ -491,7 +491,7 @@ struct pointer { }; template -T py_pointer_cast(VM* vm, const PyVar& var){ +T py_pointer_cast(VM* vm, PyObject* var){ static_assert(std::is_pointer_v); Pointer& p = CAST(Pointer&, var); const TypeInfo* type = _type_db.get::baseT>(); @@ -503,14 +503,14 @@ T py_pointer_cast(VM* 
vm, const PyVar& var){ } template -T py_value_cast(VM* vm, const PyVar& var){ +T py_value_cast(VM* vm, PyObject* var){ static_assert(std::is_pod_v); Value& v = CAST(Value&, var); return *reinterpret_cast(v.data); } template -std::enable_if_t>, PyVar> +std::enable_if_t>, PyObject*> py_var(VM* vm, T p){ const TypeInfo* type = _type_db.get::baseT>(); if(type == nullptr) type = _type_db.get(); @@ -518,9 +518,9 @@ py_var(VM* vm, T p){ } template -std::enable_if_t>, PyVar> +std::enable_if_t>, PyObject*> py_var(VM* vm, T p){ - if constexpr(std::is_same_v) return p; + if constexpr(std::is_same_v) return p; const TypeInfo* type = _type_db.get(); return VAR_T(Value, type, &p); } diff --git a/src/codeobject.h b/src/codeobject.h index 97ad83b7..27981b72 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -94,7 +94,7 @@ struct CodeObject { return names.size() - 1; } - int add_const(PyVar v){ + int add_const(PyObject* v){ consts.push_back(v); return consts.size() - 1; } diff --git a/src/common.h b/src/common.h index 01a3609e..1cce45e6 100644 --- a/src/common.h +++ b/src/common.h @@ -57,7 +57,6 @@ namespace std = ::std; struct Dummy { }; struct DummyInstance { }; struct DummyModule { }; -#define DUMMY_VAL Dummy() struct Type { int index; @@ -85,4 +84,24 @@ struct Type { const float kLocalsLoadFactor = 0.67f; const float kInstAttrLoadFactor = 0.67f; const float kTypeAttrLoadFactor = 0.5f; + +static_assert(sizeof(i64) == sizeof(int*)); +static_assert(sizeof(f64) == sizeof(int*)); +static_assert(std::numeric_limits::is_iec559); +static_assert(std::numeric_limits::is_iec559); + +struct PyObject; +#define BITS(p) (reinterpret_cast(p)) +inline bool is_tagged(PyObject* p) noexcept { return (BITS(p) & 0b11) != 0b00; } +inline bool is_int(PyObject* p) noexcept { return (BITS(p) & 0b11) == 0b01; } +inline bool is_float(PyObject* p) noexcept { return (BITS(p) & 0b11) == 0b10; } + +inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept { + return is_tagged(a) && 
is_tagged(b); +} + +inline bool is_both_int(PyObject* a, PyObject* b) noexcept { + return is_int(a) && is_int(b); +} + } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index 3312d5be..0e6a5bb8 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -353,14 +353,14 @@ private: } void exprLiteral() { - PyVar value = parser->prev.value; + PyObject* value = parser->prev.value; int index = co()->add_const(value); emit(OP_LOAD_CONST, index); } void exprFString() { static const std::regex pattern(R"(\{(.*?)\})"); - PyVar value = parser->prev.value; + PyObject* value = parser->prev.value; Str s = CAST(Str, value); std::sregex_iterator begin(s.begin(), s.end(), pattern); std::sregex_iterator end; @@ -1059,7 +1059,7 @@ private: case 1: func.starred_arg = name; state+=1; break; case 2: { consume(TK("=")); - PyVarOrNull value = read_literal(); + PyObject* value = read_literal(); if(value == nullptr){ SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type)); } @@ -1115,10 +1115,10 @@ private: } } - PyVarOrNull read_literal(){ + PyObject* read_literal(){ if(match(TK("-"))){ consume(TK("@num")); - PyVar val = parser->prev.value; + PyObject* val = parser->prev.value; return vm->num_negated(val); } if(match(TK("@num"))) return parser->prev.value; @@ -1166,7 +1166,7 @@ public: code->optimize(vm); return code; }else if(mode()==JSON_MODE){ - PyVarOrNull value = read_literal(); + PyObject* value = read_literal(); if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value)); else if(match(TK("{"))) exprMap(); else if(match(TK("["))) exprList(); diff --git a/src/frame.h b/src/frame.h index 4b456d33..850cc6f8 100644 --- a/src/frame.h +++ b/src/frame.h @@ -7,27 +7,27 @@ namespace pkpy{ static THREAD_LOCAL uint64_t kFrameGlobalId = 0; struct Frame { - std::vector _data; + std::vector _data; int _ip = -1; int _next_ip = 0; const CodeObject* co; - PyVar _module; + PyObject* _module; NameDict_ _locals; NameDict_ _closure; const uint64_t 
id; - std::vector>> s_try_block; + std::vector>> s_try_block; inline NameDict& f_locals() noexcept { return _locals != nullptr ? *_locals : _module->attr(); } inline NameDict& f_globals() noexcept { return _module->attr(); } - inline PyVar* f_closure_try_get(StrName name) noexcept { + inline PyObject** f_closure_try_get(StrName name) noexcept { if(_closure == nullptr) return nullptr; return _closure->try_get(name); } Frame(const CodeObject_& co, - const PyVar& _module, + PyObject* _module, const NameDict_& _locals=nullptr, const NameDict_& _closure=nullptr) : co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) { } @@ -57,11 +57,11 @@ struct Frame { return _next_ip < co->codes.size(); } - inline PyVar pop(){ + inline PyObject* pop(){ #if PK_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif - PyVar v = std::move(_data.back()); + PyObject* v = _data.back(); _data.pop_back(); return v; } @@ -73,28 +73,28 @@ struct Frame { _data.pop_back(); } - inline void try_deref(VM*, PyVar&); + inline void try_deref(VM*, PyObject*&); - inline PyVar pop_value(VM* vm){ - PyVar value = pop(); + inline PyObject* pop_value(VM* vm){ + PyObject* value = pop(); try_deref(vm, value); return value; } - inline PyVar top_value(VM* vm){ - PyVar value = top(); + inline PyObject* top_value(VM* vm){ + PyObject* value = top(); try_deref(vm, value); return value; } - inline PyVar& top(){ + inline PyObject*& top(){ #if PK_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif return _data.back(); } - inline PyVar& top_1(){ + inline PyObject*& top_1(){ #if PK_EXTRA_CHECK if(_data.size() < 2) throw std::runtime_error("_data.size() < 2"); #endif @@ -117,7 +117,7 @@ struct Frame { bool jump_to_exception_handler(){ if(s_try_block.empty()) return false; - PyVar obj = pop(); + PyObject* obj = pop(); auto& p = s_try_block.back(); _data = std::move(p.second); _data.push_back(obj); diff --git a/src/gc.h 
b/src/gc.h index 3081b259..ce85395e 100644 --- a/src/gc.h +++ b/src/gc.h @@ -3,49 +3,38 @@ #include "obj.h" namespace pkpy { - using PyVar0 = PyObject*; + struct ManagedHeap{ + std::vector heap; - // a generational mark and sweep garbage collector - struct GC{ - using Generation = std::vector; - static const int kTotalGen = 3; - Generation gen[kTotalGen]; - - void add(PyVar0 obj){ - if(!obj->need_gc) return; - gen[0].push_back(obj); + template + PyObject* gcnew(Type type, T&& val){ + PyObject* obj = new Py_>(type, std::forward(val)); + obj->gc.enabled = true; + heap.push_back(obj); + return obj; } - void sweep(int index){ - Generation& g = gen[index]; - if(index < kTotalGen-1){ - for(int i=0; imarked){ - g[i]->marked = false; - gen[index+1].push_back(g[i]); - }else{ - delete g[i]; - } + void sweep(){ + std::vector alive; + for(PyObject* obj: heap){ + if(obj->gc.marked){ + obj->gc.marked = false; + alive.push_back(obj); + }else{ + delete obj; } - g.clear(); - }else{ - Generation alive; - // the oldest generation - for(int i=0; imarked){ - g[i]->marked = false; - alive.push_back(g[i]); - }else{ - delete g[i]; - } - } - g = std::move(alive); } + heap.clear(); + heap.swap(alive); } - void collect(int index){ - sweep(index); + void collect(VM* vm){ + std::vector roots = get_roots(vm); + for(PyObject* obj: roots) obj->mark(); + sweep(); } + + std::vector get_roots(VM* vm); }; } // namespace pkpy \ No newline at end of file diff --git a/src/io.h b/src/io.h index 549a9c6c..6ee42756 100644 --- a/src/io.h +++ b/src/io.h @@ -42,7 +42,7 @@ struct FileIO { if(!_fs.is_open()) vm->IOError(strerror(errno)); } - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<2>(type, "__new__", [](VM* vm, Args& args){ return VAR_T(FileIO, vm, CAST(Str, args[0]), CAST(Str, args[1]) @@ -79,15 +79,15 @@ struct FileIO { }; void add_module_io(VM* vm){ - PyVar mod = vm->new_module("io"); - PyVar type = 
FileIO::register_class(vm, mod); + PyObject* mod = vm->new_module("io"); + PyObject* type = FileIO::register_class(vm, mod); vm->bind_builtin_func<2>("open", [type](VM* vm, const Args& args){ return vm->call(type, args); }); } void add_module_os(VM* vm){ - PyVar mod = vm->new_module("os"); + PyObject* mod = vm->new_module("os"); // Working directory is shared by all VMs!! vm->bind_func<0>(mod, "getcwd", [](VM* vm, const Args& args){ return VAR(std::filesystem::current_path().string()); diff --git a/src/iter.h b/src/iter.h index 293602b4..1ebdbdb6 100644 --- a/src/iter.h +++ b/src/iter.h @@ -8,7 +8,7 @@ class RangeIter : public BaseIter { i64 current; Range r; public: - RangeIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) { + RangeIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) { this->r = OBJ_GET(Range, _ref); this->current = r.start; } @@ -17,7 +17,7 @@ public: return r.step > 0 ? current < r.stop : current > r.stop; } - PyVar next(){ + PyObject* next(){ if(!_has_next()) return nullptr; current += r.step; return VAR(current-r.step); @@ -29,8 +29,8 @@ class ArrayIter : public BaseIter { size_t index = 0; const T* p; public: - ArrayIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) { p = &OBJ_GET(T, _ref);} - PyVar next(){ + ArrayIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) { p = &OBJ_GET(T, _ref);} + PyObject* next(){ if(index == p->size()) return nullptr; return p->operator[](index++); } @@ -40,20 +40,20 @@ class StringIter : public BaseIter { int index = 0; Str* str; public: - StringIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) { + StringIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) { str = &OBJ_GET(Str, _ref); } - PyVar next() { + PyObject* next() { if(index == str->u8_length()) return nullptr; return VAR(str->u8_getitem(index++)); } }; -PyVar Generator::next(){ +PyObject* Generator::next(){ if(state == 2) return nullptr; vm->callstack.push(std::move(frame)); - PyVar ret = vm->_exec(); + PyObject* ret = vm->_exec(); if(ret == vm->_py_op_yield){ frame = 
std::move(vm->callstack.top()); vm->callstack.pop(); diff --git a/src/main.cpp b/src/main.cpp index 432cef8a..7ac4146f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -75,7 +75,7 @@ int main(int argc, char** argv){ // set parent path as cwd std::filesystem::current_path(filepath.parent_path()); - pkpy::PyVarOrNull ret = nullptr; + pkpy::PyObject* ret = nullptr; ret = vm->exec(src.c_str(), argv_1, pkpy::EXEC_MODE); pkpy_delete(vm); return ret != nullptr ? 0 : 1; diff --git a/src/memory.h b/src/memory.h index 2e446528..404a423b 100644 --- a/src/memory.h +++ b/src/memory.h @@ -4,31 +4,12 @@ namespace pkpy{ -struct PyObject; - -template -struct SpAllocator { - template - inline static int* alloc(){ - return (int*)malloc(sizeof(int) + sizeof(U)); - } - - inline static void dealloc(int* counter){ - ((T*)(counter + 1))->~T(); - free(counter); - } -}; - template struct shared_ptr { - union { - int* counter; - i64 bits; - }; - + int* counter; #define _t() (T*)(counter + 1) -#define _inc_counter() if(!is_tagged() && counter) ++(*counter) -#define _dec_counter() if(!is_tagged() && counter && --(*counter) == 0) SpAllocator::dealloc(counter) +#define _inc_counter() if(counter) ++(*counter) +#define _dec_counter() if(counter && --(*counter) == 0) {((T*)(counter + 1))->~T(); free(counter);} public: shared_ptr() : counter(nullptr) {} @@ -69,7 +50,6 @@ public: T* get() const { return _t(); } int use_count() const { - if(is_tagged()) return 0; return counter ? 
*counter : 0; } @@ -77,44 +57,20 @@ public: _dec_counter(); counter = nullptr; } - - inline constexpr bool is_tagged() const { - if constexpr(!std::is_same_v) return false; - return (bits & 0b11) != 0b00; - } - inline bool is_tag_00() const { return (bits & 0b11) == 0b00; } - inline bool is_tag_01() const { return (bits & 0b11) == 0b01; } - inline bool is_tag_10() const { return (bits & 0b11) == 0b10; } - inline bool is_tag_11() const { return (bits & 0b11) == 0b11; } }; #undef _t #undef _inc_counter #undef _dec_counter - template - shared_ptr make_sp(Args&&... args) { - static_assert(std::is_base_of_v, "U must be derived from T"); - static_assert(std::has_virtual_destructor_v, "T must have virtual destructor"); - static_assert(!std::is_same_v || (!std::is_same_v && !std::is_same_v)); - int* p = SpAllocator::template alloc(); *p = 1; - new(p+1) U(std::forward(args)...); - return shared_ptr(p); - } - template shared_ptr make_sp(Args&&... args) { - int* p = SpAllocator::template alloc(); *p = 1; + int* p = (int*)malloc(sizeof(int) + sizeof(T)); + *p = 1; new(p+1) T(std::forward(args)...); return shared_ptr(p); } -static_assert(sizeof(i64) == sizeof(int*)); -static_assert(sizeof(f64) == sizeof(int*)); -static_assert(sizeof(shared_ptr) == sizeof(int*)); -static_assert(std::numeric_limits::is_iec559); -static_assert(std::numeric_limits::is_iec559); - template struct SmallArrayPool { std::vector buckets[__Bucket+1]; @@ -145,10 +101,4 @@ struct SmallArrayPool { } } }; - - -typedef shared_ptr PyVar; -typedef PyVar PyVarOrNull; -typedef PyVar PyVarRef; - }; // namespace pkpy diff --git a/src/namedict.h b/src/namedict.h index 90f8d34a..22d6b783 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -6,7 +6,7 @@ namespace pkpy{ -const int kNameDictNodeSize = sizeof(StrName) + sizeof(PyVar); +const int kNameDictNodeSize = sizeof(StrName) + sizeof(PyObject*); template struct DictArrayPool { @@ -26,9 +26,7 @@ struct DictArrayPool { } void dealloc(StrName* head, uint16_t n){ - 
PyVar* _values = (PyVar*)(head + n); if(n > __Bucket || buckets[n].size() >= __BucketSize){ - for(int i=0; i(_keys + _capacity)[i]; + inline PyObject*& value(uint16_t i){ + return reinterpret_cast(_keys + _capacity)[i]; } - inline const PyVar& value(uint16_t i) const { - return reinterpret_cast(_keys + _capacity)[i]; + inline PyObject* value(uint16_t i) const { + return reinterpret_cast(_keys + _capacity)[i]; } NameDict(uint16_t capacity=2, float load_factor=0.67, uint16_t hash_seed=kHashSeeds[0]): @@ -123,19 +121,19 @@ while(!_keys[i].empty()) { \ i = (i + 1) & _mask; \ } - const PyVar& operator[](StrName key) const { + PyObject* operator[](StrName key) const { bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); return value(i); } - PyVar& get(StrName key){ - bool ok; uint16_t i; - HASH_PROBE(key, ok, i); - if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); - return value(i); - } + // PyObject*& get(StrName key){ + // bool ok; uint16_t i; + // HASH_PROBE(key, ok, i); + // if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); + // return value(i); + // } template void set(StrName key, T&& val){ @@ -154,7 +152,7 @@ while(!_keys[i].empty()) { \ void _rehash(bool resize){ StrName* old_keys = _keys; - PyVar* old_values = &value(0); + PyObject** old_values = &value(0); uint16_t old_capacity = _capacity; if(resize){ _capacity = find_next_capacity(_capacity * 2); @@ -177,18 +175,18 @@ while(!_keys[i].empty()) { \ _rehash(false); // do not resize } - inline PyVar* try_get(StrName key){ + inline PyObject** try_get(StrName key){ bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) return nullptr; return &value(i); } - inline bool try_set(StrName key, PyVar&& val){ + inline bool try_set(StrName key, PyObject* val){ bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) return false; - value(i) = std::move(val); + value(i) = val; return true; } @@ -213,8 +211,8 @@ 
while(!_keys[i].empty()) { \ _size--; } - std::vector> items() const { - std::vector> v; + std::vector> items() const { + std::vector> v; for(uint16_t i=0; i<_capacity; i++){ if(_keys[i].empty()) continue; v.push_back(std::make_pair(_keys[i], value(i))); @@ -231,7 +229,7 @@ while(!_keys[i].empty()) { \ return v; } - void apply_v(void(*f)(PyVar)) { + void apply_v(void(*f)(PyObject*)) { for(uint16_t i=0; i<_capacity; i++){ if(_keys[i].empty()) continue; f(value(i)); diff --git a/src/obj.h b/src/obj.h index 46acb280..80bfa100 100644 --- a/src/obj.h +++ b/src/obj.h @@ -12,7 +12,7 @@ struct Frame; struct BaseRef; class VM; -typedef std::function NativeFuncRaw; +typedef std::function NativeFuncRaw; typedef shared_ptr CodeObject_; typedef shared_ptr NameDict_; @@ -22,7 +22,7 @@ struct NativeFunc { bool method; NativeFunc(NativeFuncRaw f, int argc, bool method) : f(f), argc(argc), method(method) {} - inline PyVar operator()(VM* vm, Args& args) const; + inline PyObject* operator()(VM* vm, Args& args) const; }; struct Function { @@ -34,7 +34,7 @@ struct Function { std::vector kwargs_order; // runtime settings - PyVar _module = nullptr; + PyObject* _module = nullptr; NameDict_ _closure = nullptr; bool has_name(StrName val) const { @@ -46,9 +46,9 @@ struct Function { }; struct BoundMethod { - PyVar obj; - PyVar method; - BoundMethod(const PyVar& obj, const PyVar& method) : obj(obj), method(method) {} + PyObject* obj; + PyObject* method; + BoundMethod(PyObject* obj, PyObject* method) : obj(obj), method(method) {} }; struct Range { @@ -58,9 +58,9 @@ struct Range { }; struct StarWrapper { - PyVar obj; + PyObject* obj; bool rvalue; - StarWrapper(const PyVar& obj, bool rvalue): obj(obj), rvalue(rvalue) {} + StarWrapper(PyObject* obj, bool rvalue): obj(obj), rvalue(rvalue) {} }; struct Slice { @@ -79,30 +79,34 @@ struct Slice { class BaseIter { protected: VM* vm; - PyVar _ref; // keep a reference to the object so it will not be deleted while iterating + PyObject* _ref; // keep a 
reference to the object so it will not be deleted while iterating public: - virtual PyVar next() = 0; - PyVarRef loop_var; - BaseIter(VM* vm, PyVar _ref) : vm(vm), _ref(_ref) {} + virtual PyObject* next() = 0; + PyObject* loop_var; + BaseIter(VM* vm, PyObject* _ref) : vm(vm), _ref(_ref) {} virtual ~BaseIter() = default; }; +struct GCHeader { + bool enabled; // whether this object is managed by GC + bool marked; // whether this object is marked + GCHeader() : enabled(false), marked(false) {} +}; + struct PyObject { - bool need_gc; - bool marked; - /**********/ + GCHeader gc; Type type; NameDict* _attr; inline bool is_attr_valid() const noexcept { return _attr != nullptr; } inline NameDict& attr() noexcept { return *_attr; } - inline const PyVar& attr(StrName name) const noexcept { return _attr->get(name); } + inline PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; } virtual void* value() = 0; virtual void mark() { - if(!need_gc || marked) return; - marked = true; - if(is_attr_valid()) attr().apply_v([](PyVar v){ v->mark(); }); + if(!gc.enabled || gc.marked) return; + gc.marked = true; + if(is_attr_valid()) attr().apply_v([](PyObject* v){ v->mark(); }); } PyObject(Type type) : type(type) {} @@ -141,67 +145,51 @@ struct Py_ : PyObject { const int kTpIntIndex = 2; const int kTpFloatIndex = 3; -inline bool is_type(const PyVar& obj, Type type) noexcept { +inline bool is_type(PyObject* obj, Type type) noexcept { switch(type.index){ - case kTpIntIndex: return obj.is_tag_01(); - case kTpFloatIndex: return obj.is_tag_10(); - default: return !obj.is_tagged() && obj->type == type; + case kTpIntIndex: return is_tag_01(obj); + case kTpFloatIndex: return is_tag_10(obj); + default: return !is_tagged(obj) && obj->type == type; } } -inline bool is_both_int_or_float(const PyVar& a, const PyVar& b) noexcept { - return a.is_tagged() && b.is_tagged(); -} - -inline bool is_both_int(const PyVar& a, const PyVar& b) noexcept { - return (a.bits & b.bits & 0b11) == 0b01; 
-} - -inline bool is_int(const PyVar& obj) noexcept { - return obj.is_tag_01(); -} - -inline bool is_float(const PyVar& obj) noexcept { - return obj.is_tag_10(); -} - #define PY_CLASS(T, mod, name) \ static Type _type(VM* vm) { \ static const StrName __x0(#mod); \ static const StrName __x1(#name); \ return OBJ_GET(Type, vm->_modules[__x0]->attr(__x1)); \ } \ - static PyVar register_class(VM* vm, PyVar mod) { \ - PyVar type = vm->new_type_object(mod, #name, vm->tp_object); \ + static PyObject* register_class(VM* vm, PyObject* mod) { \ + PyObject* type = vm->new_type_object(mod, #name, vm->tp_object); \ if(OBJ_NAME(mod) != #mod) UNREACHABLE(); \ T::_register(vm, mod, type); \ type->attr()._try_perfect_rehash(); \ return type; \ } -union __8B { +union BitsCvt { i64 _int; f64 _float; - __8B(i64 val) : _int(val) {} - __8B(f64 val) : _float(val) {} + BitsCvt(i64 val) : _int(val) {} + BitsCvt(f64 val) : _float(val) {} }; template struct is_py_class : std::false_type {}; template struct is_py_class> : std::true_type {}; template -void _check_py_class(VM* vm, const PyVar& var); +void _check_py_class(VM* vm, PyObject* var); template -T py_pointer_cast(VM* vm, const PyVar& var); +T py_pointer_cast(VM* vm, PyObject* var); template -T py_value_cast(VM* vm, const PyVar& var); +T py_value_cast(VM* vm, PyObject* var); struct Discarded {}; template -__T py_cast(VM* vm, const PyVar& obj) { +__T py_cast(VM* vm, PyObject* obj) { using T = std::decay_t<__T>; if constexpr(std::is_pointer_v){ return py_pointer_cast(vm, obj); @@ -216,7 +204,7 @@ __T py_cast(VM* vm, const PyVar& obj) { } template -__T _py_cast(VM* vm, const PyVar& obj) { +__T _py_cast(VM* vm, PyObject* obj) { using T = std::decay_t<__T>; if constexpr(std::is_pointer_v<__T>){ return py_pointer_cast<__T>(vm, obj); @@ -228,7 +216,7 @@ __T _py_cast(VM* vm, const PyVar& obj) { } #define VAR(x) py_var(vm, x) -#define VAR_T(T, ...) vm->new_object(T::_type(vm), T(__VA_ARGS__)) +#define VAR_T(T, ...) 
vm->heap.gcnew(T::_type(vm), T(__VA_ARGS__)) #define CAST(T, x) py_cast(vm, x) #define _CAST(T, x) _py_cast(vm, x) diff --git a/src/parser.h b/src/parser.h index c867ea4a..63b450ff 100644 --- a/src/parser.h +++ b/src/parser.h @@ -54,7 +54,7 @@ struct Token{ const char* start; int length; int line; - PyVar value; + PyObject* value; Str str() const { return Str(start, length);} @@ -271,7 +271,7 @@ struct Parser { return true; } - void set_next_token(TokenIndex type, PyVar value=nullptr) { + void set_next_token(TokenIndex type, PyObject* value=nullptr) { switch(type){ case TK("{"): case TK("["): case TK("("): brackets_level++; break; case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; diff --git a/src/pocketpy.h b/src/pocketpy.h index 0b105367..6bf8bb66 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -69,7 +69,7 @@ void init_builtins(VM* _vm) { vm->TypeError("super(type, obj): obj must be an instance or subtype of type"); } Type base = vm->_all_types[type.index].base; - return vm->new_object(vm->tp_super, Super(args[1], base)); + return vm->heap.gcnew(vm->tp_super, Super(args[1], base)); }); _vm->bind_builtin_func<2>("isinstance", [](VM* vm, Args& args) { @@ -79,16 +79,16 @@ void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<1>("id", [](VM* vm, Args& args) { - const PyVar& obj = args[0]; - if(obj.is_tagged()) return VAR((i64)0); - return VAR(obj.bits); + PyObject* obj = args[0]; + if(is_tagged(obj)) return VAR((i64)0); + return VAR(BITS(obj)); }); _vm->bind_builtin_func<2>("divmod", [](VM* vm, Args& args) { i64 lhs = CAST(i64, args[0]); i64 rhs = CAST(i64, args[1]); if(rhs == 0) vm->ZeroDivisionError(); - return VAR(two_args(VAR(lhs/rhs), VAR(lhs%rhs))); + return VAR(Tuple{VAR(lhs/rhs), VAR(lhs%rhs)}); }); _vm->bind_builtin_func<1>("eval", [](VM* vm, Args& args) { @@ -169,8 +169,8 @@ void init_builtins(VM* _vm) { }); _vm->bind_method<0>("object", "__repr__", [](VM* vm, Args& args) { - PyVar self = args[0]; - std::uintptr_t addr = 
self.is_tagged() ? 0 : (uintptr_t)self.get(); + PyObject* self = args[0]; + std::uintptr_t addr = is_tagged(self) ? 0 : (uintptr_t)self; StrStream ss; ss << std::hex << addr; Str s = "<" + OBJ_NAME(vm->_t(self)) + " object at 0x" + ss.str() + ">"; @@ -405,7 +405,7 @@ void init_builtins(VM* _vm) { _vm->bind_method<1>("str", "join", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); StrStream ss; - PyVar obj = vm->asList(args[1]); + PyObject* obj = vm->asList(args[1]); const List& list = CAST(List&, obj); for (int i = 0; i < list.size(); ++i) { if (i > 0) ss << self; @@ -423,7 +423,7 @@ void init_builtins(VM* _vm) { _vm->bind_method<1>("list", "extend", [](VM* vm, Args& args) { List& self = CAST(List&, args[0]); - PyVar obj = vm->asList(args[1]); + PyObject* obj = vm->asList(args[1]); const List& list = CAST(List&, obj); self.insert(self.end(), list.begin(), list.end()); return vm->None; @@ -575,7 +575,7 @@ void init_builtins(VM* _vm) { #endif void add_module_time(VM* vm){ - PyVar mod = vm->new_module("time"); + PyObject* mod = vm->new_module("time"); vm->bind_func<0>(mod, "time", [](VM* vm, Args& args) { auto now = std::chrono::high_resolution_clock::now(); return VAR(std::chrono::duration_cast(now.time_since_epoch()).count() / 1000000.0); @@ -583,7 +583,7 @@ void add_module_time(VM* vm){ } void add_module_sys(VM* vm){ - PyVar mod = vm->new_module("sys"); + PyObject* mod = vm->new_module("sys"); vm->setattr(mod, "version", VAR(PK_VERSION)); vm->bind_func<1>(mod, "getrefcount", CPP_LAMBDA(VAR(args[0].use_count()))); @@ -596,7 +596,7 @@ void add_module_sys(VM* vm){ } void add_module_json(VM* vm){ - PyVar mod = vm->new_module("json"); + PyObject* mod = vm->new_module("json"); vm->bind_func<1>(mod, "loads", [](VM* vm, Args& args) { const Str& expr = CAST(Str&, args[0]); CodeObject_ code = vm->compile(expr, "", JSON_MODE); @@ -607,7 +607,7 @@ void add_module_json(VM* vm){ } void add_module_math(VM* vm){ - PyVar mod = vm->new_module("math"); + PyObject* mod = 
vm->new_module("math"); vm->setattr(mod, "pi", VAR(3.1415926535897932384)); vm->setattr(mod, "e" , VAR(2.7182818284590452354)); @@ -626,9 +626,9 @@ void add_module_math(VM* vm){ } void add_module_dis(VM* vm){ - PyVar mod = vm->new_module("dis"); + PyObject* mod = vm->new_module("dis"); vm->bind_func<1>(mod, "dis", [](VM* vm, Args& args) { - PyVar f = args[0]; + PyObject* f = args[0]; if(is_type(f, vm->tp_bound_method)) f = CAST(BoundMethod, args[0]).method; CodeObject_ code = CAST(Function, f).code; (*vm->_stdout) << vm->disassemble(code); @@ -644,14 +644,14 @@ struct ReMatch { std::smatch m; ReMatch(i64 start, i64 end, std::smatch m) : start(start), end(end), m(m) {} - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_method<-1>(type, "__init__", CPP_NOT_IMPLEMENTED()); vm->bind_method<0>(type, "start", CPP_LAMBDA(VAR(CAST(ReMatch&, args[0]).start))); vm->bind_method<0>(type, "end", CPP_LAMBDA(VAR(CAST(ReMatch&, args[0]).end))); vm->bind_method<0>(type, "span", [](VM* vm, Args& args) { auto& self = CAST(ReMatch&, args[0]); - return VAR(two_args(VAR(self.start), VAR(self.end))); + return VAR(Tuple{VAR(self.start), VAR(self.end)}); }); vm->bind_method<1>(type, "group", [](VM* vm, Args& args) { @@ -663,7 +663,7 @@ struct ReMatch { } }; -PyVar _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){ +PyObject* _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){ std::regex re(pattern); std::smatch m; if(std::regex_search(string, m, re)){ @@ -676,7 +676,7 @@ PyVar _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* v }; void add_module_re(VM* vm){ - PyVar mod = vm->new_module("re"); + PyObject* mod = vm->new_module("re"); ReMatch::register_class(vm, mod); vm->bind_func<2>(mod, "match", [](VM* vm, Args& args) { @@ -740,7 +740,7 @@ struct Random{ gen.seed(seed); } - static void _register(VM* vm, PyVar mod, PyVar type){ + static 
void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<0>(type, "__new__", CPP_LAMBDA(VAR_T(Random))); vm->bind_method<1>(type, "seed", native_proxy_callable(&Random::seed)); vm->bind_method<2>(type, "randint", native_proxy_callable(&Random::randint)); @@ -750,7 +750,7 @@ struct Random{ }; void add_module_random(VM* vm){ - PyVar mod = vm->new_module("random"); + PyObject* mod = vm->new_module("random"); Random::register_class(vm, mod); CodeObject_ code = vm->compile(kPythonLibs["random"], "random.py", EXEC_MODE); vm->_exec(code, mod); @@ -851,7 +851,7 @@ extern "C" { /// Return `__repr__` of the result. /// If the variable is not found, return `nullptr`. char* pkpy_vm_get_global(pkpy::VM* vm, const char* name){ - pkpy::PyVar* val = vm->_main->attr().try_get(name); + pkpy::PyObject** val = vm->_main->attr().try_get(name); if(val == nullptr) return nullptr; try{ pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(*val)); @@ -867,7 +867,7 @@ extern "C" { /// Return `__repr__` of the result. /// If there is any error, return `nullptr`. char* pkpy_vm_eval(pkpy::VM* vm, const char* source){ - pkpy::PyVarOrNull ret = vm->exec(source, "", pkpy::EVAL_MODE); + pkpy::PyObject* ret = vm->exec(source, "", pkpy::EVAL_MODE); if(ret == nullptr) return nullptr; try{ pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(ret)); @@ -950,13 +950,13 @@ extern "C" { for(int i=0; mod[i]; i++) if(mod[i] == ' ') return nullptr; for(int i=0; name[i]; i++) if(name[i] == ' ') return nullptr; std::string f_header = std::string(mod) + '.' + name + '#' + std::to_string(kGlobalBindId++); - pkpy::PyVar obj = vm->_modules.contains(mod) ? vm->_modules[mod] : vm->new_module(mod); + pkpy::PyObject* obj = vm->_modules.contains(mod) ? 
vm->_modules[mod] : vm->new_module(mod); vm->bind_func<-1>(obj, name, [ret_code, f_header](pkpy::VM* vm, const pkpy::Args& args){ pkpy::StrStream ss; ss << f_header; for(int i=0; icall(args[i], pkpy::__json__); + pkpy::PyObject* x = vm->call(args[i], pkpy::__json__); ss << pkpy::CAST(pkpy::Str&, x); } char* packet = strdup(ss.str().c_str()); diff --git a/src/ref.h b/src/ref.h index 9719d218..88d3e58a 100644 --- a/src/ref.h +++ b/src/ref.h @@ -6,8 +6,8 @@ namespace pkpy { struct BaseRef { - virtual PyVar get(VM*, Frame*) const = 0; - virtual void set(VM*, Frame*, PyVar) const = 0; + virtual PyObject* get(VM*, Frame*) const = 0; + virtual void set(VM*, Frame*, PyObject*) const = 0; virtual void del(VM*, Frame*) const = 0; virtual ~BaseRef() = default; }; @@ -18,8 +18,8 @@ struct NameRef : BaseRef { inline NameScope scope() const { return pair.second; } NameRef(const std::pair& pair) : pair(pair) {} - PyVar get(VM* vm, Frame* frame) const{ - PyVar* val; + PyObject* get(VM* vm, Frame* frame) const{ + PyObject** val; val = frame->f_locals().try_get(name()); if(val != nullptr) return *val; val = frame->f_closure_try_get(name()); @@ -32,12 +32,12 @@ struct NameRef : BaseRef { return nullptr; } - void set(VM* vm, Frame* frame, PyVar val) const{ + void set(VM* vm, Frame* frame, PyObject* val) const{ switch(scope()) { - case NAME_LOCAL: frame->f_locals().set(name(), std::move(val)); break; + case NAME_LOCAL: frame->f_locals().set(name(), val); break; case NAME_GLOBAL: - if(frame->f_locals().try_set(name(), std::move(val))) return; - frame->f_globals().set(name(), std::move(val)); + if(frame->f_locals().try_set(name(), val)) return; + frame->f_globals().set(name(), val); break; default: UNREACHABLE(); } @@ -70,15 +70,15 @@ struct NameRef : BaseRef { }; struct AttrRef : BaseRef { - mutable PyVar obj; + mutable PyObject* obj; NameRef attr; - AttrRef(PyVar obj, NameRef attr) : obj(obj), attr(attr) {} + AttrRef(PyObject* obj, NameRef attr) : obj(obj), attr(attr) {} - PyVar 
get(VM* vm, Frame* frame) const{ + PyObject* get(VM* vm, Frame* frame) const{ return vm->getattr(obj, attr.name()); } - void set(VM* vm, Frame* frame, PyVar val) const{ + void set(VM* vm, Frame* frame, PyObject* val) const{ vm->setattr(obj, attr.name(), std::move(val)); } @@ -90,22 +90,22 @@ struct AttrRef : BaseRef { }; struct IndexRef : BaseRef { - mutable PyVar obj; - PyVar index; - IndexRef(PyVar obj, PyVar index) : obj(obj), index(index) {} + mutable PyObject* obj; + PyObject* index; + IndexRef(PyObject* obj, PyObject* index) : obj(obj), index(index) {} - PyVar get(VM* vm, Frame* frame) const{ - return vm->fast_call(__getitem__, two_args(obj, index)); + PyObject* get(VM* vm, Frame* frame) const{ + return vm->fast_call(__getitem__, Args{obj, index}); } - void set(VM* vm, Frame* frame, PyVar val) const{ + void set(VM* vm, Frame* frame, PyObject* val) const{ Args args(3); args[0] = obj; args[1] = index; args[2] = std::move(val); vm->fast_call(__setitem__, std::move(args)); } void del(VM* vm, Frame* frame) const{ - vm->fast_call(__delitem__, two_args(obj, index)); + vm->fast_call(__delitem__, Args{obj, index}); } }; @@ -113,7 +113,7 @@ struct TupleRef : BaseRef { Tuple objs; TupleRef(Tuple&& objs) : objs(std::move(objs)) {} - PyVar get(VM* vm, Frame* frame) const{ + PyObject* get(VM* vm, Frame* frame) const{ Tuple args(objs.size()); for (int i = 0; i < objs.size(); i++) { args[i] = vm->PyRef_AS_C(objs[i])->get(vm, frame); @@ -121,11 +121,11 @@ struct TupleRef : BaseRef { return VAR(std::move(args)); } - void set(VM* vm, Frame* frame, PyVar val) const{ + void set(VM* vm, Frame* frame, PyObject* val) const{ val = vm->asIter(val); BaseIter* iter = vm->PyIter_AS_C(val); for(int i=0; itp_star_wrapper)){ auto& star = _CAST(StarWrapper&, objs[i]); if(star.rvalue) vm->ValueError("can't use starred expression here"); @@ -141,7 +141,7 @@ struct TupleRef : BaseRef { vm->PyRef_AS_C(objs[i])->set(vm, frame, x); } } - PyVarOrNull x = iter->next(); + PyObject* x = iter->next(); 
if(x != nullptr) vm->ValueError("too many values to unpack"); } @@ -152,19 +152,19 @@ struct TupleRef : BaseRef { template -PyVarRef VM::PyRef(P&& value) { +PyObject* VM::PyRef(P&& value) { static_assert(std::is_base_of_v>); - return new_object(tp_ref, std::forward

(value)); + return heap.gcnew

(tp_ref, std::forward

(value)); } -const BaseRef* VM::PyRef_AS_C(const PyVar& obj) +const BaseRef* VM::PyRef_AS_C(PyObject* obj) { if(!is_type(obj, tp_ref)) TypeError("expected an l-value"); return static_cast(obj->value()); } /***** Frame's Impl *****/ -inline void Frame::try_deref(VM* vm, PyVar& v){ +inline void Frame::try_deref(VM* vm, PyObject*& v){ if(is_type(v, vm->tp_ref)) v = vm->PyRef_AS_C(v)->get(vm, this); } diff --git a/src/tuplelist.h b/src/tuplelist.h index a07459e8..97710de4 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -3,6 +3,7 @@ #include "common.h" #include "memory.h" #include "str.h" +#include namespace pkpy { using List = std::vector; @@ -33,6 +34,11 @@ namespace pkpy { other._size = 0; } + Args(std::initializer_list list) : Args(list.size()){ + int i=0; + for(auto& p : list) _args[i++] = p; + } + static pkpy::Args from_list(List&& other) noexcept { Args ret(other.size()); memcpy((void*)ret._args, (void*)other.data(), sizeof(PyObject*)*ret.size()); @@ -82,30 +88,6 @@ namespace pkpy { return _zero; } - template - Args one_arg(T&& a) { - Args ret(1); - ret[0] = std::forward(a); - return ret; - } - - template - Args two_args(T1&& a, T2&& b) { - Args ret(2); - ret[0] = std::forward(a); - ret[1] = std::forward(b); - return ret; - } - - template - Args three_args(T1&& a, T2&& b, T3&& c) { - Args ret(3); - ret[0] = std::forward(a); - ret[1] = std::forward(b); - ret[2] = std::forward(c); - return ret; - } - typedef Args Tuple; THREAD_LOCAL SmallArrayPool Args::_pool; } // namespace pkpy \ No newline at end of file diff --git a/src/vm.h b/src/vm.h index e90b1544..bde67257 100644 --- a/src/vm.h +++ b/src/vm.h @@ -1,7 +1,9 @@ #pragma once +#include "common.h" #include "frame.h" #include "error.h" +#include "gc.h" namespace pkpy{ @@ -20,8 +22,8 @@ namespace pkpy{ template<> ctype& _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - PyObject* py_var(VM* vm, const ctype& value) { return vm->new_object(vm->ptype, value);} \ - PyObject* py_var(VM* vm, 
ctype&& value) { return vm->new_object(vm->ptype, std::move(value));} + PyObject* py_var(VM* vm, const ctype& value) { return vm->heap.gcnew(vm->ptype, value);} \ + PyObject* py_var(VM* vm, ctype&& value) { return vm->heap.gcnew(vm->ptype, std::move(value));} class Generator: public BaseIter { std::unique_ptr frame; @@ -41,6 +43,7 @@ struct PyTypeInfo{ class VM { VM* vm; // self reference for simplify code + ManagedHeap heap; public: std::stack< std::unique_ptr > callstack; std::vector _all_types; @@ -78,11 +81,10 @@ public: } init_builtin_types(); - // for(int i=0; i<128; i++) _ascii_str_pool[i] = new_object(tp_str, std::string(1, (char)i)); } PyObject* asStr(PyObject* obj){ - PyVarOrNull f = getattr(obj, __str__, false, true); + PyObject* f = getattr(obj, __str__, false, true); if(f != nullptr) return call(f); return asRepr(obj); } @@ -95,8 +97,8 @@ public: } PyObject* asIter(PyObject* obj){ - if(is_type(obj, tp_native_iterator)) return obj; - PyVarOrNull iter_f = getattr(obj, __iter__, false, true); + if(is_type(obj, tp_iterator)) return obj; + PyObject* iter_f = getattr(obj, __iter__, false, true); if(iter_f != nullptr) return call(iter_f); TypeError(OBJ_NAME(_t(obj)).escape(true) + " object is not iterable"); return nullptr; @@ -104,7 +106,7 @@ public: PyObject* asList(PyObject* iterable){ if(is_type(iterable, tp_list)) return iterable; - return call(_t(tp_list), one_arg(iterable)); + return call(_t(tp_list), Args{iterable}); } PyObject** find_name_in_mro(PyObject* cls, StrName name){ @@ -191,13 +193,13 @@ public: PyObject* property(NativeFuncRaw fget){ PyObject* p = builtins->attr("property"); - PyObject* method = new_object(tp_native_function, NativeFunc(fget, 1, false)); - return call(p, one_arg(method)); + PyObject* method = heap.gcnew(tp_native_function, NativeFunc(fget, 1, false)); + return call(p, Args{method}); } PyObject* new_type_object(PyObject* mod, StrName name, Type base){ // use gcnew - PyObject* obj = make_sp>(tp_type, _all_types.size()); + 
PyObject* obj = new Py_(tp_type, _all_types.size()); PyTypeInfo info{ .obj = obj, .base = base, @@ -213,30 +215,6 @@ public: return OBJ_GET(Type, obj); } - template - inline PyObject* new_object(PyObject* type, const T& _value) { -#if PK_EXTRA_CHECK - if(!is_type(type, tp_type)) UNREACHABLE(); -#endif - return make_sp>>(OBJ_GET(Type, type), _value); - } - template - inline PyObject* new_object(PyObject* type, T&& _value) { -#if PK_EXTRA_CHECK - if(!is_type(type, tp_type)) UNREACHABLE(); -#endif - return make_sp>>(OBJ_GET(Type, type), std::move(_value)); - } - - template - inline PyObject* new_object(Type type, const T& _value) { - return make_sp>>(type, _value); - } - template - inline PyObject* new_object(Type type, T&& _value) { - return make_sp>>(type, std::move(_value)); - } - PyObject* _find_type(const Str& type){ PyObject** obj = builtins->attr().try_get(type); if(!obj){ @@ -282,19 +260,19 @@ public: // for quick access Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str; Type tp_list, tp_tuple; - Type tp_function, tp_native_function, tp_native_iterator, tp_bound_method; + Type tp_function, tp_native_function, tp_iterator, tp_bound_method; Type tp_slice, tp_range, tp_module, tp_ref; Type tp_super, tp_exception, tp_star_wrapper; template inline PyObject* PyIter(P&& value) { static_assert(std::is_base_of_v>); - return new_object(tp_native_iterator, std::forward

(value)); + return heap.gcnew

(tp_iterator, std::forward

(value)); } inline BaseIter* PyIter_AS_C(PyObject* obj) { - check_type(obj, tp_native_iterator); + check_type(obj, tp_iterator); return static_cast(obj->value()); } @@ -369,7 +347,7 @@ public: PyObject* _exec(); template - PyVarRef PyRef(P&& value); + PyObject* PyRef(P&& value); const BaseRef* PyRef_AS_C(PyObject* obj); }; @@ -458,23 +436,23 @@ template<> float py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_float); i64 bits = obj.bits; bits = (bits >> 2) << 2; - return __8B(bits)._float; + return BitsCvt(bits)._float; } template<> float _py_cast(VM* vm, PyObject* obj){ i64 bits = obj.bits; bits = (bits >> 2) << 2; - return __8B(bits)._float; + return BitsCvt(bits)._float; } template<> double py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_float); i64 bits = obj.bits; bits = (bits >> 2) << 2; - return __8B(bits)._float; + return BitsCvt(bits)._float; } template<> double _py_cast(VM* vm, PyObject* obj){ i64 bits = obj.bits; bits = (bits >> 2) << 2; - return __8B(bits)._float; + return BitsCvt(bits)._float; } @@ -502,7 +480,7 @@ PY_VAR_INT(unsigned long long) #define PY_VAR_FLOAT(T) \ PyObject* py_var(VM* vm, T _val){ \ f64 val = static_cast(_val); \ - i64 bits = __8B(val)._int; \ + i64 bits = BitsCvt(val)._int; \ bits = (bits >> 2) << 2; \ bits |= 0b10; \ return reinterpret_cast(bits); \ @@ -561,7 +539,7 @@ PyObject* VM::asBool(PyObject* obj){ if(obj == None) return False; if(is_type(obj, tp_int)) return VAR(CAST(i64, obj) != 0); if(is_type(obj, tp_float)) return VAR(CAST(f64, obj) != 0.0); - PyVarOrNull len_fn = getattr(obj, __len__, false, true); + PyObject* len_fn = getattr(obj, __len__, false, true); if(len_fn != nullptr){ PyObject* ret = call(len_fn); return VAR(CAST(i64, ret) > 0); @@ -596,8 +574,11 @@ PyObject* VM::asRepr(PyObject* obj){ } PyObject* VM::new_module(StrName name) { - PyObject* obj = new_object(tp_module, DummyModule()); + PyObject* obj = new Py_(tp_module, DummyModule()); obj->attr().set(__name__, VAR(name.str())); + // we 
do not allow override in order to avoid memory leak + // it is because Module objects are not garbage collected + if(_modules.contains(name)) UNREACHABLE(); _modules.set(name, obj); return obj; } @@ -672,9 +653,11 @@ Str VM::disassemble(CodeObject_ co){ } void VM::init_builtin_types(){ - // Py_(Type type, T&& val) - PyVar _tp_object = make_sp>(Type(1), Type(0)); - PyVar _tp_type = make_sp>(Type(1), Type(1)); + PyObject* _tp_object = new Py_(Type(1), Type(0)); + PyObject* _tp_type = new Py_(Type(1), Type(1)); + // PyTypeObject is managed by _all_types + // PyModuleObject is managed by _modules + // They are not managed by GC, so we use a simple "new" _all_types.push_back({.obj = _tp_object, .base = -1, .name = "object"}); _all_types.push_back({.obj = _tp_type, .base = 0, .name = "type"}); tp_object = 0; tp_type = 1; @@ -695,17 +678,17 @@ void VM::init_builtin_types(){ tp_function = _new_type_object("function"); tp_native_function = _new_type_object("native_function"); - tp_native_iterator = _new_type_object("native_iterator"); + tp_iterator = _new_type_object("iterator"); tp_bound_method = _new_type_object("bound_method"); tp_super = _new_type_object("super"); tp_exception = _new_type_object("Exception"); - this->None = new_object(_new_type_object("NoneType"), DUMMY_VAL); - this->Ellipsis = new_object(_new_type_object("ellipsis"), DUMMY_VAL); - this->True = new_object(tp_bool, true); - this->False = new_object(tp_bool, false); - this->_py_op_call = new_object(_new_type_object("_py_op_call"), DUMMY_VAL); - this->_py_op_yield = new_object(_new_type_object("_py_op_yield"), DUMMY_VAL); + this->None = new Py_(_new_type_object("NoneType"), {}); + this->Ellipsis = new Py_(_new_type_object("ellipsis"), {}); + this->True = new Py_(tp_bool, {}); + this->False = new Py_(tp_bool, {}); + this->_py_op_call = new Py_(_new_type_object("_py_op_call"), {}); + this->_py_op_yield = new Py_(_new_type_object("_py_op_yield"), {}); this->builtins = new_module("builtins"); this->_main = 
new_module("__main__"); @@ -735,8 +718,8 @@ PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCal if(new_f != nullptr){ obj = call(*new_f, std::move(args), kwargs, false); }else{ - obj = new_object(_callable, DummyInstance()); - PyVarOrNull init_f = getattr(obj, __init__, false, true); + obj = heap.gcnew(_callable, {}); + PyObject* init_f = getattr(obj, __init__, false, true); if (init_f != nullptr) call(init_f, std::move(args), kwargs, false); } return obj; @@ -801,7 +784,7 @@ PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCal return _exec(); } - PyVarOrNull call_f = getattr(_callable, __call__, false, true); + PyObject* call_f = getattr(_callable, __call__, false, true); if(call_f != nullptr){ return call(call_f, std::move(args), kwargs, false); } @@ -829,42 +812,44 @@ using Super = std::pair; // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){ - PyObject* objtype = _t(*obj).get(); - if(is_type(*obj, tp_super)){ - const Super& super = OBJ_GET(Super, *obj); - obj = &super.first; - objtype = _t(super.second).get(); + PyObject* objtype = _t(obj); + // handle super() proxy + if(is_type(obj, tp_super)){ + const Super& super = OBJ_GET(Super, obj); + obj = super.first; + objtype = _t(super.second); } PyObject** cls_var = find_name_in_mro(objtype, name); if(cls_var != nullptr){ // handle descriptor PyObject** descr_get = _t(*cls_var)->attr().try_get(__get__); - if(descr_get != nullptr) return call(*descr_get, two_args(*cls_var, *obj)); + if(descr_get != nullptr) return call(*descr_get, Args{*cls_var, obj}); } // handle instance __dict__ - if(!class_only && !(*obj).is_tagged() && (*obj)->is_attr_valid()){ - PyObject** val = (*obj)->attr().try_get(name); + if(!class_only && !is_tagged(obj) && obj->is_attr_valid()){ + PyObject** val = obj->attr().try_get(name); if(val != nullptr) return *val; } if(cls_var != 
nullptr){ // bound method is non-data descriptor if(is_type(*cls_var, tp_function) || is_type(*cls_var, tp_native_function)){ - return VAR(BoundMethod(*obj, *cls_var)); + return VAR(BoundMethod(obj, *cls_var)); } return *cls_var; } - if(throw_err) AttributeError(*obj, name); + if(throw_err) AttributeError(obj, name); return nullptr; } template void VM::setattr(PyObject* obj, StrName name, T&& value){ - static_assert(std::is_same_v, PyVar>); - PyObject* objtype = _t(obj).get(); + static_assert(std::is_same_v, PyObject*>); + PyObject* objtype = _t(obj); + // handle super() proxy if(is_type(obj, tp_super)){ Super& super = OBJ_GET(Super, *obj); obj = super.first; - objtype = _t(super.second).get(); + objtype = _t(super.second); } PyObject** cls_var = find_name_in_mro(objtype, name); if(cls_var != nullptr){ @@ -873,7 +858,7 @@ void VM::setattr(PyObject* obj, StrName name, T&& value){ if(cls_var_t->attr().contains(__get__)){ PyObject** descr_set = cls_var_t->attr().try_get(__set__); if(descr_set != nullptr){ - call(*descr_set, three_args(*cls_var, obj, std::forward(value))); + call(*descr_set, Args{*cls_var, obj, std::forward(value)}); }else{ TypeError("readonly attribute: " + name.str().escape(true)); } @@ -881,7 +866,7 @@ void VM::setattr(PyObject* obj, StrName name, T&& value){ } } // handle instance __dict__ - if(obj.is_tagged() || !(*obj)->is_attr_valid()) TypeError("cannot set attribute"); + if(is_tagged(obj) || !obj->is_attr_valid()) TypeError("cannot set attribute"); obj->attr().set(name, std::forward(value)); } From 6714a3f7843afe512ccf8903720d12901bce1551 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 28 Mar 2023 17:46:49 +0800 Subject: [PATCH 03/73] update gc --- preprocess.py | 2 +- src/ceval.h | 2 +- src/cffi.h | 10 ++-- src/codeobject.h | 4 +- src/compiler.h | 2 +- src/frame.h | 2 +- src/gc.h | 5 +- src/io.h | 6 +-- src/iter.h | 2 +- src/namedict.h | 8 ++-- src/obj.h | 6 +-- src/pocketpy.h | 33 +++++++------- src/ref.h | 4 +- src/str.h | 6 +-- 
src/tuplelist.h | 14 +++--- src/vm.h | 116 +++++++++++++++++++++++++---------------------- 16 files changed, 113 insertions(+), 109 deletions(-) diff --git a/preprocess.py b/preprocess.py index deffcff7..23ac599e 100644 --- a/preprocess.py +++ b/preprocess.py @@ -20,7 +20,7 @@ def generate_python_sources(): #include namespace pkpy{ - std::map kPythonLibs = { + inline static std::map kPythonLibs = { ''' for key, value in sources.items(): header += ' '*8 + '{"' + key + '", "' + value + '"},' diff --git a/src/ceval.h b/src/ceval.h index 643d5042..0ef326a7 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -7,7 +7,7 @@ namespace pkpy{ Str _read_file_cwd(const Str& name, bool* ok); -PyObject* VM::run_frame(Frame* frame){ +inline PyObject* VM::run_frame(Frame* frame){ while(frame->has_next_bytecode()){ const Bytecode& byte = frame->next_bytecode(); switch (byte.op) diff --git a/src/cffi.h b/src/cffi.h index 3039ce9a..0bc6e8a8 100644 --- a/src/cffi.h +++ b/src/cffi.h @@ -152,7 +152,7 @@ struct TypeDB{ static TypeDB _type_db; -auto _ = [](){ +inline static auto ___x = [](){ #define REGISTER_BASIC_TYPE(T) _type_db.register_type(#T, {}); _type_db.register_type("void", {}); REGISTER_BASIC_TYPE(char); @@ -403,9 +403,9 @@ struct CType{ } }; -void add_module_c(VM* vm){ +inline void add_module_c(VM* vm){ PyObject* mod = vm->new_module("c"); - PyObject* ptr_t = Pointer::register_class(vm, mod); + Pointer::register_class(vm, mod); Value::register_class(vm, mod); CType::register_class(vm, mod); @@ -462,11 +462,11 @@ void add_module_c(VM* vm){ }); } -PyObject* py_var(VM* vm, void* p){ +inline PyObject* py_var(VM* vm, void* p){ return VAR_T(Pointer, _type_db.get(), (char*)p); } -PyObject* py_var(VM* vm, char* p){ +inline PyObject* py_var(VM* vm, char* p){ return VAR_T(Pointer, _type_db.get(), (char*)p); } diff --git a/src/codeobject.h b/src/codeobject.h index 27981b72..4778955c 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -18,7 +18,7 @@ enum Opcode { #undef OPCODE }; -static 
const char* OP_NAMES[] = { +inline const char* OP_NAMES[] = { #define OPCODE(name) #name, #include "opcodes.h" #undef OPCODE @@ -31,7 +31,7 @@ struct Bytecode{ uint16_t block; }; -Str pad(const Str& s, const int n){ +inline Str pad(const Str& s, const int n){ if(s.size() >= n) return s.substr(0, n); return s + std::string(n - s.size(), ' '); } diff --git a/src/compiler.h b/src/compiler.h index 0e6a5bb8..320e0aee 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -766,7 +766,7 @@ private: // from a import b as c, d as e void compile_from_import() { - Token tkmodule = _compile_import(); + _compile_import(); consume(TK("import")); if (match(TK("*"))) { if(name_scope() != NAME_GLOBAL) SyntaxError("import * can only be used in global scope"); diff --git a/src/frame.h b/src/frame.h index 850cc6f8..b4a0d18d 100644 --- a/src/frame.h +++ b/src/frame.h @@ -73,7 +73,7 @@ struct Frame { _data.pop_back(); } - inline void try_deref(VM*, PyObject*&); + void try_deref(VM*, PyObject*&); inline PyObject* pop_value(VM* vm){ PyObject* value = pop(); diff --git a/src/gc.h b/src/gc.h index ce85395e..bb1e2f36 100644 --- a/src/gc.h +++ b/src/gc.h @@ -6,12 +6,9 @@ namespace pkpy { struct ManagedHeap{ std::vector heap; - template - PyObject* gcnew(Type type, T&& val){ - PyObject* obj = new Py_>(type, std::forward(val)); + void _add(PyObject* obj){ obj->gc.enabled = true; heap.push_back(obj); - return obj; } void sweep(){ diff --git a/src/io.h b/src/io.h index 6ee42756..b5ea9f43 100644 --- a/src/io.h +++ b/src/io.h @@ -10,7 +10,7 @@ namespace pkpy{ -Str _read_file_cwd(const Str& name, bool* ok){ +inline Str _read_file_cwd(const Str& name, bool* ok){ std::filesystem::path path(name.c_str()); bool exists = std::filesystem::exists(path); if(!exists){ @@ -78,7 +78,7 @@ struct FileIO { } }; -void add_module_io(VM* vm){ +inline void add_module_io(VM* vm){ PyObject* mod = vm->new_module("io"); PyObject* type = FileIO::register_class(vm, mod); vm->bind_builtin_func<2>("open", [type](VM* vm, const 
Args& args){ @@ -86,7 +86,7 @@ void add_module_io(VM* vm){ }); } -void add_module_os(VM* vm){ +inline void add_module_os(VM* vm){ PyObject* mod = vm->new_module("os"); // Working directory is shared by all VMs!! vm->bind_func<0>(mod, "getcwd", [](VM* vm, const Args& args){ diff --git a/src/iter.h b/src/iter.h index 1ebdbdb6..71cd9a70 100644 --- a/src/iter.h +++ b/src/iter.h @@ -50,7 +50,7 @@ public: } }; -PyObject* Generator::next(){ +inline PyObject* Generator::next(){ if(state == 2) return nullptr; vm->callstack.push(std::move(frame)); PyObject* ret = vm->_exec(); diff --git a/src/namedict.h b/src/namedict.h index 22d6b783..2534a90d 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -6,7 +6,7 @@ namespace pkpy{ -const int kNameDictNodeSize = sizeof(StrName) + sizeof(PyObject*); +const int kNameDictNodeSize = sizeof(StrName) + sizeof(void*); template struct DictArrayPool { @@ -41,7 +41,7 @@ struct DictArrayPool { const std::vector kHashSeeds = {9629, 43049, 13267, 59509, 39251, 1249, 35803, 54469, 27689, 9719, 34897, 18973, 30661, 19913, 27919, 32143, 3467, 28019, 1051, 39419, 1361, 28547, 48197, 2609, 24317, 22861, 41467, 17623, 52837, 59053, 33589, 32117}; static DictArrayPool<32> _dict_pool; -uint16_t find_next_capacity(uint16_t n){ +inline uint16_t find_next_capacity(uint16_t n){ uint16_t x = 2; while(x < n) x <<= 1; return x; @@ -49,7 +49,7 @@ uint16_t find_next_capacity(uint16_t n){ #define _hash(key, mask, hash_seed) ( ( (key).index * (hash_seed) >> 8 ) & (mask) ) -uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector& keys){ +inline uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector& keys){ if(keys.empty()) return kHashSeeds[0]; std::set indices; std::pair best_score = {kHashSeeds[0], 0.0f}; @@ -207,7 +207,7 @@ while(!_keys[i].empty()) { \ bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); - _keys[i] = StrName(); value(i).reset(); + _keys[i] = StrName(); 
value(i) = nullptr; _size--; } diff --git a/src/obj.h b/src/obj.h index 80bfa100..9b094403 100644 --- a/src/obj.h +++ b/src/obj.h @@ -147,8 +147,8 @@ const int kTpFloatIndex = 3; inline bool is_type(PyObject* obj, Type type) noexcept { switch(type.index){ - case kTpIntIndex: return is_tag_01(obj); - case kTpFloatIndex: return is_tag_10(obj); + case kTpIntIndex: return is_int(obj); + case kTpFloatIndex: return is_float(obj); default: return !is_tagged(obj) && obj->type == type; } } @@ -216,7 +216,7 @@ __T _py_cast(VM* vm, PyObject* obj) { } #define VAR(x) py_var(vm, x) -#define VAR_T(T, ...) vm->heap.gcnew(T::_type(vm), T(__VA_ARGS__)) +#define VAR_T(T, ...) vm->gcnew(T::_type(vm), T(__VA_ARGS__)) #define CAST(T, x) py_cast(vm, x) #define _CAST(T, x) _py_cast(vm, x) diff --git a/src/pocketpy.h b/src/pocketpy.h index 6bf8bb66..ac5fd710 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -10,7 +10,7 @@ namespace pkpy { -CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) { +inline CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) { Compiler compiler(this, source.c_str(), filename, mode); try{ return compiler.compile(); @@ -42,7 +42,7 @@ CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) { }); -void init_builtins(VM* _vm) { +inline void init_builtins(VM* _vm) { BIND_NUM_ARITH_OPT(__add__, +) BIND_NUM_ARITH_OPT(__sub__, -) BIND_NUM_ARITH_OPT(__mul__, *) @@ -69,7 +69,7 @@ void init_builtins(VM* _vm) { vm->TypeError("super(type, obj): obj must be an instance or subtype of type"); } Type base = vm->_all_types[type.index].base; - return vm->heap.gcnew(vm->tp_super, Super(args[1], base)); + return vm->gcnew(vm->tp_super, Super(args[1], base)); }); _vm->bind_builtin_func<2>("isinstance", [](VM* vm, Args& args) { @@ -88,7 +88,8 @@ void init_builtins(VM* _vm) { i64 lhs = CAST(i64, args[0]); i64 rhs = CAST(i64, args[1]); if(rhs == 0) vm->ZeroDivisionError(); - return VAR(Tuple{VAR(lhs/rhs), VAR(lhs%rhs)}); + Tuple t = 
Tuple{VAR(lhs/rhs), VAR(lhs%rhs)}; + return VAR(std::move(t)); }); _vm->bind_builtin_func<1>("eval", [](VM* vm, Args& args) { @@ -574,7 +575,7 @@ void init_builtins(VM* _vm) { #define __EXPORT #endif -void add_module_time(VM* vm){ +inline void add_module_time(VM* vm){ PyObject* mod = vm->new_module("time"); vm->bind_func<0>(mod, "time", [](VM* vm, Args& args) { auto now = std::chrono::high_resolution_clock::now(); @@ -582,20 +583,17 @@ void add_module_time(VM* vm){ }); } -void add_module_sys(VM* vm){ +inline void add_module_sys(VM* vm){ PyObject* mod = vm->new_module("sys"); vm->setattr(mod, "version", VAR(PK_VERSION)); - - vm->bind_func<1>(mod, "getrefcount", CPP_LAMBDA(VAR(args[0].use_count()))); vm->bind_func<0>(mod, "getrecursionlimit", CPP_LAMBDA(VAR(vm->recursionlimit))); - vm->bind_func<1>(mod, "setrecursionlimit", [](VM* vm, Args& args) { vm->recursionlimit = CAST(int, args[0]); return vm->None; }); } -void add_module_json(VM* vm){ +inline void add_module_json(VM* vm){ PyObject* mod = vm->new_module("json"); vm->bind_func<1>(mod, "loads", [](VM* vm, Args& args) { const Str& expr = CAST(Str&, args[0]); @@ -606,7 +604,7 @@ void add_module_json(VM* vm){ vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->call(args[0], __json__))); } -void add_module_math(VM* vm){ +inline void add_module_math(VM* vm){ PyObject* mod = vm->new_module("math"); vm->setattr(mod, "pi", VAR(3.1415926535897932384)); vm->setattr(mod, "e" , VAR(2.7182818284590452354)); @@ -625,7 +623,7 @@ void add_module_math(VM* vm){ vm->bind_func<1>(mod, "sqrt", CPP_LAMBDA(VAR(std::sqrt(vm->num_to_float(args[0]))))); } -void add_module_dis(VM* vm){ +inline void add_module_dis(VM* vm){ PyObject* mod = vm->new_module("dis"); vm->bind_func<1>(mod, "dis", [](VM* vm, Args& args) { PyObject* f = args[0]; @@ -651,7 +649,8 @@ struct ReMatch { vm->bind_method<0>(type, "span", [](VM* vm, Args& args) { auto& self = CAST(ReMatch&, args[0]); - return VAR(Tuple{VAR(self.start), VAR(self.end)}); + Tuple t = 
Tuple{VAR(self.start), VAR(self.end)}; + return VAR(std::move(t)); }); vm->bind_method<1>(type, "group", [](VM* vm, Args& args) { @@ -663,7 +662,7 @@ struct ReMatch { } }; -PyObject* _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){ +inline PyObject* _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){ std::regex re(pattern); std::smatch m; if(std::regex_search(string, m, re)){ @@ -675,7 +674,7 @@ PyObject* _regex_search(const Str& pattern, const Str& string, bool fromStart, V return vm->None; }; -void add_module_re(VM* vm){ +inline void add_module_re(VM* vm){ PyObject* mod = vm->new_module("re"); ReMatch::register_class(vm, mod); @@ -749,14 +748,14 @@ struct Random{ } }; -void add_module_random(VM* vm){ +inline void add_module_random(VM* vm){ PyObject* mod = vm->new_module("random"); Random::register_class(vm, mod); CodeObject_ code = vm->compile(kPythonLibs["random"], "random.py", EXEC_MODE); vm->_exec(code, mod); } -void VM::post_init(){ +inline void VM::post_init(){ init_builtins(this); add_module_sys(this); add_module_time(this); diff --git a/src/ref.h b/src/ref.h index 88d3e58a..f08c4956 100644 --- a/src/ref.h +++ b/src/ref.h @@ -154,10 +154,10 @@ struct TupleRef : BaseRef { template PyObject* VM::PyRef(P&& value) { static_assert(std::is_base_of_v>); - return heap.gcnew

(tp_ref, std::forward

(value)); + return gcnew

(tp_ref, std::forward

(value)); } -const BaseRef* VM::PyRef_AS_C(PyObject* obj) +inline const BaseRef* VM::PyRef_AS_C(PyObject* obj) { if(!is_type(obj, tp_ref)) TypeError("expected an l-value"); return static_cast(obj->value()); diff --git a/src/str.h b/src/str.h index 7c7ccd88..d5119619 100644 --- a/src/str.h +++ b/src/str.h @@ -127,7 +127,7 @@ public: const uint32_t kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,
66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70303,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560}; const uint32_t kLoRangeB[] = 
{170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,70301,70312,70366,70412,70416,70440,704
48,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101}; -bool is_unicode_Lo_char(uint32_t c) { +inline bool is_unicode_Lo_char(uint32_t c) { auto index = std::lower_bound(kLoRangeA, kLoRangeA + 476, c) - kLoRangeA; if(c == kLoRangeA[index]) return true; index -= 1; @@ -184,8 +184,8 @@ struct StrName { } }; -std::map> StrName::_interned; -std::vector StrName::_r_interned; +inline std::map> StrName::_interned; +inline std::vector StrName::_r_interned; const StrName __class__ = StrName::get("__class__"); const StrName __base__ = StrName::get("__base__"); diff --git a/src/tuplelist.h b/src/tuplelist.h index 97710de4..c9eca86e 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -41,8 +41,8 @@ namespace pkpy { static pkpy::Args from_list(List&& other) noexcept { Args ret(other.size()); - memcpy((void*)ret._args, (void*)other.data(), sizeof(PyObject*)*ret.size()); - memset((void*)other.data(), 0, sizeof(PyObject*)*ret.size()); + memcpy((void*)ret._args, (void*)other.data(), sizeof(void*)*ret.size()); + memset((void*)other.data(), 0, sizeof(void*)*ret.size()); other.clear(); return ret; } @@ -63,8 +63,8 @@ namespace pkpy { List move_to_list() noexcept { List ret(_size); - memcpy((void*)ret.data(), (void*)_args, sizeof(PyObject*)*_size); - memset((void*)_args, 0, sizeof(PyObject*)*_size); + memcpy((void*)ret.data(), 
(void*)_args, sizeof(void*)*_size); + memset((void*)_args, 0, sizeof(void*)*_size); return ret; } @@ -75,8 +75,8 @@ namespace pkpy { _args[0] = self; if(old_size == 0) return; - memcpy((void*)(_args+1), (void*)old_args, sizeof(PyObject*)*old_size); - memset((void*)old_args, 0, sizeof(PyObject*)*old_size); + memcpy((void*)(_args+1), (void*)old_args, sizeof(void*)*old_size); + memset((void*)old_args, 0, sizeof(void*)*old_size); _pool.dealloc(old_args, old_size); } @@ -89,5 +89,5 @@ namespace pkpy { } typedef Args Tuple; - THREAD_LOCAL SmallArrayPool Args::_pool; + inline THREAD_LOCAL SmallArrayPool Args::_pool; } // namespace pkpy \ No newline at end of file diff --git a/src/vm.h b/src/vm.h index bde67257..df534c8a 100644 --- a/src/vm.h +++ b/src/vm.h @@ -8,22 +8,22 @@ namespace pkpy{ #define DEF_NATIVE_2(ctype, ptype) \ - template<> ctype py_cast(VM* vm, PyObject* obj) { \ + template<> inline ctype py_cast(VM* vm, PyObject* obj) { \ vm->check_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype _py_cast(VM* vm, PyObject* obj) { \ + template<> inline ctype _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype& py_cast(VM* vm, PyObject* obj) { \ + template<> inline ctype& py_cast(VM* vm, PyObject* obj) { \ vm->check_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype& _py_cast(VM* vm, PyObject* obj) { \ + template<> inline ctype& _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - PyObject* py_var(VM* vm, const ctype& value) { return vm->heap.gcnew(vm->ptype, value);} \ - PyObject* py_var(VM* vm, ctype&& value) { return vm->heap.gcnew(vm->ptype, std::move(value));} + inline PyObject* py_var(VM* vm, const ctype& value) { return vm->gcnew(vm->ptype, value);} \ + inline PyObject* py_var(VM* vm, ctype&& value) { return vm->gcnew(vm->ptype, std::move(value));} class Generator: public BaseIter { std::unique_ptr frame; @@ -134,14 +134,21 @@ public: } PyObject* 
fast_call(StrName name, Args&& args){ - PyObject** val = find_name_in_mro(_t(args[0]).get(), name); + PyObject** val = find_name_in_mro(_t(args[0]), name); if(val != nullptr) return call(*val, std::move(args)); AttributeError(args[0], name); return nullptr; } - inline PyObject* call(PyObject* _callable){ - return call(_callable, no_arg(), no_arg(), false); + template + PyObject* gcnew(Type type, T&& val){ + PyObject* obj = new Py_>(type, std::forward(val)); + heap._add(obj); + return obj; + } + + inline PyObject* call(PyObject* callable){ + return call(callable, no_arg(), no_arg(), false); } template @@ -193,7 +200,7 @@ public: PyObject* property(NativeFuncRaw fget){ PyObject* p = builtins->attr("property"); - PyObject* method = heap.gcnew(tp_native_function, NativeFunc(fget, 1, false)); + PyObject* method = gcnew(tp_native_function, NativeFunc(fget, 1, false)); return call(p, Args{method}); } @@ -267,7 +274,7 @@ public: template inline PyObject* PyIter(P&& value) { static_assert(std::is_base_of_v>); - return heap.gcnew

(tp_iterator, std::forward

(value)); + return gcnew

(tp_iterator, std::forward

(value)); } inline BaseIter* PyIter_AS_C(PyObject* obj) @@ -359,7 +366,7 @@ PyObject* NativeFunc::operator()(VM* vm, Args& args) const{ return f(vm, args); } -void CodeObject::optimize(VM* vm){ +inline void CodeObject::optimize(VM* vm){ std::vector keys; for(auto& p: names) if(p.second == NAME_LOCAL) keys.push_back(p.first); uint32_t base_n = (uint32_t)(keys.size() / kLocalsLoadFactor + 0.5); @@ -411,13 +418,13 @@ DEF_NATIVE_2(Slice, tp_slice) DEF_NATIVE_2(Exception, tp_exception) DEF_NATIVE_2(StarWrapper, tp_star_wrapper) -#define PY_CAST_INT(T) \ -template<> T py_cast(VM* vm, PyObject* obj){ \ - vm->check_type(obj, vm->tp_int); \ - return (T)(obj.bits >> 2); \ -} \ -template<> T _py_cast(VM* vm, PyObject* obj){ \ - return (T)(obj.bits >> 2); \ +#define PY_CAST_INT(T) \ +template<> inline T py_cast(VM* vm, PyObject* obj){ \ + vm->check_type(obj, vm->tp_int); \ + return (T)(BITS(obj) >> 2); \ +} \ +template<> inline T _py_cast(VM* vm, PyObject* obj){ \ + return (T)(BITS(obj) >> 2); \ } PY_CAST_INT(char) @@ -432,32 +439,32 @@ PY_CAST_INT(unsigned long) PY_CAST_INT(unsigned long long) -template<> float py_cast(VM* vm, PyObject* obj){ +template<> inline float py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_float); - i64 bits = obj.bits; + i64 bits = BITS(obj); bits = (bits >> 2) << 2; return BitsCvt(bits)._float; } -template<> float _py_cast(VM* vm, PyObject* obj){ - i64 bits = obj.bits; +template<> inline float _py_cast(VM* vm, PyObject* obj){ + i64 bits = BITS(obj); bits = (bits >> 2) << 2; return BitsCvt(bits)._float; } -template<> double py_cast(VM* vm, PyObject* obj){ +template<> inline double py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_float); - i64 bits = obj.bits; + i64 bits = BITS(obj); bits = (bits >> 2) << 2; return BitsCvt(bits)._float; } -template<> double _py_cast(VM* vm, PyObject* obj){ - i64 bits = obj.bits; +template<> inline double _py_cast(VM* vm, PyObject* obj){ + i64 bits = BITS(obj); bits = (bits >> 2) << 2; return 
BitsCvt(bits)._float; } #define PY_VAR_INT(T) \ - PyObject* py_var(VM* vm, T _val){ \ + inline PyObject* py_var(VM* vm, T _val){ \ i64 val = static_cast(_val); \ if(((val << 2) >> 2) != val){ \ vm->_error("OverflowError", std::to_string(val) + " is out of range"); \ @@ -478,7 +485,7 @@ PY_VAR_INT(unsigned long) PY_VAR_INT(unsigned long long) #define PY_VAR_FLOAT(T) \ - PyObject* py_var(VM* vm, T _val){ \ + inline PyObject* py_var(VM* vm, T _val){ \ f64 val = static_cast(_val); \ i64 bits = BitsCvt(val)._int; \ bits = (bits >> 2) << 2; \ @@ -489,23 +496,23 @@ PY_VAR_INT(unsigned long long) PY_VAR_FLOAT(float) PY_VAR_FLOAT(double) -PyObject* py_var(VM* vm, bool val){ +inline PyObject* py_var(VM* vm, bool val){ return val ? vm->True : vm->False; } -template<> bool py_cast(VM* vm, PyObject* obj){ +template<> inline bool py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_bool); return obj == vm->True; } -template<> bool _py_cast(VM* vm, PyObject* obj){ +template<> inline bool _py_cast(VM* vm, PyObject* obj){ return obj == vm->True; } -PyObject* py_var(VM* vm, const char val[]){ +inline PyObject* py_var(VM* vm, const char val[]){ return VAR(Str(val)); } -PyObject* py_var(VM* vm, std::string val){ +inline PyObject* py_var(VM* vm, std::string val){ return VAR(Str(std::move(val))); } @@ -514,7 +521,7 @@ void _check_py_class(VM* vm, PyObject* obj){ vm->check_type(obj, T::_type(vm)); } -PyObject* VM::num_negated(PyObject* obj){ +inline PyObject* VM::num_negated(PyObject* obj){ if (is_int(obj)){ return VAR(-CAST(i64, obj)); }else if(is_float(obj)){ @@ -524,7 +531,7 @@ PyObject* VM::num_negated(PyObject* obj){ return nullptr; } -f64 VM::num_to_float(PyObject* obj){ +inline f64 VM::num_to_float(PyObject* obj){ if(is_float(obj)){ return CAST(f64, obj); } else if (is_int(obj)){ @@ -534,7 +541,7 @@ f64 VM::num_to_float(PyObject* obj){ return 0; } -PyObject* VM::asBool(PyObject* obj){ +inline PyObject* VM::asBool(PyObject* obj){ if(is_type(obj, tp_bool)) return obj; if(obj 
== None) return False; if(is_type(obj, tp_int)) return VAR(CAST(i64, obj) != 0); @@ -547,7 +554,7 @@ PyObject* VM::asBool(PyObject* obj){ return True; } -i64 VM::hash(PyObject* obj){ +inline i64 VM::hash(PyObject* obj){ if (is_type(obj, tp_str)) return CAST(Str&, obj).hash(); if (is_int(obj)) return CAST(i64, obj); if (is_type(obj, tp_tuple)) { @@ -555,11 +562,12 @@ i64 VM::hash(PyObject* obj){ const Tuple& items = CAST(Tuple&, obj); for (int i=0; i> 2)); // recommended by Github Copilot + // recommended by Github Copilot + x = x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2)); } return x; } - if (is_type(obj, tp_type)) return obj.bits; + if (is_type(obj, tp_type)) return BITS(obj); if (is_type(obj, tp_bool)) return _CAST(bool, obj) ? 1 : 0; if (is_float(obj)){ f64 val = CAST(f64, obj); @@ -569,11 +577,11 @@ i64 VM::hash(PyObject* obj){ return 0; } -PyObject* VM::asRepr(PyObject* obj){ +inline PyObject* VM::asRepr(PyObject* obj){ return call(obj, __repr__); } -PyObject* VM::new_module(StrName name) { +inline PyObject* VM::new_module(StrName name) { PyObject* obj = new Py_(tp_module, DummyModule()); obj->attr().set(__name__, VAR(name.str())); // we do not allow override in order to avoid memory leak @@ -583,7 +591,7 @@ PyObject* VM::new_module(StrName name) { return obj; } -Str VM::disassemble(CodeObject_ co){ +inline Str VM::disassemble(CodeObject_ co){ std::vector jumpTargets; for(auto byte : co->codes){ if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_SAFE_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ @@ -652,7 +660,7 @@ Str VM::disassemble(CodeObject_ co){ return Str(ss.str()); } -void VM::init_builtin_types(){ +inline void VM::init_builtin_types(){ PyObject* _tp_object = new Py_(Type(1), Type(0)); PyObject* _tp_type = new Py_(Type(1), Type(1)); // PyTypeObject is managed by _all_types @@ -711,14 +719,14 @@ void VM::init_builtin_types(){ for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); } -PyObject* VM::call(PyObject* callable, Args args, const 
Args& kwargs, bool opCall){ +inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCall){ if(is_type(callable, tp_type)){ PyObject** new_f = callable->attr().try_get(__new__); PyObject* obj; if(new_f != nullptr){ obj = call(*new_f, std::move(args), kwargs, false); }else{ - obj = heap.gcnew(_callable, {}); + obj = gcnew(OBJ_GET(Type, callable), {}); PyObject* init_f = getattr(obj, __init__, false, true); if (init_f != nullptr) call(init_f, std::move(args), kwargs, false); } @@ -784,15 +792,15 @@ PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCal return _exec(); } - PyObject* call_f = getattr(_callable, __call__, false, true); + PyObject* call_f = getattr(callable, __call__, false, true); if(call_f != nullptr){ return call(call_f, std::move(args), kwargs, false); } - TypeError(OBJ_NAME(_t(*callable)).escape(true) + " object is not callable"); + TypeError(OBJ_NAME(_t(callable)).escape(true) + " object is not callable"); return None; } -void VM::unpack_args(Args& args){ +inline void VM::unpack_args(Args& args){ List unpacked; for(int i=0; i; // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance -PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){ +inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){ PyObject* objtype = _t(obj); // handle super() proxy if(is_type(obj, tp_super)){ @@ -842,12 +850,12 @@ PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_on } template -void VM::setattr(PyObject* obj, StrName name, T&& value){ +inline void VM::setattr(PyObject* obj, StrName name, T&& value){ static_assert(std::is_same_v, PyObject*>); PyObject* objtype = _t(obj); // handle super() proxy if(is_type(obj, tp_super)){ - Super& super = OBJ_GET(Super, *obj); + Super& super = OBJ_GET(Super, obj); obj = super.first; objtype = _t(super.second); } @@ -881,7 +889,7 @@ void VM::bind_func(PyObject* obj, Str 
name, NativeFuncRaw fn) { obj->attr().set(name, VAR(NativeFunc(fn, ARGC, false))); } -void VM::_error(Exception e){ +inline void VM::_error(Exception e){ if(callstack.empty()){ e.is_re = false; throw e; @@ -890,7 +898,7 @@ void VM::_error(Exception e){ _raise(); } -PyObject* VM::_exec(){ +inline PyObject* VM::_exec(){ Frame* frame = top_frame(); i64 base_id = frame->id; PyObject* ret = nullptr; From 0be3e7ab6c7cf6f22979f3070ddfa2e31c2133de Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 28 Mar 2023 17:58:28 +0800 Subject: [PATCH 04/73] update gc --- src/cffi.h | 2 +- src/common.h | 4 ++-- src/compiler.h | 2 +- src/frame.h | 32 ++++++++++++++++---------------- src/iter.h | 2 +- src/namedict.h | 14 +++++++------- src/obj.h | 10 +++++----- src/parser.h | 2 +- src/ref.h | 4 ++-- src/tuplelist.h | 4 ++-- src/vm.h | 26 +++++++++++++------------- 11 files changed, 51 insertions(+), 51 deletions(-) diff --git a/src/cffi.h b/src/cffi.h index 0bc6e8a8..fbb13a8e 100644 --- a/src/cffi.h +++ b/src/cffi.h @@ -266,7 +266,7 @@ struct Pointer{ } template - inline T& ref() noexcept { return *reinterpret_cast(ptr); } + T& ref() noexcept { return *reinterpret_cast(ptr); } PyObject* get(VM* vm){ if(level > 1) return VAR_T(Pointer, ctype, level-1, ref()); diff --git a/src/common.h b/src/common.h index 1cce45e6..9b2cf7aa 100644 --- a/src/common.h +++ b/src/common.h @@ -62,10 +62,10 @@ struct Type { int index; Type(): index(-1) {} Type(int index): index(index) {} - inline bool operator==(Type other) const noexcept { + bool operator==(Type other) const noexcept { return this->index == other.index; } - inline bool operator!=(Type other) const noexcept { + bool operator!=(Type other) const noexcept { return this->index != other.index; } }; diff --git a/src/compiler.h b/src/compiler.h index 320e0aee..764863cb 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -305,7 +305,7 @@ private: parser->set_next_token(TK("@eof")); } - inline TokenIndex peek() { + TokenIndex peek() { return 
parser->curr.type; } diff --git a/src/frame.h b/src/frame.h index b4a0d18d..f2b1a2b0 100644 --- a/src/frame.h +++ b/src/frame.h @@ -18,10 +18,10 @@ struct Frame { const uint64_t id; std::vector>> s_try_block; - inline NameDict& f_locals() noexcept { return _locals != nullptr ? *_locals : _module->attr(); } - inline NameDict& f_globals() noexcept { return _module->attr(); } + NameDict& f_locals() noexcept { return _locals != nullptr ? *_locals : _module->attr(); } + NameDict& f_globals() noexcept { return _module->attr(); } - inline PyObject** f_closure_try_get(StrName name) noexcept { + PyObject** f_closure_try_get(StrName name) noexcept { if(_closure == nullptr) return nullptr; return _closure->try_get(name); } @@ -32,7 +32,7 @@ struct Frame { const NameDict_& _closure=nullptr) : co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) { } - inline const Bytecode& next_bytecode() { + const Bytecode& next_bytecode() { _ip = _next_ip++; return co->codes[_ip]; } @@ -53,11 +53,11 @@ struct Frame { // return ss.str(); // } - inline bool has_next_bytecode() const { + bool has_next_bytecode() const { return _next_ip < co->codes.size(); } - inline PyObject* pop(){ + PyObject* pop(){ #if PK_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif @@ -66,7 +66,7 @@ struct Frame { return v; } - inline void _pop(){ + void _pop(){ #if PK_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif @@ -75,26 +75,26 @@ struct Frame { void try_deref(VM*, PyObject*&); - inline PyObject* pop_value(VM* vm){ + PyObject* pop_value(VM* vm){ PyObject* value = pop(); try_deref(vm, value); return value; } - inline PyObject* top_value(VM* vm){ + PyObject* top_value(VM* vm){ PyObject* value = top(); try_deref(vm, value); return value; } - inline PyObject*& top(){ + PyObject*& top(){ #if PK_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif return _data.back(); } - 
inline PyObject*& top_1(){ + PyObject*& top_1(){ #if PK_EXTRA_CHECK if(_data.size() < 2) throw std::runtime_error("_data.size() < 2"); #endif @@ -102,16 +102,16 @@ struct Frame { } template - inline void push(T&& obj){ _data.push_back(std::forward(obj)); } + void push(T&& obj){ _data.push_back(std::forward(obj)); } - inline void jump_abs(int i){ _next_ip = i; } - inline void jump_rel(int i){ _next_ip += i; } + void jump_abs(int i){ _next_ip = i; } + void jump_rel(int i){ _next_ip += i; } - inline void on_try_block_enter(){ + void on_try_block_enter(){ s_try_block.emplace_back(co->codes[_ip].block, _data); } - inline void on_try_block_exit(){ + void on_try_block_exit(){ s_try_block.pop_back(); } diff --git a/src/iter.h b/src/iter.h index 71cd9a70..42dd0c08 100644 --- a/src/iter.h +++ b/src/iter.h @@ -13,7 +13,7 @@ public: this->current = r.start; } - inline bool _has_next(){ + bool _has_next(){ return r.step > 0 ? current < r.stop : current > r.stop; } diff --git a/src/namedict.h b/src/namedict.h index 2534a90d..130b60f6 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -41,7 +41,7 @@ struct DictArrayPool { const std::vector kHashSeeds = {9629, 43049, 13267, 59509, 39251, 1249, 35803, 54469, 27689, 9719, 34897, 18973, 30661, 19913, 27919, 32143, 3467, 28019, 1051, 39419, 1361, 28547, 48197, 2609, 24317, 22861, 41467, 17623, 52837, 59053, 33589, 32117}; static DictArrayPool<32> _dict_pool; -inline uint16_t find_next_capacity(uint16_t n){ +inline static uint16_t find_next_capacity(uint16_t n){ uint16_t x = 2; while(x < n) x <<= 1; return x; @@ -49,7 +49,7 @@ inline uint16_t find_next_capacity(uint16_t n){ #define _hash(key, mask, hash_seed) ( ( (key).index * (hash_seed) >> 8 ) & (mask) ) -inline uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector& keys){ +inline static uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector& keys){ if(keys.empty()) return kHashSeeds[0]; std::set indices; std::pair best_score = {kHashSeeds[0], 0.0f}; @@ 
-73,11 +73,11 @@ struct NameDict { uint16_t _mask; StrName* _keys; - inline PyObject*& value(uint16_t i){ + PyObject*& value(uint16_t i){ return reinterpret_cast(_keys + _capacity)[i]; } - inline PyObject* value(uint16_t i) const { + PyObject* value(uint16_t i) const { return reinterpret_cast(_keys + _capacity)[i]; } @@ -175,14 +175,14 @@ while(!_keys[i].empty()) { \ _rehash(false); // do not resize } - inline PyObject** try_get(StrName key){ + PyObject** try_get(StrName key){ bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) return nullptr; return &value(i); } - inline bool try_set(StrName key, PyObject* val){ + bool try_set(StrName key, PyObject* val){ bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) return false; @@ -190,7 +190,7 @@ while(!_keys[i].empty()) { \ return true; } - inline bool contains(StrName key) const { + bool contains(StrName key) const { bool ok; uint16_t i; HASH_PROBE(key, ok, i); return ok; diff --git a/src/obj.h b/src/obj.h index 9b094403..8abcea95 100644 --- a/src/obj.h +++ b/src/obj.h @@ -22,7 +22,7 @@ struct NativeFunc { bool method; NativeFunc(NativeFuncRaw f, int argc, bool method) : f(f), argc(argc), method(method) {} - inline PyObject* operator()(VM* vm, Args& args) const; + PyObject* operator()(VM* vm, Args& args) const; }; struct Function { @@ -98,9 +98,9 @@ struct PyObject { Type type; NameDict* _attr; - inline bool is_attr_valid() const noexcept { return _attr != nullptr; } - inline NameDict& attr() noexcept { return *_attr; } - inline PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; } + bool is_attr_valid() const noexcept { return _attr != nullptr; } + NameDict& attr() noexcept { return *_attr; } + PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; } virtual void* value() = 0; virtual void mark() { @@ -120,7 +120,7 @@ struct Py_ : PyObject { Py_(Type type, const T& val): PyObject(type), _value(val) { _init(); } Py_(Type type, T&& val): PyObject(type), _value(std::move(val)) { _init(); 
} - inline void _init() noexcept { + void _init() noexcept { if constexpr (std::is_same_v || std::is_same_v) { _attr = new NameDict(8, kTypeAttrLoadFactor); }else if constexpr(std::is_same_v){ diff --git a/src/parser.h b/src/parser.h index 63b450ff..da36b888 100644 --- a/src/parser.h +++ b/src/parser.h @@ -120,7 +120,7 @@ struct Parser { return t; } - inline char peekchar() const{ return *curr_char; } + char peekchar() const{ return *curr_char; } bool match_n_chars(int n, char c0){ const char* c = curr_char; diff --git a/src/ref.h b/src/ref.h index f08c4956..dbbf9f0d 100644 --- a/src/ref.h +++ b/src/ref.h @@ -14,8 +14,8 @@ struct BaseRef { struct NameRef : BaseRef { const std::pair pair; - inline StrName name() const { return pair.first; } - inline NameScope scope() const { return pair.second; } + StrName name() const { return pair.first; } + NameScope scope() const { return pair.second; } NameRef(const std::pair& pair) : pair(pair) {} PyObject* get(VM* vm, Frame* frame) const{ diff --git a/src/tuplelist.h b/src/tuplelist.h index c9eca86e..76732ed9 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -14,7 +14,7 @@ namespace pkpy { PyObject** _args; int _size; - inline void _alloc(int n){ + void _alloc(int n){ this->_args = _pool.alloc(n); this->_size = n; } @@ -59,7 +59,7 @@ namespace pkpy { return *this; } - inline int size() const { return _size; } + int size() const { return _size; } List move_to_list() noexcept { List ret(_size); diff --git a/src/vm.h b/src/vm.h index df534c8a..0e8f1b86 100644 --- a/src/vm.h +++ b/src/vm.h @@ -89,7 +89,7 @@ public: return asRepr(obj); } - inline Frame* top_frame() const { + Frame* top_frame() const { #if PK_EXTRA_CHECK if(callstack.empty()) UNREACHABLE(); #endif @@ -147,23 +147,23 @@ public: return obj; } - inline PyObject* call(PyObject* callable){ + PyObject* call(PyObject* callable){ return call(callable, no_arg(), no_arg(), false); } template - inline std::enable_if_t, Args>, PyObject*> + std::enable_if_t, Args>, 
PyObject*> call(PyObject* _callable, ArgT&& args){ return call(_callable, std::forward(args), no_arg(), false); } template - inline std::enable_if_t, Args>, PyObject*> + std::enable_if_t, Args>, PyObject*> call(PyObject* obj, const StrName name, ArgT&& args){ return call(getattr(obj, name, true, true), std::forward(args), no_arg(), false); } - inline PyObject* call(PyObject* obj, StrName name){ + PyObject* call(PyObject* obj, StrName name){ return call(getattr(obj, name, true, true), no_arg(), no_arg(), false); } @@ -185,7 +185,7 @@ public: } template - inline std::unique_ptr _new_frame(Args&&... args){ + std::unique_ptr _new_frame(Args&&... args){ if(callstack.size() > recursionlimit){ _error("RecursionError", "maximum recursion depth exceeded"); } @@ -193,7 +193,7 @@ public: } template - inline PyObject* _exec(Args&&... args){ + PyObject* _exec(Args&&... args){ callstack.push(_new_frame(std::forward(args)...)); return _exec(); } @@ -272,12 +272,12 @@ public: Type tp_super, tp_exception, tp_star_wrapper; template - inline PyObject* PyIter(P&& value) { + PyObject* PyIter(P&& value) { static_assert(std::is_base_of_v>); return gcnew

(tp_iterator, std::forward

(value)); } - inline BaseIter* PyIter_AS_C(PyObject* obj) + BaseIter* PyIter_AS_C(PyObject* obj) { check_type(obj, tp_iterator); return static_cast(obj->value()); @@ -309,16 +309,16 @@ public: void AttributeError(Str msg){ _error("AttributeError", msg); } - inline void check_type(PyObject* obj, Type type){ + void check_type(PyObject* obj, Type type){ if(is_type(obj, type)) return; TypeError("expected " + OBJ_NAME(_t(type)).escape(true) + ", but got " + OBJ_NAME(_t(obj)).escape(true)); } - inline PyObject* _t(Type t){ + PyObject* _t(Type t){ return _all_types[t.index].obj; } - inline PyObject* _t(PyObject* obj){ + PyObject* _t(PyObject* obj){ if(is_int(obj)) return _t(tp_int); if(is_float(obj)) return _t(tp_float); return _all_types[OBJ_GET(Type, _t(obj->type)).index].obj; @@ -358,7 +358,7 @@ public: const BaseRef* PyRef_AS_C(PyObject* obj); }; -PyObject* NativeFunc::operator()(VM* vm, Args& args) const{ +inline PyObject* NativeFunc::operator()(VM* vm, Args& args) const{ int args_size = args.size() - (int)method; // remove self if(argc != -1 && args_size != argc) { vm->TypeError("expected " + std::to_string(argc) + " arguments, but got " + std::to_string(args_size)); From de7240e0c15d1f6f1f7806d5833f5d203dee01f5 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 28 Mar 2023 18:03:30 +0800 Subject: [PATCH 05/73] some rename --- python/{dict.py => _dict.py} | 0 python/{set.py => _set.py} | 0 src/pocketpy.h | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename python/{dict.py => _dict.py} (100%) rename python/{set.py => _set.py} (100%) diff --git a/python/dict.py b/python/_dict.py similarity index 100% rename from python/dict.py rename to python/_dict.py diff --git a/python/set.py b/python/_set.py similarity index 100% rename from python/set.py rename to python/_set.py diff --git a/src/pocketpy.h b/src/pocketpy.h index ac5fd710..21dfeab9 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -774,9 +774,9 @@ inline void VM::post_init(){ CodeObject_ code = 
compile(kPythonLibs["builtins"], "", EXEC_MODE); this->_exec(code, this->builtins); - code = compile(kPythonLibs["dict"], "", EXEC_MODE); + code = compile(kPythonLibs["_dict"], "", EXEC_MODE); this->_exec(code, this->builtins); - code = compile(kPythonLibs["set"], "", EXEC_MODE); + code = compile(kPythonLibs["_set"], "", EXEC_MODE); this->_exec(code, this->builtins); // property is defined in builtins.py so we need to add it after builtins is loaded From 01121c339af510731626e9e5122f37f6488b21f2 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 28 Mar 2023 18:05:12 +0800 Subject: [PATCH 06/73] Update pocketpy.h --- src/pocketpy.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pocketpy.h b/src/pocketpy.h index 21dfeab9..708bae5a 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -565,14 +565,14 @@ inline void init_builtins(VM* _vm) { } #ifdef _WIN32 -#define __EXPORT __declspec(dllexport) +#define __EXPORT __declspec(dllexport) inline #elif __APPLE__ -#define __EXPORT __attribute__((visibility("default"))) __attribute__((used)) +#define __EXPORT __attribute__((visibility("default"))) __attribute__((used)) inline #elif __EMSCRIPTEN__ #include -#define __EXPORT EMSCRIPTEN_KEEPALIVE +#define __EXPORT EMSCRIPTEN_KEEPALIVE inline #else -#define __EXPORT +#define __EXPORT inline #endif inline void add_module_time(VM* vm){ From 85cfaa4e1462b8da01b18f134d676aecbde394f4 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 28 Mar 2023 21:49:50 +0800 Subject: [PATCH 07/73] update gc --- src/ceval.h | 16 ++--- src/codeobject.h | 16 +---- src/common.h | 15 ++-- src/compiler.h | 10 +-- src/frame.h | 2 +- src/io.h | 6 +- src/memory.h | 82 +++------------------ src/namedict.h | 132 +++++++++++----------------------- src/obj.h | 4 +- src/parser.h | 4 +- src/pocketpy.h | 12 ++-- src/ref.h | 10 +-- src/tuplelist.h | 151 +++++++++++++++++++-------------------- src/vm.h | 181 ++++++++++++++++++++++------------------------- 14 files changed, 248 
insertions(+), 393 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 0ef326a7..b1f934e6 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -5,8 +5,6 @@ namespace pkpy{ -Str _read_file_cwd(const Str& name, bool* ok); - inline PyObject* VM::run_frame(Frame* frame){ while(frame->has_next_bytecode()){ const Bytecode& byte = frame->next_bytecode(); @@ -183,11 +181,11 @@ inline PyObject* VM::run_frame(Frame* frame){ } continue; case OP_RE_RAISE: _raise(); continue; case OP_BUILD_LIST: - frame->push(VAR(frame->pop_n_values_reversed(this, byte.arg).move_to_list())); + frame->push(VAR(frame->pop_n_values_reversed(this, byte.arg).to_list())); continue; case OP_BUILD_MAP: { Args items = frame->pop_n_values_reversed(this, byte.arg*2); - PyObject* obj = call(builtins->attr("dict")); + PyObject* obj = call(builtins->attr("dict"), no_arg()); for(int i=0; ipop_n_values_reversed(this, byte.arg).move_to_list() + frame->pop_n_values_reversed(this, byte.arg).to_list() ); PyObject* obj = call(builtins->attr("set"), Args{list}); frame->push(obj); @@ -295,7 +293,7 @@ inline PyObject* VM::run_frame(Frame* frame){ } continue; case OP_IMPORT_NAME: { StrName name = frame->co->names[byte.arg].first; - PyObject** ext_mod = _modules.try_get(name); + PyObject* ext_mod = _modules.try_get(name); if(ext_mod == nullptr){ Str source; auto it2 = _lazy_modules.find(name); @@ -313,7 +311,7 @@ inline PyObject* VM::run_frame(Frame* frame){ frame->push(new_mod); new_mod->attr()._try_perfect_rehash(); }else{ - frame->push(*ext_mod); + frame->push(ext_mod); } } continue; case OP_STORE_ALL_NAMES: { @@ -326,8 +324,8 @@ inline PyObject* VM::run_frame(Frame* frame){ }; continue; case OP_YIELD_VALUE: return _py_op_yield; // TODO: using "goto" inside with block may cause __exit__ not called - case OP_WITH_ENTER: call(frame->pop_value(this), __enter__); continue; - case OP_WITH_EXIT: call(frame->pop_value(this), __exit__); continue; + case OP_WITH_ENTER: call(frame->pop_value(this), __enter__, no_arg()); 
continue; + case OP_WITH_EXIT: call(frame->pop_value(this), __exit__, no_arg()); continue; case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); continue; case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); continue; default: throw std::runtime_error(Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); diff --git a/src/codeobject.h b/src/codeobject.h index 4778955c..92f4ba70 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -26,16 +26,11 @@ inline const char* OP_NAMES[] = { struct Bytecode{ uint8_t op; + uint16_t block; int arg; int line; - uint16_t block; }; -inline Str pad(const Str& s, const int n){ - if(s.size() >= n) return s.substr(0, n); - return s + std::string(n - s.size(), ' '); -} - enum CodeBlockType { NO_BLOCK, FOR_LOOP, @@ -49,19 +44,14 @@ struct CodeBlock { int parent; // parent index in blocks int start; // start index of this block in codes, inclusive int end; // end index of this block in codes, exclusive - - std::string to_string() const { - if(parent == -1) return ""; - return "[B:" + std::to_string(type) + "]"; - } }; struct CodeObject { - shared_ptr src; + std::shared_ptr src; Str name; bool is_generator = false; - CodeObject(shared_ptr src, Str name) { + CodeObject(std::shared_ptr src, Str name) { this->src = src; this->name = name; } diff --git a/src/common.h b/src/common.h index 9b2cf7aa..3f69ac1e 100644 --- a/src/common.h +++ b/src/common.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include #define PK_VERSION "0.9.5" #define PK_EXTRA_CHECK 0 @@ -54,20 +54,17 @@ namespace pkpy{ namespace std = ::std; -struct Dummy { }; -struct DummyInstance { }; +struct Dummy { }; +struct DummyInstance { }; struct DummyModule { }; struct Type { int index; Type(): index(-1) {} Type(int index): index(index) {} - bool operator==(Type other) const noexcept { - return this->index == other.index; - } - bool operator!=(Type other) const noexcept { - return this->index != other.index; - } + bool operator==(Type other) const noexcept { 
return this->index == other.index; } + bool operator!=(Type other) const noexcept { return this->index != other.index; } + operator int() const noexcept { return this->index; } }; //#define THREAD_LOCAL thread_local diff --git a/src/compiler.h b/src/compiler.h index 764863cb..85f7a733 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -35,7 +35,7 @@ public: Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ this->vm = vm; this->parser = std::make_unique( - make_sp(source, filename, mode) + std::make_shared(source, filename, mode) ); // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ @@ -394,7 +394,7 @@ private: _compile_f_args(func, false); consume(TK(":")); } - func.code = make_sp(parser->src, func.name.str()); + func.code = std::make_shared(parser->src, func.name.str()); this->codes.push(func.code); co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; emit(OP_RETURN_VALUE); @@ -711,7 +711,7 @@ private: int emit(Opcode opcode, int arg=-1, bool keepline=false) { int line = parser->prev.line; co()->codes.push_back( - Bytecode{(uint8_t)opcode, arg, line, (uint16_t)co()->_curr_block_i} + Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line} ); int i = co()->codes.size() - 1; if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line; @@ -1090,7 +1090,7 @@ private: if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - func.code = make_sp(parser->src, func.name.str()); + func.code = std::make_shared(parser->src, func.name.str()); this->codes.push(func.code); compile_block_body(); func.code->optimize(vm); @@ -1154,7 +1154,7 @@ public: if(used) UNREACHABLE(); used = true; - CodeObject_ code = make_sp(parser->src, Str("")); + CodeObject_ code = std::make_shared(parser->src, Str("")); codes.push(code); lex_token(); lex_token(); diff --git a/src/frame.h b/src/frame.h index f2b1a2b0..37848a11 100644 --- a/src/frame.h +++ b/src/frame.h @@ -21,7 +21,7 @@ struct Frame { NameDict& f_locals() 
noexcept { return _locals != nullptr ? *_locals : _module->attr(); } NameDict& f_globals() noexcept { return _module->attr(); } - PyObject** f_closure_try_get(StrName name) noexcept { + PyObject* f_closure_try_get(StrName name) noexcept { if(_closure == nullptr) return nullptr; return _closure->try_get(name); } diff --git a/src/io.h b/src/io.h index b5ea9f43..a5fbb614 100644 --- a/src/io.h +++ b/src/io.h @@ -157,10 +157,10 @@ inline void add_module_os(VM* vm){ #else namespace pkpy{ -void add_module_io(VM* vm){} -void add_module_os(VM* vm){} +inline void add_module_io(VM* vm){} +inline void add_module_os(VM* vm){} -Str _read_file_cwd(const Str& name, bool* ok){ +inline Str _read_file_cwd(const Str& name, bool* ok){ *ok = false; return Str(); } diff --git a/src/memory.h b/src/memory.h index 404a423b..1b4eab16 100644 --- a/src/memory.h +++ b/src/memory.h @@ -4,79 +4,12 @@ namespace pkpy{ -template -struct shared_ptr { - int* counter; -#define _t() (T*)(counter + 1) -#define _inc_counter() if(counter) ++(*counter) -#define _dec_counter() if(counter && --(*counter) == 0) {((T*)(counter + 1))->~T(); free(counter);} - -public: - shared_ptr() : counter(nullptr) {} - shared_ptr(int* counter) : counter(counter) {} - shared_ptr(const shared_ptr& other) : counter(other.counter) { - _inc_counter(); - } - shared_ptr(shared_ptr&& other) noexcept : counter(other.counter) { - other.counter = nullptr; - } - ~shared_ptr() { _dec_counter(); } - - bool operator==(const shared_ptr& other) const { return counter == other.counter; } - bool operator!=(const shared_ptr& other) const { return counter != other.counter; } - bool operator<(const shared_ptr& other) const { return counter < other.counter; } - bool operator>(const shared_ptr& other) const { return counter > other.counter; } - bool operator<=(const shared_ptr& other) const { return counter <= other.counter; } - bool operator>=(const shared_ptr& other) const { return counter >= other.counter; } - bool operator==(std::nullptr_t) 
const { return counter == nullptr; } - bool operator!=(std::nullptr_t) const { return counter != nullptr; } - - shared_ptr& operator=(const shared_ptr& other) { - _dec_counter(); - counter = other.counter; - _inc_counter(); - return *this; - } - - shared_ptr& operator=(shared_ptr&& other) noexcept { - _dec_counter(); - counter = other.counter; - other.counter = nullptr; - return *this; - } - - T& operator*() const { return *_t(); } - T* operator->() const { return _t(); } - T* get() const { return _t(); } - - int use_count() const { - return counter ? *counter : 0; - } - - void reset(){ - _dec_counter(); - counter = nullptr; - } -}; - -#undef _t -#undef _inc_counter -#undef _dec_counter - - template - shared_ptr make_sp(Args&&... args) { - int* p = (int*)malloc(sizeof(int) + sizeof(T)); - *p = 1; - new(p+1) T(std::forward(args)...); - return shared_ptr(p); - } - -template -struct SmallArrayPool { +template +struct FreeListA { std::vector buckets[__Bucket+1]; T* alloc(int n){ - if(n == 0) return nullptr; + if constexpr(__ZeroCheck) if(n == 0) return nullptr; if(n > __Bucket || buckets[n].empty()){ return new T[n]; }else{ @@ -87,7 +20,7 @@ struct SmallArrayPool { } void dealloc(T* p, int n){ - if(n == 0) return; + if constexpr(__ZeroCheck) if(n == 0) return; if(n > __Bucket || buckets[n].size() >= __BucketSize){ delete[] p; }else{ @@ -95,10 +28,11 @@ struct SmallArrayPool { } } - ~SmallArrayPool(){ - for(int i=1; i<=__Bucket; i++){ - for(auto p: buckets[i]) delete[] p; + ~FreeListA(){ + for(int i=0; i<=__Bucket; i++){ + for(T* p : buckets[i]) delete[] p; } } }; + }; // namespace pkpy diff --git a/src/namedict.h b/src/namedict.h index 130b60f6..888980c3 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -6,40 +6,7 @@ namespace pkpy{ -const int kNameDictNodeSize = sizeof(StrName) + sizeof(void*); - -template -struct DictArrayPool { - std::vector buckets[__Bucket+1]; - - StrName* alloc(uint16_t n){ - StrName* _keys; - if(n > __Bucket || buckets[n].empty()){ - _keys = 
(StrName*)malloc(kNameDictNodeSize * n); - memset((void*)_keys, 0, kNameDictNodeSize * n); - }else{ - _keys = buckets[n].back(); - memset((void*)_keys, 0, sizeof(StrName) * n); - buckets[n].pop_back(); - } - return _keys; - } - - void dealloc(StrName* head, uint16_t n){ - if(n > __Bucket || buckets[n].size() >= __BucketSize){ - free(head); - }else{ - buckets[n].push_back(head); - } - } - - ~DictArrayPool(){ - // let it leak, since this object is static - } -}; - const std::vector kHashSeeds = {9629, 43049, 13267, 59509, 39251, 1249, 35803, 54469, 27689, 9719, 34897, 18973, 30661, 19913, 27919, 32143, 3467, 28019, 1051, 39419, 1361, 28547, 48197, 2609, 24317, 22861, 41467, 17623, 52837, 59053, 33589, 32117}; -static DictArrayPool<32> _dict_pool; inline static uint16_t find_next_capacity(uint16_t n){ uint16_t x = 2; @@ -66,75 +33,61 @@ inline static uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vect } struct NameDict { + using Item = std::pair; + inline static FreeListA _pool; + uint16_t _capacity; uint16_t _size; float _load_factor; uint16_t _hash_seed; uint16_t _mask; - StrName* _keys; - - PyObject*& value(uint16_t i){ - return reinterpret_cast(_keys + _capacity)[i]; - } - - PyObject* value(uint16_t i) const { - return reinterpret_cast(_keys + _capacity)[i]; - } + Item* _items; NameDict(uint16_t capacity=2, float load_factor=0.67, uint16_t hash_seed=kHashSeeds[0]): _capacity(capacity), _size(0), _load_factor(load_factor), _hash_seed(hash_seed), _mask(capacity-1) { - _keys = _dict_pool.alloc(capacity); - } + _items = _pool.alloc(_capacity); + } NameDict(const NameDict& other) { memcpy(this, &other, sizeof(NameDict)); - _keys = _dict_pool.alloc(_capacity); + _items = _pool.alloc(_capacity); for(int i=0; i<_capacity; i++){ - _keys[i] = other._keys[i]; - value(i) = other.value(i); + _items[i] = other._items[i]; } } NameDict& operator=(const NameDict& other) { - _dict_pool.dealloc(_keys, _capacity); + _pool.dealloc(_items, _capacity); memcpy(this, 
&other, sizeof(NameDict)); - _keys = _dict_pool.alloc(_capacity); + _items = _pool.alloc(_capacity); for(int i=0; i<_capacity; i++){ - _keys[i] = other._keys[i]; - value(i) = other.value(i); + _items[i] = other._items[i]; } return *this; } - ~NameDict(){ _dict_pool.dealloc(_keys, _capacity); } + ~NameDict(){ _pool.dealloc(_items, _capacity); } NameDict(NameDict&&) = delete; NameDict& operator=(NameDict&&) = delete; uint16_t size() const { return _size; } -#define HASH_PROBE(key, ok, i) \ -ok = false; \ -i = _hash(key, _mask, _hash_seed); \ -while(!_keys[i].empty()) { \ - if(_keys[i] == (key)) { ok = true; break; } \ - i = (i + 1) & _mask; \ +#define HASH_PROBE(key, ok, i) \ +ok = false; \ +i = _hash(key, _mask, _hash_seed); \ +while(!_items[i].first.empty()) { \ + if(_items[i].first == (key)) { ok = true; break; } \ + i = (i + 1) & _mask; \ } PyObject* operator[](StrName key) const { bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); - return value(i); + return _items[i].second; } - // PyObject*& get(StrName key){ - // bool ok; uint16_t i; - // HASH_PROBE(key, ok, i); - // if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); - // return value(i); - // } - template void set(StrName key, T&& val){ bool ok; uint16_t i; @@ -145,29 +98,27 @@ while(!_keys[i].empty()) { \ _rehash(true); HASH_PROBE(key, ok, i); } - _keys[i] = key; + _items[i].first = key; } - value(i) = std::forward(val); + _items[i].second = std::forward(val); } void _rehash(bool resize){ - StrName* old_keys = _keys; - PyObject** old_values = &value(0); + Item* old_items = _items; uint16_t old_capacity = _capacity; if(resize){ _capacity = find_next_capacity(_capacity * 2); _mask = _capacity - 1; } - _keys = _dict_pool.alloc(_capacity); + _items = _pool.alloc(_capacity); for(uint16_t i=0; i> items() const { - std::vector> v; + std::vector items() const { + std::vector v; for(uint16_t i=0; i<_capacity; i++){ - 
if(_keys[i].empty()) continue; - v.push_back(std::make_pair(_keys[i], value(i))); + if(_items[i].first.empty()) continue; + v.push_back(_items[i]); } return v; } @@ -223,16 +175,16 @@ while(!_keys[i].empty()) { \ std::vector keys() const { std::vector v; for(uint16_t i=0; i<_capacity; i++){ - if(_keys[i].empty()) continue; - v.push_back(_keys[i]); + if(_items[i].first.empty()) continue; + v.push_back(_items[i].first); } return v; } void apply_v(void(*f)(PyObject*)) { for(uint16_t i=0; i<_capacity; i++){ - if(_keys[i].empty()) continue; - f(value(i)); + if(_items[i].first.empty()) continue; + f(_items[i].second); } } #undef HASH_PROBE diff --git a/src/obj.h b/src/obj.h index 8abcea95..48d358ee 100644 --- a/src/obj.h +++ b/src/obj.h @@ -13,8 +13,8 @@ struct BaseRef; class VM; typedef std::function NativeFuncRaw; -typedef shared_ptr CodeObject_; -typedef shared_ptr NameDict_; +typedef std::shared_ptr CodeObject_; +typedef std::shared_ptr NameDict_; struct NativeFunc { NativeFuncRaw f; diff --git a/src/parser.h b/src/parser.h index da36b888..fd281c6c 100644 --- a/src/parser.h +++ b/src/parser.h @@ -95,7 +95,7 @@ enum Precedence { // The context of the parsing phase for the compiler. 
struct Parser { - shared_ptr src; + std::shared_ptr src; const char* token_start; const char* curr_char; @@ -290,7 +290,7 @@ struct Parser { else set_next_token(one); } - Parser(shared_ptr src) { + Parser(std::shared_ptr src) { this->src = src; this->token_start = src->source; this->curr_char = src->source; diff --git a/src/pocketpy.h b/src/pocketpy.h index 708bae5a..a2e3d8fa 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -514,7 +514,7 @@ inline void init_builtins(VM* _vm) { /************ PyTuple ************/ _vm->bind_static_method<1>("tuple", "__new__", [](VM* vm, Args& args) { List list = CAST(List, vm->asList(args[0])); - return VAR(Tuple::from_list(std::move(list))); + return VAR(Tuple(std::move(list))); }); _vm->bind_method<0>("tuple", "__iter__", [](VM* vm, Args& args) { @@ -529,7 +529,7 @@ inline void init_builtins(VM* _vm) { s.normalize(self.size()); List new_list; for(size_t i = s.start; i < s.stop; i++) new_list.push_back(self[i]); - return VAR(Tuple::from_list(std::move(new_list))); + return VAR(Tuple(std::move(new_list))); } int index = CAST(int, args[1]); @@ -601,7 +601,7 @@ inline void add_module_json(VM* vm){ return vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals); }); - vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->call(args[0], __json__))); + vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->call(args[0], __json__, no_arg()))); } inline void add_module_math(VM* vm){ @@ -850,10 +850,10 @@ extern "C" { /// Return `__repr__` of the result. /// If the variable is not found, return `nullptr`. 
char* pkpy_vm_get_global(pkpy::VM* vm, const char* name){ - pkpy::PyObject** val = vm->_main->attr().try_get(name); + pkpy::PyObject* val = vm->_main->attr().try_get(name); if(val == nullptr) return nullptr; try{ - pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(*val)); + pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(val)); return strdup(repr.c_str()); }catch(...){ return nullptr; @@ -955,7 +955,7 @@ extern "C" { ss << f_header; for(int i=0; icall(args[i], pkpy::__json__); + pkpy::PyObject* x = vm->call(args[i], pkpy::__json__, pkpy::no_arg()); ss << pkpy::CAST(pkpy::Str&, x); } char* packet = strdup(ss.str().c_str()); diff --git a/src/ref.h b/src/ref.h index dbbf9f0d..4bcb3b3e 100644 --- a/src/ref.h +++ b/src/ref.h @@ -19,15 +19,15 @@ struct NameRef : BaseRef { NameRef(const std::pair& pair) : pair(pair) {} PyObject* get(VM* vm, Frame* frame) const{ - PyObject** val; + PyObject* val; val = frame->f_locals().try_get(name()); - if(val != nullptr) return *val; + if(val != nullptr) return val; val = frame->f_closure_try_get(name()); - if(val != nullptr) return *val; + if(val != nullptr) return val; val = frame->f_globals().try_get(name()); - if(val != nullptr) return *val; + if(val != nullptr) return val; val = vm->builtins->attr().try_get(name()); - if(val != nullptr) return *val; + if(val != nullptr) return val; vm->NameError(name()); return nullptr; } diff --git a/src/tuplelist.h b/src/tuplelist.h index 76732ed9..8143dda8 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -3,91 +3,84 @@ #include "common.h" #include "memory.h" #include "str.h" -#include namespace pkpy { - using List = std::vector; - class Args { - static THREAD_LOCAL SmallArrayPool _pool; +using List = std::vector; - PyObject** _args; - int _size; +class Args { + inline static THREAD_LOCAL FreeListA _pool; - void _alloc(int n){ - this->_args = _pool.alloc(n); - this->_size = n; - } + PyObject** _args; + int _size; - public: - Args(int n){ _alloc(n); } - - Args(const Args& other){ - 
_alloc(other._size); - for(int i=0; i<_size; i++) _args[i] = other._args[i]; - } - - Args(Args&& other) noexcept { - this->_args = other._args; - this->_size = other._size; - other._args = nullptr; - other._size = 0; - } - - Args(std::initializer_list list) : Args(list.size()){ - int i=0; - for(auto& p : list) _args[i++] = p; - } - - static pkpy::Args from_list(List&& other) noexcept { - Args ret(other.size()); - memcpy((void*)ret._args, (void*)other.data(), sizeof(void*)*ret.size()); - memset((void*)other.data(), 0, sizeof(void*)*ret.size()); - other.clear(); - return ret; - } - - PyObject*& operator[](int i){ return _args[i]; } - PyObject* operator[](int i) const { return _args[i]; } - - Args& operator=(Args&& other) noexcept { - _pool.dealloc(_args, _size); - this->_args = other._args; - this->_size = other._size; - other._args = nullptr; - other._size = 0; - return *this; - } - - int size() const { return _size; } - - List move_to_list() noexcept { - List ret(_size); - memcpy((void*)ret.data(), (void*)_args, sizeof(void*)*_size); - memset((void*)_args, 0, sizeof(void*)*_size); - return ret; - } - - void extend_self(PyObject* self){ - PyObject** old_args = _args; - int old_size = _size; - _alloc(old_size+1); - _args[0] = self; - if(old_size == 0) return; - - memcpy((void*)(_args+1), (void*)old_args, sizeof(void*)*old_size); - memset((void*)old_args, 0, sizeof(void*)*old_size); - _pool.dealloc(old_args, old_size); - } - - ~Args(){ _pool.dealloc(_args, _size); } - }; - - inline const Args& no_arg() { - static const Args _zero(0); - return _zero; + void _alloc(int n){ + this->_args = _pool.alloc(n); + this->_size = n; } - typedef Args Tuple; - inline THREAD_LOCAL SmallArrayPool Args::_pool; +public: + Args(int n){ _alloc(n); } + + Args(const Args& other){ + _alloc(other._size); + for(int i=0; i<_size; i++) _args[i] = other._args[i]; + } + + Args(Args&& other) noexcept { + this->_args = other._args; + this->_size = other._size; + other._args = nullptr; + other._size 
= 0; + } + + Args(std::initializer_list list) : Args(list.size()){ + int i = 0; + for(PyObject* p : list) _args[i++] = p; + } + + Args(List&& other) noexcept : Args(other.size()){ + for(int i=0; i<_size; i++) _args[i] = other[i]; + other.clear(); + } + + PyObject*& operator[](int i){ return _args[i]; } + PyObject* operator[](int i) const { return _args[i]; } + + Args& operator=(Args&& other) noexcept { + _pool.dealloc(_args, _size); + this->_args = other._args; + this->_size = other._size; + other._args = nullptr; + other._size = 0; + return *this; + } + + int size() const { return _size; } + + List to_list() noexcept { + List ret(_size); + for(int i=0; i<_size; i++) ret[i] = _args[i]; + return ret; + } + + void extend_self(PyObject* self){ + PyObject** old_args = _args; + int old_size = _size; + _alloc(old_size+1); + _args[0] = self; + for(int i=0; i inline ctype py_cast(VM* vm, PyObject* obj) { \ + template<> inline ctype py_cast(VM* vm, PyObject* obj) { \ vm->check_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ - template<> inline ctype _py_cast(VM* vm, PyObject* obj) { \ + template<> inline ctype _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - template<> inline ctype& py_cast(VM* vm, PyObject* obj) { \ + template<> inline ctype& py_cast(VM* vm, PyObject* obj) { \ vm->check_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ - template<> inline ctype& _py_cast(VM* vm, PyObject* obj) { \ + template<> inline ctype& _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ inline PyObject* py_var(VM* vm, const ctype& value) { return vm->gcnew(vm->ptype, value);} \ inline PyObject* py_var(VM* vm, ctype&& value) { return vm->gcnew(vm->ptype, std::move(value));} + class Generator: public BaseIter { std::unique_ptr frame; int state; // 0,1,2 @@ -61,6 +64,7 @@ public: PyObject* False; PyObject* Ellipsis; + // managed by _modules, need_gc=false PyObject* builtins; // builtins module PyObject* _main; // __main__ module 
@@ -83,12 +87,6 @@ public: init_builtin_types(); } - PyObject* asStr(PyObject* obj){ - PyObject* f = getattr(obj, __str__, false, true); - if(f != nullptr) return call(f); - return asRepr(obj); - } - Frame* top_frame() const { #if PK_EXTRA_CHECK if(callstack.empty()) UNREACHABLE(); @@ -96,10 +94,16 @@ public: return callstack.top().get(); } + PyObject* asStr(PyObject* obj){ + PyObject* f = getattr(obj, __str__, false, true); + if(f != nullptr) return call(f, no_arg()); + return asRepr(obj); + } + PyObject* asIter(PyObject* obj){ if(is_type(obj, tp_iterator)) return obj; PyObject* iter_f = getattr(obj, __iter__, false, true); - if(iter_f != nullptr) return call(iter_f); + if(iter_f != nullptr) return call(iter_f, no_arg()); TypeError(OBJ_NAME(_t(obj)).escape(true) + " object is not iterable"); return nullptr; } @@ -109,15 +113,15 @@ public: return call(_t(tp_list), Args{iterable}); } - PyObject** find_name_in_mro(PyObject* cls, StrName name){ - PyObject** val; + PyObject* find_name_in_mro(PyObject* cls, StrName name){ + PyObject* val; do{ val = cls->attr().try_get(name); if(val != nullptr) return val; Type cls_t = static_cast*>(cls)->_value; - Type base = _all_types[cls_t.index].base; + Type base = _all_types[cls_t].base; if(base.index == -1) break; - cls = _all_types[base.index].obj; + cls = _all_types[base].obj; }while(true); return nullptr; } @@ -126,7 +130,7 @@ public: Type obj_t = OBJ_GET(Type, _t(obj)); do{ if(obj_t == cls_t) return true; - Type base = _all_types[obj_t.index].base; + Type base = _all_types[obj_t].base; if(base.index == -1) break; obj_t = base; }while(true); @@ -134,8 +138,8 @@ public: } PyObject* fast_call(StrName name, Args&& args){ - PyObject** val = find_name_in_mro(_t(args[0]), name); - if(val != nullptr) return call(*val, std::move(args)); + PyObject* val = find_name_in_mro(_t(args[0]), name); + if(val != nullptr) return call(val, std::move(args)); AttributeError(args[0], name); return nullptr; } @@ -147,28 +151,19 @@ public: return obj; 
} - PyObject* call(PyObject* callable){ - return call(callable, no_arg(), no_arg(), false); - } - template std::enable_if_t, Args>, PyObject*> - call(PyObject* _callable, ArgT&& args){ - return call(_callable, std::forward(args), no_arg(), false); + call(PyObject* callable, ArgT&& args){ + return call(callable, std::forward(args), no_arg(), false); } template std::enable_if_t, Args>, PyObject*> call(PyObject* obj, const StrName name, ArgT&& args){ - return call(getattr(obj, name, true, true), std::forward(args), no_arg(), false); + PyObject* callable = getattr(obj, name, true, true); + return call(callable, std::forward(args), no_arg(), false); } - PyObject* call(PyObject* obj, StrName name){ - return call(getattr(obj, name, true, true), no_arg(), no_arg(), false); - } - - - // repl mode is only for setting `frame->id` to 0 PyObject* exec(Str source, Str filename, CompileMode mode, PyObject* _module=nullptr){ if(_module == nullptr) _module = _main; try { @@ -205,7 +200,6 @@ public: } PyObject* new_type_object(PyObject* mod, StrName name, Type base){ - // use gcnew PyObject* obj = new Py_(tp_type, _all_types.size()); PyTypeInfo info{ .obj = obj, @@ -223,12 +217,12 @@ public: } PyObject* _find_type(const Str& type){ - PyObject** obj = builtins->attr().try_get(type); - if(!obj){ + PyObject* obj = builtins->attr().try_get(type); + if(obj == nullptr){ for(auto& t: _all_types) if(t.name == type) return t.obj; throw std::runtime_error("type not found: " + type); } - return *obj; + return obj; } template @@ -294,7 +288,6 @@ public: else throw UnhandledException(); } -public: void IOError(const Str& msg) { _error("IOError", msg); } void NotImplementedError(){ _error("NotImplementedError", ""); } void TypeError(const Str& msg){ _error("TypeError", msg); } @@ -332,7 +325,6 @@ public: } CodeObject_ compile(Str source, Str filename, CompileMode mode); - void post_init(); PyObject* num_negated(PyObject* obj); f64 num_to_float(PyObject* obj); PyObject* asBool(PyObject* obj); @@ 
-341,21 +333,20 @@ public: PyObject* new_module(StrName name); Str disassemble(CodeObject_ co); void init_builtin_types(); - PyObject* call(PyObject* _callable, Args args, const Args& kwargs, bool opCall); + PyObject* call(PyObject* callable, Args args, const Args& kwargs, bool opCall); void unpack_args(Args& args); PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true, bool class_only=false); template void setattr(PyObject* obj, StrName name, T&& value); template - void bind_method(PyObject* obj, Str funcName, NativeFuncRaw fn); + void bind_method(PyObject*, Str, NativeFuncRaw); template - void bind_func(PyObject* obj, Str funcName, NativeFuncRaw fn); - void _error(Exception e); + void bind_func(PyObject*, Str, NativeFuncRaw); + void _error(Exception); PyObject* _exec(); - - template - PyObject* PyRef(P&& value); + template PyObject* PyRef(P&&); const BaseRef* PyRef_AS_C(PyObject* obj); + void post_init(); }; inline PyObject* NativeFunc::operator()(VM* vm, Args& args) const{ @@ -418,13 +409,13 @@ DEF_NATIVE_2(Slice, tp_slice) DEF_NATIVE_2(Exception, tp_exception) DEF_NATIVE_2(StarWrapper, tp_star_wrapper) -#define PY_CAST_INT(T) \ -template<> inline T py_cast(VM* vm, PyObject* obj){ \ - vm->check_type(obj, vm->tp_int); \ - return (T)(BITS(obj) >> 2); \ -} \ -template<> inline T _py_cast(VM* vm, PyObject* obj){ \ - return (T)(BITS(obj) >> 2); \ +#define PY_CAST_INT(T) \ +template<> inline T py_cast(VM* vm, PyObject* obj){ \ + vm->check_type(obj, vm->tp_int); \ + return (T)(BITS(obj) >> 2); \ +} \ +template<> inline T _py_cast(VM* vm, PyObject* obj){ \ + return (T)(BITS(obj) >> 2); \ } PY_CAST_INT(char) @@ -463,10 +454,10 @@ template<> inline double _py_cast(VM* vm, PyObject* obj){ } -#define PY_VAR_INT(T) \ - inline PyObject* py_var(VM* vm, T _val){ \ - i64 val = static_cast(_val); \ - if(((val << 2) >> 2) != val){ \ +#define PY_VAR_INT(T) \ + inline PyObject* py_var(VM* vm, T _val){ \ + i64 val = static_cast(_val); \ + if(((val << 2) >> 2) != val){ \ 
vm->_error("OverflowError", std::to_string(val) + " is out of range"); \ } \ val = (val << 2) | 0b01; \ @@ -485,9 +476,9 @@ PY_VAR_INT(unsigned long) PY_VAR_INT(unsigned long long) #define PY_VAR_FLOAT(T) \ - inline PyObject* py_var(VM* vm, T _val){ \ + inline PyObject* py_var(VM* vm, T _val){ \ f64 val = static_cast(_val); \ - i64 bits = BitsCvt(val)._int; \ + i64 bits = BitsCvt(val)._int; \ bits = (bits >> 2) << 2; \ bits |= 0b10; \ return reinterpret_cast(bits); \ @@ -546,9 +537,9 @@ inline PyObject* VM::asBool(PyObject* obj){ if(obj == None) return False; if(is_type(obj, tp_int)) return VAR(CAST(i64, obj) != 0); if(is_type(obj, tp_float)) return VAR(CAST(f64, obj) != 0.0); - PyObject* len_fn = getattr(obj, __len__, false, true); - if(len_fn != nullptr){ - PyObject* ret = call(len_fn); + PyObject* len_f = getattr(obj, __len__, false, true); + if(len_f != nullptr){ + PyObject* ret = call(len_f, no_arg()); return VAR(CAST(i64, ret) > 0); } return True; @@ -578,7 +569,7 @@ inline i64 VM::hash(PyObject* obj){ } inline PyObject* VM::asRepr(PyObject* obj){ - return call(obj, __repr__); + return call(obj, __repr__, no_arg()); } inline PyObject* VM::new_module(StrName name) { @@ -592,6 +583,11 @@ inline PyObject* VM::new_module(StrName name) { } inline Str VM::disassemble(CodeObject_ co){ + auto pad = [](const Str& s, const int n){ + if(s.size() >= n) return s.substr(0, n); + return s + std::string(n - s.size(), ' '); + }; + std::vector jumpTargets; for(auto byte : co->codes){ if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_SAFE_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ @@ -633,8 +629,9 @@ inline Str VM::disassemble(CodeObject_ co){ auto& x = co->names[(byte.arg >> 16) & 0xFFFF]; argStr += " (" + a.first.str() + '[' + x.first.str() + "])"; } - ss << pad(argStr, 20); // may overflow - ss << co->blocks[byte.block].to_string(); + ss << argStr; + // ss << pad(argStr, 20); // may overflow + // ss << co->blocks[byte.block].to_string(); if(i != co->codes.size() - 1) 
ss << '\n'; } StrStream consts; @@ -661,13 +658,11 @@ inline Str VM::disassemble(CodeObject_ co){ } inline void VM::init_builtin_types(){ - PyObject* _tp_object = new Py_(Type(1), Type(0)); - PyObject* _tp_type = new Py_(Type(1), Type(1)); // PyTypeObject is managed by _all_types // PyModuleObject is managed by _modules // They are not managed by GC, so we use a simple "new" - _all_types.push_back({.obj = _tp_object, .base = -1, .name = "object"}); - _all_types.push_back({.obj = _tp_type, .base = 0, .name = "type"}); + _all_types.push_back({.obj = new Py_(Type(1), Type(0)), .base = -1, .name = "object"}); + _all_types.push_back({.obj = new Py_(Type(1), Type(1)), .base = 0, .name = "type"}); tp_object = 0; tp_type = 1; tp_int = _new_type_object("int"); @@ -683,7 +678,6 @@ inline void VM::init_builtin_types(){ tp_module = _new_type_object("module"); tp_ref = _new_type_object("_ref"); tp_star_wrapper = _new_type_object("_star_wrapper"); - tp_function = _new_type_object("function"); tp_native_function = _new_type_object("native_function"); tp_iterator = _new_type_object("iterator"); @@ -697,6 +691,7 @@ inline void VM::init_builtin_types(){ this->False = new Py_(tp_bool, {}); this->_py_op_call = new Py_(_new_type_object("_py_op_call"), {}); this->_py_op_yield = new Py_(_new_type_object("_py_op_yield"), {}); + this->builtins = new_module("builtins"); this->_main = new_module("__main__"); @@ -712,19 +707,16 @@ inline void VM::init_builtin_types(){ builtins->attr().set("range", _t(tp_range)); post_init(); - for(int i=0; i<_all_types.size(); i++){ - auto& t = _all_types[i]; - t.obj->attr()._try_perfect_rehash(); - } + for(auto& t: _all_types) t.obj->attr()._try_perfect_rehash(); for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); } inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCall){ if(is_type(callable, tp_type)){ - PyObject** new_f = callable->attr().try_get(__new__); + PyObject* new_f = 
callable->attr().try_get(__new__); PyObject* obj; if(new_f != nullptr){ - obj = call(*new_f, std::move(args), kwargs, false); + obj = call(new_f, std::move(args), kwargs, false); }else{ obj = gcnew(OBJ_GET(Type, callable), {}); PyObject* init_f = getattr(obj, __init__, false, true); @@ -745,7 +737,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo return f(this, args); } else if(is_type(callable, tp_function)){ const Function& fn = CAST(Function&, callable); - NameDict_ locals = make_sp( + NameDict_ locals = std::make_shared( fn.code->perfect_locals_capacity, kLocalsLoadFactor, fn.code->perfect_hash_seed @@ -754,7 +746,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo int i = 0; for(StrName name : fn.args){ if(i < args.size()){ - locals->set(name, std::move(args[i++])); + locals->set(name, args[i++]); continue; } TypeError("missing positional argument " + name.str().escape(true)); @@ -764,12 +756,12 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo if(!fn.starred_arg.empty()){ List vargs; // handle *args - while(i < args.size()) vargs.push_back(std::move(args[i++])); - locals->set(fn.starred_arg, VAR(Tuple::from_list(std::move(vargs)))); + while(i < args.size()) vargs.push_back(args[i++]); + locals->set(fn.starred_arg, VAR(Tuple(std::move(vargs)))); }else{ for(StrName key : fn.kwargs_order){ if(i < args.size()){ - locals->set(key, std::move(args[i++])); + locals->set(key, args[i++]); }else{ break; } @@ -806,14 +798,13 @@ inline void VM::unpack_args(Args& args){ if(is_type(args[i], tp_star_wrapper)){ auto& star = _CAST(StarWrapper&, args[i]); if(!star.rvalue) UNREACHABLE(); - PyObject* list = asList(star.obj); - List& list_c = CAST(List&, list); - unpacked.insert(unpacked.end(), list_c.begin(), list_c.end()); + List& list = CAST(List&, asList(star.obj)); + unpacked.insert(unpacked.end(), list.begin(), list.end()); }else{ unpacked.push_back(args[i]); } } - args = 
Args::from_list(std::move(unpacked)); + args = Args(std::move(unpacked)); } using Super = std::pair; @@ -827,23 +818,23 @@ inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool c obj = super.first; objtype = _t(super.second); } - PyObject** cls_var = find_name_in_mro(objtype, name); + PyObject* cls_var = find_name_in_mro(objtype, name); if(cls_var != nullptr){ // handle descriptor - PyObject** descr_get = _t(*cls_var)->attr().try_get(__get__); - if(descr_get != nullptr) return call(*descr_get, Args{*cls_var, obj}); + PyObject* descr_get = _t(cls_var)->attr().try_get(__get__); + if(descr_get != nullptr) return call(descr_get, Args{cls_var, obj}); } // handle instance __dict__ if(!class_only && !is_tagged(obj) && obj->is_attr_valid()){ - PyObject** val = obj->attr().try_get(name); - if(val != nullptr) return *val; + PyObject* val = obj->attr().try_get(name); + if(val != nullptr) return val; } if(cls_var != nullptr){ // bound method is non-data descriptor - if(is_type(*cls_var, tp_function) || is_type(*cls_var, tp_native_function)){ - return VAR(BoundMethod(obj, *cls_var)); + if(is_type(cls_var, tp_function) || is_type(cls_var, tp_native_function)){ + return VAR(BoundMethod(obj, cls_var)); } - return *cls_var; + return cls_var; } if(throw_err) AttributeError(obj, name); return nullptr; @@ -859,14 +850,14 @@ inline void VM::setattr(PyObject* obj, StrName name, T&& value){ obj = super.first; objtype = _t(super.second); } - PyObject** cls_var = find_name_in_mro(objtype, name); + PyObject* cls_var = find_name_in_mro(objtype, name); if(cls_var != nullptr){ // handle descriptor - PyObject* cls_var_t = _t(*cls_var); + PyObject* cls_var_t = _t(cls_var); if(cls_var_t->attr().contains(__get__)){ - PyObject** descr_set = cls_var_t->attr().try_get(__set__); + PyObject* descr_set = cls_var_t->attr().try_get(__set__); if(descr_set != nullptr){ - call(*descr_set, Args{*cls_var, obj, std::forward(value)}); + call(descr_set, Args{cls_var, obj, 
std::forward(value)}); }else{ TypeError("readonly attribute: " + name.str().escape(true)); } From 6ea82f01fd50b60e5ad4953441965e049fe0d49e Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 28 Mar 2023 21:56:05 +0800 Subject: [PATCH 08/73] update gc --- src/codeobject.h | 4 +-- src/compiler.h | 8 +++--- src/memory.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++ src/obj.h | 4 +-- src/parser.h | 4 +-- src/vm.h | 2 +- 6 files changed, 75 insertions(+), 11 deletions(-) diff --git a/src/codeobject.h b/src/codeobject.h index 92f4ba70..98fdb47f 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -47,11 +47,11 @@ struct CodeBlock { }; struct CodeObject { - std::shared_ptr src; + shared_ptr src; Str name; bool is_generator = false; - CodeObject(std::shared_ptr src, Str name) { + CodeObject(shared_ptr src, Str name) { this->src = src; this->name = name; } diff --git a/src/compiler.h b/src/compiler.h index 85f7a733..c20cc606 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -35,7 +35,7 @@ public: Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ this->vm = vm; this->parser = std::make_unique( - std::make_shared(source, filename, mode) + make_sp(source, filename, mode) ); // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ @@ -394,7 +394,7 @@ private: _compile_f_args(func, false); consume(TK(":")); } - func.code = std::make_shared(parser->src, func.name.str()); + func.code = make_sp(parser->src, func.name.str()); this->codes.push(func.code); co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; emit(OP_RETURN_VALUE); @@ -1090,7 +1090,7 @@ private: if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - func.code = std::make_shared(parser->src, func.name.str()); + func.code = make_sp(parser->src, func.name.str()); this->codes.push(func.code); compile_block_body(); func.code->optimize(vm); @@ -1154,7 +1154,7 @@ public: if(used) UNREACHABLE(); used = true; - CodeObject_ code = 
std::make_shared(parser->src, Str("")); + CodeObject_ code = make_sp(parser->src, Str("")); codes.push(code); lex_token(); lex_token(); diff --git a/src/memory.h b/src/memory.h index 1b4eab16..16e2321c 100644 --- a/src/memory.h +++ b/src/memory.h @@ -4,6 +4,70 @@ namespace pkpy{ +template +struct shared_ptr { + int* counter; + + T* _t() const noexcept { return (T*)(counter + 1); } + void _inc_counter() { if(counter) ++(*counter); } + void _dec_counter() { if(counter && --(*counter) == 0) {((T*)(counter + 1))->~T(); free(counter);} } + +public: + shared_ptr() : counter(nullptr) {} + shared_ptr(int* counter) : counter(counter) {} + shared_ptr(const shared_ptr& other) : counter(other.counter) { + _inc_counter(); + } + shared_ptr(shared_ptr&& other) noexcept : counter(other.counter) { + other.counter = nullptr; + } + ~shared_ptr() { _dec_counter(); } + + bool operator==(const shared_ptr& other) const { return counter == other.counter; } + bool operator!=(const shared_ptr& other) const { return counter != other.counter; } + bool operator<(const shared_ptr& other) const { return counter < other.counter; } + bool operator>(const shared_ptr& other) const { return counter > other.counter; } + bool operator<=(const shared_ptr& other) const { return counter <= other.counter; } + bool operator>=(const shared_ptr& other) const { return counter >= other.counter; } + bool operator==(std::nullptr_t) const { return counter == nullptr; } + bool operator!=(std::nullptr_t) const { return counter != nullptr; } + + shared_ptr& operator=(const shared_ptr& other) { + _dec_counter(); + counter = other.counter; + _inc_counter(); + return *this; + } + + shared_ptr& operator=(shared_ptr&& other) noexcept { + _dec_counter(); + counter = other.counter; + other.counter = nullptr; + return *this; + } + + T& operator*() const { return *_t(); } + T* operator->() const { return _t(); } + T* get() const { return _t(); } + + int use_count() const { + return counter ? 
*counter : 0; + } + + void reset(){ + _dec_counter(); + counter = nullptr; + } +}; + +template +shared_ptr make_sp(Args&&... args) { + int* p = (int*)malloc(sizeof(int) + sizeof(T)); + *p = 1; + new(p+1) T(std::forward(args)...); + return shared_ptr(p); +} + template struct FreeListA { std::vector buckets[__Bucket+1]; diff --git a/src/obj.h b/src/obj.h index 48d358ee..8abcea95 100644 --- a/src/obj.h +++ b/src/obj.h @@ -13,8 +13,8 @@ struct BaseRef; class VM; typedef std::function NativeFuncRaw; -typedef std::shared_ptr CodeObject_; -typedef std::shared_ptr NameDict_; +typedef shared_ptr CodeObject_; +typedef shared_ptr NameDict_; struct NativeFunc { NativeFuncRaw f; diff --git a/src/parser.h b/src/parser.h index fd281c6c..da36b888 100644 --- a/src/parser.h +++ b/src/parser.h @@ -95,7 +95,7 @@ enum Precedence { // The context of the parsing phase for the compiler. struct Parser { - std::shared_ptr src; + shared_ptr src; const char* token_start; const char* curr_char; @@ -290,7 +290,7 @@ struct Parser { else set_next_token(one); } - Parser(std::shared_ptr src) { + Parser(shared_ptr src) { this->src = src; this->token_start = src->source; this->curr_char = src->source; diff --git a/src/vm.h b/src/vm.h index 75413e71..686d9056 100644 --- a/src/vm.h +++ b/src/vm.h @@ -737,7 +737,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo return f(this, args); } else if(is_type(callable, tp_function)){ const Function& fn = CAST(Function&, callable); - NameDict_ locals = std::make_shared( + NameDict_ locals = make_sp( fn.code->perfect_locals_capacity, kLocalsLoadFactor, fn.code->perfect_hash_seed From 16ef631bfd0b8e12c086435dc555f855a357a57f Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 28 Mar 2023 22:11:58 +0800 Subject: [PATCH 09/73] update gc Update ref.h up --- .github/workflows/main.yml | 2 +- src/obj.h | 21 +++++++++------------ src/pocketpy.h | 6 ++++++ src/ref.h | 4 +--- src/vm.h | 11 +++++++++++ 5 files changed, 28 insertions(+), 
16 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c01fbb29..033b90a5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -47,7 +47,7 @@ jobs: build_dir: web env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - if: github.event_name == 'push' + if: github.event_name == 'push' && github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v3 with: path: output diff --git a/src/obj.h b/src/obj.h index 8abcea95..3a776cf6 100644 --- a/src/obj.h +++ b/src/obj.h @@ -87,6 +87,9 @@ public: virtual ~BaseIter() = default; }; +template struct is_container_gc : std::false_type {}; +template struct is_container_gc> : std::true_type {}; + struct GCHeader { bool enabled; // whether this object is managed by GC bool marked; // whether this object is marked @@ -135,7 +138,7 @@ struct Py_ : PyObject { void mark() override { PyObject::mark(); - // extra mark for `T` + if constexpr (is_container_gc::value) _value._mark(); } }; @@ -174,19 +177,13 @@ union BitsCvt { BitsCvt(f64 val) : _float(val) {} }; -template struct is_py_class : std::false_type {}; +template struct is_py_class : std::false_type {}; template struct is_py_class> : std::true_type {}; -template -void _check_py_class(VM* vm, PyObject* var); - -template -T py_pointer_cast(VM* vm, PyObject* var); - -template -T py_value_cast(VM* vm, PyObject* var); - -struct Discarded {}; +template void _check_py_class(VM*, PyObject*); +template T py_pointer_cast(VM*, PyObject*); +template T py_value_cast(VM*, PyObject*); +struct Discarded { }; template __T py_cast(VM* vm, PyObject* obj) { diff --git a/src/pocketpy.h b/src/pocketpy.h index a2e3d8fa..3f92734e 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -755,6 +755,11 @@ inline void add_module_random(VM* vm){ vm->_exec(code, mod); } +inline void add_module_gc(VM* vm){ + PyObject* mod = vm->new_module("gc"); + vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->gc_collect()))); +} + inline void VM::post_init(){ 
init_builtins(this); add_module_sys(this); @@ -767,6 +772,7 @@ inline void VM::post_init(){ add_module_io(this); add_module_os(this); add_module_c(this); + add_module_gc(this); for(const char* name: {"this", "functools", "collections", "heapq", "bisect"}){ _lazy_modules[name] = kPythonLibs[name]; diff --git a/src/ref.h b/src/ref.h index 4bcb3b3e..7a6f0310 100644 --- a/src/ref.h +++ b/src/ref.h @@ -99,9 +99,7 @@ struct IndexRef : BaseRef { } void set(VM* vm, Frame* frame, PyObject* val) const{ - Args args(3); - args[0] = obj; args[1] = index; args[2] = std::move(val); - vm->fast_call(__setitem__, std::move(args)); + vm->fast_call(__setitem__, Args{obj, index, val}); } void del(VM* vm, Frame* frame) const{ diff --git a/src/vm.h b/src/vm.h index 686d9056..01bd7289 100644 --- a/src/vm.h +++ b/src/vm.h @@ -144,6 +144,11 @@ public: return nullptr; } + i64 gc_collect(){ + heap.collect(this); + return 0; + } + template PyObject* gcnew(Type type, T&& val){ PyObject* obj = new Py_>(type, std::forward(val)); @@ -931,4 +936,10 @@ inline PyObject* VM::_exec(){ } } +inline std::vector ManagedHeap::get_roots(VM *vm) { + std::vector roots; + // ... + return roots; +} + } // namespace pkpy \ No newline at end of file From 8764a23302d93ef0208491dbddca150f87a00d51 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 10:52:58 +0800 Subject: [PATCH 10/73] update gc --- src/memory.h | 24 +++++++++++++++--------- src/namedict.h | 7 +++---- src/obj.h | 11 ++++------- src/pocketpy.h | 8 ++++---- src/tuplelist.h | 4 ++-- 5 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/memory.h b/src/memory.h index 16e2321c..61f6246d 100644 --- a/src/memory.h +++ b/src/memory.h @@ -1,6 +1,7 @@ #pragma once #include "common.h" +#include namespace pkpy{ @@ -68,25 +69,30 @@ shared_ptr make_sp(Args&&... 
args) { return shared_ptr(p); } -template +template struct FreeListA { std::vector buckets[__Bucket+1]; T* alloc(int n){ - if constexpr(__ZeroCheck) if(n == 0) return nullptr; + static_assert(std::is_standard_layout_v); + T* p; if(n > __Bucket || buckets[n].empty()){ - return new T[n]; + p = (T*)malloc(sizeof(T) * n); }else{ - T* p = buckets[n].back(); + p = buckets[n].back(); buckets[n].pop_back(); - return p; } + if constexpr(__ZeroInit){ + // the constructor of T should be equivalent to zero initialization + memset((void*)p, 0, sizeof(T) * n); + } + return p; } void dealloc(T* p, int n){ - if constexpr(__ZeroCheck) if(n == 0) return; - if(n > __Bucket || buckets[n].size() >= __BucketSize){ - delete[] p; + if(p == nullptr) return; + if(n > __Bucket || buckets[n].size() >= 80){ + free(p); }else{ buckets[n].push_back(p); } @@ -94,7 +100,7 @@ struct FreeListA { ~FreeListA(){ for(int i=0; i<=__Bucket; i++){ - for(T* p : buckets[i]) delete[] p; + for(T* p : buckets[i]) free(p); } } }; diff --git a/src/namedict.h b/src/namedict.h index 888980c3..0cd24e36 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -34,7 +34,7 @@ inline static uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vect struct NameDict { using Item = std::pair; - inline static FreeListA _pool; + inline static THREAD_LOCAL FreeListA _pool; uint16_t _capacity; uint16_t _size; @@ -88,8 +88,7 @@ while(!_items[i].first.empty()) { \ return _items[i].second; } - template - void set(StrName key, T&& val){ + void set(StrName key, PyObject* val){ bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) { @@ -100,7 +99,7 @@ while(!_items[i].first.empty()) { \ } _items[i].first = key; } - _items[i].second = std::forward(val); + _items[i].second = val; } void _rehash(bool resize){ diff --git a/src/obj.h b/src/obj.h index 3a776cf6..3593b366 100644 --- a/src/obj.h +++ b/src/obj.h @@ -105,12 +105,7 @@ struct PyObject { NameDict& attr() noexcept { return *_attr; } PyObject* attr(StrName name) const noexcept { 
return (*_attr)[name]; } virtual void* value() = 0; - - virtual void mark() { - if(!gc.enabled || gc.marked) return; - gc.marked = true; - if(is_attr_valid()) attr().apply_v([](PyObject* v){ v->mark(); }); - } + virtual void mark() = 0; PyObject(Type type) : type(type) {} virtual ~PyObject() { delete _attr; } @@ -137,7 +132,9 @@ struct Py_ : PyObject { void* value() override { return &_value; } void mark() override { - PyObject::mark(); + if(!gc.enabled || gc.marked) return; + gc.marked = true; + if(is_attr_valid()) attr().apply_v([](PyObject* v){ v->mark(); }); if constexpr (is_container_gc::value) _value._mark(); } }; diff --git a/src/pocketpy.h b/src/pocketpy.h index 3f92734e..13502ec0 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -68,7 +68,7 @@ inline void init_builtins(VM* _vm) { if(!vm->isinstance(args[1], type)){ vm->TypeError("super(type, obj): obj must be an instance or subtype of type"); } - Type base = vm->_all_types[type.index].base; + Type base = vm->_all_types[type].base; return vm->gcnew(vm->tp_super, Super(args[1], base)); }); @@ -788,11 +788,11 @@ inline void VM::post_init(){ // property is defined in builtins.py so we need to add it after builtins is loaded _t(tp_object)->attr().set(__class__, property(CPP_LAMBDA(vm->_t(args[0])))); _t(tp_type)->attr().set(__base__, property([](VM* vm, Args& args){ - const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0]).index]; - return info.base.index == -1 ? vm->None : vm->_all_types[info.base.index].obj; + const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])]; + return info.base.index == -1 ? 
vm->None : vm->_all_types[info.base].obj; })); _t(tp_type)->attr().set(__name__, property([](VM* vm, Args& args){ - const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0]).index]; + const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])]; return VAR(info.name); })); } diff --git a/src/tuplelist.h b/src/tuplelist.h index 8143dda8..4c9cda33 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -9,13 +9,13 @@ namespace pkpy { using List = std::vector; class Args { - inline static THREAD_LOCAL FreeListA _pool; + inline static THREAD_LOCAL FreeListA _pool; PyObject** _args; int _size; void _alloc(int n){ - this->_args = _pool.alloc(n); + this->_args = (n==0) ? nullptr : _pool.alloc(n); this->_size = n; } From 86a290e6c4cae2acc5652c3c839b8b2289d2d0fe Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 10:59:16 +0800 Subject: [PATCH 11/73] up --- amalgamate.py | 2 +- src/main.cpp | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/amalgamate.py b/amalgamate.py index b2564c5b..af1a2a2e 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -8,7 +8,7 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f: pipeline = [ ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h"], ["obj.h", "parser.h", "codeobject.h", "frame.h"], - ["vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"], + ["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"], ["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"] ] diff --git a/src/main.cpp b/src/main.cpp index 7ac4146f..3de4870f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -69,7 +69,10 @@ int main(int argc, char** argv){ return 1; } std::ifstream file(filepath); - if(!file.is_open()) return 1; + if(!file.is_open()){ + std::cerr << "Failed to open file: " << argv_1 << std::endl; + return 1; + } std::string src((std::istreambuf_iterator(file)), std::istreambuf_iterator()); // set parent path as cwd From 0b2d54f88d2164473eb73fe252e090cbf1d93d70 Mon Sep 17 00:00:00 2001 
From: blueloveTH Date: Wed, 29 Mar 2023 11:03:58 +0800 Subject: [PATCH 12/73] Update main.cpp --- src/main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 3de4870f..3c96fd54 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -66,14 +66,15 @@ int main(int argc, char** argv){ filepath = std::filesystem::absolute(filepath); if(!std::filesystem::exists(filepath)){ std::cerr << "File not found: " << argv_1 << std::endl; - return 1; + return 2; } std::ifstream file(filepath); if(!file.is_open()){ std::cerr << "Failed to open file: " << argv_1 << std::endl; - return 1; + return 3; } std::string src((std::istreambuf_iterator(file)), std::istreambuf_iterator()); + file.close(); // set parent path as cwd std::filesystem::current_path(filepath.parent_path()); From 9634e5c40254c3066453ac8a2327a729eeb3df0c Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 13:16:45 +0800 Subject: [PATCH 13/73] update gc --- src/codeobject.h | 4 ++ src/common.h | 34 ++++++++++++++++- src/compiler.h | 2 +- src/error.h | 4 +- src/frame.h | 11 ++++++ src/gc.h | 95 ++++++++++++++++++++++++++++++++++++------------ src/iter.h | 17 +++++++++ src/namedict.h | 7 +--- src/obj.h | 25 +++++++------ src/parser.h | 4 +- src/pocketpy.h | 4 +- src/ref.h | 16 +++++++- src/vm.h | 80 +++++++++++++++++----------------------- 13 files changed, 205 insertions(+), 98 deletions(-) diff --git a/src/codeobject.h b/src/codeobject.h index 98fdb47f..fcd503bb 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -89,6 +89,10 @@ struct CodeObject { return consts.size() - 1; } + void _mark() const { + for(PyObject* v : consts) OBJ_MARK(v); + } + /************************************************/ int _curr_block_i = 0; int _rvalue = 0; diff --git a/src/common.h b/src/common.h index 3f69ac1e..06530277 100644 --- a/src/common.h +++ b/src/common.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -18,7 +17,6 @@ #include #include 
#include -#include #include #include #include @@ -28,6 +26,7 @@ #include #include #include +#include #define PK_VERSION "0.9.5" #define PK_EXTRA_CHECK 0 @@ -101,4 +100,35 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept { return is_int(a) && is_int(b); } + +template +class queue{ + std::list list; +public: + void push(const T& t){ list.push_back(t); } + void push(T&& t){ list.push_back(std::move(t)); } + void pop(){ list.pop_front(); } + void clear(){ list.clear(); } + bool empty() const { return list.empty(); } + size_t size() const { return list.size(); } + T& front(){ return list.front(); } + const T& front() const { return list.front(); } + const std::list& data() const { return list; } +}; + +template +class stack{ + std::vector vec; +public: + void push(const T& t){ vec.push_back(t); } + void push(T&& t){ vec.push_back(std::move(t)); } + void pop(){ vec.pop_back(); } + void clear(){ vec.clear(); } + bool empty() const { return vec.empty(); } + size_t size() const { return vec.size(); } + T& top(){ return vec.back(); } + const T& top() const { return vec.back(); } + const std::vector& data() const { return vec; } +}; + } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index c20cc606..942828d2 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -21,7 +21,7 @@ enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; class Compiler { std::unique_ptr parser; - std::stack codes; + stack codes; int lexing_count = 0; bool used = false; VM* vm; diff --git a/src/error.h b/src/error.h index 945e929c..d0732b50 100644 --- a/src/error.h +++ b/src/error.h @@ -72,7 +72,7 @@ struct SourceData { class Exception { StrName type; Str msg; - std::stack stacktrace; + stack stacktrace; public: Exception(StrName type, Str msg): type(type), msg(msg) {} bool match_type(StrName type) const { return this->type == type;} @@ -84,7 +84,7 @@ public: } Str summary() const { - std::stack st(stacktrace); + stack st(stacktrace); StrStream 
ss; if(is_re) ss << "Traceback (most recent call last):\n"; while(!st.empty()) { ss << st.top() << '\n'; st.pop(); } diff --git a/src/frame.h b/src/frame.h index 37848a11..4ab98631 100644 --- a/src/frame.h +++ b/src/frame.h @@ -159,6 +159,17 @@ struct Frame { for(int i=n-1; i>=0; i--) v[i] = pop(); return v; } + + void _mark() const { + for(PyObject* obj : _data) OBJ_MARK(obj); + if(_locals != nullptr) _locals->_mark(); + if(_closure != nullptr) _closure->_mark(); + OBJ_MARK(_module); + for(auto& p : s_try_block){ + for(PyObject* obj : p.second) OBJ_MARK(obj); + } + co->_mark(); + } }; }; // namespace pkpy \ No newline at end of file diff --git a/src/gc.h b/src/gc.h index bb1e2f36..3fb5b76b 100644 --- a/src/gc.h +++ b/src/gc.h @@ -1,37 +1,84 @@ #pragma once #include "obj.h" +#include "codeobject.h" +#include "namedict.h" namespace pkpy { - struct ManagedHeap{ - std::vector heap; +struct ManagedHeap{ + std::vector gen; - void _add(PyObject* obj){ - obj->gc.enabled = true; - heap.push_back(obj); - } + template + PyObject* gcnew(Type type, T&& val){ + PyObject* obj = new Py_>(type, std::forward(val)); + gen.push_back(obj); + return obj; + } - void sweep(){ - std::vector alive; - for(PyObject* obj: heap){ - if(obj->gc.marked){ - obj->gc.marked = false; - alive.push_back(obj); - }else{ - delete obj; - } + template + PyObject* _new(Type type, T&& val){ + return gcnew(type, std::forward(val)); + } + + int sweep(){ + std::vector alive; + for(PyObject* obj: gen){ + if(obj->gc.marked){ + obj->gc.marked = false; + alive.push_back(obj); + }else{ + delete obj; } - heap.clear(); - heap.swap(alive); } + int freed = gen.size() - alive.size(); + gen.clear(); + gen.swap(alive); + return freed; + } - void collect(VM* vm){ - std::vector roots = get_roots(vm); - for(PyObject* obj: roots) obj->mark(); - sweep(); - } + int collect(VM* vm){ + mark(vm); + return sweep(); + } - std::vector get_roots(VM* vm); - }; + void mark(VM* vm); +}; + + +inline void NameDict::_mark(){ + for(uint16_t 
i=0; i<_capacity; i++){ + if(_items[i].first.empty()) continue; + OBJ_MARK(_items[i].second); + } +} + +template<> inline void _mark(List& t){ + for(PyObject* obj: t) OBJ_MARK(obj); +} + +template<> inline void _mark(Tuple& t){ + for(int i=0; i inline void _mark(Function& t){ + t.code->_mark(); + t.kwargs._mark(); + if(t._module != nullptr) OBJ_MARK(t._module); + if(t._closure != nullptr) t._closure->_mark(); +} + +template<> inline void _mark(BoundMethod& t){ + OBJ_MARK(t.obj); + OBJ_MARK(t.method); +} + +template<> inline void _mark(StarWrapper& t){ + OBJ_MARK(t.obj); +} + +template<> inline void _mark(Super& t){ + OBJ_MARK(t.first); +} +// NOTE: std::function may capture some PyObject*, they can not be marked } // namespace pkpy \ No newline at end of file diff --git a/src/iter.h b/src/iter.h index 42dd0c08..464b48de 100644 --- a/src/iter.h +++ b/src/iter.h @@ -65,4 +65,21 @@ inline PyObject* Generator::next(){ } } +inline void BaseIter::_mark() { + if(_ref != nullptr) OBJ_MARK(_ref); + if(loop_var != nullptr) OBJ_MARK(loop_var); +} + +inline void Generator::_mark(){ + BaseIter::_mark(); + frame->_mark(); +} + +template +void _mark(T& t){ + if constexpr(std::is_base_of_v){ + t._mark(); + } +} + } // namespace pkpy \ No newline at end of file diff --git a/src/namedict.h b/src/namedict.h index 0cd24e36..5623fce4 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -180,12 +180,7 @@ while(!_items[i].first.empty()) { \ return v; } - void apply_v(void(*f)(PyObject*)) { - for(uint16_t i=0; i<_capacity; i++){ - if(_items[i].first.empty()) continue; - f(_items[i].second); - } - } + void _mark(); #undef HASH_PROBE #undef _hash }; diff --git a/src/obj.h b/src/obj.h index 3593b366..634debbe 100644 --- a/src/obj.h +++ b/src/obj.h @@ -63,6 +63,8 @@ struct StarWrapper { StarWrapper(PyObject* obj, bool rvalue): obj(obj), rvalue(rvalue) {} }; +using Super = std::pair; + struct Slice { int start = 0; int stop = 0x7fffffff; @@ -84,16 +86,13 @@ public: virtual PyObject* next() = 0; 
PyObject* loop_var; BaseIter(VM* vm, PyObject* _ref) : vm(vm), _ref(_ref) {} + virtual void _mark(); virtual ~BaseIter() = default; }; -template struct is_container_gc : std::false_type {}; -template struct is_container_gc> : std::true_type {}; - struct GCHeader { - bool enabled; // whether this object is managed by GC bool marked; // whether this object is marked - GCHeader() : enabled(false), marked(false) {} + GCHeader() : marked(false) {} }; struct PyObject { @@ -105,12 +104,15 @@ struct PyObject { NameDict& attr() noexcept { return *_attr; } PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; } virtual void* value() = 0; - virtual void mark() = 0; + virtual void _mark() = 0; PyObject(Type type) : type(type) {} virtual ~PyObject() { delete _attr; } }; +template +void _mark(T& t); + template struct Py_ : PyObject { T _value; @@ -131,16 +133,17 @@ struct Py_ : PyObject { } void* value() override { return &_value; } - void mark() override { - if(!gc.enabled || gc.marked) return; + void _mark() override { + if(gc.marked) return; gc.marked = true; - if(is_attr_valid()) attr().apply_v([](PyObject* v){ v->mark(); }); - if constexpr (is_container_gc::value) _value._mark(); + if(is_attr_valid()) attr()._mark(); + pkpy::_mark(_value); // handle PyObject* inside _value `T` } }; #define OBJ_GET(T, obj) (((Py_*)(obj))->_value) #define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__)) +#define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_mark() const int kTpIntIndex = 2; const int kTpFloatIndex = 3; @@ -210,7 +213,7 @@ __T _py_cast(VM* vm, PyObject* obj) { } #define VAR(x) py_var(vm, x) -#define VAR_T(T, ...) vm->gcnew(T::_type(vm), T(__VA_ARGS__)) +#define VAR_T(T, ...) 
vm->heap.gcnew(T::_type(vm), T(__VA_ARGS__)) #define CAST(T, x) py_cast(vm, x) #define _CAST(T, x) _py_cast(vm, x) diff --git a/src/parser.h b/src/parser.h index da36b888..60c280b2 100644 --- a/src/parser.h +++ b/src/parser.h @@ -101,8 +101,8 @@ struct Parser { const char* curr_char; int current_line = 1; Token prev, curr; - std::queue nexts; - std::stack indents; + queue nexts; + stack indents; int brackets_level = 0; diff --git a/src/pocketpy.h b/src/pocketpy.h index 13502ec0..a036a8bd 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -69,7 +69,7 @@ inline void init_builtins(VM* _vm) { vm->TypeError("super(type, obj): obj must be an instance or subtype of type"); } Type base = vm->_all_types[type].base; - return vm->gcnew(vm->tp_super, Super(args[1], base)); + return vm->heap.gcnew(vm->tp_super, Super(args[1], base)); }); _vm->bind_builtin_func<2>("isinstance", [](VM* vm, Args& args) { @@ -757,7 +757,7 @@ inline void add_module_random(VM* vm){ inline void add_module_gc(VM* vm){ PyObject* mod = vm->new_module("gc"); - vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->gc_collect()))); + vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->heap.collect(vm)))); } inline void VM::post_init(){ diff --git a/src/ref.h b/src/ref.h index 7a6f0310..8129b316 100644 --- a/src/ref.h +++ b/src/ref.h @@ -152,7 +152,7 @@ struct TupleRef : BaseRef { template PyObject* VM::PyRef(P&& value) { static_assert(std::is_base_of_v>); - return gcnew

(tp_ref, std::forward

(value)); + return heap.gcnew

(tp_ref, std::forward

(value)); } inline const BaseRef* VM::PyRef_AS_C(PyObject* obj) @@ -166,4 +166,18 @@ inline void Frame::try_deref(VM* vm, PyObject*& v){ if(is_type(v, vm->tp_ref)) v = vm->PyRef_AS_C(v)->get(vm, this); } +/***** GC's Impl *****/ +template<> inline void _mark(AttrRef& t){ + OBJ_MARK(obj); +} + +template<> inline void _mark(IndexRef& t){ + OBJ_MARK(obj); + OBJ_MARK(index); +} + +template<> inline void _mark(TupleRef& t){ + _mark(t.objs); +} + } // namespace pkpy \ No newline at end of file diff --git a/src/vm.h b/src/vm.h index 01bd7289..1e8c02ce 100644 --- a/src/vm.h +++ b/src/vm.h @@ -24,8 +24,8 @@ Str _read_file_cwd(const Str& name, bool* ok); template<> inline ctype& _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - inline PyObject* py_var(VM* vm, const ctype& value) { return vm->gcnew(vm->ptype, value);} \ - inline PyObject* py_var(VM* vm, ctype&& value) { return vm->gcnew(vm->ptype, std::move(value));} + inline PyObject* py_var(VM* vm, const ctype& value) { return vm->heap.gcnew(vm->ptype, value);} \ + inline PyObject* py_var(VM* vm, ctype&& value) { return vm->heap.gcnew(vm->ptype, std::move(value));} class Generator: public BaseIter { @@ -35,7 +35,8 @@ public: Generator(VM* vm, std::unique_ptr&& frame) : BaseIter(vm, nullptr), frame(std::move(frame)), state(0) {} - PyObject* next(); + PyObject* next() override; + void _mark() override; }; struct PyTypeInfo{ @@ -46,9 +47,9 @@ struct PyTypeInfo{ class VM { VM* vm; // self reference for simplify code - ManagedHeap heap; public: - std::stack< std::unique_ptr > callstack; + ManagedHeap heap; + stack< std::unique_ptr > callstack; std::vector _all_types; PyObject* run_frame(Frame* frame); @@ -56,15 +57,12 @@ public: NameDict _modules; // loaded modules std::map _lazy_modules; // lazy loaded modules - // singleton objects, need_gc=false PyObject* _py_op_call; PyObject* _py_op_yield; PyObject* None; PyObject* True; PyObject* False; PyObject* Ellipsis; - - // managed by _modules, need_gc=false 
PyObject* builtins; // builtins module PyObject* _main; // __main__ module @@ -73,6 +71,13 @@ public: std::ostream* _stderr; int recursionlimit = 1000; + // for quick access + Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str; + Type tp_list, tp_tuple; + Type tp_function, tp_native_function, tp_iterator, tp_bound_method; + Type tp_slice, tp_range, tp_module, tp_ref; + Type tp_super, tp_exception, tp_star_wrapper; + VM(bool use_stdio){ this->vm = this; this->use_stdio = use_stdio; @@ -118,7 +123,7 @@ public: do{ val = cls->attr().try_get(name); if(val != nullptr) return val; - Type cls_t = static_cast*>(cls)->_value; + Type cls_t = OBJ_GET(Type, cls); Type base = _all_types[cls_t].base; if(base.index == -1) break; cls = _all_types[base].obj; @@ -144,18 +149,6 @@ public: return nullptr; } - i64 gc_collect(){ - heap.collect(this); - return 0; - } - - template - PyObject* gcnew(Type type, T&& val){ - PyObject* obj = new Py_>(type, std::forward(val)); - heap._add(obj); - return obj; - } - template std::enable_if_t, Args>, PyObject*> call(PyObject* callable, ArgT&& args){ @@ -200,12 +193,12 @@ public: PyObject* property(NativeFuncRaw fget){ PyObject* p = builtins->attr("property"); - PyObject* method = gcnew(tp_native_function, NativeFunc(fget, 1, false)); + PyObject* method = heap.gcnew(tp_native_function, NativeFunc(fget, 1, false)); return call(p, Args{method}); } PyObject* new_type_object(PyObject* mod, StrName name, Type base){ - PyObject* obj = new Py_(tp_type, _all_types.size()); + PyObject* obj = heap._new(tp_type, _all_types.size()); PyTypeInfo info{ .obj = obj, .base = base, @@ -263,17 +256,10 @@ public: return index; } - // for quick access - Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str; - Type tp_list, tp_tuple; - Type tp_function, tp_native_function, tp_iterator, tp_bound_method; - Type tp_slice, tp_range, tp_module, tp_ref; - Type tp_super, tp_exception, tp_star_wrapper; - template PyObject* PyIter(P&& value) { 
static_assert(std::is_base_of_v>); - return gcnew

(tp_iterator, std::forward

(value)); + return heap.gcnew

(tp_iterator, std::forward

(value)); } BaseIter* PyIter_AS_C(PyObject* obj) @@ -323,6 +309,7 @@ public: } ~VM() { + heap.collect(this); if(!use_stdio){ delete _stdout; delete _stderr; @@ -578,7 +565,7 @@ inline PyObject* VM::asRepr(PyObject* obj){ } inline PyObject* VM::new_module(StrName name) { - PyObject* obj = new Py_(tp_module, DummyModule()); + PyObject* obj = heap._new(tp_module, DummyModule()); obj->attr().set(__name__, VAR(name.str())); // we do not allow override in order to avoid memory leak // it is because Module objects are not garbage collected @@ -666,8 +653,8 @@ inline void VM::init_builtin_types(){ // PyTypeObject is managed by _all_types // PyModuleObject is managed by _modules // They are not managed by GC, so we use a simple "new" - _all_types.push_back({.obj = new Py_(Type(1), Type(0)), .base = -1, .name = "object"}); - _all_types.push_back({.obj = new Py_(Type(1), Type(1)), .base = 0, .name = "type"}); + _all_types.push_back({.obj = heap._new(Type(1), Type(0)), .base = -1, .name = "object"}); + _all_types.push_back({.obj = heap._new(Type(1), Type(1)), .base = 0, .name = "type"}); tp_object = 0; tp_type = 1; tp_int = _new_type_object("int"); @@ -690,12 +677,12 @@ inline void VM::init_builtin_types(){ tp_super = _new_type_object("super"); tp_exception = _new_type_object("Exception"); - this->None = new Py_(_new_type_object("NoneType"), {}); - this->Ellipsis = new Py_(_new_type_object("ellipsis"), {}); - this->True = new Py_(tp_bool, {}); - this->False = new Py_(tp_bool, {}); - this->_py_op_call = new Py_(_new_type_object("_py_op_call"), {}); - this->_py_op_yield = new Py_(_new_type_object("_py_op_yield"), {}); + this->None = heap._new(_new_type_object("NoneType"), {}); + this->Ellipsis = heap._new(_new_type_object("ellipsis"), {}); + this->True = heap._new(tp_bool, {}); + this->False = heap._new(tp_bool, {}); + this->_py_op_call = heap._new(_new_type_object("_py_op_call"), {}); + this->_py_op_yield = heap._new(_new_type_object("_py_op_yield"), {}); this->builtins = 
new_module("builtins"); this->_main = new_module("__main__"); @@ -723,7 +710,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo if(new_f != nullptr){ obj = call(new_f, std::move(args), kwargs, false); }else{ - obj = gcnew(OBJ_GET(Type, callable), {}); + obj = heap.gcnew(OBJ_GET(Type, callable), {}); PyObject* init_f = getattr(obj, __init__, false, true); if (init_f != nullptr) call(init_f, std::move(args), kwargs, false); } @@ -812,8 +799,6 @@ inline void VM::unpack_args(Args& args){ args = Args(std::move(unpacked)); } -using Super = std::pair; - // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){ PyObject* objtype = _t(obj); @@ -936,10 +921,11 @@ inline PyObject* VM::_exec(){ } } -inline std::vector ManagedHeap::get_roots(VM *vm) { - std::vector roots; - // ... - return roots; +inline void ManagedHeap::mark(VM *vm) { + // iterate callstack frames + for(auto& frame : vm->callstack.data()){ + frame->_mark(); + } } } // namespace pkpy \ No newline at end of file From 78b73998daadfab02a92091ae0c4fdeac07dc21e Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 13:38:01 +0800 Subject: [PATCH 14/73] update gc --- src/ceval.h | 2 ++ src/common.h | 2 +- src/gc.h | 9 +++++++++ src/obj.h | 3 ++- src/pocketpy.h | 2 +- src/ref.h | 6 +++--- src/vm.h | 11 ++++++----- 7 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index b1f934e6..51033f99 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -7,6 +7,8 @@ namespace pkpy{ inline PyObject* VM::run_frame(Frame* frame){ while(frame->has_next_bytecode()){ + heap._auto_collect(this); + const Bytecode& byte = frame->next_bytecode(); switch (byte.op) { diff --git a/src/common.h b/src/common.h index 06530277..c9e86185 100644 --- a/src/common.h +++ b/src/common.h @@ -29,7 +29,7 @@ #include #define PK_VERSION "0.9.5" -#define PK_EXTRA_CHECK 0 
+#define PK_EXTRA_CHECK 1 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 diff --git a/src/gc.h b/src/gc.h index 3fb5b76b..a897ff8f 100644 --- a/src/gc.h +++ b/src/gc.h @@ -7,11 +7,13 @@ namespace pkpy { struct ManagedHeap{ std::vector gen; + int counter = 0; template PyObject* gcnew(Type type, T&& val){ PyObject* obj = new Py_>(type, std::forward(val)); gen.push_back(obj); + counter++; return obj; } @@ -36,6 +38,13 @@ struct ManagedHeap{ return freed; } + void _auto_collect(VM* vm){ + if(counter > 1000){ + counter = 0; + collect(vm); + } + } + int collect(VM* vm){ mark(vm); return sweep(); diff --git a/src/obj.h b/src/obj.h index 634debbe..b65e3215 100644 --- a/src/obj.h +++ b/src/obj.h @@ -135,8 +135,9 @@ struct Py_ : PyObject { void _mark() override { if(gc.marked) return; + // std::cout << "marking " << type << std::endl; gc.marked = true; - if(is_attr_valid()) attr()._mark(); + if(_attr != nullptr) _attr->_mark(); pkpy::_mark(_value); // handle PyObject* inside _value `T` } }; diff --git a/src/pocketpy.h b/src/pocketpy.h index a036a8bd..78442bab 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -771,7 +771,7 @@ inline void VM::post_init(){ add_module_random(this); add_module_io(this); add_module_os(this); - add_module_c(this); + // add_module_c(this); add_module_gc(this); for(const char* name: {"this", "functools", "collections", "heapq", "bisect"}){ diff --git a/src/ref.h b/src/ref.h index 8129b316..8026929e 100644 --- a/src/ref.h +++ b/src/ref.h @@ -168,12 +168,12 @@ inline void Frame::try_deref(VM* vm, PyObject*& v){ /***** GC's Impl *****/ template<> inline void _mark(AttrRef& t){ - OBJ_MARK(obj); + OBJ_MARK(t.obj); } template<> inline void _mark(IndexRef& t){ - OBJ_MARK(obj); - OBJ_MARK(index); + OBJ_MARK(t.obj); + OBJ_MARK(t.index); } template<> inline void _mark(TupleRef& t){ diff --git a/src/vm.h b/src/vm.h index 1e8c02ce..a49ea8e2 100644 --- a/src/vm.h +++ b/src/vm.h @@ -650,9 +650,6 @@ 
inline Str VM::disassemble(CodeObject_ co){ } inline void VM::init_builtin_types(){ - // PyTypeObject is managed by _all_types - // PyModuleObject is managed by _modules - // They are not managed by GC, so we use a simple "new" _all_types.push_back({.obj = heap._new(Type(1), Type(0)), .base = -1, .name = "object"}); _all_types.push_back({.obj = heap._new(Type(1), Type(1)), .base = 0, .name = "type"}); tp_object = 0; tp_type = 1; @@ -699,7 +696,10 @@ inline void VM::init_builtin_types(){ builtins->attr().set("range", _t(tp_range)); post_init(); - for(auto& t: _all_types) t.obj->attr()._try_perfect_rehash(); + for(int i=0; i<_all_types.size(); i++){ + // std::cout << i << ": " << _all_types[i].name << std::endl; + _all_types[i].obj->attr()._try_perfect_rehash(); + } for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); } @@ -922,7 +922,8 @@ inline PyObject* VM::_exec(){ } inline void ManagedHeap::mark(VM *vm) { - // iterate callstack frames + vm->_modules._mark(); + for(auto& t: vm->_all_types) t.obj->_mark(); for(auto& frame : vm->callstack.data()){ frame->_mark(); } From 6a9d220433fe8e8b31e527e6c1e90bde1d6eeb44 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 15:34:50 +0800 Subject: [PATCH 15/73] update gc --- python/_dict.py | 8 ++++++-- src/ceval.h | 11 +++++++---- src/gc.h | 39 +++++++++++++++++++++++++++++---------- src/iter.h | 2 +- src/obj.h | 4 ++-- src/pocketpy.h | 8 +++----- src/vm.h | 13 ++++++++----- tests/07_dict.py | 5 ++++- tests/80_json.py | 12 ++++++------ 9 files changed, 66 insertions(+), 36 deletions(-) diff --git a/python/_dict.py b/python/_dict.py index 2a3b8137..24adfc9b 100644 --- a/python/_dict.py +++ b/python/_dict.py @@ -1,8 +1,12 @@ class dict: - def __init__(self, capacity=13): - self._capacity = capacity + def __init__(self, mapping=None): + self._capacity = 16 self._a = [None] * self._capacity self._len = 0 + + if mapping is not None: + for k,v in mapping: + self[k] = v def __len__(self): return self._len 
diff --git a/src/ceval.h b/src/ceval.h index 51033f99..b4893812 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -186,11 +186,14 @@ inline PyObject* VM::run_frame(Frame* frame){ frame->push(VAR(frame->pop_n_values_reversed(this, byte.arg).to_list())); continue; case OP_BUILD_MAP: { - Args items = frame->pop_n_values_reversed(this, byte.arg*2); - PyObject* obj = call(builtins->attr("dict"), no_arg()); - for(int i=0; ipop_value(this); + PyObject* key = frame->pop_value(this); + list[i] = VAR(Tuple({key, value})); } + PyObject* d_arg = VAR(std::move(list)); + PyObject* obj = call(builtins->attr("dict"), Args{d_arg}); frame->push(obj); } continue; case OP_BUILD_SET: { diff --git a/src/gc.h b/src/gc.h index a897ff8f..b4e0643f 100644 --- a/src/gc.h +++ b/src/gc.h @@ -1,59 +1,78 @@ #pragma once +#include "common.h" #include "obj.h" #include "codeobject.h" #include "namedict.h" namespace pkpy { struct ManagedHeap{ + std::vector _no_gc; std::vector gen; - int counter = 0; + + int gc_threshold = 700; + int gc_counter = 0; template PyObject* gcnew(Type type, T&& val){ PyObject* obj = new Py_>(type, std::forward(val)); gen.push_back(obj); - counter++; + gc_counter++; return obj; } template PyObject* _new(Type type, T&& val){ - return gcnew(type, std::forward(val)); + PyObject* obj = new Py_>(type, std::forward(val)); + obj->gc.enabled = false; + _no_gc.push_back(obj); + return obj; } - int sweep(){ + ~ManagedHeap(){ + for(PyObject* obj: _no_gc) delete obj; + } + + int sweep(VM* vm){ std::vector alive; for(PyObject* obj: gen){ if(obj->gc.marked){ obj->gc.marked = false; alive.push_back(obj); }else{ + // _delete_hook(vm, obj); delete obj; } } + + // clear _no_gc marked flag + for(PyObject* obj: _no_gc) obj->gc.marked = false; + int freed = gen.size() - alive.size(); gen.clear(); gen.swap(alive); return freed; } + void _delete_hook(VM* vm, PyObject* obj); + void _auto_collect(VM* vm){ - if(counter > 1000){ - counter = 0; - collect(vm); - } + if(gc_counter < gc_threshold) return; + 
gc_counter = 0; + collect(vm); + gc_threshold = gen.size() * 2; } int collect(VM* vm){ mark(vm); - return sweep(); + int freed = sweep(vm); + // std::cout << "GC: " << freed << " objects freed" << std::endl; + return freed; } void mark(VM* vm); }; - inline void NameDict::_mark(){ for(uint16_t i=0; i<_capacity; i++){ if(_items[i].first.empty()) continue; diff --git a/src/iter.h b/src/iter.h index 464b48de..f8ecdb5c 100644 --- a/src/iter.h +++ b/src/iter.h @@ -72,7 +72,7 @@ inline void BaseIter::_mark() { inline void Generator::_mark(){ BaseIter::_mark(); - frame->_mark(); + if(frame!=nullptr) frame->_mark(); } template diff --git a/src/obj.h b/src/obj.h index b65e3215..c7c38a90 100644 --- a/src/obj.h +++ b/src/obj.h @@ -91,8 +91,9 @@ public: }; struct GCHeader { + bool enabled; // whether this object is managed by GC bool marked; // whether this object is marked - GCHeader() : marked(false) {} + GCHeader() : enabled(true), marked(false) {} }; struct PyObject { @@ -135,7 +136,6 @@ struct Py_ : PyObject { void _mark() override { if(gc.marked) return; - // std::cout << "marking " << type << std::endl; gc.marked = true; if(_attr != nullptr) _attr->_mark(); pkpy::_mark(_value); // handle PyObject* inside _value `T` diff --git a/src/pocketpy.h b/src/pocketpy.h index 78442bab..bc6a8109 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -88,8 +88,7 @@ inline void init_builtins(VM* _vm) { i64 lhs = CAST(i64, args[0]); i64 rhs = CAST(i64, args[1]); if(rhs == 0) vm->ZeroDivisionError(); - Tuple t = Tuple{VAR(lhs/rhs), VAR(lhs%rhs)}; - return VAR(std::move(t)); + return VAR(Tuple({VAR(lhs/rhs), VAR(lhs%rhs)})); }); _vm->bind_builtin_func<1>("eval", [](VM* vm, Args& args) { @@ -146,7 +145,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<1>("hex", [](VM* vm, Args& args) { - std::stringstream ss; + StrStream ss; ss << std::hex << CAST(i64, args[0]); return VAR("0x" + ss.str()); }); @@ -649,8 +648,7 @@ struct ReMatch { vm->bind_method<0>(type, "span", [](VM* 
vm, Args& args) { auto& self = CAST(ReMatch&, args[0]); - Tuple t = Tuple{VAR(self.start), VAR(self.end)}; - return VAR(std::move(t)); + return VAR(Tuple({VAR(self.start), VAR(self.end)})); }); vm->bind_method<1>(type, "group", [](VM* vm, Args& args) { diff --git a/src/vm.h b/src/vm.h index a49ea8e2..bfb73a8f 100644 --- a/src/vm.h +++ b/src/vm.h @@ -881,15 +881,14 @@ inline void VM::_error(Exception e){ inline PyObject* VM::_exec(){ Frame* frame = top_frame(); - i64 base_id = frame->id; - PyObject* ret = nullptr; + const i64 base_id = frame->id; bool need_raise = false; while(true){ if(frame->id < base_id) UNREACHABLE(); try{ if(need_raise){ need_raise = false; _raise(); } - ret = run_frame(frame); + PyObject* ret = run_frame(frame); if(ret == _py_op_yield) return _py_op_yield; if(ret != _py_op_call){ if(frame->id == base_id){ // [ frameBase<- ] @@ -922,11 +921,15 @@ inline PyObject* VM::_exec(){ } inline void ManagedHeap::mark(VM *vm) { - vm->_modules._mark(); - for(auto& t: vm->_all_types) t.obj->_mark(); + for(PyObject* obj: _no_gc) OBJ_MARK(obj); for(auto& frame : vm->callstack.data()){ frame->_mark(); } } +inline void ManagedHeap::_delete_hook(VM *vm, PyObject *obj){ + Type t = OBJ_GET(Type, vm->_t(obj)); + std::cout << "delete " << vm->_all_types[t].name << " at " << obj << std::endl; +} + } // namespace pkpy \ No newline at end of file diff --git a/tests/07_dict.py b/tests/07_dict.py index 9c3826fc..50f21cff 100644 --- a/tests/07_dict.py +++ b/tests/07_dict.py @@ -42,4 +42,7 @@ d1 = {1:2, 3:4} d2 = {3:4, 1:2} d3 = {1:2, 3:4, 5:6} assert d1 == d2 -assert d1 != d3 \ No newline at end of file +assert d1 != d3 + +a = dict([(1, 2), (3, 4)]) +assert a == {1: 2, 3: 4} \ No newline at end of file diff --git a/tests/80_json.py b/tests/80_json.py index b285faed..6a589c32 100644 --- a/tests/80_json.py +++ b/tests/80_json.py @@ -3,12 +3,12 @@ a = { 'b': 2, 'c': None, 'd': [1, 2, 3], - # 'e': { - # 'a': 1, - # 'b': 2, - # 'c': None, - # 'd': [1, 2, 3], - # }, + 'e': { + 
'a': 1, + 'b': 2, + 'c': None, + 'd': [1, 2, 3], + }, "f": 'This is a string', 'g': [True, False, None], 'h': False From 87e52df142340000590f062c86555c15acbf7c82 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 15:35:34 +0800 Subject: [PATCH 16/73] Update common.h --- src/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common.h b/src/common.h index c9e86185..06530277 100644 --- a/src/common.h +++ b/src/common.h @@ -29,7 +29,7 @@ #include #define PK_VERSION "0.9.5" -#define PK_EXTRA_CHECK 1 +#define PK_EXTRA_CHECK 0 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 From 120773891aebcb9d82fc371c354d16ab48f9b75f Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 19:32:46 +0800 Subject: [PATCH 17/73] update gc --- src/ceval.h | 18 +--------------- src/compiler.h | 56 +++++++++++++++++++++++++++++++------------------- src/gc.h | 45 ++++++++++++++++++++++++++++++++++++++-- src/opcodes.h | 3 --- src/vm.h | 1 + 5 files changed, 80 insertions(+), 43 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index b4893812..531c3ea6 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -7,7 +7,7 @@ namespace pkpy{ inline PyObject* VM::run_frame(Frame* frame){ while(frame->has_next_bytecode()){ - heap._auto_collect(this); + // heap._auto_collect(this); const Bytecode& byte = frame->next_bytecode(); switch (byte.op) @@ -115,22 +115,6 @@ inline PyObject* VM::run_frame(Frame* frame){ args[0] = frame->top_value(this); frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); } continue; - case OP_INPLACE_BINARY_OP: { - Args args(2); - args[1] = frame->pop(); - args[0] = frame->top_value(this); - PyObject* ret = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); - PyRef_AS_C(frame->top())->set(this, frame, std::move(ret)); - frame->_pop(); - } continue; - case OP_INPLACE_BITWISE_OP: { - Args args(2); - args[1] = frame->pop_value(this); - args[0] = 
frame->top_value(this); - PyObject* ret = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); - PyRef_AS_C(frame->top())->set(this, frame, std::move(ret)); - frame->_pop(); - } continue; case OP_COMPARE_OP: { Args args(2); args[1] = frame->pop_value(this); diff --git a/src/compiler.h b/src/compiler.h index 942828d2..613fd970 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -1,6 +1,7 @@ #pragma once #include "codeobject.h" +#include "common.h" #include "parser.h" #include "error.h" #include "ceval.h" @@ -405,40 +406,51 @@ private: } void exprAssign() { - int lhs = co()->codes.empty() ? -1 : co()->codes.size() - 1; + if(co()->codes.empty()) UNREACHABLE(); + bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; + int _name_arg = co()->codes.back().arg; + // if the last op is OP_LOAD_NAME_REF, remove it + // because we will emit OP_STORE_NAME or OP_STORE_CLASS_ATTR + if(is_load_name_ref) co()->codes.pop_back(); + co()->_rvalue += 1; TokenIndex op = parser->prev.type; if(op == TK("=")) { // a = (expr) EXPR_TUPLE(); - if(lhs!=-1 && co()->codes[lhs].op == OP_LOAD_NAME_REF){ - if(co()->_is_compiling_class){ - emit(OP_STORE_CLASS_ATTR, co()->codes[lhs].arg); - }else{ - emit(OP_STORE_NAME, co()->codes[lhs].arg); - } - co()->codes[lhs].op = OP_NO_OP; - co()->codes[lhs].arg = -1; + if(is_load_name_ref){ + auto op = co()->_is_compiling_class ? 
OP_STORE_CLASS_ATTR : OP_STORE_NAME; + emit(op, _name_arg); }else{ if(co()->_is_compiling_class) SyntaxError(); emit(OP_STORE_REF); } }else{ // a += (expr) -> a = a + (expr) if(co()->_is_compiling_class) SyntaxError(); + if(is_load_name_ref){ + emit(OP_LOAD_NAME, _name_arg); + }else{ + emit(OP_DUP_TOP_VALUE); + } EXPR(); switch (op) { - case TK("+="): emit(OP_INPLACE_BINARY_OP, 0); break; - case TK("-="): emit(OP_INPLACE_BINARY_OP, 1); break; - case TK("*="): emit(OP_INPLACE_BINARY_OP, 2); break; - case TK("/="): emit(OP_INPLACE_BINARY_OP, 3); break; - case TK("//="): emit(OP_INPLACE_BINARY_OP, 4); break; - case TK("%="): emit(OP_INPLACE_BINARY_OP, 5); break; - case TK("<<="): emit(OP_INPLACE_BITWISE_OP, 0); break; - case TK(">>="): emit(OP_INPLACE_BITWISE_OP, 1); break; - case TK("&="): emit(OP_INPLACE_BITWISE_OP, 2); break; - case TK("|="): emit(OP_INPLACE_BITWISE_OP, 3); break; - case TK("^="): emit(OP_INPLACE_BITWISE_OP, 4); break; + case TK("+="): emit(OP_BINARY_OP, 0); break; + case TK("-="): emit(OP_BINARY_OP, 1); break; + case TK("*="): emit(OP_BINARY_OP, 2); break; + case TK("/="): emit(OP_BINARY_OP, 3); break; + case TK("//="): emit(OP_BINARY_OP, 4); break; + case TK("%="): emit(OP_BINARY_OP, 5); break; + case TK("<<="): emit(OP_BITWISE_OP, 0); break; + case TK(">>="): emit(OP_BITWISE_OP, 1); break; + case TK("&="): emit(OP_BITWISE_OP, 2); break; + case TK("|="): emit(OP_BITWISE_OP, 3); break; + case TK("^="): emit(OP_BITWISE_OP, 4); break; default: UNREACHABLE(); } + if(is_load_name_ref){ + emit(OP_STORE_NAME, _name_arg); + }else{ + emit(OP_STORE_REF); + } } co()->_rvalue -= 1; } @@ -791,6 +803,9 @@ private: consume_end_stmt(); } + // a = 1 + 2 + // ['a', '1', '2', '+', '='] + // void parse_expression(Precedence precedence) { lex_token(); GrammarFn prefix = rules[parser->prev.type].prefix; @@ -1003,7 +1018,6 @@ private: // If last op is not an assignment, pop the result. 
uint8_t last_op = co()->codes.back().op; if( last_op!=OP_STORE_NAME && last_op!=OP_STORE_REF && - last_op!=OP_INPLACE_BINARY_OP && last_op!=OP_INPLACE_BITWISE_OP && last_op!=OP_STORE_ALL_NAMES && last_op!=OP_STORE_CLASS_ATTR){ for(int i=begin; icodes[i].op==OP_BUILD_TUPLE_REF) co()->codes[i].op = OP_BUILD_TUPLE; diff --git a/src/gc.h b/src/gc.h index b4e0643f..1e4d3f16 100644 --- a/src/gc.h +++ b/src/gc.h @@ -5,12 +5,46 @@ #include "codeobject.h" #include "namedict.h" +/* +0: object +1: type +2: int +3: float +4: bool +5: str +6: list +7: tuple +8: slice +9: range +10: module +11: _ref +12: _star_wrapper +13: function +14: native_function +15: iterator +16: bound_method +17: super +18: Exception +19: NoneType +20: ellipsis +21: _py_op_call +22: _py_op_yield +23: re.Match +24: random.Random +25: io.FileIO +26: property +27: staticmethod +28: dict +29: set +*/ + namespace pkpy { struct ManagedHeap{ std::vector _no_gc; std::vector gen; - int gc_threshold = 700; + static const int kMinGCThreshold = 700; + int gc_threshold = kMinGCThreshold; int gc_counter = 0; template @@ -29,8 +63,13 @@ struct ManagedHeap{ return obj; } + inline static std::map deleted; + ~ManagedHeap(){ for(PyObject* obj: _no_gc) delete obj; + for(auto& [type, count]: deleted){ + std::cout << "GC: " << type << "=" << count << std::endl; + } } int sweep(VM* vm){ @@ -41,6 +80,7 @@ struct ManagedHeap{ alive.push_back(obj); }else{ // _delete_hook(vm, obj); + deleted[obj->type] += 1; delete obj; } } @@ -49,6 +89,7 @@ struct ManagedHeap{ for(PyObject* obj: _no_gc) obj->gc.marked = false; int freed = gen.size() - alive.size(); + // std::cout << "GC: " << alive.size() << "/" << gen.size() << " (" << freed << " freed)" << std::endl; gen.clear(); gen.swap(alive); return freed; @@ -61,12 +102,12 @@ struct ManagedHeap{ gc_counter = 0; collect(vm); gc_threshold = gen.size() * 2; + if(gc_threshold < kMinGCThreshold) gc_threshold = kMinGCThreshold; } int collect(VM* vm){ mark(vm); int freed = sweep(vm); - // 
std::cout << "GC: " << freed << " objects freed" << std::endl; return freed; } diff --git a/src/opcodes.h b/src/opcodes.h index 1ad9dcad..693a579d 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -81,9 +81,6 @@ OPCODE(YIELD_VALUE) OPCODE(FAST_INDEX) // a[x] OPCODE(FAST_INDEX_REF) // a[x] -OPCODE(INPLACE_BINARY_OP) -OPCODE(INPLACE_BITWISE_OP) - OPCODE(SETUP_CLOSURE) OPCODE(SETUP_DECORATOR) OPCODE(STORE_ALL_NAMES) diff --git a/src/vm.h b/src/vm.h index bfb73a8f..749f321b 100644 --- a/src/vm.h +++ b/src/vm.h @@ -166,6 +166,7 @@ public: if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); + if(_module == _main) std::cout << disassemble(code) << '\n'; return _exec(code, _module); }catch (const Exception& e){ *_stderr << e.summary() << '\n'; From e78aa44895972cf2b5ad9e4a64df49bda5b0fa2f Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 23:33:45 +0800 Subject: [PATCH 18/73] update lexer --- amalgamate.py | 4 +- src/ceval.h | 4 +- src/common.h | 34 +--- src/compiler.h | 378 +++++++++--------------------------- src/expr.h | 108 +++++++++++ src/frame.h | 8 +- src/gc.h | 6 +- src/lexer.h | 510 +++++++++++++++++++++++++++++++++++++++++++++++++ src/parser.h | 302 ----------------------------- src/pocketpy.h | 2 + src/vm.h | 4 +- 11 files changed, 730 insertions(+), 630 deletions(-) create mode 100644 src/expr.h create mode 100644 src/lexer.h delete mode 100644 src/parser.h diff --git a/amalgamate.py b/amalgamate.py index af1a2a2e..8327b940 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -6,8 +6,8 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f: OPCODES_TEXT = f.read() pipeline = [ - ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h"], - ["obj.h", "parser.h", "codeobject.h", "frame.h"], + ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], + ["obj.h", "codeobject.h", "frame.h"], ["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"], ["iter.h", 
"cffi.h", "io.h", "_generated.h", "pocketpy.h"] ] diff --git a/src/ceval.h b/src/ceval.h index 531c3ea6..30397335 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -7,7 +7,7 @@ namespace pkpy{ inline PyObject* VM::run_frame(Frame* frame){ while(frame->has_next_bytecode()){ - // heap._auto_collect(this); + heap._auto_collect(this); const Bytecode& byte = frame->next_bytecode(); switch (byte.op) @@ -325,7 +325,7 @@ inline PyObject* VM::run_frame(Frame* frame){ if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE"); return frame->pop_value(this); } -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE"); #endif return None; diff --git a/src/common.h b/src/common.h index 06530277..1e022115 100644 --- a/src/common.h +++ b/src/common.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -26,10 +25,13 @@ #include #include #include -#include +#include -#define PK_VERSION "0.9.5" -#define PK_EXTRA_CHECK 0 +#define PK_VERSION "0.9.6" + +// debug macros +#define DEBUG_NO_BUILTIN_MODULES 0 +#define DEBUG_EXTRA_CHECK 1 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 @@ -40,13 +42,13 @@ #if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__) typedef int32_t i64; typedef float f64; -#define S_TO_INT std::stoi -#define S_TO_FLOAT std::stof +#define S_TO_INT(...) static_cast(std::stoi(__VA_ARGS__)) +#define S_TO_FLOAT(...) static_cast(std::stof(__VA_ARGS__)) #else typedef int64_t i64; typedef double f64; -#define S_TO_INT std::stoll -#define S_TO_FLOAT std::stod +#define S_TO_INT(...) static_cast(std::stoll(__VA_ARGS__)) +#define S_TO_FLOAT(...) 
static_cast(std::stod(__VA_ARGS__)) #endif namespace pkpy{ @@ -100,22 +102,6 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept { return is_int(a) && is_int(b); } - -template -class queue{ - std::list list; -public: - void push(const T& t){ list.push_back(t); } - void push(T&& t){ list.push_back(std::move(t)); } - void pop(){ list.pop_front(); } - void clear(){ list.clear(); } - bool empty() const { return list.empty(); } - size_t size() const { return list.size(); } - T& front(){ return list.front(); } - const T& front() const { return list.front(); } - const std::list& data() const { return list; } -}; - template class stack{ std::vector vec; diff --git a/src/compiler.h b/src/compiler.h index 613fd970..9510e316 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -2,7 +2,7 @@ #include "codeobject.h" #include "common.h" -#include "parser.h" +#include "lexer.h" #include "error.h" #include "ceval.h" @@ -18,24 +18,21 @@ struct GrammarRule{ Precedence precedence; }; -enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; - class Compiler { - std::unique_ptr parser; + std::unique_ptr lexer; stack codes; - int lexing_count = 0; bool used = false; VM* vm; std::map rules; CodeObject_ co() const{ return codes.top(); } - CompileMode mode() const{ return parser->src->mode; } + CompileMode mode() const{ return lexer->src->mode; } NameScope name_scope() const { return codes.size()>1 ? 
NAME_LOCAL : NAME_GLOBAL; } public: Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ this->vm = vm; - this->parser = std::make_unique( + this->lexer = std::make_unique( make_sp(source, filename, mode) ); @@ -104,239 +101,36 @@ public: } private: - Str eat_string_until(char quote, bool raw) { - bool quote3 = parser->match_n_chars(2, quote); - std::vector buff; - while (true) { - char c = parser->eatchar_include_newline(); - if (c == quote){ - if(quote3 && !parser->match_n_chars(2, quote)){ - buff.push_back(c); - continue; - } - break; - } - if (c == '\0'){ - if(quote3 && parser->src->mode == REPL_MODE){ - throw NeedMoreLines(false); - } - SyntaxError("EOL while scanning string literal"); - } - if (c == '\n'){ - if(!quote3) SyntaxError("EOL while scanning string literal"); - else{ - buff.push_back(c); - continue; - } - } - if (!raw && c == '\\') { - switch (parser->eatchar_include_newline()) { - case '"': buff.push_back('"'); break; - case '\'': buff.push_back('\''); break; - case '\\': buff.push_back('\\'); break; - case 'n': buff.push_back('\n'); break; - case 'r': buff.push_back('\r'); break; - case 't': buff.push_back('\t'); break; - default: SyntaxError("invalid escape char"); - } - } else { - buff.push_back(c); - } - } - return Str(buff.data(), buff.size()); - } + int i = 0; + std::vector tokens; - void eat_string(char quote, StringType type) { - Str s = eat_string_until(quote, type == RAW_STRING); - if(type == F_STRING){ - parser->set_next_token(TK("@fstr"), VAR(s)); - }else{ - parser->set_next_token(TK("@str"), VAR(s)); - } - } - - void eat_number() { - static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?"); - std::smatch m; - - const char* i = parser->token_start; - while(*i != '\n' && *i != '\0') i++; - std::string s = std::string(parser->token_start, i); - - try{ - if (std::regex_search(s, m, pattern)) { - // here is m.length()-1, since the first char was eaten by lex_token() - for(int j=0; jeatchar(); - - int base = 10; - 
size_t size; - if (m[1].matched) base = 16; - if (m[2].matched) { - if(base == 16) SyntaxError("hex literal should not contain a dot"); - parser->set_next_token(TK("@num"), VAR(S_TO_FLOAT(m[0], &size))); - } else { - parser->set_next_token(TK("@num"), VAR(S_TO_INT(m[0], &size, base))); - } - if (size != m.length()) UNREACHABLE(); - } - }catch(std::exception& _){ - SyntaxError("invalid number literal"); - } - } - - void lex_token(){ - lexing_count++; - _lex_token(); - lexing_count--; - } - - // Lex the next token and set it as the next token. - void _lex_token() { - parser->prev = parser->curr; - parser->curr = parser->next_token(); - //std::cout << parser->curr.info() << std::endl; - - while (parser->peekchar() != '\0') { - parser->token_start = parser->curr_char; - char c = parser->eatchar_include_newline(); - switch (c) { - case '\'': case '"': eat_string(c, NORMAL_STRING); return; - case '#': parser->skip_line_comment(); break; - case '{': parser->set_next_token(TK("{")); return; - case '}': parser->set_next_token(TK("}")); return; - case ',': parser->set_next_token(TK(",")); return; - case ':': parser->set_next_token_2(':', TK(":"), TK("::")); return; - case ';': parser->set_next_token(TK(";")); return; - case '(': parser->set_next_token(TK("(")); return; - case ')': parser->set_next_token(TK(")")); return; - case '[': parser->set_next_token(TK("[")); return; - case ']': parser->set_next_token(TK("]")); return; - case '@': parser->set_next_token(TK("@")); return; - case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return; - case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return; - case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return; - case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return; - case '?': parser->set_next_token(TK("?")); return; - case '.': { - if(parser->matchchar('.')) { - if(parser->matchchar('.')) { - parser->set_next_token(TK("...")); - } else { - SyntaxError("invalid token '..'"); - } - } else 
{ - parser->set_next_token(TK(".")); - } - return; - } - case '=': parser->set_next_token_2('=', TK("="), TK("==")); return; - case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return; - case '>': { - if(parser->matchchar('=')) parser->set_next_token(TK(">=")); - else if(parser->matchchar('>')) parser->set_next_token_2('=', TK(">>"), TK(">>=")); - else parser->set_next_token(TK(">")); - return; - } - case '<': { - if(parser->matchchar('=')) parser->set_next_token(TK("<=")); - else if(parser->matchchar('<')) parser->set_next_token_2('=', TK("<<"), TK("<<=")); - else parser->set_next_token(TK("<")); - return; - } - case '-': { - if(parser->matchchar('=')) parser->set_next_token(TK("-=")); - else if(parser->matchchar('>')) parser->set_next_token(TK("->")); - else parser->set_next_token(TK("-")); - return; - } - case '!': - if(parser->matchchar('=')) parser->set_next_token(TK("!=")); - else SyntaxError("expected '=' after '!'"); - break; - case '*': - if (parser->matchchar('*')) { - parser->set_next_token(TK("**")); // '**' - } else { - parser->set_next_token_2('=', TK("*"), TK("*=")); - } - return; - case '/': - if(parser->matchchar('/')) { - parser->set_next_token_2('=', TK("//"), TK("//=")); - } else { - parser->set_next_token_2('=', TK("/"), TK("/=")); - } - return; - case '\r': break; // just ignore '\r' - case ' ': case '\t': parser->eat_spaces(); break; - case '\n': { - parser->set_next_token(TK("@eol")); - if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level"); - return; - } - default: { - if(c == 'f'){ - if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;} - if(parser->matchchar('"')) {eat_string('"', F_STRING); return;} - }else if(c == 'r'){ - if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;} - if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;} - } - - if (c >= '0' && c <= '9') { - eat_number(); - return; - } - - switch (parser->eat_name()) - { - case 0: 
break; - case 1: SyntaxError("invalid char: " + std::string(1, c)); - case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); - case 3: SyntaxError("@id contains invalid char"); break; - case 4: SyntaxError("invalid JSON token"); break; - default: UNREACHABLE(); - } - return; - } - } - } - - parser->token_start = parser->curr_char; - parser->set_next_token(TK("@eof")); - } - - TokenIndex peek() { - return parser->curr.type; - } - - // not sure this will work - TokenIndex peek_next() { - if(parser->nexts.empty()) return TK("@eof"); - return parser->nexts.front().type; - } + const Token& prev() { return tokens.at(i-1); } + const Token& curr() { return tokens.at(i); } + const Token& next() { return tokens.at(i+1); } + const Token& peek(int offset=0) { return tokens.at(i+offset); } + void advance() { i++; } bool match(TokenIndex expected) { - if (peek() != expected) return false; - lex_token(); + if (curr().type != expected) return false; + advance(); return true; } void consume(TokenIndex expected) { if (!match(expected)){ StrStream ss; - ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'"; + ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'"; SyntaxError(ss.str()); } } bool match_newlines(bool repl_throw=false) { bool consumed = false; - if (peek() == TK("@eol")) { - while (peek() == TK("@eol")) lex_token(); + if (curr().type == TK("@eol")) { + while (curr().type == TK("@eol")) advance(); consumed = true; } - if (repl_throw && peek() == TK("@eof")){ + if (repl_throw && curr().type == TK("@eof")){ throw NeedMoreLines(co()->_is_compiling_class); } return consumed; @@ -344,8 +138,8 @@ private: bool match_end_stmt() { if (match(TK(";"))) { match_newlines(); return true; } - if (match_newlines() || peek()==TK("@eof")) return true; - if (peek() == TK("@dedent")) return true; + if (match_newlines() || curr().type == TK("@eof")) return true; + if (curr().type == TK("@dedent")) return true; return 
false; } @@ -353,15 +147,27 @@ private: if (!match_end_stmt()) SyntaxError("expected statement end"); } + PyObject* get_value(const Token& token) { + switch (token.type) { + case TK("@num"): + if(std::holds_alternative(token.value)) return VAR(std::get(token.value)); + if(std::holds_alternative(token.value)) return VAR(std::get(token.value)); + UNREACHABLE(); + case TK("@str"): case TK("@fstr"): + return VAR(std::get(token.value)); + default: throw std::runtime_error(Str("invalid token type: ") + TK_STR(token.type)); + } + } + void exprLiteral() { - PyObject* value = parser->prev.value; + PyObject* value = get_value(prev()); int index = co()->add_const(value); emit(OP_LOAD_CONST, index); } void exprFString() { static const std::regex pattern(R"(\{(.*?)\})"); - PyObject* value = parser->prev.value; + PyObject* value = get_value(prev()); Str s = CAST(Str, value); std::sregex_iterator begin(s.begin(), s.end(), pattern); std::sregex_iterator end; @@ -395,7 +201,7 @@ private: _compile_f_args(func, false); consume(TK(":")); } - func.code = make_sp(parser->src, func.name.str()); + func.code = make_sp(lexer->src, func.name.str()); this->codes.push(func.code); co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; emit(OP_RETURN_VALUE); @@ -414,7 +220,7 @@ private: if(is_load_name_ref) co()->codes.pop_back(); co()->_rvalue += 1; - TokenIndex op = parser->prev.type; + TokenIndex op = prev().type; if(op == TK("=")) { // a = (expr) EXPR_TUPLE(); if(is_load_name_ref){ @@ -487,7 +293,7 @@ private: } void exprBinaryOp() { - TokenIndex op = parser->prev.type; + TokenIndex op = prev().type; parse_expression((Precedence)(rules[op].precedence + 1)); switch (op) { @@ -525,7 +331,7 @@ private: } void exprUnaryOp() { - TokenIndex op = parser->prev.type; + TokenIndex op = prev().type; parse_expression((Precedence)(PREC_UNARY + 1)); switch (op) { case TK("-"): emit(OP_UNARY_NEGATIVE); break; @@ -588,7 +394,7 @@ private: int ARGC = 0; do { match_newlines(mode()==REPL_MODE); - if (peek() == 
TK("]")) break; + if (curr().type == TK("]")) break; EXPR(); ARGC++; match_newlines(mode()==REPL_MODE); if(ARGC == 1 && match(TK("for"))){ @@ -609,9 +415,9 @@ private: int ARGC = 0; do { match_newlines(mode()==REPL_MODE); - if (peek() == TK("}")) break; + if (curr().type == TK("}")) break; EXPR(); - if(peek() == TK(":")) parsing_dict = true; + if(curr().type == TK(":")) parsing_dict = true; if(parsing_dict){ consume(TK(":")); EXPR(); @@ -637,10 +443,10 @@ private: bool need_unpack = false; do { match_newlines(mode()==REPL_MODE); - if (peek() == TK(")")) break; - if(peek() == TK("@id") && peek_next() == TK("=")) { + if (curr().type == TK(")")) break; + if(curr().type == TK("@id") && next().type == TK("=")) { consume(TK("@id")); - const Str& key = parser->prev.str(); + const Str& key = prev().str(); emit(OP_LOAD_CONST, co()->add_const(VAR(key))); consume(TK("=")); co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; @@ -666,7 +472,7 @@ private: void exprName(){ _exprName(false); } void _exprName(bool force_lvalue) { - Token tkname = parser->prev; + const Token& tkname = prev(); int index = co()->add_name(tkname.str(), name_scope()); bool fast_load = !force_lvalue && co()->_rvalue>0; emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index); @@ -674,7 +480,7 @@ private: void exprAttrib() { consume(TK("@id")); - const Str& name = parser->prev.str(); + const Str& name = prev().str(); int index = co()->add_name(name, NAME_ATTR); emit(co()->_rvalue ? 
OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index); } @@ -710,7 +516,7 @@ private: } void exprValue() { - TokenIndex op = parser->prev.type; + TokenIndex op = prev().type; switch (op) { case TK("None"): emit(OP_LOAD_NONE); break; case TK("True"): emit(OP_LOAD_TRUE); break; @@ -721,7 +527,7 @@ private: } int emit(Opcode opcode, int arg=-1, bool keepline=false) { - int line = parser->prev.line; + int line = prev().line; co()->codes.push_back( Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line} ); @@ -738,7 +544,7 @@ private: void compile_block_body(CompilerAction action=nullptr) { if(action == nullptr) action = &Compiler::compile_stmt; consume(TK(":")); - if(peek()!=TK("@eol") && peek()!=TK("@eof")){ + if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ (this->*action)(); // inline block return; } @@ -746,7 +552,7 @@ private: SyntaxError("expected a new line after ':'"); } consume(TK("@indent")); - while (peek() != TK("@dedent")) { + while (curr().type != TK("@dedent")) { match_newlines(); (this->*action)(); match_newlines(); @@ -756,7 +562,7 @@ private: Token _compile_import() { consume(TK("@id")); - Token tkmodule = parser->prev; + Token tkmodule = prev(); int index = co()->add_name(tkmodule.str(), NAME_SPECIAL); emit(OP_IMPORT_NAME, index); return tkmodule; @@ -768,7 +574,7 @@ private: Token tkmodule = _compile_import(); if (match(TK("as"))) { consume(TK("@id")); - tkmodule = parser->prev; + tkmodule = prev(); } int index = co()->add_name(tkmodule.str(), name_scope()); emit(OP_STORE_NAME, index); @@ -789,12 +595,12 @@ private: do { emit(OP_DUP_TOP_VALUE); consume(TK("@id")); - Token tkname = parser->prev; + Token tkname = prev(); int index = co()->add_name(tkname.str(), NAME_ATTR); emit(OP_BUILD_ATTR, index); if (match(TK("as"))) { consume(TK("@id")); - tkname = parser->prev; + tkname = prev(); } index = co()->add_name(tkname.str(), name_scope()); emit(OP_STORE_NAME, index); @@ -807,14 +613,14 @@ private: // ['a', '1', '2', '+', '='] // void 
parse_expression(Precedence precedence) { - lex_token(); - GrammarFn prefix = rules[parser->prev.type].prefix; - if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type)); + advance(); + GrammarFn prefix = rules[prev().type].prefix; + if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); (this->*prefix)(); bool meet_assign_token = false; - while (rules[peek()].precedence >= precedence) { - lex_token(); - TokenIndex op = parser->prev.type; + while (rules[curr().type].precedence >= precedence) { + advance(); + TokenIndex op = prev().type; if (op == TK("=")){ if(meet_assign_token) SyntaxError(); meet_assign_token = true; @@ -891,7 +697,7 @@ private: do { consume(TK("except")); if(match(TK("@id"))){ - int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL); + int name_idx = co()->add_name(prev().str(), NAME_SPECIAL); emit(OP_EXCEPTION_MATCH, name_idx); }else{ emit(OP_LOAD_TRUE); @@ -901,7 +707,7 @@ private: compile_block_body(); patches.push_back(emit(OP_JUMP_ABSOLUTE)); patch_jump(patch); - }while(peek() == TK("except")); + }while(curr().type == TK("except")); emit(OP_RE_RAISE); // no match, re-raise for (int patch : patches) patch_jump(patch); } @@ -968,7 +774,7 @@ private: EXPR(); consume(TK("as")); consume(TK("@id")); - Token tkname = parser->prev; + Token tkname = prev(); int index = co()->add_name(tkname.str(), name_scope()); emit(OP_STORE_NAME, index); emit(OP_LOAD_NAME_REF, index); @@ -979,18 +785,18 @@ private: } else if(match(TK("label"))){ if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); - Str label = parser->prev.str(); + Str label = prev().str(); bool ok = co()->add_label(label); if(!ok) SyntaxError("label '" + label + "' already exists"); consume_end_stmt(); } else if(match(TK("goto"))){ // https://entrian.com/goto/ if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in 
EXEC_MODE"); consume(TK(".")); consume(TK("@id")); - emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL)); + emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL)); consume_end_stmt(); } else if(match(TK("raise"))){ consume(TK("@id")); - int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL); + int dummy_t = co()->add_name(prev().str(), NAME_SPECIAL); if(match(TK("(")) && !match(TK(")"))){ EXPR(); consume(TK(")")); }else{ @@ -1005,7 +811,7 @@ private: } else if(match(TK("global"))){ do { consume(TK("@id")); - co()->global_names[parser->prev.str()] = 1; + co()->global_names[prev().str()] = 1; } while (match(TK(","))); consume_end_stmt(); } else if(match(TK("pass"))){ @@ -1030,10 +836,10 @@ private: void compile_class(){ consume(TK("@id")); - int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); + int cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL); int super_cls_name_idx = -1; if(match(TK("(")) && match(TK("@id"))){ - super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); + super_cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL); consume(TK(")")); } if(super_cls_name_idx == -1) emit(OP_LOAD_NONE); @@ -1059,13 +865,13 @@ private: } consume(TK("@id")); - const Str& name = parser->prev.str(); + const Str& name = prev().str(); if(func.has_name(name)) SyntaxError("duplicate argument name"); // eat type hints if(enable_type_hints && match(TK(":"))) consume(TK("@id")); - if(state == 0 && peek() == TK("=")) state = 2; + if(state == 0 && curr().type == TK("=")) state = 2; switch (state) { @@ -1075,7 +881,7 @@ private: consume(TK("=")); PyObject* value = read_literal(); if(value == nullptr){ - SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type)); + SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type)); } func.kwargs.set(name, value); func.kwargs_order.push_back(name); @@ -1090,11 +896,11 @@ private: Function func; StrName obj_name; consume(TK("@id")); - func.name = 
parser->prev.str(); + func.name = prev().str(); if(!co()->_is_compiling_class && match(TK("::"))){ consume(TK("@id")); obj_name = func.name; - func.name = parser->prev.str(); + func.name = prev().str(); } consume(TK("(")); if (!match(TK(")"))) { @@ -1104,7 +910,7 @@ private: if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - func.code = make_sp(parser->src, func.name.str()); + func.code = make_sp(lexer->src, func.name.str()); this->codes.push(func.code); compile_block_body(); func.code->optimize(vm); @@ -1132,11 +938,11 @@ private: PyObject* read_literal(){ if(match(TK("-"))){ consume(TK("@num")); - PyObject* val = parser->prev.value; + PyObject* val = get_value(prev()); return vm->num_negated(val); } - if(match(TK("@num"))) return parser->prev.value; - if(match(TK("@str"))) return parser->prev.value; + if(match(TK("@num"))) return get_value(prev()); + if(match(TK("@str"))) return get_value(prev()); if(match(TK("True"))) return VAR(true); if(match(TK("False"))) return VAR(false); if(match(TK("None"))) return vm->None; @@ -1144,23 +950,8 @@ private: return nullptr; } - /***** Error Reporter *****/ - void throw_err(Str type, Str msg){ - int lineno = parser->curr.line; - const char* cursor = parser->curr.start; - // if error occurs in lexing, lineno should be `parser->current_line` - if(lexing_count > 0){ - lineno = parser->current_line; - cursor = parser->curr_char; - } - if(parser->peekchar() == '\n') lineno--; - auto e = Exception("SyntaxError", msg); - e.st_push(parser->src->snapshot(lineno, cursor)); - throw e; - } - void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } - void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } - void IndentationError(Str msg){ throw_err("IndentationError", msg); } + void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); } + void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); } public: CodeObject_ compile(){ @@ 
-1168,11 +959,16 @@ public: if(used) UNREACHABLE(); used = true; - CodeObject_ code = make_sp(parser->src, Str("")); + tokens = lexer->run(); + // if(lexer->src->filename == "tests/01_int.py"){ + // for(auto& t: tokens) std::cout << t.info() << std::endl; + // } + + CodeObject_ code = make_sp(lexer->src, lexer->src->filename); codes.push(code); - lex_token(); lex_token(); - match_newlines(); + advance(); // skip @sof, so prev() is always valid + match_newlines(); // skip leading '\n' if(mode()==EVAL_MODE) { EXPR_TUPLE(); diff --git a/src/expr.h b/src/expr.h new file mode 100644 index 00000000..5e21f629 --- /dev/null +++ b/src/expr.h @@ -0,0 +1,108 @@ +#pragma once + +#include "codeobject.h" +#include "common.h" +#include "parser.h" +#include "error.h" +#include "ceval.h" +#include + +namespace pkpy{ + +struct Expression; +typedef std::unique_ptr Expression_; + +struct Expression{ + std::vector children; + virtual Str to_string() const = 0; +}; + +struct NameExpr: Expression{ + Str name; + NameScope scope; + NameExpr(Str name, NameScope scope): name(name), scope(scope) {} + Str to_string() const override { return name; } +}; + +struct GroupExpr: Expression{ + Expression_ expr; + GroupExpr(Expression_ expr): expr(std::move(expr)) {} + Str to_string() const override { return "()"; } +}; + +struct UnaryExpr: Expression{ + TokenIndex op; + UnaryExpr(TokenIndex op): op(op) {} + Str to_string() const override { return TK_STR(op); } +}; + +struct NotExpr: Expression{ + Str to_string() const override { return "not"; } +}; + +struct AndExpr: Expression{ + Str to_string() const override { return "and"; } +}; + +struct OrExpr: Expression{ + Str to_string() const override { return "or"; } +}; + +// None, True, False, ... 
+struct SpecialValueExpr: Expression{ + TokenIndex token; + SpecialValueExpr(TokenIndex token): token(token) {} + Str to_string() const override { return TK_STR(token); } +}; + +// @num, @str which needs to invoke OP_LOAD_CONST +struct LiteralExpr: Expression{ + PyObject* value; + LiteralExpr(PyObject* value): value(value) {} + Str to_string() const override { return "literal"; } +}; + +struct ListExpr: Expression{ + Str to_string() const override { return "[]"; } +}; + +struct DictExpr: Expression{ + Str to_string() const override { return "{}"; } +}; + +struct LambdaExpr: Expression{ + Str to_string() const override { return "lambda"; } +}; + +struct FStringExpr: Expression{ + Str to_string() const override { return "@fstr"; } +}; + +struct AttribExpr: Expression{ + Str to_string() const override { return "."; } +}; + +struct CallExpr: Expression{ + Str to_string() const override { return "()"; } +}; + +struct BinaryExpr: Expression{ + TokenIndex op; + BinaryExpr(TokenIndex op): op(op) {} + Str to_string() const override { return TK_STR(op); } +}; + +struct TernaryExpr: Expression{ + Str to_string() const override { return "?"; } +}; + +struct AssignExpr: Expression{ + Str to_string() const override { return "="; } +}; + +struct CommaExpr: Expression{ + Str to_string() const override { return ","; } +}; + + +} // namespace pkpy \ No newline at end of file diff --git a/src/frame.h b/src/frame.h index 4ab98631..e2b9bc13 100644 --- a/src/frame.h +++ b/src/frame.h @@ -58,7 +58,7 @@ struct Frame { } PyObject* pop(){ -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif PyObject* v = _data.back(); @@ -67,7 +67,7 @@ struct Frame { } void _pop(){ -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif _data.pop_back(); @@ -88,14 +88,14 @@ struct Frame { } PyObject*& top(){ -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw 
std::runtime_error("_data.empty() is true"); #endif return _data.back(); } PyObject*& top_1(){ -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(_data.size() < 2) throw std::runtime_error("_data.size() < 2"); #endif return _data[_data.size()-2]; diff --git a/src/gc.h b/src/gc.h index 1e4d3f16..746bef60 100644 --- a/src/gc.h +++ b/src/gc.h @@ -67,9 +67,9 @@ struct ManagedHeap{ ~ManagedHeap(){ for(PyObject* obj: _no_gc) delete obj; - for(auto& [type, count]: deleted){ - std::cout << "GC: " << type << "=" << count << std::endl; - } + // for(auto& [type, count]: deleted){ + // std::cout << "GC: " << type << "=" << count << std::endl; + // } } int sweep(VM* vm){ diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 00000000..3e20071b --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,510 @@ +#pragma once + +#include "common.h" +#include "error.h" +#include "str.h" + +namespace pkpy{ + +typedef uint8_t TokenIndex; + +constexpr const char* kTokens[] = { + "@eof", "@eol", "@sof", + ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::", + "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->", + "<<", ">>", "&", "|", "^", "?", "@", + "==", "!=", ">=", "<=", + "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=", + /** KW_BEGIN **/ + "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", + "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally", + "goto", "label", // extended keywords, not available in cpython + "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", + /** KW_END **/ + "is not", "not in", + "@id", "@num", "@str", "@fstr", + "@indent", "@dedent" +}; + +using TokenValue = std::variant; +const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]); + +constexpr TokenIndex TK(const char token[]) { + for(int k=0; k kTokenKwMap = [](){ + std::map map; + for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k; + return map; 
+}(); + + +struct Token{ + TokenIndex type; + const char* start; + int length; + int line; + TokenValue value; + + Str str() const { return Str(start, length);} + + Str info() const { + StrStream ss; + Str raw = str(); + if (raw == Str("\n")) raw = "\\n"; + ss << line << ": " << TK_STR(type) << " '" << raw << "'"; + return ss.str(); + } +}; + +// https://docs.python.org/3/reference/expressions.html +enum Precedence { + PREC_NONE, + PREC_ASSIGNMENT, // = + PREC_COMMA, // , + PREC_TERNARY, // ?: + PREC_LOGICAL_OR, // or + PREC_LOGICAL_AND, // and + PREC_LOGICAL_NOT, // not + PREC_EQUALITY, // == != + PREC_TEST, // in / is / is not / not in + PREC_COMPARISION, // < > <= >= + PREC_BITWISE_OR, // | + PREC_BITWISE_XOR, // ^ + PREC_BITWISE_AND, // & + PREC_BITWISE_SHIFT, // << >> + PREC_TERM, // + - + PREC_FACTOR, // * / % // + PREC_UNARY, // - not + PREC_EXPONENT, // ** + PREC_CALL, // () + PREC_SUBSCRIPT, // [] + PREC_ATTRIB, // .index + PREC_PRIMARY, +}; + +enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; + +struct Lexer { + shared_ptr src; + const char* token_start; + const char* curr_char; + int current_line = 1; + std::vector nexts; + stack indents; + int brackets_level = 0; + bool used = false; + + char peekchar() const{ return *curr_char; } + + bool match_n_chars(int n, char c0){ + const char* c = curr_char; + for(int i=0; i 0) return true; + int spaces = eat_spaces(); + if(peekchar() == '#') skip_line_comment(); + if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true; + // https://docs.python.org/3/reference/lexical_analysis.html#indentation + if(spaces > indents.top()){ + indents.push(spaces); + nexts.push_back(Token{TK("@indent"), token_start, 0, current_line}); + } else if(spaces < indents.top()){ + while(spaces < indents.top()){ + indents.pop(); + nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line}); + } + if(spaces != indents.top()){ + return false; + } + } + return true; + } + + char eatchar() { + char c = 
peekchar(); + if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline"); + curr_char++; + return c; + } + + char eatchar_include_newline() { + char c = peekchar(); + curr_char++; + if (c == '\n'){ + current_line++; + src->line_starts.push_back(curr_char); + } + return c; + } + + int eat_name() { + curr_char--; + while(true){ + uint8_t c = peekchar(); + int u8bytes = 0; + if((c & 0b10000000) == 0b00000000) u8bytes = 1; + else if((c & 0b11100000) == 0b11000000) u8bytes = 2; + else if((c & 0b11110000) == 0b11100000) u8bytes = 3; + else if((c & 0b11111000) == 0b11110000) u8bytes = 4; + else return 1; + if(u8bytes == 1){ + if(isalpha(c) || c=='_' || isdigit(c)) { + curr_char++; + continue; + }else{ + break; + } + } + // handle multibyte char + std::string u8str(curr_char, u8bytes); + if(u8str.size() != u8bytes) return 2; + uint32_t value = 0; + for(int k=0; k < u8bytes; k++){ + uint8_t b = u8str[k]; + if(k==0){ + if(u8bytes == 2) value = (b & 0b00011111) << 6; + else if(u8bytes == 3) value = (b & 0b00001111) << 12; + else if(u8bytes == 4) value = (b & 0b00000111) << 18; + }else{ + value |= (b & 0b00111111) << (6*(u8bytes-k-1)); + } + } + if(is_unicode_Lo_char(value)) curr_char += u8bytes; + else break; + } + + int length = (int)(curr_char - token_start); + if(length == 0) return 3; + std::string_view name(token_start, length); + + if(src->mode == JSON_MODE){ + if(name == "true"){ + add_token(TK("True")); + } else if(name == "false"){ + add_token(TK("False")); + } else if(name == "null"){ + add_token(TK("None")); + } else { + return 4; + } + return 0; + } + + if(kTokenKwMap.count(name)){ + if(name == "not"){ + if(strncmp(curr_char, " in", 3) == 0){ + curr_char += 3; + add_token(TK("not in")); + return 0; + } + }else if(name == "is"){ + if(strncmp(curr_char, " not", 4) == 0){ + curr_char += 4; + add_token(TK("is not")); + return 0; + } + } + add_token(kTokenKwMap.at(name)); + } else { + add_token(TK("@id")); + } + return 0; + } + + void 
skip_line_comment() { + char c; + while ((c = peekchar()) != '\0') { + if (c == '\n') return; + eatchar(); + } + } + + bool matchchar(char c) { + if (peekchar() != c) return false; + eatchar_include_newline(); + return true; + } + + void add_token(TokenIndex type, TokenValue value={}) { + switch(type){ + case TK("{"): case TK("["): case TK("("): brackets_level++; break; + case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; + } + nexts.push_back( Token{ + type, + token_start, + (int)(curr_char - token_start), + current_line - ((type == TK("@eol")) ? 1 : 0), + value + }); + } + + void add_token_2(char c, TokenIndex one, TokenIndex two) { + if (matchchar(c)) add_token(two); + else add_token(one); + } + + Str eat_string_until(char quote, bool raw) { + bool quote3 = match_n_chars(2, quote); + std::vector buff; + while (true) { + char c = eatchar_include_newline(); + if (c == quote){ + if(quote3 && !match_n_chars(2, quote)){ + buff.push_back(c); + continue; + } + break; + } + if (c == '\0'){ + if(quote3 && src->mode == REPL_MODE){ + throw NeedMoreLines(false); + } + SyntaxError("EOL while scanning string literal"); + } + if (c == '\n'){ + if(!quote3) SyntaxError("EOL while scanning string literal"); + else{ + buff.push_back(c); + continue; + } + } + if (!raw && c == '\\') { + switch (eatchar_include_newline()) { + case '"': buff.push_back('"'); break; + case '\'': buff.push_back('\''); break; + case '\\': buff.push_back('\\'); break; + case 'n': buff.push_back('\n'); break; + case 'r': buff.push_back('\r'); break; + case 't': buff.push_back('\t'); break; + default: SyntaxError("invalid escape char"); + } + } else { + buff.push_back(c); + } + } + return Str(buff.data(), buff.size()); + } + + void eat_string(char quote, StringType type) { + Str s = eat_string_until(quote, type == RAW_STRING); + if(type == F_STRING){ + add_token(TK("@fstr"), s); + }else{ + add_token(TK("@str"), s); + } + } + + void eat_number() { + static const std::regex 
pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?"); + std::smatch m; + + const char* i = token_start; + while(*i != '\n' && *i != '\0') i++; + std::string s = std::string(token_start, i); + + try{ + if (std::regex_search(s, m, pattern)) { + // here is m.length()-1, since the first char was eaten by lex_token() + for(int j=0; j=")); + else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>=")); + else add_token(TK(">")); + return true; + } + case '<': { + if(matchchar('=')) add_token(TK("<=")); + else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<=")); + else add_token(TK("<")); + return true; + } + case '-': { + if(matchchar('=')) add_token(TK("-=")); + else if(matchchar('>')) add_token(TK("->")); + else add_token(TK("-")); + return true; + } + case '!': + if(matchchar('=')) add_token(TK("!=")); + else SyntaxError("expected '=' after '!'"); + break; + case '*': + if (matchchar('*')) { + add_token(TK("**")); // '**' + } else { + add_token_2('=', TK("*"), TK("*=")); + } + return true; + case '/': + if(matchchar('/')) { + add_token_2('=', TK("//"), TK("//=")); + } else { + add_token_2('=', TK("/"), TK("/=")); + } + return true; + case '\r': break; // just ignore '\r' + case ' ': case '\t': eat_spaces(); break; + case '\n': { + add_token(TK("@eol")); + if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level"); + return true; + } + default: { + if(c == 'f'){ + if(matchchar('\'')) {eat_string('\'', F_STRING); return true;} + if(matchchar('"')) {eat_string('"', F_STRING); return true;} + }else if(c == 'r'){ + if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;} + if(matchchar('"')) {eat_string('"', RAW_STRING); return true;} + } + if (c >= '0' && c <= '9') { + eat_number(); + return true; + } + switch (eat_name()) + { + case 0: break; + case 1: SyntaxError("invalid char: " + std::string(1, c)); + case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); + case 3: SyntaxError("@id contains invalid char"); break; + 
case 4: SyntaxError("invalid JSON token"); break; + default: UNREACHABLE(); + } + return true; + } + } + } + + token_start = curr_char; + while(indents.size() > 1){ + indents.pop(); + add_token(TK("@dedent")); + return true; + } + add_token(TK("@eof")); + return false; + } + + /***** Error Reporter *****/ + void throw_err(Str type, Str msg){ + int lineno = current_line; + const char* cursor = curr_char; + if(peekchar() == '\n'){ + lineno--; + cursor--; + } + throw_err(type, msg, lineno, cursor); + } + + void throw_err(Str type, Str msg, int lineno, const char* cursor){ + auto e = Exception("SyntaxError", msg); + e.st_push(src->snapshot(lineno, cursor)); + throw e; + } + void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } + void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } + void IndentationError(Str msg){ throw_err("IndentationError", msg); } + + Lexer(shared_ptr src) { + this->src = src; + this->token_start = src->source; + this->curr_char = src->source; + this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line}); + this->indents.push(0); + } + + std::vector run() { + if(used) UNREACHABLE(); + used = true; + while (lex_one_token()); + return std::move(nexts); + } +}; + +} // namespace pkpy \ No newline at end of file diff --git a/src/parser.h b/src/parser.h deleted file mode 100644 index 60c280b2..00000000 --- a/src/parser.h +++ /dev/null @@ -1,302 +0,0 @@ -#pragma once - -#include "error.h" -#include "obj.h" - -namespace pkpy{ - -typedef uint8_t TokenIndex; - -constexpr const char* kTokens[] = { - "@error", "@eof", "@eol", "@sof", - ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::", - "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->", - "<<", ">>", "&", "|", "^", "?", "@", - "==", "!=", ">=", "<=", - "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=", - /** KW_BEGIN **/ - "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", - "None", "in", "is", "and", "or", 
"not", "True", "False", "global", "try", "except", "finally", - "goto", "label", // extended keywords, not available in cpython - "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", - /** KW_END **/ - "is not", "not in", - "@id", "@num", "@str", "@fstr", - "@indent", "@dedent" -}; - -const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]); - -constexpr TokenIndex TK(const char token[]) { - for(int k=0; k kTokenKwMap = [](){ - std::map map; - for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k; - return map; -}(); - - -struct Token{ - TokenIndex type; - - const char* start; - int length; - int line; - PyObject* value; - - Str str() const { return Str(start, length);} - - Str info() const { - StrStream ss; - Str raw = str(); - if (raw == Str("\n")) raw = "\\n"; - ss << line << ": " << TK_STR(type) << " '" << raw << "'"; - return ss.str(); - } -}; - -// https://docs.python.org/3/reference/expressions.html -enum Precedence { - PREC_NONE, - PREC_ASSIGNMENT, // = - PREC_COMMA, // , - PREC_TERNARY, // ?: - PREC_LOGICAL_OR, // or - PREC_LOGICAL_AND, // and - PREC_LOGICAL_NOT, // not - PREC_EQUALITY, // == != - PREC_TEST, // in / is / is not / not in - PREC_COMPARISION, // < > <= >= - PREC_BITWISE_OR, // | - PREC_BITWISE_XOR, // ^ - PREC_BITWISE_AND, // & - PREC_BITWISE_SHIFT, // << >> - PREC_TERM, // + - - PREC_FACTOR, // * / % // - PREC_UNARY, // - not - PREC_EXPONENT, // ** - PREC_CALL, // () - PREC_SUBSCRIPT, // [] - PREC_ATTRIB, // .index - PREC_PRIMARY, -}; - -// The context of the parsing phase for the compiler. 
-struct Parser { - shared_ptr src; - - const char* token_start; - const char* curr_char; - int current_line = 1; - Token prev, curr; - queue nexts; - stack indents; - - int brackets_level = 0; - - Token next_token(){ - if(nexts.empty()){ - return Token{TK("@error"), token_start, (int)(curr_char - token_start), current_line}; - } - Token t = nexts.front(); - if(t.type == TK("@eof") && indents.size()>1){ - nexts.pop(); - indents.pop(); - return Token{TK("@dedent"), token_start, 0, current_line}; - } - nexts.pop(); - return t; - } - - char peekchar() const{ return *curr_char; } - - bool match_n_chars(int n, char c0){ - const char* c = curr_char; - for(int i=0; i 0) return true; - int spaces = eat_spaces(); - if(peekchar() == '#') skip_line_comment(); - if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true; - // https://docs.python.org/3/reference/lexical_analysis.html#indentation - if(spaces > indents.top()){ - indents.push(spaces); - nexts.push(Token{TK("@indent"), token_start, 0, current_line}); - } else if(spaces < indents.top()){ - while(spaces < indents.top()){ - indents.pop(); - nexts.push(Token{TK("@dedent"), token_start, 0, current_line}); - } - if(spaces != indents.top()){ - return false; - } - } - return true; - } - - char eatchar() { - char c = peekchar(); - if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline"); - curr_char++; - return c; - } - - char eatchar_include_newline() { - char c = peekchar(); - curr_char++; - if (c == '\n'){ - current_line++; - src->line_starts.push_back(curr_char); - } - return c; - } - - int eat_name() { - curr_char--; - while(true){ - uint8_t c = peekchar(); - int u8bytes = 0; - if((c & 0b10000000) == 0b00000000) u8bytes = 1; - else if((c & 0b11100000) == 0b11000000) u8bytes = 2; - else if((c & 0b11110000) == 0b11100000) u8bytes = 3; - else if((c & 0b11111000) == 0b11110000) u8bytes = 4; - else return 1; - if(u8bytes == 1){ - if(isalpha(c) || c=='_' || isdigit(c)) { - curr_char++; 
- continue; - }else{ - break; - } - } - // handle multibyte char - std::string u8str(curr_char, u8bytes); - if(u8str.size() != u8bytes) return 2; - uint32_t value = 0; - for(int k=0; k < u8bytes; k++){ - uint8_t b = u8str[k]; - if(k==0){ - if(u8bytes == 2) value = (b & 0b00011111) << 6; - else if(u8bytes == 3) value = (b & 0b00001111) << 12; - else if(u8bytes == 4) value = (b & 0b00000111) << 18; - }else{ - value |= (b & 0b00111111) << (6*(u8bytes-k-1)); - } - } - if(is_unicode_Lo_char(value)) curr_char += u8bytes; - else break; - } - - int length = (int)(curr_char - token_start); - if(length == 0) return 3; - std::string_view name(token_start, length); - - if(src->mode == JSON_MODE){ - if(name == "true"){ - set_next_token(TK("True")); - } else if(name == "false"){ - set_next_token(TK("False")); - } else if(name == "null"){ - set_next_token(TK("None")); - } else { - return 4; - } - return 0; - } - - if(kTokenKwMap.count(name)){ - if(name == "not"){ - if(strncmp(curr_char, " in", 3) == 0){ - curr_char += 3; - set_next_token(TK("not in")); - return 0; - } - }else if(name == "is"){ - if(strncmp(curr_char, " not", 4) == 0){ - curr_char += 4; - set_next_token(TK("is not")); - return 0; - } - } - set_next_token(kTokenKwMap.at(name)); - } else { - set_next_token(TK("@id")); - } - return 0; - } - - void skip_line_comment() { - char c; - while ((c = peekchar()) != '\0') { - if (c == '\n') return; - eatchar(); - } - } - - bool matchchar(char c) { - if (peekchar() != c) return false; - eatchar_include_newline(); - return true; - } - - void set_next_token(TokenIndex type, PyObject* value=nullptr) { - switch(type){ - case TK("{"): case TK("["): case TK("("): brackets_level++; break; - case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; - } - nexts.push( Token{ - type, - token_start, - (int)(curr_char - token_start), - current_line - ((type == TK("@eol")) ? 
1 : 0), - value - }); - } - - void set_next_token_2(char c, TokenIndex one, TokenIndex two) { - if (matchchar(c)) set_next_token(two); - else set_next_token(one); - } - - Parser(shared_ptr src) { - this->src = src; - this->token_start = src->source; - this->curr_char = src->source; - this->nexts.push(Token{TK("@sof"), token_start, 0, current_line}); - this->indents.push(0); - } -}; - -} // namespace pkpy \ No newline at end of file diff --git a/src/pocketpy.h b/src/pocketpy.h index bc6a8109..f5fe5aac 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -760,6 +760,7 @@ inline void add_module_gc(VM* vm){ inline void VM::post_init(){ init_builtins(this); +#if !DEBUG_NO_BUILTIN_MODULES add_module_sys(this); add_module_time(this); add_module_json(this); @@ -793,6 +794,7 @@ inline void VM::post_init(){ const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])]; return VAR(info.name); })); +#endif } } // namespace pkpy diff --git a/src/vm.h b/src/vm.h index 749f321b..da1c1d05 100644 --- a/src/vm.h +++ b/src/vm.h @@ -93,7 +93,7 @@ public: } Frame* top_frame() const { -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(callstack.empty()) UNREACHABLE(); #endif return callstack.top().get(); @@ -166,7 +166,7 @@ public: if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); - if(_module == _main) std::cout << disassemble(code) << '\n'; + // if(_module == _main) std::cout << disassemble(code) << '\n'; return _exec(code, _module); }catch (const Exception& e){ *_stderr << e.summary() << '\n'; From c607d11bd68ab76ac71db50a47dd5e626500e617 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 23:47:33 +0800 Subject: [PATCH 19/73] up up up up up --- src/codeobject.h | 20 -- src/common.h | 13 +- src/compiler.h | 756 ++++++++++++++++++++++++++++------------------- src/error.h | 15 +- src/expr.h | 149 ++++++++-- src/lexer.h | 10 +- src/main.cpp | 1 - src/vm.h | 46 +-- 8 files changed, 625 insertions(+), 385 deletions(-) diff --git 
a/src/codeobject.h b/src/codeobject.h index fcd503bb..b742dba6 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -92,26 +92,6 @@ struct CodeObject { void _mark() const { for(PyObject* v : consts) OBJ_MARK(v); } - - /************************************************/ - int _curr_block_i = 0; - int _rvalue = 0; - bool _is_compiling_class = false; - bool _is_curr_block_loop() const { - return blocks[_curr_block_i].type == FOR_LOOP || blocks[_curr_block_i].type == WHILE_LOOP; - } - - void _enter_block(CodeBlockType type){ - blocks.push_back(CodeBlock{type, _curr_block_i, (int)codes.size()}); - _curr_block_i = blocks.size()-1; - } - - void _exit_block(){ - blocks[_curr_block_i].end = codes.size(); - _curr_block_i = blocks[_curr_block_i].parent; - if(_curr_block_i < 0) UNREACHABLE(); - } - /************************************************/ }; diff --git a/src/common.h b/src/common.h index 1e022115..9ca65136 100644 --- a/src/common.h +++ b/src/common.h @@ -68,8 +68,7 @@ struct Type { operator int() const noexcept { return this->index; } }; -//#define THREAD_LOCAL thread_local -#define THREAD_LOCAL +#define THREAD_LOCAL // thread_local #define CPP_LAMBDA(x) ([](VM* vm, Args& args) { return x; }) #define CPP_NOT_IMPLEMENTED() ([](VM* vm, Args& args) { vm->NotImplementedError(); return vm->None; }) @@ -79,9 +78,9 @@ struct Type { #define UNREACHABLE() throw std::runtime_error( __FILE__ + std::string(":") + std::to_string(__LINE__) + " UNREACHABLE()!"); #endif -const float kLocalsLoadFactor = 0.67f; -const float kInstAttrLoadFactor = 0.67f; -const float kTypeAttrLoadFactor = 0.5f; +inline const float kLocalsLoadFactor = 0.67f; +inline const float kInstAttrLoadFactor = 0.67f; +inline const float kTypeAttrLoadFactor = 0.5f; static_assert(sizeof(i64) == sizeof(int*)); static_assert(sizeof(f64) == sizeof(int*)); @@ -114,7 +113,11 @@ public: size_t size() const { return vec.size(); } T& top(){ return vec.back(); } const T& top() const { return vec.back(); } + T popx(){ T t = 
std::move(vec.back()); vec.pop_back(); return t; } const std::vector& data() const { return vec; } }; +struct Expression; +typedef std::unique_ptr Expression_; + } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index 9510e316..bc58348f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -5,29 +5,82 @@ #include "lexer.h" #include "error.h" #include "ceval.h" +#include "expr.h" +#include "obj.h" +#include "str.h" namespace pkpy{ class Compiler; -typedef void (Compiler::*GrammarFn)(); -typedef void (Compiler::*CompilerAction)(); +typedef void (Compiler::*PrattCallback)(); -struct GrammarRule{ - GrammarFn prefix; - GrammarFn infix; +struct PrattRule{ + PrattCallback prefix; + PrattCallback infix; Precedence precedence; }; +struct CodeEmitContext{ + CodeObject_ co; + stack s_expr; + + CodeEmitContext(CodeObject_ co): co(co) {} + + int curr_block_i = 0; + bool is_compiling_class = false; + + bool is_curr_block_loop() const { + return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP; + } + + void enter_block(CodeBlockType type){ + co->blocks.push_back(CodeBlock{ + type, curr_block_i, (int)co->codes.size() + }); + curr_block_i = co->blocks.size()-1; + } + + void exit_block(){ + co->blocks[curr_block_i].end = co->codes.size(); + curr_block_i = co->blocks[curr_block_i].parent; + if(curr_block_i < 0) UNREACHABLE(); + } + + // clear the expression stack and generate bytecode + void emit_expr(){ + if(s_expr.size() != 1) UNREACHABLE(); + Expression_ expr = s_expr.popx(); + // emit + // ... 
+ } +}; + class Compiler { std::unique_ptr lexer; - stack codes; + stack contexts; + std::map rules; bool used = false; VM* vm; - std::map rules; - CodeObject_ co() const{ return codes.top(); } + CodeObject* co() const{ return contexts.top().co.get(); } + CodeEmitContext* ctx() { return &contexts.top(); } CompileMode mode() const{ return lexer->src->mode; } - NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } + NameScope name_scope() const { return contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } + + template + CodeObject_ push_context(Args&&... args){ + CodeObject_ co = make_sp(std::forward(args)...); + contexts.push(CodeEmitContext(co)); + return co; + } + + void pop_context(){ + if(!ctx()->s_expr.empty()){ + ctx()->emit_expr(); + } + ctx()->co->optimize(vm); + contexts.pop(); + } public: Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ @@ -41,8 +94,8 @@ public: #define NO_INFIX nullptr, PREC_NONE for(TokenIndex i=0; i>=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("<<=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA }; + rules[TK(":")] = { nullptr, METHOD(exprSlice), PREC_SLICE }; rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; @@ -107,7 +161,7 @@ private: const Token& prev() { return tokens.at(i-1); } const Token& curr() { return tokens.at(i); } const Token& next() { return tokens.at(i+1); } - const Token& peek(int offset=0) { return tokens.at(i+offset); } + const Token& peek(int offset) { return tokens.at(i+offset); } void advance() { i++; } bool match(TokenIndex expected) { @@ -131,7 +185,7 @@ private: consumed = true; } if (repl_throw && curr().type == TK("@eof")){ - throw NeedMoreLines(co()->_is_compiling_class); + throw 
NeedMoreLines(ctx()->is_compiling_class); } return consumed; } @@ -159,238 +213,305 @@ private: } } - void exprLiteral() { - PyObject* value = get_value(prev()); - int index = co()->add_const(value); - emit(OP_LOAD_CONST, index); + void exprLiteral(){ + ctx()->s_expr.push( + std::make_unique(prev().value) + ); + // PyObject* value = get_value(prev()); + // int index = co()->add_const(value); + // emit(OP_LOAD_CONST, index); } - void exprFString() { - static const std::regex pattern(R"(\{(.*?)\})"); - PyObject* value = get_value(prev()); - Str s = CAST(Str, value); - std::sregex_iterator begin(s.begin(), s.end(), pattern); - std::sregex_iterator end; - int size = 0; - int i = 0; - for(auto it = begin; it != end; it++) { - std::smatch m = *it; - if (i < m.position()) { - std::string literal = s.substr(i, m.position() - i); - emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - size++; - } - emit(OP_LOAD_EVAL_FN); - emit(OP_LOAD_CONST, co()->add_const(VAR(m[1].str()))); - emit(OP_CALL, 1); - size++; - i = (int)(m.position() + m.length()); - } - if (i < s.size()) { - std::string literal = s.substr(i, s.size() - i); - emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - size++; - } - emit(OP_BUILD_STRING, size); + void exprFString(){ + ctx()->s_expr.push( + std::make_unique(std::get(prev().value)) + ); + // static const std::regex pattern(R"(\{(.*?)\})"); + // PyObject* value = get_value(prev()); + // Str s = CAST(Str, value); + // std::sregex_iterator begin(s.begin(), s.end(), pattern); + // std::sregex_iterator end; + // int size = 0; + // int i = 0; + // for(auto it = begin; it != end; it++) { + // std::smatch m = *it; + // if (i < m.position()) { + // std::string literal = s.substr(i, m.position() - i); + // emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); + // size++; + // } + // emit(OP_LOAD_EVAL_FN); + // emit(OP_LOAD_CONST, co()->add_const(VAR(m[1].str()))); + // emit(OP_CALL, 1); + // size++; + // i = (int)(m.position() + m.length()); + // } + // if (i < 
s.size()) { + // std::string literal = s.substr(i, s.size() - i); + // emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); + // size++; + // } + // emit(OP_BUILD_STRING, size); } - void exprLambda() { + void emit_expr(){} + + void exprLambda(){ Function func; func.name = ""; if(!match(TK(":"))){ _compile_f_args(func, false); consume(TK(":")); } - func.code = make_sp(lexer->src, func.name.str()); - this->codes.push(func.code); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; + func.code = push_context(lexer->src, func.name.str()); + EXPR(); + emit_expr(); emit(OP_RETURN_VALUE); - func.code->optimize(vm); - this->codes.pop(); - emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); - if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); + pop_context(); + + ctx()->s_expr.push( + std::make_unique(std::move(func), name_scope()) + ); + + // emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); + // if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); } - void exprAssign() { - if(co()->codes.empty()) UNREACHABLE(); - bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; - int _name_arg = co()->codes.back().arg; - // if the last op is OP_LOAD_NAME_REF, remove it - // because we will emit OP_STORE_NAME or OP_STORE_CLASS_ATTR - if(is_load_name_ref) co()->codes.pop_back(); - - co()->_rvalue += 1; + void exprAssign(){ + Expression_ lhs = ctx()->s_expr.popx(); TokenIndex op = prev().type; - if(op == TK("=")) { // a = (expr) - EXPR_TUPLE(); - if(is_load_name_ref){ - auto op = co()->_is_compiling_class ? 
OP_STORE_CLASS_ATTR : OP_STORE_NAME; - emit(op, _name_arg); - }else{ - if(co()->_is_compiling_class) SyntaxError(); - emit(OP_STORE_REF); - } - }else{ // a += (expr) -> a = a + (expr) - if(co()->_is_compiling_class) SyntaxError(); - if(is_load_name_ref){ - emit(OP_LOAD_NAME, _name_arg); - }else{ - emit(OP_DUP_TOP_VALUE); - } - EXPR(); - switch (op) { - case TK("+="): emit(OP_BINARY_OP, 0); break; - case TK("-="): emit(OP_BINARY_OP, 1); break; - case TK("*="): emit(OP_BINARY_OP, 2); break; - case TK("/="): emit(OP_BINARY_OP, 3); break; - case TK("//="): emit(OP_BINARY_OP, 4); break; - case TK("%="): emit(OP_BINARY_OP, 5); break; - case TK("<<="): emit(OP_BITWISE_OP, 0); break; - case TK(">>="): emit(OP_BITWISE_OP, 1); break; - case TK("&="): emit(OP_BITWISE_OP, 2); break; - case TK("|="): emit(OP_BITWISE_OP, 3); break; - case TK("^="): emit(OP_BITWISE_OP, 4); break; - default: UNREACHABLE(); - } - if(is_load_name_ref){ - emit(OP_STORE_NAME, _name_arg); - }else{ - emit(OP_STORE_REF); - } + EXPR_TUPLE(); + if(op == TK("=")){ + ctx()->s_expr.push( + std::make_unique(std::move(lhs), ctx()->s_expr.popx()) + ); + }else{ + // += -= ... + ctx()->s_expr.push( + std::make_unique(op, std::move(lhs), ctx()->s_expr.popx()) + ); } - co()->_rvalue -= 1; + + // if(co()->codes.empty()) UNREACHABLE(); + // bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; + // int _name_arg = co()->codes.back().arg; + // // if the last op is OP_LOAD_NAME_REF, remove it + // // because we will emit OP_STORE_NAME or OP_STORE_CLASS_ATTR + // if(is_load_name_ref) co()->codes.pop_back(); + + // co()->_rvalue += 1; + // TokenIndex op = prev().type; + // if(op == TK("=")) { // a = (expr) + // EXPR_TUPLE(); + // if(is_load_name_ref){ + // auto op = ctx()->is_compiling_class ? 
OP_STORE_CLASS_ATTR : OP_STORE_NAME; + // emit(op, _name_arg); + // }else{ + // if(ctx()->is_compiling_class) SyntaxError(); + // emit(OP_STORE_REF); + // } + // }else{ // a += (expr) -> a = a + (expr) + // if(ctx()->is_compiling_class) SyntaxError(); + // if(is_load_name_ref){ + // emit(OP_LOAD_NAME, _name_arg); + // }else{ + // emit(OP_DUP_TOP_VALUE); + // } + // EXPR(); + // switch (op) { + // case TK("+="): emit(OP_BINARY_OP, 0); break; + // case TK("-="): emit(OP_BINARY_OP, 1); break; + // case TK("*="): emit(OP_BINARY_OP, 2); break; + // case TK("/="): emit(OP_BINARY_OP, 3); break; + // case TK("//="): emit(OP_BINARY_OP, 4); break; + // case TK("%="): emit(OP_BINARY_OP, 5); break; + // case TK("<<="): emit(OP_BITWISE_OP, 0); break; + // case TK(">>="): emit(OP_BITWISE_OP, 1); break; + // case TK("&="): emit(OP_BITWISE_OP, 2); break; + // case TK("|="): emit(OP_BITWISE_OP, 3); break; + // case TK("^="): emit(OP_BITWISE_OP, 4); break; + // default: UNREACHABLE(); + // } + // if(is_load_name_ref){ + // emit(OP_STORE_NAME, _name_arg); + // }else{ + // emit(OP_STORE_REF); + // } + // } + // co()->_rvalue -= 1; } - void exprComma() { + void exprSlice(){ + } + + void exprComma(){ int size = 1; // an expr is in the stack now do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok size++; } while(match(TK(","))); - emit(co()->_rvalue ? OP_BUILD_TUPLE : OP_BUILD_TUPLE_REF, size); + std::vector items(size); + for(int i=size-1; i>=0; i--) items[i] = ctx()->s_expr.popx(); + ctx()->s_expr.push( + std::make_unique(std::move(items)) + ); + // emit(co()->_rvalue ? 
OP_BUILD_TUPLE : OP_BUILD_TUPLE_REF, size); } - void exprOr() { - int patch = emit(OP_JUMP_IF_TRUE_OR_POP); + void exprOr(){ + Expression_ lhs = ctx()->s_expr.popx(); parse_expression(PREC_LOGICAL_OR); - patch_jump(patch); + ctx()->s_expr.push( + std::make_unique(std::move(lhs), ctx()->s_expr.popx()) + ); + + // int patch = emit(OP_JUMP_IF_TRUE_OR_POP); + // parse_expression(PREC_LOGICAL_OR); + // patch_jump(patch); } - void exprAnd() { - int patch = emit(OP_JUMP_IF_FALSE_OR_POP); + void exprAnd(){ + Expression_ lhs = ctx()->s_expr.popx(); parse_expression(PREC_LOGICAL_AND); - patch_jump(patch); + ctx()->s_expr.push( + std::make_unique(std::move(lhs), ctx()->s_expr.popx()) + ); + // int patch = emit(OP_JUMP_IF_FALSE_OR_POP); + // parse_expression(PREC_LOGICAL_AND); + // patch_jump(patch); } - void exprTernary() { - int patch = emit(OP_POP_JUMP_IF_FALSE); + void exprTernary(){ + Expression_ cond = ctx()->s_expr.popx(); EXPR(); // if true - int patch2 = emit(OP_JUMP_ABSOLUTE); + Expression_ true_expr = ctx()->s_expr.popx(); consume(TK(":")); - patch_jump(patch); EXPR(); // if false - patch_jump(patch2); + Expression_ false_expr = ctx()->s_expr.popx(); + ctx()->s_expr.push( + std::make_unique(std::move(cond), std::move(true_expr), std::move(false_expr)) + ); + // int patch = emit(OP_POP_JUMP_IF_FALSE); + // EXPR(); // if true + // int patch2 = emit(OP_JUMP_ABSOLUTE); + // consume(TK(":")); + // patch_jump(patch); + // EXPR(); // if false + // patch_jump(patch2); } - void exprBinaryOp() { + void exprBinaryOp(){ TokenIndex op = prev().type; + Expression_ lhs = ctx()->s_expr.popx(); parse_expression((Precedence)(rules[op].precedence + 1)); + ctx()->s_expr.push( + std::make_unique(op, std::move(lhs), ctx()->s_expr.popx()) + ); + // switch (op) { + // case TK("+"): emit(OP_BINARY_OP, 0); break; + // case TK("-"): emit(OP_BINARY_OP, 1); break; + // case TK("*"): emit(OP_BINARY_OP, 2); break; + // case TK("/"): emit(OP_BINARY_OP, 3); break; + // case TK("//"): 
emit(OP_BINARY_OP, 4); break; + // case TK("%"): emit(OP_BINARY_OP, 5); break; + // case TK("**"): emit(OP_BINARY_OP, 6); break; - switch (op) { - case TK("+"): emit(OP_BINARY_OP, 0); break; - case TK("-"): emit(OP_BINARY_OP, 1); break; - case TK("*"): emit(OP_BINARY_OP, 2); break; - case TK("/"): emit(OP_BINARY_OP, 3); break; - case TK("//"): emit(OP_BINARY_OP, 4); break; - case TK("%"): emit(OP_BINARY_OP, 5); break; - case TK("**"): emit(OP_BINARY_OP, 6); break; + // case TK("<"): emit(OP_COMPARE_OP, 0); break; + // case TK("<="): emit(OP_COMPARE_OP, 1); break; + // case TK("=="): emit(OP_COMPARE_OP, 2); break; + // case TK("!="): emit(OP_COMPARE_OP, 3); break; + // case TK(">"): emit(OP_COMPARE_OP, 4); break; + // case TK(">="): emit(OP_COMPARE_OP, 5); break; + // case TK("in"): emit(OP_CONTAINS_OP, 0); break; + // case TK("not in"): emit(OP_CONTAINS_OP, 1); break; + // case TK("is"): emit(OP_IS_OP, 0); break; + // case TK("is not"): emit(OP_IS_OP, 1); break; - case TK("<"): emit(OP_COMPARE_OP, 0); break; - case TK("<="): emit(OP_COMPARE_OP, 1); break; - case TK("=="): emit(OP_COMPARE_OP, 2); break; - case TK("!="): emit(OP_COMPARE_OP, 3); break; - case TK(">"): emit(OP_COMPARE_OP, 4); break; - case TK(">="): emit(OP_COMPARE_OP, 5); break; - case TK("in"): emit(OP_CONTAINS_OP, 0); break; - case TK("not in"): emit(OP_CONTAINS_OP, 1); break; - case TK("is"): emit(OP_IS_OP, 0); break; - case TK("is not"): emit(OP_IS_OP, 1); break; - - case TK("<<"): emit(OP_BITWISE_OP, 0); break; - case TK(">>"): emit(OP_BITWISE_OP, 1); break; - case TK("&"): emit(OP_BITWISE_OP, 2); break; - case TK("|"): emit(OP_BITWISE_OP, 3); break; - case TK("^"): emit(OP_BITWISE_OP, 4); break; - default: UNREACHABLE(); - } + // case TK("<<"): emit(OP_BITWISE_OP, 0); break; + // case TK(">>"): emit(OP_BITWISE_OP, 1); break; + // case TK("&"): emit(OP_BITWISE_OP, 2); break; + // case TK("|"): emit(OP_BITWISE_OP, 3); break; + // case TK("^"): emit(OP_BITWISE_OP, 4); break; + // default: 
UNREACHABLE(); + // } } void exprNot() { parse_expression((Precedence)(PREC_LOGICAL_NOT + 1)); - emit(OP_UNARY_NOT); + ctx()->s_expr.push( + std::make_unique(ctx()->s_expr.popx()) + ); + // emit(OP_UNARY_NOT); } - void exprUnaryOp() { - TokenIndex op = prev().type; + void exprUnaryOp(){ + TokenIndex type = prev().type; parse_expression((Precedence)(PREC_UNARY + 1)); - switch (op) { - case TK("-"): emit(OP_UNARY_NEGATIVE); break; - case TK("*"): emit(OP_UNARY_STAR, co()->_rvalue); break; - default: UNREACHABLE(); - } + ctx()->s_expr.push( + std::make_unique(type, ctx()->s_expr.popx()) + ); + // switch (type) { + // case TK("-"): emit(OP_UNARY_NEGATIVE); break; + // case TK("*"): emit(OP_UNARY_STAR, co()->_rvalue); break; + // default: UNREACHABLE(); + // } } - void exprGrouping() { + // () is just for change precedence, so we don't need to push it into stack + void exprGroup(){ match_newlines(mode()==REPL_MODE); EXPR_TUPLE(); match_newlines(mode()==REPL_MODE); consume(TK(")")); } - void _consume_comp(Opcode op0, Opcode op1, int _patch, int _body_start){ - int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1); - int _body_end = co()->codes.size(); - co()->codes[_patch].op = OP_JUMP_ABSOLUTE; - co()->codes[_patch].arg = _body_end; - emit(op0, 0); - EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE(); - match_newlines(mode()==REPL_MODE); + // void _consume_comp(Opcode op0, Opcode op1, int _patch, int _body_start){ + // int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1); + // int _body_end = co()->codes.size(); + // co()->codes[_patch].op = OP_JUMP_ABSOLUTE; + // co()->codes[_patch].arg = _body_end; + // emit(op0, 0); + // EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE(); + // match_newlines(mode()==REPL_MODE); - int _skipPatch = emit(OP_JUMP_ABSOLUTE); - int _cond_start = co()->codes.size(); - int _cond_end_return = -1; - if(match(TK("if"))) { - EXPR_TUPLE(); - _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1); - } - patch_jump(_skipPatch); + // int _skipPatch = 
emit(OP_JUMP_ABSOLUTE); + // int _cond_start = co()->codes.size(); + // int _cond_end_return = -1; + // if(match(TK("if"))) { + // EXPR_TUPLE(); + // _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1); + // } + // patch_jump(_skipPatch); - emit(OP_GET_ITER); - co()->_enter_block(FOR_LOOP); - emit(OP_FOR_ITER); + // emit(OP_GET_ITER); + // co()->_enter_block(FOR_LOOP); + // emit(OP_FOR_ITER); - if(_cond_end_return != -1) { // there is an if condition - emit(OP_JUMP_ABSOLUTE, _cond_start); - patch_jump(_cond_end_return); - int ifpatch = emit(OP_POP_JUMP_IF_FALSE); - emit(OP_JUMP_ABSOLUTE, _body_start); - patch_jump(_body_end_return); - emit(op1); - patch_jump(ifpatch); - }else{ - emit(OP_JUMP_ABSOLUTE, _body_start); - patch_jump(_body_end_return); - emit(op1); - } + // if(_cond_end_return != -1) { // there is an if condition + // emit(OP_JUMP_ABSOLUTE, _cond_start); + // patch_jump(_cond_end_return); + // int ifpatch = emit(OP_POP_JUMP_IF_FALSE); + // emit(OP_JUMP_ABSOLUTE, _body_start); + // patch_jump(_body_end_return); + // emit(op1); + // patch_jump(ifpatch); + // }else{ + // emit(OP_JUMP_ABSOLUTE, _body_start); + // patch_jump(_body_end_return); + // emit(op1); + // } + + // emit(OP_LOOP_CONTINUE, -1, true); + // co()->_exit_block(); + // match_newlines(mode()==REPL_MODE); + // } + + template + void _consume_comp(){ - emit(OP_LOOP_CONTINUE, -1, true); - co()->_exit_block(); - match_newlines(mode()==REPL_MODE); } void exprList() { - int _patch = emit(OP_NO_OP); - int _body_start = co()->codes.size(); int ARGC = 0; do { match_newlines(mode()==REPL_MODE); @@ -398,19 +519,38 @@ private: EXPR(); ARGC++; match_newlines(mode()==REPL_MODE); if(ARGC == 1 && match(TK("for"))){ - _consume_comp(OP_BUILD_LIST, OP_LIST_APPEND, _patch, _body_start); + _consume_comp(); consume(TK("]")); return; } } while (match(TK(","))); match_newlines(mode()==REPL_MODE); consume(TK("]")); - emit(OP_BUILD_LIST, ARGC); + auto list_expr = std::make_unique(); + list_expr->items.resize(ARGC); + for(int 
i=ARGC-1; i>=0; i--) list_expr->items[i] = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(list_expr)); + + // int _patch = emit(OP_NO_OP); + // int _body_start = co()->codes.size(); + // int ARGC = 0; + // do { + // match_newlines(mode()==REPL_MODE); + // if (curr().type == TK("]")) break; + // EXPR(); ARGC++; + // match_newlines(mode()==REPL_MODE); + // if(ARGC == 1 && match(TK("for"))){ + // _consume_comp(OP_BUILD_LIST, OP_LIST_APPEND, _patch, _body_start); + // consume(TK("]")); + // return; + // } + // } while (match(TK(","))); + // match_newlines(mode()==REPL_MODE); + // consume(TK("]")); + // emit(OP_BUILD_LIST, ARGC); } void exprMap() { - int _patch = emit(OP_NO_OP); - int _body_start = co()->codes.size(); bool parsing_dict = false; int ARGC = 0; do { @@ -421,78 +561,113 @@ private: if(parsing_dict){ consume(TK(":")); EXPR(); + Expression_ value = ctx()->s_expr.popx(); + ctx()->s_expr.push( + std::make_unique(ctx()->s_expr.popx(), std::move(value)) + ); } ARGC++; match_newlines(mode()==REPL_MODE); if(ARGC == 1 && match(TK("for"))){ - if(parsing_dict) _consume_comp(OP_BUILD_MAP, OP_MAP_ADD, _patch, _body_start); - else _consume_comp(OP_BUILD_SET, OP_SET_ADD, _patch, _body_start); + if(parsing_dict) _consume_comp(); + else _consume_comp(); consume(TK("}")); return; } } while (match(TK(","))); consume(TK("}")); + if(ARGC == 0 || parsing_dict){ + auto e = std::make_unique(); + e->items.resize(ARGC); + for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + }else{ + auto e = std::make_unique(); + e->items.resize(ARGC); + for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + } + // int _patch = emit(OP_NO_OP); + // int _body_start = co()->codes.size(); + // bool parsing_dict = false; + // int ARGC = 0; + // do { + // match_newlines(mode()==REPL_MODE); + // if (curr().type == TK("}")) break; + // EXPR(); + // if(curr().type == TK(":")) parsing_dict = true; + // 
if(parsing_dict){ + // consume(TK(":")); + // EXPR(); + // } + // ARGC++; + // match_newlines(mode()==REPL_MODE); + // if(ARGC == 1 && match(TK("for"))){ + // if(parsing_dict) _consume_comp(OP_BUILD_MAP, OP_MAP_ADD, _patch, _body_start); + // else _consume_comp(OP_BUILD_SET, OP_SET_ADD, _patch, _body_start); + // consume(TK("}")); + // return; + // } + // } while (match(TK(","))); + // consume(TK("}")); - if(ARGC == 0 || parsing_dict) emit(OP_BUILD_MAP, ARGC); - else emit(OP_BUILD_SET, ARGC); + // if(ARGC == 0 || parsing_dict) emit(OP_BUILD_MAP, ARGC); + // else emit(OP_BUILD_SET, ARGC); } void exprCall() { - int ARGC = 0; - int KWARGC = 0; - bool need_unpack = false; + auto e = std::make_unique(); do { match_newlines(mode()==REPL_MODE); - if (curr().type == TK(")")) break; - if(curr().type == TK("@id") && next().type == TK("=")) { + if (curr().type==TK(")")) break; + if(curr().type==TK("@id") && next().type==TK("=")) { consume(TK("@id")); - const Str& key = prev().str(); - emit(OP_LOAD_CONST, co()->add_const(VAR(key))); + Str key = prev().str(); + // emit(OP_LOAD_CONST, co()->add_const(VAR(key))); consume(TK("=")); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; - KWARGC++; + EXPR(); + e->kwargs.push_back({key, ctx()->s_expr.popx()}); } else{ - if(KWARGC > 0) SyntaxError("positional argument follows keyword argument"); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; - if(co()->codes.back().op == OP_UNARY_STAR) need_unpack = true; - ARGC++; + if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument"); + EXPR(); + // if(co()->codes.back().op == OP_UNARY_STAR) need_unpack = true; + e->args.push_back(ctx()->s_expr.popx()); } match_newlines(mode()==REPL_MODE); } while (match(TK(","))); consume(TK(")")); - if(ARGC > 32767) SyntaxError("too many positional arguments"); - if(KWARGC > 32767) SyntaxError("too many keyword arguments"); - if(KWARGC > 0){ - emit(need_unpack ? 
OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC << 16) | ARGC); - }else{ - emit(need_unpack ? OP_CALL_UNPACK : OP_CALL, ARGC); - } + ctx()->s_expr.push(std::move(e)); + // if(ARGC > 32767) SyntaxError("too many positional arguments"); + // if(KWARGC > 32767) SyntaxError("too many keyword arguments"); + // if(KWARGC > 0){ + // emit(need_unpack ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC << 16) | ARGC); + // }else{ + // emit(need_unpack ? OP_CALL_UNPACK : OP_CALL, ARGC); + // } } - void exprName(){ _exprName(false); } - - void _exprName(bool force_lvalue) { - const Token& tkname = prev(); - int index = co()->add_name(tkname.str(), name_scope()); - bool fast_load = !force_lvalue && co()->_rvalue>0; - emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index); + void exprName(){ + ctx()->s_expr.push( + std::make_unique(prev().str(), name_scope()) + ); } void exprAttrib() { consume(TK("@id")); - const Str& name = prev().str(); - int index = co()->add_name(name, NAME_ATTR); - emit(co()->_rvalue ? 
OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index); + ctx()->s_expr.push( + std::make_unique(ctx()->s_expr.popx(), prev().str()) + ); } // [:], [:b] // [a], [a:], [a:b] - void exprSubscript() { + void exprSubscr() { + Expression_ a = nullptr; + Expression_ b = nullptr; if(match(TK(":"))){ - emit(OP_LOAD_NONE); - if(match(TK("]"))){ - emit(OP_LOAD_NONE); - }else{ + if(match(TK("]"))){ // [:] + + }else{ // [:b] EXPR_TUPLE(); consume(TK("]")); } @@ -500,36 +675,31 @@ private: }else{ EXPR_TUPLE(); if(match(TK(":"))){ - if(match(TK("]"))){ + if(match(TK("]"))){ // [a:] emit(OP_LOAD_NONE); - }else{ + }else{ // [a:b] EXPR_TUPLE(); consume(TK("]")); } emit(OP_BUILD_SLICE); - }else{ + }else{ // [a] consume(TK("]")); } } - emit(OP_BUILD_INDEX, (int)(co()->_rvalue>0)); + // emit(OP_BUILD_INDEX, (int)(co()->_rvalue>0)); } void exprValue() { - TokenIndex op = prev().type; - switch (op) { - case TK("None"): emit(OP_LOAD_NONE); break; - case TK("True"): emit(OP_LOAD_TRUE); break; - case TK("False"): emit(OP_LOAD_FALSE); break; - case TK("..."): emit(OP_LOAD_ELLIPSIS); break; - default: UNREACHABLE(); - } + ctx()->s_expr.push( + std::make_unique(prev().type) + ); } int emit(Opcode opcode, int arg=-1, bool keepline=false) { int line = prev().line; co()->codes.push_back( - Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line} + Bytecode{(uint8_t)opcode, (uint16_t)ctx()->curr_block_i, arg, line} ); int i = co()->codes.size() - 1; if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line; @@ -541,11 +711,10 @@ private: co()->codes[addr_index].arg = target; } - void compile_block_body(CompilerAction action=nullptr) { - if(action == nullptr) action = &Compiler::compile_stmt; + void compile_block_body() { consume(TK(":")); if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ - (this->*action)(); // inline block + compile_stmt(); // inline block return; } if(!match_newlines(mode()==REPL_MODE)){ @@ -554,7 +723,7 @@ private: consume(TK("@indent")); while (curr().type != 
TK("@dedent")) { match_newlines(); - (this->*action)(); + compile_stmt(); match_newlines(); } consume(TK("@dedent")); @@ -612,20 +781,17 @@ private: // a = 1 + 2 // ['a', '1', '2', '+', '='] // - void parse_expression(Precedence precedence) { + void parse_expression(Precedence precedence, bool allowslice=false) { advance(); - GrammarFn prefix = rules[prev().type].prefix; + PrattCallback prefix = rules[prev().type].prefix; if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); (this->*prefix)(); - bool meet_assign_token = false; + // rhs of = cannot be a AssignExpr or InplaceAssignExpr while (rules[curr().type].precedence >= precedence) { + TokenIndex op = curr().type; advance(); - TokenIndex op = prev().type; - if (op == TK("=")){ - if(meet_assign_token) SyntaxError(); - meet_assign_token = true; - } - GrammarFn infix = rules[op].infix; + if (op == TK(":") && !allowslice) SyntaxError(); + PrattCallback infix = rules[op].infix; if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true"); (this->*infix)(); } @@ -633,9 +799,8 @@ private: void compile_if_stmt() { match_newlines(); - co()->_rvalue += 1; - EXPR_TUPLE(); // condition - co()->_rvalue -= 1; + EXPR(); // condition + emit_expr(); int ifpatch = emit(OP_POP_JUMP_IF_FALSE); compile_block_body(); @@ -655,44 +820,45 @@ private: } void compile_while_loop() { - co()->_enter_block(WHILE_LOOP); - co()->_rvalue += 1; - EXPR_TUPLE(); // condition - co()->_rvalue -= 1; + ctx()->enter_block(WHILE_LOOP); + EXPR(); // condition + emit_expr(); int patch = emit(OP_POP_JUMP_IF_FALSE); compile_block_body(); emit(OP_LOOP_CONTINUE, -1, true); patch_jump(patch); - co()->_exit_block(); + ctx()->exit_block(); } void EXPR_FOR_VARS(){ int size = 0; do { consume(TK("@id")); - _exprName(true); size++; + int index = co()->add_name(prev().str(), name_scope()); + emit(OP_LOAD_NAME_REF, index); + size++; } while (match(TK(","))); if(size > 1) emit(OP_BUILD_TUPLE_REF, size); } void 
compile_for_loop() { EXPR_FOR_VARS();consume(TK("in")); - co()->_rvalue += 1; EXPR_TUPLE(); co()->_rvalue -= 1; + EXPR_TUPLE(); emit_expr(); emit(OP_GET_ITER); - co()->_enter_block(FOR_LOOP); + ctx()->enter_block(FOR_LOOP); emit(OP_FOR_ITER); compile_block_body(); emit(OP_LOOP_CONTINUE, -1, true); - co()->_exit_block(); + ctx()->exit_block(); } void compile_try_except() { - co()->_enter_block(TRY_EXCEPT); + ctx()->enter_block(TRY_EXCEPT); emit(OP_TRY_BLOCK_ENTER); compile_block_body(); emit(OP_TRY_BLOCK_EXIT); std::vector patches = { emit(OP_JUMP_ABSOLUTE) }; - co()->_exit_block(); + ctx()->exit_block(); do { consume(TK("except")); @@ -714,29 +880,25 @@ private: void compile_stmt() { if (match(TK("break"))) { - if (!co()->_is_curr_block_loop()) SyntaxError("'break' outside loop"); + if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop"); consume_end_stmt(); emit(OP_LOOP_BREAK); } else if (match(TK("continue"))) { - if (!co()->_is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); + if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); consume_end_stmt(); emit(OP_LOOP_CONTINUE); } else if (match(TK("yield"))) { - if (codes.size() == 1) SyntaxError("'yield' outside function"); - co()->_rvalue += 1; - EXPR_TUPLE(); - co()->_rvalue -= 1; + if (contexts.size() <= 1) SyntaxError("'yield' outside function"); + EXPR_TUPLE(); emit_expr(); consume_end_stmt(); co()->is_generator = true; emit(OP_YIELD_VALUE, -1, true); } else if (match(TK("return"))) { - if (codes.size() == 1) SyntaxError("'return' outside function"); + if (contexts.size() <= 1) SyntaxError("'return' outside function"); if(match_end_stmt()){ emit(OP_LOAD_NONE); }else{ - co()->_rvalue += 1; - EXPR_TUPLE(); // return value - co()->_rvalue -= 1; + EXPR_TUPLE(); emit_expr(); consume_end_stmt(); } emit(OP_RETURN_VALUE, -1, true); @@ -763,11 +925,11 @@ private: } else if (match(TK("try"))) { compile_try_except(); } else if(match(TK("assert"))) { - 
co()->_rvalue += 1; - EXPR(); - if (match(TK(","))) EXPR(); - else emit(OP_LOAD_CONST, co()->add_const(VAR(""))); - co()->_rvalue -= 1; + EXPR_TUPLE(); emit_expr(); + // OP_CODE needs to change + + // if (match(TK(","))) EXPR(); + // else emit(OP_LOAD_CONST, co()->add_const(VAR(""))); emit(OP_ASSERT); consume_end_stmt(); } else if(match(TK("with"))){ @@ -845,9 +1007,9 @@ private: if(super_cls_name_idx == -1) emit(OP_LOAD_NONE); else emit(OP_LOAD_NAME, super_cls_name_idx); emit(OP_BEGIN_CLASS, cls_name_idx); - co()->_is_compiling_class = true; + ctx()->is_compiling_class = true; compile_block_body(); - co()->_is_compiling_class = false; + ctx()->is_compiling_class = false; emit(OP_END_CLASS); } @@ -897,7 +1059,7 @@ private: StrName obj_name; consume(TK("@id")); func.name = prev().str(); - if(!co()->_is_compiling_class && match(TK("::"))){ + if(!ctx()->is_compiling_class && match(TK("::"))){ consume(TK("@id")); obj_name = func.name; func.name = prev().str(); @@ -910,14 +1072,12 @@ private: if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - func.code = make_sp(lexer->src, func.name.str()); - this->codes.push(func.code); + func.code = push_context(lexer->src, func.name.str()); compile_block_body(); - func.code->optimize(vm); - this->codes.pop(); + pop_context(); emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); - if(!co()->_is_compiling_class){ + if(!ctx()->is_compiling_class){ if(obj_name.empty()){ if(has_decorator) emit(OP_CALL, 1); emit(OP_STORE_NAME, co()->add_name(func.name, name_scope())); @@ -952,28 +1112,27 @@ private: void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); } void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); } + void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); } public: CodeObject_ compile(){ - // can only be called once if(used) 
UNREACHABLE(); used = true; tokens = lexer->run(); - // if(lexer->src->filename == "tests/01_int.py"){ + // if(lexer->src->filename == ""){ // for(auto& t: tokens) std::cout << t.info() << std::endl; // } - CodeObject_ code = make_sp(lexer->src, lexer->src->filename); - codes.push(code); + CodeObject_ code = push_context(lexer->src, lexer->src->filename); advance(); // skip @sof, so prev() is always valid - match_newlines(); // skip leading '\n' + match_newlines(); // skip possible leading '\n' if(mode()==EVAL_MODE) { EXPR_TUPLE(); consume(TK("@eof")); - code->optimize(vm); + pop_context(); return code; }else if(mode()==JSON_MODE){ PyObject* value = read_literal(); @@ -982,7 +1141,8 @@ public: else if(match(TK("["))) exprList(); else SyntaxError("expect a JSON object or array"); consume(TK("@eof")); - return code; // no need to optimize for JSON decoding + pop_context(); + return code; } while (!match(TK("@eof"))) { @@ -993,7 +1153,7 @@ public: } match_newlines(); } - code->optimize(vm); + pop_context(); return code; } }; diff --git a/src/error.h b/src/error.h index d0732b50..34d4874d 100644 --- a/src/error.h +++ b/src/error.h @@ -1,6 +1,7 @@ #pragma once #include "namedict.h" +#include "str.h" #include "tuplelist.h" namespace pkpy{ @@ -22,7 +23,7 @@ enum CompileMode { }; struct SourceData { - const char* source; + std::string source; Str filename; std::vector line_starts; CompileMode mode; @@ -38,11 +39,17 @@ struct SourceData { } SourceData(const char* source, Str filename, CompileMode mode) { - source = strdup(source); // Skip utf8 BOM if there is any. 
if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; + // Remove all '\r' + StrStream ss; + while(*source != '\0'){ + if(*source != '\r') ss << *source; + source++; + } + this->filename = filename; - this->source = source; + this->source = ss.str(); line_starts.push_back(source); this->mode = mode; } @@ -65,8 +72,6 @@ struct SourceData { } return ss.str(); } - - ~SourceData() { free((void*)source); } }; class Exception { diff --git a/src/expr.h b/src/expr.h index 5e21f629..0ae035bf 100644 --- a/src/expr.h +++ b/src/expr.h @@ -2,107 +2,200 @@ #include "codeobject.h" #include "common.h" -#include "parser.h" +#include "lexer.h" #include "error.h" #include "ceval.h" -#include namespace pkpy{ -struct Expression; -typedef std::unique_ptr Expression_; - struct Expression{ - std::vector children; virtual Str to_string() const = 0; }; struct NameExpr: Expression{ Str name; NameScope scope; - NameExpr(Str name, NameScope scope): name(name), scope(scope) {} + NameExpr(const Str& name, NameScope scope): name(name), scope(scope) {} + NameExpr(Str&& name, NameScope scope): name(std::move(name)), scope(scope) {} Str to_string() const override { return name; } }; -struct GroupExpr: Expression{ - Expression_ expr; - GroupExpr(Expression_ expr): expr(std::move(expr)) {} - Str to_string() const override { return "()"; } -}; - struct UnaryExpr: Expression{ TokenIndex op; - UnaryExpr(TokenIndex op): op(op) {} + Expression_ child; + UnaryExpr(TokenIndex op, Expression_&& child): op(op), child(std::move(child)) {} Str to_string() const override { return TK_STR(op); } }; struct NotExpr: Expression{ + Expression_ child; + NotExpr(Expression_&& child): child(std::move(child)) {} Str to_string() const override { return "not"; } }; struct AndExpr: Expression{ + Expression_ lhs; + Expression_ rhs; + AndExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} Str to_string() const override { return "and"; } }; struct OrExpr: Expression{ + Expression_ lhs; + 
Expression_ rhs; + OrExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} Str to_string() const override { return "or"; } }; -// None, True, False, ... -struct SpecialValueExpr: Expression{ +// [None, True, False, ...] +struct SpecialLiteralExpr: Expression{ TokenIndex token; - SpecialValueExpr(TokenIndex token): token(token) {} + SpecialLiteralExpr(TokenIndex token): token(token) {} Str to_string() const override { return TK_STR(token); } + + void gen(){ + // switch (token) { + // case TK("None"): emit(OP_LOAD_NONE); break; + // case TK("True"): emit(OP_LOAD_TRUE); break; + // case TK("False"): emit(OP_LOAD_FALSE); break; + // case TK("..."): emit(OP_LOAD_ELLIPSIS); break; + // default: UNREACHABLE(); + // } + } }; // @num, @str which needs to invoke OP_LOAD_CONST struct LiteralExpr: Expression{ - PyObject* value; - LiteralExpr(PyObject* value): value(value) {} + TokenValue value; + LiteralExpr(TokenValue value): value(value) {} Str to_string() const override { return "literal"; } }; +struct SliceExpr: Expression{ + Expression_ start; + Expression_ stop; + Expression_ step; + SliceExpr(Expression_&& start, Expression_&& stop, Expression_&& step): + start(std::move(start)), stop(std::move(stop)), step(std::move(step)) {} + Str to_string() const override { return "slice"; } +}; + struct ListExpr: Expression{ + std::vector items; Str to_string() const override { return "[]"; } }; struct DictExpr: Expression{ + std::vector items; // each item is a DictItemExpr Str to_string() const override { return "{}"; } }; +struct SetExpr: Expression{ + std::vector items; + Str to_string() const override { return "{}"; } +}; + + +struct TupleExpr: Expression{ + std::vector items; + TupleExpr(std::vector&& items): items(std::move(items)) {} + Str to_string() const override { return "(a, b, c)"; } +}; + +struct CompExpr: Expression{ + Expression_ expr; // loop expr + Expression_ vars; // loop vars + Expression_ iter; // loop iter + Expression_ 
cond; // optional if condition + virtual void emit_expr() = 0; +}; + +// a:b +struct DictItemExpr: Expression{ + Expression_ key; + Expression_ value; + DictItemExpr(Expression_&& key, Expression_&& value) + : key(std::move(key)), value(std::move(value)) {} + Str to_string() const override { return "dict item"; } +}; + +struct ListCompExpr: CompExpr{ +}; + +struct DictCompExpr: CompExpr{ +}; + +struct SetCompExpr: CompExpr{ +}; + struct LambdaExpr: Expression{ + Function func; + NameScope scope; + LambdaExpr(Function&& func, NameScope scope): func(std::move(func)), scope(scope) {} Str to_string() const override { return "lambda"; } }; struct FStringExpr: Expression{ + Str src; + FStringExpr(const Str& src): src(src) {} Str to_string() const override { return "@fstr"; } }; +struct SubscrExpr: Expression{ + Expression_ a; + Expression_ b; + SubscrExpr(Expression_&& a, Expression_&& b): a(std::move(a)), b(std::move(b)) {} + Str to_string() const override { return "a[b]"; } +}; + struct AttribExpr: Expression{ + Expression_ a; + Str b; + AttribExpr(Expression_ a, const Str& b): a(std::move(a)), b(b) {} + AttribExpr(Expression_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} Str to_string() const override { return "."; } }; +struct AssignExpr: Expression{ + Expression_ lhs; + Expression_ rhs; + AssignExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} + Str to_string() const override { return "="; } +}; + +struct InplaceAssignExpr: Expression{ + TokenIndex op; + Expression_ lhs; + Expression_ rhs; + InplaceAssignExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs) + : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} + Str to_string() const override { return TK_STR(op); } +}; + + struct CallExpr: Expression{ + std::vector args; + std::vector> kwargs; Str to_string() const override { return "()"; } }; struct BinaryExpr: Expression{ TokenIndex op; - BinaryExpr(TokenIndex op): op(op) {} + Expression_ lhs; + Expression_ rhs; + 
BinaryExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs) + : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} Str to_string() const override { return TK_STR(op); } }; struct TernaryExpr: Expression{ + Expression_ cond; + Expression_ true_expr; + Expression_ false_expr; + TernaryExpr(Expression_&& cond, Expression_&& true_expr, Expression_&& false_expr) + : cond(std::move(cond)), true_expr(std::move(true_expr)), false_expr(std::move(false_expr)) {} Str to_string() const override { return "?"; } }; -struct AssignExpr: Expression{ - Str to_string() const override { return "="; } -}; - -struct CommaExpr: Expression{ - Str to_string() const override { return ","; } -}; - } // namespace pkpy \ No newline at end of file diff --git a/src/lexer.h b/src/lexer.h index 3e20071b..8412aa4c 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -68,11 +68,12 @@ struct Token{ } }; -// https://docs.python.org/3/reference/expressions.html +// https://docs.python.org/3/reference/expressions.html#operator-precedence enum Precedence { PREC_NONE, PREC_ASSIGNMENT, // = PREC_COMMA, // , + PREC_SLICE, // : (only available inside a subscript expression) PREC_TERNARY, // ?: PREC_LOGICAL_OR, // or PREC_LOGICAL_AND, // and @@ -135,7 +136,7 @@ struct Lexer { if(brackets_level > 0) return true; int spaces = eat_spaces(); if(peekchar() == '#') skip_line_comment(); - if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true; + if(peekchar() == '\0' || peekchar() == '\n') return true; // https://docs.python.org/3/reference/lexical_analysis.html#indentation if(spaces > indents.top()){ indents.push(spaces); @@ -428,7 +429,6 @@ struct Lexer { add_token_2('=', TK("/"), TK("/=")); } return true; - case '\r': break; // just ignore '\r' case ' ': case '\t': eat_spaces(); break; case '\n': { add_token(TK("@eol")); @@ -493,8 +493,8 @@ struct Lexer { Lexer(shared_ptr src) { this->src = src; - this->token_start = src->source; - this->curr_char = src->source; + this->token_start = 
src->source.c_str(); + this->curr_char = src->source.c_str(); this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line}); this->indents.push(0); } diff --git a/src/main.cpp b/src/main.cpp index 3c96fd54..b3156c41 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -21,7 +21,6 @@ std::string getline(bool* eof=nullptr) { std::string output; output.resize(length); WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), &output[0], length, NULL, NULL); - if(!output.empty() && output.back() == '\r') output.pop_back(); return output; } diff --git a/src/vm.h b/src/vm.h index da1c1d05..383a00cf 100644 --- a/src/vm.h +++ b/src/vm.h @@ -357,30 +357,30 @@ inline void CodeObject::optimize(VM* vm){ perfect_locals_capacity = find_next_capacity(base_n); perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, keys); - for(int i=1; inum_negated(consts[pos]); - } + // for(int i=1; inum_negated(consts[pos]); + // } - if(i>=2 && codes[i].op == OP_BUILD_INDEX){ - const Bytecode& a = codes[i-1]; - const Bytecode& x = codes[i-2]; - if(codes[i].arg == 1){ - if(a.op == OP_LOAD_NAME && x.op == OP_LOAD_NAME){ - codes[i].op = OP_FAST_INDEX; - }else continue; - }else{ - if(a.op == OP_LOAD_NAME_REF && x.op == OP_LOAD_NAME_REF){ - codes[i].op = OP_FAST_INDEX_REF; - }else continue; - } - codes[i].arg = (a.arg << 16) | x.arg; - codes[i-1].op = OP_NO_OP; - codes[i-2].op = OP_NO_OP; - } - } + // if(i>=2 && codes[i].op == OP_BUILD_INDEX){ + // const Bytecode& a = codes[i-1]; + // const Bytecode& x = codes[i-2]; + // if(codes[i].arg == 1){ + // if(a.op == OP_LOAD_NAME && x.op == OP_LOAD_NAME){ + // codes[i].op = OP_FAST_INDEX; + // }else continue; + // }else{ + // if(a.op == OP_LOAD_NAME_REF && x.op == OP_LOAD_NAME_REF){ + // codes[i].op = OP_FAST_INDEX_REF; + // }else continue; + // } + // codes[i].arg = (a.arg << 16) | x.arg; + // codes[i-1].op = OP_NO_OP; + // codes[i-2].op = OP_NO_OP; + // } + // } // pre-compute sn in co_consts for(int i=0; i Date: Fri, 31 
Mar 2023 17:19:32 +0800 Subject: [PATCH 20/73] up --- src/codeobject.h | 24 +-- src/common.h | 4 +- src/compiler.h | 468 ++++++++++++++--------------------------------- src/expr.h | 442 ++++++++++++++++++++++++++++++++------------ src/lexer.h | 3 +- 5 files changed, 475 insertions(+), 466 deletions(-) diff --git a/src/codeobject.h b/src/codeobject.h index b742dba6..d8552f82 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -39,6 +39,9 @@ enum CodeBlockType { TRY_EXCEPT, }; +#define BC_NOARG -1 +#define BC_KEEPLINE -1 + struct CodeBlock { CodeBlockType type; int parent; // parent index in blocks @@ -68,27 +71,6 @@ struct CodeObject { void optimize(VM* vm); - bool add_label(StrName label){ - if(labels.count(label)) return false; - labels[label] = codes.size(); - return true; - } - - int add_name(StrName name, NameScope scope){ - if(scope == NAME_LOCAL && global_names.count(name)) scope = NAME_GLOBAL; - auto p = std::make_pair(name, scope); - for(int i=0; i& data() const { return vec; } }; -struct Expression; -typedef std::unique_ptr Expression_; +struct Expr; +typedef std::unique_ptr Expr_; } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index bc58348f..57d204a8 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -20,41 +20,6 @@ struct PrattRule{ Precedence precedence; }; -struct CodeEmitContext{ - CodeObject_ co; - stack s_expr; - - CodeEmitContext(CodeObject_ co): co(co) {} - - int curr_block_i = 0; - bool is_compiling_class = false; - - bool is_curr_block_loop() const { - return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP; - } - - void enter_block(CodeBlockType type){ - co->blocks.push_back(CodeBlock{ - type, curr_block_i, (int)co->codes.size() - }); - curr_block_i = co->blocks.size()-1; - } - - void exit_block(){ - co->blocks[curr_block_i].end = co->codes.size(); - curr_block_i = co->blocks[curr_block_i].parent; - if(curr_block_i < 0) UNREACHABLE(); - } - - // clear 
the expression stack and generate bytecode - void emit_expr(){ - if(s_expr.size() != 1) UNREACHABLE(); - Expression_ expr = s_expr.popx(); - // emit - // ... - } -}; - class Compiler { std::unique_ptr lexer; stack contexts; @@ -70,7 +35,7 @@ class Compiler { template CodeObject_ push_context(Args&&... args){ CodeObject_ co = make_sp(std::forward(args)...); - contexts.push(CodeEmitContext(co)); + contexts.push(CodeEmitContext(vm, co)); return co; } @@ -117,30 +82,29 @@ public: rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; - rules[TK("True")] = { METHOD(exprValue), NO_INFIX }; - rules[TK("False")] = { METHOD(exprValue), NO_INFIX }; + rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX }; rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX }; - rules[TK("None")] = { METHOD(exprValue), NO_INFIX }; - rules[TK("...")] = { METHOD(exprValue), NO_INFIX }; rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("&=")] = { nullptr, 
METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK(">>=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("<<=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA }; - rules[TK(":")] = { nullptr, METHOD(exprSlice), PREC_SLICE }; + rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("-=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE }; rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; @@ -150,7 +114,7 @@ public: #undef NO_INFIX #define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression -#define EXPR_TUPLE() parse_expression(PREC_COMMA) // no '=', but ',' is allowed +#define EXPR_TUPLE() parse_expression(PREC_TUPLE) // no '=', but ',' is allowed #define EXPR_ANY() parse_expression(PREC_ASSIGNMENT) } @@ -201,96 +165,55 @@ private: if (!match_end_stmt()) 
SyntaxError("expected statement end"); } - PyObject* get_value(const Token& token) { - switch (token.type) { - case TK("@num"): - if(std::holds_alternative(token.value)) return VAR(std::get(token.value)); - if(std::holds_alternative(token.value)) return VAR(std::get(token.value)); - UNREACHABLE(); - case TK("@str"): case TK("@fstr"): - return VAR(std::get(token.value)); - default: throw std::runtime_error(Str("invalid token type: ") + TK_STR(token.type)); - } - } - void exprLiteral(){ ctx()->s_expr.push( - std::make_unique(prev().value) + expr_prev_line(prev().value) ); - // PyObject* value = get_value(prev()); - // int index = co()->add_const(value); - // emit(OP_LOAD_CONST, index); } void exprFString(){ ctx()->s_expr.push( - std::make_unique(std::get(prev().value)) + expr_prev_line(std::get(prev().value)) ); - // static const std::regex pattern(R"(\{(.*?)\})"); - // PyObject* value = get_value(prev()); - // Str s = CAST(Str, value); - // std::sregex_iterator begin(s.begin(), s.end(), pattern); - // std::sregex_iterator end; - // int size = 0; - // int i = 0; - // for(auto it = begin; it != end; it++) { - // std::smatch m = *it; - // if (i < m.position()) { - // std::string literal = s.substr(i, m.position() - i); - // emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - // size++; - // } - // emit(OP_LOAD_EVAL_FN); - // emit(OP_LOAD_CONST, co()->add_const(VAR(m[1].str()))); - // emit(OP_CALL, 1); - // size++; - // i = (int)(m.position() + m.length()); - // } - // if (i < s.size()) { - // std::string literal = s.substr(i, s.size() - i); - // emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - // size++; - // } - // emit(OP_BUILD_STRING, size); } - void emit_expr(){} + template + std::unique_ptr expr_prev_line(Args&&... 
args) { + std::unique_ptr expr = std::make_unique(std::forward(args)...); + expr->line = prev().line; + return expr; + } void exprLambda(){ - Function func; - func.name = ""; + auto e = expr_prev_line(); + e->func.name = ""; + e->scope = name_scope(); if(!match(TK(":"))){ - _compile_f_args(func, false); + _compile_f_args(e->func, false); consume(TK(":")); } - func.code = push_context(lexer->src, func.name.str()); + e->func.code = push_context(lexer->src, ""); EXPR(); - emit_expr(); - emit(OP_RETURN_VALUE); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); + ctx()->s_expr.push(std::move(e)); + } - ctx()->s_expr.push( - std::make_unique(std::move(func), name_scope()) - ); - - // emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); - // if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); + void exprInplaceAssign(){ + auto e = expr_prev_line(); + e->op = prev().type; + e->lhs = ctx()->s_expr.popx(); + EXPR_TUPLE(); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprAssign(){ - Expression_ lhs = ctx()->s_expr.popx(); - TokenIndex op = prev().type; + auto e = expr_prev_line(); + e->lhs = ctx()->s_expr.popx(); EXPR_TUPLE(); - if(op == TK("=")){ - ctx()->s_expr.push( - std::make_unique(std::move(lhs), ctx()->s_expr.popx()) - ); - }else{ - // += -= ... 
- ctx()->s_expr.push( - std::make_unique(op, std::move(lhs), ctx()->s_expr.popx()) - ); - } + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); // if(co()->codes.empty()) UNREACHABLE(); // bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; @@ -341,123 +264,73 @@ private: // co()->_rvalue -= 1; } - void exprSlice(){ - } - - void exprComma(){ - int size = 1; // an expr is in the stack now + void exprTuple(){ + auto e = expr_prev_line(); do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok - size++; + e->items.push_back(ctx()->s_expr.popx()); } while(match(TK(","))); - std::vector items(size); - for(int i=size-1; i>=0; i--) items[i] = ctx()->s_expr.popx(); - ctx()->s_expr.push( - std::make_unique(std::move(items)) - ); - // emit(co()->_rvalue ? OP_BUILD_TUPLE : OP_BUILD_TUPLE_REF, size); + ctx()->s_expr.push(std::move(e)); } void exprOr(){ - Expression_ lhs = ctx()->s_expr.popx(); - parse_expression(PREC_LOGICAL_OR); - ctx()->s_expr.push( - std::make_unique(std::move(lhs), ctx()->s_expr.popx()) - ); - - // int patch = emit(OP_JUMP_IF_TRUE_OR_POP); - // parse_expression(PREC_LOGICAL_OR); - // patch_jump(patch); + auto e = expr_prev_line(); + e->lhs = ctx()->s_expr.popx(); + parse_expression(PREC_LOGICAL_OR + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprAnd(){ - Expression_ lhs = ctx()->s_expr.popx(); - parse_expression(PREC_LOGICAL_AND); - ctx()->s_expr.push( - std::make_unique(std::move(lhs), ctx()->s_expr.popx()) - ); - // int patch = emit(OP_JUMP_IF_FALSE_OR_POP); - // parse_expression(PREC_LOGICAL_AND); - // patch_jump(patch); + auto e = expr_prev_line(); + e->lhs = ctx()->s_expr.popx(); + parse_expression(PREC_LOGICAL_AND + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprTernary(){ - Expression_ cond = ctx()->s_expr.popx(); + auto e = expr_prev_line(); + e->cond = ctx()->s_expr.popx(); EXPR(); // if true - Expression_ true_expr = ctx()->s_expr.popx(); + 
e->true_expr = ctx()->s_expr.popx(); consume(TK(":")); EXPR(); // if false - Expression_ false_expr = ctx()->s_expr.popx(); - ctx()->s_expr.push( - std::make_unique(std::move(cond), std::move(true_expr), std::move(false_expr)) - ); - // int patch = emit(OP_POP_JUMP_IF_FALSE); - // EXPR(); // if true - // int patch2 = emit(OP_JUMP_ABSOLUTE); - // consume(TK(":")); - // patch_jump(patch); - // EXPR(); // if false - // patch_jump(patch2); + e->false_expr = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprBinaryOp(){ - TokenIndex op = prev().type; - Expression_ lhs = ctx()->s_expr.popx(); - parse_expression((Precedence)(rules[op].precedence + 1)); - ctx()->s_expr.push( - std::make_unique(op, std::move(lhs), ctx()->s_expr.popx()) - ); - // switch (op) { - // case TK("+"): emit(OP_BINARY_OP, 0); break; - // case TK("-"): emit(OP_BINARY_OP, 1); break; - // case TK("*"): emit(OP_BINARY_OP, 2); break; - // case TK("/"): emit(OP_BINARY_OP, 3); break; - // case TK("//"): emit(OP_BINARY_OP, 4); break; - // case TK("%"): emit(OP_BINARY_OP, 5); break; - // case TK("**"): emit(OP_BINARY_OP, 6); break; - - // case TK("<"): emit(OP_COMPARE_OP, 0); break; - // case TK("<="): emit(OP_COMPARE_OP, 1); break; - // case TK("=="): emit(OP_COMPARE_OP, 2); break; - // case TK("!="): emit(OP_COMPARE_OP, 3); break; - // case TK(">"): emit(OP_COMPARE_OP, 4); break; - // case TK(">="): emit(OP_COMPARE_OP, 5); break; - // case TK("in"): emit(OP_CONTAINS_OP, 0); break; - // case TK("not in"): emit(OP_CONTAINS_OP, 1); break; - // case TK("is"): emit(OP_IS_OP, 0); break; - // case TK("is not"): emit(OP_IS_OP, 1); break; - - // case TK("<<"): emit(OP_BITWISE_OP, 0); break; - // case TK(">>"): emit(OP_BITWISE_OP, 1); break; - // case TK("&"): emit(OP_BITWISE_OP, 2); break; - // case TK("|"): emit(OP_BITWISE_OP, 3); break; - // case TK("^"): emit(OP_BITWISE_OP, 4); break; - // default: UNREACHABLE(); - // } + auto e = expr_prev_line(); + e->op = prev().type; + e->lhs = 
ctx()->s_expr.popx(); + parse_expression(rules[e->op].precedence + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprNot() { - parse_expression((Precedence)(PREC_LOGICAL_NOT + 1)); + parse_expression(PREC_LOGICAL_NOT + 1); ctx()->s_expr.push( - std::make_unique(ctx()->s_expr.popx()) + expr_prev_line(ctx()->s_expr.popx()) ); - // emit(OP_UNARY_NOT); } void exprUnaryOp(){ TokenIndex type = prev().type; - parse_expression((Precedence)(PREC_UNARY + 1)); - ctx()->s_expr.push( - std::make_unique(type, ctx()->s_expr.popx()) - ); - // switch (type) { - // case TK("-"): emit(OP_UNARY_NEGATIVE); break; - // case TK("*"): emit(OP_UNARY_STAR, co()->_rvalue); break; - // default: UNREACHABLE(); - // } + parse_expression(PREC_UNARY + 1); + Expr_ e; + switch(type){ + case TK("-"): + e = expr_prev_line(ctx()->s_expr.popx()); + case TK("*"): + e = expr_prev_line(ctx()->s_expr.popx()); + default: UNREACHABLE(); + } + ctx()->s_expr.push(std::move(e)); } - // () is just for change precedence, so we don't need to push it into stack + // () is just for change precedence void exprGroup(){ match_newlines(mode()==REPL_MODE); EXPR_TUPLE(); @@ -507,52 +380,37 @@ private: // } template - void _consume_comp(){ - + void _consume_comp(Expr_ expr){ + static_assert(std::is_base_of::value); + std::unique_ptr ce = std::make_unique(); + ce->expr = std::move(expr); + // ... 
+ ctx()->s_expr.push(std::move(ce)); } void exprList() { - int ARGC = 0; + auto e = expr_prev_line(); do { match_newlines(mode()==REPL_MODE); if (curr().type == TK("]")) break; - EXPR(); ARGC++; + EXPR(); + e->items.push_back(ctx()->s_expr.popx()); match_newlines(mode()==REPL_MODE); - if(ARGC == 1 && match(TK("for"))){ - _consume_comp(); + if(e->items.size()==1 && match(TK("for"))){ + _consume_comp(std::move(e->items[0])); consume(TK("]")); return; } } while (match(TK(","))); match_newlines(mode()==REPL_MODE); consume(TK("]")); - auto list_expr = std::make_unique(); - list_expr->items.resize(ARGC); - for(int i=ARGC-1; i>=0; i--) list_expr->items[i] = ctx()->s_expr.popx(); - ctx()->s_expr.push(std::move(list_expr)); - - // int _patch = emit(OP_NO_OP); - // int _body_start = co()->codes.size(); - // int ARGC = 0; - // do { - // match_newlines(mode()==REPL_MODE); - // if (curr().type == TK("]")) break; - // EXPR(); ARGC++; - // match_newlines(mode()==REPL_MODE); - // if(ARGC == 1 && match(TK("for"))){ - // _consume_comp(OP_BUILD_LIST, OP_LIST_APPEND, _patch, _body_start); - // consume(TK("]")); - // return; - // } - // } while (match(TK(","))); - // match_newlines(mode()==REPL_MODE); - // consume(TK("]")); - // emit(OP_BUILD_LIST, ARGC); + ctx()->s_expr.push(std::move(e)); } + // {...} may be dict or set void exprMap() { bool parsing_dict = false; - int ARGC = 0; + std::vector items; do { match_newlines(mode()==REPL_MODE); if (curr().type == TK("}")) break; @@ -561,62 +419,33 @@ private: if(parsing_dict){ consume(TK(":")); EXPR(); - Expression_ value = ctx()->s_expr.popx(); - ctx()->s_expr.push( - std::make_unique(ctx()->s_expr.popx(), std::move(value)) - ); + auto dict_item = expr_prev_line(); + dict_item->key = ctx()->s_expr.popx(); + dict_item->value = ctx()->s_expr.popx(); + items.push_back(std::move(dict_item)); + }else{ + items.push_back(ctx()->s_expr.popx()); } - ARGC++; match_newlines(mode()==REPL_MODE); - if(ARGC == 1 && match(TK("for"))){ - if(parsing_dict) 
_consume_comp(); - else _consume_comp(); + if(items.size()==1 && match(TK("for"))){ + if(parsing_dict) _consume_comp(std::move(items[0])); + else _consume_comp(std::move(items[0])); consume(TK("}")); return; } } while (match(TK(","))); consume(TK("}")); - if(ARGC == 0 || parsing_dict){ - auto e = std::make_unique(); - e->items.resize(ARGC); - for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx(); + if(items.size()==0 || parsing_dict){ + auto e = expr_prev_line(std::move(items)); ctx()->s_expr.push(std::move(e)); }else{ - auto e = std::make_unique(); - e->items.resize(ARGC); - for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx(); + auto e = expr_prev_line(std::move(items)); ctx()->s_expr.push(std::move(e)); } - // int _patch = emit(OP_NO_OP); - // int _body_start = co()->codes.size(); - // bool parsing_dict = false; - // int ARGC = 0; - // do { - // match_newlines(mode()==REPL_MODE); - // if (curr().type == TK("}")) break; - // EXPR(); - // if(curr().type == TK(":")) parsing_dict = true; - // if(parsing_dict){ - // consume(TK(":")); - // EXPR(); - // } - // ARGC++; - // match_newlines(mode()==REPL_MODE); - // if(ARGC == 1 && match(TK("for"))){ - // if(parsing_dict) _consume_comp(OP_BUILD_MAP, OP_MAP_ADD, _patch, _body_start); - // else _consume_comp(OP_BUILD_SET, OP_SET_ADD, _patch, _body_start); - // consume(TK("}")); - // return; - // } - // } while (match(TK(","))); - // consume(TK("}")); - - // if(ARGC == 0 || parsing_dict) emit(OP_BUILD_MAP, ARGC); - // else emit(OP_BUILD_SET, ARGC); } void exprCall() { - auto e = std::make_unique(); + auto e = _expr(); do { match_newlines(mode()==REPL_MODE); if (curr().type==TK(")")) break; @@ -648,69 +477,49 @@ private: void exprName(){ ctx()->s_expr.push( - std::make_unique(prev().str(), name_scope()) + expr_prev_line(prev().str(), name_scope()) ); } void exprAttrib() { consume(TK("@id")); ctx()->s_expr.push( - std::make_unique(ctx()->s_expr.popx(), prev().str()) + expr_prev_line(ctx()->s_expr.popx(), 
prev().str()) ); } - // [:], [:b] - // [a], [a:], [a:b] void exprSubscr() { - Expression_ a = nullptr; - Expression_ b = nullptr; - if(match(TK(":"))){ - if(match(TK("]"))){ // [:] - - }else{ // [:b] - EXPR_TUPLE(); - consume(TK("]")); - } - emit(OP_BUILD_SLICE); - }else{ + auto e = expr_prev_line(); + std::vector items; + do { EXPR_TUPLE(); - if(match(TK(":"))){ - if(match(TK("]"))){ // [a:] - emit(OP_LOAD_NONE); - }else{ // [a:b] - EXPR_TUPLE(); - consume(TK("]")); + items.push_back(ctx()->s_expr.popx()); + } while(match(TK(":"))); + consume(TK("]")); + switch(items.size()){ + case 1: + e->b = std::move(items[0]); + break; + case 2: case 3: { + auto slice = expr_prev_line(); + slice->start = std::move(items[0]); + slice->stop = std::move(items[1]); + if(items.size()==3){ + slice->step = std::move(items[2]); } - emit(OP_BUILD_SLICE); - }else{ // [a] - consume(TK("]")); - } + e->b = std::move(slice); + } break; + default: SyntaxError(); break; } - - // emit(OP_BUILD_INDEX, (int)(co()->_rvalue>0)); + ctx()->s_expr.push(std::move(e)); } - void exprValue() { + void exprLiteral0() { ctx()->s_expr.push( - std::make_unique(prev().type) + expr_prev_line(prev().type) ); } - int emit(Opcode opcode, int arg=-1, bool keepline=false) { - int line = prev().line; - co()->codes.push_back( - Bytecode{(uint8_t)opcode, (uint16_t)ctx()->curr_block_i, arg, line} - ); - int i = co()->codes.size() - 1; - if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line; - return i; - } - - inline void patch_jump(int addr_index) { - int target = co()->codes.size(); - co()->codes[addr_index].arg = target; - } - void compile_block_body() { consume(TK(":")); if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ @@ -778,10 +587,11 @@ private: consume_end_stmt(); } - // a = 1 + 2 - // ['a', '1', '2', '+', '='] - // - void parse_expression(Precedence precedence, bool allowslice=false) { + void parse_expression(int precedence){ + parse_expression((Precedence)precedence); + } + + void 
parse_expression(Precedence precedence) { advance(); PrattCallback prefix = rules[prev().type].prefix; if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); diff --git a/src/expr.h b/src/expr.h index 0ae035bf..4740b9f1 100644 --- a/src/expr.h +++ b/src/expr.h @@ -8,115 +8,262 @@ namespace pkpy{ -struct Expression{ - virtual Str to_string() const = 0; +struct CodeEmitContext; +struct Expr{ + int line = 0; + virtual Str str() const = 0; + ~Expr() = default; + virtual void emit(CodeEmitContext* ctx) = 0; }; -struct NameExpr: Expression{ +struct CodeEmitContext{ + CodeObject_ co; + VM* vm; + stack s_expr; + + CodeEmitContext(VM* vm, CodeObject_ co): co(co) {} + CodeEmitContext(const CodeEmitContext&) = delete; + CodeEmitContext& operator=(const CodeEmitContext&) = delete; + CodeEmitContext(CodeEmitContext&&) = delete; + CodeEmitContext& operator=(CodeEmitContext&&) = delete; + + int curr_block_i = 0; + bool is_compiling_class = false; + + bool is_curr_block_loop() const { + return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP; + } + + void enter_block(CodeBlockType type){ + co->blocks.push_back(CodeBlock{ + type, curr_block_i, (int)co->codes.size() + }); + curr_block_i = co->blocks.size()-1; + } + + void exit_block(){ + co->blocks[curr_block_i].end = co->codes.size(); + curr_block_i = co->blocks[curr_block_i].parent; + if(curr_block_i < 0) UNREACHABLE(); + } + + // clear the expression stack and generate bytecode + void emit_expr(){ + if(s_expr.size() != 1) UNREACHABLE(); + Expr_ expr = s_expr.popx(); + // emit + // ... 
+ } + + int emit(Opcode opcode, int arg, int line) { + co->codes.push_back( + Bytecode{(uint8_t)opcode, (uint16_t)curr_block_i, arg, line} + ); + int i = co->codes.size() - 1; + if(line==BC_KEEPLINE && i>=1) co->codes[i].line = co->codes[i-1].line; + return i; + } + + void patch_jump(int index) { + int target = co->codes.size(); + co->codes[index].arg = target; + } + + bool add_label(StrName label){ + if(co->labels.count(label)) return false; + co->labels[label] = co->codes.size(); + return true; + } + + int add_name(StrName name, NameScope scope){ + if(scope == NAME_LOCAL && co->global_names.count(name)) scope = NAME_GLOBAL; + auto p = std::make_pair(name, scope); + for(int i=0; inames.size(); i++){ + if(co->names[i] == p) return i; + } + co->names.push_back(p); + return co->names.size() - 1; + } + + int add_const(PyObject* v){ + co->consts.push_back(v); + return co->consts.size() - 1; + } +}; + +struct NameExpr: Expr{ Str name; NameScope scope; NameExpr(const Str& name, NameScope scope): name(name), scope(scope) {} NameExpr(Str&& name, NameScope scope): name(std::move(name)), scope(scope) {} - Str to_string() const override { return name; } + + Str str() const override { return "$" + name; } + + void emit(CodeEmitContext* ctx) override { + int index = ctx->add_name(name, scope); + ctx->emit(OP_LOAD_NAME, index, line); + } }; -struct UnaryExpr: Expression{ - TokenIndex op; - Expression_ child; - UnaryExpr(TokenIndex op, Expression_&& child): op(op), child(std::move(child)) {} - Str to_string() const override { return TK_STR(op); } +struct StarredExpr: Expr{ + Expr_ child; + StarredExpr(Expr_&& child): child(std::move(child)) {} + Str str() const override { return "*"; } + + void emit(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_STAR, (int)false, line); + } }; -struct NotExpr: Expression{ - Expression_ child; - NotExpr(Expression_&& child): child(std::move(child)) {} - Str to_string() const override { return "not"; } +struct 
NegatedExpr: Expr{ + Expr_ child; + NegatedExpr(Expr_&& child): child(std::move(child)) {} + Str str() const override { return "-"; } + + void emit(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line); + } }; -struct AndExpr: Expression{ - Expression_ lhs; - Expression_ rhs; - AndExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return "and"; } +struct NotExpr: Expr{ + Expr_ child; + NotExpr(Expr_&& child): child(std::move(child)) {} + Str str() const override { return "not"; } + + void emit(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_NOT, BC_NOARG, line); + } }; -struct OrExpr: Expression{ - Expression_ lhs; - Expression_ rhs; - OrExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return "or"; } +struct AndExpr: Expr{ + Expr_ lhs; + Expr_ rhs; + Str str() const override { return "and"; } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + int patch = ctx->emit(OP_JUMP_IF_FALSE_OR_POP, BC_NOARG, line); + rhs->emit(ctx); + ctx->patch_jump(patch); + } +}; + +struct OrExpr: Expr{ + Expr_ lhs; + Expr_ rhs; + Str str() const override { return "or"; } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + int patch = ctx->emit(OP_JUMP_IF_TRUE_OR_POP, BC_NOARG, line); + rhs->emit(ctx); + ctx->patch_jump(patch); + } }; // [None, True, False, ...] 
-struct SpecialLiteralExpr: Expression{ +struct Literal0Expr: Expr{ TokenIndex token; - SpecialLiteralExpr(TokenIndex token): token(token) {} - Str to_string() const override { return TK_STR(token); } + Literal0Expr(TokenIndex token): token(token) {} + Str str() const override { return TK_STR(token); } - void gen(){ - // switch (token) { - // case TK("None"): emit(OP_LOAD_NONE); break; - // case TK("True"): emit(OP_LOAD_TRUE); break; - // case TK("False"): emit(OP_LOAD_FALSE); break; - // case TK("..."): emit(OP_LOAD_ELLIPSIS); break; - // default: UNREACHABLE(); - // } + void emit(CodeEmitContext* ctx) override { + switch (token) { + case TK("None"): ctx->emit(OP_LOAD_NONE, BC_NOARG, line); break; + case TK("True"): ctx->emit(OP_LOAD_TRUE, BC_NOARG, line); break; + case TK("False"): ctx->emit(OP_LOAD_FALSE, BC_NOARG, line); break; + case TK("..."): ctx->emit(OP_LOAD_ELLIPSIS, BC_NOARG, line); break; + default: UNREACHABLE(); + } } }; // @num, @str which needs to invoke OP_LOAD_CONST -struct LiteralExpr: Expression{ +struct LiteralExpr: Expr{ TokenValue value; LiteralExpr(TokenValue value): value(value) {} - Str to_string() const override { return "literal"; } + Str str() const override { + if(std::holds_alternative(value)){ + return std::to_string(std::get(value)); + } + + if(std::holds_alternative(value)){ + return std::to_string(std::get(value)); + } + + if(std::holds_alternative(value)){ + return std::get(value).escape(true); + } + + UNREACHABLE(); + } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + PyObject* obj = nullptr; + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + + if(!obj) UNREACHABLE(); + int index = ctx->add_const(obj); + ctx->emit(OP_LOAD_CONST, index, line); + } }; -struct SliceExpr: Expression{ - Expression_ start; - Expression_ stop; - Expression_ step; - 
SliceExpr(Expression_&& start, Expression_&& stop, Expression_&& step): - start(std::move(start)), stop(std::move(stop)), step(std::move(step)) {} - Str to_string() const override { return "slice"; } +struct SliceExpr: Expr{ + Expr_ start; + Expr_ stop; + Expr_ step; + Str str() const override { return "slice()"; } }; -struct ListExpr: Expression{ - std::vector items; - Str to_string() const override { return "[]"; } +struct ListExpr: Expr{ + std::vector items; + Str str() const override { return "[]"; } }; -struct DictExpr: Expression{ - std::vector items; // each item is a DictItemExpr - Str to_string() const override { return "{}"; } +struct DictExpr: Expr{ + std::vector items; // each item is a DictItemExpr + DictExpr(std::vector&& items): items(std::move(items)) {} + Str str() const override { return "{}"; } }; -struct SetExpr: Expression{ - std::vector items; - Str to_string() const override { return "{}"; } +struct SetExpr: Expr{ + std::vector items; + Set(std::vector&& items): items(std::move(items)) {} + Str str() const override { return "{}"; } }; - -struct TupleExpr: Expression{ - std::vector items; - TupleExpr(std::vector&& items): items(std::move(items)) {} - Str to_string() const override { return "(a, b, c)"; } +struct TupleExpr: Expr{ + std::vector items; + Str str() const override { return "tuple()"; } }; -struct CompExpr: Expression{ - Expression_ expr; // loop expr - Expression_ vars; // loop vars - Expression_ iter; // loop iter - Expression_ cond; // optional if condition +struct CompExpr: Expr{ + Expr_ expr; // loop expr + Expr_ vars; // loop vars + Expr_ iter; // loop iter + Expr_ cond; // optional if condition virtual void emit_expr() = 0; }; // a:b -struct DictItemExpr: Expression{ - Expression_ key; - Expression_ value; - DictItemExpr(Expression_&& key, Expression_&& value) - : key(std::move(key)), value(std::move(value)) {} - Str to_string() const override { return "dict item"; } +struct DictItemExpr: Expr{ + Expr_ key; + Expr_ value; + 
Str str() const override { return "k:v"; } }; struct ListCompExpr: CompExpr{ @@ -128,73 +275,144 @@ struct DictCompExpr: CompExpr{ struct SetCompExpr: CompExpr{ }; -struct LambdaExpr: Expression{ +struct LambdaExpr: Expr{ Function func; NameScope scope; - LambdaExpr(Function&& func, NameScope scope): func(std::move(func)), scope(scope) {} - Str to_string() const override { return "lambda"; } + Str str() const override { return ""; } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + ctx->emit(OP_LOAD_FUNCTION, ctx->add_const(VAR(func)), line); + if(scope == NAME_LOCAL) ctx->emit(OP_SETUP_CLOSURE, BC_NOARG, line); + } }; -struct FStringExpr: Expression{ +struct FStringExpr: Expr{ Str src; FStringExpr(const Str& src): src(src) {} - Str to_string() const override { return "@fstr"; } + Str str() const override { + return "f" + src.escape(true); + } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + static const std::regex pattern(R"(\{(.*?)\})"); + std::sregex_iterator begin(src.begin(), src.end(), pattern); + std::sregex_iterator end; + int size = 0; + int i = 0; + for(auto it = begin; it != end; it++) { + std::smatch m = *it; + if (i < m.position()) { + std::string literal = src.substr(i, m.position() - i); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); + size++; + } + ctx->emit(OP_LOAD_EVAL_FN, BC_NOARG, line); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(m[1].str())), line); + ctx->emit(OP_CALL, 1, line); + size++; + i = (int)(m.position() + m.length()); + } + if (i < src.size()) { + std::string literal = src.substr(i, src.size() - i); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); + size++; + } + ctx->emit(OP_BUILD_STRING, size, line); + } }; -struct SubscrExpr: Expression{ - Expression_ a; - Expression_ b; - SubscrExpr(Expression_&& a, Expression_&& b): a(std::move(a)), b(std::move(b)) {} - Str to_string() const override { return "a[b]"; } +struct SubscrExpr: Expr{ + Expr_ a; + Expr_ b; + Str 
str() const override { return "a[b]"; } }; -struct AttribExpr: Expression{ - Expression_ a; +struct AttribExpr: Expr{ + Expr_ a; Str b; - AttribExpr(Expression_ a, const Str& b): a(std::move(a)), b(b) {} - AttribExpr(Expression_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} - Str to_string() const override { return "."; } + AttribExpr(Expr_ a, const Str& b): a(std::move(a)), b(b) {} + AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} + Str str() const override { return "a.b"; } }; -struct AssignExpr: Expression{ - Expression_ lhs; - Expression_ rhs; - AssignExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return "="; } +struct AssignExpr: Expr{ + Expr_ lhs; + Expr_ rhs; + Str str() const override { return "="; } }; -struct InplaceAssignExpr: Expression{ +struct InplaceAssignExpr: Expr{ TokenIndex op; - Expression_ lhs; - Expression_ rhs; - InplaceAssignExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs) - : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return TK_STR(op); } + Expr_ lhs; + Expr_ rhs; + Str str() const override { return TK_STR(op); } }; - -struct CallExpr: Expression{ - std::vector args; - std::vector> kwargs; - Str to_string() const override { return "()"; } +struct CallExpr: Expr{ + std::vector args; + std::vector> kwargs; + Str str() const override { return "()"; } }; -struct BinaryExpr: Expression{ +struct BinaryExpr: Expr{ TokenIndex op; - Expression_ lhs; - Expression_ rhs; - BinaryExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs) - : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return TK_STR(op); } + Expr_ lhs; + Expr_ rhs; + Str str() const override { return TK_STR(op); } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + rhs->emit(ctx); + switch (op) { + case TK("+"): ctx->emit(OP_BINARY_OP, 0, line); break; + case TK("-"): 
ctx->emit(OP_BINARY_OP, 1, line); break; + case TK("*"): ctx->emit(OP_BINARY_OP, 2, line); break; + case TK("/"): ctx->emit(OP_BINARY_OP, 3, line); break; + case TK("//"): ctx->emit(OP_BINARY_OP, 4, line); break; + case TK("%"): ctx->emit(OP_BINARY_OP, 5, line); break; + case TK("**"): ctx->emit(OP_BINARY_OP, 6, line); break; + + case TK("<"): ctx->emit(OP_COMPARE_OP, 0, line); break; + case TK("<="): ctx->emit(OP_COMPARE_OP, 1, line); break; + case TK("=="): ctx->emit(OP_COMPARE_OP, 2, line); break; + case TK("!="): ctx->emit(OP_COMPARE_OP, 3, line); break; + case TK(">"): ctx->emit(OP_COMPARE_OP, 4, line); break; + case TK(">="): ctx->emit(OP_COMPARE_OP, 5, line); break; + case TK("in"): ctx->emit(OP_CONTAINS_OP, 0, line); break; + case TK("not in"): ctx->emit(OP_CONTAINS_OP, 1, line); break; + case TK("is"): ctx->emit(OP_IS_OP, 0, line); break; + case TK("is not"): ctx->emit(OP_IS_OP, 1, line); break; + + case TK("<<"): ctx->emit(OP_BITWISE_OP, 0, line); break; + case TK(">>"): ctx->emit(OP_BITWISE_OP, 1, line); break; + case TK("&"): ctx->emit(OP_BITWISE_OP, 2, line); break; + case TK("|"): ctx->emit(OP_BITWISE_OP, 3, line); break; + case TK("^"): ctx->emit(OP_BITWISE_OP, 4, line); break; + default: UNREACHABLE(); + } + } }; -struct TernaryExpr: Expression{ - Expression_ cond; - Expression_ true_expr; - Expression_ false_expr; - TernaryExpr(Expression_&& cond, Expression_&& true_expr, Expression_&& false_expr) - : cond(std::move(cond)), true_expr(std::move(true_expr)), false_expr(std::move(false_expr)) {} - Str to_string() const override { return "?"; } +struct TernaryExpr: Expr{ + Expr_ cond; + Expr_ true_expr; + Expr_ false_expr; + + Str str() const override { + return "cond ? 
true_expr : false_expr"; + } + + void emit(CodeEmitContext* ctx) override { + cond->emit(ctx); + int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, cond->line); + true_expr->emit(ctx); + int patch_2 = ctx->emit(OP_JUMP_ABSOLUTE, BC_NOARG, true_expr->line); + ctx->patch_jump(patch); + false_expr->emit(ctx); + ctx->patch_jump(patch_2); + } }; diff --git a/src/lexer.h b/src/lexer.h index 8412aa4c..8b997ca5 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -72,8 +72,7 @@ struct Token{ enum Precedence { PREC_NONE, PREC_ASSIGNMENT, // = - PREC_COMMA, // , - PREC_SLICE, // : (only available inside a subscript expression) + PREC_TUPLE, // , PREC_TERNARY, // ?: PREC_LOGICAL_OR, // or PREC_LOGICAL_AND, // and From 744b0f8dde948875c34e2392c3d98b543723300c Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 31 Mar 2023 17:30:24 +0800 Subject: [PATCH 21/73] Update compiler.h --- src/compiler.h | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 57d204a8..51480334 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -445,14 +445,13 @@ private: } void exprCall() { - auto e = _expr(); + auto e = expr_prev_line(); do { match_newlines(mode()==REPL_MODE); if (curr().type==TK(")")) break; if(curr().type==TK("@id") && next().type==TK("=")) { consume(TK("@id")); Str key = prev().str(); - // emit(OP_LOAD_CONST, co()->add_const(VAR(key))); consume(TK("=")); EXPR(); e->kwargs.push_back({key, ctx()->s_expr.popx()}); @@ -538,24 +537,24 @@ private: consume(TK("@dedent")); } - Token _compile_import() { + Str _compile_import() { consume(TK("@id")); - Token tkmodule = prev(); - int index = co()->add_name(tkmodule.str(), NAME_SPECIAL); - emit(OP_IMPORT_NAME, index); - return tkmodule; + Str name = prev().str(); + int index = ctx()->add_name(name, NAME_SPECIAL); + ctx()->emit(OP_IMPORT_NAME, index, peek(-2).line); + return name; } // import a as b void compile_normal_import() { do { - Token tkmodule = _compile_import(); 
+ Str name = _compile_import(); if (match(TK("as"))) { consume(TK("@id")); - tkmodule = prev(); + name = prev().str(); } - int index = co()->add_name(tkmodule.str(), name_scope()); - emit(OP_STORE_NAME, index); + int index = ctx()->add_name(name, name_scope()); + ctx()->emit(OP_STORE_NAME, index, prev().line); } while (match(TK(","))); consume_end_stmt(); } @@ -566,12 +565,12 @@ private: consume(TK("import")); if (match(TK("*"))) { if(name_scope() != NAME_GLOBAL) SyntaxError("import * can only be used in global scope"); - emit(OP_STORE_ALL_NAMES); + ctx()->emit(OP_STORE_ALL_NAMES, BC_NOARG, prev().line); consume_end_stmt(); return; } do { - emit(OP_DUP_TOP_VALUE); + ctx()->emit(OP_DUP_TOP_VALUE, BC_NOARG, BC_KEEPLINE); consume(TK("@id")); Token tkname = prev(); int index = co()->add_name(tkname.str(), NAME_ATTR); From e8da0211055d4af380c004a196f18a98c9d86d65 Mon Sep 17 00:00:00 2001 From: BLUELOVETH Date: Sat, 1 Apr 2023 04:25:09 +0000 Subject: [PATCH 22/73] up --- src/compiler.h | 77 +++++++++++++++++++++++++------------------------- src/expr.h | 2 +- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 51480334..f4f9ed8c 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -572,17 +572,17 @@ private: do { ctx()->emit(OP_DUP_TOP_VALUE, BC_NOARG, BC_KEEPLINE); consume(TK("@id")); - Token tkname = prev(); - int index = co()->add_name(tkname.str(), NAME_ATTR); - emit(OP_BUILD_ATTR, index); + Str name = prev().str(); + int index = ctx()->add_name(name, NAME_ATTR); + ctx()->emit(OP_BUILD_ATTR, index, prev().line); if (match(TK("as"))) { consume(TK("@id")); - tkname = prev(); + name = prev().str(); } - index = co()->add_name(tkname.str(), name_scope()); - emit(OP_STORE_NAME, index); + index = ctx()->add_name(name, name_scope()); + ctx()->emit(OP_STORE_NAME, index, prev().line); } while (match(TK(","))); - emit(OP_POP_TOP); + ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); consume_end_stmt(); } @@ -599,7 +599,6 @@ private: 
while (rules[curr().type].precedence >= precedence) { TokenIndex op = curr().type; advance(); - if (op == TK(":") && !allowslice) SyntaxError(); PrattCallback infix = rules[op].infix; if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true"); (this->*infix)(); @@ -609,33 +608,33 @@ private: void compile_if_stmt() { match_newlines(); EXPR(); // condition - emit_expr(); - int ifpatch = emit(OP_POP_JUMP_IF_FALSE); + ctx()->emit_expr(); + int ifpatch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); if (match(TK("elif"))) { - int exit_jump = emit(OP_JUMP_ABSOLUTE); - patch_jump(ifpatch); + int exit_jump = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + ctx()->patch_jump(ifpatch); compile_if_stmt(); - patch_jump(exit_jump); + ctx()->patch_jump(exit_jump); } else if (match(TK("else"))) { - int exit_jump = emit(OP_JUMP_ABSOLUTE); - patch_jump(ifpatch); + int exit_jump = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + ctx()->patch_jump(ifpatch); compile_block_body(); - patch_jump(exit_jump); + ctx()->patch_jump(exit_jump); } else { - patch_jump(ifpatch); + ctx()->patch_jump(ifpatch); } } void compile_while_loop() { ctx()->enter_block(WHILE_LOOP); EXPR(); // condition - emit_expr(); - int patch = emit(OP_POP_JUMP_IF_FALSE); + ctx()->emit_expr(); + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); - emit(OP_LOOP_CONTINUE, -1, true); - patch_jump(patch); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); + ctx()->patch_jump(patch); ctx()->exit_block(); } @@ -643,7 +642,7 @@ private: int size = 0; do { consume(TK("@id")); - int index = co()->add_name(prev().str(), name_scope()); + int index = ctx()->add_name(prev().str(), name_scope()); emit(OP_LOAD_NAME_REF, index); size++; } while (match(TK(","))); @@ -663,16 +662,18 @@ private: void compile_try_except() { ctx()->enter_block(TRY_EXCEPT); - emit(OP_TRY_BLOCK_ENTER); + ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, 
prev().line); compile_block_body(); - emit(OP_TRY_BLOCK_EXIT); - std::vector patches = { emit(OP_JUMP_ABSOLUTE) }; + ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE); + std::vector patches = { + ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) + }; ctx()->exit_block(); do { consume(TK("except")); if(match(TK("@id"))){ - int name_idx = co()->add_name(prev().str(), NAME_SPECIAL); + int name_idx = ctx()->add_name(prev().str(), NAME_SPECIAL); emit(OP_EXCEPTION_MATCH, name_idx); }else{ emit(OP_LOAD_TRUE); @@ -691,26 +692,28 @@ private: if (match(TK("break"))) { if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop"); consume_end_stmt(); - emit(OP_LOOP_BREAK); + ctx()->emit(OP_LOOP_BREAK, BC_NOARG, prev().line); } else if (match(TK("continue"))) { if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); consume_end_stmt(); - emit(OP_LOOP_CONTINUE); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, prev().line); } else if (match(TK("yield"))) { if (contexts.size() <= 1) SyntaxError("'yield' outside function"); - EXPR_TUPLE(); emit_expr(); + EXPR_TUPLE(); + ctx()->emit_expr(); consume_end_stmt(); co()->is_generator = true; - emit(OP_YIELD_VALUE, -1, true); + ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE); } else if (match(TK("return"))) { if (contexts.size() <= 1) SyntaxError("'return' outside function"); if(match_end_stmt()){ - emit(OP_LOAD_NONE); + ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line); }else{ - EXPR_TUPLE(); emit_expr(); + EXPR_TUPLE(); + ctx()->emit_expr(); consume_end_stmt(); } - emit(OP_RETURN_VALUE, -1, true); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); } else if (match(TK("if"))) { compile_if_stmt(); } else if (match(TK("while"))) { @@ -734,12 +737,10 @@ private: } else if (match(TK("try"))) { compile_try_except(); } else if(match(TK("assert"))) { - EXPR_TUPLE(); emit_expr(); + EXPR_TUPLE(); + ctx()->emit_expr(); // OP_CODE needs to change - - // if (match(TK(","))) EXPR(); - // else 
emit(OP_LOAD_CONST, co()->add_const(VAR(""))); - emit(OP_ASSERT); + ctx()->emit(OP_ASSERT, BC_NOARG, BC_KEEPLINE); consume_end_stmt(); } else if(match(TK("with"))){ EXPR(); diff --git a/src/expr.h b/src/expr.h index 4740b9f1..aff04c12 100644 --- a/src/expr.h +++ b/src/expr.h @@ -242,7 +242,7 @@ struct DictExpr: Expr{ struct SetExpr: Expr{ std::vector items; - Set(std::vector&& items): items(std::move(items)) {} + SetExpr(std::vector&& items): items(std::move(items)) {} Str str() const override { return "{}"; } }; From ea86ea3a34ea7129e764a4a812075942c44569ea Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 1 Apr 2023 18:25:43 +0800 Subject: [PATCH 23/73] up --- src/codeobject.h | 2 +- src/compiler.h | 343 ++++++++++++++++++++++++----------------------- src/expr.h | 22 +-- src/lexer.h | 1 - 4 files changed, 185 insertions(+), 183 deletions(-) diff --git a/src/codeobject.h b/src/codeobject.h index d8552f82..3ac01caa 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -62,7 +62,7 @@ struct CodeObject { std::vector codes; List consts; std::vector> names; - std::map global_names; + std::set global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; diff --git a/src/compiler.h b/src/compiler.h index f4f9ed8c..846b9555 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -20,6 +20,8 @@ struct PrattRule{ Precedence precedence; }; +enum ExprAction { EXPR_PUSH_STACK, EXPR_RVALUE, EXPR_LVALUE }; + class Compiler { std::unique_ptr lexer; stack contexts; @@ -82,28 +84,16 @@ public: rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; - rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX }; - rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX }; - rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX }; - rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("True")] = { METHOD(exprLiteral0), 
NO_INFIX }; + rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX }; rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX }; rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; - rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("-=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE }; rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; @@ -113,9 +103,18 @@ public: #undef METHOD #undef NO_INFIX -#define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression -#define EXPR_TUPLE() parse_expression(PREC_TUPLE) // no '=', but ',' is allowed -#define EXPR_ANY() parse_expression(PREC_ASSIGNMENT) + // rules[TK("=")] = { nullptr, METHOD(exprAssign), 
PREC_ASSIGNMENT }; + // rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("-=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; } private: @@ -199,18 +198,32 @@ private: ctx()->s_expr.push(std::move(e)); } + // assignment是一种特殊的无返回值表达式,他不应该位于PREC中 void exprInplaceAssign(){ auto e = expr_prev_line(); e->op = prev().type; e->lhs = ctx()->s_expr.popx(); + // lhs cannot be a assignment expression, i.e. a = b += c is not allowed + if(e->lhs->is_assignment()) SyntaxError(); EXPR_TUPLE(); e->rhs = ctx()->s_expr.popx(); ctx()->s_expr.push(std::move(e)); } + void EXPR(ExprAction action=EXPR_PUSH_STACK) { + parse_expression(PREC_TUPLE + 1, action); + } + + void EXPR_TUPLE(ExprAction action=EXPR_PUSH_STACK) { + parse_expression(PREC_TUPLE, action); + } + void exprAssign(){ auto e = expr_prev_line(); e->lhs = ctx()->s_expr.popx(); + // lhs cannot be a assignment expression, i.e. 
a = b = c is not allowed + // however in cpython, it is allowed, we'll fix it later + if(e->lhs->is_assignment()) SyntaxError(); EXPR_TUPLE(); e->rhs = ctx()->s_expr.popx(); ctx()->s_expr.push(std::move(e)); @@ -586,16 +599,11 @@ private: consume_end_stmt(); } - void parse_expression(int precedence){ - parse_expression((Precedence)precedence); - } - - void parse_expression(Precedence precedence) { + void parse_expression(int precedence, ExprAction action=EXPR_PUSH_STACK) { advance(); PrattCallback prefix = rules[prev().type].prefix; if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); (this->*prefix)(); - // rhs of = cannot be a AssignExpr or InplaceAssignExpr while (rules[curr().type].precedence >= precedence) { TokenIndex op = curr().type; advance(); @@ -603,34 +611,36 @@ private: if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true"); (this->*infix)(); } + switch(action){ + case EXPR_PUSH_STACK: break; + case EXPR_RVALUE: ctx()->emit_rvalue(); break; + case EXPR_LVALUE: ctx()->emit_lvalue(); break; + default: UNREACHABLE(); + } } void compile_if_stmt() { - match_newlines(); - EXPR(); // condition - ctx()->emit_expr(); - int ifpatch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); + EXPR(EXPR_RVALUE); // condition + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); - if (match(TK("elif"))) { - int exit_jump = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); - ctx()->patch_jump(ifpatch); + int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + ctx()->patch_jump(patch); compile_if_stmt(); - ctx()->patch_jump(exit_jump); + ctx()->patch_jump(exit_patch); } else if (match(TK("else"))) { - int exit_jump = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); - ctx()->patch_jump(ifpatch); + int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + ctx()->patch_jump(patch); compile_block_body(); - 
ctx()->patch_jump(exit_jump); + ctx()->patch_jump(exit_patch); } else { - ctx()->patch_jump(ifpatch); + ctx()->patch_jump(patch); } } void compile_while_loop() { ctx()->enter_block(WHILE_LOOP); - EXPR(); // condition - ctx()->emit_expr(); + EXPR(EXPR_RVALUE); // condition int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); @@ -638,25 +648,15 @@ private: ctx()->exit_block(); } - void EXPR_FOR_VARS(){ - int size = 0; - do { - consume(TK("@id")); - int index = ctx()->add_name(prev().str(), name_scope()); - emit(OP_LOAD_NAME_REF, index); - size++; - } while (match(TK(","))); - if(size > 1) emit(OP_BUILD_TUPLE_REF, size); - } - void compile_for_loop() { - EXPR_FOR_VARS();consume(TK("in")); - EXPR_TUPLE(); emit_expr(); - emit(OP_GET_ITER); + EXPR_TUPLE(EXPR_LVALUE); + consume(TK("in")); + EXPR_TUPLE(EXPR_RVALUE); + ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); ctx()->enter_block(FOR_LOOP); - emit(OP_FOR_ITER); + ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); compile_block_body(); - emit(OP_LOOP_CONTINUE, -1, true); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); ctx()->exit_block(); } @@ -688,120 +688,131 @@ private: for (int patch : patches) patch_jump(patch); } + void compile_decorated(){ + EXPR(EXPR_RVALUE); + if(!match_newlines(mode()==REPL_MODE)){ + SyntaxError("expected a new line after '@'"); + } + ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line); + consume(TK("def")); + compile_function(); + } + + bool try_compile_assignment(){ + + } + void compile_stmt() { - if (match(TK("break"))) { - if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop"); - consume_end_stmt(); - ctx()->emit(OP_LOOP_BREAK, BC_NOARG, prev().line); - } else if (match(TK("continue"))) { - if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); - consume_end_stmt(); - ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, prev().line); - } else if 
(match(TK("yield"))) { - if (contexts.size() <= 1) SyntaxError("'yield' outside function"); - EXPR_TUPLE(); - ctx()->emit_expr(); - consume_end_stmt(); - co()->is_generator = true; - ctx()->emit(OP_YIELD_VALUE, BC_NOARG, BC_KEEPLINE); - } else if (match(TK("return"))) { - if (contexts.size() <= 1) SyntaxError("'return' outside function"); - if(match_end_stmt()){ - ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line); - }else{ - EXPR_TUPLE(); - ctx()->emit_expr(); + advance(); + int kw_line = prev().line; // backup line number + switch(prev().type){ + case TK("break"): + if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop"); + ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line); consume_end_stmt(); - } - ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); - } else if (match(TK("if"))) { - compile_if_stmt(); - } else if (match(TK("while"))) { - compile_while_loop(); - } else if (match(TK("for"))) { - compile_for_loop(); - } else if (match(TK("import"))){ - compile_normal_import(); - } else if (match(TK("from"))){ - compile_from_import(); - } else if (match(TK("def"))){ - compile_function(); - } else if (match(TK("@"))){ - EXPR(); - if(!match_newlines(mode()==REPL_MODE)){ - SyntaxError("expected a new line after '@'"); - } - emit(OP_SETUP_DECORATOR); - consume(TK("def")); - compile_function(); - } else if (match(TK("try"))) { - compile_try_except(); - } else if(match(TK("assert"))) { - EXPR_TUPLE(); - ctx()->emit_expr(); - // OP_CODE needs to change - ctx()->emit(OP_ASSERT, BC_NOARG, BC_KEEPLINE); - consume_end_stmt(); - } else if(match(TK("with"))){ - EXPR(); - consume(TK("as")); - consume(TK("@id")); - Token tkname = prev(); - int index = co()->add_name(tkname.str(), name_scope()); - emit(OP_STORE_NAME, index); - emit(OP_LOAD_NAME_REF, index); - emit(OP_WITH_ENTER); - compile_block_body(); - emit(OP_LOAD_NAME_REF, index); - emit(OP_WITH_EXIT); - } else if(match(TK("label"))){ - if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); - 
consume(TK(".")); consume(TK("@id")); - Str label = prev().str(); - bool ok = co()->add_label(label); - if(!ok) SyntaxError("label '" + label + "' already exists"); - consume_end_stmt(); - } else if(match(TK("goto"))){ // https://entrian.com/goto/ - if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); - consume(TK(".")); consume(TK("@id")); - emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL)); - consume_end_stmt(); - } else if(match(TK("raise"))){ - consume(TK("@id")); - int dummy_t = co()->add_name(prev().str(), NAME_SPECIAL); - if(match(TK("(")) && !match(TK(")"))){ - EXPR(); consume(TK(")")); - }else{ - emit(OP_LOAD_NONE); - } - emit(OP_RAISE, dummy_t); - consume_end_stmt(); - } else if(match(TK("del"))){ - EXPR_TUPLE(); - emit(OP_DELETE_REF); - consume_end_stmt(); - } else if(match(TK("global"))){ - do { - consume(TK("@id")); - co()->global_names[prev().str()] = 1; - } while (match(TK(","))); - consume_end_stmt(); - } else if(match(TK("pass"))){ - consume_end_stmt(); - } else { - int begin = co()->codes.size(); - EXPR_ANY(); - int end = co()->codes.size(); - consume_end_stmt(); - // If last op is not an assignment, pop the result. 
- uint8_t last_op = co()->codes.back().op; - if( last_op!=OP_STORE_NAME && last_op!=OP_STORE_REF && - last_op!=OP_STORE_ALL_NAMES && last_op!=OP_STORE_CLASS_ATTR){ - for(int i=begin; icodes[i].op==OP_BUILD_TUPLE_REF) co()->codes[i].op = OP_BUILD_TUPLE; + break; + case TK("continue"): + if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("yield"): + if (contexts.size() <= 1) SyntaxError("'yield' outside function"); + EXPR_TUPLE(EXPR_RVALUE); + // if yield present, the function is a generator + ctx()->co->is_generator = true; + ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("return"): + if (contexts.size() <= 1) SyntaxError("'return' outside function"); + if(match_end_stmt()){ + ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); + }else{ + EXPR_TUPLE(EXPR_RVALUE); + consume_end_stmt(); } - if(mode()==REPL_MODE && name_scope() == NAME_GLOBAL) emit(OP_PRINT_EXPR, -1, true); - emit(OP_POP_TOP, -1, true); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line); + break; + /*************************************************/ + case TK("if"): compile_if_stmt(); break; + case TK("while"): compile_while_loop(); break; + case TK("for"): compile_for_loop(); break; + case TK("import"): compile_normal_import(); break; + case TK("from"): compile_from_import(); break; + case TK("def"): compile_function(); break; + case TK("@"): compile_decorated(); break; + case TK("try"): compile_try_except(); break; + case TK("pass"): consume_end_stmt(); break; + /*************************************************/ + case TK("assert"): + EXPR_TUPLE(EXPR_RVALUE); + // TODO: change OP_ASSERT impl in ceval.h + ctx()->emit(OP_ASSERT, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("del"): + EXPR_TUPLE(EXPR_LVALUE); + ctx()->emit(OP_DELETE_REF, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("global"): + do { + 
consume(TK("@id")); + co()->global_names.insert(prev().str()); + } while (match(TK(","))); + consume_end_stmt(); + break; + case TK("raise"): { + consume(TK("@id")); + int dummy_t = ctx()->add_name(prev().str(), NAME_SPECIAL); + if(match(TK("(")) && !match(TK(")"))){ + EXPR(EXPR_RVALUE); consume(TK(")")); + }else{ + ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); + } + ctx()->emit(OP_RAISE, dummy_t, kw_line); + consume_end_stmt(); + } break; + case TK("with"): { + EXPR(EXPR_RVALUE); + consume(TK("as")); + consume(TK("@id")); + int index = ctx()->add_name(prev().str(), name_scope()); + emit(OP_STORE_NAME, index); + emit(OP_LOAD_NAME_REF, index); + emit(OP_WITH_ENTER); + compile_block_body(); + emit(OP_LOAD_NAME_REF, index); + emit(OP_WITH_EXIT); + } break; + /*************************************************/ + // TODO: refactor goto/label use special $ syntax + case TK("label"): + if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); + consume(TK(".")); consume(TK("@id")); + bool ok = co()->add_label(prev().str()); + if(!ok) SyntaxError("label " + prev().str().escape(true) + " already exists"); + consume_end_stmt(); + break; + case TK("goto"): + if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); + consume(TK(".")); consume(TK("@id")); + emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL)); + consume_end_stmt(); + break; + /*************************************************/ + // dangling expression or assignment + default: { + EXPR_TUPLE(true); + bool assigment = try_compile_assignment(); + if(!assigment){ + if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){ + emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE); + } + emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); + } + consume_end_stmt(); } } } diff --git a/src/expr.h b/src/expr.h index aff04c12..4145e741 100644 --- a/src/expr.h +++ b/src/expr.h @@ -6,14 +6,19 @@ #include "error.h" #include "ceval.h" + namespace pkpy{ struct CodeEmitContext; struct Expr{ int line = 0; - 
virtual Str str() const = 0; - ~Expr() = default; + virtual ~Expr() = default; virtual void emit(CodeEmitContext* ctx) = 0; + virtual Str str() const = 0; + + virtual void emit_lvalue(CodeEmitContext* ctx){ + throw std::runtime_error("emit_lvalue() is not supported"); + } }; struct CodeEmitContext{ @@ -337,19 +342,6 @@ struct AttribExpr: Expr{ Str str() const override { return "a.b"; } }; -struct AssignExpr: Expr{ - Expr_ lhs; - Expr_ rhs; - Str str() const override { return "="; } -}; - -struct InplaceAssignExpr: Expr{ - TokenIndex op; - Expr_ lhs; - Expr_ rhs; - Str str() const override { return TK_STR(op); } -}; - struct CallExpr: Expr{ std::vector args; std::vector> kwargs; diff --git a/src/lexer.h b/src/lexer.h index 8b997ca5..7551421e 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -71,7 +71,6 @@ struct Token{ // https://docs.python.org/3/reference/expressions.html#operator-precedence enum Precedence { PREC_NONE, - PREC_ASSIGNMENT, // = PREC_TUPLE, // , PREC_TERNARY, // ?: PREC_LOGICAL_OR, // or From 83d6ac2e784d131a6e2bea4fa0ff04dac22517fe Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 1 Apr 2023 21:57:05 +0800 Subject: [PATCH 24/73] up --- .github/workflows.rar | Bin 0 -> 1227 bytes .github/workflows/main.yml | 129 ----------------------- src/ceval.h | 15 +-- src/compiler.h | 209 +++++++++++++++---------------------- src/expr.h | 1 + src/frame.h | 4 - 6 files changed, 88 insertions(+), 270 deletions(-) create mode 100644 .github/workflows.rar delete mode 100644 .github/workflows/main.yml diff --git a/.github/workflows.rar b/.github/workflows.rar new file mode 100644 index 0000000000000000000000000000000000000000..81c8ec1d2a4878c49d57e099b87ee5ce03c0e574 GIT binary patch literal 1227 zcmV;+1T_0nVR9iF2LS-{;ezW60R;yD1_1$pfPesRB3rO80s{-q2n5j|pg1}=!1P%j+9;xgF(rw$TIq`tQMoMlh~C~}#fzo%Ytg%J;`lRcHn5&y zJSPb_{CM>6oIMy%Ilw*ullUW^ArYZI5;r^WJINeaN&HDVGyQ}ga6r_j_s(=p{PKTM zwv0T4s5@T=;-^6MEb$<;?UIGq{sYbfOp(e~KMx`*4pIi7OaL>JJ_pdtna<{j=bP}O 
znQQ7Jdyh2o6QD7o%%tRA;Q6DJp_(@#!IV-Iu6+jgm0`p7Z1`8^Jy3Z-no2o->k-J3 z%fW&WlTSaQ>)WC@WDv9VJp_1h9El`h;It`-4S(IHR-o(+|1zXo15I(~b_hKKox1>s zL$Asa2)Ib}gX1d<&Yw>^OEPxH04XDLK#T@M2hx7>KInn=dw+#zk_gP{O(@jzQLl!t z&AAMnD{I`C{gD9#%D#N6TP{SQ$zPIS=1NABL`V`v-T%~hoX-e;XxVWax3j9G9ld>> zy^3l4N~Mnq9nr6F>V!=Jq$ykc6c=Oj@aji19ISclsg=Kq;gUlu(TF4u0N51FsU{1g z@6;}R?N>@v%J@Lzge@E@+r3#kRS0(3w^W;To&c@WQpYp(?wCuP&Ps2NqQY$~=Fe1C zT+fEY?T5j$09)E`Old;dnrv_5ngamc)HcQH=Tx7t+bBF#T4D8L%lUSCn#K<*jgIJf!LULpjc8P|PNN zEFLjy>Y>rcQ|bR+wZHDmN=FVx+QB`QCW+mQA0(KM{U$KR5p@%_x#sf}IST3MUY=Cy zUy+-B^0c6_btl5hGQn+D`bkbcM&Ksbi7wcj{SwSJBR_%Vile7Bm$|izsrbvWwQ?z< z9m?(>PihIk=`W5D!Xw{`|KHP*qIOu{cMYZ{_UP_iFL!h;13+cQffPo|&;`MXDQemQ z<@C#bR*zMQuIBJ_Fg^XaErbr>dWhrs&461oa31Qk^yU=IAo=Vi(fkq1SB&L87`GxU>d5Smpqr}Cp zI~PcqdfWG%%3x<&k(E;=kB}-&8uNb4>gTchEIj^#(djoBEa}C5kH3l^wP#gkjxUSg z0{hb%$Xg!;l<)?G9g1W~grC?EoD&HI77TN$X>6PkRJQD43S9&p5 zwc-q$(LL*;IVkEhas_next_bytecode()){ - heap._auto_collect(this); + while(true){ + heap._auto_collect(this); // gc const Bytecode& byte = frame->next_bytecode(); switch (byte.op) @@ -320,15 +321,7 @@ inline PyObject* VM::run_frame(Frame* frame){ default: throw std::runtime_error(Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); } } - - if(frame->co->src->mode == EVAL_MODE || frame->co->src->mode == JSON_MODE){ - if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE"); - return frame->pop_value(this); - } -#if DEBUG_EXTRA_CHECK - if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE"); -#endif - return None; + UNREACHABLE(); } } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index 846b9555..774e4540 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -20,8 +20,6 @@ struct PrattRule{ Precedence precedence; }; -enum ExprAction { EXPR_PUSH_STACK, EXPR_RVALUE, EXPR_LVALUE }; - class Compiler { std::unique_ptr lexer; stack contexts; @@ -42,8 +40,10 @@ class Compiler { } void pop_context(){ 
- if(!ctx()->s_expr.empty()){ - ctx()->emit_expr(); + if(!ctx()->s_expr.empty()) UNREACHABLE(); + if(ctx()->co->codes.back().op != OP_RETURN_VALUE){ + ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); } ctx()->co->optimize(vm); contexts.pop(); @@ -164,16 +164,12 @@ private: if (!match_end_stmt()) SyntaxError("expected statement end"); } - void exprLiteral(){ - ctx()->s_expr.push( - expr_prev_line(prev().value) - ); + void EXPR(ExprAction action=EXPR_PUSH_STACK) { + parse_expression(PREC_TUPLE + 1, action); } - void exprFString(){ - ctx()->s_expr.push( - expr_prev_line(std::get(prev().value)) - ); + void EXPR_TUPLE(ExprAction action=EXPR_PUSH_STACK) { + parse_expression(PREC_TUPLE, action); } template @@ -183,6 +179,19 @@ private: return expr; } + /********************************************/ + + // PASS + void exprLiteral(){ + ctx()->s_expr.push(expr_prev_line(prev().value)); + } + + // PASS + void exprFString(){ + ctx()->s_expr.push(expr_prev_line(std::get(prev().value))); + } + + // PASS void exprLambda(){ auto e = expr_prev_line(); e->func.name = ""; @@ -192,42 +201,14 @@ private: consume(TK(":")); } e->func.code = push_context(lexer->src, ""); - EXPR(); + // https://github.com/blueloveTH/pocketpy/issues/37 + EXPR(true); ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); ctx()->s_expr.push(std::move(e)); } - // assignment是一种特殊的无返回值表达式,他不应该位于PREC中 - void exprInplaceAssign(){ - auto e = expr_prev_line(); - e->op = prev().type; - e->lhs = ctx()->s_expr.popx(); - // lhs cannot be a assignment expression, i.e. 
a = b += c is not allowed - if(e->lhs->is_assignment()) SyntaxError(); - EXPR_TUPLE(); - e->rhs = ctx()->s_expr.popx(); - ctx()->s_expr.push(std::move(e)); - } - - void EXPR(ExprAction action=EXPR_PUSH_STACK) { - parse_expression(PREC_TUPLE + 1, action); - } - - void EXPR_TUPLE(ExprAction action=EXPR_PUSH_STACK) { - parse_expression(PREC_TUPLE, action); - } - void exprAssign(){ - auto e = expr_prev_line(); - e->lhs = ctx()->s_expr.popx(); - // lhs cannot be a assignment expression, i.e. a = b = c is not allowed - // however in cpython, it is allowed, we'll fix it later - if(e->lhs->is_assignment()) SyntaxError(); - EXPR_TUPLE(); - e->rhs = ctx()->s_expr.popx(); - ctx()->s_expr.push(std::move(e)); - // if(co()->codes.empty()) UNREACHABLE(); // bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; // int _name_arg = co()->codes.back().arg; @@ -277,6 +258,7 @@ private: // co()->_rvalue -= 1; } + // PASS void exprTuple(){ auto e = expr_prev_line(); do { @@ -286,6 +268,7 @@ private: ctx()->s_expr.push(std::move(e)); } + // PASS void exprOr(){ auto e = expr_prev_line(); e->lhs = ctx()->s_expr.popx(); @@ -294,14 +277,16 @@ private: ctx()->s_expr.push(std::move(e)); } + // PASS void exprAnd(){ - auto e = expr_prev_line(); + auto e = expr_prev_line(); e->lhs = ctx()->s_expr.popx(); parse_expression(PREC_LOGICAL_AND + 1); e->rhs = ctx()->s_expr.popx(); ctx()->s_expr.push(std::move(e)); } + // PASS void exprTernary(){ auto e = expr_prev_line(); e->cond = ctx()->s_expr.popx(); @@ -313,6 +298,7 @@ private: ctx()->s_expr.push(std::move(e)); } + // PASS void exprBinaryOp(){ auto e = expr_prev_line(); e->op = prev().type; @@ -322,85 +308,56 @@ private: ctx()->s_expr.push(std::move(e)); } + // PASS void exprNot() { parse_expression(PREC_LOGICAL_NOT + 1); - ctx()->s_expr.push( - expr_prev_line(ctx()->s_expr.popx()) - ); + ctx()->s_expr.push(expr_prev_line(ctx()->s_expr.popx())); } + // PASS void exprUnaryOp(){ - TokenIndex type = prev().type; + TokenIndex op = 
prev().type; parse_expression(PREC_UNARY + 1); - Expr_ e; - switch(type){ + switch(op){ case TK("-"): - e = expr_prev_line(ctx()->s_expr.popx()); + ctx()->s_expr.push(expr_prev_line(ctx()->s_expr.popx())); + break; case TK("*"): - e = expr_prev_line(ctx()->s_expr.popx()); + ctx()->s_expr.push(expr_prev_line(ctx()->s_expr.popx())); + break; default: UNREACHABLE(); } - ctx()->s_expr.push(std::move(e)); } - // () is just for change precedence + // PASS void exprGroup(){ match_newlines(mode()==REPL_MODE); - EXPR_TUPLE(); + EXPR_TUPLE(); // () is just for change precedence match_newlines(mode()==REPL_MODE); consume(TK(")")); } - // void _consume_comp(Opcode op0, Opcode op1, int _patch, int _body_start){ - // int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1); - // int _body_end = co()->codes.size(); - // co()->codes[_patch].op = OP_JUMP_ABSOLUTE; - // co()->codes[_patch].arg = _body_end; - // emit(op0, 0); - // EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE(); - // match_newlines(mode()==REPL_MODE); - - // int _skipPatch = emit(OP_JUMP_ABSOLUTE); - // int _cond_start = co()->codes.size(); - // int _cond_end_return = -1; - // if(match(TK("if"))) { - // EXPR_TUPLE(); - // _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1); - // } - // patch_jump(_skipPatch); - - // emit(OP_GET_ITER); - // co()->_enter_block(FOR_LOOP); - // emit(OP_FOR_ITER); - - // if(_cond_end_return != -1) { // there is an if condition - // emit(OP_JUMP_ABSOLUTE, _cond_start); - // patch_jump(_cond_end_return); - // int ifpatch = emit(OP_POP_JUMP_IF_FALSE); - // emit(OP_JUMP_ABSOLUTE, _body_start); - // patch_jump(_body_end_return); - // emit(op1); - // patch_jump(ifpatch); - // }else{ - // emit(OP_JUMP_ABSOLUTE, _body_start); - // patch_jump(_body_end_return); - // emit(op1); - // } - - // emit(OP_LOOP_CONTINUE, -1, true); - // co()->_exit_block(); - // match_newlines(mode()==REPL_MODE); - // } - + // PASS template void _consume_comp(Expr_ expr){ static_assert(std::is_base_of::value); std::unique_ptr ce = 
std::make_unique(); ce->expr = std::move(expr); - // ... + EXPR_TUPLE(); // must be a lvalue + ce->vars = ctx()->s_expr.popx(); + consume(TK("in")); + EXPR(); + ce->iter = ctx()->s_expr.popx(); + match_newlines(mode()==REPL_MODE); + if(match(TK("if"))){ + EXPR(); + ce->cond = ctx()->s_expr.popx(); + } ctx()->s_expr.push(std::move(ce)); + match_newlines(mode()==REPL_MODE); } + // PASS void exprList() { auto e = expr_prev_line(); do { @@ -414,15 +371,15 @@ private: consume(TK("]")); return; } + match_newlines(mode()==REPL_MODE); } while (match(TK(","))); - match_newlines(mode()==REPL_MODE); consume(TK("]")); ctx()->s_expr.push(std::move(e)); } - // {...} may be dict or set + // PASS void exprMap() { - bool parsing_dict = false; + bool parsing_dict = false; // {...} may be dict or set std::vector items; do { match_newlines(mode()==REPL_MODE); @@ -446,6 +403,7 @@ private: consume(TK("}")); return; } + match_newlines(mode()==REPL_MODE); } while (match(TK(","))); consume(TK("}")); if(items.size()==0 || parsing_dict){ @@ -457,8 +415,10 @@ private: } } + // PASS void exprCall() { auto e = expr_prev_line(); + e->callable = ctx()->s_expr.popx(); do { match_newlines(mode()==REPL_MODE); if (curr().type==TK(")")) break; @@ -487,12 +447,12 @@ private: // } } + // PASS void exprName(){ - ctx()->s_expr.push( - expr_prev_line(prev().str(), name_scope()) - ); + ctx()->s_expr.push(expr_prev_line(prev().str(), name_scope())); } + // PASS void exprAttrib() { consume(TK("@id")); ctx()->s_expr.push( @@ -500,6 +460,7 @@ private: ); } + // PASS void exprSubscr() { auto e = expr_prev_line(); std::vector items; @@ -526,10 +487,9 @@ private: ctx()->s_expr.push(std::move(e)); } + // PASS void exprLiteral0() { - ctx()->s_expr.push( - expr_prev_line(prev().type) - ); + ctx()->s_expr.push(expr_prev_line(prev().type)); } void compile_block_body() { @@ -599,7 +559,7 @@ private: consume_end_stmt(); } - void parse_expression(int precedence, ExprAction action=EXPR_PUSH_STACK) { + void 
parse_expression(int precedence, bool push_stack=true) { advance(); PrattCallback prefix = rules[prev().type].prefix; if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); @@ -611,16 +571,11 @@ private: if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true"); (this->*infix)(); } - switch(action){ - case EXPR_PUSH_STACK: break; - case EXPR_RVALUE: ctx()->emit_rvalue(); break; - case EXPR_LVALUE: ctx()->emit_lvalue(); break; - default: UNREACHABLE(); - } + if(!push_stack) ctx()->emit_expr(); } void compile_if_stmt() { - EXPR(EXPR_RVALUE); // condition + EXPR(true); // condition int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); if (match(TK("elif"))) { @@ -640,7 +595,7 @@ private: void compile_while_loop() { ctx()->enter_block(WHILE_LOOP); - EXPR(EXPR_RVALUE); // condition + EXPR(true); // condition int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); @@ -649,9 +604,10 @@ private: } void compile_for_loop() { - EXPR_TUPLE(EXPR_LVALUE); + EXPR_TUPLE(); + ctx()->emit_lvalue(); consume(TK("in")); - EXPR_TUPLE(EXPR_RVALUE); + EXPR(true); ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); ctx()->enter_block(FOR_LOOP); ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); @@ -689,10 +645,8 @@ private: } void compile_decorated(){ - EXPR(EXPR_RVALUE); - if(!match_newlines(mode()==REPL_MODE)){ - SyntaxError("expected a new line after '@'"); - } + EXPR(true); + if(!match_newlines(mode()==REPL_MODE)) SyntaxError(); ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line); consume(TK("def")); compile_function(); @@ -718,7 +672,7 @@ private: break; case TK("yield"): if (contexts.size() <= 1) SyntaxError("'yield' outside function"); - EXPR_TUPLE(EXPR_RVALUE); + EXPR_TUPLE(true); // if yield present, the function is a generator ctx()->co->is_generator = true; ctx()->emit(OP_YIELD_VALUE, BC_NOARG, 
kw_line); @@ -729,7 +683,7 @@ private: if(match_end_stmt()){ ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); }else{ - EXPR_TUPLE(EXPR_RVALUE); + EXPR_TUPLE(true); consume_end_stmt(); } ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line); @@ -746,13 +700,14 @@ private: case TK("pass"): consume_end_stmt(); break; /*************************************************/ case TK("assert"): - EXPR_TUPLE(EXPR_RVALUE); + EXPR_TUPLE(true); // TODO: change OP_ASSERT impl in ceval.h ctx()->emit(OP_ASSERT, BC_NOARG, kw_line); consume_end_stmt(); break; case TK("del"): - EXPR_TUPLE(EXPR_LVALUE); + EXPR_TUPLE(); + ctx()->emit_lvalue(); ctx()->emit(OP_DELETE_REF, BC_NOARG, kw_line); consume_end_stmt(); break; @@ -767,7 +722,7 @@ private: consume(TK("@id")); int dummy_t = ctx()->add_name(prev().str(), NAME_SPECIAL); if(match(TK("(")) && !match(TK(")"))){ - EXPR(EXPR_RVALUE); consume(TK(")")); + EXPR(true); consume(TK(")")); }else{ ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); } @@ -775,7 +730,7 @@ private: consume_end_stmt(); } break; case TK("with"): { - EXPR(EXPR_RVALUE); + EXPR(true); consume(TK("as")); consume(TK("@id")); int index = ctx()->add_name(prev().str(), name_scope()); @@ -953,6 +908,7 @@ public: if(mode()==EVAL_MODE) { EXPR_TUPLE(); consume(TK("@eof")); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); return code; }else if(mode()==JSON_MODE){ @@ -962,6 +918,7 @@ public: else if(match(TK("["))) exprList(); else SyntaxError("expect a JSON object or array"); consume(TK("@eof")); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); return code; } diff --git a/src/expr.h b/src/expr.h index 4145e741..2fe92068 100644 --- a/src/expr.h +++ b/src/expr.h @@ -343,6 +343,7 @@ struct AttribExpr: Expr{ }; struct CallExpr: Expr{ + Expr_ callable; std::vector args; std::vector> kwargs; Str str() const override { return "()"; } diff --git a/src/frame.h b/src/frame.h index e2b9bc13..98e872c9 100644 --- a/src/frame.h +++ b/src/frame.h @@ -53,10 +53,6 @@ 
struct Frame { // return ss.str(); // } - bool has_next_bytecode() const { - return _next_ip < co->codes.size(); - } - PyObject* pop(){ #if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); From b56978fd3d0ff954d1d61f29d8fd7d7d90b4751b Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 1 Apr 2023 22:30:43 +0800 Subject: [PATCH 25/73] up --- src/compiler.h | 1 + src/expr.h | 69 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 14 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 774e4540..d8660a4d 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -830,6 +830,7 @@ private: } void compile_function(){ + // TODO: bug, if there are multiple decorators, will cause error bool has_decorator = !co()->codes.empty() && co()->codes.back().op == OP_SETUP_DECORATOR; Function func; StrName obj_name; diff --git a/src/expr.h b/src/expr.h index 2fe92068..7106f82a 100644 --- a/src/expr.h +++ b/src/expr.h @@ -16,8 +16,8 @@ struct Expr{ virtual void emit(CodeEmitContext* ctx) = 0; virtual Str str() const = 0; - virtual void emit_lvalue(CodeEmitContext* ctx){ - throw std::runtime_error("emit_lvalue() is not supported"); + virtual void emit_ref(CodeEmitContext* ctx){ + throw std::runtime_error("emit_ref() is not supported"); } }; @@ -108,8 +108,14 @@ struct NameExpr: Expr{ int index = ctx->add_name(name, scope); ctx->emit(OP_LOAD_NAME, index, line); } + + void emit_ref(CodeEmitContext* ctx) override { + int index = ctx->add_name(name, scope); + ctx->emit(OP_LOAD_NAME_REF, index, line); + } }; + struct StarredExpr: Expr{ Expr_ child; StarredExpr(Expr_&& child): child(std::move(child)) {} @@ -119,6 +125,11 @@ struct StarredExpr: Expr{ child->emit(ctx); ctx->emit(OP_UNARY_STAR, (int)false, line); } + + void emit_ref(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_STAR, (int)true, line); + } }; struct NegatedExpr: Expr{ @@ -232,28 +243,58 @@ struct SliceExpr: Expr{ Expr_ stop; 
Expr_ step; Str str() const override { return "slice()"; } + + void emit(CodeEmitContext* ctx) override { + if(start){ + start->emit(ctx); + }else{ + ctx->emit(OP_LOAD_NONE, BC_NOARG, line); + } + + if(stop){ + stop->emit(ctx); + }else{ + ctx->emit(OP_LOAD_NONE, BC_NOARG, line); + } + + if(step){ + step->emit(ctx); + }else{ + ctx->emit(OP_LOAD_NONE, BC_NOARG, line); + } + + ctx->emit(OP_BUILD_SLICE, BC_NOARG, line); + } }; -struct ListExpr: Expr{ +struct SequenceExpr: Expr{ std::vector items; - Str str() const override { return "[]"; } + virtual Opcode opcode() const = 0; + + void emit(CodeEmitContext* ctx) override { + for(auto& item: items) item->emit(ctx); + ctx->emit(opcode(), items.size(), line); + } }; -struct DictExpr: Expr{ - std::vector items; // each item is a DictItemExpr - DictExpr(std::vector&& items): items(std::move(items)) {} - Str str() const override { return "{}"; } +struct ListExpr: SequenceExpr{ + Str str() const override { return "list()"; } + Opcode opcode() const override { return OP_BUILD_LIST; } }; -struct SetExpr: Expr{ - std::vector items; - SetExpr(std::vector&& items): items(std::move(items)) {} - Str str() const override { return "{}"; } +struct DictExpr: SequenceExpr{ + Str str() const override { return "dict()"; } + Opcode opcode() const override { return OP_BUILD_MAP; } }; -struct TupleExpr: Expr{ - std::vector items; +struct SetExpr: SequenceExpr{ + Str str() const override { return "set()"; } + Opcode opcode() const override { return OP_BUILD_SET; } +}; + +struct TupleExpr: SequenceExpr{ Str str() const override { return "tuple()"; } + Opcode opcode() const override { return OP_BUILD_TUPLE; } }; struct CompExpr: Expr{ From 4d7b9d1c7c05acef801bc38a489f557ea9ed1343 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 1 Apr 2023 22:43:39 +0800 Subject: [PATCH 26/73] up --- src/compiler.h | 11 +++++++---- src/expr.h | 1 + 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 
d8660a4d..52b459fb 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -359,21 +359,24 @@ private: // PASS void exprList() { - auto e = expr_prev_line(); + int line = prev().line; + std::vector items; do { match_newlines(mode()==REPL_MODE); if (curr().type == TK("]")) break; EXPR(); - e->items.push_back(ctx()->s_expr.popx()); + items.push_back(ctx()->s_expr.popx()); match_newlines(mode()==REPL_MODE); - if(e->items.size()==1 && match(TK("for"))){ - _consume_comp(std::move(e->items[0])); + if(items.size()==1 && match(TK("for"))){ + _consume_comp(std::move(items[0])); consume(TK("]")); return; } match_newlines(mode()==REPL_MODE); } while (match(TK(","))); consume(TK("]")); + auto e = expr_prev_line(std::move(items)); + e->line = line; // override line ctx()->s_expr.push(std::move(e)); } diff --git a/src/expr.h b/src/expr.h index 7106f82a..f08e671d 100644 --- a/src/expr.h +++ b/src/expr.h @@ -269,6 +269,7 @@ struct SliceExpr: Expr{ struct SequenceExpr: Expr{ std::vector items; + SequenceExpr(std::vector&& items): items(std::move(items)) {} virtual Opcode opcode() const = 0; void emit(CodeEmitContext* ctx) override { From b88cd6604628714cde4619133eb1341e3067c1ff Mon Sep 17 00:00:00 2001 From: BLUELOVETH Date: Sat, 1 Apr 2023 16:52:35 +0000 Subject: [PATCH 27/73] up --- src/compiler.h | 9 ++++++--- src/expr.h | 47 ++++++++++++++++++++++++++++++++++++++--------- src/ref.h | 1 + 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 52b459fb..10eb8886 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -676,13 +676,13 @@ private: case TK("yield"): if (contexts.size() <= 1) SyntaxError("'yield' outside function"); EXPR_TUPLE(true); - // if yield present, the function is a generator + // if yield present, mark the function as generator ctx()->co->is_generator = true; ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line); consume_end_stmt(); break; case TK("return"): - if (contexts.size() <= 1) SyntaxError("'return' outside function"); 
+ if (contexts.size() <= 1) SyntaxError("'return' outside function"); if(match_end_stmt()){ ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); }else{ @@ -710,7 +710,10 @@ private: break; case TK("del"): EXPR_TUPLE(); - ctx()->emit_lvalue(); + Expr_ e = ctx()->s_expr.popx(); + switch(e->ref_type()){ + case EXPR_NAME_REF: + } ctx()->emit(OP_DELETE_REF, BC_NOARG, kw_line); consume_end_stmt(); break; diff --git a/src/expr.h b/src/expr.h index f08e671d..ba9ab990 100644 --- a/src/expr.h +++ b/src/expr.h @@ -10,14 +10,25 @@ namespace pkpy{ struct CodeEmitContext; + +enum ExprRefType{ + EXPR_NO_REF, + EXPR_NAME_REF, + EXPR_ATTR_REF, + EXPR_INDEX_REF, + EXPR_STARRED_REF, + EXPR_TUPLE_REF +}; + struct Expr{ int line = 0; virtual ~Expr() = default; virtual void emit(CodeEmitContext* ctx) = 0; virtual Str str() const = 0; + virtual std::vector children() = 0; - virtual void emit_ref(CodeEmitContext* ctx){ - throw std::runtime_error("emit_ref() is not supported"); + virtual ExprRefType ref_type() const { + return EXPR_NO_REF; } }; @@ -96,6 +107,7 @@ struct CodeEmitContext{ } }; + struct NameExpr: Expr{ Str name; NameScope scope; @@ -109,9 +121,8 @@ struct NameExpr: Expr{ ctx->emit(OP_LOAD_NAME, index, line); } - void emit_ref(CodeEmitContext* ctx) override { - int index = ctx->add_name(name, scope); - ctx->emit(OP_LOAD_NAME_REF, index, line); + ExprRefType ref_type() const override { + return EXPR_NAME_REF; } }; @@ -126,12 +137,12 @@ struct StarredExpr: Expr{ ctx->emit(OP_UNARY_STAR, (int)false, line); } - void emit_ref(CodeEmitContext* ctx) override { - child->emit(ctx); - ctx->emit(OP_UNARY_STAR, (int)true, line); + ExprRefType ref_type() const override { + return EXPR_STARRED_REF; } }; + struct NegatedExpr: Expr{ Expr_ child; NegatedExpr(Expr_&& child): child(std::move(child)) {} @@ -296,6 +307,10 @@ struct SetExpr: SequenceExpr{ struct TupleExpr: SequenceExpr{ Str str() const override { return "tuple()"; } Opcode opcode() const override { return OP_BUILD_TUPLE; } + + 
ExprRefType ref_type() const override { + return EXPR_TUPLE_REF; + } }; struct CompExpr: Expr{ @@ -330,7 +345,7 @@ struct LambdaExpr: Expr{ void emit(CodeEmitContext* ctx) override { VM* vm = ctx->vm; ctx->emit(OP_LOAD_FUNCTION, ctx->add_const(VAR(func)), line); - if(scope == NAME_LOCAL) ctx->emit(OP_SETUP_CLOSURE, BC_NOARG, line); + if(scope == NAME_LOCAL) ctx->emit(OP_SETUP_CLOSURE, BC_NOARG, BC_KEEPLINE); } }; @@ -374,6 +389,16 @@ struct SubscrExpr: Expr{ Expr_ a; Expr_ b; Str str() const override { return "a[b]"; } + + void emit(CodeEmitContext* ctx) override{ + a->emit(ctx); + b->emit(ctx); + ctx->emit(OP_BUILD_INDEX, BC_NOARG, line); + } + + ExprRefType ref_type() const override { + return EXPR_INDEX_REF; + } }; struct AttribExpr: Expr{ @@ -382,6 +407,10 @@ struct AttribExpr: Expr{ AttribExpr(Expr_ a, const Str& b): a(std::move(a)), b(b) {} AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} Str str() const override { return "a.b"; } + + ExprRefType ref_type() const override { + return EXPR_ATTR_REF; + } }; struct CallExpr: Expr{ diff --git a/src/ref.h b/src/ref.h index 8026929e..11b5efc5 100644 --- a/src/ref.h +++ b/src/ref.h @@ -69,6 +69,7 @@ struct NameRef : BaseRef { } }; + struct AttrRef : BaseRef { mutable PyObject* obj; NameRef attr; From 449fb9a2f880adfa6addaef941e1965b29af2801 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 2 Apr 2023 14:14:41 +0800 Subject: [PATCH 28/73] up --- src/ceval.h | 113 ++++++++++++++++--------- src/codeobject.h | 4 +- src/compiler.h | 70 +++++++--------- src/expr.h | 210 ++++++++++++++++++++++++++++++++++++----------- src/frame.h | 41 ++------- src/opcodes.h | 17 +++- src/ref.h | 5 -- 7 files changed, 295 insertions(+), 165 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index e72c9c09..9a45f1b3 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -26,49 +26,86 @@ inline PyObject* VM::run_frame(Frame* frame){ Function& f = CAST(Function&, frame->top()); // reference f._closure = frame->_locals; } continue; 
- case OP_LOAD_NAME_REF: { - frame->push(PyRef(NameRef(frame->co->names[byte.arg]))); - } continue; - case OP_LOAD_NAME: { - frame->push(NameRef(frame->co->names[byte.arg]).get(this, frame)); - } continue; - case OP_STORE_NAME: { - auto& p = frame->co->names[byte.arg]; - NameRef(p).set(this, frame, frame->pop()); - } continue; - case OP_BUILD_ATTR_REF: case OP_BUILD_ATTR: { - auto& attr = frame->co->names[byte.arg]; - PyObject* obj = frame->pop_value(this); - AttrRef ref = AttrRef(obj, NameRef(attr)); - if(byte.op == OP_BUILD_ATTR) frame->push(ref.get(this, frame)); - else frame->push(PyRef(ref)); - } continue; - case OP_BUILD_INDEX: { - PyObject* index = frame->pop_value(this); - auto ref = IndexRef(frame->pop_value(this), index); - if(byte.arg > 0) frame->push(ref.get(this, frame)); - else frame->push(PyRef(ref)); - } continue; - case OP_FAST_INDEX: case OP_FAST_INDEX_REF: { - auto& a = frame->co->names[byte.arg & 0xFFFF]; - auto& x = frame->co->names[(byte.arg >> 16) & 0xFFFF]; - auto ref = IndexRef(NameRef(a).get(this, frame), NameRef(x).get(this, frame)); - if(byte.op == OP_FAST_INDEX) frame->push(ref.get(this, frame)); - else frame->push(PyRef(ref)); - } continue; case OP_ROT_TWO: ::std::swap(frame->top(), frame->top_1()); continue; - case OP_STORE_REF: { - PyRef_AS_C(frame->top_1())->set(this, frame, frame->top_value(this)); - frame->_pop(); frame->_pop(); - } continue; - case OP_DELETE_REF: - PyRef_AS_C(frame->top())->del(this, frame); - frame->_pop(); - continue; case OP_BUILD_TUPLE: { Args items = frame->pop_n_values_reversed(this, byte.arg); frame->push(VAR(std::move(items))); } continue; + /*****************************************/ + case OP_LOAD_NAME: { + // TODO: use name resolution linked list to optimize this + StrName name = frame->co->names[byte.arg]; + PyObject* val; + val = frame->f_locals().try_get(name); + if(val != nullptr) { frame->push(val); continue; } + val = frame->f_closure_try_get(name); + if(val != nullptr) { frame->push(val); 
continue; } + val = frame->f_globals().try_get(name); + if(val != nullptr) { frame->push(val); continue; } + val = vm->builtins->attr().try_get(name); + if(val != nullptr) { frame->push(val); continue; } + vm->NameError(name); + } continue; + case OP_LOAD_ATTR: { + PyObject* a = frame->top(); + StrName name = frame->co->names[byte.arg]; + frame->top() = getattr(a, name); + } continue; + case OP_LOAD_SUBSCR: { + PyObject* b = frame->popx(); + PyObject* a = frame->top(); + frame->top() = fast_call(__getitem__, Args{a, b}); + } continue; + case OP_STORE_LOCAL: { + StrName name = frame->co->names[byte.arg]; + frame->f_locals().set(name, frame->popx()); + } continue; + case OP_STORE_GLOBAL: { + StrName name = frame->co->names[byte.arg]; + frame->f_globals().set(name, frame->popx()); + } continue; + case OP_STORE_ATTR: { + StrName name = frame->co->names[byte.arg]; + PyObject* a = frame->popx(); + PyObject* val = frame->popx(); + setattr(a, name, val); + } continue; + case OP_STORE_SUBSCR: { + Args args(3); + args[1] = frame->popx(); // b + args[0] = frame->popx(); // a + args[2] = frame->popx(); // val + fast_call(__setitem__, std::move(args)); + } continue; + case OP_DELETE_LOCAL: { + StrName name = frame->co->names[byte.arg]; + if(frame->f_locals().contains(name)){ + frame->f_locals().erase(name); + }else{ + NameError(name); + } + } continue; + case OP_DELETE_GLOBAL: { + StrName name = frame->co->names[byte.arg]; + if(frame->f_globals().contains(name)){ + frame->f_globals().erase(name); + }else{ + NameError(name); + } + } continue; + case OP_DELETE_ATTR: { + PyObject* a = frame->popx(); + StrName name = frame->co->names[byte.arg]; + if(!a->is_attr_valid()) TypeError("cannot delete attribute"); + if(!a->attr().contains(name)) AttributeError(a, name); + a->attr().erase(name); + } continue; + case OP_DELETE_SUBSCR: { + PyObject* b = frame->popx(); + PyObject* a = frame->popx(); + fast_call(__delitem__, Args{a, b}); + } continue; + 
/*****************************************/ case OP_BUILD_TUPLE_REF: { Args items = frame->pop_n_reversed(byte.arg); frame->push(PyRef(TupleRef(std::move(items)))); diff --git a/src/codeobject.h b/src/codeobject.h index 3ac01caa..b238bc7a 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -25,7 +25,7 @@ inline const char* OP_NAMES[] = { }; struct Bytecode{ - uint8_t op; + uint16_t op; uint16_t block; int arg; int line; @@ -61,7 +61,7 @@ struct CodeObject { std::vector codes; List consts; - std::vector> names; + std::vector names; std::set global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; diff --git a/src/compiler.h b/src/compiler.h index 10eb8886..a4647e49 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -173,7 +173,7 @@ private: } template - std::unique_ptr expr_prev_line(Args&&... args) { + std::unique_ptr make_expr(Args&&... args) { std::unique_ptr expr = std::make_unique(std::forward(args)...); expr->line = prev().line; return expr; @@ -183,17 +183,17 @@ private: // PASS void exprLiteral(){ - ctx()->s_expr.push(expr_prev_line(prev().value)); + ctx()->s_expr.push(make_expr(prev().value)); } // PASS void exprFString(){ - ctx()->s_expr.push(expr_prev_line(std::get(prev().value))); + ctx()->s_expr.push(make_expr(std::get(prev().value))); } // PASS void exprLambda(){ - auto e = expr_prev_line(); + auto e = make_expr(); e->func.name = ""; e->scope = name_scope(); if(!match(TK(":"))){ @@ -260,7 +260,7 @@ private: // PASS void exprTuple(){ - auto e = expr_prev_line(); + auto e = make_expr(); do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok e->items.push_back(ctx()->s_expr.popx()); @@ -270,7 +270,7 @@ private: // PASS void exprOr(){ - auto e = expr_prev_line(); + auto e = make_expr(); e->lhs = ctx()->s_expr.popx(); parse_expression(PREC_LOGICAL_OR + 1); e->rhs = ctx()->s_expr.popx(); @@ -279,7 +279,7 @@ private: // PASS void exprAnd(){ - auto e = expr_prev_line(); + auto e = make_expr(); e->lhs = ctx()->s_expr.popx(); 
parse_expression(PREC_LOGICAL_AND + 1); e->rhs = ctx()->s_expr.popx(); @@ -288,7 +288,7 @@ private: // PASS void exprTernary(){ - auto e = expr_prev_line(); + auto e = make_expr(); e->cond = ctx()->s_expr.popx(); EXPR(); // if true e->true_expr = ctx()->s_expr.popx(); @@ -300,7 +300,7 @@ private: // PASS void exprBinaryOp(){ - auto e = expr_prev_line(); + auto e = make_expr(); e->op = prev().type; e->lhs = ctx()->s_expr.popx(); parse_expression(rules[e->op].precedence + 1); @@ -311,7 +311,7 @@ private: // PASS void exprNot() { parse_expression(PREC_LOGICAL_NOT + 1); - ctx()->s_expr.push(expr_prev_line(ctx()->s_expr.popx())); + ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); } // PASS @@ -320,10 +320,10 @@ private: parse_expression(PREC_UNARY + 1); switch(op){ case TK("-"): - ctx()->s_expr.push(expr_prev_line(ctx()->s_expr.popx())); + ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); break; case TK("*"): - ctx()->s_expr.push(expr_prev_line(ctx()->s_expr.popx())); + ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); break; default: UNREACHABLE(); } @@ -375,7 +375,7 @@ private: match_newlines(mode()==REPL_MODE); } while (match(TK(","))); consume(TK("]")); - auto e = expr_prev_line(std::move(items)); + auto e = make_expr(std::move(items)); e->line = line; // override line ctx()->s_expr.push(std::move(e)); } @@ -392,7 +392,7 @@ private: if(parsing_dict){ consume(TK(":")); EXPR(); - auto dict_item = expr_prev_line(); + auto dict_item = make_expr(); dict_item->key = ctx()->s_expr.popx(); dict_item->value = ctx()->s_expr.popx(); items.push_back(std::move(dict_item)); @@ -410,17 +410,17 @@ private: } while (match(TK(","))); consume(TK("}")); if(items.size()==0 || parsing_dict){ - auto e = expr_prev_line(std::move(items)); + auto e = make_expr(std::move(items)); ctx()->s_expr.push(std::move(e)); }else{ - auto e = expr_prev_line(std::move(items)); + auto e = make_expr(std::move(items)); ctx()->s_expr.push(std::move(e)); } } // PASS void exprCall() { - auto e = 
expr_prev_line(); + auto e = make_expr(); e->callable = ctx()->s_expr.popx(); do { match_newlines(mode()==REPL_MODE); @@ -434,38 +434,32 @@ private: } else{ if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument"); EXPR(); - // if(co()->codes.back().op == OP_UNARY_STAR) need_unpack = true; e->args.push_back(ctx()->s_expr.popx()); } match_newlines(mode()==REPL_MODE); } while (match(TK(","))); consume(TK(")")); + if(e->args.size() > 32767) SyntaxError("too many positional arguments"); + if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments"); ctx()->s_expr.push(std::move(e)); - // if(ARGC > 32767) SyntaxError("too many positional arguments"); - // if(KWARGC > 32767) SyntaxError("too many keyword arguments"); - // if(KWARGC > 0){ - // emit(need_unpack ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC << 16) | ARGC); - // }else{ - // emit(need_unpack ? OP_CALL_UNPACK : OP_CALL, ARGC); - // } } // PASS void exprName(){ - ctx()->s_expr.push(expr_prev_line(prev().str(), name_scope())); + ctx()->s_expr.push(make_expr(prev().str(), name_scope())); } // PASS void exprAttrib() { consume(TK("@id")); ctx()->s_expr.push( - expr_prev_line(ctx()->s_expr.popx(), prev().str()) + make_expr(ctx()->s_expr.popx(), prev().str()) ); } // PASS void exprSubscr() { - auto e = expr_prev_line(); + auto e = make_expr(); std::vector items; do { EXPR_TUPLE(); @@ -477,7 +471,7 @@ private: e->b = std::move(items[0]); break; case 2: case 3: { - auto slice = expr_prev_line(); + auto slice = make_expr(); slice->start = std::move(items[0]); slice->stop = std::move(items[1]); if(items.size()==3){ @@ -492,7 +486,7 @@ private: // PASS void exprLiteral0() { - ctx()->s_expr.push(expr_prev_line(prev().type)); + ctx()->s_expr.push(make_expr(prev().type)); } void compile_block_body() { @@ -708,15 +702,6 @@ private: ctx()->emit(OP_ASSERT, BC_NOARG, kw_line); consume_end_stmt(); break; - case TK("del"): - EXPR_TUPLE(); - Expr_ e = ctx()->s_expr.popx(); - 
switch(e->ref_type()){ - case EXPR_NAME_REF: - } - ctx()->emit(OP_DELETE_REF, BC_NOARG, kw_line); - consume_end_stmt(); - break; case TK("global"): do { consume(TK("@id")); @@ -735,6 +720,13 @@ private: ctx()->emit(OP_RAISE, dummy_t, kw_line); consume_end_stmt(); } break; + case TK("del"): { + EXPR_TUPLE(); + Expr_ e = ctx()->s_expr.popx(); + bool ok = e->emit_del(ctx()); + if(!ok) SyntaxError(); + consume_end_stmt(); + } break; case TK("with"): { EXPR(true); consume(TK("as")); diff --git a/src/expr.h b/src/expr.h index ba9ab990..f66e82c0 100644 --- a/src/expr.h +++ b/src/expr.h @@ -11,25 +11,20 @@ namespace pkpy{ struct CodeEmitContext; -enum ExprRefType{ - EXPR_NO_REF, - EXPR_NAME_REF, - EXPR_ATTR_REF, - EXPR_INDEX_REF, - EXPR_STARRED_REF, - EXPR_TUPLE_REF -}; - struct Expr{ int line = 0; virtual ~Expr() = default; virtual void emit(CodeEmitContext* ctx) = 0; virtual Str str() const = 0; - virtual std::vector children() = 0; - virtual ExprRefType ref_type() const { - return EXPR_NO_REF; - } + virtual std::vector children() const { return {}; } + virtual bool is_starred() const { return false; } + + // for OP_DELETE_XXX + virtual bool emit_del(CodeEmitContext* ctx) { return false; } + + // for OP_STORE_XXX + virtual bool emit_store(CodeEmitContext* ctx) { return false; } }; struct CodeEmitContext{ @@ -67,13 +62,12 @@ struct CodeEmitContext{ void emit_expr(){ if(s_expr.size() != 1) UNREACHABLE(); Expr_ expr = s_expr.popx(); - // emit - // ... 
+ expr->emit(this); } int emit(Opcode opcode, int arg, int line) { co->codes.push_back( - Bytecode{(uint8_t)opcode, (uint16_t)curr_block_i, arg, line} + Bytecode{(uint16_t)opcode, (uint16_t)curr_block_i, arg, line} ); int i = co->codes.size() - 1; if(line==BC_KEEPLINE && i>=1) co->codes[i].line = co->codes[i-1].line; @@ -91,13 +85,11 @@ struct CodeEmitContext{ return true; } - int add_name(StrName name, NameScope scope){ - if(scope == NAME_LOCAL && co->global_names.count(name)) scope = NAME_GLOBAL; - auto p = std::make_pair(name, scope); + int add_name(StrName name){ for(int i=0; inames.size(); i++){ - if(co->names[i] == p) return i; + if(co->names[i] == name) return i; } - co->names.push_back(p); + co->names.push_back(name); return co->names.size() - 1; } @@ -107,7 +99,7 @@ struct CodeEmitContext{ } }; - +// PASS struct NameExpr: Expr{ Str name; NameScope scope; @@ -117,59 +109,99 @@ struct NameExpr: Expr{ Str str() const override { return "$" + name; } void emit(CodeEmitContext* ctx) override { - int index = ctx->add_name(name, scope); + int index = ctx->add_name(name); ctx->emit(OP_LOAD_NAME, index, line); } - ExprRefType ref_type() const override { - return EXPR_NAME_REF; + bool emit_del(CodeEmitContext* ctx) override { + int index = ctx->add_name(name); + switch(scope){ + case NAME_LOCAL: + ctx->emit(OP_DELETE_LOCAL, index, line); + break; + case NAME_GLOBAL: + ctx->emit(OP_DELETE_GLOBAL, index, line); + break; + default: UNREACHABLE(); break; + } + return true; + } + + bool emit_store(CodeEmitContext* ctx) override { + int index = ctx->add_name(name); + switch(scope){ + case NAME_LOCAL: + ctx->emit(OP_STORE_LOCAL, index, line); + break; + case NAME_GLOBAL: + ctx->emit(OP_STORE_GLOBAL, index, line); + break; + default: UNREACHABLE(); break; + } + return true; } }; - +// The * operator behaves differently as an lvalue and as an rvalue struct StarredExpr: Expr{ Expr_ child; StarredExpr(Expr_&& child): child(std::move(child)) {} Str str() const override { return "*"; } + std::vector children() const override 
{ return {child.get()}; } + + bool is_starred() const override { return true; } + void emit(CodeEmitContext* ctx) override { child->emit(ctx); - ctx->emit(OP_UNARY_STAR, (int)false, line); + // as a rvalue, we should do unpack here + //ctx->emit(OP_UNARY_STAR, (int)false, line); } - ExprRefType ref_type() const override { - return EXPR_STARRED_REF; + bool emit_store(CodeEmitContext* ctx) override { + child->emit(ctx); + // as a lvalue, we should do pack here + //ctx->emit(OP_UNARY_STAR, (int)true, line); + return true; } }; - +// PASS struct NegatedExpr: Expr{ Expr_ child; NegatedExpr(Expr_&& child): child(std::move(child)) {} Str str() const override { return "-"; } + std::vector children() const override { return {child.get()}; } + void emit(CodeEmitContext* ctx) override { child->emit(ctx); ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line); } }; +// PASS struct NotExpr: Expr{ Expr_ child; NotExpr(Expr_&& child): child(std::move(child)) {} Str str() const override { return "not"; } + std::vector children() const override { return {child.get()}; } + void emit(CodeEmitContext* ctx) override { child->emit(ctx); ctx->emit(OP_UNARY_NOT, BC_NOARG, line); } }; +// PASS struct AndExpr: Expr{ Expr_ lhs; Expr_ rhs; Str str() const override { return "and"; } + std::vector children() const override { return {lhs.get(), rhs.get()}; } + void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); int patch = ctx->emit(OP_JUMP_IF_FALSE_OR_POP, BC_NOARG, line); @@ -178,11 +210,14 @@ struct AndExpr: Expr{ } }; +// PASS struct OrExpr: Expr{ Expr_ lhs; Expr_ rhs; Str str() const override { return "or"; } + std::vector children() const override { return {lhs.get(), rhs.get()}; } + void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); int patch = ctx->emit(OP_JUMP_IF_TRUE_OR_POP, BC_NOARG, line); @@ -249,12 +284,18 @@ struct LiteralExpr: Expr{ } }; +// PASS struct SliceExpr: Expr{ Expr_ start; Expr_ stop; Expr_ step; Str str() const override { return "slice()"; } + std::vector 
children() const override { + // may contain nullptr + return {start.get(), stop.get(), step.get()}; + } + void emit(CodeEmitContext* ctx) override { if(start){ start->emit(ctx); @@ -278,11 +319,30 @@ struct SliceExpr: Expr{ } }; +struct DictItemExpr: Expr{ + Expr_ key; + Expr_ value; + Str str() const override { return "k:v"; } + std::vector children() const override { return {key.get(), value.get()}; } + + void emit(CodeEmitContext* ctx) override { + key->emit(ctx); + value->emit(ctx); + ctx->emit(OP_BUILD_TUPLE, 2, line); + } +}; + struct SequenceExpr: Expr{ std::vector items; SequenceExpr(std::vector&& items): items(std::move(items)) {} virtual Opcode opcode() const = 0; + std::vector children() const override { + std::vector ret; + for(auto& item: items) ret.push_back(item.get()); + return ret; + } + void emit(CodeEmitContext* ctx) override { for(auto& item: items) item->emit(ctx); ctx->emit(opcode(), items.size(), line); @@ -308,8 +368,9 @@ struct TupleExpr: SequenceExpr{ Str str() const override { return "tuple()"; } Opcode opcode() const override { return OP_BUILD_TUPLE; } - ExprRefType ref_type() const override { - return EXPR_TUPLE_REF; + bool emit_store(CodeEmitContext* ctx) override { + // ... 
+ return true; } }; @@ -318,14 +379,6 @@ struct CompExpr: Expr{ Expr_ vars; // loop vars Expr_ iter; // loop iter Expr_ cond; // optional if condition - virtual void emit_expr() = 0; -}; - -// a:b -struct DictItemExpr: Expr{ - Expr_ key; - Expr_ value; - Str str() const override { return "k:v"; } }; struct ListCompExpr: CompExpr{ @@ -345,7 +398,9 @@ struct LambdaExpr: Expr{ void emit(CodeEmitContext* ctx) override { VM* vm = ctx->vm; ctx->emit(OP_LOAD_FUNCTION, ctx->add_const(VAR(func)), line); - if(scope == NAME_LOCAL) ctx->emit(OP_SETUP_CLOSURE, BC_NOARG, BC_KEEPLINE); + if(scope == NAME_LOCAL){ + ctx->emit(OP_SETUP_CLOSURE, BC_NOARG, BC_KEEPLINE); + } } }; @@ -393,11 +448,21 @@ struct SubscrExpr: Expr{ void emit(CodeEmitContext* ctx) override{ a->emit(ctx); b->emit(ctx); - ctx->emit(OP_BUILD_INDEX, BC_NOARG, line); + ctx->emit(OP_LOAD_SUBSCR, BC_NOARG, line); } - ExprRefType ref_type() const override { - return EXPR_INDEX_REF; + bool emit_del(CodeEmitContext* ctx) override { + a->emit(ctx); + b->emit(ctx); + ctx->emit(OP_DELETE_SUBSCR, BC_NOARG, line); + return true; + } + + bool emit_store(CodeEmitContext* ctx) override { + a->emit(ctx); + b->emit(ctx); + ctx->emit(OP_STORE_SUBSCR, BC_NOARG, line); + return true; } }; @@ -408,16 +473,56 @@ struct AttribExpr: Expr{ AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} Str str() const override { return "a.b"; } - ExprRefType ref_type() const override { - return EXPR_ATTR_REF; + void emit(CodeEmitContext* ctx) override{ + a->emit(ctx); + int index = ctx->add_name(b); + ctx->emit(OP_LOAD_ATTR, index, line); + } + + bool emit_del(CodeEmitContext* ctx) override { + a->emit(ctx); + int index = ctx->add_name(b); + ctx->emit(OP_DELETE_ATTR, index, line); + return true; + } + + bool emit_store(CodeEmitContext* ctx) override { + a->emit(ctx); + int index = ctx->add_name(b); + ctx->emit(OP_STORE_ATTR, index, line); + return true; } }; +// PASS struct CallExpr: Expr{ Expr_ callable; std::vector args; 
std::vector> kwargs; - Str str() const override { return "()"; } + Str str() const override { return "call(...)"; } + + std::vector children() const override { + std::vector ret; + for(auto& item: args) ret.push_back(item.get()); + // ...ignore kwargs for simplicity + return ret; + } + + bool need_unpack() const { + for(auto& item: args) if(item->is_starred()) return true; + return false; + } + + void emit(CodeEmitContext* ctx) override { + callable->emit(ctx); + int KWARGC = (int)kwargs.size(); + int ARGC = (int)args.size(); + if(KWARGC > 0){ + ctx->emit(need_unpack() ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC<<16)|ARGC, line); + }else{ + ctx->emit(need_unpack() ? OP_CALL_UNPACK : OP_CALL, ARGC, line); + } + } }; struct BinaryExpr: Expr{ @@ -426,6 +531,10 @@ struct BinaryExpr: Expr{ Expr_ rhs; Str str() const override { return TK_STR(op); } + std::vector children() const override { + return {lhs.get(), rhs.get()}; + } + void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); rhs->emit(ctx); @@ -459,13 +568,18 @@ struct BinaryExpr: Expr{ } }; +// PASS struct TernaryExpr: Expr{ Expr_ cond; Expr_ true_expr; Expr_ false_expr; Str str() const override { - return "cond ? true_expr : false_expr"; + return "cond ? 
t : f"; + } + + std::vector children() const override { + return {cond.get(), true_expr.get(), false_expr.get()}; } void emit(CodeEmitContext* ctx) override { diff --git a/src/frame.h b/src/frame.h index 98e872c9..e52aff00 100644 --- a/src/frame.h +++ b/src/frame.h @@ -53,34 +53,20 @@ struct Frame { // return ss.str(); // } - PyObject* pop(){ -#if DEBUG_EXTRA_CHECK - if(_data.empty()) throw std::runtime_error("_data.empty() is true"); -#endif - PyObject* v = _data.back(); - _data.pop_back(); - return v; - } - - void _pop(){ + void pop(){ #if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif _data.pop_back(); } - void try_deref(VM*, PyObject*&); - - PyObject* pop_value(VM* vm){ - PyObject* value = pop(); - try_deref(vm, value); - return value; - } - - PyObject* top_value(VM* vm){ - PyObject* value = top(); - try_deref(vm, value); - return value; + PyObject* popx(){ +#if DEBUG_EXTRA_CHECK + if(_data.empty()) throw std::runtime_error("_data.empty() is true"); +#endif + PyObject* ret = _data.back(); + _data.pop_back(); + return ret; } PyObject*& top(){ @@ -141,18 +127,9 @@ struct Frame { } } - Args pop_n_values_reversed(VM* vm, int n){ - Args v(n); - for(int i=n-1; i>=0; i--){ - v[i] = pop(); - try_deref(vm, v[i]); - } - return v; - } - Args pop_n_reversed(int n){ Args v(n); - for(int i=n-1; i>=0; i--) v[i] = pop(); + for(int i=n-1; i>=0; i--) v[i] = popx(); return v; } diff --git a/src/opcodes.h b/src/opcodes.h index 693a579d..97a45a22 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -71,7 +71,6 @@ OPCODE(BUILD_ATTR_REF) OPCODE(STORE_NAME) OPCODE(STORE_FUNCTION) OPCODE(STORE_REF) -OPCODE(DELETE_REF) OPCODE(TRY_BLOCK_ENTER) OPCODE(TRY_BLOCK_EXIT) @@ -89,4 +88,20 @@ OPCODE(BEGIN_CLASS) OPCODE(END_CLASS) OPCODE(STORE_CLASS_ATTR) +/**************************/ +OPCODE(LOAD_NAME) +OPCODE(LOAD_ATTR) +OPCODE(LOAD_SUBSCR) + +OPCODE(STORE_LOCAL) +OPCODE(STORE_GLOBAL) +OPCODE(STORE_ATTR) +OPCODE(STORE_SUBSCR) + +OPCODE(DELETE_LOCAL) 
+OPCODE(DELETE_GLOBAL) +OPCODE(DELETE_ATTR) +OPCODE(DELETE_SUBSCR) +/**************************/ + #endif \ No newline at end of file diff --git a/src/ref.h b/src/ref.h index 11b5efc5..ca33ad18 100644 --- a/src/ref.h +++ b/src/ref.h @@ -83,11 +83,6 @@ struct AttrRef : BaseRef { vm->setattr(obj, attr.name(), std::move(val)); } - void del(VM* vm, Frame* frame) const{ - if(!obj->is_attr_valid()) vm->TypeError("cannot delete attribute"); - if(!obj->attr().contains(attr.name())) vm->AttributeError(obj, attr.name()); - obj->attr().erase(attr.name()); - } }; struct IndexRef : BaseRef { From d54cd8413822671de933d5f472e99d4bf591bfc0 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 2 Apr 2023 14:46:59 +0800 Subject: [PATCH 29/73] up --- amalgamate.py | 2 +- src/ceval.h | 8 +- src/codeobject.h | 7 +- src/compiler.h | 197 +++++++++++++++++++---------------------------- src/expr.h | 44 ++++++++++- src/opcodes.h | 9 +-- src/ref.h | 179 ------------------------------------------ 7 files changed, 131 insertions(+), 315 deletions(-) delete mode 100644 src/ref.h diff --git a/amalgamate.py b/amalgamate.py index 8327b940..b5cd8fd5 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -8,7 +8,7 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f: pipeline = [ ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], ["obj.h", "codeobject.h", "frame.h"], - ["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"], + ["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"], ["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"] ] diff --git a/src/ceval.h b/src/ceval.h index 9a45f1b3..cb873dc2 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -2,7 +2,6 @@ #include "common.h" #include "vm.h" -#include "ref.h" namespace pkpy{ @@ -135,10 +134,11 @@ inline PyObject* VM::run_frame(Frame* frame){ PyObject* cls = frame->top(); cls->attr().set(name.first, std::move(obj)); } continue; - case OP_RETURN_VALUE: return frame->pop_value(this); + case 
OP_RETURN_VALUE: return frame->popx(); case OP_PRINT_EXPR: { - PyObject* expr = frame->top_value(this); + PyObject* expr = frame->top(); // use top() here to avoid accidental gc if(expr != None) *_stdout << CAST(Str, asRepr(expr)) << '\n'; + frame->pop(); } continue; case OP_POP_TOP: frame->_pop(); continue; case OP_BINARY_OP: { @@ -239,7 +239,7 @@ inline PyObject* VM::run_frame(Frame* frame){ PyObject* obj = frame->pop_value(this); call(frame->top_1(), "add", Args{obj}); } continue; - case OP_DUP_TOP_VALUE: frame->push(frame->top_value(this)); continue; + case OP_DUP_TOP: frame->push(frame->top()); continue; case OP_UNARY_STAR: { if(byte.arg > 0){ // rvalue frame->top() = VAR(StarWrapper(frame->top_value(this), true)); diff --git a/src/codeobject.h b/src/codeobject.h index b238bc7a..e851fb31 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -5,12 +5,7 @@ namespace pkpy{ -enum NameScope { - NAME_LOCAL = 0, - NAME_GLOBAL, - NAME_ATTR, - NAME_SPECIAL, -}; +enum NameScope { NAME_LOCAL, NAME_GLOBAL }; enum Opcode { #define OPCODE(name) OP_##name, diff --git a/src/compiler.h b/src/compiler.h index a4647e49..6348e264 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -24,10 +24,18 @@ class Compiler { std::unique_ptr lexer; stack contexts; std::map rules; - bool used = false; VM* vm; + bool used; + // for parsing token stream + int i = 0; + std::vector tokens; + + const Token& prev() { return tokens.at(i-1); } + const Token& curr() { return tokens.at(i); } + const Token& next() { return tokens.at(i+1); } + const Token& peek(int offset) { return tokens.at(i+offset); } + void advance() { i++; } - CodeObject* co() const{ return contexts.top().co.get(); } CodeEmitContext* ctx() { return &contexts.top(); } CompileMode mode() const{ return lexer->src->mode; } NameScope name_scope() const { return contexts.size()>1 ? 
NAME_LOCAL : NAME_GLOBAL; } @@ -41,6 +49,7 @@ class Compiler { void pop_context(){ if(!ctx()->s_expr.empty()) UNREACHABLE(); + // if last instruction is not return, add a default return None if(ctx()->co->codes.back().op != OP_RETURN_VALUE){ ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); @@ -52,6 +61,7 @@ class Compiler { public: Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ this->vm = vm; + this->used = false; this->lexer = std::make_unique( make_sp(source, filename, mode) ); @@ -60,25 +70,32 @@ public: #define METHOD(name) &Compiler::name #define NO_INFIX nullptr, PREC_NONE for(TokenIndex i=0; i")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; - rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; - rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; - rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB }; + rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL }; + rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT }; + rules[TK("{")] = { METHOD(exprMap), NO_INFIX }; + rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM }; + rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM }; + rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT }; + 
rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; + rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; + rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; + rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; + rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; + rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR }; + rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR }; + rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; + rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE }; rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; @@ -93,13 +110,6 @@ public: rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; - rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; - rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE }; - rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; - rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; - rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; - rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR }; - rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR }; #undef METHOD #undef NO_INFIX @@ -118,15 +128,6 @@ 
public: } private: - int i = 0; - std::vector tokens; - - const Token& prev() { return tokens.at(i-1); } - const Token& curr() { return tokens.at(i); } - const Token& next() { return tokens.at(i+1); } - const Token& peek(int offset) { return tokens.at(i+offset); } - void advance() { i++; } - bool match(TokenIndex expected) { if (curr().type != expected) return false; advance(); @@ -164,12 +165,14 @@ private: if (!match_end_stmt()) SyntaxError("expected statement end"); } - void EXPR(ExprAction action=EXPR_PUSH_STACK) { - parse_expression(PREC_TUPLE + 1, action); + /*************************************************/ + + void EXPR(bool push_stack=true) { + parse_expression(PREC_TUPLE+1, push_stack); } - void EXPR_TUPLE(ExprAction action=EXPR_PUSH_STACK) { - parse_expression(PREC_TUPLE, action); + void EXPR_TUPLE(bool push_stack=true) { + parse_expression(PREC_TUPLE, push_stack); } template @@ -179,8 +182,6 @@ private: return expr; } - /********************************************/ - // PASS void exprLiteral(){ ctx()->s_expr.push(make_expr(prev().value)); @@ -201,63 +202,12 @@ private: consume(TK(":")); } e->func.code = push_context(lexer->src, ""); - // https://github.com/blueloveTH/pocketpy/issues/37 - EXPR(true); + EXPR(true); // https://github.com/blueloveTH/pocketpy/issues/37 ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); ctx()->s_expr.push(std::move(e)); } - void exprAssign(){ - // if(co()->codes.empty()) UNREACHABLE(); - // bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; - // int _name_arg = co()->codes.back().arg; - // // if the last op is OP_LOAD_NAME_REF, remove it - // // because we will emit OP_STORE_NAME or OP_STORE_CLASS_ATTR - // if(is_load_name_ref) co()->codes.pop_back(); - - // co()->_rvalue += 1; - // TokenIndex op = prev().type; - // if(op == TK("=")) { // a = (expr) - // EXPR_TUPLE(); - // if(is_load_name_ref){ - // auto op = ctx()->is_compiling_class ? 
OP_STORE_CLASS_ATTR : OP_STORE_NAME; - // emit(op, _name_arg); - // }else{ - // if(ctx()->is_compiling_class) SyntaxError(); - // emit(OP_STORE_REF); - // } - // }else{ // a += (expr) -> a = a + (expr) - // if(ctx()->is_compiling_class) SyntaxError(); - // if(is_load_name_ref){ - // emit(OP_LOAD_NAME, _name_arg); - // }else{ - // emit(OP_DUP_TOP_VALUE); - // } - // EXPR(); - // switch (op) { - // case TK("+="): emit(OP_BINARY_OP, 0); break; - // case TK("-="): emit(OP_BINARY_OP, 1); break; - // case TK("*="): emit(OP_BINARY_OP, 2); break; - // case TK("/="): emit(OP_BINARY_OP, 3); break; - // case TK("//="): emit(OP_BINARY_OP, 4); break; - // case TK("%="): emit(OP_BINARY_OP, 5); break; - // case TK("<<="): emit(OP_BITWISE_OP, 0); break; - // case TK(">>="): emit(OP_BITWISE_OP, 1); break; - // case TK("&="): emit(OP_BITWISE_OP, 2); break; - // case TK("|="): emit(OP_BITWISE_OP, 3); break; - // case TK("^="): emit(OP_BITWISE_OP, 4); break; - // default: UNREACHABLE(); - // } - // if(is_load_name_ref){ - // emit(OP_STORE_NAME, _name_arg); - // }else{ - // emit(OP_STORE_REF); - // } - // } - // co()->_rvalue -= 1; - } - // PASS void exprTuple(){ auto e = make_expr(); @@ -540,17 +490,17 @@ private: return; } do { - ctx()->emit(OP_DUP_TOP_VALUE, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE); consume(TK("@id")); Str name = prev().str(); - int index = ctx()->add_name(name, NAME_ATTR); - ctx()->emit(OP_BUILD_ATTR, index, prev().line); + int index = ctx()->add_name(name); + ctx()->emit(OP_LOAD_ATTR, index, prev().line); if (match(TK("as"))) { consume(TK("@id")); name = prev().str(); } - index = ctx()->add_name(name, name_scope()); - ctx()->emit(OP_STORE_NAME, index, prev().line); + index = ctx()->add_name(name); + ctx()->emit(OP_STORE_GLOBAL, index, prev().line); } while (match(TK(","))); ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); consume_end_stmt(); @@ -650,7 +600,20 @@ private: } bool try_compile_assignment(){ - + // switch (op) { + // case 
TK("+="): emit(OP_BINARY_OP, 0); break; + // case TK("-="): emit(OP_BINARY_OP, 1); break; + // case TK("*="): emit(OP_BINARY_OP, 2); break; + // case TK("/="): emit(OP_BINARY_OP, 3); break; + // case TK("//="): emit(OP_BINARY_OP, 4); break; + // case TK("%="): emit(OP_BINARY_OP, 5); break; + // case TK("<<="): emit(OP_BITWISE_OP, 0); break; + // case TK(">>="): emit(OP_BITWISE_OP, 1); break; + // case TK("&="): emit(OP_BITWISE_OP, 2); break; + // case TK("|="): emit(OP_BITWISE_OP, 3); break; + // case TK("^="): emit(OP_BITWISE_OP, 4); break; + // default: UNREACHABLE(); + // } } void compile_stmt() { @@ -728,16 +691,18 @@ private: consume_end_stmt(); } break; case TK("with"): { - EXPR(true); - consume(TK("as")); - consume(TK("@id")); - int index = ctx()->add_name(prev().str(), name_scope()); - emit(OP_STORE_NAME, index); - emit(OP_LOAD_NAME_REF, index); - emit(OP_WITH_ENTER); - compile_block_body(); - emit(OP_LOAD_NAME_REF, index); - emit(OP_WITH_EXIT); + // TODO: reimpl this + UNREACHABLE(); + // EXPR(true); + // consume(TK("as")); + // consume(TK("@id")); + // int index = ctx()->add_name(prev().str(), name_scope()); + // emit(OP_STORE_NAME, index); + // emit(OP_LOAD_NAME_REF, index); + // emit(OP_WITH_ENTER); + // compile_block_body(); + // emit(OP_LOAD_NAME_REF, index); + // emit(OP_WITH_EXIT); } break; /*************************************************/ // TODO: refactor goto/label use special $ syntax @@ -749,21 +714,21 @@ private: consume_end_stmt(); break; case TK("goto"): - if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); + if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL)); consume_end_stmt(); break; /*************************************************/ - // dangling expression or assignment + // handle dangling expression or assignment default: { EXPR_TUPLE(true); - bool assigment = try_compile_assignment(); - 
if(!assigment){ + if(!try_compile_assignment()){ if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){ emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE); + }else{ + emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); } - emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); } consume_end_stmt(); } diff --git a/src/expr.h b/src/expr.h index f66e82c0..8f35faba 100644 --- a/src/expr.h +++ b/src/expr.h @@ -594,4 +594,46 @@ struct TernaryExpr: Expr{ }; -} // namespace pkpy \ No newline at end of file +} // namespace pkpy + + +// struct TupleRef : BaseRef { +// Tuple objs; +// TupleRef(Tuple&& objs) : objs(std::move(objs)) {} + +// PyObject* get(VM* vm, Frame* frame) const{ +// Tuple args(objs.size()); +// for (int i = 0; i < objs.size(); i++) { +// args[i] = vm->PyRef_AS_C(objs[i])->get(vm, frame); +// } +// return VAR(std::move(args)); +// } + +// void set(VM* vm, Frame* frame, PyObject* val) const{ +// val = vm->asIter(val); +// BaseIter* iter = vm->PyIter_AS_C(val); +// for(int i=0; itp_star_wrapper)){ +// auto& star = _CAST(StarWrapper&, objs[i]); +// if(star.rvalue) vm->ValueError("can't use starred expression here"); +// if(i != objs.size()-1) vm->ValueError("* can only be used at the end"); +// auto ref = vm->PyRef_AS_C(star.obj); +// List list; +// while((x = iter->next()) != nullptr) list.push_back(x); +// ref->set(vm, frame, VAR(std::move(list))); +// return; +// }else{ +// x = iter->next(); +// if(x == nullptr) vm->ValueError("not enough values to unpack"); +// vm->PyRef_AS_C(objs[i])->set(vm, frame, x); +// } +// } +// PyObject* x = iter->next(); +// if(x != nullptr) vm->ValueError("too many values to unpack"); +// } + +// void del(VM* vm, Frame* frame) const{ +// for(int i=0; iPyRef_AS_C(objs[i])->del(vm, frame); +// } +// }; \ No newline at end of file diff --git a/src/opcodes.h b/src/opcodes.h index 97a45a22..0a2f2636 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -2,7 +2,7 @@ OPCODE(NO_OP) OPCODE(POP_TOP) -OPCODE(DUP_TOP_VALUE) +OPCODE(DUP_TOP) OPCODE(CALL) OPCODE(CALL_UNPACK) 
OPCODE(CALL_KWARGS) @@ -57,20 +57,13 @@ OPCODE(LOAD_FALSE) OPCODE(LOAD_EVAL_FN) OPCODE(LOAD_FUNCTION) OPCODE(LOAD_ELLIPSIS) -OPCODE(LOAD_NAME) -OPCODE(LOAD_NAME_REF) OPCODE(ASSERT) OPCODE(EXCEPTION_MATCH) OPCODE(RAISE) OPCODE(RE_RAISE) -OPCODE(BUILD_INDEX) -OPCODE(BUILD_ATTR) -OPCODE(BUILD_ATTR_REF) -OPCODE(STORE_NAME) OPCODE(STORE_FUNCTION) -OPCODE(STORE_REF) OPCODE(TRY_BLOCK_ENTER) OPCODE(TRY_BLOCK_EXIT) diff --git a/src/ref.h b/src/ref.h deleted file mode 100644 index ca33ad18..00000000 --- a/src/ref.h +++ /dev/null @@ -1,179 +0,0 @@ -#pragma once - -#include "obj.h" -#include "vm.h" - -namespace pkpy { - -struct BaseRef { - virtual PyObject* get(VM*, Frame*) const = 0; - virtual void set(VM*, Frame*, PyObject*) const = 0; - virtual void del(VM*, Frame*) const = 0; - virtual ~BaseRef() = default; -}; - -struct NameRef : BaseRef { - const std::pair pair; - StrName name() const { return pair.first; } - NameScope scope() const { return pair.second; } - NameRef(const std::pair& pair) : pair(pair) {} - - PyObject* get(VM* vm, Frame* frame) const{ - PyObject* val; - val = frame->f_locals().try_get(name()); - if(val != nullptr) return val; - val = frame->f_closure_try_get(name()); - if(val != nullptr) return val; - val = frame->f_globals().try_get(name()); - if(val != nullptr) return val; - val = vm->builtins->attr().try_get(name()); - if(val != nullptr) return val; - vm->NameError(name()); - return nullptr; - } - - void set(VM* vm, Frame* frame, PyObject* val) const{ - switch(scope()) { - case NAME_LOCAL: frame->f_locals().set(name(), val); break; - case NAME_GLOBAL: - if(frame->f_locals().try_set(name(), val)) return; - frame->f_globals().set(name(), val); - break; - default: UNREACHABLE(); - } - } - - void del(VM* vm, Frame* frame) const{ - switch(scope()) { - case NAME_LOCAL: { - if(frame->f_locals().contains(name())){ - frame->f_locals().erase(name()); - }else{ - vm->NameError(name()); - } - } break; - case NAME_GLOBAL: - { - 
if(frame->f_locals().contains(name())){ - frame->f_locals().erase(name()); - }else{ - if(frame->f_globals().contains(name())){ - frame->f_globals().erase(name()); - }else{ - vm->NameError(name()); - } - } - } break; - default: UNREACHABLE(); - } - } -}; - - -struct AttrRef : BaseRef { - mutable PyObject* obj; - NameRef attr; - AttrRef(PyObject* obj, NameRef attr) : obj(obj), attr(attr) {} - - PyObject* get(VM* vm, Frame* frame) const{ - return vm->getattr(obj, attr.name()); - } - - void set(VM* vm, Frame* frame, PyObject* val) const{ - vm->setattr(obj, attr.name(), std::move(val)); - } - -}; - -struct IndexRef : BaseRef { - mutable PyObject* obj; - PyObject* index; - IndexRef(PyObject* obj, PyObject* index) : obj(obj), index(index) {} - - PyObject* get(VM* vm, Frame* frame) const{ - return vm->fast_call(__getitem__, Args{obj, index}); - } - - void set(VM* vm, Frame* frame, PyObject* val) const{ - vm->fast_call(__setitem__, Args{obj, index, val}); - } - - void del(VM* vm, Frame* frame) const{ - vm->fast_call(__delitem__, Args{obj, index}); - } -}; - -struct TupleRef : BaseRef { - Tuple objs; - TupleRef(Tuple&& objs) : objs(std::move(objs)) {} - - PyObject* get(VM* vm, Frame* frame) const{ - Tuple args(objs.size()); - for (int i = 0; i < objs.size(); i++) { - args[i] = vm->PyRef_AS_C(objs[i])->get(vm, frame); - } - return VAR(std::move(args)); - } - - void set(VM* vm, Frame* frame, PyObject* val) const{ - val = vm->asIter(val); - BaseIter* iter = vm->PyIter_AS_C(val); - for(int i=0; itp_star_wrapper)){ - auto& star = _CAST(StarWrapper&, objs[i]); - if(star.rvalue) vm->ValueError("can't use starred expression here"); - if(i != objs.size()-1) vm->ValueError("* can only be used at the end"); - auto ref = vm->PyRef_AS_C(star.obj); - List list; - while((x = iter->next()) != nullptr) list.push_back(x); - ref->set(vm, frame, VAR(std::move(list))); - return; - }else{ - x = iter->next(); - if(x == nullptr) vm->ValueError("not enough values to unpack"); - 
vm->PyRef_AS_C(objs[i])->set(vm, frame, x); - } - } - PyObject* x = iter->next(); - if(x != nullptr) vm->ValueError("too many values to unpack"); - } - - void del(VM* vm, Frame* frame) const{ - for(int i=0; iPyRef_AS_C(objs[i])->del(vm, frame); - } -}; - - -template -PyObject* VM::PyRef(P&& value) { - static_assert(std::is_base_of_v>); - return heap.gcnew

(tp_ref, std::forward

(value)); -} - -inline const BaseRef* VM::PyRef_AS_C(PyObject* obj) -{ - if(!is_type(obj, tp_ref)) TypeError("expected an l-value"); - return static_cast(obj->value()); -} - -/***** Frame's Impl *****/ -inline void Frame::try_deref(VM* vm, PyObject*& v){ - if(is_type(v, vm->tp_ref)) v = vm->PyRef_AS_C(v)->get(vm, this); -} - -/***** GC's Impl *****/ -template<> inline void _mark(AttrRef& t){ - OBJ_MARK(t.obj); -} - -template<> inline void _mark(IndexRef& t){ - OBJ_MARK(t.obj); - OBJ_MARK(t.index); -} - -template<> inline void _mark(TupleRef& t){ - _mark(t.objs); -} - -} // namespace pkpy \ No newline at end of file From 8364adef700d97202e1dfe73da6cd6e0dc90e17d Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 2 Apr 2023 15:50:33 +0800 Subject: [PATCH 30/73] up --- src/ceval.h | 102 ++++++++++++++++++++++--------------------------- src/compiler.h | 30 ++++++++------- src/frame.h | 31 ++++++++------- src/gc.h | 2 +- src/namedict.h | 4 +- src/obj.h | 4 +- src/opcodes.h | 15 ++++---- src/str.h | 2 + 8 files changed, 95 insertions(+), 95 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index cb873dc2..21041305 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -26,23 +26,15 @@ inline PyObject* VM::run_frame(Frame* frame){ f._closure = frame->_locals; } continue; case OP_ROT_TWO: ::std::swap(frame->top(), frame->top_1()); continue; - case OP_BUILD_TUPLE: { - Args items = frame->pop_n_values_reversed(this, byte.arg); - frame->push(VAR(std::move(items))); - } continue; /*****************************************/ case OP_LOAD_NAME: { - // TODO: use name resolution linked list to optimize this StrName name = frame->co->names[byte.arg]; PyObject* val; - val = frame->f_locals().try_get(name); - if(val != nullptr) { frame->push(val); continue; } - val = frame->f_closure_try_get(name); - if(val != nullptr) { frame->push(val); continue; } - val = frame->f_globals().try_get(name); - if(val != nullptr) { frame->push(val); continue; } - val = vm->builtins->attr().try_get(name); - 
if(val != nullptr) { frame->push(val); continue; } + int i = 0; // names[0] is ensured to be non-null + do{ + val = frame->names[i++]->try_get(name); + if(val != nullptr){ frame->push(val); break; } + }while(frame->names[i] != nullptr); vm->NameError(name); } continue; case OP_LOAD_ATTR: { @@ -51,9 +43,10 @@ inline PyObject* VM::run_frame(Frame* frame){ frame->top() = getattr(a, name); } continue; case OP_LOAD_SUBSCR: { - PyObject* b = frame->popx(); - PyObject* a = frame->top(); - frame->top() = fast_call(__getitem__, Args{a, b}); + Args args(2); + args[1] = frame->popx(); // b + args[0] = frame->top(); // a + frame->top() = fast_call(__getitem__, std::move(args)); } continue; case OP_STORE_LOCAL: { StrName name = frame->co->names[byte.arg]; @@ -105,34 +98,58 @@ inline PyObject* VM::run_frame(Frame* frame){ fast_call(__delitem__, Args{a, b}); } continue; /*****************************************/ - case OP_BUILD_TUPLE_REF: { - Args items = frame->pop_n_reversed(byte.arg); - frame->push(PyRef(TupleRef(std::move(items)))); + case OP_BUILD_LIST: + frame->push(VAR(frame->popx_n_reversed(byte.arg).to_list())); + continue; + case OP_BUILD_DICT: { + PyObject* t = VAR(frame->popx_n_reversed(byte.arg)); + PyObject* obj = call(builtins->attr(m_dict), Args{t}); + frame->push(obj); + } continue; + case OP_BUILD_SET: { + PyObject* t = VAR(frame->popx_n_reversed(byte.arg)); + PyObject* obj = call(builtins->attr(m_set), Args{t}); + frame->push(obj); + } continue; + case OP_BUILD_SLICE: { + PyObject* step = frame->popx(); + PyObject* stop = frame->popx(); + PyObject* start = frame->popx(); + Slice s; + if(start != None) { s.start = CAST(int, start);} + if(stop != None) { s.stop = CAST(int, stop);} + if(step != None) { s.step = CAST(int, step);} + frame->push(VAR(s)); + } continue; + case OP_BUILD_TUPLE: { + Tuple items = frame->popx_n_reversed(byte.arg); + frame->push(VAR(std::move(items))); } continue; case OP_BUILD_STRING: { - Args items = frame->pop_n_values_reversed(this, 
byte.arg); + Args items = frame->popx_n_reversed(byte.arg); StrStream ss; for(int i=0; ipush(VAR(ss.str())); } continue; + /*****************************************/ case OP_LOAD_EVAL_FN: frame->push(builtins->attr(m_eval)); continue; case OP_BEGIN_CLASS: { - auto& name = frame->co->names[byte.arg]; - PyObject* clsBase = frame->pop_value(this); + StrName name = frame->co->names[byte.arg]; + PyObject* clsBase = frame->popx(); if(clsBase == None) clsBase = _t(tp_object); check_type(clsBase, tp_type); - PyObject* cls = new_type_object(frame->_module, name.first, OBJ_GET(Type, clsBase)); + PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, clsBase)); frame->push(cls); } continue; case OP_END_CLASS: { - PyObject* cls = frame->pop(); + PyObject* cls = frame->popx(); cls->attr()._try_perfect_rehash(); }; continue; case OP_STORE_CLASS_ATTR: { - auto& name = frame->co->names[byte.arg]; - PyObject* obj = frame->pop_value(this); + StrName name = frame->co->names[byte.arg]; + PyObject* obj = frame->popx(); PyObject* cls = frame->top(); - cls->attr().set(name.first, std::move(obj)); + cls->attr().set(name, obj); } continue; case OP_RETURN_VALUE: return frame->popx(); case OP_PRINT_EXPR: { @@ -204,27 +221,7 @@ inline PyObject* VM::run_frame(Frame* frame){ _error(type, msg); } continue; case OP_RE_RAISE: _raise(); continue; - case OP_BUILD_LIST: - frame->push(VAR(frame->pop_n_values_reversed(this, byte.arg).to_list())); - continue; - case OP_BUILD_MAP: { - List list(byte.arg); - for(int i=0; ipop_value(this); - PyObject* key = frame->pop_value(this); - list[i] = VAR(Tuple({key, value})); - } - PyObject* d_arg = VAR(std::move(list)); - PyObject* obj = call(builtins->attr("dict"), Args{d_arg}); - frame->push(obj); - } continue; - case OP_BUILD_SET: { - PyObject* list = VAR( - frame->pop_n_values_reversed(this, byte.arg).to_list() - ); - PyObject* obj = call(builtins->attr("set"), Args{list}); - frame->push(obj); - } continue; + case OP_LIST_APPEND: { PyObject* obj 
= frame->pop_value(this); List& list = CAST(List&, frame->top_1()); @@ -310,14 +307,7 @@ inline PyObject* VM::run_frame(Frame* frame){ if(asBool(expr)==True) frame->jump_abs(byte.arg); else frame->pop_value(this); } continue; - case OP_BUILD_SLICE: { - PyObject* stop = frame->pop_value(this); - PyObject* start = frame->pop_value(this); - Slice s; - if(start != None) { s.start = CAST(int, start);} - if(stop != None) { s.stop = CAST(int, stop);} - frame->push(VAR(s)); - } continue; + case OP_IMPORT_NAME: { StrName name = frame->co->names[byte.arg].first; PyObject* ext_mod = _modules.try_get(name); diff --git a/src/compiler.h b/src/compiler.h index 6348e264..08068351 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -142,6 +142,10 @@ private: } } + bool match_newlines_repl(){ + return match_newlines(mode()==REPL_MODE); + } + bool match_newlines(bool repl_throw=false) { bool consumed = false; if (curr().type == TK("@eol")) { @@ -281,9 +285,9 @@ private: // PASS void exprGroup(){ - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); EXPR_TUPLE(); // () is just for change precedence - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); consume(TK(")")); } @@ -298,13 +302,13 @@ private: consume(TK("in")); EXPR(); ce->iter = ctx()->s_expr.popx(); - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); if(match(TK("if"))){ EXPR(); ce->cond = ctx()->s_expr.popx(); } ctx()->s_expr.push(std::move(ce)); - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); } // PASS @@ -312,17 +316,17 @@ private: int line = prev().line; std::vector items; do { - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); if (curr().type == TK("]")) break; EXPR(); items.push_back(ctx()->s_expr.popx()); - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); if(items.size()==1 && match(TK("for"))){ _consume_comp(std::move(items[0])); consume(TK("]")); return; } - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); } while (match(TK(","))); 
consume(TK("]")); auto e = make_expr(std::move(items)); @@ -335,7 +339,7 @@ private: bool parsing_dict = false; // {...} may be dict or set std::vector items; do { - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); if (curr().type == TK("}")) break; EXPR(); if(curr().type == TK(":")) parsing_dict = true; @@ -349,14 +353,14 @@ private: }else{ items.push_back(ctx()->s_expr.popx()); } - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); if(items.size()==1 && match(TK("for"))){ if(parsing_dict) _consume_comp(std::move(items[0])); else _consume_comp(std::move(items[0])); consume(TK("}")); return; } - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); } while (match(TK(","))); consume(TK("}")); if(items.size()==0 || parsing_dict){ @@ -373,7 +377,7 @@ private: auto e = make_expr(); e->callable = ctx()->s_expr.popx(); do { - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); if (curr().type==TK(")")) break; if(curr().type==TK("@id") && next().type==TK("=")) { consume(TK("@id")); @@ -386,7 +390,7 @@ private: EXPR(); e->args.push_back(ctx()->s_expr.popx()); } - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); } while (match(TK(","))); consume(TK(")")); if(e->args.size() > 32767) SyntaxError("too many positional arguments"); @@ -709,7 +713,7 @@ private: case TK("label"): if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); - bool ok = co()->add_label(prev().str()); + bool ok = ctx()->add_label(prev().str()); if(!ok) SyntaxError("label " + prev().str().escape(true) + " already exists"); consume_end_stmt(); break; diff --git a/src/frame.h b/src/frame.h index e52aff00..1440b2d1 100644 --- a/src/frame.h +++ b/src/frame.h @@ -14,24 +14,23 @@ struct Frame { const CodeObject* co; PyObject* _module; NameDict_ _locals; - NameDict_ _closure; const uint64_t id; std::vector>> s_try_block; + const NameDict* names[5]; // name resolution array, zero terminated - NameDict& 
f_locals() noexcept { return _locals != nullptr ? *_locals : _module->attr(); } + NameDict& f_locals() noexcept { return *_locals; } NameDict& f_globals() noexcept { return _module->attr(); } - PyObject* f_closure_try_get(StrName name) noexcept { - if(_closure == nullptr) return nullptr; - return _closure->try_get(name); + Frame(const CodeObject_& co, PyObject* _module, NameDict_ _locals=nullptr, NameDict_ _closure=nullptr) + : co(co.get()), _module(_module), _locals(_locals), id(kFrameGlobalId++) { + memset(names, 0, sizeof(names)); + int i = 0; + if(_locals != nullptr) names[i++] = _locals.get(); + if(_closure != nullptr) names[i++] = _closure.get(); + names[i++] = &_module->attr(); + // names[i++] = builtins } - Frame(const CodeObject_& co, - PyObject* _module, - const NameDict_& _locals=nullptr, - const NameDict_& _closure=nullptr) - : co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) { } - const Bytecode& next_bytecode() { _ip = _next_ip++; return co->codes[_ip]; @@ -127,7 +126,7 @@ struct Frame { } } - Args pop_n_reversed(int n){ + Args popx_n_reversed(int n){ Args v(n); for(int i=n-1; i>=0; i--) v[i] = popx(); return v; @@ -135,9 +134,13 @@ struct Frame { void _mark() const { for(PyObject* obj : _data) OBJ_MARK(obj); - if(_locals != nullptr) _locals->_mark(); - if(_closure != nullptr) _closure->_mark(); OBJ_MARK(_module); + + int i = 0; // names[0] is ensured to be non-null + do{ + names[i++]->_mark(); + }while(names[i] != nullptr); + for(auto& p : s_try_block){ for(PyObject* obj : p.second) OBJ_MARK(obj); } diff --git a/src/gc.h b/src/gc.h index 746bef60..7ce845e6 100644 --- a/src/gc.h +++ b/src/gc.h @@ -114,7 +114,7 @@ struct ManagedHeap{ void mark(VM* vm); }; -inline void NameDict::_mark(){ +inline void NameDict::_mark() const{ for(uint16_t i=0; i<_capacity; i++){ if(_items[i].first.empty()) continue; OBJ_MARK(_items[i].second); diff --git a/src/namedict.h b/src/namedict.h index 5623fce4..4a98f5d2 100644 --- 
a/src/namedict.h +++ b/src/namedict.h @@ -125,7 +125,7 @@ while(!_items[i].first.empty()) { \ _rehash(false); // do not resize } - PyObject* try_get(StrName key){ + PyObject* try_get(StrName key) const{ bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) return nullptr; @@ -180,7 +180,7 @@ while(!_items[i].first.empty()) { \ return v; } - void _mark(); + void _mark() const; #undef HASH_PROBE #undef _hash }; diff --git a/src/obj.h b/src/obj.h index c7c38a90..ae7f7d3b 100644 --- a/src/obj.h +++ b/src/obj.h @@ -65,9 +65,11 @@ struct StarWrapper { using Super = std::pair; +// TODO: re-examine the design of Slice struct Slice { int start = 0; - int stop = 0x7fffffff; + int stop = 0x7fffffff; + int step = 1; void normalize(int len){ if(start < 0) start += len; diff --git a/src/opcodes.h b/src/opcodes.h index 0a2f2636..b0ebd237 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -20,14 +20,6 @@ OPCODE(UNARY_NEGATIVE) OPCODE(UNARY_NOT) OPCODE(UNARY_STAR) -OPCODE(BUILD_LIST) -OPCODE(BUILD_MAP) -OPCODE(BUILD_SET) -OPCODE(BUILD_SLICE) -OPCODE(BUILD_TUPLE) -OPCODE(BUILD_TUPLE_REF) -OPCODE(BUILD_STRING) - OPCODE(LIST_APPEND) OPCODE(MAP_ADD) OPCODE(SET_ADD) @@ -96,5 +88,12 @@ OPCODE(DELETE_GLOBAL) OPCODE(DELETE_ATTR) OPCODE(DELETE_SUBSCR) /**************************/ +OPCODE(BUILD_LIST) +OPCODE(BUILD_DICT) +OPCODE(BUILD_SET) +OPCODE(BUILD_SLICE) +OPCODE(BUILD_TUPLE) +OPCODE(BUILD_STRING) +/**************************/ #endif \ No newline at end of file diff --git a/src/str.h b/src/str.h index d5119619..37102a5f 100644 --- a/src/str.h +++ b/src/str.h @@ -209,6 +209,8 @@ const StrName __call__ = StrName::get("__call__"); const StrName m_eval = StrName::get("eval"); const StrName m_self = StrName::get("self"); +const StrName m_dict = StrName::get("dict"); +const StrName m_set = StrName::get("set"); const StrName __enter__ = StrName::get("__enter__"); const StrName __exit__ = StrName::get("__exit__"); From d1f5d31849335dbff4d2af1b31a25f3c00391abb Mon Sep 17 00:00:00 2001 From: 
blueloveTH Date: Sun, 2 Apr 2023 16:38:51 +0800 Subject: [PATCH 31/73] up --- src/ceval.h | 220 +++++++++++++++++++++++++++----------------------- src/expr.h | 11 ++- src/opcodes.h | 76 ++++++++--------- src/str.h | 2 +- src/vm.h | 52 ++++-------- 5 files changed, 181 insertions(+), 180 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 21041305..139c31ae 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -13,19 +13,28 @@ inline PyObject* VM::run_frame(Frame* frame){ switch (byte.op) { case OP_NO_OP: continue; - case OP_SETUP_DECORATOR: continue; + /*****************************************/ + case OP_POP_TOP: frame->pop(); continue; + case OP_DUP_TOP: frame->push(frame->top()); continue; + case OP_ROT_TWO: std::swap(frame->top(), frame->top_1()); continue; + case OP_PRINT_EXPR: { + PyObject* obj = frame->top(); // use top() here to avoid accidental gc + if(obj != None) *_stdout << CAST(Str, asRepr(obj)) << '\n'; + frame->pop(); + } continue; + /*****************************************/ case OP_LOAD_CONST: frame->push(frame->co->consts[byte.arg]); continue; + case OP_LOAD_NONE: frame->push(None); continue; + case OP_LOAD_TRUE: frame->push(True); continue; + case OP_LOAD_FALSE: frame->push(False); continue; + case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); continue; + case OP_LOAD_BUILTINS_EVAL: frame->push(builtins->attr(m_eval)); continue; case OP_LOAD_FUNCTION: { PyObject* obj = frame->co->consts[byte.arg]; - Function f = CAST(Function, obj); // copy - f._module = frame->_module; - frame->push(VAR(f)); + Function f = CAST(Function, obj); // copy it! 
+ f._module = frame->_module; // setup module + frame->push(VAR(std::move(f))); } continue; - case OP_SETUP_CLOSURE: { - Function& f = CAST(Function&, frame->top()); // reference - f._closure = frame->_locals; - } continue; - case OP_ROT_TWO: ::std::swap(frame->top(), frame->top_1()); continue; /*****************************************/ case OP_LOAD_NAME: { StrName name = frame->co->names[byte.arg]; @@ -116,9 +125,9 @@ inline PyObject* VM::run_frame(Frame* frame){ PyObject* stop = frame->popx(); PyObject* start = frame->popx(); Slice s; - if(start != None) { s.start = CAST(int, start);} - if(stop != None) { s.stop = CAST(int, stop);} - if(step != None) { s.step = CAST(int, step);} + if(start != None) s.start = CAST(int, start); + if(stop != None) s.stop = CAST(int, stop); + if(step != None) s.step = CAST(int, step); frame->push(VAR(s)); } continue; case OP_BUILD_TUPLE: { @@ -132,7 +141,99 @@ inline PyObject* VM::run_frame(Frame* frame){ frame->push(VAR(ss.str())); } continue; /*****************************************/ - case OP_LOAD_EVAL_FN: frame->push(builtins->attr(m_eval)); continue; + case OP_BINARY_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); + } continue; + case OP_COMPARE_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(COMPARE_SPECIAL_METHODS[byte.arg], std::move(args)); + } continue; + case OP_BITWISE_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); + } continue; + case OP_IS_OP: { + PyObject* rhs = frame->popx(); + PyObject* lhs = frame->top(); + bool ret_c = lhs == rhs; + if(byte.arg == 1) ret_c = !ret_c; + frame->top() = VAR(ret_c); + } continue; + case OP_CONTAINS_OP: { + Args args(2); + args[0] = frame->popx(); + args[1] = frame->top(); + 
PyObject* ret = fast_call(__contains__, std::move(args)); + bool ret_c = CAST(bool, ret); + if(byte.arg == 1) ret_c = !ret_c; + frame->top() = VAR(ret_c); + } continue; + /*****************************************/ + case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); continue; + case OP_SAFE_JUMP_ABSOLUTE: frame->jump_abs_safe(byte.arg); continue; + case OP_POP_JUMP_IF_FALSE: + if(!asBool(frame->popx())) frame->jump_abs(byte.arg); + continue; + case OP_JUMP_IF_TRUE_OR_POP: + if(asBool(frame->top()) == true) frame->jump_abs(byte.arg); + else frame->pop(); + continue; + case OP_JUMP_IF_FALSE_OR_POP: + if(asBool(frame->top()) == false) frame->jump_abs(byte.arg); + else frame->pop(); + continue; + case OP_LOOP_CONTINUE: { + int target = frame->co->blocks[byte.block].start; + frame->jump_abs(target); + } continue; + case OP_LOOP_BREAK: { + int target = frame->co->blocks[byte.block].end; + frame->jump_abs_safe(target); + } continue; + case OP_GOTO: { + StrName label = frame->co->names[byte.arg]; + auto it = frame->co->labels.find(label); + if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found"); + frame->jump_abs_safe(it->second); + } continue; + /*****************************************/ + // TODO: examine this later + case OP_CALL: case OP_CALL_UNPACK: { + Args args = frame->popx_n_reversed(byte.arg); + if(byte.op == OP_CALL_UNPACK) unpack_args(args); + PyObject* callable = frame->popx(); + PyObject* ret = call(callable, std::move(args), no_arg(), true); + if(ret == _py_op_call) return ret; + frame->push(std::move(ret)); + } continue; + case OP_CALL_KWARGS: case OP_CALL_KWARGS_UNPACK: { + int ARGC = byte.arg & 0xFFFF; + int KWARGC = (byte.arg >> 16) & 0xFFFF; + Args kwargs = frame->popx_n_reversed(KWARGC*2); + Args args = frame->popx_n_reversed(ARGC); + if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args); + PyObject* callable = frame->popx(); + PyObject* ret = call(callable, std::move(args), kwargs, true); + if(ret == 
_py_op_call) return ret; + frame->push(std::move(ret)); + } continue; + case OP_RETURN_VALUE: return frame->popx(); + /*****************************************/ + + /*****************************************/ + case OP_SETUP_DECORATOR: continue; + + case OP_SETUP_CLOSURE: { + Function& f = CAST(Function&, frame->top()); // reference + f._closure = frame->_locals; + } continue; case OP_BEGIN_CLASS: { StrName name = frame->co->names[byte.arg]; PyObject* clsBase = frame->popx(); @@ -151,43 +252,7 @@ inline PyObject* VM::run_frame(Frame* frame){ PyObject* cls = frame->top(); cls->attr().set(name, obj); } continue; - case OP_RETURN_VALUE: return frame->popx(); - case OP_PRINT_EXPR: { - PyObject* expr = frame->top(); // use top() here to avoid accidental gc - if(expr != None) *_stdout << CAST(Str, asRepr(expr)) << '\n'; - frame->pop(); - } continue; - case OP_POP_TOP: frame->_pop(); continue; - case OP_BINARY_OP: { - Args args(2); - args[1] = frame->pop_value(this); - args[0] = frame->top_value(this); - frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - case OP_BITWISE_OP: { - Args args(2); - args[1] = frame->pop_value(this); - args[0] = frame->top_value(this); - frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - case OP_COMPARE_OP: { - Args args(2); - args[1] = frame->pop_value(this); - args[0] = frame->top_value(this); - frame->top() = fast_call(CMP_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - case OP_IS_OP: { - PyObject* rhs = frame->pop_value(this); - bool ret_c = rhs == frame->top_value(this); - if(byte.arg == 1) ret_c = !ret_c; - frame->top() = VAR(ret_c); - } continue; - case OP_CONTAINS_OP: { - PyObject* rhs = frame->pop_value(this); - bool ret_c = CAST(bool, call(rhs, __contains__, Args{frame->pop_value(this)})); - if(byte.arg == 1) ret_c = !ret_c; - frame->push(VAR(ret_c)); - } continue; + case OP_UNARY_NEGATIVE: frame->top() = 
num_negated(frame->top_value(this)); continue; @@ -196,13 +261,7 @@ inline PyObject* VM::run_frame(Frame* frame){ PyObject* obj_bool = asBool(obj); frame->push(VAR(!_CAST(bool, obj_bool))); } continue; - case OP_POP_JUMP_IF_FALSE: - if(!_CAST(bool, asBool(frame->pop_value(this)))) frame->jump_abs(byte.arg); - continue; - case OP_LOAD_NONE: frame->push(None); continue; - case OP_LOAD_TRUE: frame->push(True); continue; - case OP_LOAD_FALSE: frame->push(False); continue; - case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); continue; + case OP_ASSERT: { PyObject* _msg = frame->pop_value(this); Str msg = CAST(Str, asStr(_msg)); @@ -236,7 +295,6 @@ inline PyObject* VM::run_frame(Frame* frame){ PyObject* obj = frame->pop_value(this); call(frame->top_1(), "add", Args{obj}); } continue; - case OP_DUP_TOP: frame->push(frame->top()); continue; case OP_UNARY_STAR: { if(byte.arg > 0){ // rvalue frame->top() = VAR(StarWrapper(frame->top_value(this), true)); @@ -245,33 +303,6 @@ inline PyObject* VM::run_frame(Frame* frame){ frame->top() = VAR(StarWrapper(frame->top(), false)); } } continue; - case OP_CALL_KWARGS_UNPACK: case OP_CALL_KWARGS: { - int ARGC = byte.arg & 0xFFFF; - int KWARGC = (byte.arg >> 16) & 0xFFFF; - Args kwargs = frame->pop_n_values_reversed(this, KWARGC*2); - Args args = frame->pop_n_values_reversed(this, ARGC); - if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args); - PyObject* callable = frame->pop_value(this); - PyObject* ret = call(callable, std::move(args), kwargs, true); - if(ret == _py_op_call) return ret; - frame->push(std::move(ret)); - } continue; - case OP_CALL_UNPACK: case OP_CALL: { - Args args = frame->pop_n_values_reversed(this, byte.arg); - if(byte.op == OP_CALL_UNPACK) unpack_args(args); - PyObject* callable = frame->pop_value(this); - PyObject* ret = call(callable, std::move(args), no_arg(), true); - if(ret == _py_op_call) return ret; - frame->push(std::move(ret)); - } continue; - case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); continue; - case 
OP_SAFE_JUMP_ABSOLUTE: frame->jump_abs_safe(byte.arg); continue; - case OP_GOTO: { - StrName label = frame->co->names[byte.arg].first; - auto it = frame->co->labels.find(label); - if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found"); - frame->jump_abs_safe(it->second); - } continue; case OP_GET_ITER: { PyObject* obj = frame->pop_value(this); PyObject* iter = asIter(obj); @@ -289,24 +320,7 @@ inline PyObject* VM::run_frame(Frame* frame){ frame->jump_abs_safe(blockEnd); } } continue; - case OP_LOOP_CONTINUE: { - int blockStart = frame->co->blocks[byte.block].start; - frame->jump_abs(blockStart); - } continue; - case OP_LOOP_BREAK: { - int blockEnd = frame->co->blocks[byte.block].end; - frame->jump_abs_safe(blockEnd); - } continue; - case OP_JUMP_IF_FALSE_OR_POP: { - PyObject* expr = frame->top_value(this); - if(asBool(expr)==False) frame->jump_abs(byte.arg); - else frame->pop_value(this); - } continue; - case OP_JUMP_IF_TRUE_OR_POP: { - PyObject* expr = frame->top_value(this); - if(asBool(expr)==True) frame->jump_abs(byte.arg); - else frame->pop_value(this); - } continue; + case OP_IMPORT_NAME: { StrName name = frame->co->names[byte.arg].first; diff --git a/src/expr.h b/src/expr.h index 8f35faba..fdf8c555 100644 --- a/src/expr.h +++ b/src/expr.h @@ -425,7 +425,7 @@ struct FStringExpr: Expr{ ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); size++; } - ctx->emit(OP_LOAD_EVAL_FN, BC_NOARG, line); + ctx->emit(OP_LOAD_BUILTINS_EVAL, BC_NOARG, line); ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(m[1].str())), line); ctx->emit(OP_CALL, 1, line); size++; @@ -514,7 +514,16 @@ struct CallExpr: Expr{ } void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; callable->emit(ctx); + // emit args + for(auto& item: args) item->emit(ctx); + // emit kwargs + for(auto& item: kwargs){ + // TODO: optimize this + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(item.first)), line); + item.second->emit(ctx); + } int KWARGC = 
(int)kwargs.size(); int ARGC = (int)args.size(); if(KWARGC > 0){ diff --git a/src/opcodes.h b/src/opcodes.h index b0ebd237..98c0e425 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -1,21 +1,5 @@ #ifdef OPCODE -OPCODE(NO_OP) -OPCODE(POP_TOP) -OPCODE(DUP_TOP) -OPCODE(CALL) -OPCODE(CALL_UNPACK) -OPCODE(CALL_KWARGS) -OPCODE(CALL_KWARGS_UNPACK) -OPCODE(RETURN_VALUE) -OPCODE(ROT_TWO) - -OPCODE(BINARY_OP) -OPCODE(COMPARE_OP) -OPCODE(BITWISE_OP) -OPCODE(IS_OP) -OPCODE(CONTAINS_OP) - OPCODE(UNARY_NEGATIVE) OPCODE(UNARY_NOT) OPCODE(UNARY_STAR) @@ -23,32 +7,14 @@ OPCODE(UNARY_STAR) OPCODE(LIST_APPEND) OPCODE(MAP_ADD) OPCODE(SET_ADD) + OPCODE(IMPORT_NAME) -OPCODE(PRINT_EXPR) OPCODE(GET_ITER) OPCODE(FOR_ITER) OPCODE(WITH_ENTER) OPCODE(WITH_EXIT) -OPCODE(LOOP_BREAK) -OPCODE(LOOP_CONTINUE) - -OPCODE(POP_JUMP_IF_FALSE) -OPCODE(JUMP_ABSOLUTE) -OPCODE(SAFE_JUMP_ABSOLUTE) -OPCODE(JUMP_IF_TRUE_OR_POP) -OPCODE(JUMP_IF_FALSE_OR_POP) - -OPCODE(GOTO) - -OPCODE(LOAD_CONST) -OPCODE(LOAD_NONE) -OPCODE(LOAD_TRUE) -OPCODE(LOAD_FALSE) -OPCODE(LOAD_EVAL_FN) -OPCODE(LOAD_FUNCTION) -OPCODE(LOAD_ELLIPSIS) OPCODE(ASSERT) OPCODE(EXCEPTION_MATCH) @@ -62,9 +28,6 @@ OPCODE(TRY_BLOCK_EXIT) OPCODE(YIELD_VALUE) -OPCODE(FAST_INDEX) // a[x] -OPCODE(FAST_INDEX_REF) // a[x] - OPCODE(SETUP_CLOSURE) OPCODE(SETUP_DECORATOR) OPCODE(STORE_ALL_NAMES) @@ -73,6 +36,22 @@ OPCODE(BEGIN_CLASS) OPCODE(END_CLASS) OPCODE(STORE_CLASS_ATTR) + +/**************************/ +OPCODE(NO_OP) +/**************************/ +OPCODE(POP_TOP) +OPCODE(DUP_TOP) +OPCODE(ROT_TWO) +OPCODE(PRINT_EXPR) +/**************************/ +OPCODE(LOAD_CONST) +OPCODE(LOAD_NONE) +OPCODE(LOAD_TRUE) +OPCODE(LOAD_FALSE) +OPCODE(LOAD_ELLIPSIS) +OPCODE(LOAD_BUILTINS_EVAL) +OPCODE(LOAD_FUNCTION) /**************************/ OPCODE(LOAD_NAME) OPCODE(LOAD_ATTR) @@ -95,5 +74,26 @@ OPCODE(BUILD_SLICE) OPCODE(BUILD_TUPLE) OPCODE(BUILD_STRING) /**************************/ +OPCODE(BINARY_OP) +OPCODE(COMPARE_OP) +OPCODE(BITWISE_OP) +OPCODE(IS_OP) 
+OPCODE(CONTAINS_OP) +/**************************/ +OPCODE(JUMP_ABSOLUTE) +OPCODE(SAFE_JUMP_ABSOLUTE) +OPCODE(POP_JUMP_IF_FALSE) +OPCODE(JUMP_IF_TRUE_OR_POP) +OPCODE(JUMP_IF_FALSE_OR_POP) +OPCODE(LOOP_CONTINUE) +OPCODE(LOOP_BREAK) +OPCODE(GOTO) +/**************************/ +OPCODE(CALL) +OPCODE(CALL_UNPACK) +OPCODE(CALL_KWARGS) +OPCODE(CALL_KWARGS_UNPACK) +OPCODE(RETURN_VALUE) +/**************************/ #endif \ No newline at end of file diff --git a/src/str.h b/src/str.h index 37102a5f..9cb4a001 100644 --- a/src/str.h +++ b/src/str.h @@ -214,7 +214,7 @@ const StrName m_set = StrName::get("set"); const StrName __enter__ = StrName::get("__enter__"); const StrName __exit__ = StrName::get("__exit__"); -const StrName CMP_SPECIAL_METHODS[] = { +const StrName COMPARE_SPECIAL_METHODS[] = { StrName::get("__lt__"), StrName::get("__le__"), StrName::get("__eq__"), StrName::get("__ne__"), StrName::get("__gt__"), StrName::get("__ge__") }; diff --git a/src/vm.h b/src/vm.h index 383a00cf..cf868ba3 100644 --- a/src/vm.h +++ b/src/vm.h @@ -320,7 +320,7 @@ public: CodeObject_ compile(Str source, Str filename, CompileMode mode); PyObject* num_negated(PyObject* obj); f64 num_to_float(PyObject* obj); - PyObject* asBool(PyObject* obj); + bool asBool(PyObject* obj); i64 hash(PyObject* obj); PyObject* asRepr(PyObject* obj); PyObject* new_module(StrName name); @@ -357,30 +357,13 @@ inline void CodeObject::optimize(VM* vm){ perfect_locals_capacity = find_next_capacity(base_n); perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, keys); - // for(int i=1; inum_negated(consts[pos]); - // } - - // if(i>=2 && codes[i].op == OP_BUILD_INDEX){ - // const Bytecode& a = codes[i-1]; - // const Bytecode& x = codes[i-2]; - // if(codes[i].arg == 1){ - // if(a.op == OP_LOAD_NAME && x.op == OP_LOAD_NAME){ - // codes[i].op = OP_FAST_INDEX; - // }else continue; - // }else{ - // if(a.op == OP_LOAD_NAME_REF && x.op == OP_LOAD_NAME_REF){ - // codes[i].op = OP_FAST_INDEX_REF; - // }else 
continue; - // } - // codes[i].arg = (a.arg << 16) | x.arg; - // codes[i-1].op = OP_NO_OP; - // codes[i-2].op = OP_NO_OP; - // } - // } + for(int i=1; inum_negated(consts[pos]); + } + } // pre-compute sn in co_consts for(int i=0; i 0); + return CAST(i64, ret) > 0; } - return True; + return true; } inline i64 VM::hash(PyObject* obj){ @@ -617,11 +600,6 @@ inline Str VM::disassemble(CodeObject_ co){ if(byte.op == OP_LOAD_NAME_REF || byte.op == OP_LOAD_NAME || byte.op == OP_RAISE || byte.op == OP_STORE_NAME){ argStr += " (" + co->names[byte.arg].first.str().escape(true) + ")"; } - if(byte.op == OP_FAST_INDEX || byte.op == OP_FAST_INDEX_REF){ - auto& a = co->names[byte.arg & 0xFFFF]; - auto& x = co->names[(byte.arg >> 16) & 0xFFFF]; - argStr += " (" + a.first.str() + '[' + x.first.str() + "])"; - } ss << argStr; // ss << pad(argStr, 20); // may overflow // ss << co->blocks[byte.block].to_string(); From a6b46717110bff4f29e7b1ecd3261b208d685a9b Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 2 Apr 2023 20:24:27 +0800 Subject: [PATCH 32/73] up --- src/ceval.h | 210 ++++++++++++++++++++++++-------------------------- src/expr.h | 2 +- src/frame.h | 21 ++++- src/obj.h | 3 +- src/opcodes.h | 35 ++++----- src/str.h | 1 + src/vm.h | 3 +- 7 files changed, 143 insertions(+), 132 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 139c31ae..a8a17795 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -7,7 +7,13 @@ namespace pkpy{ inline PyObject* VM::run_frame(Frame* frame){ while(true){ - heap._auto_collect(this); // gc + /* NOTE: + * Be aware of accidental gc! 
+ * DO NOT leave any strong reference of PyObject* in the C stack + * For example, frame->popx() returns a strong reference which may be dangerous + * `Args` containing strong references is safe if it is passed to `call` or `fast_call` + */ + heap._auto_collect(this); const Bytecode& byte = frame->next_bytecode(); switch (byte.op) @@ -18,7 +24,7 @@ inline PyObject* VM::run_frame(Frame* frame){ case OP_DUP_TOP: frame->push(frame->top()); continue; case OP_ROT_TWO: std::swap(frame->top(), frame->top_1()); continue; case OP_PRINT_EXPR: { - PyObject* obj = frame->top(); // use top() here to avoid accidental gc + PyObject* obj = frame->top(); // use top() to avoid accidental gc if(obj != None) *_stdout << CAST(Str, asRepr(obj)) << '\n'; frame->pop(); } continue; @@ -28,7 +34,7 @@ inline PyObject* VM::run_frame(Frame* frame){ case OP_LOAD_TRUE: frame->push(True); continue; case OP_LOAD_FALSE: frame->push(False); continue; case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); continue; - case OP_LOAD_BUILTINS_EVAL: frame->push(builtins->attr(m_eval)); continue; + case OP_LOAD_BUILTIN_EVAL: frame->push(builtins->attr(m_eval)); continue; case OP_LOAD_FUNCTION: { PyObject* obj = frame->co->consts[byte.arg]; Function f = CAST(Function, obj); // copy it! 
@@ -67,9 +73,10 @@ inline PyObject* VM::run_frame(Frame* frame){ } continue; case OP_STORE_ATTR: { StrName name = frame->co->names[byte.arg]; - PyObject* a = frame->popx(); - PyObject* val = frame->popx(); + PyObject* a = frame->top(); + PyObject* val = frame->top_1(); setattr(a, name, val); + frame->pop_n(2); } continue; case OP_STORE_SUBSCR: { Args args(3); @@ -135,9 +142,12 @@ inline PyObject* VM::run_frame(Frame* frame){ frame->push(VAR(std::move(items))); } continue; case OP_BUILD_STRING: { - Args items = frame->popx_n_reversed(byte.arg); + // asStr() may run extra bytecode + // so we use top_n_reversed() in order to avoid accidental gc + Args items = frame->top_n_reversed(byte.arg); StrStream ss; for(int i=0; ipop_n(byte.arg); frame->push(VAR(ss.str())); } continue; /*****************************************/ @@ -177,7 +187,6 @@ inline PyObject* VM::run_frame(Frame* frame){ } continue; /*****************************************/ case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); continue; - case OP_SAFE_JUMP_ABSOLUTE: frame->jump_abs_safe(byte.arg); continue; case OP_POP_JUMP_IF_FALSE: if(!asBool(frame->popx())) frame->jump_abs(byte.arg); continue; @@ -195,13 +204,13 @@ inline PyObject* VM::run_frame(Frame* frame){ } continue; case OP_LOOP_BREAK: { int target = frame->co->blocks[byte.block].end; - frame->jump_abs_safe(target); + frame->jump_abs_break(target); } continue; case OP_GOTO: { StrName label = frame->co->names[byte.arg]; auto it = frame->co->labels.find(label); if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found"); - frame->jump_abs_safe(it->second); + frame->jump_abs_break(it->second); } continue; /*****************************************/ // TODO: examine this later @@ -226,139 +235,124 @@ inline PyObject* VM::run_frame(Frame* frame){ } continue; case OP_RETURN_VALUE: return frame->popx(); /*****************************************/ - - /*****************************************/ - case 
OP_SETUP_DECORATOR: continue; - - case OP_SETUP_CLOSURE: { - Function& f = CAST(Function&, frame->top()); // reference - f._closure = frame->_locals; - } continue; - case OP_BEGIN_CLASS: { - StrName name = frame->co->names[byte.arg]; - PyObject* clsBase = frame->popx(); - if(clsBase == None) clsBase = _t(tp_object); - check_type(clsBase, tp_type); - PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, clsBase)); - frame->push(cls); - } continue; - case OP_END_CLASS: { - PyObject* cls = frame->popx(); - cls->attr()._try_perfect_rehash(); - }; continue; - case OP_STORE_CLASS_ATTR: { - StrName name = frame->co->names[byte.arg]; - PyObject* obj = frame->popx(); - PyObject* cls = frame->top(); - cls->attr().set(name, obj); - } continue; - - case OP_UNARY_NEGATIVE: - frame->top() = num_negated(frame->top_value(this)); - continue; - case OP_UNARY_NOT: { - PyObject* obj = frame->pop_value(this); - PyObject* obj_bool = asBool(obj); - frame->push(VAR(!_CAST(bool, obj_bool))); - } continue; - - case OP_ASSERT: { - PyObject* _msg = frame->pop_value(this); - Str msg = CAST(Str, asStr(_msg)); - PyObject* expr = frame->pop_value(this); - if(asBool(expr) != True) _error("AssertionError", msg); - } continue; - case OP_EXCEPTION_MATCH: { - const auto& e = CAST(Exception&, frame->top()); - StrName name = frame->co->names[byte.arg].first; - frame->push(VAR(e.match_type(name))); - } continue; - case OP_RAISE: { - PyObject* obj = frame->pop_value(this); - Str msg = obj == None ? 
"" : CAST(Str, asStr(obj)); - StrName type = frame->co->names[byte.arg].first; - _error(type, msg); - } continue; - case OP_RE_RAISE: _raise(); continue; - case OP_LIST_APPEND: { - PyObject* obj = frame->pop_value(this); + PyObject* obj = frame->popx(); List& list = CAST(List&, frame->top_1()); - list.push_back(std::move(obj)); + list.push_back(obj); } continue; - case OP_MAP_ADD: { - PyObject* value = frame->pop_value(this); - PyObject* key = frame->pop_value(this); - call(frame->top_1(), __setitem__, Args{key, value}); + case OP_DICT_ADD: { + PyObject* kv = frame->popx(); + // we do copy here to avoid accidental gc in `kv` + // TODO: optimize to avoid copy + call(frame->top_1(), __setitem__, CAST(Tuple, kv)); } continue; case OP_SET_ADD: { - PyObject* obj = frame->pop_value(this); - call(frame->top_1(), "add", Args{obj}); - } continue; - case OP_UNARY_STAR: { - if(byte.arg > 0){ // rvalue - frame->top() = VAR(StarWrapper(frame->top_value(this), true)); - }else{ - PyRef_AS_C(frame->top()); // check ref - frame->top() = VAR(StarWrapper(frame->top(), false)); - } - } continue; - case OP_GET_ITER: { - PyObject* obj = frame->pop_value(this); - PyObject* iter = asIter(obj); - check_type(frame->top(), tp_ref); - PyIter_AS_C(iter)->loop_var = frame->pop(); - frame->push(std::move(iter)); + PyObject* obj = frame->popx(); + call(frame->top_1(), m_add, Args{obj}); } continue; + /*****************************************/ + case OP_UNARY_NEGATIVE: + frame->top() = num_negated(frame->top()); + continue; + case OP_UNARY_NOT: + frame->top() = VAR(!asBool(frame->top())); + continue; + case OP_UNARY_STAR: + frame->top() = VAR(StarWrapper(frame->top())); + continue; + /*****************************************/ + case OP_GET_ITER: + frame->top() = asIter(frame->top()); + continue; case OP_FOR_ITER: { BaseIter* it = PyIter_AS_C(frame->top()); PyObject* obj = it->next(); if(obj != nullptr){ - PyRef_AS_C(it->loop_var)->set(this, frame, std::move(obj)); + frame->push(obj); }else{ - 
int blockEnd = frame->co->blocks[byte.block].end; - frame->jump_abs_safe(blockEnd); + int target = frame->co->blocks[byte.block].end; + frame->jump_abs_break(target); } } continue; - - + /*****************************************/ case OP_IMPORT_NAME: { - StrName name = frame->co->names[byte.arg].first; + StrName name = frame->co->names[byte.arg]; PyObject* ext_mod = _modules.try_get(name); if(ext_mod == nullptr){ Str source; - auto it2 = _lazy_modules.find(name); - if(it2 == _lazy_modules.end()){ + auto it = _lazy_modules.find(name); + if(it == _lazy_modules.end()){ bool ok = false; source = _read_file_cwd(name.str() + ".py", &ok); if(!ok) _error("ImportError", "module " + name.str().escape(true) + " not found"); }else{ - source = it2->second; - _lazy_modules.erase(it2); + source = it->second; + _lazy_modules.erase(it); } CodeObject_ code = compile(source, name.str(), EXEC_MODE); PyObject* new_mod = new_module(name); _exec(code, new_mod); - frame->push(new_mod); new_mod->attr()._try_perfect_rehash(); - }else{ - frame->push(ext_mod); } + frame->push(ext_mod); } continue; - case OP_STORE_ALL_NAMES: { - PyObject* obj = frame->pop_value(this); + case OP_IMPORT_STAR: { + PyObject* obj = frame->popx(); for(auto& [name, value]: obj->attr().items()){ Str s = name.str(); if(s.empty() || s[0] == '_') continue; frame->f_globals().set(name, value); } }; continue; - case OP_YIELD_VALUE: return _py_op_yield; - // TODO: using "goto" inside with block may cause __exit__ not called - case OP_WITH_ENTER: call(frame->pop_value(this), __enter__, no_arg()); continue; - case OP_WITH_EXIT: call(frame->pop_value(this), __exit__, no_arg()); continue; - case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); continue; - case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); continue; + /*****************************************/ + /*****************************************/ + // case OP_SETUP_DECORATOR: continue; + // case OP_SETUP_CLOSURE: { + // Function& f = CAST(Function&, frame->top()); // 
reference + // f._closure = frame->_locals; + // } continue; + // case OP_BEGIN_CLASS: { + // StrName name = frame->co->names[byte.arg]; + // PyObject* clsBase = frame->popx(); + // if(clsBase == None) clsBase = _t(tp_object); + // check_type(clsBase, tp_type); + // PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, clsBase)); + // frame->push(cls); + // } continue; + // case OP_END_CLASS: { + // PyObject* cls = frame->popx(); + // cls->attr()._try_perfect_rehash(); + // }; continue; + // case OP_STORE_CLASS_ATTR: { + // StrName name = frame->co->names[byte.arg]; + // PyObject* obj = frame->popx(); + // PyObject* cls = frame->top(); + // cls->attr().set(name, obj); + // } continue; + // case OP_ASSERT: { + // PyObject* _msg = frame->pop_value(this); + // Str msg = CAST(Str, asStr(_msg)); + // PyObject* expr = frame->pop_value(this); + // if(asBool(expr) != True) _error("AssertionError", msg); + // } continue; + // case OP_EXCEPTION_MATCH: { + // const auto& e = CAST(Exception&, frame->top()); + // StrName name = frame->co->names[byte.arg].first; + // frame->push(VAR(e.match_type(name))); + // } continue; + // case OP_RAISE: { + // PyObject* obj = frame->pop_value(this); + // Str msg = obj == None ? 
"" : CAST(Str, asStr(obj)); + // StrName type = frame->co->names[byte.arg].first; + // _error(type, msg); + // } continue; + // case OP_RE_RAISE: _raise(); continue; + // case OP_YIELD_VALUE: return _py_op_yield; + // // TODO: using "goto" inside with block may cause __exit__ not called + // case OP_WITH_ENTER: call(frame->pop_value(this), __enter__, no_arg()); continue; + // case OP_WITH_EXIT: call(frame->pop_value(this), __exit__, no_arg()); continue; + // case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); continue; + // case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); continue; default: throw std::runtime_error(Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); } } diff --git a/src/expr.h b/src/expr.h index fdf8c555..c8ef18f7 100644 --- a/src/expr.h +++ b/src/expr.h @@ -425,7 +425,7 @@ struct FStringExpr: Expr{ ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); size++; } - ctx->emit(OP_LOAD_BUILTINS_EVAL, BC_NOARG, line); + ctx->emit(OP_LOAD_BUILTIN_EVAL, BC_NOARG, line); ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(m[1].str())), line); ctx->emit(OP_CALL, 1, line); size++; diff --git a/src/frame.h b/src/frame.h index 1440b2d1..072719b8 100644 --- a/src/frame.h +++ b/src/frame.h @@ -82,6 +82,13 @@ struct Frame { return _data[_data.size()-2]; } + PyObject*& top_2(){ +#if DEBUG_EXTRA_CHECK + if(_data.size() < 3) throw std::runtime_error("_data.size() < 3"); +#endif + return _data[_data.size()-3]; + } + template void push(T&& obj){ _data.push_back(std::forward(obj)); } @@ -98,7 +105,7 @@ struct Frame { bool jump_to_exception_handler(){ if(s_try_block.empty()) return false; - PyObject* obj = pop(); + PyObject* obj = popx(); auto& p = s_try_block.back(); _data = std::move(p.second); _data.push_back(obj); @@ -113,7 +120,7 @@ struct Frame { return co->blocks[i].parent; } - void jump_abs_safe(int target){ + void jump_abs_break(int target){ const Bytecode& prev = co->codes[_ip]; int i = prev.block; _next_ip = target; @@ -132,6 +139,16 @@ struct 
Frame { return v; } + Args top_n_reversed(int n){ + Args v(n); + for(int i=0; i; diff --git a/src/opcodes.h b/src/opcodes.h index 98c0e425..07325b0c 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -1,18 +1,6 @@ #ifdef OPCODE -OPCODE(UNARY_NEGATIVE) -OPCODE(UNARY_NOT) -OPCODE(UNARY_STAR) - -OPCODE(LIST_APPEND) -OPCODE(MAP_ADD) -OPCODE(SET_ADD) - -OPCODE(IMPORT_NAME) - -OPCODE(GET_ITER) -OPCODE(FOR_ITER) - +/**************************/ OPCODE(WITH_ENTER) OPCODE(WITH_EXIT) @@ -30,13 +18,11 @@ OPCODE(YIELD_VALUE) OPCODE(SETUP_CLOSURE) OPCODE(SETUP_DECORATOR) -OPCODE(STORE_ALL_NAMES) OPCODE(BEGIN_CLASS) OPCODE(END_CLASS) OPCODE(STORE_CLASS_ATTR) - /**************************/ OPCODE(NO_OP) /**************************/ @@ -50,7 +36,7 @@ OPCODE(LOAD_NONE) OPCODE(LOAD_TRUE) OPCODE(LOAD_FALSE) OPCODE(LOAD_ELLIPSIS) -OPCODE(LOAD_BUILTINS_EVAL) +OPCODE(LOAD_BUILTIN_EVAL) OPCODE(LOAD_FUNCTION) /**************************/ OPCODE(LOAD_NAME) @@ -81,7 +67,6 @@ OPCODE(IS_OP) OPCODE(CONTAINS_OP) /**************************/ OPCODE(JUMP_ABSOLUTE) -OPCODE(SAFE_JUMP_ABSOLUTE) OPCODE(POP_JUMP_IF_FALSE) OPCODE(JUMP_IF_TRUE_OR_POP) OPCODE(JUMP_IF_FALSE_OR_POP) @@ -95,5 +80,19 @@ OPCODE(CALL_KWARGS) OPCODE(CALL_KWARGS_UNPACK) OPCODE(RETURN_VALUE) /**************************/ - +OPCODE(LIST_APPEND) +OPCODE(DICT_ADD) +OPCODE(SET_ADD) +/**************************/ +OPCODE(UNARY_NEGATIVE) +OPCODE(UNARY_NOT) +OPCODE(UNARY_STAR) +/**************************/ +OPCODE(GET_ITER) +OPCODE(FOR_ITER) +/**************************/ +OPCODE(IMPORT_NAME) +OPCODE(IMPORT_STAR) +/**************************/ +/**************************/ #endif \ No newline at end of file diff --git a/src/str.h b/src/str.h index 9cb4a001..53d71297 100644 --- a/src/str.h +++ b/src/str.h @@ -211,6 +211,7 @@ const StrName m_eval = StrName::get("eval"); const StrName m_self = StrName::get("self"); const StrName m_dict = StrName::get("dict"); const StrName m_set = StrName::get("set"); +const StrName m_add = StrName::get("add"); 
const StrName __enter__ = StrName::get("__enter__"); const StrName __exit__ = StrName::get("__exit__"); diff --git a/src/vm.h b/src/vm.h index cf868ba3..12b3be44 100644 --- a/src/vm.h +++ b/src/vm.h @@ -566,7 +566,7 @@ inline Str VM::disassemble(CodeObject_ co){ std::vector jumpTargets; for(auto byte : co->codes){ - if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_SAFE_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ + if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ jumpTargets.push_back(byte.arg); } } @@ -682,6 +682,7 @@ inline void VM::init_builtin_types(){ for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); } +// TODO: args here may be garbage collected accidentally inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCall){ if(is_type(callable, tp_type)){ PyObject* new_f = callable->attr().try_get(__new__); From ebe319332748baa2bf7c0e4374b7a1775965c7b5 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 2 Apr 2023 21:05:24 +0800 Subject: [PATCH 33/73] up --- src/codeobject.h | 1 + src/compiler.h | 83 ++++++++++++++++++++++++------------------------ src/expr.h | 14 ++++++-- 3 files changed, 54 insertions(+), 44 deletions(-) diff --git a/src/codeobject.h b/src/codeobject.h index e851fb31..1795f2c5 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -61,6 +61,7 @@ struct CodeObject { std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; + // may be.. just use a large NameDict? 
uint32_t perfect_locals_capacity = 2; uint32_t perfect_hash_seed = 0; diff --git a/src/compiler.h b/src/compiler.h index 08068351..6b3dbae6 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -1,13 +1,6 @@ #pragma once -#include "codeobject.h" -#include "common.h" -#include "lexer.h" -#include "error.h" -#include "ceval.h" #include "expr.h" -#include "obj.h" -#include "str.h" namespace pkpy{ @@ -21,9 +14,9 @@ struct PrattRule{ }; class Compiler { + inline static PrattRule rules[kTokenCount]; std::unique_ptr lexer; stack contexts; - std::map rules; VM* vm; bool used; // for parsing token stream @@ -33,7 +26,6 @@ class Compiler { const Token& prev() { return tokens.at(i-1); } const Token& curr() { return tokens.at(i); } const Token& next() { return tokens.at(i+1); } - const Token& peek(int offset) { return tokens.at(i+offset); } void advance() { i++; } CodeEmitContext* ctx() { return &contexts.top(); } @@ -49,7 +41,7 @@ class Compiler { void pop_context(){ if(!ctx()->s_expr.empty()) UNREACHABLE(); - // if last instruction is not return, add a default return None + // if the last op does not return, add a default return None if(ctx()->co->codes.back().op != OP_RETURN_VALUE){ ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); @@ -58,14 +50,7 @@ class Compiler { contexts.pop(); } -public: - Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ - this->vm = vm; - this->used = false; - this->lexer = std::make_unique( - make_sp(source, filename, mode) - ); - + static void init_pratt_rules(){ // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ #define METHOD(name) &Compiler::name #define NO_INFIX nullptr, PREC_NONE @@ -112,22 +97,8 @@ public: rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; #undef METHOD #undef NO_INFIX - - // rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - // rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), 
PREC_ASSIGNMENT }; - // rules[TK("-=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; } -private: bool match(TokenIndex expected) { if (curr().type != expected) return false; advance(); @@ -206,7 +177,7 @@ private: consume(TK(":")); } e->func.code = push_context(lexer->src, ""); - EXPR(true); // https://github.com/blueloveTH/pocketpy/issues/37 + EXPR(false); // https://github.com/blueloveTH/pocketpy/issues/37 ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); ctx()->s_expr.push(std::move(e)); @@ -464,8 +435,8 @@ private: Str _compile_import() { consume(TK("@id")); Str name = prev().str(); - int index = ctx()->add_name(name, NAME_SPECIAL); - ctx()->emit(OP_IMPORT_NAME, index, peek(-2).line); + int index = ctx()->add_name(name); + ctx()->emit(OP_IMPORT_NAME, index, prev().line); return name; } @@ -525,8 +496,9 @@ private: if(!push_stack) ctx()->emit_expr(); } + // PASS void compile_if_stmt() { - EXPR(true); // condition + EXPR(false); // condition int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); if (match(TK("elif"))) { @@ -544,9 +516,10 @@ private: } } + // PASS void compile_while_loop() { ctx()->enter_block(WHILE_LOOP); - EXPR(true); // condition + EXPR(false); // condition int patch 
= ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); @@ -556,12 +529,17 @@ private: void compile_for_loop() { EXPR_TUPLE(); - ctx()->emit_lvalue(); + Expr_ vars = ctx()->s_expr.popx(); consume(TK("in")); - EXPR(true); + EXPR(false); ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); ctx()->enter_block(FOR_LOOP); ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); + // set variables and handle implicit unpack + bool ok = vars->emit_store(ctx()); + // this error occurs in `vars` instead of this line + // but...nevermind + if(!ok) SyntaxError(); compile_block_body(); ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); ctx()->exit_block(); @@ -596,7 +574,7 @@ private: } void compile_decorated(){ - EXPR(true); + EXPR(false); if(!match_newlines(mode()==REPL_MODE)) SyntaxError(); ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line); consume(TK("def")); @@ -680,7 +658,7 @@ private: consume(TK("@id")); int dummy_t = ctx()->add_name(prev().str(), NAME_SPECIAL); if(match(TK("(")) && !match(TK(")"))){ - EXPR(true); consume(TK(")")); + EXPR(false); consume(TK(")")); }else{ ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); } @@ -697,7 +675,7 @@ private: case TK("with"): { // TODO: reimpl this UNREACHABLE(); - // EXPR(true); + // EXPR(false); // consume(TK("as")); // consume(TK("@id")); // int index = ctx()->add_name(prev().str(), name_scope()); @@ -859,6 +837,27 @@ private: void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); } public: + Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ + this->vm = vm; + this->used = false; + this->lexer = std::make_unique( + make_sp(source, filename, mode) + ); + if(rules.empty()) init_pratt_rules(); + // rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; + // rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("-=")] = { nullptr, 
METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + // rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + } + CodeObject_ compile(){ if(used) UNREACHABLE(); used = true; diff --git a/src/expr.h b/src/expr.h index c8ef18f7..d3007139 100644 --- a/src/expr.h +++ b/src/expr.h @@ -6,7 +6,6 @@ #include "error.h" #include "ceval.h" - namespace pkpy{ struct CodeEmitContext; @@ -369,7 +368,18 @@ struct TupleExpr: SequenceExpr{ Opcode opcode() const override { return OP_BUILD_TUPLE; } bool emit_store(CodeEmitContext* ctx) override { - // ... 
+ // assume TOS is an iterable + // unpack it and emit several OP_STORE + // https://docs.python.org/3/library/dis.html#opcode-UNPACK_SEQUENCE + // https://docs.python.org/3/library/dis.html#opcode-UNPACK_EX + return true; + } + + bool emit_del(CodeEmitContext* ctx) override{ + for(auto& e: items){ + bool ok = e->emit_del(ctx); + if(!ok) return false; + } return true; } }; From 718ba988d5e56f28257552afce50cc47dd0405ec Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 2 Apr 2023 21:37:10 +0800 Subject: [PATCH 34/73] up --- src/compiler.h | 95 +++++++++++++++++++++++++------------------------- src/expr.h | 38 ++++++++++++++------ src/opcodes.h | 2 ++ 3 files changed, 77 insertions(+), 58 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 6b3dbae6..5d62c235 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -448,8 +448,9 @@ class Compiler { consume(TK("@id")); name = prev().str(); } - int index = ctx()->add_name(name, name_scope()); - ctx()->emit(OP_STORE_NAME, index, prev().line); + int index = ctx()->add_name(name); + auto op = name_scope()==NAME_LOCAL ? OP_STORE_LOCAL : OP_STORE_GLOBAL; + ctx()->emit(op, index, prev().line); } while (match(TK(","))); consume_end_stmt(); } @@ -459,8 +460,8 @@ class Compiler { _compile_import(); consume(TK("import")); if (match(TK("*"))) { - if(name_scope() != NAME_GLOBAL) SyntaxError("import * can only be used in global scope"); - ctx()->emit(OP_STORE_ALL_NAMES, BC_NOARG, prev().line); + if(name_scope() != NAME_GLOBAL) SyntaxError("import * should be used in global scope"); + ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line); consume_end_stmt(); return; } @@ -475,7 +476,8 @@ class Compiler { name = prev().str(); } index = ctx()->add_name(name); - ctx()->emit(OP_STORE_GLOBAL, index, prev().line); + auto op = name_scope()==NAME_LOCAL ? 
OP_STORE_LOCAL : OP_STORE_GLOBAL; + ctx()->emit(op, index, prev().line); } while (match(TK(","))); ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); consume_end_stmt(); @@ -527,6 +529,7 @@ class Compiler { ctx()->exit_block(); } + // PASS void compile_for_loop() { EXPR_TUPLE(); Expr_ vars = ctx()->s_expr.popx(); @@ -535,42 +538,39 @@ class Compiler { ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); ctx()->enter_block(FOR_LOOP); ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); - // set variables and handle implicit unpack bool ok = vars->emit_store(ctx()); - // this error occurs in `vars` instead of this line - // but...nevermind - if(!ok) SyntaxError(); + if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind compile_block_body(); ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); ctx()->exit_block(); } void compile_try_except() { - ctx()->enter_block(TRY_EXCEPT); - ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, prev().line); - compile_block_body(); - ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE); - std::vector patches = { - ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) - }; - ctx()->exit_block(); + // ctx()->enter_block(TRY_EXCEPT); + // ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, prev().line); + // compile_block_body(); + // ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE); + // std::vector patches = { + // ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) + // }; + // ctx()->exit_block(); - do { - consume(TK("except")); - if(match(TK("@id"))){ - int name_idx = ctx()->add_name(prev().str(), NAME_SPECIAL); - emit(OP_EXCEPTION_MATCH, name_idx); - }else{ - emit(OP_LOAD_TRUE); - } - int patch = emit(OP_POP_JUMP_IF_FALSE); - emit(OP_POP_TOP); // pop the exception on match - compile_block_body(); - patches.push_back(emit(OP_JUMP_ABSOLUTE)); - patch_jump(patch); - }while(curr().type == TK("except")); - emit(OP_RE_RAISE); // no match, re-raise - for (int patch : patches) patch_jump(patch); + // do { + // 
consume(TK("except")); + // if(match(TK("@id"))){ + // int name_idx = ctx()->add_name(prev().str(), NAME_SPECIAL); + // emit(OP_EXCEPTION_MATCH, name_idx); + // }else{ + // emit(OP_LOAD_TRUE); + // } + // int patch = emit(OP_POP_JUMP_IF_FALSE); + // emit(OP_POP_TOP); // pop the exception on match + // compile_block_body(); + // patches.push_back(emit(OP_JUMP_ABSOLUTE)); + // patch_jump(patch); + // }while(curr().type == TK("except")); + // emit(OP_RE_RAISE); // no match, re-raise + // for (int patch : patches) patch_jump(patch); } void compile_decorated(){ @@ -650,13 +650,13 @@ class Compiler { case TK("global"): do { consume(TK("@id")); - co()->global_names.insert(prev().str()); + ctx()->co->global_names.insert(prev().str()); } while (match(TK(","))); consume_end_stmt(); break; case TK("raise"): { consume(TK("@id")); - int dummy_t = ctx()->add_name(prev().str(), NAME_SPECIAL); + int dummy_t = ctx()->add_name(prev().str()); if(match(TK("(")) && !match(TK(")"))){ EXPR(false); consume(TK(")")); }else{ @@ -688,17 +688,17 @@ class Compiler { } break; /*************************************************/ // TODO: refactor goto/label use special $ syntax - case TK("label"): + case TK("label"): { if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); bool ok = ctx()->add_label(prev().str()); if(!ok) SyntaxError("label " + prev().str().escape(true) + " already exists"); consume_end_stmt(); - break; + } break; case TK("goto"): if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); - emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL)); + ctx()->emit(OP_GOTO, ctx()->add_name(prev().str()), prev().line); consume_end_stmt(); break; /*************************************************/ @@ -707,9 +707,9 @@ class Compiler { EXPR_TUPLE(true); if(!try_compile_assignment()){ if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){ - emit(OP_PRINT_EXPR, BC_NOARG, 
BC_KEEPLINE); + ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE); }else{ - emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); } } consume_end_stmt(); @@ -717,21 +717,22 @@ class Compiler { } } + // PASS void compile_class(){ consume(TK("@id")); - int cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL); - int super_cls_name_idx = -1; + int namei = ctx()->add_name(prev().str()); + int super_namei = -1; if(match(TK("(")) && match(TK("@id"))){ - super_cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL); + super_namei = ctx()->add_name(prev().str()); consume(TK(")")); } - if(super_cls_name_idx == -1) emit(OP_LOAD_NONE); - else emit(OP_LOAD_NAME, super_cls_name_idx); - emit(OP_BEGIN_CLASS, cls_name_idx); + if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line); + else ctx()->emit(OP_LOAD_NAME, super_namei, prev().line); + ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE); ctx()->is_compiling_class = true; compile_block_body(); ctx()->is_compiling_class = false; - emit(OP_END_CLASS); + ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE); } void _compile_f_args(Function& func, bool enable_type_hints){ diff --git a/src/expr.h b/src/expr.h index d3007139..977bf1d4 100644 --- a/src/expr.h +++ b/src/expr.h @@ -23,7 +23,7 @@ struct Expr{ virtual bool emit_del(CodeEmitContext* ctx) { return false; } // for OP_STORE_XXX - virtual bool emit_store(CodeEmitContext* ctx) { return false; } + [[nodiscard]] virtual bool emit_store(CodeEmitContext* ctx) { return false; } }; struct CodeEmitContext{ @@ -153,15 +153,12 @@ struct StarredExpr: Expr{ void emit(CodeEmitContext* ctx) override { child->emit(ctx); - // as a rvalue, we should do unpack here - //ctx->emit(OP_UNARY_STAR, (int)false, line); + ctx->emit(OP_UNARY_STAR, BC_NOARG, line); } bool emit_store(CodeEmitContext* ctx) override { - child->emit(ctx); - // as a lvalue, we should do pack here - //ctx->emit(OP_UNARY_STAR, (int)true, line); - return true; + // simply proxy 
to child + return child->emit_store(ctx); } }; @@ -368,10 +365,29 @@ struct TupleExpr: SequenceExpr{ Opcode opcode() const override { return OP_BUILD_TUPLE; } bool emit_store(CodeEmitContext* ctx) override { - // assume TOS is an iterable - // unpack it and emit several OP_STORE - // https://docs.python.org/3/library/dis.html#opcode-UNPACK_SEQUENCE - // https://docs.python.org/3/library/dis.html#opcode-UNPACK_EX + // TOS is an iterable + // items may contain StarredExpr, we should check it + int starred_i = -1; + for(int i=0; iis_starred()) continue; + if(starred_i == -1) starred_i = i; + else return false; // multiple StarredExpr not allowed + } + + if(starred_i == -1){ + // Unpacks TOS into count individual values, which are put onto the stack right-to-left. + ctx->emit(OP_UNPACK_SEQUENCE, items.size(), line); + }else{ + // starred assignment target must be in a tuple + if(items.size() == 1) return false; + // starred assignment target must be the last one (differ from CPython) + if(starred_i != items.size()-1) return false; + ctx->emit(OP_UNPACK_EX, items.size()-1, line); + } + for(auto& e: items){ + bool ok = e->emit_store(ctx); + if(!ok) return false; + } return true; } diff --git a/src/opcodes.h b/src/opcodes.h index 07325b0c..ffd63177 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -94,5 +94,7 @@ OPCODE(FOR_ITER) OPCODE(IMPORT_NAME) OPCODE(IMPORT_STAR) /**************************/ +OPCODE(UNPACK_SEQUENCE) +OPCODE(UNPACK_EX) /**************************/ #endif \ No newline at end of file From 6bb307734c74c3ef6e0aae2bde15852db9146526 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 2 Apr 2023 22:33:47 +0800 Subject: [PATCH 35/73] up --- src/compiler.h | 52 +++++++++++++----- src/expr.h | 96 ++++++++++++-------------------- src/iter.h | 41 +++++++------- src/lexer.h | 27 +++++---- src/obj.h | 6 +- src/vm.h | 146 +++++++++++++++++++++++-------------------------- 6 files changed, 178 insertions(+), 190 deletions(-) diff --git a/src/compiler.h 
b/src/compiler.h index 5d62c235..dc898d6c 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -582,20 +582,41 @@ class Compiler { } bool try_compile_assignment(){ - // switch (op) { - // case TK("+="): emit(OP_BINARY_OP, 0); break; - // case TK("-="): emit(OP_BINARY_OP, 1); break; - // case TK("*="): emit(OP_BINARY_OP, 2); break; - // case TK("/="): emit(OP_BINARY_OP, 3); break; - // case TK("//="): emit(OP_BINARY_OP, 4); break; - // case TK("%="): emit(OP_BINARY_OP, 5); break; - // case TK("<<="): emit(OP_BITWISE_OP, 0); break; - // case TK(">>="): emit(OP_BITWISE_OP, 1); break; - // case TK("&="): emit(OP_BITWISE_OP, 2); break; - // case TK("|="): emit(OP_BITWISE_OP, 3); break; - // case TK("^="): emit(OP_BITWISE_OP, 4); break; - // default: UNREACHABLE(); - // } + Expr_ lhs = ctx()->s_expr.popx(); + switch (curr().type) { + // case TK("+="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 0); break; + // case TK("-="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 1); break; + // case TK("*="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 2); break; + // case TK("/="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 3); break; + // case TK("//="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 4); break; + // case TK("%="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 5); break; + // case TK("<<="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 0); break; + // case TK(">>="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 1); break; + // case TK("&="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 2); break; + // case TK("|="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 3); break; + // case TK("^="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 4); break; + // case TK("="): advance(); break; + case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="): + case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): { + advance(); + auto e = make_expr(); + e->op = prev().type; + e->lhs = 
lhs; // here should be a copy + EXPR_TUPLE(); + e->rhs = ctx()->s_expr.popx(); + // ... + } break; + case TK("="): advance(); break; + default: return false; + } + if(prev().type == TK("=")){ + EXPR_TUPLE(); + Expr_ rhs = ctx()->s_expr.popx(); + // do assign here + // lhs = rhs + return true; + } + return true; } void compile_stmt() { @@ -704,8 +725,9 @@ class Compiler { /*************************************************/ // handle dangling expression or assignment default: { - EXPR_TUPLE(true); + EXPR_TUPLE(); if(!try_compile_assignment()){ + ctx()->emit_expr(); if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){ ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE); }else{ diff --git a/src/expr.h b/src/expr.h index 977bf1d4..a4c0ea0b 100644 --- a/src/expr.h +++ b/src/expr.h @@ -18,6 +18,7 @@ struct Expr{ virtual std::vector children() const { return {}; } virtual bool is_starred() const { return false; } + virtual bool is_literal() const { return false; } // for OP_DELETE_XXX virtual bool emit_del(CodeEmitContext* ctx) { return false; } @@ -162,20 +163,6 @@ struct StarredExpr: Expr{ } }; -// PASS -struct NegatedExpr: Expr{ - Expr_ child; - NegatedExpr(Expr_&& child): child(std::move(child)) {} - Str str() const override { return "-"; } - - std::vector children() const override { return {child.get()}; } - - void emit(CodeEmitContext* ctx) override { - child->emit(ctx); - ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line); - } -}; - // PASS struct NotExpr: Expr{ Expr_ child; @@ -265,19 +252,48 @@ struct LiteralExpr: Expr{ if(std::holds_alternative(value)){ obj = VAR(std::get(value)); } - if(std::holds_alternative(value)){ obj = VAR(std::get(value)); } - if(std::holds_alternative(value)){ obj = VAR(std::get(value)); } - - if(!obj) UNREACHABLE(); + if(obj == nullptr) UNREACHABLE(); int index = ctx->add_const(obj); ctx->emit(OP_LOAD_CONST, index, line); } + + bool is_literal() const override { return true; } +}; + +// PASS +struct NegatedExpr: Expr{ + Expr_ child; + 
NegatedExpr(Expr_&& child): child(std::move(child)) {} + Str str() const override { return "-"; } + + std::vector children() const override { return {child.get()}; } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + // if child is a int of float, do constant folding + if(child->is_literal()){ + LiteralExpr* lit = static_cast(child.get()); + PyObject* obj = nullptr; + if(std::holds_alternative(lit->value)){ + obj = VAR(std::get(lit->value)); + } + if(std::holds_alternative(lit->value)){ + obj = VAR(std::get(lit->value)); + } + if(obj != nullptr){ + ctx->emit(OP_LOAD_CONST, ctx()->add_const(obj), line); + return; + } + } + child->emit(ctx); + ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line); + } }; // PASS @@ -629,46 +645,4 @@ struct TernaryExpr: Expr{ }; -} // namespace pkpy - - -// struct TupleRef : BaseRef { -// Tuple objs; -// TupleRef(Tuple&& objs) : objs(std::move(objs)) {} - -// PyObject* get(VM* vm, Frame* frame) const{ -// Tuple args(objs.size()); -// for (int i = 0; i < objs.size(); i++) { -// args[i] = vm->PyRef_AS_C(objs[i])->get(vm, frame); -// } -// return VAR(std::move(args)); -// } - -// void set(VM* vm, Frame* frame, PyObject* val) const{ -// val = vm->asIter(val); -// BaseIter* iter = vm->PyIter_AS_C(val); -// for(int i=0; itp_star_wrapper)){ -// auto& star = _CAST(StarWrapper&, objs[i]); -// if(star.rvalue) vm->ValueError("can't use starred expression here"); -// if(i != objs.size()-1) vm->ValueError("* can only be used at the end"); -// auto ref = vm->PyRef_AS_C(star.obj); -// List list; -// while((x = iter->next()) != nullptr) list.push_back(x); -// ref->set(vm, frame, VAR(std::move(list))); -// return; -// }else{ -// x = iter->next(); -// if(x == nullptr) vm->ValueError("not enough values to unpack"); -// vm->PyRef_AS_C(objs[i])->set(vm, frame, x); -// } -// } -// PyObject* x = iter->next(); -// if(x != nullptr) vm->ValueError("too many values to unpack"); -// } - -// void del(VM* vm, Frame* frame) const{ -// for(int i=0; 
iPyRef_AS_C(objs[i])->del(vm, frame); -// } -// }; \ No newline at end of file +} // namespace pkpy \ No newline at end of file diff --git a/src/iter.h b/src/iter.h index f8ecdb5c..25b20176 100644 --- a/src/iter.h +++ b/src/iter.h @@ -6,10 +6,10 @@ namespace pkpy{ class RangeIter : public BaseIter { i64 current; - Range r; + Range r; // copy by value, so we don't need to keep ref public: - RangeIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) { - this->r = OBJ_GET(Range, _ref); + RangeIter(VM* vm, PyObject* ref) : BaseIter(vm) { + this->r = OBJ_GET(Range, ref); this->current = r.start; } @@ -26,28 +26,37 @@ public: template class ArrayIter : public BaseIter { - size_t index = 0; - const T* p; + int index; + PyObject* ref; public: - ArrayIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) { p = &OBJ_GET(T, _ref);} - PyObject* next(){ + ArrayIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {} + + PyObject* next() override{ + const T* p = &OBJ_GET(T, ref); if(index == p->size()) return nullptr; return p->operator[](index++); } + + void _mark() override { + OBJ_MARK(ref); + } }; class StringIter : public BaseIter { int index = 0; - Str* str; + PyObject* ref; public: - StringIter(VM* vm, PyObject* _ref) : BaseIter(vm, _ref) { - str = &OBJ_GET(Str, _ref); - } + StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref) {} - PyObject* next() { + PyObject* next() override{ + Str* str = &OBJ_GET(Str, ref); if(index == str->u8_length()) return nullptr; return VAR(str->u8_getitem(index++)); } + + void _mark() override { + OBJ_MARK(ref); + } }; inline PyObject* Generator::next(){ @@ -58,20 +67,14 @@ inline PyObject* Generator::next(){ frame = std::move(vm->callstack.top()); vm->callstack.pop(); state = 1; - return frame->pop_value(vm); + return frame->popx(); }else{ state = 2; return nullptr; } } -inline void BaseIter::_mark() { - if(_ref != nullptr) OBJ_MARK(_ref); - if(loop_var != nullptr) OBJ_MARK(loop_var); -} - inline void Generator::_mark(){ - 
BaseIter::_mark(); if(frame!=nullptr) frame->_mark(); } diff --git a/src/lexer.h b/src/lexer.h index 7551421e..74c8f96b 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -9,21 +9,23 @@ namespace pkpy{ typedef uint8_t TokenIndex; constexpr const char* kTokens[] = { + "is not", "not in", "@eof", "@eol", "@sof", - ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::", - "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->", - "<<", ">>", "&", "|", "^", "?", "@", - "==", "!=", ">=", "<=", - "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=", + "@id", "@num", "@str", "@fstr", + "@indent", "@dedent", + /*****************************************/ + "+", "+=", "-", "-=", // (INPLACE_OP - 1) can get '=' removed + "*", "*=", "/", "/=", "//", "//=", "%", "%=", + "&", "&=", "|", "|=", "^", "^=", + "<<", "<<=", ">>", ">>=", + /*****************************************/ + ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "::", + "**", "=", ">", "<", "...", "->", "?", "@", "==", "!=", ">=", "<=", /** KW_BEGIN **/ "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally", "goto", "label", // extended keywords, not available in cpython - "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", - /** KW_END **/ - "is not", "not in", - "@id", "@num", "@str", "@fstr", - "@indent", "@dedent" + "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise" }; using TokenValue = std::variant; @@ -40,12 +42,9 @@ constexpr TokenIndex TK(const char token[]) { } #define TK_STR(t) kTokens[t] -const TokenIndex kTokenKwBegin = TK("class"); -const TokenIndex kTokenKwEnd = TK("raise"); - const std::map kTokenKwMap = [](){ std::map map; - for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k; + for(int k=TK("class"); k&& frame) - : BaseIter(vm, nullptr), frame(std::move(frame)), 
state(0) {} + : BaseIter(vm), frame(std::move(frame)), state(0) {} PyObject* next() override; void _mark() override; @@ -351,19 +351,11 @@ inline PyObject* NativeFunc::operator()(VM* vm, Args& args) const{ } inline void CodeObject::optimize(VM* vm){ - std::vector keys; - for(auto& p: names) if(p.second == NAME_LOCAL) keys.push_back(p.first); - uint32_t base_n = (uint32_t)(keys.size() / kLocalsLoadFactor + 0.5); + // here we simple pass all names, but only some of them are NAME_LOCAL + // TODO: ... + uint32_t base_n = (uint32_t)(names.size() / kLocalsLoadFactor + 0.5); perfect_locals_capacity = find_next_capacity(base_n); - perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, keys); - - for(int i=1; inum_negated(consts[pos]); - } - } + perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, names); // pre-compute sn in co_consts for(int i=0; i= n) return s.substr(0, n); - return s + std::string(n - s.size(), ' '); - }; + return ""; + // auto pad = [](const Str& s, const int n){ + // if(s.size() >= n) return s.substr(0, n); + // return s + std::string(n - s.size(), ' '); + // }; - std::vector jumpTargets; - for(auto byte : co->codes){ - if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ - jumpTargets.push_back(byte.arg); - } - } - StrStream ss; - ss << std::string(54, '-') << '\n'; - ss << co->name << ":\n"; - int prev_line = -1; - for(int i=0; icodes.size(); i++){ - const Bytecode& byte = co->codes[i]; - if(byte.op == OP_NO_OP) continue; - Str line = std::to_string(byte.line); - if(byte.line == prev_line) line = ""; - else{ - if(prev_line != -1) ss << "\n"; - prev_line = byte.line; - } + // std::vector jumpTargets; + // for(auto byte : co->codes){ + // if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ + // jumpTargets.push_back(byte.arg); + // } + // } + // StrStream ss; + // ss << std::string(54, '-') << '\n'; + // ss << co->name << ":\n"; + // int prev_line = -1; + // for(int i=0; icodes.size(); i++){ + 
// const Bytecode& byte = co->codes[i]; + // if(byte.op == OP_NO_OP) continue; + // Str line = std::to_string(byte.line); + // if(byte.line == prev_line) line = ""; + // else{ + // if(prev_line != -1) ss << "\n"; + // prev_line = byte.line; + // } - std::string pointer; - if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){ - pointer = "-> "; - }else{ - pointer = " "; - } - ss << pad(line, 8) << pointer << pad(std::to_string(i), 3); - ss << " " << pad(OP_NAMES[byte.op], 20) << " "; - // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5); - std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); - if(byte.op == OP_LOAD_CONST){ - argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; - } - if(byte.op == OP_LOAD_NAME_REF || byte.op == OP_LOAD_NAME || byte.op == OP_RAISE || byte.op == OP_STORE_NAME){ - argStr += " (" + co->names[byte.arg].first.str().escape(true) + ")"; - } - ss << argStr; - // ss << pad(argStr, 20); // may overflow - // ss << co->blocks[byte.block].to_string(); - if(i != co->codes.size() - 1) ss << '\n'; - } - StrStream consts; - consts << "co_consts: "; - consts << CAST(Str, asRepr(VAR(co->consts))); + // std::string pointer; + // if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){ + // pointer = "-> "; + // }else{ + // pointer = " "; + // } + // ss << pad(line, 8) << pointer << pad(std::to_string(i), 3); + // ss << " " << pad(OP_NAMES[byte.op], 20) << " "; + // // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5); + // std::string argStr = byte.arg == -1 ? 
"" : std::to_string(byte.arg); + // if(byte.op == OP_LOAD_CONST){ + // argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; + // } + // if(byte.op == OP_LOAD_NAME_REF || byte.op == OP_LOAD_NAME || byte.op == OP_RAISE || byte.op == OP_STORE_NAME){ + // argStr += " (" + co->names[byte.arg].first.str().escape(true) + ")"; + // } + // ss << argStr; + // // ss << pad(argStr, 20); // may overflow + // // ss << co->blocks[byte.block].to_string(); + // if(i != co->codes.size() - 1) ss << '\n'; + // } + // StrStream consts; + // consts << "co_consts: "; + // consts << CAST(Str, asRepr(VAR(co->consts))); - StrStream names; - names << "co_names: "; - List list; - for(int i=0; inames.size(); i++){ - list.push_back(VAR(co->names[i].first.str())); - } - names << CAST(Str, asRepr(VAR(list))); - ss << '\n' << consts.str() << '\n' << names.str() << '\n'; + // StrStream names; + // names << "co_names: "; + // List list; + // for(int i=0; inames.size(); i++){ + // list.push_back(VAR(co->names[i].first.str())); + // } + // names << CAST(Str, asRepr(VAR(list))); + // ss << '\n' << consts.str() << '\n' << names.str() << '\n'; - for(int i=0; iconsts.size(); i++){ - PyObject* obj = co->consts[i]; - if(is_type(obj, tp_function)){ - const auto& f = CAST(Function&, obj); - ss << disassemble(f.code); - } - } - return Str(ss.str()); + // for(int i=0; iconsts.size(); i++){ + // PyObject* obj = co->consts[i]; + // if(is_type(obj, tp_function)){ + // const auto& f = CAST(Function&, obj); + // ss << disassemble(f.code); + // } + // } + // return Str(ss.str()); } inline void VM::init_builtin_types(){ @@ -769,7 +762,6 @@ inline void VM::unpack_args(Args& args){ for(int i=0; i Date: Sun, 2 Apr 2023 22:38:05 +0800 Subject: [PATCH 36/73] Update compiler.h --- src/compiler.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index dc898d6c..7fae7ff2 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -867,18 +867,6 @@ public: make_sp(source, 
filename, mode) ); if(rules.empty()) init_pratt_rules(); - // rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - // rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("-=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; - // rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; } CodeObject_ compile(){ From 190d2a0589e325857c188afac92327507dee6bb9 Mon Sep 17 00:00:00 2001 From: BLUELOVETH Date: Mon, 3 Apr 2023 13:09:46 +0000 Subject: [PATCH 37/73] up --- src/compiler.h | 40 ++++++++++++++++------------------------ src/expr.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 7fae7ff2..522c294f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -575,6 +575,8 @@ class Compiler { void compile_decorated(){ EXPR(false); + // TODO: support multiple decorator + // use a while loop to consume '@' if(!match_newlines(mode()==REPL_MODE)) SyntaxError(); ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line); consume(TK("def")); @@ -582,40 +584,30 @@ class Compiler { } bool try_compile_assignment(){ - Expr_ lhs = ctx()->s_expr.popx(); + Expr* lhs_p = ctx()->s_expr.top().get(); + bool inplace; switch (curr().type) { - // case TK("+="): lhs->emit(ctx()); advance(); 
emit(OP_BINARY_OP, 0); break; - // case TK("-="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 1); break; - // case TK("*="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 2); break; - // case TK("/="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 3); break; - // case TK("//="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 4); break; - // case TK("%="): lhs->emit(ctx()); advance(); emit(OP_BINARY_OP, 5); break; - // case TK("<<="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 0); break; - // case TK(">>="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 1); break; - // case TK("&="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 2); break; - // case TK("|="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 3); break; - // case TK("^="): lhs->emit(ctx()); advance(); emit(OP_BITWISE_OP, 4); break; - // case TK("="): advance(); break; case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="): case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): { + inplace = true; advance(); auto e = make_expr(); - e->op = prev().type; - e->lhs = lhs; // here should be a copy + e->op = prev().type - 1; // -1 to remove = + e->lhs = ctx()->s_expr.popx(); EXPR_TUPLE(); e->rhs = ctx()->s_expr.popx(); - // ... 
+ ctx()->s_expr.push(std::move(e)); } break; - case TK("="): advance(); break; + case TK("="): + inplace = false; + advance(); + EXPR_TUPLE(); + break; default: return false; } - if(prev().type == TK("=")){ - EXPR_TUPLE(); - Expr_ rhs = ctx()->s_expr.popx(); - // do assign here - // lhs = rhs - return true; - } + ctx()->emit_expr(); + bool ok = lhs_p->emit_store(ctx()); + if(!ok) SyntaxError(); return true; } diff --git a/src/expr.h b/src/expr.h index a4c0ea0b..096a3334 100644 --- a/src/expr.h +++ b/src/expr.h @@ -421,15 +421,44 @@ struct CompExpr: Expr{ Expr_ vars; // loop vars Expr_ iter; // loop iter Expr_ cond; // optional if condition + + virtual Opcode op0() = 0; + virtual Opcode op1() = 0; + + void emit(CodeEmitContext* ctx){ + ctx->emit(op0(), 0, line); + iter->emit(ctx); + ctx->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); + ctx->enter_block(FOR_LOOP); + ctx->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); + bool ok = vars->emit_store(ctx); + if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind + if(cond){ + cond->emit(ctx); + int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); + ctx->emit(op1(), BC_NOARG, BC_KEEPLINE); + ctx->patch_jump(patch); + }else{ + ctx->emit(op1(), BC_NOARG, BC_KEEPLINE); + } + ctx->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); + ctx->exit_block(); + } }; struct ListCompExpr: CompExpr{ + Opcode op0() override { return OP_BUILD_LIST; } + Opcode op1() override { return OP_LIST_APPEND; } }; struct DictCompExpr: CompExpr{ + Opcode op0() override { return OP_BUILD_DI CT; } + Opcode op1() override { return OP_DICT_ADD; } }; struct SetCompExpr: CompExpr{ + Opcode op0() override { return OP_BUILD_SET; } + Opcode op1() override { return OP_SET_ADD; } }; struct LambdaExpr: Expr{ From e4e43826fe1404c148c5f256f53f773512f555b5 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Mon, 3 Apr 2023 22:21:09 +0800 Subject: [PATCH 38/73] up --- src/cffi.h | 2 - src/common.h | 5 +- src/compiler.h | 72 
++++++++++++------------- src/expr.h | 35 ++++++++----- src/frame.h | 12 ++--- src/iter.h | 6 +-- src/memory.h | 1 - src/obj.h | 8 ++- src/pocketpy.h | 2 +- src/vm.h | 140 +++++++++++++++++++++++++------------------------ 10 files changed, 148 insertions(+), 135 deletions(-) diff --git a/src/cffi.h b/src/cffi.h index fbb13a8e..117d1248 100644 --- a/src/cffi.h +++ b/src/cffi.h @@ -2,8 +2,6 @@ #include "common.h" #include "vm.h" -#include -#include namespace pkpy { diff --git a/src/common.h b/src/common.h index 2ae87052..88875867 100644 --- a/src/common.h +++ b/src/common.h @@ -26,12 +26,13 @@ #include #include #include +#include #define PK_VERSION "0.9.6" // debug macros -#define DEBUG_NO_BUILTIN_MODULES 0 -#define DEBUG_EXTRA_CHECK 1 +#define DEBUG_NO_BUILTIN_MODULES 1 +#define DEBUG_MODE 1 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 diff --git a/src/compiler.h b/src/compiler.h index 522c294f..0f8a371d 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -26,7 +26,7 @@ class Compiler { const Token& prev() { return tokens.at(i-1); } const Token& curr() { return tokens.at(i); } const Token& next() { return tokens.at(i+1); } - void advance() { i++; } + void advance(int delta=1) { i += delta; } CodeEmitContext* ctx() { return &contexts.top(); } CompileMode mode() const{ return lexer->src->mode; } @@ -42,7 +42,7 @@ class Compiler { void pop_context(){ if(!ctx()->s_expr.empty()) UNREACHABLE(); // if the last op does not return, add a default return None - if(ctx()->co->codes.back().op != OP_RETURN_VALUE){ + if(ctx()->co->codes.empty() || ctx()->co->codes.back().op != OP_RETURN_VALUE){ ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); } @@ -185,12 +185,14 @@ class Compiler { // PASS void exprTuple(){ - auto e = make_expr(); + std::vector items; do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok - e->items.push_back(ctx()->s_expr.popx()); + 
items.push_back(ctx()->s_expr.popx()); } while(match(TK(","))); - ctx()->s_expr.push(std::move(e)); + ctx()->s_expr.push(make_expr( + std::move(items) + )); } // PASS @@ -577,7 +579,7 @@ class Compiler { EXPR(false); // TODO: support multiple decorator // use a while loop to consume '@' - if(!match_newlines(mode()==REPL_MODE)) SyntaxError(); + if(!match_newlines_repl()) SyntaxError(); ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line); consume(TK("def")); compile_function(); @@ -585,11 +587,9 @@ class Compiler { bool try_compile_assignment(){ Expr* lhs_p = ctx()->s_expr.top().get(); - bool inplace; switch (curr().type) { case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="): case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): { - inplace = true; advance(); auto e = make_expr(); e->op = prev().type - 1; // -1 to remove = @@ -599,7 +599,6 @@ class Compiler { ctx()->s_expr.push(std::move(e)); } break; case TK("="): - inplace = false; advance(); EXPR_TUPLE(); break; @@ -625,7 +624,7 @@ class Compiler { ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line); consume_end_stmt(); break; - case TK("yield"): + case TK("yield"): if (contexts.size() <= 1) SyntaxError("'yield' outside function"); EXPR_TUPLE(true); // if yield present, mark the function as generator @@ -634,7 +633,7 @@ class Compiler { consume_end_stmt(); break; case TK("return"): - if (contexts.size() <= 1) SyntaxError("'ret urn' outside function"); + if (contexts.size() <= 1) SyntaxError("'return' outside function"); if(match_end_stmt()){ ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); }else{ @@ -717,6 +716,7 @@ class Compiler { /*************************************************/ // handle dangling expression or assignment default: { + advance(-1); // do revert since we have pre-called advance() at the beginning EXPR_TUPLE(); if(!try_compile_assignment()){ ctx()->emit_expr(); @@ -791,7 +791,6 @@ class Compiler { void compile_function(){ // TODO: bug, if 
there are multiple decorators, will cause error - bool has_decorator = !co()->codes.empty() && co()->codes.back().op == OP_SETUP_DECORATOR; Function func; StrName obj_name; consume(TK("@id")); @@ -812,38 +811,38 @@ class Compiler { func.code = push_context(lexer->src, func.name.str()); compile_block_body(); pop_context(); - emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); - if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); + ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_const(VAR(func)), prev().line); + if(name_scope() == NAME_LOCAL) ctx()->emit(OP_SETUP_CLOSURE, BC_NOARG, prev().line); if(!ctx()->is_compiling_class){ if(obj_name.empty()){ - if(has_decorator) emit(OP_CALL, 1); - emit(OP_STORE_NAME, co()->add_name(func.name, name_scope())); + auto e = make_expr(func.name, name_scope()); + e->emit_store(ctx()); } else { - if(has_decorator) SyntaxError("decorator is not supported here"); - emit(OP_LOAD_NAME, co()->add_name(obj_name, name_scope())); - int index = co()->add_name(func.name, NAME_ATTR); - emit(OP_BUILD_ATTR_REF, index); - emit(OP_ROT_TWO); - emit(OP_STORE_REF); + ctx()->emit(OP_LOAD_NAME, ctx()->add_name(obj_name), prev().line); + int index = ctx()->add_name(func.name); + ctx()->emit(OP_STORE_ATTR, index, prev().line); } }else{ - if(has_decorator) emit(OP_CALL, 1); - emit(OP_STORE_CLASS_ATTR, co()->add_name(func.name, name_scope())); + ctx()->emit(OP_STORE_CLASS_ATTR, ctx()->add_name(func.name), BC_KEEPLINE); } } PyObject* read_literal(){ - if(match(TK("-"))){ - consume(TK("@num")); - PyObject* val = get_value(prev()); - return vm->num_negated(val); + advance(); + switch(prev().type){ + case TK("-"): { + consume(TK("@num")); + PyObject* val = LiteralExpr(prev().value).to_object(ctx()); + return vm->num_negated(val); + } + case TK("@num"): return LiteralExpr(prev().value).to_object(ctx()); + case TK("@str"): return LiteralExpr(prev().value).to_object(ctx()); + case TK("True"): return VAR(true); + case TK("False"): return VAR(false); + case TK("None"): 
return vm->None; + case TK("..."): return vm->Ellipsis; + default: break; } - if(match(TK("@num"))) return get_value(prev()); - if(match(TK("@str"))) return get_value(prev()); - if(match(TK("True"))) return VAR(true); - if(match(TK("False"))) return VAR(false); - if(match(TK("None"))) return vm->None; - if(match(TK("..."))) return vm->Ellipsis; return nullptr; } @@ -858,7 +857,8 @@ public: this->lexer = std::make_unique( make_sp(source, filename, mode) ); - if(rules.empty()) init_pratt_rules(); + // TODO: check if already initialized + init_pratt_rules(); } CodeObject_ compile(){ @@ -883,7 +883,7 @@ public: return code; }else if(mode()==JSON_MODE){ PyObject* value = read_literal(); - if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value)); + if(value != nullptr) ctx()->emit(OP_LOAD_CONST, ctx()->add_const(value), prev().line); else if(match(TK("{"))) exprMap(); else if(match(TK("["))) exprList(); else SyntaxError("expect a JSON object or array"); diff --git a/src/expr.h b/src/expr.h index 096a3334..cdb476dd 100644 --- a/src/expr.h +++ b/src/expr.h @@ -31,12 +31,7 @@ struct CodeEmitContext{ CodeObject_ co; VM* vm; stack s_expr; - CodeEmitContext(VM* vm, CodeObject_ co): co(co) {} - CodeEmitContext(const CodeEmitContext&) = delete; - CodeEmitContext& operator=(const CodeEmitContext&) = delete; - CodeEmitContext(CodeEmitContext&&) = delete; - CodeEmitContext& operator=(CodeEmitContext&&) = delete; int curr_block_i = 0; bool is_compiling_class = false; @@ -101,12 +96,11 @@ struct CodeEmitContext{ // PASS struct NameExpr: Expr{ - Str name; + StrName name; NameScope scope; - NameExpr(const Str& name, NameScope scope): name(name), scope(scope) {} - NameExpr(Str&& name, NameScope scope): name(std::move(name)), scope(scope) {} + NameExpr(StrName name, NameScope scope): name(name), scope(scope) {} - Str str() const override { return "$" + name; } + Str str() const override { return "$" + name.str(); } void emit(CodeEmitContext* ctx) override { int index = 
ctx->add_name(name); @@ -246,7 +240,7 @@ struct LiteralExpr: Expr{ UNREACHABLE(); } - void emit(CodeEmitContext* ctx) override { + PyObject* to_object(CodeEmitContext* ctx){ VM* vm = ctx->vm; PyObject* obj = nullptr; if(std::holds_alternative(value)){ @@ -258,6 +252,11 @@ struct LiteralExpr: Expr{ if(std::holds_alternative(value)){ obj = VAR(std::get(value)); } + return obj; + } + + void emit(CodeEmitContext* ctx) override { + PyObject* obj = to_object(ctx); if(obj == nullptr) UNREACHABLE(); int index = ctx->add_const(obj); ctx->emit(OP_LOAD_CONST, index, line); @@ -287,7 +286,7 @@ struct NegatedExpr: Expr{ obj = VAR(std::get(lit->value)); } if(obj != nullptr){ - ctx->emit(OP_LOAD_CONST, ctx()->add_const(obj), line); + ctx->emit(OP_LOAD_CONST, ctx->add_const(obj), line); return; } } @@ -362,21 +361,25 @@ struct SequenceExpr: Expr{ }; struct ListExpr: SequenceExpr{ + using SequenceExpr::SequenceExpr; Str str() const override { return "list()"; } Opcode opcode() const override { return OP_BUILD_LIST; } }; struct DictExpr: SequenceExpr{ + using SequenceExpr::SequenceExpr; Str str() const override { return "dict()"; } - Opcode opcode() const override { return OP_BUILD_MAP; } + Opcode opcode() const override { return OP_BUILD_DICT; } }; struct SetExpr: SequenceExpr{ + using SequenceExpr::SequenceExpr; Str str() const override { return "set()"; } Opcode opcode() const override { return OP_BUILD_SET; } }; struct TupleExpr: SequenceExpr{ + using SequenceExpr::SequenceExpr; Str str() const override { return "tuple()"; } Opcode opcode() const override { return OP_BUILD_TUPLE; } @@ -432,7 +435,8 @@ struct CompExpr: Expr{ ctx->enter_block(FOR_LOOP); ctx->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); bool ok = vars->emit_store(ctx); - if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind + // this error occurs in `vars` instead of this line, but...nevermind + if(!ok) UNREACHABLE(); // TODO: raise a SyntaxError instead if(cond){ cond->emit(ctx); 
int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); @@ -449,16 +453,19 @@ struct CompExpr: Expr{ struct ListCompExpr: CompExpr{ Opcode op0() override { return OP_BUILD_LIST; } Opcode op1() override { return OP_LIST_APPEND; } + Str str() const override { return "listcomp()"; } }; struct DictCompExpr: CompExpr{ - Opcode op0() override { return OP_BUILD_DI CT; } + Opcode op0() override { return OP_BUILD_DICT; } Opcode op1() override { return OP_DICT_ADD; } + Str str() const override { return "dictcomp()"; } }; struct SetCompExpr: CompExpr{ Opcode op0() override { return OP_BUILD_SET; } Opcode op1() override { return OP_SET_ADD; } + Str str() const override { return "setcomp()"; } }; struct LambdaExpr: Expr{ diff --git a/src/frame.h b/src/frame.h index 072719b8..265255e6 100644 --- a/src/frame.h +++ b/src/frame.h @@ -53,14 +53,14 @@ struct Frame { // } void pop(){ -#if DEBUG_EXTRA_CHECK +#if DEBUG_MODE if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif _data.pop_back(); } PyObject* popx(){ -#if DEBUG_EXTRA_CHECK +#if DEBUG_MODE if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif PyObject* ret = _data.back(); @@ -69,21 +69,21 @@ struct Frame { } PyObject*& top(){ -#if DEBUG_EXTRA_CHECK +#if DEBUG_MODE if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif return _data.back(); } PyObject*& top_1(){ -#if DEBUG_EXTRA_CHECK +#if DEBUG_MODE if(_data.size() < 2) throw std::runtime_error("_data.size() < 2"); #endif return _data[_data.size()-2]; } PyObject*& top_2(){ -#if DEBUG_EXTRA_CHECK +#if DEBUG_MODE if(_data.size() < 3) throw std::runtime_error("_data.size() < 3"); #endif return _data[_data.size()-3]; @@ -115,7 +115,7 @@ struct Frame { } int _exit_block(int i){ - if(co->blocks[i].type == FOR_LOOP) _pop(); + if(co->blocks[i].type == FOR_LOOP) pop(); else if(co->blocks[i].type == TRY_EXCEPT) on_try_block_exit(); return co->blocks[i].parent; } diff --git a/src/iter.h b/src/iter.h index 
25b20176..0a9041d6 100644 --- a/src/iter.h +++ b/src/iter.h @@ -26,8 +26,8 @@ public: template class ArrayIter : public BaseIter { - int index; PyObject* ref; + int index; public: ArrayIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {} @@ -43,10 +43,10 @@ public: }; class StringIter : public BaseIter { - int index = 0; PyObject* ref; + int index; public: - StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref) {} + StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {} PyObject* next() override{ Str* str = &OBJ_GET(Str, ref); diff --git a/src/memory.h b/src/memory.h index 61f6246d..62d9932f 100644 --- a/src/memory.h +++ b/src/memory.h @@ -1,7 +1,6 @@ #pragma once #include "common.h" -#include namespace pkpy{ diff --git a/src/obj.h b/src/obj.h index 36d38255..e6f6c180 100644 --- a/src/obj.h +++ b/src/obj.h @@ -3,7 +3,6 @@ #include "common.h" #include "namedict.h" #include "tuplelist.h" -#include namespace pkpy { @@ -142,9 +141,14 @@ struct Py_ : PyObject { }; #define OBJ_GET(T, obj) (((Py_*)(obj))->_value) -#define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__)) #define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_mark() +#if DEBUG_NO_BUILTIN_MODULES +#define OBJ_NAME(obj) Str("") +#else +#define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__)) +#endif + const int kTpIntIndex = 2; const int kTpFloatIndex = 3; diff --git a/src/pocketpy.h b/src/pocketpy.h index f5fe5aac..938e92fd 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -542,7 +542,7 @@ inline void init_builtins(VM* _vm) { }); /************ PyBool ************/ - _vm->bind_static_method<1>("bool", "__new__", CPP_LAMBDA(vm->asBool(args[0]))); + _vm->bind_static_method<1>("bool", "__new__", CPP_LAMBDA(VAR(vm->asBool(args[0])))); _vm->bind_method<0>("bool", "__repr__", [](VM* vm, Args& args) { bool val = CAST(bool, args[0]); diff --git a/src/vm.h b/src/vm.h index 778583aa..7e26b37a 100644 --- a/src/vm.h +++ b/src/vm.h @@ -93,7 +93,7 @@ public: } Frame* top_frame() 
const { -#if DEBUG_EXTRA_CHECK +#if DEBUG_MODE if(callstack.empty()) UNREACHABLE(); #endif return callstack.top().get(); @@ -166,14 +166,18 @@ public: if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); - // if(_module == _main) std::cout << disassemble(code) << '\n'; + if(_module == _main) std::cout << disassemble(code) << '\n'; return _exec(code, _module); }catch (const Exception& e){ *_stderr << e.summary() << '\n'; - }catch (const std::exception& e) { + + } +#if !DEBUG_MODE + catch (const std::exception& e) { *_stderr << "An std::exception occurred! It could be a bug.\n"; *_stderr << e.what() << '\n'; } +#endif callstack = {}; return nullptr; } @@ -289,6 +293,7 @@ public: void NameError(StrName name){ _error("NameError", "name " + name.str().escape(true) + " is not defined"); } void AttributeError(PyObject* obj, StrName name){ + // OBJ_NAME calls getattr, which may lead to a infinite recursion _error("AttributeError", "type " + OBJ_NAME(_t(obj)).escape(true) + " has no attribute " + name.str().escape(true)); } @@ -551,74 +556,73 @@ inline PyObject* VM::new_module(StrName name) { } inline Str VM::disassemble(CodeObject_ co){ - return ""; - // auto pad = [](const Str& s, const int n){ - // if(s.size() >= n) return s.substr(0, n); - // return s + std::string(n - s.size(), ' '); - // }; + auto pad = [](const Str& s, const int n){ + if(s.size() >= n) return s.substr(0, n); + return s + std::string(n - s.size(), ' '); + }; - // std::vector jumpTargets; - // for(auto byte : co->codes){ - // if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ - // jumpTargets.push_back(byte.arg); - // } - // } - // StrStream ss; - // ss << std::string(54, '-') << '\n'; - // ss << co->name << ":\n"; - // int prev_line = -1; - // for(int i=0; icodes.size(); i++){ - // const Bytecode& byte = co->codes[i]; - // if(byte.op == OP_NO_OP) continue; - // Str line = std::to_string(byte.line); - // if(byte.line == prev_line) line = ""; 
- // else{ - // if(prev_line != -1) ss << "\n"; - // prev_line = byte.line; - // } + std::vector jumpTargets; + for(auto byte : co->codes){ + if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ + jumpTargets.push_back(byte.arg); + } + } + StrStream ss; + ss << std::string(54, '-') << '\n'; + ss << co->name << ":\n"; + int prev_line = -1; + for(int i=0; icodes.size(); i++){ + const Bytecode& byte = co->codes[i]; + if(byte.op == OP_NO_OP) continue; + Str line = std::to_string(byte.line); + if(byte.line == prev_line) line = ""; + else{ + if(prev_line != -1) ss << "\n"; + prev_line = byte.line; + } - // std::string pointer; - // if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){ - // pointer = "-> "; - // }else{ - // pointer = " "; - // } - // ss << pad(line, 8) << pointer << pad(std::to_string(i), 3); - // ss << " " << pad(OP_NAMES[byte.op], 20) << " "; - // // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5); - // std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); - // if(byte.op == OP_LOAD_CONST){ - // argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; - // } - // if(byte.op == OP_LOAD_NAME_REF || byte.op == OP_LOAD_NAME || byte.op == OP_RAISE || byte.op == OP_STORE_NAME){ - // argStr += " (" + co->names[byte.arg].first.str().escape(true) + ")"; - // } - // ss << argStr; - // // ss << pad(argStr, 20); // may overflow - // // ss << co->blocks[byte.block].to_string(); - // if(i != co->codes.size() - 1) ss << '\n'; - // } - // StrStream consts; - // consts << "co_consts: "; - // consts << CAST(Str, asRepr(VAR(co->consts))); + std::string pointer; + if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){ + pointer = "-> "; + }else{ + pointer = " "; + } + ss << pad(line, 8) << pointer << pad(std::to_string(i), 3); + ss << " " << pad(OP_NAMES[byte.op], 20) << " "; + // ss << pad(byte.arg == -1 ? 
"" : std::to_string(byte.arg), 5); + std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); + if(byte.op == OP_LOAD_CONST){ + argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; + } + if(byte.op == OP_LOAD_NAME || byte.op == OP_STORE_LOCAL || byte.op == OP_STORE_GLOBAL){ + argStr += " (" + co->names[byte.arg].str().escape(true) + ")"; + } + ss << argStr; + // ss << pad(argStr, 20); // may overflow + // ss << co->blocks[byte.block].to_string(); + if(i != co->codes.size() - 1) ss << '\n'; + } + StrStream consts; + consts << "co_consts: "; + consts << CAST(Str, asRepr(VAR(co->consts))); - // StrStream names; - // names << "co_names: "; - // List list; - // for(int i=0; inames.size(); i++){ - // list.push_back(VAR(co->names[i].first.str())); - // } - // names << CAST(Str, asRepr(VAR(list))); - // ss << '\n' << consts.str() << '\n' << names.str() << '\n'; + StrStream names; + names << "co_names: "; + List list; + for(int i=0; inames.size(); i++){ + list.push_back(VAR(co->names[i].str())); + } + names << CAST(Str, asRepr(VAR(list))); + ss << '\n' << consts.str() << '\n' << names.str() << '\n'; - // for(int i=0; iconsts.size(); i++){ - // PyObject* obj = co->consts[i]; - // if(is_type(obj, tp_function)){ - // const auto& f = CAST(Function&, obj); - // ss << disassemble(f.code); - // } - // } - // return Str(ss.str()); + for(int i=0; iconsts.size(); i++){ + PyObject* obj = co->consts[i]; + if(is_type(obj, tp_function)){ + const auto& f = CAST(Function&, obj); + ss << disassemble(f.code); + } + } + return Str(ss.str()); } inline void VM::init_builtin_types(){ @@ -877,7 +881,7 @@ inline PyObject* VM::_exec(){ }catch(HandledException& e){ continue; }catch(UnhandledException& e){ - PyObject* obj = frame->pop(); + PyObject* obj = frame->popx(); Exception& _e = CAST(Exception&, obj); _e.st_push(frame->snapshot()); callstack.pop(); From e5ced9d1948ae57754f2b3b530855f2b0c9437a9 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Mon, 3 Apr 2023 22:44:12 +0800 
Subject: [PATCH 39/73] up --- src/compiler.h | 3 ++- src/vm.h | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 0f8a371d..f7cced24 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -604,7 +604,8 @@ class Compiler { break; default: return false; } - ctx()->emit_expr(); + Expr_ rhs = ctx()->s_expr.popx(); + rhs->emit(ctx()); bool ok = lhs_p->emit_store(ctx()); if(!ok) SyntaxError(); return true; diff --git a/src/vm.h b/src/vm.h index 7e26b37a..a615b5a4 100644 --- a/src/vm.h +++ b/src/vm.h @@ -590,12 +590,25 @@ inline Str VM::disassemble(CodeObject_ co){ ss << pad(line, 8) << pointer << pad(std::to_string(i), 3); ss << " " << pad(OP_NAMES[byte.op], 20) << " "; // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5); - std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); - if(byte.op == OP_LOAD_CONST){ - argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; - } - if(byte.op == OP_LOAD_NAME || byte.op == OP_STORE_LOCAL || byte.op == OP_STORE_GLOBAL){ - argStr += " (" + co->names[byte.arg].str().escape(true) + ")"; + Str argStr = byte.arg == -1 ? 
"" : std::to_string(byte.arg); + switch(byte.op){ + case OP_LOAD_CONST: + argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; + break; + case OP_LOAD_NAME: case OP_STORE_LOCAL: case OP_STORE_GLOBAL: + case OP_LOAD_ATTR: case OP_STORE_ATTR: case OP_DELETE_ATTR: + case OP_DELETE_LOCAL: case OP_DELETE_GLOBAL: + argStr += " (" + co->names[byte.arg].str().escape(true) + ")"; + break; + case OP_BINARY_OP: + argStr += " (" + BINARY_SPECIAL_METHODS[byte.arg].str() + ")"; + break; + case OP_COMPARE_OP: + argStr += " (" + COMPARE_SPECIAL_METHODS[byte.arg].str() + ")"; + break; + case OP_BITWISE_OP: + argStr += " (" + BITWISE_SPECIAL_METHODS[byte.arg].str() + ")"; + break; } ss << argStr; // ss << pad(argStr, 20); // may overflow From 1e73f7e1bf352b27177a87c61f5d0abd476b3848 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 4 Apr 2023 21:24:25 +0800 Subject: [PATCH 40/73] up --- src/ceval.h | 694 +++++++++++++++++++++++++------------------------ src/compiler.h | 4 + 2 files changed, 354 insertions(+), 344 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index a8a17795..acfcbc70 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -5,358 +5,364 @@ namespace pkpy{ -inline PyObject* VM::run_frame(Frame* frame){ - while(true){ - /* NOTE: - * Be aware of accidental gc! 
- * DO NOT leave any strong reference of PyObject* in the C stack - * For example, frame->popx() returns a strong reference which may be dangerous - * `Args` containing strong references is safe if it is passed to `call` or `fast_call` - */ - heap._auto_collect(this); +#define DISPATCH() goto __NEXT_STEP - const Bytecode& byte = frame->next_bytecode(); - switch (byte.op) - { - case OP_NO_OP: continue; - /*****************************************/ - case OP_POP_TOP: frame->pop(); continue; - case OP_DUP_TOP: frame->push(frame->top()); continue; - case OP_ROT_TWO: std::swap(frame->top(), frame->top_1()); continue; - case OP_PRINT_EXPR: { - PyObject* obj = frame->top(); // use top() to avoid accidental gc - if(obj != None) *_stdout << CAST(Str, asRepr(obj)) << '\n'; - frame->pop(); - } continue; - /*****************************************/ - case OP_LOAD_CONST: frame->push(frame->co->consts[byte.arg]); continue; - case OP_LOAD_NONE: frame->push(None); continue; - case OP_LOAD_TRUE: frame->push(True); continue; - case OP_LOAD_FALSE: frame->push(False); continue; - case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); continue; - case OP_LOAD_BUILTIN_EVAL: frame->push(builtins->attr(m_eval)); continue; - case OP_LOAD_FUNCTION: { - PyObject* obj = frame->co->consts[byte.arg]; - Function f = CAST(Function, obj); // copy it! 
- f._module = frame->_module; // setup module - frame->push(VAR(std::move(f))); - } continue; - /*****************************************/ - case OP_LOAD_NAME: { - StrName name = frame->co->names[byte.arg]; - PyObject* val; - int i = 0; // names[0] is ensured to be non-null - do{ - val = frame->names[i++]->try_get(name); - if(val != nullptr){ frame->push(val); break; } - }while(frame->names[i] != nullptr); - vm->NameError(name); - } continue; - case OP_LOAD_ATTR: { - PyObject* a = frame->top(); - StrName name = frame->co->names[byte.arg]; - frame->top() = getattr(a, name); - } continue; - case OP_LOAD_SUBSCR: { - Args args(2); - args[1] = frame->popx(); // b - args[0] = frame->top(); // a - frame->top() = fast_call(__getitem__, std::move(args)); - } continue; - case OP_STORE_LOCAL: { - StrName name = frame->co->names[byte.arg]; - frame->f_locals().set(name, frame->popx()); - } continue; - case OP_STORE_GLOBAL: { - StrName name = frame->co->names[byte.arg]; - frame->f_globals().set(name, frame->popx()); - } continue; - case OP_STORE_ATTR: { - StrName name = frame->co->names[byte.arg]; - PyObject* a = frame->top(); - PyObject* val = frame->top_1(); - setattr(a, name, val); - frame->pop_n(2); - } continue; - case OP_STORE_SUBSCR: { - Args args(3); - args[1] = frame->popx(); // b - args[0] = frame->popx(); // a - args[2] = frame->popx(); // val - fast_call(__setitem__, std::move(args)); - } continue; - case OP_DELETE_LOCAL: { - StrName name = frame->co->names[byte.arg]; - if(frame->f_locals().contains(name)){ - frame->f_locals().erase(name); - }else{ - NameError(name); +inline PyObject* VM::run_frame(Frame* frame){ +__NEXT_STEP:; + /* NOTE: + * Be aware of accidental gc! 
+ * DO NOT leave any strong reference of PyObject* in the C stack + * For example, frame->popx() returns a strong reference which may be dangerous + * `Args` containing strong references is safe if it is passed to `call` or `fast_call` + */ + heap._auto_collect(this); + + const Bytecode& byte = frame->next_bytecode(); + switch (byte.op) + { + case OP_NO_OP: DISPATCH(); + /*****************************************/ + case OP_POP_TOP: frame->pop(); DISPATCH(); + case OP_DUP_TOP: frame->push(frame->top()); DISPATCH(); + case OP_ROT_TWO: std::swap(frame->top(), frame->top_1()); DISPATCH(); + case OP_PRINT_EXPR: { + PyObject* obj = frame->top(); // use top() to avoid accidental gc + if(obj != None) *_stdout << CAST(Str, asRepr(obj)) << '\n'; + frame->pop(); + } DISPATCH(); + /*****************************************/ + case OP_LOAD_CONST: frame->push(frame->co->consts[byte.arg]); DISPATCH(); + case OP_LOAD_NONE: frame->push(None); DISPATCH(); + case OP_LOAD_TRUE: frame->push(True); DISPATCH(); + case OP_LOAD_FALSE: frame->push(False); DISPATCH(); + case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); DISPATCH(); + case OP_LOAD_BUILTIN_EVAL: frame->push(builtins->attr(m_eval)); DISPATCH(); + case OP_LOAD_FUNCTION: { + PyObject* obj = frame->co->consts[byte.arg]; + Function f = CAST(Function, obj); // copy it! 
+ f._module = frame->_module; // setup module + frame->push(VAR(std::move(f))); + } DISPATCH(); + /*****************************************/ + case OP_LOAD_NAME: { + StrName name = frame->co->names[byte.arg]; + PyObject* val; + int i = 0; // names[0] is ensured to be non-null + do{ + val = frame->names[i++]->try_get(name); + if(val != nullptr){ + frame->push(val); + DISPATCH(); } - } continue; - case OP_DELETE_GLOBAL: { - StrName name = frame->co->names[byte.arg]; - if(frame->f_globals().contains(name)){ - frame->f_globals().erase(name); - }else{ - NameError(name); - } - } continue; - case OP_DELETE_ATTR: { - PyObject* a = frame->popx(); - StrName name = frame->co->names[byte.arg]; - if(!a->is_attr_valid()) TypeError("cannot delete attribute"); - if(!a->attr().contains(name)) AttributeError(a, name); - a->attr().erase(name); - } continue; - case OP_DELETE_SUBSCR: { - PyObject* b = frame->popx(); - PyObject* a = frame->popx(); - fast_call(__delitem__, Args{a, b}); - } continue; - /*****************************************/ - case OP_BUILD_LIST: - frame->push(VAR(frame->popx_n_reversed(byte.arg).to_list())); - continue; - case OP_BUILD_DICT: { - PyObject* t = VAR(frame->popx_n_reversed(byte.arg)); - PyObject* obj = call(builtins->attr(m_dict), Args{t}); + }while(frame->names[i] != nullptr); + vm->NameError(name); + } DISPATCH(); + case OP_LOAD_ATTR: { + PyObject* a = frame->top(); + StrName name = frame->co->names[byte.arg]; + frame->top() = getattr(a, name); + } DISPATCH(); + case OP_LOAD_SUBSCR: { + Args args(2); + args[1] = frame->popx(); // b + args[0] = frame->top(); // a + frame->top() = fast_call(__getitem__, std::move(args)); + } DISPATCH(); + case OP_STORE_LOCAL: { + StrName name = frame->co->names[byte.arg]; + frame->f_locals().set(name, frame->popx()); + } DISPATCH(); + case OP_STORE_GLOBAL: { + StrName name = frame->co->names[byte.arg]; + frame->f_globals().set(name, frame->popx()); + } DISPATCH(); + case OP_STORE_ATTR: { + StrName name = 
frame->co->names[byte.arg]; + PyObject* a = frame->top(); + PyObject* val = frame->top_1(); + setattr(a, name, val); + frame->pop_n(2); + } DISPATCH(); + case OP_STORE_SUBSCR: { + Args args(3); + args[1] = frame->popx(); // b + args[0] = frame->popx(); // a + args[2] = frame->popx(); // val + fast_call(__setitem__, std::move(args)); + } DISPATCH(); + case OP_DELETE_LOCAL: { + StrName name = frame->co->names[byte.arg]; + if(frame->f_locals().contains(name)){ + frame->f_locals().erase(name); + }else{ + NameError(name); + } + } DISPATCH(); + case OP_DELETE_GLOBAL: { + StrName name = frame->co->names[byte.arg]; + if(frame->f_globals().contains(name)){ + frame->f_globals().erase(name); + }else{ + NameError(name); + } + } DISPATCH(); + case OP_DELETE_ATTR: { + PyObject* a = frame->popx(); + StrName name = frame->co->names[byte.arg]; + if(!a->is_attr_valid()) TypeError("cannot delete attribute"); + if(!a->attr().contains(name)) AttributeError(a, name); + a->attr().erase(name); + } DISPATCH(); + case OP_DELETE_SUBSCR: { + PyObject* b = frame->popx(); + PyObject* a = frame->popx(); + fast_call(__delitem__, Args{a, b}); + } DISPATCH(); + /*****************************************/ + case OP_BUILD_LIST: + frame->push(VAR(frame->popx_n_reversed(byte.arg).to_list())); + DISPATCH(); + case OP_BUILD_DICT: { + PyObject* t = VAR(frame->popx_n_reversed(byte.arg)); + PyObject* obj = call(builtins->attr(m_dict), Args{t}); + frame->push(obj); + } DISPATCH(); + case OP_BUILD_SET: { + PyObject* t = VAR(frame->popx_n_reversed(byte.arg)); + PyObject* obj = call(builtins->attr(m_set), Args{t}); + frame->push(obj); + } DISPATCH(); + case OP_BUILD_SLICE: { + PyObject* step = frame->popx(); + PyObject* stop = frame->popx(); + PyObject* start = frame->popx(); + Slice s; + if(start != None) s.start = CAST(int, start); + if(stop != None) s.stop = CAST(int, stop); + if(step != None) s.step = CAST(int, step); + frame->push(VAR(s)); + } DISPATCH(); + case OP_BUILD_TUPLE: { + Tuple items = 
frame->popx_n_reversed(byte.arg); + frame->push(VAR(std::move(items))); + } DISPATCH(); + case OP_BUILD_STRING: { + // asStr() may run extra bytecode + // so we use top_n_reversed() in order to avoid accidental gc + Args items = frame->top_n_reversed(byte.arg); + StrStream ss; + for(int i=0; ipop_n(byte.arg); + frame->push(VAR(ss.str())); + } DISPATCH(); + /*****************************************/ + case OP_BINARY_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); + } DISPATCH(); + case OP_COMPARE_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(COMPARE_SPECIAL_METHODS[byte.arg], std::move(args)); + } DISPATCH(); + case OP_BITWISE_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); + } DISPATCH(); + case OP_IS_OP: { + PyObject* rhs = frame->popx(); + PyObject* lhs = frame->top(); + bool ret_c = lhs == rhs; + if(byte.arg == 1) ret_c = !ret_c; + frame->top() = VAR(ret_c); + } DISPATCH(); + case OP_CONTAINS_OP: { + Args args(2); + args[0] = frame->popx(); + args[1] = frame->top(); + PyObject* ret = fast_call(__contains__, std::move(args)); + bool ret_c = CAST(bool, ret); + if(byte.arg == 1) ret_c = !ret_c; + frame->top() = VAR(ret_c); + } DISPATCH(); + /*****************************************/ + case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); DISPATCH(); + case OP_POP_JUMP_IF_FALSE: + if(!asBool(frame->popx())) frame->jump_abs(byte.arg); + DISPATCH(); + case OP_JUMP_IF_TRUE_OR_POP: + if(asBool(frame->top()) == true) frame->jump_abs(byte.arg); + else frame->pop(); + DISPATCH(); + case OP_JUMP_IF_FALSE_OR_POP: + if(asBool(frame->top()) == false) frame->jump_abs(byte.arg); + else frame->pop(); + DISPATCH(); + case OP_LOOP_CONTINUE: { + int target = 
frame->co->blocks[byte.block].start; + frame->jump_abs(target); + } DISPATCH(); + case OP_LOOP_BREAK: { + int target = frame->co->blocks[byte.block].end; + frame->jump_abs_break(target); + } DISPATCH(); + case OP_GOTO: { + StrName label = frame->co->names[byte.arg]; + auto it = frame->co->labels.find(label); + if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found"); + frame->jump_abs_break(it->second); + } DISPATCH(); + /*****************************************/ + // TODO: examine this later + case OP_CALL: case OP_CALL_UNPACK: { + Args args = frame->popx_n_reversed(byte.arg); + if(byte.op == OP_CALL_UNPACK) unpack_args(args); + PyObject* callable = frame->popx(); + PyObject* ret = call(callable, std::move(args), no_arg(), true); + if(ret == _py_op_call) return ret; + frame->push(std::move(ret)); + } DISPATCH(); + case OP_CALL_KWARGS: case OP_CALL_KWARGS_UNPACK: { + int ARGC = byte.arg & 0xFFFF; + int KWARGC = (byte.arg >> 16) & 0xFFFF; + Args kwargs = frame->popx_n_reversed(KWARGC*2); + Args args = frame->popx_n_reversed(ARGC); + if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args); + PyObject* callable = frame->popx(); + PyObject* ret = call(callable, std::move(args), kwargs, true); + if(ret == _py_op_call) return ret; + frame->push(std::move(ret)); + } DISPATCH(); + case OP_RETURN_VALUE: return frame->popx(); + /*****************************************/ + case OP_LIST_APPEND: { + PyObject* obj = frame->popx(); + List& list = CAST(List&, frame->top_1()); + list.push_back(obj); + } DISPATCH(); + case OP_DICT_ADD: { + PyObject* kv = frame->popx(); + // we do copy here to avoid accidental gc in `kv` + // TODO: optimize to avoid copy + call(frame->top_1(), __setitem__, CAST(Tuple, kv)); + } DISPATCH(); + case OP_SET_ADD: { + PyObject* obj = frame->popx(); + call(frame->top_1(), m_add, Args{obj}); + } DISPATCH(); + /*****************************************/ + case OP_UNARY_NEGATIVE: + frame->top() = 
num_negated(frame->top()); + DISPATCH(); + case OP_UNARY_NOT: + frame->top() = VAR(!asBool(frame->top())); + DISPATCH(); + case OP_UNARY_STAR: + frame->top() = VAR(StarWrapper(frame->top())); + DISPATCH(); + /*****************************************/ + case OP_GET_ITER: + frame->top() = asIter(frame->top()); + DISPATCH(); + case OP_FOR_ITER: { + BaseIter* it = PyIter_AS_C(frame->top()); + PyObject* obj = it->next(); + if(obj != nullptr){ frame->push(obj); - } continue; - case OP_BUILD_SET: { - PyObject* t = VAR(frame->popx_n_reversed(byte.arg)); - PyObject* obj = call(builtins->attr(m_set), Args{t}); - frame->push(obj); - } continue; - case OP_BUILD_SLICE: { - PyObject* step = frame->popx(); - PyObject* stop = frame->popx(); - PyObject* start = frame->popx(); - Slice s; - if(start != None) s.start = CAST(int, start); - if(stop != None) s.stop = CAST(int, stop); - if(step != None) s.step = CAST(int, step); - frame->push(VAR(s)); - } continue; - case OP_BUILD_TUPLE: { - Tuple items = frame->popx_n_reversed(byte.arg); - frame->push(VAR(std::move(items))); - } continue; - case OP_BUILD_STRING: { - // asStr() may run extra bytecode - // so we use top_n_reversed() in order to avoid accidental gc - Args items = frame->top_n_reversed(byte.arg); - StrStream ss; - for(int i=0; ipop_n(byte.arg); - frame->push(VAR(ss.str())); - } continue; - /*****************************************/ - case OP_BINARY_OP: { - Args args(2); - args[1] = frame->popx(); // lhs - args[0] = frame->top(); // rhs - frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - case OP_COMPARE_OP: { - Args args(2); - args[1] = frame->popx(); // lhs - args[0] = frame->top(); // rhs - frame->top() = fast_call(COMPARE_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - case OP_BITWISE_OP: { - Args args(2); - args[1] = frame->popx(); // lhs - args[0] = frame->top(); // rhs - frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - 
case OP_IS_OP: { - PyObject* rhs = frame->popx(); - PyObject* lhs = frame->top(); - bool ret_c = lhs == rhs; - if(byte.arg == 1) ret_c = !ret_c; - frame->top() = VAR(ret_c); - } continue; - case OP_CONTAINS_OP: { - Args args(2); - args[0] = frame->popx(); - args[1] = frame->top(); - PyObject* ret = fast_call(__contains__, std::move(args)); - bool ret_c = CAST(bool, ret); - if(byte.arg == 1) ret_c = !ret_c; - frame->top() = VAR(ret_c); - } continue; - /*****************************************/ - case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); continue; - case OP_POP_JUMP_IF_FALSE: - if(!asBool(frame->popx())) frame->jump_abs(byte.arg); - continue; - case OP_JUMP_IF_TRUE_OR_POP: - if(asBool(frame->top()) == true) frame->jump_abs(byte.arg); - else frame->pop(); - continue; - case OP_JUMP_IF_FALSE_OR_POP: - if(asBool(frame->top()) == false) frame->jump_abs(byte.arg); - else frame->pop(); - continue; - case OP_LOOP_CONTINUE: { - int target = frame->co->blocks[byte.block].start; - frame->jump_abs(target); - } continue; - case OP_LOOP_BREAK: { + }else{ int target = frame->co->blocks[byte.block].end; frame->jump_abs_break(target); - } continue; - case OP_GOTO: { - StrName label = frame->co->names[byte.arg]; - auto it = frame->co->labels.find(label); - if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found"); - frame->jump_abs_break(it->second); - } continue; - /*****************************************/ - // TODO: examine this later - case OP_CALL: case OP_CALL_UNPACK: { - Args args = frame->popx_n_reversed(byte.arg); - if(byte.op == OP_CALL_UNPACK) unpack_args(args); - PyObject* callable = frame->popx(); - PyObject* ret = call(callable, std::move(args), no_arg(), true); - if(ret == _py_op_call) return ret; - frame->push(std::move(ret)); - } continue; - case OP_CALL_KWARGS: case OP_CALL_KWARGS_UNPACK: { - int ARGC = byte.arg & 0xFFFF; - int KWARGC = (byte.arg >> 16) & 0xFFFF; - Args kwargs = 
frame->popx_n_reversed(KWARGC*2); - Args args = frame->popx_n_reversed(ARGC); - if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args); - PyObject* callable = frame->popx(); - PyObject* ret = call(callable, std::move(args), kwargs, true); - if(ret == _py_op_call) return ret; - frame->push(std::move(ret)); - } continue; - case OP_RETURN_VALUE: return frame->popx(); - /*****************************************/ - case OP_LIST_APPEND: { - PyObject* obj = frame->popx(); - List& list = CAST(List&, frame->top_1()); - list.push_back(obj); - } continue; - case OP_DICT_ADD: { - PyObject* kv = frame->popx(); - // we do copy here to avoid accidental gc in `kv` - // TODO: optimize to avoid copy - call(frame->top_1(), __setitem__, CAST(Tuple, kv)); - } continue; - case OP_SET_ADD: { - PyObject* obj = frame->popx(); - call(frame->top_1(), m_add, Args{obj}); - } continue; - /*****************************************/ - case OP_UNARY_NEGATIVE: - frame->top() = num_negated(frame->top()); - continue; - case OP_UNARY_NOT: - frame->top() = VAR(!asBool(frame->top())); - continue; - case OP_UNARY_STAR: - frame->top() = VAR(StarWrapper(frame->top())); - continue; - /*****************************************/ - case OP_GET_ITER: - frame->top() = asIter(frame->top()); - continue; - case OP_FOR_ITER: { - BaseIter* it = PyIter_AS_C(frame->top()); - PyObject* obj = it->next(); - if(obj != nullptr){ - frame->push(obj); - }else{ - int target = frame->co->blocks[byte.block].end; - frame->jump_abs_break(target); - } - } continue; - /*****************************************/ - case OP_IMPORT_NAME: { - StrName name = frame->co->names[byte.arg]; - PyObject* ext_mod = _modules.try_get(name); - if(ext_mod == nullptr){ - Str source; - auto it = _lazy_modules.find(name); - if(it == _lazy_modules.end()){ - bool ok = false; - source = _read_file_cwd(name.str() + ".py", &ok); - if(!ok) _error("ImportError", "module " + name.str().escape(true) + " not found"); - }else{ - source = it->second; - 
_lazy_modules.erase(it); - } - CodeObject_ code = compile(source, name.str(), EXEC_MODE); - PyObject* new_mod = new_module(name); - _exec(code, new_mod); - new_mod->attr()._try_perfect_rehash(); - } - frame->push(ext_mod); - } continue; - case OP_IMPORT_STAR: { - PyObject* obj = frame->popx(); - for(auto& [name, value]: obj->attr().items()){ - Str s = name.str(); - if(s.empty() || s[0] == '_') continue; - frame->f_globals().set(name, value); - } - }; continue; - /*****************************************/ - /*****************************************/ - // case OP_SETUP_DECORATOR: continue; - // case OP_SETUP_CLOSURE: { - // Function& f = CAST(Function&, frame->top()); // reference - // f._closure = frame->_locals; - // } continue; - // case OP_BEGIN_CLASS: { - // StrName name = frame->co->names[byte.arg]; - // PyObject* clsBase = frame->popx(); - // if(clsBase == None) clsBase = _t(tp_object); - // check_type(clsBase, tp_type); - // PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, clsBase)); - // frame->push(cls); - // } continue; - // case OP_END_CLASS: { - // PyObject* cls = frame->popx(); - // cls->attr()._try_perfect_rehash(); - // }; continue; - // case OP_STORE_CLASS_ATTR: { - // StrName name = frame->co->names[byte.arg]; - // PyObject* obj = frame->popx(); - // PyObject* cls = frame->top(); - // cls->attr().set(name, obj); - // } continue; - // case OP_ASSERT: { - // PyObject* _msg = frame->pop_value(this); - // Str msg = CAST(Str, asStr(_msg)); - // PyObject* expr = frame->pop_value(this); - // if(asBool(expr) != True) _error("AssertionError", msg); - // } continue; - // case OP_EXCEPTION_MATCH: { - // const auto& e = CAST(Exception&, frame->top()); - // StrName name = frame->co->names[byte.arg].first; - // frame->push(VAR(e.match_type(name))); - // } continue; - // case OP_RAISE: { - // PyObject* obj = frame->pop_value(this); - // Str msg = obj == None ? 
"" : CAST(Str, asStr(obj)); - // StrName type = frame->co->names[byte.arg].first; - // _error(type, msg); - // } continue; - // case OP_RE_RAISE: _raise(); continue; - // case OP_YIELD_VALUE: return _py_op_yield; - // // TODO: using "goto" inside with block may cause __exit__ not called - // case OP_WITH_ENTER: call(frame->pop_value(this), __enter__, no_arg()); continue; - // case OP_WITH_EXIT: call(frame->pop_value(this), __exit__, no_arg()); continue; - // case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); continue; - // case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); continue; - default: throw std::runtime_error(Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); } + } DISPATCH(); + /*****************************************/ + case OP_IMPORT_NAME: { + StrName name = frame->co->names[byte.arg]; + PyObject* ext_mod = _modules.try_get(name); + if(ext_mod == nullptr){ + Str source; + auto it = _lazy_modules.find(name); + if(it == _lazy_modules.end()){ + bool ok = false; + source = _read_file_cwd(name.str() + ".py", &ok); + if(!ok) _error("ImportError", "module " + name.str().escape(true) + " not found"); + }else{ + source = it->second; + _lazy_modules.erase(it); + } + CodeObject_ code = compile(source, name.str(), EXEC_MODE); + PyObject* new_mod = new_module(name); + _exec(code, new_mod); + new_mod->attr()._try_perfect_rehash(); + } + frame->push(ext_mod); + } DISPATCH(); + case OP_IMPORT_STAR: { + PyObject* obj = frame->popx(); + for(auto& [name, value]: obj->attr().items()){ + Str s = name.str(); + if(s.empty() || s[0] == '_') continue; + frame->f_globals().set(name, value); + } + }; DISPATCH(); + /*****************************************/ + /*****************************************/ + // case OP_SETUP_DECORATOR: DISPATCH(); + // case OP_SETUP_CLOSURE: { + // Function& f = CAST(Function&, frame->top()); // reference + // f._closure = frame->_locals; + // } DISPATCH(); + // case OP_BEGIN_CLASS: { + // StrName name = frame->co->names[byte.arg]; + 
// PyObject* clsBase = frame->popx(); + // if(clsBase == None) clsBase = _t(tp_object); + // check_type(clsBase, tp_type); + // PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, clsBase)); + // frame->push(cls); + // } DISPATCH(); + // case OP_END_CLASS: { + // PyObject* cls = frame->popx(); + // cls->attr()._try_perfect_rehash(); + // }; DISPATCH(); + // case OP_STORE_CLASS_ATTR: { + // StrName name = frame->co->names[byte.arg]; + // PyObject* obj = frame->popx(); + // PyObject* cls = frame->top(); + // cls->attr().set(name, obj); + // } DISPATCH(); + // case OP_ASSERT: { + // PyObject* _msg = frame->pop_value(this); + // Str msg = CAST(Str, asStr(_msg)); + // PyObject* expr = frame->pop_value(this); + // if(asBool(expr) != True) _error("AssertionError", msg); + // } DISPATCH(); + // case OP_EXCEPTION_MATCH: { + // const auto& e = CAST(Exception&, frame->top()); + // StrName name = frame->co->names[byte.arg].first; + // frame->push(VAR(e.match_type(name))); + // } DISPATCH(); + // case OP_RAISE: { + // PyObject* obj = frame->pop_value(this); + // Str msg = obj == None ? 
"" : CAST(Str, asStr(obj)); + // StrName type = frame->co->names[byte.arg].first; + // _error(type, msg); + // } DISPATCH(); + // case OP_RE_RAISE: _raise(); DISPATCH(); + // case OP_YIELD_VALUE: return _py_op_yield; + // // TODO: using "goto" inside with block may cause __exit__ not called + // case OP_WITH_ENTER: call(frame->pop_value(this), __enter__, no_arg()); DISPATCH(); + // case OP_WITH_EXIT: call(frame->pop_value(this), __exit__, no_arg()); DISPATCH(); + // case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); DISPATCH(); + // case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); DISPATCH(); + default: throw std::runtime_error(Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); } UNREACHABLE(); } +#undef DISPATCH + } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index f7cced24..40c70d6f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -587,9 +587,11 @@ class Compiler { bool try_compile_assignment(){ Expr* lhs_p = ctx()->s_expr.top().get(); + bool inplace; switch (curr().type) { case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="): case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): { + inplace = true; advance(); auto e = make_expr(); e->op = prev().type - 1; // -1 to remove = @@ -599,6 +601,7 @@ class Compiler { ctx()->s_expr.push(std::move(e)); } break; case TK("="): + inplace = false; advance(); EXPR_TUPLE(); break; @@ -608,6 +611,7 @@ class Compiler { rhs->emit(ctx()); bool ok = lhs_p->emit_store(ctx()); if(!ok) SyntaxError(); + if(!inplace) ctx()->s_expr.pop(); return true; } From 3d40a3d51f3770b44c6661f86f3f5b3019923e04 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 4 Apr 2023 22:01:56 +0800 Subject: [PATCH 41/73] up --- src/error.h | 2 +- src/expr.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/error.h b/src/error.h index 34d4874d..561ce40d 100644 --- a/src/error.h +++ b/src/error.h @@ -50,7 +50,7 @@ 
struct SourceData { this->filename = filename; this->source = ss.str(); - line_starts.push_back(source); + line_starts.push_back(this->source.c_str()); this->mode = mode; } diff --git a/src/expr.h b/src/expr.h index cdb476dd..bad4d233 100644 --- a/src/expr.h +++ b/src/expr.h @@ -280,10 +280,10 @@ struct NegatedExpr: Expr{ LiteralExpr* lit = static_cast(child.get()); PyObject* obj = nullptr; if(std::holds_alternative(lit->value)){ - obj = VAR(std::get(lit->value)); + obj = VAR(-std::get(lit->value)); } if(std::holds_alternative(lit->value)){ - obj = VAR(std::get(lit->value)); + obj = VAR(-std::get(lit->value)); } if(obj != nullptr){ ctx->emit(OP_LOAD_CONST, ctx->add_const(obj), line); From 22e909d7e17f7764c16349ae533b95b7335391fd Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 4 Apr 2023 22:44:40 +0800 Subject: [PATCH 42/73] up --- src/expr.h | 9 ++++++--- src/vm.h | 6 ++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/expr.h b/src/expr.h index bad4d233..4d2354f0 100644 --- a/src/expr.h +++ b/src/expr.h @@ -28,10 +28,10 @@ struct Expr{ }; struct CodeEmitContext{ - CodeObject_ co; VM* vm; + CodeObject_ co; stack s_expr; - CodeEmitContext(VM* vm, CodeObject_ co): co(co) {} + CodeEmitContext(VM* vm, CodeObject_ co): vm(vm), co(co) {} int curr_block_i = 0; bool is_compiling_class = false; @@ -65,7 +65,10 @@ struct CodeEmitContext{ Bytecode{(uint16_t)opcode, (uint16_t)curr_block_i, arg, line} ); int i = co->codes.size() - 1; - if(line==BC_KEEPLINE && i>=1) co->codes[i].line = co->codes[i-1].line; + if(line==BC_KEEPLINE){ + if(i>=1) co->codes[i].line = co->codes[i-1].line; + else co->codes[i].line = 1; + } return i; } diff --git a/src/vm.h b/src/vm.h index a615b5a4..f649d3e4 100644 --- a/src/vm.h +++ b/src/vm.h @@ -568,12 +568,9 @@ inline Str VM::disassemble(CodeObject_ co){ } } StrStream ss; - ss << std::string(54, '-') << '\n'; - ss << co->name << ":\n"; int prev_line = -1; for(int i=0; icodes.size(); i++){ const Bytecode& byte = co->codes[i]; 
- if(byte.op == OP_NO_OP) continue; Str line = std::to_string(byte.line); if(byte.line == prev_line) line = ""; else{ @@ -626,12 +623,13 @@ inline Str VM::disassemble(CodeObject_ co){ list.push_back(VAR(co->names[i].str())); } names << CAST(Str, asRepr(VAR(list))); - ss << '\n' << consts.str() << '\n' << names.str() << '\n'; + ss << '\n' << consts.str() << '\n' << names.str(); for(int i=0; iconsts.size(); i++){ PyObject* obj = co->consts[i]; if(is_type(obj, tp_function)){ const auto& f = CAST(Function&, obj); + ss << "\n\n" << "Disassembly of " << f.name.str() << ":\n"; ss << disassemble(f.code); } } From a187e5bcdb5cc873892a1024d55ebd41e15084e8 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 4 Apr 2023 22:55:57 +0800 Subject: [PATCH 43/73] up --- src/codeobject.h | 1 + src/obj.h | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/codeobject.h b/src/codeobject.h index 1795f2c5..c390aaf3 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -60,6 +60,7 @@ struct CodeObject { std::set global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; + std::vector functions; // may be.. just use a large NameDict? 
uint32_t perfect_locals_capacity = 2; diff --git a/src/obj.h b/src/obj.h index e6f6c180..7d64a098 100644 --- a/src/obj.h +++ b/src/obj.h @@ -24,7 +24,7 @@ struct NativeFunc { PyObject* operator()(VM* vm, Args& args) const; }; -struct Function { +struct FunctionDecl { StrName name; CodeObject_ code; std::vector args; @@ -32,10 +32,6 @@ struct Function { NameDict kwargs; // empty if no k=v std::vector kwargs_order; - // runtime settings - PyObject* _module = nullptr; - NameDict_ _closure = nullptr; - bool has_name(StrName val) const { bool _0 = std::find(args.begin(), args.end(), val) != args.end(); bool _1 = starred_arg == val; @@ -44,6 +40,12 @@ struct Function { } }; +struct Function{ + const FunctionDecl* decl; + PyObject* _module = nullptr; + NameDict_ _closure = nullptr; +}; + struct BoundMethod { PyObject* obj; PyObject* method; From 6452acf3275e9fee12034ee2f2f46802c6e88e33 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 13:57:40 +0800 Subject: [PATCH 44/73] up --- src/ceval.h | 11 +++-------- src/codeobject.h | 6 +++++- src/compiler.h | 13 ++++++------- src/expr.h | 14 ++++++++------ src/gc.h | 2 -- src/obj.h | 4 ++-- src/opcodes.h | 1 - src/pocketpy.h | 2 +- src/vm.h | 32 ++++++++++++++------------------ 9 files changed, 39 insertions(+), 46 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index acfcbc70..5bdefb92 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -38,10 +38,9 @@ __NEXT_STEP:; case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); DISPATCH(); case OP_LOAD_BUILTIN_EVAL: frame->push(builtins->attr(m_eval)); DISPATCH(); case OP_LOAD_FUNCTION: { - PyObject* obj = frame->co->consts[byte.arg]; - Function f = CAST(Function, obj); // copy it! 
- f._module = frame->_module; // setup module - frame->push(VAR(std::move(f))); + const FunctionDecl* decl = &frame->co->func_decls[byte.arg]; + PyObject* obj = VAR(Function({decl, frame->_module, frame->_locals})); + frame->push(obj); } DISPATCH(); /*****************************************/ case OP_LOAD_NAME: { @@ -312,10 +311,6 @@ __NEXT_STEP:; /*****************************************/ /*****************************************/ // case OP_SETUP_DECORATOR: DISPATCH(); - // case OP_SETUP_CLOSURE: { - // Function& f = CAST(Function&, frame->top()); // reference - // f._closure = frame->_locals; - // } DISPATCH(); // case OP_BEGIN_CLASS: { // StrName name = frame->co->names[byte.arg]; // PyObject* clsBase = frame->popx(); diff --git a/src/codeobject.h b/src/codeobject.h index c390aaf3..ab74e909 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -60,7 +60,7 @@ struct CodeObject { std::set global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; - std::vector functions; + std::vector func_decls; // may be.. just use a large NameDict? 
uint32_t perfect_locals_capacity = 2; @@ -70,6 +70,10 @@ struct CodeObject { void _mark() const { for(PyObject* v : consts) OBJ_MARK(v); + for(auto& decl: func_decls){ + decl.kwargs._mark(); + decl.code->_mark(); + } } }; diff --git a/src/compiler.h b/src/compiler.h index 40c70d6f..274b63b9 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -170,13 +170,13 @@ class Compiler { // PASS void exprLambda(){ auto e = make_expr(); - e->func.name = ""; + e->decl.name = ""; e->scope = name_scope(); if(!match(TK(":"))){ - _compile_f_args(e->func, false); + _compile_f_args(e->decl, false); consume(TK(":")); } - e->func.code = push_context(lexer->src, ""); + e->decl.code = push_context(lexer->src, ""); EXPR(false); // https://github.com/blueloveTH/pocketpy/issues/37 ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); @@ -754,7 +754,7 @@ class Compiler { ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE); } - void _compile_f_args(Function& func, bool enable_type_hints){ + void _compile_f_args(FunctionDecl& func, bool enable_type_hints){ int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs do { if(state == 3) SyntaxError("**kwargs should be the last argument"); @@ -796,7 +796,7 @@ class Compiler { void compile_function(){ // TODO: bug, if there are multiple decorators, will cause error - Function func; + FunctionDecl func; StrName obj_name; consume(TK("@id")); func.name = prev().str(); @@ -816,8 +816,7 @@ class Compiler { func.code = push_context(lexer->src, func.name.str()); compile_block_body(); pop_context(); - ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_const(VAR(func)), prev().line); - if(name_scope() == NAME_LOCAL) ctx()->emit(OP_SETUP_CLOSURE, BC_NOARG, prev().line); + ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(func), prev().line); if(!ctx()->is_compiling_class){ if(obj_name.empty()){ auto e = make_expr(func.name, name_scope()); diff --git a/src/expr.h b/src/expr.h index 4d2354f0..463c2aff 100644 --- a/src/expr.h +++ b/src/expr.h @@ 
-95,6 +95,11 @@ struct CodeEmitContext{ co->consts.push_back(v); return co->consts.size() - 1; } + + int add_func_decl(FunctionDecl decl){ + co->func_decls.push_back(decl); + return co->func_decls.size() - 1; + } }; // PASS @@ -472,16 +477,13 @@ struct SetCompExpr: CompExpr{ }; struct LambdaExpr: Expr{ - Function func; + FunctionDecl decl; NameScope scope; Str str() const override { return ""; } void emit(CodeEmitContext* ctx) override { - VM* vm = ctx->vm; - ctx->emit(OP_LOAD_FUNCTION, ctx->add_const(VAR(func)), line); - if(scope == NAME_LOCAL){ - ctx->emit(OP_SETUP_CLOSURE, BC_NOARG, BC_KEEPLINE); - } + int index = ctx->add_func_decl(decl); + ctx->emit(OP_LOAD_FUNCTION, index, line); } }; diff --git a/src/gc.h b/src/gc.h index 7ce845e6..89286c5d 100644 --- a/src/gc.h +++ b/src/gc.h @@ -130,8 +130,6 @@ template<> inline void _mark(Tuple& t){ } template<> inline void _mark(Function& t){ - t.code->_mark(); - t.kwargs._mark(); if(t._module != nullptr) OBJ_MARK(t._module); if(t._closure != nullptr) t._closure->_mark(); } diff --git a/src/obj.h b/src/obj.h index 7d64a098..3dd46322 100644 --- a/src/obj.h +++ b/src/obj.h @@ -42,8 +42,8 @@ struct FunctionDecl { struct Function{ const FunctionDecl* decl; - PyObject* _module = nullptr; - NameDict_ _closure = nullptr; + PyObject* _module; + NameDict_ _closure; }; struct BoundMethod { diff --git a/src/opcodes.h b/src/opcodes.h index ffd63177..46a81360 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -16,7 +16,6 @@ OPCODE(TRY_BLOCK_EXIT) OPCODE(YIELD_VALUE) -OPCODE(SETUP_CLOSURE) OPCODE(SETUP_DECORATOR) OPCODE(BEGIN_CLASS) diff --git a/src/pocketpy.h b/src/pocketpy.h index 938e92fd..58b75418 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -627,7 +627,7 @@ inline void add_module_dis(VM* vm){ vm->bind_func<1>(mod, "dis", [](VM* vm, Args& args) { PyObject* f = args[0]; if(is_type(f, vm->tp_bound_method)) f = CAST(BoundMethod, args[0]).method; - CodeObject_ code = CAST(Function, f).code; + CodeObject_ code = CAST(Function&, 
f).decl->code; (*vm->_stdout) << vm->disassemble(code); return vm->None; }); diff --git a/src/vm.h b/src/vm.h index f649d3e4..f40c1000 100644 --- a/src/vm.h +++ b/src/vm.h @@ -625,13 +625,9 @@ inline Str VM::disassemble(CodeObject_ co){ names << CAST(Str, asRepr(VAR(list))); ss << '\n' << consts.str() << '\n' << names.str(); - for(int i=0; iconsts.size(); i++){ - PyObject* obj = co->consts[i]; - if(is_type(obj, tp_function)){ - const auto& f = CAST(Function&, obj); - ss << "\n\n" << "Disassembly of " << f.name.str() << ":\n"; - ss << disassemble(f.code); - } + for(auto& decl: co->func_decls){ + ss << "\n\n" << "Disassembly of " << decl.name.str() << ":\n"; + ss << disassemble(decl.code); } return Str(ss.str()); } @@ -718,13 +714,13 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo } else if(is_type(callable, tp_function)){ const Function& fn = CAST(Function&, callable); NameDict_ locals = make_sp( - fn.code->perfect_locals_capacity, + fn.decl->code->perfect_locals_capacity, kLocalsLoadFactor, - fn.code->perfect_hash_seed + fn.decl->code->perfect_hash_seed ); int i = 0; - for(StrName name : fn.args){ + for(StrName name : fn.decl->args){ if(i < args.size()){ locals->set(name, args[i++]); continue; @@ -732,14 +728,14 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo TypeError("missing positional argument " + name.str().escape(true)); } - locals->update(fn.kwargs); + locals->update(fn.decl->kwargs); - if(!fn.starred_arg.empty()){ + if(!fn.decl->starred_arg.empty()){ List vargs; // handle *args while(i < args.size()) vargs.push_back(args[i++]); - locals->set(fn.starred_arg, VAR(Tuple(std::move(vargs)))); + locals->set(fn.decl->starred_arg, VAR(Tuple(std::move(vargs)))); }else{ - for(StrName key : fn.kwargs_order){ + for(StrName key : fn.decl->kwargs_order){ if(i < args.size()){ locals->set(key, args[i++]); }else{ @@ -751,14 +747,14 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& 
kwargs, boo for(int i=0; ikwargs.contains(key)){ + TypeError(key.escape(true) + " is an invalid keyword argument for " + fn.decl->name.str() + "()"); } locals->set(key, kwargs[i+1]); } PyObject* _module = fn._module != nullptr ? fn._module : top_frame()->_module; - auto _frame = _new_frame(fn.code, _module, locals, fn._closure); - if(fn.code->is_generator) return PyIter(Generator(this, std::move(_frame))); + auto _frame = _new_frame(fn.decl->code, _module, locals, fn._closure); + if(fn.decl->code->is_generator) return PyIter(Generator(this, std::move(_frame))); callstack.push(std::move(_frame)); if(opCall) return _py_op_call; return _exec(); From 9077da4d6c8c671560cd009628a9dfdc046fde7f Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 14:38:14 +0800 Subject: [PATCH 45/73] fix a bug --- src/ceval.h | 4 ++-- src/codeobject.h | 6 +++--- src/compiler.h | 38 ++++++++++++++++++-------------------- src/expr.h | 10 ++++++++-- src/obj.h | 6 ++++-- src/vm.h | 4 ++-- 6 files changed, 37 insertions(+), 31 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 5bdefb92..60557718 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -15,7 +15,7 @@ __NEXT_STEP:; * For example, frame->popx() returns a strong reference which may be dangerous * `Args` containing strong references is safe if it is passed to `call` or `fast_call` */ - heap._auto_collect(this); + //heap._auto_collect(this); const Bytecode& byte = frame->next_bytecode(); switch (byte.op) @@ -38,7 +38,7 @@ __NEXT_STEP:; case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); DISPATCH(); case OP_LOAD_BUILTIN_EVAL: frame->push(builtins->attr(m_eval)); DISPATCH(); case OP_LOAD_FUNCTION: { - const FunctionDecl* decl = &frame->co->func_decls[byte.arg]; + FuncDecl_ decl = frame->co->func_decls[byte.arg]; PyObject* obj = VAR(Function({decl, frame->_module, frame->_locals})); frame->push(obj); } DISPATCH(); diff --git a/src/codeobject.h b/src/codeobject.h index ab74e909..dd2682e6 100644 --- a/src/codeobject.h +++ 
b/src/codeobject.h @@ -60,7 +60,7 @@ struct CodeObject { std::set global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; - std::vector func_decls; + std::vector func_decls; // may be.. just use a large NameDict? uint32_t perfect_locals_capacity = 2; @@ -71,8 +71,8 @@ struct CodeObject { void _mark() const { for(PyObject* v : consts) OBJ_MARK(v); for(auto& decl: func_decls){ - decl.kwargs._mark(); - decl.code->_mark(); + decl->kwargs._mark(); + decl->code->_mark(); } } }; diff --git a/src/compiler.h b/src/compiler.h index 274b63b9..81a453af 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -169,14 +169,12 @@ class Compiler { // PASS void exprLambda(){ - auto e = make_expr(); - e->decl.name = ""; - e->scope = name_scope(); + auto e = make_expr(name_scope()); if(!match(TK(":"))){ _compile_f_args(e->decl, false); consume(TK(":")); } - e->decl.code = push_context(lexer->src, ""); + e->decl->code = push_context(lexer->src, e->decl->name.str()); EXPR(false); // https://github.com/blueloveTH/pocketpy/issues/37 ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); @@ -754,7 +752,7 @@ class Compiler { ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE); } - void _compile_f_args(FunctionDecl& func, bool enable_type_hints){ + void _compile_f_args(FuncDecl_ decl, bool enable_type_hints){ int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs do { if(state == 3) SyntaxError("**kwargs should be the last argument"); @@ -769,7 +767,7 @@ class Compiler { consume(TK("@id")); const Str& name = prev().str(); - if(func.has_name(name)) SyntaxError("duplicate argument name"); + if(decl->has_name(name)) SyntaxError("duplicate argument name"); // eat type hints if(enable_type_hints && match(TK(":"))) consume(TK("@id")); @@ -778,16 +776,16 @@ class Compiler { switch (state) { - case 0: func.args.push_back(name); break; - case 1: func.starred_arg = name; state+=1; break; + case 0: decl->args.push_back(name); break; + case 1: 
decl->starred_arg = name; state+=1; break; case 2: { consume(TK("=")); PyObject* value = read_literal(); if(value == nullptr){ SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type)); } - func.kwargs.set(name, value); - func.kwargs_order.push_back(name); + decl->kwargs.set(name, value); + decl->kwargs_order.push_back(name); } break; case 3: SyntaxError("**kwargs is not supported yet"); break; } @@ -796,38 +794,38 @@ class Compiler { void compile_function(){ // TODO: bug, if there are multiple decorators, will cause error - FunctionDecl func; + FuncDecl_ decl = make_sp(); StrName obj_name; consume(TK("@id")); - func.name = prev().str(); + decl->name = prev().str(); if(!ctx()->is_compiling_class && match(TK("::"))){ consume(TK("@id")); - obj_name = func.name; - func.name = prev().str(); + obj_name = decl->name; + decl->name = prev().str(); } consume(TK("(")); if (!match(TK(")"))) { - _compile_f_args(func, true); + _compile_f_args(decl, true); consume(TK(")")); } if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - func.code = push_context(lexer->src, func.name.str()); + decl->code = push_context(lexer->src, decl->name.str()); compile_block_body(); pop_context(); - ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(func), prev().line); + ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line); if(!ctx()->is_compiling_class){ if(obj_name.empty()){ - auto e = make_expr(func.name, name_scope()); + auto e = make_expr(decl->name, name_scope()); e->emit_store(ctx()); } else { ctx()->emit(OP_LOAD_NAME, ctx()->add_name(obj_name), prev().line); - int index = ctx()->add_name(func.name); + int index = ctx()->add_name(decl->name); ctx()->emit(OP_STORE_ATTR, index, prev().line); } }else{ - ctx()->emit(OP_STORE_CLASS_ATTR, ctx()->add_name(func.name), BC_KEEPLINE); + ctx()->emit(OP_STORE_CLASS_ATTR, ctx()->add_name(decl->name), BC_KEEPLINE); } } diff --git a/src/expr.h b/src/expr.h index 463c2aff..21df0215 100644 --- a/src/expr.h +++ 
b/src/expr.h @@ -96,7 +96,7 @@ struct CodeEmitContext{ return co->consts.size() - 1; } - int add_func_decl(FunctionDecl decl){ + int add_func_decl(FuncDecl_ decl){ co->func_decls.push_back(decl); return co->func_decls.size() - 1; } @@ -477,9 +477,15 @@ struct SetCompExpr: CompExpr{ }; struct LambdaExpr: Expr{ - FunctionDecl decl; + FuncDecl_ decl; NameScope scope; Str str() const override { return ""; } + + LambdaExpr(NameScope scope){ + this->decl = make_sp(); + this->decl->name = ""; + this->scope = scope; + } void emit(CodeEmitContext* ctx) override { int index = ctx->add_func_decl(decl); diff --git a/src/obj.h b/src/obj.h index 3dd46322..372b69ae 100644 --- a/src/obj.h +++ b/src/obj.h @@ -24,7 +24,7 @@ struct NativeFunc { PyObject* operator()(VM* vm, Args& args) const; }; -struct FunctionDecl { +struct FuncDecl { StrName name; CodeObject_ code; std::vector args; @@ -40,8 +40,10 @@ struct FunctionDecl { } }; +using FuncDecl_ = shared_ptr; + struct Function{ - const FunctionDecl* decl; + FuncDecl_ decl; PyObject* _module; NameDict_ _closure; }; diff --git a/src/vm.h b/src/vm.h index f40c1000..846b05f6 100644 --- a/src/vm.h +++ b/src/vm.h @@ -626,8 +626,8 @@ inline Str VM::disassemble(CodeObject_ co){ ss << '\n' << consts.str() << '\n' << names.str(); for(auto& decl: co->func_decls){ - ss << "\n\n" << "Disassembly of " << decl.name.str() << ":\n"; - ss << disassemble(decl.code); + ss << "\n\n" << "Disassembly of " << decl->name.str() << ":\n"; + ss << disassemble(decl->code); } return Str(ss.str()); } From 66e6a6b559e7ccde13bce461d0a838a64abec6ed Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 16:24:56 +0800 Subject: [PATCH 46/73] up --- src/ceval.h | 14 ++++++++++++++ src/compiler.h | 7 ++++++- src/expr.h | 18 ++++++++++++++---- src/gc.h | 19 +++++++++++++++++++ 4 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 60557718..a6c266c8 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -309,6 +309,20 @@ 
__NEXT_STEP:; } }; DISPATCH(); /*****************************************/ + case OP_UNPACK_SEQUENCE: { + // asIter or iter->next may run bytecode + // accidential gc may happen + // lock the gc via RAII + auto _lock = heap.gc_scope_lock(); + PyObject* obj = asIter(frame->popx()); + BaseIter* iter = PyIter_AS_C(obj); + for(int i=0; inext(); + if(item == nullptr) ValueError("not enough values to unpack"); + frame->push(item); + } + if(iter->next() != nullptr) ValueError("too many values to unpack"); + }; DISPATCH(); /*****************************************/ // case OP_SETUP_DECORATOR: DISPATCH(); // case OP_BEGIN_CLASS: { diff --git a/src/compiler.h b/src/compiler.h index 81a453af..4f214360 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -40,7 +40,9 @@ class Compiler { } void pop_context(){ - if(!ctx()->s_expr.empty()) UNREACHABLE(); + if(!ctx()->s_expr.empty()){ + throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr()); + } // if the last op does not return, add a default return None if(ctx()->co->codes.empty() || ctx()->co->codes.back().op != OP_RETURN_VALUE){ ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); @@ -184,6 +186,7 @@ class Compiler { // PASS void exprTuple(){ std::vector items; + items.push_back(ctx()->s_expr.popx()); do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok items.push_back(ctx()->s_expr.popx()); @@ -385,6 +388,7 @@ class Compiler { // PASS void exprSubscr() { auto e = make_expr(); + e->a = ctx()->s_expr.popx(); std::vector items; do { EXPR_TUPLE(); @@ -605,6 +609,7 @@ class Compiler { break; default: return false; } + std::cout << ctx()->_log_s_expr() << std::endl; Expr_ rhs = ctx()->s_expr.popx(); rhs->emit(ctx()); bool ok = lhs_p->emit_store(ctx()); diff --git a/src/expr.h b/src/expr.h index 21df0215..da0bc366 100644 --- a/src/expr.h +++ b/src/expr.h @@ -5,6 +5,7 @@ #include "lexer.h" #include "error.h" #include "ceval.h" +#include "str.h" namespace pkpy{ @@ -55,11 +56,19 @@ struct CodeEmitContext{ // clear 
the expression stack and generate bytecode void emit_expr(){ - if(s_expr.size() != 1) UNREACHABLE(); + if(s_expr.size() != 1){ + throw std::runtime_error("s_expr.size() != 1\n" + _log_s_expr()); + } Expr_ expr = s_expr.popx(); expr->emit(this); } + Str _log_s_expr(){ + StrStream ss; + for(auto& e: s_expr.data()) ss << e->str() << " "; + return ss.str(); + } + int emit(Opcode opcode, int arg, int line) { co->codes.push_back( Bytecode{(uint16_t)opcode, (uint16_t)curr_block_i, arg, line} @@ -411,8 +420,9 @@ struct TupleExpr: SequenceExpr{ if(starred_i != items.size()-1) return false; ctx->emit(OP_UNPACK_EX, items.size()-1, line); } - for(auto& e: items){ - bool ok = e->emit_store(ctx); + // do reverse emit + for(int i=items.size()-1; i>=0; i--){ + bool ok = items[i]->emit_store(ctx); if(!ok) return false; } return true; @@ -480,7 +490,7 @@ struct LambdaExpr: Expr{ FuncDecl_ decl; NameScope scope; Str str() const override { return ""; } - + LambdaExpr(NameScope scope){ this->decl = make_sp(); this->decl->name = ""; diff --git a/src/gc.h b/src/gc.h index 89286c5d..f0619150 100644 --- a/src/gc.h +++ b/src/gc.h @@ -47,6 +47,23 @@ struct ManagedHeap{ int gc_threshold = kMinGCThreshold; int gc_counter = 0; + /********************/ + int _gc_lock_counter = 0; + struct ScopeLock{ + ManagedHeap* heap; + ScopeLock(ManagedHeap* heap): heap(heap){ + heap->_gc_lock_counter++; + } + ~ScopeLock(){ + heap->_gc_lock_counter--; + } + }; + + ScopeLock gc_scope_lock(){ + return ScopeLock(this); + } + /********************/ + template PyObject* gcnew(Type type, T&& val){ PyObject* obj = new Py_>(type, std::forward(val)); @@ -98,6 +115,7 @@ struct ManagedHeap{ void _delete_hook(VM* vm, PyObject* obj); void _auto_collect(VM* vm){ + if(_gc_lock_counter > 0) return; if(gc_counter < gc_threshold) return; gc_counter = 0; collect(vm); @@ -106,6 +124,7 @@ struct ManagedHeap{ } int collect(VM* vm){ + if(_gc_lock_counter > 0) UNREACHABLE(); mark(vm); int freed = sweep(vm); return freed; From 
f0069e109a58607c3aaa5778b8e195f34eb28408 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 16:52:30 +0800 Subject: [PATCH 47/73] up --- src/ceval.h | 20 ++++++++++++++------ src/compiler.h | 5 +++++ src/expr.h | 4 ++-- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index a6c266c8..e7b91b95 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -309,11 +309,9 @@ __NEXT_STEP:; } }; DISPATCH(); /*****************************************/ - case OP_UNPACK_SEQUENCE: { - // asIter or iter->next may run bytecode - // accidential gc may happen - // lock the gc via RAII - auto _lock = heap.gc_scope_lock(); + case OP_UNPACK_SEQUENCE: case OP_UNPACK_EX: { + // asIter or iter->next may run bytecode, accidential gc may happen + auto _lock = heap.gc_scope_lock(); // lock the gc via RAII!! PyObject* obj = asIter(frame->popx()); BaseIter* iter = PyIter_AS_C(obj); for(int i=0; ipush(item); } - if(iter->next() != nullptr) ValueError("too many values to unpack"); + if(byte.op == OP_UNPACK_EX){ + List extras; + while(true){ + PyObject* item = iter->next(); + if(item == nullptr) break; + extras.push_back(item); + } + frame->push(VAR(extras)); + }else{ + if(iter->next() != nullptr) ValueError("too many values to unpack"); + } }; DISPATCH(); /*****************************************/ // case OP_SETUP_DECORATOR: DISPATCH(); diff --git a/src/compiler.h b/src/compiler.h index 4f214360..9b070200 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -611,6 +611,11 @@ class Compiler { } std::cout << ctx()->_log_s_expr() << std::endl; Expr_ rhs = ctx()->s_expr.popx(); + + if(lhs_p->is_starred() || rhs->is_starred()){ + SyntaxError("can't use starred expression here"); + } + rhs->emit(ctx()); bool ok = lhs_p->emit_store(ctx()); if(!ok) SyntaxError(); diff --git a/src/expr.h b/src/expr.h index da0bc366..bd3cfd92 100644 --- a/src/expr.h +++ b/src/expr.h @@ -153,7 +153,6 @@ struct NameExpr: Expr{ } }; -// *号运算符,作为左值和右值效果不同 struct StarredExpr: Expr{ 
Expr_ child; StarredExpr(Expr_&& child): child(std::move(child)) {} @@ -411,13 +410,14 @@ struct TupleExpr: SequenceExpr{ } if(starred_i == -1){ - // Unpacks TOS into count individual values, which are put onto the stack right-to-left. ctx->emit(OP_UNPACK_SEQUENCE, items.size(), line); }else{ // starred assignment target must be in a tuple if(items.size() == 1) return false; // starred assignment target must be the last one (differ from CPython) if(starred_i != items.size()-1) return false; + // a,*b = [1,2,3] + // stack is [1,2,3] -> [1,[2,3]] ctx->emit(OP_UNPACK_EX, items.size()-1, line); } // do reverse emit From 14433b1210a3ebd7aa5868c9944ed93f453064bd Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 17:43:34 +0800 Subject: [PATCH 48/73] up --- src/ceval.h | 41 +++++++++++++++++++++++------------------ src/compiler.h | 3 ++- src/opcodes.h | 9 +++++---- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index e7b91b95..2a7a1308 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -319,6 +319,7 @@ __NEXT_STEP:; if(item == nullptr) ValueError("not enough values to unpack"); frame->push(item); } + // handle extra items if(byte.op == OP_UNPACK_EX){ List extras; while(true){ @@ -332,25 +333,29 @@ __NEXT_STEP:; } }; DISPATCH(); /*****************************************/ + case OP_BEGIN_CLASS: { + StrName name = frame->co->names[byte.arg]; + PyObject* super_cls = frame->popx(); + if(super_cls == None) super_cls = _t(tp_object); + check_type(super_cls, tp_type); + PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, super_cls)); + frame->push(cls); + } DISPATCH(); + case OP_END_CLASS: { + PyObject* cls = frame->popx(); + cls->attr()._try_perfect_rehash(); + }; DISPATCH(); + case OP_STORE_CLASS_ATTR: { + StrName name = frame->co->names[byte.arg]; + PyObject* obj = frame->popx(); + PyObject* cls = frame->top(); + cls->attr().set(name, obj); + } DISPATCH(); + /*****************************************/ + 
/*****************************************/ + /*****************************************/ // case OP_SETUP_DECORATOR: DISPATCH(); - // case OP_BEGIN_CLASS: { - // StrName name = frame->co->names[byte.arg]; - // PyObject* clsBase = frame->popx(); - // if(clsBase == None) clsBase = _t(tp_object); - // check_type(clsBase, tp_type); - // PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, clsBase)); - // frame->push(cls); - // } DISPATCH(); - // case OP_END_CLASS: { - // PyObject* cls = frame->popx(); - // cls->attr()._try_perfect_rehash(); - // }; DISPATCH(); - // case OP_STORE_CLASS_ATTR: { - // StrName name = frame->co->names[byte.arg]; - // PyObject* obj = frame->popx(); - // PyObject* cls = frame->top(); - // cls->attr().set(name, obj); - // } DISPATCH(); + // case OP_ASSERT: { // PyObject* _msg = frame->pop_value(this); // Str msg = CAST(Str, asStr(_msg)); diff --git a/src/compiler.h b/src/compiler.h index 9b070200..c4d2be1f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -835,7 +835,8 @@ class Compiler { ctx()->emit(OP_STORE_ATTR, index, prev().line); } }else{ - ctx()->emit(OP_STORE_CLASS_ATTR, ctx()->add_name(decl->name), BC_KEEPLINE); + int index = ctx()->add_name(decl->name); + ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line); } } diff --git a/src/opcodes.h b/src/opcodes.h index 46a81360..7ba77d2a 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -18,10 +18,6 @@ OPCODE(YIELD_VALUE) OPCODE(SETUP_DECORATOR) -OPCODE(BEGIN_CLASS) -OPCODE(END_CLASS) -OPCODE(STORE_CLASS_ATTR) - /**************************/ OPCODE(NO_OP) /**************************/ @@ -96,4 +92,9 @@ OPCODE(IMPORT_STAR) OPCODE(UNPACK_SEQUENCE) OPCODE(UNPACK_EX) /**************************/ +// TODO: examine this +OPCODE(BEGIN_CLASS) +OPCODE(END_CLASS) +OPCODE(STORE_CLASS_ATTR) +/**************************/ #endif \ No newline at end of file From cf5fadd3b077cb4bbce7416fd919877d786677f8 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 19:25:43 +0800 Subject: 
[PATCH 49/73] up --- src/ceval.h | 57 +++++++++++++++++++++++++++----------------------- src/compiler.h | 22 ++++++++++++------- src/expr.h | 4 ++++ src/opcodes.h | 30 ++++++++++---------------- 4 files changed, 61 insertions(+), 52 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 2a7a1308..526850e4 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -238,6 +238,7 @@ __NEXT_STEP:; frame->push(std::move(ret)); } DISPATCH(); case OP_RETURN_VALUE: return frame->popx(); + case OP_YIELD_VALUE: return _py_op_yield; /*****************************************/ case OP_LIST_APPEND: { PyObject* obj = frame->popx(); @@ -352,35 +353,39 @@ __NEXT_STEP:; cls->attr().set(name, obj); } DISPATCH(); /*****************************************/ - /*****************************************/ - /*****************************************/ - // case OP_SETUP_DECORATOR: DISPATCH(); - - // case OP_ASSERT: { - // PyObject* _msg = frame->pop_value(this); - // Str msg = CAST(Str, asStr(_msg)); - // PyObject* expr = frame->pop_value(this); - // if(asBool(expr) != True) _error("AssertionError", msg); - // } DISPATCH(); - // case OP_EXCEPTION_MATCH: { - // const auto& e = CAST(Exception&, frame->top()); - // StrName name = frame->co->names[byte.arg].first; - // frame->push(VAR(e.match_type(name))); - // } DISPATCH(); - // case OP_RAISE: { - // PyObject* obj = frame->pop_value(this); - // Str msg = obj == None ? 
"" : CAST(Str, asStr(obj)); - // StrName type = frame->co->names[byte.arg].first; - // _error(type, msg); - // } DISPATCH(); - // case OP_RE_RAISE: _raise(); DISPATCH(); - // case OP_YIELD_VALUE: return _py_op_yield; // // TODO: using "goto" inside with block may cause __exit__ not called // case OP_WITH_ENTER: call(frame->pop_value(this), __enter__, no_arg()); DISPATCH(); // case OP_WITH_EXIT: call(frame->pop_value(this), __exit__, no_arg()); DISPATCH(); - // case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); DISPATCH(); - // case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); DISPATCH(); - default: throw std::runtime_error(Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); + /*****************************************/ + case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); DISPATCH(); + case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); DISPATCH(); + /*****************************************/ + case OP_ASSERT: { + PyObject* obj = frame->top(); + Str msg; + if(is_type(obj, tp_tuple)){ + auto& t = CAST(Tuple&, obj); + if(t.size() != 2) ValueError("assert tuple must have 2 elements"); + obj = t[0]; + msg = CAST(Str&, asStr(t[1])); + } + bool ok = asBool(obj); + frame->pop(); + if(!ok) _error("AssertionError", msg); + } DISPATCH(); + case OP_EXCEPTION_MATCH: { + const auto& e = CAST(Exception&, frame->top()); + StrName name = frame->co->names[byte.arg]; + frame->push(VAR(e.match_type(name))); + } DISPATCH(); + case OP_RAISE: { + PyObject* obj = frame->popx(); + Str msg = obj == None ? 
"" : CAST(Str, asStr(obj)); + StrName type = frame->co->names[byte.arg]; + _error(type, msg); + } DISPATCH(); + case OP_RE_RAISE: _raise(); DISPATCH(); + default: throw std::runtime_error(OP_NAMES[byte.op] + std::string(" is not implemented")); } UNREACHABLE(); } diff --git a/src/compiler.h b/src/compiler.h index c4d2be1f..1215d4f2 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -578,13 +578,14 @@ class Compiler { } void compile_decorated(){ - EXPR(false); - // TODO: support multiple decorator - // use a while loop to consume '@' - if(!match_newlines_repl()) SyntaxError(); - ctx()->emit(OP_SETUP_DECORATOR, BC_NOARG, prev().line); + std::vector decorators; + do{ + EXPR(); + decorators.push_back(ctx()->s_expr.popx()); + if(!match_newlines_repl()) SyntaxError(); + }while(match(TK("@"))); consume(TK("def")); - compile_function(); + compile_function(decorators); } bool try_compile_assignment(){ @@ -593,6 +594,7 @@ class Compiler { switch (curr().type) { case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="): case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): { + if(ctx()->is_compiling_class) SyntaxError(); inplace = true; advance(); auto e = make_expr(); @@ -802,7 +804,7 @@ class Compiler { } while (match(TK(","))); } - void compile_function(){ + void compile_function(const std::vector& decorators={}){ // TODO: bug, if there are multiple decorators, will cause error FuncDecl_ decl = make_sp(); StrName obj_name; @@ -825,6 +827,12 @@ class Compiler { compile_block_body(); pop_context(); ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line); + // add decorators + for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){ + (*it)->emit(ctx()); + ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line); + ctx()->emit(OP_CALL, 1, (*it)->line); + } if(!ctx()->is_compiling_class){ if(obj_name.empty()){ auto e = make_expr(decl->name, name_scope()); diff --git a/src/expr.h b/src/expr.h index 
bd3cfd92..da2e25ca 100644 --- a/src/expr.h +++ b/src/expr.h @@ -140,6 +140,10 @@ struct NameExpr: Expr{ bool emit_store(CodeEmitContext* ctx) override { int index = ctx->add_name(name); + if(ctx->is_compiling_class){ + ctx->emit(OP_STORE_CLASS_ATTR, index, line); + return true; + } switch(scope){ case NAME_LOCAL: ctx->emit(OP_STORE_LOCAL, index, line); diff --git a/src/opcodes.h b/src/opcodes.h index 7ba77d2a..de44b343 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -1,23 +1,5 @@ #ifdef OPCODE -/**************************/ -OPCODE(WITH_ENTER) -OPCODE(WITH_EXIT) - -OPCODE(ASSERT) -OPCODE(EXCEPTION_MATCH) -OPCODE(RAISE) -OPCODE(RE_RAISE) - -OPCODE(STORE_FUNCTION) - -OPCODE(TRY_BLOCK_ENTER) -OPCODE(TRY_BLOCK_EXIT) - -OPCODE(YIELD_VALUE) - -OPCODE(SETUP_DECORATOR) - /**************************/ OPCODE(NO_OP) /**************************/ @@ -74,6 +56,7 @@ OPCODE(CALL_UNPACK) OPCODE(CALL_KWARGS) OPCODE(CALL_KWARGS_UNPACK) OPCODE(RETURN_VALUE) +OPCODE(YIELD_VALUE) /**************************/ OPCODE(LIST_APPEND) OPCODE(DICT_ADD) @@ -92,9 +75,18 @@ OPCODE(IMPORT_STAR) OPCODE(UNPACK_SEQUENCE) OPCODE(UNPACK_EX) /**************************/ -// TODO: examine this OPCODE(BEGIN_CLASS) OPCODE(END_CLASS) OPCODE(STORE_CLASS_ATTR) /**************************/ +OPCODE(WITH_ENTER) +OPCODE(WITH_EXIT) +/**************************/ +OPCODE(TRY_BLOCK_ENTER) +OPCODE(TRY_BLOCK_EXIT) +OPCODE(ASSERT) +OPCODE(EXCEPTION_MATCH) +OPCODE(RAISE) +OPCODE(RE_RAISE) +/**************************/ #endif \ No newline at end of file From 3cafab90299c3517da4d9e4919e2624a48b61417 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 19:28:20 +0800 Subject: [PATCH 50/73] Update compiler.h --- src/compiler.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 1215d4f2..530e234e 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -641,7 +641,7 @@ class Compiler { break; case TK("yield"): if (contexts.size() <= 1) SyntaxError("'yield' 
outside function"); - EXPR_TUPLE(true); + EXPR_TUPLE(false); // if yield present, mark the function as generator ctx()->co->is_generator = true; ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line); @@ -652,7 +652,7 @@ class Compiler { if(match_end_stmt()){ ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); }else{ - EXPR_TUPLE(true); + EXPR_TUPLE(false); consume_end_stmt(); } ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line); @@ -669,7 +669,7 @@ class Compiler { case TK("pass"): consume_end_stmt(); break; /*************************************************/ case TK("assert"): - EXPR_TUPLE(true); + EXPR_TUPLE(false); // TODO: change OP_ASSERT impl in ceval.h ctx()->emit(OP_ASSERT, BC_NOARG, kw_line); consume_end_stmt(); From baf7ad6b502adbc0e92ca38203cea87f3a083224 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 22:19:59 +0800 Subject: [PATCH 51/73] up --- src/ceval.h | 5 +- src/codeobject.h | 2 +- src/common.h | 11 ++-- src/compiler.h | 161 +++++++++++++++++++++++++++++++---------------- src/expr.h | 4 +- src/frame.h | 49 +++++++-------- src/pocketpy.h | 18 +++--- src/vm.h | 19 +++--- 8 files changed, 168 insertions(+), 101 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 526850e4..fd164d02 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -18,6 +18,9 @@ __NEXT_STEP:; //heap._auto_collect(this); const Bytecode& byte = frame->next_bytecode(); + + // std::cout << frame->stack_info() << " " << OP_NAMES[byte.op] << std::endl; + switch (byte.op) { case OP_NO_OP: DISPATCH(); @@ -296,7 +299,7 @@ __NEXT_STEP:; } CodeObject_ code = compile(source, name.str(), EXEC_MODE); PyObject* new_mod = new_module(name); - _exec(code, new_mod); + _exec(code, new_mod, builtins); new_mod->attr()._try_perfect_rehash(); } frame->push(ext_mod); diff --git a/src/codeobject.h b/src/codeobject.h index dd2682e6..264f2db9 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -57,7 +57,7 @@ struct CodeObject { std::vector codes; List consts; std::vector names; - std::set global_names; + 
std::set global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; std::vector func_decls; diff --git a/src/common.h b/src/common.h index 88875867..c337e398 100644 --- a/src/common.h +++ b/src/common.h @@ -28,16 +28,19 @@ #include #include -#define PK_VERSION "0.9.6" +#define PK_VERSION "0.9.7" // debug macros -#define DEBUG_NO_BUILTIN_MODULES 1 -#define DEBUG_MODE 1 +#define DEBUG_NO_BUILTIN_MODULES 0 +#define DEBUG_EXTRA_CHECK 1 +#define DEBUG_DIS_REPL 0 +#define DEBUG_DIS_REPL_MIN 1 +#define DEBUG_FULL_EXCEPTION 1 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 #else -#define PK_ENABLE_FILEIO 1 +#define PK_ENABLE_FILEIO (1-DEBUG_NO_BUILTIN_MODULES) #endif #if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__) diff --git a/src/compiler.h b/src/compiler.h index 530e234e..725327ba 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -1,5 +1,7 @@ #pragma once +#include "codeobject.h" +#include "common.h" #include "expr.h" namespace pkpy{ @@ -152,6 +154,17 @@ class Compiler { parse_expression(PREC_TUPLE, push_stack); } + // special case for `for loop` and `comp` + Expr_ EXPR_VARS(){ + std::vector items; + do { + consume(TK("@id")); + items.push_back(make_expr(prev().str(), name_scope())); + } while(match(TK(","))); + if(items.size()==1) return std::move(items[0]); + return make_expr(std::move(items)); + } + template std::unique_ptr make_expr(Args&&... 
args) { std::unique_ptr expr = std::make_unique(std::forward(args)...); @@ -269,10 +282,9 @@ class Compiler { template void _consume_comp(Expr_ expr){ static_assert(std::is_base_of::value); - std::unique_ptr ce = std::make_unique(); + std::unique_ptr ce = make_expr(); ce->expr = std::move(expr); - EXPR_TUPLE(); // must be a lvalue - ce->vars = ctx()->s_expr.popx(); + ce->vars = EXPR_VARS(); consume(TK("in")); EXPR(); ce->iter = ctx()->s_expr.popx(); @@ -374,7 +386,12 @@ class Compiler { // PASS void exprName(){ - ctx()->s_expr.push(make_expr(prev().str(), name_scope())); + Str name = prev().str(); + NameScope scope = name_scope(); + if(ctx()->co->global_names.count(name)){ + scope = NAME_GLOBAL; + } + ctx()->s_expr.push(make_expr(name, scope)); } // PASS @@ -389,26 +406,65 @@ class Compiler { void exprSubscr() { auto e = make_expr(); e->a = ctx()->s_expr.popx(); - std::vector items; - do { - EXPR_TUPLE(); - items.push_back(ctx()->s_expr.popx()); - } while(match(TK(":"))); - consume(TK("]")); - switch(items.size()){ - case 1: - e->b = std::move(items[0]); - break; - case 2: case 3: { - auto slice = make_expr(); - slice->start = std::move(items[0]); - slice->stop = std::move(items[1]); - if(items.size()==3){ - slice->step = std::move(items[2]); - } - e->b = std::move(slice); - } break; - default: SyntaxError(); break; + auto slice = make_expr(); + bool is_slice = false; + // a[<0> : state<3> : state<5>] + int state = 0; + do{ + switch(state){ + case 0: + if(match(TK(":"))){ + is_slice=true; + state=2; + break; + } + if(match(TK("]"))) SyntaxError(); + EXPR_TUPLE(); + slice->start = ctx()->s_expr.popx(); + state=1; + break; + case 1: + if(match(TK(":"))){ + is_slice=true; + state=2; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + SyntaxError("expected ':' or ']'"); + break; + case 2: + if(match(TK(":"))){ + state=4; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + EXPR_TUPLE(); + slice->stop = ctx()->s_expr.popx(); + state=3; + break; + case 3: + 
if(match(TK(":"))){ + state=4; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + SyntaxError("expected ':' or ']'"); + break; + case 4: + if(match(TK("]"))) goto __SUBSCR_END; + EXPR_TUPLE(); + slice->step = ctx()->s_expr.popx(); + state=5; + break; + case 5: consume(TK("]")); goto __SUBSCR_END; + } + }while(true); +__SUBSCR_END: + if(is_slice){ + e->b = std::move(slice); + }else{ + if(state != 1) UNREACHABLE(); + e->b = std::move(slice->start); } ctx()->s_expr.push(std::move(e)); } @@ -535,8 +591,7 @@ class Compiler { // PASS void compile_for_loop() { - EXPR_TUPLE(); - Expr_ vars = ctx()->s_expr.popx(); + Expr_ vars = EXPR_VARS(); consume(TK("in")); EXPR(false); ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); @@ -550,31 +605,32 @@ class Compiler { } void compile_try_except() { - // ctx()->enter_block(TRY_EXCEPT); - // ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, prev().line); - // compile_block_body(); - // ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE); - // std::vector patches = { - // ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) - // }; - // ctx()->exit_block(); - - // do { - // consume(TK("except")); - // if(match(TK("@id"))){ - // int name_idx = ctx()->add_name(prev().str(), NAME_SPECIAL); - // emit(OP_EXCEPTION_MATCH, name_idx); - // }else{ - // emit(OP_LOAD_TRUE); - // } - // int patch = emit(OP_POP_JUMP_IF_FALSE); - // emit(OP_POP_TOP); // pop the exception on match - // compile_block_body(); - // patches.push_back(emit(OP_JUMP_ABSOLUTE)); - // patch_jump(patch); - // }while(curr().type == TK("except")); - // emit(OP_RE_RAISE); // no match, re-raise - // for (int patch : patches) patch_jump(patch); + ctx()->enter_block(TRY_EXCEPT); + ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, prev().line); + compile_block_body(); + ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE); + std::vector patches = { + ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) + }; + ctx()->exit_block(); + do { + consume(TK("except")); + if(match(TK("@id"))){ + int 
namei = ctx()->add_name(prev().str()); + ctx()->emit(OP_EXCEPTION_MATCH, namei, prev().line); + }else{ + ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE); + } + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); + // pop the exception on match + ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); + compile_block_body(); + patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)); + ctx()->patch_jump(patch); + }while(curr().type == TK("except")); + // no match, re-raise + ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE); + for (int patch : patches) ctx()->patch_jump(patch); } void compile_decorated(){ @@ -611,7 +667,7 @@ class Compiler { break; default: return false; } - std::cout << ctx()->_log_s_expr() << std::endl; + // std::cout << ctx()->_log_s_expr() << std::endl; Expr_ rhs = ctx()->s_expr.popx(); if(lhs_p->is_starred() || rhs->is_starred()){ @@ -785,7 +841,6 @@ class Compiler { if(enable_type_hints && match(TK(":"))) consume(TK("@id")); if(state == 0 && curr().type == TK("=")) state = 2; - switch (state) { case 0: decl->args.push_back(name); break; diff --git a/src/expr.h b/src/expr.h index da2e25ca..ec00dad3 100644 --- a/src/expr.h +++ b/src/expr.h @@ -357,8 +357,8 @@ struct DictItemExpr: Expr{ std::vector children() const override { return {key.get(), value.get()}; } void emit(CodeEmitContext* ctx) override { - key->emit(ctx); value->emit(ctx); + key->emit(ctx); // reverse order ctx->emit(OP_BUILD_TUPLE, 2, line); } }; @@ -462,9 +462,11 @@ struct CompExpr: Expr{ if(cond){ cond->emit(ctx); int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); + expr->emit(ctx); ctx->emit(op1(), BC_NOARG, BC_KEEPLINE); ctx->patch_jump(patch); }else{ + expr->emit(ctx); ctx->emit(op1(), BC_NOARG, BC_KEEPLINE); } ctx->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); diff --git a/src/frame.h b/src/frame.h index 265255e6..23d85b60 100644 --- a/src/frame.h +++ b/src/frame.h @@ -14,6 +14,7 @@ struct Frame { const CodeObject* co; PyObject* 
_module; NameDict_ _locals; + NameDict_ _closure; const uint64_t id; std::vector>> s_try_block; const NameDict* names[5]; // name resolution array, zero terminated @@ -21,14 +22,16 @@ struct Frame { NameDict& f_locals() noexcept { return *_locals; } NameDict& f_globals() noexcept { return _module->attr(); } - Frame(const CodeObject_& co, PyObject* _module, NameDict_ _locals=nullptr, NameDict_ _closure=nullptr) - : co(co.get()), _module(_module), _locals(_locals), id(kFrameGlobalId++) { + Frame(const CodeObject_& co, PyObject* _module, PyObject* builtins, NameDict_ _locals=nullptr, NameDict_ _closure=nullptr) + : co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) { memset(names, 0, sizeof(names)); int i = 0; if(_locals != nullptr) names[i++] = _locals.get(); if(_closure != nullptr) names[i++] = _closure.get(); - names[i++] = &_module->attr(); - // names[i++] = builtins + names[i++] = &_module->attr(); // borrowed reference + if(builtins != nullptr){ + names[i++] = &builtins->attr(); // borrowed reference + } } const Bytecode& next_bytecode() { @@ -41,26 +44,26 @@ struct Frame { return co->src->snapshot(line); } - // Str stack_info(){ - // StrStream ss; - // ss << "["; - // for(int i=0; i<_data.size(); i++){ - // ss << OBJ_TP_NAME(_data[i]); - // if(i != _data.size()-1) ss << ", "; - // } - // ss << "]"; - // return ss.str(); - // } + Str stack_info(){ + StrStream ss; + ss << "["; + for(int i=0; i<_data.size(); i++){ + ss << (i64)_data[i]; + if(i != _data.size()-1) ss << ", "; + } + ss << "]"; + return ss.str(); + } void pop(){ -#if DEBUG_MODE +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif _data.pop_back(); } PyObject* popx(){ -#if DEBUG_MODE +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif PyObject* ret = _data.back(); @@ -69,21 +72,21 @@ struct Frame { } PyObject*& top(){ -#if DEBUG_MODE +#if DEBUG_EXTRA_CHECK 
if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif return _data.back(); } PyObject*& top_1(){ -#if DEBUG_MODE +#if DEBUG_EXTRA_CHECK if(_data.size() < 2) throw std::runtime_error("_data.size() < 2"); #endif return _data[_data.size()-2]; } PyObject*& top_2(){ -#if DEBUG_MODE +#if DEBUG_EXTRA_CHECK if(_data.size() < 3) throw std::runtime_error("_data.size() < 3"); #endif return _data[_data.size()-3]; @@ -152,12 +155,8 @@ struct Frame { void _mark() const { for(PyObject* obj : _data) OBJ_MARK(obj); OBJ_MARK(_module); - - int i = 0; // names[0] is ensured to be non-null - do{ - names[i++]->_mark(); - }while(names[i] != nullptr); - + _locals->_mark(); + _closure->_mark(); for(auto& p : s_try_block){ for(PyObject* obj : p.second) OBJ_MARK(obj); } diff --git a/src/pocketpy.h b/src/pocketpy.h index 58b75418..7ef361ed 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -15,7 +15,9 @@ inline CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) { try{ return compiler.compile(); }catch(Exception& e){ - // std::cout << e.summary() << std::endl; +#if DEBUG_FULL_EXCEPTION + std::cerr << e.summary() << std::endl; +#endif _error(e); return nullptr; } @@ -93,12 +95,12 @@ inline void init_builtins(VM* _vm) { _vm->bind_builtin_func<1>("eval", [](VM* vm, Args& args) { CodeObject_ code = vm->compile(CAST(Str&, args[0]), "", EVAL_MODE); - return vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals); + return vm->_exec(code, vm->top_frame()->_module, vm->builtins, vm->top_frame()->_locals); }); _vm->bind_builtin_func<1>("exec", [](VM* vm, Args& args) { CodeObject_ code = vm->compile(CAST(Str&, args[0]), "", EXEC_MODE); - vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals); + vm->_exec(code, vm->top_frame()->_module, vm->builtins, vm->top_frame()->_locals); return vm->None; }); @@ -597,7 +599,7 @@ inline void add_module_json(VM* vm){ vm->bind_func<1>(mod, "loads", [](VM* vm, Args& args) { const Str& expr = CAST(Str&, 
args[0]); CodeObject_ code = vm->compile(expr, "", JSON_MODE); - return vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals); + return vm->_exec(code, vm->top_frame()->_module, vm->builtins, vm->top_frame()->_locals); }); vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->call(args[0], __json__, no_arg()))); @@ -750,7 +752,7 @@ inline void add_module_random(VM* vm){ PyObject* mod = vm->new_module("random"); Random::register_class(vm, mod); CodeObject_ code = vm->compile(kPythonLibs["random"], "random.py", EXEC_MODE); - vm->_exec(code, mod); + vm->_exec(code, mod, vm->builtins); } inline void add_module_gc(VM* vm){ @@ -778,11 +780,11 @@ inline void VM::post_init(){ } CodeObject_ code = compile(kPythonLibs["builtins"], "", EXEC_MODE); - this->_exec(code, this->builtins); + this->_exec(code, this->builtins, nullptr); code = compile(kPythonLibs["_dict"], "", EXEC_MODE); - this->_exec(code, this->builtins); + this->_exec(code, this->builtins, nullptr); code = compile(kPythonLibs["_set"], "", EXEC_MODE); - this->_exec(code, this->builtins); + this->_exec(code, this->builtins, nullptr); // property is defined in builtins.py so we need to add it after builtins is loaded _t(tp_object)->attr().set(__class__, property(CPP_LAMBDA(vm->_t(args[0])))); diff --git a/src/vm.h b/src/vm.h index 846b05f6..6197e70b 100644 --- a/src/vm.h +++ b/src/vm.h @@ -93,7 +93,7 @@ public: } Frame* top_frame() const { -#if DEBUG_MODE +#if DEBUG_EXTRA_CHECK if(callstack.empty()) UNREACHABLE(); #endif return callstack.top().get(); @@ -166,13 +166,15 @@ public: if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); +#if DEBUG_DIS_REPL if(_module == _main) std::cout << disassemble(code) << '\n'; - return _exec(code, _module); +#endif + return _exec(code, _module, builtins); }catch (const Exception& e){ *_stderr << e.summary() << '\n'; } -#if !DEBUG_MODE +#if !DEBUG_FULL_EXCEPTION catch (const std::exception& e) { *_stderr << "An std::exception 
occurred! It could be a bug.\n"; *_stderr << e.what() << '\n'; @@ -607,11 +609,12 @@ inline Str VM::disassemble(CodeObject_ co){ argStr += " (" + BITWISE_SPECIAL_METHODS[byte.arg].str() + ")"; break; } - ss << argStr; - // ss << pad(argStr, 20); // may overflow - // ss << co->blocks[byte.block].to_string(); + ss << pad(argStr, 40); // may overflow + ss << co->blocks[byte.block].type; if(i != co->codes.size() - 1) ss << '\n'; } + +#if !DEBUG_DIS_REPL_MIN StrStream consts; consts << "co_consts: "; consts << CAST(Str, asRepr(VAR(co->consts))); @@ -624,7 +627,7 @@ inline Str VM::disassemble(CodeObject_ co){ } names << CAST(Str, asRepr(VAR(list))); ss << '\n' << consts.str() << '\n' << names.str(); - +#endif for(auto& decl: co->func_decls){ ss << "\n\n" << "Disassembly of " << decl->name.str() << ":\n"; ss << disassemble(decl->code); @@ -753,7 +756,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo locals->set(key, kwargs[i+1]); } PyObject* _module = fn._module != nullptr ? 
fn._module : top_frame()->_module; - auto _frame = _new_frame(fn.decl->code, _module, locals, fn._closure); + auto _frame = _new_frame(fn.decl->code, _module, builtins, locals, fn._closure); if(fn.decl->code->is_generator) return PyIter(Generator(this, std::move(_frame))); callstack.push(std::move(_frame)); if(opCall) return _py_op_call; From 818ee2981e19b339c033180790aa188109298159 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 5 Apr 2023 22:43:43 +0800 Subject: [PATCH 52/73] up --- scripts/run_tests.py | 6 +++++- src/ceval.h | 7 ++----- src/common.h | 2 +- src/compiler.h | 2 +- src/frame.h | 13 ++++--------- tests/25_rawstring.py | 2 ++ 6 files changed, 15 insertions(+), 17 deletions(-) diff --git a/scripts/run_tests.py b/scripts/run_tests.py index 8641d221..f444f235 100644 --- a/scripts/run_tests.py +++ b/scripts/run_tests.py @@ -27,7 +27,11 @@ def test_dir(path): print(f' cpython: {_1 - _0:.6f}s (100%)') print(f' pocketpy: {_2 - _1:.6f}s ({(_2 - _1) / (_1 - _0) * 100:.2f}%)') else: - if not test_file(filepath): exit(1) + if not test_file(filepath): + print('-' * 50) + print("TEST FAILED! 
Press any key to continue...") + input() + if len(sys.argv) == 2: assert 'benchmark' in sys.argv[1] diff --git a/src/ceval.h b/src/ceval.h index fd164d02..0805683b 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -149,11 +149,8 @@ __NEXT_STEP:; frame->push(VAR(std::move(items))); } DISPATCH(); case OP_BUILD_STRING: { - // asStr() may run extra bytecode - // so we use top_n_reversed() in order to avoid accidental gc - Args items = frame->top_n_reversed(byte.arg); - StrStream ss; - for(int i=0; i=0; i--) ss << CAST(Str&, asStr(frame->top_n(i))); frame->pop_n(byte.arg); frame->push(VAR(ss.str())); } DISPATCH(); diff --git a/src/common.h b/src/common.h index c337e398..205c73a3 100644 --- a/src/common.h +++ b/src/common.h @@ -35,7 +35,7 @@ #define DEBUG_EXTRA_CHECK 1 #define DEBUG_DIS_REPL 0 #define DEBUG_DIS_REPL_MIN 1 -#define DEBUG_FULL_EXCEPTION 1 +#define DEBUG_FULL_EXCEPTION 0 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 diff --git a/src/compiler.h b/src/compiler.h index 725327ba..5757c458 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -952,7 +952,7 @@ public: match_newlines(); // skip possible leading '\n' if(mode()==EVAL_MODE) { - EXPR_TUPLE(); + EXPR_TUPLE(false); consume(TK("@eof")); ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); diff --git a/src/frame.h b/src/frame.h index 23d85b60..2b193a07 100644 --- a/src/frame.h +++ b/src/frame.h @@ -85,11 +85,12 @@ struct Frame { return _data[_data.size()-2]; } - PyObject*& top_2(){ + PyObject*& top_n(int n){ + n += 1; #if DEBUG_EXTRA_CHECK - if(_data.size() < 3) throw std::runtime_error("_data.size() < 3"); + if(_data.size() < n) throw std::runtime_error("_data.size() < n"); #endif - return _data[_data.size()-3]; + return _data[_data.size()-n]; } template @@ -142,12 +143,6 @@ struct Frame { return v; } - Args top_n_reversed(int n){ - Args v(n); - for(int i=0; i->{s}<-<- {123} ''' From 7ce783360f648e3697998cc012fd68165b266cf0 Mon Sep 17 
00:00:00 2001 From: BLUELOVETH Date: Thu, 6 Apr 2023 04:35:09 +0000 Subject: [PATCH 53/73] up --- src/ceval.h | 13 +++++++++---- src/common.h | 10 ++++++---- src/compiler.h | 21 ++++++++++----------- src/expr.h | 12 ++++++++++++ src/frame.h | 10 +++++++--- src/pocketpy.h | 7 +++---- src/vm.h | 5 +++-- tests/70_random.py | 24 ++++++++++++------------ 8 files changed, 62 insertions(+), 40 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 0805683b..d50cf46b 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -15,11 +15,14 @@ __NEXT_STEP:; * For example, frame->popx() returns a strong reference which may be dangerous * `Args` containing strong references is safe if it is passed to `call` or `fast_call` */ - //heap._auto_collect(this); +#if !DEBUG_NO_GC + heap._auto_collect(this); +#endif const Bytecode& byte = frame->next_bytecode(); - - // std::cout << frame->stack_info() << " " << OP_NAMES[byte.op] << std::endl; +#if DEBUG_CEVAL_STEP + std::cout << frame->stack_info() << " " << OP_NAMES[byte.op] << std::endl; +#endif switch (byte.op) { @@ -298,8 +301,10 @@ __NEXT_STEP:; PyObject* new_mod = new_module(name); _exec(code, new_mod, builtins); new_mod->attr()._try_perfect_rehash(); + frame->push(new_mod); + }else{ + frame->push(ext_mod); } - frame->push(ext_mod); } DISPATCH(); case OP_IMPORT_STAR: { PyObject* obj = frame->popx(); diff --git a/src/common.h b/src/common.h index 205c73a3..45caad03 100644 --- a/src/common.h +++ b/src/common.h @@ -32,15 +32,17 @@ // debug macros #define DEBUG_NO_BUILTIN_MODULES 0 -#define DEBUG_EXTRA_CHECK 1 -#define DEBUG_DIS_REPL 0 -#define DEBUG_DIS_REPL_MIN 1 +#define DEBUG_EXTRA_CHECK 0 +#define DEBUG_DIS_EXEC_REPL 0 +#define DEBUG_DIS_EXEC_REPL_MIN 1 +#define DEBUG_CEVAL_STEP 0 #define DEBUG_FULL_EXCEPTION 0 +#define DEBUG_NO_GC 1 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 #else -#define PK_ENABLE_FILEIO (1-DEBUG_NO_BUILTIN_MODULES) +#define PK_ENABLE_FILEIO 0 // TODO: 
refactor this #endif #if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__) diff --git a/src/compiler.h b/src/compiler.h index 5757c458..e18db025 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -757,15 +757,15 @@ __SUBSCR_END: } break; case TK("with"): { // TODO: reimpl this - UNREACHABLE(); - // EXPR(false); - // consume(TK("as")); - // consume(TK("@id")); - // int index = ctx()->add_name(prev().str(), name_scope()); + EXPR(false); + ctx()->emit(OP_POP_TOP, BC_NOARG, prev().line); + consume(TK("as")); + consume(TK("@id")); + // int index = ctx()->add_name(prev().str()); // emit(OP_STORE_NAME, index); // emit(OP_LOAD_NAME_REF, index); // emit(OP_WITH_ENTER); - // compile_block_body(); + compile_block_body(); // emit(OP_LOAD_NAME_REF, index); // emit(OP_WITH_EXIT); } break; @@ -958,12 +958,11 @@ public: pop_context(); return code; }else if(mode()==JSON_MODE){ - PyObject* value = read_literal(); - if(value != nullptr) ctx()->emit(OP_LOAD_CONST, ctx()->add_const(value), prev().line); - else if(match(TK("{"))) exprMap(); - else if(match(TK("["))) exprList(); - else SyntaxError("expect a JSON object or array"); + EXPR(); + Expr_ e = ctx()->s_expr.popx(); + if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array"); consume(TK("@eof")); + e->emit(ctx()); ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); return code; diff --git a/src/expr.h b/src/expr.h index ec00dad3..e8ccced3 100644 --- a/src/expr.h +++ b/src/expr.h @@ -20,6 +20,7 @@ struct Expr{ virtual std::vector children() const { return {}; } virtual bool is_starred() const { return false; } virtual bool is_literal() const { return false; } + virtual bool is_json_object() const { return false; } // for OP_DELETE_XXX virtual bool emit_del(CodeEmitContext* ctx) { return false; } @@ -238,6 +239,8 @@ struct Literal0Expr: Expr{ default: UNREACHABLE(); } } + + bool is_json_object() const override { return true; } }; // @num, @str which needs to invoke OP_LOAD_CONST 
@@ -283,6 +286,7 @@ struct LiteralExpr: Expr{ } bool is_literal() const override { return true; } + bool is_json_object() const override { return true; } }; // PASS @@ -313,6 +317,10 @@ struct NegatedExpr: Expr{ child->emit(ctx); ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line); } + + bool is_json_object() const override { + return child->is_literal(); + } }; // PASS @@ -384,12 +392,16 @@ struct ListExpr: SequenceExpr{ using SequenceExpr::SequenceExpr; Str str() const override { return "list()"; } Opcode opcode() const override { return OP_BUILD_LIST; } + + bool is_json_object() const override { return true; } }; struct DictExpr: SequenceExpr{ using SequenceExpr::SequenceExpr; Str str() const override { return "dict()"; } Opcode opcode() const override { return OP_BUILD_DICT; } + + bool is_json_object() const override { return true; } }; struct SetExpr: SequenceExpr{ diff --git a/src/frame.h b/src/frame.h index 2b193a07..ebecb893 100644 --- a/src/frame.h +++ b/src/frame.h @@ -46,7 +46,7 @@ struct Frame { Str stack_info(){ StrStream ss; - ss << "["; + ss << id << " ["; for(int i=0; i<_data.size(); i++){ ss << (i64)_data[i]; if(i != _data.size()-1) ss << ", "; @@ -93,8 +93,12 @@ struct Frame { return _data[_data.size()-n]; } - template - void push(T&& obj){ _data.push_back(std::forward(obj)); } + void push(PyObject* obj){ +#if DEBUG_EXTRA_CHECK + if(obj == nullptr) throw std::runtime_error("obj == nullptr"); +#endif + _data.push_back(obj); + } void jump_abs(int i){ _next_ip = i; } void jump_rel(int i){ _next_ip += i; } diff --git a/src/pocketpy.h b/src/pocketpy.h index 7ef361ed..b67257b5 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -172,11 +172,10 @@ inline void init_builtins(VM* _vm) { _vm->bind_method<0>("object", "__repr__", [](VM* vm, Args& args) { PyObject* self = args[0]; - std::uintptr_t addr = is_tagged(self) ? 
0 : (uintptr_t)self; + if(is_tagged(self)) self = nullptr; StrStream ss; - ss << std::hex << addr; - Str s = "<" + OBJ_NAME(vm->_t(self)) + " object at 0x" + ss.str() + ">"; - return VAR(s); + ss << "<" << OBJ_NAME(vm->_t(self)) << " object at " << std::hex << self << ">"; + return VAR(ss.str()); }); _vm->bind_method<1>("object", "__eq__", CPP_LAMBDA(VAR(args[0] == args[1]))); diff --git a/src/vm.h b/src/vm.h index 6197e70b..df28f100 100644 --- a/src/vm.h +++ b/src/vm.h @@ -166,7 +166,7 @@ public: if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); -#if DEBUG_DIS_REPL +#if DEBUG_DIS_EXEC_REPL if(_module == _main) std::cout << disassemble(code) << '\n'; #endif return _exec(code, _module, builtins); @@ -596,6 +596,7 @@ inline Str VM::disassemble(CodeObject_ co){ break; case OP_LOAD_NAME: case OP_STORE_LOCAL: case OP_STORE_GLOBAL: case OP_LOAD_ATTR: case OP_STORE_ATTR: case OP_DELETE_ATTR: + case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_DELETE_LOCAL: case OP_DELETE_GLOBAL: argStr += " (" + co->names[byte.arg].str().escape(true) + ")"; break; @@ -614,7 +615,7 @@ inline Str VM::disassemble(CodeObject_ co){ if(i != co->codes.size() - 1) ss << '\n'; } -#if !DEBUG_DIS_REPL_MIN +#if !DEBUG_DIS_EXEC_REPL_MIN StrStream consts; consts << "co_consts: "; consts << CAST(Str, asRepr(VAR(co->consts))); diff --git a/tests/70_random.py b/tests/70_random.py index 93d576ed..85bfc2b0 100644 --- a/tests/70_random.py +++ b/tests/70_random.py @@ -11,21 +11,21 @@ r.shuffle(a) r.choice(a) r.choice(b) -from sys import version as v +# from sys import version as v -assert type(v) is str +# assert type(v) is str -class Context: - def __init__(self): - self.x = 0 +# class Context: +# def __init__(self): +# self.x = 0 - def __enter__(self): - self.x = 1 +# def __enter__(self): +# self.x = 1 - def __exit__(self): - self.x = 2 +# def __exit__(self): +# self.x = 2 -with Context() as c: - assert c.x == 1 +# with Context() as c: +# assert c.x == 1 
-assert c.x == 2 \ No newline at end of file +# assert c.x == 2 \ No newline at end of file From e69b66f0b78cd032f0e318ec6be3a6d02bcb574a Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 6 Apr 2023 14:51:18 +0800 Subject: [PATCH 54/73] impl `small_vector` --- amalgamate.py | 2 +- src/common.h | 16 -------- src/frame.h | 9 ++-- src/memory.h | 1 + src/vector.h | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 20 deletions(-) create mode 100644 src/vector.h diff --git a/amalgamate.py b/amalgamate.py index b5cd8fd5..99ef93fe 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -6,7 +6,7 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f: OPCODES_TEXT = f.read() pipeline = [ - ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], + ["common.h", "vector.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], ["obj.h", "codeobject.h", "frame.h"], ["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"], ["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"] diff --git a/src/common.h b/src/common.h index 45caad03..b0175192 100644 --- a/src/common.h +++ b/src/common.h @@ -107,22 +107,6 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept { return is_int(a) && is_int(b); } -template -class stack{ - std::vector vec; -public: - void push(const T& t){ vec.push_back(t); } - void push(T&& t){ vec.push_back(std::move(t)); } - void pop(){ vec.pop_back(); } - void clear(){ vec.clear(); } - bool empty() const { return vec.empty(); } - size_t size() const { return vec.size(); } - T& top(){ return vec.back(); } - const T& top() const { return vec.back(); } - T popx(){ T t = std::move(vec.back()); vec.pop_back(); return t; } - const std::vector& data() const { return vec; } -}; - struct Expr; typedef std::unique_ptr Expr_; diff --git a/src/frame.h b/src/frame.h index ebecb893..c0f1c285 100644 --- a/src/frame.h +++ b/src/frame.h @@ -1,13 +1,16 @@ #pragma once 
#include "codeobject.h" +#include "vector.h" namespace pkpy{ static THREAD_LOCAL uint64_t kFrameGlobalId = 0; +using ValueStack = small_vector; + struct Frame { - std::vector _data; + ValueStack _data; int _ip = -1; int _next_ip = 0; @@ -16,7 +19,7 @@ struct Frame { NameDict_ _locals; NameDict_ _closure; const uint64_t id; - std::vector>> s_try_block; + std::vector> s_try_block; const NameDict* names[5]; // name resolution array, zero terminated NameDict& f_locals() noexcept { return *_locals; } @@ -148,7 +151,7 @@ struct Frame { } void pop_n(int n){ - _data.resize(_data.size()-n); + _data.pop_back_n(n); } void _mark() const { diff --git a/src/memory.h b/src/memory.h index 62d9932f..e7559865 100644 --- a/src/memory.h +++ b/src/memory.h @@ -1,6 +1,7 @@ #pragma once #include "common.h" +#include "vector.h" namespace pkpy{ diff --git a/src/vector.h b/src/vector.h new file mode 100644 index 00000000..e4f05474 --- /dev/null +++ b/src/vector.h @@ -0,0 +1,112 @@ +#pragma once + +#include "common.h" + +namespace pkpy{ + +template +struct small_vector{ + int _size; + int _capacity; + T* _data; + T _buffer[N]; + + small_vector(): _size(0), _capacity(N) { + static_assert(std::is_pod_v); + _data = _buffer; + } + + small_vector(const small_vector& other): _size(other._size), _capacity(other._capacity) { + if(other.is_small()){ + _data = _buffer; + memcpy(_buffer, other._buffer, sizeof(T) * _size); + } else { + _data = (T*)malloc(sizeof(T) * _capacity); + memcpy(_data, other._data, sizeof(T) * _size); + } + } + + small_vector(small_vector&& other) noexcept { + _size = other._size; + _capacity = other._capacity; + if(other.is_small()){ + _data = _buffer; + memcpy(_buffer, other._buffer, sizeof(T) * _size); + } else { + _data = other._data; + other._data = other._buffer; + } + } + + small_vector& operator=(small_vector&& other) noexcept { + if (!is_small()) free(_data); + _size = other._size; + _capacity = other._capacity; + if(other.is_small()){ + _data = _buffer; + 
memcpy(_buffer, other._buffer, sizeof(T) * _size); + } else { + _data = other._data; + other._data = other._buffer; + } + return *this; + } + + template + void push_back(__ValueT&& t) { + if (_size == _capacity) { + _capacity *= 2; + if (is_small()) { + _data = (T*)malloc(sizeof(T) * _capacity); + memcpy(_data, _buffer, sizeof(T) * _size); + } else { + _data = (T*)realloc(_data, sizeof(T) * _capacity); + } + } + _data[_size++] = std::forward<__ValueT>(t); + } + + void pop_back() { _size--; } + + T& operator[](int index) { return _data[index]; } + const T& operator[](int index) const { return _data[index]; } + + T* begin() { return _data; } + T* end() { return _data + _size; } + const T* begin() const { return _data; } + const T* end() const { return _data + _size; } + T& back() { return _data[_size - 1]; } + const T& back() const { return _data[_size - 1]; } + + bool empty() const { return _size == 0; } + int size() const { return _size; } + T* data() { return _data; } + const T* data() const { return _data; } + bool is_small() const { return _data == _buffer; } + void pop_back_n(int n) { _size -= n; } + + ~small_vector() { + if (!is_small()) free(_data); + } +}; + + +template > +class stack{ + Container vec; +public: + void push(const T& t){ vec.push_back(t); } + void push(T&& t){ vec.push_back(std::move(t)); } + void pop(){ vec.pop_back(); } + void clear(){ vec.clear(); } + bool empty() const { return vec.empty(); } + size_t size() const { return vec.size(); } + T& top(){ return vec.back(); } + const T& top() const { return vec.back(); } + T popx(){ T t = std::move(vec.back()); vec.pop_back(); return t; } + const Container& data() const { return vec; } +}; + +template +using small_stack = stack>; +} // namespace pkpy \ No newline at end of file From 352688e9ae9b090742ebd30b5bc6d34fc268f8ee Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 6 Apr 2023 14:58:38 +0800 Subject: [PATCH 55/73] up --- src/error.h | 5 +++-- src/lexer.h | 2 +- src/vector.h | 5 ++++- 3 
files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/error.h b/src/error.h index 561ce40d..6a82967e 100644 --- a/src/error.h +++ b/src/error.h @@ -75,9 +75,10 @@ struct SourceData { }; class Exception { + using StackTrace = stack; StrName type; Str msg; - stack stacktrace; + StackTrace stacktrace; public: Exception(StrName type, Str msg): type(type), msg(msg) {} bool match_type(StrName type) const { return this->type == type;} @@ -89,7 +90,7 @@ public: } Str summary() const { - stack st(stacktrace); + StackTrace st(stacktrace); StrStream ss; if(is_re) ss << "Traceback (most recent call last):\n"; while(!st.empty()) { ss << st.top() << '\n'; st.pop(); } diff --git a/src/lexer.h b/src/lexer.h index 74c8f96b..3b5b762c 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -100,7 +100,7 @@ struct Lexer { const char* curr_char; int current_line = 1; std::vector nexts; - stack indents; + small_stack indents; int brackets_level = 0; bool used = false; diff --git a/src/vector.h b/src/vector.h index e4f05474..e952e0c3 100644 --- a/src/vector.h +++ b/src/vector.h @@ -52,6 +52,9 @@ struct small_vector{ return *this; } + // remove copy assignment + small_vector& operator=(const small_vector& other) = delete; + template void push_back(__ValueT&& t) { if (_size == _capacity) { @@ -107,6 +110,6 @@ public: const Container& data() const { return vec; } }; -template +template using small_stack = stack>; } // namespace pkpy \ No newline at end of file From 6b9b5945a35fd62e0dadacf054356f055140c4fe Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 6 Apr 2023 15:25:13 +0800 Subject: [PATCH 56/73] fix a bug --- src/codeobject.h | 8 ++------ src/common.h | 6 +++--- src/frame.h | 8 ++++---- src/gc.h | 22 ++++++++++++++-------- src/iter.h | 12 ++++++------ src/namedict.h | 2 +- src/obj.h | 16 +++++++++------- src/vm.h | 10 ++++------ 8 files changed, 43 insertions(+), 41 deletions(-) diff --git a/src/codeobject.h b/src/codeobject.h index 264f2db9..640cadbd 100644 --- a/src/codeobject.h 
+++ b/src/codeobject.h @@ -68,14 +68,10 @@ struct CodeObject { void optimize(VM* vm); - void _mark() const { + void _gc_mark() const { for(PyObject* v : consts) OBJ_MARK(v); - for(auto& decl: func_decls){ - decl->kwargs._mark(); - decl->code->_mark(); - } + for(auto& decl: func_decls) decl->_gc_mark(); } }; - } // namespace pkpy \ No newline at end of file diff --git a/src/common.h b/src/common.h index b0175192..771f79d8 100644 --- a/src/common.h +++ b/src/common.h @@ -33,11 +33,11 @@ // debug macros #define DEBUG_NO_BUILTIN_MODULES 0 #define DEBUG_EXTRA_CHECK 0 -#define DEBUG_DIS_EXEC_REPL 0 -#define DEBUG_DIS_EXEC_REPL_MIN 1 +#define DEBUG_DIS_EXEC 0 +#define DEBUG_DIS_EXEC_MIN 1 #define DEBUG_CEVAL_STEP 0 #define DEBUG_FULL_EXCEPTION 0 -#define DEBUG_NO_GC 1 +#define DEBUG_NO_GC 0 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 diff --git a/src/frame.h b/src/frame.h index c0f1c285..aef2fd21 100644 --- a/src/frame.h +++ b/src/frame.h @@ -154,15 +154,15 @@ struct Frame { _data.pop_back_n(n); } - void _mark() const { + void _gc_mark() const { for(PyObject* obj : _data) OBJ_MARK(obj); OBJ_MARK(_module); - _locals->_mark(); - _closure->_mark(); + if(_locals != nullptr) _locals->_gc_mark(); + if(_closure != nullptr) _closure->_gc_mark(); for(auto& p : s_try_block){ for(PyObject* obj : p.second) OBJ_MARK(obj); } - co->_mark(); + co->_gc_mark(); } }; diff --git a/src/gc.h b/src/gc.h index f0619150..311f6b5f 100644 --- a/src/gc.h +++ b/src/gc.h @@ -133,36 +133,42 @@ struct ManagedHeap{ void mark(VM* vm); }; -inline void NameDict::_mark() const{ +inline void NameDict::_gc_mark() const{ for(uint16_t i=0; i<_capacity; i++){ if(_items[i].first.empty()) continue; OBJ_MARK(_items[i].second); } } -template<> inline void _mark(List& t){ +inline void FuncDecl::_gc_mark() const{ + code->_gc_mark(); + kwargs._gc_mark(); +} + +template<> inline void _gc_mark(List& t){ for(PyObject* obj: t) OBJ_MARK(obj); } -template<> inline 
void _mark(Tuple& t){ +template<> inline void _gc_mark(Tuple& t){ for(int i=0; i inline void _mark(Function& t){ +template<> inline void _gc_mark(Function& t){ + t.decl->_gc_mark(); if(t._module != nullptr) OBJ_MARK(t._module); - if(t._closure != nullptr) t._closure->_mark(); + if(t._closure != nullptr) t._closure->_gc_mark(); } -template<> inline void _mark(BoundMethod& t){ +template<> inline void _gc_mark(BoundMethod& t){ OBJ_MARK(t.obj); OBJ_MARK(t.method); } -template<> inline void _mark(StarWrapper& t){ +template<> inline void _gc_mark(StarWrapper& t){ OBJ_MARK(t.obj); } -template<> inline void _mark(Super& t){ +template<> inline void _gc_mark(Super& t){ OBJ_MARK(t.first); } // NOTE: std::function may capture some PyObject*, they can not be marked diff --git a/src/iter.h b/src/iter.h index 0a9041d6..e7cf6f04 100644 --- a/src/iter.h +++ b/src/iter.h @@ -37,7 +37,7 @@ public: return p->operator[](index++); } - void _mark() override { + void _gc_mark() const override { OBJ_MARK(ref); } }; @@ -54,7 +54,7 @@ public: return VAR(str->u8_getitem(index++)); } - void _mark() override { + void _gc_mark() const override { OBJ_MARK(ref); } }; @@ -74,14 +74,14 @@ inline PyObject* Generator::next(){ } } -inline void Generator::_mark(){ - if(frame!=nullptr) frame->_mark(); +inline void Generator::_gc_mark() const{ + if(frame != nullptr) frame->_gc_mark(); } template -void _mark(T& t){ +void _gc_mark(T& t) { if constexpr(std::is_base_of_v){ - t._mark(); + t._gc_mark(); } } diff --git a/src/namedict.h b/src/namedict.h index 4a98f5d2..0a8f085f 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -180,7 +180,7 @@ while(!_items[i].first.empty()) { \ return v; } - void _mark() const; + void _gc_mark() const; #undef HASH_PROBE #undef _hash }; diff --git a/src/obj.h b/src/obj.h index 372b69ae..2362bf95 100644 --- a/src/obj.h +++ b/src/obj.h @@ -38,6 +38,8 @@ struct FuncDecl { bool _2 = kwargs.contains(val); return _0 || _1 || _2; } + + void _gc_mark() const; }; using FuncDecl_ = 
shared_ptr; @@ -87,7 +89,7 @@ protected: VM* vm; public: BaseIter(VM* vm) : vm(vm) {} - virtual void _mark() {} + virtual void _gc_mark() const {} virtual PyObject* next() = 0; virtual ~BaseIter() = default; }; @@ -107,14 +109,14 @@ struct PyObject { NameDict& attr() noexcept { return *_attr; } PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; } virtual void* value() = 0; - virtual void _mark() = 0; + virtual void _obj_gc_mark() = 0; PyObject(Type type) : type(type) {} virtual ~PyObject() { delete _attr; } }; template -void _mark(T& t); +void _gc_mark(T& t); template struct Py_ : PyObject { @@ -136,16 +138,16 @@ struct Py_ : PyObject { } void* value() override { return &_value; } - void _mark() override { + void _obj_gc_mark() override { if(gc.marked) return; gc.marked = true; - if(_attr != nullptr) _attr->_mark(); - pkpy::_mark(_value); // handle PyObject* inside _value `T` + if(_attr != nullptr) _attr->_gc_mark(); + pkpy::_gc_mark(_value); // handle PyObject* inside _value `T` } }; #define OBJ_GET(T, obj) (((Py_*)(obj))->_value) -#define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_mark() +#define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_obj_gc_mark() #if DEBUG_NO_BUILTIN_MODULES #define OBJ_NAME(obj) Str("") diff --git a/src/vm.h b/src/vm.h index df28f100..bd946aea 100644 --- a/src/vm.h +++ b/src/vm.h @@ -36,7 +36,7 @@ public: : BaseIter(vm), frame(std::move(frame)), state(0) {} PyObject* next() override; - void _mark() override; + void _gc_mark() const override; }; struct PyTypeInfo{ @@ -166,7 +166,7 @@ public: if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); -#if DEBUG_DIS_EXEC_REPL +#if DEBUG_DIS_EXEC if(_module == _main) std::cout << disassemble(code) << '\n'; #endif return _exec(code, _module, builtins); @@ -615,7 +615,7 @@ inline Str VM::disassemble(CodeObject_ co){ if(i != co->codes.size() - 1) ss << '\n'; } -#if !DEBUG_DIS_EXEC_REPL_MIN +#if !DEBUG_DIS_EXEC_MIN StrStream consts; consts << 
"co_consts: "; consts << CAST(Str, asRepr(VAR(co->consts))); @@ -909,9 +909,7 @@ inline PyObject* VM::_exec(){ inline void ManagedHeap::mark(VM *vm) { for(PyObject* obj: _no_gc) OBJ_MARK(obj); - for(auto& frame : vm->callstack.data()){ - frame->_mark(); - } + for(auto& frame : vm->callstack.data()) frame->_gc_mark(); } inline void ManagedHeap::_delete_hook(VM *vm, PyObject *obj){ From c694e1e67d285703026f979e2ad2fd680aeceae8 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 6 Apr 2023 15:52:04 +0800 Subject: [PATCH 57/73] up --- src/ceval.h | 2 +- src/common.h | 5 ++-- src/compiler.h | 2 -- src/expr.h | 2 ++ src/gc.h | 63 +++++++++++++------------------------------------- src/obj.h | 2 ++ src/pocketpy.h | 2 +- src/vm.h | 19 +++++++-------- 8 files changed, 33 insertions(+), 64 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index d50cf46b..6893f83d 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -15,7 +15,7 @@ __NEXT_STEP:; * For example, frame->popx() returns a strong reference which may be dangerous * `Args` containing strong references is safe if it is passed to `call` or `fast_call` */ -#if !DEBUG_NO_GC +#if !DEBUG_NO_AUTO_GC heap._auto_collect(this); #endif diff --git a/src/common.h b/src/common.h index 771f79d8..a14023c1 100644 --- a/src/common.h +++ b/src/common.h @@ -33,11 +33,12 @@ // debug macros #define DEBUG_NO_BUILTIN_MODULES 0 #define DEBUG_EXTRA_CHECK 0 -#define DEBUG_DIS_EXEC 0 +#define DEBUG_DIS_EXEC 1 #define DEBUG_DIS_EXEC_MIN 1 #define DEBUG_CEVAL_STEP 0 #define DEBUG_FULL_EXCEPTION 0 -#define DEBUG_NO_GC 0 +#define DEBUG_NO_AUTO_GC 1 +#define DEBUG_GC_STATS 0 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 diff --git a/src/compiler.h b/src/compiler.h index e18db025..37ed7c59 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -726,7 +726,6 @@ __SUBSCR_END: /*************************************************/ case TK("assert"): EXPR_TUPLE(false); - // TODO: change OP_ASSERT impl in 
ceval.h ctx()->emit(OP_ASSERT, BC_NOARG, kw_line); consume_end_stmt(); break; @@ -860,7 +859,6 @@ __SUBSCR_END: } void compile_function(const std::vector& decorators={}){ - // TODO: bug, if there are multiple decorators, will cause error FuncDecl_ decl = make_sp(); StrName obj_name; consume(TK("@id")); diff --git a/src/expr.h b/src/expr.h index e8ccced3..58ba7cb9 100644 --- a/src/expr.h +++ b/src/expr.h @@ -632,6 +632,8 @@ struct CallExpr: Expr{ void emit(CodeEmitContext* ctx) override { VM* vm = ctx->vm; + // TODO: if callable is a AttrExpr, we should try to use `fast_call` + // instead of use `boundmethod` proxy callable->emit(ctx); // emit args for(auto& item: args) item->emit(ctx); diff --git a/src/gc.h b/src/gc.h index 311f6b5f..dbb22d23 100644 --- a/src/gc.h +++ b/src/gc.h @@ -5,45 +5,15 @@ #include "codeobject.h" #include "namedict.h" -/* -0: object -1: type -2: int -3: float -4: bool -5: str -6: list -7: tuple -8: slice -9: range -10: module -11: _ref -12: _star_wrapper -13: function -14: native_function -15: iterator -16: bound_method -17: super -18: Exception -19: NoneType -20: ellipsis -21: _py_op_call -22: _py_op_yield -23: re.Match -24: random.Random -25: io.FileIO -26: property -27: staticmethod -28: dict -29: set -*/ - namespace pkpy { struct ManagedHeap{ std::vector _no_gc; std::vector gen; + VM* vm; + + ManagedHeap(VM* vm): vm(vm) {} - static const int kMinGCThreshold = 700; + static const int kMinGCThreshold = 4096; int gc_threshold = kMinGCThreshold; int gc_counter = 0; @@ -84,19 +54,20 @@ struct ManagedHeap{ ~ManagedHeap(){ for(PyObject* obj: _no_gc) delete obj; - // for(auto& [type, count]: deleted){ - // std::cout << "GC: " << type << "=" << count << std::endl; - // } +#if DEBUG_GC_STATS + for(auto& [type, count]: deleted){ + std::cout << "GC: " << obj_type_name(vm, type) << "=" << count << std::endl; + } +#endif } - int sweep(VM* vm){ + int sweep(){ std::vector alive; for(PyObject* obj: gen){ if(obj->gc.marked){ obj->gc.marked = false; 
alive.push_back(obj); }else{ - // _delete_hook(vm, obj); deleted[obj->type] += 1; delete obj; } @@ -112,25 +83,23 @@ struct ManagedHeap{ return freed; } - void _delete_hook(VM* vm, PyObject* obj); - - void _auto_collect(VM* vm){ + void _auto_collect(){ if(_gc_lock_counter > 0) return; if(gc_counter < gc_threshold) return; gc_counter = 0; - collect(vm); + collect(); gc_threshold = gen.size() * 2; if(gc_threshold < kMinGCThreshold) gc_threshold = kMinGCThreshold; } - int collect(VM* vm){ + int collect(){ if(_gc_lock_counter > 0) UNREACHABLE(); - mark(vm); - int freed = sweep(vm); + mark(); + int freed = sweep(); return freed; } - void mark(VM* vm); + void mark(); }; inline void NameDict::_gc_mark() const{ diff --git a/src/obj.h b/src/obj.h index 2362bf95..d5cbca40 100644 --- a/src/obj.h +++ b/src/obj.h @@ -149,6 +149,8 @@ struct Py_ : PyObject { #define OBJ_GET(T, obj) (((Py_*)(obj))->_value) #define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_obj_gc_mark() +Str obj_type_name(VM* vm, Type type); + #if DEBUG_NO_BUILTIN_MODULES #define OBJ_NAME(obj) Str("") #else diff --git a/src/pocketpy.h b/src/pocketpy.h index b67257b5..2b125f56 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -756,7 +756,7 @@ inline void add_module_random(VM* vm){ inline void add_module_gc(VM* vm){ PyObject* mod = vm->new_module("gc"); - vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->heap.collect(vm)))); + vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->heap.collect()))); } inline void VM::post_init(){ diff --git a/src/vm.h b/src/vm.h index bd946aea..b28cd20a 100644 --- a/src/vm.h +++ b/src/vm.h @@ -4,6 +4,7 @@ #include "frame.h" #include "error.h" #include "gc.h" +#include "obj.h" namespace pkpy{ @@ -75,10 +76,10 @@ public: Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str; Type tp_list, tp_tuple; Type tp_function, tp_native_function, tp_iterator, tp_bound_method; - Type tp_slice, tp_range, tp_module, tp_ref; + Type tp_slice, tp_range, tp_module; Type tp_super, tp_exception, 
tp_star_wrapper; - VM(bool use_stdio){ + VM(bool use_stdio) : heap(this){ this->vm = this; this->use_stdio = use_stdio; if(use_stdio){ @@ -317,7 +318,7 @@ public: } ~VM() { - heap.collect(this); + heap.collect(); if(!use_stdio){ delete _stdout; delete _stderr; @@ -344,8 +345,6 @@ public: void bind_func(PyObject*, Str, NativeFuncRaw); void _error(Exception); PyObject* _exec(); - template PyObject* PyRef(P&&); - const BaseRef* PyRef_AS_C(PyObject* obj); void post_init(); }; @@ -652,7 +651,6 @@ inline void VM::init_builtin_types(){ tp_slice = _new_type_object("slice"); tp_range = _new_type_object("range"); tp_module = _new_type_object("module"); - tp_ref = _new_type_object("_ref"); tp_star_wrapper = _new_type_object("_star_wrapper"); tp_function = _new_type_object("function"); tp_native_function = _new_type_object("native_function"); @@ -690,7 +688,7 @@ inline void VM::init_builtin_types(){ for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); } -// TODO: args here may be garbage collected accidentally +// TODO: callable/args here may be garbage collected accidentally inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCall){ if(is_type(callable, tp_type)){ PyObject* new_f = callable->attr().try_get(__new__); @@ -907,14 +905,13 @@ inline PyObject* VM::_exec(){ } } -inline void ManagedHeap::mark(VM *vm) { +inline void ManagedHeap::mark() { for(PyObject* obj: _no_gc) OBJ_MARK(obj); for(auto& frame : vm->callstack.data()) frame->_gc_mark(); } -inline void ManagedHeap::_delete_hook(VM *vm, PyObject *obj){ - Type t = OBJ_GET(Type, vm->_t(obj)); - std::cout << "delete " << vm->_all_types[t].name << " at " << obj << std::endl; +inline Str obj_type_name(VM *vm, Type type){ + return vm->_all_types[type].name; } } // namespace pkpy \ No newline at end of file From 5d6d109164ddff788fd2ab30f16f7c71b7b8c007 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 6 Apr 2023 22:42:53 +0800 Subject: [PATCH 58/73] up --- src/ceval.h | 21 
++++++++++++++++++- src/common.h | 4 ++-- src/compiler.h | 2 ++ src/expr.h | 56 +++++++++++++++----------------------------------- src/opcodes.h | 2 ++ src/pocketpy.h | 5 ++++- src/vm.h | 47 +++++++++++++++++++++++++++++++++++++++--- 7 files changed, 91 insertions(+), 46 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 6893f83d..4c9e29de 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -48,6 +48,7 @@ __NEXT_STEP:; PyObject* obj = VAR(Function({decl, frame->_module, frame->_locals})); frame->push(obj); } DISPATCH(); + case OP_LOAD_NULL: frame->push(_py_null); DISPATCH(); /*****************************************/ case OP_LOAD_NAME: { StrName name = frame->co->names[byte.arg]; @@ -67,6 +68,13 @@ __NEXT_STEP:; StrName name = frame->co->names[byte.arg]; frame->top() = getattr(a, name); } DISPATCH(); + case OP_LOAD_METHOD: { + PyObject* a = frame->top(); + StrName name = frame->co->names[byte.arg]; + PyObject* self; + frame->top() = get_unbound_method(a, name, &self); + frame->push(self); + } DISPATCH(); case OP_LOAD_SUBSCR: { Args args(2); args[1] = frame->popx(); // b @@ -222,7 +230,13 @@ __NEXT_STEP:; /*****************************************/ // TODO: examine this later case OP_CALL: case OP_CALL_UNPACK: { - Args args = frame->popx_n_reversed(byte.arg); + int ARGC = byte.arg; + + bool method_call = frame->top_n(ARGC) != _py_null; + if(method_call) ARGC++; // add self into args + Args args = frame->popx_n_reversed(ARGC); + if(!method_call) frame->pop(); + if(byte.op == OP_CALL_UNPACK) unpack_args(args); PyObject* callable = frame->popx(); PyObject* ret = call(callable, std::move(args), no_arg(), true); @@ -233,7 +247,12 @@ __NEXT_STEP:; int ARGC = byte.arg & 0xFFFF; int KWARGC = (byte.arg >> 16) & 0xFFFF; Args kwargs = frame->popx_n_reversed(KWARGC*2); + + bool method_call = frame->top_n(ARGC) != _py_null; + if(method_call) ARGC++; // add self into args Args args = frame->popx_n_reversed(ARGC); + if(!method_call) frame->pop(); + if(byte.op == 
OP_CALL_KWARGS_UNPACK) unpack_args(args); PyObject* callable = frame->popx(); PyObject* ret = call(callable, std::move(args), kwargs, true); diff --git a/src/common.h b/src/common.h index a14023c1..853f6967 100644 --- a/src/common.h +++ b/src/common.h @@ -33,12 +33,12 @@ // debug macros #define DEBUG_NO_BUILTIN_MODULES 0 #define DEBUG_EXTRA_CHECK 0 -#define DEBUG_DIS_EXEC 1 +#define DEBUG_DIS_EXEC 0 #define DEBUG_DIS_EXEC_MIN 1 #define DEBUG_CEVAL_STEP 0 #define DEBUG_FULL_EXCEPTION 0 #define DEBUG_NO_AUTO_GC 1 -#define DEBUG_GC_STATS 0 +#define DEBUG_GC_STATS 1 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 diff --git a/src/compiler.h b/src/compiler.h index 37ed7c59..050dd7d5 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -884,6 +884,8 @@ __SUBSCR_END: for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){ (*it)->emit(ctx()); ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line); + ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); ctx()->emit(OP_CALL, 1, (*it)->line); } if(!ctx()->is_compiling_class){ diff --git a/src/expr.h b/src/expr.h index 58ba7cb9..ece07980 100644 --- a/src/expr.h +++ b/src/expr.h @@ -17,10 +17,10 @@ struct Expr{ virtual void emit(CodeEmitContext* ctx) = 0; virtual Str str() const = 0; - virtual std::vector children() const { return {}; } virtual bool is_starred() const { return false; } virtual bool is_literal() const { return false; } virtual bool is_json_object() const { return false; } + virtual bool is_attrib() const { return false; } // for OP_DELETE_XXX virtual bool emit_del(CodeEmitContext* ctx) { return false; } @@ -163,8 +163,6 @@ struct StarredExpr: Expr{ StarredExpr(Expr_&& child): child(std::move(child)) {} Str str() const override { return "*"; } - std::vector children() const override { return {child.get()}; } - bool is_starred() const override { return true; } void emit(CodeEmitContext* ctx) override { @@ -184,8 
+182,6 @@ struct NotExpr: Expr{ NotExpr(Expr_&& child): child(std::move(child)) {} Str str() const override { return "not"; } - std::vector children() const override { return {child.get()}; } - void emit(CodeEmitContext* ctx) override { child->emit(ctx); ctx->emit(OP_UNARY_NOT, BC_NOARG, line); @@ -198,8 +194,6 @@ struct AndExpr: Expr{ Expr_ rhs; Str str() const override { return "and"; } - std::vector children() const override { return {lhs.get(), rhs.get()}; } - void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); int patch = ctx->emit(OP_JUMP_IF_FALSE_OR_POP, BC_NOARG, line); @@ -214,8 +208,6 @@ struct OrExpr: Expr{ Expr_ rhs; Str str() const override { return "or"; } - std::vector children() const override { return {lhs.get(), rhs.get()}; } - void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); int patch = ctx->emit(OP_JUMP_IF_TRUE_OR_POP, BC_NOARG, line); @@ -295,8 +287,6 @@ struct NegatedExpr: Expr{ NegatedExpr(Expr_&& child): child(std::move(child)) {} Str str() const override { return "-"; } - std::vector children() const override { return {child.get()}; } - void emit(CodeEmitContext* ctx) override { VM* vm = ctx->vm; // if child is a int of float, do constant folding @@ -330,11 +320,6 @@ struct SliceExpr: Expr{ Expr_ step; Str str() const override { return "slice()"; } - std::vector children() const override { - // may contain nullptr - return {start.get(), stop.get(), step.get()}; - } - void emit(CodeEmitContext* ctx) override { if(start){ start->emit(ctx); @@ -362,7 +347,6 @@ struct DictItemExpr: Expr{ Expr_ key; Expr_ value; Str str() const override { return "k:v"; } - std::vector children() const override { return {key.get(), value.get()}; } void emit(CodeEmitContext* ctx) override { value->emit(ctx); @@ -376,12 +360,6 @@ struct SequenceExpr: Expr{ SequenceExpr(std::vector&& items): items(std::move(items)) {} virtual Opcode opcode() const = 0; - std::vector children() const override { - std::vector ret; - for(auto& item: items) 
ret.push_back(item.get()); - return ret; - } - void emit(CodeEmitContext* ctx) override { for(auto& item: items) item->emit(ctx); ctx->emit(opcode(), items.size(), line); @@ -543,6 +521,7 @@ struct FStringExpr: Expr{ size++; } ctx->emit(OP_LOAD_BUILTIN_EVAL, BC_NOARG, line); + ctx->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(m[1].str())), line); ctx->emit(OP_CALL, 1, line); size++; @@ -609,6 +588,14 @@ struct AttribExpr: Expr{ ctx->emit(OP_STORE_ATTR, index, line); return true; } + + void emit_method(CodeEmitContext* ctx) { + a->emit(ctx); + int index = ctx->add_name(b); + ctx->emit(OP_LOAD_METHOD, index, line); + } + + bool is_attrib() const override { return true; } }; // PASS @@ -618,13 +605,6 @@ struct CallExpr: Expr{ std::vector> kwargs; Str str() const override { return "call(...)"; } - std::vector children() const override { - std::vector ret; - for(auto& item: args) ret.push_back(item.get()); - // ...ignore kwargs for simplicity - return ret; - } - bool need_unpack() const { for(auto& item: args) if(item->is_starred()) return true; return false; @@ -634,7 +614,13 @@ struct CallExpr: Expr{ VM* vm = ctx->vm; // TODO: if callable is a AttrExpr, we should try to use `fast_call` // instead of use `boundmethod` proxy - callable->emit(ctx); + if(callable->is_attrib()){ + auto p = static_cast(callable.get()); + p->emit_method(ctx); + }else{ + callable->emit(ctx); + ctx->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); + } // emit args for(auto& item: args) item->emit(ctx); // emit kwargs @@ -659,10 +645,6 @@ struct BinaryExpr: Expr{ Expr_ rhs; Str str() const override { return TK_STR(op); } - std::vector children() const override { - return {lhs.get(), rhs.get()}; - } - void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); rhs->emit(ctx); @@ -706,10 +688,6 @@ struct TernaryExpr: Expr{ return "cond ? 
t : f"; } - std::vector children() const override { - return {cond.get(), true_expr.get(), false_expr.get()}; - } - void emit(CodeEmitContext* ctx) override { cond->emit(ctx); int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, cond->line); diff --git a/src/opcodes.h b/src/opcodes.h index de44b343..ed2bb83a 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -15,9 +15,11 @@ OPCODE(LOAD_FALSE) OPCODE(LOAD_ELLIPSIS) OPCODE(LOAD_BUILTIN_EVAL) OPCODE(LOAD_FUNCTION) +OPCODE(LOAD_NULL) /**************************/ OPCODE(LOAD_NAME) OPCODE(LOAD_ATTR) +OPCODE(LOAD_METHOD) OPCODE(LOAD_SUBSCR) OPCODE(STORE_LOCAL) diff --git a/src/pocketpy.h b/src/pocketpy.h index 2b125f56..1d4b90f7 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -2,6 +2,7 @@ #include "ceval.h" #include "compiler.h" +#include "obj.h" #include "repl.h" #include "iter.h" #include "cffi.h" @@ -68,7 +69,9 @@ inline void init_builtins(VM* _vm) { vm->check_type(args[0], vm->tp_type); Type type = OBJ_GET(Type, args[0]); if(!vm->isinstance(args[1], type)){ - vm->TypeError("super(type, obj): obj must be an instance or subtype of type"); + Str _0 = obj_type_name(vm, OBJ_GET(Type, vm->_t(args[1]))); + Str _1 = obj_type_name(vm, type); + vm->TypeError("super(): " + _0.escape(true) + " is not an instance of " + _1.escape(true)); } Type base = vm->_all_types[type].base; return vm->heap.gcnew(vm->tp_super, Super(args[1], base)); diff --git a/src/vm.h b/src/vm.h index b28cd20a..fdabfca8 100644 --- a/src/vm.h +++ b/src/vm.h @@ -60,6 +60,7 @@ public: PyObject* _py_op_call; PyObject* _py_op_yield; + PyObject* _py_null; PyObject* None; PyObject* True; PyObject* False; @@ -337,6 +338,7 @@ public: PyObject* call(PyObject* callable, Args args, const Args& kwargs, bool opCall); void unpack_args(Args& args); PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true, bool class_only=false); + PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self); template void setattr(PyObject* obj, StrName name, T&& 
value); template @@ -594,7 +596,7 @@ inline Str VM::disassemble(CodeObject_ co){ argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; break; case OP_LOAD_NAME: case OP_STORE_LOCAL: case OP_STORE_GLOBAL: - case OP_LOAD_ATTR: case OP_STORE_ATTR: case OP_DELETE_ATTR: + case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR: case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_DELETE_LOCAL: case OP_DELETE_GLOBAL: argStr += " (" + co->names[byte.arg].str().escape(true) + ")"; @@ -663,6 +665,7 @@ inline void VM::init_builtin_types(){ this->Ellipsis = heap._new(_new_type_object("ellipsis"), {}); this->True = heap._new(tp_bool, {}); this->False = heap._new(tp_bool, {}); + this->_py_null = heap._new(_new_type_object("_py_null"), {}); this->_py_op_call = heap._new(_new_type_object("_py_op_call"), {}); this->_py_op_yield = heap._new(_new_type_object("_py_op_yield"), {}); @@ -682,7 +685,6 @@ inline void VM::init_builtin_types(){ post_init(); for(int i=0; i<_all_types.size(); i++){ - // std::cout << i << ": " << _all_types[i].name << std::endl; _all_types[i].obj->attr()._try_perfect_rehash(); } for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); @@ -786,6 +788,7 @@ inline void VM::unpack_args(Args& args){ // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){ + // TODO: class_only impl may not be correct PyObject* objtype = _t(obj); // handle super() proxy if(is_type(obj, tp_super)){ @@ -815,6 +818,39 @@ inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool c return nullptr; } +// used by OP_LOAD_METHOD +// try to load a unbound method (fallback to `getattr` if not found) +inline PyObject* VM::get_unbound_method(PyObject* obj, StrName name, PyObject** self){ + *self = _py_null; + // TODO: class_only impl may not be correct + PyObject* objtype = _t(obj); + // handle super() proxy + 
if(is_type(obj, tp_super)){ + const Super& super = OBJ_GET(Super, obj); + obj = super.first; + objtype = _t(super.second); + } + PyObject* cls_var = find_name_in_mro(objtype, name); + if(cls_var != nullptr){ + // handle descriptor + PyObject* descr_get = _t(cls_var)->attr().try_get(__get__); + if(descr_get != nullptr) return call(descr_get, Args{cls_var, obj}); + } + // handle instance __dict__ + if(!is_tagged(obj) && obj->is_attr_valid()){ + PyObject* val = obj->attr().try_get(name); + if(val != nullptr) return val; + } + if(cls_var != nullptr){ + if(is_type(cls_var, tp_function) || is_type(cls_var, tp_native_function)){ + *self = obj; + } + return cls_var; + } + AttributeError(obj, name); + return nullptr; +} + template inline void VM::setattr(PyObject* obj, StrName name, T&& value){ static_assert(std::is_same_v, PyObject*>); @@ -894,7 +930,12 @@ inline PyObject* VM::_exec(){ Exception& _e = CAST(Exception&, obj); _e.st_push(frame->snapshot()); callstack.pop(); - if(callstack.empty()) throw _e; + if(callstack.empty()){ +#if DEBUG_FULL_EXCEPTION + std::cerr << _e.summary() << std::endl; +#endif + throw _e; + } frame = callstack.top().get(); frame->push(obj); if(frame->id < base_id) throw ToBeRaisedException(); From 12419105bd9cf76cc0f11704edefefb553cc3c32 Mon Sep 17 00:00:00 2001 From: BLUELOVETH Date: Thu, 6 Apr 2023 16:36:13 +0000 Subject: [PATCH 59/73] up --- src/ceval.h | 7 +-- src/pocketpy.h | 6 +-- src/vm.h | 120 ++++++++++++++++++++++++++----------------------- 3 files changed, 70 insertions(+), 63 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index 4c9e29de..f253a7fc 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -72,7 +72,7 @@ __NEXT_STEP:; PyObject* a = frame->top(); StrName name = frame->co->names[byte.arg]; PyObject* self; - frame->top() = get_unbound_method(a, name, &self); + frame->top() = get_unbound_method(a, name, &self, true, true); frame->push(self); } DISPATCH(); case OP_LOAD_SUBSCR: { @@ -271,11 +271,12 @@ __NEXT_STEP:; PyObject* kv = 
frame->popx(); // we do copy here to avoid accidental gc in `kv` // TODO: optimize to avoid copy - call(frame->top_1(), __setitem__, CAST(Tuple, kv)); + Tuple& t = CAST(Tuple& ,kv); + fast_call(__setitem__, Args{frame->top_1(), t[0], t[1]}); } DISPATCH(); case OP_SET_ADD: { PyObject* obj = frame->popx(); - call(frame->top_1(), m_add, Args{obj}); + fast_call(m_add, Args{frame->top_1(), obj}); } DISPATCH(); /*****************************************/ case OP_UNARY_NEGATIVE: diff --git a/src/pocketpy.h b/src/pocketpy.h index 1d4b90f7..42e33ecf 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -115,7 +115,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<1>("repr", CPP_LAMBDA(vm->asRepr(args[0]))); - _vm->bind_builtin_func<1>("len", CPP_LAMBDA(vm->call(args[0], __len__, no_arg()))); + _vm->bind_builtin_func<1>("len", CPP_LAMBDA(vm->fast_call(__len__, Args{args[0]}))); _vm->bind_builtin_func<1>("hash", [](VM* vm, Args& args){ i64 value = vm->hash(args[0]); @@ -604,7 +604,7 @@ inline void add_module_json(VM* vm){ return vm->_exec(code, vm->top_frame()->_module, vm->builtins, vm->top_frame()->_locals); }); - vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->call(args[0], __json__, no_arg()))); + vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->fast_call(__json__, Args{args[0]}))); } inline void add_module_math(VM* vm){ @@ -965,7 +965,7 @@ extern "C" { ss << f_header; for(int i=0; icall(args[i], pkpy::__json__, pkpy::no_arg()); + pkpy::PyObject* x = vm->fast_call(pkpy::__json__, pkpy::Args{args[i]}); ss << pkpy::CAST(pkpy::Str&, x); } char* packet = strdup(ss.str().c_str()); diff --git a/src/vm.h b/src/vm.h index fdabfca8..a6f58a8a 100644 --- a/src/vm.h +++ b/src/vm.h @@ -5,6 +5,7 @@ #include "error.h" #include "gc.h" #include "obj.h" +#include "str.h" namespace pkpy{ @@ -102,15 +103,17 @@ public: } PyObject* asStr(PyObject* obj){ - PyObject* f = getattr(obj, __str__, false, true); - if(f != nullptr) return call(f, no_arg()); + PyObject* self; + PyObject* f = 
get_unbound_method(obj, __str__, &self, false); + if(self != _py_null) return call(f, Args{self}); return asRepr(obj); } PyObject* asIter(PyObject* obj){ if(is_type(obj, tp_iterator)) return obj; - PyObject* iter_f = getattr(obj, __iter__, false, true); - if(iter_f != nullptr) return call(iter_f, no_arg()); + PyObject* self; + PyObject* iter_f = get_unbound_method(obj, __iter__, &self, false); + if(self != _py_null) return call(iter_f, Args{self}); TypeError(OBJ_NAME(_t(obj)).escape(true) + " object is not iterable"); return nullptr; } @@ -157,13 +160,6 @@ public: return call(callable, std::forward(args), no_arg(), false); } - template - std::enable_if_t, Args>, PyObject*> - call(PyObject* obj, const StrName name, ArgT&& args){ - PyObject* callable = getattr(obj, name, true, true); - return call(callable, std::forward(args), no_arg(), false); - } - PyObject* exec(Str source, Str filename, CompileMode mode, PyObject* _module=nullptr){ if(_module == nullptr) _module = _main; try { @@ -337,8 +333,8 @@ public: void init_builtin_types(); PyObject* call(PyObject* callable, Args args, const Args& kwargs, bool opCall); void unpack_args(Args& args); - PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true, bool class_only=false); - PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self); + PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true); + PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err=true, bool fallback=false); template void setattr(PyObject* obj, StrName name, T&& value); template @@ -513,9 +509,10 @@ inline bool VM::asBool(PyObject* obj){ if(obj == None) return false; if(is_type(obj, tp_int)) return CAST(i64, obj) != 0; if(is_type(obj, tp_float)) return CAST(f64, obj) != 0.0; - PyObject* len_f = getattr(obj, __len__, false, true); - if(len_f != nullptr){ - PyObject* ret = call(len_f, no_arg()); + PyObject* self; + PyObject* len_f = get_unbound_method(obj, __len__, &self, 
false); + if(self != _py_null){ + PyObject* ret = call(len_f, Args{self}); return CAST(i64, ret) > 0; } return true; @@ -545,7 +542,8 @@ inline i64 VM::hash(PyObject* obj){ } inline PyObject* VM::asRepr(PyObject* obj){ - return call(obj, __repr__, no_arg()); + // TODO: fastcall does not take care of super() proxy! + return fast_call(__repr__, Args{obj}); } inline PyObject* VM::new_module(StrName name) { @@ -699,8 +697,10 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo obj = call(new_f, std::move(args), kwargs, false); }else{ obj = heap.gcnew(OBJ_GET(Type, callable), {}); - PyObject* init_f = getattr(obj, __init__, false, true); - if (init_f != nullptr) call(init_f, std::move(args), kwargs, false); + PyObject* self; + PyObject* init_f = get_unbound_method(obj, __init__, &self, false); + args.extend_self(self); + if (self != _py_null) call(init_f, std::move(args), kwargs, false); } return obj; } @@ -764,8 +764,10 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo return _exec(); } - PyObject* call_f = getattr(callable, __call__, false, true); - if(call_f != nullptr){ + PyObject* self; + PyObject* call_f = get_unbound_method(callable, __call__, &self, false); + if(self != _py_null){ + args.extend_self(self); return call(call_f, std::move(args), kwargs, false); } TypeError(OBJ_NAME(_t(callable)).escape(true) + " object is not callable"); @@ -787,41 +789,7 @@ inline void VM::unpack_args(Args& args){ } // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance -inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){ - // TODO: class_only impl may not be correct - PyObject* objtype = _t(obj); - // handle super() proxy - if(is_type(obj, tp_super)){ - const Super& super = OBJ_GET(Super, obj); - obj = super.first; - objtype = _t(super.second); - } - PyObject* cls_var = find_name_in_mro(objtype, name); - if(cls_var != nullptr){ - // handle descriptor - 
PyObject* descr_get = _t(cls_var)->attr().try_get(__get__); - if(descr_get != nullptr) return call(descr_get, Args{cls_var, obj}); - } - // handle instance __dict__ - if(!class_only && !is_tagged(obj) && obj->is_attr_valid()){ - PyObject* val = obj->attr().try_get(name); - if(val != nullptr) return val; - } - if(cls_var != nullptr){ - // bound method is non-data descriptor - if(is_type(cls_var, tp_function) || is_type(cls_var, tp_native_function)){ - return VAR(BoundMethod(obj, cls_var)); - } - return cls_var; - } - if(throw_err) AttributeError(obj, name); - return nullptr; -} - -// used by OP_LOAD_METHOD -// try to load a unbound method (fallback to `getattr` if not found) -inline PyObject* VM::get_unbound_method(PyObject* obj, StrName name, PyObject** self){ - *self = _py_null; +inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){ // TODO: class_only impl may not be correct PyObject* objtype = _t(obj); // handle super() proxy @@ -841,13 +809,51 @@ inline PyObject* VM::get_unbound_method(PyObject* obj, StrName name, PyObject** PyObject* val = obj->attr().try_get(name); if(val != nullptr) return val; } + if(cls_var != nullptr){ + // bound method is non-data descriptor + if(is_type(cls_var, tp_function) || is_type(cls_var, tp_native_function)){ + return VAR(BoundMethod(obj, cls_var)); + } + return cls_var; + } + if(throw_err) AttributeError(obj, name); + return nullptr; +} + +// used by OP_LOAD_METHOD +// try to load a unbound method (fallback to `getattr` if not found) +inline PyObject* VM::get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err, bool fallback){ + *self = _py_null; + // TODO: class_only impl may not be correct + PyObject* objtype = _t(obj); + // handle super() proxy + if(is_type(obj, tp_super)){ + const Super& super = OBJ_GET(Super, obj); + obj = super.first; + objtype = _t(super.second); + } + PyObject* cls_var = find_name_in_mro(objtype, name); + + if(fallback){ + if(cls_var != nullptr){ + // handle 
descriptor + PyObject* descr_get = _t(cls_var)->attr().try_get(__get__); + if(descr_get != nullptr) return call(descr_get, Args{cls_var, obj}); + } + // handle instance __dict__ + if(!is_tagged(obj) && obj->is_attr_valid()){ + PyObject* val = obj->attr().try_get(name); + if(val != nullptr) return val; + } + } + if(cls_var != nullptr){ if(is_type(cls_var, tp_function) || is_type(cls_var, tp_native_function)){ *self = obj; } return cls_var; } - AttributeError(obj, name); + if(throw_err) AttributeError(obj, name); return nullptr; } From 4e63588589f1d00dc1d12f4402a7b8a246f21230 Mon Sep 17 00:00:00 2001 From: BLUELOVETH Date: Thu, 6 Apr 2023 17:21:22 +0000 Subject: [PATCH 60/73] up --- src/common.h | 2 +- src/pocketpy.h | 16 ++++++++-------- src/tuplelist.h | 3 ++- src/vector.h | 43 ++++++++++++++++++++++++++++++++++--------- src/vm.h | 2 +- 5 files changed, 46 insertions(+), 20 deletions(-) diff --git a/src/common.h b/src/common.h index 853f6967..ad90a7b0 100644 --- a/src/common.h +++ b/src/common.h @@ -38,7 +38,7 @@ #define DEBUG_CEVAL_STEP 0 #define DEBUG_FULL_EXCEPTION 0 #define DEBUG_NO_AUTO_GC 1 -#define DEBUG_GC_STATS 1 +#define DEBUG_GC_STATS 0 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 diff --git a/src/pocketpy.h b/src/pocketpy.h index 42e33ecf..81be48c5 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -429,7 +429,7 @@ inline void init_builtins(VM* _vm) { List& self = CAST(List&, args[0]); PyObject* obj = vm->asList(args[1]); const List& list = CAST(List&, obj); - self.insert(self.end(), list.begin(), list.end()); + self.extend(list); return vm->None; }); @@ -444,7 +444,7 @@ inline void init_builtins(VM* _vm) { int n = CAST(int, args[1]); List result; result.reserve(self.size() * n); - for(int i = 0; i < n; i++) result.insert(result.end(), self.begin(), self.end()); + for(int i = 0; i < n; i++) result.extend(self); return VAR(std::move(result)); }); @@ -454,7 +454,7 @@ inline void 
init_builtins(VM* _vm) { if(index < 0) index += self.size(); if(index < 0) index = 0; if(index > self.size()) index = self.size(); - self.insert(self.begin() + index, args[2]); + self.insert(index, args[2]); return vm->None; }); @@ -467,10 +467,10 @@ inline void init_builtins(VM* _vm) { _vm->bind_method<1>("list", "__add__", [](VM* vm, Args& args) { const List& self = CAST(List&, args[0]); - const List& obj = CAST(List&, args[1]); - List new_list = self; - new_list.insert(new_list.end(), obj.begin(), obj.end()); - return VAR(new_list); + const List& other = CAST(List&, args[1]); + List new_list(self); // copy construct + new_list.extend(other); + return VAR(std::move(new_list)); }); _vm->bind_method<0>("list", "__len__", [](VM* vm, Args& args) { @@ -510,7 +510,7 @@ inline void init_builtins(VM* _vm) { List& self = CAST(List&, args[0]); int index = CAST(int, args[1]); index = vm->normalized_index(index, self.size()); - self.erase(self.begin() + index); + self.erase(index); return vm->None; }); diff --git a/src/tuplelist.h b/src/tuplelist.h index 4c9cda33..9ec0c71b 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -6,7 +6,7 @@ namespace pkpy { -using List = std::vector; +using List = small_vector; class Args { inline static THREAD_LOCAL FreeListA _pool; @@ -60,6 +60,7 @@ public: List to_list() noexcept { List ret(_size); + // TODO: use move/memcpy for(int i=0; i<_size; i++) ret[i] = _args[i]; return ret; } diff --git a/src/vector.h b/src/vector.h index e952e0c3..c921dc17 100644 --- a/src/vector.h +++ b/src/vector.h @@ -16,6 +16,11 @@ struct small_vector{ _data = _buffer; } + small_vector(int size): _size(size), _capacity(N){ + _data = _buffer; + reserve(size); + } + small_vector(const small_vector& other): _size(other._size), _capacity(other._capacity) { if(other.is_small()){ _data = _buffer; @@ -57,19 +62,25 @@ struct small_vector{ template void push_back(__ValueT&& t) { - if (_size == _capacity) { - _capacity *= 2; - if (is_small()) { - _data = 
(T*)malloc(sizeof(T) * _capacity); - memcpy(_data, _buffer, sizeof(T) * _size); - } else { - _data = (T*)realloc(_data, sizeof(T) * _capacity); - } - } + if (_size == _capacity) reserve(_capacity*2); _data[_size++] = std::forward<__ValueT>(t); } + void reserve(int cap){ + if(cap < _capacity) return; + _capacity = cap; + if (is_small()) { + _data = (T*)malloc(sizeof(T) * _capacity); + memcpy(_data, _buffer, sizeof(T) * _size); + } else { + _data = (T*)realloc(_data, sizeof(T) * _capacity); + } + } + void pop_back() { _size--; } + void extend(const small_vector& other){ + for(int i=0; i + void insert(int i, __ValueT&& val){ + if (_size == _capacity) reserve(_capacity*2); + for(int j=_size; j>i; j--) _data[j] = _data[j-1]; + _data[i] = std::forward<__ValueT>(val); + _size++; + } + + void erase(int i){ + for(int j=i; j<_size-1; j++) _data[j] = _data[j+1]; + _size--; + } ~small_vector() { if (!is_small()) free(_data); diff --git a/src/vm.h b/src/vm.h index a6f58a8a..fa51dcea 100644 --- a/src/vm.h +++ b/src/vm.h @@ -780,7 +780,7 @@ inline void VM::unpack_args(Args& args){ if(is_type(args[i], tp_star_wrapper)){ auto& star = _CAST(StarWrapper&, args[i]); List& list = CAST(List&, asList(star.obj)); - unpacked.insert(unpacked.end(), list.begin(), list.end()); + unpacked.extend(list); }else{ unpacked.push_back(args[i]); } From 97d0b17b8edcb2615d4bbc31e3eefa18df5f8aa2 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 7 Apr 2023 11:16:08 +0800 Subject: [PATCH 61/73] Update vector.h --- src/vector.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vector.h b/src/vector.h index c921dc17..d5ed28f6 100644 --- a/src/vector.h +++ b/src/vector.h @@ -16,9 +16,10 @@ struct small_vector{ _data = _buffer; } - small_vector(int size): _size(size), _capacity(N){ + small_vector(int size): _size(0), _capacity(N){ _data = _buffer; reserve(size); + _size = size; } small_vector(const small_vector& other): _size(other._size), _capacity(other._capacity) { From 
6a27bc8bda59b6c98acfb252c577dde9ffb901c1 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 7 Apr 2023 18:15:26 +0800 Subject: [PATCH 62/73] up --- .gitignore | 1 + src/ceval.h | 2 +- src/common.h | 5 +- src/gc.h | 16 +++- src/memory.h | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 240 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 33c370e4..67e402d6 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ plugins/godot/godot-cpp/ src/_generated.h profile.sh test +tmp.rar diff --git a/src/ceval.h b/src/ceval.h index f253a7fc..de39b215 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -16,7 +16,7 @@ __NEXT_STEP:; * `Args` containing strong references is safe if it is passed to `call` or `fast_call` */ #if !DEBUG_NO_AUTO_GC - heap._auto_collect(this); + heap._auto_collect(); #endif const Bytecode& byte = frame->next_bytecode(); diff --git a/src/common.h b/src/common.h index ad90a7b0..2303030f 100644 --- a/src/common.h +++ b/src/common.h @@ -36,8 +36,9 @@ #define DEBUG_DIS_EXEC 0 #define DEBUG_DIS_EXEC_MIN 1 #define DEBUG_CEVAL_STEP 0 -#define DEBUG_FULL_EXCEPTION 0 -#define DEBUG_NO_AUTO_GC 1 +#define DEBUG_FULL_EXCEPTION 1 +#define DEBUG_MEMORY_POOL 0 +#define DEBUG_NO_AUTO_GC 0 #define DEBUG_GC_STATS 0 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) diff --git a/src/gc.h b/src/gc.h index dbb22d23..4cb16eab 100644 --- a/src/gc.h +++ b/src/gc.h @@ -1,6 +1,7 @@ #pragma once #include "common.h" +#include "memory.h" #include "obj.h" #include "codeobject.h" #include "namedict.h" @@ -10,6 +11,7 @@ struct ManagedHeap{ std::vector _no_gc; std::vector gen; VM* vm; + MemoryPool<> pool; ManagedHeap(VM* vm): vm(vm) {} @@ -36,7 +38,8 @@ struct ManagedHeap{ template PyObject* gcnew(Type type, T&& val){ - PyObject* obj = new Py_>(type, std::forward(val)); + using __T = Py_>; + PyObject* obj = new(pool.alloc<__T>()) __T(type, std::forward(val)); gen.push_back(obj); gc_counter++; return obj; @@ 
-44,16 +47,19 @@ struct ManagedHeap{ template PyObject* _new(Type type, T&& val){ - PyObject* obj = new Py_>(type, std::forward(val)); + using __T = Py_>; + PyObject* obj = new(pool.alloc<__T>()) __T(type, std::forward(val)); obj->gc.enabled = false; _no_gc.push_back(obj); return obj; } +#if DEBUG_GC_STATS inline static std::map deleted; +#endif ~ManagedHeap(){ - for(PyObject* obj: _no_gc) delete obj; + for(PyObject* obj: _no_gc) obj->~PyObject(), pool.dealloc(obj); #if DEBUG_GC_STATS for(auto& [type, count]: deleted){ std::cout << "GC: " << obj_type_name(vm, type) << "=" << count << std::endl; @@ -68,8 +74,10 @@ struct ManagedHeap{ obj->gc.marked = false; alive.push_back(obj); }else{ +#if DEBUG_GC_STATS deleted[obj->type] += 1; - delete obj; +#endif + obj->~PyObject(), pool.dealloc(obj); } } diff --git a/src/memory.h b/src/memory.h index e7559865..2fc53ba8 100644 --- a/src/memory.h +++ b/src/memory.h @@ -105,4 +105,227 @@ struct FreeListA { } }; + +struct LinkedListNode{ + LinkedListNode* prev; + LinkedListNode* next; +}; + +template +struct DoubleLinkedList{ + static_assert(std::is_base_of_v); + int _size; + LinkedListNode head; + LinkedListNode tail; + + DoubleLinkedList(): _size(0){ + head.prev = nullptr; + head.next = &tail; + tail.prev = &head; + tail.next = nullptr; + } + + void push_back(T* node){ + node->prev = tail.prev; + node->next = &tail; + tail.prev->next = node; + tail.prev = node; + _size++; + } + + void push_front(T* node){ + node->prev = &head; + node->next = head.next; + head.next->prev = node; + head.next = node; + _size++; + } + + void pop_back(){ +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::pop_back() called on empty list"); +#endif + tail.prev->prev->next = &tail; + tail.prev = tail.prev->prev; + _size--; + } + + void pop_front(){ +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::pop_front() called on empty list"); +#endif + head.next->next->prev = &head; + head.next = 
head.next->next; + _size--; + } + + T* back() const { +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::back() called on empty list"); +#endif + return static_cast(tail.prev); + } + + T* front() const { +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::front() called on empty list"); +#endif + return static_cast(head.next); + } + + void erase(T* node){ +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::erase() called on empty list"); + LinkedListNode* n = head.next; + while(n != &tail){ + if(n == node) break; + n = n->next; + } + if(n != node) throw std::runtime_error("DoubleLinkedList::erase() called on node not in the list"); +#endif + node->prev->next = node->next; + node->next->prev = node->prev; + _size--; + } + + void move_all_back(DoubleLinkedList& other){ + if(other.empty()) return; + other.tail.prev->next = &tail; + tail.prev->next = other.head.next; + other.head.next->prev = tail.prev; + tail.prev = other.tail.prev; + _size += other._size; + other.head.next = &other.tail; + other.tail.prev = &other.head; + other._size = 0; + } + + bool empty() const { +#if DEBUG_MEMORY_POOL + if(size() == 0){ + if(head.next != &tail || tail.prev != &head){ + throw std::runtime_error("DoubleLinkedList::size() returned 0 but the list is not empty"); + } + return true; + } +#endif + return _size == 0; + } + + int size() const { return _size; } + + void apply(std::function func){ + LinkedListNode* p = head.next; + while(p != &tail){ + LinkedListNode* next = p->next; + func(static_cast(p)); + p = next; + } + } +}; + +template +struct MemoryPool{ + static const size_t __MaxBlocks = 256*1024 / __BlockSize; + struct Block{ + void* arena; + char data[__BlockSize]; + }; + + struct Arena: LinkedListNode{ + Block _blocks[__MaxBlocks]; + Block* _free_list[__MaxBlocks]; + int _free_list_size; + + Arena(): _free_list_size(__MaxBlocks) { + for(int i=0; i<__MaxBlocks; i++){ + _blocks[i].arena = 
this; + _free_list[i] = &_blocks[i]; + } + } + + bool empty() const { return _free_list_size == 0; } + bool full() const { return _free_list_size == __MaxBlocks; } + + Block* alloc(){ +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("Arena::alloc() called on empty arena"); +#endif + _free_list_size--; + return _free_list[_free_list_size]; + } + + void dealloc(Block* block){ +#if DEBUG_MEMORY_POOL + if(full()) throw std::runtime_error("Arena::dealloc() called on full arena"); +#endif + _free_list[_free_list_size] = block; + _free_list_size++; + } + }; + + DoubleLinkedList _arenas; + DoubleLinkedList _empty_arenas; + DoubleLinkedList _full_arenas; + + template + void* alloc() { return alloc(sizeof(__T)); } + + void* alloc(size_t size){ + if(size > __BlockSize){ + void* p = malloc(sizeof(void*) + size); + memset(p, 0, sizeof(void*)); + return (char*)p + sizeof(void*); + } + + if(_arenas.empty()){ + if(_full_arenas.empty()){ + _arenas.push_back(new Arena()); + }else{ + _arenas.move_all_back(_full_arenas); + } + } + Arena* arena = _arenas.back(); + void* p = arena->alloc()->data; + if(arena->empty()){ + _arenas.pop_back(); + _empty_arenas.push_back(arena); + } + return p; + } + + void dealloc(void* p){ + Block* block = (Block*)((char*)p - sizeof(void*)); + if(block->arena == nullptr){ + free(block); + }else{ + Arena* arena = (Arena*)block->arena; + if(arena->empty()){ + _empty_arenas.erase(arena); + _arenas.push_front(arena); + arena->dealloc(block); + }else{ + arena->dealloc(block); + if(arena->full()){ // && _arenas.size() > 2 + _arenas.erase(arena); + if(_full_arenas.size() < 8){ + _full_arenas.push_back(arena); + }else{ + delete arena; + } + } + } + } + } + + ~MemoryPool(){ + // std::cout << _arenas.size() << std::endl; + // std::cout << _empty_arenas.size() << std::endl; + // std::cout << _full_arenas.size() << std::endl; + _arenas.apply([](Arena* arena){ delete arena; }); + _empty_arenas.apply([](Arena* arena){ delete arena; }); + 
_full_arenas.apply([](Arena* arena){ delete arena; }); + } +}; + }; // namespace pkpy From 7efb4403883b54547b97944b0c6ece00e9148674 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 7 Apr 2023 21:36:45 +0800 Subject: [PATCH 63/73] up --- src/compiler.h | 1 + src/frame.h | 2 +- src/lexer.h | 2 +- src/memory.h | 46 +++------------- src/namedict.h | 21 ++++--- src/new_str.h | 143 ++++++++++++++++++++++++++++++++++++++++++++++++ src/tuplelist.h | 13 ++--- src/vector.h | 77 ++++++++++---------------- 8 files changed, 201 insertions(+), 104 deletions(-) create mode 100644 src/new_str.h diff --git a/src/compiler.h b/src/compiler.h index 050dd7d5..35cf07c9 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -55,6 +55,7 @@ class Compiler { } static void init_pratt_rules(){ + if(rules[TK(".")].precedence != PREC_NONE) return; // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ #define METHOD(name) &Compiler::name #define NO_INFIX nullptr, PREC_NONE diff --git a/src/frame.h b/src/frame.h index aef2fd21..7b162f43 100644 --- a/src/frame.h +++ b/src/frame.h @@ -7,7 +7,7 @@ namespace pkpy{ static THREAD_LOCAL uint64_t kFrameGlobalId = 0; -using ValueStack = small_vector; +using ValueStack = pod_vector; struct Frame { ValueStack _data; diff --git a/src/lexer.h b/src/lexer.h index 3b5b762c..74c8f96b 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -100,7 +100,7 @@ struct Lexer { const char* curr_char; int current_line = 1; std::vector nexts; - small_stack indents; + stack indents; int brackets_level = 0; bool used = false; diff --git a/src/memory.h b/src/memory.h index 2fc53ba8..24d78f90 100644 --- a/src/memory.h +++ b/src/memory.h @@ -1,7 +1,6 @@ #pragma once #include "common.h" -#include "vector.h" namespace pkpy{ @@ -69,48 +68,12 @@ shared_ptr make_sp(Args&&... 
args) { return shared_ptr(p); } -template -struct FreeListA { - std::vector buckets[__Bucket+1]; - - T* alloc(int n){ - static_assert(std::is_standard_layout_v); - T* p; - if(n > __Bucket || buckets[n].empty()){ - p = (T*)malloc(sizeof(T) * n); - }else{ - p = buckets[n].back(); - buckets[n].pop_back(); - } - if constexpr(__ZeroInit){ - // the constructor of T should be equivalent to zero initialization - memset((void*)p, 0, sizeof(T) * n); - } - return p; - } - - void dealloc(T* p, int n){ - if(p == nullptr) return; - if(n > __Bucket || buckets[n].size() >= 80){ - free(p); - }else{ - buckets[n].push_back(p); - } - } - - ~FreeListA(){ - for(int i=0; i<=__Bucket; i++){ - for(T* p : buckets[i]) free(p); - } - } -}; - - struct LinkedListNode{ LinkedListNode* prev; LinkedListNode* next; }; + template struct DoubleLinkedList{ static_assert(std::is_base_of_v); @@ -295,6 +258,9 @@ struct MemoryPool{ } void dealloc(void* p){ +#if DEBUG_MEMORY_POOL + if(p == nullptr) throw std::runtime_error("MemoryPool::dealloc() called on nullptr"); +#endif Block* block = (Block*)((char*)p - sizeof(void*)); if(block->arena == nullptr){ free(block); @@ -328,4 +294,8 @@ struct MemoryPool{ } }; +inline MemoryPool<64> pool64; +inline MemoryPool<128> pool128; +inline MemoryPool<256> pool256; + }; // namespace pkpy diff --git a/src/namedict.h b/src/namedict.h index 0a8f085f..ba40eae6 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -34,8 +34,6 @@ inline static uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vect struct NameDict { using Item = std::pair; - inline static THREAD_LOCAL FreeListA _pool; - uint16_t _capacity; uint16_t _size; float _load_factor; @@ -43,31 +41,36 @@ struct NameDict { uint16_t _mask; Item* _items; + void _alloc(int cap){ + _items = (Item*)pool128.alloc(cap * sizeof(Item)); + memset(_items, 0, cap * sizeof(Item)); + } + NameDict(uint16_t capacity=2, float load_factor=0.67, uint16_t hash_seed=kHashSeeds[0]): _capacity(capacity), _size(0), 
_load_factor(load_factor), _hash_seed(hash_seed), _mask(capacity-1) { - _items = _pool.alloc(_capacity); + _alloc(capacity); } NameDict(const NameDict& other) { memcpy(this, &other, sizeof(NameDict)); - _items = _pool.alloc(_capacity); + _alloc(_capacity); for(int i=0; i<_capacity; i++){ _items[i] = other._items[i]; } } NameDict& operator=(const NameDict& other) { - _pool.dealloc(_items, _capacity); + pool128.dealloc(_items); memcpy(this, &other, sizeof(NameDict)); - _items = _pool.alloc(_capacity); + _alloc(_capacity); for(int i=0; i<_capacity; i++){ _items[i] = other._items[i]; } return *this; } - ~NameDict(){ _pool.dealloc(_items, _capacity); } + ~NameDict(){ pool128.dealloc(_items); } NameDict(NameDict&&) = delete; NameDict& operator=(NameDict&&) = delete; @@ -109,7 +112,7 @@ while(!_items[i].first.empty()) { \ _capacity = find_next_capacity(_capacity * 2); _mask = _capacity - 1; } - _items = _pool.alloc(_capacity); + _alloc(_capacity); for(uint16_t i=0; i + +namespace pkpy{ + +struct String{ + char* data; + int size; + + String(): data((char*)pool64.alloc(0)), size(0) {} + String(int size): data((char*)pool64.alloc(size)), size(size) {} + String(const char* str) { + size = strlen(str); + data = (char*)pool64.alloc(size); + memcpy(data, str, size); + } + + String(const String& other): data((char*)pool64.alloc(other.size)), size(other.size) { + memcpy(data, other.data, size); + } + + String(String&& other): data(other.data), size(other.size) { + other.data = nullptr; + } + + String& operator=(const String& other){ + if(data!=nullptr) pool64.dealloc(data); + size = other.size; + data = (char*)pool64.alloc(size); + memcpy(data, other.data, size); + return *this; + } + + String& operator=(String&& other){ + if(data!=nullptr) pool64.dealloc(data); + size = other.size; + data = other.data; + other.data = nullptr; + return *this; + } + + ~String(){ + if(data!=nullptr) pool64.dealloc(data); + } + + char operator[](int idx) const { + return data[idx]; + } + + int 
length() const { + return size; + } + + String operator+(const String& other) const { + String ret(size + other.size); + memcpy(ret.data, data, size); + memcpy(ret.data + size, other.data, other.size); + return ret; + } + + friend std::ostream& operator<<(std::ostream& os, const String& str){ + os.write(str.data, str.size); + return os; + } + + bool operator==(const String& other) const { + if(size != other.size) return false; + return memcmp(data, other.data, size) == 0; + } + + bool operator!=(const String& other) const { + if(size != other.size) return true; + return memcmp(data, other.data, size) != 0; + } + + bool operator<(const String& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret < 0; + return size < other.size; + } + + bool operator>(const String& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret > 0; + return size > other.size; + } + + bool operator<=(const String& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret < 0; + return size <= other.size; + } + + bool operator>=(const String& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret > 0; + return size >= other.size; + } + + String substr(int start, int len) const { + String ret(len); + memcpy(ret.data, data + start, len); + return ret; + } + + String substr(int start) const { + return substr(start, size - start); + } + + char* dup_c_str() const { + char* p = (char*)malloc(size + 1); + memcpy(p, data, size); + p[size] = 0; + return p; + } + + std::string_view view() const { + return std::string_view(data, size); + } + + std::string str() const { + return std::string(data, size); + } + + String lstrip() const { + std::string copy = str(); + copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) { + // std::isspace(c) does not working on windows (Debug) + 
return c != ' ' && c != '\t' && c != '\r' && c != '\n'; + })); + return String(copy.c_str()); + } +}; + +struct UnicodeString: String{ + +}; + + +} // namespace pkpy \ No newline at end of file diff --git a/src/tuplelist.h b/src/tuplelist.h index 9ec0c71b..0abc993f 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -3,19 +3,18 @@ #include "common.h" #include "memory.h" #include "str.h" +#include "vector.h" namespace pkpy { -using List = small_vector; +using List = pod_vector; class Args { - inline static THREAD_LOCAL FreeListA _pool; - PyObject** _args; int _size; void _alloc(int n){ - this->_args = (n==0) ? nullptr : _pool.alloc(n); + this->_args = (n==0) ? nullptr : (PyObject**)pool64.alloc(n * sizeof(void*)); this->_size = n; } @@ -48,7 +47,7 @@ public: PyObject* operator[](int i) const { return _args[i]; } Args& operator=(Args&& other) noexcept { - _pool.dealloc(_args, _size); + if(_args!=nullptr) pool64.dealloc(_args); this->_args = other._args; this->_size = other._size; other._args = nullptr; @@ -71,10 +70,10 @@ public: _alloc(old_size+1); _args[0] = self; for(int i=0; i -struct small_vector{ +template +struct pod_vector{ + static_assert(128 % sizeof(T) == 0); + static_assert(std::is_pod_v); + static constexpr int N = 128 / sizeof(T); + static_assert(N > 4); int _size; int _capacity; T* _data; - T _buffer[N]; - small_vector(): _size(0), _capacity(N) { - static_assert(std::is_pod_v); - _data = _buffer; + pod_vector(): _size(0), _capacity(N) { + _data = (T*)pool128.alloc(_capacity * sizeof(T)); } - small_vector(int size): _size(0), _capacity(N){ - _data = _buffer; - reserve(size); - _size = size; + pod_vector(int size): _size(size), _capacity(std::max(N, size)) { + _data = (T*)pool128.alloc(_capacity * sizeof(T)); } - small_vector(const small_vector& other): _size(other._size), _capacity(other._capacity) { - if(other.is_small()){ - _data = _buffer; - memcpy(_buffer, other._buffer, sizeof(T) * _size); - } else { - _data = (T*)malloc(sizeof(T) * _capacity); - 
memcpy(_data, other._data, sizeof(T) * _size); - } + pod_vector(const pod_vector& other): _size(other._size), _capacity(other._capacity) { + _data = (T*)pool128.alloc(_capacity * sizeof(T)); + memcpy(_data, other._data, sizeof(T) * _size); } - small_vector(small_vector&& other) noexcept { + pod_vector(pod_vector&& other) noexcept { _size = other._size; _capacity = other._capacity; - if(other.is_small()){ - _data = _buffer; - memcpy(_buffer, other._buffer, sizeof(T) * _size); - } else { - _data = other._data; - other._data = other._buffer; - } + _data = other._data; + other._data = nullptr; } - small_vector& operator=(small_vector&& other) noexcept { - if (!is_small()) free(_data); + pod_vector& operator=(pod_vector&& other) noexcept { + if(_data!=nullptr) pool128.dealloc(_data); _size = other._size; _capacity = other._capacity; - if(other.is_small()){ - _data = _buffer; - memcpy(_buffer, other._buffer, sizeof(T) * _size); - } else { - _data = other._data; - other._data = other._buffer; - } + _data = other._data; + other._data = nullptr; return *this; } // remove copy assignment - small_vector& operator=(const small_vector& other) = delete; + pod_vector& operator=(const pod_vector& other) = delete; template void push_back(__ValueT&& t) { @@ -70,16 +56,12 @@ struct small_vector{ void reserve(int cap){ if(cap < _capacity) return; _capacity = cap; - if (is_small()) { - _data = (T*)malloc(sizeof(T) * _capacity); - memcpy(_data, _buffer, sizeof(T) * _size); - } else { - _data = (T*)realloc(_data, sizeof(T) * _capacity); - } + if(_data!=nullptr) pool128.dealloc(_data); + _data = (T*)pool128.alloc(_capacity * sizeof(T)); } void pop_back() { _size--; } - void extend(const small_vector& other){ + void extend(const pod_vector& other){ for(int i=0; i -using small_stack = stack>; +template +using pod_stack = stack>; } // namespace pkpy \ No newline at end of file From 95c3644dc22fbc0406c543d5d64099c3b47dfb26 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 7 Apr 2023 
22:40:20 +0800 Subject: [PATCH 64/73] up --- src/common.h | 4 +++- src/frame.h | 9 +++++++++ src/memory.h | 6 ++++-- src/obj.h | 5 ++++- src/vector.h | 6 +++++- src/vm.h | 15 +++++++++++---- 6 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/common.h b/src/common.h index 2303030f..fee6d8fe 100644 --- a/src/common.h +++ b/src/common.h @@ -36,11 +36,13 @@ #define DEBUG_DIS_EXEC 0 #define DEBUG_DIS_EXEC_MIN 1 #define DEBUG_CEVAL_STEP 0 -#define DEBUG_FULL_EXCEPTION 1 +#define DEBUG_FULL_EXCEPTION 0 #define DEBUG_MEMORY_POOL 0 #define DEBUG_NO_AUTO_GC 0 #define DEBUG_GC_STATS 0 +#define DEBUG_FRAME_USE_POOL 1 + #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 #else diff --git a/src/frame.h b/src/frame.h index 7b162f43..69f02cd3 100644 --- a/src/frame.h +++ b/src/frame.h @@ -1,6 +1,7 @@ #pragma once #include "codeobject.h" +#include "memory.h" #include "vector.h" namespace pkpy{ @@ -166,4 +167,12 @@ struct Frame { } }; + +#if DEBUG_FRAME_USE_POOL +inline void frame_deleter (Frame* p) { pool256.dealloc(p); } +using Frame_ = std::unique_ptr; +#else +using Frame_ = std::unique_ptr; +#endif + }; // namespace pkpy \ No newline at end of file diff --git a/src/memory.h b/src/memory.h index 24d78f90..e5d8255f 100644 --- a/src/memory.h +++ b/src/memory.h @@ -231,6 +231,8 @@ struct MemoryPool{ DoubleLinkedList _empty_arenas; DoubleLinkedList _full_arenas; + static constexpr int FULL_ARENA_SIZE = 4; + template void* alloc() { return alloc(sizeof(__T)); } @@ -272,9 +274,9 @@ struct MemoryPool{ arena->dealloc(block); }else{ arena->dealloc(block); - if(arena->full()){ // && _arenas.size() > 2 + if(arena->full() && _arenas.size()>2){ _arenas.erase(arena); - if(_full_arenas.size() < 8){ + if(_full_arenas.size() < FULL_ARENA_SIZE){ _full_arenas.push_back(arena); }else{ delete arena; diff --git a/src/obj.h b/src/obj.h index d5cbca40..8d9fe55d 100644 --- a/src/obj.h +++ b/src/obj.h @@ -160,7 +160,10 @@ Str 
obj_type_name(VM* vm, Type type); const int kTpIntIndex = 2; const int kTpFloatIndex = 3; -inline bool is_type(PyObject* obj, Type type) noexcept { +inline bool is_type(PyObject* obj, Type type) { +#if DEBUG_EXTRA_CHECK + if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr"); +#endif switch(type.index){ case kTpIntIndex: return is_int(obj); case kTpFloatIndex: return is_float(obj); diff --git a/src/vector.h b/src/vector.h index 25b21e0b..8caeb362 100644 --- a/src/vector.h +++ b/src/vector.h @@ -56,8 +56,12 @@ struct pod_vector{ void reserve(int cap){ if(cap < _capacity) return; _capacity = cap; - if(_data!=nullptr) pool128.dealloc(_data); + T* old_data = _data; _data = (T*)pool128.alloc(_capacity * sizeof(T)); + if(old_data!=nullptr){ + memcpy(_data, old_data, sizeof(T) * _size); + pool128.dealloc(old_data); + } } void pop_back() { _size--; } diff --git a/src/vm.h b/src/vm.h index fa51dcea..7cf26ce9 100644 --- a/src/vm.h +++ b/src/vm.h @@ -4,8 +4,10 @@ #include "frame.h" #include "error.h" #include "gc.h" +#include "memory.h" #include "obj.h" #include "str.h" +#include namespace pkpy{ @@ -31,10 +33,10 @@ Str _read_file_cwd(const Str& name, bool* ok); class Generator: public BaseIter { - std::unique_ptr frame; + Frame_ frame; int state; // 0,1,2 public: - Generator(VM* vm, std::unique_ptr&& frame) + Generator(VM* vm, Frame_&& frame) : BaseIter(vm), frame(std::move(frame)), state(0) {} PyObject* next() override; @@ -51,7 +53,7 @@ class VM { VM* vm; // self reference for simplify code public: ManagedHeap heap; - stack< std::unique_ptr > callstack; + stack< Frame_ > callstack; std::vector _all_types; PyObject* run_frame(Frame* frame); @@ -183,11 +185,16 @@ public: } template - std::unique_ptr _new_frame(Args&&... args){ + Frame_ _new_frame(Args&&... 
args){ if(callstack.size() > recursionlimit){ _error("RecursionError", "maximum recursion depth exceeded"); } +#if DEBUG_FRAME_USE_POOL + Frame* frame = new(pool256.alloc(sizeof(Frame))) Frame(std::forward(args)...); + return Frame_(frame, &frame_deleter); +#else return std::make_unique(std::forward(args)...); +#endif } template From e03ce955fc536678b4663b2730719488c56f9169 Mon Sep 17 00:00:00 2001 From: BLUELOVETH Date: Sat, 8 Apr 2023 06:36:13 +0000 Subject: [PATCH 65/73] up --- src/ceval.h | 26 +++++++++++++++++--------- src/common.h | 2 +- src/compiler.h | 4 ++-- src/frame.h | 17 ++++++----------- src/opcodes.h | 1 + src/pocketpy.h | 14 +++++++------- src/vm.h | 7 ++++--- 7 files changed, 38 insertions(+), 33 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index de39b215..c04b29f7 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -53,14 +53,22 @@ __NEXT_STEP:; case OP_LOAD_NAME: { StrName name = frame->co->names[byte.arg]; PyObject* val; - int i = 0; // names[0] is ensured to be non-null - do{ - val = frame->names[i++]->try_get(name); - if(val != nullptr){ - frame->push(val); - DISPATCH(); - } - }while(frame->names[i] != nullptr); + val = frame->f_locals().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + val = frame->f_closure_try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + val = frame->f_globals().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + val = vm->builtins->attr().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + vm->NameError(name); + } DISPATCH(); + case OP_LOAD_GLOBAL: { + StrName name = frame->co->names[byte.arg]; + PyObject* val = frame->f_globals().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + val = vm->builtins->attr().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } vm->NameError(name); } DISPATCH(); case OP_LOAD_ATTR: { @@ -319,7 +327,7 @@ __NEXT_STEP:; } CodeObject_ code = compile(source, name.str(), 
EXEC_MODE); PyObject* new_mod = new_module(name); - _exec(code, new_mod, builtins); + _exec(code, new_mod); new_mod->attr()._try_perfect_rehash(); frame->push(new_mod); }else{ diff --git a/src/common.h b/src/common.h index fee6d8fe..8717cba5 100644 --- a/src/common.h +++ b/src/common.h @@ -41,7 +41,7 @@ #define DEBUG_NO_AUTO_GC 0 #define DEBUG_GC_STATS 0 -#define DEBUG_FRAME_USE_POOL 1 +#define DEBUG_FRAME_USE_POOL 0 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 diff --git a/src/compiler.h b/src/compiler.h index 35cf07c9..6175d1c0 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -812,7 +812,7 @@ __SUBSCR_END: consume(TK(")")); } if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line); - else ctx()->emit(OP_LOAD_NAME, super_namei, prev().line); + else ctx()->emit(OP_LOAD_GLOBAL, super_namei, prev().line); ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE); ctx()->is_compiling_class = true; compile_block_body(); @@ -894,7 +894,7 @@ __SUBSCR_END: auto e = make_expr(decl->name, name_scope()); e->emit_store(ctx()); } else { - ctx()->emit(OP_LOAD_NAME, ctx()->add_name(obj_name), prev().line); + ctx()->emit(OP_LOAD_GLOBAL, ctx()->add_name(obj_name), prev().line); int index = ctx()->add_name(decl->name); ctx()->emit(OP_STORE_ATTR, index, prev().line); } diff --git a/src/frame.h b/src/frame.h index 69f02cd3..70d11921 100644 --- a/src/frame.h +++ b/src/frame.h @@ -21,21 +21,16 @@ struct Frame { NameDict_ _closure; const uint64_t id; std::vector> s_try_block; - const NameDict* names[5]; // name resolution array, zero terminated - NameDict& f_locals() noexcept { return *_locals; } + NameDict& f_locals() noexcept { return _locals!=nullptr ? 
*_locals : _module->attr(); } NameDict& f_globals() noexcept { return _module->attr(); } + PyObject* f_closure_try_get(StrName name){ + if(_closure == nullptr) return nullptr; + return _closure->try_get(name); + } - Frame(const CodeObject_& co, PyObject* _module, PyObject* builtins, NameDict_ _locals=nullptr, NameDict_ _closure=nullptr) + Frame(const CodeObject_& co, PyObject* _module, NameDict_ _locals=nullptr, NameDict_ _closure=nullptr) : co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) { - memset(names, 0, sizeof(names)); - int i = 0; - if(_locals != nullptr) names[i++] = _locals.get(); - if(_closure != nullptr) names[i++] = _closure.get(); - names[i++] = &_module->attr(); // borrowed reference - if(builtins != nullptr){ - names[i++] = &builtins->attr(); // borrowed reference - } } const Bytecode& next_bytecode() { diff --git a/src/opcodes.h b/src/opcodes.h index ed2bb83a..87d9e88f 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -18,6 +18,7 @@ OPCODE(LOAD_FUNCTION) OPCODE(LOAD_NULL) /**************************/ OPCODE(LOAD_NAME) +OPCODE(LOAD_GLOBAL) OPCODE(LOAD_ATTR) OPCODE(LOAD_METHOD) OPCODE(LOAD_SUBSCR) diff --git a/src/pocketpy.h b/src/pocketpy.h index 81be48c5..4ce389d1 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -98,12 +98,12 @@ inline void init_builtins(VM* _vm) { _vm->bind_builtin_func<1>("eval", [](VM* vm, Args& args) { CodeObject_ code = vm->compile(CAST(Str&, args[0]), "", EVAL_MODE); - return vm->_exec(code, vm->top_frame()->_module, vm->builtins, vm->top_frame()->_locals); + return vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals); }); _vm->bind_builtin_func<1>("exec", [](VM* vm, Args& args) { CodeObject_ code = vm->compile(CAST(Str&, args[0]), "", EXEC_MODE); - vm->_exec(code, vm->top_frame()->_module, vm->builtins, vm->top_frame()->_locals); + vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals); return vm->None; }); @@ -601,7 +601,7 @@ inline void 
add_module_json(VM* vm){ vm->bind_func<1>(mod, "loads", [](VM* vm, Args& args) { const Str& expr = CAST(Str&, args[0]); CodeObject_ code = vm->compile(expr, "", JSON_MODE); - return vm->_exec(code, vm->top_frame()->_module, vm->builtins, vm->top_frame()->_locals); + return vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals); }); vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->fast_call(__json__, Args{args[0]}))); @@ -754,7 +754,7 @@ inline void add_module_random(VM* vm){ PyObject* mod = vm->new_module("random"); Random::register_class(vm, mod); CodeObject_ code = vm->compile(kPythonLibs["random"], "random.py", EXEC_MODE); - vm->_exec(code, mod, vm->builtins); + vm->_exec(code, mod); } inline void add_module_gc(VM* vm){ @@ -782,11 +782,11 @@ inline void VM::post_init(){ } CodeObject_ code = compile(kPythonLibs["builtins"], "", EXEC_MODE); - this->_exec(code, this->builtins, nullptr); + this->_exec(code, this->builtins); code = compile(kPythonLibs["_dict"], "", EXEC_MODE); - this->_exec(code, this->builtins, nullptr); + this->_exec(code, this->builtins); code = compile(kPythonLibs["_set"], "", EXEC_MODE); - this->_exec(code, this->builtins, nullptr); + this->_exec(code, this->builtins); // property is defined in builtins.py so we need to add it after builtins is loaded _t(tp_object)->attr().set(__class__, property(CPP_LAMBDA(vm->_t(args[0])))); diff --git a/src/vm.h b/src/vm.h index 7cf26ce9..f694e3a1 100644 --- a/src/vm.h +++ b/src/vm.h @@ -169,7 +169,7 @@ public: #if DEBUG_DIS_EXEC if(_module == _main) std::cout << disassemble(code) << '\n'; #endif - return _exec(code, _module, builtins); + return _exec(code, _module); }catch (const Exception& e){ *_stderr << e.summary() << '\n'; @@ -600,7 +600,8 @@ inline Str VM::disassemble(CodeObject_ co){ case OP_LOAD_CONST: argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; break; - case OP_LOAD_NAME: case OP_STORE_LOCAL: case OP_STORE_GLOBAL: + case OP_LOAD_NAME: case OP_LOAD_GLOBAL: + case 
OP_STORE_LOCAL: case OP_STORE_GLOBAL: case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR: case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_DELETE_LOCAL: case OP_DELETE_GLOBAL: @@ -764,7 +765,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo locals->set(key, kwargs[i+1]); } PyObject* _module = fn._module != nullptr ? fn._module : top_frame()->_module; - auto _frame = _new_frame(fn.decl->code, _module, builtins, locals, fn._closure); + auto _frame = _new_frame(fn.decl->code, _module, locals, fn._closure); if(fn.decl->code->is_generator) return PyIter(Generator(this, std::move(_frame))); callstack.push(std::move(_frame)); if(opCall) return _py_op_call; From ef3e172fbd888c4cff4426299023154893ad7d6b Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 8 Apr 2023 15:48:47 +0800 Subject: [PATCH 66/73] up --- benchmarks/fib.py | 4 +++- run_profile.sh | 5 +++++ src/common.h | 2 -- src/frame.h | 13 +++++++------ src/gc.h | 12 +++++------- src/memory.h | 14 ++++++++++---- src/vm.h | 8 ++------ 7 files changed, 32 insertions(+), 26 deletions(-) create mode 100644 run_profile.sh diff --git a/benchmarks/fib.py b/benchmarks/fib.py index 377a1a87..1bb89670 100644 --- a/benchmarks/fib.py +++ b/benchmarks/fib.py @@ -3,4 +3,6 @@ def fib(n): return n return fib(n-1) + fib(n-2) -assert fib(32) == 2178309 \ No newline at end of file +assert fib(32) == 2178309 + +# 7049155 calls \ No newline at end of file diff --git a/run_profile.sh b/run_profile.sh new file mode 100644 index 00000000..d345e39c --- /dev/null +++ b/run_profile.sh @@ -0,0 +1,5 @@ +# THIS SCRIPT IS NOT WORKING +clang++ -pg -O2 -std=c++17 -fno-rtti -stdlib=libc++ -Wall -o pocketpy src/main.cpp +time ./pocketpy benchmarks/fib.py +gprof pocketpy gmon.out > gprof.txt +rm gmon.out \ No newline at end of file diff --git a/src/common.h b/src/common.h index 8717cba5..5a41d237 100644 --- a/src/common.h +++ b/src/common.h @@ -41,8 +41,6 @@ #define DEBUG_NO_AUTO_GC 0 
#define DEBUG_GC_STATS 0 -#define DEBUG_FRAME_USE_POOL 0 - #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 #else diff --git a/src/frame.h b/src/frame.h index 70d11921..ec253a36 100644 --- a/src/frame.h +++ b/src/frame.h @@ -163,11 +163,12 @@ struct Frame { }; -#if DEBUG_FRAME_USE_POOL -inline void frame_deleter (Frame* p) { pool256.dealloc(p); } -using Frame_ = std::unique_ptr; -#else -using Frame_ = std::unique_ptr; -#endif +struct FrameDeleter{ + void operator()(Frame* frame) const { + frame->~Frame(); + pool128.dealloc(frame); + } +}; +using Frame_ = std::unique_ptr; }; // namespace pkpy \ No newline at end of file diff --git a/src/gc.h b/src/gc.h index 4cb16eab..74739fa2 100644 --- a/src/gc.h +++ b/src/gc.h @@ -11,11 +11,9 @@ struct ManagedHeap{ std::vector _no_gc; std::vector gen; VM* vm; - MemoryPool<> pool; - ManagedHeap(VM* vm): vm(vm) {} - static const int kMinGCThreshold = 4096; + static const int kMinGCThreshold = 3072; int gc_threshold = kMinGCThreshold; int gc_counter = 0; @@ -39,7 +37,7 @@ struct ManagedHeap{ template PyObject* gcnew(Type type, T&& val){ using __T = Py_>; - PyObject* obj = new(pool.alloc<__T>()) __T(type, std::forward(val)); + PyObject* obj = new(pool128.alloc<__T>()) __T(type, std::forward(val)); gen.push_back(obj); gc_counter++; return obj; @@ -48,7 +46,7 @@ struct ManagedHeap{ template PyObject* _new(Type type, T&& val){ using __T = Py_>; - PyObject* obj = new(pool.alloc<__T>()) __T(type, std::forward(val)); + PyObject* obj = new(pool128.alloc<__T>()) __T(type, std::forward(val)); obj->gc.enabled = false; _no_gc.push_back(obj); return obj; @@ -59,7 +57,7 @@ struct ManagedHeap{ #endif ~ManagedHeap(){ - for(PyObject* obj: _no_gc) obj->~PyObject(), pool.dealloc(obj); + for(PyObject* obj: _no_gc) obj->~PyObject(), pool128.dealloc(obj); #if DEBUG_GC_STATS for(auto& [type, count]: deleted){ std::cout << "GC: " << obj_type_name(vm, type) << "=" << count << std::endl; @@ -77,7 +75,7 
@@ struct ManagedHeap{ #if DEBUG_GC_STATS deleted[obj->type] += 1; #endif - obj->~PyObject(), pool.dealloc(obj); + obj->~PyObject(), pool128.dealloc(obj); } } diff --git a/src/memory.h b/src/memory.h index e5d8255f..6b1f8278 100644 --- a/src/memory.h +++ b/src/memory.h @@ -210,6 +210,13 @@ struct MemoryPool{ bool empty() const { return _free_list_size == 0; } bool full() const { return _free_list_size == __MaxBlocks; } + void tidy(){ +#if DEBUG_MEMORY_POOL + if(!full()) throw std::runtime_error("Arena::tidy() called on non-full arena"); +#endif + std::sort(_free_list, _free_list+__MaxBlocks); + } + Block* alloc(){ #if DEBUG_MEMORY_POOL if(empty()) throw std::runtime_error("Arena::alloc() called on empty arena"); @@ -244,6 +251,7 @@ struct MemoryPool{ } if(_arenas.empty()){ + // std::cout << _arenas.size() << ',' << _empty_arenas.size() << ',' << _full_arenas.size() << std::endl; if(_full_arenas.empty()){ _arenas.push_back(new Arena()); }else{ @@ -277,6 +285,7 @@ struct MemoryPool{ if(arena->full() && _arenas.size()>2){ _arenas.erase(arena); if(_full_arenas.size() < FULL_ARENA_SIZE){ + // arena->tidy(); _full_arenas.push_back(arena); }else{ delete arena; @@ -287,9 +296,6 @@ struct MemoryPool{ } ~MemoryPool(){ - // std::cout << _arenas.size() << std::endl; - // std::cout << _empty_arenas.size() << std::endl; - // std::cout << _full_arenas.size() << std::endl; _arenas.apply([](Arena* arena){ delete arena; }); _empty_arenas.apply([](Arena* arena){ delete arena; }); _full_arenas.apply([](Arena* arena){ delete arena; }); @@ -298,6 +304,6 @@ struct MemoryPool{ inline MemoryPool<64> pool64; inline MemoryPool<128> pool128; -inline MemoryPool<256> pool256; +// inline MemoryPool<256> pool256; }; // namespace pkpy diff --git a/src/vm.h b/src/vm.h index f694e3a1..da085687 100644 --- a/src/vm.h +++ b/src/vm.h @@ -189,12 +189,8 @@ public: if(callstack.size() > recursionlimit){ _error("RecursionError", "maximum recursion depth exceeded"); } -#if DEBUG_FRAME_USE_POOL - Frame* frame 
= new(pool256.alloc(sizeof(Frame))) Frame(std::forward(args)...); - return Frame_(frame, &frame_deleter); -#else - return std::make_unique(std::forward(args)...); -#endif + Frame* frame = new(pool128.alloc()) Frame(std::forward(args)...); + return Frame_(frame); } template From 12e07391ee896e910735fd9423481a5fd092f9f1 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 8 Apr 2023 15:53:53 +0800 Subject: [PATCH 67/73] up Update memory.h --- src/common.h | 1 + src/memory.h | 30 +++++++++++++----------------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/common.h b/src/common.h index 5a41d237..e9ce5ecf 100644 --- a/src/common.h +++ b/src/common.h @@ -38,6 +38,7 @@ #define DEBUG_CEVAL_STEP 0 #define DEBUG_FULL_EXCEPTION 0 #define DEBUG_MEMORY_POOL 0 +#define DEBUG_NO_MEMORY_POOL 0 #define DEBUG_NO_AUTO_GC 0 #define DEBUG_GC_STATS 0 diff --git a/src/memory.h b/src/memory.h index 6b1f8278..8bbe953b 100644 --- a/src/memory.h +++ b/src/memory.h @@ -199,8 +199,9 @@ struct MemoryPool{ Block _blocks[__MaxBlocks]; Block* _free_list[__MaxBlocks]; int _free_list_size; + bool dirty; - Arena(): _free_list_size(__MaxBlocks) { + Arena(): _free_list_size(__MaxBlocks), dirty(false){ for(int i=0; i<__MaxBlocks; i++){ _blocks[i].arena = this; _free_list[i] = &_blocks[i]; @@ -222,6 +223,7 @@ struct MemoryPool{ if(empty()) throw std::runtime_error("Arena::alloc() called on empty arena"); #endif _free_list_size--; + if(_free_list_size == 0) dirty = true; return _free_list[_free_list_size]; } @@ -236,14 +238,14 @@ struct MemoryPool{ DoubleLinkedList _arenas; DoubleLinkedList _empty_arenas; - DoubleLinkedList _full_arenas; - - static constexpr int FULL_ARENA_SIZE = 4; template void* alloc() { return alloc(sizeof(__T)); } void* alloc(size_t size){ +#if DEBUG_NO_MEMORY_POOL + return malloc(size); +#endif if(size > __BlockSize){ void* p = malloc(sizeof(void*) + size); memset(p, 0, sizeof(void*)); @@ -252,11 +254,7 @@ struct MemoryPool{ if(_arenas.empty()){ // std::cout << 
_arenas.size() << ',' << _empty_arenas.size() << ',' << _full_arenas.size() << std::endl; - if(_full_arenas.empty()){ - _arenas.push_back(new Arena()); - }else{ - _arenas.move_all_back(_full_arenas); - } + _arenas.push_back(new Arena()); } Arena* arena = _arenas.back(); void* p = arena->alloc()->data; @@ -268,6 +266,10 @@ struct MemoryPool{ } void dealloc(void* p){ +#if DEBUG_NO_MEMORY_POOL + free(p); + return; +#endif #if DEBUG_MEMORY_POOL if(p == nullptr) throw std::runtime_error("MemoryPool::dealloc() called on nullptr"); #endif @@ -282,14 +284,9 @@ struct MemoryPool{ arena->dealloc(block); }else{ arena->dealloc(block); - if(arena->full() && _arenas.size()>2){ + if(arena->full() && arena->dirty){ _arenas.erase(arena); - if(_full_arenas.size() < FULL_ARENA_SIZE){ - // arena->tidy(); - _full_arenas.push_back(arena); - }else{ - delete arena; - } + delete arena; } } } @@ -298,7 +295,6 @@ struct MemoryPool{ ~MemoryPool(){ _arenas.apply([](Arena* arena){ delete arena; }); _empty_arenas.apply([](Arena* arena){ delete arena; }); - _full_arenas.apply([](Arena* arena){ delete arena; }); } }; From d4775a70167e9ff7e445d1720bb19192f2c4aa5f Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 8 Apr 2023 16:02:40 +0800 Subject: [PATCH 68/73] Update memory.h Update frame.h Update frame.h --- src/frame.h | 4 ++-- src/memory.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/frame.h b/src/frame.h index ec253a36..d63d9530 100644 --- a/src/frame.h +++ b/src/frame.h @@ -6,7 +6,7 @@ namespace pkpy{ -static THREAD_LOCAL uint64_t kFrameGlobalId = 0; +static THREAD_LOCAL i64 kFrameGlobalId = 0; using ValueStack = pod_vector; @@ -19,7 +19,7 @@ struct Frame { PyObject* _module; NameDict_ _locals; NameDict_ _closure; - const uint64_t id; + const i64 id; std::vector> s_try_block; NameDict& f_locals() noexcept { return _locals!=nullptr ? 
*_locals : _module->attr(); } diff --git a/src/memory.h b/src/memory.h index 8bbe953b..5c10a8ad 100644 --- a/src/memory.h +++ b/src/memory.h @@ -223,7 +223,6 @@ struct MemoryPool{ if(empty()) throw std::runtime_error("Arena::alloc() called on empty arena"); #endif _free_list_size--; - if(_free_list_size == 0) dirty = true; return _free_list[_free_list_size]; } @@ -260,6 +259,7 @@ struct MemoryPool{ void* p = arena->alloc()->data; if(arena->empty()){ _arenas.pop_back(); + arena->dirty = true; _empty_arenas.push_back(arena); } return p; From 5e13149a4dd32aabbb3b50c526a73bbf6ca9db3c Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 8 Apr 2023 16:22:41 +0800 Subject: [PATCH 69/73] up --- run_profile.sh | 1 + src/vm.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/run_profile.sh b/run_profile.sh index d345e39c..652107c4 100644 --- a/run_profile.sh +++ b/run_profile.sh @@ -1,5 +1,6 @@ # THIS SCRIPT IS NOT WORKING clang++ -pg -O2 -std=c++17 -fno-rtti -stdlib=libc++ -Wall -o pocketpy src/main.cpp time ./pocketpy benchmarks/fib.py +mv benchmarks/gmon.out . 
gprof pocketpy gmon.out > gprof.txt rm gmon.out \ No newline at end of file diff --git a/src/vm.h b/src/vm.h index da085687..7b8439b9 100644 --- a/src/vm.h +++ b/src/vm.h @@ -601,7 +601,7 @@ inline Str VM::disassemble(CodeObject_ co){ case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR: case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_DELETE_LOCAL: case OP_DELETE_GLOBAL: - argStr += " (" + co->names[byte.arg].str().escape(true) + ")"; + argStr += " (" + co->names[byte.arg].str() + ")"; break; case OP_BINARY_OP: argStr += " (" + BINARY_SPECIAL_METHODS[byte.arg].str() + ")"; From 2e464491177b11016cc4a085624dc7b145cb77f6 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 8 Apr 2023 22:41:10 +0800 Subject: [PATCH 70/73] up --- src/ceval.h | 2 -- src/lexer.h | 11 +++----- src/new_str.h | 78 ++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 60 insertions(+), 31 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index c04b29f7..d54a7fff 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -277,8 +277,6 @@ __NEXT_STEP:; } DISPATCH(); case OP_DICT_ADD: { PyObject* kv = frame->popx(); - // we do copy here to avoid accidental gc in `kv` - // TODO: optimize to avoid copy Tuple& t = CAST(Tuple& ,kv); fast_call(__setitem__, Args{frame->top_1(), t[0], t[1]}); } DISPATCH(); diff --git a/src/lexer.h b/src/lexer.h index 74c8f96b..cf96cd8e 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -2,6 +2,7 @@ #include "common.h" #include "error.h" +#include "new_str.h" #include "str.h" namespace pkpy{ @@ -170,13 +171,9 @@ struct Lexer { int eat_name() { curr_char--; while(true){ - uint8_t c = peekchar(); - int u8bytes = 0; - if((c & 0b10000000) == 0b00000000) u8bytes = 1; - else if((c & 0b11100000) == 0b11000000) u8bytes = 2; - else if((c & 0b11110000) == 0b11100000) u8bytes = 3; - else if((c & 0b11111000) == 0b11110000) u8bytes = 4; - else return 1; + unsigned char c = peekchar(); + int u8bytes = utf8len(c); + if(u8bytes == 0) return 1; if(u8bytes == 
1){ if(isalpha(c) || c=='_' || isdigit(c)) { curr_char++; diff --git a/src/new_str.h b/src/new_str.h index 73d96d1d..031c8433 100644 --- a/src/new_str.h +++ b/src/new_str.h @@ -2,33 +2,52 @@ #include "common.h" #include "memory.h" -#include namespace pkpy{ -struct String{ - char* data; - int size; +inline int utf8len(unsigned char c){ + if((c & 0b10000000) == 0) return 1; + if((c & 0b11100000) == 0b11000000) return 2; + if((c & 0b11110000) == 0b11100000) return 3; + if((c & 0b11111000) == 0b11110000) return 4; + if((c & 0b11111100) == 0b11111000) return 5; + if((c & 0b11111110) == 0b11111100) return 6; + return 0; +} - String(): data((char*)pool64.alloc(0)), size(0) {} - String(int size): data((char*)pool64.alloc(size)), size(size) {} - String(const char* str) { - size = strlen(str); +struct String{ + int size; + bool is_ascii; + char* data; + + String(): size(0), is_ascii(true), data((char*)pool64.alloc(0)) {} + + String(int size, bool is_ascii): size(size), is_ascii(is_ascii) { data = (char*)pool64.alloc(size); - memcpy(data, str, size); } - String(const String& other): data((char*)pool64.alloc(other.size)), size(other.size) { + String(const char* str): size(strlen(str)), is_ascii(true) { + data = (char*)pool64.alloc(size); + for(int i=0; i 0){ + j += utf8len(data[j]); + i--; + } + return j; + } + + String u8_getitem(int i) const { + i = _u8_index(i); + return substr(i, utf8len(data[i])); + } + + String u8_slice(int start, int end) const{ + start = _u8_index(start); + end = _u8_index(end); + return substr(start, end - start); + } }; -struct UnicodeString: String{ - -}; - - } // namespace pkpy \ No newline at end of file From 7324f897b51c8dfbbe01ee52723f8e11b42b4854 Mon Sep 17 00:00:00 2001 From: BLUELOVETH Date: Sat, 8 Apr 2023 16:17:58 +0000 Subject: [PATCH 71/73] up --- amalgamate.py | 2 +- src/iter.h | 2 + src/lexer.h | 1 - src/memory.h | 1 - src/new_str.h | 177 -------------------------------- src/pocketpy.h | 52 +++++----- src/str.h | 271 
+++++++++++++++++++++++++++++++++++++------------ src/vm.h | 6 +- 8 files changed, 239 insertions(+), 273 deletions(-) delete mode 100644 src/new_str.h diff --git a/amalgamate.py b/amalgamate.py index 99ef93fe..9775e061 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -6,7 +6,7 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f: OPCODES_TEXT = f.read() pipeline = [ - ["common.h", "vector.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], + ["common.h", "memory.h", "vector.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], ["obj.h", "codeobject.h", "frame.h"], ["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"], ["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"] diff --git a/src/iter.h b/src/iter.h index e7cf6f04..bd048c05 100644 --- a/src/iter.h +++ b/src/iter.h @@ -49,6 +49,8 @@ public: StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {} PyObject* next() override{ + // TODO: optimize this to use iterator + // operator[] is O(n) complexity Str* str = &OBJ_GET(Str, ref); if(index == str->u8_length()) return nullptr; return VAR(str->u8_getitem(index++)); diff --git a/src/lexer.h b/src/lexer.h index cf96cd8e..6ed245ea 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -2,7 +2,6 @@ #include "common.h" #include "error.h" -#include "new_str.h" #include "str.h" namespace pkpy{ diff --git a/src/memory.h b/src/memory.h index 5c10a8ad..fe84cbbe 100644 --- a/src/memory.h +++ b/src/memory.h @@ -73,7 +73,6 @@ struct LinkedListNode{ LinkedListNode* next; }; - template struct DoubleLinkedList{ static_assert(std::is_base_of_v); diff --git a/src/new_str.h b/src/new_str.h deleted file mode 100644 index 031c8433..00000000 --- a/src/new_str.h +++ /dev/null @@ -1,177 +0,0 @@ -#pragma once - -#include "common.h" -#include "memory.h" - -namespace pkpy{ - -inline int utf8len(unsigned char c){ - if((c & 0b10000000) == 0) return 1; - if((c & 0b11100000) == 0b11000000) return 2; - if((c & 0b11110000) == 
0b11100000) return 3; - if((c & 0b11111000) == 0b11110000) return 4; - if((c & 0b11111100) == 0b11111000) return 5; - if((c & 0b11111110) == 0b11111100) return 6; - return 0; -} - -struct String{ - int size; - bool is_ascii; - char* data; - - String(): size(0), is_ascii(true), data((char*)pool64.alloc(0)) {} - - String(int size, bool is_ascii): size(size), is_ascii(is_ascii) { - data = (char*)pool64.alloc(size); - } - - String(const char* str): size(strlen(str)), is_ascii(true) { - data = (char*)pool64.alloc(size); - for(int i=0; i(const String& other) const { - int ret = strncmp(data, other.data, std::min(size, other.size)); - if(ret != 0) return ret > 0; - return size > other.size; - } - - bool operator<=(const String& other) const { - int ret = strncmp(data, other.data, std::min(size, other.size)); - if(ret != 0) return ret < 0; - return size <= other.size; - } - - bool operator>=(const String& other) const { - int ret = strncmp(data, other.data, std::min(size, other.size)); - if(ret != 0) return ret > 0; - return size >= other.size; - } - - String substr(int start, int len) const { - String ret(len, is_ascii); - memcpy(ret.data, data + start, len); - return ret; - } - - char* dup_c_str() const { - char* p = (char*)malloc(size + 1); - memcpy(p, data, size); - p[size] = 0; - return p; - } - - std::string_view view() const { - return std::string_view(data, size); - } - - std::string str() const { - return std::string(data, size); - } - - String lstrip() const { - std::string copy = str(); - copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) { - // std::isspace(c) does not working on windows (Debug) - return c != ' ' && c != '\t' && c != '\r' && c != '\n'; - })); - return String(copy.c_str()); - } - - /*************unicode*************/ - - int _u8_index(int i) const{ - if(is_ascii) return i; - int j = 0; - while(i > 0){ - j += utf8len(data[j]); - i--; - } - return j; - } - - String u8_getitem(int i) const { - i = _u8_index(i); - return 
substr(i, utf8len(data[i])); - } - - String u8_slice(int start, int end) const{ - start = _u8_index(start); - end = _u8_index(end); - return substr(start, end - start); - } -}; - -} // namespace pkpy \ No newline at end of file diff --git a/src/pocketpy.h b/src/pocketpy.h index 4ce389d1..3459c8e3 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -131,8 +131,8 @@ inline void init_builtins(VM* _vm) { _vm->bind_builtin_func<1>("ord", [](VM* vm, Args& args) { const Str& s = CAST(Str&, args[0]); - if (s.size() != 1) vm->TypeError("ord() expected an ASCII character"); - return VAR((i64)(s.c_str()[0])); + if (s.length()!=1) vm->TypeError("ord() expected an ASCII character"); + return VAR((i64)(s[0])); }); _vm->bind_builtin_func<2>("hasattr", [](VM* vm, Args& args) { @@ -237,8 +237,8 @@ inline void init_builtins(VM* _vm) { const Str& s = CAST(Str&, args[0]); try{ size_t parsed = 0; - i64 val = S_TO_INT(s, &parsed, 10); - if(parsed != s.size()) throw std::invalid_argument(""); + i64 val = S_TO_INT(s.str(), &parsed, 10); + if(parsed != s.length()) throw std::invalid_argument(""); return VAR(val); }catch(std::invalid_argument&){ vm->ValueError("invalid literal for int(): " + s.escape(true)); @@ -284,7 +284,7 @@ inline void init_builtins(VM* _vm) { if(s == "inf") return VAR(INFINITY); if(s == "-inf") return VAR(-INFINITY); try{ - f64 val = S_TO_FLOAT(s); + f64 val = S_TO_FLOAT(s.str()); return VAR(val); }catch(std::invalid_argument&){ vm->ValueError("invalid literal for float(): '" + s + "'"); @@ -327,7 +327,7 @@ inline void init_builtins(VM* _vm) { _vm->bind_method<1>("str", "__contains__", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); const Str& other = CAST(Str&, args[1]); - return VAR(self.find(other) != Str::npos); + return VAR(self.index(other) != -1); }); _vm->bind_method<0>("str", "__str__", CPP_LAMBDA(args[0])); @@ -361,7 +361,7 @@ inline void init_builtins(VM* _vm) { if(is_type(args[1], vm->tp_slice)){ Slice s = _CAST(Slice, args[1]); 
s.normalize(self.u8_length()); - return VAR(self.u8_substr(s.start, s.stop)); + return VAR(self.u8_slice(s.start, s.stop)); } int index = CAST(int, args[1]); @@ -382,28 +382,25 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<2>("str", "replace", [](VM* vm, Args& args) { - const Str& _self = CAST(Str&, args[0]); - const Str& _old = CAST(Str&, args[1]); - const Str& _new = CAST(Str&, args[2]); - Str _copy = _self; - size_t pos = 0; - while ((pos = _copy.find(_old, pos)) != std::string::npos) { - _copy.replace(pos, _old.length(), _new); - pos += _new.length(); - } - return VAR(_copy); + const Str& self = CAST(Str&, args[0]); + const Str& old = CAST(Str&, args[1]); + const Str& new_ = CAST(Str&, args[2]); + return VAR(self.replace(old, new_)); }); _vm->bind_method<1>("str", "startswith", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); const Str& prefix = CAST(Str&, args[1]); - return VAR(self.find(prefix) == 0); + return VAR(self.index(prefix) == 0); }); _vm->bind_method<1>("str", "endswith", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); const Str& suffix = CAST(Str&, args[1]); - return VAR(self.rfind(suffix) == self.length() - suffix.length()); + int offset = self.length() - suffix.length(); + if(offset < 0) return vm->False; + bool ok = memcmp(self.data+offset, suffix.data, suffix.length()) == 0; + return VAR(ok); }); _vm->bind_method<1>("str", "join", [](VM* vm, Args& args) { @@ -664,13 +661,15 @@ struct ReMatch { } }; -inline PyObject* _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){ +inline PyObject* _regex_search(const Str& _pattern, const Str& _string, bool fromStart, VM* vm){ + std::string pattern = _pattern.str(); + std::string string = _string.str(); std::regex re(pattern); std::smatch m; if(std::regex_search(string, m, re)){ if(fromStart && m.position() != 0) return vm->None; - i64 start = string._to_u8_index(m.position()); - i64 end = string._to_u8_index(m.position() + m.length()); 
+ i64 start = _string._u8_index(m.position()); + i64 end = _string._u8_index(m.position() + m.length()); return VAR_T(ReMatch, start, end, m); } return vm->None; @@ -695,14 +694,15 @@ inline void add_module_re(VM* vm){ vm->bind_func<3>(mod, "sub", [](VM* vm, Args& args) { const Str& pattern = CAST(Str&, args[0]); const Str& repl = CAST(Str&, args[1]); - const Str& string = CAST(Str&, args[2]); - std::regex re(pattern); + const Str& _string = CAST(Str&, args[2]); + std::regex re(pattern.str()); + std::string string = _string.str(); return VAR(std::regex_replace(string, re, repl)); }); vm->bind_func<2>(mod, "split", [](VM* vm, Args& args) { - const Str& pattern = CAST(Str&, args[0]); - const Str& string = CAST(Str&, args[1]); + std::string pattern = CAST(Str&, args[0]).str(); + std::string string = CAST(Str&, args[1]).str(); std::regex re(pattern); std::sregex_token_iterator it(string.begin(), string.end(), re, -1); std::sregex_token_iterator end; diff --git a/src/str.h b/src/str.h index 53d71297..af0e1acb 100644 --- a/src/str.h +++ b/src/str.h @@ -1,67 +1,182 @@ #pragma once #include "common.h" +#include "memory.h" namespace pkpy { typedef std::stringstream StrStream; -class Str : public std::string { - mutable std::vector* _u8_index = nullptr; +inline int utf8len(unsigned char c){ + if((c & 0b10000000) == 0) return 1; + if((c & 0b11100000) == 0b11000000) return 2; + if((c & 0b11110000) == 0b11100000) return 3; + if((c & 0b11111000) == 0b11110000) return 4; + if((c & 0b11111100) == 0b11111000) return 5; + if((c & 0b11111110) == 0b11111100) return 6; + return 0; +} - void utf8_lazy_init() const{ - if(_u8_index != nullptr) return; - _u8_index = new std::vector(); - _u8_index->reserve(size()); - if(size() > 65535) throw std::runtime_error("str has more than 65535 bytes."); - for(uint16_t i = 0; i < size(); i++){ - // https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80 - if((at(i) & 0xC0) != 0x80) _u8_index->push_back(i); - } - } -public: 
+struct Str{ + int size; + bool is_ascii; + char* data; uint16_t _cached_sn_index = 0; - Str() : std::string() {} - Str(const char* s) : std::string(s) {} - Str(const char* s, size_t n) : std::string(s, n) {} - Str(const std::string& s) : std::string(s) {} - Str(const Str& s) : std::string(s) { - if(s._u8_index != nullptr){ - _u8_index = new std::vector(*s._u8_index); + Str(): size(0), is_ascii(true), data((char*)pool64.alloc(0)) {} + + Str(int size, bool is_ascii): size(size), is_ascii(is_ascii) { + data = (char*)pool64.alloc(size); + } + +#define STR_INIT() \ + data = (char*)pool64.alloc(size); \ + for(int i=0; ibegin(), _u8_index->end(), index); - if(p != _u8_index->end() && *p != index) UNREACHABLE(); - return p - _u8_index->begin(); + Str(std::string_view s): size(s.size()), is_ascii(true) { + STR_INIT() } - int u8_length() const { - utf8_lazy_init(); - return _u8_index->size(); + Str(const char* s): size(strlen(s)), is_ascii(true) { + STR_INIT() } - Str u8_getitem(int i) const{ - return u8_substr(i, i+1); + Str(const char* s, int len): size(len), is_ascii(true) { + STR_INIT() } - Str u8_substr(int start, int end) const{ - utf8_lazy_init(); - if(start >= end) return Str(); - int c_end = end >= _u8_index->size() ? 
size() : _u8_index->at(end); - return substr(_u8_index->at(start), c_end - _u8_index->at(start)); +#undef STR_INIT + + Str(const Str& other): size(other.size), is_ascii(other.is_ascii) { + data = (char*)pool64.alloc(size); + memcpy(data, other.data, size); + } + + Str(Str&& other): size(other.size), is_ascii(other.is_ascii), data(other.data) { + other.data = nullptr; + other.size = 0; + } + + Str& operator=(const Str& other){ + if(data!=nullptr) pool64.dealloc(data); + size = other.size; + is_ascii = other.is_ascii; + data = (char*)pool64.alloc(size); + memcpy(data, other.data, size); + return *this; + } + + Str& operator=(Str&& other) noexcept{ + if(data!=nullptr) pool64.dealloc(data); + size = other.size; + is_ascii = other.is_ascii; + data = other.data; + other.data = nullptr; + return *this; + } + + ~Str(){ + if(data!=nullptr) pool64.dealloc(data); + } + + char operator[](int idx) const { + return data[idx]; + } + + int length() const { + return size; + } + + size_t hash() const{ + return std::hash()(sv()); + } + + Str operator+(const Str& other) const { + Str ret(size + other.size, is_ascii && other.is_ascii); + memcpy(ret.data, data, size); + memcpy(ret.data + size, other.data, other.size); + return ret; + } + + Str operator+(const char* p) const { + Str other(p); + return *this + other; + } + + friend Str operator+(const char* p, const Str& str){ + Str other(p); + return other + str; + } + + friend std::ostream& operator<<(std::ostream& os, const Str& str){ + os.write(str.data, str.size); + return os; + } + + bool operator==(const Str& other) const { + if(size != other.size) return false; + return memcmp(data, other.data, size) == 0; + } + + bool operator!=(const Str& other) const { + if(size != other.size) return true; + return memcmp(data, other.data, size) != 0; + } + + bool operator<(const Str& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret < 0; + return size < other.size; + } + + bool 
operator>(const Str& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret > 0; + return size > other.size; + } + + bool operator<=(const Str& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret < 0; + return size <= other.size; + } + + bool operator>=(const Str& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret > 0; + return size >= other.size; + } + + Str substr(int start, int len) const { + Str ret(len, is_ascii); + memcpy(ret.data, data + start, len); + return ret; + } + + char* c_str_dup() const { + char* p = (char*)malloc(size + 1); + memcpy(p, data, size); + p[size] = 0; + return p; + } + + std::string_view sv() const { + return std::string_view(data, size); + } + + std::string str() const { + return std::string(data, size); } Str lstrip() const { - Str copy(*this); + std::string copy = str(); copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) { // std::isspace(c) does not working on windows (Debug) return c != ' ' && c != '\t' && c != '\r' && c != '\n'; @@ -69,10 +184,6 @@ public: return Str(copy); } - size_t hash() const { - return std::hash()(*this); - } - Str escape(bool single_quote) const { StrStream ss; ss << (single_quote ? 
'\'' : '"'); @@ -104,24 +215,60 @@ public: return ss.str(); } - Str& operator=(const Str& s){ - this->std::string::operator=(s); - delete _u8_index; - if(s._u8_index != nullptr){ - _u8_index = new std::vector(*s._u8_index); + int index(const Str& sub) const { + auto p = std::search(data, data + size, sub.data, sub.data + sub.size); + if(p == data + size) return -1; + return p - data; + } + + Str replace(const Str& old, const Str& new_) const { + StrStream ss; + int i = 0; + while(i < size){ + int j = index(old); + if(j == -1){ + ss << substr(i, size - i); + break; + } + ss << substr(i, j - i); + ss << new_; + i = j + old.size; } - return *this; + return ss.str(); } - Str& operator=(Str&& s){ - this->std::string::operator=(std::move(s)); - delete _u8_index; - this->_u8_index = s._u8_index; - s._u8_index = nullptr; - return *this; + /*************unicode*************/ + + int _u8_index(int i) const{ + if(is_ascii) return i; + int j = 0; + while(i > 0){ + j += utf8len(data[j]); + i--; + } + return j; } - ~Str(){ delete _u8_index;} + Str u8_getitem(int i) const{ + i = _u8_index(i); + return substr(i, utf8len(data[i])); + } + + Str u8_slice(int start, int end) const{ + // TODO: optimize this + start = _u8_index(start); + end = _u8_index(end); + return substr(start, end - start); + } + + int u8_length() const { + if(is_ascii) return size; + int ret = 0; + for(int i=0; i> _interned; static std::vector _r_interned; - static StrName get(const Str& s){ - return get(s.c_str()); - } - - static StrName get(const char* s){ + static StrName get(std::string_view s){ auto it = _interned.find(s); if(it != _interned.end()) return StrName(it->second); uint16_t index = (uint16_t)(_r_interned.size() + 1); diff --git a/src/vm.h b/src/vm.h index 7b8439b9..5d2ce1c4 100644 --- a/src/vm.h +++ b/src/vm.h @@ -561,8 +561,8 @@ inline PyObject* VM::new_module(StrName name) { inline Str VM::disassemble(CodeObject_ co){ auto pad = [](const Str& s, const int n){ - if(s.size() >= n) return 
s.substr(0, n); - return s + std::string(n - s.size(), ' '); + if(s.length() >= n) return s.substr(0, n); + return s + std::string(n - s.length(), ' '); }; std::vector jumpTargets; @@ -591,7 +591,7 @@ inline Str VM::disassemble(CodeObject_ co){ ss << pad(line, 8) << pointer << pad(std::to_string(i), 3); ss << " " << pad(OP_NAMES[byte.op], 20) << " "; // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5); - Str argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); + std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); switch(byte.op){ case OP_LOAD_CONST: argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; From c07ae35b8e40509a4f75487511e5963fa9f6fc94 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 9 Apr 2023 16:11:00 +0800 Subject: [PATCH 72/73] reimpl `Str` --- src/ceval.h | 16 +++--- src/cffi.h | 20 +++---- src/compiler.h | 14 ++--- src/error.h | 23 ++++---- src/expr.h | 76 +++++++++++++-------------- src/frame.h | 4 +- src/gc.h | 8 +-- src/lexer.h | 13 ++--- src/namedict.h | 4 +- src/pocketpy.h | 73 +++++++++++++------------- src/str.h | 140 ++++++++++++++++++++++++++++++++----------------- src/vm.h | 90 ++++++++++++++----------------- 12 files changed, 255 insertions(+), 226 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index d54a7fff..edad3433 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -33,7 +33,7 @@ __NEXT_STEP:; case OP_ROT_TWO: std::swap(frame->top(), frame->top_1()); DISPATCH(); case OP_PRINT_EXPR: { PyObject* obj = frame->top(); // use top() to avoid accidental gc - if(obj != None) *_stdout << CAST(Str, asRepr(obj)) << '\n'; + if(obj != None) *_stdout << CAST(Str&, asRepr(obj)) << '\n'; frame->pop(); } DISPATCH(); /*****************************************/ @@ -168,7 +168,7 @@ __NEXT_STEP:; frame->push(VAR(std::move(items))); } DISPATCH(); case OP_BUILD_STRING: { - StrStream ss; // asStr() may run extra bytecode + std::stringstream ss; // asStr() may run extra bytecode for(int i=byte.arg-1; i>=0; i--) 
ss << CAST(Str&, asStr(frame->top_n(i))); frame->pop_n(byte.arg); frame->push(VAR(ss.str())); @@ -232,7 +232,7 @@ __NEXT_STEP:; case OP_GOTO: { StrName label = frame->co->names[byte.arg]; auto it = frame->co->labels.find(label); - if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found"); + if(it == frame->co->labels.end()) _error("KeyError", fmt("label ", label.escape(), " not found")); frame->jump_abs_break(it->second); } DISPATCH(); /*****************************************/ @@ -317,13 +317,13 @@ __NEXT_STEP:; auto it = _lazy_modules.find(name); if(it == _lazy_modules.end()){ bool ok = false; - source = _read_file_cwd(name.str() + ".py", &ok); - if(!ok) _error("ImportError", "module " + name.str().escape(true) + " not found"); + source = _read_file_cwd(fmt(name, ".py"), &ok); + if(!ok) _error("ImportError", fmt("module ", name.escape(), " not found")); }else{ source = it->second; _lazy_modules.erase(it); } - CodeObject_ code = compile(source, name.str(), EXEC_MODE); + CodeObject_ code = compile(source, name.sv(), EXEC_MODE); PyObject* new_mod = new_module(name); _exec(code, new_mod); new_mod->attr()._try_perfect_rehash(); @@ -335,7 +335,7 @@ __NEXT_STEP:; case OP_IMPORT_STAR: { PyObject* obj = frame->popx(); for(auto& [name, value]: obj->attr().items()){ - Str s = name.str(); + std::string_view s = name.sv(); if(s.empty() || s[0] == '_') continue; frame->f_globals().set(name, value); } @@ -416,7 +416,7 @@ __NEXT_STEP:; _error(type, msg); } DISPATCH(); case OP_RE_RAISE: _raise(); DISPATCH(); - default: throw std::runtime_error(OP_NAMES[byte.op] + std::string(" is not implemented")); + default: throw std::runtime_error(fmt(OP_NAMES[byte.op], " is not implemented")); } UNREACHABLE(); } diff --git a/src/cffi.h b/src/cffi.h index 117d1248..7e532117 100644 --- a/src/cffi.h +++ b/src/cffi.h @@ -131,14 +131,14 @@ struct TypeDB{ return index == 0 ? 
nullptr : &_by_index[index-1]; } - const TypeInfo* get(const char name[]) const { + const TypeInfo* get(std::string_view name) const { auto it = _by_name.find(name); if(it == _by_name.end()) return nullptr; return get(it->second); } const TypeInfo* get(const Str& s) const { - return get(s.c_str()); + return get(s.sv()); } template @@ -203,7 +203,7 @@ struct Pointer{ vm->bind_method<0>(type, "__repr__", [](VM* vm, Args& args) { Pointer& self = CAST(Pointer&, args[0]); - StrStream ss; + std::stringstream ss; ss << "<" << self.ctype->name; for(int i=0; i"; @@ -319,7 +319,7 @@ struct Pointer{ Pointer _to(VM* vm, StrName name){ auto it = ctype->members.find(name); if(it == ctype->members.end()){ - vm->AttributeError(Str("struct '") + ctype->name + "' has no member " + name.str().escape(true)); + vm->AttributeError(fmt("struct '", ctype->name, "' has no member ", name.escape())); } const MemberInfo& info = it->second; return {info.type, level, ptr+info.offset}; @@ -390,7 +390,7 @@ struct CType{ vm->bind_static_method<1>(type, "__new__", [](VM* vm, Args& args) { const Str& name = CAST(Str&, args[0]); const TypeInfo* type = _type_db.get(name); - if(type == nullptr) vm->TypeError("unknown type: " + name.escape(true)); + if(type == nullptr) vm->TypeError("unknown type: " + name.escape()); return VAR_T(CType, type); }); @@ -432,22 +432,22 @@ inline void add_module_c(VM* vm){ Pointer& self = CAST(Pointer&, args[0]); const Str& name = CAST(Str&, args[1]); int level = 0; - for(int i=name.size()-1; i>=0; i--){ + for(int i=name.length()-1; i>=0; i--){ if(name[i] == '*') level++; else break; } if(level == 0) vm->TypeError("expect a pointer type, such as 'int*'"); - Str type_s = name.substr(0, name.size()-level); + Str type_s = name.substr(0, name.length()-level); const TypeInfo* type = _type_db.get(type_s); - if(type == nullptr) vm->TypeError("unknown type: " + type_s.escape(true)); + if(type == nullptr) vm->TypeError("unknown type: " + type_s.escape()); return VAR_T(Pointer, type, 
level, self.ptr); }); vm->bind_func<1>(mod, "sizeof", [](VM* vm, Args& args) { const Str& name = CAST(Str&, args[0]); - if(name.find('*') != Str::npos) return VAR(sizeof(void*)); + if(name.index("*") != -1) return VAR(sizeof(void*)); const TypeInfo* type = _type_db.get(name); - if(type == nullptr) vm->TypeError("unknown type: " + name.escape(true)); + if(type == nullptr) vm->TypeError("unknown type: " + name.escape()); return VAR(type->size); }); diff --git a/src/compiler.h b/src/compiler.h index 6175d1c0..77d33c3f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -112,9 +112,9 @@ class Compiler { void consume(TokenIndex expected) { if (!match(expected)){ - StrStream ss; - ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'"; - SyntaxError(ss.str()); + SyntaxError( + fmt("expected '", TK_STR(expected), "', but got '", TK_STR(curr().type), "'") + ); } } @@ -190,7 +190,7 @@ class Compiler { _compile_f_args(e->decl, false); consume(TK(":")); } - e->decl->code = push_context(lexer->src, e->decl->name.str()); + e->decl->code = push_context(lexer->src, e->decl->name.sv()); EXPR(false); // https://github.com/blueloveTH/pocketpy/issues/37 ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); @@ -775,7 +775,7 @@ __SUBSCR_END: if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); bool ok = ctx()->add_label(prev().str()); - if(!ok) SyntaxError("label " + prev().str().escape(true) + " already exists"); + if(!ok) SyntaxError("label " + prev().str().escape() + " already exists"); consume_end_stmt(); } break; case TK("goto"): @@ -877,7 +877,7 @@ __SUBSCR_END: if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - decl->code = push_context(lexer->src, decl->name.str()); + decl->code = push_context(lexer->src, decl->name.sv()); compile_block_body(); pop_context(); ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line); @@ -928,7 +928,7 @@ 
__SUBSCR_END: void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); } public: - Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ + Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode){ this->vm = vm; this->used = false; this->lexer = std::make_unique( diff --git a/src/error.h b/src/error.h index 6a82967e..bd7182cd 100644 --- a/src/error.h +++ b/src/error.h @@ -38,14 +38,15 @@ struct SourceData { return {_start, i}; } - SourceData(const char* source, Str filename, CompileMode mode) { + SourceData(const Str& source, const Str& filename, CompileMode mode) { + int index = 0; // Skip utf8 BOM if there is any. - if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; + if (strncmp(source.begin(), "\xEF\xBB\xBF", 3) == 0) index += 3; // Remove all '\r' - StrStream ss; - while(*source != '\0'){ - if(*source != '\r') ss << *source; - source++; + std::stringstream ss; + while(index < source.length()){ + if(source[index] != '\r') ss << source[index]; + index++; } this->filename = filename; @@ -55,14 +56,14 @@ struct SourceData { } Str snapshot(int lineno, const char* cursor=nullptr){ - StrStream ss; + std::stringstream ss; ss << " " << "File \"" << filename << "\", line " << lineno << '\n'; std::pair pair = get_line(lineno); Str line = ""; int removed_spaces = 0; if(pair.first && pair.second){ line = Str(pair.first, pair.second-pair.first).lstrip(); - removed_spaces = pair.second - pair.first - line.size(); + removed_spaces = pair.second - pair.first - line.length(); if(line.empty()) line = ""; } ss << " " << line; @@ -91,11 +92,11 @@ public: Str summary() const { StackTrace st(stacktrace); - StrStream ss; + std::stringstream ss; if(is_re) ss << "Traceback (most recent call last):\n"; while(!st.empty()) { ss << st.top() << '\n'; st.pop(); } - if (!msg.empty()) ss << type.str() << ": " << msg; - else ss << type.str(); + if (!msg.empty()) ss << type.sv() << ": " << msg; + else ss << 
type.sv(); return ss.str(); } }; diff --git a/src/expr.h b/src/expr.h index ece07980..6a337824 100644 --- a/src/expr.h +++ b/src/expr.h @@ -15,7 +15,7 @@ struct Expr{ int line = 0; virtual ~Expr() = default; virtual void emit(CodeEmitContext* ctx) = 0; - virtual Str str() const = 0; + virtual std::string str() const = 0; virtual bool is_starred() const { return false; } virtual bool is_literal() const { return false; } @@ -23,7 +23,7 @@ struct Expr{ virtual bool is_attrib() const { return false; } // for OP_DELETE_XXX - virtual bool emit_del(CodeEmitContext* ctx) { return false; } + [[nodiscard]] virtual bool emit_del(CodeEmitContext* ctx) { return false; } // for OP_STORE_XXX [[nodiscard]] virtual bool emit_store(CodeEmitContext* ctx) { return false; } @@ -64,8 +64,8 @@ struct CodeEmitContext{ expr->emit(this); } - Str _log_s_expr(){ - StrStream ss; + std::string _log_s_expr(){ + std::stringstream ss; for(auto& e: s_expr.data()) ss << e->str() << " "; return ss.str(); } @@ -118,7 +118,7 @@ struct NameExpr: Expr{ NameScope scope; NameExpr(StrName name, NameScope scope): name(name), scope(scope) {} - Str str() const override { return "$" + name.str(); } + std::string str() const override { return fmt("Name(", name.escape(), ")"); } void emit(CodeEmitContext* ctx) override { int index = ctx->add_name(name); @@ -161,7 +161,7 @@ struct NameExpr: Expr{ struct StarredExpr: Expr{ Expr_ child; StarredExpr(Expr_&& child): child(std::move(child)) {} - Str str() const override { return "*"; } + std::string str() const override { return "Starred()"; } bool is_starred() const override { return true; } @@ -180,7 +180,7 @@ struct StarredExpr: Expr{ struct NotExpr: Expr{ Expr_ child; NotExpr(Expr_&& child): child(std::move(child)) {} - Str str() const override { return "not"; } + std::string str() const override { return "Not()"; } void emit(CodeEmitContext* ctx) override { child->emit(ctx); @@ -192,7 +192,7 @@ struct NotExpr: Expr{ struct AndExpr: Expr{ Expr_ lhs; Expr_ rhs; - 
Str str() const override { return "and"; } + std::string str() const override { return "And()"; } void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); @@ -206,7 +206,7 @@ struct AndExpr: Expr{ struct OrExpr: Expr{ Expr_ lhs; Expr_ rhs; - Str str() const override { return "or"; } + std::string str() const override { return "Or()"; } void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); @@ -220,7 +220,7 @@ struct OrExpr: Expr{ struct Literal0Expr: Expr{ TokenIndex token; Literal0Expr(TokenIndex token): token(token) {} - Str str() const override { return TK_STR(token); } + std::string str() const override { return TK_STR(token); } void emit(CodeEmitContext* ctx) override { switch (token) { @@ -239,7 +239,7 @@ struct Literal0Expr: Expr{ struct LiteralExpr: Expr{ TokenValue value; LiteralExpr(TokenValue value): value(value) {} - Str str() const override { + std::string str() const override { if(std::holds_alternative(value)){ return std::to_string(std::get(value)); } @@ -249,7 +249,8 @@ struct LiteralExpr: Expr{ } if(std::holds_alternative(value)){ - return std::get(value).escape(true); + Str s = std::get(value).escape(); + return s.str(); } UNREACHABLE(); @@ -285,7 +286,7 @@ struct LiteralExpr: Expr{ struct NegatedExpr: Expr{ Expr_ child; NegatedExpr(Expr_&& child): child(std::move(child)) {} - Str str() const override { return "-"; } + std::string str() const override { return "Negated()"; } void emit(CodeEmitContext* ctx) override { VM* vm = ctx->vm; @@ -318,7 +319,7 @@ struct SliceExpr: Expr{ Expr_ start; Expr_ stop; Expr_ step; - Str str() const override { return "slice()"; } + std::string str() const override { return "Slice()"; } void emit(CodeEmitContext* ctx) override { if(start){ @@ -346,7 +347,7 @@ struct SliceExpr: Expr{ struct DictItemExpr: Expr{ Expr_ key; Expr_ value; - Str str() const override { return "k:v"; } + std::string str() const override { return "DictItem()"; } void emit(CodeEmitContext* ctx) override { value->emit(ctx); @@ -368,7 
+369,7 @@ struct SequenceExpr: Expr{ struct ListExpr: SequenceExpr{ using SequenceExpr::SequenceExpr; - Str str() const override { return "list()"; } + std::string str() const override { return "List()"; } Opcode opcode() const override { return OP_BUILD_LIST; } bool is_json_object() const override { return true; } @@ -376,7 +377,7 @@ struct ListExpr: SequenceExpr{ struct DictExpr: SequenceExpr{ using SequenceExpr::SequenceExpr; - Str str() const override { return "dict()"; } + std::string str() const override { return "Dict()"; } Opcode opcode() const override { return OP_BUILD_DICT; } bool is_json_object() const override { return true; } @@ -384,13 +385,13 @@ struct DictExpr: SequenceExpr{ struct SetExpr: SequenceExpr{ using SequenceExpr::SequenceExpr; - Str str() const override { return "set()"; } + std::string str() const override { return "Set()"; } Opcode opcode() const override { return OP_BUILD_SET; } }; struct TupleExpr: SequenceExpr{ using SequenceExpr::SequenceExpr; - Str str() const override { return "tuple()"; } + std::string str() const override { return "Tuple()"; } Opcode opcode() const override { return OP_BUILD_TUPLE; } bool emit_store(CodeEmitContext* ctx) override { @@ -467,25 +468,25 @@ struct CompExpr: Expr{ struct ListCompExpr: CompExpr{ Opcode op0() override { return OP_BUILD_LIST; } Opcode op1() override { return OP_LIST_APPEND; } - Str str() const override { return "listcomp()"; } + std::string str() const override { return "ListComp()"; } }; struct DictCompExpr: CompExpr{ Opcode op0() override { return OP_BUILD_DICT; } Opcode op1() override { return OP_DICT_ADD; } - Str str() const override { return "dictcomp()"; } + std::string str() const override { return "DictComp()"; } }; struct SetCompExpr: CompExpr{ Opcode op0() override { return OP_BUILD_SET; } Opcode op1() override { return OP_SET_ADD; } - Str str() const override { return "setcomp()"; } + std::string str() const override { return "SetComp()"; } }; struct LambdaExpr: Expr{ 
FuncDecl_ decl; NameScope scope; - Str str() const override { return ""; } + std::string str() const override { return "Lambda()"; } LambdaExpr(NameScope scope){ this->decl = make_sp(); @@ -502,21 +503,21 @@ struct LambdaExpr: Expr{ struct FStringExpr: Expr{ Str src; FStringExpr(const Str& src): src(src) {} - Str str() const override { - return "f" + src.escape(true); + std::string str() const override { + return fmt("f", src.escape()); } void emit(CodeEmitContext* ctx) override { VM* vm = ctx->vm; static const std::regex pattern(R"(\{(.*?)\})"); - std::sregex_iterator begin(src.begin(), src.end(), pattern); - std::sregex_iterator end; + std::cregex_iterator begin(src.begin(), src.end(), pattern); + std::cregex_iterator end; int size = 0; int i = 0; for(auto it = begin; it != end; it++) { - std::smatch m = *it; + std::cmatch m = *it; if (i < m.position()) { - std::string literal = src.substr(i, m.position() - i); + Str literal = src.substr(i, m.position() - i); ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); size++; } @@ -527,8 +528,8 @@ struct FStringExpr: Expr{ size++; i = (int)(m.position() + m.length()); } - if (i < src.size()) { - std::string literal = src.substr(i, src.size() - i); + if (i < src.length()) { + Str literal = src.substr(i, src.length() - i); ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); size++; } @@ -539,7 +540,7 @@ struct FStringExpr: Expr{ struct SubscrExpr: Expr{ Expr_ a; Expr_ b; - Str str() const override { return "a[b]"; } + std::string str() const override { return "Subscr()"; } void emit(CodeEmitContext* ctx) override{ a->emit(ctx); @@ -567,7 +568,7 @@ struct AttribExpr: Expr{ Str b; AttribExpr(Expr_ a, const Str& b): a(std::move(a)), b(b) {} AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} - Str str() const override { return "a.b"; } + std::string str() const override { return "Attrib()"; } void emit(CodeEmitContext* ctx) override{ a->emit(ctx); @@ -603,7 +604,7 @@ struct CallExpr: Expr{ 
Expr_ callable; std::vector args; std::vector> kwargs; - Str str() const override { return "call(...)"; } + std::string str() const override { return "Call()"; } bool need_unpack() const { for(auto& item: args) if(item->is_starred()) return true; @@ -643,7 +644,7 @@ struct BinaryExpr: Expr{ TokenIndex op; Expr_ lhs; Expr_ rhs; - Str str() const override { return TK_STR(op); } + std::string str() const override { return TK_STR(op); } void emit(CodeEmitContext* ctx) override { lhs->emit(ctx); @@ -683,10 +684,7 @@ struct TernaryExpr: Expr{ Expr_ cond; Expr_ true_expr; Expr_ false_expr; - - Str str() const override { - return "cond ? t : f"; - } + std::string str() const override { return "Ternary()"; } void emit(CodeEmitContext* ctx) override { cond->emit(ctx); diff --git a/src/frame.h b/src/frame.h index d63d9530..752e7cac 100644 --- a/src/frame.h +++ b/src/frame.h @@ -43,8 +43,8 @@ struct Frame { return co->src->snapshot(line); } - Str stack_info(){ - StrStream ss; + std::string stack_info(){ + std::stringstream ss; ss << id << " ["; for(int i=0; i<_data.size(); i++){ ss << (i64)_data[i]; diff --git a/src/gc.h b/src/gc.h index 74739fa2..752a18cb 100644 --- a/src/gc.h +++ b/src/gc.h @@ -37,7 +37,7 @@ struct ManagedHeap{ template PyObject* gcnew(Type type, T&& val){ using __T = Py_>; - PyObject* obj = new(pool128.alloc<__T>()) __T(type, std::forward(val)); + PyObject* obj = new(pool64.alloc<__T>()) __T(type, std::forward(val)); gen.push_back(obj); gc_counter++; return obj; @@ -46,7 +46,7 @@ struct ManagedHeap{ template PyObject* _new(Type type, T&& val){ using __T = Py_>; - PyObject* obj = new(pool128.alloc<__T>()) __T(type, std::forward(val)); + PyObject* obj = new(pool64.alloc<__T>()) __T(type, std::forward(val)); obj->gc.enabled = false; _no_gc.push_back(obj); return obj; @@ -57,7 +57,7 @@ struct ManagedHeap{ #endif ~ManagedHeap(){ - for(PyObject* obj: _no_gc) obj->~PyObject(), pool128.dealloc(obj); + for(PyObject* obj: _no_gc) obj->~PyObject(), 
pool64.dealloc(obj); #if DEBUG_GC_STATS for(auto& [type, count]: deleted){ std::cout << "GC: " << obj_type_name(vm, type) << "=" << count << std::endl; @@ -75,7 +75,7 @@ struct ManagedHeap{ #if DEBUG_GC_STATS deleted[obj->type] += 1; #endif - obj->~PyObject(), pool128.dealloc(obj); + obj->~PyObject(), pool64.dealloc(obj); } } diff --git a/src/lexer.h b/src/lexer.h index 6ed245ea..88698729 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -57,12 +57,13 @@ struct Token{ TokenValue value; Str str() const { return Str(start, length);} + std::string_view sv() const { return std::string_view(start, length);} - Str info() const { - StrStream ss; - Str raw = str(); - if (raw == Str("\n")) raw = "\\n"; - ss << line << ": " << TK_STR(type) << " '" << raw << "'"; + std::string info() const { + std::stringstream ss; + ss << line << ": " << TK_STR(type) << " '" << ( + sv()=="\n" ? "\\n" : sv() + ) << "'"; return ss.str(); } }; @@ -171,7 +172,7 @@ struct Lexer { curr_char--; while(true){ unsigned char c = peekchar(); - int u8bytes = utf8len(c); + int u8bytes = utf8len(c, true); if(u8bytes == 0) return 1; if(u8bytes == 1){ if(isalpha(c) || c=='_' || isdigit(c)) { diff --git a/src/namedict.h b/src/namedict.h index ba40eae6..5ea056cf 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -87,7 +87,7 @@ while(!_items[i].first.empty()) { \ PyObject* operator[](StrName key) const { bool ok; uint16_t i; HASH_PROBE(key, ok, i); - if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); + if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key)); return _items[i].second; } @@ -159,7 +159,7 @@ while(!_items[i].first.empty()) { \ void erase(StrName key){ bool ok; uint16_t i; HASH_PROBE(key, ok, i); - if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); + if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key)); _items[i].first = StrName(); _items[i].second = nullptr; _size--; diff --git a/src/pocketpy.h b/src/pocketpy.h index 
3459c8e3..d1169135 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -12,7 +12,7 @@ namespace pkpy { inline CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) { - Compiler compiler(this, source.c_str(), filename, mode); + Compiler compiler(this, source, filename, mode); try{ return compiler.compile(); }catch(Exception& e){ @@ -71,7 +71,7 @@ inline void init_builtins(VM* _vm) { if(!vm->isinstance(args[1], type)){ Str _0 = obj_type_name(vm, OBJ_GET(Type, vm->_t(args[1]))); Str _1 = obj_type_name(vm, type); - vm->TypeError("super(): " + _0.escape(true) + " is not an instance of " + _1.escape(true)); + vm->TypeError("super(): " + _0.escape() + " is not an instance of " + _1.escape()); } Type base = vm->_all_types[type].base; return vm->heap.gcnew(vm->tp_super, Super(args[1], base)); @@ -150,7 +150,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<1>("hex", [](VM* vm, Args& args) { - StrStream ss; + std::stringstream ss; ss << std::hex << CAST(i64, args[0]); return VAR("0x" + ss.str()); }); @@ -169,14 +169,14 @@ inline void init_builtins(VM* _vm) { std::vector keys = t_attr.keys(); names.insert(keys.begin(), keys.end()); List ret; - for (StrName name : names) ret.push_back(VAR(name.str())); + for (StrName name : names) ret.push_back(VAR(name.sv())); return VAR(std::move(ret)); }); _vm->bind_method<0>("object", "__repr__", [](VM* vm, Args& args) { PyObject* self = args[0]; if(is_tagged(self)) self = nullptr; - StrStream ss; + std::stringstream ss; ss << "<" << OBJ_NAME(vm->_t(self)) << " object at " << std::hex << self << ">"; return VAR(ss.str()); }); @@ -241,7 +241,7 @@ inline void init_builtins(VM* _vm) { if(parsed != s.length()) throw std::invalid_argument(""); return VAR(val); }catch(std::invalid_argument&){ - vm->ValueError("invalid literal for int(): " + s.escape(true)); + vm->ValueError("invalid literal for int(): " + s.escape()); } } vm->TypeError("int() argument must be a int, float, bool or str"); @@ -297,7 +297,7 @@ 
inline void init_builtins(VM* _vm) { _vm->bind_method<0>("float", "__repr__", [](VM* vm, Args& args) { f64 val = CAST(f64, args[0]); if(std::isinf(val) || std::isnan(val)) return VAR(std::to_string(val)); - StrStream ss; + std::stringstream ss; ss << std::setprecision(std::numeric_limits::max_digits10-1-2) << val; std::string s = ss.str(); if(std::all_of(s.begin()+1, s.end(), isdigit)) s += ".0"; @@ -335,7 +335,7 @@ inline void init_builtins(VM* _vm) { _vm->bind_method<0>("str", "__repr__", [](VM* vm, Args& args) { const Str& _self = CAST(Str&, args[0]); - return VAR(_self.escape(true)); + return VAR(_self.escape()); }); _vm->bind_method<0>("str", "__json__", [](VM* vm, Args& args) { @@ -405,7 +405,7 @@ inline void init_builtins(VM* _vm) { _vm->bind_method<1>("str", "join", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); - StrStream ss; + FastStrStream ss; PyObject* obj = vm->asList(args[1]); const List& list = CAST(List&, obj); for (int i = 0; i < list.size(); ++i) { @@ -639,8 +639,8 @@ struct ReMatch { i64 start; i64 end; - std::smatch m; - ReMatch(i64 start, i64 end, std::smatch m) : start(start), end(end), m(m) {} + std::cmatch m; + ReMatch(i64 start, i64 end, std::cmatch m) : start(start), end(end), m(m) {} static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_method<-1>(type, "__init__", CPP_NOT_IMPLEMENTED()); @@ -661,15 +661,13 @@ struct ReMatch { } }; -inline PyObject* _regex_search(const Str& _pattern, const Str& _string, bool fromStart, VM* vm){ - std::string pattern = _pattern.str(); - std::string string = _string.str(); - std::regex re(pattern); - std::smatch m; - if(std::regex_search(string, m, re)){ - if(fromStart && m.position() != 0) return vm->None; - i64 start = _string._u8_index(m.position()); - i64 end = _string._u8_index(m.position() + m.length()); +inline PyObject* _regex_search(const Str& pattern, const Str& string, bool from_start, VM* vm){ + std::regex re(pattern.begin(), pattern.end()); + std::cmatch m; + 
if(std::regex_search(string.begin(), string.end(), m, re)){ + if(from_start && m.position() != 0) return vm->None; + i64 start = string._byte_index_to_unicode(m.position()); + i64 end = string._byte_index_to_unicode(m.position() + m.length()); return VAR_T(ReMatch, start, end, m); } return vm->None; @@ -694,18 +692,17 @@ inline void add_module_re(VM* vm){ vm->bind_func<3>(mod, "sub", [](VM* vm, Args& args) { const Str& pattern = CAST(Str&, args[0]); const Str& repl = CAST(Str&, args[1]); - const Str& _string = CAST(Str&, args[2]); - std::regex re(pattern.str()); - std::string string = _string.str(); - return VAR(std::regex_replace(string, re, repl)); + const Str& string = CAST(Str&, args[2]); + std::regex re(pattern.begin(), pattern.end()); + return VAR(std::regex_replace(string.str(), re, repl.str())); }); vm->bind_func<2>(mod, "split", [](VM* vm, Args& args) { - std::string pattern = CAST(Str&, args[0]).str(); - std::string string = CAST(Str&, args[1]).str(); - std::regex re(pattern); - std::sregex_token_iterator it(string.begin(), string.end(), re, -1); - std::sregex_token_iterator end; + const Str& pattern = CAST(Str&, args[0]); + const Str& string = CAST(Str&, args[1]); + std::regex re(pattern.begin(), pattern.end()); + std::cregex_token_iterator it(string.begin(), string.end(), re, -1); + std::cregex_token_iterator end; List vec; for(; it != end; ++it){ vec.push_back(VAR(it->str())); @@ -863,8 +860,8 @@ extern "C" { pkpy::PyObject* val = vm->_main->attr().try_get(name); if(val == nullptr) return nullptr; try{ - pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(val)); - return strdup(repr.c_str()); + pkpy::Str repr = pkpy::CAST(pkpy::Str&, vm->asRepr(val)); + return repr.c_str_dup(); }catch(...){ return nullptr; } @@ -879,8 +876,8 @@ extern "C" { pkpy::PyObject* ret = vm->exec(source, "", pkpy::EVAL_MODE); if(ret == nullptr) return nullptr; try{ - pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(ret)); - return strdup(repr.c_str()); + pkpy::Str repr = 
pkpy::CAST(pkpy::Str&, vm->asRepr(ret)); + return repr.c_str_dup(); }catch(...){ return nullptr; } @@ -917,12 +914,12 @@ extern "C" { /// /// Return a json representing the result. char* pkpy_vm_read_output(pkpy::VM* vm){ - if(vm->use_stdio) return nullptr; - pkpy::StrStream* s_out = (pkpy::StrStream*)(vm->_stdout); - pkpy::StrStream* s_err = (pkpy::StrStream*)(vm->_stderr); + if(vm->is_stdio_used()) return nullptr; + std::stringstream* s_out = (std::stringstream*)(vm->_stdout); + std::stringstream* s_err = (std::stringstream*)(vm->_stderr); pkpy::Str _stdout = s_out->str(); pkpy::Str _stderr = s_err->str(); - pkpy::StrStream ss; + std::stringstream ss; ss << '{' << "\"stdout\": " << _stdout.escape(false); ss << ", " << "\"stderr\": " << _stderr.escape(false) << '}'; s_out->str(""); s_err->str(""); @@ -961,7 +958,7 @@ extern "C" { std::string f_header = std::string(mod) + '.' + name + '#' + std::to_string(kGlobalBindId++); pkpy::PyObject* obj = vm->_modules.contains(mod) ? vm->_modules[mod] : vm->new_module(mod); vm->bind_func<-1>(obj, name, [ret_code, f_header](pkpy::VM* vm, const pkpy::Args& args){ - pkpy::StrStream ss; + std::stringstream ss; ss << f_header; for(int i=0; i()(sv()); } + Str& operator=(const Str& other){ if(data!=nullptr) pool64.dealloc(data); size = other.size; @@ -86,18 +93,6 @@ struct Str{ if(data!=nullptr) pool64.dealloc(data); } - char operator[](int idx) const { - return data[idx]; - } - - int length() const { - return size; - } - - size_t hash() const{ - return std::hash()(sv()); - } - Str operator+(const Str& other) const { Str ret(size + other.size, is_ascii && other.is_ascii); memcpy(ret.data, data, size); @@ -116,7 +111,7 @@ struct Str{ } friend std::ostream& operator<<(std::ostream& os, const Str& str){ - os.write(str.data, str.size); + if(str.data!=nullptr) os.write(str.data, str.size); return os; } @@ -136,6 +131,16 @@ struct Str{ return size < other.size; } + bool operator<(const std::string_view& other) const { + int ret = 
strncmp(data, other.data(), std::min(size, (int)other.size())); + if(ret != 0) return ret < 0; + return size < (int)other.size(); + } + + friend bool operator<(const std::string_view& other, const Str& str){ + return str > other; + } + bool operator>(const Str& other) const { int ret = strncmp(data, other.data, std::min(size, other.size)); if(ret != 0) return ret > 0; @@ -176,7 +181,7 @@ struct Str{ } Str lstrip() const { - std::string copy = str(); + std::string copy(data, size); copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) { // std::isspace(c) does not working on windows (Debug) return c != ' ' && c != '\t' && c != '\r' && c != '\n'; @@ -184,8 +189,8 @@ struct Str{ return Str(copy); } - Str escape(bool single_quote) const { - StrStream ss; + Str escape(bool single_quote=true) const { + std::stringstream ss; ss << (single_quote ? '\'' : '"'); for (int i=0; ioperator[](i); @@ -215,31 +220,32 @@ struct Str{ return ss.str(); } - int index(const Str& sub) const { - auto p = std::search(data, data + size, sub.data, sub.data + sub.size); + int index(const Str& sub, int start=0) const { + auto p = std::search(data + start, data + size, sub.data, sub.data + sub.size); if(p == data + size) return -1; return p - data; } Str replace(const Str& old, const Str& new_) const { - StrStream ss; - int i = 0; - while(i < size){ - int j = index(old); - if(j == -1){ - ss << substr(i, size - i); + std::stringstream ss; + int start = 0; + while(true){ + int i = index(old, start); + if(i == -1){ + ss << substr(start, size - start); break; } - ss << substr(i, j - i); + ss << substr(start, i - start); ss << new_; - i = j + old.size; + start = i + old.size; } return ss.str(); } /*************unicode*************/ - int _u8_index(int i) const{ + // TODO: check error + int _unicode_index_to_byte(int i) const{ if(is_ascii) return i; int j = 0; while(i > 0){ @@ -249,28 +255,39 @@ struct Str{ return j; } + int _byte_index_to_unicode(int n) const{ + if(is_ascii) 
return n; + int cnt = 0; + for(int i=0; i +inline std::string fmt(Args&&... args) { + std::stringstream ss; + (ss << ... << args); + return ss.str(); +} + const uint32_t kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416
,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70303,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560}; const uint32_t kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,126
86,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,70301,70312,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101}; @@ -289,15 +306,19 @@ struct StrName { StrName(uint16_t index): index(index) {} StrName(const char* s): index(get(s).index) {} StrName(const Str& s){ - if(s._cached_sn_index != 0){ - index = s._cached_sn_index; - } else { - index = get(s.sv()).index; - } + index = get(s.sv()).index; } - const Str& str() const { return _r_interned[index-1]; } + std::string_view 
sv() const { return _r_interned[index-1].sv(); } bool empty() const { return index == 0; } + friend std::ostream& operator<<(std::ostream& os, const StrName& sn){ + return os << sn.sv(); + } + + Str escape() const { + return _r_interned[index-1].escape(); + } + bool operator==(const StrName& other) const noexcept { return this->index == other.index; } @@ -327,6 +348,31 @@ struct StrName { } }; +struct FastStrStream{ + pod_vector parts; + + FastStrStream& operator<<(const Str& s){ + parts.push_back(&s); + return *this; + } + + Str str() const{ + int len = 0; + bool is_ascii = true; + for(auto& s: parts){ + len += s->length(); + is_ascii &= s->is_ascii; + } + Str result(len, is_ascii); + char* p = result.data; + for(auto& s: parts){ + memcpy(p, s->data, s->length()); + p += s->length(); + } + return result; + } +}; + inline std::map> StrName::_interned; inline std::vector StrName::_r_interned; diff --git a/src/vm.h b/src/vm.h index 5d2ce1c4..e294a538 100644 --- a/src/vm.h +++ b/src/vm.h @@ -58,8 +58,8 @@ public: PyObject* run_frame(Frame* frame); - NameDict _modules; // loaded modules - std::map _lazy_modules; // lazy loaded modules + NameDict _modules; // loaded modules + std::map _lazy_modules; // lazy loaded modules PyObject* _py_op_call; PyObject* _py_op_yield; @@ -71,7 +71,8 @@ public: PyObject* builtins; // builtins module PyObject* _main; // __main__ module - bool use_stdio; + std::stringstream _stdout_buffer; + std::stringstream _stderr_buffer; std::ostream* _stdout; std::ostream* _stderr; int recursionlimit = 1000; @@ -85,18 +86,13 @@ public: VM(bool use_stdio) : heap(this){ this->vm = this; - this->use_stdio = use_stdio; - if(use_stdio){ - this->_stdout = &std::cout; - this->_stderr = &std::cerr; - }else{ - this->_stdout = new StrStream(); - this->_stderr = new StrStream(); - } - + this->_stdout = use_stdio ? &std::cout : &_stdout_buffer; + this->_stderr = use_stdio ? 
&std::cerr : &_stderr_buffer; init_builtin_types(); } + bool is_stdio_used() const { return _stdout == &std::cout; } + Frame* top_frame() const { #if DEBUG_EXTRA_CHECK if(callstack.empty()) UNREACHABLE(); @@ -116,7 +112,7 @@ public: PyObject* self; PyObject* iter_f = get_unbound_method(obj, __iter__, &self, false); if(self != _py_null) return call(iter_f, Args{self}); - TypeError(OBJ_NAME(_t(obj)).escape(true) + " object is not iterable"); + TypeError(OBJ_NAME(_t(obj)).escape() + " object is not iterable"); return nullptr; } @@ -210,7 +206,7 @@ public: PyTypeInfo info{ .obj = obj, .base = base, - .name = (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.str()): name.str() + .name = (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.sv()): name.sv() }; if(mod != nullptr) mod->attr().set(name, obj); _all_types.push_back(info); @@ -226,7 +222,7 @@ public: PyObject* obj = builtins->attr().try_get(type); if(obj == nullptr){ for(auto& t: _all_types) if(t.name == type) return t.obj; - throw std::runtime_error("type not found: " + type); + throw std::runtime_error(fmt("type not found: ", type)); } return obj; } @@ -293,18 +289,18 @@ public: void ZeroDivisionError(){ _error("ZeroDivisionError", "division by zero"); } void IndexError(const Str& msg){ _error("IndexError", msg); } void ValueError(const Str& msg){ _error("ValueError", msg); } - void NameError(StrName name){ _error("NameError", "name " + name.str().escape(true) + " is not defined"); } + void NameError(StrName name){ _error("NameError", fmt("name ", name.escape() + " is not defined")); } void AttributeError(PyObject* obj, StrName name){ // OBJ_NAME calls getattr, which may lead to a infinite recursion - _error("AttributeError", "type " + OBJ_NAME(_t(obj)).escape(true) + " has no attribute " + name.str().escape(true)); + _error("AttributeError", fmt("type ", OBJ_NAME(_t(obj)).escape(), " has no attribute ", name.escape())); } void AttributeError(Str msg){ _error("AttributeError", msg); } void 
check_type(PyObject* obj, Type type){ if(is_type(obj, type)) return; - TypeError("expected " + OBJ_NAME(_t(type)).escape(true) + ", but got " + OBJ_NAME(_t(obj)).escape(true)); + TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", but got " + OBJ_NAME(_t(obj)).escape()); } PyObject* _t(Type t){ @@ -317,13 +313,7 @@ public: return _all_types[OBJ_GET(Type, _t(obj->type)).index].obj; } - ~VM() { - heap.collect(); - if(!use_stdio){ - delete _stdout; - delete _stderr; - } - } + ~VM() { heap.collect(); } CodeObject_ compile(Str source, Str filename, CompileMode mode); PyObject* num_negated(PyObject* obj); @@ -363,14 +353,6 @@ inline void CodeObject::optimize(VM* vm){ uint32_t base_n = (uint32_t)(names.size() / kLocalsLoadFactor + 0.5); perfect_locals_capacity = find_next_capacity(base_n); perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, names); - - // pre-compute sn in co_consts - for(int i=0; itp_str)){ - Str& s = OBJ_GET(Str, consts[i]); - s._cached_sn_index = StrName::get(s.c_str()).index; - } - } } DEF_NATIVE_2(Str, tp_str) @@ -482,6 +464,10 @@ inline PyObject* py_var(VM* vm, std::string val){ return VAR(Str(std::move(val))); } +inline PyObject* py_var(VM* vm, std::string_view val){ + return VAR(Str(val)); +} + template void _check_py_class(VM* vm, PyObject* obj){ vm->check_type(obj, T::_type(vm)); @@ -493,7 +479,7 @@ inline PyObject* VM::num_negated(PyObject* obj){ }else if(is_float(obj)){ return VAR(-CAST(f64, obj)); } - TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape(true)); + TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape()); return nullptr; } @@ -503,7 +489,7 @@ inline f64 VM::num_to_float(PyObject* obj){ } else if (is_int(obj)){ return (f64)CAST(i64, obj); } - TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape(true)); + TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape()); return 0; } @@ -540,7 +526,7 @@ inline i64 VM::hash(PyObject* obj){ f64 val = 
CAST(f64, obj); return (i64)std::hash()(val); } - TypeError("unhashable type: " + OBJ_NAME(_t(obj)).escape(true)); + TypeError("unhashable type: " + OBJ_NAME(_t(obj)).escape()); return 0; } @@ -551,7 +537,7 @@ inline PyObject* VM::asRepr(PyObject* obj){ inline PyObject* VM::new_module(StrName name) { PyObject* obj = heap._new(tp_module, DummyModule()); - obj->attr().set(__name__, VAR(name.str())); + obj->attr().set(__name__, VAR(name.sv())); // we do not allow override in order to avoid memory leak // it is because Module objects are not garbage collected if(_modules.contains(name)) UNREACHABLE(); @@ -571,7 +557,7 @@ inline Str VM::disassemble(CodeObject_ co){ jumpTargets.push_back(byte.arg); } } - StrStream ss; + std::stringstream ss; int prev_line = -1; for(int i=0; icodes.size(); i++){ const Bytecode& byte = co->codes[i]; @@ -594,23 +580,23 @@ inline Str VM::disassemble(CodeObject_ co){ std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); switch(byte.op){ case OP_LOAD_CONST: - argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; + argStr += fmt(" (", CAST(Str, asRepr(co->consts[byte.arg])), ")"); break; case OP_LOAD_NAME: case OP_LOAD_GLOBAL: case OP_STORE_LOCAL: case OP_STORE_GLOBAL: case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR: case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_DELETE_LOCAL: case OP_DELETE_GLOBAL: - argStr += " (" + co->names[byte.arg].str() + ")"; + argStr += fmt(" (", co->names[byte.arg].sv(), ")"); break; case OP_BINARY_OP: - argStr += " (" + BINARY_SPECIAL_METHODS[byte.arg].str() + ")"; + argStr += fmt(" (", BINARY_SPECIAL_METHODS[byte.arg], ")"); break; case OP_COMPARE_OP: - argStr += " (" + COMPARE_SPECIAL_METHODS[byte.arg].str() + ")"; + argStr += fmt(" (", COMPARE_SPECIAL_METHODS[byte.arg], ")"); break; case OP_BITWISE_OP: - argStr += " (" + BITWISE_SPECIAL_METHODS[byte.arg].str() + ")"; + argStr += fmt(" (", BITWISE_SPECIAL_METHODS[byte.arg], ")"); break; } ss << pad(argStr, 
40); // may overflow @@ -619,21 +605,21 @@ inline Str VM::disassemble(CodeObject_ co){ } #if !DEBUG_DIS_EXEC_MIN - StrStream consts; + std::stringstream consts; consts << "co_consts: "; - consts << CAST(Str, asRepr(VAR(co->consts))); + consts << CAST(Str&, asRepr(VAR(co->consts))); - StrStream names; + std::stringstream names; names << "co_names: "; List list; for(int i=0; inames.size(); i++){ - list.push_back(VAR(co->names[i].str())); + list.push_back(VAR(co->names[i].sv())); } names << CAST(Str, asRepr(VAR(list))); ss << '\n' << consts.str() << '\n' << names.str(); #endif for(auto& decl: co->func_decls){ - ss << "\n\n" << "Disassembly of " << decl->name.str() << ":\n"; + ss << "\n\n" << "Disassembly of " << decl->name << ":\n"; ss << disassemble(decl->code); } return Str(ss.str()); @@ -733,7 +719,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo locals->set(name, args[i++]); continue; } - TypeError("missing positional argument " + name.str().escape(true)); + TypeError(fmt("missing positional argument ", name.escape())); } locals->update(fn.decl->kwargs); @@ -756,7 +742,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo for(int i=0; ikwargs.contains(key)){ - TypeError(key.escape(true) + " is an invalid keyword argument for " + fn.decl->name.str() + "()"); + TypeError(fmt(key.escape(), " is an invalid keyword argument for ", fn.decl->name, "()")); } locals->set(key, kwargs[i+1]); } @@ -774,7 +760,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo args.extend_self(self); return call(call_f, std::move(args), kwargs, false); } - TypeError(OBJ_NAME(_t(callable)).escape(true) + " object is not callable"); + TypeError(OBJ_NAME(_t(callable)).escape() + " object is not callable"); return None; } @@ -880,7 +866,7 @@ inline void VM::setattr(PyObject* obj, StrName name, T&& value){ if(descr_set != nullptr){ call(descr_set, Args{cls_var, obj, std::forward(value)}); }else{ 
- TypeError("readonly attribute: " + name.str().escape(true)); + TypeError(fmt("readonly attribute: ", name.escape())); } return; } From 970d48f90cac50ea8f7fd6a7a152455fdf648acc Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 9 Apr 2023 16:34:13 +0800 Subject: [PATCH 73/73] up Update main.yml Update main.yml Update main.yml up up --- .github/workflows.rar | Bin 1227 -> 0 bytes .github/workflows/main.yml | 126 +++++++++++++++++++++++++++++++++++++ build.py | 2 +- run_profile.sh | 1 - run_profile_test.sh | 10 +++ src/common.h | 1 + src/vm.h | 10 +-- 7 files changed, 143 insertions(+), 7 deletions(-) delete mode 100644 .github/workflows.rar create mode 100644 .github/workflows/main.yml create mode 100644 run_profile_test.sh diff --git a/.github/workflows.rar b/.github/workflows.rar deleted file mode 100644 index 81c8ec1d2a4878c49d57e099b87ee5ce03c0e574..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1227 zcmV;+1T_0nVR9iF2LS-{;ezW60R;yD1_1$pfPesRB3rO80s{-q2n5j|pg1}=!1P%j+9;xgF(rw$TIq`tQMoMlh~C~}#fzo%Ytg%J;`lRcHn5&y zJSPb_{CM>6oIMy%Ilw*ullUW^ArYZI5;r^WJINeaN&HDVGyQ}ga6r_j_s(=p{PKTM zwv0T4s5@T=;-^6MEb$<;?UIGq{sYbfOp(e~KMx`*4pIi7OaL>JJ_pdtna<{j=bP}O znQQ7Jdyh2o6QD7o%%tRA;Q6DJp_(@#!IV-Iu6+jgm0`p7Z1`8^Jy3Z-no2o->k-J3 z%fW&WlTSaQ>)WC@WDv9VJp_1h9El`h;It`-4S(IHR-o(+|1zXo15I(~b_hKKox1>s zL$Asa2)Ib}gX1d<&Yw>^OEPxH04XDLK#T@M2hx7>KInn=dw+#zk_gP{O(@jzQLl!t z&AAMnD{I`C{gD9#%D#N6TP{SQ$zPIS=1NABL`V`v-T%~hoX-e;XxVWax3j9G9ld>> zy^3l4N~Mnq9nr6F>V!=Jq$ykc6c=Oj@aji19ISclsg=Kq;gUlu(TF4u0N51FsU{1g z@6;}R?N>@v%J@Lzge@E@+r3#kRS0(3w^W;To&c@WQpYp(?wCuP&Ps2NqQY$~=Fe1C zT+fEY?T5j$09)E`Old;dnrv_5ngamc)HcQH=Tx7t+bBF#T4D8L%lUSCn#K<*jgIJf!LULpjc8P|PNN zEFLjy>Y>rcQ|bR+wZHDmN=FVx+QB`QCW+mQA0(KM{U$KR5p@%_x#sf}IST3MUY=Cy zUy+-B^0c6_btl5hGQn+D`bkbcM&Ksbi7wcj{SwSJBR_%Vile7Bm$|izsrbvWwQ?z< z9m?(>PihIk=`W5D!Xw{`|KHP*qIOu{cMYZ{_UP_iFL!h;13+cQffPo|&;`MXDQemQ z<@C#bR*zMQuIBJ_Fg^XaErbr>dWhrs&461oa31Qk^yU=IAo=Vi(fkq1SB&L87`GxU>d5Smpqr}Cp 
zI~PcqdfWG%%3x<&k(E;=kB}-&8uNb4>gTchEIj^#(djoBEa}C5kH3l^wP#gkjxUSg z0{hb%$Xg!;l<)?G9g1W~grC?EoD&HI77TN$X>6PkRJQD43S9&p5 zwc-q$(LL*;IVkE> .coverage/coverage.txt +mv *.gcov .coverage +rm main.gc* + +# -fprofile-instr-generate -fcoverage-mapping +# llvm-cov-15 show main.gc -instr-profile=default.profraw -format=html -output-dir .coverage \ No newline at end of file diff --git a/src/common.h b/src/common.h index e9ce5ecf..5e9661f6 100644 --- a/src/common.h +++ b/src/common.h @@ -3,6 +3,7 @@ #ifdef _MSC_VER #pragma warning (disable:4267) #pragma warning (disable:4101) +#pragma warning (disable:4244) #define _CRT_NONSTDC_NO_DEPRECATE #define strdup _strdup #endif diff --git a/src/vm.h b/src/vm.h index e294a538..f4eb40e0 100644 --- a/src/vm.h +++ b/src/vm.h @@ -204,9 +204,9 @@ public: PyObject* new_type_object(PyObject* mod, StrName name, Type base){ PyObject* obj = heap._new(tp_type, _all_types.size()); PyTypeInfo info{ - .obj = obj, - .base = base, - .name = (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.sv()): name.sv() + obj, + base, + (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.sv()): name.sv() }; if(mod != nullptr) mod->attr().set(name, obj); _all_types.push_back(info); @@ -626,8 +626,8 @@ inline Str VM::disassemble(CodeObject_ co){ } inline void VM::init_builtin_types(){ - _all_types.push_back({.obj = heap._new(Type(1), Type(0)), .base = -1, .name = "object"}); - _all_types.push_back({.obj = heap._new(Type(1), Type(1)), .base = 0, .name = "type"}); + _all_types.push_back({heap._new(Type(1), Type(0)), -1, "object"}); + _all_types.push_back({heap._new(Type(1), Type(1)), 0, "type"}); tp_object = 0; tp_type = 1; tp_int = _new_type_object("int");