diff --git a/src/codeobject.h b/src/codeobject.h index 98fdb47f..fcd503bb 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -89,6 +89,10 @@ struct CodeObject { return consts.size() - 1; } + void _mark() const { + for(PyObject* v : consts) OBJ_MARK(v); + } + /************************************************/ int _curr_block_i = 0; int _rvalue = 0; diff --git a/src/common.h b/src/common.h index 3f69ac1e..06530277 100644 --- a/src/common.h +++ b/src/common.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -18,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -28,6 +26,7 @@ #include #include #include +#include #define PK_VERSION "0.9.5" #define PK_EXTRA_CHECK 0 @@ -101,4 +100,35 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept { return is_int(a) && is_int(b); } + +template +class queue{ + std::list list; +public: + void push(const T& t){ list.push_back(t); } + void push(T&& t){ list.push_back(std::move(t)); } + void pop(){ list.pop_front(); } + void clear(){ list.clear(); } + bool empty() const { return list.empty(); } + size_t size() const { return list.size(); } + T& front(){ return list.front(); } + const T& front() const { return list.front(); } + const std::list& data() const { return list; } +}; + +template +class stack{ + std::vector vec; +public: + void push(const T& t){ vec.push_back(t); } + void push(T&& t){ vec.push_back(std::move(t)); } + void pop(){ vec.pop_back(); } + void clear(){ vec.clear(); } + bool empty() const { return vec.empty(); } + size_t size() const { return vec.size(); } + T& top(){ return vec.back(); } + const T& top() const { return vec.back(); } + const std::vector& data() const { return vec; } +}; + } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index c20cc606..942828d2 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -21,7 +21,7 @@ enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; class Compiler { std::unique_ptr parser; - std::stack codes; + stack codes; int lexing_count = 0; bool used = false; VM* vm; diff --git a/src/error.h b/src/error.h index 945e929c..d0732b50 100644 --- a/src/error.h +++ b/src/error.h @@ -72,7 +72,7 @@ struct SourceData { class Exception { StrName type; Str msg; - std::stack stacktrace; + stack stacktrace; public: Exception(StrName type, Str msg): type(type), msg(msg) {} bool match_type(StrName type) const { return this->type == type;} @@ -84,7 +84,7 @@ public: } Str summary() const { - std::stack st(stacktrace); + stack st(stacktrace); StrStream ss; if(is_re) ss << "Traceback (most recent call last):\n"; while(!st.empty()) { ss << st.top() << '\n'; st.pop(); } diff --git a/src/frame.h b/src/frame.h index 37848a11..4ab98631 100644 --- a/src/frame.h +++ b/src/frame.h @@ -159,6 +159,17 @@ struct Frame { for(int i=n-1; i>=0; i--) v[i] = pop(); return v; } + + void _mark() const { + for(PyObject* obj : _data) OBJ_MARK(obj); + if(_locals != nullptr) _locals->_mark(); + if(_closure != nullptr) _closure->_mark(); + OBJ_MARK(_module); + for(auto& p : s_try_block){ + for(PyObject* obj : p.second) OBJ_MARK(obj); + } + co->_mark(); + } }; }; // namespace pkpy \ No newline at end of file diff --git a/src/gc.h b/src/gc.h index bb1e2f36..3fb5b76b 100644 --- a/src/gc.h +++ b/src/gc.h @@ -1,37 +1,84 @@ #pragma once #include "obj.h" +#include "codeobject.h" +#include "namedict.h" namespace pkpy { - struct ManagedHeap{ - std::vector heap; +struct ManagedHeap{ + std::vector gen; - void _add(PyObject* obj){ - obj->gc.enabled = true; - heap.push_back(obj); - } + template + PyObject* gcnew(Type type, T&& val){ + PyObject* obj = new Py_>(type, std::forward(val)); + gen.push_back(obj); + return obj; + } - void sweep(){ - std::vector alive; - for(PyObject* obj: heap){ - if(obj->gc.marked){ - obj->gc.marked = false; - alive.push_back(obj); - }else{ - delete obj; - } + template + PyObject* _new(Type type, T&& val){ + return gcnew(type, std::forward(val)); + } + + int sweep(){ + std::vector alive; + for(PyObject* obj: gen){ + if(obj->gc.marked){ + obj->gc.marked = false; + alive.push_back(obj); + }else{ + delete obj; } - heap.clear(); - heap.swap(alive); } + int freed = gen.size() - alive.size(); + gen.clear(); + gen.swap(alive); + return freed; + } - void collect(VM* vm){ - std::vector roots = get_roots(vm); - for(PyObject* obj: roots) obj->mark(); - sweep(); - } + int collect(VM* vm){ + mark(vm); + return sweep(); + } - std::vector get_roots(VM* vm); - }; + void mark(VM* vm); +}; + + +inline void NameDict::_mark(){ + for(uint16_t i=0; i<_capacity; i++){ + if(_items[i].first.empty()) continue; + OBJ_MARK(_items[i].second); + } +} + +template<> inline void _mark(List& t){ + for(PyObject* obj: t) OBJ_MARK(obj); +} + +template<> inline void _mark(Tuple& t){ + for(int i=0; i inline void _mark(Function& t){ + t.code->_mark(); + t.kwargs._mark(); + if(t._module != nullptr) OBJ_MARK(t._module); + if(t._closure != nullptr) t._closure->_mark(); +} + +template<> inline void _mark(BoundMethod& t){ + OBJ_MARK(t.obj); + OBJ_MARK(t.method); +} + +template<> inline void _mark(StarWrapper& t){ + OBJ_MARK(t.obj); +} + +template<> inline void _mark(Super& t){ + OBJ_MARK(t.first); +} +// NOTE: std::function may capture some PyObject*, they can not be marked } // namespace pkpy \ No newline at end of file diff --git a/src/iter.h b/src/iter.h index 42dd0c08..464b48de 100644 --- a/src/iter.h +++ b/src/iter.h @@ -65,4 +65,21 @@ inline PyObject* Generator::next(){ } } +inline void BaseIter::_mark() { + if(_ref != nullptr) OBJ_MARK(_ref); + if(loop_var != nullptr) OBJ_MARK(loop_var); +} + +inline void Generator::_mark(){ + BaseIter::_mark(); + frame->_mark(); +} + +template +void _mark(T& t){ + if constexpr(std::is_base_of_v){ + t._mark(); + } +} + } // namespace pkpy \ No newline at end of file diff --git a/src/namedict.h b/src/namedict.h index 0cd24e36..5623fce4 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -180,12 +180,7 @@ while(!_items[i].first.empty()) { \ return v; } - void apply_v(void(*f)(PyObject*)) { - for(uint16_t i=0; i<_capacity; i++){ - if(_items[i].first.empty()) continue; - f(_items[i].second); - } - } + void _mark(); #undef HASH_PROBE #undef _hash }; diff --git a/src/obj.h b/src/obj.h index 3593b366..634debbe 100644 --- a/src/obj.h +++ b/src/obj.h @@ -63,6 +63,8 @@ struct StarWrapper { StarWrapper(PyObject* obj, bool rvalue): obj(obj), rvalue(rvalue) {} }; +using Super = std::pair; + struct Slice { int start = 0; int stop = 0x7fffffff; @@ -84,16 +86,13 @@ public: virtual PyObject* next() = 0; PyObject* loop_var; BaseIter(VM* vm, PyObject* _ref) : vm(vm), _ref(_ref) {} + virtual void _mark(); virtual ~BaseIter() = default; }; -template struct is_container_gc : std::false_type {}; -template struct is_container_gc> : std::true_type {}; - struct GCHeader { - bool enabled; // whether this object is managed by GC bool marked; // whether this object is marked - GCHeader() : enabled(false), marked(false) {} + GCHeader() : marked(false) {} }; struct PyObject { @@ -105,12 +104,15 @@ struct PyObject { NameDict& attr() noexcept { return *_attr; } PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; } virtual void* value() = 0; - virtual void mark() = 0; + virtual void _mark() = 0; PyObject(Type type) : type(type) {} virtual ~PyObject() { delete _attr; } }; +template +void _mark(T& t); + template struct Py_ : PyObject { T _value; @@ -131,16 +133,17 @@ struct Py_ : PyObject { } void* value() override { return &_value; } - void mark() override { - if(!gc.enabled || gc.marked) return; + void _mark() override { + if(gc.marked) return; gc.marked = true; - if(is_attr_valid()) attr().apply_v([](PyObject* v){ v->mark(); }); - if constexpr (is_container_gc::value) _value._mark(); + if(is_attr_valid()) attr()._mark(); + pkpy::_mark(_value); // handle PyObject* inside _value `T` } }; #define OBJ_GET(T, obj) (((Py_*)(obj))->_value) #define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__)) +#define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_mark() const int kTpIntIndex = 2; const int kTpFloatIndex = 3; @@ -210,7 +213,7 @@ __T _py_cast(VM* vm, PyObject* obj) { } #define VAR(x) py_var(vm, x) -#define VAR_T(T, ...) vm->gcnew(T::_type(vm), T(__VA_ARGS__)) +#define VAR_T(T, ...) vm->heap.gcnew(T::_type(vm), T(__VA_ARGS__)) #define CAST(T, x) py_cast(vm, x) #define _CAST(T, x) _py_cast(vm, x) diff --git a/src/parser.h b/src/parser.h index da36b888..60c280b2 100644 --- a/src/parser.h +++ b/src/parser.h @@ -101,8 +101,8 @@ struct Parser { const char* curr_char; int current_line = 1; Token prev, curr; - std::queue nexts; - std::stack indents; + queue nexts; + stack indents; int brackets_level = 0; diff --git a/src/pocketpy.h b/src/pocketpy.h index 13502ec0..a036a8bd 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -69,7 +69,7 @@ inline void init_builtins(VM* _vm) { vm->TypeError("super(type, obj): obj must be an instance or subtype of type"); } Type base = vm->_all_types[type].base; - return vm->gcnew(vm->tp_super, Super(args[1], base)); + return vm->heap.gcnew(vm->tp_super, Super(args[1], base)); }); _vm->bind_builtin_func<2>("isinstance", [](VM* vm, Args& args) { @@ -757,7 +757,7 @@ inline void add_module_random(VM* vm){ inline void add_module_gc(VM* vm){ PyObject* mod = vm->new_module("gc"); - vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->gc_collect()))); + vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->heap.collect(vm)))); } inline void VM::post_init(){ diff --git a/src/ref.h b/src/ref.h index 7a6f0310..8129b316 100644 --- a/src/ref.h +++ b/src/ref.h @@ -152,7 +152,7 @@ struct TupleRef : BaseRef { template PyObject* VM::PyRef(P&& value) { static_assert(std::is_base_of_v>); - return gcnew

(tp_ref, std::forward

(value)); + return heap.gcnew

(tp_ref, std::forward

(value)); } inline const BaseRef* VM::PyRef_AS_C(PyObject* obj) @@ -166,4 +166,18 @@ inline void Frame::try_deref(VM* vm, PyObject*& v){ if(is_type(v, vm->tp_ref)) v = vm->PyRef_AS_C(v)->get(vm, this); } +/***** GC's Impl *****/ +template<> inline void _mark(AttrRef& t){ + OBJ_MARK(obj); +} + +template<> inline void _mark(IndexRef& t){ + OBJ_MARK(obj); + OBJ_MARK(index); +} + +template<> inline void _mark(TupleRef& t){ + _mark(t.objs); +} + } // namespace pkpy \ No newline at end of file diff --git a/src/vm.h b/src/vm.h index 01bd7289..1e8c02ce 100644 --- a/src/vm.h +++ b/src/vm.h @@ -24,8 +24,8 @@ Str _read_file_cwd(const Str& name, bool* ok); template<> inline ctype& _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - inline PyObject* py_var(VM* vm, const ctype& value) { return vm->gcnew(vm->ptype, value);} \ - inline PyObject* py_var(VM* vm, ctype&& value) { return vm->gcnew(vm->ptype, std::move(value));} + inline PyObject* py_var(VM* vm, const ctype& value) { return vm->heap.gcnew(vm->ptype, value);} \ + inline PyObject* py_var(VM* vm, ctype&& value) { return vm->heap.gcnew(vm->ptype, std::move(value));} class Generator: public BaseIter { @@ -35,7 +35,8 @@ public: Generator(VM* vm, std::unique_ptr&& frame) : BaseIter(vm, nullptr), frame(std::move(frame)), state(0) {} - PyObject* next(); + PyObject* next() override; + void _mark() override; }; struct PyTypeInfo{ @@ -46,9 +47,9 @@ struct PyTypeInfo{ class VM { VM* vm; // self reference for simplify code - ManagedHeap heap; public: - std::stack< std::unique_ptr > callstack; + ManagedHeap heap; + stack< std::unique_ptr > callstack; std::vector _all_types; PyObject* run_frame(Frame* frame); @@ -56,15 +57,12 @@ public: NameDict _modules; // loaded modules std::map _lazy_modules; // lazy loaded modules - // singleton objects, need_gc=false PyObject* _py_op_call; PyObject* _py_op_yield; PyObject* None; PyObject* True; PyObject* False; PyObject* Ellipsis; - - // managed by _modules, need_gc=false PyObject* builtins; // builtins module PyObject* _main; // __main__ module @@ -73,6 +71,13 @@ public: std::ostream* _stderr; int recursionlimit = 1000; + // for quick access + Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str; + Type tp_list, tp_tuple; + Type tp_function, tp_native_function, tp_iterator, tp_bound_method; + Type tp_slice, tp_range, tp_module, tp_ref; + Type tp_super, tp_exception, tp_star_wrapper; + VM(bool use_stdio){ this->vm = this; this->use_stdio = use_stdio; @@ -118,7 +123,7 @@ public: do{ val = cls->attr().try_get(name); if(val != nullptr) return val; - Type cls_t = static_cast*>(cls)->_value; + Type cls_t = OBJ_GET(Type, cls); Type base = _all_types[cls_t].base; if(base.index == -1) break; cls = _all_types[base].obj; @@ -144,18 +149,6 @@ public: return nullptr; } - i64 gc_collect(){ - heap.collect(this); - return 0; - } - - template - PyObject* gcnew(Type type, T&& val){ - PyObject* obj = new Py_>(type, std::forward(val)); - heap._add(obj); - return obj; - } - template std::enable_if_t, Args>, PyObject*> call(PyObject* callable, ArgT&& args){ @@ -200,12 +193,12 @@ public: PyObject* property(NativeFuncRaw fget){ PyObject* p = builtins->attr("property"); - PyObject* method = gcnew(tp_native_function, NativeFunc(fget, 1, false)); + PyObject* method = heap.gcnew(tp_native_function, NativeFunc(fget, 1, false)); return call(p, Args{method}); } PyObject* new_type_object(PyObject* mod, StrName name, Type base){ - PyObject* obj = new Py_(tp_type, _all_types.size()); + PyObject* obj = heap._new(tp_type, _all_types.size()); PyTypeInfo info{ .obj = obj, .base = base, @@ -263,17 +256,10 @@ public: return index; } - // for quick access - Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str; - Type tp_list, tp_tuple; - Type tp_function, tp_native_function, tp_iterator, tp_bound_method; - Type tp_slice, tp_range, tp_module, tp_ref; - Type tp_super, tp_exception, tp_star_wrapper; - template PyObject* PyIter(P&& value) { static_assert(std::is_base_of_v>); - return gcnew

(tp_iterator, std::forward

(value)); + return heap.gcnew

(tp_iterator, std::forward

(value)); } BaseIter* PyIter_AS_C(PyObject* obj) @@ -323,6 +309,7 @@ public: } ~VM() { + heap.collect(this); if(!use_stdio){ delete _stdout; delete _stderr; @@ -578,7 +565,7 @@ inline PyObject* VM::asRepr(PyObject* obj){ } inline PyObject* VM::new_module(StrName name) { - PyObject* obj = new Py_(tp_module, DummyModule()); + PyObject* obj = heap._new(tp_module, DummyModule()); obj->attr().set(__name__, VAR(name.str())); // we do not allow override in order to avoid memory leak // it is because Module objects are not garbage collected @@ -666,8 +653,8 @@ inline void VM::init_builtin_types(){ // PyTypeObject is managed by _all_types // PyModuleObject is managed by _modules // They are not managed by GC, so we use a simple "new" - _all_types.push_back({.obj = new Py_(Type(1), Type(0)), .base = -1, .name = "object"}); - _all_types.push_back({.obj = new Py_(Type(1), Type(1)), .base = 0, .name = "type"}); + _all_types.push_back({.obj = heap._new(Type(1), Type(0)), .base = -1, .name = "object"}); + _all_types.push_back({.obj = heap._new(Type(1), Type(1)), .base = 0, .name = "type"}); tp_object = 0; tp_type = 1; tp_int = _new_type_object("int"); @@ -690,12 +677,12 @@ inline void VM::init_builtin_types(){ tp_super = _new_type_object("super"); tp_exception = _new_type_object("Exception"); - this->None = new Py_(_new_type_object("NoneType"), {}); - this->Ellipsis = new Py_(_new_type_object("ellipsis"), {}); - this->True = new Py_(tp_bool, {}); - this->False = new Py_(tp_bool, {}); - this->_py_op_call = new Py_(_new_type_object("_py_op_call"), {}); - this->_py_op_yield = new Py_(_new_type_object("_py_op_yield"), {}); + this->None = heap._new(_new_type_object("NoneType"), {}); + this->Ellipsis = heap._new(_new_type_object("ellipsis"), {}); + this->True = heap._new(tp_bool, {}); + this->False = heap._new(tp_bool, {}); + this->_py_op_call = heap._new(_new_type_object("_py_op_call"), {}); + this->_py_op_yield = heap._new(_new_type_object("_py_op_yield"), {}); this->builtins = new_module("builtins"); this->_main = new_module("__main__"); @@ -723,7 +710,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo if(new_f != nullptr){ obj = call(new_f, std::move(args), kwargs, false); }else{ - obj = gcnew(OBJ_GET(Type, callable), {}); + obj = heap.gcnew(OBJ_GET(Type, callable), {}); PyObject* init_f = getattr(obj, __init__, false, true); if (init_f != nullptr) call(init_f, std::move(args), kwargs, false); } @@ -812,8 +799,6 @@ inline void VM::unpack_args(Args& args){ args = Args(std::move(unpacked)); } -using Super = std::pair; - // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){ PyObject* objtype = _t(obj); @@ -936,10 +921,11 @@ inline PyObject* VM::_exec(){ } } -inline std::vector ManagedHeap::get_roots(VM *vm) { - std::vector roots; - // ... - return roots; +inline void ManagedHeap::mark(VM *vm) { + // iterate callstack frames + for(auto& frame : vm->callstack.data()){ + frame->_mark(); + } } } // namespace pkpy \ No newline at end of file