From 9e82b52a05b089350e702fb1109ffa6db3a8fa5f Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 26 Apr 2025 19:37:06 +0800 Subject: [PATCH] improve gc --- include/pocketpy/interpreter/frame.h | 2 +- include/pocketpy/interpreter/heap.h | 3 +- include/pocketpy/interpreter/types.h | 4 +- include/pocketpy/interpreter/vm.h | 17 +++- include/pocketpy/objects/codeobject.h | 4 +- include/pocketpy/objects/namedict.h | 2 +- include/pocketpy/objects/object.h | 2 +- src/interpreter/frame.c | 4 +- src/interpreter/heap.c | 2 + src/interpreter/vm.c | 132 ++++++++++++++------------ src/modules/array2d.c | 2 +- src/objects/namedict.c | 12 ++- src/public/modules.c | 4 +- 13 files changed, 106 insertions(+), 84 deletions(-) diff --git a/include/pocketpy/interpreter/frame.h b/include/pocketpy/interpreter/frame.h index 9cda5531..2475a477 100644 --- a/include/pocketpy/interpreter/frame.h +++ b/include/pocketpy/interpreter/frame.h @@ -68,5 +68,5 @@ int Frame__prepare_jump_exception_handler(py_Frame* self, ValueStack*); UnwindTarget* Frame__find_unwind_target(py_Frame* self, int iblock); void Frame__set_unwind_target(py_Frame* self, py_TValue* sp); -void Frame__gc_mark(py_Frame* self); +void Frame__gc_mark(py_Frame* self, c11_vector* p_stack); SourceLocation Frame__source_location(py_Frame* self); \ No newline at end of file diff --git a/include/pocketpy/interpreter/heap.h b/include/pocketpy/interpreter/heap.h index fe1fc058..c04dfed8 100644 --- a/include/pocketpy/interpreter/heap.h +++ b/include/pocketpy/interpreter/heap.h @@ -5,7 +5,8 @@ typedef struct ManagedHeap { MultiPool small_objects; - c11_vector /* PyObject* */ large_objects; + c11_vector /* PyObject_p */ large_objects; + c11_vector /* PyObject_p */ gc_roots; int freed_ma[3]; int gc_threshold; // threshold for gc_counter diff --git a/include/pocketpy/interpreter/types.h b/include/pocketpy/interpreter/types.h index a775cc73..5c4dd460 100644 --- a/include/pocketpy/interpreter/types.h +++ b/include/pocketpy/interpreter/types.h @@ -24,5 +24,5 @@ typedef struct { typedef c11_vector List; -void c11_chunked_array2d__mark(void* ud); -void function__gc_mark(void* ud); \ No newline at end of file +void c11_chunked_array2d__mark(void* ud, c11_vector* p_stack); +void function__gc_mark(void* ud, c11_vector* p_stack); \ No newline at end of file diff --git a/include/pocketpy/interpreter/vm.h b/include/pocketpy/interpreter/vm.h index d4ed4686..a63caced 100644 --- a/include/pocketpy/interpreter/vm.h +++ b/include/pocketpy/interpreter/vm.h @@ -34,14 +34,14 @@ typedef struct VM { py_Callbacks callbacks; - py_TValue ascii_literals[128+1]; + py_TValue ascii_literals[128 + 1]; py_TValue last_retval; py_TValue curr_exception; int recursion_depth; int max_recursion_depth; - + bool is_curr_exc_handled; // handled by try-except block but not cleared yet py_TValue reg[8]; // users' registers @@ -64,10 +64,19 @@ void VM__dtor(VM* self); void VM__push_frame(VM* self, py_Frame* frame); void VM__pop_frame(VM* self); -bool pk__parse_int_slice(py_Ref slice, int length, int* restrict start, int* restrict stop, int* restrict step); +bool pk__parse_int_slice(py_Ref slice, + int length, + int* restrict start, + int* restrict stop, + int* restrict step); bool pk__normalize_index(int* index, int length); -#define pk__mark_value(val) if((val)->is_ptr && !(val)->_obj->gc_marked) PyObject__mark((val)->_obj) +#define pk__mark_value(val) \ + if((val)->is_ptr && !(val)->_obj->gc_marked) { \ + PyObject* obj = (val)->_obj; \ + obj->gc_marked = true; \ + c11_vector__push(PyObject*, p_stack, obj); \ + } bool pk__object_new(int argc, py_Ref argv); py_TypeInfo* pk__type_info(py_Type type); diff --git a/include/pocketpy/objects/codeobject.h b/include/pocketpy/objects/codeobject.h index 1583ba3b..47f8233b 100644 --- a/include/pocketpy/objects/codeobject.h +++ b/include/pocketpy/objects/codeobject.h @@ -89,7 +89,7 @@ typedef struct CodeObject { void CodeObject__ctor(CodeObject* self, SourceData_ src, c11_sv name); void CodeObject__dtor(CodeObject* self); int CodeObject__add_varname(CodeObject* self, py_Name name); -void CodeObject__gc_mark(const CodeObject* self); +void CodeObject__gc_mark(const CodeObject* self, c11_vector* p_stack); typedef struct FuncDeclKwArg { int index; // index in co->varnames @@ -122,7 +122,7 @@ void FuncDecl__add_arg(FuncDecl* self, py_Name name); void FuncDecl__add_kwarg(FuncDecl* self, py_Name name, const py_TValue* value); void FuncDecl__add_starred_arg(FuncDecl* self, py_Name name); void FuncDecl__add_starred_kwarg(FuncDecl* self, py_Name name); -void FuncDecl__gc_mark(const FuncDecl* self); +void FuncDecl__gc_mark(const FuncDecl* self, c11_vector* p_stack); // runtime function typedef struct Function { diff --git a/include/pocketpy/objects/namedict.h b/include/pocketpy/objects/namedict.h index 50dd0572..3b232cf2 100644 --- a/include/pocketpy/objects/namedict.h +++ b/include/pocketpy/objects/namedict.h @@ -25,4 +25,4 @@ void ModuleDict__dtor(ModuleDict* self); void ModuleDict__set(ModuleDict* self, const char* key, py_TValue val); py_TValue* ModuleDict__try_get(ModuleDict* self, const char* path); bool ModuleDict__contains(ModuleDict* self, const char* path); -void ModuleDict__apply_mark(ModuleDict* self); +void ModuleDict__apply_mark(ModuleDict* self, c11_vector* p_stack); diff --git a/include/pocketpy/objects/object.h b/include/pocketpy/objects/object.h index 1f63be0f..ce2b28d4 100644 --- a/include/pocketpy/objects/object.h +++ b/include/pocketpy/objects/object.h @@ -24,4 +24,4 @@ void* PyObject__userdata(PyObject* self); #define PK_OBJ_SLOTS_SIZE(slots) ((slots) >= 0 ? sizeof(py_TValue) * (slots) : sizeof(NameDict)) void PyObject__dtor(PyObject* self); -void PyObject__mark(PyObject* self); + diff --git a/src/interpreter/frame.c b/src/interpreter/frame.c index 2e7f4824..396f239d 100644 --- a/src/interpreter/frame.c +++ b/src/interpreter/frame.c @@ -114,10 +114,10 @@ void Frame__set_unwind_target(py_Frame* self, py_TValue* sp) { } } -void Frame__gc_mark(py_Frame* self) { +void Frame__gc_mark(py_Frame* self, c11_vector* p_stack) { pk__mark_value(self->globals); if(self->is_locals_special) pk__mark_value(self->locals); - CodeObject__gc_mark(self->co); + CodeObject__gc_mark(self->co, p_stack); } int Frame__lineno(const py_Frame* self) { diff --git a/src/interpreter/heap.c b/src/interpreter/heap.c index 496f94b6..dd81128f 100644 --- a/src/interpreter/heap.c +++ b/src/interpreter/heap.c @@ -7,6 +7,7 @@ void ManagedHeap__ctor(ManagedHeap* self) { MultiPool__ctor(&self->small_objects); c11_vector__ctor(&self->large_objects, sizeof(PyObject*)); + c11_vector__ctor(&self->gc_roots, sizeof(PyObject*)); for(int i = 0; i < c11__count_array(self->freed_ma); i++) { self->freed_ma[i] = PK_GC_MIN_THRESHOLD; @@ -26,6 +27,7 @@ void ManagedHeap__dtor(ManagedHeap* self) { PK_FREE(obj); } c11_vector__dtor(&self->large_objects); + c11_vector__dtor(&self->gc_roots); } void ManagedHeap__collect_if_needed(ManagedHeap* self) { diff --git a/src/interpreter/vm.c b/src/interpreter/vm.c index 5a42a96a..544c74f5 100644 --- a/src/interpreter/vm.c +++ b/src/interpreter/vm.c @@ -606,84 +606,30 @@ void PyObject__dtor(PyObject* self) { if(self->slots == -1) NameDict__dtor(PyObject__dict(self)); } -void PyObject__mark(PyObject* obj) { - assert(!obj->gc_marked); - - obj->gc_marked = true; - - if(obj->slots > 0) { - py_TValue* p = PyObject__slots(obj); - for(int i = 0; i < obj->slots; i++) - pk__mark_value(p + i); - } else if(obj->slots == -1) { - NameDict* namedict = PyObject__dict(obj); - for(int i = 0; i < namedict->length; i++) { - NameDict_KV* kv = c11__at(NameDict_KV, namedict, i); - pk__mark_value(&kv->value); - } - } - - void* ud = PyObject__userdata(obj); - switch(obj->type) { - case tp_list: { - List* self = ud; - for(int i = 0; i < self->length; i++) { - pk__mark_value(c11__at(py_TValue, self, i)); - } - break; - } - case tp_dict: { - Dict* self = ud; - for(int i = 0; i < self->entries.length; i++) { - DictEntry* entry = c11__at(DictEntry, &self->entries, i); - if(py_isnil(&entry->key)) continue; - pk__mark_value(&entry->key); - pk__mark_value(&entry->val); - } - break; - } - case tp_generator: { - Generator* self = ud; - if(self->frame) Frame__gc_mark(self->frame); - break; - } - case tp_function: { - function__gc_mark(ud); - break; - } - case tp_code: { - CodeObject* self = ud; - CodeObject__gc_mark(self); - break; - } - case tp_chunked_array2d: { - c11_chunked_array2d__mark(ud); - } - default: return; - } -} - -void FuncDecl__gc_mark(const FuncDecl* self) { - CodeObject__gc_mark(&self->code); +void FuncDecl__gc_mark(const FuncDecl* self, c11_vector* p_stack) { + CodeObject__gc_mark(&self->code, p_stack); for(int j = 0; j < self->kwargs.length; j++) { FuncDeclKwArg* kw = c11__at(FuncDeclKwArg, &self->kwargs, j); pk__mark_value(&kw->value); } } -void CodeObject__gc_mark(const CodeObject* self) { +void CodeObject__gc_mark(const CodeObject* self, c11_vector* p_stack) { for(int i = 0; i < self->consts.length; i++) { py_TValue* p = c11__at(py_TValue, &self->consts, i); pk__mark_value(p); } for(int i = 0; i < self->func_decls.length; i++) { FuncDecl_ decl = c11__getitem(FuncDecl_, &self->func_decls, i); - FuncDecl__gc_mark(decl); + FuncDecl__gc_mark(decl, p_stack); } } void ManagedHeap__mark(ManagedHeap* self) { VM* vm = pk_current_vm; + c11_vector* p_stack = &self->gc_roots; + assert(p_stack->length == 0); + // mark value stack for(py_TValue* p = vm->stack.begin; p != vm->stack.end; p++) { pk__mark_value(p); @@ -693,7 +639,7 @@ void ManagedHeap__mark(ManagedHeap* self) { pk__mark_value(&vm->ascii_literals[i]); } // mark modules - ModuleDict__apply_mark(&vm->modules); + ModuleDict__apply_mark(&vm->modules, p_stack); // mark types int types_length = vm->types.length; // 0-th type is placeholder @@ -720,7 +666,7 @@ void ManagedHeap__mark(ManagedHeap* self) { } // mark frame for(py_Frame* frame = vm->top_frame; frame; frame = frame->f_back) { - Frame__gc_mark(frame); + Frame__gc_mark(frame, p_stack); } // mark vm's registers pk__mark_value(&vm->last_retval); @@ -733,6 +679,66 @@ void ManagedHeap__mark(ManagedHeap* self) { RInternedEntry* entry = c11__at(RInternedEntry, &vm->names.r_interned, i); pk__mark_value(&entry->obj); } + + /*****************************/ + while(p_stack->length > 0) { + PyObject* obj = c11_vector__back(PyObject*, p_stack); + c11_vector__pop(p_stack); + + assert(obj->gc_marked); + + if(obj->slots > 0) { + py_TValue* p = PyObject__slots(obj); + for(int i = 0; i < obj->slots; i++) + pk__mark_value(p + i); + } else if(obj->slots == -1) { + NameDict* namedict = PyObject__dict(obj); + for(int i = 0; i < namedict->length; i++) { + NameDict_KV* kv = c11__at(NameDict_KV, namedict, i); + pk__mark_value(&kv->value); + } + } + + void* ud = PyObject__userdata(obj); + switch(obj->type) { + case tp_list: { + List* self = ud; + for(int i = 0; i < self->length; i++) { + py_TValue* val = c11__at(py_TValue, self, i); + pk__mark_value(val); + } + break; + } + case tp_dict: { + Dict* self = ud; + for(int i = 0; i < self->entries.length; i++) { + DictEntry* entry = c11__at(DictEntry, &self->entries, i); + if(py_isnil(&entry->key)) continue; + pk__mark_value(&entry->key); + pk__mark_value(&entry->val); + } + break; + } + case tp_generator: { + Generator* self = ud; + if(self->frame) Frame__gc_mark(self->frame, p_stack); + break; + } + case tp_function: { + function__gc_mark(ud, p_stack); + break; + } + case tp_code: { + CodeObject* self = ud; + CodeObject__gc_mark(self, p_stack); + break; + } + case tp_chunked_array2d: { + c11_chunked_array2d__mark(ud, p_stack); + break; + } + } + } } void pk_print_stack(VM* self, py_Frame* frame, Bytecode byte) { diff --git a/src/modules/array2d.c b/src/modules/array2d.c index 3a6eef31..3df9d468 100644 --- a/src/modules/array2d.c +++ b/src/modules/array2d.c @@ -1222,7 +1222,7 @@ void c11_chunked_array2d__dtor(c11_chunked_array2d* self) { c11_chunked_array2d_chunks__dtor(&self->chunks); } -void c11_chunked_array2d__mark(void* ud) { +void c11_chunked_array2d__mark(void* ud, c11_vector* p_stack) { c11_chunked_array2d* self = ud; pk__mark_value(&self->default_T); pk__mark_value(&self->context_builder); diff --git a/src/objects/namedict.c b/src/objects/namedict.c index 1ce1f67c..03ae3b1c 100644 --- a/src/objects/namedict.c +++ b/src/objects/namedict.c @@ -75,8 +75,12 @@ bool ModuleDict__contains(ModuleDict* self, const char* path) { return ModuleDict__try_get(self, path) != NULL; } -void ModuleDict__apply_mark(ModuleDict *self) { - if(!self->module._obj->gc_marked) PyObject__mark(self->module._obj); - if(self->left) ModuleDict__apply_mark(self->left); - if(self->right) ModuleDict__apply_mark(self->right); +void ModuleDict__apply_mark(ModuleDict* self, c11_vector* p_stack) { + PyObject* obj = self->module._obj; + if(!obj->gc_marked) { + obj->gc_marked = true; + c11_vector__push(PyObject*, p_stack, obj); + } + if(self->left) ModuleDict__apply_mark(self->left, p_stack); + if(self->right) ModuleDict__apply_mark(self->right, p_stack); } diff --git a/src/public/modules.c b/src/public/modules.c index 3efb3627..762ef229 100644 --- a/src/public/modules.c +++ b/src/public/modules.c @@ -759,7 +759,7 @@ py_TValue pk_builtins__register() { return *builtins; } -void function__gc_mark(void* ud) { +void function__gc_mark(void* ud, c11_vector* p_stack) { Function* func = ud; if(func->globals) pk__mark_value(func->globals); if(func->closure) { @@ -769,7 +769,7 @@ void function__gc_mark(void* ud) { pk__mark_value(&kv->value); } } - FuncDecl__gc_mark(func->decl); + FuncDecl__gc_mark(func->decl, p_stack); } static bool function__doc__(int argc, py_Ref argv) {