add setup_gc_debug_callback (#406)

* [no ci] backup

* [no ci] backup

* [no ci] fix

* [no ci] fix

* [no ci] fix

* [no ci] fix

* [no ci] fix

* [no ci] fix

* [no ci] fix

* [no ci] fix

---------

Co-authored-by: 张皓晟 <2067144018@qq.com>
This commit is contained in:
BLUELOVETH 2025-11-23 16:30:22 +08:00 committed by GitHub
parent a6d0d9b04f
commit c45a5df1e8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 191 additions and 20 deletions

View File

@ -2,6 +2,7 @@
#include "pocketpy/objects/object.h" #include "pocketpy/objects/object.h"
#include "pocketpy/interpreter/objectpool.h" #include "pocketpy/interpreter/objectpool.h"
#include <time.h>
typedef struct ManagedHeap { typedef struct ManagedHeap {
MultiPool small_objects; MultiPool small_objects;
@ -12,14 +13,40 @@ typedef struct ManagedHeap {
int gc_threshold; // threshold for gc_counter int gc_threshold; // threshold for gc_counter
int gc_counter; // objects created since last gc int gc_counter; // objects created since last gc
bool gc_enabled; bool gc_enabled;
py_TValue debug_callback;
} ManagedHeap; } ManagedHeap;
// Statistics gathered for a single GC cycle. An instance is created by
// ManagedHeapSwpetInfo__new() only while a GC debug callback is installed,
// filled in during mark/sweep, rendered into the callback's text report,
// and then released with ManagedHeapSwpetInfo__delete().
typedef struct {
// clock() value taken when the record is created (start of the cycle)
clock_t start;
// clock() value taken right after ManagedHeap__mark() returns
clock_t mark_end;
// clock() value taken right after ManagedHeap__sweep() returns
clock_t swpet_end;
// number of VM types when the record was created; length of both arrays below
int types_length;
// small_types[t]: objects of type `t` freed from the small-object pools
int* small_types;
// large_types[t]: objects of type `t` freed from the large-object list
int* large_types;
// total objects freed from the small-object pools in this cycle
int small_freed;
// total objects freed from the large-object list in this cycle
int large_freed;
// Snapshot of the automatic `gc_threshold` adjustment.
// ManagedHeap__collect() only sets `before` and `after` (both to the current
// threshold); the remaining fields keep the zero from initialisation.
struct {
// gc_threshold before the adjustment
int before;
// newly computed threshold, recorded before it is clamped to [lower, upper]
int after;
// upper clamp bound for the threshold
int upper;
// lower clamp bound for the threshold
int lower;
// average freed count used as the adjustment input
// NOTE(review): presumably derived from `freed_ma` -- confirm in collect_if_needed
int avg_freed;
// avg_freed divided by the pre-adjustment gc_threshold
float free_ratio;
} auto_thres;
} ManagedHeapSwpetInfo;
void ManagedHeap__ctor(ManagedHeap* self); void ManagedHeap__ctor(ManagedHeap* self);
void ManagedHeap__dtor(ManagedHeap* self); void ManagedHeap__dtor(ManagedHeap* self);
ManagedHeapSwpetInfo* ManagedHeapSwpetInfo__new();
void ManagedHeapSwpetInfo__delete(ManagedHeapSwpetInfo* self);
void ManagedHeap__collect_if_needed(ManagedHeap* self); void ManagedHeap__collect_if_needed(ManagedHeap* self);
int ManagedHeap__collect(ManagedHeap* self); int ManagedHeap__collect(ManagedHeap* self);
int ManagedHeap__sweep(ManagedHeap* self); int ManagedHeap__sweep(ManagedHeap* self, ManagedHeapSwpetInfo* out_info);
#define ManagedHeap__new(self, type, slots, udsize) \ #define ManagedHeap__new(self, type, slots, udsize) \
ManagedHeap__gcnew((self), (type), (slots), (udsize)) ManagedHeap__gcnew((self), (type), (slots), (udsize))

View File

@ -31,7 +31,7 @@ typedef struct MultiPool {
} MultiPool; } MultiPool;
void* MultiPool__alloc(MultiPool* self, int size); void* MultiPool__alloc(MultiPool* self, int size);
int MultiPool__sweep_dealloc(MultiPool* self); int MultiPool__sweep_dealloc(MultiPool* self, int* out_types);
void MultiPool__ctor(MultiPool* self); void MultiPool__ctor(MultiPool* self);
void MultiPool__dtor(MultiPool* self); void MultiPool__dtor(MultiPool* self);
c11_string* MultiPool__summary(MultiPool* self); c11_string* MultiPool__summary(MultiPool* self);

View File

@ -1,4 +1,4 @@
from typing import Self, Literal from typing import Self, Literal, Callable
from vmath import vec2, vec2i from vmath import vec2, vec2i
class TValue[T]: class TValue[T]:
@ -16,6 +16,8 @@ configmacros: dict[str, int]
def memory_usage() -> str: def memory_usage() -> str:
"""Return a summary of the memory usage.""" """Return a summary of the memory usage."""
def setup_gc_debug_callback(cb: Callable[[str], None]) -> None:
    """Set up a callback that is invoked at the end of each GC cycle.

    The callback receives one `str`: a text report of the cycle (mark/sweep
    timings, freed-object counts per type, and the threshold adjustment).
    If the callback raises, the interpreter aborts with the formatted error.
    """
def is_user_defined_type(t: type) -> bool: def is_user_defined_type(t: type) -> bool:
"""Check if a type is user-defined. This means the type was created by executing python `class` statement.""" """Check if a type is user-defined. This means the type was created by executing python `class` statement."""

View File

@ -2,6 +2,7 @@
#include "pocketpy/config.h" #include "pocketpy/config.h"
#include "pocketpy/interpreter/objectpool.h" #include "pocketpy/interpreter/objectpool.h"
#include "pocketpy/objects/base.h" #include "pocketpy/objects/base.h"
#include "pocketpy/common/sstream.h"
#include "pocketpy/pocketpy.h" #include "pocketpy/pocketpy.h"
#include <assert.h> #include <assert.h>
@ -16,6 +17,7 @@ void ManagedHeap__ctor(ManagedHeap* self) {
self->gc_threshold = PK_GC_MIN_THRESHOLD; self->gc_threshold = PK_GC_MIN_THRESHOLD;
self->gc_counter = 0; self->gc_counter = 0;
self->gc_enabled = true; self->gc_enabled = true;
self->debug_callback = *py_None();
} }
void ManagedHeap__dtor(ManagedHeap* self) { void ManagedHeap__dtor(ManagedHeap* self) {
@ -31,10 +33,80 @@ void ManagedHeap__dtor(ManagedHeap* self) {
c11_vector__dtor(&self->gc_roots); c11_vector__dtor(&self->gc_roots);
} }
/**
 * Render the statistics of one finished GC cycle into a text report and pass
 * it, as a single string argument, to the heap's `debug_callback`.
 *
 * @param self      heap whose `debug_callback` is invoked.
 * @param out_info  statistics of the cycle that just finished; must not be NULL.
 *
 * If the callback fails, the process is aborted with the formatted exception.
 */
static void ManagedHeap__fire_debug_callback(ManagedHeap* self, ManagedHeapSwpetInfo* out_info) {
    assert(out_info != NULL);

    c11_sbuf buf;
    c11_sbuf__ctor(&buf);

    const char* DIVIDER = "------------------------------------------------------------\n";

    // Convert clock ticks to milliseconds by multiplying before dividing.
    // Dividing by a precomputed `CLOCKS_PER_SEC / 1000` would divide by zero
    // on platforms where CLOCKS_PER_SEC < 1000 and would lose precision when
    // it is not a multiple of 1000.
    const py_i64 ticks_per_sec = (py_i64)CLOCKS_PER_SEC;
    py_i64 start_ms = (py_i64)out_info->start * 1000 / ticks_per_sec;
    py_i64 mark_ms = (py_i64)(out_info->mark_end - out_info->start) * 1000 / ticks_per_sec;
    py_i64 swpet_ms = (py_i64)(out_info->swpet_end - out_info->mark_end) * 1000 / ticks_per_sec;

    // --- timings ---
    c11_sbuf__write_cstr(&buf, DIVIDER);
    pk_sprintf(&buf, "start: %f\n", (double)start_ms / 1000);
    pk_sprintf(&buf, "mark_ms: %i\n", mark_ms);
    pk_sprintf(&buf, "swpet_ms: %i\n", swpet_ms);
    pk_sprintf(&buf, "total_ms: %i\n", (py_i64)(mark_ms + swpet_ms));

    // --- totals ---
    c11_sbuf__write_cstr(&buf, DIVIDER);
    pk_sprintf(&buf, "types_length: %d\n", out_info->types_length);
    pk_sprintf(&buf, "small_freed: %d\n", out_info->small_freed);
    pk_sprintf(&buf, "large_freed: %d\n", out_info->large_freed);
    c11_sbuf__write_cstr(&buf, DIVIDER);

    // --- per-type breakdown, only for types that actually lost objects ---
    if(out_info->small_freed != 0 || out_info->large_freed != 0) {
        char line_buf[256];
        for(int i = 0; i < out_info->types_length; i++) {
            int s_freed = out_info->small_types[i];
            int l_freed = out_info->large_types[i];
            if(s_freed == 0 && l_freed == 0) continue;
            // look the name up only for rows that are printed
            const char* type_name = py_tpname(i);
            snprintf(line_buf,
                     sizeof(line_buf),
                     "[%-24s] small: %6d large: %6d\n",
                     type_name,
                     s_freed,
                     l_freed);
            c11_sbuf__write_cstr(&buf, line_buf);
        }
        c11_sbuf__write_cstr(&buf, DIVIDER);
    }

    // --- threshold auto-adjustment ---
    pk_sprintf(&buf, "auto_thres.before: %d\n", out_info->auto_thres.before);
    pk_sprintf(&buf, "auto_thres.after: %d\n", out_info->auto_thres.after);
    pk_sprintf(&buf, "auto_thres.upper: %d\n", out_info->auto_thres.upper);
    pk_sprintf(&buf, "auto_thres.lower: %d\n", out_info->auto_thres.lower);
    pk_sprintf(&buf, "auto_thres.avg_freed: %d\n", out_info->auto_thres.avg_freed);
    pk_sprintf(&buf, "auto_thres.free_ratio: %f\n", out_info->auto_thres.free_ratio);
    c11_sbuf__write_cstr(&buf, DIVIDER);

    // Call `debug_callback(report)`: push the callable, a nil slot
    // (presumably the unbound `self` slot -- confirm against py_vectorcall),
    // then the report submitted from the buffer as the only positional argument.
    py_push(&self->debug_callback);
    py_pushnil();
    py_StackRef arg = py_pushtmp();
    c11_sbuf__py_submit(&buf, arg);
    bool ok = py_vectorcall(1, 0);
    if(!ok) {
        char* msg = py_formatexc();
        c11__abort("gc_debug_callback error!!\n%s", msg);
    }
}
void ManagedHeap__collect_if_needed(ManagedHeap* self) { void ManagedHeap__collect_if_needed(ManagedHeap* self) {
if(!self->gc_enabled) return; if(!self->gc_enabled) return;
if(self->gc_counter < self->gc_threshold) return; if(self->gc_counter < self->gc_threshold) return;
int freed = ManagedHeap__collect(self); self->gc_counter = 0;
ManagedHeapSwpetInfo* out_info = NULL;
if(!py_isnone(&self->debug_callback)) out_info = ManagedHeapSwpetInfo__new();
ManagedHeap__mark(self);
if(out_info) out_info->mark_end = clock();
int freed = ManagedHeap__sweep(self, out_info);
if(out_info) out_info->swpet_end = clock();
// adjust `gc_threshold` based on `freed_ma` // adjust `gc_threshold` based on `freed_ma`
self->freed_ma[0] = self->freed_ma[1]; self->freed_ma[0] = self->freed_ma[1];
self->freed_ma[1] = self->freed_ma[2]; self->freed_ma[1] = self->freed_ma[2];
@ -44,22 +116,49 @@ void ManagedHeap__collect_if_needed(ManagedHeap* self) {
const int lower = PK_GC_MIN_THRESHOLD / 2; const int lower = PK_GC_MIN_THRESHOLD / 2;
float free_ratio = (float)avg_freed / self->gc_threshold; float free_ratio = (float)avg_freed / self->gc_threshold;
int new_threshold = self->gc_threshold * (1.5f / free_ratio); int new_threshold = self->gc_threshold * (1.5f / free_ratio);
// printf("gc_threshold=%d, avg_freed=%d, new_threshold=%d\n", self->gc_threshold, avg_freed, if(out_info) {
// new_threshold); out_info->auto_thres.before = self->gc_threshold;
out_info->auto_thres.after = new_threshold;
out_info->auto_thres.upper = upper;
out_info->auto_thres.lower = lower;
out_info->auto_thres.avg_freed = avg_freed;
out_info->auto_thres.free_ratio = free_ratio;
}
self->gc_threshold = c11__min(c11__max(new_threshold, lower), upper); self->gc_threshold = c11__min(c11__max(new_threshold, lower), upper);
if(!py_isnone(&self->debug_callback)) {
ManagedHeap__fire_debug_callback(self, out_info);
ManagedHeapSwpetInfo__delete(out_info);
}
} }
int ManagedHeap__collect(ManagedHeap* self) { int ManagedHeap__collect(ManagedHeap* self) {
self->gc_counter = 0; self->gc_counter = 0;
ManagedHeapSwpetInfo* out_info = NULL;
if(!py_isnone(&self->debug_callback)) out_info = ManagedHeapSwpetInfo__new();
ManagedHeap__mark(self); ManagedHeap__mark(self);
int freed = ManagedHeap__sweep(self); if(out_info) out_info->mark_end = clock();
// printf("GC: collected %d objects\n", freed); int freed = ManagedHeap__sweep(self, out_info);
if(out_info) out_info->swpet_end = clock();
if(out_info) {
out_info->auto_thres.before = self->gc_threshold;
out_info->auto_thres.after = self->gc_threshold;
}
if(!py_isnone(&self->debug_callback)) {
ManagedHeap__fire_debug_callback(self, out_info);
ManagedHeapSwpetInfo__delete(out_info);
}
return freed; return freed;
} }
int ManagedHeap__sweep(ManagedHeap* self) { int ManagedHeap__sweep(ManagedHeap* self, ManagedHeapSwpetInfo* out_info) {
// small_objects // small_objects
int small_freed = MultiPool__sweep_dealloc(&self->small_objects); int small_freed =
MultiPool__sweep_dealloc(&self->small_objects, out_info ? out_info->small_types : NULL);
// large_objects // large_objects
int large_living_count = 0; int large_living_count = 0;
for(int i = 0; i < self->large_objects.length; i++) { for(int i = 0; i < self->large_objects.length; i++) {
@ -69,6 +168,7 @@ int ManagedHeap__sweep(ManagedHeap* self) {
c11__setitem(PyObject*, &self->large_objects, large_living_count, obj); c11__setitem(PyObject*, &self->large_objects, large_living_count, obj);
large_living_count++; large_living_count++;
} else { } else {
if(out_info) out_info->large_types[obj->type]++;
PyObject__dtor(obj); PyObject__dtor(obj);
PK_FREE(obj); PK_FREE(obj);
} }
@ -76,8 +176,10 @@ int ManagedHeap__sweep(ManagedHeap* self) {
// shrink `self->large_objects` // shrink `self->large_objects`
int large_freed = self->large_objects.length - large_living_count; int large_freed = self->large_objects.length - large_living_count;
self->large_objects.length = large_living_count; self->large_objects.length = large_living_count;
// printf("large_freed=%d\n", large_freed); if(out_info) {
// printf("small_freed=%d\n", small_freed); out_info->small_freed = small_freed;
out_info->large_freed = large_freed;
}
return small_freed + large_freed; return small_freed + large_freed;
} }

View File

@ -36,7 +36,7 @@ static void* PoolArena__alloc(PoolArena* self) {
return self->data + index * self->block_size; return self->data + index * self->block_size;
} }
static int PoolArena__sweep_dealloc(PoolArena* self) { static int PoolArena__sweep_dealloc(PoolArena* self, int* out_types) {
int freed = 0; int freed = 0;
self->unused_length = 0; self->unused_length = 0;
for(int i = 0; i < self->block_count; i++) { for(int i = 0; i < self->block_count; i++) {
@ -48,6 +48,7 @@ static int PoolArena__sweep_dealloc(PoolArena* self) {
} else { } else {
if(!obj->gc_marked) { if(!obj->gc_marked) {
// not marked, need to free // not marked, need to free
if(out_types) out_types[obj->type]++;
PyObject__dtor(obj); PyObject__dtor(obj);
obj->type = 0; obj->type = 0;
freed++; freed++;
@ -91,7 +92,10 @@ static void* Pool__alloc(Pool* self) {
return ptr; return ptr;
} }
static int Pool__sweep_dealloc(Pool* self, c11_vector* arenas, c11_vector* no_free_arenas) { static int Pool__sweep_dealloc(Pool* self,
c11_vector* arenas,
c11_vector* no_free_arenas,
int* out_types) {
c11_vector__clear(arenas); c11_vector__clear(arenas);
c11_vector__clear(no_free_arenas); c11_vector__clear(no_free_arenas);
@ -99,7 +103,7 @@ static int Pool__sweep_dealloc(Pool* self, c11_vector* arenas, c11_vector* no_fr
for(int i = 0; i < self->arenas.length; i++) { for(int i = 0; i < self->arenas.length; i++) {
PoolArena* item = c11__getitem(PoolArena*, &self->arenas, i); PoolArena* item = c11__getitem(PoolArena*, &self->arenas, i);
assert(item->unused_length > 0); assert(item->unused_length > 0);
freed += PoolArena__sweep_dealloc(item); freed += PoolArena__sweep_dealloc(item, out_types);
if(item->unused_length == item->block_count) { if(item->unused_length == item->block_count) {
// all free // all free
if(arenas->length > 0) { if(arenas->length > 0) {
@ -116,7 +120,7 @@ static int Pool__sweep_dealloc(Pool* self, c11_vector* arenas, c11_vector* no_fr
} }
for(int i = 0; i < self->no_free_arenas.length; i++) { for(int i = 0; i < self->no_free_arenas.length; i++) {
PoolArena* item = c11__getitem(PoolArena*, &self->no_free_arenas, i); PoolArena* item = c11__getitem(PoolArena*, &self->no_free_arenas, i);
freed += PoolArena__sweep_dealloc(item); freed += PoolArena__sweep_dealloc(item, out_types);
if(item->unused_length == 0) { if(item->unused_length == 0) {
// still no free // still no free
c11_vector__push(PoolArena*, no_free_arenas, item); c11_vector__push(PoolArena*, no_free_arenas, item);
@ -146,7 +150,7 @@ void* MultiPool__alloc(MultiPool* self, int size) {
return NULL; return NULL;
} }
int MultiPool__sweep_dealloc(MultiPool* self) { int MultiPool__sweep_dealloc(MultiPool* self, int* out_types) {
c11_vector arenas; c11_vector arenas;
c11_vector no_free_arenas; c11_vector no_free_arenas;
c11_vector__ctor(&arenas, sizeof(PoolArena*)); c11_vector__ctor(&arenas, sizeof(PoolArena*));
@ -154,7 +158,7 @@ int MultiPool__sweep_dealloc(MultiPool* self) {
int freed = 0; int freed = 0;
for(int i = 0; i < kMultiPoolCount; i++) { for(int i = 0; i < kMultiPoolCount; i++) {
Pool* item = &self->pools[i]; Pool* item = &self->pools[i];
freed += Pool__sweep_dealloc(item, &arenas, &no_free_arenas); freed += Pool__sweep_dealloc(item, &arenas, &no_free_arenas, out_types);
} }
c11_vector__dtor(&arenas); c11_vector__dtor(&arenas);
c11_vector__dtor(&no_free_arenas); c11_vector__dtor(&no_free_arenas);

View File

@ -685,6 +685,8 @@ void ManagedHeap__mark(ManagedHeap* self) {
for(int i = 0; i < c11__count_array(vm->reg); i++) { for(int i = 0; i < c11__count_array(vm->reg); i++) {
pk__mark_value(&vm->reg[i]); pk__mark_value(&vm->reg[i]);
} }
// mark gc debug callback
pk__mark_value(&vm->heap.debug_callback);
// mark user func // mark user func
if(vm->callbacks.gc_mark) vm->callbacks.gc_mark(pk__mark_value_func, p_stack); if(vm->callbacks.gc_mark) vm->callbacks.gc_mark(pk__mark_value_func, p_stack);
/*****************************/ /*****************************/

View File

@ -128,4 +128,25 @@ void PyObject__dtor(PyObject* self) {
NameDict* dict = PyObject__dict(self); NameDict* dict = PyObject__dict(self);
NameDict__dtor(dict); NameDict__dtor(dict);
} }
} }
// Allocate a zeroed GC statistics record with one "freed" counter per type
// currently registered in the VM (for small and for large objects), and stamp
// it with the current clock() value as the start of the cycle.
// The result must be released with ManagedHeapSwpetInfo__delete().
ManagedHeapSwpetInfo* ManagedHeapSwpetInfo__new() {
    ManagedHeapSwpetInfo* info = py_malloc(sizeof(ManagedHeapSwpetInfo));
    memset(info, 0, sizeof(ManagedHeapSwpetInfo));

    int n_types = pk_current_vm->types.length;
    info->types_length = n_types;
    info->small_types = py_malloc(sizeof(int) * n_types);
    info->large_types = py_malloc(sizeof(int) * n_types);

    // clear both per-type counter arrays
    for(int t = n_types - 1; t >= 0; t--) {
        info->small_types[t] = 0;
        info->large_types[t] = 0;
    }

    info->start = clock();
    return info;
}
// Release a record created by ManagedHeapSwpetInfo__new(): both per-type
// counter arrays first, then the (wiped) record itself.
void ManagedHeapSwpetInfo__delete(ManagedHeapSwpetInfo* self) {
    int* small_counts = self->small_types;
    int* large_counts = self->large_types;
    py_free(small_counts);
    py_free(large_counts);
    // wipe the record so stale pointers are not left behind in freed memory
    memset(self, 0, sizeof(*self));
    py_free(self);
}

View File

@ -57,6 +57,14 @@ static bool pkpy_memory_usage(int argc, py_Ref argv) {
return true; return true;
} }
// pkpy.setup_gc_debug_callback(cb)
// Install `cb` as the heap's GC debug callback. Passing None removes it
// (the heap treats a None callback as "disabled").
// Raises TypeError for any other non-callable value: the GC aborts the
// process when invoking the callback fails, so a bad value must be rejected
// here rather than at the next collection.
static bool pkpy_setup_gc_debug_callback(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    if(!py_isnone(argv) && !py_callable(argv)) {
        return TypeError("setup_gc_debug_callback() expected a callable or None");
    }
    ManagedHeap* heap = &pk_current_vm->heap;
    heap->debug_callback = *argv;
    py_newnone(py_retval());
    return true;
}
static bool pkpy_is_user_defined_type(int argc, py_Ref argv) { static bool pkpy_is_user_defined_type(int argc, py_Ref argv) {
PY_CHECK_ARGC(1); PY_CHECK_ARGC(1);
PY_CHECK_ARG_TYPE(0, tp_type); PY_CHECK_ARG_TYPE(0, tp_type);
@ -530,6 +538,7 @@ void pk__add_module_pkpy() {
py_pop(); py_pop();
py_bindfunc(mod, "memory_usage", pkpy_memory_usage); py_bindfunc(mod, "memory_usage", pkpy_memory_usage);
py_bindfunc(mod, "setup_gc_debug_callback", pkpy_setup_gc_debug_callback);
py_bindfunc(mod, "is_user_defined_type", pkpy_is_user_defined_type); py_bindfunc(mod, "is_user_defined_type", pkpy_is_user_defined_type);
py_bindfunc(mod, "currentvm", pkpy_currentvm); py_bindfunc(mod, "currentvm", pkpy_currentvm);

View File

@ -1,7 +1,11 @@
import gc import gc
from pkpy import setup_gc_debug_callback
setup_gc_debug_callback(print)
gc.collect()
def create_garbage(): def create_garbage():
a = [(1,2) for i in range(10000)] a = [(1,2) for i in range(20000)]
return a return a
create_garbage() create_garbage()