Add more gc control to improve performance (#510 )

* add more gc control * fix * Update gc.c * Update ceval.c * [no ci] Update 711_gc.py
add __mod__
2026-05-06 18:23:38 +00:00 · 2026-04-29 13:55:41 +08:00 · 2026-04-29 12:54:04 +08:00 · 2026-04-23 11:06:38 +08:00
12 changed files with 116 additions and 44 deletions
--- a/include/pocketpy/objects/object.h
+++ b/include/pocketpy/objects/object.h
@ -6,8 +6,8 @@
 typedef struct PyObject {
    py_Type type;  // we have a duplicated type here for convenience
    uint8_t size_8b;
-    bool gc_marked;
+    uint8_t gc_marked;  // lsb (self is marked), 2nd lsb (no recursively mark)
-    int slots;  // number of slots in the object
+    int slots;          // number of slots in the object
    char flex[];
 } PyObject;
@ -25,11 +25,11 @@ void* PyObject__userdata(PyObject* self);
 void PyObject__dtor(PyObject* self);
 #define pk__mark_value(val)                                                                        \
-    if((val)->is_ptr && !(val)->_obj->gc_marked) {                                                 \
+    if((val)->is_ptr) {                                                                            \
        PyObject* obj = (val)->_obj;                                                               \
-        obj->gc_marked = true;                                                                     \
+        if(!(obj->gc_marked & 0b01)) {                                                             \
-        c11_vector__push(PyObject*, p_stack, obj);                                                 \
+            obj->gc_marked |= 0b01;                                                                \
            if(!(obj->gc_marked & 0b10)) { c11_vector__push(PyObject*, p_stack, obj); }            \
        }                                                                                          \
    }
--- a/include/typings/gc.pyi
+++ b/include/typings/gc.pyi
@ -25,3 +25,16 @@ def collect_hint() -> int:
 def setup_debug_callback(cb: Callable[[Literal['start', 'stop'], str], None] | None) -> None:
    """Set up a callback that will be triggered at the end of each collection."""
 def is_tracked(obj: object) -> bool:
    """Return true if the object is tracked recursively.

    Values that are not stored as object pointers always return false.
    """
 def track(obj: object) -> None:
    """Start tracking this object recursively.

    Raises TypeError if `obj` is not stored as an object pointer.
    """
 def untrack(obj: object) -> None:
    """Stop tracking this object recursively.
    This improves performance for container objects with value types like `list[int]`.

    Raises TypeError if `obj` is not stored as an object pointer.

    NOTE(review): once untracked, the collector still marks `obj` itself but no
    longer marks what it refers to, so objects reachable only through `obj` look
    unreferenced to the sweep. Presumably only safe for containers that hold no
    object references; confirm before untracking anything else.
    """
--- a/include/typings/vmath.pyi
+++ b/include/typings/vmath.pyi
@ -31,6 +31,7 @@ class _vecI[T]:
    @overload
    def __mul__(self, other: T) -> T: ...
    def __floordiv__(self, other: int) -> T: ...
    def __mod__(self, other: int) -> T: ...
    def __hash__(self) -> int: ...
--- a/src/bindings/py_number.c
+++ b/src/bindings/py_number.c
@ -127,7 +127,7 @@ static bool number__pow__(int argc, py_Ref argv) {
 static py_i64 i64_abs(py_i64 x) { return x < 0 ? -x : x; }
-static py_i64 cpy11__fast_floor_div(py_i64 a, py_i64 b) {
+py_i64 cpy11__fast_floor_div(py_i64 a, py_i64 b) {
    assert(b != 0);
    if(a == 0) return 0;
    if((a < 0) == (b < 0)) {
@ -137,7 +137,7 @@ static py_i64 cpy11__fast_floor_div(py_i64 a, py_i64 b) {
    }
 }
-static py_i64 cpy11__fast_mod(py_i64 a, py_i64 b) {
+py_i64 cpy11__fast_mod(py_i64 a, py_i64 b) {
    assert(b != 0);
    if(a == 0) return 0;
    py_i64 res;
--- a/src/interpreter/ceval.c
+++ b/src/interpreter/ceval.c
@ -539,11 +539,15 @@ __NEXT_STEP:
            DISPATCH();
        }
        case OP_BUILD_TUPLE: {
            bool need_track = false;
            py_TValue tmp;
            py_Ref p = py_newtuple(&tmp, byte.arg);
            py_TValue* begin = SP() - byte.arg;
-            for(int i = 0; i < byte.arg; i++)
+            for(int i = 0; i < byte.arg; i++) {
                p[i] = begin[i];
                if(p[i].is_ptr) need_track = true;
            }
            if(!need_track) tmp._obj->gc_marked |= 0b10;
            SP() = begin;
            PUSH(&tmp);
            DISPATCH();
--- a/src/interpreter/heap.c
+++ b/src/interpreter/heap.c
@ -202,8 +202,8 @@ int ManagedHeap__sweep(ManagedHeap* self, ManagedHeapSwpetInfo* out_info) {
    int large_living_count = 0;
    for(int i = 0; i < self->large_objects.length; i++) {
        PyObject* obj = c11__getitem(PyObject*, &self->large_objects, i);
-        if(obj->gc_marked) {
+        if(obj->gc_marked & 0b01) {
-            obj->gc_marked = false;
+            obj->gc_marked &= 0b10;
            c11__setitem(PyObject*, &self->large_objects, large_living_count, obj);
            large_living_count++;
        } else {
@ -238,7 +238,7 @@ PyObject* ManagedHeap__gcnew(ManagedHeap* self, py_Type type, int slots, int uds
    }
    obj->type = type;
    obj->size_8b = size_8b;
-    obj->gc_marked = false;
+    obj->gc_marked = 0;
    obj->slots = slots;
    // initialize slots or dict
--- a/src/interpreter/objectpool.c
+++ b/src/interpreter/objectpool.c
@ -38,16 +38,16 @@ static int PoolArena__sweep_dealloc(PoolArena* self, int* out_types) {
            self->unused[self->unused_length] = i;
            self->unused_length++;
        } else {
-            if(!obj->gc_marked) {
+            if(obj->gc_marked & 0b01) {
                // marked, clear mark
                obj->gc_marked &= 0b10;
            } else {
                // not marked, need to free
                if(out_types) out_types[obj->type]++;
                PyObject__dtor(obj);
                obj->type = 0;
                self->unused[self->unused_length] = i;
                self->unused_length++;
            } else {
                // marked, clear mark
                obj->gc_marked = false;
            }
        }
    }
--- a/src/interpreter/vm.c
+++ b/src/interpreter/vm.c
@ -265,10 +265,10 @@ void VM__ctor(VM* self) {
    pk__add_module_unicodedata();
    pk__add_module_conio();
-    pk__add_module_lz4();       // optional
+    pk__add_module_lz4();        // optional
-    pk__add_module_cute_png();  // optional
+    pk__add_module_cute_png();   // optional
-    pk__add_module_msgpack();   // optional
+    pk__add_module_msgpack();    // optional
-    py__add_module_periphery(); // optional
+    py__add_module_periphery();  // optional
    pk__add_module_pkpy();
    pk__add_module_picoterm();
@ -697,7 +697,7 @@ void ManagedHeap__mark(ManagedHeap* self) {
        PyObject* obj = c11_vector__back(PyObject*, p_stack);
        c11_vector__pop(p_stack);
-        assert(obj->gc_marked);
+        assert(obj->gc_marked & 0b01);
        if(obj->slots > 0) {
            py_TValue* p = PyObject__slots(obj);
--- a/src/modules/gc.c
+++ b/src/modules/gc.c
@ -48,6 +48,33 @@ static bool gc_setup_debug_callback(int argc, py_Ref argv) {
    return true;
 }
 // gc.is_tracked(obj): true when the "no recursive mark" flag (0b10) is clear.
 static bool gc_is_tracked(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    // Values that are not object pointers are always reported as untracked.
    bool tracked = false;
    if(argv->is_ptr) tracked = (argv->_obj->gc_marked & 0b10) == 0;
    py_newbool(py_retval(), tracked);
    return true;
 }
 // gc.track(obj): clear the "no recursive mark" flag, keeping only the mark bit (0b01).
 static bool gc_track(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    if(argv->is_ptr) {
        argv->_obj->gc_marked &= 0b01;
        py_newnone(py_retval());
        return true;
    }
    // Only pointer-backed values have a flag to clear.
    return TypeError("gc.track() only accepts objects");
 }
 // gc.untrack(obj): set the "no recursive mark" flag (0b10) on the object.
 static bool gc_untrack(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    bool is_object = argv->is_ptr;
    // Only pointer-backed values have a flag to set.
    if(!is_object) { return TypeError("gc.untrack() only accepts objects"); }
    argv->_obj->gc_marked |= 0b10;
    py_newnone(py_retval());
    return true;
 }
 void pk__add_module_gc() {
    py_Ref mod = py_newmodule("gc");
@ -58,4 +85,8 @@ void pk__add_module_gc() {
    py_bindfunc(mod, "collect", gc_collect);
    py_bindfunc(mod, "collect_hint", gc_collect_hint);
    py_bindfunc(mod, "setup_debug_callback", gc_setup_debug_callback);
    py_bindfunc(mod, "is_tracked", gc_is_tracked);
    py_bindfunc(mod, "track", gc_track);
    py_bindfunc(mod, "untrack", gc_untrack);
 }
--- a/src/modules/vmath.c
+++ b/src/modules/vmath.c
@ -8,6 +8,9 @@
 static bool isclose(float a, float b) { return dmath_fabs(a - b) < 1e-4; }
 py_i64 cpy11__fast_floor_div(py_i64 a, py_i64 b);
 py_i64 cpy11__fast_mod(py_i64 a, py_i64 b);
 #define DEFINE_VEC_FIELD(name, T, Tc, field)                                                       \
    static bool name##__##field(int argc, py_Ref argv) {                                           \
        PY_CHECK_ARGC(1);                                                                          \
@ -204,7 +207,7 @@ static py_Ref _const(py_Type type, const char* name) {
        float sum = 0;                                                                             \
        for(int i = 0; i < D; i++)                                                                 \
            sum += v.data[i] * v.data[i];                                                          \
-        py_newfloat(py_retval(), dmath_sqrt(sum));                                                \
+        py_newfloat(py_retval(), dmath_sqrt(sum));                                                 \
        return true;                                                                               \
    }                                                                                              \
    static bool vec##D##_length_squared(int argc, py_Ref argv) {                                   \
@ -234,7 +237,7 @@ static py_Ref _const(py_Type type, const char* name) {
        for(int i = 0; i < D; i++)                                                                 \
            len += self.data[i] * self.data[i];                                                    \
        if(isclose(len, 0)) return ZeroDivisionError("cannot normalize zero vector");              \
-        len = dmath_sqrt(len);                                                                          \
+        len = dmath_sqrt(len);                                                                     \
        c11_vec##D res;                                                                            \
        for(int i = 0; i < D; i++)                                                                 \
            res.data[i] = self.data[i] / len;                                                      \
@ -314,7 +317,17 @@ DEF_VECTOR_OPS(3)
        c11_vec##D##i a = py_tovec##D##i(&argv[0]);                                                \
        py_i64 b = py_toint(&argv[1]);                                                             \
        for(int i = 0; i < D; i++)                                                                 \
-            a.data[i] /= b;                                                                        \
+            a.data[i] = cpy11__fast_floor_div(a.data[i], b);                                       \
        py_newvec##D##i(py_retval(), a);                                                           \
        return true;                                                                               \
    }                                                                                              \
    /* Element-wise vec % int via cpy11__fast_mod; raises ZeroDivisionError if b == 0. */          \
    static bool vec##D##i##__mod__(int argc, py_Ref argv) {                                        \
        PY_CHECK_ARGC(2);                                                                          \
        PY_CHECK_ARG_TYPE(1, tp_int);                                                              \
        c11_vec##D##i a = py_tovec##D##i(&argv[0]);                                                \
        py_i64 b = py_toint(&argv[1]);                                                             \
        /* cpy11__fast_mod asserts b != 0, so reject it here. */                                   \
        if(b == 0) return ZeroDivisionError("integer modulo by zero");                             \
        for(int i = 0; i < D; i++)                                                                 \
            a.data[i] = cpy11__fast_mod(a.data[i], b);                                             \
        py_newvec##D##i(py_retval(), a);                                                           \
        return true;                                                                               \
    }
@ -381,7 +394,8 @@ static bool vec2_angle_STATIC(int argc, py_Ref argv) {
    PY_CHECK_ARGC(2);
    PY_CHECK_ARG_TYPE(0, tp_vec2);
    PY_CHECK_ARG_TYPE(1, tp_vec2);
-    float val = dmath_atan2(argv[1]._vec2.y, argv[1]._vec2.x) - dmath_atan2(argv[0]._vec2.y, argv[0]._vec2.x);
+    float val = dmath_atan2(argv[1]._vec2.y, argv[1]._vec2.x) -
                dmath_atan2(argv[0]._vec2.y, argv[0]._vec2.x);
    if(val > DMATH_PI) val -= 2 * (float)DMATH_PI;
    if(val < -DMATH_PI) val += 2 * (float)DMATH_PI;
    py_newfloat(py_retval(), val);
@ -1237,6 +1251,7 @@ void pk__add_module_vmath() {
    py_bindmagic(vec2i, __sub__, vec2i__sub__);
    py_bindmagic(vec2i, __mul__, vec2i__mul__);
    py_bindmagic(vec2i, __floordiv__, vec2i__floordiv__);
    py_bindmagic(vec2i, __mod__, vec2i__mod__);
    py_bindmagic(vec2i, __eq__, vec2i__eq__);
    py_bindmagic(vec2i, __ne__, vec2i__ne__);
    py_bindmagic(vec2i, __hash__, vec2i__hash__);
@ -1262,6 +1277,7 @@ void pk__add_module_vmath() {
    py_bindmagic(vec3i, __sub__, vec3i__sub__);
    py_bindmagic(vec3i, __mul__, vec3i__mul__);
    py_bindmagic(vec3i, __floordiv__, vec3i__floordiv__);
    py_bindmagic(vec3i, __mod__, vec3i__mod__);
    py_bindmagic(vec3i, __eq__, vec3i__eq__);
    py_bindmagic(vec3i, __ne__, vec3i__ne__);
    py_bindmagic(vec3i, __hash__, vec3i__hash__);
@ -1289,6 +1305,7 @@ void pk__add_module_vmath() {
    py_bindmagic(vec4i, __sub__, vec4i__sub__);
    py_bindmagic(vec4i, __mul__, vec4i__mul__);
    py_bindmagic(vec4i, __floordiv__, vec4i__floordiv__);
    py_bindmagic(vec4i, __mod__, vec4i__mod__);
    py_bindmagic(vec4i, __eq__, vec4i__eq__);
    py_bindmagic(vec4i, __ne__, vec4i__ne__);
    py_bindmagic(vec4i, __hash__, vec4i__hash__);
--- a/tests/711_gc.py
+++ b/tests/711_gc.py
@ -12,4 +12,10 @@ gc.collect()
 create_garbage()
 create_garbage()
-create_garbage()
+c = create_garbage()
 # A freshly created object starts out tracked.
 assert gc.is_tracked(c)
 # untrack() and track() toggle the flag reported by is_tracked().
 gc.untrack(c)
 assert not gc.is_tracked(c)
 gc.track(c)
 assert gc.is_tracked(c)
--- a/tests/930_deterministic_float.py
+++ b/tests/930_deterministic_float.py
@ -54,18 +54,18 @@ assertEqual(math.isnan(math.nan), True)
 # test exp
 assertEqual(math.exp(0), 1.0)
 assertEqual(math.exp(1), math.e)
-assertEqual(math.exp(1.5), 4.48168907033806362960604019463) #4.481689070338065 - 8.881784197001252e-16)
+assertEqual(math.exp(1.5), 4.48168907033806362960604019463)
-assertEqual(math.exp(3), 20.0855369231876608182574273087) #20.08553692318767 - 3.552713678800501e-15)
+assertEqual(math.exp(3), 20.0855369231876608182574273087)
-assertEqual(math.exp(-3), 0.04978706836786396527916309651) #0.04978706836786394 + 6.938893903907228e-18)
+assertEqual(math.exp(-3), 0.04978706836786396527916309651)
 assertEqual(math.exp(-2.253647), 0.1050155336754953 - 1.387778780781446e-17)
-assertEqual(math.exp(4.729036), 113.186398052200445363268954679) #113.1863980522005 - 4.263256414560601e-14)
+assertEqual(math.exp(4.729036), 113.186398052200445363268954679)
 # test log series
 assertEqual(math.log(0), -math.inf)
 assertEqual(math.log(1), 0.0)
 assertEqual(math.log(2), 0.69314718055994530942)
 assertEqual(math.log(math.e), 1.0)
-assertEqual(math.log(10), 2.30258509299404545700440394284) #2.30258509299404568402)
+assertEqual(math.log(10), 2.30258509299404545700440394284)
 assertEqual(math.log(28.897124), 3.363742074595449)
 assertEqual(math.log2(math.e), 1.4426950408889634074)
 assertEqual(math.log2(78.781291), 6.299781153677818)
@ -77,13 +77,13 @@ assertEqual(math.pow(2,2), 4.0)
 assertEqual(math.pow(1.41421356237309504880, 2), 2.0 + 4.440892098500626e-16)
 assertEqual(math.pow(0.70710678118654752440, 2), 0.5000000000000001)
 assertEqual(math.pow(-1.255782,-3), -0.5049603042167915)
-assertEqual(math.pow(6.127042, 4.071529), 1604.40754645674428502388764172) #1604.407546456745 + 2.273736754432321e-13)
+assertEqual(math.pow(6.127042, 4.071529), 1604.40754645674428502388764172)
 # test sqrt
-assertEqual(math.sqrt(2), 1.41421356237309492343001693370) #1.41421356237309504880)
+assertEqual(math.sqrt(2), 1.41421356237309492343001693370)
 assertEqual(math.sqrt(math.pi), 1.772453850905516 - 2.220446049250313e-16)
 assertEqual(math.sqrt(125.872509), 11.21929182257062)
-assertEqual(math.sqrt(1225.296280), 35.0042323155358019448613049462) #35.00423231553579)
+assertEqual(math.sqrt(1225.296280), 35.0042323155358019448613049462)
 # test cos, sin, tan
 assertEqual(math.cos(0), 1.0)
@ -112,19 +112,19 @@ assertEqual(math.asin(1), 1.570796326794897 - 4.440892098500626e-16)
 assertEqual(math.asin(-0.225895), -0.2278616865773913 + 2.775557561562891e-17)
 assertEqual(math.asin(0.955658), 1.271886195819423 + 4.440892098500626e-16)
 assertEqual(math.atan(0), 0.0)
-assertEqual(math.atan(1), 0.78539816339744839002179332965) #0.7853981633974483)
+assertEqual(math.atan(1), 0.78539816339744839002179332965)
-assertEqual(math.atan(-3.758927), -1.3107852846106160527028805518) #-1.310785284610617 - 4.440892098500626e-16)
+assertEqual(math.atan(-3.758927), -1.3107852846106160527028805518)
-assertEqual(math.atan(35.789293), 1.54286227728011748894232368911) #1.542862277280122)
+assertEqual(math.atan(35.789293), 1.54286227728011748894232368911)
 # test atan2
-assertEqual(math.atan2(math.pi/4, math.pi/4), 0.78539816339744839002179332965) #0.7853981633974483)
+assertEqual(math.atan2(math.pi/4, math.pi/4), 0.78539816339744839002179332965)
-assertEqual(math.atan2(-math.pi/4, math.pi/4), -0.7853981633974483900217933296) #-0.7853981633974483)
+assertEqual(math.atan2(-math.pi/4, math.pi/4), -0.7853981633974483900217933296)
 assertEqual(math.atan2(-math.pi/4, -math.pi/4), -2.356194490192345)
 assertEqual(math.atan2(math.pi/4, -math.pi/4), 2.356194490192345)
-assertEqual(math.atan2(1.573823, 0.685329), 1.16010368292465315676054160576) #1.160103682924653)
+assertEqual(math.atan2(1.573823, 0.685329), 1.16010368292465315676054160576)
-assertEqual(math.atan2(-0.899663, 0.668972), -0.9314162757114096136135117376) #-0.9314162757114095)
+assertEqual(math.atan2(-0.899663, 0.668972), -0.9314162757114096136135117376)
-# assertEqual(math.atan2(-0.762894, -0.126497), -1.7351133471732969049128314509) #-1.735113347173296 - 4.440892098500626e-16)
+assertEqual(math.atan2(-0.762894, -0.126497), -1.735113347173297126957436375960)
-# assertEqual(math.atan2(0.468463, -0.992734), 2.70068341069237316531825854326) #2.700683410692374 - 4.440892098500626e-16)
+assertEqual(math.atan2(0.468463, -0.992734), 2.700683410692373609407468393329)
 # test fsum, sum
 fsum_sin = math.fsum([math.sin(i) for i in range(5000)])
Author	SHA1	Message	Date
BLUELOVETH	c624833cfb	Add more gc control to improve performance (#510 ) * add more gc control * fix * Update gc.c * Update ceval.c * [no ci] Update 711_gc.py	2026-04-29 13:55:41 +08:00
blueloveTH	c39d86e999	add `__mod__`	2026-04-29 12:54:04 +08:00
blueloveTH	826b6f40f9	Update 930_deterministic_float.py	2026-04-23 11:06:38 +08:00