Compare commits

..

3 Commits

Author SHA1 Message Date
BLUELOVETH
c624833cfb
Add more gc control to improve performance (#510)
* add more gc control

* fix

* Update gc.c

* Update ceval.c

* [no ci] Update 711_gc.py
2026-04-29 13:55:41 +08:00
blueloveTH
c39d86e999 add __mod__ 2026-04-29 12:54:04 +08:00
blueloveTH
826b6f40f9 Update 930_deterministic_float.py 2026-04-23 11:06:38 +08:00
12 changed files with 116 additions and 44 deletions

View File

@ -6,8 +6,8 @@
typedef struct PyObject { typedef struct PyObject {
py_Type type; // we have a duplicated type here for convenience py_Type type; // we have a duplicated type here for convenience
uint8_t size_8b; uint8_t size_8b;
bool gc_marked; uint8_t gc_marked; // bit 0 (lsb): this object is marked; bit 1: do not mark recursively (object is not pushed onto the mark stack)
int slots; // number of slots in the object int slots; // number of slots in the object
char flex[]; char flex[];
} PyObject; } PyObject;
@ -25,11 +25,11 @@ void* PyObject__userdata(PyObject* self);
void PyObject__dtor(PyObject* self); void PyObject__dtor(PyObject* self);
#define pk__mark_value(val) \ #define pk__mark_value(val) \
if((val)->is_ptr && !(val)->_obj->gc_marked) { \ if((val)->is_ptr) { \
PyObject* obj = (val)->_obj; \ PyObject* obj = (val)->_obj; \
obj->gc_marked = true; \ if(!(obj->gc_marked & 0b01)) { \
c11_vector__push(PyObject*, p_stack, obj); \ obj->gc_marked |= 0b01; \
if(!(obj->gc_marked & 0b10)) { c11_vector__push(PyObject*, p_stack, obj); } \
} \
} }

View File

@ -25,3 +25,16 @@ def collect_hint() -> int:
def setup_debug_callback(cb: Callable[[Literal['start', 'stop'], str], None] | None) -> None: def setup_debug_callback(cb: Callable[[Literal['start', 'stop'], str], None] | None) -> None:
"""Setup a callback that will be triggered at the end of each collection.""" """Setup a callback that will be triggered at the end of each collection."""
def is_tracked(obj: object) -> bool:
    """Return True if the object is tracked recursively by the collector.

    Returns False for values that are not pointer-backed objects, and for
    objects whose "no recursive mark" flag is set (for example after
    `untrack()` was called on them).
    """
def track(obj: object) -> None:
    """Start tracking this object recursively.

    Clears the "no recursive mark" flag so that `is_tracked(obj)` returns
    True again. Raises TypeError if `obj` is not a pointer-backed object.
    """
def untrack(obj: object) -> None:
    """Stop tracking this object recursively.

    The object itself is still marked during a collection, but it is no
    longer pushed onto the mark stack for traversal. This improves
    performance for container objects with value types like `list[int]`.

    Raises TypeError if `obj` is not a pointer-backed object.

    NOTE(review): objects reachable only through an untracked container are
    presumably not kept alive by it -- confirm before untracking containers
    that hold references to other objects.
    """

View File

@ -31,6 +31,7 @@ class _vecI[T]:
@overload @overload
def __mul__(self, other: T) -> T: ... def __mul__(self, other: T) -> T: ...
def __floordiv__(self, other: int) -> T: ... def __floordiv__(self, other: int) -> T: ...
def __mod__(self, other: int) -> T: ...
def __hash__(self) -> int: ... def __hash__(self) -> int: ...

View File

@ -127,7 +127,7 @@ static bool number__pow__(int argc, py_Ref argv) {
static py_i64 i64_abs(py_i64 x) { return x < 0 ? -x : x; } static py_i64 i64_abs(py_i64 x) { return x < 0 ? -x : x; }
static py_i64 cpy11__fast_floor_div(py_i64 a, py_i64 b) { py_i64 cpy11__fast_floor_div(py_i64 a, py_i64 b) {
assert(b != 0); assert(b != 0);
if(a == 0) return 0; if(a == 0) return 0;
if((a < 0) == (b < 0)) { if((a < 0) == (b < 0)) {
@ -137,7 +137,7 @@ static py_i64 cpy11__fast_floor_div(py_i64 a, py_i64 b) {
} }
} }
static py_i64 cpy11__fast_mod(py_i64 a, py_i64 b) { py_i64 cpy11__fast_mod(py_i64 a, py_i64 b) {
assert(b != 0); assert(b != 0);
if(a == 0) return 0; if(a == 0) return 0;
py_i64 res; py_i64 res;

View File

@ -539,11 +539,15 @@ __NEXT_STEP:
DISPATCH(); DISPATCH();
} }
case OP_BUILD_TUPLE: { case OP_BUILD_TUPLE: {
bool need_track = false;
py_TValue tmp; py_TValue tmp;
py_Ref p = py_newtuple(&tmp, byte.arg); py_Ref p = py_newtuple(&tmp, byte.arg);
py_TValue* begin = SP() - byte.arg; py_TValue* begin = SP() - byte.arg;
for(int i = 0; i < byte.arg; i++) for(int i = 0; i < byte.arg; i++) {
p[i] = begin[i]; p[i] = begin[i];
if(p[i].is_ptr) need_track = true;
}
if(!need_track) tmp._obj->gc_marked |= 0b10;
SP() = begin; SP() = begin;
PUSH(&tmp); PUSH(&tmp);
DISPATCH(); DISPATCH();

View File

@ -202,8 +202,8 @@ int ManagedHeap__sweep(ManagedHeap* self, ManagedHeapSwpetInfo* out_info) {
int large_living_count = 0; int large_living_count = 0;
for(int i = 0; i < self->large_objects.length; i++) { for(int i = 0; i < self->large_objects.length; i++) {
PyObject* obj = c11__getitem(PyObject*, &self->large_objects, i); PyObject* obj = c11__getitem(PyObject*, &self->large_objects, i);
if(obj->gc_marked) { if(obj->gc_marked & 0b01) {
obj->gc_marked = false; obj->gc_marked &= 0b10;
c11__setitem(PyObject*, &self->large_objects, large_living_count, obj); c11__setitem(PyObject*, &self->large_objects, large_living_count, obj);
large_living_count++; large_living_count++;
} else { } else {
@ -238,7 +238,7 @@ PyObject* ManagedHeap__gcnew(ManagedHeap* self, py_Type type, int slots, int uds
} }
obj->type = type; obj->type = type;
obj->size_8b = size_8b; obj->size_8b = size_8b;
obj->gc_marked = false; obj->gc_marked = 0;
obj->slots = slots; obj->slots = slots;
// initialize slots or dict // initialize slots or dict

View File

@ -38,16 +38,16 @@ static int PoolArena__sweep_dealloc(PoolArena* self, int* out_types) {
self->unused[self->unused_length] = i; self->unused[self->unused_length] = i;
self->unused_length++; self->unused_length++;
} else { } else {
if(!obj->gc_marked) { if(obj->gc_marked & 0b01) {
// marked, clear mark
obj->gc_marked &= 0b10;
} else {
// not marked, need to free // not marked, need to free
if(out_types) out_types[obj->type]++; if(out_types) out_types[obj->type]++;
PyObject__dtor(obj); PyObject__dtor(obj);
obj->type = 0; obj->type = 0;
self->unused[self->unused_length] = i; self->unused[self->unused_length] = i;
self->unused_length++; self->unused_length++;
} else {
// marked, clear mark
obj->gc_marked = false;
} }
} }
} }

View File

@ -265,10 +265,10 @@ void VM__ctor(VM* self) {
pk__add_module_unicodedata(); pk__add_module_unicodedata();
pk__add_module_conio(); pk__add_module_conio();
pk__add_module_lz4(); // optional pk__add_module_lz4(); // optional
pk__add_module_cute_png(); // optional pk__add_module_cute_png(); // optional
pk__add_module_msgpack(); // optional pk__add_module_msgpack(); // optional
py__add_module_periphery(); // optional py__add_module_periphery(); // optional
pk__add_module_pkpy(); pk__add_module_pkpy();
pk__add_module_picoterm(); pk__add_module_picoterm();
@ -697,7 +697,7 @@ void ManagedHeap__mark(ManagedHeap* self) {
PyObject* obj = c11_vector__back(PyObject*, p_stack); PyObject* obj = c11_vector__back(PyObject*, p_stack);
c11_vector__pop(p_stack); c11_vector__pop(p_stack);
assert(obj->gc_marked); assert(obj->gc_marked & 0b01);
if(obj->slots > 0) { if(obj->slots > 0) {
py_TValue* p = PyObject__slots(obj); py_TValue* p = PyObject__slots(obj);

View File

@ -48,6 +48,33 @@ static bool gc_setup_debug_callback(int argc, py_Ref argv) {
return true; return true;
} }
// gc.is_tracked(obj) -> bool
// Reports whether `obj` is marked recursively by the collector.
// The answer is true only when the value is pointer-backed and bit 0b10
// ("no recursive mark") of its gc_marked field is clear; values with
// is_ptr unset are reported as untracked without touching _obj.
// Always returns true to the caller once the argument count check passes.
static bool gc_is_tracked(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    bool tracked = argv->is_ptr && (argv->_obj->gc_marked & 0b10) == 0;
    py_newbool(py_retval(), tracked);
    return true;
}
// gc.track(obj) -> None
// Re-enables recursive marking for `obj` by masking gc_marked down to its
// lowest bit: the "self is marked" bit is preserved, the "no recursive
// mark" bit is cleared.
// Values with is_ptr unset are rejected with a TypeError.
static bool gc_track(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    if(argv->is_ptr) {
        argv->_obj->gc_marked &= 0b01;
        py_newnone(py_retval());
        return true;
    }
    return TypeError("gc.track() only accepts objects");
}
// gc.untrack(obj) -> None
// Disables recursive marking for `obj` by setting bit 0b10 of gc_marked;
// the "self is marked" bit is left untouched.
// Values with is_ptr unset are rejected with a TypeError.
static bool gc_untrack(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    if(argv->is_ptr) {
        argv->_obj->gc_marked |= 0b10;
        py_newnone(py_retval());
        return true;
    }
    return TypeError("gc.untrack() only accepts objects");
}
void pk__add_module_gc() { void pk__add_module_gc() {
py_Ref mod = py_newmodule("gc"); py_Ref mod = py_newmodule("gc");
@ -58,4 +85,8 @@ void pk__add_module_gc() {
py_bindfunc(mod, "collect", gc_collect); py_bindfunc(mod, "collect", gc_collect);
py_bindfunc(mod, "collect_hint", gc_collect_hint); py_bindfunc(mod, "collect_hint", gc_collect_hint);
py_bindfunc(mod, "setup_debug_callback", gc_setup_debug_callback); py_bindfunc(mod, "setup_debug_callback", gc_setup_debug_callback);
py_bindfunc(mod, "is_tracked", gc_is_tracked);
py_bindfunc(mod, "track", gc_track);
py_bindfunc(mod, "untrack", gc_untrack);
} }

View File

@ -8,6 +8,9 @@
static bool isclose(float a, float b) { return dmath_fabs(a - b) < 1e-4; } static bool isclose(float a, float b) { return dmath_fabs(a - b) < 1e-4; }
py_i64 cpy11__fast_floor_div(py_i64 a, py_i64 b);
py_i64 cpy11__fast_mod(py_i64 a, py_i64 b);
#define DEFINE_VEC_FIELD(name, T, Tc, field) \ #define DEFINE_VEC_FIELD(name, T, Tc, field) \
static bool name##__##field(int argc, py_Ref argv) { \ static bool name##__##field(int argc, py_Ref argv) { \
PY_CHECK_ARGC(1); \ PY_CHECK_ARGC(1); \
@ -204,7 +207,7 @@ static py_Ref _const(py_Type type, const char* name) {
float sum = 0; \ float sum = 0; \
for(int i = 0; i < D; i++) \ for(int i = 0; i < D; i++) \
sum += v.data[i] * v.data[i]; \ sum += v.data[i] * v.data[i]; \
py_newfloat(py_retval(), dmath_sqrt(sum)); \ py_newfloat(py_retval(), dmath_sqrt(sum)); \
return true; \ return true; \
} \ } \
static bool vec##D##_length_squared(int argc, py_Ref argv) { \ static bool vec##D##_length_squared(int argc, py_Ref argv) { \
@ -234,7 +237,7 @@ static py_Ref _const(py_Type type, const char* name) {
for(int i = 0; i < D; i++) \ for(int i = 0; i < D; i++) \
len += self.data[i] * self.data[i]; \ len += self.data[i] * self.data[i]; \
if(isclose(len, 0)) return ZeroDivisionError("cannot normalize zero vector"); \ if(isclose(len, 0)) return ZeroDivisionError("cannot normalize zero vector"); \
len = dmath_sqrt(len); \ len = dmath_sqrt(len); \
c11_vec##D res; \ c11_vec##D res; \
for(int i = 0; i < D; i++) \ for(int i = 0; i < D; i++) \
res.data[i] = self.data[i] / len; \ res.data[i] = self.data[i] / len; \
@ -314,7 +317,17 @@ DEF_VECTOR_OPS(3)
c11_vec##D##i a = py_tovec##D##i(&argv[0]); \ c11_vec##D##i a = py_tovec##D##i(&argv[0]); \
py_i64 b = py_toint(&argv[1]); \ py_i64 b = py_toint(&argv[1]); \
for(int i = 0; i < D; i++) \ for(int i = 0; i < D; i++) \
a.data[i] /= b; \ a.data[i] = cpy11__fast_floor_div(a.data[i], b); \
py_newvec##D##i(py_retval(), a); \
return true; \
} \
static bool vec##D##i##__mod__(int argc, py_Ref argv) { \
PY_CHECK_ARGC(2); \
PY_CHECK_ARG_TYPE(1, tp_int); \
c11_vec##D##i a = py_tovec##D##i(&argv[0]); \
py_i64 b = py_toint(&argv[1]); \
for(int i = 0; i < D; i++) \
a.data[i] = cpy11__fast_mod(a.data[i], b); \
py_newvec##D##i(py_retval(), a); \ py_newvec##D##i(py_retval(), a); \
return true; \ return true; \
} }
@ -381,7 +394,8 @@ static bool vec2_angle_STATIC(int argc, py_Ref argv) {
PY_CHECK_ARGC(2); PY_CHECK_ARGC(2);
PY_CHECK_ARG_TYPE(0, tp_vec2); PY_CHECK_ARG_TYPE(0, tp_vec2);
PY_CHECK_ARG_TYPE(1, tp_vec2); PY_CHECK_ARG_TYPE(1, tp_vec2);
float val = dmath_atan2(argv[1]._vec2.y, argv[1]._vec2.x) - dmath_atan2(argv[0]._vec2.y, argv[0]._vec2.x); float val = dmath_atan2(argv[1]._vec2.y, argv[1]._vec2.x) -
dmath_atan2(argv[0]._vec2.y, argv[0]._vec2.x);
if(val > DMATH_PI) val -= 2 * (float)DMATH_PI; if(val > DMATH_PI) val -= 2 * (float)DMATH_PI;
if(val < -DMATH_PI) val += 2 * (float)DMATH_PI; if(val < -DMATH_PI) val += 2 * (float)DMATH_PI;
py_newfloat(py_retval(), val); py_newfloat(py_retval(), val);
@ -1237,6 +1251,7 @@ void pk__add_module_vmath() {
py_bindmagic(vec2i, __sub__, vec2i__sub__); py_bindmagic(vec2i, __sub__, vec2i__sub__);
py_bindmagic(vec2i, __mul__, vec2i__mul__); py_bindmagic(vec2i, __mul__, vec2i__mul__);
py_bindmagic(vec2i, __floordiv__, vec2i__floordiv__); py_bindmagic(vec2i, __floordiv__, vec2i__floordiv__);
py_bindmagic(vec2i, __mod__, vec2i__mod__);
py_bindmagic(vec2i, __eq__, vec2i__eq__); py_bindmagic(vec2i, __eq__, vec2i__eq__);
py_bindmagic(vec2i, __ne__, vec2i__ne__); py_bindmagic(vec2i, __ne__, vec2i__ne__);
py_bindmagic(vec2i, __hash__, vec2i__hash__); py_bindmagic(vec2i, __hash__, vec2i__hash__);
@ -1262,6 +1277,7 @@ void pk__add_module_vmath() {
py_bindmagic(vec3i, __sub__, vec3i__sub__); py_bindmagic(vec3i, __sub__, vec3i__sub__);
py_bindmagic(vec3i, __mul__, vec3i__mul__); py_bindmagic(vec3i, __mul__, vec3i__mul__);
py_bindmagic(vec3i, __floordiv__, vec3i__floordiv__); py_bindmagic(vec3i, __floordiv__, vec3i__floordiv__);
py_bindmagic(vec3i, __mod__, vec3i__mod__);
py_bindmagic(vec3i, __eq__, vec3i__eq__); py_bindmagic(vec3i, __eq__, vec3i__eq__);
py_bindmagic(vec3i, __ne__, vec3i__ne__); py_bindmagic(vec3i, __ne__, vec3i__ne__);
py_bindmagic(vec3i, __hash__, vec3i__hash__); py_bindmagic(vec3i, __hash__, vec3i__hash__);
@ -1289,6 +1305,7 @@ void pk__add_module_vmath() {
py_bindmagic(vec4i, __sub__, vec4i__sub__); py_bindmagic(vec4i, __sub__, vec4i__sub__);
py_bindmagic(vec4i, __mul__, vec4i__mul__); py_bindmagic(vec4i, __mul__, vec4i__mul__);
py_bindmagic(vec4i, __floordiv__, vec4i__floordiv__); py_bindmagic(vec4i, __floordiv__, vec4i__floordiv__);
py_bindmagic(vec4i, __mod__, vec4i__mod__);
py_bindmagic(vec4i, __eq__, vec4i__eq__); py_bindmagic(vec4i, __eq__, vec4i__eq__);
py_bindmagic(vec4i, __ne__, vec4i__ne__); py_bindmagic(vec4i, __ne__, vec4i__ne__);
py_bindmagic(vec4i, __hash__, vec4i__hash__); py_bindmagic(vec4i, __hash__, vec4i__hash__);

View File

@ -12,4 +12,10 @@ gc.collect()
create_garbage() create_garbage()
create_garbage() create_garbage()
create_garbage() c = create_garbage()
assert gc.is_tracked(c) == True
gc.untrack(c)
assert gc.is_tracked(c) == False
gc.track(c)
assert gc.is_tracked(c) == True

View File

@ -54,18 +54,18 @@ assertEqual(math.isnan(math.nan), True)
# test exp # test exp
assertEqual(math.exp(0), 1.0) assertEqual(math.exp(0), 1.0)
assertEqual(math.exp(1), math.e) assertEqual(math.exp(1), math.e)
assertEqual(math.exp(1.5), 4.48168907033806362960604019463) #4.481689070338065 - 8.881784197001252e-16) assertEqual(math.exp(1.5), 4.48168907033806362960604019463)
assertEqual(math.exp(3), 20.0855369231876608182574273087) #20.08553692318767 - 3.552713678800501e-15) assertEqual(math.exp(3), 20.0855369231876608182574273087)
assertEqual(math.exp(-3), 0.04978706836786396527916309651) #0.04978706836786394 + 6.938893903907228e-18) assertEqual(math.exp(-3), 0.04978706836786396527916309651)
assertEqual(math.exp(-2.253647), 0.1050155336754953 - 1.387778780781446e-17) assertEqual(math.exp(-2.253647), 0.1050155336754953 - 1.387778780781446e-17)
assertEqual(math.exp(4.729036), 113.186398052200445363268954679) #113.1863980522005 - 4.263256414560601e-14) assertEqual(math.exp(4.729036), 113.186398052200445363268954679)
# test log series # test log series
assertEqual(math.log(0), -math.inf) assertEqual(math.log(0), -math.inf)
assertEqual(math.log(1), 0.0) assertEqual(math.log(1), 0.0)
assertEqual(math.log(2), 0.69314718055994530942) assertEqual(math.log(2), 0.69314718055994530942)
assertEqual(math.log(math.e), 1.0) assertEqual(math.log(math.e), 1.0)
assertEqual(math.log(10), 2.30258509299404545700440394284) #2.30258509299404568402) assertEqual(math.log(10), 2.30258509299404545700440394284)
assertEqual(math.log(28.897124), 3.363742074595449) assertEqual(math.log(28.897124), 3.363742074595449)
assertEqual(math.log2(math.e), 1.4426950408889634074) assertEqual(math.log2(math.e), 1.4426950408889634074)
assertEqual(math.log2(78.781291), 6.299781153677818) assertEqual(math.log2(78.781291), 6.299781153677818)
@ -77,13 +77,13 @@ assertEqual(math.pow(2,2), 4.0)
assertEqual(math.pow(1.41421356237309504880, 2), 2.0 + 4.440892098500626e-16) assertEqual(math.pow(1.41421356237309504880, 2), 2.0 + 4.440892098500626e-16)
assertEqual(math.pow(0.70710678118654752440, 2), 0.5000000000000001) assertEqual(math.pow(0.70710678118654752440, 2), 0.5000000000000001)
assertEqual(math.pow(-1.255782,-3), -0.5049603042167915) assertEqual(math.pow(-1.255782,-3), -0.5049603042167915)
assertEqual(math.pow(6.127042, 4.071529), 1604.40754645674428502388764172) #1604.407546456745 + 2.273736754432321e-13) assertEqual(math.pow(6.127042, 4.071529), 1604.40754645674428502388764172)
# test sqrt # test sqrt
assertEqual(math.sqrt(2), 1.41421356237309492343001693370) #1.41421356237309504880) assertEqual(math.sqrt(2), 1.41421356237309492343001693370)
assertEqual(math.sqrt(math.pi), 1.772453850905516 - 2.220446049250313e-16) assertEqual(math.sqrt(math.pi), 1.772453850905516 - 2.220446049250313e-16)
assertEqual(math.sqrt(125.872509), 11.21929182257062) assertEqual(math.sqrt(125.872509), 11.21929182257062)
assertEqual(math.sqrt(1225.296280), 35.0042323155358019448613049462) #35.00423231553579) assertEqual(math.sqrt(1225.296280), 35.0042323155358019448613049462)
# test cos, sin, tan # test cos, sin, tan
assertEqual(math.cos(0), 1.0) assertEqual(math.cos(0), 1.0)
@ -112,19 +112,19 @@ assertEqual(math.asin(1), 1.570796326794897 - 4.440892098500626e-16)
assertEqual(math.asin(-0.225895), -0.2278616865773913 + 2.775557561562891e-17) assertEqual(math.asin(-0.225895), -0.2278616865773913 + 2.775557561562891e-17)
assertEqual(math.asin(0.955658), 1.271886195819423 + 4.440892098500626e-16) assertEqual(math.asin(0.955658), 1.271886195819423 + 4.440892098500626e-16)
assertEqual(math.atan(0), 0.0) assertEqual(math.atan(0), 0.0)
assertEqual(math.atan(1), 0.78539816339744839002179332965) #0.7853981633974483) assertEqual(math.atan(1), 0.78539816339744839002179332965)
assertEqual(math.atan(-3.758927), -1.3107852846106160527028805518) #-1.310785284610617 - 4.440892098500626e-16) assertEqual(math.atan(-3.758927), -1.3107852846106160527028805518)
assertEqual(math.atan(35.789293), 1.54286227728011748894232368911) #1.542862277280122) assertEqual(math.atan(35.789293), 1.54286227728011748894232368911)
# test atan2 # test atan2
assertEqual(math.atan2(math.pi/4, math.pi/4), 0.78539816339744839002179332965) #0.7853981633974483) assertEqual(math.atan2(math.pi/4, math.pi/4), 0.78539816339744839002179332965)
assertEqual(math.atan2(-math.pi/4, math.pi/4), -0.7853981633974483900217933296) #-0.7853981633974483) assertEqual(math.atan2(-math.pi/4, math.pi/4), -0.7853981633974483900217933296)
assertEqual(math.atan2(-math.pi/4, -math.pi/4), -2.356194490192345) assertEqual(math.atan2(-math.pi/4, -math.pi/4), -2.356194490192345)
assertEqual(math.atan2(math.pi/4, -math.pi/4), 2.356194490192345) assertEqual(math.atan2(math.pi/4, -math.pi/4), 2.356194490192345)
assertEqual(math.atan2(1.573823, 0.685329), 1.16010368292465315676054160576) #1.160103682924653) assertEqual(math.atan2(1.573823, 0.685329), 1.16010368292465315676054160576)
assertEqual(math.atan2(-0.899663, 0.668972), -0.9314162757114096136135117376) #-0.9314162757114095) assertEqual(math.atan2(-0.899663, 0.668972), -0.9314162757114096136135117376)
# assertEqual(math.atan2(-0.762894, -0.126497), -1.7351133471732969049128314509) #-1.735113347173296 - 4.440892098500626e-16) assertEqual(math.atan2(-0.762894, -0.126497), -1.735113347173297126957436375960)
# assertEqual(math.atan2(0.468463, -0.992734), 2.70068341069237316531825854326) #2.700683410692374 - 4.440892098500626e-16) assertEqual(math.atan2(0.468463, -0.992734), 2.700683410692373609407468393329)
# test fsum, sum # test fsum, sum
fsum_sin = math.fsum([math.sin(i) for i in range(5000)]) fsum_sin = math.fsum([math.sin(i) for i in range(5000)])