diff --git a/include/pocketpy/common/strname.h b/include/pocketpy/common/strname.h deleted file mode 100644 index a7407b8a..00000000 --- a/include/pocketpy/common/strname.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include -#include "pocketpy/common/str.h" - -void py_Name__initialize(); -void py_Name__finalize(); diff --git a/include/pocketpy/interpreter/frame.h b/include/pocketpy/interpreter/frame.h index 04e5a6ab..64972ec3 100644 --- a/include/pocketpy/interpreter/frame.h +++ b/include/pocketpy/interpreter/frame.h @@ -4,7 +4,6 @@ #include "pocketpy/objects/codeobject.h" #include "pocketpy/objects/namedict.h" #include "pocketpy/objects/object.h" -#include "pocketpy/common/strname.h" #include "pocketpy/pocketpy.h" py_TValue* FastLocals__try_get_by_name(py_TValue* locals, const CodeObject* co, py_Name name); diff --git a/include/pocketpy/interpreter/name.h b/include/pocketpy/interpreter/name.h new file mode 100644 index 00000000..b1f08520 --- /dev/null +++ b/include/pocketpy/interpreter/name.h @@ -0,0 +1,18 @@ +#pragma once + +#include "pocketpy/objects/base.h" +#include "pocketpy/common/smallmap.h" + +typedef struct { + char* data; // null-terminated data + int size; // size of the data excluding the null-terminator + py_TValue obj; // cached `str` object (lazy initialized) +} RInternedEntry; + +typedef struct { + c11_smallmap_s2n interned; + c11_vector /* T=RInternedEntry */ r_interned; +} InternedNames; + +void InternedNames__ctor(InternedNames* self); +void InternedNames__dtor(InternedNames* self); diff --git a/include/pocketpy/interpreter/vm.h b/include/pocketpy/interpreter/vm.h index 0080b8fe..9b0ab866 100644 --- a/include/pocketpy/interpreter/vm.h +++ b/include/pocketpy/interpreter/vm.h @@ -7,6 +7,7 @@ #include "pocketpy/interpreter/frame.h" #include "pocketpy/interpreter/modules.h" #include "pocketpy/interpreter/typeinfo.h" +#include "pocketpy/interpreter/name.h" // TODO: // 1. __eq__ and __ne__ fallbacks @@ -41,6 +42,7 @@ typedef struct VM { py_StackRef __curr_function; py_TValue __vectorcall_buffer[PK_MAX_CO_VARNAMES]; + InternedNames names; FixedMemoryPool pool_frame; ManagedHeap heap; ValueStack stack; // put `stack` at the end for better cache locality diff --git a/include/pocketpy/objects/error.h b/include/pocketpy/objects/error.h index 4ca63d35..d483767c 100644 --- a/include/pocketpy/objects/error.h +++ b/include/pocketpy/objects/error.h @@ -1,10 +1,6 @@ #pragma once -#include "pocketpy/common/str.h" -#include "pocketpy/common/strname.h" -#include "pocketpy/objects/codeobject.h" #include "pocketpy/objects/sourcedata.h" -#include "pocketpy/objects/object.h" #include "pocketpy/pocketpy.h" typedef struct{ diff --git a/src/common/strname.c b/src/common/strname.c deleted file mode 100644 index 6ba3805e..00000000 --- a/src/common/strname.c +++ /dev/null @@ -1,83 +0,0 @@ -#include "pocketpy/common/strname.h" -#include "pocketpy/common/smallmap.h" -#include "pocketpy/common/utils.h" -#include "pocketpy/common/vector.h" -#include "pocketpy/pocketpy.h" - -#include - -typedef struct { - char* data; // null-terminated data - int size; // size of the data excluding the null-terminator - py_TValue* ref; // cached `str` object (lazy initialized) -} RInternedEntry; - -// TODO: use a more efficient data structure -static c11_smallmap_s2n _interned; -static c11_vector /* T=RInternedEntry */ _r_interned; - -void py_Name__initialize() { - c11_smallmap_s2n__ctor(&_interned); - c11_vector__ctor(&_r_interned, sizeof(RInternedEntry)); - -#define MAGIC_METHOD(x) \ - if(x != py_name(#x)) abort(); -#include "pocketpy/xmacros/magics.h" -#undef MAGIC_METHOD -} - -void py_Name__finalize() { - // free all char* - for(int i = 0; i < _r_interned.length; i++) { - PK_FREE(c11__getitem(RInternedEntry, &_r_interned, i).data); - } - c11_smallmap_s2n__dtor(&_interned); - c11_vector__dtor(&_r_interned); -} - -py_Name py_name(const char* name) { return py_namev((c11_sv){name, strlen(name)}); } - -py_Name py_namev(c11_sv name) { - uint16_t index = c11_smallmap_s2n__get(&_interned, name, 0); - if(index != 0) return index; - // generate new index - if(_interned.length > 65530) c11__abort("py_Name index overflow"); - // NOTE: we must allocate the string in the heap so iterators are not invalidated - char* p = PK_MALLOC(name.size + 1); - memcpy(p, name.data, name.size); - p[name.size] = '\0'; - RInternedEntry entry; - entry.data = p; - entry.size = name.size; - entry.ref = NULL; - c11_vector__push(RInternedEntry, &_r_interned, entry); - index = _r_interned.length; // 1-based - // save to _interned - c11_smallmap_s2n__set(&_interned, (c11_sv){p, name.size}, index); - assert(_interned.length == _r_interned.length); - return index; -} - -const char* py_name2str(py_Name index) { - assert(index > 0 && index <= _interned.length); - return c11__getitem(RInternedEntry, &_r_interned, index - 1).data; -} - -c11_sv py_name2sv(py_Name index) { - assert(index > 0 && index <= _interned.length); - RInternedEntry entry = c11__getitem(RInternedEntry, &_r_interned, index - 1); - return (c11_sv){entry.data, entry.size}; -} - -py_GlobalRef py_name2ref(py_Name index) { - assert(index > 0 && index <= _interned.length); - RInternedEntry entry = c11__getitem(RInternedEntry, &_r_interned, index - 1); - if(entry.ref == NULL){ - entry.ref = PK_MALLOC(16); // ... - c11_sv sv; - sv.data = entry.data; - sv.size = entry.size; - py_newstrv(entry.ref, sv); - } - return entry.ref; -} diff --git a/src/interpreter/name.c b/src/interpreter/name.c new file mode 100644 index 00000000..1a34c2ad --- /dev/null +++ b/src/interpreter/name.c @@ -0,0 +1,77 @@ +#include "pocketpy/interpreter/name.h" +#include "pocketpy/interpreter/vm.h" + +void InternedNames__ctor(InternedNames* self) { + c11_smallmap_s2n__ctor(&self->interned); + c11_vector__ctor(&self->r_interned, sizeof(RInternedEntry)); + + // initialize all magic names +#define MAGIC_METHOD(x) \ + if(x != py_name(#x)) abort(); +#include "pocketpy/xmacros/magics.h" +#undef MAGIC_METHOD +} + +void InternedNames__dtor(InternedNames* self) { + for(int i = 0; i < self->r_interned.length; i++) { + PK_FREE(c11__getitem(RInternedEntry, &self->r_interned, i).data); + } + c11_smallmap_s2n__dtor(&self->interned); + c11_vector__dtor(&self->r_interned); +} + +py_Name py_name(const char* name) { + c11_sv sv; + sv.data = name; + sv.size = strlen(name); + return py_namev(sv); +} + +py_Name py_namev(c11_sv name) { + InternedNames* self = &pk_current_vm->names; + uint16_t index = c11_smallmap_s2n__get(&self->interned, name, 0); + if(index != 0) return index; + // generate new index + if(self->interned.length > 65530) c11__abort("py_Name index overflow"); + // NOTE: we must allocate the string in the heap so iterators are not invalidated + char* p = PK_MALLOC(name.size + 1); + memcpy(p, name.data, name.size); + p[name.size] = '\0'; + RInternedEntry entry; + entry.data = p; + entry.size = name.size; + entry.obj.type = tp_nil; + c11_vector__push(RInternedEntry, &self->r_interned, entry); + index = self->r_interned.length; // 1-based + // save to _interned + c11_smallmap_s2n__set(&self->interned, (c11_sv){p, name.size}, index); + assert(self->interned.length == self->r_interned.length); + return index; +} + +const char* py_name2str(py_Name index) { + InternedNames* self = &pk_current_vm->names; + assert(index > 0 && index <= self->interned.length); + return c11__getitem(RInternedEntry, &self->r_interned, index - 1).data; +} + +c11_sv py_name2sv(py_Name index) { + InternedNames* self = &pk_current_vm->names; + assert(index > 0 && index <= self->interned.length); + RInternedEntry entry = c11__getitem(RInternedEntry, &self->r_interned, index - 1); + return (c11_sv){entry.data, entry.size}; +} + +py_GlobalRef py_name2ref(py_Name index) { + InternedNames* self = &pk_current_vm->names; + assert(index > 0 && index <= self->interned.length); + RInternedEntry* entry = c11__at(RInternedEntry, &self->r_interned, index - 1); + if(entry->obj.type == tp_nil){ + c11_sv sv; + sv.data = entry->data; + sv.size = entry->size; + py_newstrv(&entry->obj, sv); + } + return &entry->obj; +} + diff --git a/src/interpreter/vm.c b/src/interpreter/vm.c index 44f41737..933b78bc 100644 --- a/src/interpreter/vm.c +++ b/src/interpreter/vm.c @@ -57,6 +57,7 @@ static void py_TypeInfo__ctor(py_TypeInfo* self, void VM__ctor(VM* self) { self->top_frame = NULL; + InternedNames__ctor(&self->names); ModuleDict__ctor(&self->modules, NULL, *py_NIL()); TypeList__ctor(&self->types); @@ -258,6 +259,7 @@ void VM__dtor(VM* self) { TypeList__dtor(&self->types); FixedMemoryPool__dtor(&self->pool_frame); ValueStack__clear(&self->stack); + InternedNames__dtor(&self->names); } void VM__push_frame(VM* self, Frame* frame) { @@ -281,7 +283,11 @@ static void _clip_int(int* value, int min, int max) { if(*value > max) *value = max; } -bool pk__parse_int_slice(py_Ref slice, int length, int* restrict start, int* restrict stop, int* restrict step) { +bool pk__parse_int_slice(py_Ref slice, + int length, + int* restrict start, + int* restrict stop, + int* restrict step) { if(py_isint(slice)) { int index = py_toint(slice); bool ok = pk__normalize_index(&index, length); @@ -431,9 +437,8 @@ static bool co->name->data); } else { // add to **kwargs - bool ok = py_dict_setitem(&buffer[decl->starred_kwarg], - py_name2ref(key), - &p1[2 * j + 1]); + bool ok = + py_dict_setitem(&buffer[decl->starred_kwarg], py_name2ref(key), &p1[2 * j + 1]); if(!ok) return false; } } @@ -480,7 +485,8 @@ FrameResult VM__vectorcall(VM* self, uint16_t argc, uint16_t kwargc, bool opcall // submit the call if(!fn->cfunc) { // python function - VM__push_frame(self, Frame__new(co, p0, fn->module, fn->globals, argv, true, false)); + VM__push_frame(self, + Frame__new(co, p0, fn->module, fn->globals, argv, true, false)); return opcall ? RES_CALL : VM__run_top_frame(self); } else { // decl-based binding @@ -509,7 +515,8 @@ FrameResult VM__vectorcall(VM* self, uint16_t argc, uint16_t kwargc, bool opcall // submit the call if(!fn->cfunc) { // python function - VM__push_frame(self, Frame__new(co, p0, fn->module, fn->globals, argv, true, false)); + VM__push_frame(self, + Frame__new(co, p0, fn->module, fn->globals, argv, true, false)); return opcall ? RES_CALL : VM__run_top_frame(self); } else { // decl-based binding @@ -693,6 +700,11 @@ void ManagedHeap__mark(ManagedHeap* self) { for(int i = 0; i < c11__count_array(vm->reg); i++) { pk__mark_value(&vm->reg[i]); } + // mark interned names + for(int i = 0; i < vm->names.r_interned.length; i++) { + RInternedEntry* entry = c11__at(RInternedEntry, &vm->names.r_interned, i); + pk__mark_value(&entry->obj); + } } void pk_print_stack(VM* self, Frame* frame, Bytecode byte) { diff --git a/src/public/internal.c b/src/public/internal.c index 277789ee..6516b436 100644 --- a/src/public/internal.c +++ b/src/public/internal.c @@ -27,8 +27,6 @@ void py_initialize() { static_assert(sizeof(py_TValue) == 16, "sizeof(py_TValue) != 16"); static_assert(offsetof(py_TValue, extra) == 4, "offsetof(py_TValue, extra) != 4"); - py_Name__initialize(); - pk_current_vm = pk_all_vm[0] = &pk_default_vm; // initialize some convenient references @@ -61,7 +59,6 @@ void py_finalize() { pk_current_vm = &pk_default_vm; VM__dtor(&pk_default_vm); pk_current_vm = NULL; - py_Name__finalize(); } void py_switchvm(int index) {