diff --git a/src/builtins.h b/src/builtins.h index 690f85d8..98fb087e 100644 --- a/src/builtins.h +++ b/src/builtins.h @@ -210,7 +210,7 @@ list.__new__ = lambda obj: [i for i in obj] # https://github.com/python/cpython/blob/main/Objects/dictobject.c class dict: - def __init__(self, capacity=16): + def __init__(self, capacity=12): self._capacity = capacity self._a = [None] * self._capacity self._len = 0 @@ -243,7 +243,7 @@ class dict: else: self._a[i] = [key, value] self._len += 1 - if self._len > self._capacity * 0.8: + if self._len > self._capacity * 0.67: self._capacity *= 2 self.__rehash() diff --git a/src/codeobject.h b/src/codeobject.h index 1f7cc095..4a7bee34 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -64,8 +64,8 @@ struct CodeObject { std::map global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; - - int recommended_hashmap_capacity = 8; + + int ideal_locals_capacity = 4; void optimize(VM* vm); diff --git a/src/common.h b/src/common.h index 7e37c9cf..76aa33ef 100644 --- a/src/common.h +++ b/src/common.h @@ -47,7 +47,9 @@ typedef int64_t i64; typedef double f64; #endif -struct Dummy { char _; }; +struct Dummy { }; +struct DummyInstance { }; +struct DummyModule { }; #define DUMMY_VAL Dummy() struct Type { @@ -67,4 +69,6 @@ struct Type { #define RAW(T) std::remove_const_t> -const float kNameDictLoadFactor = 0.8; \ No newline at end of file +const float kLocalsLoadFactor = 0.8; +const float kInstAttrLoadFactor = 0.67; +const float kTypeAttrLoadFactor = 0.34; \ No newline at end of file diff --git a/src/namedict.h b/src/namedict.h index 4eebad21..58cf3572 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -11,9 +11,11 @@ struct NameDictNode{ struct NameDict { int _capacity; int _size; + float _load_factor; NameDictNode* _a; - NameDict(int capacity=2): _capacity(capacity), _size(0) { + NameDict(int capacity=4, float load_factor=0.67): + _capacity(capacity), _size(0), _load_factor(load_factor) { _a = new NameDictNode[_capacity]; } @@ -38,12 +40,13 @@ struct NameDict { int size() const { return _size; } +//https://github.com/python/cpython/blob/main/Objects/dictobject.c#L175 #define HASH_PROBE(key, ok, i) \ int i = (key).index % _capacity; \ bool ok = false; \ while(!_a[i].empty()) { \ if(_a[i].first == (key)) { ok = true; break; } \ - i = (i + 1) % _capacity; \ + i = (5*i + 1) % _capacity; \ } #define HASH_PROBE_OVERRIDE(key, ok, i) \ @@ -51,7 +54,7 @@ struct NameDict { ok = false; \ while(!_a[i].empty()) { \ if(_a[i].first == (key)) { ok = true; break; } \ - i = (i + 1) % _capacity; \ + i = (5*i + 1) % _capacity; \ } const PyVar& operator[](StrName key) const { @@ -65,15 +68,15 @@ struct NameDict { if(!ok) { _a[i].first = key; _size++; - if(_size > _capacity * kNameDictLoadFactor){ - __rehash_2x(); + if(_size > _capacity * _load_factor){ + _rehash_2x(); HASH_PROBE_OVERRIDE(key, ok, i); } } return _a[i].second; } - void __rehash_2x(){ + void _rehash_2x(){ NameDictNode* old_a = _a; int old_capacity = _capacity; _capacity *= 2; @@ -122,8 +125,8 @@ struct NameDict { if(!ok) { _a[i].first = key; _size++; - if(_size > _capacity * kNameDictLoadFactor){ - __rehash_2x(); + if(_size > _capacity * _load_factor){ + _rehash_2x(); HASH_PROBE_OVERRIDE(key, ok, i); } } @@ -151,4 +154,7 @@ struct NameDict { inline iterator begin() const { return iterator(this, 0); } inline iterator end() const { return iterator(this, _capacity); } + +#undef HASH_PROBE +#undef HASH_PROBE_OVERRIDE }; \ No newline at end of file diff --git a/src/obj.h b/src/obj.h index 1c0d75f5..fec434dd 100644 --- a/src/obj.h +++ b/src/obj.h @@ -25,7 +25,7 @@ struct Function { CodeObject_ code; std::vector args; StrName starred_arg; // empty if no *arg - pkpy::NameDict kwargs; // empty if no k=v + pkpy::NameDict kwargs; // empty if no k=v std::vector kwargs_order; // runtime settings @@ -97,8 +97,10 @@ struct Py_ : PyObject { Py_(Type type, T&& val): PyObject(type), _value(std::move(val)) { _init(); } inline void _init() noexcept { - if constexpr (std::is_same_v || std::is_same_v) { - _attr = new pkpy::NameDict(); + if constexpr (std::is_same_v || std::is_same_v) { + _attr = new pkpy::NameDict(8, kTypeAttrLoadFactor); + }else if constexpr(std::is_same_v){ + _attr = new pkpy::NameDict(4, kInstAttrLoadFactor); }else{ _attr = nullptr; } diff --git a/src/pocketpy.h b/src/pocketpy.h index 409cc7eb..7a11341d 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -774,6 +774,22 @@ void add_module_random(VM* vm){ vm->_exec(code, mod); } +void VM::post_init(){ + init_builtins(this); + add_module_sys(this); + add_module_time(this); + add_module_json(this); + add_module_math(this); + add_module_re(this); + add_module_dis(this); + add_module_random(this); + add_module_io(this); + add_module_os(this); + + CodeObject_ code = compile(kBuiltinsCode, "", EXEC_MODE); + this->_exec(code, this->builtins); +} + class _PkExported{ public: @@ -879,21 +895,7 @@ extern "C" { __EXPORT /// Create a virtual machine. VM* pkpy_new_vm(bool use_stdio){ - VM* vm = PKPY_ALLOCATE(VM, use_stdio); - init_builtins(vm); - add_module_sys(vm); - add_module_time(vm); - add_module_json(vm); - add_module_math(vm); - add_module_re(vm); - add_module_dis(vm); - add_module_random(vm); - add_module_io(vm); - add_module_os(vm); - - CodeObject_ code = vm->compile(kBuiltinsCode, "", EXEC_MODE); - vm->_exec(code, vm->builtins); - return vm; + return PKPY_ALLOCATE(VM, use_stdio); } __EXPORT diff --git a/src/vm.h b/src/vm.h index 7adcfe17..cc10960f 100644 --- a/src/vm.h +++ b/src/vm.h @@ -131,7 +131,7 @@ public: if(new_f != nullptr){ obj = call(*new_f, args, kwargs, false); }else{ - obj = new_object(_callable, DUMMY_VAL); + obj = new_object(_callable, DummyInstance()); PyVarOrNull init_f = getattr(obj, __init__, false); if (init_f != nullptr) call(init_f, args, kwargs, false); } @@ -151,7 +151,9 @@ public: return f(this, args); } else if(is_type(*callable, tp_function)){ const pkpy::Function& fn = PyFunction_AS_C(*callable); - auto _locals = pkpy::make_shared(fn.code->recommended_hashmap_capacity); + auto _locals = pkpy::make_shared( + fn.code->ideal_locals_capacity, kLocalsLoadFactor + ); pkpy::NameDict& locals = *_locals; int i = 0; @@ -323,7 +325,7 @@ public: } PyVar new_module(StrName name) { - PyVar obj = new_object(tp_module, DUMMY_VAL); + PyVar obj = new_object(tp_module, DummyModule()); setattr(obj, __name__, PyStr(name.str())); _modules[name] = obj; return obj; @@ -635,8 +637,12 @@ public: for (auto& name : pb_types) { setattr(builtins, name, _types[name]); } + + post_init(); } + void post_init(); + i64 hash(const PyVar& obj){ if (is_type(obj, tp_str)) return PyStr_AS_C(obj).hash(); if (is_int(obj)) return PyInt_AS_C(obj); @@ -860,8 +866,9 @@ PyVar pkpy::NativeFunc::operator()(VM* vm, pkpy::Args& args) const{ void CodeObject::optimize(VM* vm){ int n = 0; for(auto& p: names) if(p.second == NAME_LOCAL) n++; - recommended_hashmap_capacity = (int)(n / kNameDictLoadFactor + 1.5); - if(recommended_hashmap_capacity < 2) recommended_hashmap_capacity = 2; + int base_n = (int)(n / kLocalsLoadFactor + 1.5); + ideal_locals_capacity = 2; + while(ideal_locals_capacity < base_n) ideal_locals_capacity *= 2; for(int i=1; i