diff --git a/src/ceval.h b/src/ceval.h index 490e3352..4369fb8f 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -273,21 +273,21 @@ PyVar VM::run_frame(Frame* frame){ } continue; case OP_IMPORT_NAME: { StrName name = frame->co->names[byte.arg].first; - auto it = _modules.find(name); - if(it == _modules.end()){ + PyVar* ext_mod = _modules.try_get(name); + if(ext_mod == nullptr){ auto it2 = _lazy_modules.find(name); if(it2 == _lazy_modules.end()){ _error("ImportError", "module " + name.str().escape(true) + " not found"); }else{ const Str& source = it2->second; CodeObject_ code = compile(source, name.str(), EXEC_MODE); - PyVar _m = new_module(name); - _exec(code, _m); - frame->push(_m); + PyVar new_mod = new_module(name); + _exec(code, new_mod); + frame->push(new_mod); _lazy_modules.erase(it2); } }else{ - frame->push(it->second); + frame->push(*ext_mod); } } continue; case OP_YIELD_VALUE: return _py_op_yield; diff --git a/src/compiler.h b/src/compiler.h index 0977929f..347d520f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -1002,7 +1002,7 @@ __LISTCOMP: if(value == nullptr){ SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type)); } - func.kwargs[name] = value; + func.kwargs.set(name, value); func.kwargs_order.push_back(name); } break; case 3: SyntaxError("**kwargs is not supported yet"); break; diff --git a/src/namedict.h b/src/namedict.h index 29c64f0b..f993e292 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -5,18 +5,17 @@ #include "str.h" namespace pkpy{ - const std::vector kHashSeeds = {0, 3259656564, 3106121857, 2774518055, 4085946151, 4274771677, 4047908201, 2149081045, 4160441109, 4127125901, 3109730425, 2794687362, 2806137727, 2642447290, 4070996945, 3580743775, 3719956858, 2960278187, 3568486238, 3125361093, 2232173865, 4043238260, 3265527710, 2206062780, 3968387223, 3144295694, 3293736932, 3196583945, 3832534010, 3311528523, 4258510773, 4049882022, 3058077580, 2446794117, 2330081744, 2563269634, 3848248775, 2197398712, 2874906918, 3012473024, 3477039876, 2710692860, 2806508231, 3893239503, 3929140074, 3145323261, 3593960112, 2451662716, 2545939029, 2475647797, 2790321726, 4166873680, 3504262692, 3140715282, 3078827310, 3177714229, 3006241931, 3777800785, 3621627818, 3163832382, 2166076714, 3622591406, 3299007679, 2915427082, 3939911590, 4145015468, 2791077264, 3916399405, 3330576709, 2466029172, 3534773842, 2690327419, 2487859383, 3687001303, 2615131117, 3057598651, 2548471802, 3145782646, 3895406770, 2150621965, 2179753887, 2159855306, 2439700132, 2397760304, 3405860607, 4268549710, 2779408554, 2485874456, 3796299954, 4179315997, 2380599704, 3210079474, 3951990603, 3342489194, 2997361581, 3576131817, 3163713423, 2467495451, 4190562029, 2588496185}; - const std::vector kPrimes = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599}; + const std::vector kHashSeeds = {0, 2619911537, 3657312521, 3108729646, 3527288759, 3054146033, 3436461329, 3073779540, 2262929533, 3564928174, 2823402058, 4053700272, 3710518398, 2193895465, 3616465673, 2370151435, 3911946797, 2518033560, 4090755824, 2554076140, 2922670102, 2817437464, 3058134240, 4015911568, 2683197236, 3580292421, 2489282276, 2198476149, 3059557576, 3251314740, 2164089808, 3407729628, 4006319879, 3563204365, 2959032770, 3699872774, 3285955347, 2886756578, 2727979131, 3987926730, 3558848942, 3667783180, 3427603538, 2678804156, 3899695574, 3497073252, 4125590716, 3439003736, 3166960007, 2341256547, 3498025667, 2434275284, 2294495502, 2454032734, 2622845447, 2237894924, 4127773463, 2899873446, 3826047724, 2772822995, 4021041972, 3585330008, 3442671856, 4033639492, 4190375370, 3423510541, 3993284300, 3399740404, 2346010479, 2665226039, 3989420676, 2430396952, 4162553639, 3318451871, 2451157282, 3888084520, 4216786107, 3630490447, 3686500437, 4270289137, 2845436680, 3990477872, 3386727112, 2603155603, 2533548133, 2476236382, 2752268515, 2714540624, 3649552071, 2486775129, 3447438497, 2660214659, 3171847655, 2173117107, 2777204947, 3473126570, 2874563719, 3710212439, 3882999260, 3884415651, 3939886653, 2513961523, 3259070705, 4076001992, 3695924943, 2630642728, 2302962913, 3977147010, 4229898948, 3278694988, 3668138471, 4174657761, 2681204139, 2468496171, 3953941369, 4216451258, 3986080889, 3355338704, 3484226746, 3964851958, 4063196140, 3210555673, 3972895759, 2762823957}; + const std::vector kPrimes = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353}; - uint32_t find_next_prime(uint32_t n){ + uint32_t find_next_capacity(uint32_t n){ auto it = std::lower_bound(kPrimes.begin(), kPrimes.end(), n); if(it == kPrimes.end()) return n; return *it; } inline uint32_t _hash(StrName key, uint32_t capacity, uint32_t hash_seed){ - uint32_t i = key.index * (uint32_t)2654435761; - return (i ^ hash_seed) % capacity; + return (key.index ^ hash_seed) % capacity; } uint32_t find_perfect_hash_seed(uint32_t capacity, const std::vector& keys){ @@ -29,7 +28,15 @@ namespace pkpy{ uint32_t index = _hash(key, capacity, kHashSeeds[i]); indices.insert(index); } - float score = indices.size() / (float)keys.size(); + float find_hit_score = indices.size() / (float)keys.size(); + std::vector indices_vec(indices.begin(), indices.end()); + std::sort(indices_vec.begin(), indices_vec.end()); + float find_miss_score = indices.size(); + for(int j=1; j b.second; }); @@ -51,7 +58,7 @@ namespace pkpy{ NameDict(uint32_t capacity=2, float load_factor=0.67, uint32_t hash_seed=0): _capacity(capacity), _size(0), _load_factor(load_factor), - _hash_seed(hash_seed), _a(new NameDictNode[capacity]) {} + _hash_seed(hash_seed), _a(new NameDictNode[_capacity]) {} NameDict(const NameDict& other) { this->_capacity = other._capacity; @@ -69,14 +76,6 @@ namespace pkpy{ uint32_t size() const { return _size; } #define HASH_PROBE(key, ok, i) \ - bool ok = false; uint32_t i; \ - i = _hash(key, _capacity, _hash_seed); \ - while(!_a[i].empty()) { \ - if(_a[i].first == (key)) { ok = true; break; } \ - i = (i + 1) % _capacity; \ - } - -#define HASH_PROBE_OVERRIDE(key, ok, i) \ ok = false; \ i = _hash(key, _capacity, _hash_seed); \ while(!_a[i].empty()) { \ @@ -85,58 +84,63 @@ namespace pkpy{ } const PyVar& operator[](StrName key) const { + bool ok; uint32_t i; HASH_PROBE(key, ok, i); - if(!ok) throw std::out_of_range("NameDict key not found"); + if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); return _a[i].second; } - [[nodiscard]] PyVar& operator[](StrName key){ + PyVar& get(StrName key){ + bool ok; uint32_t i; + HASH_PROBE(key, ok, i); + if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); + return _a[i].second; + } + + template + void set(StrName key, T&& value){ + bool ok; uint32_t i; HASH_PROBE(key, ok, i); if(!ok) { - _a[i].first = key; _size++; - if(_size > _capacity * _load_factor){ + if(_size > _capacity*_load_factor){ _rehash(true); - HASH_PROBE_OVERRIDE(key, ok, i); + HASH_PROBE(key, ok, i); } + _a[i].first = key; } - return _a[i].second; + _a[i].second = std::forward(value); } void _rehash(bool resize){ NameDictNode* old_a = _a; uint32_t old_capacity = _capacity; - if(resize) _capacity = find_next_prime(_capacity * 2); - _size = 0; + if(resize) _capacity = find_next_capacity(_capacity * 2); _a = new NameDictNode[_capacity]; for(uint32_t i=0; i keys; - for(uint32_t i=0; i<_capacity; i++){ - if(_a[i].empty()) continue; - keys.push_back(_a[i].first); - } - _hash_seed = find_perfect_hash_seed(_capacity, keys); + _hash_seed = find_perfect_hash_seed(_capacity, keys()); _rehash(false); // do not resize } inline PyVar* try_get(StrName key){ + bool ok; uint32_t i; HASH_PROBE(key, ok, i); if(!ok) return nullptr; return &_a[i].second; } inline bool try_set(StrName key, PyVar&& value){ + bool ok; uint32_t i; HASH_PROBE(key, ok, i); if(!ok) return false; _a[i].second = std::move(value); @@ -144,64 +148,70 @@ namespace pkpy{ } inline bool contains(StrName key) const { + bool ok; uint32_t i; HASH_PROBE(key, ok, i); return ok; } - ~NameDict(){ delete[] _a;} + ~NameDict(){ delete[] _a; } - struct iterator { - const NameDict* _dict; - uint32_t i; - iterator() = default; - iterator(const NameDict* dict, uint32_t i): _dict(dict), i(i) { _skip_empty(); } - inline void _skip_empty(){ while(i < _dict->_capacity && _dict->_a[i].empty()) i++;} - inline iterator& operator++(){ i++; _skip_empty(); return *this;} - - inline bool operator!=(const iterator& other) const { return i != other.i; } - inline bool operator==(const iterator& other) const { return i == other.i; } - - inline NameDictNode* operator->() const { return &_dict->_a[i]; } - }; - - template - void emplace(StrName key, T&& value){ - HASH_PROBE(key, ok, i); - if(!ok) { - _a[i].first = key; - _size++; - if(_size > _capacity * _load_factor){ - _rehash(true); - HASH_PROBE_OVERRIDE(key, ok, i); - } + void update(const NameDict& other){ + for(uint32_t i=0; i(value); - } - - void insert(iterator begin, iterator end){ - for(auto it = begin; it != end; ++it){ - emplace(it->first, it->second); - } - } - - iterator find(StrName key) const{ - HASH_PROBE(key, ok, i); - if(!ok) return end(); - return iterator(this, i); } void erase(StrName key){ + bool ok; uint32_t i; HASH_PROBE(key, ok, i); - if(!ok) throw std::out_of_range("NameDict key not found"); + if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); _a[i] = NameDictNode(); _size--; } - inline iterator begin() const { return iterator(this, 0); } - inline iterator end() const { return iterator(this, _capacity); } + void print_stats(){ + std::map stats; + for(uint32_t i=0; i<_capacity; i++){ + if(_a[i].empty()) continue; + stats[_a[i].first] = 1; + } + for(auto [key, _]: stats){ + bool ok = false; uint32_t i; + i = _hash(key, _capacity, _hash_seed); + while(!_a[i].empty()) { + if(_a[i].first == (key)) { ok = true; break; } + i = (i + 1) % _capacity; + stats[key]++; + } + } + for(uint32_t i=0; i<_capacity; i++){ + if(_a[i].empty()) { + std::cout << i << ": " << std::endl; + continue; + } + std::cout << i << ": <" << _a[i].first.str() << ", " << stats[_a[i].first] << '>' << std::endl; + } + } - #undef HASH_PROBE - #undef HASH_PROBE_OVERRIDE + std::vector items() const { + std::vector v; + for(uint32_t i=0; i<_capacity; i++){ + if(_a[i].empty()) continue; + v.push_back(_a[i]); + } + return v; + } + + std::vector keys() const { + std::vector v; + for(uint32_t i=0; i<_capacity; i++){ + if(_a[i].empty()) continue; + v.push_back(_a[i].first); + } + return v; + } +#undef HASH_PROBE }; } // namespace pkpy \ No newline at end of file diff --git a/src/obj.h b/src/obj.h index b6101ae2..83af12c2 100644 --- a/src/obj.h +++ b/src/obj.h @@ -83,7 +83,7 @@ struct PyObject { inline bool is_attr_valid() const noexcept { return _attr != nullptr; } inline pkpy::NameDict& attr() noexcept { return *_attr; } - inline PyVar& attr(StrName name) noexcept { return (*_attr)[name]; } + inline const PyVar& attr(StrName name) const noexcept { return _attr->get(name); } virtual void* value() = 0; PyObject(Type type) : type(type) {} @@ -99,9 +99,9 @@ struct Py_ : PyObject { inline void _init() noexcept { if constexpr (std::is_same_v || std::is_same_v) { - _attr = new pkpy::NameDict(5, kTypeAttrLoadFactor); + _attr = new pkpy::NameDict(4, kTypeAttrLoadFactor); }else if constexpr(std::is_same_v){ - _attr = new pkpy::NameDict(5, kInstAttrLoadFactor); + _attr = new pkpy::NameDict(4, kInstAttrLoadFactor); }else{ _attr = nullptr; } diff --git a/src/pocketpy.h b/src/pocketpy.h index 35b4a327..9e7ceacc 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -59,6 +59,11 @@ void init_builtins(VM* _vm) { return vm->None; }); + // _vm->bind_builtin_func<1>("test", [](VM* vm, pkpy::Args& args) { + // args[0]->attr().print_stats(); + // return vm->None; + // }); + _vm->bind_builtin_func<0>("super", [](VM* vm, pkpy::Args& args) { const PyVar* self = vm->top_frame()->f_locals().try_get(m_self); if(self == nullptr) vm->TypeError("super() can only be called in a class"); @@ -125,18 +130,16 @@ void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<1>("dir", [](VM* vm, pkpy::Args& args) { - std::vector names; + std::set names; if(args[0]->is_attr_valid()){ - for(auto it = args[0]->attr().begin(); it != args[0]->attr().end(); ++it){ - names.push_back(it->first); - } + std::vector keys = args[0]->attr().keys(); + names.insert(keys.begin(), keys.end()); } const pkpy::NameDict& t_attr = vm->_t(args[0])->attr(); - for(auto it = t_attr.begin(); it != t_attr.end(); ++it){ - if (std::find(names.begin(), names.end(), it->first) == names.end()) names.push_back(it->first); - } + std::vector keys = t_attr.keys(); + names.insert(keys.begin(), keys.end()); pkpy::List ret; - for (const auto& name : names) ret.push_back(vm->PyStr(name.str())); + for (StrName name : names) ret.push_back(vm->PyStr(name.str())); return vm->PyList(std::move(ret)); }); @@ -850,10 +853,10 @@ extern "C" { /// Return `__repr__` of the result. /// If the variable is not found, return `nullptr`. char* pkpy_vm_get_global(VM* vm, const char* name){ - auto it = vm->_main->attr().find(name); - if(it == vm->_main->attr().end()) return nullptr; + PyVar* val = vm->_main->attr().try_get(name); + if(val == nullptr) return nullptr; try{ - Str _repr = vm->PyStr_AS_C(vm->asRepr(it->second)); + Str _repr = vm->PyStr_AS_C(vm->asRepr(*val)); return strdup(_repr.c_str()); }catch(...){ return nullptr; diff --git a/src/str.h b/src/str.h index 87e52fc1..875939b3 100644 --- a/src/str.h +++ b/src/str.h @@ -157,33 +157,30 @@ struct StrName { return this->index > other.index; } - static std::map> _interned; - static std::vector _r_interned; + static std::map> _interned; + static std::map> _r_interned; inline static StrName get(const Str& s){ return get(s.c_str()); } + // https://github.com/python/cpython/blob/main/Objects/dictobject.c#L175 + static uint64_t _j; + static StrName get(const char* s){ auto it = _interned.find(s); if(it != _interned.end()) return StrName(it->second); - int index = _r_interned.size(); + _j = (5*_j + 1) & 0xffffffff; + uint32_t index = (uint32_t)_j; _interned[s] = index; - _r_interned.push_back(s); + _r_interned[index] = s; return StrName(index); } }; -// declare static members -std::map> StrName::_interned; -std::vector StrName::_r_interned; - -template<> -struct std::hash { - inline size_t operator()(const StrName& name) const { - return name.index; - } -}; +std::map> StrName::_interned; +std::map> StrName::_r_interned; +uint64_t StrName::_j = 1; const StrName __class__ = StrName::get("__class__"); const StrName __base__ = StrName::get("__base__"); diff --git a/src/vm.h b/src/vm.h index c85d6734..995efa00 100644 --- a/src/vm.h +++ b/src/vm.h @@ -160,22 +160,22 @@ public: int i = 0; for(StrName name : fn.args){ if(i < args.size()){ - locals->emplace(name, std::move(args[i++])); + locals->set(name, std::move(args[i++])); continue; } TypeError("missing positional argument " + name.str().escape(true)); } - locals->insert(fn.kwargs.begin(), fn.kwargs.end()); + locals->update(fn.kwargs); if(!fn.starred_arg.empty()){ pkpy::List vargs; // handle *args while(i < args.size()) vargs.push_back(std::move(args[i++])); - locals->emplace(fn.starred_arg, PyTuple(std::move(vargs))); + locals->set(fn.starred_arg, PyTuple(std::move(vargs))); }else{ for(StrName key : fn.kwargs_order){ if(i < args.size()){ - locals->emplace(key, std::move(args[i++])); + locals->set(key, std::move(args[i++])); }else{ break; } @@ -188,7 +188,7 @@ public: if(!fn.kwargs.contains(key)){ TypeError(key.escape(true) + " is an invalid keyword argument for " + fn.name.str() + "()"); } - locals->emplace(key, kwargs[i+1]); + locals->set(key, kwargs[i+1]); } const PyVar& _module = fn._module != nullptr ? fn._module : top_frame()->_module; auto _frame = _new_frame(fn.code, _module, locals, fn._closure); @@ -295,7 +295,7 @@ public: Type _new_type_object(StrName name, Type base=0) { PyVar obj = pkpy::make_shared>(tp_type, _all_types.size()); setattr(obj, __base__, _t(base)); - _types[name] = obj; + _types.set(name, obj); _all_types.push_back(obj); return OBJ_GET(Type, obj); } @@ -328,7 +328,7 @@ public: PyVar new_module(StrName name) { PyVar obj = new_object(tp_module, DummyModule()); setattr(obj, __name__, PyStr(name.str())); - _modules[name] = obj; + _modules.set(name, obj); return obj; } @@ -378,7 +378,7 @@ public: PyObject* p = obj.get(); while(p->type == tp_super) p = static_cast(p->value())->get(); if(!p->is_attr_valid()) TypeError("cannot set attribute"); - p->attr(name) = std::forward(value); + p->attr().set(name, std::forward(value)); } template @@ -604,8 +604,8 @@ public: _all_types.push_back(_tp_type); tp_object = 0; tp_type = 1; - _types["object"] = _tp_object; - _types["type"] = _tp_type; + _types.set("object", _tp_object); + _types.set("type", _tp_type); tp_int = _new_type_object("int"); tp_float = _new_type_object("float"); @@ -639,8 +639,8 @@ public: setattr(_t(tp_type), __base__, _t(tp_object)); setattr(_t(tp_object), __base__, None); - for(auto it = _types.begin(); it != _types.end(); ++it){ - setattr(it->second, __name__, PyStr(it->first.str())); + for(auto [k, v]: _types.items()){ + setattr(v, __name__, PyStr(k.str())); } std::vector pb_types = {"type", "object", "bool", "int", "float", "str", "list", "tuple", "range"}; @@ -649,8 +649,8 @@ public: } post_init(); - for(auto it = _types.begin(); it != _types.end(); ++it){ - it->second->attr()._try_perfect_rehash(); + for(auto [k, v]: _types.items()){ + v->attr()._try_perfect_rehash(); } builtins->attr()._try_perfect_rehash(); } @@ -770,10 +770,10 @@ PyVar NameRef::get(VM* vm, Frame* frame) const{ void NameRef::set(VM* vm, Frame* frame, PyVar val) const{ switch(scope()) { - case NAME_LOCAL: frame->f_locals()[name()] = std::move(val); break; + case NAME_LOCAL: frame->f_locals().set(name(), std::move(val)); break; case NAME_GLOBAL: if(frame->f_locals().try_set(name(), std::move(val))) return; - frame->f_globals()[name()] = std::move(val); + frame->f_globals().set(name(), std::move(val)); break; default: UNREACHABLE(); } @@ -879,7 +879,7 @@ void CodeObject::optimize(VM* vm){ std::vector keys; for(auto& p: names) if(p.second == NAME_LOCAL) keys.push_back(p.first); uint32_t base_n = (uint32_t)(keys.size() / kLocalsLoadFactor + 0.5); - perfect_locals_capacity = pkpy::find_next_prime(base_n); + perfect_locals_capacity = pkpy::find_next_capacity(base_n); perfect_hash_seed = pkpy::find_perfect_hash_seed(perfect_locals_capacity, keys); for(int i=1; i