// NOTE(review): the text below is a unified git diff whose line breaks were collapsed; it is
// not a compilable C++ translation unit. Template argument lists and #include targets appear
// to have been lost in extraction (bare `#include`, `std::map global_names`, `template`,
// `std::forward(value)`), and part of NameDict between __rehash_2x() and the iterator's
// _skip_empty() is missing. Treat every elided span as unknown.
//
// Summary of the hunks, as far as they are visible here:
//  - src/codeobject.h: adds CodeObject::recommended_hashmap_capacity, initialised to 8.
//  - src/common.h: adds one #include (target not visible) and
//    `const float kNameDictLoadFactor = 0.8;`.
//  - src/namedict.h (new): NameDictNode {StrName first; PyVar second} and NameDict, a table
//    stored in a new[]-allocated array `_a` of `_capacity` nodes. HASH_PROBE starts at
//    `key.index % _capacity` and steps by +1 (mod _capacity) until it hits the key or an
//    empty node. The non-const operator[] and emplace() claim the empty slot on a miss and
//    call __rehash_2x() once `_size > _capacity * kNameDictLoadFactor`, then re-probe with
//    HASH_PROBE_OVERRIDE. The const operator[] and erase() throw
//    std::out_of_range("NameDict key not found") on a miss; find() returns end() on a miss.
//    Move construction and move assignment are deleted.
//  - src/safestl.h: includes "namedict.h" inside `namespace pkpy`, comments out the emhash5
//    HashMap alias and the old NameDict typedef, and moves List into the same namespace block.
//  - src/str.h: removes StrName::reset(); StrName::empty() still tests `index == -1`.
//  - src/vm.h: _lazy_modules becomes a std::map; a called function's locals are created with
//    fn.code->recommended_hashmap_capacity and filled through emplace(); the
//    `catch (const std::exception&)` handler is wrapped in `#ifdef _NDEBUG`;
//    CodeObject::optimize() counts entries of `names` whose second member is NAME_LOCAL and
//    sets recommended_hashmap_capacity = (int)(n / kNameDictLoadFactor + 1.5), minimum 2.
//
// Review concerns (confirm against the full sources before acting):
//  - erase() overwrites the slot with a default NameDictNode, while HASH_PROBE stops at the
//    first empty slot. A key that was placed further along the probe run would then appear
//    to be missing after an earlier slot is erased. Presumably a tombstone or re-insertion
//    is needed; verify.
//  - No ~NameDict() is visible in this text. If none exists in the lost span, the array from
//    `new NameDictNode[_capacity]` is never released. TODO confirm.
//  - operator=(const NameDict&) runs `delete[] _a` before copying and has no
//    self-assignment check.
//  - `#ifdef _NDEBUG`: the macro the standard library's assert consults is NDEBUG; confirm
//    that the build really defines _NDEBUG and that this polarity is intended.
//  - namedict.h includes "safestl.h" while safestl.h includes "namedict.h"; this presumably
//    relies on include guards / #pragma once in both. Verify.
diff --git a/src/codeobject.h b/src/codeobject.h index 6ae0b7d8..1f7cc095 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -64,6 +64,8 @@ struct CodeObject { std::map global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; + + int recommended_hashmap_capacity = 8; void optimize(VM* vm); diff --git a/src/common.h b/src/common.h index b1112583..7e37c9cf 100644 --- a/src/common.h +++ b/src/common.h @@ -24,6 +24,7 @@ #include #include #include +#include // #include // namespace fs = std::filesystem; @@ -64,4 +65,6 @@ struct Type { //#define THREAD_LOCAL thread_local #define THREAD_LOCAL -#define RAW(T) std::remove_const_t> \ No newline at end of file +#define RAW(T) std::remove_const_t> + +const float kNameDictLoadFactor = 0.8; \ No newline at end of file diff --git a/src/namedict.h b/src/namedict.h new file mode 100644 index 00000000..4eebad21 --- /dev/null +++ b/src/namedict.h @@ -0,0 +1,154 @@ +#pragma once + +#include "safestl.h" + +struct NameDictNode{ + StrName first; + PyVar second; + inline bool empty() const { return first.empty(); } +}; + +struct NameDict { + int _capacity; + int _size; + NameDictNode* _a; + + NameDict(int capacity=2): _capacity(capacity), _size(0) { + _a = new NameDictNode[_capacity]; + } + + NameDict(const NameDict& other) { + this->_capacity = other._capacity; + this->_size = other._size; + this->_a = new NameDictNode[_capacity]; + for(int i=0; i<_capacity; i++) _a[i] = other._a[i]; + } + + NameDict& operator=(const NameDict& other){ + delete[] _a; + this->_capacity = other._capacity; + this->_size = other._size; + this->_a = new NameDictNode[_capacity]; + for(int i=0; i<_capacity; i++) _a[i] = other._a[i]; + return *this; + } + + NameDict(NameDict&&) = delete; + NameDict& operator=(NameDict&&) = delete; + + int size() const { return _size; } + +#define HASH_PROBE(key, ok, i) \ + int i = (key).index % _capacity; \ + bool ok = false; \ + while(!_a[i].empty()) { \ + if(_a[i].first == (key)) { ok = true; break; 
} \ + i = (i + 1) % _capacity; \ + } + +#define HASH_PROBE_OVERRIDE(key, ok, i) \ + i = (key).index % _capacity; \ + ok = false; \ + while(!_a[i].empty()) { \ + if(_a[i].first == (key)) { ok = true; break; } \ + i = (i + 1) % _capacity; \ + } + + const PyVar& operator[](StrName key) const { + HASH_PROBE(key, ok, i); + if(!ok) throw std::out_of_range("NameDict key not found"); + return _a[i].second; + } + + [[nodiscard]] PyVar& operator[](StrName key){ + HASH_PROBE(key, ok, i); + if(!ok) { + _a[i].first = key; + _size++; + if(_size > _capacity * kNameDictLoadFactor){ + __rehash_2x(); + HASH_PROBE_OVERRIDE(key, ok, i); + } + } + return _a[i].second; + } + + void __rehash_2x(){ + NameDictNode* old_a = _a; + int old_capacity = _capacity; + _capacity *= 2; + _size = 0; + _a = new NameDictNode[_capacity]; + for(int i=0; i_capacity && _dict->_a[i].empty()) i++;} + inline iterator& operator++(){ i++; _skip_empty(); return *this;} + + inline bool operator!=(const iterator& other) const { return i != other.i; } + inline bool operator==(const iterator& other) const { return i == other.i; } + + inline NameDictNode* operator->() const { return &_dict->_a[i]; } + }; + + template + void emplace(StrName key, T&& value){ + HASH_PROBE(key, ok, i); + if(!ok) { + _a[i].first = key; + _size++; + if(_size > _capacity * kNameDictLoadFactor){ + __rehash_2x(); + HASH_PROBE_OVERRIDE(key, ok, i); + } + } + _a[i].second = std::forward(value); + } + + void insert(iterator begin, iterator end){ + for(auto it = begin; it != end; ++it){ + emplace(it->first, it->second); + } + } + + iterator find(StrName key) const{ + HASH_PROBE(key, ok, i); + if(!ok) return end(); + return iterator(this, i); + } + + void erase(StrName key){ + HASH_PROBE(key, ok, i); + if(!ok) throw std::out_of_range("NameDict key not found"); + _a[i] = NameDictNode(); + _size--; + } + + inline iterator begin() const { return iterator(this, 0); } + inline iterator end() const { return iterator(this, _capacity); } +}; \ No newline 
at end of file diff --git a/src/safestl.h b/src/safestl.h index f8fa0256..6485f105 100644 --- a/src/safestl.h +++ b/src/safestl.h @@ -11,39 +11,33 @@ typedef PyVar PyVarRef; #include "hash_table5.hpp" namespace pkpy { - template - using HashMap = emhash5::HashMap; -} + #include "namedict.h" + // template + // using HashMap = emhash5::HashMap; + // typedef HashMap NameDict; + + class List: public std::vector { + PyVar& at(size_t) = delete; -namespace pkpy{ -class List: public std::vector { - PyVar& at(size_t) = delete; - - inline void _check_index(size_t i) const { - if (i >= size()){ - auto msg = "std::vector index out of range, " + std::to_string(i) + " not in [0, " + std::to_string(size()) + ")"; - throw std::out_of_range(msg); + inline void _check_index(size_t i) const { + if (i >= size()){ + auto msg = "std::vector index out of range, " + std::to_string(i) + " not in [0, " + std::to_string(size()) + ")"; + throw std::out_of_range(msg); + } + } + public: + PyVar& operator[](size_t i) { + _check_index(i); + return std::vector::operator[](i); } - } -public: - PyVar& operator[](size_t i) { - _check_index(i); - return std::vector::operator[](i); - } - const PyVar& operator[](size_t i) const { - _check_index(i); - return std::vector::operator[](i); - } + const PyVar& operator[](size_t i) const { + _check_index(i); + return std::vector::operator[](i); + } - using std::vector::vector; -}; - - -} - -namespace pkpy { - typedef HashMap NameDict; + using std::vector::vector; + }; class Args { static THREAD_LOCAL SmallArrayPool _pool; @@ -140,8 +134,5 @@ namespace pkpy { } typedef Args Tuple; - - // declare static members THREAD_LOCAL SmallArrayPool Args::_pool; - // THREAD_LOCAL SmallArrayPool NameDict::_pool; } // namespace pkpy \ No newline at end of file diff --git a/src/str.h b/src/str.h index 8cfb72a0..3b56096b 100644 --- a/src/str.h +++ b/src/str.h @@ -140,7 +140,6 @@ struct StrName { StrName(const Str& s): index(get(s).index) {} inline const Str& str() const { 
return _r_interned[index]; } inline bool empty() const { return index == -1; } - inline void reset() { index = -1; } inline bool operator==(const StrName& other) const noexcept { return this->index == other.index; diff --git a/src/vm.h b/src/vm.h index 8db5c15c..7adcfe17 100644 --- a/src/vm.h +++ b/src/vm.h @@ -25,7 +25,7 @@ public: pkpy::NameDict _types; pkpy::NameDict _modules; // loaded modules - pkpy::HashMap _lazy_modules; // lazy loaded modules + std::map _lazy_modules; // lazy loaded modules PyVar None, True, False, Ellipsis; bool use_stdio; @@ -151,7 +151,7 @@ public: return f(this, args); } else if(is_type(*callable, tp_function)){ const pkpy::Function& fn = PyFunction_AS_C(*callable); - pkpy::shared_ptr _locals = pkpy::make_shared(); + auto _locals = pkpy::make_shared(fn.code->recommended_hashmap_capacity); pkpy::NameDict& locals = *_locals; int i = 0; @@ -172,7 +172,7 @@ public: }else{ for(StrName key : fn.kwargs_order){ if(i < args.size()){ - locals[key] = args[i++]; + locals.emplace(key, args[i++]); }else{ break; } @@ -185,8 +185,7 @@ public: if(!fn.kwargs.contains(key)){ TypeError(key.escape(true) + " is an invalid keyword argument for " + fn.name + "()"); } - const PyVar& val = kwargs[i+1]; - locals[key] = val; + locals.emplace(key, kwargs[i+1]); } PyVar _module = fn._module != nullptr ? fn._module : top_frame()->_module; auto _frame = _new_frame(fn.code, _module, _locals, fn._closure); @@ -212,10 +211,12 @@ public: }catch (const pkpy::Exception& e){ *_stderr << e.summary() << '\n'; } +#ifdef _NDEBUG catch (const std::exception& e) { *_stderr << "An std::exception occurred! 
It could be a bug.\n"; *_stderr << e.what() << '\n'; } +#endif callstack = {}; return nullptr; } @@ -857,6 +858,11 @@ PyVar pkpy::NativeFunc::operator()(VM* vm, pkpy::Args& args) const{ } void CodeObject::optimize(VM* vm){ + int n = 0; + for(auto& p: names) if(p.second == NAME_LOCAL) n++; + recommended_hashmap_capacity = (int)(n / kNameDictLoadFactor + 1.5); + if(recommended_hashmap_capacity < 2) recommended_hashmap_capacity = 2; + for(int i=1; i