From d871d91adb1f57a0bc4c2b62c930be483181e4a8 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 14:09:45 +0800 Subject: [PATCH 01/30] Impl Dict and DictIter in c11 --- include/pocketpy/interpreter/iter.hpp | 4 +- include/pocketpy/objects/dict.h | 113 +++++++++++++ include/pocketpy/objects/dict.hpp | 108 +++++++----- include/pocketpy/objects/pyvar.h | 54 ++++++ src/interpreter/iter.cpp | 13 +- src/interpreter/vm.cpp | 31 ---- src/objects/dict.c | 228 ++++++++++++++++++++++++++ src/objects/dict.cpp | 180 -------------------- src/objects/pyvar.cpp | 17 ++ src/pocketpy.cpp | 10 +- 10 files changed, 495 insertions(+), 263 deletions(-) create mode 100644 include/pocketpy/objects/dict.h create mode 100644 include/pocketpy/objects/pyvar.h create mode 100644 src/objects/dict.c delete mode 100644 src/objects/dict.cpp create mode 100644 src/objects/pyvar.cpp diff --git a/include/pocketpy/interpreter/iter.hpp b/include/pocketpy/interpreter/iter.hpp index 130c08f7..a1917679 100644 --- a/include/pocketpy/interpreter/iter.hpp +++ b/include/pocketpy/interpreter/iter.hpp @@ -74,9 +74,9 @@ struct Generator { struct DictItemsIter { PyVar ref; - int i; + pkpy_DictIter it; - DictItemsIter(PyVar ref) : ref(ref) { i = PK_OBJ_GET(Dict, ref)._head_idx; } + DictItemsIter(PyVar ref) : ref(ref) { it = PK_OBJ_GET(Dict, ref).iter(); } void _gc_mark(VM* vm) const { vm->obj_gc_mark(ref); } diff --git a/include/pocketpy/objects/dict.h b/include/pocketpy/objects/dict.h new file mode 100644 index 00000000..aa56ae42 --- /dev/null +++ b/include/pocketpy/objects/dict.h @@ -0,0 +1,113 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "pocketpy/objects/pyvar.h" +#include "pocketpy/common/vector.h" + +typedef struct { + unsigned int _version; /** used internelly to detect iterator invalidation */ + int count; /** number of elements in the dictionary */ + c11_vector _entries; /** contains `pkpy_DictEntry` (hidden type) */ + int _htcap; /** capacity of the hashtable, always a power of 2 */ + void* _hashtable; /** contains indecies, can be `u8`, `u16` or `u32` according to size*/ +} pkpy_Dict; + +typedef struct { + const pkpy_Dict* _dict; + unsigned int _version; + int _index; +} pkpy_DictIter; + +/** + * @brief `pkpy_Dict` constructor + * @param self `pkpy_Dict` instance + */ +void pkpy_Dict__ctor(pkpy_Dict* self); + +/** + * @brief `pkpy_Dict` destructor + * @param self `pkpy_Dict` instance + */ +void pkpy_Dict__dtor(pkpy_Dict* self); + +/** + * @brief Copy a `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @return a new `pkpy_Dict` instance, must be destructed by the caller + */ +pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self); + +/** + * @brief Set a key-value pair into the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param key key to set + * @param val value to set + * @return `true` if the key is newly added, `false` if the key already exists + */ +bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val); + +/** + * @brief Check if a key exists in the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param key key to check + * @return `true` if the key exists, `false` otherwise + */ +bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key); + +/** + * @brief Remove a key from the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param key key to remove + * @return `true` if the key was found and removed, `false` if the key doesn't exist + */ +bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key); + +/** + * @brief Try to get a value from the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param key key to get + * @return the value associated with the key, `NULL` if the key doesn't exist + */ +const pkpy_Var* pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key); + +/** + * @brief Update the `pkpy_Dict` with another one + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param other `pkpy_Dict` instance to update with + */ +void pkpy_Dict__update(pkpy_Dict* self, void *vm, const pkpy_Dict* other); + +/** + * @brief Clear the `pkpy_Dict` + * @param self `pkpy_Dict` instance + */ +void pkpy_Dict__clear(pkpy_Dict* self); + +/** + * @brief Iterate over the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @return an iterator over the `pkpy_Dict` + */ +pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict* self); + +/** + * @brief Iterate over the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param key key will be filled with the current key, can be `NULL` if not needed + * @param value value will be filled with the current value, can be `NULL` if not needed + * @return `true` if the iteration is still valid, `false` otherwise + */ +bool pkpy_DictIter__next(pkpy_DictIter* self, pkpy_Var* key, pkpy_Var* value); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 853f0d25..8272a45c 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -2,63 +2,95 @@ #include "pocketpy/objects/base.hpp" #include "pocketpy/objects/tuplelist.hpp" +#include "pocketpy/objects/dict.h" namespace pkpy { -struct Dict { - struct Item { - PyVar first; - PyVar second; - int prev; - int next; - }; +struct Dict : private pkpy_Dict { + Dict() { + pkpy_Dict__ctor(this); + } - constexpr static int __Capacity = 8; - constexpr static float __LoadFactor = 0.67f; + Dict(Dict&& other) { + std::memcpy(this, &other, sizeof(Dict)); + pkpy_Dict__ctor(&other); + } - int _capacity; - int _mask; - int _size; - int _critical_size; - int _head_idx; // for order preserving - int _tail_idx; // for order preserving - Item* _items; - - Dict(); - Dict(Dict&& other); - Dict(const Dict& other); + Dict(const Dict& other) { + // OPTIMIZEME: reduce copy + auto clone = pkpy_Dict__copy(&other); + std::memcpy(this, &clone, sizeof(Dict)); + } + Dict& operator= (const Dict&) = delete; Dict& operator= (Dict&&) = delete; - int size() const { return _size; } + int size() const { return count; } - void _probe_0(VM* vm, PyVar key, bool& ok, int& i) const; - void _probe_1(VM* vm, PyVar key, bool& ok, int& i) const; + void set(VM* vm, PyVar key, PyVar val) { + pkpy_Dict__set(this, vm, *reinterpret_cast<::pkpy_Var*>(&key), *reinterpret_cast<::pkpy_Var*>(&val)); + } - void set(VM* vm, PyVar key, PyVar val); - void _rehash(VM* vm); + PyVar try_get(VM* vm, PyVar key) const { + auto res = pkpy_Dict__try_get(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + if (!res) return nullptr; + return *reinterpret_cast(&res); + } - PyVar try_get(VM* vm, PyVar key) const; + bool contains(VM* vm, PyVar key) const { + return pkpy_Dict__contains(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + } - bool contains(VM* vm, PyVar key) const; - bool del(VM* vm, PyVar key); - void update(VM* vm, const Dict& other); + bool del(VM* vm, PyVar key) { + return pkpy_Dict__del(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + } + + void update(VM* vm, const Dict& other) { + pkpy_Dict__update(this, vm, &other); + } template void apply(__Func f) const { - int i = _head_idx; - while(i != -1) { - f(_items[i].first, _items[i].second); - i = _items[i].next; + pkpy_DictIter it = iter(); + PyVar key, val; + while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + f(key, val); } } - Tuple keys() const; - Tuple values() const; - void clear(); - ~Dict(); + Tuple keys() const { + Tuple res(count); + pkpy_DictIter it = iter(); + PyVar key, val; + int i = 0; + while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + res[i++] = key; + } + return res; + } - void __alloc_items(); + Tuple values() const { + Tuple res(count); + pkpy_DictIter it = iter(); + PyVar key, val; + int i = 0; + while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + res[i++] = val; + } + return res; + } + + pkpy_DictIter iter() const { + return pkpy_Dict__iter(this); + } + + void clear() { + pkpy_Dict__clear(this); + } + + ~Dict() { + pkpy_Dict__dtor(this); + } void _gc_mark(VM*) const; }; diff --git a/include/pocketpy/objects/pyvar.h b/include/pocketpy/objects/pyvar.h new file mode 100644 index 00000000..bc8fd593 --- /dev/null +++ b/include/pocketpy/objects/pyvar.h @@ -0,0 +1,54 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * @brief A python value in pocketpy. + */ +typedef struct { + // TODO: implement + + union { + int type; + char buf[16]; + }; +} pkpy_Var; + +/** + * @brief Check if the pkpy_Var is null. + * @param self The variable to check. + * @return True if the variable is null, false otherwise. + */ +#define pkpy_Var__is_null(self) ((self)->type == 0) + +/** + * @brief Set the variable to null. + * @param self The variable to set. + */ +#define pkpy_Var__set_null(self) do { (self)->type = 0; } while(0) + +/** + * @brief Check if two pkpy_Vars are equal, respects to __eq__ method. + * @param vm The virtual machine. + * @param a The first pkpy_Var. + * @param b The second pkpy_Var. + * @return True if the pkpy_Vars are equal, false otherwise. + */ +bool pkpy_Var__eq__(void *vm, pkpy_Var a, pkpy_Var b); + +/** + * @brief Get the hash of the pkpy_Var, respects to __hash__ method. + * @param vm The virtual machine. + * @param a The pkpy_Var to hash. + * @return The hash of the pkpy_Var. + */ +int64_t pkpy_Var__hash__(void *vm, pkpy_Var a); + +#ifdef __cplusplus +} +#endif diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp index 18c484d0..56d5f1f6 100644 --- a/src/interpreter/iter.cpp +++ b/src/interpreter/iter.cpp @@ -117,12 +117,13 @@ void DictItemsIter::_register(VM* vm, PyObject* mod, PyObject* type) { }); vm->bind__next__(type->as(), [](VM* vm, PyVar _0) -> unsigned { DictItemsIter& self = _CAST(DictItemsIter&, _0); - Dict& d = PK_OBJ_GET(Dict, self.ref); - if(self.i == -1) return 0; - vm->s_data.push(d._items[self.i].first); - vm->s_data.push(d._items[self.i].second); - self.i = d._items[self.i].next; - return 2; + PyVar key, val; + if (pkpy_DictIter__next(&self.it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + vm->s_data.push(key); + vm->s_data.push(val); + return 2; + } + return 0; }); } diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index 10ad986c..40ca619b 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -1628,37 +1628,6 @@ BIND_BINARY_SPECIAL(__xor__) #undef BIND_BINARY_SPECIAL -void Dict::_probe_0(VM* vm, PyVar key, bool& ok, int& i) const { - ok = false; - i64 hash = vm->py_hash(key); - i = hash & _mask; - for(int j = 0; j < _capacity; j++) { - if(_items[i].first != nullptr) { - if(vm->py_eq(_items[i].first, key)) { - ok = true; - break; - } - } else { - if(_items[i].second == nullptr) break; - } - // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166 - i = ((5 * i) + 1) & _mask; - } -} - -void Dict::_probe_1(VM* vm, PyVar key, bool& ok, int& i) const { - ok = false; - i = vm->py_hash(key) & _mask; - while(_items[i].first != nullptr) { - if(vm->py_eq(_items[i].first, key)) { - ok = true; - break; - } - // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166 - i = ((5 * i) + 1) & _mask; - } -} - #if PK_ENABLE_PROFILER void NextBreakpoint::_step(VM* vm) { int curr_callstack_size = vm->callstack.size(); diff --git a/src/objects/dict.c b/src/objects/dict.c new file mode 100644 index 00000000..62f89924 --- /dev/null +++ b/src/objects/dict.c @@ -0,0 +1,228 @@ +#include "pocketpy/objects/dict.h" +#include "pocketpy/common/utils.h" +#include +#include +#include + +struct pkpy_DictEntry { + int64_t hash; + pkpy_Var key; + pkpy_Var val; +}; + +inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { + if(self->count < 255) return 1; + if(self->count < 65535) return 2; + return 4; +} + +inline static int pkpy_Dict__idx_null(const pkpy_Dict* self) { + if(self->count < 255) return 255; + if(self->count < 65535) return 65535; + return 4294967295; +} + +inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } + +void pkpy_Dict__ctor(pkpy_Dict* self) { + self->_version = 0; + self->count = 0; + c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); + self->_htcap = 16; + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); +} + +void pkpy_Dict__dtor(pkpy_Dict* self) { + c11_vector__dtor(&self->_entries); + free(self->_hashtable); +} + +pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { + int ht_size = pkpy_Dict__ht_byte_size(self); + void* ht_clone = malloc(ht_size); + memcpy(ht_clone, self->_hashtable, ht_size); + return (pkpy_Dict){._version = 0, + .count = self->count, + ._entries = c11_vector__copy(&self->_entries), + ._htcap = self->_htcap, + ._hashtable = ht_clone}; +} + +static int pkpy_Dict__htget(const pkpy_Dict* self, int h) { + int sz = pkpy_Dict__idx_size(self); + switch(sz) { + case 1: return ((uint8_t*)self->_hashtable)[h]; + case 2: return ((uint16_t*)self->_hashtable)[h]; + case 4: return ((uint32_t*)self->_hashtable)[h]; + default: PK_UNREACHABLE(); + } +} + +static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { + int sz = pkpy_Dict__idx_size(self); + switch(sz) { + case 1: ((uint8_t*)self->_hashtable)[h] = v; break; + case 2: ((uint16_t*)self->_hashtable)[h] = v; break; + case 4: ((uint32_t*)self->_hashtable)[h] = v; break; + default: PK_UNREACHABLE(); + } +} + +static int pkpy_Dict__probe(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { + const int null = pkpy_Dict__idx_null(self); + const int mask = self->_htcap - 1; + for(int h = hash & mask;; h = (h + 1) & mask) { + int idx = pkpy_Dict__htget(self, h); + if(idx == null) return h; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + if(pkpy_Var__is_null(&entry->key)) return h; + if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; + } + PK_UNREACHABLE(); +} + +static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { + self->_version += 1; + free(self->_hashtable); + self->_htcap *= 2; + void* new_ht = malloc(pkpy_Dict__ht_byte_size(self)); + memset(new_ht, 0xff, pkpy_Dict__ht_byte_size(self)); + + for(int i = 0; i < self->_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + + int h = pkpy_Dict__probe(self, vm, entry->key, entry->hash); + pkpy_Dict__htset(self, h, i); + } +} + +static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { + int deleted_slots = self->_entries.count - self->count; + if(deleted_slots < self->_entries.count * 0.25) return false; + + // shrink + self->_version += 1; + free(self->_hashtable); + while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) + self->_htcap /= 2; + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); + + c11_vector new_entries; + c11_vector__ctor(&new_entries, sizeof(struct pkpy_DictEntry)); + for(int i = 0; i < self->_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + + int j = new_entries.count; + c11_vector__push(struct pkpy_DictEntry, &new_entries, *entry); + pkpy_Dict__htset(self, pkpy_Dict__probe(self, vm, entry->key, entry->hash), j); + } + c11_vector__dtor(&self->_entries); + self->_entries = new_entries; + return true; +} + +bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { + int hash = pkpy_Var__hash__(vm, key); + int h = pkpy_Dict__probe(self, vm, key, hash); + + int idx = pkpy_Dict__htget(self, h); + if(idx == pkpy_Dict__idx_null(self)) { + self->_version += 1; + idx = self->_entries.count; + c11_vector__push(struct pkpy_DictEntry, + &self->_entries, + ((struct pkpy_DictEntry){ + .hash = hash, + .key = key, + .val = val, + })); + pkpy_Dict__htset(self, h, idx); + if(self->count >= self->_htcap * 0.75) pkpy_Dict__extendht(self, vm); + return true; + } + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + entry->val = val; + return false; +} + +bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { + int hash = pkpy_Var__hash__(vm, key); + int h = pkpy_Dict__probe(self, vm, key, hash); + + int idx = pkpy_Dict__htget(self, h); + if(idx == pkpy_Dict__idx_null(self)) return false; + return true; +} + +bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { + int hash = pkpy_Var__hash__(vm, key); + int h = pkpy_Dict__probe(self, vm, key, hash); + int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); + if(idx == null) return false; + + self->_version += 1; + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + pkpy_Var__set_null(&entry->key); + pkpy_Dict__htset(self, h, null); + pkpy_Dict__refactor(self, vm); + return true; +} + +const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { + int hash = pkpy_Var__hash__(vm, key); + int h = pkpy_Dict__probe(self, vm, key, hash); + + int idx = pkpy_Dict__htget(self, h); + if(idx == pkpy_Dict__idx_null(self)) return NULL; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + return &entry->val; +} + +void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) { + for(int i = 0; i < other->_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &other->_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + pkpy_Dict__set(self, vm, entry->key, entry->val); + } +} + +void pkpy_Dict__clear(pkpy_Dict *self) { + int v = self->_version; + pkpy_Dict__dtor(self); + pkpy_Dict__ctor(self); + self->_version = v + 1; +} + +pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict *self) { + return (pkpy_DictIter){ + ._dict = self, + ._index = 0, + ._version = self->_version, + }; +} + +bool pkpy_DictIter__next(pkpy_DictIter *self, pkpy_Var *key, pkpy_Var *val) { + if(self->_version != self->_dict->_version) return false; + if(self->_index >= self->_dict->_entries.count) return false; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index); + assert(!pkpy_Var__is_null(&entry->key)); + if (key) *key = entry->key; + if (val) *val = entry->val; + + while (self->_index < self->_dict->_entries.count) { + self->_index++; + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index); + if(!pkpy_Var__is_null(&entry->key)) break; + } + return true; +} diff --git a/src/objects/dict.cpp b/src/objects/dict.cpp deleted file mode 100644 index 184d7c1c..00000000 --- a/src/objects/dict.cpp +++ /dev/null @@ -1,180 +0,0 @@ -#include "pocketpy/objects/dict.hpp" - -namespace pkpy { - -Dict::Dict() : - _capacity(__Capacity), _mask(__Capacity - 1), _size(0), _critical_size(__Capacity * __LoadFactor + 0.5f), - _head_idx(-1), _tail_idx(-1) { - __alloc_items(); -} - -void Dict::__alloc_items() { - _items = (Item*)std::malloc(_capacity * sizeof(Item)); - for(int i = 0; i < _capacity; i++) { - _items[i].first = nullptr; - _items[i].second = nullptr; - _items[i].prev = -1; - _items[i].next = -1; - } -} - -Dict::Dict(Dict&& other) { - _capacity = other._capacity; - _mask = other._mask; - _size = other._size; - _critical_size = other._critical_size; - _head_idx = other._head_idx; - _tail_idx = other._tail_idx; - _items = other._items; - other._items = nullptr; -} - -Dict::Dict(const Dict& other) { - _capacity = other._capacity; - _mask = other._mask; - _size = other._size; - _critical_size = other._critical_size; - _head_idx = other._head_idx; - _tail_idx = other._tail_idx; - // copy items - _items = (Item*)std::malloc(_capacity * sizeof(Item)); - std::memcpy(_items, other._items, _capacity * sizeof(Item)); -} - -void Dict::set(VM* vm, PyVar key, PyVar val) { - // do possible rehash - if(_size + 1 > _critical_size) _rehash(vm); - bool ok; - int i; - _probe_1(vm, key, ok, i); - if(!ok) { - _size++; - _items[i].first = key; - - // append to tail - if(_size == 0 + 1) { - _head_idx = i; - _tail_idx = i; - } else { - _items[i].prev = _tail_idx; - _items[_tail_idx].next = i; - _tail_idx = i; - } - } - _items[i].second = val; -} - -void Dict::_rehash(VM* vm) { - Item* old_items = _items; - int old_head_idx = _head_idx; - - _capacity *= 4; - _mask = _capacity - 1; - _size = 0; - _critical_size = _capacity * __LoadFactor + 0.5f; - _head_idx = -1; - _tail_idx = -1; - - __alloc_items(); - - // copy old items to new dict - int i = old_head_idx; - while(i != -1) { - set(vm, old_items[i].first, old_items[i].second); - i = old_items[i].next; - } - - std::free(old_items); -} - -PyVar Dict::try_get(VM* vm, PyVar key) const { - bool ok; - int i; - _probe_0(vm, key, ok, i); - if(!ok) return nullptr; - return _items[i].second; -} - -bool Dict::contains(VM* vm, PyVar key) const { - bool ok; - int i; - _probe_0(vm, key, ok, i); - return ok; -} - -bool Dict::del(VM* vm, PyVar key) { - bool ok; - int i; - _probe_0(vm, key, ok, i); - if(!ok) return false; - _items[i].first = nullptr; - // _items[i].second = PY_DELETED_SLOT; // do not change .second if it is not NULL, it means the slot is occupied by - // a deleted item - _size--; - - if(_size == 0) { - _head_idx = -1; - _tail_idx = -1; - } else { - if(_head_idx == i) { - _head_idx = _items[i].next; - _items[_head_idx].prev = -1; - } else if(_tail_idx == i) { - _tail_idx = _items[i].prev; - _items[_tail_idx].next = -1; - } else { - _items[_items[i].prev].next = _items[i].next; - _items[_items[i].next].prev = _items[i].prev; - } - } - _items[i].prev = -1; - _items[i].next = -1; - return true; -} - -void Dict::update(VM* vm, const Dict& other) { - other.apply([&](PyVar k, PyVar v) { - set(vm, k, v); - }); -} - -Tuple Dict::keys() const { - Tuple t(_size); - int i = _head_idx; - int j = 0; - while(i != -1) { - t[j++] = _items[i].first; - i = _items[i].next; - } - assert(j == _size); - return t; -} - -Tuple Dict::values() const { - Tuple t(_size); - int i = _head_idx; - int j = 0; - while(i != -1) { - t[j++] = _items[i].second; - i = _items[i].next; - } - assert(j == _size); - return t; -} - -void Dict::clear() { - _size = 0; - _head_idx = -1; - _tail_idx = -1; - for(int i = 0; i < _capacity; i++) { - _items[i].first = nullptr; - _items[i].second = nullptr; - _items[i].prev = -1; - _items[i].next = -1; - } -} - -Dict::~Dict() { - if(_items) std::free(_items); -} -} // namespace pkpy diff --git a/src/objects/pyvar.cpp b/src/objects/pyvar.cpp new file mode 100644 index 00000000..14ad6911 --- /dev/null +++ b/src/objects/pyvar.cpp @@ -0,0 +1,17 @@ +#include "pocketpy/objects/base.hpp" +#include "pocketpy/objects/pyvar.h" +#include "pocketpy/interpreter/vm.hpp" + +extern "C" { + +bool pkpy_Var__eq__(void *vm_, pkpy_Var a, pkpy_Var b) { + auto vm = static_cast(vm_); + return vm->py_eq(*reinterpret_cast(&a), *reinterpret_cast(&b)); +} + +int64_t pkpy_Var__hash__(void *vm_, pkpy_Var a) { + auto vm = static_cast(vm_); + return vm->py_hash(*reinterpret_cast(&a)); +} + +} diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 23d3596a..26105865 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -1493,12 +1493,10 @@ void __init_builtins(VM* _vm) { if(!vm->isinstance(_1, vm->tp_dict)) return vm->NotImplemented; Dict& other = _CAST(Dict&, _1); if(self.size() != other.size()) return vm->False; - for(int i = 0; i < self._capacity; i++) { - auto item = self._items[i]; - if(item.first == nullptr) continue; - PyVar value = other.try_get(vm, item.first); - if(value == nullptr) return vm->False; - if(!vm->py_eq(item.second, value)) return vm->False; + pkpy_DictIter it = self.iter(); + PyVar key, val; + while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + if(!vm->py_eq(val, other.try_get(vm, key))) return vm->False; } return vm->True; }); From 249656039aa7c5df78c195144b9c4965df62f5be Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 14:36:48 +0800 Subject: [PATCH 02/30] fix uninitialize --- src/objects/dict.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/objects/dict.c b/src/objects/dict.c index 62f89924..3ac785b5 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -30,6 +30,7 @@ void pkpy_Dict__ctor(pkpy_Dict* self) { c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); self->_htcap = 16; self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); } void pkpy_Dict__dtor(pkpy_Dict* self) { From f4e9293643cb9da43a60da55439be15b7554b3ee Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 15:03:19 +0800 Subject: [PATCH 03/30] ... --- include/pocketpy/objects/dict.hpp | 2 +- include/pocketpy/objects/pyvar.h | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 8272a45c..1c51161c 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -34,7 +34,7 @@ struct Dict : private pkpy_Dict { PyVar try_get(VM* vm, PyVar key) const { auto res = pkpy_Dict__try_get(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); if (!res) return nullptr; - return *reinterpret_cast(&res); + return PyVar(*reinterpret_cast(&res)); } bool contains(VM* vm, PyVar key) const { diff --git a/include/pocketpy/objects/pyvar.h b/include/pocketpy/objects/pyvar.h index bc8fd593..5947c4d9 100644 --- a/include/pocketpy/objects/pyvar.h +++ b/include/pocketpy/objects/pyvar.h @@ -11,12 +11,8 @@ extern "C" { * @brief A python value in pocketpy. */ typedef struct { - // TODO: implement - - union { - int type; - char buf[16]; - }; + int type; + int _0, _1, _2; } pkpy_Var; /** From 73c9c5a2280805e5c94cbb0945fa4736e39d4950 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 16:26:06 +0800 Subject: [PATCH 04/30] fix RE --- include/pocketpy/objects/dict.hpp | 2 +- include/pocketpy/objects/pyvar.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 1c51161c..6c854961 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -34,7 +34,7 @@ struct Dict : private pkpy_Dict { PyVar try_get(VM* vm, PyVar key) const { auto res = pkpy_Dict__try_get(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); if (!res) return nullptr; - return PyVar(*reinterpret_cast(&res)); + return *reinterpret_cast(res); } bool contains(VM* vm, PyVar key) const { diff --git a/include/pocketpy/objects/pyvar.h b/include/pocketpy/objects/pyvar.h index 5947c4d9..edd7495f 100644 --- a/include/pocketpy/objects/pyvar.h +++ b/include/pocketpy/objects/pyvar.h @@ -12,7 +12,8 @@ extern "C" { */ typedef struct { int type; - int _0, _1, _2; + int _0; + int64_t _1; } pkpy_Var; /** From e455e36a3980a109df638cf5be652d532be3221f Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 16:44:41 +0800 Subject: [PATCH 05/30] fix iteration and count --- src/objects/dict.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 3ac785b5..e65675db 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -142,6 +142,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { .val = val, })); pkpy_Dict__htset(self, h, idx); + self->count += 1; if(self->count >= self->_htcap * 0.75) pkpy_Dict__extendht(self, vm); return true; } @@ -173,6 +174,7 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { pkpy_Var__set_null(&entry->key); pkpy_Dict__htset(self, h, null); pkpy_Dict__refactor(self, vm); + self->count -= 1; return true; } @@ -203,10 +205,19 @@ void pkpy_Dict__clear(pkpy_Dict *self) { self->_version = v + 1; } +static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) { + do { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + if(!pkpy_Var__is_null(&entry->key)) break; + idx++; + } while (idx < self->_entries.count); + return idx; +} + pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict *self) { return (pkpy_DictIter){ ._dict = self, - ._index = 0, + ._index = pkpy_Dict__next_entry_idx(self, 0), ._version = self->_version, }; } @@ -220,10 +231,6 @@ bool pkpy_DictIter__next(pkpy_DictIter *self, pkpy_Var *key, pkpy_Var *val) { if (key) *key = entry->key; if (val) *val = entry->val; - while (self->_index < self->_dict->_entries.count) { - self->_index++; - struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index); - if(!pkpy_Var__is_null(&entry->key)) break; - } + self->_index = pkpy_Dict__next_entry_idx(self->_dict, self->_index + 1); return true; } From b2d5708fd83e0ba7fc17fdd344a6ec53ec95a65d Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 17:03:16 +0800 Subject: [PATCH 06/30] fix --- src/objects/dict.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index e65675db..2014303c 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -11,14 +11,14 @@ struct pkpy_DictEntry { }; inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { - if(self->count < 255) return 1; - if(self->count < 65535) return 2; + if(self->_htcap < 255) return 1; + if(self->_htcap < 65535) return 2; return 4; } inline static int pkpy_Dict__idx_null(const pkpy_Dict* self) { - if(self->count < 255) return 255; - if(self->count < 65535) return 65535; + if(self->_htcap < 255) return 255; + if(self->_htcap < 65535) return 65535; return 4294967295; } @@ -87,8 +87,8 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { self->_version += 1; free(self->_hashtable); self->_htcap *= 2; - void* new_ht = malloc(pkpy_Dict__ht_byte_size(self)); - memset(new_ht, 0xff, pkpy_Dict__ht_byte_size(self)); + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); for(int i = 0; i < self->_entries.count; i++) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); @@ -206,6 +206,7 @@ void pkpy_Dict__clear(pkpy_Dict *self) { } static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) { + if (idx >= self->_entries.count) return idx; do { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); if(!pkpy_Var__is_null(&entry->key)) break; From 8458e49a307264640239457de26edb547d927437 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 18:56:55 +0800 Subject: [PATCH 07/30] Minimum deleted slot required to refactor --- CMakeLists.txt | 4 ++-- src/objects/dict.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db995efa..38d74742 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,8 @@ if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /Ox /jumptablerdata /GS-") add_compile_options(/wd4267 /wd4244) else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti -O2") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") # disable -Wshorten-64-to-32 for apple if(APPLE) diff --git a/src/objects/dict.c b/src/objects/dict.c index 2014303c..69f2557c 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -101,7 +101,7 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int deleted_slots = self->_entries.count - self->count; - if(deleted_slots < self->_entries.count * 0.25) return false; + if(deleted_slots >= 8 && deleted_slots < self->_entries.count * 0.25) return false; // shrink self->_version += 1; From f28b2f152e9b1678e3f6c2291f74b5d1831701bd Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 21:51:03 +0800 Subject: [PATCH 08/30] Fix --- src/objects/dict.c | 56 +++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 69f2557c..f1c91590 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -99,33 +99,6 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { } } -static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { - int deleted_slots = self->_entries.count - self->count; - if(deleted_slots >= 8 && deleted_slots < self->_entries.count * 0.25) return false; - - // shrink - self->_version += 1; - free(self->_hashtable); - while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) - self->_htcap /= 2; - self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); - memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); - - c11_vector new_entries; - c11_vector__ctor(&new_entries, sizeof(struct pkpy_DictEntry)); - for(int i = 0; i < self->_entries.count; i++) { - struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); - if(pkpy_Var__is_null(&entry->key)) continue; - - int j = new_entries.count; - c11_vector__push(struct pkpy_DictEntry, &new_entries, *entry); - pkpy_Dict__htset(self, pkpy_Dict__probe(self, vm, entry->key, entry->hash), j); - } - c11_vector__dtor(&self->_entries); - self->_entries = new_entries; - return true; -} - bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe(self, vm, key, hash); @@ -162,6 +135,33 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { return true; } +static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { + int deleted_slots = self->_entries.count - self->count; + if(deleted_slots <= 8 || deleted_slots < self->_entries.count * 0.25) return false; + + // shrink + self->_version += 1; + free(self->_hashtable); + while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) + self->_htcap /= 2; + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); + + c11_vector old_entries = self->_entries; + c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); + for(int i = 0; i < old_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &old_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + + int j = self->_entries.count; + c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); + int h = pkpy_Dict__probe(self, vm, entry->key, entry->hash); + pkpy_Dict__htset(self, h, j); + } + c11_vector__dtor(&old_entries); + return true; +} + bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe(self, vm, key, hash); @@ -173,8 +173,8 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); pkpy_Var__set_null(&entry->key); pkpy_Dict__htset(self, h, null); - pkpy_Dict__refactor(self, vm); self->count -= 1; + pkpy_Dict__refactor(self, vm); return true; } From 41562cf4c33db3876e87384a86ab22cb4804cb38 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 22:16:12 +0800 Subject: [PATCH 09/30] fix find and insert --- src/objects/dict.c | 49 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index f1c91590..31da0e7a 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -69,7 +69,7 @@ static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { } } -static int pkpy_Dict__probe(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { +static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h + 1) & mask) { @@ -83,6 +83,19 @@ static int pkpy_Dict__probe(const pkpy_Dict* self, void* vm, pkpy_Var key, int64 PK_UNREACHABLE(); } +static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { + const int null = pkpy_Dict__idx_null(self); + const int mask = self->_htcap - 1; + for(int h = hash & mask;; h = (h + 1) & mask) { + int idx = pkpy_Dict__htget(self, h); + if(idx == null) return h; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; + } + PK_UNREACHABLE(); +} + static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { self->_version += 1; free(self->_hashtable); @@ -94,14 +107,14 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); if(pkpy_Var__is_null(&entry->key)) continue; - int h = pkpy_Dict__probe(self, vm, entry->key, entry->hash); + int h = pkpy_Dict__probe0(self, vm, entry->key, entry->hash); pkpy_Dict__htset(self, h, i); } } bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int hash = pkpy_Var__hash__(vm, key); - int h = pkpy_Dict__probe(self, vm, key, hash); + int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) { @@ -114,6 +127,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { .key = key, .val = val, })); + h = pkpy_Dict__probe0(self, vm, key, hash); pkpy_Dict__htset(self, h, idx); self->count += 1; if(self->count >= self->_htcap * 0.75) pkpy_Dict__extendht(self, vm); @@ -121,17 +135,31 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); - entry->val = val; + + if(entry->hash == hash || pkpy_Var__eq__(vm, entry->key, key)) { + entry->val = val; + } else { + self->_version += 1; + self->count += 1; + h = pkpy_Dict__probe0(self, vm, key, hash); + idx = pkpy_Dict__htget(self, h); + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + entry->key = key; + entry->val = val; + entry->hash = hash; + } return false; } bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = pkpy_Var__hash__(vm, key); - int h = pkpy_Dict__probe(self, vm, key, hash); + int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return false; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); return true; } @@ -155,7 +183,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int j = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); - int h = pkpy_Dict__probe(self, vm, entry->key, entry->hash); + int h = pkpy_Dict__probe0(self, vm, entry->key, entry->hash); pkpy_Dict__htset(self, h, j); } c11_vector__dtor(&old_entries); @@ -164,15 +192,14 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = pkpy_Var__hash__(vm, key); - int h = pkpy_Dict__probe(self, vm, key, hash); + int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; - self->_version += 1; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + self->_version += 1; pkpy_Var__set_null(&entry->key); - pkpy_Dict__htset(self, h, null); self->count -= 1; pkpy_Dict__refactor(self, vm); return true; @@ -180,7 +207,7 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = pkpy_Var__hash__(vm, key); - int h = pkpy_Dict__probe(self, vm, key, hash); + int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return NULL; From 5e6226729a380e35dc1a646b010d63697b8d4189 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 22:23:23 +0800 Subject: [PATCH 10/30] skip nullptr in probe1 --- src/objects/dict.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/objects/dict.c b/src/objects/dict.c index 31da0e7a..29bbb3fb 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -91,6 +91,7 @@ static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + if(pkpy_Var__is_null(&entry->key)) continue; if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; } PK_UNREACHABLE(); From c047eafa7eb13dfdc6e063d3701e7862627950ef Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 23:09:24 +0800 Subject: [PATCH 11/30] optimize probe0 for less __eq__ and hash compare --- src/objects/dict.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 29bbb3fb..b1453b9c 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -78,7 +78,6 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); if(pkpy_Var__is_null(&entry->key)) return h; - if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; } PK_UNREACHABLE(); } From 637aedabc56e333f3d41104c82fd5cd2a0ec1b9c Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 23:53:03 +0800 Subject: [PATCH 12/30] Use 4 byte hash only --- src/objects/dict.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index b1453b9c..50af5c99 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -4,8 +4,10 @@ #include #include +#define HASH_MASK ((int64_t)0xffffffff) + struct pkpy_DictEntry { - int64_t hash; + int32_t hash; pkpy_Var key; pkpy_Var val; }; @@ -69,7 +71,7 @@ static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { } } -static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { +static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h + 1) & mask) { @@ -82,7 +84,7 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 PK_UNREACHABLE(); } -static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { +static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h + 1) & mask) { @@ -113,7 +115,7 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { } bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { - int hash = pkpy_Var__hash__(vm, key); + int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); @@ -152,7 +154,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key); + int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); @@ -191,7 +193,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { } bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key); + int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; @@ -206,7 +208,7 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { } const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key); + int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); From d25afcaeae588c0bacb3e665ee786a538050cb4f Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 00:07:14 +0800 Subject: [PATCH 13/30] remove hash from entry --- src/objects/dict.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 50af5c99..1355a524 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -4,10 +4,7 @@ #include #include -#define HASH_MASK ((int64_t)0xffffffff) - struct pkpy_DictEntry { - int32_t hash; pkpy_Var key; pkpy_Var val; }; @@ -71,10 +68,10 @@ static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { } } -static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { +static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; - for(int h = hash & mask;; h = (h + 1) & mask) { + for(int h = hash & mask;; h = (h * 5 + 1) & mask) { int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; @@ -84,16 +81,16 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int PK_UNREACHABLE(); } -static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { +static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; - for(int h = hash & mask;; h = (h + 1) & mask) { + for(int h = hash & mask;; h = (h * 5 + 1) & mask) { int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); if(pkpy_Var__is_null(&entry->key)) continue; - if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; + if(pkpy_Var__eq__(vm, entry->key, key)) return h; } PK_UNREACHABLE(); } @@ -109,13 +106,13 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); if(pkpy_Var__is_null(&entry->key)) continue; - int h = pkpy_Dict__probe0(self, vm, entry->key, entry->hash); + int h = pkpy_Dict__probe0(self, vm, entry->key, pkpy_Var__hash__(vm, entry->key)); pkpy_Dict__htset(self, h, i); } } bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { - int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; + int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); @@ -125,7 +122,6 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { c11_vector__push(struct pkpy_DictEntry, &self->_entries, ((struct pkpy_DictEntry){ - .hash = hash, .key = key, .val = val, })); @@ -138,7 +134,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - if(entry->hash == hash || pkpy_Var__eq__(vm, entry->key, key)) { + if(pkpy_Var__eq__(vm, entry->key, key)) { entry->val = val; } else { self->_version += 1; @@ -148,20 +144,19 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); entry->key = key; entry->val = val; - entry->hash = hash; } return false; } bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; + int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + assert(pkpy_Var__eq__(vm, entry->key, key)); return true; } @@ -185,7 +180,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int j = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); - int h = pkpy_Dict__probe0(self, vm, entry->key, entry->hash); + int h = pkpy_Dict__probe0(self, vm, entry->key, pkpy_Var__hash__(vm, entry->key)); pkpy_Dict__htset(self, h, j); } c11_vector__dtor(&old_entries); @@ -193,13 +188,13 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { } bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; + int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + assert(pkpy_Var__eq__(vm, entry->key, key)); self->_version += 1; pkpy_Var__set_null(&entry->key); self->count -= 1; @@ -208,14 +203,14 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { } const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; + int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return NULL; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + assert(pkpy_Var__eq__(vm, entry->key, key)); return &entry->val; } From 3d90bd03923a2f5cc8e24bdab2548deaf2d5a6fb Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 00:08:59 +0800 Subject: [PATCH 14/30] change cmake back --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 38d74742..db995efa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,8 @@ if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /Ox /jumptablerdata /GS-") add_compile_options(/wd4267 /wd4244) else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti -O2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") # disable -Wshorten-64-to-32 for apple if(APPLE) From 6e780173f94bc2e35048f09b26c2a30cd26b7244 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 09:32:58 +0800 Subject: [PATCH 15/30] remove _version --- include/pocketpy/objects/dict.h | 1 - src/objects/dict.c | 20 ++++---------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/include/pocketpy/objects/dict.h b/include/pocketpy/objects/dict.h index aa56ae42..071b4bdc 100644 --- a/include/pocketpy/objects/dict.h +++ b/include/pocketpy/objects/dict.h @@ -9,7 +9,6 @@ extern "C" { #include "pocketpy/common/vector.h" typedef struct { - unsigned int _version; /** used internelly to detect iterator invalidation */ int count; /** number of elements in the dictionary */ c11_vector _entries; /** contains `pkpy_DictEntry` (hidden type) */ int _htcap; /** capacity of the hashtable, always a power of 2 */ diff --git a/src/objects/dict.c b/src/objects/dict.c index 1355a524..3e1bb7a2 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -24,7 +24,6 @@ inline static int pkpy_Dict__idx_null(const pkpy_Dict* self) { inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } void pkpy_Dict__ctor(pkpy_Dict* self) { - self->_version = 0; self->count = 0; c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); self->_htcap = 16; @@ -41,8 +40,7 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { int ht_size = pkpy_Dict__ht_byte_size(self); void* ht_clone = malloc(ht_size); memcpy(ht_clone, self->_hashtable, ht_size); - return (pkpy_Dict){._version = 0, - .count = self->count, + return (pkpy_Dict){.count = self->count, ._entries = c11_vector__copy(&self->_entries), ._htcap = self->_htcap, ._hashtable = ht_clone}; @@ -96,7 +94,6 @@ static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 } static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { - self->_version += 1; free(self->_hashtable); self->_htcap *= 2; self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); @@ -117,7 +114,6 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) { - self->_version += 1; idx = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, &self->_entries, @@ -137,7 +133,6 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { if(pkpy_Var__eq__(vm, entry->key, key)) { entry->val = val; } else { - self->_version += 1; self->count += 1; h = pkpy_Dict__probe0(self, vm, key, hash); idx = pkpy_Dict__htget(self, h); @@ -165,7 +160,6 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { if(deleted_slots <= 8 || deleted_slots < self->_entries.count * 0.25) return false; // shrink - self->_version += 1; free(self->_hashtable); while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) self->_htcap /= 2; @@ -195,7 +189,6 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); assert(pkpy_Var__eq__(vm, entry->key, key)); - self->_version += 1; pkpy_Var__set_null(&entry->key); self->count -= 1; pkpy_Dict__refactor(self, vm); @@ -223,19 +216,16 @@ void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) { } void pkpy_Dict__clear(pkpy_Dict *self) { - int v = self->_version; pkpy_Dict__dtor(self); pkpy_Dict__ctor(self); - self->_version = v + 1; } static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) { - if (idx >= self->_entries.count) return idx; - do { + while (idx < self->_entries.count) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); if(!pkpy_Var__is_null(&entry->key)) break; idx++; - } while (idx < self->_entries.count); + } return idx; } @@ -243,16 +233,14 @@ pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict *self) { return (pkpy_DictIter){ ._dict = self, ._index = pkpy_Dict__next_entry_idx(self, 0), - ._version = self->_version, }; } bool pkpy_DictIter__next(pkpy_DictIter *self, pkpy_Var *key, pkpy_Var *val) { - if(self->_version != self->_dict->_version) return false; if(self->_index >= self->_dict->_entries.count) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index); - assert(!pkpy_Var__is_null(&entry->key)); + if(pkpy_Var__is_null(&entry->key)) return false; if (key) *key = entry->key; if (val) *val = entry->val; From 21fdaeaa212a1758a5b0a57846a4d4bdf64931ec Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 11:06:39 +0800 Subject: [PATCH 16/30] fix dict compare --- src/pocketpy.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 26105865..c6a7bb7d 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -1496,7 +1496,9 @@ void __init_builtins(VM* _vm) { pkpy_DictIter it = self.iter(); PyVar key, val; while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { - if(!vm->py_eq(val, other.try_get(vm, key))) return vm->False; + PyVar other_val = other.try_get(vm, key); + if(other_val == nullptr) return vm->False; + if(!vm->py_eq(val, other_val)) return vm->False; } return vm->True; }); From 9390b0d6381c307c699d48c14eb1ef8ec81a5800 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 11:08:34 +0800 Subject: [PATCH 17/30] use marcos to control load factor --- src/objects/dict.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 3e1bb7a2..df285611 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -4,6 +4,8 @@ #include #include +#define DICT_MAX_LOAD 0.75 + struct pkpy_DictEntry { pkpy_Var key; pkpy_Var val; @@ -124,7 +126,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { h = pkpy_Dict__probe0(self, vm, key, hash); pkpy_Dict__htset(self, h, idx); self->count += 1; - if(self->count >= self->_htcap * 0.75) pkpy_Dict__extendht(self, vm); + if(self->count >= self->_htcap * DICT_MAX_LOAD) pkpy_Dict__extendht(self, vm); return true; } @@ -157,11 +159,11 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int deleted_slots = self->_entries.count - self->count; - if(deleted_slots <= 8 || deleted_slots < self->_entries.count * 0.25) return false; + if(deleted_slots <= 8 || deleted_slots < self->_entries.count * (1 - DICT_MAX_LOAD)) return false; // shrink free(self->_hashtable); - while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) + while(self->_htcap * DICT_MAX_LOAD / 2 > self->count && self->_htcap >= 32) self->_htcap /= 2; self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); From ed2e95b3f4f90efe5cf219e15a0cd280e1887175 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 11:10:08 +0800 Subject: [PATCH 18/30] fix overflow --- src/objects/dict.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index df285611..48643235 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -17,10 +17,10 @@ inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { return 4; } -inline static int pkpy_Dict__idx_null(const pkpy_Dict* self) { +inline static unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { if(self->_htcap < 255) return 255; if(self->_htcap < 65535) return 65535; - return 4294967295; + return 4294967295u; // 2^32 - 1 } inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } @@ -48,7 +48,7 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { ._hashtable = ht_clone}; } -static int pkpy_Dict__htget(const pkpy_Dict* self, int h) { +static unsigned int pkpy_Dict__htget(const pkpy_Dict* self, int h) { int sz = pkpy_Dict__idx_size(self); switch(sz) { case 1: return ((uint8_t*)self->_hashtable)[h]; @@ -72,7 +72,7 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h * 5 + 1) & mask) { - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -85,7 +85,7 @@ static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h * 5 + 1) & mask) { - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -114,7 +114,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) { idx = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, @@ -137,7 +137,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } else { self->count += 1; h = pkpy_Dict__probe0(self, vm, key, hash); - idx = pkpy_Dict__htget(self, h); + unsigned idx = pkpy_Dict__htget(self, h); struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); entry->key = key; entry->val = val; @@ -149,7 +149,7 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -186,7 +186,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); - int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); + unsigned int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -201,7 +201,7 @@ const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return NULL; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); From a47b52f086aa9cfc952259f82de4d588c62e528e Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:04:39 +0800 Subject: [PATCH 19/30] optimize hashtable access --- src/objects/dict.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 48643235..08b8ccbc 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -18,9 +18,10 @@ inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { } inline static unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { - if(self->_htcap < 255) return 255; - if(self->_htcap < 65535) return 65535; - return 4294967295u; // 2^32 - 1 + // if(self->_htcap < 255) return 255; + // if(self->_htcap < 65535) return 65535; + // return 4294967295u; // 2^32 - 1 + return (1u << ((pkpy_Dict__idx_size(self) * 8) & 31)) - 1u; } inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } @@ -49,23 +50,20 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { } static unsigned int pkpy_Dict__htget(const pkpy_Dict* self, int h) { - int sz = pkpy_Dict__idx_size(self); - switch(sz) { - case 1: return ((uint8_t*)self->_hashtable)[h]; - case 2: return ((uint16_t*)self->_hashtable)[h]; - case 4: return ((uint32_t*)self->_hashtable)[h]; - default: PK_UNREACHABLE(); - } + const int *p = (int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); + return (*p) & pkpy_Dict__idx_null(self); } -static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { +static void pkpy_Dict__htset(pkpy_Dict* self, int h, unsigned int v) { int sz = pkpy_Dict__idx_size(self); - switch(sz) { - case 1: ((uint8_t*)self->_hashtable)[h] = v; break; - case 2: ((uint16_t*)self->_hashtable)[h] = v; break; - case 4: ((uint32_t*)self->_hashtable)[h] = v; break; - default: PK_UNREACHABLE(); - } + // switch(sz) { + // case 1: ((uint8_t*)self->_hashtable)[h] = v; break; + // case 2: ((uint16_t*)self->_hashtable)[h] = v; break; + // case 4: ((uint32_t*)self->_hashtable)[h] = v; break; + // default: PK_UNREACHABLE(); + // } + int *p = ((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self); + *p = v | (*p & ~pkpy_Dict__idx_null(self)); } static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { From 784980af93a5cf466a90ed697211f406f3533e6b Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:09:46 +0800 Subject: [PATCH 20/30] add marco PK_DICT_COMPACT_MODE --- src/objects/dict.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 08b8ccbc..6e6b4069 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -5,26 +5,32 @@ #include #define DICT_MAX_LOAD 0.75 +#define PK_DICT_COMPACT_MODE 1 struct pkpy_DictEntry { pkpy_Var key; pkpy_Var val; }; -inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { +inline extern int pkpy_Dict__idx_size(const pkpy_Dict* self) { +#if PK_DICT_COMPACT_MODE if(self->_htcap < 255) return 1; if(self->_htcap < 65535) return 2; +#endif return 4; } -inline static unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { +inline extern unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { +#if PK_DICT_COMPACT_MODE // if(self->_htcap < 255) return 255; // if(self->_htcap < 65535) return 65535; // return 4294967295u; // 2^32 - 1 return (1u << ((pkpy_Dict__idx_size(self) * 8) & 31)) - 1u; +#endif + return 4294967295u; } -inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } +inline extern int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } void pkpy_Dict__ctor(pkpy_Dict* self) { self->count = 0; @@ -50,20 +56,21 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { } static unsigned int pkpy_Dict__htget(const pkpy_Dict* self, int h) { - const int *p = (int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); +#if PK_DICT_COMPACT_MODE + const unsigned int *p = (const unsigned int*)(((const char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); return (*p) & pkpy_Dict__idx_null(self); +#else + return ((const unsigned int*)self->_hashtable)[h]; +#endif } static void pkpy_Dict__htset(pkpy_Dict* self, int h, unsigned int v) { - int sz = pkpy_Dict__idx_size(self); - // switch(sz) { - // case 1: ((uint8_t*)self->_hashtable)[h] = v; break; - // case 2: ((uint16_t*)self->_hashtable)[h] = v; break; - // case 4: ((uint32_t*)self->_hashtable)[h] = v; break; - // default: PK_UNREACHABLE(); - // } - int *p = ((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self); +#if PK_DICT_COMPACT_MODE + unsigned int *p = (unsigned int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); *p = v | (*p & ~pkpy_Dict__idx_null(self)); +#else + ((unsigned int*)self->_hashtable)[h] = v; +#endif } static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { From 6d938d30bf34d687d23a70ca0b3cdd057798f299 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:19:16 +0800 Subject: [PATCH 21/30] make hash functions macros --- src/objects/dict.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 6e6b4069..c20f59eb 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -5,6 +5,8 @@ #include #define DICT_MAX_LOAD 0.75 +#define DICT_HASH_NEXT(h) ((h) * 5 + 1) +#define DICT_HASH_TRANS(h) ((int)((h) & 0xffffffff)) // used for tansform value from __hash__ #define PK_DICT_COMPACT_MODE 1 struct pkpy_DictEntry { @@ -73,10 +75,10 @@ static void pkpy_Dict__htset(pkpy_Dict* self, int h, unsigned int v) { #endif } -static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { - const int null = pkpy_Dict__idx_null(self); - const int mask = self->_htcap - 1; - for(int h = hash & mask;; h = (h * 5 + 1) & mask) { +static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { + const unsigned int null = pkpy_Dict__idx_null(self); + const unsigned int mask = self->_htcap - 1; + for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) { unsigned int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; @@ -86,10 +88,10 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 PK_UNREACHABLE(); } -static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { +static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; - for(int h = hash & mask;; h = (h * 5 + 1) & mask) { + for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) { unsigned int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; @@ -110,13 +112,14 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); if(pkpy_Var__is_null(&entry->key)) continue; - int h = pkpy_Dict__probe0(self, vm, entry->key, pkpy_Var__hash__(vm, entry->key)); + int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key)); + int h = pkpy_Dict__probe0(self, vm, entry->key, rhash); pkpy_Dict__htset(self, h, i); } } bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { - int64_t hash = pkpy_Var__hash__(vm, key); + int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); unsigned int idx = pkpy_Dict__htget(self, h); @@ -151,7 +154,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int64_t hash = pkpy_Var__hash__(vm, key); + int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); unsigned int idx = pkpy_Dict__htget(self, h); @@ -181,7 +184,8 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int j = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); - int h = pkpy_Dict__probe0(self, vm, entry->key, pkpy_Var__hash__(vm, entry->key)); + int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key)); + int h = pkpy_Dict__probe0(self, vm, entry->key, rhash); pkpy_Dict__htset(self, h, j); } c11_vector__dtor(&old_entries); @@ -189,7 +193,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { } bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { - int64_t hash = pkpy_Var__hash__(vm, key); + int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); unsigned int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; @@ -203,7 +207,7 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { } const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int64_t hash = pkpy_Var__hash__(vm, key); + int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); unsigned int idx = pkpy_Dict__htget(self, h); From d1763bdef177441f8ed373a2fc44e968567329af Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:21:20 +0800 Subject: [PATCH 22/30] replace reinterpret_cast with C-style cast --- include/pocketpy/objects/dict.hpp | 14 +++++++------- src/interpreter/iter.cpp | 2 +- src/pocketpy.cpp | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 6c854961..94fe0c12 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -28,21 +28,21 @@ struct Dict : private pkpy_Dict { int size() const { return count; } void set(VM* vm, PyVar key, PyVar val) { - pkpy_Dict__set(this, vm, *reinterpret_cast<::pkpy_Var*>(&key), *reinterpret_cast<::pkpy_Var*>(&val)); + pkpy_Dict__set(this, vm, *(pkpy_Var*)(&key), *(pkpy_Var*)(&val)); } PyVar try_get(VM* vm, PyVar key) const { - auto res = pkpy_Dict__try_get(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + auto res = pkpy_Dict__try_get(this, vm, *(pkpy_Var*)(&key)); if (!res) return nullptr; return *reinterpret_cast(res); } bool contains(VM* vm, PyVar key) const { - return pkpy_Dict__contains(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + return pkpy_Dict__contains(this, vm, *(pkpy_Var*)(&key)); } bool del(VM* vm, PyVar key) { - return pkpy_Dict__del(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + return pkpy_Dict__del(this, vm, *(pkpy_Var*)(&key)); } void update(VM* vm, const Dict& other) { @@ -53,7 +53,7 @@ struct Dict : private pkpy_Dict { void apply(__Func f) const { pkpy_DictIter it = iter(); PyVar key, val; - while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { f(key, val); } } @@ -63,7 +63,7 @@ struct Dict : private pkpy_Dict { pkpy_DictIter it = iter(); PyVar key, val; int i = 0; - while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { res[i++] = key; } return res; @@ -74,7 +74,7 @@ struct Dict : private pkpy_Dict { pkpy_DictIter it = iter(); PyVar key, val; int i = 0; - while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { res[i++] = val; } return res; diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp index 56d5f1f6..d0fae96a 100644 --- a/src/interpreter/iter.cpp +++ b/src/interpreter/iter.cpp @@ -118,7 +118,7 @@ void DictItemsIter::_register(VM* vm, PyObject* mod, PyObject* type) { vm->bind__next__(type->as(), [](VM* vm, PyVar _0) -> unsigned { DictItemsIter& self = _CAST(DictItemsIter&, _0); PyVar key, val; - if (pkpy_DictIter__next(&self.it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + if (pkpy_DictIter__next(&self.it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { vm->s_data.push(key); vm->s_data.push(val); return 2; diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index c6a7bb7d..ba1456eb 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -1495,7 +1495,7 @@ void __init_builtins(VM* _vm) { if(self.size() != other.size()) return vm->False; pkpy_DictIter it = self.iter(); PyVar key, val; - while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { PyVar other_val = other.try_get(vm, key); if(other_val == nullptr) return vm->False; if(!vm->py_eq(val, other_val)) return vm->False; From a8ca70ca74d81d1db562d347975d5cef343a31f8 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:24:22 +0800 Subject: [PATCH 23/30] more replace --- include/pocketpy/objects/dict.hpp | 2 +- src/objects/pyvar.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 94fe0c12..2d521073 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -34,7 +34,7 @@ struct Dict : private pkpy_Dict { PyVar try_get(VM* vm, PyVar key) const { auto res = pkpy_Dict__try_get(this, vm, *(pkpy_Var*)(&key)); if (!res) return nullptr; - return *reinterpret_cast(res); + return *(const PyVar*)(res); } bool contains(VM* vm, PyVar key) const { diff --git a/src/objects/pyvar.cpp b/src/objects/pyvar.cpp index 14ad6911..7b9a1851 100644 --- a/src/objects/pyvar.cpp +++ b/src/objects/pyvar.cpp @@ -4,14 +4,14 @@ extern "C" { -bool pkpy_Var__eq__(void *vm_, pkpy_Var a, pkpy_Var b) { - auto vm = static_cast(vm_); - return vm->py_eq(*reinterpret_cast(&a), *reinterpret_cast(&b)); +bool pkpy_Var__eq__(void* vm_, pkpy_Var a, pkpy_Var b) { + auto vm = (pkpy::VM*)(vm_); + return vm->py_eq(*(pkpy::PyVar*)(&a), *(pkpy::PyVar*)(&b)); } -int64_t pkpy_Var__hash__(void *vm_, pkpy_Var a) { - auto vm = static_cast(vm_); - return vm->py_hash(*reinterpret_cast(&a)); +int64_t pkpy_Var__hash__(void* vm_, pkpy_Var a) { + auto vm = (pkpy::VM*)(vm_); + return vm->py_hash(*(pkpy::PyVar*)(&a)); } } From 7549f1b95a6aab0ae815acf53bf149f38a030af3 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:28:55 +0800 Subject: [PATCH 24/30] better dict clear --- include/pocketpy/objects/dict.h | 3 ++- src/objects/dict.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/pocketpy/objects/dict.h b/include/pocketpy/objects/dict.h index 071b4bdc..0ad7c6a5 100644 --- a/include/pocketpy/objects/dict.h +++ b/include/pocketpy/objects/dict.h @@ -8,6 +8,7 @@ extern "C" { #include "pocketpy/objects/pyvar.h" #include "pocketpy/common/vector.h" +/** @brief `pkpy_Dict` is the Dict type in Python */ typedef struct { int count; /** number of elements in the dictionary */ c11_vector _entries; /** contains `pkpy_DictEntry` (hidden type) */ @@ -15,9 +16,9 @@ typedef struct { void* _hashtable; /** contains indecies, can be `u8`, `u16` or `u32` according to size*/ } pkpy_Dict; +/** @brief `pkpy_DictIter` is used to iterate over a `pkpy_Dict` */ typedef struct { const pkpy_Dict* _dict; - unsigned int _version; int _index; } pkpy_DictIter; diff --git a/src/objects/dict.c b/src/objects/dict.c index c20f59eb..9eff5c77 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -227,8 +227,15 @@ void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) { } void pkpy_Dict__clear(pkpy_Dict *self) { - pkpy_Dict__dtor(self); - pkpy_Dict__ctor(self); + self->count = 0; + c11_vector__dtor(&self->_entries); + c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); + if (self->_hashtable > 16) { + free(self->_hashtable); + self->_htcap = 16; + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + } + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); } static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) { From 681b9d7dd05427db7f9115f22b73672732d53ff7 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:30:24 +0800 Subject: [PATCH 25/30] fix...and remove assert with side effect --- src/objects/dict.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 9eff5c77..be3044db 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -214,7 +214,6 @@ const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key if(idx == pkpy_Dict__idx_null(self)) return NULL; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(pkpy_Var__eq__(vm, entry->key, key)); return &entry->val; } @@ -230,7 +229,7 @@ void pkpy_Dict__clear(pkpy_Dict *self) { self->count = 0; c11_vector__dtor(&self->_entries); c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); - if (self->_hashtable > 16) { + if (self->_htcap > 16) { free(self->_hashtable); self->_htcap = 16; self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); From 6220ab029b8d1dd88861acc885feadbb2c6cdc1b Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:35:31 +0800 Subject: [PATCH 26/30] stop using unsigned for indecies --- src/objects/dict.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index be3044db..efd51a21 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -22,14 +22,12 @@ inline extern int pkpy_Dict__idx_size(const pkpy_Dict* self) { return 4; } -inline extern unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { +inline extern int pkpy_Dict__idx_null(const pkpy_Dict* self) { #if PK_DICT_COMPACT_MODE - // if(self->_htcap < 255) return 255; - // if(self->_htcap < 65535) return 65535; - // return 4294967295u; // 2^32 - 1 - return (1u << ((pkpy_Dict__idx_size(self) * 8) & 31)) - 1u; + if(self->_htcap < 255) return 255; + if(self->_htcap < 65535) return 65535; #endif - return 4294967295u; + return -1; } inline extern int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } @@ -57,29 +55,29 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { ._hashtable = ht_clone}; } -static unsigned int pkpy_Dict__htget(const pkpy_Dict* self, int h) { +static int pkpy_Dict__htget(const pkpy_Dict* self, int h) { #if PK_DICT_COMPACT_MODE - const unsigned int *p = (const unsigned int*)(((const char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); + const int *p = (const int*)(((const char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); return (*p) & pkpy_Dict__idx_null(self); #else - return ((const unsigned int*)self->_hashtable)[h]; + return ((const int*)self->_hashtable)[h]; #endif } -static void pkpy_Dict__htset(pkpy_Dict* self, int h, unsigned int v) { +static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { #if PK_DICT_COMPACT_MODE - unsigned int *p = (unsigned int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); + int *p = (int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); *p = v | (*p & ~pkpy_Dict__idx_null(self)); #else - ((unsigned int*)self->_hashtable)[h] = v; + ((int*)self->_hashtable)[h] = v; #endif } static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { - const unsigned int null = pkpy_Dict__idx_null(self); - const unsigned int mask = self->_htcap - 1; + const int null = pkpy_Dict__idx_null(self); + const int mask = self->_htcap - 1; for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) { - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -92,7 +90,7 @@ static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) { - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -122,7 +120,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) { idx = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, @@ -145,7 +143,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } else { self->count += 1; h = pkpy_Dict__probe0(self, vm, key, hash); - unsigned idx = pkpy_Dict__htget(self, h); + idx = pkpy_Dict__htget(self, h); struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); entry->key = key; entry->val = val; @@ -157,7 +155,7 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -195,7 +193,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); - unsigned int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); + int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -210,7 +208,7 @@ const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return NULL; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); From 5e38f7debd8a89766f92d43c747dc9477c63f5a4 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:36:47 +0800 Subject: [PATCH 27/30] remove asserts with side effect --- src/objects/dict.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index efd51a21..82caff11 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -159,7 +159,6 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { if(idx == pkpy_Dict__idx_null(self)) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(pkpy_Var__eq__(vm, entry->key, key)); return true; } @@ -197,7 +196,6 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { if(idx == null) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(pkpy_Var__eq__(vm, entry->key, key)); pkpy_Var__set_null(&entry->key); self->count -= 1; pkpy_Dict__refactor(self, vm); From 6649a5b9870f5ada229ac680e30e9c33ea58063e Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 13:13:24 +0800 Subject: [PATCH 28/30] never shrink --- src/objects/dict.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 82caff11..a17432f1 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -167,25 +167,29 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { if(deleted_slots <= 8 || deleted_slots < self->_entries.count * (1 - DICT_MAX_LOAD)) return false; // shrink - free(self->_hashtable); - while(self->_htcap * DICT_MAX_LOAD / 2 > self->count && self->_htcap >= 32) - self->_htcap /= 2; - self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + // free(self->_hashtable); + // while(self->_htcap * DICT_MAX_LOAD / 2 > self->count && self->_htcap >= 32) + // self->_htcap /= 2; + // self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); - c11_vector old_entries = self->_entries; - c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); - for(int i = 0; i < old_entries.count; i++) { - struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &old_entries, i); + int new_cnt = 0; + for (int i = 0; i < self->_entries.count; ++i) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + if (i > new_cnt) c11__setitem(struct pkpy_DictEntry, &self->_entries, new_cnt, *entry); + new_cnt += 1; + } + + self->_entries.count = new_cnt; + for(int i = 0; i < self->_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); if(pkpy_Var__is_null(&entry->key)) continue; - int j = self->_entries.count; - c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key)); int h = pkpy_Dict__probe0(self, vm, entry->key, rhash); - pkpy_Dict__htset(self, h, j); + pkpy_Dict__htset(self, h, i); } - c11_vector__dtor(&old_entries); return true; } @@ -223,13 +227,7 @@ void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) { void pkpy_Dict__clear(pkpy_Dict *self) { self->count = 0; - c11_vector__dtor(&self->_entries); - c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); - if (self->_htcap > 16) { - free(self->_hashtable); - self->_htcap = 16; - self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); - } + self->_entries.count = 0; memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); } From 33b110589dcead671e8060d530fc6fc5434eb82b Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 13:35:32 +0800 Subject: [PATCH 29/30] add test for dict larger than 65536 --- tests/07_dict.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/07_dict.py b/tests/07_dict.py index 6a0d3b49..8578a97d 100644 --- a/tests/07_dict.py +++ b/tests/07_dict.py @@ -159,6 +159,17 @@ try: except TypeError: pass +n = 2 ** 17 +a = {} +for i in range(n): + a[str(i)] = i + +for i in range(n): + y = a[str(i)] + +for i in range(n): + del a[str(i)] + a = {1: 2, 3: 4} a['a'] = a assert repr(a) == "{1: 2, 3: 4, 'a': {...}}" @@ -169,4 +180,3 @@ gc.collect() for k, v in a.items(): pass assert gc.collect() == 1 - From b2360315540d2585f2618f3e05fe535bae73fca3 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 14 Jun 2024 13:57:12 +0800 Subject: [PATCH 30/30] add benchmark --- benchmarks/dict_0.py | 20 ++++++++++++++++++++ benchmarks/dict_1.py | 27 +++++++++++++++++++++++++++ build_g.sh | 2 ++ 3 files changed, 49 insertions(+) create mode 100644 benchmarks/dict_0.py create mode 100644 benchmarks/dict_1.py diff --git a/benchmarks/dict_0.py b/benchmarks/dict_0.py new file mode 100644 index 00000000..9637fac5 --- /dev/null +++ b/benchmarks/dict_0.py @@ -0,0 +1,20 @@ +# test basic get/set +import random +random.seed(7) + +a = {str(i): i for i in range(100)} +a['existed'] = 0 +a['missed'] = 0 + +for i in range(1000000): + key = str(random.randint(-100, 100)) + if key in a: + a['existed'] += 1 + else: + a['missed'] += 1 + +existed = a['existed'] +missed = a['missed'] + +assert abs(existed - missed) < 10000 + diff --git a/benchmarks/dict_1.py b/benchmarks/dict_1.py new file mode 100644 index 00000000..6c5daa31 --- /dev/null +++ b/benchmarks/dict_1.py @@ -0,0 +1,27 @@ +# test deletion +rnd = 0 +keys = [] +while True: + keys.append(rnd) + rnd = ((rnd * 5) + 1) & 1023 + if rnd == 0: + break + +assert len(keys) == 1024 + +a = {k: k for k in keys} + +for i in range(10000): + if i % 2 == 0: + # del all keys + for k in keys: + del a[k] + assert len(a) == 0 + else: + # add keys back + for k in keys: + a[k] = k + assert len(a) == len(keys) + +assert len(a) == len(keys) +assert list(a.keys()) == keys # order matters diff --git a/build_g.sh b/build_g.sh index bffbb51f..5744fb07 100644 --- a/build_g.sh +++ b/build_g.sh @@ -1,3 +1,5 @@ +set -e + python prebuild.py SRC_C=$(find src/ -name "*.c")