diff --git a/benchmarks/dict_0.py b/benchmarks/dict_0.py
new file mode 100644
index 00000000..9637fac5
--- /dev/null
+++ b/benchmarks/dict_0.py
@@ -0,0 +1,20 @@
+# Benchmark: basic dict get/set (membership test plus in-place counter update on string keys)
+import random
+random.seed(7)  # fixed seed so every run draws the same key sequence
+
+a = {str(i): i for i in range(100)}  # keys '0'..'99'
+a['existed'] = 0  # hit counter, kept inside the dict under test
+a['missed'] = 0  # miss counter, kept inside the dict under test
+
+for i in range(1000000):
+    key = str(random.randint(-100, 100))  # negative keys are never present in `a`
+    if key in a:
+        a['existed'] += 1
+    else:
+        a['missed'] += 1
+
+existed = a['existed']
+missed = a['missed']
+
+assert abs(existed - missed) < 10000  # sanity check: hits and misses stay roughly balanced
+
diff --git a/benchmarks/dict_1.py b/benchmarks/dict_1.py
new file mode 100644
index 00000000..6c5daa31
--- /dev/null
+++ b/benchmarks/dict_1.py
@@ -0,0 +1,27 @@
+# Benchmark: dict deletion (repeatedly delete every key, then insert them all back)
+rnd = 0
+keys = []
+while True:
+    keys.append(rnd)
+    rnd = ((rnd * 5) + 1) & 1023  # next key; the loop stops when the sequence returns to 0
+    if rnd == 0:
+        break
+
+assert len(keys) == 1024  # the sequence above must visit 1024 values before repeating
+
+a = {k: k for k in keys}
+
+for i in range(10000):
+    if i % 2 == 0:
+        # even rounds: delete every key
+        for k in keys:
+            del a[k]
+        assert len(a) == 0
+    else:
+        # odd rounds: insert every key again, in the original order
+        for k in keys:
+            a[k] = k
+        assert len(a) == len(keys)
+
+assert len(a) == len(keys)
+assert list(a.keys()) == keys # order matters: iteration must follow insertion order
diff --git a/build_g.sh b/build_g.sh
index bffbb51f..5744fb07 100644
--- a/build_g.sh
+++ b/build_g.sh
@@ -1,3 +1,5 @@
+set -e  # abort the build as soon as any command fails
+
 python prebuild.py
 
 SRC_C=$(find src/ -name "*.c")
diff --git a/include/pocketpy/interpreter/iter.hpp b/include/pocketpy/interpreter/iter.hpp
index 130c08f7..a1917679 100644
--- a/include/pocketpy/interpreter/iter.hpp
+++ b/include/pocketpy/interpreter/iter.hpp
@@ -74,9 +74,9 @@ struct Generator {
 
 struct DictItemsIter {
     PyVar ref;
-    int i;
+    pkpy_DictIter it;  // iteration state over the Dict stored in `ref`
 
-    DictItemsIter(PyVar ref) : ref(ref) { i = PK_OBJ_GET(Dict, ref)._head_idx; }
+    DictItemsIter(PyVar ref) : ref(ref) { it = PK_OBJ_GET(Dict, ref).iter(); }
 
     void _gc_mark(VM* vm) const { vm->obj_gc_mark(ref); }
 
diff --git a/include/pocketpy/objects/dict.h b/include/pocketpy/objects/dict.h
new file mode 100644
index 00000000..0ad7c6a5
--- /dev/null
+++ b/include/pocketpy/objects/dict.h
@@ 
-0,0 +1,113 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include "pocketpy/objects/pyvar.h"
+#include "pocketpy/common/vector.h"
+
+/** @brief `pkpy_Dict` is the Dict type in Python */
+typedef struct {
+    int count;              /** number of elements in the dictionary */
+    c11_vector _entries;    /** contains `pkpy_DictEntry` (hidden type) */
+    int _htcap;             /** capacity of the hashtable, always a power of 2 */
+    void* _hashtable;       /** contains indices into `_entries`, stored as `u8`, `u16` or `u32` according to `_htcap` */
+} pkpy_Dict;
+
+/** @brief `pkpy_DictIter` is used to iterate over a `pkpy_Dict` */
+typedef struct {
+    const pkpy_Dict* _dict;
+    int _index;
+} pkpy_DictIter;
+
+/**
+ * @brief `pkpy_Dict` constructor
+ * @param self `pkpy_Dict` instance
+ */
+void pkpy_Dict__ctor(pkpy_Dict* self);
+
+/**
+ * @brief `pkpy_Dict` destructor
+ * @param self `pkpy_Dict` instance
+ */
+void pkpy_Dict__dtor(pkpy_Dict* self);
+
+/**
+ * @brief Copy a `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @return a new `pkpy_Dict` instance, must be destructed by the caller
+ */
+pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self);
+
+/**
+ * @brief Set a key-value pair into the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to set
+ * @param val value to set
+ * @return `true` if the key is newly added, `false` if the key already exists
+ */
+bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val);
+
+/**
+ * @brief Check if a key exists in the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to check
+ * @return `true` if the key exists, `false` otherwise
+ */
+bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key);
+
+/**
+ * @brief Remove a key from the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to remove
+ * @return `true` if the key was found and removed, `false` if the key doesn't exist
+ */
+bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key);
+
+/**
+ * @brief Try to get a value from the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param key key to get
+ * @return pointer to the value associated with the key, `NULL` if the key doesn't exist
+ */
+const pkpy_Var* pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key);
+
+/**
+ * @brief Update the `pkpy_Dict` with another one
+ * @param self `pkpy_Dict` instance
+ * @param vm __eq__ and __hash__ context
+ * @param other `pkpy_Dict` instance to update with
+ */
+void pkpy_Dict__update(pkpy_Dict* self, void *vm, const pkpy_Dict* other);
+
+/**
+ * @brief Clear the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ */
+void pkpy_Dict__clear(pkpy_Dict* self);
+
+/**
+ * @brief Create an iterator over the `pkpy_Dict`
+ * @param self `pkpy_Dict` instance
+ * @return an iterator over the `pkpy_Dict`
+ */
+pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict* self);
+
+/**
+ * @brief Fetch the current entry of the iterator and advance it
+ * @param self `pkpy_DictIter` instance
+ * @param key key will be filled with the current key, can be `NULL` if not needed
+ * @param value value will be filled with the current value, can be `NULL` if not needed
+ * @return `true` if an entry was produced, `false` if the iteration is finished
+ */
+bool pkpy_DictIter__next(pkpy_DictIter* self, pkpy_Var* key, pkpy_Var* value);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp
index 853f0d25..2d521073 100644
--- a/include/pocketpy/objects/dict.hpp
+++ b/include/pocketpy/objects/dict.hpp
@@ -2,63 +2,95 @@
 
 #include "pocketpy/objects/base.hpp"
 #include "pocketpy/objects/tuplelist.hpp"
+#include "pocketpy/objects/dict.h"
 
 namespace pkpy {
 
-struct Dict {
-    struct Item {
-        PyVar first;
-        PyVar second;
-        int prev;
-        int next;
-    };
+struct Dict : private pkpy_Dict {  // thin C++ wrapper; all storage and logic live in the C `pkpy_Dict`
+    Dict() {
+        pkpy_Dict__ctor(this);
+    }
 
-    constexpr static int __Capacity = 8;
-    constexpr static float __LoadFactor = 0.67f;
+    Dict(Dict&& other) {
+        std::memcpy(this, &other, sizeof(Dict));  // take over other's buffers...
+        pkpy_Dict__ctor(&other);                  // ...and leave `other` as a fresh empty dict
+    }
 
-    int _capacity;
-    int _mask;
-    int _size;
-    int _critical_size;
-    int _head_idx;  // for order preserving
-    int _tail_idx;  // for order preserving
-    Item* _items;
-
-    Dict();
-    Dict(Dict&& other);
-    Dict(const Dict& other);
+    Dict(const Dict& other) {
+        // OPTIMIZEME: reduce copy
+        auto clone = pkpy_Dict__copy(&other);
+        std::memcpy(this, &clone, sizeof(Dict));  // `clone` is not destructed; this object now owns its buffers
+    }
+
     Dict& operator= (const Dict&) = delete;
     Dict& operator= (Dict&&) = delete;
 
-    int size() const { return _size; }
+    int size() const { return count; }
 
-    void _probe_0(VM* vm, PyVar key, bool& ok, int& i) const;
-    void _probe_1(VM* vm, PyVar key, bool& ok, int& i) const;
+    void set(VM* vm, PyVar key, PyVar val) {
+        pkpy_Dict__set(this, vm, *(pkpy_Var*)(&key), *(pkpy_Var*)(&val));
+    }
 
-    void set(VM* vm, PyVar key, PyVar val);
-    void _rehash(VM* vm);
+    PyVar try_get(VM* vm, PyVar key) const {
+        auto res = pkpy_Dict__try_get(this, vm, *(pkpy_Var*)(&key));
+        if (!res) return nullptr;  // key not found
+        return *(const PyVar*)(res);
+    }
 
-    PyVar try_get(VM* vm, PyVar key) const;
+    bool contains(VM* vm, PyVar key) const {
+        return pkpy_Dict__contains(this, vm, *(pkpy_Var*)(&key));
+    }
 
-    bool contains(VM* vm, PyVar key) const;
-    bool del(VM* vm, PyVar key);
-    void update(VM* vm, const Dict& other);
+    bool del(VM* vm, PyVar key) {
+        return pkpy_Dict__del(this, vm, *(pkpy_Var*)(&key));
+    }
+
+    void update(VM* vm, const Dict& other) {
+        pkpy_Dict__update(this, vm, &other);
+    }
 
     template <typename __Func>
     void apply(__Func f) const {
-        int i = _head_idx;
-        while(i != -1) {
-            f(_items[i].first, _items[i].second);
-            i = _items[i].next;
+        pkpy_DictIter it = iter();
+        PyVar key, val;
+        while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) {
+            f(key, val);
         }
     }
 
-    Tuple keys() const;
-    Tuple values() const;
-    void clear();
-    ~Dict();
+    Tuple keys() const {
+        Tuple res(count);
+        pkpy_DictIter it = iter();
+        PyVar key;
+        int i = 0;
+        while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), nullptr)) {  // values are not needed here
+            res[i++] = key;
+        }
+        return res;
+    }
 
-    void __alloc_items();
+    Tuple values() const {
+        Tuple res(count);
+        pkpy_DictIter it = iter();
+        PyVar val;
+        int i = 0;
+        while(pkpy_DictIter__next(&it, nullptr, (pkpy_Var*)(&val))) {  // keys are not needed here
+            res[i++] = val;
+        }
+        return res;
+    }
+
+    pkpy_DictIter iter() const {
+        return pkpy_Dict__iter(this);
+    }
+
+    void clear() {
+        pkpy_Dict__clear(this);
+    }
+
+    ~Dict() {
+        pkpy_Dict__dtor(this);
+    }
 
     void _gc_mark(VM*) const;
 };
diff --git a/include/pocketpy/objects/pyvar.h b/include/pocketpy/objects/pyvar.h
new file mode 100644
index 00000000..edd7495f
--- /dev/null
+++ b/include/pocketpy/objects/pyvar.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/**
+ * @brief A python value in pocketpy.
+ */
+typedef struct {
+    int type;
+    int _0;
+    int64_t _1;
+} pkpy_Var;
+
+/**
+ * @brief Check if the pkpy_Var is null.
+ * @param self The variable to check.
+ * @return True if the variable is null, false otherwise.
+ */
+#define pkpy_Var__is_null(self) ((self)->type == 0)
+
+/**
+ * @brief Set the variable to null.
+ * @param self The variable to set.
+ */
+#define pkpy_Var__set_null(self) do { (self)->type = 0; } while(0)
+
+/**
+ * @brief Check if two pkpy_Vars are equal, respecting the __eq__ method.
+ * @param vm The virtual machine.
+ * @param a The first pkpy_Var.
+ * @param b The second pkpy_Var.
+ * @return True if the pkpy_Vars are equal, false otherwise.
+ */
+bool pkpy_Var__eq__(void *vm, pkpy_Var a, pkpy_Var b);
+
+/**
+ * @brief Get the hash of the pkpy_Var, respecting the __hash__ method.
+ * @param vm The virtual machine.
+ * @param a The pkpy_Var to hash.
+ * @return The hash of the pkpy_Var.
+ */
+int64_t pkpy_Var__hash__(void *vm, pkpy_Var a);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp
index 18c484d0..d0fae96a 100644
--- a/src/interpreter/iter.cpp
+++ b/src/interpreter/iter.cpp
@@ -117,12 +117,13 @@ void DictItemsIter::_register(VM* vm, PyObject* mod, PyObject* type) {
     });
     vm->bind__next__(type->as(), [](VM* vm, PyVar _0) -> unsigned {
         DictItemsIter& self = _CAST(DictItemsIter&, _0);
-        Dict& d = PK_OBJ_GET(Dict, self.ref);
-        if(self.i == -1) return 0;
-        vm->s_data.push(d._items[self.i].first);
-        vm->s_data.push(d._items[self.i].second);
-        self.i = d._items[self.i].next;
-        return 2;
+        PyVar key, val;
+        if (pkpy_DictIter__next(&self.it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) {
+            vm->s_data.push(key);
+            vm->s_data.push(val);
+            return 2;
+        }
+        return 0;
     });
 }
 
diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp
index 10ad986c..40ca619b 100644
--- a/src/interpreter/vm.cpp
+++ b/src/interpreter/vm.cpp
@@ -1628,37 +1628,6 @@ BIND_BINARY_SPECIAL(__xor__)
 
 #undef BIND_BINARY_SPECIAL
 
-void Dict::_probe_0(VM* vm, PyVar key, bool& ok, int& i) const {
-    ok = false;
-    i64 hash = vm->py_hash(key);
-    i = hash & _mask;
-    for(int j = 0; j < _capacity; j++) {
-        if(_items[i].first != nullptr) {
-            if(vm->py_eq(_items[i].first, key)) {
-                ok = true;
-                break;
-            }
-        } else {
-            if(_items[i].second == nullptr) break;
-        }
-        // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166
-        i = ((5 * i) + 1) & _mask;
-    }
-}
-
-void Dict::_probe_1(VM* vm, PyVar key, bool& ok, int& i) const {
-    ok = false;
-    i = vm->py_hash(key) & _mask;
-    while(_items[i].first != nullptr) {
-        if(vm->py_eq(_items[i].first, key)) {
-            ok = true;
-            break;
-        }
-        // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166
-        i = ((5 * i) + 1) & _mask;
-    }
-}
-
 #if PK_ENABLE_PROFILER
 void NextBreakpoint::_step(VM* vm) {
     int curr_callstack_size = vm->callstack.size();
diff --git a/src/objects/dict.c b/src/objects/dict.c
new
file mode 100644
index 00000000..a17432f1
--- /dev/null
+++ b/src/objects/dict.c
@@ -0,0 +1,265 @@
+#include "pocketpy/objects/dict.h"
+#include "pocketpy/common/utils.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define DICT_MAX_LOAD 0.75
+#define DICT_HASH_NEXT(h) ((h) * 5 + 1)
+#define DICT_HASH_TRANS(h) ((int)((h) & 0xffffffff)) // used to transform the value returned by __hash__
+#define PK_DICT_COMPACT_MODE 1
+
+/* One key/value pair stored in `_entries`; a null key marks a deleted entry. */
+struct pkpy_DictEntry {
+    pkpy_Var key;
+    pkpy_Var val;
+};
+
+/* Width in bytes of one hashtable slot for the current `_htcap`. */
+inline extern int pkpy_Dict__idx_size(const pkpy_Dict* self) {
+#if PK_DICT_COMPACT_MODE
+    if(self->_htcap < 255) return 1;
+    if(self->_htcap < 65535) return 2;
+#endif
+    return 4;
+}
+
+/* Slot value that means "empty" (all bits set at the current slot width). */
+inline extern int pkpy_Dict__idx_null(const pkpy_Dict* self) {
+#if PK_DICT_COMPACT_MODE
+    if(self->_htcap < 255) return 255;
+    if(self->_htcap < 65535) return 65535;
+#endif
+    return -1;
+}
+
+/* Size in bytes of the whole hashtable. */
+inline extern int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); }
+
+void pkpy_Dict__ctor(pkpy_Dict* self) {
+    self->count = 0;
+    c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry));
+    self->_htcap = 16;
+    self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self));
+    memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self));  // 0xff bytes read as "empty" at every slot width
+}
+
+void pkpy_Dict__dtor(pkpy_Dict* self) {
+    c11_vector__dtor(&self->_entries);
+    free(self->_hashtable);
+}
+
+pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) {
+    int ht_size = pkpy_Dict__ht_byte_size(self);
+    void* ht_clone = malloc(ht_size);
+    memcpy(ht_clone, self->_hashtable, ht_size);
+    return (pkpy_Dict){.count = self->count,
+                       ._entries = c11_vector__copy(&self->_entries),
+                       ._htcap = self->_htcap,
+                       ._hashtable = ht_clone};
+}
+
+/* Read slot `h` at exactly the slot width, so no byte outside the table is touched. */
+static int pkpy_Dict__htget(const pkpy_Dict* self, int h) {
+#if PK_DICT_COMPACT_MODE
+    switch(pkpy_Dict__idx_size(self)) {
+        case 1: return ((const uint8_t*)self->_hashtable)[h];
+        case 2: return ((const uint16_t*)self->_hashtable)[h];
+        default: return ((const int*)self->_hashtable)[h];
+    }
+#else
+    return ((const int*)self->_hashtable)[h];
+#endif
+}
+
+/* Write entry index `v` into slot `h` at exactly the slot width. */
+static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) {
+#if PK_DICT_COMPACT_MODE
+    switch(pkpy_Dict__idx_size(self)) {
+        case 1: ((uint8_t*)self->_hashtable)[h] = (uint8_t)v; break;
+        case 2: ((uint16_t*)self->_hashtable)[h] = (uint16_t)v; break;
+        default: ((int*)self->_hashtable)[h] = v; break;
+    }
+#else
+    ((int*)self->_hashtable)[h] = v;
+#endif
+}
+
+/* Find the first slot on the probe sequence of `hash` that is empty or refers to a deleted entry. */
+static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) {
+    const int null = pkpy_Dict__idx_null(self);
+    const int mask = self->_htcap - 1;
+    for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) {
+        int idx = pkpy_Dict__htget(self, h);
+        if(idx == null) return h;
+
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+        if(pkpy_Var__is_null(&entry->key)) return h;
+    }
+    PK_UNREACHABLE();
+}
+
+/* Find the slot holding `key`, or the first empty slot if `key` is absent (deleted entries are skipped). */
+static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) {
+    const int null = pkpy_Dict__idx_null(self);
+    const int mask = self->_htcap - 1;
+    for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) {
+        int idx = pkpy_Dict__htget(self, h);
+        if(idx == null) return h;
+
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+        if(pkpy_Var__is_null(&entry->key)) continue;
+        if(pkpy_Var__eq__(vm, entry->key, key)) return h;
+    }
+    PK_UNREACHABLE();
+}
+
+/* Double the hashtable capacity and re-insert every live entry. */
+static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) {
+    free(self->_hashtable);
+    self->_htcap *= 2;
+    self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self));
+    memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self));
+
+    for(int i = 0; i < self->_entries.count; i++) {
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i);
+        if(pkpy_Var__is_null(&entry->key)) continue;
+
+        int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key));
+        int h = pkpy_Dict__probe0(self, vm, entry->key, rhash);
+        pkpy_Dict__htset(self, h, i);
+    }
+}
+
+bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) {
+    int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key));
+    int h = pkpy_Dict__probe1(self, vm, key, hash);
+
+    int idx = pkpy_Dict__htget(self, h);
+    if(idx == pkpy_Dict__idx_null(self)) {
+        idx = self->_entries.count;
+        c11_vector__push(struct pkpy_DictEntry,
+                         &self->_entries,
+                         ((struct pkpy_DictEntry){
+                             .key = key,
+                             .val = val,
+                         }));
+        h = pkpy_Dict__probe0(self, vm, key, hash);
+        pkpy_Dict__htset(self, h, idx);
+        self->count += 1;
+        // Grow on `_entries.count` (live + deleted entries): slots that refer to deleted entries stay
+        // occupied, so sizing on `count` alone could fill every slot and make probe1 loop forever.
+        if(self->_entries.count >= self->_htcap * DICT_MAX_LOAD) pkpy_Dict__extendht(self, vm);
+        return true;
+    }
+
+    // probe1 stops only on an empty slot or on an entry equal to `key`, so this entry is the match.
+    struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+    entry->val = val;
+    return false;
+}
+
+bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) {
+    int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key));
+    int h = pkpy_Dict__probe1(self, vm, key, hash);
+    return pkpy_Dict__htget(self, h) != pkpy_Dict__idx_null(self);
+}
+
+/* Drop deleted entries from `_entries` and rebuild the hashtable once enough of them pile up. */
+static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) {
+    int deleted_slots = self->_entries.count - self->count;
+    if(deleted_slots <= 8 || deleted_slots < self->_entries.count * (1 - DICT_MAX_LOAD)) return false;
+
+    // shrink
+    // free(self->_hashtable);
+    // while(self->_htcap * DICT_MAX_LOAD / 2 > self->count && self->_htcap >= 32)
+    //     self->_htcap /= 2;
+    // self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self));
+    memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self));
+
+    int new_cnt = 0;
+    for (int i = 0; i < self->_entries.count; ++i) {
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i);
+        if(pkpy_Var__is_null(&entry->key)) continue;
+        if (i > new_cnt) c11__setitem(struct pkpy_DictEntry, &self->_entries, new_cnt, *entry);
+        new_cnt += 1;
+    }
+
+    self->_entries.count = new_cnt;
+    for(int i = 0; i < self->_entries.count; i++) {
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i);
+        if(pkpy_Var__is_null(&entry->key)) continue;
+
+        int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key));
+        int h = pkpy_Dict__probe0(self, vm, entry->key, rhash);
+        pkpy_Dict__htset(self, h, i);
+    }
+    return true;
+}
+
+bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) {
+    int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key));
+    int h = pkpy_Dict__probe1(self, vm, key, hash);
+    int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self);
+    if(idx == null) return false;
+
+    struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+    pkpy_Var__set_null(&entry->key);
+    self->count -= 1;
+    pkpy_Dict__refactor(self, vm);
+    return true;
+}
+
+const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) {
+    int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key));
+    int h = pkpy_Dict__probe1(self, vm, key, hash);
+
+    int idx = pkpy_Dict__htget(self, h);
+    if(idx == pkpy_Dict__idx_null(self)) return NULL;
+
+    struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+    return &entry->val;
+}
+
+void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) {
+    for(int i = 0; i < other->_entries.count; i++) {
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &other->_entries, i);
+        if(pkpy_Var__is_null(&entry->key)) continue;
+        pkpy_Dict__set(self, vm, entry->key, entry->val);
+    }
+}
+
+void pkpy_Dict__clear(pkpy_Dict *self) {
+    self->count = 0;
+    self->_entries.count = 0;
+    memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self));
+}
+
+/* Return the first index >= `idx` whose entry is live, or `_entries.count` if there is none. */
+static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) {
+    while (idx < self->_entries.count) {
+        struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx);
+        if(!pkpy_Var__is_null(&entry->key)) break;
+        idx++;
+    }
+    return idx;
+}
+
+pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict *self) {
+    return (pkpy_DictIter){
+        ._dict = self,
+        ._index = pkpy_Dict__next_entry_idx(self, 0),
+    };
+}
+
+bool pkpy_DictIter__next(pkpy_DictIter *self, pkpy_Var *key, pkpy_Var *val) {
+    if(self->_index >= self->_dict->_entries.count) return false;
+
+    struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index);
+    if(pkpy_Var__is_null(&entry->key)) return false;
+    if (key) *key = entry->key;
+    if (val) *val = entry->val;
+
+    self->_index = pkpy_Dict__next_entry_idx(self->_dict, self->_index + 1);
+    return true;
+}
diff --git a/src/objects/dict.cpp b/src/objects/dict.cpp
deleted file mode 100644
index 184d7c1c..00000000
--- a/src/objects/dict.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-#include "pocketpy/objects/dict.hpp"
-
-namespace pkpy {
-
-Dict::Dict() :
-    _capacity(__Capacity), _mask(__Capacity - 1), _size(0), _critical_size(__Capacity * __LoadFactor + 0.5f),
-    _head_idx(-1), _tail_idx(-1) {
-    __alloc_items();
-}
-
-void Dict::__alloc_items() {
-    _items = (Item*)std::malloc(_capacity * sizeof(Item));
-    for(int i = 0; i < _capacity; i++) {
-        _items[i].first = nullptr;
-        _items[i].second = nullptr;
-        _items[i].prev = -1;
-        _items[i].next = -1;
-    }
-}
-
-Dict::Dict(Dict&& other) {
-    _capacity = other._capacity;
-    _mask = other._mask;
-    _size = other._size;
-    _critical_size = other._critical_size;
-    _head_idx = other._head_idx;
-    _tail_idx = other._tail_idx;
-    _items = other._items;
-    other._items = nullptr;
-}
-
-Dict::Dict(const Dict& other) {
-    _capacity = other._capacity;
-    _mask = other._mask;
-    _size = other._size;
-    _critical_size = other._critical_size;
-    _head_idx = other._head_idx;
-    _tail_idx = other._tail_idx;
-    // copy items
-    _items = (Item*)std::malloc(_capacity * sizeof(Item));
-    std::memcpy(_items, other._items, _capacity * sizeof(Item));
-}
-
-void Dict::set(VM* vm, PyVar key, PyVar val) {
-    // do possible rehash
-    if(_size + 1 > _critical_size) _rehash(vm);
-    bool ok;
-    int i;
-    _probe_1(vm, key, ok, i);
-    if(!ok) {
-        _size++;
-        _items[i].first = key;
-
-        // append to tail
-        if(_size == 0 + 1) {
-            _head_idx = i;
-            _tail_idx = i;
-        } else {
-            _items[i].prev = _tail_idx;
-            _items[_tail_idx].next = i;
-            _tail_idx = i;
-        }
-    }
-    _items[i].second = val;
-}
-
-void Dict::_rehash(VM* vm) {
-    Item* old_items = _items;
-    int old_head_idx = _head_idx;
-
-    _capacity *= 4;
-    _mask = _capacity - 1;
-    _size = 0;
-    _critical_size = _capacity * __LoadFactor + 0.5f;
-    _head_idx = -1;
-    _tail_idx = -1;
-
-    __alloc_items();
-
-    // copy old items to new dict
-    int i = old_head_idx;
-    while(i != -1) {
-        set(vm, old_items[i].first, old_items[i].second);
-        i = old_items[i].next;
-    }
-
-    std::free(old_items);
-}
-
-PyVar Dict::try_get(VM* vm, PyVar key) const {
-    bool ok;
-    int i;
-    _probe_0(vm, key, ok, i);
-    if(!ok) return nullptr;
-    return _items[i].second;
-}
-
-bool Dict::contains(VM* vm, PyVar key) const {
-    bool ok;
-    int i;
-    _probe_0(vm, key, ok, i);
-    return ok;
-}
-
-bool Dict::del(VM* vm, PyVar key) {
-    bool ok;
-    int i;
-    _probe_0(vm, key, ok, i);
-    if(!ok) return false;
-    _items[i].first = nullptr;
-    // _items[i].second = PY_DELETED_SLOT; // do not change .second if it is not NULL, it means the slot is occupied by
-    // a deleted item
-    _size--;
-
-    if(_size == 0) {
-        _head_idx = -1;
-        _tail_idx = -1;
-    } else {
-        if(_head_idx == i) {
-            _head_idx = _items[i].next;
-            _items[_head_idx].prev = -1;
-        } else if(_tail_idx == i) {
-            _tail_idx = _items[i].prev;
-            _items[_tail_idx].next = -1;
-        } else {
-            _items[_items[i].prev].next = _items[i].next;
-            _items[_items[i].next].prev = _items[i].prev;
-        }
-    }
-    _items[i].prev = -1;
-    _items[i].next = -1;
-    return true;
-}
-
-void Dict::update(VM* vm, const Dict& other) {
-    other.apply([&](PyVar k, PyVar v) {
-        set(vm, k, v);
-    });
-}
-
-Tuple Dict::keys() const {
-    Tuple t(_size);
-    int i = _head_idx;
-    int j = 0;
-    while(i != -1) {
-        t[j++] = _items[i].first;
-        i = _items[i].next;
-    }
-    assert(j == _size);
-    return t;
-}
-
-Tuple Dict::values() const {
-    Tuple t(_size);
-    int i = _head_idx;
-    int j = 0;
-    while(i != -1) {
-        t[j++] = _items[i].second;
-        i = _items[i].next;
-    }
-    assert(j == _size);
-    return t;
-}
-
-void Dict::clear() {
-    _size = 0;
-    _head_idx = -1;
-    _tail_idx = -1;
-    for(int i = 0; i < _capacity; i++) {
-        _items[i].first = nullptr;
-        _items[i].second = nullptr;
-        _items[i].prev = -1;
-        _items[i].next = -1;
-    }
-}
-
-Dict::~Dict() {
-    if(_items) std::free(_items);
-}
-}  // namespace pkpy
diff --git a/src/objects/pyvar.cpp b/src/objects/pyvar.cpp
new file mode 100644
index 00000000..7b9a1851
--- /dev/null
+++ b/src/objects/pyvar.cpp
@@ -0,0 +1,17 @@
+#include "pocketpy/objects/base.hpp"
+#include "pocketpy/objects/pyvar.h"
+#include "pocketpy/interpreter/vm.hpp"
+
+extern "C" {
+
+bool pkpy_Var__eq__(void* vm_, pkpy_Var a, pkpy_Var b) {
+    auto vm = (pkpy::VM*)(vm_);
+    return vm->py_eq(*(pkpy::PyVar*)(&a), *(pkpy::PyVar*)(&b));
+}
+
+int64_t pkpy_Var__hash__(void* vm_, pkpy_Var a) {
+    auto vm = (pkpy::VM*)(vm_);
+    return vm->py_hash(*(pkpy::PyVar*)(&a));
+}
+
+}
diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp
index 23d3596a..ba1456eb 100644
--- a/src/pocketpy.cpp
+++ b/src/pocketpy.cpp
@@ -1493,12 +1493,12 @@ void __init_builtins(VM* _vm) {
         if(!vm->isinstance(_1, vm->tp_dict)) return vm->NotImplemented;
         Dict& other = _CAST(Dict&, _1);
         if(self.size() != other.size()) return vm->False;
-        for(int i = 0; i < self._capacity; i++) {
-            auto item = self._items[i];
-            if(item.first == nullptr) continue;
-            PyVar value = other.try_get(vm, item.first);
-            if(value == nullptr) return vm->False;
-            if(!vm->py_eq(item.second, value)) return vm->False;
+        pkpy_DictIter it = self.iter();
+        PyVar key, val;
+        while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) {
+            PyVar other_val = other.try_get(vm, key);
+            if(other_val == nullptr) return vm->False;
+            if(!vm->py_eq(val, other_val)) return vm->False;
         }
         return vm->True;
     });
diff --git a/tests/07_dict.py b/tests/07_dict.py
index 6a0d3b49..8578a97d 100644
--- a/tests/07_dict.py
+++ b/tests/07_dict.py
@@ -159,6 +159,17 @@ try:
 except TypeError:
     pass
 
+n = 2 ** 17
+a = {}
+for i in range(n):
+    a[str(i)] = i
+
+for i in range(n):
+    y = a[str(i)]
+
+for i in range(n):
+    del a[str(i)]
+
 a = {1: 2, 3: 4}
 a['a'] = a
 assert repr(a) == "{1: 2, 3: 4, 'a': {...}}"
@@ -169,4 +180,3 @@ gc.collect()
 for k, v in a.items():
     pass
 assert gc.collect() == 1
-