From be2aae493abe37273bb9745977a9ee4482ccce38 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 27 Jun 2025 14:21:53 +0800 Subject: [PATCH 1/2] backup --- include/pocketpy/interpreter/types.h | 12 +- src/public/py_dict.c | 243 +++++++++++++++------------ 2 files changed, 140 insertions(+), 115 deletions(-) diff --git a/include/pocketpy/interpreter/types.h b/include/pocketpy/interpreter/types.h index 5c4dd460..4dcc1bf4 100644 --- a/include/pocketpy/interpreter/types.h +++ b/include/pocketpy/interpreter/types.h @@ -3,26 +3,22 @@ #include "pocketpy/common/vector.h" #include "pocketpy/objects/base.h" -#define PK_DICT_MAX_COLLISION 4 - typedef struct { uint64_t hash; py_TValue key; py_TValue val; } DictEntry; -typedef struct { - int _[PK_DICT_MAX_COLLISION]; -} DictIndex; - typedef struct { int length; uint32_t capacity; - DictIndex* indices; + void* indices; + bool index_is_short; + uint32_t null_index_value; c11_vector /*T=DictEntry*/ entries; } Dict; typedef c11_vector List; void c11_chunked_array2d__mark(void* ud, c11_vector* p_stack); -void function__gc_mark(void* ud, c11_vector* p_stack); \ No newline at end of file +void function__gc_mark(void* ud, c11_vector* p_stack); diff --git a/src/public/py_dict.c b/src/public/py_dict.c index 347b1f26..b7796d80 100644 --- a/src/public/py_dict.c +++ b/src/public/py_dict.c @@ -51,8 +51,6 @@ static uint32_t Dict__next_cap(uint32_t cap) { } } - - typedef struct { DictEntry* curr; DictEntry* end; @@ -61,9 +59,24 @@ typedef struct { static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) { self->length = 0; - self->capacity = capacity; - self->indices = PK_MALLOC(self->capacity * sizeof(DictIndex)); - memset(self->indices, -1, self->capacity * sizeof(DictIndex)); + self->capacity = capacity; // the 1st prime + + size_t indices_size; + if(self->capacity < UINT16_MAX - 1) { + self->index_is_short = true; + indices_size = self->capacity * sizeof(uint16_t); + self->null_index_value = UINT16_MAX; + self->deleted_index_value = UINT16_MAX - 1; + } else { + self->index_is_short = false; + indices_size = self->capacity * sizeof(uint32_t); + self->null_index_value = UINT32_MAX; + self->deleted_index_value = UINT32_MAX - 1; + } + + self->indices = PK_MALLOC(indices_size); + memset(self->indices, -1, indices_size); + c11_vector__ctor(&self->entries, sizeof(DictEntry)); c11_vector__reserve(&self->entries, entries_capacity); } @@ -75,65 +88,105 @@ static void Dict__dtor(Dict* self) { c11_vector__dtor(&self->entries); } -static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) { - py_i64 hash; - if(!py_hash(key, &hash)) return false; - int idx = (uint64_t)hash % self->capacity; - for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { - int idx2 = self->indices[idx]._[i]; - if(idx2 == -1) continue; +static uint32_t Dict__get_index(Dict* self, uint32_t index) { + if(self->index_is_short) { + uint16_t* indices = self->indices; + return indices[index]; + } else { + uint32_t* indices = self->indices; + return indices[index]; + } +} + +static void Dict__swap_index(Dict* self, uint32_t x, uint32_t y) { + if(self->index_is_short) { + uint16_t* indices = self->indices; + uint16_t tmp = indices[x]; + indices[x] = indices[y]; + indices[y] = tmp; + } else { + uint32_t* indices = self->indices; + uint32_t tmp = indices[x]; + indices[x] = indices[y]; + indices[y] = tmp; + } +} + +static void Dict__set_index(Dict* self, uint32_t index, uint32_t value) { + if(self->index_is_short) { + uint16_t* indices = self->indices; + indices[index] = (uint16_t)value; + } else { + uint32_t* indices = self->indices; + indices[index] = value; + } +} + +static bool + Dict__probe(Dict* self, py_TValue* key, py_i64* p_hash, uint32_t* p_idx, DictEntry** p_entry) { + if(!py_hash(key, p_hash)) return false; + py_i64 hash = *p_hash; + uint32_t idx = (uint64_t)hash % self->capacity; + const uint32_t max_idx = self->capacity - 1; + while(true) { + uint32_t idx2 = Dict__get_index(self, idx); + if(idx2 == self->null_index_value) break; DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); if(entry->hash == (uint64_t)hash) { int res = py_equal(&entry->key, key); if(res == 1) { - *out = entry; + *p_idx = idx; + *p_entry = entry; return true; } if(res == -1) return false; // error } + // try next index + idx = idx < max_idx ? idx + 1 : 0; } - *out = NULL; + // not found + *p_idx = idx; + *p_entry = NULL; return true; } +static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) { + py_i64 hash; + uint32_t idx; + return Dict__probe(self, key, &hash, &idx, out); +} + static void Dict__clear(Dict* self) { - memset(self->indices, -1, self->capacity * sizeof(DictIndex)); + size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t) + : self->capacity * sizeof(uint32_t); + memset(self->indices, -1, indices_size); c11_vector__clear(&self->entries); self->length = 0; } static void Dict__rehash_2x(Dict* self) { Dict old_dict = *self; - uint32_t new_capacity = self->capacity; - -__RETRY: - // use next capacity - new_capacity = Dict__next_cap(new_capacity); + uint32_t new_capacity = Dict__next_cap(new_capacity); // create a new dict with new capacity Dict__ctor(self, new_capacity, old_dict.entries.capacity); // move entries from old dict to new dict + const uint32_t max_idx = new_capacity - 1; for(int i = 0; i < old_dict.entries.length; i++) { DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i); if(py_isnil(&old_entry->key)) continue; - int idx = old_entry->hash % new_capacity; - bool success = false; - for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { - int idx2 = self->indices[idx]._[i]; - if(idx2 == -1) { - // insert new entry (empty slot) + uint32_t idx = old_entry->hash % new_capacity; + while(true) { + uint32_t idx2 = Dict__get_index(self, idx); + if(idx2 == self->null_index_value) { c11_vector__push(DictEntry, &self->entries, *old_entry); - self->indices[idx]._[i] = self->entries.length - 1; + Dict__set_index(self, idx, self->entries.length - 1); self->length++; - success = true; break; } - } - if(!success) { - Dict__dtor(self); - goto __RETRY; + // try next index + idx = idx < max_idx ? idx + 1 : 0; } } - // done Dict__dtor(&old_dict); } @@ -153,93 +206,69 @@ static void Dict__compact_entries(Dict* self) { } self->entries.length = n; // update indices - for(uint32_t i = 0; i < self->capacity; i++) { - for(int j = 0; j < PK_DICT_MAX_COLLISION; j++) { - int idx = self->indices[i]._[j]; - if(idx == -1) continue; - self->indices[i]._[j] = mappings[idx]; - } + for(int idx = 0; idx < self->capacity; idx++) { + uint32_t idx2 = Dict__get_index(self, idx); + if(idx2 == self->null_index_value) continue; + Dict__set_index(self, idx, mappings[idx2]); } PK_FREE(mappings); } static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) { py_i64 hash; - if(!py_hash(key, &hash)) return false; - int idx = (uint64_t)hash % self->capacity; - int bad_hash_count = 0; - for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { - int idx2 = self->indices[idx]._[i]; - if(idx2 == -1) { - // insert new entry - DictEntry* new_entry = c11_vector__emplace(&self->entries); - new_entry->hash = (uint64_t)hash; - new_entry->key = *key; - new_entry->val = *val; - self->indices[idx]._[i] = self->entries.length - 1; - self->length++; - return true; - } + uint32_t idx; + DictEntry* entry; + if(!Dict__probe(self, key, &hash, &idx, &entry)) return false; + if(entry) { // update existing entry - DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); - // check if they have the same hash - if(entry->hash == (uint64_t)hash) { - // check if they are equal - int res = py_equal(&entry->key, key); - if(res == 1) { - entry->val = *val; - return true; - } - if(res == -1) return false; // error - // res == 0 - bad_hash_count++; - } + entry->val = *val; + return true; } - // no empty slot found - if(bad_hash_count == PK_DICT_MAX_COLLISION) { - // all `PK_DICT_MAX_COLLISION` slots have the same hash but different keys - // we are unable to solve this collision via rehashing - return RuntimeError("dict: %d/%d/%d: maximum collision reached (hash=%i)", - self->entries.length, - self->entries.capacity, - self->capacity, - hash); - } - - if(self->capacity >= (uint32_t)self->entries.length * 10) { - return RuntimeError("dict: %d/%d/%d: minimum load factor reached", - self->entries.length, - self->entries.capacity, - self->capacity); - } - Dict__rehash_2x(self); - return Dict__set(self, key, val); + // insert new entry + DictEntry* new_entry = c11_vector__emplace(&self->entries); + new_entry->hash = (uint64_t)hash; + new_entry->key = *key; + new_entry->val = *val; + Dict__set_index(self, idx, self->entries.length - 1); + self->length++; + // check if we need to rehash + float load_factor = (float)self->length / self->capacity; + if(load_factor > 4 / 7.0f) Dict__rehash_2x(self); + return true; } /// Delete an entry from the dict. /// -1: error, 0: not found, 1: found and deleted static int Dict__pop(Dict* self, py_Ref key) { py_i64 hash; - if(!py_hash(key, &hash)) return -1; - int idx = (uint64_t)hash % self->capacity; - for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { - int idx2 = self->indices[idx]._[i]; - if(idx2 == -1) continue; - DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); - if(entry->hash == (uint64_t)hash) { - int res = py_equal(&entry->key, key); - if(res == 1) { - *py_retval() = entry->val; - py_newnil(&entry->key); - self->indices[idx]._[i] = -1; - self->length--; - if(self->length < self->entries.length / 2) Dict__compact_entries(self); - return 1; - } - if(res == -1) return -1; // error - } + uint32_t idx; + DictEntry* entry; + if(!Dict__probe(self, key, &hash, &idx, &entry)) return -1; + if(!entry) return 0; // not found + + // found the entry, delete and return it + py_assign(py_retval(), &entry->val); + Dict__set_index(self, idx, self->null_index_value); + py_newnil(&entry->key); + py_newnil(&entry->val); + self->length--; + // tidy indices + uint32_t pre_z = idx; + const uint32_t max_idx = self->capacity - 1; + uint32_t z = idx < max_idx ? idx + 1 : 0; + while(true) { + uint32_t idx2 = Dict__get_index(self, z); + if(idx2 == self->null_index_value) break; + uint64_t h = c11__at(DictEntry, &self->entries, idx2)->hash; + if(h != hash) break; + Dict__swap_index(self, pre_z, z); + pre_z = z; + z = z < max_idx ? z + 1 : 0; } - return 0; + // compact entries if necessary + if(self->entries.length > 16 && self->length < self->entries.length / 2) + Dict__compact_entries(self); + return 1; } static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) { @@ -262,13 +291,13 @@ static bool dict__new__(int argc, py_Ref argv) { py_Type cls = py_totype(argv); int slots = cls == tp_dict ? 0 : -1; Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict)); - Dict__ctor(ud, 7, 8); + Dict__ctor(ud, 7, 4); return true; } void py_newdict(py_OutRef out) { Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict)); - Dict__ctor(ud, 7, 8); + Dict__ctor(ud, 7, 4); } static bool dict__init__(int argc, py_Ref argv) { From caf7505dc2c0a79880585448402615766fdb2314 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sun, 29 Jun 2025 21:44:57 +0800 Subject: [PATCH 2/2] improve `dict` --- include/pocketpy/interpreter/types.h | 4 +- include/pocketpy/xmacros/fixedhash.h | 1 + src/objects/namedict.c | 32 +++--- src/public/py_dict.c | 155 ++++++++++++++++++--------- tests/08_dict.py | 59 +++++----- tests/99_extras.py | 5 - 6 files changed, 161 insertions(+), 95 deletions(-) diff --git a/include/pocketpy/interpreter/types.h b/include/pocketpy/interpreter/types.h index 4dcc1bf4..56535e7a 100644 --- a/include/pocketpy/interpreter/types.h +++ b/include/pocketpy/interpreter/types.h @@ -12,9 +12,9 @@ typedef struct { typedef struct { int length; uint32_t capacity; - void* indices; - bool index_is_short; uint32_t null_index_value; + bool index_is_short; + void* indices; c11_vector /*T=DictEntry*/ entries; } Dict; diff --git a/include/pocketpy/xmacros/fixedhash.h b/include/pocketpy/xmacros/fixedhash.h index 7713e495..866d0a97 100644 --- a/include/pocketpy/xmacros/fixedhash.h +++ b/include/pocketpy/xmacros/fixedhash.h @@ -124,3 +124,4 @@ bool METHOD(contains)(NAME* self, K key) { #undef less #undef partial_less #undef equal +#undef hash diff --git a/src/objects/namedict.c b/src/objects/namedict.c index b95c4448..0ff04fdc 100644 --- a/src/objects/namedict.c +++ b/src/objects/namedict.c @@ -8,7 +8,7 @@ #define HASH_PROBE_1(__k, ok, i) \ ok = false; \ - i = (uintptr_t)(__k) & self->mask; \ + i = (uintptr_t)(__k)&self->mask; \ while(self->items[i].key != NULL) { \ if(self->items[i].key == (__k)) { \ ok = true; \ @@ -101,18 +101,24 @@ bool NameDict__del(NameDict* self, py_Name key) { self->items[i].key = NULL; self->items[i].value = *py_NIL(); self->length--; - // tidy - uintptr_t pre_z = i; - uintptr_t z = (i + 1) & self->mask; - while(self->items[z].key != NULL) { - uintptr_t h = (uintptr_t)self->items[z].key & self->mask; - if(h != i) break; - // std::swap(_items[pre_z], _items[z]); - NameDict_KV tmp = self->items[pre_z]; - self->items[pre_z] = self->items[z]; - self->items[z] = tmp; - pre_z = z; - z = (z + 1) & self->mask; + /* tidy */ + uint32_t posToRemove = i; + uint32_t posToShift = posToRemove; + while(true) { + posToShift = (posToShift + 1) & self->mask; + if(self->items[posToShift].key == NULL) break; + uintptr_t hash_z = (uintptr_t)self->items[posToShift].key; + uintptr_t insertPos = hash_z & self->mask; + bool cond1 = insertPos <= posToRemove; + bool cond2 = posToRemove <= posToShift; + if((cond1 && cond2) || + // chain wrapped around capacity + (posToShift < insertPos && (cond1 || cond2))) { + NameDict_KV tmp = self->items[posToRemove]; + self->items[posToRemove] = self->items[posToShift]; + self->items[posToShift] = tmp; + posToRemove = posToShift; + } } return true; } diff --git a/src/public/py_dict.c b/src/public/py_dict.c index b7796d80..133ec0c8 100644 --- a/src/public/py_dict.c +++ b/src/public/py_dict.c @@ -5,6 +5,16 @@ #include "pocketpy/interpreter/types.h" #include "pocketpy/interpreter/vm.h" +typedef struct { + Dict* dict; // weakref for slot 0 + Dict dict_backup; + DictEntry* curr; + DictEntry* end; + int mode; // 0: keys, 1: values, 2: items +} DictIterator; + +#define Dict__step(x) ((x) < mask ? (x) + 1 : 0) + static uint32_t Dict__next_cap(uint32_t cap) { switch(cap) { case 7: return 17; @@ -51,27 +61,31 @@ static uint32_t Dict__next_cap(uint32_t cap) { } } -typedef struct { - DictEntry* curr; - DictEntry* end; - int mode; // 0: keys, 1: values, 2: items -} DictIterator; +static uint64_t Dict__hash(uint64_t key) { + // https://gist.github.com/badboy/6267743 + key = (~key) + (key << 21); // key = (key << 21) - key - 1 + key = key ^ (key >> 24); + key = (key + (key << 3)) + (key << 8); // key * 265 + key = key ^ (key >> 14); + key = (key + (key << 2)) + (key << 4); // key * 21 + key = key ^ (key >> 28); + key = key + (key << 31); + return key; +} static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) { self->length = 0; - self->capacity = capacity; // the 1st prime + self->capacity = capacity; size_t indices_size; - if(self->capacity < UINT16_MAX - 1) { + if(self->capacity < UINT16_MAX) { self->index_is_short = true; indices_size = self->capacity * sizeof(uint16_t); self->null_index_value = UINT16_MAX; - self->deleted_index_value = UINT16_MAX - 1; } else { self->index_is_short = false; indices_size = self->capacity * sizeof(uint32_t); self->null_index_value = UINT32_MAX; - self->deleted_index_value = UINT32_MAX - 1; } self->indices = PK_MALLOC(indices_size); @@ -98,17 +112,17 @@ static uint32_t Dict__get_index(Dict* self, uint32_t index) { } } -static void Dict__swap_index(Dict* self, uint32_t x, uint32_t y) { +static void Dict__swap_null_index(Dict* self, uint32_t pre_z, uint32_t z) { if(self->index_is_short) { uint16_t* indices = self->indices; - uint16_t tmp = indices[x]; - indices[x] = indices[y]; - indices[y] = tmp; + assert(indices[pre_z] == UINT16_MAX); + indices[pre_z] = indices[z]; + indices[z] = UINT16_MAX; } else { uint32_t* indices = self->indices; - uint32_t tmp = indices[x]; - indices[x] = indices[y]; - indices[y] = tmp; + assert(indices[pre_z] == UINT32_MAX); + indices[pre_z] = indices[z]; + indices[z] = UINT32_MAX; } } @@ -122,17 +136,21 @@ static void Dict__set_index(Dict* self, uint32_t index, uint32_t value) { } } -static bool - Dict__probe(Dict* self, py_TValue* key, py_i64* p_hash, uint32_t* p_idx, DictEntry** p_entry) { - if(!py_hash(key, p_hash)) return false; - py_i64 hash = *p_hash; - uint32_t idx = (uint64_t)hash % self->capacity; - const uint32_t max_idx = self->capacity - 1; +static bool Dict__probe(Dict* self, + py_TValue* key, + uint64_t* p_hash, + uint32_t* p_idx, + DictEntry** p_entry) { + py_i64 h_user; + if(!py_hash(key, &h_user)) return false; + *p_hash = Dict__hash((uint64_t)h_user); + uint32_t mask = self->capacity - 1; + uint32_t idx = (*p_hash) % self->capacity; while(true) { uint32_t idx2 = Dict__get_index(self, idx); if(idx2 == self->null_index_value) break; DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); - if(entry->hash == (uint64_t)hash) { + if(entry->hash == (*p_hash)) { int res = py_equal(&entry->key, key); if(res == 1) { *p_idx = idx; @@ -142,7 +160,7 @@ static bool if(res == -1) return false; // error } // try next index - idx = idx < max_idx ? idx + 1 : 0; + idx = Dict__step(idx); } // not found *p_idx = idx; @@ -151,7 +169,7 @@ static bool } static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) { - py_i64 hash; + uint64_t hash; uint32_t idx; return Dict__probe(self, key, &hash, &idx, out); } @@ -166,14 +184,14 @@ static void Dict__clear(Dict* self) { static void Dict__rehash_2x(Dict* self) { Dict old_dict = *self; - uint32_t new_capacity = Dict__next_cap(new_capacity); + uint32_t new_capacity = Dict__next_cap(old_dict.capacity); + uint32_t mask = new_capacity - 1; // create a new dict with new capacity Dict__ctor(self, new_capacity, old_dict.entries.capacity); // move entries from old dict to new dict - const uint32_t max_idx = new_capacity - 1; for(int i = 0; i < old_dict.entries.length; i++) { DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i); - if(py_isnil(&old_entry->key)) continue; + if(py_isnil(&old_entry->key)) continue; // skip deleted uint32_t idx = old_entry->hash % new_capacity; while(true) { uint32_t idx2 = Dict__get_index(self, idx); @@ -184,14 +202,14 @@ static void Dict__rehash_2x(Dict* self) { break; } // try next index - idx = idx < max_idx ? idx + 1 : 0; + idx = Dict__step(idx); } } Dict__dtor(&old_dict); } static void Dict__compact_entries(Dict* self) { - int* mappings = PK_MALLOC(self->entries.length * sizeof(int)); + uint32_t* mappings = PK_MALLOC(self->entries.length * sizeof(uint32_t)); int n = 0; for(int i = 0; i < self->entries.length; i++) { @@ -215,7 +233,7 @@ static void Dict__compact_entries(Dict* self) { } static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) { - py_i64 hash; + uint64_t hash; uint32_t idx; DictEntry* entry; if(!Dict__probe(self, key, &hash, &idx, &entry)) return false; @@ -226,21 +244,22 @@ static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) { } // insert new entry DictEntry* new_entry = c11_vector__emplace(&self->entries); - new_entry->hash = (uint64_t)hash; + new_entry->hash = hash; new_entry->key = *key; new_entry->val = *val; Dict__set_index(self, idx, self->entries.length - 1); self->length++; // check if we need to rehash float load_factor = (float)self->length / self->capacity; - if(load_factor > 4 / 7.0f) Dict__rehash_2x(self); + if(load_factor > 0.572) Dict__rehash_2x(self); return true; } /// Delete an entry from the dict. /// -1: error, 0: not found, 1: found and deleted static int Dict__pop(Dict* self, py_Ref key) { - py_i64 hash; + // Dict__log_index(self, "before pop"); + uint64_t hash; uint32_t idx; DictEntry* entry; if(!Dict__probe(self, key, &hash, &idx, &entry)) return -1; @@ -252,26 +271,50 @@ static int Dict__pop(Dict* self, py_Ref key) { py_newnil(&entry->key); py_newnil(&entry->val); self->length--; - // tidy indices - uint32_t pre_z = idx; - const uint32_t max_idx = self->capacity - 1; - uint32_t z = idx < max_idx ? idx + 1 : 0; + + /* tidy */ + // https://github.com/OpenHFT/Chronicle-Map/blob/820573a68471509ffc1b0584454f4a67c0be1b84/src/main/java/net/openhft/chronicle/hash/impl/CompactOffHeapLinearHashTable.java#L156 + uint32_t mask = self->capacity - 1; + uint32_t posToRemove = idx; + uint32_t posToShift = posToRemove; + // int probe_count = 0; + // int swap_count = 0; while(true) { - uint32_t idx2 = Dict__get_index(self, z); - if(idx2 == self->null_index_value) break; - uint64_t h = c11__at(DictEntry, &self->entries, idx2)->hash; - if(h != hash) break; - Dict__swap_index(self, pre_z, z); - pre_z = z; - z = z < max_idx ? z + 1 : 0; + posToShift = Dict__step(posToShift); + uint32_t idx_z = Dict__get_index(self, posToShift); + if(idx_z == self->null_index_value) break; + uint64_t hash_z = c11__at(DictEntry, &self->entries, idx_z)->hash; + uint32_t insertPos = (uint64_t)hash_z % self->capacity; + // the following condition essentially means circular permutations + // of three (r = posToRemove, s = posToShift, i = insertPos) + // positions are accepted: + // [...i..r...s.] or + // [...r..s...i.] or + // [...s..i...r.] + bool cond1 = insertPos <= posToRemove; + bool cond2 = posToRemove <= posToShift; + if((cond1 && cond2) || + // chain wrapped around capacity + (posToShift < insertPos && (cond1 || cond2))) { + Dict__swap_null_index(self, posToRemove, posToShift); + posToRemove = posToShift; + // swap_count++; + } + // probe_count++; } + // printf("Dict__pop: probe_count=%d, swap_count=%d\n", probe_count, swap_count); // compact entries if necessary - if(self->entries.length > 16 && self->length < self->entries.length / 2) - Dict__compact_entries(self); + if(self->entries.length > 16 && (self->length < self->entries.length >> 1)) { + Dict__compact_entries(self); // compact entries + } + // Dict__log_index(self, "after pop"); return 1; } static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) { + assert(mode >= 0 && mode <= 2); + self->dict = dict; + self->dict_backup = *dict; // backup the dict self->curr = dict->entries.data; self->end = self->curr + dict->entries.length; self->mode = mode; @@ -286,6 +329,10 @@ static DictEntry* DictIterator__next(DictIterator* self) { return retval; } +static bool DictIterator__modified(DictIterator* self) { + return memcmp(self->dict, &self->dict_backup, sizeof(Dict)) != 0; +} + /////////////////////////////// static bool dict__new__(int argc, py_Ref argv) { py_Type cls = py_totype(argv); @@ -455,12 +502,17 @@ static bool dict_copy(int argc, py_Ref argv) { PY_CHECK_ARGC(1); Dict* self = py_touserdata(argv); Dict* new_dict = py_newobject(py_retval(), tp_dict, 0, sizeof(Dict)); - new_dict->capacity = self->capacity; new_dict->length = self->length; + new_dict->capacity = self->capacity; + new_dict->null_index_value = self->null_index_value; + new_dict->index_is_short = self->index_is_short; + // copy entries new_dict->entries = c11_vector__copy(&self->entries); // copy indices - new_dict->indices = PK_MALLOC(new_dict->capacity * sizeof(DictIndex)); - memcpy(new_dict->indices, self->indices, new_dict->capacity * sizeof(DictIndex)); + size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t) + : self->capacity * sizeof(uint32_t); + new_dict->indices = PK_MALLOC(indices_size); + memcpy(new_dict->indices, self->indices, indices_size); return true; } @@ -557,6 +609,7 @@ py_Type pk_dict__register() { static bool dict_items__next__(int argc, py_Ref argv) { PY_CHECK_ARGC(1); DictIterator* iter = py_touserdata(py_arg(0)); + if(DictIterator__modified(iter)) return RuntimeError("dictionary modified during iteration"); DictEntry* entry = (DictIterator__next(iter)); if(entry) { switch(iter->mode) { @@ -670,4 +723,4 @@ bool py_dict_apply(py_Ref self, bool (*f)(py_Ref, py_Ref, void*), void* ctx) { return true; } -#undef PK_DICT_MAX_COLLISION \ No newline at end of file +#undef Dict__step \ No newline at end of file diff --git a/tests/08_dict.py b/tests/08_dict.py index accd3e34..992603f7 100644 --- a/tests/08_dict.py +++ b/tests/08_dict.py @@ -115,30 +115,7 @@ assert a.pop(1) == 2 assert a.pop(1, None) is None -n = 2 ** 17 -a = {} -for i in range(n): - a[str(i)] = i - -for i in range(n): - y = a[str(i)] - -for i in range(n): - del a[str(i)] - -# namedict delete test -# class A: pass -# a = A() -# b = ['0', '1'] - -# for i in range(len(data)): -# z = data[i] -# setattr(a, str(z), i) -# b.append(z) -# if i % 3 == 0: -# y = b.pop() -# delattr(a, y) - +# test getitem d = {} for i in range(-1000, 1000): d[i] = i @@ -155,3 +132,37 @@ assert list(d) == ['1', 222, '333'] assert list(d.keys()) == ['1', 222, '333'] assert list(d.values()) == [1, 2, 3] assert list(d.items()) == [('1', 1), (222, 2), ('333', 3)] + +# test del +n = 2 ** 17 +a = {} +for i in range(n): + a[str(i)] = i +for i in range(n): + del a[str(i)] +assert len(a) == 0 + +# test del with int keys +if 0: + n = 2 ** 17 + a = {} + for i in range(n): + a[i] = i + for i in range(n): + del a[i] + assert len(a) == 0 + +####################### + +# namedict delete test +class A: pass +a = A() +b = ['0', '1'] + +for i in range(len(data)): + z = data[i] + setattr(a, str(z), i) + b.append(z) + if i % 3 == 0: + y = b.pop() + delattr(a, y) \ No newline at end of file diff --git a/tests/99_extras.py b/tests/99_extras.py index b6b8d9ea..732fa283 100644 --- a/tests/99_extras.py +++ b/tests/99_extras.py @@ -103,9 +103,4 @@ class A: bad_dict = {A(): 1, A(): 2, A(): 3, A(): 4} assert len(bad_dict) == 4 -try: - bad_dict[A()] = 5 # error - exit(1) -except RuntimeError as e: - assert 'maximum collision reached' in str(e)