From be2aae493abe37273bb9745977a9ee4482ccce38 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 27 Jun 2025 14:21:53 +0800 Subject: [PATCH] backup --- include/pocketpy/interpreter/types.h | 12 +- src/public/py_dict.c | 243 +++++++++++++++------------ 2 files changed, 140 insertions(+), 115 deletions(-) diff --git a/include/pocketpy/interpreter/types.h b/include/pocketpy/interpreter/types.h index 5c4dd460..4dcc1bf4 100644 --- a/include/pocketpy/interpreter/types.h +++ b/include/pocketpy/interpreter/types.h @@ -3,26 +3,22 @@ #include "pocketpy/common/vector.h" #include "pocketpy/objects/base.h" -#define PK_DICT_MAX_COLLISION 4 - typedef struct { uint64_t hash; py_TValue key; py_TValue val; } DictEntry; -typedef struct { - int _[PK_DICT_MAX_COLLISION]; -} DictIndex; - typedef struct { int length; uint32_t capacity; - DictIndex* indices; + void* indices; + bool index_is_short; + uint32_t null_index_value; c11_vector /*T=DictEntry*/ entries; } Dict; typedef c11_vector List; void c11_chunked_array2d__mark(void* ud, c11_vector* p_stack); -void function__gc_mark(void* ud, c11_vector* p_stack); \ No newline at end of file +void function__gc_mark(void* ud, c11_vector* p_stack); diff --git a/src/public/py_dict.c b/src/public/py_dict.c index 347b1f26..b7796d80 100644 --- a/src/public/py_dict.c +++ b/src/public/py_dict.c @@ -51,8 +51,6 @@ static uint32_t Dict__next_cap(uint32_t cap) { } } - - typedef struct { DictEntry* curr; DictEntry* end; @@ -61,9 +59,24 @@ typedef struct { static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) { self->length = 0; - self->capacity = capacity; - self->indices = PK_MALLOC(self->capacity * sizeof(DictIndex)); - memset(self->indices, -1, self->capacity * sizeof(DictIndex)); + self->capacity = capacity; // the 1st prime + + size_t indices_size; + if(self->capacity < UINT16_MAX - 1) { + self->index_is_short = true; + indices_size = self->capacity * sizeof(uint16_t); + self->null_index_value = UINT16_MAX; + self->deleted_index_value = UINT16_MAX - 1; + } else { + self->index_is_short = false; + indices_size = self->capacity * sizeof(uint32_t); + self->null_index_value = UINT32_MAX; + self->deleted_index_value = UINT32_MAX - 1; + } + + self->indices = PK_MALLOC(indices_size); + memset(self->indices, -1, indices_size); + c11_vector__ctor(&self->entries, sizeof(DictEntry)); c11_vector__reserve(&self->entries, entries_capacity); } @@ -75,65 +88,105 @@ static void Dict__dtor(Dict* self) { c11_vector__dtor(&self->entries); } -static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) { - py_i64 hash; - if(!py_hash(key, &hash)) return false; - int idx = (uint64_t)hash % self->capacity; - for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { - int idx2 = self->indices[idx]._[i]; - if(idx2 == -1) continue; +static uint32_t Dict__get_index(Dict* self, uint32_t index) { + if(self->index_is_short) { + uint16_t* indices = self->indices; + return indices[index]; + } else { + uint32_t* indices = self->indices; + return indices[index]; + } +} + +static void Dict__swap_index(Dict* self, uint32_t x, uint32_t y) { + if(self->index_is_short) { + uint16_t* indices = self->indices; + uint16_t tmp = indices[x]; + indices[x] = indices[y]; + indices[y] = tmp; + } else { + uint32_t* indices = self->indices; + uint32_t tmp = indices[x]; + indices[x] = indices[y]; + indices[y] = tmp; + } +} + +static void Dict__set_index(Dict* self, uint32_t index, uint32_t value) { + if(self->index_is_short) { + uint16_t* indices = self->indices; + indices[index] = (uint16_t)value; + } else { + uint32_t* indices = self->indices; + indices[index] = value; + } +} + +static bool + Dict__probe(Dict* self, py_TValue* key, py_i64* p_hash, uint32_t* p_idx, DictEntry** p_entry) { + if(!py_hash(key, p_hash)) return false; + py_i64 hash = *p_hash; + uint32_t idx = (uint64_t)hash % self->capacity; + const uint32_t max_idx = self->capacity - 1; + while(true) { + uint32_t idx2 = Dict__get_index(self, idx); + if(idx2 == self->null_index_value) break; DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); if(entry->hash == (uint64_t)hash) { int res = py_equal(&entry->key, key); if(res == 1) { - *out = entry; + *p_idx = idx; + *p_entry = entry; return true; } if(res == -1) return false; // error } + // try next index + idx = idx < max_idx ? idx + 1 : 0; } - *out = NULL; + // not found + *p_idx = idx; + *p_entry = NULL; return true; } +static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) { + py_i64 hash; + uint32_t idx; + return Dict__probe(self, key, &hash, &idx, out); +} + static void Dict__clear(Dict* self) { - memset(self->indices, -1, self->capacity * sizeof(DictIndex)); + size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t) + : self->capacity * sizeof(uint32_t); + memset(self->indices, -1, indices_size); c11_vector__clear(&self->entries); self->length = 0; } static void Dict__rehash_2x(Dict* self) { Dict old_dict = *self; - uint32_t new_capacity = self->capacity; - -__RETRY: - // use next capacity - new_capacity = Dict__next_cap(new_capacity); + uint32_t new_capacity = Dict__next_cap(new_capacity); // create a new dict with new capacity Dict__ctor(self, new_capacity, old_dict.entries.capacity); // move entries from old dict to new dict + const uint32_t max_idx = new_capacity - 1; for(int i = 0; i < old_dict.entries.length; i++) { DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i); if(py_isnil(&old_entry->key)) continue; - int idx = old_entry->hash % new_capacity; - bool success = false; - for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { - int idx2 = self->indices[idx]._[i]; - if(idx2 == -1) { - // insert new entry (empty slot) + uint32_t idx = old_entry->hash % new_capacity; + while(true) { + uint32_t idx2 = Dict__get_index(self, idx); + if(idx2 == self->null_index_value) { c11_vector__push(DictEntry, &self->entries, *old_entry); - self->indices[idx]._[i] = self->entries.length - 1; + Dict__set_index(self, idx, self->entries.length - 1); self->length++; - success = true; break; } - } - if(!success) { - Dict__dtor(self); - goto __RETRY; + // try next index + idx = idx < max_idx ? idx + 1 : 0; } } - // done Dict__dtor(&old_dict); } @@ -153,93 +206,69 @@ static void Dict__compact_entries(Dict* self) { } self->entries.length = n; // update indices - for(uint32_t i = 0; i < self->capacity; i++) { - for(int j = 0; j < PK_DICT_MAX_COLLISION; j++) { - int idx = self->indices[i]._[j]; - if(idx == -1) continue; - self->indices[i]._[j] = mappings[idx]; - } + for(int idx = 0; idx < self->capacity; idx++) { + uint32_t idx2 = Dict__get_index(self, idx); + if(idx2 == self->null_index_value) continue; + Dict__set_index(self, idx, mappings[idx2]); } PK_FREE(mappings); } static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) { py_i64 hash; - if(!py_hash(key, &hash)) return false; - int idx = (uint64_t)hash % self->capacity; - int bad_hash_count = 0; - for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { - int idx2 = self->indices[idx]._[i]; - if(idx2 == -1) { - // insert new entry - DictEntry* new_entry = c11_vector__emplace(&self->entries); - new_entry->hash = (uint64_t)hash; - new_entry->key = *key; - new_entry->val = *val; - self->indices[idx]._[i] = self->entries.length - 1; - self->length++; - return true; - } + uint32_t idx; + DictEntry* entry; + if(!Dict__probe(self, key, &hash, &idx, &entry)) return false; + if(entry) { // update existing entry - DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); - // check if they have the same hash - if(entry->hash == (uint64_t)hash) { - // check if they are equal - int res = py_equal(&entry->key, key); - if(res == 1) { - entry->val = *val; - return true; - } - if(res == -1) return false; // error - // res == 0 - bad_hash_count++; - } + entry->val = *val; + return true; } - // no empty slot found - if(bad_hash_count == PK_DICT_MAX_COLLISION) { - // all `PK_DICT_MAX_COLLISION` slots have the same hash but different keys - // we are unable to solve this collision via rehashing - return RuntimeError("dict: %d/%d/%d: maximum collision reached (hash=%i)", - self->entries.length, - self->entries.capacity, - self->capacity, - hash); - } - - if(self->capacity >= (uint32_t)self->entries.length * 10) { - return RuntimeError("dict: %d/%d/%d: minimum load factor reached", - self->entries.length, - self->entries.capacity, - self->capacity); - } - Dict__rehash_2x(self); - return Dict__set(self, key, val); + // insert new entry + DictEntry* new_entry = c11_vector__emplace(&self->entries); + new_entry->hash = (uint64_t)hash; + new_entry->key = *key; + new_entry->val = *val; + Dict__set_index(self, idx, self->entries.length - 1); + self->length++; + // check if we need to rehash + float load_factor = (float)self->length / self->capacity; + if(load_factor > 4 / 7.0f) Dict__rehash_2x(self); + return true; } /// Delete an entry from the dict. /// -1: error, 0: not found, 1: found and deleted static int Dict__pop(Dict* self, py_Ref key) { py_i64 hash; - if(!py_hash(key, &hash)) return -1; - int idx = (uint64_t)hash % self->capacity; - for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { - int idx2 = self->indices[idx]._[i]; - if(idx2 == -1) continue; - DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); - if(entry->hash == (uint64_t)hash) { - int res = py_equal(&entry->key, key); - if(res == 1) { - *py_retval() = entry->val; - py_newnil(&entry->key); - self->indices[idx]._[i] = -1; - self->length--; - if(self->length < self->entries.length / 2) Dict__compact_entries(self); - return 1; - } - if(res == -1) return -1; // error - } + uint32_t idx; + DictEntry* entry; + if(!Dict__probe(self, key, &hash, &idx, &entry)) return -1; + if(!entry) return 0; // not found + + // found the entry, delete and return it + py_assign(py_retval(), &entry->val); + Dict__set_index(self, idx, self->null_index_value); + py_newnil(&entry->key); + py_newnil(&entry->val); + self->length--; + // tidy indices + uint32_t pre_z = idx; + const uint32_t max_idx = self->capacity - 1; + uint32_t z = idx < max_idx ? idx + 1 : 0; + while(true) { + uint32_t idx2 = Dict__get_index(self, z); + if(idx2 == self->null_index_value) break; + uint64_t h = c11__at(DictEntry, &self->entries, idx2)->hash; + if(h != hash) break; + Dict__swap_index(self, pre_z, z); + pre_z = z; + z = z < max_idx ? z + 1 : 0; } - return 0; + // compact entries if necessary + if(self->entries.length > 16 && self->length < self->entries.length / 2) + Dict__compact_entries(self); + return 1; } static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) { @@ -262,13 +291,13 @@ static bool dict__new__(int argc, py_Ref argv) { py_Type cls = py_totype(argv); int slots = cls == tp_dict ? 0 : -1; Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict)); - Dict__ctor(ud, 7, 8); + Dict__ctor(ud, 7, 4); return true; } void py_newdict(py_OutRef out) { Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict)); - Dict__ctor(ud, 7, 8); + Dict__ctor(ud, 7, 4); } static bool dict__init__(int argc, py_Ref argv) {