This commit is contained in:
blueloveTH 2025-06-27 14:21:53 +08:00
parent fd6f0d76b2
commit be2aae493a
2 changed files with 140 additions and 115 deletions

View File

@ -3,22 +3,18 @@
#include "pocketpy/common/vector.h" #include "pocketpy/common/vector.h"
#include "pocketpy/objects/base.h" #include "pocketpy/objects/base.h"
#define PK_DICT_MAX_COLLISION 4
typedef struct { typedef struct {
uint64_t hash; uint64_t hash;
py_TValue key; py_TValue key;
py_TValue val; py_TValue val;
} DictEntry; } DictEntry;
typedef struct {
int _[PK_DICT_MAX_COLLISION];
} DictIndex;
typedef struct { typedef struct {
int length; int length;
uint32_t capacity; uint32_t capacity;
DictIndex* indices; void* indices;
bool index_is_short;
uint32_t null_index_value;
c11_vector /*T=DictEntry*/ entries; c11_vector /*T=DictEntry*/ entries;
} Dict; } Dict;

View File

@ -51,8 +51,6 @@ static uint32_t Dict__next_cap(uint32_t cap) {
} }
} }
typedef struct { typedef struct {
DictEntry* curr; DictEntry* curr;
DictEntry* end; DictEntry* end;
@ -61,9 +59,24 @@ typedef struct {
static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) { static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) {
self->length = 0; self->length = 0;
self->capacity = capacity; self->capacity = capacity; // the 1st prime
self->indices = PK_MALLOC(self->capacity * sizeof(DictIndex));
memset(self->indices, -1, self->capacity * sizeof(DictIndex)); size_t indices_size;
if(self->capacity < UINT16_MAX - 1) {
self->index_is_short = true;
indices_size = self->capacity * sizeof(uint16_t);
self->null_index_value = UINT16_MAX;
self->deleted_index_value = UINT16_MAX - 1;
} else {
self->index_is_short = false;
indices_size = self->capacity * sizeof(uint32_t);
self->null_index_value = UINT32_MAX;
self->deleted_index_value = UINT32_MAX - 1;
}
self->indices = PK_MALLOC(indices_size);
memset(self->indices, -1, indices_size);
c11_vector__ctor(&self->entries, sizeof(DictEntry)); c11_vector__ctor(&self->entries, sizeof(DictEntry));
c11_vector__reserve(&self->entries, entries_capacity); c11_vector__reserve(&self->entries, entries_capacity);
} }
@ -75,65 +88,105 @@ static void Dict__dtor(Dict* self) {
c11_vector__dtor(&self->entries); c11_vector__dtor(&self->entries);
} }
static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) { static uint32_t Dict__get_index(Dict* self, uint32_t index) {
py_i64 hash; if(self->index_is_short) {
if(!py_hash(key, &hash)) return false; uint16_t* indices = self->indices;
int idx = (uint64_t)hash % self->capacity; return indices[index];
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { } else {
int idx2 = self->indices[idx]._[i]; uint32_t* indices = self->indices;
if(idx2 == -1) continue; return indices[index];
}
}
static void Dict__swap_index(Dict* self, uint32_t x, uint32_t y) {
if(self->index_is_short) {
uint16_t* indices = self->indices;
uint16_t tmp = indices[x];
indices[x] = indices[y];
indices[y] = tmp;
} else {
uint32_t* indices = self->indices;
uint32_t tmp = indices[x];
indices[x] = indices[y];
indices[y] = tmp;
}
}
static void Dict__set_index(Dict* self, uint32_t index, uint32_t value) {
if(self->index_is_short) {
uint16_t* indices = self->indices;
indices[index] = (uint16_t)value;
} else {
uint32_t* indices = self->indices;
indices[index] = value;
}
}
static bool
Dict__probe(Dict* self, py_TValue* key, py_i64* p_hash, uint32_t* p_idx, DictEntry** p_entry) {
if(!py_hash(key, p_hash)) return false;
py_i64 hash = *p_hash;
uint32_t idx = (uint64_t)hash % self->capacity;
const uint32_t max_idx = self->capacity - 1;
while(true) {
uint32_t idx2 = Dict__get_index(self, idx);
if(idx2 == self->null_index_value) break;
DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
if(entry->hash == (uint64_t)hash) { if(entry->hash == (uint64_t)hash) {
int res = py_equal(&entry->key, key); int res = py_equal(&entry->key, key);
if(res == 1) { if(res == 1) {
*out = entry; *p_idx = idx;
*p_entry = entry;
return true; return true;
} }
if(res == -1) return false; // error if(res == -1) return false; // error
} }
// try next index
idx = idx < max_idx ? idx + 1 : 0;
} }
*out = NULL; // not found
*p_idx = idx;
*p_entry = NULL;
return true; return true;
} }
static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
py_i64 hash;
uint32_t idx;
return Dict__probe(self, key, &hash, &idx, out);
}
static void Dict__clear(Dict* self) { static void Dict__clear(Dict* self) {
memset(self->indices, -1, self->capacity * sizeof(DictIndex)); size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t)
: self->capacity * sizeof(uint32_t);
memset(self->indices, -1, indices_size);
c11_vector__clear(&self->entries); c11_vector__clear(&self->entries);
self->length = 0; self->length = 0;
} }
static void Dict__rehash_2x(Dict* self) { static void Dict__rehash_2x(Dict* self) {
Dict old_dict = *self; Dict old_dict = *self;
uint32_t new_capacity = self->capacity; uint32_t new_capacity = Dict__next_cap(new_capacity);
__RETRY:
// use next capacity
new_capacity = Dict__next_cap(new_capacity);
// create a new dict with new capacity // create a new dict with new capacity
Dict__ctor(self, new_capacity, old_dict.entries.capacity); Dict__ctor(self, new_capacity, old_dict.entries.capacity);
// move entries from old dict to new dict // move entries from old dict to new dict
const uint32_t max_idx = new_capacity - 1;
for(int i = 0; i < old_dict.entries.length; i++) { for(int i = 0; i < old_dict.entries.length; i++) {
DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i); DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i);
if(py_isnil(&old_entry->key)) continue; if(py_isnil(&old_entry->key)) continue;
int idx = old_entry->hash % new_capacity; uint32_t idx = old_entry->hash % new_capacity;
bool success = false; while(true) {
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { uint32_t idx2 = Dict__get_index(self, idx);
int idx2 = self->indices[idx]._[i]; if(idx2 == self->null_index_value) {
if(idx2 == -1) {
// insert new entry (empty slot)
c11_vector__push(DictEntry, &self->entries, *old_entry); c11_vector__push(DictEntry, &self->entries, *old_entry);
self->indices[idx]._[i] = self->entries.length - 1; Dict__set_index(self, idx, self->entries.length - 1);
self->length++; self->length++;
success = true;
break; break;
} }
} // try next index
if(!success) { idx = idx < max_idx ? idx + 1 : 0;
Dict__dtor(self);
goto __RETRY;
} }
} }
// done
Dict__dtor(&old_dict); Dict__dtor(&old_dict);
} }
@ -153,93 +206,69 @@ static void Dict__compact_entries(Dict* self) {
} }
self->entries.length = n; self->entries.length = n;
// update indices // update indices
for(uint32_t i = 0; i < self->capacity; i++) { for(int idx = 0; idx < self->capacity; idx++) {
for(int j = 0; j < PK_DICT_MAX_COLLISION; j++) { uint32_t idx2 = Dict__get_index(self, idx);
int idx = self->indices[i]._[j]; if(idx2 == self->null_index_value) continue;
if(idx == -1) continue; Dict__set_index(self, idx, mappings[idx2]);
self->indices[i]._[j] = mappings[idx];
}
} }
PK_FREE(mappings); PK_FREE(mappings);
} }
static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) { static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
py_i64 hash; py_i64 hash;
if(!py_hash(key, &hash)) return false; uint32_t idx;
int idx = (uint64_t)hash % self->capacity; DictEntry* entry;
int bad_hash_count = 0; if(!Dict__probe(self, key, &hash, &idx, &entry)) return false;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { if(entry) {
int idx2 = self->indices[idx]._[i];
if(idx2 == -1) {
// insert new entry
DictEntry* new_entry = c11_vector__emplace(&self->entries);
new_entry->hash = (uint64_t)hash;
new_entry->key = *key;
new_entry->val = *val;
self->indices[idx]._[i] = self->entries.length - 1;
self->length++;
return true;
}
// update existing entry // update existing entry
DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); entry->val = *val;
// check if they have the same hash return true;
if(entry->hash == (uint64_t)hash) {
// check if they are equal
int res = py_equal(&entry->key, key);
if(res == 1) {
entry->val = *val;
return true;
}
if(res == -1) return false; // error
// res == 0
bad_hash_count++;
}
} }
// no empty slot found // insert new entry
if(bad_hash_count == PK_DICT_MAX_COLLISION) { DictEntry* new_entry = c11_vector__emplace(&self->entries);
// all `PK_DICT_MAX_COLLISION` slots have the same hash but different keys new_entry->hash = (uint64_t)hash;
// we are unable to solve this collision via rehashing new_entry->key = *key;
return RuntimeError("dict: %d/%d/%d: maximum collision reached (hash=%i)", new_entry->val = *val;
self->entries.length, Dict__set_index(self, idx, self->entries.length - 1);
self->entries.capacity, self->length++;
self->capacity, // check if we need to rehash
hash); float load_factor = (float)self->length / self->capacity;
} if(load_factor > 4 / 7.0f) Dict__rehash_2x(self);
return true;
if(self->capacity >= (uint32_t)self->entries.length * 10) {
return RuntimeError("dict: %d/%d/%d: minimum load factor reached",
self->entries.length,
self->entries.capacity,
self->capacity);
}
Dict__rehash_2x(self);
return Dict__set(self, key, val);
} }
/// Delete an entry from the dict. /// Delete an entry from the dict.
/// -1: error, 0: not found, 1: found and deleted /// -1: error, 0: not found, 1: found and deleted
static int Dict__pop(Dict* self, py_Ref key) { static int Dict__pop(Dict* self, py_Ref key) {
py_i64 hash; py_i64 hash;
if(!py_hash(key, &hash)) return -1; uint32_t idx;
int idx = (uint64_t)hash % self->capacity; DictEntry* entry;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) { if(!Dict__probe(self, key, &hash, &idx, &entry)) return -1;
int idx2 = self->indices[idx]._[i]; if(!entry) return 0; // not found
if(idx2 == -1) continue;
DictEntry* entry = c11__at(DictEntry, &self->entries, idx2); // found the entry, delete and return it
if(entry->hash == (uint64_t)hash) { py_assign(py_retval(), &entry->val);
int res = py_equal(&entry->key, key); Dict__set_index(self, idx, self->null_index_value);
if(res == 1) { py_newnil(&entry->key);
*py_retval() = entry->val; py_newnil(&entry->val);
py_newnil(&entry->key); self->length--;
self->indices[idx]._[i] = -1; // tidy indices
self->length--; uint32_t pre_z = idx;
if(self->length < self->entries.length / 2) Dict__compact_entries(self); const uint32_t max_idx = self->capacity - 1;
return 1; uint32_t z = idx < max_idx ? idx + 1 : 0;
} while(true) {
if(res == -1) return -1; // error uint32_t idx2 = Dict__get_index(self, z);
} if(idx2 == self->null_index_value) break;
uint64_t h = c11__at(DictEntry, &self->entries, idx2)->hash;
if(h != hash) break;
Dict__swap_index(self, pre_z, z);
pre_z = z;
z = z < max_idx ? z + 1 : 0;
} }
return 0; // compact entries if necessary
if(self->entries.length > 16 && self->length < self->entries.length / 2)
Dict__compact_entries(self);
return 1;
} }
static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) { static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) {
@ -262,13 +291,13 @@ static bool dict__new__(int argc, py_Ref argv) {
py_Type cls = py_totype(argv); py_Type cls = py_totype(argv);
int slots = cls == tp_dict ? 0 : -1; int slots = cls == tp_dict ? 0 : -1;
Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict)); Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict));
Dict__ctor(ud, 7, 8); Dict__ctor(ud, 7, 4);
return true; return true;
} }
void py_newdict(py_OutRef out) { void py_newdict(py_OutRef out) {
Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict)); Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict));
Dict__ctor(ud, 7, 8); Dict__ctor(ud, 7, 4);
} }
static bool dict__init__(int argc, py_Ref argv) { static bool dict__init__(int argc, py_Ref argv) {