This commit is contained in:
blueloveTH 2025-06-27 14:21:53 +08:00
parent fd6f0d76b2
commit be2aae493a
2 changed files with 140 additions and 115 deletions

View File

@ -3,22 +3,18 @@
#include "pocketpy/common/vector.h"
#include "pocketpy/objects/base.h"
#define PK_DICT_MAX_COLLISION 4
typedef struct {
uint64_t hash;
py_TValue key;
py_TValue val;
} DictEntry;
typedef struct {
int _[PK_DICT_MAX_COLLISION];
} DictIndex;
typedef struct {
int length;
uint32_t capacity;
DictIndex* indices;
void* indices;
bool index_is_short;
uint32_t null_index_value;
c11_vector /*T=DictEntry*/ entries;
} Dict;

View File

@ -51,8 +51,6 @@ static uint32_t Dict__next_cap(uint32_t cap) {
}
}
typedef struct {
DictEntry* curr;
DictEntry* end;
@ -61,9 +59,24 @@ typedef struct {
static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) {
self->length = 0;
self->capacity = capacity;
self->indices = PK_MALLOC(self->capacity * sizeof(DictIndex));
memset(self->indices, -1, self->capacity * sizeof(DictIndex));
self->capacity = capacity; // the 1st prime
size_t indices_size;
if(self->capacity < UINT16_MAX - 1) {
self->index_is_short = true;
indices_size = self->capacity * sizeof(uint16_t);
self->null_index_value = UINT16_MAX;
self->deleted_index_value = UINT16_MAX - 1;
} else {
self->index_is_short = false;
indices_size = self->capacity * sizeof(uint32_t);
self->null_index_value = UINT32_MAX;
self->deleted_index_value = UINT32_MAX - 1;
}
self->indices = PK_MALLOC(indices_size);
memset(self->indices, -1, indices_size);
c11_vector__ctor(&self->entries, sizeof(DictEntry));
c11_vector__reserve(&self->entries, entries_capacity);
}
@ -75,65 +88,105 @@ static void Dict__dtor(Dict* self) {
c11_vector__dtor(&self->entries);
}
static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
py_i64 hash;
if(!py_hash(key, &hash)) return false;
int idx = (uint64_t)hash % self->capacity;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
int idx2 = self->indices[idx]._[i];
if(idx2 == -1) continue;
static uint32_t Dict__get_index(Dict* self, uint32_t index) {
if(self->index_is_short) {
uint16_t* indices = self->indices;
return indices[index];
} else {
uint32_t* indices = self->indices;
return indices[index];
}
}
static void Dict__swap_index(Dict* self, uint32_t x, uint32_t y) {
if(self->index_is_short) {
uint16_t* indices = self->indices;
uint16_t tmp = indices[x];
indices[x] = indices[y];
indices[y] = tmp;
} else {
uint32_t* indices = self->indices;
uint32_t tmp = indices[x];
indices[x] = indices[y];
indices[y] = tmp;
}
}
static void Dict__set_index(Dict* self, uint32_t index, uint32_t value) {
if(self->index_is_short) {
uint16_t* indices = self->indices;
indices[index] = (uint16_t)value;
} else {
uint32_t* indices = self->indices;
indices[index] = value;
}
}
static bool
Dict__probe(Dict* self, py_TValue* key, py_i64* p_hash, uint32_t* p_idx, DictEntry** p_entry) {
if(!py_hash(key, p_hash)) return false;
py_i64 hash = *p_hash;
uint32_t idx = (uint64_t)hash % self->capacity;
const uint32_t max_idx = self->capacity - 1;
while(true) {
uint32_t idx2 = Dict__get_index(self, idx);
if(idx2 == self->null_index_value) break;
DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
if(entry->hash == (uint64_t)hash) {
int res = py_equal(&entry->key, key);
if(res == 1) {
*out = entry;
*p_idx = idx;
*p_entry = entry;
return true;
}
if(res == -1) return false; // error
}
// try next index
idx = idx < max_idx ? idx + 1 : 0;
}
*out = NULL;
// not found
*p_idx = idx;
*p_entry = NULL;
return true;
}
static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
py_i64 hash;
uint32_t idx;
return Dict__probe(self, key, &hash, &idx, out);
}
static void Dict__clear(Dict* self) {
memset(self->indices, -1, self->capacity * sizeof(DictIndex));
size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t)
: self->capacity * sizeof(uint32_t);
memset(self->indices, -1, indices_size);
c11_vector__clear(&self->entries);
self->length = 0;
}
static void Dict__rehash_2x(Dict* self) {
Dict old_dict = *self;
uint32_t new_capacity = self->capacity;
__RETRY:
// use next capacity
new_capacity = Dict__next_cap(new_capacity);
uint32_t new_capacity = Dict__next_cap(new_capacity);
// create a new dict with new capacity
Dict__ctor(self, new_capacity, old_dict.entries.capacity);
// move entries from old dict to new dict
const uint32_t max_idx = new_capacity - 1;
for(int i = 0; i < old_dict.entries.length; i++) {
DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i);
if(py_isnil(&old_entry->key)) continue;
int idx = old_entry->hash % new_capacity;
bool success = false;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
int idx2 = self->indices[idx]._[i];
if(idx2 == -1) {
// insert new entry (empty slot)
uint32_t idx = old_entry->hash % new_capacity;
while(true) {
uint32_t idx2 = Dict__get_index(self, idx);
if(idx2 == self->null_index_value) {
c11_vector__push(DictEntry, &self->entries, *old_entry);
self->indices[idx]._[i] = self->entries.length - 1;
Dict__set_index(self, idx, self->entries.length - 1);
self->length++;
success = true;
break;
}
}
if(!success) {
Dict__dtor(self);
goto __RETRY;
// try next index
idx = idx < max_idx ? idx + 1 : 0;
}
}
// done
Dict__dtor(&old_dict);
}
@ -153,94 +206,70 @@ static void Dict__compact_entries(Dict* self) {
}
self->entries.length = n;
// update indices
for(uint32_t i = 0; i < self->capacity; i++) {
for(int j = 0; j < PK_DICT_MAX_COLLISION; j++) {
int idx = self->indices[i]._[j];
if(idx == -1) continue;
self->indices[i]._[j] = mappings[idx];
}
for(int idx = 0; idx < self->capacity; idx++) {
uint32_t idx2 = Dict__get_index(self, idx);
if(idx2 == self->null_index_value) continue;
Dict__set_index(self, idx, mappings[idx2]);
}
PK_FREE(mappings);
}
static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
py_i64 hash;
if(!py_hash(key, &hash)) return false;
int idx = (uint64_t)hash % self->capacity;
int bad_hash_count = 0;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
int idx2 = self->indices[idx]._[i];
if(idx2 == -1) {
uint32_t idx;
DictEntry* entry;
if(!Dict__probe(self, key, &hash, &idx, &entry)) return false;
if(entry) {
// update existing entry
entry->val = *val;
return true;
}
// insert new entry
DictEntry* new_entry = c11_vector__emplace(&self->entries);
new_entry->hash = (uint64_t)hash;
new_entry->key = *key;
new_entry->val = *val;
self->indices[idx]._[i] = self->entries.length - 1;
Dict__set_index(self, idx, self->entries.length - 1);
self->length++;
// check if we need to rehash
float load_factor = (float)self->length / self->capacity;
if(load_factor > 4 / 7.0f) Dict__rehash_2x(self);
return true;
}
// update existing entry
DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
// check if they have the same hash
if(entry->hash == (uint64_t)hash) {
// check if they are equal
int res = py_equal(&entry->key, key);
if(res == 1) {
entry->val = *val;
return true;
}
if(res == -1) return false; // error
// res == 0
bad_hash_count++;
}
}
// no empty slot found
if(bad_hash_count == PK_DICT_MAX_COLLISION) {
// all `PK_DICT_MAX_COLLISION` slots have the same hash but different keys
// we are unable to solve this collision via rehashing
return RuntimeError("dict: %d/%d/%d: maximum collision reached (hash=%i)",
self->entries.length,
self->entries.capacity,
self->capacity,
hash);
}
if(self->capacity >= (uint32_t)self->entries.length * 10) {
return RuntimeError("dict: %d/%d/%d: minimum load factor reached",
self->entries.length,
self->entries.capacity,
self->capacity);
}
Dict__rehash_2x(self);
return Dict__set(self, key, val);
}
/// Delete an entry from the dict.
/// -1: error, 0: not found, 1: found and deleted
static int Dict__pop(Dict* self, py_Ref key) {
py_i64 hash;
if(!py_hash(key, &hash)) return -1;
int idx = (uint64_t)hash % self->capacity;
for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
int idx2 = self->indices[idx]._[i];
if(idx2 == -1) continue;
DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
if(entry->hash == (uint64_t)hash) {
int res = py_equal(&entry->key, key);
if(res == 1) {
*py_retval() = entry->val;
uint32_t idx;
DictEntry* entry;
if(!Dict__probe(self, key, &hash, &idx, &entry)) return -1;
if(!entry) return 0; // not found
// found the entry, delete and return it
py_assign(py_retval(), &entry->val);
Dict__set_index(self, idx, self->null_index_value);
py_newnil(&entry->key);
self->indices[idx]._[i] = -1;
py_newnil(&entry->val);
self->length--;
if(self->length < self->entries.length / 2) Dict__compact_entries(self);
// tidy indices
uint32_t pre_z = idx;
const uint32_t max_idx = self->capacity - 1;
uint32_t z = idx < max_idx ? idx + 1 : 0;
while(true) {
uint32_t idx2 = Dict__get_index(self, z);
if(idx2 == self->null_index_value) break;
uint64_t h = c11__at(DictEntry, &self->entries, idx2)->hash;
if(h != hash) break;
Dict__swap_index(self, pre_z, z);
pre_z = z;
z = z < max_idx ? z + 1 : 0;
}
// compact entries if necessary
if(self->entries.length > 16 && self->length < self->entries.length / 2)
Dict__compact_entries(self);
return 1;
}
if(res == -1) return -1; // error
}
}
return 0;
}
static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) {
self->curr = dict->entries.data;
@ -262,13 +291,13 @@ static bool dict__new__(int argc, py_Ref argv) {
py_Type cls = py_totype(argv);
int slots = cls == tp_dict ? 0 : -1;
Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict));
Dict__ctor(ud, 7, 8);
Dict__ctor(ud, 7, 4);
return true;
}
void py_newdict(py_OutRef out) {
Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict));
Dict__ctor(ud, 7, 8);
Dict__ctor(ud, 7, 4);
}
static bool dict__init__(int argc, py_Ref argv) {