pocketpy/src/public/PyDict.c
2025-09-17 14:37:59 +08:00

753 lines
24 KiB
C

#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
#include "pocketpy/common/sstream.h"
#include "pocketpy/interpreter/types.h"
#include "pocketpy/interpreter/vm.h"
typedef struct {
Dict* dict; // weakref for slot 0
Dict dict_backup;
DictEntry* curr;
DictEntry* end;
int mode; // 0: keys, 1: values, 2: items
} DictIterator;
#define Dict__step(x) ((x) < mask ? (x) + 1 : 0)
static uint32_t Dict__next_cap(uint32_t cap) {
switch(cap) {
case 7: return 17;
case 17: return 37;
case 37: return 79;
case 79: return 163;
case 163: return 331;
case 331: return 673;
case 673: return 1361;
case 1361: return 2053;
case 2053: return 3083;
case 3083: return 4637;
case 4637: return 6959;
case 6959: return 10453;
case 10453: return 15683;
case 15683: return 23531;
case 23531: return 35311;
case 35311: return 52967;
case 52967: return 79451;
case 79451: return 119179;
case 119179: return 178781;
case 178781: return 268189;
case 268189: return 402299;
case 402299: return 603457;
case 603457: return 905189;
case 905189: return 1357787;
case 1357787: return 2036687;
case 2036687: return 3055043;
case 3055043: return 4582577;
case 4582577: return 6873871;
case 6873871: return 10310819;
case 10310819: return 15466229;
case 15466229: return 23199347;
case 23199347: return 34799021;
case 34799021: return 52198537;
case 52198537: return 78297827;
case 78297827: return 117446801;
case 117446801: return 176170229;
case 176170229: return 264255353;
case 264255353: return 396383041;
case 396383041: return 594574583;
case 594574583: return 891861923;
default: c11__unreachable();
}
}
static uint64_t Dict__hash_2nd(uint64_t key) {
// https://gist.github.com/badboy/6267743
key = (~key) + (key << 21); // key = (key << 21) - key - 1
key = key ^ (key >> 24);
key = (key + (key << 3)) + (key << 8); // key * 265
key = key ^ (key >> 14);
key = (key + (key << 2)) + (key << 4); // key * 21
key = key ^ (key >> 28);
key = key + (key << 31);
return key;
}
static void Dict__ctor(Dict* self, uint32_t capacity, int entries_capacity) {
self->length = 0;
self->capacity = capacity;
size_t indices_size;
if(self->capacity < UINT16_MAX) {
self->index_is_short = true;
indices_size = self->capacity * sizeof(uint16_t);
self->null_index_value = UINT16_MAX;
} else {
self->index_is_short = false;
indices_size = self->capacity * sizeof(uint32_t);
self->null_index_value = UINT32_MAX;
}
self->indices = PK_MALLOC(indices_size);
memset(self->indices, -1, indices_size);
c11_vector__ctor(&self->entries, sizeof(DictEntry));
c11_vector__reserve(&self->entries, entries_capacity);
}
static void Dict__dtor(Dict* self) {
self->length = 0;
self->capacity = 0;
PK_FREE(self->indices);
c11_vector__dtor(&self->entries);
}
static uint32_t Dict__get_index(Dict* self, uint32_t index) {
if(self->index_is_short) {
uint16_t* indices = self->indices;
return indices[index];
} else {
uint32_t* indices = self->indices;
return indices[index];
}
}
static void Dict__swap_null_index(Dict* self, uint32_t pre_z, uint32_t z) {
if(self->index_is_short) {
uint16_t* indices = self->indices;
assert(indices[pre_z] == UINT16_MAX);
indices[pre_z] = indices[z];
indices[z] = UINT16_MAX;
} else {
uint32_t* indices = self->indices;
assert(indices[pre_z] == UINT32_MAX);
indices[pre_z] = indices[z];
indices[z] = UINT32_MAX;
}
}
static void Dict__set_index(Dict* self, uint32_t index, uint32_t value) {
if(self->index_is_short) {
uint16_t* indices = self->indices;
indices[index] = (uint16_t)value;
} else {
uint32_t* indices = self->indices;
indices[index] = value;
}
}
// Dict__probe won't raise exception for string keys
static bool Dict__probe(Dict* self,
py_TValue* key,
uint64_t* p_hash,
uint32_t* p_idx,
DictEntry** p_entry) {
if(py_isstr(key)) {
*p_hash = c11_sv__hash(py_tosv(key));
} else {
py_i64 h_user;
if(!py_hash(key, &h_user)) return false;
*p_hash = Dict__hash_2nd((uint64_t)h_user);
}
uint32_t mask = self->capacity - 1;
uint32_t idx = (*p_hash) % self->capacity;
while(true) {
uint32_t idx2 = Dict__get_index(self, idx);
if(idx2 == self->null_index_value) break;
DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
if(entry->hash == (*p_hash)) {
if(py_isstr(&entry->key) && py_isstr(key)) {
c11_sv lhs = py_tosv(&entry->key);
c11_sv rhs = py_tosv(key);
if(c11__sveq(lhs, rhs)) {
*p_idx = idx;
*p_entry = entry;
return true;
}
} else {
int res = py_equal(&entry->key, key);
if(res == 1) {
*p_idx = idx;
*p_entry = entry;
return true;
}
if(res == -1) return false; // error
}
}
// try next index
idx = Dict__step(idx);
}
// not found
*p_idx = idx;
*p_entry = NULL;
return true;
}
static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
uint64_t hash;
uint32_t idx;
return Dict__probe(self, key, &hash, &idx, out);
}
static void Dict__clear(Dict* self) {
size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t)
: self->capacity * sizeof(uint32_t);
memset(self->indices, -1, indices_size);
c11_vector__clear(&self->entries);
self->length = 0;
}
static void Dict__rehash_2x(Dict* self) {
Dict old_dict = *self;
uint32_t new_capacity = Dict__next_cap(old_dict.capacity);
uint32_t mask = new_capacity - 1;
// create a new dict with new capacity
Dict__ctor(self, new_capacity, old_dict.entries.capacity);
// move entries from old dict to new dict
for(int i = 0; i < old_dict.entries.length; i++) {
DictEntry* old_entry = c11__at(DictEntry, &old_dict.entries, i);
if(py_isnil(&old_entry->key)) continue; // skip deleted
uint32_t idx = old_entry->hash % new_capacity;
while(true) {
uint32_t idx2 = Dict__get_index(self, idx);
if(idx2 == self->null_index_value) {
c11_vector__push(DictEntry, &self->entries, *old_entry);
Dict__set_index(self, idx, self->entries.length - 1);
self->length++;
break;
}
// try next index
idx = Dict__step(idx);
}
}
Dict__dtor(&old_dict);
}
static void Dict__compact_entries(Dict* self) {
uint32_t* mappings = PK_MALLOC(self->entries.length * sizeof(uint32_t));
int n = 0;
for(int i = 0; i < self->entries.length; i++) {
DictEntry* entry = c11__at(DictEntry, &self->entries, i);
if(py_isnil(&entry->key)) continue;
mappings[i] = n;
if(i != n) {
DictEntry* new_entry = c11__at(DictEntry, &self->entries, n);
*new_entry = *entry;
}
n++;
}
self->entries.length = n;
// update indices
for(uint32_t idx = 0; idx < self->capacity; idx++) {
uint32_t idx2 = Dict__get_index(self, idx);
if(idx2 == self->null_index_value) continue;
Dict__set_index(self, idx, mappings[idx2]);
}
PK_FREE(mappings);
}
static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
uint64_t hash;
uint32_t idx;
DictEntry* entry;
if(!Dict__probe(self, key, &hash, &idx, &entry)) return false;
if(entry) {
// update existing entry
entry->val = *val;
return true;
}
// insert new entry
DictEntry* new_entry = c11_vector__emplace(&self->entries);
new_entry->hash = hash;
new_entry->key = *key;
new_entry->val = *val;
Dict__set_index(self, idx, self->entries.length - 1);
self->length++;
// check if we need to rehash
float load_factor = (float)self->length / self->capacity;
if(load_factor > (self->index_is_short ? 0.3f : 0.4f)) Dict__rehash_2x(self);
return true;
}
/// Delete an entry from the dict.
/// -1: error, 0: not found, 1: found and deleted
static int Dict__pop(Dict* self, py_Ref key) {
// Dict__log_index(self, "before pop");
uint64_t hash;
uint32_t idx;
DictEntry* entry;
if(!Dict__probe(self, key, &hash, &idx, &entry)) return -1;
if(!entry) return 0; // not found
// found the entry, delete and return it
py_assign(py_retval(), &entry->val);
Dict__set_index(self, idx, self->null_index_value);
py_newnil(&entry->key);
py_newnil(&entry->val);
self->length--;
/* tidy */
// https://github.com/OpenHFT/Chronicle-Map/blob/820573a68471509ffc1b0584454f4a67c0be1b84/src/main/java/net/openhft/chronicle/hash/impl/CompactOffHeapLinearHashTable.java#L156
uint32_t mask = self->capacity - 1;
uint32_t posToRemove = idx;
uint32_t posToShift = posToRemove;
// int probe_count = 0;
// int swap_count = 0;
while(true) {
posToShift = Dict__step(posToShift);
uint32_t idx_z = Dict__get_index(self, posToShift);
if(idx_z == self->null_index_value) break;
uint64_t hash_z = c11__at(DictEntry, &self->entries, idx_z)->hash;
uint32_t insertPos = (uint64_t)hash_z % self->capacity;
// the following condition essentially means circular permutations
// of three (r = posToRemove, s = posToShift, i = insertPos)
// positions are accepted:
// [...i..r...s.] or
// [...r..s...i.] or
// [...s..i...r.]
bool cond1 = insertPos <= posToRemove;
bool cond2 = posToRemove <= posToShift;
if((cond1 && cond2) ||
// chain wrapped around capacity
(posToShift < insertPos && (cond1 || cond2))) {
Dict__swap_null_index(self, posToRemove, posToShift);
posToRemove = posToShift;
// swap_count++;
}
// probe_count++;
}
// printf("Dict__pop: probe_count=%d, swap_count=%d\n", probe_count, swap_count);
// compact entries if necessary
if(self->entries.length > 16 && (self->length < self->entries.length >> 1)) {
Dict__compact_entries(self); // compact entries
}
// Dict__log_index(self, "after pop");
return 1;
}
static void DictIterator__ctor(DictIterator* self, Dict* dict, int mode) {
assert(mode >= 0 && mode <= 2);
self->dict = dict;
self->dict_backup = *dict; // backup the dict
self->curr = dict->entries.data;
self->end = self->curr + dict->entries.length;
self->mode = mode;
}
static DictEntry* DictIterator__next(DictIterator* self) {
DictEntry* retval;
do {
if(self->curr == self->end) return NULL;
retval = self->curr++;
} while(py_isnil(&retval->key));
return retval;
}
static bool DictIterator__modified(DictIterator* self) {
return memcmp(self->dict, &self->dict_backup, sizeof(Dict)) != 0;
}
///////////////////////////////
static bool dict__new__(int argc, py_Ref argv) {
py_Type cls = py_totype(argv);
int slots = cls == tp_dict ? 0 : -1;
Dict* ud = py_newobject(py_retval(), cls, slots, sizeof(Dict));
Dict__ctor(ud, 17, 4);
return true;
}
void py_newdict(py_OutRef out) {
Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict));
Dict__ctor(ud, 17, 4);
}
static bool dict__init__(int argc, py_Ref argv) {
if(argc > 2) return TypeError("dict.__init__() takes at most 2 arguments (%d given)", argc);
if(argc == 1) {
py_newnone(py_retval());
return true;
}
assert(argc == 2);
py_TValue* p;
int length = pk_arrayview(py_arg(1), &p);
if(length == -1) { return TypeError("dict.__init__() expects a list or tuple"); }
Dict* self = py_touserdata(argv);
for(int i = 0; i < length; i++) {
py_Ref tuple = &p[i];
if(!py_istuple(tuple) || py_tuple_len(tuple) != 2) {
return ValueError("dict.__init__() argument must be a list of tuple-2");
}
py_Ref key = py_tuple_getitem(tuple, 0);
py_Ref val = py_tuple_getitem(tuple, 1);
if(!Dict__set(self, key, val)) return false;
}
py_newnone(py_retval());
return true;
}
static bool dict__getitem__(int argc, py_Ref argv) {
PY_CHECK_ARGC(2);
Dict* self = py_touserdata(argv);
DictEntry* entry;
if(!Dict__try_get(self, py_arg(1), &entry)) return false;
if(entry) {
*py_retval() = entry->val;
return true;
}
// try __missing__
py_Ref missing = py_tpfindmagic(argv->type, __missing__);
if(missing) return py_call(missing, argc, argv);
return KeyError(py_arg(1));
}
static bool dict__setitem__(int argc, py_Ref argv) {
PY_CHECK_ARGC(3);
Dict* self = py_touserdata(argv);
bool ok = Dict__set(self, py_arg(1), py_arg(2));
if(ok) py_newnone(py_retval());
return ok;
}
static bool dict__delitem__(int argc, py_Ref argv) {
PY_CHECK_ARGC(2);
Dict* self = py_touserdata(argv);
int res = Dict__pop(self, py_arg(1));
if(res == 1) {
py_newnone(py_retval());
return true;
}
if(res == 0) return KeyError(py_arg(1));
return false;
}
static bool dict__contains__(int argc, py_Ref argv) {
PY_CHECK_ARGC(2);
Dict* self = py_touserdata(argv);
DictEntry* entry;
if(!Dict__try_get(self, py_arg(1), &entry)) return false;
py_newbool(py_retval(), entry != NULL);
return true;
}
static bool dict__len__(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
Dict* self = py_touserdata(argv);
py_newint(py_retval(), self->length);
return true;
}
static bool dict__repr__(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
Dict* self = py_touserdata(argv);
c11_sbuf buf;
c11_sbuf__ctor(&buf);
c11_sbuf__write_char(&buf, '{');
bool is_first = true;
for(int i = 0; i < self->entries.length; i++) {
DictEntry* entry = c11__at(DictEntry, &self->entries, i);
if(py_isnil(&entry->key)) continue;
if(!is_first) c11_sbuf__write_cstr(&buf, ", ");
if(!py_repr(&entry->key)) return false;
c11_sbuf__write_sv(&buf, py_tosv(py_retval()));
c11_sbuf__write_cstr(&buf, ": ");
if(!py_repr(&entry->val)) return false;
c11_sbuf__write_sv(&buf, py_tosv(py_retval()));
is_first = false;
}
c11_sbuf__write_char(&buf, '}');
c11_sbuf__py_submit(&buf, py_retval());
return true;
}
static bool dict__eq__(int argc, py_Ref argv) {
PY_CHECK_ARGC(2);
Dict* self = py_touserdata(py_arg(0));
if(!py_isdict(py_arg(1))) {
py_newnotimplemented(py_retval());
return true;
}
Dict* other = py_touserdata(py_arg(1));
if(self->length != other->length) {
py_newbool(py_retval(), false);
return true;
}
DictIterator iter;
DictIterator__ctor(&iter, self, 2);
// for each self key
while(1) {
DictEntry* entry = DictIterator__next(&iter);
if(!entry) break;
DictEntry* other_entry;
if(!Dict__try_get(other, &entry->key, &other_entry)) return false;
if(!other_entry) {
py_newbool(py_retval(), false);
return true;
}
if(entry->hash != other_entry->hash) {
py_newbool(py_retval(), false);
return true;
}
int res = py_equal(&entry->val, &other_entry->val);
if(res == 0) {
py_newbool(py_retval(), false);
return true;
}
if(res == -1) return false; // error
}
py_newbool(py_retval(), true);
return true;
}
static bool dict__ne__(int argc, py_Ref argv) {
if(!dict__eq__(argc, argv)) return false;
if(py_isbool(py_retval())) {
bool res = py_tobool(py_retval());
py_newbool(py_retval(), !res);
}
return true;
}
static bool dict_clear(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
Dict* self = py_touserdata(argv);
Dict__clear(self);
py_newnone(py_retval());
return true;
}
static bool dict_copy(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
Dict* self = py_touserdata(argv);
Dict* new_dict = py_newobject(py_retval(), tp_dict, 0, sizeof(Dict));
new_dict->length = self->length;
new_dict->capacity = self->capacity;
new_dict->null_index_value = self->null_index_value;
new_dict->index_is_short = self->index_is_short;
// copy entries
new_dict->entries = c11_vector__copy(&self->entries);
// copy indices
size_t indices_size = self->index_is_short ? self->capacity * sizeof(uint16_t)
: self->capacity * sizeof(uint32_t);
new_dict->indices = PK_MALLOC(indices_size);
memcpy(new_dict->indices, self->indices, indices_size);
return true;
}
static bool dict_update(int argc, py_Ref argv) {
PY_CHECK_ARGC(2);
PY_CHECK_ARG_TYPE(1, tp_dict);
Dict* self = py_touserdata(argv);
Dict* other = py_touserdata(py_arg(1));
for(int i = 0; i < other->entries.length; i++) {
DictEntry* entry = c11__at(DictEntry, &other->entries, i);
if(py_isnil(&entry->key)) continue;
if(!Dict__set(self, &entry->key, &entry->val)) return false;
}
py_newnone(py_retval());
return true;
}
static bool dict_get(int argc, py_Ref argv) {
Dict* self = py_touserdata(argv);
if(argc > 3) return TypeError("get() takes at most 3 arguments (%d given)", argc);
py_Ref default_val = argc == 3 ? py_arg(2) : py_None();
DictEntry* entry;
if(!Dict__try_get(self, py_arg(1), &entry)) return false;
*py_retval() = entry ? entry->val : *default_val;
return true;
}
static bool dict_pop(int argc, py_Ref argv) {
Dict* self = py_touserdata(argv);
if(argc < 2 || argc > 3) return TypeError("pop() takes 1 or 2 arguments (%d given)", argc - 1);
py_Ref default_val = argc == 3 ? py_arg(2) : py_None();
int res = Dict__pop(self, py_arg(1));
if(res == -1) return false;
if(res == 0) py_assign(py_retval(), default_val);
return true;
}
static bool dict_keys(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
Dict* self = py_touserdata(argv);
DictIterator* ud = py_newobject(py_retval(), tp_dict_iterator, 1, sizeof(DictIterator));
DictIterator__ctor(ud, self, 0);
py_setslot(py_retval(), 0, argv); // keep a reference to the dict
return true;
}
static bool dict_values(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
Dict* self = py_touserdata(argv);
DictIterator* ud = py_newobject(py_retval(), tp_dict_iterator, 1, sizeof(DictIterator));
DictIterator__ctor(ud, self, 1);
py_setslot(py_retval(), 0, argv); // keep a reference to the dict
return true;
}
static bool dict_items(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
Dict* self = py_touserdata(argv);
DictIterator* ud = py_newobject(py_retval(), tp_dict_iterator, 1, sizeof(DictIterator));
DictIterator__ctor(ud, self, 2);
py_setslot(py_retval(), 0, argv); // keep a reference to the dict
return true;
}
py_Type pk_dict__register() {
py_Type type = pk_newtype("dict", tp_object, NULL, (void (*)(void*))Dict__dtor, false, false);
py_bindmagic(type, __new__, dict__new__);
py_bindmagic(type, __init__, dict__init__);
py_bindmagic(type, __getitem__, dict__getitem__);
py_bindmagic(type, __setitem__, dict__setitem__);
py_bindmagic(type, __delitem__, dict__delitem__);
py_bindmagic(type, __contains__, dict__contains__);
py_bindmagic(type, __len__, dict__len__);
py_bindmagic(type, __repr__, dict__repr__);
py_bindmagic(type, __eq__, dict__eq__);
py_bindmagic(type, __ne__, dict__ne__);
py_bindmagic(type, __iter__, dict_keys);
py_bindmethod(type, "clear", dict_clear);
py_bindmethod(type, "copy", dict_copy);
py_bindmethod(type, "update", dict_update);
py_bindmethod(type, "get", dict_get);
py_bindmethod(type, "pop", dict_pop);
py_bindmethod(type, "keys", dict_keys);
py_bindmethod(type, "values", dict_values);
py_bindmethod(type, "items", dict_items);
py_setdict(py_tpobject(type), __hash__, py_None());
return type;
}
//////////////////////////
bool dict_items__next__(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
DictIterator* iter = py_touserdata(py_arg(0));
if(DictIterator__modified(iter)) return RuntimeError("dictionary modified during iteration");
DictEntry* entry = (DictIterator__next(iter));
if(entry) {
switch(iter->mode) {
case 0: // keys
py_assign(py_retval(), &entry->key);
return true;
case 1: // values
py_assign(py_retval(), &entry->val);
return true;
case 2: // items
{
py_Ref p = py_newtuple(py_retval(), 2);
p[0] = entry->key;
p[1] = entry->val;
return true;
}
default: c11__unreachable();
}
}
return StopIteration();
}
bool dict_items__len__(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
DictIterator* iter = py_touserdata(py_arg(0));
py_newint(py_retval(), iter->dict->length);
return true;
}
py_Type pk_dict_items__register() {
py_Type type = pk_newtype("dict_iterator", tp_object, NULL, NULL, false, true);
py_bindmagic(type, __iter__, pk_wrapper__self);
py_bindmagic(type, __next__, dict_items__next__);
py_bindmagic(type, __len__, dict_items__len__);
return type;
}
//////////////////////////
int py_dict_getitem(py_Ref self, py_Ref key) {
assert(py_isdict(self));
Dict* ud = py_touserdata(self);
DictEntry* entry;
if(!Dict__try_get(ud, key, &entry)) return -1;
if(entry) {
py_assign(py_retval(), &entry->val);
return 1;
}
return 0;
}
bool py_dict_setitem(py_Ref self, py_Ref key, py_Ref val) {
assert(py_isdict(self));
Dict* ud = py_touserdata(self);
return Dict__set(ud, key, val);
}
int py_dict_delitem(py_Ref self, py_Ref key) {
assert(py_isdict(self));
Dict* ud = py_touserdata(self);
return Dict__pop(ud, key);
}
int py_dict_getitem_by_str(py_Ref self, const char* key) {
py_Ref tmp = py_pushtmp();
py_newstr(tmp, key);
int res = py_dict_getitem(self, tmp);
py_pop();
return res;
}
bool py_dict_setitem_by_str(py_Ref self, const char* key, py_Ref val) {
py_Ref tmp = py_pushtmp();
py_newstr(tmp, key);
bool res = py_dict_setitem(self, tmp, val);
py_pop();
return res;
}
int py_dict_delitem_by_str(py_Ref self, const char* key) {
py_Ref tmp = py_pushtmp();
py_newstr(tmp, key);
int res = py_dict_delitem(self, tmp);
py_pop();
return res;
}
int py_dict_getitem_by_int(py_Ref self, py_i64 key) {
py_TValue tmp;
py_newint(&tmp, key);
return py_dict_getitem(self, &tmp);
}
bool py_dict_setitem_by_int(py_Ref self, py_i64 key, py_Ref val) {
py_TValue tmp;
py_newint(&tmp, key);
return py_dict_setitem(self, &tmp, val);
}
int py_dict_delitem_by_int(py_Ref self, py_i64 key) {
py_TValue tmp;
py_newint(&tmp, key);
return py_dict_delitem(self, &tmp);
}
int py_dict_len(py_Ref self) {
assert(py_isdict(self));
Dict* ud = py_touserdata(self);
return ud->length;
}
bool py_dict_apply(py_Ref self, bool (*f)(py_Ref, py_Ref, void*), void* ctx) {
Dict* ud = py_touserdata(self);
for(int i = 0; i < ud->entries.length; i++) {
DictEntry* entry = c11__at(DictEntry, &ud->entries, i);
if(py_isnil(&entry->key)) continue;
if(!f(&entry->key, &entry->val, ctx)) return false;
}
return true;
}
#undef Dict__step