refactor NameDict

This commit is contained in:
blueloveTH 2023-10-12 21:30:41 +08:00
parent ed26e08704
commit 99fbb7c736
6 changed files with 149 additions and 99 deletions

View File

@ -194,7 +194,7 @@ struct Py_<Function> final: PyObject {
Function _value; Function _value;
template<typename... Args> template<typename... Args>
Py_(Type type, Args&&... args): PyObject(type), _value(std::forward<Args>(args)...) { Py_(Type type, Args&&... args): PyObject(type), _value(std::forward<Args>(args)...) {
enable_instance_dict(); _enable_instance_dict();
} }
void _obj_gc_mark() override { void _obj_gc_mark() override {
_value.decl->_gc_mark(); _value.decl->_gc_mark();
@ -212,7 +212,7 @@ struct Py_<NativeFunc> final: PyObject {
NativeFunc _value; NativeFunc _value;
template<typename... Args> template<typename... Args>
Py_(Type type, Args&&... args): PyObject(type), _value(std::forward<Args>(args)...) { Py_(Type type, Args&&... args): PyObject(type), _value(std::forward<Args>(args)...) {
enable_instance_dict(); _enable_instance_dict();
} }
void _obj_gc_mark() override { void _obj_gc_mark() override {
if(_value.decl != nullptr){ if(_value.decl != nullptr){

View File

@ -20,61 +20,59 @@ struct SmallNameDict{
static const int kCapacity = 12; static const int kCapacity = 12;
int _size; bool _is_small;
std::pair<K, V> _items[kCapacity]; uint16_t _size;
K _keys[kCapacity];
V _values[kCapacity];
SmallNameDict(): _size(0) {} SmallNameDict(): _is_small(true), _size(0) {}
void set(K key, V val){
for(int i=0; i<kCapacity; i++){
if(_items[i].first == key){
_items[i].second = val;
return;
}
}
#if PK_DEBUG_EXTRA_CHECK
if(_size == kCapacity){
throw std::runtime_error("SmallDict: capacity exceeded");
}
#endif
_items[_size++] = {key, val};
}
bool try_set(K key, V val){ bool try_set(K key, V val){
for(int i=0; i<kCapacity; i++){ for(int i=0; i<kCapacity; i++){
if(_items[i].first == key){ if(_keys[i] == key){
_items[i].second = val; _values[i] = val;
return true; return true;
} }
} }
return false; if(_size == kCapacity) return false;
_keys[_size] = key;
_values[_size] = val;
_size++;
return true;
} }
V operator[](K key) const { V operator[](K key) const {
for(int i=0; i<kCapacity; i++){ for(int i=0; i<kCapacity; i++){
if(_items[i].first == key) return _items[i].second; if(_keys[i] == key) return _values[i];
} }
throw std::out_of_range(fmt("SmallDict key not found: ", key)); throw std::out_of_range(fmt("SmallDict key not found: ", key));
} }
V get(K key) const { V try_get(K key) const {
for(int i=0; i<kCapacity; i++){ for(int i=0; i<kCapacity; i++){
if(_items[i].first == key) return _items[i].second; if(_keys[i] == key) return _values[i];
} }
return default_invalid_value<V>(); return default_invalid_value<V>();
} }
V* try_get_2(K key) {
for(int i=0; i<kCapacity; i++){
if(_keys[i] == key) return &_values[i];
}
return nullptr;
}
bool contains(K key) const { bool contains(K key) const {
for(int i=0; i<kCapacity; i++){ for(int i=0; i<kCapacity; i++){
if(_items[i].first == key) return true; if(_keys[i] == key) return true;
} }
return false; return false;
} }
bool del(K key){ bool del(K key){
for(int i=0; i<kCapacity; i++){ for(int i=0; i<kCapacity; i++){
if(_items[i].first == key){ if(_keys[i] == key){
_items[i].first = StrName(); _keys[i] = StrName();
_size--; _size--;
return true; return true;
} }
@ -85,19 +83,19 @@ struct SmallNameDict{
template<typename Func> template<typename Func>
void apply(Func func) const { void apply(Func func) const {
for(int i=0; i<kCapacity; i++){ for(int i=0; i<kCapacity; i++){
if(_items[i].first) func(_items[i].first, _items[i].second); if(!_keys[i].empty()) func(_keys[i], _values[i]);
} }
} }
void clear(){ void clear(){
for(int i=0; i<kCapacity; i++){ for(int i=0; i<kCapacity; i++){
_items[i].first = StrName(); _keys[i] = StrName();
} }
_size = 0; _size = 0;
} }
int size() const { return _size; } uint16_t size() const { return _size; }
int capacity() const { return kCapacity; } uint16_t capacity() const { return kCapacity; }
}; };
inline const uint16_t kHashSeeds[] = {9629, 43049, 13267, 59509, 39251, 1249, 27689, 9719, 19913}; inline const uint16_t kHashSeeds[] = {9629, 43049, 13267, 59509, 39251, 1249, 27689, 9719, 19913};
@ -109,13 +107,12 @@ inline uint16_t _hash(StrName key, uint16_t mask, uint16_t hash_seed){
uint16_t _find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys); uint16_t _find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys);
template<typename T> template<typename T>
struct NameDictImpl { struct LargeNameDict {
using Item = std::pair<StrName, T>; using Item = std::pair<StrName, T>;
static constexpr uint16_t __Capacity = 8; static constexpr uint16_t __Capacity = 32;
// ensure the initial capacity is ok for memory pool
static_assert(is_pod<T>::value); static_assert(is_pod<T>::value);
static_assert(sizeof(Item) * __Capacity <= 128);
bool _is_small;
float _load_factor; float _load_factor;
uint16_t _capacity; uint16_t _capacity;
uint16_t _size; uint16_t _size;
@ -144,34 +141,36 @@ while(!_items[i].first.empty()) { \
} }
#define NAMEDICT_ALLOC() \ #define NAMEDICT_ALLOC() \
_items = (Item*)pool128_alloc(_capacity * sizeof(Item)); \ _items = (Item*)malloc(_capacity * sizeof(Item)); \
memset(_items, 0, _capacity * sizeof(Item)); \ memset(_items, 0, _capacity * sizeof(Item)); \
NameDictImpl(float load_factor=0.67f): LargeNameDict(float load_factor=0.67f):
_is_small(false),
_load_factor(load_factor), _capacity(__Capacity), _size(0), _load_factor(load_factor), _capacity(__Capacity), _size(0),
_hash_seed(kHashSeeds[0]), _mask(__Capacity-1) { _hash_seed(kHashSeeds[0]), _mask(__Capacity-1) {
NAMEDICT_ALLOC() NAMEDICT_ALLOC()
} }
NameDictImpl(const NameDictImpl& other) { LargeNameDict(const LargeNameDict& other) {
memcpy(this, &other, sizeof(NameDictImpl)); memcpy(this, &other, sizeof(LargeNameDict));
NAMEDICT_ALLOC() NAMEDICT_ALLOC()
for(int i=0; i<_capacity; i++) _items[i] = other._items[i]; for(int i=0; i<_capacity; i++) _items[i] = other._items[i];
} }
NameDictImpl& operator=(const NameDictImpl& other) { LargeNameDict& operator=(const LargeNameDict& other) {
pool128_dealloc(_items); free(_items);
memcpy(this, &other, sizeof(NameDictImpl)); memcpy(this, &other, sizeof(LargeNameDict));
NAMEDICT_ALLOC() NAMEDICT_ALLOC()
for(int i=0; i<_capacity; i++) _items[i] = other._items[i]; for(int i=0; i<_capacity; i++) _items[i] = other._items[i];
return *this; return *this;
} }
~NameDictImpl(){ pool128_dealloc(_items); } ~LargeNameDict(){ free(_items); }
NameDictImpl(NameDictImpl&&) = delete; LargeNameDict(LargeNameDict&&) = delete;
NameDictImpl& operator=(NameDictImpl&&) = delete; LargeNameDict& operator=(LargeNameDict&&) = delete;
uint16_t size() const { return _size; } uint16_t size() const { return _size; }
uint16_t capacity() const { return _capacity; }
T operator[](StrName key) const { T operator[](StrName key) const {
bool ok; uint16_t i; bool ok; uint16_t i;
@ -209,7 +208,7 @@ while(!_items[i].first.empty()) { \
if(ok) FATAL_ERROR(); if(ok) FATAL_ERROR();
_items[j] = old_items[i]; _items[j] = old_items[i];
} }
pool128_dealloc(old_items); free(old_items);
} }
void _try_perfect_rehash(){ void _try_perfect_rehash(){
@ -220,11 +219,7 @@ while(!_items[i].first.empty()) { \
T try_get(StrName key) const{ T try_get(StrName key) const{
bool ok; uint16_t i; bool ok; uint16_t i;
HASH_PROBE_0(key, ok, i); HASH_PROBE_0(key, ok, i);
if(!ok){ if(!ok) return default_invalid_value<T>();
if constexpr(std::is_pointer_v<T>) return nullptr;
else if constexpr(std::is_same_v<int, T>) return -1;
else return Discarded();
}
return _items[i].second; return _items[i].second;
} }
@ -235,43 +230,20 @@ while(!_items[i].first.empty()) { \
return &_items[i].second; return &_items[i].second;
} }
bool try_set(StrName key, T val){
bool ok; uint16_t i;
HASH_PROBE_1(key, ok, i);
if(!ok) return false;
_items[i].second = val;
return true;
}
bool contains(StrName key) const { bool contains(StrName key) const {
bool ok; uint16_t i; bool ok; uint16_t i;
HASH_PROBE_0(key, ok, i); HASH_PROBE_0(key, ok, i);
return ok; return ok;
} }
void update(const NameDictImpl& other){ bool del(StrName key){
for(uint16_t i=0; i<other._capacity; i++){
auto& item = other._items[i];
if(!item.first.empty()) set(item.first, item.second);
}
}
void erase(StrName key){
bool ok; uint16_t i; bool ok; uint16_t i;
HASH_PROBE_0(key, ok, i); HASH_PROBE_0(key, ok, i);
if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key)); if(!ok) return false;
_items[i].first = StrName(); _items[i].first = StrName();
// _items[i].second = PY_DELETED_SLOT; // do not change .second if it is not zero, it means the slot is occupied by a deleted item // _items[i].second = PY_DELETED_SLOT; // do not change .second if it is not zero, it means the slot is occupied by a deleted item
_size--; _size--;
} return true;
std::vector<Item> items() const {
std::vector<Item> v;
for(uint16_t i=0; i<_capacity; i++){
if(_items[i].first.empty()) continue;
v.push_back(_items[i]);
}
return v;
} }
template<typename __Func> template<typename __Func>
@ -303,8 +275,89 @@ while(!_items[i].first.empty()) { \
#undef _hash #undef _hash
}; };
template<typename V>
struct NameDictImpl{
PK_ALWAYS_PASS_BY_POINTER(NameDictImpl)
union{
SmallNameDict<V> _small;
LargeNameDict<V> _large;
};
NameDictImpl(): _small() {}
NameDictImpl(float load_factor): _large(load_factor) {}
bool is_small() const{
const bool* p = reinterpret_cast<const bool*>(this);
return *p;
}
void set(StrName key, V val){
if(is_small()){
bool ok = _small.try_set(key, val);
if(!ok){
SmallNameDict<V> copied(_small);
// move to large name dict
new (&_large) LargeNameDict<V>();
copied.apply([&](StrName key, V val){
_large.set(key, val);
});
_large.set(key, val);
}
}else{
_large.set(key, val);
}
}
uint16_t size() const{ return is_small() ?_small.size() : _large.size(); }
uint16_t capacity() const{ return is_small() ?_small.capacity() : _large.capacity(); }
V operator[](StrName key) const { return is_small() ?_small[key] : _large[key]; }
V try_get(StrName key) const { return is_small() ?_small.try_get(key) : _large.try_get(key); }
V* try_get_2(StrName key) { return is_small() ?_small.try_get_2(key) : _large.try_get_2(key); }
bool contains(StrName key) const { return is_small() ?_small.contains(key) : _large.contains(key); }
bool del(StrName key){ return is_small() ?_small.del(key) : _large.del(key); }
void clear(){
if(is_small()) _small.clear();
else _large.clear();
}
template<typename Func>
void apply(Func func) const {
if(is_small()) _small.apply(func);
else _large.apply(func);
}
void _try_perfect_rehash(){
if(is_small()) return;
_large._try_perfect_rehash();
}
std::vector<StrName> keys() const{
std::vector<StrName> v;
apply([&](StrName key, V val){
v.push_back(key);
});
return v;
}
std::vector<std::pair<StrName, V>> items() const{
std::vector<std::pair<StrName, V>> v;
apply([&](StrName key, V val){
v.push_back({key, val});
});
return v;
}
~NameDictImpl(){
if(!is_small()) _large.~LargeNameDict<V>();
}
};
using NameDict = NameDictImpl<PyObject*>; using NameDict = NameDictImpl<PyObject*>;
using NameDict_ = std::shared_ptr<NameDict>; using NameDict_ = std::shared_ptr<NameDict>;
using NameDictInt = NameDictImpl<int>; using NameDictInt = NameDictImpl<int>;
static_assert(sizeof(NameDict) <= 128);
} // namespace pkpy } // namespace pkpy

View File

@ -151,8 +151,12 @@ struct PyObject{
virtual ~PyObject(); virtual ~PyObject();
void enable_instance_dict(float lf=kInstAttrLoadFactor) { void _enable_instance_dict() {
_attr = new(pool64_alloc<NameDict>()) NameDict(lf); _attr = new(pool128_alloc<NameDict>()) NameDict();
}
void _enable_instance_dict(float lf){
_attr = new(pool128_alloc<NameDict>()) NameDict(lf);
} }
}; };
@ -230,10 +234,9 @@ struct MappingProxy{
inline void gc_mark_namedict(NameDict& t){ inline void gc_mark_namedict(NameDict& t){
if(t.size() == 0) return; if(t.size() == 0) return;
for(uint16_t i=0; i<t._capacity; i++){ t.apply([](StrName name, PyObject* obj){
if(t._items[i].first.empty()) continue; PK_OBJ_MARK(obj);
PK_OBJ_MARK(t._items[i].second); });
}
} }
Str obj_type_name(VM* vm, Type type); Str obj_type_name(VM* vm, Type type);
@ -398,7 +401,7 @@ struct Py_<Super> final: PyObject {
template<> template<>
struct Py_<DummyInstance> final: PyObject { struct Py_<DummyInstance> final: PyObject {
Py_(Type type): PyObject(type) { Py_(Type type): PyObject(type) {
enable_instance_dict(); _enable_instance_dict();
} }
void _obj_gc_mark() override {} void _obj_gc_mark() override {}
void* _value_ptr() override { return nullptr; } void* _value_ptr() override { return nullptr; }
@ -408,7 +411,7 @@ template<>
struct Py_<Type> final: PyObject { struct Py_<Type> final: PyObject {
Type _value; Type _value;
Py_(Type type, Type val): PyObject(type), _value(val) { Py_(Type type, Type val): PyObject(type), _value(val) {
enable_instance_dict(kTypeAttrLoadFactor); _enable_instance_dict(kTypeAttrLoadFactor);
} }
void _obj_gc_mark() override {} void _obj_gc_mark() override {}
void* _value_ptr() override { return &_value; } void* _value_ptr() override { return &_value; }
@ -417,7 +420,7 @@ struct Py_<Type> final: PyObject {
template<> template<>
struct Py_<DummyModule> final: PyObject { struct Py_<DummyModule> final: PyObject {
Py_(Type type): PyObject(type) { Py_(Type type): PyObject(type) {
enable_instance_dict(kTypeAttrLoadFactor); _enable_instance_dict(kTypeAttrLoadFactor);
} }
void _obj_gc_mark() override {} void _obj_gc_mark() override {}
void* _value_ptr() override { return nullptr; } void* _value_ptr() override { return nullptr; }

View File

@ -210,24 +210,18 @@ __NEXT_STEP:;
if(slot == nullptr) vm->UnboundLocalError(_name); if(slot == nullptr) vm->UnboundLocalError(_name);
*slot = PY_NULL; *slot = PY_NULL;
}else{ }else{
if(!frame->f_globals().contains(_name)) vm->NameError(_name); if(!frame->f_globals().del(_name)) vm->NameError(_name);
frame->f_globals().erase(_name);
} }
DISPATCH(); DISPATCH();
TARGET(DELETE_GLOBAL) TARGET(DELETE_GLOBAL)
_name = StrName(byte.arg); _name = StrName(byte.arg);
if(frame->f_globals().contains(_name)){ if(!frame->f_globals().del(_name)) vm->NameError(_name);
frame->f_globals().erase(_name);
}else{
NameError(_name);
}
DISPATCH(); DISPATCH();
TARGET(DELETE_ATTR) TARGET(DELETE_ATTR)
_0 = POPX(); _0 = POPX();
_name = StrName(byte.arg); _name = StrName(byte.arg);
if(is_tagged(_0) || !_0->is_attr_valid()) TypeError("cannot delete attribute"); if(is_tagged(_0) || !_0->is_attr_valid()) TypeError("cannot delete attribute");
if(!_0->attr().contains(_name)) AttributeError(_0, _name); if(!_0->attr().del(_name)) AttributeError(_0, _name);
_0->attr().erase(_name);
DISPATCH(); DISPATCH();
TARGET(DELETE_SUBSCR) TARGET(DELETE_SUBSCR)
_1 = POPX(); _1 = POPX();

View File

@ -4,6 +4,6 @@ namespace pkpy{
PyObject::~PyObject() { PyObject::~PyObject() {
if(_attr == nullptr) return; if(_attr == nullptr) return;
_attr->~NameDict(); _attr->~NameDict();
pool64_dealloc(_attr); pool128_dealloc(_attr);
} }
} // namespace pkpy } // namespace pkpy

View File

@ -377,7 +377,7 @@ void init_builtins(VM* _vm) {
if(self->is_attr_valid()){ if(self->is_attr_valid()){
vm->TypeError("object: instance dict is already enabled"); vm->TypeError("object: instance dict is already enabled");
} }
self->enable_instance_dict(); self->_enable_instance_dict();
return vm->None; return vm->None;
}); });