refactor NameDict

This commit is contained in:
blueloveTH 2023-10-12 21:30:41 +08:00
parent ed26e08704
commit 99fbb7c736
6 changed files with 149 additions and 99 deletions

View File

@ -194,7 +194,7 @@ struct Py_<Function> final: PyObject {
Function _value;
template<typename... Args>
Py_(Type type, Args&&... args): PyObject(type), _value(std::forward<Args>(args)...) {
enable_instance_dict();
_enable_instance_dict();
}
void _obj_gc_mark() override {
_value.decl->_gc_mark();
@ -212,7 +212,7 @@ struct Py_<NativeFunc> final: PyObject {
NativeFunc _value;
template<typename... Args>
Py_(Type type, Args&&... args): PyObject(type), _value(std::forward<Args>(args)...) {
enable_instance_dict();
_enable_instance_dict();
}
void _obj_gc_mark() override {
if(_value.decl != nullptr){

View File

@ -20,61 +20,59 @@ struct SmallNameDict{
static const int kCapacity = 12;
int _size;
std::pair<K, V> _items[kCapacity];
bool _is_small;
uint16_t _size;
K _keys[kCapacity];
V _values[kCapacity];
SmallNameDict(): _size(0) {}
void set(K key, V val){
for(int i=0; i<kCapacity; i++){
if(_items[i].first == key){
_items[i].second = val;
return;
}
}
#if PK_DEBUG_EXTRA_CHECK
if(_size == kCapacity){
throw std::runtime_error("SmallDict: capacity exceeded");
}
#endif
_items[_size++] = {key, val};
}
SmallNameDict(): _is_small(true), _size(0) {}
bool try_set(K key, V val){
for(int i=0; i<kCapacity; i++){
if(_items[i].first == key){
_items[i].second = val;
if(_keys[i] == key){
_values[i] = val;
return true;
}
}
return false;
if(_size == kCapacity) return false;
_keys[_size] = key;
_values[_size] = val;
_size++;
return true;
}
V operator[](K key) const {
for(int i=0; i<kCapacity; i++){
if(_items[i].first == key) return _items[i].second;
if(_keys[i] == key) return _values[i];
}
throw std::out_of_range(fmt("SmallDict key not found: ", key));
}
V get(K key) const {
V try_get(K key) const {
for(int i=0; i<kCapacity; i++){
if(_items[i].first == key) return _items[i].second;
if(_keys[i] == key) return _values[i];
}
return default_invalid_value<V>();
}
V* try_get_2(K key) {
for(int i=0; i<kCapacity; i++){
if(_keys[i] == key) return &_values[i];
}
return nullptr;
}
bool contains(K key) const {
for(int i=0; i<kCapacity; i++){
if(_items[i].first == key) return true;
if(_keys[i] == key) return true;
}
return false;
}
bool del(K key){
for(int i=0; i<kCapacity; i++){
if(_items[i].first == key){
_items[i].first = StrName();
if(_keys[i] == key){
_keys[i] = StrName();
_size--;
return true;
}
@ -85,19 +83,19 @@ struct SmallNameDict{
template<typename Func>
void apply(Func func) const {
for(int i=0; i<kCapacity; i++){
if(_items[i].first) func(_items[i].first, _items[i].second);
if(!_keys[i].empty()) func(_keys[i], _values[i]);
}
}
void clear(){
for(int i=0; i<kCapacity; i++){
_items[i].first = StrName();
_keys[i] = StrName();
}
_size = 0;
}
int size() const { return _size; }
int capacity() const { return kCapacity; }
uint16_t size() const { return _size; }
uint16_t capacity() const { return kCapacity; }
};
inline const uint16_t kHashSeeds[] = {9629, 43049, 13267, 59509, 39251, 1249, 27689, 9719, 19913};
@ -109,13 +107,12 @@ inline uint16_t _hash(StrName key, uint16_t mask, uint16_t hash_seed){
uint16_t _find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys);
template<typename T>
struct NameDictImpl {
struct LargeNameDict {
using Item = std::pair<StrName, T>;
static constexpr uint16_t __Capacity = 8;
// ensure the initial capacity is ok for memory pool
static constexpr uint16_t __Capacity = 32;
static_assert(is_pod<T>::value);
static_assert(sizeof(Item) * __Capacity <= 128);
bool _is_small;
float _load_factor;
uint16_t _capacity;
uint16_t _size;
@ -144,34 +141,36 @@ while(!_items[i].first.empty()) { \
}
#define NAMEDICT_ALLOC() \
_items = (Item*)pool128_alloc(_capacity * sizeof(Item)); \
_items = (Item*)malloc(_capacity * sizeof(Item)); \
memset(_items, 0, _capacity * sizeof(Item)); \
NameDictImpl(float load_factor=0.67f):
LargeNameDict(float load_factor=0.67f):
_is_small(false),
_load_factor(load_factor), _capacity(__Capacity), _size(0),
_hash_seed(kHashSeeds[0]), _mask(__Capacity-1) {
NAMEDICT_ALLOC()
}
NameDictImpl(const NameDictImpl& other) {
memcpy(this, &other, sizeof(NameDictImpl));
LargeNameDict(const LargeNameDict& other) {
memcpy(this, &other, sizeof(LargeNameDict));
NAMEDICT_ALLOC()
for(int i=0; i<_capacity; i++) _items[i] = other._items[i];
}
NameDictImpl& operator=(const NameDictImpl& other) {
pool128_dealloc(_items);
memcpy(this, &other, sizeof(NameDictImpl));
LargeNameDict& operator=(const LargeNameDict& other) {
free(_items);
memcpy(this, &other, sizeof(LargeNameDict));
NAMEDICT_ALLOC()
for(int i=0; i<_capacity; i++) _items[i] = other._items[i];
return *this;
}
~NameDictImpl(){ pool128_dealloc(_items); }
~LargeNameDict(){ free(_items); }
NameDictImpl(NameDictImpl&&) = delete;
NameDictImpl& operator=(NameDictImpl&&) = delete;
LargeNameDict(LargeNameDict&&) = delete;
LargeNameDict& operator=(LargeNameDict&&) = delete;
uint16_t size() const { return _size; }
uint16_t capacity() const { return _capacity; }
T operator[](StrName key) const {
bool ok; uint16_t i;
@ -209,7 +208,7 @@ while(!_items[i].first.empty()) { \
if(ok) FATAL_ERROR();
_items[j] = old_items[i];
}
pool128_dealloc(old_items);
free(old_items);
}
void _try_perfect_rehash(){
@ -220,11 +219,7 @@ while(!_items[i].first.empty()) { \
T try_get(StrName key) const{
bool ok; uint16_t i;
HASH_PROBE_0(key, ok, i);
if(!ok){
if constexpr(std::is_pointer_v<T>) return nullptr;
else if constexpr(std::is_same_v<int, T>) return -1;
else return Discarded();
}
if(!ok) return default_invalid_value<T>();
return _items[i].second;
}
@ -235,43 +230,20 @@ while(!_items[i].first.empty()) { \
return &_items[i].second;
}
bool try_set(StrName key, T val){
bool ok; uint16_t i;
HASH_PROBE_1(key, ok, i);
if(!ok) return false;
_items[i].second = val;
return true;
}
bool contains(StrName key) const {
bool ok; uint16_t i;
HASH_PROBE_0(key, ok, i);
return ok;
}
void update(const NameDictImpl& other){
for(uint16_t i=0; i<other._capacity; i++){
auto& item = other._items[i];
if(!item.first.empty()) set(item.first, item.second);
}
}
void erase(StrName key){
bool del(StrName key){
bool ok; uint16_t i;
HASH_PROBE_0(key, ok, i);
if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
if(!ok) return false;
_items[i].first = StrName();
// _items[i].second = PY_DELETED_SLOT; // do not change .second if it is not zero, it means the slot is occupied by a deleted item
_size--;
}
std::vector<Item> items() const {
std::vector<Item> v;
for(uint16_t i=0; i<_capacity; i++){
if(_items[i].first.empty()) continue;
v.push_back(_items[i]);
}
return v;
return true;
}
template<typename __Func>
@ -303,8 +275,89 @@ while(!_items[i].first.empty()) { \
#undef _hash
};
template<typename V>
struct NameDictImpl{
PK_ALWAYS_PASS_BY_POINTER(NameDictImpl)
union{
SmallNameDict<V> _small;
LargeNameDict<V> _large;
};
NameDictImpl(): _small() {}
NameDictImpl(float load_factor): _large(load_factor) {}
bool is_small() const{
const bool* p = reinterpret_cast<const bool*>(this);
return *p;
}
void set(StrName key, V val){
if(is_small()){
bool ok = _small.try_set(key, val);
if(!ok){
SmallNameDict<V> copied(_small);
// move to large name dict
new (&_large) LargeNameDict<V>();
copied.apply([&](StrName key, V val){
_large.set(key, val);
});
_large.set(key, val);
}
}else{
_large.set(key, val);
}
}
uint16_t size() const{ return is_small() ?_small.size() : _large.size(); }
uint16_t capacity() const{ return is_small() ?_small.capacity() : _large.capacity(); }
V operator[](StrName key) const { return is_small() ?_small[key] : _large[key]; }
V try_get(StrName key) const { return is_small() ?_small.try_get(key) : _large.try_get(key); }
V* try_get_2(StrName key) { return is_small() ?_small.try_get_2(key) : _large.try_get_2(key); }
bool contains(StrName key) const { return is_small() ?_small.contains(key) : _large.contains(key); }
bool del(StrName key){ return is_small() ?_small.del(key) : _large.del(key); }
void clear(){
if(is_small()) _small.clear();
else _large.clear();
}
template<typename Func>
void apply(Func func) const {
if(is_small()) _small.apply(func);
else _large.apply(func);
}
void _try_perfect_rehash(){
if(is_small()) return;
_large._try_perfect_rehash();
}
std::vector<StrName> keys() const{
std::vector<StrName> v;
apply([&](StrName key, V val){
v.push_back(key);
});
return v;
}
std::vector<std::pair<StrName, V>> items() const{
std::vector<std::pair<StrName, V>> v;
apply([&](StrName key, V val){
v.push_back({key, val});
});
return v;
}
~NameDictImpl(){
if(!is_small()) _large.~LargeNameDict<V>();
}
};
// Dictionary from StrName to PyObject*; this is the type PyObject allocates for its `_attr` member.
using NameDict = NameDictImpl<PyObject*>;
// Shared-ownership handle to a NameDict.
using NameDict_ = std::shared_ptr<NameDict>;
// Same dictionary implementation, storing int values.
using NameDictInt = NameDictImpl<int>;
// PyObject obtains NameDict storage from pool128_alloc<NameDict>(); presumably
// that pool hands out 128-byte slots, hence this size bound -- TODO confirm.
static_assert(sizeof(NameDict) <= 128);
} // namespace pkpy

View File

@ -151,8 +151,12 @@ struct PyObject{
virtual ~PyObject();
void enable_instance_dict(float lf=kInstAttrLoadFactor) {
_attr = new(pool64_alloc<NameDict>()) NameDict(lf);
void _enable_instance_dict() {
_attr = new(pool128_alloc<NameDict>()) NameDict();
}
void _enable_instance_dict(float lf){
_attr = new(pool128_alloc<NameDict>()) NameDict(lf);
}
};
@ -230,10 +234,9 @@ struct MappingProxy{
inline void gc_mark_namedict(NameDict& t){
if(t.size() == 0) return;
for(uint16_t i=0; i<t._capacity; i++){
if(t._items[i].first.empty()) continue;
PK_OBJ_MARK(t._items[i].second);
}
t.apply([](StrName name, PyObject* obj){
PK_OBJ_MARK(obj);
});
}
Str obj_type_name(VM* vm, Type type);
@ -398,7 +401,7 @@ struct Py_<Super> final: PyObject {
template<>
struct Py_<DummyInstance> final: PyObject {
Py_(Type type): PyObject(type) {
enable_instance_dict();
_enable_instance_dict();
}
void _obj_gc_mark() override {}
void* _value_ptr() override { return nullptr; }
@ -408,7 +411,7 @@ template<>
struct Py_<Type> final: PyObject {
Type _value;
Py_(Type type, Type val): PyObject(type), _value(val) {
enable_instance_dict(kTypeAttrLoadFactor);
_enable_instance_dict(kTypeAttrLoadFactor);
}
void _obj_gc_mark() override {}
void* _value_ptr() override { return &_value; }
@ -417,7 +420,7 @@ struct Py_<Type> final: PyObject {
template<>
struct Py_<DummyModule> final: PyObject {
Py_(Type type): PyObject(type) {
enable_instance_dict(kTypeAttrLoadFactor);
_enable_instance_dict(kTypeAttrLoadFactor);
}
void _obj_gc_mark() override {}
void* _value_ptr() override { return nullptr; }

View File

@ -210,24 +210,18 @@ __NEXT_STEP:;
if(slot == nullptr) vm->UnboundLocalError(_name);
*slot = PY_NULL;
}else{
if(!frame->f_globals().contains(_name)) vm->NameError(_name);
frame->f_globals().erase(_name);
if(!frame->f_globals().del(_name)) vm->NameError(_name);
}
DISPATCH();
TARGET(DELETE_GLOBAL)
_name = StrName(byte.arg);
if(frame->f_globals().contains(_name)){
frame->f_globals().erase(_name);
}else{
NameError(_name);
}
if(!frame->f_globals().del(_name)) vm->NameError(_name);
DISPATCH();
TARGET(DELETE_ATTR)
_0 = POPX();
_name = StrName(byte.arg);
if(is_tagged(_0) || !_0->is_attr_valid()) TypeError("cannot delete attribute");
if(!_0->attr().contains(_name)) AttributeError(_0, _name);
_0->attr().erase(_name);
if(!_0->attr().del(_name)) AttributeError(_0, _name);
DISPATCH();
TARGET(DELETE_SUBSCR)
_1 = POPX();

View File

@ -4,6 +4,6 @@ namespace pkpy{
PyObject::~PyObject() {
if(_attr == nullptr) return;
_attr->~NameDict();
pool64_dealloc(_attr);
pool128_dealloc(_attr);
}
} // namespace pkpy

View File

@ -377,7 +377,7 @@ void init_builtins(VM* _vm) {
if(self->is_attr_valid()){
vm->TypeError("object: instance dict is already enabled");
}
self->enable_instance_dict();
self->_enable_instance_dict();
return vm->None;
});