up

rename
This commit is contained in:
blueloveTH 2023-02-23 07:54:31 +08:00
parent fc8503cb45
commit ee46388841
8 changed files with 86 additions and 37 deletions

View File

@ -102,6 +102,7 @@ PyVar VM::run_frame(Frame* frame){
const pkpy::Function& f = PyFunction_AS_C(fn);
setattr(cls, f.name, fn);
}
cls->attr()._try_perfect_rehash();
} continue;
case OP_RETURN_VALUE: return frame->pop_value(this);
case OP_PRINT_EXPR: {

View File

@ -65,7 +65,8 @@ struct CodeObject {
std::vector<CodeBlock> blocks = { CodeBlock{NO_BLOCK, -1} };
std::map<StrName, int> labels;
int ideal_locals_capacity = 4;
uint32_t perfect_locals_capacity = 2;
uint32_t perfect_hash_seed = 0xffffffff;
void optimize(VM* vm);

View File

@ -24,6 +24,7 @@
#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <algorithm>
// #include <filesystem>
// namespace fs = std::filesystem;

View File

@ -389,7 +389,7 @@ private:
_compile_f_args(func, false);
consume(TK(":"));
}
func.code = pkpy::make_shared<CodeObject>(parser->src, func.name);
func.code = pkpy::make_shared<CodeObject>(parser->src, func.name.str());
this->codes.push(func.code);
co()->_rvalue += 1; EXPR_TUPLE(); co()->_rvalue -= 1;
emit(OP_RETURN_VALUE);
@ -1024,7 +1024,7 @@ __LISTCOMP:
consume(TK(")"));
}
if(match(TK("->"))) consume(TK("@id")); // eat type hints
func.code = pkpy::make_shared<CodeObject>(parser->src, func.name);
func.code = pkpy::make_shared<CodeObject>(parser->src, func.name.str());
this->codes.push(func.code);
compile_block_body();
func.code->optimize(vm);

View File

@ -5,6 +5,36 @@
#include "str.h"
namespace pkpy{
const std::vector<uint32_t> kHashSeeds = {3452556591, 3259656564, 3106121857, 2774518055, 4085946151, 4274771677, 4047908201, 2149081045, 4160441109, 4127125901, 3109730425, 2794687362, 2806137727, 2642447290, 4070996945, 3580743775, 3719956858, 2960278187, 3568486238, 3125361093, 2232173865, 4043238260, 3265527710, 2206062780, 3968387223, 3144295694, 3293736932, 3196583945, 3832534010, 3311528523, 4258510773, 4049882022, 3058077580, 2446794117, 2330081744, 2563269634, 3848248775, 2197398712, 2874906918, 3012473024, 3477039876, 2710692860, 2806508231, 3893239503, 3929140074, 3145323261, 3593960112, 2451662716, 2545939029, 2475647797, 2790321726, 4166873680, 3504262692, 3140715282, 3078827310, 3177714229, 3006241931, 3777800785, 3621627818, 3163832382, 2166076714, 3622591406, 3299007679, 2915427082, 3939911590, 4145015468, 2791077264, 3916399405, 3330576709, 2466029172, 3534773842, 2690327419, 2487859383, 3687001303, 2615131117, 3057598651, 2548471802, 3145782646, 3895406770, 2150621965, 2179753887, 2159855306, 2439700132, 2397760304, 3405860607, 4268549710, 2779408554, 2485874456, 3796299954, 4179315997, 2380599704, 3210079474, 3951990603, 3342489194, 2997361581, 3576131817, 3163713423, 2467495451, 4190562029, 2588496185};
const std::vector<uint32_t> kPrimes = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599};

// Returns the smallest prime that is >= n.
//
// Values up to the last entry of kPrimes are answered by a binary search in
// that (sorted) table. Larger values used to be returned unchanged, which
// handed callers a non-prime; they are now resolved by trial division over
// odd candidates.
//
// If no prime >= n is representable in 32 bits, n itself is returned, which
// matches the previous behaviour for that range.
uint32_t find_next_prime(uint32_t n){
    auto it = std::lower_bound(kPrimes.begin(), kPrimes.end(), n);
    if(it != kPrimes.end()) return *it;

    // n exceeds every tabulated prime, so it is > 2 and only odd candidates matter.
    uint32_t candidate = n | 1u;
    for(;;){
        bool is_prime = true;
        // 64-bit product so that d*d cannot wrap for candidates near 2^32.
        for(uint32_t d = 3; static_cast<uint64_t>(d) * d <= candidate; d += 2){
            if(candidate % d == 0){ is_prime = false; break; }
        }
        if(is_prime) return candidate;
        // candidate + 2 would overflow: give up and keep the old contract.
        if(candidate > 0xffffffffu - 2u) return n;
        candidate += 2;
    }
}
// Maps a name to a slot index in [0, capacity).
//
// The name's index is first scrambled by an unsigned 32-bit multiplication
// (wrapping on overflow) with 2654435761, then XOR-ed with `hash_seed`, and
// finally reduced modulo `capacity`.
// `capacity` must be non-zero, since it is used as a divisor.
inline uint32_t _hash(StrName key, uint32_t capacity, uint32_t hash_seed){
    const uint32_t scrambled = static_cast<uint32_t>(2654435761) * key.index;
    const uint32_t seeded = hash_seed ^ scrambled;
    return seeded % capacity;
}
uint32_t find_perfect_hash_seed(uint32_t capacity, const std::vector<StrName>& keys){
if(keys.empty()) return 0xffffffff;
std::set<uint32_t> indices;
std::vector<std::pair<uint32_t, float>> scores;
for(int i=0; i<kHashSeeds.size(); i++){
indices.clear();
for(auto key: keys){
uint32_t index = _hash(key, capacity, kHashSeeds[i]);
indices.insert(index);
}
float score = indices.size() / (float)keys.size();
scores.push_back({kHashSeeds[i], score});
}
std::sort(scores.begin(), scores.end(), [](auto a, auto b){ return a.second > b.second; });
return scores[0].first;
}
struct NameDictNode{
StrName first;
@ -13,45 +43,45 @@ namespace pkpy{
};
struct NameDict {
int _capacity;
int _size;
uint32_t _capacity;
uint32_t _size;
float _load_factor;
uint32_t _hash_seed;
NameDictNode* _a;
NameDict(int capacity=4, float load_factor=0.67):
_capacity(capacity), _size(0), _load_factor(load_factor) {
_a = new NameDictNode[_capacity];
}
NameDict(uint32_t capacity=2, float load_factor=0.67, uint32_t hash_seed=0xffffffff):
_capacity(capacity), _size(0), _load_factor(load_factor),
_hash_seed(hash_seed), _a(new NameDictNode[capacity]) {}
NameDict(const NameDict& other) {
this->_capacity = other._capacity;
this->_size = other._size;
this->_load_factor = other._load_factor;
this->_hash_seed = other._hash_seed;
this->_a = new NameDictNode[_capacity];
for(int i=0; i<_capacity; i++) _a[i] = other._a[i];
for(uint32_t i=0; i<_capacity; i++) _a[i] = other._a[i];
}
NameDict& operator=(const NameDict&) = delete;
NameDict(NameDict&&) = delete;
NameDict& operator=(NameDict&&) = delete;
int size() const { return _size; }
uint32_t size() const { return _size; }
//https://github.com/python/cpython/blob/main/Objects/dictobject.c#L175
#define HASH_PROBE(key, ok, i) \
int i = (key).index & (_capacity-1); \
bool ok = false; \
bool ok = false; uint32_t i; \
i = _hash(key, _capacity, _hash_seed); \
while(!_a[i].empty()) { \
if(_a[i].first == (key)) { ok = true; break; } \
i = (5*i + 1) & (_capacity-1); \
i = (i + 1) % _capacity; \
}
#define HASH_PROBE_OVERRIDE(key, ok, i) \
i = (key).index & (_capacity-1); \
ok = false; \
i = _hash(key, _capacity, _hash_seed); \
while(!_a[i].empty()) { \
if(_a[i].first == (key)) { ok = true; break; } \
i = (5*i + 1) & (_capacity-1); \
i = (i + 1) % _capacity; \
}
const PyVar& operator[](StrName key) const {
@ -66,20 +96,20 @@ namespace pkpy{
_a[i].first = key;
_size++;
if(_size > _capacity * _load_factor){
_rehash_2x();
_rehash(true);
HASH_PROBE_OVERRIDE(key, ok, i);
}
}
return _a[i].second;
}
void _rehash_2x(){
void _rehash(bool resize){
NameDictNode* old_a = _a;
int old_capacity = _capacity;
_capacity *= 2;
uint32_t old_capacity = _capacity;
if(resize) _capacity = find_next_prime(_capacity * 2);
_size = 0;
_a = new NameDictNode[_capacity];
for(int i=0; i<old_capacity; i++){
for(uint32_t i=0; i<old_capacity; i++){
if(old_a[i].empty()) continue;
HASH_PROBE(old_a[i].first, ok, j);
if(ok) UNREACHABLE();
@ -90,6 +120,16 @@ namespace pkpy{
delete[] old_a;
}
void _try_perfect_rehash(){
std::vector<StrName> keys;
for(uint32_t i=0; i<_capacity; i++){
if(_a[i].empty()) continue;
keys.push_back(_a[i].first);
}
_hash_seed = find_perfect_hash_seed(_capacity, keys);
_rehash(false); // do not resize
}
inline PyVar* try_get(StrName key){
HASH_PROBE(key, ok, i);
if(!ok) return nullptr;
@ -112,9 +152,9 @@ namespace pkpy{
struct iterator {
const NameDict* _dict;
int i;
uint32_t i;
iterator() = default;
iterator(const NameDict* dict, int i): _dict(dict), i(i) { _skip_empty(); }
iterator(const NameDict* dict, uint32_t i): _dict(dict), i(i) { _skip_empty(); }
inline void _skip_empty(){ while(i < _dict->_capacity && _dict->_a[i].empty()) i++;}
inline iterator& operator++(){ i++; _skip_empty(); return *this;}
@ -131,7 +171,7 @@ namespace pkpy{
_a[i].first = key;
_size++;
if(_size > _capacity * _load_factor){
_rehash_2x();
_rehash(true);
HASH_PROBE_OVERRIDE(key, ok, i);
}
}

View File

@ -22,7 +22,7 @@ struct NativeFunc {
};
struct Function {
Str name;
StrName name;
CodeObject_ code;
std::vector<StrName> args;
StrName starred_arg; // empty if no *arg
@ -99,9 +99,9 @@ struct Py_ : PyObject {
inline void _init() noexcept {
if constexpr (std::is_same_v<T, Type> || std::is_same_v<T, DummyModule>) {
_attr = new pkpy::NameDict(8, kTypeAttrLoadFactor);
_attr = new pkpy::NameDict(5, kTypeAttrLoadFactor);
}else if constexpr(std::is_same_v<T, DummyInstance>){
_attr = new pkpy::NameDict(4, kInstAttrLoadFactor);
_attr = new pkpy::NameDict(5, kInstAttrLoadFactor);
}else{
_attr = nullptr;
}

View File

@ -133,7 +133,7 @@ bool is_unicode_Lo_char(uint32_t c) {
struct StrName {
int index;
uint32_t index;
StrName(): index(-1) {}
StrName(int index): index(index) {}
StrName(const char* s): index(get(s).index) {}

View File

@ -152,7 +152,9 @@ public:
} else if(is_type(*callable, tp_function)){
const pkpy::Function& fn = PyFunction_AS_C(*callable);
auto locals = pkpy::make_shared<pkpy::NameDict>(
fn.code->ideal_locals_capacity, kLocalsLoadFactor
fn.code->perfect_locals_capacity,
kLocalsLoadFactor,
fn.code->perfect_hash_seed
);
int i = 0;
@ -184,7 +186,7 @@ public:
for(int i=0; i<kwargs.size(); i+=2){
const Str& key = PyStr_AS_C(kwargs[i]);
if(!fn.kwargs.contains(key)){
TypeError(key.escape(true) + " is an invalid keyword argument for " + fn.name + "()");
TypeError(key.escape(true) + " is an invalid keyword argument for " + fn.name.str() + "()");
}
locals->emplace(key, kwargs[i+1]);
}
@ -647,6 +649,10 @@ public:
}
post_init();
for(auto it = _types.begin(); it != _types.end(); ++it){
it->second->attr()._try_perfect_rehash();
}
builtins->attr()._try_perfect_rehash();
}
void post_init();
@ -727,6 +733,7 @@ public:
PyVar type = new_type_object(mod, T::_name(), _t(tp_object));
if(OBJ_NAME(mod) != T::_mod()) UNREACHABLE();
T::_register(this, mod, type);
type->attr()._try_perfect_rehash();
return type;
}
@ -867,12 +874,11 @@ PyVar pkpy::NativeFunc::operator()(VM* vm, pkpy::Args& args) const{
}
void CodeObject::optimize(VM* vm){
int n = 0;
for(auto& p: names) if(p.second == NAME_LOCAL) n++;
// 0->2, 1->2, 2->4, 3->4, 4->8, 5->8, 6->16
int base_n = (int)(n / kLocalsLoadFactor + 0.5);
ideal_locals_capacity = 2;
while(ideal_locals_capacity < base_n) ideal_locals_capacity *= 2;
std::vector<StrName> keys;
for(auto& p: names) if(p.second == NAME_LOCAL) keys.push_back(p.first);
uint32_t base_n = (uint32_t)(keys.size() / kLocalsLoadFactor + 0.5);
perfect_locals_capacity = pkpy::find_next_prime(base_n);
perfect_hash_seed = pkpy::find_perfect_hash_seed(perfect_locals_capacity, keys);
for(int i=1; i<codes.size(); i++){
if(codes[i].op == OP_UNARY_NEGATIVE && codes[i-1].op == OP_LOAD_CONST){