From a8db1cc5e1d6ce7364ba91aa70656563232f5ae0 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 15 Jun 2024 23:22:06 +0800 Subject: [PATCH] more move --- include/pocketpy/common/smallmap.h | 4 +-- include/pocketpy/common/str.h | 4 +++ include/pocketpy/common/str.hpp | 20 ++++++++--- include/pocketpy/common/strname.h | 18 ++++++++++ include/pocketpy/common/vector.h | 4 +++ src/common/smallmap.c | 4 +-- src/common/str.cpp | 31 ---------------- src/common/strname.c | 57 ++++++++++++++++++++++++++++++ src/common/vector.c | 7 ++++ src/compiler/lexer.cpp | 20 +++++------ src/interpreter/vm.cpp | 1 + src/objects/namedict.cpp | 4 ++- 12 files changed, 123 insertions(+), 51 deletions(-) create mode 100644 include/pocketpy/common/strname.h create mode 100644 src/common/strname.c diff --git a/include/pocketpy/common/smallmap.h b/include/pocketpy/common/smallmap.h index 3b2e04f7..c7b6cb6d 100644 --- a/include/pocketpy/common/smallmap.h +++ b/include/pocketpy/common/smallmap.h @@ -18,8 +18,8 @@ extern "C" { #define SMALLMAP_T__HEADER #define K c11_string -#define V int -#define TAG s2i +#define V uint16_t +#define TAG s2n #define less(a, b) (c11_string__cmp((a.key), (b)) < 0) #define equal(a, b) (c11_string__cmp((a), (b)) == 0) #include "pocketpy/xmacros/smallmap.h" diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index bf68929f..ece9a51b 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -32,6 +32,10 @@ PK_INLINE const char* pkpy_Str__data(const pkpy_Str* self){ return self->is_sso ? self->_inlined : self->_ptr; } +PK_INLINE c11_string pkpy_Str__sv(const pkpy_Str* self){ + return (c11_string){pkpy_Str__data(self), self->size}; +} + void pkpy_Str__ctor(pkpy_Str* self, const char* data); void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size); void pkpy_Str__dtor(pkpy_Str* self); diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index b8ba24bc..2ab20865 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -6,6 +6,7 @@ #include "pocketpy/common/vector.h" #include "pocketpy/common/vector.hpp" #include "pocketpy/common/str.h" +#include "pocketpy/common/strname.h" #include #include @@ -261,9 +262,6 @@ struct StrName { StrName(const Str& s) : index(get(s.sv()).index) {} - std::string_view sv() const; - const char* c_str() const; - bool empty() const { return index == 0; } Str escape() const { return Str(sv()).escape(); } @@ -276,8 +274,20 @@ struct StrName { bool operator> (const StrName& other) const noexcept { return sv() > other.sv(); } - static StrName get(std::string_view s); - static uint32_t _pesudo_random_index; + inline static StrName get(std::string_view s){ + uint16_t index = pkpy_StrName__map({s.data(), (int)s.size()}); + return StrName(index); + } + + std::string_view sv() const{ + c11_string s = pkpy_StrName__rmap(index); + return std::string_view(s.data, s.size); + } + + const char* c_str() const{ + c11_string s = pkpy_StrName__rmap(index); + return s.data; + } }; struct SStream: pkpy_SStream { diff --git a/include/pocketpy/common/strname.h b/include/pocketpy/common/strname.h new file mode 100644 index 00000000..aa473c8c --- /dev/null +++ b/include/pocketpy/common/strname.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include "pocketpy/common/str.h" + +#ifdef __cplusplus +extern "C" { +#endif + +uint16_t pkpy_StrName__map(c11_string name); +c11_string pkpy_StrName__rmap(uint16_t index); + +void pkpy_StrName__initialize(); +void pkpy_StrName__finalize(); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index 19d3b3d4..d0691758 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -33,6 +33,7 @@ void c11_vector__dtor(c11_vector* self); c11_vector c11_vector__copy(const c11_vector* self); void c11_vector__reserve(c11_vector* self, int capacity); void c11_vector__clear(c11_vector* self); +void* c11_vector__emplace(c11_vector* self); #define c11__getitem(T, self, index) (((T*)(self)->data)[index]) #define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value; @@ -50,6 +51,9 @@ void c11_vector__clear(c11_vector* self); (self)->count--; \ }while(0) +#define c11_vector__back(T, self) \ + (((T*)(self)->data)[(self)->count - 1]) + #define c11_vector__extend(T, self, p, size) \ do{ \ c11_vector__reserve((self), (self)->count + (size)); \ diff --git a/src/common/smallmap.c b/src/common/smallmap.c index 0f82764b..3def6cf3 100644 --- a/src/common/smallmap.c +++ b/src/common/smallmap.c @@ -12,8 +12,8 @@ #define SMALLMAP_T__SOURCE #define K c11_string -#define V int -#define TAG s2i +#define V uint16_t +#define TAG s2n #define less(a, b) (c11_string__cmp((a.key), (b)) < 0) #define equal(a, b) (c11_string__cmp((a), (b)) == 0) #include "pocketpy/xmacros/smallmap.h" diff --git a/src/common/str.cpp b/src/common/str.cpp index be0802ed..51cdcdc9 100644 --- a/src/common/str.cpp +++ b/src/common/str.cpp @@ -8,37 +8,6 @@ namespace pkpy { -static std::map& _interned() { - static std::map interned; - return interned; -} - -static std::map& _r_interned() { - static std::map r_interned; - return r_interned; -} - -std::string_view StrName::sv() const { return _r_interned()[index]; } -const char* StrName::c_str() const { return _r_interned()[index].c_str(); } - -uint32_t StrName::_pesudo_random_index = 0; - -StrName StrName::get(std::string_view s) { - // TODO: PK_GLOBAL_SCOPE_LOCK() - auto it = _interned().find(s); - if(it != _interned().end()) return StrName(it->second); - // generate new index - // https://github.com/python/cpython/blob/3.12/Objects/dictobject.c#L175 - uint16_t index = ((_pesudo_random_index * 5) + 1) & 65535; - if(index == 0) PK_FATAL_ERROR("StrName index overflow\n") - auto res = _r_interned().emplace(index, s); - assert(res.second); - s = std::string_view(res.first->second); - _interned()[s] = index; - _pesudo_random_index = index; - return StrName(index); -} - // unary operators const StrName __repr__ = StrName::get("__repr__"); const StrName __str__ = StrName::get("__str__"); diff --git a/src/common/strname.c b/src/common/strname.c new file mode 100644 index 00000000..a629bd0c --- /dev/null +++ b/src/common/strname.c @@ -0,0 +1,57 @@ +#include "pocketpy/common/strname.h" +#include "pocketpy/common/smallmap.h" +#include "pocketpy/common/utils.h" +#include "pocketpy/common/vector.h" + +#include + +// TODO: use a more efficient data structure +static c11_smallmap_s2n _interned; +static c11_vector/*T=char* */ _r_interned; +static bool _initialized = false; + +void pkpy_StrName__initialize(){ + if(_initialized) return; + c11_smallmap_s2n__ctor(&_interned); + for(int i=0; i<_r_interned.count; i++){ + free(c11__at(char*, &_r_interned, i)); + } + c11_vector__ctor(&_r_interned, sizeof(c11_string)); + _initialized = true; +} + +void pkpy_StrName__finalize(){ + if(!_initialized) return; + c11_smallmap_s2n__dtor(&_interned); + c11_vector__dtor(&_r_interned); +} + +uint16_t pkpy_StrName__map(c11_string name){ + // TODO: PK_GLOBAL_SCOPE_LOCK() + if(!_initialized){ + pkpy_StrName__initialize(); // lazy init + } + uint16_t index = c11_smallmap_s2n__get(&_interned, name, 0); + if(index != 0) return index; + // generate new index + if(_interned.count > 65530){ + PK_FATAL_ERROR("StrName index overflow\n"); + } + // NOTE: we must allocate the string in the heap so iterators are not invalidated + char* p = malloc(name.size + 1); + memcpy(p, name.data, name.size); + p[name.size] = '\0'; + c11_vector__push(char*, &_r_interned, p); + index = _r_interned.count; // 1-based + // save to _interned + c11_smallmap_s2n__set(&_interned, (c11_string){p, name.size}, index); + assert(_interned.count == _r_interned.count); + return index; +} + +c11_string pkpy_StrName__rmap(uint16_t index){ + assert(_initialized); + assert(index > 0 && index <= _interned.count); + char* p = c11__getitem(char*, &_r_interned, index - 1); + return (c11_string){p, strlen(p)}; +} diff --git a/src/common/vector.c b/src/common/vector.c index 44e76d66..4696a0f6 100644 --- a/src/common/vector.c +++ b/src/common/vector.c @@ -55,3 +55,10 @@ void c11_vector__reserve(c11_vector* self, int capacity){ void c11_vector__clear(c11_vector* self){ self->count = 0; } + +void* c11_vector__emplace(c11_vector* self){ + if(self->count == self->capacity) c11_vector__reserve(self, self->capacity*2); + void* p = (char*)self->data + self->elem_size * self->count; + self->count++; + return p; +} diff --git a/src/compiler/lexer.cpp b/src/compiler/lexer.cpp index 1ac3c5dc..19cee3f5 100644 --- a/src/compiler/lexer.cpp +++ b/src/compiler/lexer.cpp @@ -620,14 +620,14 @@ Error* Lexer::precompile(Str* out) noexcept{ ss << "pkpy:" PK_VERSION << '\n'; // L1: version string ss << (int)src->mode << '\n'; // L2: mode - c11_smallmap_s2i token_indices; - c11_smallmap_s2i__ctor(&token_indices); + c11_smallmap_s2n token_indices; + c11_smallmap_s2n__ctor(&token_indices); for(auto token: nexts) { if(is_raw_string_used(token.type)) { c11_string token_sv = {token.start, token.length}; - if(!c11_smallmap_s2i__contains(&token_indices, token_sv)) { - c11_smallmap_s2i__set(&token_indices, token_sv, 0); + if(!c11_smallmap_s2n__contains(&token_indices, token_sv)) { + c11_smallmap_s2n__set(&token_indices, token_sv, 0); // assert no '\n' in token.sv() for(char c: token.sv()) assert(c != '\n'); @@ -635,9 +635,9 @@ Error* Lexer::precompile(Str* out) noexcept{ } } ss << "=" << (int)token_indices.count << '\n'; // L3: raw string count - int index = 0; + uint16_t index = 0; for(int i=0; ikey << '\n'; // L4: raw strings kv->value = index++; } @@ -647,9 +647,9 @@ Error* Lexer::precompile(Str* out) noexcept{ const Token& token = nexts[i]; ss << (int)token.type << ','; if(is_raw_string_used(token.type)) { - int index = c11_smallmap_s2i__get(&token_indices, {token.start, token.length}, -1); - assert(index >= 0); - ss << index << ','; + uint16_t *p = c11_smallmap_s2n__try_get(&token_indices, {token.start, token.length}); + assert(p != NULL); + ss << (int)*p << ','; } if(i > 0 && nexts[i - 1].line == token.line) ss << ','; @@ -677,7 +677,7 @@ Error* Lexer::precompile(Str* out) noexcept{ token.value); } *out = ss.str(); - c11_smallmap_s2i__dtor(&token_indices); + c11_smallmap_s2n__dtor(&token_indices); return NULL; } diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index 56b24dde..f232f135 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -83,6 +83,7 @@ struct JsonSerializer { VM::VM(bool enable_os) : heap(this), enable_os(enable_os) { Pools_initialize(); + pkpy_StrName__initialize(); this->vm = this; this->__c.error = nullptr; diff --git a/src/objects/namedict.cpp b/src/objects/namedict.cpp index 8b76fafc..5b475087 100644 --- a/src/objects/namedict.cpp +++ b/src/objects/namedict.cpp @@ -116,7 +116,9 @@ bool NameDict::contains(StrName key) const { PyVar NameDict::operator[] (StrName key) const { PyVar* val = try_get_2_likely_found(key); - if(val == nullptr) PK_FATAL_ERROR("NameDict key not found: %s\n", key.escape().c_str()) + if(val == nullptr){ + PK_FATAL_ERROR("NameDict key not found: %d (%s)\n", (int)key.index, key.escape().c_str()) + } return *val; }