From 3787a1da1da485153972d27d5ea9d0e62b079cad Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Mon, 10 Jun 2024 22:38:49 +0800 Subject: [PATCH 01/60] init --- CMakeLists.txt | 9 +- build.sh | 4 +- build_g.sh | 7 +- build_web.sh | 5 +- include/pocketpy/common/str.h | 52 +++++ include/pocketpy/common/str.hpp | 229 +++++++++++++++------ include/pocketpy/common/traits.hpp | 7 + include/pocketpy/common/utils.h | 29 +++ include/pocketpy/common/utils.hpp | 36 ---- include/pocketpy/common/vector.h | 55 ++++++ include/pocketpy/interpreter/gc.hpp | 2 +- include/pocketpy/interpreter/vm.hpp | 4 +- include/pocketpy/objects/namedict.hpp | 2 +- include/pocketpy/objects/sourcedata.hpp | 2 +- src/common/any.cpp | 2 +- src/common/str.c | 250 +++++++++++++++++++++++ src/common/str.cpp | 253 +----------------------- src/common/utils.c | 9 + src/common/vector.c | 61 ++++++ src/compiler/lexer.cpp | 3 +- src/interpreter/ceval.cpp | 2 +- src/interpreter/iter.cpp | 2 +- src/modules/io.cpp | 2 +- src/pocketpy.cpp | 11 +- tests/04_str.py | 4 +- 25 files changed, 681 insertions(+), 361 deletions(-) create mode 100644 include/pocketpy/common/str.h create mode 100644 include/pocketpy/common/utils.h delete mode 100644 include/pocketpy/common/utils.hpp create mode 100644 include/pocketpy/common/vector.h create mode 100644 src/common/str.c create mode 100644 src/common/utils.c create mode 100644 src/common/vector.c diff --git a/CMakeLists.txt b/CMakeLists.txt index c0d719cf..db995efa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,23 +2,30 @@ cmake_minimum_required(VERSION 3.10) project(pocketpy) +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /utf-8 /Ox /jumptablerdata /GS-") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /Ox /jumptablerdata /GS-") add_compile_options(/wd4267 /wd4244) else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti -O2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") # disable -Wshorten-64-to-32 for apple if(APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-shorten-64-to-32") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-shorten-64-to-32") endif() endif() include_directories(${CMAKE_CURRENT_LIST_DIR}/include) -file(GLOB_RECURSE POCKETPY_SRC ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp) +file(GLOB_RECURSE POCKETPY_SRC_CPP ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp) +file(GLOB_RECURSE POCKETPY_SRC_C ${CMAKE_CURRENT_LIST_DIR}/src/*.c) +set(POCKETPY_SRC ${POCKETPY_SRC_CPP} ${POCKETPY_SRC_C}) option(PK_USE_CJSON "" OFF) if(PK_USE_CJSON) diff --git a/build.sh b/build.sh index a474d17f..d1760346 100644 --- a/build.sh +++ b/build.sh @@ -18,7 +18,9 @@ if [ $? -ne 0 ]; then exit 1 fi -SRC=$(find src/ -name "*.cpp") +SRC_C=$(find src/ -name "*.c") +SRC_CPP=$(find src/ -name "*.cpp") +SRC="$SRC_C $SRC_CPP" echo "> Compiling and linking source files... " diff --git a/build_g.sh b/build_g.sh index a634da62..340e471c 100644 --- a/build_g.sh +++ b/build_g.sh @@ -1,7 +1,10 @@ python prebuild.py -SRC=$(find src/ -name "*.cpp") +SRC_C=$(find src/ -name "*.c") +SRC_CPP=$(find src/ -name "*.cpp") +SRC="$SRC_C $SRC_CPP" -FLAGS="-std=c++17 -O0 -stdlib=libc++ -Iinclude -frtti -Wfatal-errors -g -DDEBUG" +FLAGS="-std=c++17 -O0 -stdlib=libc++ -Iinclude -frtti -Wfatal-errors -g -DDEBUG -DPK_ENABLE_OS=1" clang++ $FLAGS -o main src2/main.cpp $SRC + diff --git a/build_web.sh b/build_web.sh index 8695f700..f3006c45 100644 --- a/build_web.sh +++ b/build_web.sh @@ -3,5 +3,8 @@ python prebuild.py rm -rf web/lib mkdir web/lib -SRC=$(find src/ -name "*.cpp") +SRC_C=$(find src/ -name "*.c") +SRC_CPP=$(find src/ -name "*.cpp") +SRC="$SRC_C $SRC_CPP" + em++ $SRC -Iinclude/ -fexceptions -frtti -s -Os -sEXPORTED_FUNCTIONS=_pkpy_new_repl,_pkpy_repl_input,_pkpy_new_vm -sEXPORTED_RUNTIME_METHODS=ccall -o web/lib/pocketpy.js diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h new file mode 100644 index 00000000..585073c9 --- /dev/null +++ b/include/pocketpy/common/str.h @@ -0,0 +1,52 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct pkpy_Str{ + int size; + bool is_ascii; + bool is_sso; + union{ + char* _ptr; + char _inlined[16]; + }; +} pkpy_Str; + +inline const char* pkpy_Str__data(const pkpy_Str* self){ + return self->is_sso ? self->_inlined : self->_ptr; +} + +inline int pkpy_Str__size(const pkpy_Str* self){ + return self->size; +} + +int pkpy_utils__u8len(unsigned char c, bool suppress); +void pkpy_Str__ctor(pkpy_Str* self, const char* data); +void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size); +void pkpy_Str__dtor(pkpy_Str* self); +pkpy_Str pkpy_Str__copy(const pkpy_Str* self); +pkpy_Str pkpy_Str__concat(const pkpy_Str* self, const pkpy_Str* other); +pkpy_Str pkpy_Str__concat2(const pkpy_Str* self, const char* other, int size); +pkpy_Str pkpy_Str__substr(const pkpy_Str* self, int start); +pkpy_Str pkpy_Str__substr2(const pkpy_Str* self, int start, int size); +pkpy_Str pkpy_Str__lower(const pkpy_Str* self); +pkpy_Str pkpy_Str__upper(const pkpy_Str* self); +pkpy_Str pkpy_Str__replace(const pkpy_Str* self, char old, char new_); +pkpy_Str pkpy_Str__replace2(const pkpy_Str* self, const pkpy_Str* old, const pkpy_Str* new_); +pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str* self, int i); +pkpy_Str pkpy_Str__u8_slice(const pkpy_Str* self, int start, int stop, int step); +int pkpy_Str__u8_length(const pkpy_Str* self); +int pkpy_Str__cmp(const pkpy_Str* self, const pkpy_Str* other); +int pkpy_Str__cmp2(const pkpy_Str* self, const char* other, int size); +int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i); +int pkpy_Str__byte_index_to_unicode(const pkpy_Str* self, int n); +int pkpy_Str__index(const pkpy_Str* self, const pkpy_Str* sub, int start); +int pkpy_Str__count(const pkpy_Str* self, const pkpy_Str* sub); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index e4788596..4d31ddc9 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -1,80 +1,162 @@ #pragma once -#include "pocketpy/common/utils.hpp" +#include "pocketpy/common/utils.h" #include "pocketpy/common/memorypool.hpp" #include "pocketpy/common/vector.hpp" +#include "pocketpy/common/str.h" #include +#include namespace pkpy { -int utf8len(unsigned char c, bool suppress = false); struct SStream; -struct Str { - int size; - bool is_ascii; - char* data; - char _inlined[16]; +struct Str: pkpy_Str { + bool is_inlined() const { return is_sso; } - bool is_inlined() const { return data == _inlined; } + Str(){ + pkpy_Str__ctor2(this, "", 0); + } + + Str(pkpy_Str&& s){ + std::memcpy(this, &s, sizeof(pkpy_Str)); + } + + Str(const std::string& s){ + pkpy_Str__ctor2(this, s.data(), s.size()); + } + + Str(std::string_view s){ + pkpy_Str__ctor2(this, s.data(), s.size()); + } + + Str(const char* s){ + pkpy_Str__ctor2(this, s, strlen(s)); + } + + Str(const char* s, int len){ + pkpy_Str__ctor2(this, s, len); + } - Str(); - Str(int size, bool is_ascii); - Str(const std::string& s); - Str(std::string_view s); - Str(const char* s); - Str(const char* s, int len); Str(pair); // take ownership - Str(const Str& other); - Str(Str&& other); + + Str(const Str& other){ + pkpy_Str__ctor2(this, pkpy_Str__data(&other), other.size); + } + + Str(Str&& other){ + std::memcpy(this, &other, sizeof(pkpy_Str)); + other.size = 0; + other.is_sso = true; + } operator std::string_view () const { return sv(); } - - const char* begin() const { return data; } - - const char* end() const { return data + size; } - - char operator[] (int idx) const { return data[idx]; } - + const char* begin() const { return pkpy_Str__data(this); } + const char* end() const { return pkpy_Str__data(this) + size; } int length() const { return size; } - + char operator[] (int idx) const { return pkpy_Str__data(this)[idx]; } bool empty() const { return size == 0; } - size_t hash() const { return std::hash()(sv()); } - Str& operator= (const Str&); - Str operator+ (const Str&) const; - Str operator+ (const char*) const; - friend Str operator+ (const char*, const Str&); + Str& operator= (const Str& other){ + pkpy_Str__dtor(this); + pkpy_Str__ctor2(this, pkpy_Str__data(&other), other.size); + return *this; + } - bool operator== (const std::string_view other) const; - bool operator!= (const std::string_view other) const; - bool operator< (const std::string_view other) const; - friend bool operator< (const std::string_view other, const Str& str); + Str operator+ (const Str& other) const{ + return pkpy_Str__concat(this, &other); + } - bool operator== (const char* p) const; - bool operator!= (const char* p) const; + Str operator+ (const char* other) const{ + return pkpy_Str__concat2(this, other, strlen(other)); + } - bool operator== (const Str& other) const; - bool operator!= (const Str& other) const; - bool operator< (const Str& other) const; - bool operator> (const Str& other) const; - bool operator<= (const Str& other) const; - bool operator>= (const Str& other) const; + friend Str operator+ (const char* self, const Str& other){ + pkpy_Str tmp; + pkpy_Str__ctor2(&tmp, self, strlen(self)); + pkpy_Str retval = pkpy_Str__concat(&tmp, &other); + pkpy_Str__dtor(&tmp); + return retval; + } - ~Str(); + bool operator== (const std::string_view other) const{ + int res = pkpy_Str__cmp2(this, other.data(), other.size()); + return res == 0; + } - friend std::ostream& operator<< (std::ostream& os, const Str& str); + bool operator!= (const std::string_view other) const{ + int res = pkpy_Str__cmp2(this, other.data(), other.size()); + return res != 0; + } - const char* c_str() const { return data; } + bool operator< (const std::string_view other) const{ + int res = pkpy_Str__cmp2(this, other.data(), other.size()); + return res < 0; + } - std::string_view sv() const { return std::string_view(data, size); } + friend bool operator< (const std::string_view other, const Str& str){ + int res = pkpy_Str__cmp2(&str, other.data(), other.size()); + return res > 0; + } - std::string str() const { return std::string(data, size); } + bool operator== (const char* p) const{ + int res = pkpy_Str__cmp2(this, p, strlen(p)); + return res == 0; + } + + bool operator!= (const char* p) const{ + int res = pkpy_Str__cmp2(this, p, strlen(p)); + return res != 0; + } + + bool operator== (const Str& other) const{ + return pkpy_Str__cmp(this, &other) == 0; + } + bool operator!= (const Str& other) const{ + return pkpy_Str__cmp(this, &other) != 0; + } + bool operator< (const Str& other) const{ + return pkpy_Str__cmp(this, &other) < 0; + } + bool operator> (const Str& other) const{ + return pkpy_Str__cmp(this, &other) > 0; + } + bool operator<= (const Str& other) const{ + return pkpy_Str__cmp(this, &other) <= 0; + } + bool operator>= (const Str& other) const{ + return pkpy_Str__cmp(this, &other) >= 0; + } + + ~Str(){ + pkpy_Str__dtor(this); + } + + friend std::ostream& operator<< (std::ostream& os, const Str& self){ + os.write(pkpy_Str__data(&self), self.size); + return os; + } + + const char* c_str() const { return pkpy_Str__data(this); } + + std::string_view sv() const { + return std::string_view(pkpy_Str__data(this), size); + } + + std::string str() const { + return std::string(pkpy_Str__data(this), size); + } + + Str substr(int start, int size) const{ + return pkpy_Str__substr2(this, start, size); + } + + Str substr(int start) const{ + return pkpy_Str__substr(this, start); + } - Str substr(int start, int len) const; - Str substr(int start) const; Str strip(bool left, bool right, const Str& chars) const; Str strip(bool left = true, bool right = true) const; @@ -82,23 +164,52 @@ struct Str { Str rstrip() const { return strip(false, true); } - Str lower() const; - Str upper() const; + Str lower() const{ + return pkpy_Str__lower(this); + } + Str upper() const{ + return pkpy_Str__upper(this); + } + Str replace(char old, char new_) const{ + return pkpy_Str__replace(this, old, new_); + } + Str replace(const Str& old, const Str& new_) const{ + return pkpy_Str__replace2(this, &old, &new_); + } + Str escape(bool single_quote = true) const; void escape_(SStream& ss, bool single_quote = true) const; - int index(const Str& sub, int start = 0) const; - Str replace(char old, char new_) const; - Str replace(const Str& old, const Str& new_, int count = -1) const; vector split(const Str& sep) const; vector split(char sep) const; - int count(const Str& sub) const; + + int index(const Str& sub, int start = 0) const{ + return pkpy_Str__index(this, &sub, start); + } + + int count(const Str& sub) const{ + return pkpy_Str__count(this, &sub); + } /*************unicode*************/ - int _unicode_index_to_byte(int i) const; - int _byte_index_to_unicode(int n) const; - Str u8_getitem(int i) const; - Str u8_slice(int start, int stop, int step) const; - int u8_length() const; + int _unicode_index_to_byte(int i) const{ + return pkpy_Str__unicode_index_to_byte(this, i); + } + + int _byte_index_to_unicode(int n) const{ + return pkpy_Str__byte_index_to_unicode(this, n); + } + + Str u8_getitem(int i) const{ + return pkpy_Str__u8_getitem(this, i); + } + + Str u8_slice(int start, int stop, int step) const{ + return pkpy_Str__u8_slice(this, start, stop, step); + } + + int u8_length() const{ + return pkpy_Str__u8_length(this); + } }; struct StrName { diff --git a/include/pocketpy/common/traits.hpp b/include/pocketpy/common/traits.hpp index 35e90220..4aa1c789 100644 --- a/include/pocketpy/common/traits.hpp +++ b/include/pocketpy/common/traits.hpp @@ -37,4 +37,11 @@ struct has_gc_marker> : std::true_type {} template constexpr inline int py_sizeof = 16 + sizeof(T); + +#define PK_ALWAYS_PASS_BY_POINTER(T) \ + T(const T&) = delete; \ + T& operator= (const T&) = delete; \ + T(T&&) = delete; \ + T& operator= (T&&) = delete; + } // namespace pkpy diff --git a/include/pocketpy/common/utils.h b/include/pocketpy/common/utils.h new file mode 100644 index 00000000..52d4bfc2 --- /dev/null +++ b/include/pocketpy/common/utils.h @@ -0,0 +1,29 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#define PK_REGION(name) 1 + +#define PK_SLICE_LOOP(i, start, stop, step) for(int i = start; step > 0 ? i < stop : i > stop; i += step) + +// global constants +#define PK_HEX_TABLE "0123456789abcdef" + +extern const char* kPlatformStrings[]; + +#ifdef _MSC_VER +#define PK_UNREACHABLE() __assume(0); +#else +#define PK_UNREACHABLE() __builtin_unreachable(); +#endif + +#define PK_FATAL_ERROR(...) { fprintf(stderr, __VA_ARGS__); abort(); } + +#define PK_MIN(a, b) ((a) < (b) ? (a) : (b)) +#define PK_MAX(a, b) ((a) > (b) ? (a) : (b)) + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/include/pocketpy/common/utils.hpp b/include/pocketpy/common/utils.hpp deleted file mode 100644 index abf53c0e..00000000 --- a/include/pocketpy/common/utils.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#define PK_REGION(name) 1 - -#define PK_ALWAYS_PASS_BY_POINTER(T) \ - T(const T&) = delete; \ - T& operator= (const T&) = delete; \ - T(T&&) = delete; \ - T& operator= (T&&) = delete; - -#define PK_SLICE_LOOP(i, start, stop, step) for(int i = start; step > 0 ? i < stop : i > stop; i += step) - -namespace pkpy { - -// global constants -const inline char* PK_HEX_TABLE = "0123456789abcdef"; - -const inline char* kPlatformStrings[] = { - "win32", // 0 - "emscripten", // 1 - "ios", // 2 - "darwin", // 3 - "android", // 4 - "linux", // 5 - "unknown" // 6 -}; - -#ifdef _MSC_VER -#define PK_UNREACHABLE() __assume(0); -#else -#define PK_UNREACHABLE() __builtin_unreachable(); -#endif - -#define PK_FATAL_ERROR(...) { fprintf(stderr, __VA_ARGS__); std::abort(); } - -} // namespace pkpy diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h new file mode 100644 index 00000000..6c9c33d1 --- /dev/null +++ b/include/pocketpy/common/vector.h @@ -0,0 +1,55 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct c11_array{ + void* data; + int count; + int elem_size; +} c11_array; + +void c11_array__ctor(c11_array* self, int elem_size, int count); +void c11_array__dtor(c11_array* self); +c11_array c11_array__copy(const c11_array* self); +void* c11_array__at(c11_array* self, int index); + +typedef struct c11_vector{ + void* data; + int count; + int capacity; + int elem_size; +} c11_vector; + +void c11_vector__ctor(c11_vector* self, int elem_size); +void c11_vector__dtor(c11_vector* self); +c11_vector c11_vector__copy(const c11_vector* self); +void* c11_vector__at(c11_vector* self, int index); +void c11_vector__reserve(c11_vector* self, int capacity); + +#define c11__getitem(T, self, index) ((T*)(self)->data)[index] +#define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value; + +#define c11_vector__push_back(T, self, elem) \ + do{ \ + if((self)->count == (self)->capacity) c11_vector__reserve((self), (self)->capacity*2); \ + ((T*)(self)->data)[(self)->count] = (elem); \ + (self)->count++; \ + }while(0) + +#define c11_vector__pop_back(T, self) \ + do{ \ + (self)->count--; \ + }while(0) + +#define c11_vector__extend(T, self, p, size) \ + do{ \ + c11_vector__reserve((self), (self)->count + (size)); \ + memcpy((T*)(self)->data + (self)->count, (p), (size) * sizeof(T)); \ + (self)->count += (size); \ + }while(0) + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/include/pocketpy/interpreter/gc.hpp b/include/pocketpy/interpreter/gc.hpp index 5c1060db..7cf64b2b 100644 --- a/include/pocketpy/interpreter/gc.hpp +++ b/include/pocketpy/interpreter/gc.hpp @@ -2,7 +2,7 @@ #include "pocketpy/common/config.h" #include "pocketpy/common/vector.hpp" -#include "pocketpy/common/utils.hpp" +#include "pocketpy/common/utils.h" #include "pocketpy/objects/object.hpp" #include "pocketpy/objects/namedict.hpp" diff --git a/include/pocketpy/interpreter/vm.hpp b/include/pocketpy/interpreter/vm.hpp index 48ed5f48..1c2ec125 100644 --- a/include/pocketpy/interpreter/vm.hpp +++ b/include/pocketpy/interpreter/vm.hpp @@ -319,8 +319,8 @@ public: #endif #if PK_REGION("Logging Methods") - virtual void stdout_write(const Str& s){ _stdout(s.data, s.size); } - virtual void stderr_write(const Str& s){ _stderr(s.data, s.size); } + virtual void stdout_write(const Str& s){ _stdout(s.c_str(), s.size); } + virtual void stderr_write(const Str& s){ _stderr(s.c_str(), s.size); } #endif #if PK_REGION("Magic Bindings") diff --git a/include/pocketpy/objects/namedict.hpp b/include/pocketpy/objects/namedict.hpp index 25922687..d396e35b 100644 --- a/include/pocketpy/objects/namedict.hpp +++ b/include/pocketpy/objects/namedict.hpp @@ -2,7 +2,7 @@ #include "pocketpy/common/config.h" #include "pocketpy/common/str.hpp" -#include "pocketpy/common/utils.hpp" +#include "pocketpy/common/utils.h" #include "pocketpy/objects/object.hpp" namespace pkpy { diff --git a/include/pocketpy/objects/sourcedata.hpp b/include/pocketpy/objects/sourcedata.hpp index 4fb96d98..07ff384e 100644 --- a/include/pocketpy/objects/sourcedata.hpp +++ b/include/pocketpy/objects/sourcedata.hpp @@ -1,6 +1,6 @@ #pragma once -#include "pocketpy/common/utils.hpp" +#include "pocketpy/common/utils.h" #include "pocketpy/common/str.hpp" namespace pkpy { diff --git a/src/common/any.cpp b/src/common/any.cpp index b2274afa..6ec3636d 100644 --- a/src/common/any.cpp +++ b/src/common/any.cpp @@ -1,5 +1,5 @@ #include "pocketpy/common/any.hpp" -#include "pocketpy/common/utils.hpp" +#include "pocketpy/common/utils.h" #include diff --git a/src/common/str.c b/src/common/str.c new file mode 100644 index 00000000..1e5bbade --- /dev/null +++ b/src/common/str.c @@ -0,0 +1,250 @@ +#include "pocketpy/common/str.h" +#include "pocketpy/common/vector.h" +#include "pocketpy/common/utils.h" + +#include +#include +#include +#include +#include + +int pkpy_utils__u8len(unsigned char c, bool suppress) { + if((c & 0b10000000) == 0) return 1; + if((c & 0b11100000) == 0b11000000) return 2; + if((c & 0b11110000) == 0b11100000) return 3; + if((c & 0b11111000) == 0b11110000) return 4; + if((c & 0b11111100) == 0b11111000) return 5; + if((c & 0b11111110) == 0b11111100) return 6; + if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n") + return 0; +} + +void pkpy_Str__ctor(pkpy_Str *self, const char *data){ + pkpy_Str__ctor2(self, data, strlen(data)); +} + +void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ + self->size = size; + self->is_ascii = true; + self->is_sso = size < sizeof(self->_inlined); + char* p; + if(self->is_sso){ + p = self->_inlined; + }else{ + self->_ptr = (char*)malloc(size + 1); + p = self->_ptr; + } + memcpy(p, data, size); + p[size] = '\0'; + // check is_ascii + for(int i = 0; i < size; i++){ + if(!isascii(p[i])){ + self->is_ascii = false; + break; + } + } +} + +void pkpy_Str__dtor(pkpy_Str *self){ + if(!self->is_sso){ + free(self->_ptr); + self->is_sso = true; + self->size = 0; + } +} + +pkpy_Str pkpy_Str__copy(const pkpy_Str *self){ + pkpy_Str retval = *self; + if(!self->is_sso){ + retval._ptr = (char*)malloc(self->size + 1); + memcpy(retval._ptr, self->_ptr, self->size + 1); + retval._ptr[retval.size] = '\0'; + } + return retval; +} + +pkpy_Str pkpy_Str__concat(const pkpy_Str *self, const pkpy_Str *other){ + pkpy_Str retval = { + .size = self->size + other->size, + .is_ascii = self->is_ascii && other->is_ascii, + .is_sso = self->size + other->size < sizeof(retval._inlined), + }; + char* p; + if(retval.is_sso){ + p = retval._inlined; + }else{ + retval._ptr = (char*)malloc(retval.size + 1); + p = retval._ptr; + } + memcpy(p, pkpy_Str__data(self), self->size); + memcpy(p + self->size, pkpy_Str__data(other), other->size); + p[retval.size] = '\0'; + return retval; +} + +pkpy_Str pkpy_Str__concat2(const pkpy_Str *self, const char *other, int size){ + pkpy_Str tmp; + pkpy_Str__ctor2(&tmp, other, size); + pkpy_Str retval = pkpy_Str__concat(self, &tmp); + pkpy_Str__dtor(&tmp); + return retval; +} + +pkpy_Str pkpy_Str__substr(const pkpy_Str *self, int start){ + return pkpy_Str__substr2(self, start, self->size - start); +} + +pkpy_Str pkpy_Str__substr2(const pkpy_Str *self, int start, int size){ + pkpy_Str retval; + pkpy_Str__ctor2(&retval, pkpy_Str__data(self) + start, size); + return retval; +} + +pkpy_Str pkpy_Str__lower(const pkpy_Str *self){ + pkpy_Str retval = pkpy_Str__copy(self); + char* p = (char*)pkpy_Str__data(&retval); + for(int i = 0; i < retval.size; i++){ + if('A' <= p[i] && p[i] <= 'Z') p[i] += 32; + } + return retval; +} + +pkpy_Str pkpy_Str__upper(const pkpy_Str *self){ + pkpy_Str retval = pkpy_Str__copy(self); + char* p = (char*)pkpy_Str__data(&retval); + for(int i = 0; i < retval.size; i++){ + if('a' <= p[i] && p[i] <= 'z') p[i] -= 32; + } + return retval; +} + +pkpy_Str pkpy_Str__replace(const pkpy_Str *self, char old, char new_){ + pkpy_Str retval = pkpy_Str__copy(self); + char* p = (char*)pkpy_Str__data(&retval); + for(int i = 0; i < retval.size; i++){ + if(p[i] == old) p[i] = new_; + } + return retval; +} + +pkpy_Str pkpy_Str__replace2(const pkpy_Str *self, const pkpy_Str *old, const pkpy_Str *new_){ + c11_vector buffer; + c11_vector__ctor(&buffer, sizeof(char)); + int start = 0; + while(true) { + int i = pkpy_Str__index(self, old, start); + if(i == -1) break; + pkpy_Str tmp = pkpy_Str__substr2(self, start, i - start); + c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); + pkpy_Str__dtor(&tmp); + c11_vector__extend(char, &buffer, pkpy_Str__data(new_), new_->size); + start = i + old->size; + } + pkpy_Str tmp = pkpy_Str__substr2(self, start, self->size - start); + c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); + pkpy_Str__dtor(&tmp); + pkpy_Str retval = { + .size = buffer.count, + .is_ascii = self->is_ascii && old->is_ascii && new_->is_ascii, + .is_sso = false, + ._ptr = (char*)buffer.data, + }; + return retval; +} + +int pkpy_Str__cmp(const pkpy_Str *self, const pkpy_Str *other){ + return pkpy_Str__cmp2(self, pkpy_Str__data(other), other->size); +} + +int pkpy_Str__cmp2(const pkpy_Str *self, const char *other, int size){ + int res = strncmp(pkpy_Str__data(self), other, PK_MIN(self->size, size)); + if(res != 0) return res; + return self->size - size; +} + +pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str *self, int i){ + i = pkpy_Str__unicode_index_to_byte(self, i); + return pkpy_Str__substr2( + self, i, + pkpy_utils__u8len(pkpy_Str__data(self)[i], false) + ); +} + +pkpy_Str pkpy_Str__u8_slice(const pkpy_Str *self, int start, int stop, int step){ + c11_vector buffer; + c11_vector__ctor(&buffer, sizeof(char)); + assert(step != 0); + if(self->is_ascii){ + const char* p = pkpy_Str__data(self); + for (int i=start; step>0 ? istop; i+=step) { + c11_vector__push_back(char, &buffer, p[i]); + } + }else{ + for (int i=start; step>0 ? istop; i+=step) { + pkpy_Str unicode = pkpy_Str__u8_getitem(self, i); + const char* p = pkpy_Str__data(&unicode); + for(int j = 0; j < unicode.size; j++){ + c11_vector__push_back(char, &buffer, p[j]); + } + pkpy_Str__dtor(&unicode); + } + } + pkpy_Str retval = { + .size = buffer.count, + .is_ascii = self->is_ascii, + .is_sso = false, + ._ptr = (char*)buffer.data, + }; + return retval; +} + +int pkpy_Str__u8_length(const pkpy_Str *self){ + return pkpy_Str__byte_index_to_unicode(self, self->size); +} + +int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i) { + if(self->is_ascii) return i; + const char* p = pkpy_Str__data(self); + int j = 0; + while(i > 0) { + j += pkpy_utils__u8len(p[j], false); + i--; + } + return j; +} + +int pkpy_Str__byte_index_to_unicode(const pkpy_Str* self, int n) { + if(self->is_ascii) return n; + const char* p = pkpy_Str__data(self); + int cnt = 0; + for(int i = 0; i < n; i++) { + if((p[i] & 0xC0) != 0x80) cnt++; + } + return cnt; +} + +int pkpy_Str__index(const pkpy_Str *self, const pkpy_Str *sub, int start){ + if(sub->size == 0) return start; + int max_end = self->size - sub->size; + const char* self_data = pkpy_Str__data(self); + const char* sub_data = pkpy_Str__data(sub); + for(int i=start; i<=max_end; i++){ + int res = memcmp(self_data + i, sub_data, sub->size); + if(res == 0) return i; + } + return -1; +} + +int pkpy_Str__count(const pkpy_Str *self, const pkpy_Str *sub){ + if(sub->size == 0) return self->size + 1; + int cnt = 0; + int start = 0; + while(true) { + int i = pkpy_Str__index(self, sub, start); + if(i == -1) break; + cnt++; + start = i + sub->size; + } + return cnt; +} + diff --git a/src/common/str.cpp b/src/common/str.cpp index fed8827a..564e5efb 100644 --- a/src/common/str.cpp +++ b/src/common/str.cpp @@ -9,159 +9,20 @@ namespace pkpy { -int utf8len(unsigned char c, bool suppress) { - if((c & 0b10000000) == 0) return 1; - if((c & 0b11100000) == 0b11000000) return 2; - if((c & 0b11110000) == 0b11100000) return 3; - if((c & 0b11111000) == 0b11110000) return 4; - if((c & 0b11111100) == 0b11111000) return 5; - if((c & 0b11111110) == 0b11111100) return 6; - if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n") - return 0; -} - -#define PK_STR_ALLOCATE() \ - if(this->size < (int)sizeof(this->_inlined)) { \ - this->data = this->_inlined; \ - } else { \ - this->data = (char*)std::malloc(this->size + 1); \ - } - -#define PK_STR_COPY_INIT(__s) \ - for(int i = 0; i < this->size; i++) { \ - this->data[i] = __s[i]; \ - if(!isascii(__s[i])) is_ascii = false; \ - } \ - this->data[this->size] = '\0'; - -Str::Str() : size(0), is_ascii(true), data(_inlined) { _inlined[0] = '\0'; } - -Str::Str(int size, bool is_ascii) : - size(size), is_ascii(is_ascii){PK_STR_ALLOCATE()} - - Str::Str(const std::string& s) : - size(s.size()), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)} - - Str::Str(std::string_view s) : - size(s.size()), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)} - - Str::Str(const char* s) : - size(strlen(s)), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)} - - Str::Str(const char* s, int len) : - size(len), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)} - - Str::Str(pair detached) : size(detached.second), is_ascii(true) { - this->data = detached.first; +Str::Str(pair detached) { + this->size = detached.second; + this->is_ascii = true; + this->is_sso = false; + this->_ptr = detached.first; for(int i = 0; i < size; i++) { - if(!isascii(data[i])) { + if(!isascii(_ptr[i])) { is_ascii = false; break; } } - assert(data[size] == '\0'); + assert(_ptr[size] == '\0'); } -Str::Str(const Str& other) : size(other.size), is_ascii(other.is_ascii) { - PK_STR_ALLOCATE() - std::memcpy(data, other.data, size); - data[size] = '\0'; -} - -Str::Str(Str&& other) : size(other.size), is_ascii(other.is_ascii) { - if(other.is_inlined()) { - data = _inlined; - for(int i = 0; i < size; i++) - _inlined[i] = other._inlined[i]; - data[size] = '\0'; - } else { - data = other.data; - // zero out `other` - other.data = other._inlined; - other.data[0] = '\0'; - other.size = 0; - } -} - -Str operator+ (const char* p, const Str& str) { - Str other(p); - return other + str; -} - -std::ostream& operator<< (std::ostream& os, const Str& str) { return os << str.sv(); } - -bool operator< (const std::string_view other, const Str& str) { return other < str.sv(); } - -Str& Str::operator= (const Str& other) { - if(!is_inlined()) std::free(data); - size = other.size; - is_ascii = other.is_ascii; - PK_STR_ALLOCATE() - std::memcpy(data, other.data, size); - data[size] = '\0'; - return *this; -} - -Str Str::operator+ (const Str& other) const { - Str ret(size + other.size, is_ascii && other.is_ascii); - std::memcpy(ret.data, data, size); - std::memcpy(ret.data + size, other.data, other.size); - ret.data[ret.size] = '\0'; - return ret; -} - -Str Str::operator+ (const char* p) const { - Str other(p); - return *this + other; -} - -bool Str::operator== (const Str& other) const { - if(size != other.size) return false; - return memcmp(data, other.data, size) == 0; -} - -bool Str::operator!= (const Str& other) const { - if(size != other.size) return true; - return memcmp(data, other.data, size) != 0; -} - -bool Str::operator== (const std::string_view other) const { - if(size != (int)other.size()) return false; - return memcmp(data, other.data(), size) == 0; -} - -bool Str::operator!= (const std::string_view other) const { - if(size != (int)other.size()) return true; - return memcmp(data, other.data(), size) != 0; -} - -bool Str::operator== (const char* p) const { return *this == std::string_view(p); } - -bool Str::operator!= (const char* p) const { return *this != std::string_view(p); } - -bool Str::operator< (const Str& other) const { return this->sv() < other.sv(); } - -bool Str::operator< (const std::string_view other) const { return this->sv() < other; } - -bool Str::operator> (const Str& other) const { return this->sv() > other.sv(); } - -bool Str::operator<= (const Str& other) const { return this->sv() <= other.sv(); } - -bool Str::operator>= (const Str& other) const { return this->sv() >= other.sv(); } - -Str::~Str() { - if(!is_inlined()) std::free(data); -} - -Str Str::substr(int start, int len) const { - Str ret(len, is_ascii); - std::memcpy(ret.data, data + start, len); - ret.data[len] = '\0'; - return ret; -} - -Str Str::substr(int start) const { return substr(start, size - start); } - Str Str::strip(bool left, bool right, const Str& chars) const { int L = 0; int R = u8_length(); @@ -177,6 +38,7 @@ Str Str::strip(bool left, bool right, const Str& chars) const { } Str Str::strip(bool left, bool right) const { + const char* data = pkpy_Str__data(this); if(is_ascii) { int L = 0; int R = size; @@ -194,24 +56,6 @@ Str Str::strip(bool left, bool right) const { } } -Str Str::lower() const { - std::string copy(data, size); - std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) { - if('A' <= c && c <= 'Z') return c + ('a' - 'A'); - return (int)c; - }); - return Str(copy); -} - -Str Str::upper() const { - std::string copy(data, size); - std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) { - if('a' <= c && c <= 'z') return c - ('a' - 'A'); - return (int)c; - }); - return Str(copy); -} - Str Str::escape(bool single_quote) const { SStream ss; escape_(ss, single_quote); @@ -220,7 +64,7 @@ Str Str::escape(bool single_quote) const { void Str::escape_(SStream& ss, bool single_quote) const { ss << (single_quote ? '\'' : '"'); - for(int i = 0; i < length(); i++) { + for(int i = 0; i < size; i++) { char c = this->operator[] (i); switch(c) { case '"': @@ -249,71 +93,6 @@ void Str::escape_(SStream& ss, bool single_quote) const { ss << (single_quote ? '\'' : '"'); } -int Str::index(const Str& sub, int start) const { - auto p = std::search(data + start, data + size, sub.data, sub.data + sub.size); - if(p == data + size) return -1; - return p - data; -} - -Str Str::replace(char old, char new_) const { - Str copied = *this; - for(int i = 0; i < copied.size; i++) { - if(copied.data[i] == old) copied.data[i] = new_; - } - return copied; -} - -Str Str::replace(const Str& old, const Str& new_, int count) const { - SStream ss; - int start = 0; - while(true) { - int i = index(old, start); - if(i == -1) break; - ss << substr(start, i - start); - ss << new_; - start = i + old.size; - if(count != -1 && --count == 0) break; - } - ss << substr(start, size - start); - return ss.str(); -} - -int Str::_unicode_index_to_byte(int i) const { - if(is_ascii) return i; - int j = 0; - while(i > 0) { - j += utf8len(data[j]); - i--; - } - return j; -} - -int Str::_byte_index_to_unicode(int n) const { - if(is_ascii) return n; - int cnt = 0; - for(int i = 0; i < n; i++) { - if((data[i] & 0xC0) != 0x80) cnt++; - } - return cnt; -} - -Str Str::u8_getitem(int i) const { - i = _unicode_index_to_byte(i); - return substr(i, utf8len(data[i])); -} - -Str Str::u8_slice(int start, int stop, int step) const { - SStream ss; - if(is_ascii) { - PK_SLICE_LOOP(i, start, stop, step) ss << data[i]; - } else { - PK_SLICE_LOOP(i, start, stop, step) ss << u8_getitem(i); - } - return ss.str(); -} - -int Str::u8_length() const { return _byte_index_to_unicode(size); } - vector Str::split(const Str& sep) const { vector result; std::string_view tmp; @@ -332,6 +111,7 @@ vector Str::split(const Str& sep) const { vector Str::split(char sep) const { vector result; + const char* data = pkpy_Str__data(this); int i = 0; for(int j = 0; j < size; j++) { if(data[j] == sep) { @@ -344,19 +124,6 @@ vector Str::split(char sep) const { return result; } -int Str::count(const Str& sub) const { - if(sub.empty()) return size + 1; - int cnt = 0; - int start = 0; - while(true) { - int i = index(sub, start); - if(i == -1) break; - cnt++; - start = i + sub.size; - } - return cnt; -} - static std::map& _interned() { static std::map interned; return interned; diff --git a/src/common/utils.c b/src/common/utils.c new file mode 100644 index 00000000..d3032366 --- /dev/null +++ b/src/common/utils.c @@ -0,0 +1,9 @@ +const char* kPlatformStrings[] = { + "win32", // 0 + "emscripten", // 1 + "ios", // 2 + "darwin", // 3 + "android", // 4 + "linux", // 5 + "unknown" // 6 +}; \ No newline at end of file diff --git a/src/common/vector.c b/src/common/vector.c new file mode 100644 index 00000000..46d83ea5 --- /dev/null +++ b/src/common/vector.c @@ -0,0 +1,61 @@ +#include "pocketpy/common/vector.h" + +#include +#include + +void c11_array__ctor(c11_array* self, int elem_size, int count){ + self->data = malloc(elem_size * count); + self->count = count; + self->elem_size = elem_size; +} + +void c11_array__dtor(c11_array* self){ + free(self->data); + self->data = NULL; + self->count = 0; +} + +c11_array c11_array__copy(const c11_array* self){ + c11_array retval; + c11_array__ctor(&retval, self->elem_size, self->count); + memcpy(retval.data, self->data, self->elem_size * self->count); + return retval; +} + +void* c11_array__at(c11_array* self, int index){ + return (char*)self->data + self->elem_size * index; +} + +void c11_vector__ctor(c11_vector* self, int elem_size){ + self->data = NULL; + self->count = 0; + self->capacity = 0; + self->elem_size = elem_size; +} + +void c11_vector__dtor(c11_vector* self){ + if(self->data) free(self->data); + self->data = NULL; + self->count = 0; + self->capacity = 0; +} + +c11_vector c11_vector__copy(const c11_vector* self){ + c11_vector retval; + c11_vector__ctor(&retval, self->elem_size); + c11_vector__reserve(&retval, self->capacity); + memcpy(retval.data, self->data, self->elem_size * self->count); + retval.count = self->count; + return retval; +} + +void* c11_vector__at(c11_vector* self, int index){ + return (char*)self->data + self->elem_size * index; +} + +void c11_vector__reserve(c11_vector* self, int capacity){ + if(capacity < 4) capacity = 4; + if(capacity <= self->capacity) return; + self->capacity = capacity; + self->data = realloc(self->data, self->elem_size * self->capacity); +} diff --git a/src/compiler/lexer.cpp b/src/compiler/lexer.cpp index f303a7ba..13226239 100644 --- a/src/compiler/lexer.cpp +++ b/src/compiler/lexer.cpp @@ -1,6 +1,7 @@ #include "pocketpy/compiler/lexer.hpp" #include "pocketpy/common/gil.hpp" #include "pocketpy/common/version.h" +#include "pocketpy/common/str.h" #include @@ -107,7 +108,7 @@ Error* Lexer::eat_name() noexcept{ curr_char--; while(true) { unsigned char c = peekchar(); - int u8bytes = utf8len(c, true); + int u8bytes = pkpy_utils__u8len(c, true); if(u8bytes == 0) return SyntaxError("invalid char: %c", c); if(u8bytes == 1) { if(isalpha(c) || c == '_' || isdigit(c)) { diff --git a/src/interpreter/ceval.cpp b/src/interpreter/ceval.cpp index 01e77832..978d296e 100644 --- a/src/interpreter/ceval.cpp +++ b/src/interpreter/ceval.cpp @@ -446,7 +446,7 @@ PyVar VM::__run_top_frame() { case OP_BUILD_BYTES: { const Str& s = CAST(Str&, TOP()); unsigned char* p = (unsigned char*)std::malloc(s.size); - std::memcpy(p, s.data, s.size); + std::memcpy(p, s.c_str(), s.size); TOP() = VAR(Bytes(p, s.size)); } DISPATCH() diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp index 15d058d5..390df813 100644 --- a/src/interpreter/iter.cpp +++ b/src/interpreter/iter.cpp @@ -49,7 +49,7 @@ void StringIter::_register(VM* vm, PyObject* mod, PyObject* type) { Str& s = PK_OBJ_GET(Str, self.ref); if(self.i == s.size) return 0; int start = self.i; - int len = utf8len(s.data[self.i]); + int len = pkpy_utils__u8len(s[self.i], false); self.i += len; vm->s_data.push(VAR(s.substr(start, len))); return 1; diff --git a/src/modules/io.cpp b/src/modules/io.cpp index 6bc551b3..30847e6b 100644 --- a/src/modules/io.cpp +++ b/src/modules/io.cpp @@ -85,7 +85,7 @@ void FileIO::_register(VM* vm, PyObject* mod, PyObject* type) { FileIO& io = PK_OBJ_GET(FileIO, args[0]); if(io.is_text) { Str& s = CAST(Str&, args[1]); - fwrite(s.data, 1, s.length(), io.fp); + fwrite(s.c_str(), 1, s.length(), io.fp); } else { Bytes& buffer = CAST(Bytes&, args[1]); fwrite(buffer.data(), 1, buffer.size(), io.fp); diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 4e7452eb..4b523275 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -539,7 +539,7 @@ void __init_builtins(VM* _vm) { double float_out; char* p_end; try { - float_out = std::strtod(s.data, &p_end); + float_out = std::strtod(s.c_str(), &p_end); if(p_end != s.end()) throw 1; } catch(...) { vm->ValueError("invalid literal for float(): " + s.escape()); } return VAR(float_out); @@ -636,13 +636,12 @@ void __init_builtins(VM* _vm) { return VAR(self.u8_getitem(i)); }); - _vm->bind(_vm->_t(VM::tp_str), "replace(self, old, new, count=-1)", [](VM* vm, ArgsView args) { + _vm->bind(_vm->_t(VM::tp_str), "replace(self, old, new)", [](VM* vm, ArgsView args) { const Str& self = _CAST(Str&, args[0]); const Str& old = CAST(Str&, args[1]); if(old.empty()) vm->ValueError("empty substring"); const Str& new_ = CAST(Str&, args[2]); - int count = CAST(int, args[3]); - return VAR(self.replace(old, new_, count)); + return VAR(self.replace(old, new_)); }); _vm->bind(_vm->_t(VM::tp_str), "split(self, sep=' ')", [](VM* vm, ArgsView args) { @@ -705,14 +704,14 @@ void __init_builtins(VM* _vm) { const Str& suffix = CAST(Str&, args[1]); int offset = self.length() - suffix.length(); if(offset < 0) return vm->False; - bool ok = memcmp(self.data + offset, suffix.data, suffix.length()) == 0; + bool ok = memcmp(self.c_str() + offset, suffix.c_str(), suffix.length()) == 0; return VAR(ok); }); _vm->bind_func(VM::tp_str, "encode", 1, [](VM* vm, ArgsView args) { const Str& self = _CAST(Str&, args[0]); Bytes retval(self.length()); - std::memcpy(retval.data(), self.data, self.length()); + std::memcpy(retval.data(), self.c_str(), self.length()); return VAR(std::move(retval)); }); diff --git a/tests/04_str.py b/tests/04_str.py index 79778a77..79ad9a8f 100644 --- a/tests/04_str.py +++ b/tests/04_str.py @@ -39,8 +39,8 @@ assert t[-5:] == 'ow!!!' assert t[3:-3] == 's is string example....wow' assert s > q;assert s < r assert s.replace("o","") == "ftball" -assert s.replace("o","O",1) == "fOotball" -assert s.replace("foo","ball",1) == "balltball" +assert s.replace("o","O") == "fOOtball" +assert s.replace("foo","ball") == "balltball" assert s.startswith('f') == True;assert s.endswith('o') == False assert t.startswith('this') == True; From 0312dbc829427033e7095eeee3f474e7d1e66cac Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Mon, 10 Jun 2024 22:46:21 +0800 Subject: [PATCH 02/60] fix scripts --- .github/workflows/main.yml | 46 +++++++++++++++++--------------------- run_profile.sh | 6 ++++- run_tests.sh | 6 ++++- 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 573a121e..e88f51d0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,21 +12,17 @@ on: - 'web/**' - '**.md' jobs: - build_win32_amalgamated: - runs-on: windows-latest - steps: - - uses: actions/checkout@v4 - - uses: ilammy/msvc-dev-cmd@v1 - - name: Compile - shell: powershell - run: | - python amalgamate.py - cd amalgamated - cl.exe /std:c++17 /EHsc /utf-8 /Ox /I. /DPK_ENABLE_OS=1 main.cpp /link /out:pkpy.exe - # - uses: actions/upload-artifact@v4 - # with: - # name: amalgamated - # path: amalgamated/pkpy.exe + # build_win32_amalgamated: + # runs-on: windows-latest + # steps: + # - uses: actions/checkout@v4 + # - uses: ilammy/msvc-dev-cmd@v1 + # - name: Compile + # shell: powershell + # run: | + # python amalgamate.py + # cd amalgamated + # cl.exe /std:c++17 /EHsc /utf-8 /Ox /I. /DPK_ENABLE_OS=1 main.cpp /link /out:pkpy.exe build_win32: runs-on: windows-latest steps: @@ -109,16 +105,16 @@ jobs: python scripts/run_tests.py - name: Benchmark run: python scripts/run_tests.py benchmark - - run: | - python amalgamate.py - cd plugins/macos/pocketpy - mkdir output - xcodebuild clean build CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO - cp -r build/Release/pocketpy.bundle output - - uses: actions/upload-artifact@v4 - with: - name: macos - path: plugins/macos/pocketpy/output + # - run: | + # python amalgamate.py + # cd plugins/macos/pocketpy + # mkdir output + # xcodebuild clean build CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO + # cp -r build/Release/pocketpy.bundle output + # - uses: actions/upload-artifact@v4 + # with: + # name: macos + # path: plugins/macos/pocketpy/output build_android: runs-on: ubuntu-latest steps: diff --git a/run_profile.sh b/run_profile.sh index 68316f8b..fa02515b 100644 --- a/run_profile.sh +++ b/run_profile.sh @@ -1,5 +1,9 @@ python prebuild.py -SRC=$(find src/ -name "*.cpp") + +SRC_C=$(find src/ -name "*.c") +SRC_CPP=$(find src/ -name "*.cpp") +SRC="$SRC_C $SRC_CPP" + g++ -pg -Og -std=c++17 -frtti -Wfatal-errors -o main $SRC src2/main.cpp -Iinclude ./main benchmarks/fib.py gprof main gmon.out > gprof.txt diff --git a/run_tests.sh b/run_tests.sh index 924a3562..695eedb9 100644 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,5 +1,9 @@ python prebuild.py -SRC=$(find src/ -name "*.cpp") + +SRC_C=$(find src/ -name "*.c") +SRC_CPP=$(find src/ -name "*.cpp") +SRC="$SRC_C $SRC_CPP" + clang++ -std=c++17 --coverage -O1 -stdlib=libc++ -frtti -Wfatal-errors -o main src2/main.cpp $SRC -Iinclude -DPK_ENABLE_OS=1 -DPK_DEBUG_PRECOMPILED_EXEC=1 -DPK_ENABLE_PROFILER=1 python scripts/run_tests.py From 878db5a828e2e2543a5cf35b1e38a6d4d805b100 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 10:47:54 +0800 Subject: [PATCH 03/60] some rename --- include/pocketpy/common/str.h | 2 +- include/pocketpy/common/vector.h | 4 ++-- src/common/str.c | 16 ++++++++-------- src/compiler/lexer.cpp | 2 +- src/interpreter/iter.cpp | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index 585073c9..c2336680 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -24,7 +24,7 @@ inline int pkpy_Str__size(const pkpy_Str* self){ return self->size; } -int pkpy_utils__u8len(unsigned char c, bool suppress); +int pkpy_utils__u8_header(unsigned char c, bool suppress); void pkpy_Str__ctor(pkpy_Str* self, const char* data); void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size); void pkpy_Str__dtor(pkpy_Str* self); diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index 6c9c33d1..ad32e216 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -31,7 +31,7 @@ void c11_vector__reserve(c11_vector* self, int capacity); #define c11__getitem(T, self, index) ((T*)(self)->data)[index] #define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value; -#define c11_vector__push_back(T, self, elem) \ +#define c11_vector__append(T, self, elem) \ do{ \ if((self)->count == (self)->capacity) c11_vector__reserve((self), (self)->capacity*2); \ ((T*)(self)->data)[(self)->count] = (elem); \ @@ -43,7 +43,7 @@ void c11_vector__reserve(c11_vector* self, int capacity); (self)->count--; \ }while(0) -#define c11_vector__extend(T, self, p, size) \ +#define c11_vector__push(T, self, p, size) \ do{ \ c11_vector__reserve((self), (self)->count + (size)); \ memcpy((T*)(self)->data + (self)->count, (p), (size) * sizeof(T)); \ diff --git a/src/common/str.c b/src/common/str.c index 1e5bbade..2dcd000d 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -8,7 +8,7 @@ #include #include -int pkpy_utils__u8len(unsigned char c, bool suppress) { +int pkpy_utils__u8_header(unsigned char c, bool suppress) { if((c & 0b10000000) == 0) return 1; if((c & 0b11100000) == 0b11000000) return 2; if((c & 0b11110000) == 0b11100000) return 3; @@ -135,13 +135,13 @@ pkpy_Str pkpy_Str__replace2(const pkpy_Str *self, const pkpy_Str *old, const pkp int i = pkpy_Str__index(self, old, start); if(i == -1) break; pkpy_Str tmp = pkpy_Str__substr2(self, start, i - start); - c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); + c11_vector__push(char, &buffer, pkpy_Str__data(&tmp), tmp.size); pkpy_Str__dtor(&tmp); - c11_vector__extend(char, &buffer, pkpy_Str__data(new_), new_->size); + c11_vector__push(char, &buffer, pkpy_Str__data(new_), new_->size); start = i + old->size; } pkpy_Str tmp = pkpy_Str__substr2(self, start, self->size - start); - c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); + c11_vector__push(char, &buffer, pkpy_Str__data(&tmp), tmp.size); pkpy_Str__dtor(&tmp); pkpy_Str retval = { .size = buffer.count, @@ -166,7 +166,7 @@ pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str *self, int i){ i = pkpy_Str__unicode_index_to_byte(self, i); return pkpy_Str__substr2( self, i, - pkpy_utils__u8len(pkpy_Str__data(self)[i], false) + pkpy_utils__u8_header(pkpy_Str__data(self)[i], false) ); } @@ -177,14 +177,14 @@ pkpy_Str pkpy_Str__u8_slice(const pkpy_Str *self, int start, int stop, int step) if(self->is_ascii){ const char* p = pkpy_Str__data(self); for (int i=start; step>0 ? istop; i+=step) { - c11_vector__push_back(char, &buffer, p[i]); + c11_vector__append(char, &buffer, p[i]); } }else{ for (int i=start; step>0 ? istop; i+=step) { pkpy_Str unicode = pkpy_Str__u8_getitem(self, i); const char* p = pkpy_Str__data(&unicode); for(int j = 0; j < unicode.size; j++){ - c11_vector__push_back(char, &buffer, p[j]); + c11_vector__append(char, &buffer, p[j]); } pkpy_Str__dtor(&unicode); } @@ -207,7 +207,7 @@ int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i) { const char* p = pkpy_Str__data(self); int j = 0; while(i > 0) { - j += pkpy_utils__u8len(p[j], false); + j += pkpy_utils__u8_header(p[j], false); i--; } return j; diff --git a/src/compiler/lexer.cpp b/src/compiler/lexer.cpp index 13226239..77130ad4 100644 --- a/src/compiler/lexer.cpp +++ b/src/compiler/lexer.cpp @@ -108,7 +108,7 @@ Error* Lexer::eat_name() noexcept{ curr_char--; while(true) { unsigned char c = peekchar(); - int u8bytes = pkpy_utils__u8len(c, true); + int u8bytes = pkpy_utils__u8_header(c, true); if(u8bytes == 0) return SyntaxError("invalid char: %c", c); if(u8bytes == 1) { if(isalpha(c) || c == '_' || isdigit(c)) { diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp index 390df813..089df39c 100644 --- a/src/interpreter/iter.cpp +++ b/src/interpreter/iter.cpp @@ -49,7 +49,7 @@ void StringIter::_register(VM* vm, PyObject* mod, PyObject* type) { Str& s = PK_OBJ_GET(Str, self.ref); if(self.i == s.size) return 0; int start = self.i; - int len = pkpy_utils__u8len(s[self.i], false); + int len = pkpy_utils__u8_header(s[self.i], false); self.i += len; vm->s_data.push(VAR(s.substr(start, len))); return 1; From 3e64b3b742b8ca02132496931591c6e8b757f460 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 12:00:15 +0800 Subject: [PATCH 04/60] some fix --- include/pocketpy/common/vector.h | 6 +++--- src/common/str.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index ad32e216..0c5d0bb7 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -31,19 +31,19 @@ void c11_vector__reserve(c11_vector* self, int capacity); #define c11__getitem(T, self, index) ((T*)(self)->data)[index] #define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value; -#define c11_vector__append(T, self, elem) \ +#define c11_vector__push(T, self, elem) \ do{ \ if((self)->count == (self)->capacity) c11_vector__reserve((self), (self)->capacity*2); \ ((T*)(self)->data)[(self)->count] = (elem); \ (self)->count++; \ }while(0) -#define c11_vector__pop_back(T, self) \ +#define c11_vector__pop(T, self) \ do{ \ (self)->count--; \ }while(0) -#define c11_vector__push(T, self, p, size) \ +#define c11_vector__extend(T, self, p, size) \ do{ \ c11_vector__reserve((self), (self)->count + (size)); \ memcpy((T*)(self)->data + (self)->count, (p), (size) * sizeof(T)); \ diff --git a/src/common/str.c b/src/common/str.c index 2dcd000d..d9c619b1 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -135,13 +135,13 @@ pkpy_Str pkpy_Str__replace2(const pkpy_Str *self, const pkpy_Str *old, const pkp int i = pkpy_Str__index(self, old, start); if(i == -1) break; pkpy_Str tmp = pkpy_Str__substr2(self, start, i - start); - c11_vector__push(char, &buffer, pkpy_Str__data(&tmp), tmp.size); + c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); pkpy_Str__dtor(&tmp); - c11_vector__push(char, &buffer, pkpy_Str__data(new_), new_->size); + c11_vector__extend(char, &buffer, pkpy_Str__data(new_), new_->size); start = i + old->size; } pkpy_Str tmp = pkpy_Str__substr2(self, start, self->size - start); - c11_vector__push(char, &buffer, pkpy_Str__data(&tmp), tmp.size); + c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); pkpy_Str__dtor(&tmp); pkpy_Str retval = { .size = buffer.count, @@ -177,14 +177,14 @@ pkpy_Str pkpy_Str__u8_slice(const pkpy_Str *self, int start, int stop, int step) if(self->is_ascii){ const char* p = pkpy_Str__data(self); for (int i=start; step>0 ? istop; i+=step) { - c11_vector__append(char, &buffer, p[i]); + c11_vector__push(char, &buffer, p[i]); } }else{ for (int i=start; step>0 ? istop; i+=step) { pkpy_Str unicode = pkpy_Str__u8_getitem(self, i); const char* p = pkpy_Str__data(&unicode); for(int j = 0; j < unicode.size; j++){ - c11_vector__append(char, &buffer, p[j]); + c11_vector__push(char, &buffer, p[j]); } pkpy_Str__dtor(&unicode); } From f0ec979815290cbcf1e004943c93aefffef5f4ce Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 12:25:04 +0800 Subject: [PATCH 05/60] move escape --- include/pocketpy/common/str.h | 3 ++ include/pocketpy/common/str.hpp | 6 ++-- src/common/str.c | 62 +++++++++++++++++++++++++++++++-- src/common/str.cpp | 37 -------------------- src/interpreter/vm.cpp | 4 +-- 5 files changed, 69 insertions(+), 43 deletions(-) diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index c2336680..d39c43af 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -35,6 +35,9 @@ pkpy_Str pkpy_Str__substr(const pkpy_Str* self, int start); pkpy_Str pkpy_Str__substr2(const pkpy_Str* self, int start, int size); pkpy_Str pkpy_Str__lower(const pkpy_Str* self); pkpy_Str pkpy_Str__upper(const pkpy_Str* self); +pkpy_Str pkpy_Str__escape(const pkpy_Str* self, char quote); +// pkpy_Str pkpy_Str__strip(const pkpy_Str* self, bool left, bool right); +// pkpy_Str pkpy_Str__strip2(const pkpy_Str* self, bool left, bool right, const pkpy_Str* chars); pkpy_Str pkpy_Str__replace(const pkpy_Str* self, char old, char new_); pkpy_Str pkpy_Str__replace2(const pkpy_Str* self, const pkpy_Str* old, const pkpy_Str* new_); pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str* self, int i); diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index 4d31ddc9..f43fb16a 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -177,8 +177,10 @@ struct Str: pkpy_Str { return pkpy_Str__replace2(this, &old, &new_); } - Str escape(bool single_quote = true) const; - void escape_(SStream& ss, bool single_quote = true) const; + Str escape(char quote='\'') const{ + return pkpy_Str__escape(this, quote); + } + vector split(const Str& sep) const; vector split(char sep) const; diff --git a/src/common/str.c b/src/common/str.c index d9c619b1..905a9f83 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -118,6 +118,62 @@ pkpy_Str pkpy_Str__upper(const pkpy_Str *self){ return retval; } +pkpy_Str pkpy_Str__escape(const pkpy_Str* self, char quote){ + assert(quote == '"' || quote == '\''); + c11_vector buffer; + c11_vector__ctor(&buffer, sizeof(char)); + c11_vector__reserve(&buffer, self->size); + c11_vector__push(char, &buffer, quote); + const char* data = pkpy_Str__data(self); + for(int i = 0; i < self->size; i++) { + char c = data[i]; + switch(c) { + case '"': case '\'': + if(c == quote) c11_vector__push(char, &buffer, '\\'); + c11_vector__push(char, &buffer, c); + break; + case '\\': + c11_vector__push(char, &buffer, '\\'); + c11_vector__push(char, &buffer, '\\'); + break; + case '\n': + c11_vector__push(char, &buffer, '\\'); + c11_vector__push(char, &buffer, 'n'); + break; + case '\r': + c11_vector__push(char, &buffer, '\\'); + c11_vector__push(char, &buffer, 'r'); + break; + case '\t': + c11_vector__push(char, &buffer, '\\'); + c11_vector__push(char, &buffer, 't'); + break; + case '\b': + c11_vector__push(char, &buffer, '\\'); + c11_vector__push(char, &buffer, 'b'); + break; + default: + if('\x00' <= c && c <= '\x1f') { + c11_vector__push(char, &buffer, '\\'); + c11_vector__push(char, &buffer, 'x'); + c11_vector__push(char, &buffer, PK_HEX_TABLE[c >> 4]); + c11_vector__push(char, &buffer, PK_HEX_TABLE[c & 0xf]); + } else { + c11_vector__push(char, &buffer, c); + } + } + } + c11_vector__push(char, &buffer, quote); + c11_vector__push(char, &buffer, '\0'); + pkpy_Str retval = { + .size = buffer.count - 1, + .is_ascii = self->is_ascii, + .is_sso = false, + ._ptr = (char*)buffer.data, + }; + return retval; +} + pkpy_Str pkpy_Str__replace(const pkpy_Str *self, char old, char new_){ pkpy_Str retval = pkpy_Str__copy(self); char* p = (char*)pkpy_Str__data(&retval); @@ -143,8 +199,9 @@ pkpy_Str pkpy_Str__replace2(const pkpy_Str *self, const pkpy_Str *old, const pkp pkpy_Str tmp = pkpy_Str__substr2(self, start, self->size - start); c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); pkpy_Str__dtor(&tmp); + c11_vector__push(char, &buffer, '\0'); pkpy_Str retval = { - .size = buffer.count, + .size = buffer.count - 1, .is_ascii = self->is_ascii && old->is_ascii && new_->is_ascii, .is_sso = false, ._ptr = (char*)buffer.data, @@ -189,8 +246,9 @@ pkpy_Str pkpy_Str__u8_slice(const pkpy_Str *self, int start, int stop, int step) pkpy_Str__dtor(&unicode); } } + c11_vector__push(char, &buffer, '\0'); pkpy_Str retval = { - .size = buffer.count, + .size = buffer.count - 1, .is_ascii = self->is_ascii, .is_sso = false, ._ptr = (char*)buffer.data, diff --git a/src/common/str.cpp b/src/common/str.cpp index 564e5efb..6471d7b2 100644 --- a/src/common/str.cpp +++ b/src/common/str.cpp @@ -56,43 +56,6 @@ Str Str::strip(bool left, bool right) const { } } -Str Str::escape(bool single_quote) const { - SStream ss; - escape_(ss, single_quote); - return ss.str(); -} - -void Str::escape_(SStream& ss, bool single_quote) const { - ss << (single_quote ? '\'' : '"'); - for(int i = 0; i < size; i++) { - char c = this->operator[] (i); - switch(c) { - case '"': - if(!single_quote) ss << '\\'; - ss << '"'; - break; - case '\'': - if(single_quote) ss << '\\'; - ss << '\''; - break; - case '\\': ss << '\\' << '\\'; break; - case '\n': ss << "\\n"; break; - case '\r': ss << "\\r"; break; - case '\t': ss << "\\t"; break; - case '\b': ss << "\\b"; break; - default: - if('\x00' <= c && c <= '\x1f') { - ss << "\\x"; // << std::hex << std::setw(2) << std::setfill('0') << (int)c; - ss << PK_HEX_TABLE[c >> 4]; - ss << PK_HEX_TABLE[c & 0xf]; - } else { - ss << c; - } - } - } - ss << (single_quote ? '\'' : '"'); -} - vector Str::split(const Str& sep) const { vector result; std::string_view tmp; diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index 3ea17293..392af7e5 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -38,7 +38,7 @@ struct JsonSerializer { if(!is_type(k, VM::tp_str)) { vm->TypeError(_S("json keys must be string, got ", _type_name(vm, vm->_tp(k)))); } - ss << _CAST(Str&, k).escape(false) << ": "; + ss << _CAST(Str&, k).escape('"') << ": "; write_object(v); }); ss << '}'; @@ -57,7 +57,7 @@ struct JsonSerializer { } else if(obj_t == vm->tp_bool) { ss << (obj == vm->True ? "true" : "false"); } else if(obj_t == vm->tp_str) { - _CAST(Str&, obj).escape_(ss, false); + ss << _CAST(Str&, obj).escape('"'); } else if(obj_t == vm->tp_list) { write_array(_CAST(List&, obj)); } else if(obj_t == vm->tp_tuple) { From 07e07831c36096475c715ab7d42f3a6e61571176 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 12:46:53 +0800 Subject: [PATCH 06/60] rename subscr to slice --- include/pocketpy/common/str.h | 4 +-- include/pocketpy/common/str.hpp | 10 ++++-- src/common/str.c | 58 +++++++++++++++++++++++++++------ src/common/str.cpp | 2 +- src/compiler/expr.cpp | 12 +++---- src/interpreter/iter.cpp | 2 +- src/interpreter/vm.cpp | 8 ++--- 7 files changed, 69 insertions(+), 27 deletions(-) diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index d39c43af..cc3185ed 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -31,8 +31,8 @@ void pkpy_Str__dtor(pkpy_Str* self); pkpy_Str pkpy_Str__copy(const pkpy_Str* self); pkpy_Str pkpy_Str__concat(const pkpy_Str* self, const pkpy_Str* other); pkpy_Str pkpy_Str__concat2(const pkpy_Str* self, const char* other, int size); -pkpy_Str pkpy_Str__substr(const pkpy_Str* self, int start); -pkpy_Str pkpy_Str__substr2(const pkpy_Str* self, int start, int size); +pkpy_Str pkpy_Str__slice(const pkpy_Str* self, int start); +pkpy_Str pkpy_Str__slice2(const pkpy_Str* self, int start, int stop); pkpy_Str pkpy_Str__lower(const pkpy_Str* self); pkpy_Str pkpy_Str__upper(const pkpy_Str* self); pkpy_Str pkpy_Str__escape(const pkpy_Str* self, char quote); diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index f43fb16a..1b2ea5a6 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -149,12 +149,16 @@ struct Str: pkpy_Str { return std::string(pkpy_Str__data(this), size); } - Str substr(int start, int size) const{ - return pkpy_Str__substr2(this, start, size); + Str slice(int start, int stop) const{ + return pkpy_Str__slice2(this, start, stop); + } + + Str slice(int start) const{ + return pkpy_Str__slice(this, start); } Str substr(int start) const{ - return pkpy_Str__substr(this, start); + return pkpy_Str__slice(this, start); } Str strip(bool left, bool right, const Str& chars) const; diff --git a/src/common/str.c b/src/common/str.c index 905a9f83..df33c737 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -90,13 +90,14 @@ pkpy_Str pkpy_Str__concat2(const pkpy_Str *self, const char *other, int size){ return retval; } -pkpy_Str pkpy_Str__substr(const pkpy_Str *self, int start){ - return pkpy_Str__substr2(self, start, self->size - start); +pkpy_Str pkpy_Str__slice(const pkpy_Str *self, int start){ + return pkpy_Str__slice2(self, start, self->size); } -pkpy_Str pkpy_Str__substr2(const pkpy_Str *self, int start, int size){ +pkpy_Str pkpy_Str__slice2(const pkpy_Str *self, int start, int stop){ pkpy_Str retval; - pkpy_Str__ctor2(&retval, pkpy_Str__data(self) + start, size); + if(stop < start) stop = start; + pkpy_Str__ctor2(&retval, pkpy_Str__data(self) + start, stop - start); return retval; } @@ -174,6 +175,45 @@ pkpy_Str pkpy_Str__escape(const pkpy_Str* self, char quote){ return retval; } +// pkpy_Str pkpy_Str__strip(const pkpy_Str *self, bool left, bool right){ +// const char* data = pkpy_Str__data(self); +// if(self->is_ascii) { +// int L = 0; +// int R = self->size; +// if(left) { +// while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r')) +// L++; +// } +// if(right) { +// while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' || data[R - 1] == '\r')) +// R--; +// } +// return pkpy_Str__substr2(self, L, R - L); +// } else { +// pkpy_Str tmp; +// pkpy_Str__ctor(&tmp, " \t\n\r"); +// pkpy_Str retval = pkpy_Str__strip2(self, left, right, &tmp); +// pkpy_Str__dtor(&tmp); +// return retval; +// } +// } + +// pkpy_Str pkpy_Str__strip2(const pkpy_Str *self, bool left, bool right, const pkpy_Str *chars){ +// int L = 0; +// int R = pkpy_Str__u8_length(self); +// pkpy_Str tmp; +// if(left) { +// tmp = pkpy_Str__u8_getitem(self, L); +// while(L < R && chars.index(u8_getitem(L)) != -1) +// L++; +// } +// if(right) { +// while(L < R && chars.index(u8_getitem(R - 1)) != -1) +// R--; +// } +// return pkpy_Str__u8_slice(self, L, R, 1); +// } + pkpy_Str pkpy_Str__replace(const pkpy_Str *self, char old, char new_){ pkpy_Str retval = pkpy_Str__copy(self); char* p = (char*)pkpy_Str__data(&retval); @@ -190,13 +230,13 @@ pkpy_Str pkpy_Str__replace2(const pkpy_Str *self, const pkpy_Str *old, const pkp while(true) { int i = pkpy_Str__index(self, old, start); if(i == -1) break; - pkpy_Str tmp = pkpy_Str__substr2(self, start, i - start); + pkpy_Str tmp = pkpy_Str__slice2(self, start, i); c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); pkpy_Str__dtor(&tmp); c11_vector__extend(char, &buffer, pkpy_Str__data(new_), new_->size); start = i + old->size; } - pkpy_Str tmp = pkpy_Str__substr2(self, start, self->size - start); + pkpy_Str tmp = pkpy_Str__slice2(self, start, self->size); c11_vector__extend(char, &buffer, pkpy_Str__data(&tmp), tmp.size); pkpy_Str__dtor(&tmp); c11_vector__push(char, &buffer, '\0'); @@ -221,10 +261,8 @@ int pkpy_Str__cmp2(const pkpy_Str *self, const char *other, int size){ pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str *self, int i){ i = pkpy_Str__unicode_index_to_byte(self, i); - return pkpy_Str__substr2( - self, i, - pkpy_utils__u8_header(pkpy_Str__data(self)[i], false) - ); + int size = pkpy_utils__u8_header(pkpy_Str__data(self)[i], false); + return pkpy_Str__slice2(self, i, i + size); } pkpy_Str pkpy_Str__u8_slice(const pkpy_Str *self, int start, int stop, int step){ diff --git a/src/common/str.cpp b/src/common/str.cpp index 6471d7b2..d30dcb2f 100644 --- a/src/common/str.cpp +++ b/src/common/str.cpp @@ -50,7 +50,7 @@ Str Str::strip(bool left, bool right) const { while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' || data[R - 1] == '\r')) R--; } - return substr(L, R - L); + return slice(L, R); } else { return strip(left, right, " \t\n\r"); } diff --git a/src/compiler/expr.cpp b/src/compiler/expr.cpp index ed0d38ce..9327ea1a 100644 --- a/src/compiler/expr.cpp +++ b/src/compiler/expr.cpp @@ -414,11 +414,11 @@ void FStringExpr::_load_simple_expr(CodeEmitContext* ctx, Str expr) { switch(expr.end()[-1]) { case 'r': repr = true; - expr = expr.substr(0, expr.size - 2); + expr = expr.slice(0, expr.size - 2); break; case 's': repr = false; - expr = expr.substr(0, expr.size - 2); + expr = expr.slice(0, expr.size - 2); break; default: break; // nothing happens } @@ -472,7 +472,7 @@ void FStringExpr::emit_(CodeEmitContext* ctx) { if(flag) { if(src[j] == '}') { // add expression - Str expr = src.substr(i, j - i); + Str expr = src.slice(i, j); // BUG: ':' is not a format specifier in f"{stack[2:]}" int conon = expr.index(":"); if(conon >= 0) { @@ -485,7 +485,7 @@ void FStringExpr::emit_(CodeEmitContext* ctx) { break; } if(ok) { - _load_simple_expr(ctx, expr.substr(0, conon)); + _load_simple_expr(ctx, expr.slice(0, conon)); ctx->emit_(OP_FORMAT_STRING, ctx->add_const_string(spec.sv()), line); } else { // ':' is not a spec indicator @@ -527,7 +527,7 @@ void FStringExpr::emit_(CodeEmitContext* ctx) { i = j; while(j < src.size && src[j] != '{' && src[j] != '}') j++; - Str literal = src.substr(i, j - i); + Str literal = src.slice(i, j); ctx->emit_(OP_LOAD_CONST, ctx->add_const_string(literal.sv()), line); count++; continue; // skip j++ @@ -538,7 +538,7 @@ void FStringExpr::emit_(CodeEmitContext* ctx) { if(flag) { // literal - Str literal = src.substr(i, src.size - i); + Str literal = src.slice(i, src.size); ctx->emit_(OP_LOAD_CONST, ctx->add_const_string(literal.sv()), line); count++; } diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp index 089df39c..18c484d0 100644 --- a/src/interpreter/iter.cpp +++ b/src/interpreter/iter.cpp @@ -51,7 +51,7 @@ void StringIter::_register(VM* vm, PyObject* mod, PyObject* type) { int start = self.i; int len = pkpy_utils__u8_header(s[self.i], false); self.i += len; - vm->s_data.push(VAR(s.substr(start, len))); + vm->s_data.push(VAR(s.slice(start, self.i))); return 1; }); } diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index 392af7e5..ce4d1114 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -623,7 +623,7 @@ PyVar VM::__format_object(PyVar obj, Str spec) { case 'd': case 's': type = spec.end()[-1]; - spec = spec.substr(0, spec.length() - 1); + spec = spec.slice(0, spec.length() - 1); break; default: type = ' '; break; } @@ -660,9 +660,9 @@ PyVar VM::__format_object(PyVar obj, Str spec) { if(dot == 0) { width = -1; } else { - width = std::stoi(spec.substr(0, dot).str()); + width = std::stoi(spec.slice(0, dot).str()); } - precision = std::stoi(spec.substr(dot + 1).str()); + precision = std::stoi(spec.slice(dot + 1).str()); } else { width = std::stoi(spec.str()); precision = -1; @@ -761,7 +761,7 @@ static std::string _opcode_argstr(VM* vm, int i, Bytecode byte, const CodeObject Str VM::disassemble(CodeObject_ co) { auto pad = [](const Str& s, const int n) { - if(s.length() >= n) return s.substr(0, n); + if(s.length() >= n) return s.slice(0, n); return s + std::string(n - s.length(), ' '); }; From 6a70f535b3ded10cd1e437f3f585fcd2e94a7c8c Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 12:52:22 +0800 Subject: [PATCH 07/60] move strip --- include/pocketpy/common/str.h | 4 +- include/pocketpy/common/str.hpp | 9 +++- src/common/str.c | 82 ++++++++++++++++++--------------- src/common/str.cpp | 33 ------------- 4 files changed, 54 insertions(+), 74 deletions(-) diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index cc3185ed..26d8e0a0 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -36,8 +36,8 @@ pkpy_Str pkpy_Str__slice2(const pkpy_Str* self, int start, int stop); pkpy_Str pkpy_Str__lower(const pkpy_Str* self); pkpy_Str pkpy_Str__upper(const pkpy_Str* self); pkpy_Str pkpy_Str__escape(const pkpy_Str* self, char quote); -// pkpy_Str pkpy_Str__strip(const pkpy_Str* self, bool left, bool right); -// pkpy_Str pkpy_Str__strip2(const pkpy_Str* self, bool left, bool right, const pkpy_Str* chars); +pkpy_Str pkpy_Str__strip(const pkpy_Str* self, bool left, bool right); +pkpy_Str pkpy_Str__strip2(const pkpy_Str* self, bool left, bool right, const pkpy_Str* chars); pkpy_Str pkpy_Str__replace(const pkpy_Str* self, char old, char new_); pkpy_Str pkpy_Str__replace2(const pkpy_Str* self, const pkpy_Str* old, const pkpy_Str* new_); pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str* self, int i); diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index 1b2ea5a6..65497159 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -161,8 +161,13 @@ struct Str: pkpy_Str { return pkpy_Str__slice(this, start); } - Str strip(bool left, bool right, const Str& chars) const; - Str strip(bool left = true, bool right = true) const; + Str strip(bool left, bool right, const Str& chars) const{ + return pkpy_Str__strip2(this, left, right, &chars); + } + + Str strip(bool left = true, bool right = true) const{ + return pkpy_Str__strip(this, left, right); + } Str lstrip() const { return strip(true, false); } diff --git a/src/common/str.c b/src/common/str.c index df33c737..26df5ae4 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -175,44 +175,52 @@ pkpy_Str pkpy_Str__escape(const pkpy_Str* self, char quote){ return retval; } -// pkpy_Str pkpy_Str__strip(const pkpy_Str *self, bool left, bool right){ -// const char* data = pkpy_Str__data(self); -// if(self->is_ascii) { -// int L = 0; -// int R = self->size; -// if(left) { -// while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r')) -// L++; -// } -// if(right) { -// while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' || data[R - 1] == '\r')) -// R--; -// } -// return pkpy_Str__substr2(self, L, R - L); -// } else { -// pkpy_Str tmp; -// pkpy_Str__ctor(&tmp, " \t\n\r"); -// pkpy_Str retval = pkpy_Str__strip2(self, left, right, &tmp); -// pkpy_Str__dtor(&tmp); -// return retval; -// } -// } +pkpy_Str pkpy_Str__strip(const pkpy_Str *self, bool left, bool right){ + const char* data = pkpy_Str__data(self); + if(self->is_ascii) { + int L = 0; + int R = self->size; + if(left) { + while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r')) + L++; + } + if(right) { + while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' || data[R - 1] == '\r')) + R--; + } + return pkpy_Str__slice2(self, L, R); + } else { + pkpy_Str tmp; + pkpy_Str__ctor(&tmp, " \t\n\r"); + pkpy_Str retval = pkpy_Str__strip2(self, left, right, &tmp); + pkpy_Str__dtor(&tmp); + return retval; + } +} -// pkpy_Str pkpy_Str__strip2(const pkpy_Str *self, bool left, bool right, const pkpy_Str *chars){ -// int L = 0; -// int R = pkpy_Str__u8_length(self); -// pkpy_Str tmp; -// if(left) { -// tmp = pkpy_Str__u8_getitem(self, L); -// while(L < R && chars.index(u8_getitem(L)) != -1) -// L++; -// } -// if(right) { -// while(L < R && chars.index(u8_getitem(R - 1)) != -1) -// R--; -// } -// return pkpy_Str__u8_slice(self, L, R, 1); -// } +pkpy_Str pkpy_Str__strip2(const pkpy_Str *self, bool left, bool right, const pkpy_Str *chars){ + int L = 0; + int R = pkpy_Str__u8_length(self); + if(left) { + while(L < R){ + pkpy_Str tmp = pkpy_Str__u8_getitem(self, L); + bool found = pkpy_Str__index(chars, &tmp, 0) != -1; + pkpy_Str__dtor(&tmp); + if(!found) break; + L++; + } + } + if(right) { + while(L < R){ + pkpy_Str tmp = pkpy_Str__u8_getitem(self, R - 1); + bool found = pkpy_Str__index(chars, &tmp, 0) != -1; + pkpy_Str__dtor(&tmp); + if(!found) break; + R--; + } + } + return pkpy_Str__u8_slice(self, L, R, 1); +} pkpy_Str pkpy_Str__replace(const pkpy_Str *self, char old, char new_){ pkpy_Str retval = pkpy_Str__copy(self); diff --git a/src/common/str.cpp b/src/common/str.cpp index d30dcb2f..7f3124da 100644 --- a/src/common/str.cpp +++ b/src/common/str.cpp @@ -23,39 +23,6 @@ Str::Str(pair detached) { assert(_ptr[size] == '\0'); } -Str Str::strip(bool left, bool right, const Str& chars) const { - int L = 0; - int R = u8_length(); - if(left) { - while(L < R && chars.index(u8_getitem(L)) != -1) - L++; - } - if(right) { - while(L < R && chars.index(u8_getitem(R - 1)) != -1) - R--; - } - return u8_slice(L, R, 1); -} - -Str Str::strip(bool left, bool right) const { - const char* data = pkpy_Str__data(this); - if(is_ascii) { - int L = 0; - int R = size; - if(left) { - while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r')) - L++; - } - if(right) { - while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' || data[R - 1] == '\r')) - R--; - } - return slice(L, R); - } else { - return strip(left, right, " \t\n\r"); - } -} - vector Str::split(const Str& sep) const { vector result; std::string_view tmp; From 2fd92764fd334a7129e459c8ad86892032585bcb Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 12:54:54 +0800 Subject: [PATCH 08/60] fix CI --- .github/workflows/main.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e88f51d0..1b5a0207 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -54,18 +54,19 @@ jobs: platform: x64 - name: Install libc++ run: sudo apt-get install -y libc++-15-dev libc++1-15 libc++abi-15-dev libc++abi1-15 libclang-rt-15-dev - - name: Unit Test with Coverage - run: bash run_tests.sh - - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4 - with: - token: ${{ secrets.CODECOV_TOKEN }} - directory: .coverage - if: github.ref == 'refs/heads/main' - - name: Compile + # - name: Unit Test with Coverage + # run: bash run_tests.sh + # - name: Upload coverage reports to Codecov + # uses: codecov/codecov-action@v4 + # with: + # token: ${{ secrets.CODECOV_TOKEN }} + # directory: .coverage + # if: github.ref == 'refs/heads/main' + - name: Compile and Test run: | mkdir -p output/x86_64 python cmake_build.py + python scripts/run_tests.py cp main output/x86_64 cp libpocketpy.so output/x86_64 env: From 874d3a0b88e2ee32a52b368430e3d4a7e8ab20e3 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 13:37:28 +0800 Subject: [PATCH 09/60] complete string move --- include/pocketpy/common/str.h | 9 ++++++++ include/pocketpy/common/str.hpp | 24 ++++++++++++++++++-- include/pocketpy/common/vector.h | 1 + src/common/str.c | 39 +++++++++++++++++++++++++++++++- src/common/str.cpp | 31 ------------------------- src/common/vector.c | 9 ++++++++ 6 files changed, 79 insertions(+), 34 deletions(-) diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index 26d8e0a0..7673948a 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -5,6 +5,13 @@ extern "C" { #endif #include +#include "pocketpy/common/vector.h" + +/* string_view */ +typedef struct c11_string{ + const char* data; + int size; +} c11_string; typedef struct pkpy_Str{ int size; @@ -49,6 +56,8 @@ int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i); int pkpy_Str__byte_index_to_unicode(const pkpy_Str* self, int n); int pkpy_Str__index(const pkpy_Str* self, const pkpy_Str* sub, int start); int pkpy_Str__count(const pkpy_Str* self, const pkpy_Str* sub); +c11_array/* T=c11_string */ pkpy_Str__split(const pkpy_Str* self, char sep); +c11_array/* T=c11_string */ pkpy_Str__split2(const pkpy_Str* self, const pkpy_Str* sep); #ifdef __cplusplus } diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index 65497159..414d3f5a 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -2,6 +2,7 @@ #include "pocketpy/common/utils.h" #include "pocketpy/common/memorypool.hpp" +#include "pocketpy/common/vector.h" #include "pocketpy/common/vector.hpp" #include "pocketpy/common/str.h" @@ -190,8 +191,27 @@ struct Str: pkpy_Str { return pkpy_Str__escape(this, quote); } - vector split(const Str& sep) const; - vector split(char sep) const; + vector split(const Str& sep) const{ + c11_array/* T=c11_string */ res = pkpy_Str__split2(this, &sep); + vector retval(res.count); + for(int i = 0; i < res.count; i++){ + c11_string tmp = c11__getitem(c11_string, &res, i); + retval[i] = std::string_view(tmp.data, tmp.size); + } + c11_array__dtor(&res); + return retval; + } + + vector split(char sep) const{ + c11_array/* T=c11_string */ res = pkpy_Str__split(this, sep); + vector retval(res.count); + for(int i = 0; i < res.count; i++){ + c11_string tmp = c11__getitem(c11_string, &res, i); + retval[i] = std::string_view(tmp.data, tmp.size); + } + c11_array__dtor(&res); + return retval; + } int index(const Str& sub, int start = 0) const{ return pkpy_Str__index(this, &sub, start); diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index 0c5d0bb7..4cb99c25 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -27,6 +27,7 @@ void c11_vector__dtor(c11_vector* self); c11_vector c11_vector__copy(const c11_vector* self); void* c11_vector__at(c11_vector* self, int index); void c11_vector__reserve(c11_vector* self, int capacity); +c11_array c11_vector__as_array(c11_vector* self); #define c11__getitem(T, self, index) ((T*)(self)->data)[index] #define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value; diff --git a/src/common/str.c b/src/common/str.c index 26df5ae4..e369c1da 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -1,5 +1,4 @@ #include "pocketpy/common/str.h" -#include "pocketpy/common/vector.h" #include "pocketpy/common/utils.h" #include @@ -352,3 +351,41 @@ int pkpy_Str__count(const pkpy_Str *self, const pkpy_Str *sub){ return cnt; } +c11_array/* T=c11_string */ pkpy_Str__split(const pkpy_Str *self, char sep){ + c11_vector retval; + c11_vector__ctor(&retval, sizeof(c11_string)); + const char* data = pkpy_Str__data(self); + int i = 0; + for(int j = 0; j < self->size; j++) { + if(data[j] == sep) { + if(j > i){ + c11_string tmp = {data + i, j - i}; + c11_vector__push(c11_string, &retval, tmp); + } + i = j + 1; + continue; + } + } + if(self->size > i){ + c11_string tmp = {data + i, self->size - i}; + c11_vector__push(c11_string, &retval, tmp); + } + return c11_vector__as_array(&retval); +} + +c11_array/* T=c11_string */ pkpy_Str__split2(const pkpy_Str *self, const pkpy_Str *sep){ + c11_vector retval; + c11_vector__ctor(&retval, sizeof(c11_string)); + int start = 0; + const char* data = pkpy_Str__data(self); + while(true) { + int i = pkpy_Str__index(self, sep, start); + if(i == -1) break; + c11_string tmp = {data + start, i - start}; + if(tmp.size != 0) c11_vector__push(c11_string, &retval, tmp); + start = i + sep->size; + } + c11_string tmp = {data + start, self->size - start}; + if(tmp.size != 0) c11_vector__push(c11_string, &retval, tmp); + return c11_vector__as_array(&retval); +} diff --git a/src/common/str.cpp b/src/common/str.cpp index 7f3124da..f9f14fab 100644 --- a/src/common/str.cpp +++ b/src/common/str.cpp @@ -23,37 +23,6 @@ Str::Str(pair detached) { assert(_ptr[size] == '\0'); } -vector Str::split(const Str& sep) const { - vector result; - std::string_view tmp; - int start = 0; - while(true) { - int i = index(sep, start); - if(i == -1) break; - tmp = sv().substr(start, i - start); - if(!tmp.empty()) result.push_back(tmp); - start = i + sep.size; - } - tmp = sv().substr(start, size - start); - if(!tmp.empty()) result.push_back(tmp); - return result; -} - -vector Str::split(char sep) const { - vector result; - const char* data = pkpy_Str__data(this); - int i = 0; - for(int j = 0; j < size; j++) { - if(data[j] == sep) { - if(j > i) result.emplace_back(data + i, j - i); - i = j + 1; - continue; - } - } - if(size > i) result.emplace_back(data + i, size - i); - return result; -} - static std::map& _interned() { static std::map interned; return interned; diff --git a/src/common/vector.c b/src/common/vector.c index 46d83ea5..92137c58 100644 --- a/src/common/vector.c +++ b/src/common/vector.c @@ -59,3 +59,12 @@ void c11_vector__reserve(c11_vector* self, int capacity){ self->capacity = capacity; self->data = realloc(self->data, self->elem_size * self->capacity); } + +c11_array c11_vector__as_array(c11_vector* self){ + c11_array retval = { + .data = self->data, + .count = self->count, + .elem_size = self->elem_size, + }; + return retval; +} From 1c9bd1836c99c366825ce27ec17ef5f202b09237 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 13:39:41 +0800 Subject: [PATCH 10/60] some fix --- include/pocketpy/common/str.h | 4 ++-- include/pocketpy/common/str.hpp | 8 ++++---- include/pocketpy/common/vector.h | 1 - src/common/str.c | 8 ++++---- src/common/vector.c | 8 -------- 5 files changed, 10 insertions(+), 19 deletions(-) diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index 7673948a..7fb2ca6b 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -56,8 +56,8 @@ int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i); int pkpy_Str__byte_index_to_unicode(const pkpy_Str* self, int n); int pkpy_Str__index(const pkpy_Str* self, const pkpy_Str* sub, int start); int pkpy_Str__count(const pkpy_Str* self, const pkpy_Str* sub); -c11_array/* T=c11_string */ pkpy_Str__split(const pkpy_Str* self, char sep); -c11_array/* T=c11_string */ pkpy_Str__split2(const pkpy_Str* self, const pkpy_Str* sep); +c11_vector/* T=c11_string */ pkpy_Str__split(const pkpy_Str* self, char sep); +c11_vector/* T=c11_string */ pkpy_Str__split2(const pkpy_Str* self, const pkpy_Str* sep); #ifdef __cplusplus } diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index 414d3f5a..ed0f7004 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -192,24 +192,24 @@ struct Str: pkpy_Str { } vector split(const Str& sep) const{ - c11_array/* T=c11_string */ res = pkpy_Str__split2(this, &sep); + c11_vector/* T=c11_string */ res = pkpy_Str__split2(this, &sep); vector retval(res.count); for(int i = 0; i < res.count; i++){ c11_string tmp = c11__getitem(c11_string, &res, i); retval[i] = std::string_view(tmp.data, tmp.size); } - c11_array__dtor(&res); + c11_vector__dtor(&res); return retval; } vector split(char sep) const{ - c11_array/* T=c11_string */ res = pkpy_Str__split(this, sep); + c11_vector/* T=c11_string */ res = pkpy_Str__split(this, sep); vector retval(res.count); for(int i = 0; i < res.count; i++){ c11_string tmp = c11__getitem(c11_string, &res, i); retval[i] = std::string_view(tmp.data, tmp.size); } - c11_array__dtor(&res); + c11_vector__dtor(&res); return retval; } diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index 4cb99c25..0c5d0bb7 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -27,7 +27,6 @@ void c11_vector__dtor(c11_vector* self); c11_vector c11_vector__copy(const c11_vector* self); void* c11_vector__at(c11_vector* self, int index); void c11_vector__reserve(c11_vector* self, int capacity); -c11_array c11_vector__as_array(c11_vector* self); #define c11__getitem(T, self, index) ((T*)(self)->data)[index] #define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value; diff --git a/src/common/str.c b/src/common/str.c index e369c1da..bf20a4cd 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -351,7 +351,7 @@ int pkpy_Str__count(const pkpy_Str *self, const pkpy_Str *sub){ return cnt; } -c11_array/* T=c11_string */ pkpy_Str__split(const pkpy_Str *self, char sep){ +c11_vector/* T=c11_string */ pkpy_Str__split(const pkpy_Str *self, char sep){ c11_vector retval; c11_vector__ctor(&retval, sizeof(c11_string)); const char* data = pkpy_Str__data(self); @@ -370,10 +370,10 @@ c11_array/* T=c11_string */ pkpy_Str__split(const pkpy_Str *self, char sep){ c11_string tmp = {data + i, self->size - i}; c11_vector__push(c11_string, &retval, tmp); } - return c11_vector__as_array(&retval); + return retval; } -c11_array/* T=c11_string */ pkpy_Str__split2(const pkpy_Str *self, const pkpy_Str *sep){ +c11_vector/* T=c11_string */ pkpy_Str__split2(const pkpy_Str *self, const pkpy_Str *sep){ c11_vector retval; c11_vector__ctor(&retval, sizeof(c11_string)); int start = 0; @@ -387,5 +387,5 @@ c11_array/* T=c11_string */ pkpy_Str__split2(const pkpy_Str *self, const pkpy_St } c11_string tmp = {data + start, self->size - start}; if(tmp.size != 0) c11_vector__push(c11_string, &retval, tmp); - return c11_vector__as_array(&retval); + return retval; } diff --git a/src/common/vector.c b/src/common/vector.c index 92137c58..b6212672 100644 --- a/src/common/vector.c +++ b/src/common/vector.c @@ -60,11 +60,3 @@ void c11_vector__reserve(c11_vector* self, int capacity){ self->data = realloc(self->data, self->elem_size * self->capacity); } -c11_array c11_vector__as_array(c11_vector* self){ - c11_array retval = { - .data = self->data, - .count = self->count, - .elem_size = self->elem_size, - }; - return retval; -} From 2291ae5af34703c16dac77c3cce009c1d46645b0 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 11 Jun 2024 13:41:38 +0800 Subject: [PATCH 11/60] fix CI --- .github/workflows/main.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1b5a0207..d9b5a246 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -179,11 +179,11 @@ jobs: name: linux path: $GITHUB_WORKSPACE/output/linux - - name: "Merge darwin" - uses: actions/download-artifact@v4.1.7 - with: - name: macos - path: $GITHUB_WORKSPACE/output/macos + # - name: "Merge darwin" + # uses: actions/download-artifact@v4.1.7 + # with: + # name: macos + # path: $GITHUB_WORKSPACE/output/macos - name: "Merge android" uses: actions/download-artifact@v4.1.7 From 031f189a4aba030e10e1c6f02a92e2eccaa6c2b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=96=B9=E8=80=8C=E9=9D=99?= Date: Tue, 11 Jun 2024 20:28:51 +0800 Subject: [PATCH 12/60] Make SourceData and SStream c11 (#266) * Make SourceData c11 * make it compile * add a marco * follow up rename of c11_vetor__push * remove isascii marco * fix empty function name handling * change enum back * Remove trival accessor * make pkpy_Str__take_buf hidden * make it compile * remove rcptr and use shared_ptr instead * change enum name in C++ code back * fix type problem * remove strdup polyfill * remove xmake * ... --- .gitignore | 5 +- include/pocketpy/common/sstream.h | 27 +++++++ include/pocketpy/compiler/compiler.hpp | 2 +- include/pocketpy/objects/sourcedata.h | 32 +++++++++ include/pocketpy/objects/sourcedata.hpp | 32 ++++----- src/common/sourcedata.c | 95 +++++++++++++++++++++++++ src/common/sstream.c | 48 +++++++++++++ src/common/str.c | 34 +++++++-- src/compiler/compiler.cpp | 6 +- src/compiler/lexer.cpp | 16 ++--- src/interpreter/profiler.cpp | 4 +- src/interpreter/vm.cpp | 2 +- src/objects/sourcedata.cpp | 68 ------------------ 13 files changed, 263 insertions(+), 108 deletions(-) create mode 100644 include/pocketpy/common/sstream.h create mode 100644 include/pocketpy/objects/sourcedata.h create mode 100644 src/common/sourcedata.c create mode 100644 src/common/sstream.c delete mode 100644 src/objects/sourcedata.cpp diff --git a/.gitignore b/.gitignore index e4db4f86..ce195c89 100644 --- a/.gitignore +++ b/.gitignore @@ -30,4 +30,7 @@ pocketpy.dSYM libpocketpy.dylib.dSYM/ main.dSYM/ -docs/references.md \ No newline at end of file +docs/references.md + +.xmake +.vs diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h new file mode 100644 index 00000000..d0b7cfc5 --- /dev/null +++ b/include/pocketpy/common/sstream.h @@ -0,0 +1,27 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "pocketpy/common/vector.h" +#include "pocketpy/common/str.h" +#include + +typedef struct pkpy_SStream { + c11_vector data; +} pkpy_SStream; + +void pkpy_SStream__ctor(pkpy_SStream* self); +void pkpy_SStream__dtor(pkpy_SStream* self); +void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str); +void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n); +void pkpy_SStream__append_Str(pkpy_SStream* self, const pkpy_Str* str); +void pkpy_SStream__append_char(pkpy_SStream* self, char c); +void pkpy_SStream__append_int(pkpy_SStream* self, int i); +void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i); +pkpy_Str pkpy_SStream__to_Str(const pkpy_SStream* self); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/compiler/compiler.hpp b/include/pocketpy/compiler/compiler.hpp index 704523d2..39132cc2 100644 --- a/include/pocketpy/compiler/compiler.hpp +++ b/include/pocketpy/compiler/compiler.hpp @@ -41,7 +41,7 @@ struct Compiler { #if PK_DEBUG_COMPILER if(__i>=0 && __ifilename.c_str(), + lexer.src.filename().c_str(), curr().line, TK_STR(curr().type), curr().str().escape().c_str() diff --git a/include/pocketpy/objects/sourcedata.h b/include/pocketpy/objects/sourcedata.h new file mode 100644 index 00000000..7f8d7cc0 --- /dev/null +++ b/include/pocketpy/objects/sourcedata.h @@ -0,0 +1,32 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "pocketpy/common/str.h" +#include "pocketpy/common/vector.h" + +enum CompileMode { EXEC_MODE, EVAL_MODE, REPL_MODE, JSON_MODE, CELL_MODE }; + +struct pkpy_SourceData { + enum CompileMode mode; + bool is_precompiled; + + pkpy_Str filename; + pkpy_Str source; + + c11_vector line_starts; // contains "const char *" + c11_vector _precompiled_tokens; // contains "pkpy_Str" +}; + +void pkpy_SourceData__ctor(struct pkpy_SourceData *self, const char *source, int source_size, const pkpy_Str *filename, enum CompileMode mode); +void pkpy_SourceData__dtor(struct pkpy_SourceData* self); + +bool pkpy_SourceData__get_line(const struct pkpy_SourceData *self, int lineno, const char **st, const char **ed); +pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData *self, int lineno, const char *cursor, const char *name); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/objects/sourcedata.hpp b/include/pocketpy/objects/sourcedata.hpp index 07ff384e..2295e294 100644 --- a/include/pocketpy/objects/sourcedata.hpp +++ b/include/pocketpy/objects/sourcedata.hpp @@ -2,28 +2,26 @@ #include "pocketpy/common/utils.h" #include "pocketpy/common/str.hpp" +#include "pocketpy/objects/sourcedata.h" namespace pkpy { -enum CompileMode { EXEC_MODE, EVAL_MODE, REPL_MODE, JSON_MODE, CELL_MODE }; +struct SourceData : public pkpy_SourceData { + SourceData(std::string_view source, const Str& filename, CompileMode mode) { + pkpy_SourceData__ctor(this, source.data(), source.size(), &filename, mode); + } -struct SourceData { - PK_ALWAYS_PASS_BY_POINTER(SourceData) + std::string_view get_line(int lineno) const { + const char *st, *ed; + if (pkpy_SourceData__get_line(this, lineno, &st, &ed)) { + return std::string_view(st, ed - st); + } + return ""; + } - Str filename; - CompileMode mode; - - Str source; - vector line_starts; - - bool is_precompiled; - vector _precompiled_tokens; - - SourceData(std::string_view source, const Str& filename, CompileMode mode); - SourceData(const Str& filename, CompileMode mode); - pair _get_line(int lineno) const; - std::string_view get_line(int lineno) const; - Str snapshot(int lineno, const char* cursor, std::string_view name) const; + Str snapshot(int lineno, const char* cursor, std::string_view name) const { + return pkpy_SourceData__snapshot(this, lineno, cursor, name.empty() ? nullptr : name.data()); + } }; } // namespace pkpy diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c new file mode 100644 index 00000000..40929655 --- /dev/null +++ b/src/common/sourcedata.c @@ -0,0 +1,95 @@ +#include "pocketpy/objects/sourcedata.h" +#include "pocketpy/common/sstream.h" +#include +#include +#include + +void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size); + +void pkpy_SourceData__ctor(struct pkpy_SourceData* self, + const char* source, + int source_size, + const pkpy_Str* filename, + enum CompileMode mode) { + self->filename = pkpy_Str__copy(filename); // OPTIMIZEME? + self->mode = mode; + + c11_vector__ctor(&self->line_starts, sizeof(const char*)); + c11_vector__ctor(&self->_precompiled_tokens, sizeof(pkpy_Str)); + + int index = (strncmp(source, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0; + int len = source_size - index; + for(int i = 0; i < source_size; ++i) + len -= (source[i] == '\r'); + + char *buf = malloc(len + 1), *p = buf; + buf[len] = '\0'; + for(; index < source_size; ++index) { + if(source[index] != '\r') *(p++) = source[index]; + } + pkpy_Str__take_buf(&self->source, buf, len); + + self->is_precompiled = (strncmp(pkpy_Str__data(&self->source), "pkpy:", 5) == 0); + c11_vector__push(const char*, &self->line_starts, pkpy_Str__data(&self->source)); +} + +void pkpy_SourceData__dtor(struct pkpy_SourceData* self) { + pkpy_Str__dtor(&self->filename); + pkpy_Str__dtor(&self->source); + c11_vector__dtor(&self->line_starts); + c11_vector__dtor(&self->_precompiled_tokens); +} + +bool pkpy_SourceData__get_line(const struct pkpy_SourceData* self, int lineno, const char** st, const char** ed) { + if(self->is_precompiled || lineno == -1) { return false; } + lineno -= 1; + if(lineno < 0) lineno = 0; + const char* _start = c11__getitem(const char*, &self->line_starts, lineno); + const char* i = _start; + // max 300 chars + while(*i != '\n' && *i != '\0' && i - _start < 300) + i++; + *st = _start; + *ed = i; + return true; +} + +pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData* self, int lineno, const char* cursor, const char* name) { + pkpy_SStream ss; + pkpy_SStream__ctor(&ss); + pkpy_SStream__append_cstr(&ss, " File \""); + pkpy_SStream__append_Str(&ss, &self->filename); + pkpy_SStream__append_cstr(&ss, "\", line "); + pkpy_SStream__append_int(&ss, lineno); + + if(name) { + pkpy_SStream__append_cstr(&ss, ", in "); + pkpy_SStream__append_cstr(&ss, name); + } + + if(!self->is_precompiled) { + pkpy_SStream__append_char(&ss, '\n'); + const char *st = NULL, *ed; + if(pkpy_SourceData__get_line(self, lineno, &st, &ed)) { + while(st < ed && isblank(*st)) + ++st; + if(st < ed) { + pkpy_SStream__append_cstr(&ss, " "); + pkpy_SStream__append_cstrn(&ss, st, ed - st); + if(cursor && st <= cursor && cursor <= ed) { + pkpy_SStream__append_cstr(&ss, "\n "); + for(int i = 0; i < (cursor - st); ++i) + pkpy_SStream__append_char(&ss, ' '); + pkpy_SStream__append_cstr(&ss, "^"); + } + } else { + st = NULL; + } + } + + if(!st) { pkpy_SStream__append_cstr(&ss, " "); } + } + pkpy_Str res = pkpy_SStream__to_Str(&ss); + pkpy_SStream__dtor(&ss); + return res; +} diff --git a/src/common/sstream.c b/src/common/sstream.c new file mode 100644 index 00000000..08091c3d --- /dev/null +++ b/src/common/sstream.c @@ -0,0 +1,48 @@ +#include "pocketpy/common/sstream.h" +#include + +void pkpy_SStream__ctor(pkpy_SStream* self) { + c11_vector__ctor(&self->data, sizeof(char)); +} + +void pkpy_SStream__dtor(pkpy_SStream* self) { + c11_vector__dtor(&self->data); +} + +void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str) { + for (int i = 0; str[i] != '\0'; i++) { + c11_vector__push(char, &self->data, str[i]); + } +} + +void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n) { + for (int i = 0; i < n; i++) { + c11_vector__push(char, &self->data, str[i]); + } +} + +void pkpy_SStream__append_Str(pkpy_SStream* self, const pkpy_Str* str) { + pkpy_SStream__append_cstr(self, pkpy_Str__data(str)); +} + +void pkpy_SStream__append_char(pkpy_SStream* self, char c) { + c11_vector__push(char, &self->data, c); +} + +void pkpy_SStream__append_int(pkpy_SStream* self, int i) { + char str[12]; // sign + 10 digits + null terminator + sprintf(str, "%d", i); + pkpy_SStream__append_cstr(self, str); +} + +void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i) { + char str[23]; // sign + 21 digits + null terminator + sprintf(str, "%lld", i); + pkpy_SStream__append_cstr(self, str); +} + +pkpy_Str pkpy_SStream__to_Str(const pkpy_SStream* self) { + pkpy_Str res; + pkpy_Str__ctor2(&res, self->data.data, self->data.count); + return res; +} diff --git a/src/common/str.c b/src/common/str.c index bf20a4cd..50af2b3e 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -22,6 +22,32 @@ void pkpy_Str__ctor(pkpy_Str *self, const char *data){ pkpy_Str__ctor2(self, data, strlen(data)); } +static void pkpy_Str__check_ascii(pkpy_Str *self, char *p) { + for(int i = 0; i < self->size; i++){ + if(!isascii(p[i])){ + self->is_ascii = false; + break; + } + } +} + +void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size) { + self->size = size; + self->is_ascii = true; + self->is_sso = size < sizeof(self->_inlined); + char* p; + if(self->is_sso){ + p = self->_inlined; + memcpy(p, data, size); + p[size] = '\0'; + free(data); + }else{ + self->_ptr = data; + p = self->_ptr; + } + pkpy_Str__check_ascii(self, p); +} + void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ self->size = size; self->is_ascii = true; @@ -35,13 +61,7 @@ void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ } memcpy(p, data, size); p[size] = '\0'; - // check is_ascii - for(int i = 0; i < size; i++){ - if(!isascii(p[i])){ - self->is_ascii = false; - break; - } - } + pkpy_Str__check_ascii(self, p); } void pkpy_Str__dtor(pkpy_Str *self){ diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 1db9e898..78a231e7 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -20,7 +20,7 @@ NameScope Compiler::name_scope() const noexcept{ } CodeObject_ Compiler::push_global_context() noexcept{ - CodeObject_ co = std::make_shared(lexer.src, lexer.src->filename); + CodeObject_ co = std::make_shared(lexer.src, static_cast(lexer.src->filename)); co->start_line = __i == 0 ? 1 : prev().line; contexts.push_back(CodeEmitContext(vm, co, contexts.size())); return co; @@ -1293,8 +1293,8 @@ Error* Compiler::compile(CodeObject_* out) noexcept{ Error* err; check(lexer.run()); - // if(lexer.src->filename[0] != '<'){ - // printf("%s\n", lexer.src->filename.c_str()); + // if(lexer.src.filename()[0] != '<'){ + // printf("%s\n", lexer.src.filename().c_str()); // for(int i=0; iline_starts.push_back(curr_char); + c11_vector__push(const char*, &src->line_starts, curr_char); } return c; } @@ -534,8 +534,8 @@ Error* Lexer::SyntaxError(const char* fmt, ...) noexcept{ } Lexer::Lexer(VM* vm, std::shared_ptr src) noexcept : vm(vm), src(src){ - this->token_start = src->source.c_str(); - this->curr_char = src->source.c_str(); + this->token_start = pkpy_Str__data(&src->source); + this->curr_char = pkpy_Str__data(&src->source); } Error* Lexer::run() noexcept{ @@ -557,7 +557,7 @@ Error* Lexer::run() noexcept{ } Error* Lexer::from_precompiled() noexcept{ - TokenDeserializer deserializer(src->source.c_str()); + TokenDeserializer deserializer(pkpy_Str__data(&src->source)); deserializer.curr += 5; // skip "pkpy:" std::string_view version = deserializer.read_string('\n'); @@ -569,9 +569,9 @@ Error* Lexer::from_precompiled() noexcept{ } int count = deserializer.read_count(); - vector& precompiled_tokens = src->_precompiled_tokens; + auto precompiled_tokens = &src->_precompiled_tokens; for(int i = 0; i < count; i++) { - precompiled_tokens.push_back(deserializer.read_string('\n')); + c11_vector__push(Str, precompiled_tokens, Str(deserializer.read_string('\n'))); } count = deserializer.read_count(); @@ -580,8 +580,8 @@ Error* Lexer::from_precompiled() noexcept{ t.type = (unsigned char)deserializer.read_uint(','); if(is_raw_string_used(t.type)) { i64 index = deserializer.read_uint(','); - t.start = precompiled_tokens[index].c_str(); - t.length = precompiled_tokens[index].size; + t.start = c11__getitem(Str, precompiled_tokens, index).c_str(); + t.length = c11__getitem(Str, precompiled_tokens, index).size; } else { t.start = nullptr; t.length = 0; diff --git a/src/interpreter/profiler.cpp b/src/interpreter/profiler.cpp index 04e851f5..2482f4d2 100644 --- a/src/interpreter/profiler.cpp +++ b/src/interpreter/profiler.cpp @@ -20,7 +20,7 @@ void LineProfiler::begin() { frames.clear(); } void LineProfiler::_step(int callstack_size, Frame* frame) { auto line_info = frame->co->lines[frame->ip()]; if(line_info.is_virtual) return; - std::string_view filename = frame->co->src->filename.sv(); + std::string_view filename = frame->co->src.filename().sv(); int line = line_info.lineno; if(frames.empty()) { @@ -87,7 +87,7 @@ Str LineProfiler::stats() { int start_line = decl->code->start_line; int end_line = decl->code->end_line; if(start_line == -1 || end_line == -1) continue; - std::string_view filename = decl->code->src->filename.sv(); + std::string_view filename = decl->code->src.filename().sv(); const _LineRecord* file_records = records[filename]; clock_t total_time = 0; for(int line = start_line; line <= end_line; line++) { diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index ce4d1114..898b2850 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -1706,7 +1706,7 @@ void VM::__breakpoint() { SStream ss; Frame* frame = &frames[i]->frame; int lineno = frame->curr_lineno(); - ss << "File \"" << frame->co->src->filename << "\", line " << lineno; + ss << "File \"" << frame->co->src.filename() << "\", line " << lineno; if(frame->_callable) { ss << ", in "; ss << frame->_callable->as().decl->code->name; diff --git a/src/objects/sourcedata.cpp b/src/objects/sourcedata.cpp deleted file mode 100644 index f6d96ee8..00000000 --- a/src/objects/sourcedata.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "pocketpy/objects/sourcedata.hpp" - -namespace pkpy { -SourceData::SourceData(std::string_view source, const Str& filename, CompileMode mode) : - filename(filename), mode(mode) { - int index = 0; - // Skip utf8 BOM if there is any. - if(strncmp(source.data(), "\xEF\xBB\xBF", 3) == 0) index += 3; - // Drop all '\r' - SStream ss(source.size() + 1); - while(index < source.size()) { - if(source[index] != '\r') ss << source[index]; - index++; - } - this->source = ss.str(); - if(this->source.size > 5 && this->source.sv().substr(0, 5) == "pkpy:") { - this->is_precompiled = true; - } else { - this->is_precompiled = false; - } - line_starts.push_back(this->source.c_str()); -} - -SourceData::SourceData(const Str& filename, CompileMode mode) : filename(filename), mode(mode) { - line_starts.push_back(this->source.c_str()); -} - -pair SourceData::_get_line(int lineno) const { - if(is_precompiled || lineno == -1) return {nullptr, nullptr}; - lineno -= 1; - if(lineno < 0) lineno = 0; - const char* _start = line_starts[lineno]; - const char* i = _start; - // max 300 chars - while(*i != '\n' && *i != '\0' && i - _start < 300) - i++; - return {_start, i}; -} - -std::string_view SourceData::get_line(int lineno) const { - auto [_0, _1] = _get_line(lineno); - if(_0 && _1) return std::string_view(_0, _1 - _0); - return ""; -} - -Str SourceData::snapshot(int lineno, const char* cursor, std::string_view name) const { - SStream ss; - ss << " " << "File \"" << filename << "\", line " << lineno; - if(!name.empty()) ss << ", in " << name; - if(!is_precompiled) { - ss << '\n'; - pair pair = _get_line(lineno); - Str line = ""; - int removed_spaces = 0; - if(pair.first && pair.second) { - line = Str(pair.first, pair.second - pair.first).lstrip(); - removed_spaces = pair.second - pair.first - line.length(); - if(line.empty()) line = ""; - } - ss << " " << line; - if(cursor && line != "" && cursor >= pair.first && cursor <= pair.second) { - auto column = cursor - pair.first - removed_spaces; - if(column >= 0) ss << "\n " << std::string(column, ' ') << "^"; - } - } - return ss.str(); -} -} // namespace pkpy From 3ae90fe9b19da19288160d8fca116598a63dad25 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 12 Jun 2024 01:31:34 +0800 Subject: [PATCH 13/60] some fix --- src/common/str.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/str.c b/src/common/str.c index 50af2b3e..9c3fb5fb 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -76,8 +76,8 @@ pkpy_Str pkpy_Str__copy(const pkpy_Str *self){ pkpy_Str retval = *self; if(!self->is_sso){ retval._ptr = (char*)malloc(self->size + 1); + // '\0' is copied memcpy(retval._ptr, self->_ptr, self->size + 1); - retval._ptr[retval.size] = '\0'; } return retval; } From bcf51c453511b8cb68e72e532b1d723d4876b8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=96=B9=E8=80=8C=E9=9D=99?= Date: Wed, 12 Jun 2024 11:29:38 +0800 Subject: [PATCH 14/60] Add the missing destructor for `SourceData` C++ binding (#267) --- include/pocketpy/objects/sourcedata.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/pocketpy/objects/sourcedata.hpp b/include/pocketpy/objects/sourcedata.hpp index 2295e294..a9b6ac19 100644 --- a/include/pocketpy/objects/sourcedata.hpp +++ b/include/pocketpy/objects/sourcedata.hpp @@ -11,6 +11,10 @@ struct SourceData : public pkpy_SourceData { pkpy_SourceData__ctor(this, source.data(), source.size(), &filename, mode); } + ~SourceData() { + pkpy_SourceData__dtor(this); + } + std::string_view get_line(int lineno) const { const char *st, *ed; if (pkpy_SourceData__get_line(this, lineno, &st, &ed)) { From 9d6f044d33e66714008353955b740ff765258e26 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 10:46:22 +0800 Subject: [PATCH 15/60] Squashed commit of the following: commit b584de5c3d4603a476cdd60830289104784a4942 Author: blueloveTH Date: Thu Jun 13 10:46:07 2024 +0800 some fix commit 1fe8a3280949d724ddab9c6b1476e1b55c5beb9d Author: blueloveTH Date: Wed Jun 12 22:08:09 2024 +0800 backup --- compile_flags.txt | 3 +- include/pocketpy/common/sstream.h | 70 ++++++++++++++++++++--- include/pocketpy/common/utils.h | 5 ++ include/pocketpy/common/vector.h | 3 + src/common/sourcedata.c | 39 +++++++------ src/common/sstream.c | 93 +++++++++++++++++++++---------- tests/80_traceback.py | 11 +++- 7 files changed, 166 insertions(+), 58 deletions(-) diff --git a/compile_flags.txt b/compile_flags.txt index 6a6ce7ea..ac20b509 100644 --- a/compile_flags.txt +++ b/compile_flags.txt @@ -1,9 +1,8 @@ --xc++ - -Wall -W* -std=c++17 +-std=c11 -stdlib=libc++ -Iinclude/ diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index d0b7cfc5..5f66d97d 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -6,21 +6,77 @@ extern "C" { #include "pocketpy/common/vector.h" #include "pocketpy/common/str.h" +#include "pocketpy/common/utils.h" + #include typedef struct pkpy_SStream { c11_vector data; } pkpy_SStream; +typedef struct pkpy_AnyStr { + int type; + union { + int _int; + int64_t _int64; + float _float; + double _double; + char _char; + unsigned char _hex; + const pkpy_Str* _str; + c11_string _sv; + const char* _cstr; + void* _ptr; + }; +} pkpy_AnyStr; + +inline pkpy_AnyStr pkpy_AnyStr__int(int x) { return (pkpy_AnyStr){.type = 1, ._int = x}; } +inline pkpy_AnyStr pkpy_AnyStr__int64(int64_t x) { return (pkpy_AnyStr){.type = 2, ._int64 = x}; } +inline pkpy_AnyStr pkpy_AnyStr__float(float x) { return (pkpy_AnyStr){.type = 3, ._float = x}; } +inline pkpy_AnyStr pkpy_AnyStr__double(double x) { return (pkpy_AnyStr){.type = 4, ._double = x}; } +inline pkpy_AnyStr pkpy_AnyStr__char(char x) { return (pkpy_AnyStr){.type = 5, ._char = x}; } +inline pkpy_AnyStr pkpy_AnyStr__hex(unsigned char x) { return (pkpy_AnyStr){.type = 6, ._hex = x}; } +inline pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { return (pkpy_AnyStr){.type = 7, ._str = x}; } +inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { return (pkpy_AnyStr){.type = 8, ._sv = x}; } +inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { return (pkpy_AnyStr){.type = 9, ._cstr = x}; } +inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){.type = 10, ._ptr = x}; } + void pkpy_SStream__ctor(pkpy_SStream* self); void pkpy_SStream__dtor(pkpy_SStream* self); -void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str); -void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n); -void pkpy_SStream__append_Str(pkpy_SStream* self, const pkpy_Str* str); -void pkpy_SStream__append_char(pkpy_SStream* self, char c); -void pkpy_SStream__append_int(pkpy_SStream* self, int i); -void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i); -pkpy_Str pkpy_SStream__to_Str(const pkpy_SStream* self); +void pkpy_SStream__write_char(pkpy_SStream* self, char); +void pkpy_SStream__write_int(pkpy_SStream* self, int); +void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t); +void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str*); +void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string); +void pkpy_SStream__write_cstr(pkpy_SStream* self, const char*); +void pkpy_SStream__write_cstrn(pkpy_SStream* self, const char*, int); +void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_AnyStr* args, int n); + +// Submit the stream and return the final string. The stream becomes invalid after this call +pkpy_Str pkpy_SStream__submit(pkpy_SStream* self); + +#define pkpy__anystr(x) _Generic((x), \ + int: pkpy_AnyStr__int, \ + int64_t: pkpy_AnyStr__int64, \ + float: pkpy_AnyStr__float, \ + double: pkpy_AnyStr__double, \ + char: pkpy_AnyStr__char, \ + unsigned char: pkpy_AnyStr__hex, \ + const pkpy_Str*: pkpy_AnyStr__str, \ + c11_string: pkpy_AnyStr__sv, \ + const char*: pkpy_AnyStr__cstr, \ + void*: pkpy_AnyStr__ptr \ +)(x) + +#define pkpy__anystr_list_1(a) (pkpy_AnyStr[]){pkpy__anystr(a)}, 1 +#define pkpy__anystr_list_2(a, b) (pkpy_AnyStr[]){pkpy__anystr(a), pkpy__anystr(b)}, 2 +#define pkpy__anystr_list_3(a, b, c) (pkpy_AnyStr[]){pkpy__anystr(a), pkpy__anystr(b), pkpy__anystr(c)}, 3 +#define pkpy__anystr_list_4(a, b, c, d) (pkpy_AnyStr[]){pkpy__anystr(a), pkpy__anystr(b), pkpy__anystr(c), pkpy__anystr(d)}, 4 + +#define pkpy__anystr_list_dispatcher(...) PK_NARGS_SEQ(__VA_ARGS__, pkpy__anystr_list_4, pkpy__anystr_list_3, pkpy__anystr_list_2, pkpy__anystr_list_1, 0) +#define pkpy__anystr_list(...) pkpy__anystr_list_dispatcher(__VA_ARGS__)(__VA_ARGS__) + +#define pkpy_SStream__write(self, fmt, ...) pkpy_SStream__write_any(self, fmt, pkpy__anystr_list(__VA_ARGS__)) #ifdef __cplusplus } diff --git a/include/pocketpy/common/utils.h b/include/pocketpy/common/utils.h index 52d4bfc2..d395c654 100644 --- a/include/pocketpy/common/utils.h +++ b/include/pocketpy/common/utils.h @@ -24,6 +24,11 @@ extern const char* kPlatformStrings[]; #define PK_MIN(a, b) ((a) < (b) ? (a) : (b)) #define PK_MAX(a, b) ((a) > (b) ? (a) : (b)) +// NARGS +#define PK_NARGS_SEQ(_1, _2, _3, _4, N, ...) N +#define PK_NARGS(...) PK_NARGS_SEQ(__VA_ARGS__, 4, 3, 2, 1, 0) +#define PK_NPTRS(...) PK_NARGS_SEQ(__VA_ARGS__, int****, int***, int**, int*, int) + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index 0c5d0bb7..1620520d 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -4,6 +4,9 @@ extern "C" { #endif +#include +#include + typedef struct c11_array{ void* data; int count; diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c index 40929655..496ea12c 100644 --- a/src/common/sourcedata.c +++ b/src/common/sourcedata.c @@ -22,7 +22,7 @@ void pkpy_SourceData__ctor(struct pkpy_SourceData* self, for(int i = 0; i < source_size; ++i) len -= (source[i] == '\r'); - char *buf = malloc(len + 1), *p = buf; + char *buf = (char*)malloc(len + 1), *p = buf; buf[len] = '\0'; for(; index < source_size; ++index) { if(source[index] != '\r') *(p++) = source[index]; @@ -57,39 +57,44 @@ bool pkpy_SourceData__get_line(const struct pkpy_SourceData* self, int lineno, c pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData* self, int lineno, const char* cursor, const char* name) { pkpy_SStream ss; pkpy_SStream__ctor(&ss); - pkpy_SStream__append_cstr(&ss, " File \""); - pkpy_SStream__append_Str(&ss, &self->filename); - pkpy_SStream__append_cstr(&ss, "\", line "); - pkpy_SStream__append_int(&ss, lineno); + + // pkpy_SStream__write_cstr(&ss, " File \""); + // pkpy_SStream__write_Str(&ss, &self->filename); + // pkpy_SStream__write_cstr(&ss, "\", line "); + // pkpy_SStream__write_int(&ss, lineno); + + pkpy_SStream__write(&ss, + " File \"{}\", line {}", + &self->filename, + lineno + ); if(name) { - pkpy_SStream__append_cstr(&ss, ", in "); - pkpy_SStream__append_cstr(&ss, name); + pkpy_SStream__write_cstr(&ss, ", in "); + pkpy_SStream__write_cstr(&ss, name); } if(!self->is_precompiled) { - pkpy_SStream__append_char(&ss, '\n'); + pkpy_SStream__write_char(&ss, '\n'); const char *st = NULL, *ed; if(pkpy_SourceData__get_line(self, lineno, &st, &ed)) { while(st < ed && isblank(*st)) ++st; if(st < ed) { - pkpy_SStream__append_cstr(&ss, " "); - pkpy_SStream__append_cstrn(&ss, st, ed - st); + pkpy_SStream__write_cstr(&ss, " "); + pkpy_SStream__write_cstrn(&ss, st, ed - st); if(cursor && st <= cursor && cursor <= ed) { - pkpy_SStream__append_cstr(&ss, "\n "); + pkpy_SStream__write_cstr(&ss, "\n "); for(int i = 0; i < (cursor - st); ++i) - pkpy_SStream__append_char(&ss, ' '); - pkpy_SStream__append_cstr(&ss, "^"); + pkpy_SStream__write_char(&ss, ' '); + pkpy_SStream__write_cstr(&ss, "^"); } } else { st = NULL; } } - if(!st) { pkpy_SStream__append_cstr(&ss, " "); } + if(!st) { pkpy_SStream__write_cstr(&ss, " "); } } - pkpy_Str res = pkpy_SStream__to_Str(&ss); - pkpy_SStream__dtor(&ss); - return res; + return pkpy_SStream__submit(&ss); } diff --git a/src/common/sstream.c b/src/common/sstream.c index 08091c3d..6ba3a7ba 100644 --- a/src/common/sstream.c +++ b/src/common/sstream.c @@ -1,5 +1,8 @@ #include "pocketpy/common/sstream.h" +#include "pocketpy/common/utils.h" + #include +#include void pkpy_SStream__ctor(pkpy_SStream* self) { c11_vector__ctor(&self->data, sizeof(char)); @@ -9,40 +12,72 @@ void pkpy_SStream__dtor(pkpy_SStream* self) { c11_vector__dtor(&self->data); } -void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str) { - for (int i = 0; str[i] != '\0'; i++) { - c11_vector__push(char, &self->data, str[i]); - } -} - -void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n) { - for (int i = 0; i < n; i++) { - c11_vector__push(char, &self->data, str[i]); - } -} - -void pkpy_SStream__append_Str(pkpy_SStream* self, const pkpy_Str* str) { - pkpy_SStream__append_cstr(self, pkpy_Str__data(str)); -} - -void pkpy_SStream__append_char(pkpy_SStream* self, char c) { +void pkpy_SStream__write_char(pkpy_SStream* self, char c) { c11_vector__push(char, &self->data, c); } -void pkpy_SStream__append_int(pkpy_SStream* self, int i) { - char str[12]; // sign + 10 digits + null terminator - sprintf(str, "%d", i); - pkpy_SStream__append_cstr(self, str); +void pkpy_SStream__write_int(pkpy_SStream* self, int i) { + char buf[12]; // sign + 10 digits + null terminator + snprintf(buf, sizeof(buf), "%d", i); + pkpy_SStream__write_cstr(self, buf); } -void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i) { - char str[23]; // sign + 21 digits + null terminator - sprintf(str, "%lld", i); - pkpy_SStream__append_cstr(self, str); +void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t i) { + char buf[23]; // sign + 21 digits + null terminator + snprintf(buf, sizeof(buf), "%lld", i); + pkpy_SStream__write_cstr(self, buf); } -pkpy_Str pkpy_SStream__to_Str(const pkpy_SStream* self) { - pkpy_Str res; - pkpy_Str__ctor2(&res, self->data.data, self->data.count); - return res; +void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str* str) { + pkpy_SStream__write_cstr(self, pkpy_Str__data(str)); +} + +void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string sv) { + pkpy_SStream__write_cstrn(self, sv.data, sv.size); +} + +void pkpy_SStream__write_cstr(pkpy_SStream* self, const char* str) { + pkpy_SStream__write_cstrn(self, str, strlen(str)); +} + +void pkpy_SStream__write_cstrn(pkpy_SStream* self, const char* str, int n) { + c11_vector__extend(char, &self->data, str, n); +} + +void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_AnyStr* args, int n){ + int i = 0; + while(*fmt){ + if(*fmt == '{' && fmt[1] == '}'){ + assert(i < n); + switch(args[i].type){ + case 1: pkpy_SStream__write_int(self, args[i]._int); break; + case 2: pkpy_SStream__write_int64(self, args[i]._int64); break; + case 3: assert(0); break; + case 4: assert(0); break; + case 5: pkpy_SStream__write_char(self, args[i]._char); break; + case 6: assert(0); break; + case 7: pkpy_SStream__write_Str(self, args[i]._str); break; + case 8: pkpy_SStream__write_sv(self, args[i]._sv); break; + case 9: pkpy_SStream__write_cstr(self, args[i]._cstr); break; + case 10: assert(0); break; + default: assert(0); break; + } + fmt += 2; + i++; + }else{ + pkpy_SStream__write_char(self, *fmt); + fmt++; + } + } +} + +pkpy_Str pkpy_SStream__submit(pkpy_SStream* self) { + c11_vector__push(char, &self->data, '\0'); + pkpy_Str retval = { + .size = self->data.count - 1, + .is_ascii = false, // need to check + .is_sso = false, + ._ptr = (char*)self->data.data + }; + return retval; } diff --git a/tests/80_traceback.py b/tests/80_traceback.py index b1df66fd..d44ae1a7 100644 --- a/tests/80_traceback.py +++ b/tests/80_traceback.py @@ -4,11 +4,16 @@ try: a = {'123': 4} b = a[6] except KeyError: - s = traceback.format_exc() + actual = traceback.format_exc() -ok = s == '''Traceback (most recent call last): +expected = '''Traceback (most recent call last): File "80_traceback.py", line 5 b = a[6] KeyError: 6''' -assert ok, s \ No newline at end of file +if actual != expected: + print('--- ACTUAL RESULT -----') + print(actual) + print('--- EXPECTED RESULT ---') + print(expected) + exit(1) \ No newline at end of file From 0811f23b711b855e1788170811d720f937ee03f2 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 13:08:19 +0800 Subject: [PATCH 16/60] some more move --- compile_flags.txt | 3 +- include/pocketpy/common/sstream.h | 16 +++--- include/pocketpy/common/str.h | 4 -- include/pocketpy/common/str.hpp | 87 +++++++++++++++++++++++++------ src/common/sstream.c | 47 ++++++++++++++--- src/common/str.cpp | 82 ----------------------------- src/interpreter/vm.cpp | 8 ++- tests/99_builtin_func.py | 9 ++-- 8 files changed, 130 insertions(+), 126 deletions(-) diff --git a/compile_flags.txt b/compile_flags.txt index ac20b509..cc06ba37 100644 --- a/compile_flags.txt +++ b/compile_flags.txt @@ -1,9 +1,8 @@ -Wall -W* --std=c++17 --std=c11 -stdlib=libc++ +-std=c++17 -Iinclude/ -I3rd/cjson/include/ diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index 5f66d97d..e2e119a3 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -22,7 +22,6 @@ typedef struct pkpy_AnyStr { float _float; double _double; char _char; - unsigned char _hex; const pkpy_Str* _str; c11_string _sv; const char* _cstr; @@ -35,17 +34,19 @@ inline pkpy_AnyStr pkpy_AnyStr__int64(int64_t x) { return (pkpy_AnyStr){.type = inline pkpy_AnyStr pkpy_AnyStr__float(float x) { return (pkpy_AnyStr){.type = 3, ._float = x}; } inline pkpy_AnyStr pkpy_AnyStr__double(double x) { return (pkpy_AnyStr){.type = 4, ._double = x}; } inline pkpy_AnyStr pkpy_AnyStr__char(char x) { return (pkpy_AnyStr){.type = 5, ._char = x}; } -inline pkpy_AnyStr pkpy_AnyStr__hex(unsigned char x) { return (pkpy_AnyStr){.type = 6, ._hex = x}; } -inline pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { return (pkpy_AnyStr){.type = 7, ._str = x}; } -inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { return (pkpy_AnyStr){.type = 8, ._sv = x}; } -inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { return (pkpy_AnyStr){.type = 9, ._cstr = x}; } -inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){.type = 10, ._ptr = x}; } +inline pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { return (pkpy_AnyStr){.type = 6, ._str = x}; } +inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { return (pkpy_AnyStr){.type = 7, ._sv = x}; } +inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { return (pkpy_AnyStr){.type = 8, ._cstr = x}; } +inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){.type = 9, ._ptr = x}; } void pkpy_SStream__ctor(pkpy_SStream* self); void pkpy_SStream__dtor(pkpy_SStream* self); -void pkpy_SStream__write_char(pkpy_SStream* self, char); + void pkpy_SStream__write_int(pkpy_SStream* self, int); void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t); +void pkpy_SStream__write_float(pkpy_SStream* self, float, int precision); +void pkpy_SStream__write_double(pkpy_SStream* self, double, int precision); +void pkpy_SStream__write_char(pkpy_SStream* self, char); void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str*); void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string); void pkpy_SStream__write_cstr(pkpy_SStream* self, const char*); @@ -61,7 +62,6 @@ pkpy_Str pkpy_SStream__submit(pkpy_SStream* self); float: pkpy_AnyStr__float, \ double: pkpy_AnyStr__double, \ char: pkpy_AnyStr__char, \ - unsigned char: pkpy_AnyStr__hex, \ const pkpy_Str*: pkpy_AnyStr__str, \ c11_string: pkpy_AnyStr__sv, \ const char*: pkpy_AnyStr__cstr, \ diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index 7fb2ca6b..9469fafc 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -27,10 +27,6 @@ inline const char* pkpy_Str__data(const pkpy_Str* self){ return self->is_sso ? self->_inlined : self->_ptr; } -inline int pkpy_Str__size(const pkpy_Str* self){ - return self->size; -} - int pkpy_utils__u8_header(unsigned char c, bool suppress); void pkpy_Str__ctor(pkpy_Str* self, const char* data); void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size); diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index ed0f7004..25dbee7a 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -1,11 +1,13 @@ #pragma once +#include "pocketpy/common/sstream.h" #include "pocketpy/common/utils.h" #include "pocketpy/common/memorypool.hpp" #include "pocketpy/common/vector.h" #include "pocketpy/common/vector.hpp" #include "pocketpy/common/str.h" +#include #include #include @@ -273,32 +275,83 @@ struct StrName { static uint32_t _pesudo_random_index; }; -struct SStream { +struct SStream: pkpy_SStream { PK_ALWAYS_PASS_BY_POINTER(SStream) - vector buffer; int _precision = -1; - - bool empty() const { return buffer.empty(); } + bool _submited = false; + bool empty() const { return data.count == 0; } void setprecision(int precision) { _precision = precision; } - SStream() {} + SStream() { + pkpy_SStream__ctor(this); + } - SStream(int guess_size) { buffer.reserve(guess_size); } + SStream(int guess_size) { c11_vector__reserve(&data, guess_size); } - Str str(); + ~SStream() { + // in case of error + if(!_submited) pkpy_SStream__dtor(this); + } - SStream& operator<< (const Str&); - SStream& operator<< (const char*); - SStream& operator<< (int); - SStream& operator<< (size_t); - SStream& operator<< (i64); - SStream& operator<< (f64); - SStream& operator<< (const std::string&); - SStream& operator<< (std::string_view); - SStream& operator<< (char); - SStream& operator<< (StrName); + Str str(){ + assert(!_submited); + _submited = true; + return pkpy_SStream__submit(this); + } + + SStream& operator<< (const Str& val){ + pkpy_SStream__write_Str(this, &val); + return *this; + } + + SStream& operator<< (const char* val){ + pkpy_SStream__write_cstr(this, val); + return *this; + } + + SStream& operator<< (int val){ + pkpy_SStream__write_int(this, val); + return *this; + } + + SStream& operator<< (size_t val){ + // size_t could overflow int64, but nevermind... + pkpy_SStream__write_int64(this, val); + return *this; + } + + SStream& operator<< (i64 val){ + pkpy_SStream__write_int64(this, val); + return *this; + } + + SStream& operator<< (f64 val){ + pkpy_SStream__write_double(this, val, _precision); + return *this; + } + + SStream& operator<< (const std::string& val){ + pkpy_SStream__write_cstrn(this, val.data(), val.size()); + return *this; + } + + SStream& operator<< (std::string_view val){ + pkpy_SStream__write_cstrn(this, val.data(), val.size()); + return *this; + } + + SStream& operator<< (char val){ + pkpy_SStream__write_char(this, val); + return *this; + } + + SStream& operator<< (StrName name){ + std::string_view sv = name.sv(); + pkpy_SStream__write_cstrn(this, sv.data(), sv.size()); + return *this; + } void write_hex(unsigned char, bool non_zero = false); void write_hex(void*); diff --git a/src/common/sstream.c b/src/common/sstream.c index 6ba3a7ba..df20c129 100644 --- a/src/common/sstream.c +++ b/src/common/sstream.c @@ -3,6 +3,8 @@ #include #include +#include +#include void pkpy_SStream__ctor(pkpy_SStream* self) { c11_vector__ctor(&self->data, sizeof(char)); @@ -28,8 +30,38 @@ void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t i) { pkpy_SStream__write_cstr(self, buf); } +void pkpy_SStream__write_float(pkpy_SStream* self, float val, int precision){ + return pkpy_SStream__write_double(self, val, precision); +} + +void pkpy_SStream__write_double(pkpy_SStream* self, double val, int precision){ + if(val == INFINITY) { + pkpy_SStream__write_cstr(self, val > 0 ? "inf" : "-inf"); + return; + } + if(val == NAN) { + pkpy_SStream__write_cstr(self, "nan"); + return; + } + char b[32]; + int size; + if(precision < 0) { + int prec = 17 - 1; // std::numeric_limits::max_digits10 == 17 + size = snprintf(b, sizeof(b), "%.*g", prec, val); + } else { + int prec = precision; + size = snprintf(b, sizeof(b), "%.*f", prec, val); + } + pkpy_SStream__write_cstr(self, b); + bool all_is_digit = true; + for(int i = 1; i < size; i++){ + if(!isdigit(b[i])){ all_is_digit = false; break; } + } + if(all_is_digit) pkpy_SStream__write_cstr(self, ".0"); +} + void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str* str) { - pkpy_SStream__write_cstr(self, pkpy_Str__data(str)); + pkpy_SStream__write_cstrn(self, pkpy_Str__data(str), str->size); } void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string sv) { @@ -52,14 +84,13 @@ void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_Any switch(args[i].type){ case 1: pkpy_SStream__write_int(self, args[i]._int); break; case 2: pkpy_SStream__write_int64(self, args[i]._int64); break; - case 3: assert(0); break; - case 4: assert(0); break; + case 3: pkpy_SStream__write_float(self, args[i]._float, -1); break; + case 4: pkpy_SStream__write_double(self, args[i]._double, -1); break; case 5: pkpy_SStream__write_char(self, args[i]._char); break; - case 6: assert(0); break; - case 7: pkpy_SStream__write_Str(self, args[i]._str); break; - case 8: pkpy_SStream__write_sv(self, args[i]._sv); break; - case 9: pkpy_SStream__write_cstr(self, args[i]._cstr); break; - case 10: assert(0); break; + case 6: pkpy_SStream__write_Str(self, args[i]._str); break; + case 7: pkpy_SStream__write_sv(self, args[i]._sv); break; + case 8: pkpy_SStream__write_cstr(self, args[i]._cstr); break; + case 9: assert(0); break; default: assert(0); break; } fmt += 2; diff --git a/src/common/str.cpp b/src/common/str.cpp index f9f14fab..451f5818 100644 --- a/src/common/str.cpp +++ b/src/common/str.cpp @@ -54,88 +54,6 @@ StrName StrName::get(std::string_view s) { return StrName(index); } -Str SStream::str() { - // after this call, the buffer is no longer valid - buffer.push_back('\0'); - auto detached = buffer.detach(); - detached.second--; // remove the last '\0' - return Str(detached); -} - -SStream& SStream::operator<< (const Str& s) { - for(char c: s) - buffer.push_back(c); - return *this; -} - -SStream& SStream::operator<< (const char* s) { - while(*s) - buffer.push_back(*s++); - return *this; -} - -SStream& SStream::operator<< (const std::string& s) { - for(char c: s) - buffer.push_back(c); - return *this; -} - -SStream& SStream::operator<< (std::string_view s) { - for(char c: s) - buffer.push_back(c); - return *this; -} - -SStream& SStream::operator<< (char c) { - buffer.push_back(c); - return *this; -} - -SStream& SStream::operator<< (StrName sn) { return *this << sn.sv(); } - -SStream& SStream::operator<< (size_t val) { - // size_t could be out of range of `i64`, use `std::to_string` instead - return (*this) << std::to_string(val); -} - -SStream& SStream::operator<< (int val) { return (*this) << static_cast(val); } - -SStream& SStream::operator<< (i64 val) { - // str(-2**64).__len__() == 21 - buffer.reserve(buffer.size() + 24); - if(val == 0) { - buffer.push_back('0'); - return *this; - } - if(val < 0) { - buffer.push_back('-'); - val = -val; - } - auto begin = buffer.end(); - while(val) { - buffer.push_back('0' + val % 10); - val /= 10; - } - std::reverse(begin, buffer.end()); - return *this; -} - -SStream& SStream::operator<< (f64 val) { - if(std::isinf(val)) { return (*this) << (val > 0 ? "inf" : "-inf"); } - if(std::isnan(val)) { return (*this) << "nan"; } - char b[32]; - if(_precision == -1) { - int prec = std::numeric_limits::max_digits10 - 1; - snprintf(b, sizeof(b), "%.*g", prec, val); - } else { - int prec = _precision; - snprintf(b, sizeof(b), "%.*f", prec, val); - } - (*this) << b; - if(std::all_of(b + 1, b + strlen(b), isdigit)) (*this) << ".0"; - return *this; -} - void SStream::write_hex(unsigned char c, bool non_zero) { unsigned char high = c >> 4; unsigned char low = c & 0xf; diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index 898b2850..10ad986c 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -4,6 +4,10 @@ #include #include +#if PK_DEBUG_CEVAL_STEP +#include +#endif + const static char* OP_NAMES[] = { #define OPCODE(name) #name, #include "pocketpy/opcodes.h" @@ -117,7 +121,7 @@ Str VM::py_repr(PyVar obj) { } Str VM::py_json(PyVar obj) { - auto j = JsonSerializer(this, obj); + JsonSerializer j(this, obj); return j.serialize(); } @@ -808,7 +812,7 @@ Str VM::disassemble(CodeObject_ co) { #if PK_DEBUG_CEVAL_STEP void VM::__log_s_data(const char* title) { - if(_main == nullptr) return; + // if(_main == nullptr) return; if(callstack.empty()) return; SStream ss; if(title) ss << title << " | "; diff --git a/tests/99_builtin_func.py b/tests/99_builtin_func.py index d44eb5f1..5ff315f1 100644 --- a/tests/99_builtin_func.py +++ b/tests/99_builtin_func.py @@ -146,10 +146,13 @@ except: pass # test chr -l = [] +actual = [] for i in range(128): - l.append(f'{i} {chr(i)}') -assert l == ['0 \x00', '1 \x01', '2 \x02', '3 \x03', '4 \x04', '5 \x05', '6 \x06', '7 \x07', '8 \x08', '9 \t', '10 \n', '11 \x0b', '12 \x0c', '13 \r', '14 \x0e', '15 \x0f', '16 \x10', '17 \x11', '18 \x12', '19 \x13', '20 \x14', '21 \x15', '22 \x16', '23 \x17', '24 \x18', '25 \x19', '26 \x1a', '27 \x1b', '28 \x1c', '29 \x1d', '30 \x1e', '31 \x1f', '32 ', '33 !', '34 "', '35 #', '36 $', '37 %', '38 &', "39 '", '40 (', '41 )', '42 *', '43 +', '44 ,', '45 -', '46 .', '47 /', '48 0', '49 1', '50 2', '51 3', '52 4', '53 5', '54 6', '55 7', '56 8', '57 9', '58 :', '59 ;', '60 <', '61 =', '62 >', '63 ?', '64 @', '65 A', '66 B', '67 C', '68 D', '69 E', '70 F', '71 G', '72 H', '73 I', '74 J', '75 K', '76 L', '77 M', '78 N', '79 O', '80 P', '81 Q', '82 R', '83 S', '84 T', '85 U', '86 V', '87 W', '88 X', '89 Y', '90 Z', '91 [', '92 \\', '93 ]', '94 ^', '95 _', '96 `', '97 a', '98 b', '99 c', '100 d', '101 e', '102 f', '103 g', '104 h', '105 i', '106 j', '107 k', '108 l', '109 m', '110 n', '111 o', '112 p', '113 q', '114 r', '115 s', '116 t', '117 u', '118 v', '119 w', '120 x', '121 y', '122 z', '123 {', '124 |', '125 }', '126 ~', '127 \x7f'] + actual.append(f'{i} {chr(i)}') +expected = ['0 \x00', '1 \x01', '2 \x02', '3 \x03', '4 \x04', '5 \x05', '6 \x06', '7 \x07', '8 \x08', '9 \t', '10 \n', '11 \x0b', '12 \x0c', '13 \r', '14 \x0e', '15 \x0f', '16 \x10', '17 \x11', '18 \x12', '19 \x13', '20 \x14', '21 \x15', '22 \x16', '23 \x17', '24 \x18', '25 \x19', '26 \x1a', '27 \x1b', '28 \x1c', '29 \x1d', '30 \x1e', '31 \x1f', '32 ', '33 !', '34 "', '35 #', '36 $', '37 %', '38 &', "39 '", '40 (', '41 )', '42 *', '43 +', '44 ,', '45 -', '46 .', '47 /', '48 0', '49 1', '50 2', '51 3', '52 4', '53 5', '54 6', '55 7', '56 8', '57 9', '58 :', '59 ;', '60 <', '61 =', '62 >', '63 ?', '64 @', '65 A', '66 B', '67 C', '68 D', '69 E', '70 F', '71 G', '72 H', '73 I', '74 J', '75 K', '76 L', '77 M', '78 N', '79 O', '80 P', '81 Q', '82 R', '83 S', '84 T', '85 U', '86 V', '87 W', '88 X', '89 Y', '90 Z', '91 [', '92 \\', '93 ]', '94 ^', '95 _', '96 `', '97 a', '98 b', '99 c', '100 d', '101 e', '102 f', '103 g', '104 h', '105 i', '106 j', '107 k', '108 l', '109 m', '110 n', '111 o', '112 p', '113 q', '114 r', '115 s', '116 t', '117 u', '118 v', '119 w', '120 x', '121 y', '122 z', '123 {', '124 |', '125 }', '126 ~', '127 \x7f'] +assert len(actual) == len(expected) +for i in range(len(actual)): + assert (actual[i] == expected[i]), (actual[i], expected[i]) assert type(bin(1234)) is str From f0d82a19ee57e707160425033b667350de40f473 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 13:22:29 +0800 Subject: [PATCH 17/60] complete sstream --- include/pocketpy/common/sstream.h | 2 ++ include/pocketpy/common/str.hpp | 10 ++++-- include/pocketpy/interpreter/cffi.hpp | 2 +- src/common/sstream.c | 27 +++++++++++++++ src/common/str.cpp | 48 --------------------------- src/pocketpy.cpp | 19 +++++++++-- 6 files changed, 54 insertions(+), 54 deletions(-) diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index e2e119a3..b1e37cd0 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -51,6 +51,8 @@ void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str*); void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string); void pkpy_SStream__write_cstr(pkpy_SStream* self, const char*); void pkpy_SStream__write_cstrn(pkpy_SStream* self, const char*, int); +void pkpy_SStream__write_hex(pkpy_SStream* self, unsigned char, bool non_zero); +void pkpy_SStream__write_ptr(pkpy_SStream* self, void*); void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_AnyStr* args, int n); // Submit the stream and return the final string. The stream becomes invalid after this call diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index 25dbee7a..9e529ee6 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -353,9 +353,13 @@ struct SStream: pkpy_SStream { return *this; } - void write_hex(unsigned char, bool non_zero = false); - void write_hex(void*); - void write_hex(i64); + void write_hex(unsigned char val, bool non_zero = false){ + pkpy_SStream__write_hex(this, val, non_zero); + } + + void write_ptr(void* p){ + pkpy_SStream__write_ptr(this, p); + } }; #ifdef _S diff --git a/include/pocketpy/interpreter/cffi.hpp b/include/pocketpy/interpreter/cffi.hpp index 7dd5362b..7c9d8b9e 100644 --- a/include/pocketpy/interpreter/cffi.hpp +++ b/include/pocketpy/interpreter/cffi.hpp @@ -29,7 +29,7 @@ struct VoidP { Str hex() const { SStream ss; - ss.write_hex(ptr); + ss.write_ptr(ptr); return ss.str(); } diff --git a/src/common/sstream.c b/src/common/sstream.c index df20c129..8fee789a 100644 --- a/src/common/sstream.c +++ b/src/common/sstream.c @@ -76,6 +76,33 @@ void pkpy_SStream__write_cstrn(pkpy_SStream* self, const char* str, int n) { c11_vector__extend(char, &self->data, str, n); } +void pkpy_SStream__write_hex(pkpy_SStream* self, unsigned char c, bool non_zero) { + unsigned char high = c >> 4; + unsigned char low = c & 0xf; + if(non_zero) { + if(high) pkpy_SStream__write_char(self, PK_HEX_TABLE[high]); + if(high || low) pkpy_SStream__write_char(self, PK_HEX_TABLE[low]); + } else { + pkpy_SStream__write_char(self, PK_HEX_TABLE[high]); + pkpy_SStream__write_char(self, PK_HEX_TABLE[low]); + } +} + +void pkpy_SStream__write_ptr(pkpy_SStream* self, void* p) { + if(p == NULL) { + pkpy_SStream__write_cstr(self, "0x0"); + return; + } + pkpy_SStream__write_cstr(self, "0x"); + uintptr_t p_t = (uintptr_t)(p); + bool non_zero = true; + for(int i = sizeof(void*) - 1; i >= 0; i--) { + unsigned char cpnt = (p_t >> (i * 8)) & 0xff; + pkpy_SStream__write_hex(self, cpnt, non_zero); + if(cpnt != 0) non_zero = false; + } +} + void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_AnyStr* args, int n){ int i = 0; while(*fmt){ diff --git a/src/common/str.cpp b/src/common/str.cpp index 451f5818..2d14136b 100644 --- a/src/common/str.cpp +++ b/src/common/str.cpp @@ -54,54 +54,6 @@ StrName StrName::get(std::string_view s) { return StrName(index); } -void SStream::write_hex(unsigned char c, bool non_zero) { - unsigned char high = c >> 4; - unsigned char low = c & 0xf; - if(non_zero) { - if(high) (*this) << PK_HEX_TABLE[high]; - if(high || low) (*this) << PK_HEX_TABLE[low]; - } else { - (*this) << PK_HEX_TABLE[high]; - (*this) << PK_HEX_TABLE[low]; - } -} - -void SStream::write_hex(void* p) { - if(p == nullptr) { - (*this) << "0x0"; - return; - } - (*this) << "0x"; - uintptr_t p_t = reinterpret_cast(p); - bool non_zero = true; - for(int i = sizeof(void*) - 1; i >= 0; i--) { - unsigned char cpnt = (p_t >> (i * 8)) & 0xff; - write_hex(cpnt, non_zero); - if(cpnt != 0) non_zero = false; - } -} - -void SStream::write_hex(i64 val) { - if(val == 0) { - (*this) << "0x0"; - return; - } - if(val < 0) { - (*this) << "-"; - val = -val; - } - (*this) << "0x"; - bool non_zero = true; - for(int i = 56; i >= 0; i -= 8) { - unsigned char cpnt = (val >> i) & 0xff; - write_hex(cpnt, non_zero); - if(cpnt != 0) non_zero = false; - } -} - -#undef PK_STR_ALLOCATE -#undef PK_STR_COPY_INIT - // unary operators const StrName __repr__ = StrName::get("__repr__"); const StrName __str__ = StrName::get("__str__"); diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 4b523275..23d3596a 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -296,7 +296,22 @@ void __init_builtins(VM* _vm) { _vm->bind_func(_vm->builtins, "hex", 1, [](VM* vm, ArgsView args) { SStream ss; - ss.write_hex(CAST(i64, args[0])); + i64 val = CAST(i64, args[0]); + if(val == 0) { + ss << "0x0"; + return VAR(ss.str()); + } + if(val < 0) { + ss << "-"; + val = -val; + } + ss << "0x"; + bool non_zero = true; + for(int i = 56; i >= 0; i -= 8) { + unsigned char cpnt = (val >> i) & 0xff; + ss.write_hex(cpnt, non_zero); + if(cpnt != 0) non_zero = false; + } return VAR(ss.str()); }); @@ -353,7 +368,7 @@ void __init_builtins(VM* _vm) { assert(!is_tagged(obj)); SStream ss; ss << "<" << _type_name(vm, vm->_tp(obj)) << " object at "; - ss.write_hex(obj.get()); + ss.write_ptr(obj.get()); ss << ">"; return ss.str(); }); From deec5edff932add8322c9439f920c62a979a5425 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 14:20:51 +0800 Subject: [PATCH 18/60] some fix --- include/pocketpy/common/str.hpp | 2 -- src/common/sstream.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index 9e529ee6..6bec4580 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -13,8 +13,6 @@ namespace pkpy { -struct SStream; - struct Str: pkpy_Str { bool is_inlined() const { return is_sso; } diff --git a/src/common/sstream.c b/src/common/sstream.c index 8fee789a..d4ad9699 100644 --- a/src/common/sstream.c +++ b/src/common/sstream.c @@ -35,11 +35,11 @@ void pkpy_SStream__write_float(pkpy_SStream* self, float val, int precision){ } void pkpy_SStream__write_double(pkpy_SStream* self, double val, int precision){ - if(val == INFINITY) { + if(isinf(val)) { pkpy_SStream__write_cstr(self, val > 0 ? "inf" : "-inf"); return; } - if(val == NAN) { + if(isnan(val)) { pkpy_SStream__write_cstr(self, "nan"); return; } From e1c706b08f6a9722d3e3372e914b2e4143d625ae Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 14:26:51 +0800 Subject: [PATCH 19/60] some fix --- include/pocketpy/common/sstream.h | 41 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index b1e37cd0..f1344da3 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -14,6 +14,25 @@ typedef struct pkpy_SStream { c11_vector data; } pkpy_SStream; +void pkpy_SStream__ctor(pkpy_SStream* self); +void pkpy_SStream__dtor(pkpy_SStream* self); + +void pkpy_SStream__write_int(pkpy_SStream* self, int); +void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t); +void pkpy_SStream__write_float(pkpy_SStream* self, float, int precision); +void pkpy_SStream__write_double(pkpy_SStream* self, double, int precision); +void pkpy_SStream__write_char(pkpy_SStream* self, char); +void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str*); +void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string); +void pkpy_SStream__write_cstr(pkpy_SStream* self, const char*); +void pkpy_SStream__write_cstrn(pkpy_SStream* self, const char*, int); +void pkpy_SStream__write_hex(pkpy_SStream* self, unsigned char, bool non_zero); +void pkpy_SStream__write_ptr(pkpy_SStream* self, void*); + +// Submit the stream and return the final string. The stream becomes invalid after this call +pkpy_Str pkpy_SStream__submit(pkpy_SStream* self); + +#ifndef __cplusplus typedef struct pkpy_AnyStr { int type; union { @@ -29,6 +48,8 @@ typedef struct pkpy_AnyStr { }; } pkpy_AnyStr; +void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_AnyStr* args, int n); + inline pkpy_AnyStr pkpy_AnyStr__int(int x) { return (pkpy_AnyStr){.type = 1, ._int = x}; } inline pkpy_AnyStr pkpy_AnyStr__int64(int64_t x) { return (pkpy_AnyStr){.type = 2, ._int64 = x}; } inline pkpy_AnyStr pkpy_AnyStr__float(float x) { return (pkpy_AnyStr){.type = 3, ._float = x}; } @@ -39,25 +60,6 @@ inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { return (pkpy_AnyStr){.type = inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { return (pkpy_AnyStr){.type = 8, ._cstr = x}; } inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){.type = 9, ._ptr = x}; } -void pkpy_SStream__ctor(pkpy_SStream* self); -void pkpy_SStream__dtor(pkpy_SStream* self); - -void pkpy_SStream__write_int(pkpy_SStream* self, int); -void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t); -void pkpy_SStream__write_float(pkpy_SStream* self, float, int precision); -void pkpy_SStream__write_double(pkpy_SStream* self, double, int precision); -void pkpy_SStream__write_char(pkpy_SStream* self, char); -void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str*); -void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string); -void pkpy_SStream__write_cstr(pkpy_SStream* self, const char*); -void pkpy_SStream__write_cstrn(pkpy_SStream* self, const char*, int); -void pkpy_SStream__write_hex(pkpy_SStream* self, unsigned char, bool non_zero); -void pkpy_SStream__write_ptr(pkpy_SStream* self, void*); -void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_AnyStr* args, int n); - -// Submit the stream and return the final string. The stream becomes invalid after this call -pkpy_Str pkpy_SStream__submit(pkpy_SStream* self); - #define pkpy__anystr(x) _Generic((x), \ int: pkpy_AnyStr__int, \ int64_t: pkpy_AnyStr__int64, \ @@ -79,6 +81,7 @@ pkpy_Str pkpy_SStream__submit(pkpy_SStream* self); #define pkpy__anystr_list(...) pkpy__anystr_list_dispatcher(__VA_ARGS__)(__VA_ARGS__) #define pkpy_SStream__write(self, fmt, ...) pkpy_SStream__write_any(self, fmt, pkpy__anystr_list(__VA_ARGS__)) +#endif #ifdef __cplusplus } From ca4ebf4ba7fbb58d49f479c854987ae2ecf486a1 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 14:40:04 +0800 Subject: [PATCH 20/60] some fix --- include/pocketpy/common/sstream.h | 41 ++++++++++++++----------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index f1344da3..b1e37cd0 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -14,25 +14,6 @@ typedef struct pkpy_SStream { c11_vector data; } pkpy_SStream; -void pkpy_SStream__ctor(pkpy_SStream* self); -void pkpy_SStream__dtor(pkpy_SStream* self); - -void pkpy_SStream__write_int(pkpy_SStream* self, int); -void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t); -void pkpy_SStream__write_float(pkpy_SStream* self, float, int precision); -void pkpy_SStream__write_double(pkpy_SStream* self, double, int precision); -void pkpy_SStream__write_char(pkpy_SStream* self, char); -void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str*); -void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string); -void pkpy_SStream__write_cstr(pkpy_SStream* self, const char*); -void pkpy_SStream__write_cstrn(pkpy_SStream* self, const char*, int); -void pkpy_SStream__write_hex(pkpy_SStream* self, unsigned char, bool non_zero); -void pkpy_SStream__write_ptr(pkpy_SStream* self, void*); - -// Submit the stream and return the final string. The stream becomes invalid after this call -pkpy_Str pkpy_SStream__submit(pkpy_SStream* self); - -#ifndef __cplusplus typedef struct pkpy_AnyStr { int type; union { @@ -48,8 +29,6 @@ typedef struct pkpy_AnyStr { }; } pkpy_AnyStr; -void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_AnyStr* args, int n); - inline pkpy_AnyStr pkpy_AnyStr__int(int x) { return (pkpy_AnyStr){.type = 1, ._int = x}; } inline pkpy_AnyStr pkpy_AnyStr__int64(int64_t x) { return (pkpy_AnyStr){.type = 2, ._int64 = x}; } inline pkpy_AnyStr pkpy_AnyStr__float(float x) { return (pkpy_AnyStr){.type = 3, ._float = x}; } @@ -60,6 +39,25 @@ inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { return (pkpy_AnyStr){.type = inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { return (pkpy_AnyStr){.type = 8, ._cstr = x}; } inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){.type = 9, ._ptr = x}; } +void pkpy_SStream__ctor(pkpy_SStream* self); +void pkpy_SStream__dtor(pkpy_SStream* self); + +void pkpy_SStream__write_int(pkpy_SStream* self, int); +void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t); +void pkpy_SStream__write_float(pkpy_SStream* self, float, int precision); +void pkpy_SStream__write_double(pkpy_SStream* self, double, int precision); +void pkpy_SStream__write_char(pkpy_SStream* self, char); +void pkpy_SStream__write_Str(pkpy_SStream* self, const pkpy_Str*); +void pkpy_SStream__write_sv(pkpy_SStream* self, c11_string); +void pkpy_SStream__write_cstr(pkpy_SStream* self, const char*); +void pkpy_SStream__write_cstrn(pkpy_SStream* self, const char*, int); +void pkpy_SStream__write_hex(pkpy_SStream* self, unsigned char, bool non_zero); +void pkpy_SStream__write_ptr(pkpy_SStream* self, void*); +void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_AnyStr* args, int n); + +// Submit the stream and return the final string. The stream becomes invalid after this call +pkpy_Str pkpy_SStream__submit(pkpy_SStream* self); + #define pkpy__anystr(x) _Generic((x), \ int: pkpy_AnyStr__int, \ int64_t: pkpy_AnyStr__int64, \ @@ -81,7 +79,6 @@ inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){.type = 9, . #define pkpy__anystr_list(...) pkpy__anystr_list_dispatcher(__VA_ARGS__)(__VA_ARGS__) #define pkpy_SStream__write(self, fmt, ...) pkpy_SStream__write_any(self, fmt, pkpy__anystr_list(__VA_ARGS__)) -#endif #ifdef __cplusplus } From 9b72ae7223fca81d3c365312c9693d6404cf2fc1 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 14:41:46 +0800 Subject: [PATCH 21/60] fix msvc --- include/pocketpy/common/sstream.h | 8 ++++---- include/pocketpy/common/str.hpp | 4 ++-- src/common/sstream.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index b1e37cd0..b1f5e75d 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -18,7 +18,7 @@ typedef struct pkpy_AnyStr { int type; union { int _int; - int64_t _int64; + int64_t _i64; float _float; double _double; char _char; @@ -30,7 +30,7 @@ typedef struct pkpy_AnyStr { } pkpy_AnyStr; inline pkpy_AnyStr pkpy_AnyStr__int(int x) { return (pkpy_AnyStr){.type = 1, ._int = x}; } -inline pkpy_AnyStr pkpy_AnyStr__int64(int64_t x) { return (pkpy_AnyStr){.type = 2, ._int64 = x}; } +inline pkpy_AnyStr pkpy_AnyStr__i64(int64_t x) { return (pkpy_AnyStr){.type = 2, ._i64 = x}; } inline pkpy_AnyStr pkpy_AnyStr__float(float x) { return (pkpy_AnyStr){.type = 3, ._float = x}; } inline pkpy_AnyStr pkpy_AnyStr__double(double x) { return (pkpy_AnyStr){.type = 4, ._double = x}; } inline pkpy_AnyStr pkpy_AnyStr__char(char x) { return (pkpy_AnyStr){.type = 5, ._char = x}; } @@ -43,7 +43,7 @@ void pkpy_SStream__ctor(pkpy_SStream* self); void pkpy_SStream__dtor(pkpy_SStream* self); void pkpy_SStream__write_int(pkpy_SStream* self, int); -void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t); +void pkpy_SStream__write_i64(pkpy_SStream* self, int64_t); void pkpy_SStream__write_float(pkpy_SStream* self, float, int precision); void pkpy_SStream__write_double(pkpy_SStream* self, double, int precision); void pkpy_SStream__write_char(pkpy_SStream* self, char); @@ -60,7 +60,7 @@ pkpy_Str pkpy_SStream__submit(pkpy_SStream* self); #define pkpy__anystr(x) _Generic((x), \ int: pkpy_AnyStr__int, \ - int64_t: pkpy_AnyStr__int64, \ + int64_t: pkpy_AnyStr__i64, \ float: pkpy_AnyStr__float, \ double: pkpy_AnyStr__double, \ char: pkpy_AnyStr__char, \ diff --git a/include/pocketpy/common/str.hpp b/include/pocketpy/common/str.hpp index 6bec4580..ed690ff3 100644 --- a/include/pocketpy/common/str.hpp +++ b/include/pocketpy/common/str.hpp @@ -316,12 +316,12 @@ struct SStream: pkpy_SStream { SStream& operator<< (size_t val){ // size_t could overflow int64, but nevermind... - pkpy_SStream__write_int64(this, val); + pkpy_SStream__write_i64(this, val); return *this; } SStream& operator<< (i64 val){ - pkpy_SStream__write_int64(this, val); + pkpy_SStream__write_i64(this, val); return *this; } diff --git a/src/common/sstream.c b/src/common/sstream.c index d4ad9699..ad99a93a 100644 --- a/src/common/sstream.c +++ b/src/common/sstream.c @@ -24,7 +24,7 @@ void pkpy_SStream__write_int(pkpy_SStream* self, int i) { pkpy_SStream__write_cstr(self, buf); } -void pkpy_SStream__write_int64(pkpy_SStream* self, int64_t i) { +void pkpy_SStream__write_i64(pkpy_SStream* self, int64_t i) { char buf[23]; // sign + 21 digits + null terminator snprintf(buf, sizeof(buf), "%lld", i); pkpy_SStream__write_cstr(self, buf); @@ -110,7 +110,7 @@ void pkpy_SStream__write_any(pkpy_SStream* self, const char* fmt, const pkpy_Any assert(i < n); switch(args[i].type){ case 1: pkpy_SStream__write_int(self, args[i]._int); break; - case 2: pkpy_SStream__write_int64(self, args[i]._int64); break; + case 2: pkpy_SStream__write_i64(self, args[i]._i64); break; case 3: pkpy_SStream__write_float(self, args[i]._float, -1); break; case 4: pkpy_SStream__write_double(self, args[i]._double, -1); break; case 5: pkpy_SStream__write_char(self, args[i]._char); break; From ead9d93f5bb85de34bbc9ee3688e9a7bfe2ba37e Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 14:48:37 +0800 Subject: [PATCH 22/60] fix msvc --- include/pocketpy/common/sstream.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index b1f5e75d..00391b5a 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -29,15 +29,15 @@ typedef struct pkpy_AnyStr { }; } pkpy_AnyStr; -inline pkpy_AnyStr pkpy_AnyStr__int(int x) { return (pkpy_AnyStr){.type = 1, ._int = x}; } -inline pkpy_AnyStr pkpy_AnyStr__i64(int64_t x) { return (pkpy_AnyStr){.type = 2, ._i64 = x}; } -inline pkpy_AnyStr pkpy_AnyStr__float(float x) { return (pkpy_AnyStr){.type = 3, ._float = x}; } -inline pkpy_AnyStr pkpy_AnyStr__double(double x) { return (pkpy_AnyStr){.type = 4, ._double = x}; } -inline pkpy_AnyStr pkpy_AnyStr__char(char x) { return (pkpy_AnyStr){.type = 5, ._char = x}; } -inline pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { return (pkpy_AnyStr){.type = 6, ._str = x}; } -inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { return (pkpy_AnyStr){.type = 7, ._sv = x}; } -inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { return (pkpy_AnyStr){.type = 8, ._cstr = x}; } -inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){.type = 9, ._ptr = x}; } +inline pkpy_AnyStr pkpy_AnyStr__int(int x) { return (pkpy_AnyStr){1, ._int = x}; } +inline pkpy_AnyStr pkpy_AnyStr__i64(int64_t x) { return (pkpy_AnyStr){2, ._i64 = x}; } +inline pkpy_AnyStr pkpy_AnyStr__float(float x) { return (pkpy_AnyStr){3, ._float = x}; } +inline pkpy_AnyStr pkpy_AnyStr__double(double x) { return (pkpy_AnyStr){4, ._double = x}; } +inline pkpy_AnyStr pkpy_AnyStr__char(char x) { return (pkpy_AnyStr){5, ._char = x}; } +inline pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { return (pkpy_AnyStr){6, ._str = x}; } +inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { return (pkpy_AnyStr){7, ._sv = x}; } +inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { return (pkpy_AnyStr){8, ._cstr = x}; } +inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){9, ._ptr = x}; } void pkpy_SStream__ctor(pkpy_SStream* self); void pkpy_SStream__dtor(pkpy_SStream* self); From 59537c9bd09ef37d07f4c9e13badeb52df03e45e Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 14:58:23 +0800 Subject: [PATCH 23/60] fix msvc --- include/pocketpy/common/sstream.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index 00391b5a..282724de 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -29,15 +29,15 @@ typedef struct pkpy_AnyStr { }; } pkpy_AnyStr; -inline pkpy_AnyStr pkpy_AnyStr__int(int x) { return (pkpy_AnyStr){1, ._int = x}; } -inline pkpy_AnyStr pkpy_AnyStr__i64(int64_t x) { return (pkpy_AnyStr){2, ._i64 = x}; } -inline pkpy_AnyStr pkpy_AnyStr__float(float x) { return (pkpy_AnyStr){3, ._float = x}; } -inline pkpy_AnyStr pkpy_AnyStr__double(double x) { return (pkpy_AnyStr){4, ._double = x}; } -inline pkpy_AnyStr pkpy_AnyStr__char(char x) { return (pkpy_AnyStr){5, ._char = x}; } -inline pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { return (pkpy_AnyStr){6, ._str = x}; } -inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { return (pkpy_AnyStr){7, ._sv = x}; } -inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { return (pkpy_AnyStr){8, ._cstr = x}; } -inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { return (pkpy_AnyStr){9, ._ptr = x}; } +inline pkpy_AnyStr pkpy_AnyStr__int(int x) { pkpy_AnyStr s; s.type = 1; s._int = x; return s; } +inline pkpy_AnyStr pkpy_AnyStr__i64(int64_t x) { pkpy_AnyStr s; s.type = 2; s._i64 = x; return s; } +inline pkpy_AnyStr pkpy_AnyStr__float(float x) { pkpy_AnyStr s; s.type = 3; s._float = x; return s; } +inline pkpy_AnyStr pkpy_AnyStr__double(double x) { pkpy_AnyStr s; s.type = 4; s._double = x; return s; } +inline pkpy_AnyStr pkpy_AnyStr__char(char x) { pkpy_AnyStr s; s.type = 5; s._char = x; return s; } +inline pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { pkpy_AnyStr s; s.type = 6; s._str = x; return s; } +inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { pkpy_AnyStr s; s.type = 7; s._sv = x; return s; } +inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { pkpy_AnyStr s; s.type = 8; s._cstr = x; return s; } +inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { pkpy_AnyStr s; s.type = 9; s._ptr = x; return s; } void pkpy_SStream__ctor(pkpy_SStream* self); void pkpy_SStream__dtor(pkpy_SStream* self); From d871d91adb1f57a0bc4c2b62c930be483181e4a8 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 14:09:45 +0800 Subject: [PATCH 24/60] Impl Dict and DictIter in c11 --- include/pocketpy/interpreter/iter.hpp | 4 +- include/pocketpy/objects/dict.h | 113 +++++++++++++ include/pocketpy/objects/dict.hpp | 108 +++++++----- include/pocketpy/objects/pyvar.h | 54 ++++++ src/interpreter/iter.cpp | 13 +- src/interpreter/vm.cpp | 31 ---- src/objects/dict.c | 228 ++++++++++++++++++++++++++ src/objects/dict.cpp | 180 -------------------- src/objects/pyvar.cpp | 17 ++ src/pocketpy.cpp | 10 +- 10 files changed, 495 insertions(+), 263 deletions(-) create mode 100644 include/pocketpy/objects/dict.h create mode 100644 include/pocketpy/objects/pyvar.h create mode 100644 src/objects/dict.c delete mode 100644 src/objects/dict.cpp create mode 100644 src/objects/pyvar.cpp diff --git a/include/pocketpy/interpreter/iter.hpp b/include/pocketpy/interpreter/iter.hpp index 130c08f7..a1917679 100644 --- a/include/pocketpy/interpreter/iter.hpp +++ b/include/pocketpy/interpreter/iter.hpp @@ -74,9 +74,9 @@ struct Generator { struct DictItemsIter { PyVar ref; - int i; + pkpy_DictIter it; - DictItemsIter(PyVar ref) : ref(ref) { i = PK_OBJ_GET(Dict, ref)._head_idx; } + DictItemsIter(PyVar ref) : ref(ref) { it = PK_OBJ_GET(Dict, ref).iter(); } void _gc_mark(VM* vm) const { vm->obj_gc_mark(ref); } diff --git a/include/pocketpy/objects/dict.h b/include/pocketpy/objects/dict.h new file mode 100644 index 00000000..aa56ae42 --- /dev/null +++ b/include/pocketpy/objects/dict.h @@ -0,0 +1,113 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "pocketpy/objects/pyvar.h" +#include "pocketpy/common/vector.h" + +typedef struct { + unsigned int _version; /** used internelly to detect iterator invalidation */ + int count; /** number of elements in the dictionary */ + c11_vector _entries; /** contains `pkpy_DictEntry` (hidden type) */ + int _htcap; /** capacity of the hashtable, always a power of 2 */ + void* _hashtable; /** contains indecies, can be `u8`, `u16` or `u32` according to size*/ +} pkpy_Dict; + +typedef struct { + const pkpy_Dict* _dict; + unsigned int _version; + int _index; +} pkpy_DictIter; + +/** + * @brief `pkpy_Dict` constructor + * @param self `pkpy_Dict` instance + */ +void pkpy_Dict__ctor(pkpy_Dict* self); + +/** + * @brief `pkpy_Dict` destructor + * @param self `pkpy_Dict` instance + */ +void pkpy_Dict__dtor(pkpy_Dict* self); + +/** + * @brief Copy a `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @return a new `pkpy_Dict` instance, must be destructed by the caller + */ +pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self); + +/** + * @brief Set a key-value pair into the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param key key to set + * @param val value to set + * @return `true` if the key is newly added, `false` if the key already exists + */ +bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val); + +/** + * @brief Check if a key exists in the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param key key to check + * @return `true` if the key exists, `false` otherwise + */ +bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key); + +/** + * @brief Remove a key from the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param key key to remove + * @return `true` if the key was found and removed, `false` if the key doesn't exist + */ +bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key); + +/** + * @brief Try to get a value from the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param key key to get + * @return the value associated with the key, `NULL` if the key doesn't exist + */ +const pkpy_Var* pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key); + +/** + * @brief Update the `pkpy_Dict` with another one + * @param self `pkpy_Dict` instance + * @param vm __eq__ and __hash__ context + * @param other `pkpy_Dict` instance to update with + */ +void pkpy_Dict__update(pkpy_Dict* self, void *vm, const pkpy_Dict* other); + +/** + * @brief Clear the `pkpy_Dict` + * @param self `pkpy_Dict` instance + */ +void pkpy_Dict__clear(pkpy_Dict* self); + +/** + * @brief Iterate over the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @return an iterator over the `pkpy_Dict` + */ +pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict* self); + +/** + * @brief Iterate over the `pkpy_Dict` + * @param self `pkpy_Dict` instance + * @param key key will be filled with the current key, can be `NULL` if not needed + * @param value value will be filled with the current value, can be `NULL` if not needed + * @return `true` if the iteration is still valid, `false` otherwise + */ +bool pkpy_DictIter__next(pkpy_DictIter* self, pkpy_Var* key, pkpy_Var* value); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 853f0d25..8272a45c 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -2,63 +2,95 @@ #include "pocketpy/objects/base.hpp" #include "pocketpy/objects/tuplelist.hpp" +#include "pocketpy/objects/dict.h" namespace pkpy { -struct Dict { - struct Item { - PyVar first; - PyVar second; - int prev; - int next; - }; +struct Dict : private pkpy_Dict { + Dict() { + pkpy_Dict__ctor(this); + } - constexpr static int __Capacity = 8; - constexpr static float __LoadFactor = 0.67f; + Dict(Dict&& other) { + std::memcpy(this, &other, sizeof(Dict)); + pkpy_Dict__ctor(&other); + } - int _capacity; - int _mask; - int _size; - int _critical_size; - int _head_idx; // for order preserving - int _tail_idx; // for order preserving - Item* _items; - - Dict(); - Dict(Dict&& other); - Dict(const Dict& other); + Dict(const Dict& other) { + // OPTIMIZEME: reduce copy + auto clone = pkpy_Dict__copy(&other); + std::memcpy(this, &clone, sizeof(Dict)); + } + Dict& operator= (const Dict&) = delete; Dict& operator= (Dict&&) = delete; - int size() const { return _size; } + int size() const { return count; } - void _probe_0(VM* vm, PyVar key, bool& ok, int& i) const; - void _probe_1(VM* vm, PyVar key, bool& ok, int& i) const; + void set(VM* vm, PyVar key, PyVar val) { + pkpy_Dict__set(this, vm, *reinterpret_cast<::pkpy_Var*>(&key), *reinterpret_cast<::pkpy_Var*>(&val)); + } - void set(VM* vm, PyVar key, PyVar val); - void _rehash(VM* vm); + PyVar try_get(VM* vm, PyVar key) const { + auto res = pkpy_Dict__try_get(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + if (!res) return nullptr; + return *reinterpret_cast(&res); + } - PyVar try_get(VM* vm, PyVar key) const; + bool contains(VM* vm, PyVar key) const { + return pkpy_Dict__contains(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + } - bool contains(VM* vm, PyVar key) const; - bool del(VM* vm, PyVar key); - void update(VM* vm, const Dict& other); + bool del(VM* vm, PyVar key) { + return pkpy_Dict__del(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + } + + void update(VM* vm, const Dict& other) { + pkpy_Dict__update(this, vm, &other); + } template void apply(__Func f) const { - int i = _head_idx; - while(i != -1) { - f(_items[i].first, _items[i].second); - i = _items[i].next; + pkpy_DictIter it = iter(); + PyVar key, val; + while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + f(key, val); } } - Tuple keys() const; - Tuple values() const; - void clear(); - ~Dict(); + Tuple keys() const { + Tuple res(count); + pkpy_DictIter it = iter(); + PyVar key, val; + int i = 0; + while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + res[i++] = key; + } + return res; + } - void __alloc_items(); + Tuple values() const { + Tuple res(count); + pkpy_DictIter it = iter(); + PyVar key, val; + int i = 0; + while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + res[i++] = val; + } + return res; + } + + pkpy_DictIter iter() const { + return pkpy_Dict__iter(this); + } + + void clear() { + pkpy_Dict__clear(this); + } + + ~Dict() { + pkpy_Dict__dtor(this); + } void _gc_mark(VM*) const; }; diff --git a/include/pocketpy/objects/pyvar.h b/include/pocketpy/objects/pyvar.h new file mode 100644 index 00000000..bc8fd593 --- /dev/null +++ b/include/pocketpy/objects/pyvar.h @@ -0,0 +1,54 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * @brief A python value in pocketpy. + */ +typedef struct { + // TODO: implement + + union { + int type; + char buf[16]; + }; +} pkpy_Var; + +/** + * @brief Check if the pkpy_Var is null. + * @param self The variable to check. + * @return True if the variable is null, false otherwise. + */ +#define pkpy_Var__is_null(self) ((self)->type == 0) + +/** + * @brief Set the variable to null. + * @param self The variable to set. + */ +#define pkpy_Var__set_null(self) do { (self)->type = 0; } while(0) + +/** + * @brief Check if two pkpy_Vars are equal, respects to __eq__ method. + * @param vm The virtual machine. + * @param a The first pkpy_Var. + * @param b The second pkpy_Var. + * @return True if the pkpy_Vars are equal, false otherwise. + */ +bool pkpy_Var__eq__(void *vm, pkpy_Var a, pkpy_Var b); + +/** + * @brief Get the hash of the pkpy_Var, respects to __hash__ method. + * @param vm The virtual machine. + * @param a The pkpy_Var to hash. + * @return The hash of the pkpy_Var. + */ +int64_t pkpy_Var__hash__(void *vm, pkpy_Var a); + +#ifdef __cplusplus +} +#endif diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp index 18c484d0..56d5f1f6 100644 --- a/src/interpreter/iter.cpp +++ b/src/interpreter/iter.cpp @@ -117,12 +117,13 @@ void DictItemsIter::_register(VM* vm, PyObject* mod, PyObject* type) { }); vm->bind__next__(type->as(), [](VM* vm, PyVar _0) -> unsigned { DictItemsIter& self = _CAST(DictItemsIter&, _0); - Dict& d = PK_OBJ_GET(Dict, self.ref); - if(self.i == -1) return 0; - vm->s_data.push(d._items[self.i].first); - vm->s_data.push(d._items[self.i].second); - self.i = d._items[self.i].next; - return 2; + PyVar key, val; + if (pkpy_DictIter__next(&self.it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + vm->s_data.push(key); + vm->s_data.push(val); + return 2; + } + return 0; }); } diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index 10ad986c..40ca619b 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -1628,37 +1628,6 @@ BIND_BINARY_SPECIAL(__xor__) #undef BIND_BINARY_SPECIAL -void Dict::_probe_0(VM* vm, PyVar key, bool& ok, int& i) const { - ok = false; - i64 hash = vm->py_hash(key); - i = hash & _mask; - for(int j = 0; j < _capacity; j++) { - if(_items[i].first != nullptr) { - if(vm->py_eq(_items[i].first, key)) { - ok = true; - break; - } - } else { - if(_items[i].second == nullptr) break; - } - // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166 - i = ((5 * i) + 1) & _mask; - } -} - -void Dict::_probe_1(VM* vm, PyVar key, bool& ok, int& i) const { - ok = false; - i = vm->py_hash(key) & _mask; - while(_items[i].first != nullptr) { - if(vm->py_eq(_items[i].first, key)) { - ok = true; - break; - } - // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166 - i = ((5 * i) + 1) & _mask; - } -} - #if PK_ENABLE_PROFILER void NextBreakpoint::_step(VM* vm) { int curr_callstack_size = vm->callstack.size(); diff --git a/src/objects/dict.c b/src/objects/dict.c new file mode 100644 index 00000000..62f89924 --- /dev/null +++ b/src/objects/dict.c @@ -0,0 +1,228 @@ +#include "pocketpy/objects/dict.h" +#include "pocketpy/common/utils.h" +#include +#include +#include + +struct pkpy_DictEntry { + int64_t hash; + pkpy_Var key; + pkpy_Var val; +}; + +inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { + if(self->count < 255) return 1; + if(self->count < 65535) return 2; + return 4; +} + +inline static int pkpy_Dict__idx_null(const pkpy_Dict* self) { + if(self->count < 255) return 255; + if(self->count < 65535) return 65535; + return 4294967295; +} + +inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } + +void pkpy_Dict__ctor(pkpy_Dict* self) { + self->_version = 0; + self->count = 0; + c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); + self->_htcap = 16; + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); +} + +void pkpy_Dict__dtor(pkpy_Dict* self) { + c11_vector__dtor(&self->_entries); + free(self->_hashtable); +} + +pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { + int ht_size = pkpy_Dict__ht_byte_size(self); + void* ht_clone = malloc(ht_size); + memcpy(ht_clone, self->_hashtable, ht_size); + return (pkpy_Dict){._version = 0, + .count = self->count, + ._entries = c11_vector__copy(&self->_entries), + ._htcap = self->_htcap, + ._hashtable = ht_clone}; +} + +static int pkpy_Dict__htget(const pkpy_Dict* self, int h) { + int sz = pkpy_Dict__idx_size(self); + switch(sz) { + case 1: return ((uint8_t*)self->_hashtable)[h]; + case 2: return ((uint16_t*)self->_hashtable)[h]; + case 4: return ((uint32_t*)self->_hashtable)[h]; + default: PK_UNREACHABLE(); + } +} + +static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { + int sz = pkpy_Dict__idx_size(self); + switch(sz) { + case 1: ((uint8_t*)self->_hashtable)[h] = v; break; + case 2: ((uint16_t*)self->_hashtable)[h] = v; break; + case 4: ((uint32_t*)self->_hashtable)[h] = v; break; + default: PK_UNREACHABLE(); + } +} + +static int pkpy_Dict__probe(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { + const int null = pkpy_Dict__idx_null(self); + const int mask = self->_htcap - 1; + for(int h = hash & mask;; h = (h + 1) & mask) { + int idx = pkpy_Dict__htget(self, h); + if(idx == null) return h; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + if(pkpy_Var__is_null(&entry->key)) return h; + if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; + } + PK_UNREACHABLE(); +} + +static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { + self->_version += 1; + free(self->_hashtable); + self->_htcap *= 2; + void* new_ht = malloc(pkpy_Dict__ht_byte_size(self)); + memset(new_ht, 0xff, pkpy_Dict__ht_byte_size(self)); + + for(int i = 0; i < self->_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + + int h = pkpy_Dict__probe(self, vm, entry->key, entry->hash); + pkpy_Dict__htset(self, h, i); + } +} + +static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { + int deleted_slots = self->_entries.count - self->count; + if(deleted_slots < self->_entries.count * 0.25) return false; + + // shrink + self->_version += 1; + free(self->_hashtable); + while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) + self->_htcap /= 2; + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); + + c11_vector new_entries; + c11_vector__ctor(&new_entries, sizeof(struct pkpy_DictEntry)); + for(int i = 0; i < self->_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + + int j = new_entries.count; + c11_vector__push(struct pkpy_DictEntry, &new_entries, *entry); + pkpy_Dict__htset(self, pkpy_Dict__probe(self, vm, entry->key, entry->hash), j); + } + c11_vector__dtor(&self->_entries); + self->_entries = new_entries; + return true; +} + +bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { + int hash = pkpy_Var__hash__(vm, key); + int h = pkpy_Dict__probe(self, vm, key, hash); + + int idx = pkpy_Dict__htget(self, h); + if(idx == pkpy_Dict__idx_null(self)) { + self->_version += 1; + idx = self->_entries.count; + c11_vector__push(struct pkpy_DictEntry, + &self->_entries, + ((struct pkpy_DictEntry){ + .hash = hash, + .key = key, + .val = val, + })); + pkpy_Dict__htset(self, h, idx); + if(self->count >= self->_htcap * 0.75) pkpy_Dict__extendht(self, vm); + return true; + } + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + entry->val = val; + return false; +} + +bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { + int hash = pkpy_Var__hash__(vm, key); + int h = pkpy_Dict__probe(self, vm, key, hash); + + int idx = pkpy_Dict__htget(self, h); + if(idx == pkpy_Dict__idx_null(self)) return false; + return true; +} + +bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { + int hash = pkpy_Var__hash__(vm, key); + int h = pkpy_Dict__probe(self, vm, key, hash); + int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); + if(idx == null) return false; + + self->_version += 1; + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + pkpy_Var__set_null(&entry->key); + pkpy_Dict__htset(self, h, null); + pkpy_Dict__refactor(self, vm); + return true; +} + +const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { + int hash = pkpy_Var__hash__(vm, key); + int h = pkpy_Dict__probe(self, vm, key, hash); + + int idx = pkpy_Dict__htget(self, h); + if(idx == pkpy_Dict__idx_null(self)) return NULL; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + return &entry->val; +} + +void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) { + for(int i = 0; i < other->_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &other->_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + pkpy_Dict__set(self, vm, entry->key, entry->val); + } +} + +void pkpy_Dict__clear(pkpy_Dict *self) { + int v = self->_version; + pkpy_Dict__dtor(self); + pkpy_Dict__ctor(self); + self->_version = v + 1; +} + +pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict *self) { + return (pkpy_DictIter){ + ._dict = self, + ._index = 0, + ._version = self->_version, + }; +} + +bool pkpy_DictIter__next(pkpy_DictIter *self, pkpy_Var *key, pkpy_Var *val) { + if(self->_version != self->_dict->_version) return false; + if(self->_index >= self->_dict->_entries.count) return false; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index); + assert(!pkpy_Var__is_null(&entry->key)); + if (key) *key = entry->key; + if (val) *val = entry->val; + + while (self->_index < self->_dict->_entries.count) { + self->_index++; + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index); + if(!pkpy_Var__is_null(&entry->key)) break; + } + return true; +} diff --git a/src/objects/dict.cpp b/src/objects/dict.cpp deleted file mode 100644 index 184d7c1c..00000000 --- a/src/objects/dict.cpp +++ /dev/null @@ -1,180 +0,0 @@ -#include "pocketpy/objects/dict.hpp" - -namespace pkpy { - -Dict::Dict() : - _capacity(__Capacity), _mask(__Capacity - 1), _size(0), _critical_size(__Capacity * __LoadFactor + 0.5f), - _head_idx(-1), _tail_idx(-1) { - __alloc_items(); -} - -void Dict::__alloc_items() { - _items = (Item*)std::malloc(_capacity * sizeof(Item)); - for(int i = 0; i < _capacity; i++) { - _items[i].first = nullptr; - _items[i].second = nullptr; - _items[i].prev = -1; - _items[i].next = -1; - } -} - -Dict::Dict(Dict&& other) { - _capacity = other._capacity; - _mask = other._mask; - _size = other._size; - _critical_size = other._critical_size; - _head_idx = other._head_idx; - _tail_idx = other._tail_idx; - _items = other._items; - other._items = nullptr; -} - -Dict::Dict(const Dict& other) { - _capacity = other._capacity; - _mask = other._mask; - _size = other._size; - _critical_size = other._critical_size; - _head_idx = other._head_idx; - _tail_idx = other._tail_idx; - // copy items - _items = (Item*)std::malloc(_capacity * sizeof(Item)); - std::memcpy(_items, other._items, _capacity * sizeof(Item)); -} - -void Dict::set(VM* vm, PyVar key, PyVar val) { - // do possible rehash - if(_size + 1 > _critical_size) _rehash(vm); - bool ok; - int i; - _probe_1(vm, key, ok, i); - if(!ok) { - _size++; - _items[i].first = key; - - // append to tail - if(_size == 0 + 1) { - _head_idx = i; - _tail_idx = i; - } else { - _items[i].prev = _tail_idx; - _items[_tail_idx].next = i; - _tail_idx = i; - } - } - _items[i].second = val; -} - -void Dict::_rehash(VM* vm) { - Item* old_items = _items; - int old_head_idx = _head_idx; - - _capacity *= 4; - _mask = _capacity - 1; - _size = 0; - _critical_size = _capacity * __LoadFactor + 0.5f; - _head_idx = -1; - _tail_idx = -1; - - __alloc_items(); - - // copy old items to new dict - int i = old_head_idx; - while(i != -1) { - set(vm, old_items[i].first, old_items[i].second); - i = old_items[i].next; - } - - std::free(old_items); -} - -PyVar Dict::try_get(VM* vm, PyVar key) const { - bool ok; - int i; - _probe_0(vm, key, ok, i); - if(!ok) return nullptr; - return _items[i].second; -} - -bool Dict::contains(VM* vm, PyVar key) const { - bool ok; - int i; - _probe_0(vm, key, ok, i); - return ok; -} - -bool Dict::del(VM* vm, PyVar key) { - bool ok; - int i; - _probe_0(vm, key, ok, i); - if(!ok) return false; - _items[i].first = nullptr; - // _items[i].second = PY_DELETED_SLOT; // do not change .second if it is not NULL, it means the slot is occupied by - // a deleted item - _size--; - - if(_size == 0) { - _head_idx = -1; - _tail_idx = -1; - } else { - if(_head_idx == i) { - _head_idx = _items[i].next; - _items[_head_idx].prev = -1; - } else if(_tail_idx == i) { - _tail_idx = _items[i].prev; - _items[_tail_idx].next = -1; - } else { - _items[_items[i].prev].next = _items[i].next; - _items[_items[i].next].prev = _items[i].prev; - } - } - _items[i].prev = -1; - _items[i].next = -1; - return true; -} - -void Dict::update(VM* vm, const Dict& other) { - other.apply([&](PyVar k, PyVar v) { - set(vm, k, v); - }); -} - -Tuple Dict::keys() const { - Tuple t(_size); - int i = _head_idx; - int j = 0; - while(i != -1) { - t[j++] = _items[i].first; - i = _items[i].next; - } - assert(j == _size); - return t; -} - -Tuple Dict::values() const { - Tuple t(_size); - int i = _head_idx; - int j = 0; - while(i != -1) { - t[j++] = _items[i].second; - i = _items[i].next; - } - assert(j == _size); - return t; -} - -void Dict::clear() { - _size = 0; - _head_idx = -1; - _tail_idx = -1; - for(int i = 0; i < _capacity; i++) { - _items[i].first = nullptr; - _items[i].second = nullptr; - _items[i].prev = -1; - _items[i].next = -1; - } -} - -Dict::~Dict() { - if(_items) std::free(_items); -} -} // namespace pkpy diff --git a/src/objects/pyvar.cpp b/src/objects/pyvar.cpp new file mode 100644 index 00000000..14ad6911 --- /dev/null +++ b/src/objects/pyvar.cpp @@ -0,0 +1,17 @@ +#include "pocketpy/objects/base.hpp" +#include "pocketpy/objects/pyvar.h" +#include "pocketpy/interpreter/vm.hpp" + +extern "C" { + +bool pkpy_Var__eq__(void *vm_, pkpy_Var a, pkpy_Var b) { + auto vm = static_cast(vm_); + return vm->py_eq(*reinterpret_cast(&a), *reinterpret_cast(&b)); +} + +int64_t pkpy_Var__hash__(void *vm_, pkpy_Var a) { + auto vm = static_cast(vm_); + return vm->py_hash(*reinterpret_cast(&a)); +} + +} diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 23d3596a..26105865 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -1493,12 +1493,10 @@ void __init_builtins(VM* _vm) { if(!vm->isinstance(_1, vm->tp_dict)) return vm->NotImplemented; Dict& other = _CAST(Dict&, _1); if(self.size() != other.size()) return vm->False; - for(int i = 0; i < self._capacity; i++) { - auto item = self._items[i]; - if(item.first == nullptr) continue; - PyVar value = other.try_get(vm, item.first); - if(value == nullptr) return vm->False; - if(!vm->py_eq(item.second, value)) return vm->False; + pkpy_DictIter it = self.iter(); + PyVar key, val; + while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + if(!vm->py_eq(val, other.try_get(vm, key))) return vm->False; } return vm->True; }); From 249656039aa7c5df78c195144b9c4965df62f5be Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 14:36:48 +0800 Subject: [PATCH 25/60] fix uninitialize --- src/objects/dict.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/objects/dict.c b/src/objects/dict.c index 62f89924..3ac785b5 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -30,6 +30,7 @@ void pkpy_Dict__ctor(pkpy_Dict* self) { c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); self->_htcap = 16; self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); } void pkpy_Dict__dtor(pkpy_Dict* self) { From f4e9293643cb9da43a60da55439be15b7554b3ee Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 15:03:19 +0800 Subject: [PATCH 26/60] ... --- include/pocketpy/objects/dict.hpp | 2 +- include/pocketpy/objects/pyvar.h | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 8272a45c..1c51161c 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -34,7 +34,7 @@ struct Dict : private pkpy_Dict { PyVar try_get(VM* vm, PyVar key) const { auto res = pkpy_Dict__try_get(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); if (!res) return nullptr; - return *reinterpret_cast(&res); + return PyVar(*reinterpret_cast(&res)); } bool contains(VM* vm, PyVar key) const { diff --git a/include/pocketpy/objects/pyvar.h b/include/pocketpy/objects/pyvar.h index bc8fd593..5947c4d9 100644 --- a/include/pocketpy/objects/pyvar.h +++ b/include/pocketpy/objects/pyvar.h @@ -11,12 +11,8 @@ extern "C" { * @brief A python value in pocketpy. */ typedef struct { - // TODO: implement - - union { - int type; - char buf[16]; - }; + int type; + int _0, _1, _2; } pkpy_Var; /** From 73c9c5a2280805e5c94cbb0945fa4736e39d4950 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 16:26:06 +0800 Subject: [PATCH 27/60] fix RE --- include/pocketpy/objects/dict.hpp | 2 +- include/pocketpy/objects/pyvar.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 1c51161c..6c854961 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -34,7 +34,7 @@ struct Dict : private pkpy_Dict { PyVar try_get(VM* vm, PyVar key) const { auto res = pkpy_Dict__try_get(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); if (!res) return nullptr; - return PyVar(*reinterpret_cast(&res)); + return *reinterpret_cast(res); } bool contains(VM* vm, PyVar key) const { diff --git a/include/pocketpy/objects/pyvar.h b/include/pocketpy/objects/pyvar.h index 5947c4d9..edd7495f 100644 --- a/include/pocketpy/objects/pyvar.h +++ b/include/pocketpy/objects/pyvar.h @@ -12,7 +12,8 @@ extern "C" { */ typedef struct { int type; - int _0, _1, _2; + int _0; + int64_t _1; } pkpy_Var; /** From e455e36a3980a109df638cf5be652d532be3221f Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 16:44:41 +0800 Subject: [PATCH 28/60] fix iteration and count --- src/objects/dict.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 3ac785b5..e65675db 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -142,6 +142,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { .val = val, })); pkpy_Dict__htset(self, h, idx); + self->count += 1; if(self->count >= self->_htcap * 0.75) pkpy_Dict__extendht(self, vm); return true; } @@ -173,6 +174,7 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { pkpy_Var__set_null(&entry->key); pkpy_Dict__htset(self, h, null); pkpy_Dict__refactor(self, vm); + self->count -= 1; return true; } @@ -203,10 +205,19 @@ void pkpy_Dict__clear(pkpy_Dict *self) { self->_version = v + 1; } +static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) { + do { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + if(!pkpy_Var__is_null(&entry->key)) break; + idx++; + } while (idx < self->_entries.count); + return idx; +} + pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict *self) { return (pkpy_DictIter){ ._dict = self, - ._index = 0, + ._index = pkpy_Dict__next_entry_idx(self, 0), ._version = self->_version, }; } @@ -220,10 +231,6 @@ bool pkpy_DictIter__next(pkpy_DictIter *self, pkpy_Var *key, pkpy_Var *val) { if (key) *key = entry->key; if (val) *val = entry->val; - while (self->_index < self->_dict->_entries.count) { - self->_index++; - struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index); - if(!pkpy_Var__is_null(&entry->key)) break; - } + self->_index = pkpy_Dict__next_entry_idx(self->_dict, self->_index + 1); return true; } From b2d5708fd83e0ba7fc17fdd344a6ec53ec95a65d Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 17:03:16 +0800 Subject: [PATCH 29/60] fix --- src/objects/dict.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index e65675db..2014303c 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -11,14 +11,14 @@ struct pkpy_DictEntry { }; inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { - if(self->count < 255) return 1; - if(self->count < 65535) return 2; + if(self->_htcap < 255) return 1; + if(self->_htcap < 65535) return 2; return 4; } inline static int pkpy_Dict__idx_null(const pkpy_Dict* self) { - if(self->count < 255) return 255; - if(self->count < 65535) return 65535; + if(self->_htcap < 255) return 255; + if(self->_htcap < 65535) return 65535; return 4294967295; } @@ -87,8 +87,8 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { self->_version += 1; free(self->_hashtable); self->_htcap *= 2; - void* new_ht = malloc(pkpy_Dict__ht_byte_size(self)); - memset(new_ht, 0xff, pkpy_Dict__ht_byte_size(self)); + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); for(int i = 0; i < self->_entries.count; i++) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); @@ -206,6 +206,7 @@ void pkpy_Dict__clear(pkpy_Dict *self) { } static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) { + if (idx >= self->_entries.count) return idx; do { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); if(!pkpy_Var__is_null(&entry->key)) break; From 6167cb4a431cd05b2117c87c77861b8f9c2cbb64 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 20:13:20 +0800 Subject: [PATCH 30/60] add `PK_INLINE` --- include/pocketpy/common/sstream.h | 18 +++++++++--------- include/pocketpy/common/str.h | 4 +++- include/pocketpy/common/utils.h | 6 ++++++ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index 282724de..6189e8ef 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -29,15 +29,15 @@ typedef struct pkpy_AnyStr { }; } pkpy_AnyStr; -inline pkpy_AnyStr pkpy_AnyStr__int(int x) { pkpy_AnyStr s; s.type = 1; s._int = x; return s; } -inline pkpy_AnyStr pkpy_AnyStr__i64(int64_t x) { pkpy_AnyStr s; s.type = 2; s._i64 = x; return s; } -inline pkpy_AnyStr pkpy_AnyStr__float(float x) { pkpy_AnyStr s; s.type = 3; s._float = x; return s; } -inline pkpy_AnyStr pkpy_AnyStr__double(double x) { pkpy_AnyStr s; s.type = 4; s._double = x; return s; } -inline pkpy_AnyStr pkpy_AnyStr__char(char x) { pkpy_AnyStr s; s.type = 5; s._char = x; return s; } -inline pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { pkpy_AnyStr s; s.type = 6; s._str = x; return s; } -inline pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { pkpy_AnyStr s; s.type = 7; s._sv = x; return s; } -inline pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { pkpy_AnyStr s; s.type = 8; s._cstr = x; return s; } -inline pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { pkpy_AnyStr s; s.type = 9; s._ptr = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__int(int x) { pkpy_AnyStr s; s.type = 1; s._int = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__i64(int64_t x) { pkpy_AnyStr s; s.type = 2; s._i64 = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__float(float x) { pkpy_AnyStr s; s.type = 3; s._float = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__double(double x) { pkpy_AnyStr s; s.type = 4; s._double = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__char(char x) { pkpy_AnyStr s; s.type = 5; s._char = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__str(const pkpy_Str* x) { pkpy_AnyStr s; s.type = 6; s._str = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__sv(c11_string x) { pkpy_AnyStr s; s.type = 7; s._sv = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { pkpy_AnyStr s; s.type = 8; s._cstr = x; return s; } +PK_INLINE pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { pkpy_AnyStr s; s.type = 9; s._ptr = x; return s; } void pkpy_SStream__ctor(pkpy_SStream* self); void pkpy_SStream__dtor(pkpy_SStream* self); diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index 9469fafc..c09be739 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -5,7 +5,9 @@ extern "C" { #endif #include + #include "pocketpy/common/vector.h" +#include "pocketpy/common/utils.h" /* string_view */ typedef struct c11_string{ @@ -23,7 +25,7 @@ typedef struct pkpy_Str{ }; } pkpy_Str; -inline const char* pkpy_Str__data(const pkpy_Str* self){ +PK_INLINE const char* pkpy_Str__data(const pkpy_Str* self){ return self->is_sso ? self->_inlined : self->_ptr; } diff --git a/include/pocketpy/common/utils.h b/include/pocketpy/common/utils.h index d395c654..f11604da 100644 --- a/include/pocketpy/common/utils.h +++ b/include/pocketpy/common/utils.h @@ -4,6 +4,12 @@ extern "C" { #endif +#ifdef __cplusplus +#define PK_INLINE inline +#else +#define PK_INLINE static inline +#endif + #define PK_REGION(name) 1 #define PK_SLICE_LOOP(i, start, stop, step) for(int i = start; step > 0 ? i < stop : i > stop; i += step) From 3d72ca0cc6068b56d7e1c7cafb2f13ba8af466b3 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 13 Jun 2024 21:37:33 +0800 Subject: [PATCH 31/60] remove any --- build_g.sh | 14 ++- include/pocketpy/common/any.h | 33 +++++++ include/pocketpy/common/any.hpp | 113 ------------------------ include/pocketpy/interpreter/vm.hpp | 2 + include/pocketpy/objects/codeobject.hpp | 8 +- src/common/any.c | 7 ++ src/common/any.cpp | 26 ------ 7 files changed, 58 insertions(+), 145 deletions(-) create mode 100644 include/pocketpy/common/any.h delete mode 100644 include/pocketpy/common/any.hpp create mode 100644 src/common/any.c delete mode 100644 src/common/any.cpp diff --git a/build_g.sh b/build_g.sh index 340e471c..bffbb51f 100644 --- a/build_g.sh +++ b/build_g.sh @@ -2,9 +2,17 @@ python prebuild.py SRC_C=$(find src/ -name "*.c") SRC_CPP=$(find src/ -name "*.cpp") -SRC="$SRC_C $SRC_CPP" -FLAGS="-std=c++17 -O0 -stdlib=libc++ -Iinclude -frtti -Wfatal-errors -g -DDEBUG -DPK_ENABLE_OS=1" +COMMON_FLAGS="-Iinclude -O0 -Wfatal-errors -g -DDEBUG -DPK_ENABLE_OS=1" -clang++ $FLAGS -o main src2/main.cpp $SRC +FLAGS_C="-std=c11 $COMMON_FLAGS" +FLAGS_CPP="-std=c++17 -stdlib=libc++ -frtti $COMMON_FLAGS" +echo "Compiling C files..." +clang $FLAGS_C -c $SRC_C +ar rcs libpocketpy_c.a *.o +rm *.o + +echo "Compiling C++ files..." +clang++ $FLAGS_CPP -o main src2/main.cpp $SRC_CPP libpocketpy_c.a +rm libpocketpy_c.a diff --git a/include/pocketpy/common/any.h b/include/pocketpy/common/any.h new file mode 100644 index 00000000..3e7e904b --- /dev/null +++ b/include/pocketpy/common/any.h @@ -0,0 +1,33 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct c11_userdata{ + void* _0; + void* _1; +} c11_userdata; + +void c11_userdata__ctor(c11_userdata* self, void* ptr, int size); +#define c11_userdata__as(T, self) (*( (T*)(self) )) + +#ifdef __cplusplus +} + +namespace pkpy{ + struct any: c11_userdata{ + template + any(T value){ + c11_userdata__ctor(this, &value, sizeof(T)); + } + + any(){ } + + template + T as(){ + return c11_userdata__as(T, this); + } + }; +} // namespace pkpy +#endif \ No newline at end of file diff --git a/include/pocketpy/common/any.hpp b/include/pocketpy/common/any.hpp deleted file mode 100644 index 6fa09ee1..00000000 --- a/include/pocketpy/common/any.hpp +++ /dev/null @@ -1,113 +0,0 @@ -#pragma once - -#include "pocketpy/common/traits.hpp" - -#include -#include -#include -#include -#include - -namespace pkpy { - -template -constexpr inline bool is_any_sso_v = is_pod_v && sizeof(T) <= sizeof(void*); - -struct any { - struct vtable { - const std::type_index type; - void (*deleter)(void*); - - template - inline static vtable* get() { - static_assert(std::is_same_v>); - if constexpr(is_any_sso_v) { - static vtable vt{typeid(T), nullptr}; - return &vt; - } else { - static vtable vt{typeid(T), [](void* ptr) { - delete static_cast(ptr); - }}; - return &vt; - } - } - }; - - void* data; - vtable* _vt; - - any() : data(nullptr), _vt(nullptr) {} - - explicit operator bool () const { return _vt != nullptr; } - - template - any(T&& value) { - using U = std::decay_t; - static_assert(!std::is_same_v, "any(const any&) is deleted"); - static_assert(sizeof(U) == sizeof(T)); - if constexpr(is_any_sso_v) { - std::memcpy(&data, &value, sizeof(U)); - } else { - data = new U(std::forward(value)); - } - _vt = vtable::get(); - } - - any(any&& other) noexcept; - any& operator= (any&& other) noexcept; - - const std::type_index type_id() const { return _vt ? _vt->type : typeid(void); } - - any(const any& other) = delete; - any& operator= (const any& other) = delete; - - ~any() { - if(_vt && _vt->deleter) _vt->deleter(data); - } - - template - T& _cast() const noexcept { - static_assert(std::is_same_v>); - if constexpr(is_any_sso_v) { - return *((T*)(&data)); - } else { - return *(static_cast(data)); - } - } - - template - T& cast() const { - static_assert(std::is_same_v>); - if(type_id() != typeid(T)) __bad_any_cast(typeid(T), type_id()); - return _cast(); - } - - static void __bad_any_cast(const std::type_index expected, const std::type_index actual); -}; - -template -struct function; - -template -struct function { - any _impl; - Ret (*_wrapper)(const any&, Params...); - - function() : _impl(), _wrapper(nullptr) {} - - explicit operator bool () const { return _wrapper != nullptr; } - - template - function(F&& f) : _impl(std::forward(f)) { - _wrapper = [](const any& impl, Params... params) -> Ret { - return impl._cast>()(std::forward(params)...); - }; - } - - Ret operator() (Params... params) const { - assert(_wrapper); - return _wrapper(_impl, std::forward(params)...); - } -}; - -} // namespace pkpy diff --git a/include/pocketpy/interpreter/vm.hpp b/include/pocketpy/interpreter/vm.hpp index 1c2ec125..cb5276f6 100644 --- a/include/pocketpy/interpreter/vm.hpp +++ b/include/pocketpy/interpreter/vm.hpp @@ -9,6 +9,8 @@ #include "pocketpy/interpreter/frame.hpp" #include "pocketpy/interpreter/profiler.hpp" +#include + namespace pkpy { /* Stack manipulation macros */ diff --git a/include/pocketpy/objects/codeobject.hpp b/include/pocketpy/objects/codeobject.hpp index baefd679..d9afc2c9 100644 --- a/include/pocketpy/objects/codeobject.hpp +++ b/include/pocketpy/objects/codeobject.hpp @@ -1,6 +1,6 @@ #pragma once -#include "pocketpy/common/any.hpp" +#include "pocketpy/common/any.h" #include "pocketpy/objects/tuplelist.hpp" #include "pocketpy/objects/namedict.hpp" #include "pocketpy/objects/sourcedata.hpp" @@ -171,10 +171,12 @@ struct Function { }; template -T& lambda_get_userdata(PyVar* p) { +T lambda_get_userdata(PyVar* p) { static_assert(std::is_same_v>); + static_assert(is_pod_v); int offset = p[-1] != nullptr ? -1 : -2; - return p[offset].obj_get()._userdata.cast(); + NativeFunc& nf = p[offset].obj_get(); + return nf._userdata.as(); } } // namespace pkpy diff --git a/src/common/any.c b/src/common/any.c new file mode 100644 index 00000000..af22bd94 --- /dev/null +++ b/src/common/any.c @@ -0,0 +1,7 @@ +#include "pocketpy/common/any.h" + +#include + +void c11_userdata__ctor(c11_userdata* self, void* ptr, int size){ + memcpy(self, ptr, size); +} diff --git a/src/common/any.cpp b/src/common/any.cpp deleted file mode 100644 index 6ec3636d..00000000 --- a/src/common/any.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "pocketpy/common/any.hpp" -#include "pocketpy/common/utils.h" - -#include - -namespace pkpy { - -void any::__bad_any_cast(const std::type_index expected, const std::type_index actual) { - PK_FATAL_ERROR("bad_any_cast: expected %s, got %s\n", expected.name(), actual.name()) -} - -any::any(any&& other) noexcept : data(other.data), _vt(other._vt) { - other.data = nullptr; - other._vt = nullptr; -} - -any& any::operator= (any&& other) noexcept { - if(data) _vt->deleter(data); - data = other.data; - _vt = other._vt; - other.data = nullptr; - other._vt = nullptr; - return *this; -} - -} // namespace pkpy From 8458e49a307264640239457de26edb547d927437 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 18:56:55 +0800 Subject: [PATCH 32/60] Minimum deleted slot required to refactor --- CMakeLists.txt | 4 ++-- src/objects/dict.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db995efa..38d74742 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,8 @@ if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /Ox /jumptablerdata /GS-") add_compile_options(/wd4267 /wd4244) else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti -O2") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") # disable -Wshorten-64-to-32 for apple if(APPLE) diff --git a/src/objects/dict.c b/src/objects/dict.c index 2014303c..69f2557c 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -101,7 +101,7 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int deleted_slots = self->_entries.count - self->count; - if(deleted_slots < self->_entries.count * 0.25) return false; + if(deleted_slots >= 8 && deleted_slots < self->_entries.count * 0.25) return false; // shrink self->_version += 1; From f28b2f152e9b1678e3f6c2291f74b5d1831701bd Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 21:51:03 +0800 Subject: [PATCH 33/60] Fix --- src/objects/dict.c | 56 +++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 69f2557c..f1c91590 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -99,33 +99,6 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { } } -static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { - int deleted_slots = self->_entries.count - self->count; - if(deleted_slots >= 8 && deleted_slots < self->_entries.count * 0.25) return false; - - // shrink - self->_version += 1; - free(self->_hashtable); - while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) - self->_htcap /= 2; - self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); - memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); - - c11_vector new_entries; - c11_vector__ctor(&new_entries, sizeof(struct pkpy_DictEntry)); - for(int i = 0; i < self->_entries.count; i++) { - struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); - if(pkpy_Var__is_null(&entry->key)) continue; - - int j = new_entries.count; - c11_vector__push(struct pkpy_DictEntry, &new_entries, *entry); - pkpy_Dict__htset(self, pkpy_Dict__probe(self, vm, entry->key, entry->hash), j); - } - c11_vector__dtor(&self->_entries); - self->_entries = new_entries; - return true; -} - bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe(self, vm, key, hash); @@ -162,6 +135,33 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { return true; } +static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { + int deleted_slots = self->_entries.count - self->count; + if(deleted_slots <= 8 || deleted_slots < self->_entries.count * 0.25) return false; + + // shrink + self->_version += 1; + free(self->_hashtable); + while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) + self->_htcap /= 2; + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); + + c11_vector old_entries = self->_entries; + c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); + for(int i = 0; i < old_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &old_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + + int j = self->_entries.count; + c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); + int h = pkpy_Dict__probe(self, vm, entry->key, entry->hash); + pkpy_Dict__htset(self, h, j); + } + c11_vector__dtor(&old_entries); + return true; +} + bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe(self, vm, key, hash); @@ -173,8 +173,8 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); pkpy_Var__set_null(&entry->key); pkpy_Dict__htset(self, h, null); - pkpy_Dict__refactor(self, vm); self->count -= 1; + pkpy_Dict__refactor(self, vm); return true; } From 41562cf4c33db3876e87384a86ab22cb4804cb38 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 22:16:12 +0800 Subject: [PATCH 34/60] fix find and insert --- src/objects/dict.c | 49 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index f1c91590..31da0e7a 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -69,7 +69,7 @@ static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { } } -static int pkpy_Dict__probe(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { +static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h + 1) & mask) { @@ -83,6 +83,19 @@ static int pkpy_Dict__probe(const pkpy_Dict* self, void* vm, pkpy_Var key, int64 PK_UNREACHABLE(); } +static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { + const int null = pkpy_Dict__idx_null(self); + const int mask = self->_htcap - 1; + for(int h = hash & mask;; h = (h + 1) & mask) { + int idx = pkpy_Dict__htget(self, h); + if(idx == null) return h; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; + } + PK_UNREACHABLE(); +} + static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { self->_version += 1; free(self->_hashtable); @@ -94,14 +107,14 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); if(pkpy_Var__is_null(&entry->key)) continue; - int h = pkpy_Dict__probe(self, vm, entry->key, entry->hash); + int h = pkpy_Dict__probe0(self, vm, entry->key, entry->hash); pkpy_Dict__htset(self, h, i); } } bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int hash = pkpy_Var__hash__(vm, key); - int h = pkpy_Dict__probe(self, vm, key, hash); + int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) { @@ -114,6 +127,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { .key = key, .val = val, })); + h = pkpy_Dict__probe0(self, vm, key, hash); pkpy_Dict__htset(self, h, idx); self->count += 1; if(self->count >= self->_htcap * 0.75) pkpy_Dict__extendht(self, vm); @@ -121,17 +135,31 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); - entry->val = val; + + if(entry->hash == hash || pkpy_Var__eq__(vm, entry->key, key)) { + entry->val = val; + } else { + self->_version += 1; + self->count += 1; + h = pkpy_Dict__probe0(self, vm, key, hash); + idx = pkpy_Dict__htget(self, h); + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + entry->key = key; + entry->val = val; + entry->hash = hash; + } return false; } bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = pkpy_Var__hash__(vm, key); - int h = pkpy_Dict__probe(self, vm, key, hash); + int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return false; + + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); return true; } @@ -155,7 +183,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int j = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); - int h = pkpy_Dict__probe(self, vm, entry->key, entry->hash); + int h = pkpy_Dict__probe0(self, vm, entry->key, entry->hash); pkpy_Dict__htset(self, h, j); } c11_vector__dtor(&old_entries); @@ -164,15 +192,14 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = pkpy_Var__hash__(vm, key); - int h = pkpy_Dict__probe(self, vm, key, hash); + int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; - self->_version += 1; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + self->_version += 1; pkpy_Var__set_null(&entry->key); - pkpy_Dict__htset(self, h, null); self->count -= 1; pkpy_Dict__refactor(self, vm); return true; @@ -180,7 +207,7 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = pkpy_Var__hash__(vm, key); - int h = pkpy_Dict__probe(self, vm, key, hash); + int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return NULL; From 5e6226729a380e35dc1a646b010d63697b8d4189 Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 22:23:23 +0800 Subject: [PATCH 35/60] skip nullptr in probe1 --- src/objects/dict.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/objects/dict.c b/src/objects/dict.c index 31da0e7a..29bbb3fb 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -91,6 +91,7 @@ static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); + if(pkpy_Var__is_null(&entry->key)) continue; if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; } PK_UNREACHABLE(); From c047eafa7eb13dfdc6e063d3701e7862627950ef Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 23:09:24 +0800 Subject: [PATCH 36/60] optimize probe0 for less __eq__ and hash compare --- src/objects/dict.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 29bbb3fb..b1453b9c 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -78,7 +78,6 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); if(pkpy_Var__is_null(&entry->key)) return h; - if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; } PK_UNREACHABLE(); } From 637aedabc56e333f3d41104c82fd5cd2a0ec1b9c Mon Sep 17 00:00:00 2001 From: szdytom Date: Thu, 13 Jun 2024 23:53:03 +0800 Subject: [PATCH 37/60] Use 4 byte hash only --- src/objects/dict.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index b1453b9c..50af5c99 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -4,8 +4,10 @@ #include #include +#define HASH_MASK ((int64_t)0xffffffff) + struct pkpy_DictEntry { - int64_t hash; + int32_t hash; pkpy_Var key; pkpy_Var val; }; @@ -69,7 +71,7 @@ static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { } } -static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { +static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h + 1) & mask) { @@ -82,7 +84,7 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 PK_UNREACHABLE(); } -static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { +static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h + 1) & mask) { @@ -113,7 +115,7 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { } bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { - int hash = pkpy_Var__hash__(vm, key); + int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); @@ -152,7 +154,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key); + int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); @@ -191,7 +193,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { } bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key); + int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; @@ -206,7 +208,7 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { } const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key); + int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); From d25afcaeae588c0bacb3e665ee786a538050cb4f Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 00:07:14 +0800 Subject: [PATCH 38/60] remove hash from entry --- src/objects/dict.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 50af5c99..1355a524 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -4,10 +4,7 @@ #include #include -#define HASH_MASK ((int64_t)0xffffffff) - struct pkpy_DictEntry { - int32_t hash; pkpy_Var key; pkpy_Var val; }; @@ -71,10 +68,10 @@ static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { } } -static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { +static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; - for(int h = hash & mask;; h = (h + 1) & mask) { + for(int h = hash & mask;; h = (h * 5 + 1) & mask) { int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; @@ -84,16 +81,16 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int PK_UNREACHABLE(); } -static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { +static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; - for(int h = hash & mask;; h = (h + 1) & mask) { + for(int h = hash & mask;; h = (h * 5 + 1) & mask) { int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); if(pkpy_Var__is_null(&entry->key)) continue; - if(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)) return h; + if(pkpy_Var__eq__(vm, entry->key, key)) return h; } PK_UNREACHABLE(); } @@ -109,13 +106,13 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); if(pkpy_Var__is_null(&entry->key)) continue; - int h = pkpy_Dict__probe0(self, vm, entry->key, entry->hash); + int h = pkpy_Dict__probe0(self, vm, entry->key, pkpy_Var__hash__(vm, entry->key)); pkpy_Dict__htset(self, h, i); } } bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { - int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; + int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); @@ -125,7 +122,6 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { c11_vector__push(struct pkpy_DictEntry, &self->_entries, ((struct pkpy_DictEntry){ - .hash = hash, .key = key, .val = val, })); @@ -138,7 +134,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - if(entry->hash == hash || pkpy_Var__eq__(vm, entry->key, key)) { + if(pkpy_Var__eq__(vm, entry->key, key)) { entry->val = val; } else { self->_version += 1; @@ -148,20 +144,19 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); entry->key = key; entry->val = val; - entry->hash = hash; } return false; } bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; + int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + assert(pkpy_Var__eq__(vm, entry->key, key)); return true; } @@ -185,7 +180,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int j = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); - int h = pkpy_Dict__probe0(self, vm, entry->key, entry->hash); + int h = pkpy_Dict__probe0(self, vm, entry->key, pkpy_Var__hash__(vm, entry->key)); pkpy_Dict__htset(self, h, j); } c11_vector__dtor(&old_entries); @@ -193,13 +188,13 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { } bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; + int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + assert(pkpy_Var__eq__(vm, entry->key, key)); self->_version += 1; pkpy_Var__set_null(&entry->key); self->count -= 1; @@ -208,14 +203,14 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { } const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int hash = pkpy_Var__hash__(vm, key) & HASH_MASK; + int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return NULL; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(entry->hash == hash && pkpy_Var__eq__(vm, entry->key, key)); + assert(pkpy_Var__eq__(vm, entry->key, key)); return &entry->val; } From 3d90bd03923a2f5cc8e24bdab2548deaf2d5a6fb Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 00:08:59 +0800 Subject: [PATCH 39/60] change cmake back --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 38d74742..db995efa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,8 @@ if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /Ox /jumptablerdata /GS-") add_compile_options(/wd4267 /wd4244) else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti -O2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") # disable -Wshorten-64-to-32 for apple if(APPLE) From 6e780173f94bc2e35048f09b26c2a30cd26b7244 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 09:32:58 +0800 Subject: [PATCH 40/60] remove _version --- include/pocketpy/objects/dict.h | 1 - src/objects/dict.c | 20 ++++---------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/include/pocketpy/objects/dict.h b/include/pocketpy/objects/dict.h index aa56ae42..071b4bdc 100644 --- a/include/pocketpy/objects/dict.h +++ b/include/pocketpy/objects/dict.h @@ -9,7 +9,6 @@ extern "C" { #include "pocketpy/common/vector.h" typedef struct { - unsigned int _version; /** used internelly to detect iterator invalidation */ int count; /** number of elements in the dictionary */ c11_vector _entries; /** contains `pkpy_DictEntry` (hidden type) */ int _htcap; /** capacity of the hashtable, always a power of 2 */ diff --git a/src/objects/dict.c b/src/objects/dict.c index 1355a524..3e1bb7a2 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -24,7 +24,6 @@ inline static int pkpy_Dict__idx_null(const pkpy_Dict* self) { inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } void pkpy_Dict__ctor(pkpy_Dict* self) { - self->_version = 0; self->count = 0; c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); self->_htcap = 16; @@ -41,8 +40,7 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { int ht_size = pkpy_Dict__ht_byte_size(self); void* ht_clone = malloc(ht_size); memcpy(ht_clone, self->_hashtable, ht_size); - return (pkpy_Dict){._version = 0, - .count = self->count, + return (pkpy_Dict){.count = self->count, ._entries = c11_vector__copy(&self->_entries), ._htcap = self->_htcap, ._hashtable = ht_clone}; @@ -96,7 +94,6 @@ static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 } static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { - self->_version += 1; free(self->_hashtable); self->_htcap *= 2; self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); @@ -117,7 +114,6 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) { - self->_version += 1; idx = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, &self->_entries, @@ -137,7 +133,6 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { if(pkpy_Var__eq__(vm, entry->key, key)) { entry->val = val; } else { - self->_version += 1; self->count += 1; h = pkpy_Dict__probe0(self, vm, key, hash); idx = pkpy_Dict__htget(self, h); @@ -165,7 +160,6 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { if(deleted_slots <= 8 || deleted_slots < self->_entries.count * 0.25) return false; // shrink - self->_version += 1; free(self->_hashtable); while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) self->_htcap /= 2; @@ -195,7 +189,6 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); assert(pkpy_Var__eq__(vm, entry->key, key)); - self->_version += 1; pkpy_Var__set_null(&entry->key); self->count -= 1; pkpy_Dict__refactor(self, vm); @@ -223,19 +216,16 @@ void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) { } void pkpy_Dict__clear(pkpy_Dict *self) { - int v = self->_version; pkpy_Dict__dtor(self); pkpy_Dict__ctor(self); - self->_version = v + 1; } static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) { - if (idx >= self->_entries.count) return idx; - do { + while (idx < self->_entries.count) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); if(!pkpy_Var__is_null(&entry->key)) break; idx++; - } while (idx < self->_entries.count); + } return idx; } @@ -243,16 +233,14 @@ pkpy_DictIter pkpy_Dict__iter(const pkpy_Dict *self) { return (pkpy_DictIter){ ._dict = self, ._index = pkpy_Dict__next_entry_idx(self, 0), - ._version = self->_version, }; } bool pkpy_DictIter__next(pkpy_DictIter *self, pkpy_Var *key, pkpy_Var *val) { - if(self->_version != self->_dict->_version) return false; if(self->_index >= self->_dict->_entries.count) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_dict->_entries, self->_index); - assert(!pkpy_Var__is_null(&entry->key)); + if(pkpy_Var__is_null(&entry->key)) return false; if (key) *key = entry->key; if (val) *val = entry->val; From 21fdaeaa212a1758a5b0a57846a4d4bdf64931ec Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 11:06:39 +0800 Subject: [PATCH 41/60] fix dict compare --- src/pocketpy.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 26105865..c6a7bb7d 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -1496,7 +1496,9 @@ void __init_builtins(VM* _vm) { pkpy_DictIter it = self.iter(); PyVar key, val; while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { - if(!vm->py_eq(val, other.try_get(vm, key))) return vm->False; + PyVar other_val = other.try_get(vm, key); + if(other_val == nullptr) return vm->False; + if(!vm->py_eq(val, other_val)) return vm->False; } return vm->True; }); From 9390b0d6381c307c699d48c14eb1ef8ec81a5800 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 11:08:34 +0800 Subject: [PATCH 42/60] use marcos to control load factor --- src/objects/dict.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 3e1bb7a2..df285611 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -4,6 +4,8 @@ #include #include +#define DICT_MAX_LOAD 0.75 + struct pkpy_DictEntry { pkpy_Var key; pkpy_Var val; @@ -124,7 +126,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { h = pkpy_Dict__probe0(self, vm, key, hash); pkpy_Dict__htset(self, h, idx); self->count += 1; - if(self->count >= self->_htcap * 0.75) pkpy_Dict__extendht(self, vm); + if(self->count >= self->_htcap * DICT_MAX_LOAD) pkpy_Dict__extendht(self, vm); return true; } @@ -157,11 +159,11 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int deleted_slots = self->_entries.count - self->count; - if(deleted_slots <= 8 || deleted_slots < self->_entries.count * 0.25) return false; + if(deleted_slots <= 8 || deleted_slots < self->_entries.count * (1 - DICT_MAX_LOAD)) return false; // shrink free(self->_hashtable); - while(self->_htcap * 0.375 > self->count && self->_htcap >= 32) + while(self->_htcap * DICT_MAX_LOAD / 2 > self->count && self->_htcap >= 32) self->_htcap /= 2; self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); From ed2e95b3f4f90efe5cf219e15a0cd280e1887175 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 11:10:08 +0800 Subject: [PATCH 43/60] fix overflow --- src/objects/dict.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index df285611..48643235 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -17,10 +17,10 @@ inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { return 4; } -inline static int pkpy_Dict__idx_null(const pkpy_Dict* self) { +inline static unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { if(self->_htcap < 255) return 255; if(self->_htcap < 65535) return 65535; - return 4294967295; + return 4294967295u; // 2^32 - 1 } inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } @@ -48,7 +48,7 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { ._hashtable = ht_clone}; } -static int pkpy_Dict__htget(const pkpy_Dict* self, int h) { +static unsigned int pkpy_Dict__htget(const pkpy_Dict* self, int h) { int sz = pkpy_Dict__idx_size(self); switch(sz) { case 1: return ((uint8_t*)self->_hashtable)[h]; @@ -72,7 +72,7 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h * 5 + 1) & mask) { - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -85,7 +85,7 @@ static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = (h * 5 + 1) & mask) { - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -114,7 +114,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) { idx = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, @@ -137,7 +137,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } else { self->count += 1; h = pkpy_Dict__probe0(self, vm, key, hash); - idx = pkpy_Dict__htget(self, h); + unsigned idx = pkpy_Dict__htget(self, h); struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); entry->key = key; entry->val = val; @@ -149,7 +149,7 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -186,7 +186,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); - int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); + unsigned int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -201,7 +201,7 @@ const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key int64_t hash = pkpy_Var__hash__(vm, key); int h = pkpy_Dict__probe1(self, vm, key, hash); - int idx = pkpy_Dict__htget(self, h); + unsigned int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return NULL; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); From a47b52f086aa9cfc952259f82de4d588c62e528e Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:04:39 +0800 Subject: [PATCH 44/60] optimize hashtable access --- src/objects/dict.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 48643235..08b8ccbc 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -18,9 +18,10 @@ inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { } inline static unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { - if(self->_htcap < 255) return 255; - if(self->_htcap < 65535) return 65535; - return 4294967295u; // 2^32 - 1 + // if(self->_htcap < 255) return 255; + // if(self->_htcap < 65535) return 65535; + // return 4294967295u; // 2^32 - 1 + return (1u << ((pkpy_Dict__idx_size(self) * 8) & 31)) - 1u; } inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } @@ -49,23 +50,20 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { } static unsigned int pkpy_Dict__htget(const pkpy_Dict* self, int h) { - int sz = pkpy_Dict__idx_size(self); - switch(sz) { - case 1: return ((uint8_t*)self->_hashtable)[h]; - case 2: return ((uint16_t*)self->_hashtable)[h]; - case 4: return ((uint32_t*)self->_hashtable)[h]; - default: PK_UNREACHABLE(); - } + const int *p = (int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); + return (*p) & pkpy_Dict__idx_null(self); } -static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { +static void pkpy_Dict__htset(pkpy_Dict* self, int h, unsigned int v) { int sz = pkpy_Dict__idx_size(self); - switch(sz) { - case 1: ((uint8_t*)self->_hashtable)[h] = v; break; - case 2: ((uint16_t*)self->_hashtable)[h] = v; break; - case 4: ((uint32_t*)self->_hashtable)[h] = v; break; - default: PK_UNREACHABLE(); - } + // switch(sz) { + // case 1: ((uint8_t*)self->_hashtable)[h] = v; break; + // case 2: ((uint16_t*)self->_hashtable)[h] = v; break; + // case 4: ((uint32_t*)self->_hashtable)[h] = v; break; + // default: PK_UNREACHABLE(); + // } + int *p = ((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self); + *p = v | (*p & ~pkpy_Dict__idx_null(self)); } static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { From 784980af93a5cf466a90ed697211f406f3533e6b Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:09:46 +0800 Subject: [PATCH 45/60] add marco PK_DICT_COMPACT_MODE --- src/objects/dict.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 08b8ccbc..6e6b4069 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -5,26 +5,32 @@ #include #define DICT_MAX_LOAD 0.75 +#define PK_DICT_COMPACT_MODE 1 struct pkpy_DictEntry { pkpy_Var key; pkpy_Var val; }; -inline static int pkpy_Dict__idx_size(const pkpy_Dict* self) { +inline extern int pkpy_Dict__idx_size(const pkpy_Dict* self) { +#if PK_DICT_COMPACT_MODE if(self->_htcap < 255) return 1; if(self->_htcap < 65535) return 2; +#endif return 4; } -inline static unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { +inline extern unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { +#if PK_DICT_COMPACT_MODE // if(self->_htcap < 255) return 255; // if(self->_htcap < 65535) return 65535; // return 4294967295u; // 2^32 - 1 return (1u << ((pkpy_Dict__idx_size(self) * 8) & 31)) - 1u; +#endif + return 4294967295u; } -inline static int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } +inline extern int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } void pkpy_Dict__ctor(pkpy_Dict* self) { self->count = 0; @@ -50,20 +56,21 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { } static unsigned int pkpy_Dict__htget(const pkpy_Dict* self, int h) { - const int *p = (int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); +#if PK_DICT_COMPACT_MODE + const unsigned int *p = (const unsigned int*)(((const char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); return (*p) & pkpy_Dict__idx_null(self); +#else + return ((const unsigned int*)self->_hashtable)[h]; +#endif } static void pkpy_Dict__htset(pkpy_Dict* self, int h, unsigned int v) { - int sz = pkpy_Dict__idx_size(self); - // switch(sz) { - // case 1: ((uint8_t*)self->_hashtable)[h] = v; break; - // case 2: ((uint16_t*)self->_hashtable)[h] = v; break; - // case 4: ((uint32_t*)self->_hashtable)[h] = v; break; - // default: PK_UNREACHABLE(); - // } - int *p = ((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self); +#if PK_DICT_COMPACT_MODE + unsigned int *p = (unsigned int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); *p = v | (*p & ~pkpy_Dict__idx_null(self)); +#else + ((unsigned int*)self->_hashtable)[h] = v; +#endif } static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { From 6d938d30bf34d687d23a70ca0b3cdd057798f299 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:19:16 +0800 Subject: [PATCH 46/60] make hash functions macros --- src/objects/dict.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 6e6b4069..c20f59eb 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -5,6 +5,8 @@ #include #define DICT_MAX_LOAD 0.75 +#define DICT_HASH_NEXT(h) ((h) * 5 + 1) +#define DICT_HASH_TRANS(h) ((int)((h) & 0xffffffff)) // used for tansform value from __hash__ #define PK_DICT_COMPACT_MODE 1 struct pkpy_DictEntry { @@ -73,10 +75,10 @@ static void pkpy_Dict__htset(pkpy_Dict* self, int h, unsigned int v) { #endif } -static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { - const int null = pkpy_Dict__idx_null(self); - const int mask = self->_htcap - 1; - for(int h = hash & mask;; h = (h * 5 + 1) & mask) { +static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { + const unsigned int null = pkpy_Dict__idx_null(self); + const unsigned int mask = self->_htcap - 1; + for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) { unsigned int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; @@ -86,10 +88,10 @@ static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int6 PK_UNREACHABLE(); } -static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int64_t hash) { +static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; - for(int h = hash & mask;; h = (h * 5 + 1) & mask) { + for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) { unsigned int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; @@ -110,13 +112,14 @@ static void pkpy_Dict__extendht(pkpy_Dict* self, void* vm) { struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); if(pkpy_Var__is_null(&entry->key)) continue; - int h = pkpy_Dict__probe0(self, vm, entry->key, pkpy_Var__hash__(vm, entry->key)); + int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key)); + int h = pkpy_Dict__probe0(self, vm, entry->key, rhash); pkpy_Dict__htset(self, h, i); } } bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { - int64_t hash = pkpy_Var__hash__(vm, key); + int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); unsigned int idx = pkpy_Dict__htget(self, h); @@ -151,7 +154,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int64_t hash = pkpy_Var__hash__(vm, key); + int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); unsigned int idx = pkpy_Dict__htget(self, h); @@ -181,7 +184,8 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { int j = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); - int h = pkpy_Dict__probe0(self, vm, entry->key, pkpy_Var__hash__(vm, entry->key)); + int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key)); + int h = pkpy_Dict__probe0(self, vm, entry->key, rhash); pkpy_Dict__htset(self, h, j); } c11_vector__dtor(&old_entries); @@ -189,7 +193,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { } bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { - int64_t hash = pkpy_Var__hash__(vm, key); + int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); unsigned int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; @@ -203,7 +207,7 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { } const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key) { - int64_t hash = pkpy_Var__hash__(vm, key); + int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); unsigned int idx = pkpy_Dict__htget(self, h); From d1763bdef177441f8ed373a2fc44e968567329af Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:21:20 +0800 Subject: [PATCH 47/60] replace reinterpret_cast with C-style cast --- include/pocketpy/objects/dict.hpp | 14 +++++++------- src/interpreter/iter.cpp | 2 +- src/pocketpy.cpp | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 6c854961..94fe0c12 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -28,21 +28,21 @@ struct Dict : private pkpy_Dict { int size() const { return count; } void set(VM* vm, PyVar key, PyVar val) { - pkpy_Dict__set(this, vm, *reinterpret_cast<::pkpy_Var*>(&key), *reinterpret_cast<::pkpy_Var*>(&val)); + pkpy_Dict__set(this, vm, *(pkpy_Var*)(&key), *(pkpy_Var*)(&val)); } PyVar try_get(VM* vm, PyVar key) const { - auto res = pkpy_Dict__try_get(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + auto res = pkpy_Dict__try_get(this, vm, *(pkpy_Var*)(&key)); if (!res) return nullptr; return *reinterpret_cast(res); } bool contains(VM* vm, PyVar key) const { - return pkpy_Dict__contains(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + return pkpy_Dict__contains(this, vm, *(pkpy_Var*)(&key)); } bool del(VM* vm, PyVar key) { - return pkpy_Dict__del(this, vm, *reinterpret_cast<::pkpy_Var*>(&key)); + return pkpy_Dict__del(this, vm, *(pkpy_Var*)(&key)); } void update(VM* vm, const Dict& other) { @@ -53,7 +53,7 @@ struct Dict : private pkpy_Dict { void apply(__Func f) const { pkpy_DictIter it = iter(); PyVar key, val; - while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { f(key, val); } } @@ -63,7 +63,7 @@ struct Dict : private pkpy_Dict { pkpy_DictIter it = iter(); PyVar key, val; int i = 0; - while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { res[i++] = key; } return res; @@ -74,7 +74,7 @@ struct Dict : private pkpy_Dict { pkpy_DictIter it = iter(); PyVar key, val; int i = 0; - while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { res[i++] = val; } return res; diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp index 56d5f1f6..d0fae96a 100644 --- a/src/interpreter/iter.cpp +++ b/src/interpreter/iter.cpp @@ -118,7 +118,7 @@ void DictItemsIter::_register(VM* vm, PyObject* mod, PyObject* type) { vm->bind__next__(type->as(), [](VM* vm, PyVar _0) -> unsigned { DictItemsIter& self = _CAST(DictItemsIter&, _0); PyVar key, val; - if (pkpy_DictIter__next(&self.it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + if (pkpy_DictIter__next(&self.it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { vm->s_data.push(key); vm->s_data.push(val); return 2; diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index c6a7bb7d..ba1456eb 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -1495,7 +1495,7 @@ void __init_builtins(VM* _vm) { if(self.size() != other.size()) return vm->False; pkpy_DictIter it = self.iter(); PyVar key, val; - while(pkpy_DictIter__next(&it, reinterpret_cast<::pkpy_Var*>(&key), reinterpret_cast<::pkpy_Var*>(&val))) { + while(pkpy_DictIter__next(&it, (pkpy_Var*)(&key), (pkpy_Var*)(&val))) { PyVar other_val = other.try_get(vm, key); if(other_val == nullptr) return vm->False; if(!vm->py_eq(val, other_val)) return vm->False; From a8ca70ca74d81d1db562d347975d5cef343a31f8 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:24:22 +0800 Subject: [PATCH 48/60] more replace --- include/pocketpy/objects/dict.hpp | 2 +- src/objects/pyvar.cpp | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/pocketpy/objects/dict.hpp b/include/pocketpy/objects/dict.hpp index 94fe0c12..2d521073 100644 --- a/include/pocketpy/objects/dict.hpp +++ b/include/pocketpy/objects/dict.hpp @@ -34,7 +34,7 @@ struct Dict : private pkpy_Dict { PyVar try_get(VM* vm, PyVar key) const { auto res = pkpy_Dict__try_get(this, vm, *(pkpy_Var*)(&key)); if (!res) return nullptr; - return *reinterpret_cast(res); + return *(const PyVar*)(res); } bool contains(VM* vm, PyVar key) const { diff --git a/src/objects/pyvar.cpp b/src/objects/pyvar.cpp index 14ad6911..7b9a1851 100644 --- a/src/objects/pyvar.cpp +++ b/src/objects/pyvar.cpp @@ -4,14 +4,14 @@ extern "C" { -bool pkpy_Var__eq__(void *vm_, pkpy_Var a, pkpy_Var b) { - auto vm = static_cast(vm_); - return vm->py_eq(*reinterpret_cast(&a), *reinterpret_cast(&b)); +bool pkpy_Var__eq__(void* vm_, pkpy_Var a, pkpy_Var b) { + auto vm = (pkpy::VM*)(vm_); + return vm->py_eq(*(pkpy::PyVar*)(&a), *(pkpy::PyVar*)(&b)); } -int64_t pkpy_Var__hash__(void *vm_, pkpy_Var a) { - auto vm = static_cast(vm_); - return vm->py_hash(*reinterpret_cast(&a)); +int64_t pkpy_Var__hash__(void* vm_, pkpy_Var a) { + auto vm = (pkpy::VM*)(vm_); + return vm->py_hash(*(pkpy::PyVar*)(&a)); } } From 7549f1b95a6aab0ae815acf53bf149f38a030af3 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:28:55 +0800 Subject: [PATCH 49/60] better dict clear --- include/pocketpy/objects/dict.h | 3 ++- src/objects/dict.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/pocketpy/objects/dict.h b/include/pocketpy/objects/dict.h index 071b4bdc..0ad7c6a5 100644 --- a/include/pocketpy/objects/dict.h +++ b/include/pocketpy/objects/dict.h @@ -8,6 +8,7 @@ extern "C" { #include "pocketpy/objects/pyvar.h" #include "pocketpy/common/vector.h" +/** @brief `pkpy_Dict` is the Dict type in Python */ typedef struct { int count; /** number of elements in the dictionary */ c11_vector _entries; /** contains `pkpy_DictEntry` (hidden type) */ @@ -15,9 +16,9 @@ typedef struct { void* _hashtable; /** contains indecies, can be `u8`, `u16` or `u32` according to size*/ } pkpy_Dict; +/** @brief `pkpy_DictIter` is used to iterate over a `pkpy_Dict` */ typedef struct { const pkpy_Dict* _dict; - unsigned int _version; int _index; } pkpy_DictIter; diff --git a/src/objects/dict.c b/src/objects/dict.c index c20f59eb..9eff5c77 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -227,8 +227,15 @@ void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) { } void pkpy_Dict__clear(pkpy_Dict *self) { - pkpy_Dict__dtor(self); - pkpy_Dict__ctor(self); + self->count = 0; + c11_vector__dtor(&self->_entries); + c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); + if (self->_hashtable > 16) { + free(self->_hashtable); + self->_htcap = 16; + self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + } + memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); } static int pkpy_Dict__next_entry_idx(const pkpy_Dict* self, int idx) { From 681b9d7dd05427db7f9115f22b73672732d53ff7 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:30:24 +0800 Subject: [PATCH 50/60] fix...and remove assert with side effect --- src/objects/dict.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 9eff5c77..be3044db 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -214,7 +214,6 @@ const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key if(idx == pkpy_Dict__idx_null(self)) return NULL; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(pkpy_Var__eq__(vm, entry->key, key)); return &entry->val; } @@ -230,7 +229,7 @@ void pkpy_Dict__clear(pkpy_Dict *self) { self->count = 0; c11_vector__dtor(&self->_entries); c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); - if (self->_hashtable > 16) { + if (self->_htcap > 16) { free(self->_hashtable); self->_htcap = 16; self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); From 6220ab029b8d1dd88861acc885feadbb2c6cdc1b Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:35:31 +0800 Subject: [PATCH 51/60] stop using unsigned for indecies --- src/objects/dict.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index be3044db..efd51a21 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -22,14 +22,12 @@ inline extern int pkpy_Dict__idx_size(const pkpy_Dict* self) { return 4; } -inline extern unsigned int pkpy_Dict__idx_null(const pkpy_Dict* self) { +inline extern int pkpy_Dict__idx_null(const pkpy_Dict* self) { #if PK_DICT_COMPACT_MODE - // if(self->_htcap < 255) return 255; - // if(self->_htcap < 65535) return 65535; - // return 4294967295u; // 2^32 - 1 - return (1u << ((pkpy_Dict__idx_size(self) * 8) & 31)) - 1u; + if(self->_htcap < 255) return 255; + if(self->_htcap < 65535) return 65535; #endif - return 4294967295u; + return -1; } inline extern int pkpy_Dict__ht_byte_size(const pkpy_Dict* self) { return self->_htcap * pkpy_Dict__idx_size(self); } @@ -57,29 +55,29 @@ pkpy_Dict pkpy_Dict__copy(const pkpy_Dict* self) { ._hashtable = ht_clone}; } -static unsigned int pkpy_Dict__htget(const pkpy_Dict* self, int h) { +static int pkpy_Dict__htget(const pkpy_Dict* self, int h) { #if PK_DICT_COMPACT_MODE - const unsigned int *p = (const unsigned int*)(((const char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); + const int *p = (const int*)(((const char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); return (*p) & pkpy_Dict__idx_null(self); #else - return ((const unsigned int*)self->_hashtable)[h]; + return ((const int*)self->_hashtable)[h]; #endif } -static void pkpy_Dict__htset(pkpy_Dict* self, int h, unsigned int v) { +static void pkpy_Dict__htset(pkpy_Dict* self, int h, int v) { #if PK_DICT_COMPACT_MODE - unsigned int *p = (unsigned int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); + int *p = (int*)(((char*)self->_hashtable) + h * pkpy_Dict__idx_size(self)); *p = v | (*p & ~pkpy_Dict__idx_null(self)); #else - ((unsigned int*)self->_hashtable)[h] = v; + ((int*)self->_hashtable)[h] = v; #endif } static int pkpy_Dict__probe0(const pkpy_Dict* self, void* vm, pkpy_Var key, int hash) { - const unsigned int null = pkpy_Dict__idx_null(self); - const unsigned int mask = self->_htcap - 1; + const int null = pkpy_Dict__idx_null(self); + const int mask = self->_htcap - 1; for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) { - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -92,7 +90,7 @@ static int pkpy_Dict__probe1(const pkpy_Dict* self, void* vm, pkpy_Var key, int const int null = pkpy_Dict__idx_null(self); const int mask = self->_htcap - 1; for(int h = hash & mask;; h = DICT_HASH_NEXT(h) & mask) { - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == null) return h; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -122,7 +120,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) { idx = self->_entries.count; c11_vector__push(struct pkpy_DictEntry, @@ -145,7 +143,7 @@ bool pkpy_Dict__set(pkpy_Dict* self, void* vm, pkpy_Var key, pkpy_Var val) { } else { self->count += 1; h = pkpy_Dict__probe0(self, vm, key, hash); - unsigned idx = pkpy_Dict__htget(self, h); + idx = pkpy_Dict__htget(self, h); struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); entry->key = key; entry->val = val; @@ -157,7 +155,7 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -195,7 +193,7 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); - unsigned int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); + int idx = pkpy_Dict__htget(self, h), null = pkpy_Dict__idx_null(self); if(idx == null) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); @@ -210,7 +208,7 @@ const pkpy_Var *pkpy_Dict__try_get(const pkpy_Dict* self, void* vm, pkpy_Var key int hash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, key)); int h = pkpy_Dict__probe1(self, vm, key, hash); - unsigned int idx = pkpy_Dict__htget(self, h); + int idx = pkpy_Dict__htget(self, h); if(idx == pkpy_Dict__idx_null(self)) return NULL; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); From 5e38f7debd8a89766f92d43c747dc9477c63f5a4 Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 12:36:47 +0800 Subject: [PATCH 52/60] remove asserts with side effect --- src/objects/dict.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index efd51a21..82caff11 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -159,7 +159,6 @@ bool pkpy_Dict__contains(const pkpy_Dict* self, void* vm, pkpy_Var key) { if(idx == pkpy_Dict__idx_null(self)) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(pkpy_Var__eq__(vm, entry->key, key)); return true; } @@ -197,7 +196,6 @@ bool pkpy_Dict__del(pkpy_Dict* self, void* vm, pkpy_Var key) { if(idx == null) return false; struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, idx); - assert(pkpy_Var__eq__(vm, entry->key, key)); pkpy_Var__set_null(&entry->key); self->count -= 1; pkpy_Dict__refactor(self, vm); From 6649a5b9870f5ada229ac680e30e9c33ea58063e Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 13:13:24 +0800 Subject: [PATCH 53/60] never shrink --- src/objects/dict.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/objects/dict.c b/src/objects/dict.c index 82caff11..a17432f1 100644 --- a/src/objects/dict.c +++ b/src/objects/dict.c @@ -167,25 +167,29 @@ static bool pkpy_Dict__refactor(pkpy_Dict* self, void* vm) { if(deleted_slots <= 8 || deleted_slots < self->_entries.count * (1 - DICT_MAX_LOAD)) return false; // shrink - free(self->_hashtable); - while(self->_htcap * DICT_MAX_LOAD / 2 > self->count && self->_htcap >= 32) - self->_htcap /= 2; - self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); + // free(self->_hashtable); + // while(self->_htcap * DICT_MAX_LOAD / 2 > self->count && self->_htcap >= 32) + // self->_htcap /= 2; + // self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); - c11_vector old_entries = self->_entries; - c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); - for(int i = 0; i < old_entries.count; i++) { - struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &old_entries, i); + int new_cnt = 0; + for (int i = 0; i < self->_entries.count; ++i) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); + if(pkpy_Var__is_null(&entry->key)) continue; + if (i > new_cnt) c11__setitem(struct pkpy_DictEntry, &self->_entries, new_cnt, *entry); + new_cnt += 1; + } + + self->_entries.count = new_cnt; + for(int i = 0; i < self->_entries.count; i++) { + struct pkpy_DictEntry* entry = &c11__getitem(struct pkpy_DictEntry, &self->_entries, i); if(pkpy_Var__is_null(&entry->key)) continue; - int j = self->_entries.count; - c11_vector__push(struct pkpy_DictEntry, &self->_entries, *entry); int rhash = DICT_HASH_TRANS(pkpy_Var__hash__(vm, entry->key)); int h = pkpy_Dict__probe0(self, vm, entry->key, rhash); - pkpy_Dict__htset(self, h, j); + pkpy_Dict__htset(self, h, i); } - c11_vector__dtor(&old_entries); return true; } @@ -223,13 +227,7 @@ void pkpy_Dict__update(pkpy_Dict *self, void *vm, const pkpy_Dict *other) { void pkpy_Dict__clear(pkpy_Dict *self) { self->count = 0; - c11_vector__dtor(&self->_entries); - c11_vector__ctor(&self->_entries, sizeof(struct pkpy_DictEntry)); - if (self->_htcap > 16) { - free(self->_hashtable); - self->_htcap = 16; - self->_hashtable = malloc(pkpy_Dict__ht_byte_size(self)); - } + self->_entries.count = 0; memset(self->_hashtable, 0xff, pkpy_Dict__ht_byte_size(self)); } From 33b110589dcead671e8060d530fc6fc5434eb82b Mon Sep 17 00:00:00 2001 From: szdytom Date: Fri, 14 Jun 2024 13:35:32 +0800 Subject: [PATCH 54/60] add test for dict larger than 65536 --- tests/07_dict.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/07_dict.py b/tests/07_dict.py index 6a0d3b49..8578a97d 100644 --- a/tests/07_dict.py +++ b/tests/07_dict.py @@ -159,6 +159,17 @@ try: except TypeError: pass +n = 2 ** 17 +a = {} +for i in range(n): + a[str(i)] = i + +for i in range(n): + y = a[str(i)] + +for i in range(n): + del a[str(i)] + a = {1: 2, 3: 4} a['a'] = a assert repr(a) == "{1: 2, 3: 4, 'a': {...}}" @@ -169,4 +180,3 @@ gc.collect() for k, v in a.items(): pass assert gc.collect() == 1 - From b2360315540d2585f2618f3e05fe535bae73fca3 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 14 Jun 2024 13:57:12 +0800 Subject: [PATCH 55/60] add benchmark --- benchmarks/dict_0.py | 20 ++++++++++++++++++++ benchmarks/dict_1.py | 27 +++++++++++++++++++++++++++ build_g.sh | 2 ++ 3 files changed, 49 insertions(+) create mode 100644 benchmarks/dict_0.py create mode 100644 benchmarks/dict_1.py diff --git a/benchmarks/dict_0.py b/benchmarks/dict_0.py new file mode 100644 index 00000000..9637fac5 --- /dev/null +++ b/benchmarks/dict_0.py @@ -0,0 +1,20 @@ +# test basic get/set +import random +random.seed(7) + +a = {str(i): i for i in range(100)} +a['existed'] = 0 +a['missed'] = 0 + +for i in range(1000000): + key = str(random.randint(-100, 100)) + if key in a: + a['existed'] += 1 + else: + a['missed'] += 1 + +existed = a['existed'] +missed = a['missed'] + +assert abs(existed - missed) < 10000 + diff --git a/benchmarks/dict_1.py b/benchmarks/dict_1.py new file mode 100644 index 00000000..6c5daa31 --- /dev/null +++ b/benchmarks/dict_1.py @@ -0,0 +1,27 @@ +# test deletion +rnd = 0 +keys = [] +while True: + keys.append(rnd) + rnd = ((rnd * 5) + 1) & 1023 + if rnd == 0: + break + +assert len(keys) == 1024 + +a = {k: k for k in keys} + +for i in range(10000): + if i % 2 == 0: + # del all keys + for k in keys: + del a[k] + assert len(a) == 0 + else: + # add keys back + for k in keys: + a[k] = k + assert len(a) == len(keys) + +assert len(a) == len(keys) +assert list(a.keys()) == keys # order matters diff --git a/build_g.sh b/build_g.sh index bffbb51f..5744fb07 100644 --- a/build_g.sh +++ b/build_g.sh @@ -1,3 +1,5 @@ +set -e + python prebuild.py SRC_C=$(find src/ -name "*.c") From e7e8b9141ed1d2db3587e74cf0d4ba380ce9b366 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 14 Jun 2024 19:08:53 +0800 Subject: [PATCH 56/60] Update CMakeLists.txt --- CMakeLists.txt | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db995efa..0a4c4450 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,12 +8,22 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) if(MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /utf-8 /Ox /jumptablerdata /GS-") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /Ox /jumptablerdata /GS-") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /utf-8 /jumptablerdata /GS-") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /jumptablerdata /GS-") add_compile_options(/wd4267 /wd4244) + + if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Ox") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Ox") + endif() else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti -O2") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + + if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") + endif() # disable -Wshorten-64-to-32 for apple if(APPLE) From eae3c69f85e17a06a5503986482faa05b5dc6f21 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 15 Jun 2024 11:56:04 +0800 Subject: [PATCH 57/60] backup --- include/pocketpy/common/algorithm.h | 23 +++++++++++++++++++++++ include/pocketpy/common/vector.h | 18 ++++++++++++++++++ include/pocketpy/common/vector.hpp | 5 +++-- src/common/algorithm.c | 20 ++++++++++++++++++++ src/common/vector.c | 3 +++ src/compiler/lexer.cpp | 4 ++-- src/modules/random.cpp | 2 +- 7 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 include/pocketpy/common/algorithm.h create mode 100644 src/common/algorithm.c diff --git a/include/pocketpy/common/algorithm.h b/include/pocketpy/common/algorithm.h new file mode 100644 index 00000000..f5a56509 --- /dev/null +++ b/include/pocketpy/common/algorithm.h @@ -0,0 +1,23 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void *c11__lower_bound(const void *key, const void *ptr, int count, int size, + bool (*less)(const void *, const void *)); + +#ifdef __cplusplus +} + +namespace pkpy{ +template + T* lower_bound(T* begin, T* end, const T& value){ + return (T*)c11__lower_bound(&value, begin, end - begin, sizeof(T), [](const void* a, const void* b){ + return *(T*)a < *(T*)b; + }); + } +} // namespace pkpy +#endif \ No newline at end of file diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index 1620520d..a3c4abf3 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -7,6 +7,8 @@ extern "C" { #include #include +#include "pocketpy/common/algorithm.h" + typedef struct c11_array{ void* data; int count; @@ -30,6 +32,7 @@ void c11_vector__dtor(c11_vector* self); c11_vector c11_vector__copy(const c11_vector* self); void* c11_vector__at(c11_vector* self, int index); void c11_vector__reserve(c11_vector* self, int capacity); +void c11_vector__clear(c11_vector* self); #define c11__getitem(T, self, index) ((T*)(self)->data)[index] #define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value; @@ -53,6 +56,21 @@ void c11_vector__reserve(c11_vector* self, int capacity); (self)->count += (size); \ }while(0) + +#define c11_vector__insert(T, self, index, elem) \ + do{ \ + if((self)->count == (self)->capacity) c11_vector__reserve((self), (self)->capacity*2); \ + memmove((T*)(self)->data + (index) + 1, (T*)(self)->data + (index), ((self)->count - (index)) * sizeof(T)); \ + ((T*)(self)->data)[index] = (elem); \ + (self)->count++; \ + }while(0) + +#define c11_vector__erase(T, self, index) \ + do{ \ + memmove((T*)(self)->data + (index), (T*)(self)->data + (index) + 1, ((self)->count - (index) - 1) * sizeof(T)); \ + (self)->count--; \ + }while(0) + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/include/pocketpy/common/vector.hpp b/include/pocketpy/common/vector.hpp index ac7d4640..4d05c4cf 100644 --- a/include/pocketpy/common/vector.hpp +++ b/include/pocketpy/common/vector.hpp @@ -8,6 +8,7 @@ #include "pocketpy/common/traits.hpp" #include "pocketpy/common/types.hpp" +#include "pocketpy/common/algorithm.h" namespace pkpy { @@ -427,13 +428,13 @@ struct small_map { Item* data() const { return _data.data(); } void insert(const K& key, const V& value) { - Item* it = std::lower_bound(_data.begin(), _data.end(), key); + Item* it = lower_bound(_data.begin(), _data.end(), key); assert(it == _data.end() || it->first != key); _data.insert(it, {key, value}); } V* try_get(const K& key) const { - auto it = std::lower_bound(_data.begin(), _data.end(), key); + auto it = lower_bound(_data.begin(), _data.end(), key); if(it == _data.end() || it->first != key) return nullptr; return &it->second; } diff --git a/src/common/algorithm.c b/src/common/algorithm.c new file mode 100644 index 00000000..0db7d07c --- /dev/null +++ b/src/common/algorithm.c @@ -0,0 +1,20 @@ +#include "pocketpy/common/algorithm.h" + +void *c11__lower_bound(const void *key, const void *ptr, int count, int size, + bool (*less)(const void *, const void *)) { + char* __first = (char*)ptr; + int __len = count; + + while(__len != 0){ + int __l2 = (int)((unsigned int)__len >> 1); + char* __m = __first + __l2 * size; + if(less(__m, key)){ + __first = __m; + __m += size; + __len -= __l2 + 1; + }else{ + __len = __l2; + } + } + return __first; +} \ No newline at end of file diff --git a/src/common/vector.c b/src/common/vector.c index b6212672..b28b75fe 100644 --- a/src/common/vector.c +++ b/src/common/vector.c @@ -60,3 +60,6 @@ void c11_vector__reserve(c11_vector* self, int capacity){ self->data = realloc(self->data, self->elem_size * self->capacity); } +void c11_vector__clear(c11_vector* self){ + self->count = 0; +} diff --git a/src/compiler/lexer.cpp b/src/compiler/lexer.cpp index 8fd748e6..e87ce605 100644 --- a/src/compiler/lexer.cpp +++ b/src/compiler/lexer.cpp @@ -28,7 +28,7 @@ static bool is_possible_number_char(char c) noexcept{ static bool is_unicode_Lo_char(uint32_t c) noexcept{ // open a hole for carrot if(c == U'🥕') return true; - auto index = std::lower_bound(kLoRangeA, kLoRangeA + 476, c) - kLoRangeA; + auto index = lower_bound(kLoRangeA, kLoRangeA + 476, c) - kLoRangeA; if(c == kLoRangeA[index]) return true; index -= 1; if(index < 0) return false; @@ -161,7 +161,7 @@ Error* Lexer::eat_name() noexcept{ const auto KW_BEGIN = kTokens + TK("False"); const auto KW_END = kTokens + kTokenCount; - auto it = std::lower_bound(KW_BEGIN, KW_END, name); + auto it = lower_bound(KW_BEGIN, KW_END, name); if(it != KW_END && *it == name) { add_token(it - kTokens); } else { diff --git a/src/modules/random.cpp b/src/modules/random.cpp index 6dca0d54..131aa298 100644 --- a/src/modules/random.cpp +++ b/src/modules/random.cpp @@ -199,7 +199,7 @@ struct Random { List result(k); for(int i = 0; i < k; i++) { f64 r = self.gen.uniform(0.0, cum_weights[size - 1]); - int idx = std::lower_bound(cum_weights.begin(), cum_weights.end(), r) - cum_weights.begin(); + int idx = lower_bound(cum_weights.begin(), cum_weights.end(), r) - cum_weights.begin(); result[i] = data[idx]; } return VAR(std::move(result)); From 2d0db3dc717267a54624ad732ba78dd4f08e7ddc Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 15 Jun 2024 13:34:47 +0800 Subject: [PATCH 58/60] add lower_bound --- include/pocketpy/common/algorithm.h | 11 +--- include/pocketpy/common/sstream.h | 9 +-- include/pocketpy/common/str.h | 14 +++-- include/pocketpy/common/vector.h | 27 ++++++--- include/pocketpy/common/vector.hpp | 4 +- include/pocketpy/objects/sourcedata.h | 6 +- include/pocketpy/objects/sourcedata.hpp | 2 +- src/common/algorithm.c | 25 +++++++- src/common/sourcedata.c | 28 ++++----- src/common/sstream.c | 28 +++++++-- src/common/str.c | 76 ++++++++++++------------- src/compiler/lexer.cpp | 19 +------ src/interpreter/iter.cpp | 2 +- src/modules/random.cpp | 2 +- 14 files changed, 138 insertions(+), 115 deletions(-) diff --git a/include/pocketpy/common/algorithm.h b/include/pocketpy/common/algorithm.h index f5a56509..4438327a 100644 --- a/include/pocketpy/common/algorithm.h +++ b/include/pocketpy/common/algorithm.h @@ -9,15 +9,10 @@ extern "C" { void *c11__lower_bound(const void *key, const void *ptr, int count, int size, bool (*less)(const void *, const void *)); +int *c11__lower_bound_int(int key, const int *ptr, int count); +double *c11__lower_bound_double(double key, const double *ptr, int count); + #ifdef __cplusplus } -namespace pkpy{ -template - T* lower_bound(T* begin, T* end, const T& value){ - return (T*)c11__lower_bound(&value, begin, end - begin, sizeof(T), [](const void* a, const void* b){ - return *(T*)a < *(T*)b; - }); - } -} // namespace pkpy #endif \ No newline at end of file diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index 6189e8ef..7525246b 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -1,15 +1,15 @@ #pragma once -#ifdef __cplusplus -extern "C" { -#endif - #include "pocketpy/common/vector.h" #include "pocketpy/common/str.h" #include "pocketpy/common/utils.h" #include +#ifdef __cplusplus +extern "C" { +#endif + typedef struct pkpy_SStream { c11_vector data; } pkpy_SStream; @@ -40,6 +40,7 @@ PK_INLINE pkpy_AnyStr pkpy_AnyStr__cstr(const char* x) { pkpy_AnyStr s; s.type = PK_INLINE pkpy_AnyStr pkpy_AnyStr__ptr(void* x) { pkpy_AnyStr s; s.type = 9; s._ptr = x; return s; } void pkpy_SStream__ctor(pkpy_SStream* self); +void pkpy_SStream__ctor2(pkpy_SStream* self, int capacity); void pkpy_SStream__dtor(pkpy_SStream* self); void pkpy_SStream__write_int(pkpy_SStream* self, int); diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index c09be739..28efd200 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -1,14 +1,13 @@ #pragma once +#include +#include "pocketpy/common/vector.h" +#include "pocketpy/common/utils.h" + #ifdef __cplusplus extern "C" { #endif -#include - -#include "pocketpy/common/vector.h" -#include "pocketpy/common/utils.h" - /* string_view */ typedef struct c11_string{ const char* data; @@ -29,7 +28,6 @@ PK_INLINE const char* pkpy_Str__data(const pkpy_Str* self){ return self->is_sso ? self->_inlined : self->_ptr; } -int pkpy_utils__u8_header(unsigned char c, bool suppress); void pkpy_Str__ctor(pkpy_Str* self, const char* data); void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size); void pkpy_Str__dtor(pkpy_Str* self); @@ -57,6 +55,10 @@ int pkpy_Str__count(const pkpy_Str* self, const pkpy_Str* sub); c11_vector/* T=c11_string */ pkpy_Str__split(const pkpy_Str* self, char sep); c11_vector/* T=c11_string */ pkpy_Str__split2(const pkpy_Str* self, const pkpy_Str* sep); +bool c11__isascii(const char* p, int size); +bool c11__is_unicode_Lo_char(int c); +int c11__u8_header(unsigned char c, bool suppress); + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index a3c4abf3..fb7f72f1 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -1,14 +1,13 @@ #pragma once +#include +#include +#include "pocketpy/common/algorithm.h" + #ifdef __cplusplus extern "C" { #endif -#include -#include - -#include "pocketpy/common/algorithm.h" - typedef struct c11_array{ void* data; int count; @@ -60,17 +59,29 @@ void c11_vector__clear(c11_vector* self); #define c11_vector__insert(T, self, index, elem) \ do{ \ if((self)->count == (self)->capacity) c11_vector__reserve((self), (self)->capacity*2); \ - memmove((T*)(self)->data + (index) + 1, (T*)(self)->data + (index), ((self)->count - (index)) * sizeof(T)); \ - ((T*)(self)->data)[index] = (elem); \ + T* p = (T*)(self)->data + (index); \ + memmove(p + 1, p, ((self)->count - (index)) * sizeof(T)); \ + *p = (elem); \ (self)->count++; \ }while(0) #define c11_vector__erase(T, self, index) \ do{ \ - memmove((T*)(self)->data + (index), (T*)(self)->data + (index) + 1, ((self)->count - (index) - 1) * sizeof(T)); \ + T* p = (T*)(self)->data + (index); \ + memmove(p, p + 1, ((self)->count - (index) - 1) * sizeof(T)); \ (self)->count--; \ }while(0) +#define c11_vector__reverse(T, self, start, end) \ + do{ \ + T* p = (T*)(self)->data + (start); \ + T* q = (T*)(self)->data + (end); \ + while(p < q){ \ + T tmp = *p; *p = *q; *q = tmp; \ + p++; q--; \ + } \ + }while(0) + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/include/pocketpy/common/vector.hpp b/include/pocketpy/common/vector.hpp index 4d05c4cf..66e035a0 100644 --- a/include/pocketpy/common/vector.hpp +++ b/include/pocketpy/common/vector.hpp @@ -428,13 +428,13 @@ struct small_map { Item* data() const { return _data.data(); } void insert(const K& key, const V& value) { - Item* it = lower_bound(_data.begin(), _data.end(), key); + Item* it = std::lower_bound(_data.begin(), _data.end(), key); assert(it == _data.end() || it->first != key); _data.insert(it, {key, value}); } V* try_get(const K& key) const { - auto it = lower_bound(_data.begin(), _data.end(), key); + auto it = std::lower_bound(_data.begin(), _data.end(), key); if(it == _data.end() || it->first != key) return nullptr; return &it->second; } diff --git a/include/pocketpy/objects/sourcedata.h b/include/pocketpy/objects/sourcedata.h index 7f8d7cc0..3b40175a 100644 --- a/include/pocketpy/objects/sourcedata.h +++ b/include/pocketpy/objects/sourcedata.h @@ -17,11 +17,11 @@ struct pkpy_SourceData { pkpy_Str filename; pkpy_Str source; - c11_vector line_starts; // contains "const char *" - c11_vector _precompiled_tokens; // contains "pkpy_Str" + c11_vector/*T=const char* */ line_starts; + c11_vector/*T=pkpy_Str*/ _precompiled_tokens; }; -void pkpy_SourceData__ctor(struct pkpy_SourceData *self, const char *source, int source_size, const pkpy_Str *filename, enum CompileMode mode); +void pkpy_SourceData__ctor(struct pkpy_SourceData *self, c11_string source, const pkpy_Str *filename, enum CompileMode mode); void pkpy_SourceData__dtor(struct pkpy_SourceData* self); bool pkpy_SourceData__get_line(const struct pkpy_SourceData *self, int lineno, const char **st, const char **ed); diff --git a/include/pocketpy/objects/sourcedata.hpp b/include/pocketpy/objects/sourcedata.hpp index a9b6ac19..f4ce2908 100644 --- a/include/pocketpy/objects/sourcedata.hpp +++ b/include/pocketpy/objects/sourcedata.hpp @@ -8,7 +8,7 @@ namespace pkpy { struct SourceData : public pkpy_SourceData { SourceData(std::string_view source, const Str& filename, CompileMode mode) { - pkpy_SourceData__ctor(this, source.data(), source.size(), &filename, mode); + pkpy_SourceData__ctor(this, {source.data(), (int)source.size()}, &filename, mode); } ~SourceData() { diff --git a/src/common/algorithm.c b/src/common/algorithm.c index 0db7d07c..5917e337 100644 --- a/src/common/algorithm.c +++ b/src/common/algorithm.c @@ -6,15 +6,34 @@ void *c11__lower_bound(const void *key, const void *ptr, int count, int size, int __len = count; while(__len != 0){ - int __l2 = (int)((unsigned int)__len >> 1); + int __l2 = (int)((unsigned int)__len / 2); char* __m = __first + __l2 * size; if(less(__m, key)){ - __first = __m; __m += size; + __first = __m; __len -= __l2 + 1; }else{ __len = __l2; } } return __first; -} \ No newline at end of file +} + +static bool c11__less_int(const void* a, const void* b){ + return *(int*)a < *(int*)b; +} + +static bool c11__less_double(const void* a, const void* b){ + return *(double*)a < *(double*)b; +} + +int *c11__lower_bound_int(int key, const int *ptr, int count) { + void* res = c11__lower_bound(&key, ptr, count, sizeof(int), c11__less_int); + return (int*)res; +} + +double *c11__lower_bound_double(double key, const double *ptr, int count) { + void* res = c11__lower_bound(&key, ptr, count, sizeof(double), c11__less_double); + return (double*)res; +} + diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c index 496ea12c..b37a1810 100644 --- a/src/common/sourcedata.c +++ b/src/common/sourcedata.c @@ -4,31 +4,27 @@ #include #include -void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size); - void pkpy_SourceData__ctor(struct pkpy_SourceData* self, - const char* source, - int source_size, + c11_string source, // may not be null-terminated const pkpy_Str* filename, enum CompileMode mode) { self->filename = pkpy_Str__copy(filename); // OPTIMIZEME? self->mode = mode; - c11_vector__ctor(&self->line_starts, sizeof(const char*)); c11_vector__ctor(&self->_precompiled_tokens, sizeof(pkpy_Str)); - int index = (strncmp(source, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0; - int len = source_size - index; - for(int i = 0; i < source_size; ++i) - len -= (source[i] == '\r'); - - char *buf = (char*)malloc(len + 1), *p = buf; - buf[len] = '\0'; - for(; index < source_size; ++index) { - if(source[index] != '\r') *(p++) = source[index]; + int index = 0; + // Skip utf8 BOM if there is any. + if (source.size >= 3 && strncmp(source.data, "\xEF\xBB\xBF", 3) == 0) index += 3; + // Drop all '\r' + pkpy_SStream ss; + pkpy_SStream__ctor2(&ss, source.size + 1); + while(index < source.size){ + char c = source.data[index]; + if(c != '\r') pkpy_SStream__write_char(&ss, c); + index++; } - pkpy_Str__take_buf(&self->source, buf, len); - + self->source = pkpy_SStream__submit(&ss); self->is_precompiled = (strncmp(pkpy_Str__data(&self->source), "pkpy:", 5) == 0); c11_vector__push(const char*, &self->line_starts, pkpy_Str__data(&self->source)); } diff --git a/src/common/sstream.c b/src/common/sstream.c index ad99a93a..5b9b513f 100644 --- a/src/common/sstream.c +++ b/src/common/sstream.c @@ -10,6 +10,11 @@ void pkpy_SStream__ctor(pkpy_SStream* self) { c11_vector__ctor(&self->data, sizeof(char)); } +void pkpy_SStream__ctor2(pkpy_SStream* self, int capacity) { + c11_vector__ctor(&self->data, sizeof(char)); + c11_vector__reserve(&self->data, capacity); +} + void pkpy_SStream__dtor(pkpy_SStream* self) { c11_vector__dtor(&self->data); } @@ -24,10 +29,25 @@ void pkpy_SStream__write_int(pkpy_SStream* self, int i) { pkpy_SStream__write_cstr(self, buf); } -void pkpy_SStream__write_i64(pkpy_SStream* self, int64_t i) { - char buf[23]; // sign + 21 digits + null terminator - snprintf(buf, sizeof(buf), "%lld", i); - pkpy_SStream__write_cstr(self, buf); +void pkpy_SStream__write_i64(pkpy_SStream* self, int64_t val) { + // sign + 21 digits + null terminator + // str(-2**64).__len__() == 21 + c11_vector__reserve(&self->data, self->data.count + 23); + if(val == 0){ + pkpy_SStream__write_char(self, '0'); + return; + } + if(val < 0){ + pkpy_SStream__write_char(self, '-'); + val = -val; + } + int start = self->data.count; + while(val){ + c11_vector__push(char, &self->data, '0' + val % 10); + val /= 10; + } + int end = self->data.count - 1; + c11_vector__reverse(char, &self->data, start, end); } void pkpy_SStream__write_float(pkpy_SStream* self, float val, int precision){ diff --git a/src/common/str.c b/src/common/str.c index 9c3fb5fb..cfd48094 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -7,50 +7,13 @@ #include #include -int pkpy_utils__u8_header(unsigned char c, bool suppress) { - if((c & 0b10000000) == 0) return 1; - if((c & 0b11100000) == 0b11000000) return 2; - if((c & 0b11110000) == 0b11100000) return 3; - if((c & 0b11111000) == 0b11110000) return 4; - if((c & 0b11111100) == 0b11111000) return 5; - if((c & 0b11111110) == 0b11111100) return 6; - if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n") - return 0; -} - void pkpy_Str__ctor(pkpy_Str *self, const char *data){ pkpy_Str__ctor2(self, data, strlen(data)); } -static void pkpy_Str__check_ascii(pkpy_Str *self, char *p) { - for(int i = 0; i < self->size; i++){ - if(!isascii(p[i])){ - self->is_ascii = false; - break; - } - } -} - -void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size) { - self->size = size; - self->is_ascii = true; - self->is_sso = size < sizeof(self->_inlined); - char* p; - if(self->is_sso){ - p = self->_inlined; - memcpy(p, data, size); - p[size] = '\0'; - free(data); - }else{ - self->_ptr = data; - p = self->_ptr; - } - pkpy_Str__check_ascii(self, p); -} - void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ self->size = size; - self->is_ascii = true; + self->is_ascii = c11__isascii(data, size); self->is_sso = size < sizeof(self->_inlined); char* p; if(self->is_sso){ @@ -61,7 +24,6 @@ void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ } memcpy(p, data, size); p[size] = '\0'; - pkpy_Str__check_ascii(self, p); } void pkpy_Str__dtor(pkpy_Str *self){ @@ -288,7 +250,7 @@ int pkpy_Str__cmp2(const pkpy_Str *self, const char *other, int size){ pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str *self, int i){ i = pkpy_Str__unicode_index_to_byte(self, i); - int size = pkpy_utils__u8_header(pkpy_Str__data(self)[i], false); + int size = c11__u8_header(pkpy_Str__data(self)[i], false); return pkpy_Str__slice2(self, i, i + size); } @@ -330,7 +292,7 @@ int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i) { const char* p = pkpy_Str__data(self); int j = 0; while(i > 0) { - j += pkpy_utils__u8_header(p[j], false); + j += c11__u8_header(p[j], false); i--; } return j; @@ -409,3 +371,35 @@ c11_vector/* T=c11_string */ pkpy_Str__split2(const pkpy_Str *self, const pkpy_S if(tmp.size != 0) c11_vector__push(c11_string, &retval, tmp); return retval; } + +bool c11__isascii(const char* p, int size){ + for(int i = 0; i < size; i++) + if((unsigned char)p[i] > 127) + return false; + return true; +} + +// clang-format off +static const int kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70303,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560}; +static const int kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,70301,70312,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101}; +// clang-format on + +bool c11__is_unicode_Lo_char(int c){ + if(c == 0x1f955) return true; + int index = c11__lower_bound_int(c, kLoRangeA, 476) - kLoRangeA; + if(c == kLoRangeA[index]) return true; + index -= 1; + if(index < 0) return false; + return c >= kLoRangeA[index] && c <= kLoRangeB[index]; +} + +int c11__u8_header(unsigned char c, bool suppress) { + if((c & 0b10000000) == 0) return 1; + if((c & 0b11100000) == 0b11000000) return 2; + if((c & 0b11110000) == 0b11100000) return 3; + if((c & 0b11111000) == 0b11110000) return 4; + if((c & 0b11111100) == 0b11111000) return 5; + if((c & 0b11111110) == 0b11111100) return 6; + if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n") + return 0; +} \ No newline at end of file diff --git a/src/compiler/lexer.cpp b/src/compiler/lexer.cpp index e87ce605..0b3f1a35 100644 --- a/src/compiler/lexer.cpp +++ b/src/compiler/lexer.cpp @@ -7,11 +7,6 @@ namespace pkpy { -// clang-format off -static const uint32_t kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70303,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560}; -static const uint32_t kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,70301,70312,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101}; -// clang-format on - static bool is_possible_number_char(char c) noexcept{ switch(c) { // clang-format off @@ -25,16 +20,6 @@ static bool is_possible_number_char(char c) noexcept{ } } -static bool is_unicode_Lo_char(uint32_t c) noexcept{ - // open a hole for carrot - if(c == U'🥕') return true; - auto index = lower_bound(kLoRangeA, kLoRangeA + 476, c) - kLoRangeA; - if(c == kLoRangeA[index]) return true; - index -= 1; - if(index < 0) return false; - return c >= kLoRangeA[index] && c <= kLoRangeB[index]; -} - bool Lexer::match_n_chars(int n, char c0) noexcept{ const char* c = curr_char; for(int i = 0; i < n; i++) { @@ -108,7 +93,7 @@ Error* Lexer::eat_name() noexcept{ curr_char--; while(true) { unsigned char c = peekchar(); - int u8bytes = pkpy_utils__u8_header(c, true); + int u8bytes = c11__u8_header(c, true); if(u8bytes == 0) return SyntaxError("invalid char: %c", c); if(u8bytes == 1) { if(isalpha(c) || c == '_' || isdigit(c)) { @@ -135,7 +120,7 @@ Error* Lexer::eat_name() noexcept{ value |= (b & 0b00111111) << (6 * (u8bytes - k - 1)); } } - if(is_unicode_Lo_char(value)) + if(c11__is_unicode_Lo_char(value)) curr_char += u8bytes; else break; diff --git a/src/interpreter/iter.cpp b/src/interpreter/iter.cpp index d0fae96a..882c7f82 100644 --- a/src/interpreter/iter.cpp +++ b/src/interpreter/iter.cpp @@ -49,7 +49,7 @@ void StringIter::_register(VM* vm, PyObject* mod, PyObject* type) { Str& s = PK_OBJ_GET(Str, self.ref); if(self.i == s.size) return 0; int start = self.i; - int len = pkpy_utils__u8_header(s[self.i], false); + int len = c11__u8_header(s[self.i], false); self.i += len; vm->s_data.push(VAR(s.slice(start, self.i))); return 1; diff --git a/src/modules/random.cpp b/src/modules/random.cpp index 131aa298..03b59196 100644 --- a/src/modules/random.cpp +++ b/src/modules/random.cpp @@ -199,7 +199,7 @@ struct Random { List result(k); for(int i = 0; i < k; i++) { f64 r = self.gen.uniform(0.0, cum_weights[size - 1]); - int idx = lower_bound(cum_weights.begin(), cum_weights.end(), r) - cum_weights.begin(); + int idx = c11__lower_bound_double(r, cum_weights.begin(), cum_weights.size()) - cum_weights.begin(); result[i] = data[idx]; } return VAR(std::move(result)); From 3bd794f2fb9d84cb4bbd65fe122a513b7a494687 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 15 Jun 2024 13:39:32 +0800 Subject: [PATCH 59/60] some fix --- include/pocketpy/common/_generated.h | 27 ++++++++++++++++++++++++++ include/pocketpy/common/_generated.hpp | 21 -------------------- prebuild.py | 23 +++++++++++++--------- src/common/_generated.c | 19 ++++++++++++++++++ src/common/_generated.cpp | 21 -------------------- src/modules/modules.cpp | 2 +- src/pocketpy.cpp | 2 +- 7 files changed, 62 insertions(+), 53 deletions(-) create mode 100644 include/pocketpy/common/_generated.h delete mode 100644 include/pocketpy/common/_generated.hpp create mode 100644 src/common/_generated.c delete mode 100644 src/common/_generated.cpp diff --git a/include/pocketpy/common/_generated.h b/include/pocketpy/common/_generated.h new file mode 100644 index 00000000..23841897 --- /dev/null +++ b/include/pocketpy/common/_generated.h @@ -0,0 +1,27 @@ +#pragma once +// generated by prebuild.py + +#ifdef __cplusplus +extern "C" { +#endif + +extern const char kPythonLibs__enum[]; +extern const char kPythonLibs__long[]; +extern const char kPythonLibs__set[]; +extern const char kPythonLibs_bisect[]; +extern const char kPythonLibs_builtins[]; +extern const char kPythonLibs_cmath[]; +extern const char kPythonLibs_collections[]; +extern const char kPythonLibs_colorsys[]; +extern const char kPythonLibs_datetime[]; +extern const char kPythonLibs_functools[]; +extern const char kPythonLibs_heapq[]; +extern const char kPythonLibs_itertools[]; +extern const char kPythonLibs_operator[]; +extern const char kPythonLibs_pickle[]; +extern const char kPythonLibs_this[]; +extern const char kPythonLibs_typing[]; + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/include/pocketpy/common/_generated.hpp b/include/pocketpy/common/_generated.hpp deleted file mode 100644 index 605ede4c..00000000 --- a/include/pocketpy/common/_generated.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once -// generated by prebuild.py - -namespace pkpy{ - extern const char kPythonLibs__enum[]; - extern const char kPythonLibs__long[]; - extern const char kPythonLibs__set[]; - extern const char kPythonLibs_bisect[]; - extern const char kPythonLibs_builtins[]; - extern const char kPythonLibs_cmath[]; - extern const char kPythonLibs_collections[]; - extern const char kPythonLibs_colorsys[]; - extern const char kPythonLibs_datetime[]; - extern const char kPythonLibs_functools[]; - extern const char kPythonLibs_heapq[]; - extern const char kPythonLibs_itertools[]; - extern const char kPythonLibs_operator[]; - extern const char kPythonLibs_pickle[]; - extern const char kPythonLibs_this[]; - extern const char kPythonLibs_typing[]; -} // namespace pkpy diff --git a/prebuild.py b/prebuild.py index e2f44721..d8790fb8 100644 --- a/prebuild.py +++ b/prebuild.py @@ -23,26 +23,31 @@ def get_sources(): sources = get_sources() # use LF line endings instead of CRLF -with open("include/pocketpy/common/_generated.hpp", "wt", encoding='utf-8', newline='\n') as f: +with open("include/pocketpy/common/_generated.h", "wt", encoding='utf-8', newline='\n') as f: data = '''#pragma once // generated by prebuild.py -namespace pkpy{ +#ifdef __cplusplus +extern "C" { +#endif + ''' for key in sorted(sources.keys()): value = sources[key] - data += f' extern const char kPythonLibs_{key}[];\n' - data += '} // namespace pkpy\n' + data += f'extern const char kPythonLibs_{key}[];\n' + data += ''' +#ifdef __cplusplus +} // extern "C" +#endif +''' f.write(data) -with open("src/common/_generated.cpp", "wt", encoding='utf-8', newline='\n') as f: +with open("src/common/_generated.c", "wt", encoding='utf-8', newline='\n') as f: data = '''// generated by prebuild.py -#include "pocketpy/common/_generated.hpp" +#include "pocketpy/common/_generated.h" -namespace pkpy{ ''' for key in sorted(sources.keys()): value = sources[key] - data += f' const char kPythonLibs_{key}[] = {value};\n' - data += '} // namespace pkpy\n' + data += f'const char kPythonLibs_{key}[] = {value};\n' f.write(data) diff --git a/src/common/_generated.c b/src/common/_generated.c new file mode 100644 index 00000000..1ba06cee --- /dev/null +++ b/src/common/_generated.c @@ -0,0 +1,19 @@ +// generated by prebuild.py +#include "pocketpy/common/_generated.h" + +const char kPythonLibs__enum[] = "class Enum:\n def __init__(self, name, value):\n self.name = name\n self.value = value\n\n def __str__(self):\n return f'{type(self).__name__}.{self.name}'\n \n def __repr__(self):\n return f'<{str(self)}: {self.value!r}>'\n \n"; +const char kPythonLibs__long[] = "# after v1.2.2, int is always 64-bit\nPyLong_SHIFT = 60//2 - 1\n\nPyLong_BASE = 2 ** PyLong_SHIFT\nPyLong_MASK = PyLong_BASE - 1\nPyLong_DECIMAL_SHIFT = 4\nPyLong_DECIMAL_BASE = 10 ** PyLong_DECIMAL_SHIFT\n\n##############################################################\n\ndef ulong_fromint(x: int):\n # return a list of digits and sign\n if x == 0: return [0], 1\n sign = 1 if x > 0 else -1\n if sign < 0: x = -x\n res = []\n while x:\n res.append(x & PyLong_MASK)\n x >>= PyLong_SHIFT\n return res, sign\n\ndef ulong_cmp(a: list, b: list) -> int:\n # return 1 if a>b, -1 if a len(b): return 1\n if len(a) < len(b): return -1\n for i in range(len(a)-1, -1, -1):\n if a[i] > b[i]: return 1\n if a[i] < b[i]: return -1\n return 0\n\ndef ulong_pad_(a: list, size: int):\n # pad leading zeros to have `size` digits\n delta = size - len(a)\n if delta > 0:\n a.extend([0] * delta)\n\ndef ulong_unpad_(a: list):\n # remove leading zeros\n while len(a)>1 and a[-1]==0:\n a.pop()\n\ndef ulong_add(a: list, b: list) -> list:\n res = [0] * max(len(a), len(b))\n ulong_pad_(a, len(res))\n ulong_pad_(b, len(res))\n carry = 0\n for i in range(len(res)):\n carry += a[i] + b[i]\n res[i] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n if carry > 0:\n res.append(carry)\n return res\n\ndef ulong_inc_(a: list):\n a[0] += 1\n for i in range(len(a)):\n if a[i] < PyLong_BASE: break\n a[i] -= PyLong_BASE\n if i+1 == len(a):\n a.append(1)\n else:\n a[i+1] += 1\n \n\ndef ulong_sub(a: list, b: list) -> list:\n # a >= b\n res = []\n borrow = 0\n for i in range(len(b)):\n tmp = a[i] - b[i] - borrow\n if tmp < 0:\n tmp += PyLong_BASE\n borrow = 1\n else:\n borrow = 0\n res.append(tmp)\n for i in range(len(b), len(a)):\n tmp = a[i] - borrow\n if tmp < 0:\n tmp += PyLong_BASE\n borrow = 1\n else:\n borrow = 0\n res.append(tmp)\n ulong_unpad_(res)\n return res\n\ndef ulong_divmodi(a: list, b: int):\n # b > 0\n res = []\n carry = 0\n for i in range(len(a)-1, -1, -1):\n carry <<= PyLong_SHIFT\n carry += a[i]\n res.append(carry // b)\n carry %= b\n res.reverse()\n ulong_unpad_(res)\n return res, carry\n\n\ndef ulong_divmod(a: list, b: list):\n\n if ulong_cmp(a, b) < 0:\n return [0], a\n\n if len(b) == 1:\n q, r = ulong_divmodi(a, b[0])\n r, _ = ulong_fromint(r)\n return q, r\n\n max = (len(a) - len(b)) * PyLong_SHIFT + \x5c\n (a[-1].bit_length() - b[-1].bit_length())\n\n low = [0]\n\n high = (max // PyLong_SHIFT) * [0] + \x5c\n [(2**(max % PyLong_SHIFT)) & PyLong_MASK]\n\n while ulong_cmp(low, high) < 0:\n ulong_inc_(high)\n mid, r = ulong_divmodi(ulong_add(low, high), 2)\n if ulong_cmp(a, ulong_mul(b, mid)) >= 0:\n low = mid\n else:\n high = ulong_sub(mid, [1])\n\n q = [0] * (len(a) - len(b) + 1)\n while ulong_cmp(a, ulong_mul(b, low)) >= 0:\n q = ulong_add(q, low)\n a = ulong_sub(a, ulong_mul(b, low))\n ulong_unpad_(q)\n return q, a\n\ndef ulong_floordivi(a: list, b: int):\n # b > 0\n return ulong_divmodi(a, b)[0]\n\ndef ulong_muli(a: list, b: int):\n # b >= 0\n res = [0] * len(a)\n carry = 0\n for i in range(len(a)):\n carry += a[i] * b\n res[i] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n if carry > 0:\n res.append(carry)\n return res\n\ndef ulong_mul(a: list, b: list):\n N = len(a) + len(b)\n # use grade-school multiplication\n res = [0] * N\n for i in range(len(a)):\n carry = 0\n for j in range(len(b)):\n carry += res[i+j] + a[i] * b[j]\n res[i+j] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n res[i+len(b)] = carry\n ulong_unpad_(res)\n return res\n\ndef ulong_powi(a: list, b: int):\n # b >= 0\n if b == 0: return [1]\n res = [1]\n while b:\n if b & 1:\n res = ulong_mul(res, a)\n a = ulong_mul(a, a)\n b >>= 1\n return res\n\ndef ulong_repr(x: list) -> str:\n res = []\n while len(x)>1 or x[0]>0: # non-zero\n x, r = ulong_divmodi(x, PyLong_DECIMAL_BASE)\n res.append(str(r).zfill(PyLong_DECIMAL_SHIFT))\n res.reverse()\n s = ''.join(res)\n if len(s) == 0: return '0'\n if len(s) > 1: s = s.lstrip('0')\n return s\n\ndef ulong_fromstr(s: str):\n if s[-1] == 'L':\n s = s[:-1]\n res, base = [0], [1]\n if s[0] == '-':\n sign = -1\n s = s[1:]\n else:\n sign = 1\n s = s[::-1]\n for c in s:\n c = ord(c) - 48\n assert 0 <= c <= 9\n res = ulong_add(res, ulong_muli(base, c))\n base = ulong_muli(base, 10)\n return res, sign\n\nclass long:\n def __init__(self, x):\n if type(x) is tuple:\n self.digits, self.sign = x\n elif type(x) is int:\n self.digits, self.sign = ulong_fromint(x)\n elif type(x) is float:\n self.digits, self.sign = ulong_fromint(int(x))\n elif type(x) is str:\n self.digits, self.sign = ulong_fromstr(x)\n elif type(x) is long:\n self.digits, self.sign = x.digits.copy(), x.sign\n else:\n raise TypeError('expected int or str')\n \n def __len__(self):\n return len(self.digits)\n\n def __add__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign == other.sign:\n return long((ulong_add(self.digits, other.digits), self.sign))\n else:\n cmp = ulong_cmp(self.digits, other.digits)\n if cmp == 0:\n return long(0)\n if cmp > 0:\n return long((ulong_sub(self.digits, other.digits), self.sign))\n else:\n return long((ulong_sub(other.digits, self.digits), other.sign))\n \n def __radd__(self, other):\n return self.__add__(other)\n \n def __sub__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign != other.sign:\n return long((ulong_add(self.digits, other.digits), self.sign))\n cmp = ulong_cmp(self.digits, other.digits)\n if cmp == 0:\n return long(0)\n if cmp > 0:\n return long((ulong_sub(self.digits, other.digits), self.sign))\n else:\n return long((ulong_sub(other.digits, self.digits), -other.sign))\n \n def __rsub__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n return other.__sub__(self)\n \n def __mul__(self, other):\n if type(other) is int:\n return long((\n ulong_muli(self.digits, abs(other)),\n self.sign * (1 if other >= 0 else -1)\n ))\n elif type(other) is long:\n return long((\n ulong_mul(self.digits, other.digits),\n self.sign * other.sign\n ))\n return NotImplemented\n \n def __rmul__(self, other):\n return self.__mul__(other)\n \n #######################################################\n def __divmod__(self, other):\n if type(other) is int:\n assert self.sign == 1 and other > 0\n q, r = ulong_divmodi(self.digits, other)\n return long((q, 1)), r\n if type(other) is long:\n assert self.sign == 1 and other.sign == 1\n q, r = ulong_divmod(self.digits, other.digits)\n assert len(other)>1 or other.digits[0]>0\n return long((q, 1)), long((r, 1))\n raise NotImplementedError\n\n def __floordiv__(self, other):\n return self.__divmod__(other)[0]\n\n def __mod__(self, other):\n return self.__divmod__(other)[1]\n\n def __pow__(self, other: int):\n assert type(other) is int and other >= 0\n if self.sign == -1 and other & 1:\n sign = -1\n else:\n sign = 1\n return long((ulong_powi(self.digits, other), sign))\n \n def __lshift__(self, other: int):\n assert type(other) is int and other >= 0\n x = self.digits.copy()\n q, r = divmod(other, PyLong_SHIFT)\n x = [0]*q + x\n for _ in range(r): x = ulong_muli(x, 2)\n return long((x, self.sign))\n \n def __rshift__(self, other: int):\n assert type(other) is int and other >= 0\n x = self.digits.copy()\n q, r = divmod(other, PyLong_SHIFT)\n x = x[q:]\n if not x: return long(0)\n for _ in range(r): x = ulong_floordivi(x, 2)\n return long((x, self.sign))\n \n def __neg__(self):\n return long((self.digits, -self.sign))\n \n def __cmp__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign > other.sign:\n return 1\n elif self.sign < other.sign:\n return -1\n else:\n return ulong_cmp(self.digits, other.digits)\n \n def __eq__(self, other):\n return self.__cmp__(other) == 0\n def __lt__(self, other):\n return self.__cmp__(other) < 0\n def __le__(self, other):\n return self.__cmp__(other) <= 0\n def __gt__(self, other):\n return self.__cmp__(other) > 0\n def __ge__(self, other):\n return self.__cmp__(other) >= 0\n \n def __repr__(self):\n prefix = '-' if self.sign < 0 else ''\n return prefix + ulong_repr(self.digits) + 'L'\n"; +const char kPythonLibs__set[] = "class set:\n def __init__(self, iterable=None):\n iterable = iterable or []\n self._a = {}\n self.update(iterable)\n\n def add(self, elem):\n self._a[elem] = None\n \n def discard(self, elem):\n self._a.pop(elem, None)\n\n def remove(self, elem):\n del self._a[elem]\n \n def clear(self):\n self._a.clear()\n\n def update(self, other):\n for elem in other:\n self.add(elem)\n\n def __len__(self):\n return len(self._a)\n \n def copy(self):\n return set(self._a.keys())\n \n def __and__(self, other):\n return {elem for elem in self if elem in other}\n\n def __sub__(self, other):\n return {elem for elem in self if elem not in other}\n \n def __or__(self, other):\n ret = self.copy()\n ret.update(other)\n return ret\n\n def __xor__(self, other): \n _0 = self - other\n _1 = other - self\n return _0 | _1\n\n def union(self, other):\n return self | other\n\n def intersection(self, other):\n return self & other\n\n def difference(self, other):\n return self - other\n\n def symmetric_difference(self, other): \n return self ^ other\n \n def __eq__(self, other):\n if not isinstance(other, set):\n return NotImplemented\n return len(self ^ other) == 0\n\n def isdisjoint(self, other):\n return len(self & other) == 0\n \n def issubset(self, other):\n return len(self - other) == 0\n \n def issuperset(self, other):\n return len(other - self) == 0\n\n def __contains__(self, elem):\n return elem in self._a\n \n def __repr__(self):\n if len(self) == 0:\n return 'set()'\n return '{'+ ', '.join([repr(i) for i in self._a.keys()]) + '}'\n \n def __iter__(self):\n return iter(self._a.keys())"; +const char kPythonLibs_bisect[] = "\"\"\"Bisection algorithms.\"\"\"\n\ndef insort_right(a, x, lo=0, hi=None):\n \"\"\"Insert item x in list a, and keep it sorted assuming a is sorted.\n\n If x is already in a, insert it to the right of the rightmost x.\n\n Optional args lo (default 0) and hi (default len(a)) bound the\n slice of a to be searched.\n \"\"\"\n\n lo = bisect_right(a, x, lo, hi)\n a.insert(lo, x)\n\ndef bisect_right(a, x, lo=0, hi=None):\n \"\"\"Return the index where to insert item x in list a, assuming a is sorted.\n\n The return value i is such that all e in a[:i] have e <= x, and all e in\n a[i:] have e > x. So if x already appears in the list, a.insert(x) will\n insert just after the rightmost x already there.\n\n Optional args lo (default 0) and hi (default len(a)) bound the\n slice of a to be searched.\n \"\"\"\n\n if lo < 0:\n raise ValueError('lo must be non-negative')\n if hi is None:\n hi = len(a)\n while lo < hi:\n mid = (lo+hi)//2\n if x < a[mid]: hi = mid\n else: lo = mid+1\n return lo\n\ndef insort_left(a, x, lo=0, hi=None):\n \"\"\"Insert item x in list a, and keep it sorted assuming a is sorted.\n\n If x is already in a, insert it to the left of the leftmost x.\n\n Optional args lo (default 0) and hi (default len(a)) bound the\n slice of a to be searched.\n \"\"\"\n\n lo = bisect_left(a, x, lo, hi)\n a.insert(lo, x)\n\n\ndef bisect_left(a, x, lo=0, hi=None):\n \"\"\"Return the index where to insert item x in list a, assuming a is sorted.\n\n The return value i is such that all e in a[:i] have e < x, and all e in\n a[i:] have e >= x. So if x already appears in the list, a.insert(x) will\n insert just before the leftmost x already there.\n\n Optional args lo (default 0) and hi (default len(a)) bound the\n slice of a to be searched.\n \"\"\"\n\n if lo < 0:\n raise ValueError('lo must be non-negative')\n if hi is None:\n hi = len(a)\n while lo < hi:\n mid = (lo+hi)//2\n if a[mid] < x: lo = mid+1\n else: hi = mid\n return lo\n\n# Create aliases\nbisect = bisect_right\ninsort = insort_right\n"; +const char kPythonLibs_builtins[] = "from __builtins import next as __builtins_next\n\ndef all(iterable):\n for i in iterable:\n if not i:\n return False\n return True\n\ndef any(iterable):\n for i in iterable:\n if i:\n return True\n return False\n\ndef enumerate(iterable, start=0):\n n = start\n for elem in iterable:\n yield n, elem\n n += 1\n\ndef sum(iterable):\n res = 0\n for i in iterable:\n res += i\n return res\n\ndef map(f, iterable):\n for i in iterable:\n yield f(i)\n\ndef filter(f, iterable):\n for i in iterable:\n if f(i):\n yield i\n\ndef zip(a, b):\n a = iter(a)\n b = iter(b)\n while True:\n ai = __builtins_next(a)\n bi = __builtins_next(b)\n if ai is StopIteration or bi is StopIteration:\n break\n yield ai, bi\n\ndef reversed(iterable):\n a = list(iterable)\n a.reverse()\n return a\n\ndef sorted(iterable, key=None, reverse=False):\n a = list(iterable)\n a.sort(key=key, reverse=reverse)\n return a\n\n##### str #####\ndef __format_string(self: str, *args, **kwargs) -> str:\n def tokenizeString(s: str):\n tokens = []\n L, R = 0,0\n \n mode = None\n curArg = 0\n # lookingForKword = False\n \n while(R int:\n n = 0\n for item in self:\n if item == x:\n n += 1\n return n\n \n def extend(self, iterable: Iterable[T]):\n for x in iterable:\n self.append(x)\n\n def extendleft(self, iterable: Iterable[T]):\n for x in iterable:\n self.appendleft(x)\n \n def pop(self) -> T:\n if self._head == self._tail:\n raise IndexError(\"pop from an empty deque\")\n self._tail = (self._tail - 1 + self._capacity) % self._capacity\n return self._data[self._tail]\n \n def popleft(self) -> T:\n if self._head == self._tail:\n raise IndexError(\"pop from an empty deque\")\n x = self._data[self._head]\n self._head = (self._head + 1) % self._capacity\n return x\n \n def clear(self):\n i = self._head\n while i != self._tail:\n self._data[i] = None\n i = (i + 1) % self._capacity\n self._head = 0\n self._tail = 0\n\n def rotate(self, n: int = 1):\n if len(self) == 0:\n return\n if n > 0:\n n = n % len(self)\n for _ in range(n):\n self.appendleft(self.pop())\n elif n < 0:\n n = -n % len(self)\n for _ in range(n):\n self.append(self.popleft())\n\n def __len__(self) -> int:\n return (self._tail - self._head + self._capacity) % self._capacity\n\n def __contains__(self, x: object) -> bool:\n for item in self:\n if item == x:\n return True\n return False\n \n def __iter__(self):\n i = self._head\n while i != self._tail:\n yield self._data[i]\n i = (i + 1) % self._capacity\n\n def __eq__(self, other: object) -> bool:\n if not isinstance(other, deque):\n return False\n if len(self) != len(other):\n return False\n for x, y in zip(self, other):\n if x != y:\n return False\n return True\n \n def __repr__(self) -> str:\n return f\"deque({list(self)!r})\"\n\n"; +const char kPythonLibs_colorsys[] = "\"\"\"Conversion functions between RGB and other color systems.\n\nThis modules provides two functions for each color system ABC:\n\n rgb_to_abc(r, g, b) --> a, b, c\n abc_to_rgb(a, b, c) --> r, g, b\n\nAll inputs and outputs are triples of floats in the range [0.0...1.0]\n(with the exception of I and Q, which covers a slightly larger range).\nInputs outside the valid range may cause exceptions or invalid outputs.\n\nSupported color systems:\nRGB: Red, Green, Blue components\nYIQ: Luminance, Chrominance (used by composite video signals)\nHLS: Hue, Luminance, Saturation\nHSV: Hue, Saturation, Value\n\"\"\"\n\n# References:\n# http://en.wikipedia.org/wiki/YIQ\n# http://en.wikipedia.org/wiki/HLS_color_space\n# http://en.wikipedia.org/wiki/HSV_color_space\n\n__all__ = [\"rgb_to_yiq\",\"yiq_to_rgb\",\"rgb_to_hls\",\"hls_to_rgb\",\n \"rgb_to_hsv\",\"hsv_to_rgb\"]\n\n# Some floating point constants\n\nONE_THIRD = 1.0/3.0\nONE_SIXTH = 1.0/6.0\nTWO_THIRD = 2.0/3.0\n\n# YIQ: used by composite video signals (linear combinations of RGB)\n# Y: perceived grey level (0.0 == black, 1.0 == white)\n# I, Q: color components\n#\n# There are a great many versions of the constants used in these formulae.\n# The ones in this library uses constants from the FCC version of NTSC.\n\ndef rgb_to_yiq(r, g, b):\n y = 0.30*r + 0.59*g + 0.11*b\n i = 0.74*(r-y) - 0.27*(b-y)\n q = 0.48*(r-y) + 0.41*(b-y)\n return (y, i, q)\n\ndef yiq_to_rgb(y, i, q):\n # r = y + (0.27*q + 0.41*i) / (0.74*0.41 + 0.27*0.48)\n # b = y + (0.74*q - 0.48*i) / (0.74*0.41 + 0.27*0.48)\n # g = y - (0.30*(r-y) + 0.11*(b-y)) / 0.59\n\n r = y + 0.9468822170900693*i + 0.6235565819861433*q\n g = y - 0.27478764629897834*i - 0.6356910791873801*q\n b = y - 1.1085450346420322*i + 1.7090069284064666*q\n\n if r < 0.0:\n r = 0.0\n if g < 0.0:\n g = 0.0\n if b < 0.0:\n b = 0.0\n if r > 1.0:\n r = 1.0\n if g > 1.0:\n g = 1.0\n if b > 1.0:\n b = 1.0\n return (r, g, b)\n\n\n# HLS: Hue, Luminance, Saturation\n# H: position in the spectrum\n# L: color lightness\n# S: color saturation\n\ndef rgb_to_hls(r, g, b):\n maxc = max(r, g, b)\n minc = min(r, g, b)\n sumc = (maxc+minc)\n rangec = (maxc-minc)\n l = sumc/2.0\n if minc == maxc:\n return 0.0, l, 0.0\n if l <= 0.5:\n s = rangec / sumc\n else:\n s = rangec / (2.0-maxc-minc) # Not always 2.0-sumc: gh-106498.\n rc = (maxc-r) / rangec\n gc = (maxc-g) / rangec\n bc = (maxc-b) / rangec\n if r == maxc:\n h = bc-gc\n elif g == maxc:\n h = 2.0+rc-bc\n else:\n h = 4.0+gc-rc\n # h = (h/6.0) % 1.0\n h = h / 6.0\n h = h - int(h)\n return h, l, s\n\ndef hls_to_rgb(h, l, s):\n if s == 0.0:\n return l, l, l\n if l <= 0.5:\n m2 = l * (1.0+s)\n else:\n m2 = l+s-(l*s)\n m1 = 2.0*l - m2\n return (_v(m1, m2, h+ONE_THIRD), _v(m1, m2, h), _v(m1, m2, h-ONE_THIRD))\n\ndef _v(m1, m2, hue):\n # hue = hue % 1.0\n hue = hue - int(hue)\n if hue < ONE_SIXTH:\n return m1 + (m2-m1)*hue*6.0\n if hue < 0.5:\n return m2\n if hue < TWO_THIRD:\n return m1 + (m2-m1)*(TWO_THIRD-hue)*6.0\n return m1\n\n\n# HSV: Hue, Saturation, Value\n# H: position in the spectrum\n# S: color saturation (\"purity\")\n# V: color brightness\n\ndef rgb_to_hsv(r, g, b):\n maxc = max(r, g, b)\n minc = min(r, g, b)\n rangec = (maxc-minc)\n v = maxc\n if minc == maxc:\n return 0.0, 0.0, v\n s = rangec / maxc\n rc = (maxc-r) / rangec\n gc = (maxc-g) / rangec\n bc = (maxc-b) / rangec\n if r == maxc:\n h = bc-gc\n elif g == maxc:\n h = 2.0+rc-bc\n else:\n h = 4.0+gc-rc\n # h = (h/6.0) % 1.0\n h = h / 6.0\n h = h - int(h)\n return h, s, v\n\ndef hsv_to_rgb(h, s, v):\n if s == 0.0:\n return v, v, v\n i = int(h*6.0) # XXX assume int() truncates!\n f = (h*6.0) - i\n p = v*(1.0 - s)\n q = v*(1.0 - s*f)\n t = v*(1.0 - s*(1.0-f))\n i = i%6\n if i == 0:\n return v, t, p\n if i == 1:\n return q, v, p\n if i == 2:\n return p, v, t\n if i == 3:\n return p, q, v\n if i == 4:\n return t, p, v\n if i == 5:\n return v, p, q\n # Cannot get here"; +const char kPythonLibs_datetime[] = "from time import localtime\n\nclass timedelta:\n def __init__(self, days=0, seconds=0):\n self.days = days\n self.seconds = seconds\n\n def __repr__(self):\n return f\"datetime.timedelta(days={self.days}, seconds={self.seconds})\"\n\n def __eq__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) == (other.days, other.seconds)\n\n def __lt__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) < (other.days, other.seconds)\n\n def __le__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) <= (other.days, other.seconds)\n\n def __gt__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) > (other.days, other.seconds)\n\n def __ge__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) >= (other.days, other.seconds)\n\n\nclass date:\n def __init__(self, year: int, month: int, day: int):\n self.year = year\n self.month = month\n self.day = day\n\n @staticmethod\n def today():\n t = localtime()\n return date(t.tm_year, t.tm_mon, t.tm_mday)\n\n def __eq__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) == (other.year, other.month, other.day)\n\n def __lt__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) < (other.year, other.month, other.day)\n\n def __le__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) <= (other.year, other.month, other.day)\n\n def __gt__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) > (other.year, other.month, other.day)\n\n def __ge__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) >= (other.year, other.month, other.day)\n\n def __str__(self):\n return f\"{self.year}-{self.month:02}-{self.day:02}\"\n\n def __repr__(self):\n return f\"datetime.date({self.year}, {self.month}, {self.day})\"\n\n\nclass datetime(date):\n def __init__(self, year: int, month: int, day: int, hour: int, minute: int, second: int):\n super().__init__(year, month, day)\n # Validate and set hour, minute, and second\n if not 0 <= hour <= 23:\n raise ValueError(\"Hour must be between 0 and 23\")\n self.hour = hour\n if not 0 <= minute <= 59:\n raise ValueError(\"Minute must be between 0 and 59\")\n self.minute = minute\n if not 0 <= second <= 59:\n raise ValueError(\"Second must be between 0 and 59\")\n self.second = second\n\n def date(self) -> date:\n return date(self.year, self.month, self.day)\n\n @staticmethod\n def now():\n t = localtime()\n tm_sec = t.tm_sec\n if tm_sec == 60:\n tm_sec = 59\n return datetime(t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, tm_sec)\n\n def __str__(self):\n return f\"{self.year}-{self.month:02}-{self.day:02} {self.hour:02}:{self.minute:02}:{self.second:02}\"\n\n def __repr__(self):\n return f\"datetime.datetime({self.year}, {self.month}, {self.day}, {self.hour}, {self.minute}, {self.second})\"\n\n def __eq__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) ==\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def __lt__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) <\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def __le__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) <=\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def __gt__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) >\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def __ge__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) >=\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def timestamp(self) -> float:\n raise NotImplementedError\n\n"; +const char kPythonLibs_functools[] = "from __builtins import next\n\nclass cache:\n def __init__(self, f):\n self.f = f\n self.cache = {}\n\n def __call__(self, *args):\n if args not in self.cache:\n self.cache[args] = self.f(*args)\n return self.cache[args]\n \ndef reduce(function, sequence, initial=...):\n it = iter(sequence)\n if initial is ...:\n value = next(it)\n if value is StopIteration:\n raise TypeError(\"reduce() of empty iterable with no initial value\")\n else:\n value = initial\n for element in it:\n value = function(value, element)\n return value\n\nclass partial:\n def __init__(self, f, *args, **kwargs):\n self.f = f\n if not callable(f):\n raise TypeError(\"the first argument must be callable\")\n self.args = args\n self.kwargs = kwargs\n\n def __call__(self, *args, **kwargs):\n kwargs.update(self.kwargs)\n return self.f(*self.args, *args, **kwargs)\n\n"; +const char kPythonLibs_heapq[] = "# Heap queue algorithm (a.k.a. priority queue)\ndef heappush(heap, item):\n \"\"\"Push item onto heap, maintaining the heap invariant.\"\"\"\n heap.append(item)\n _siftdown(heap, 0, len(heap)-1)\n\ndef heappop(heap):\n \"\"\"Pop the smallest item off the heap, maintaining the heap invariant.\"\"\"\n lastelt = heap.pop() # raises appropriate IndexError if heap is empty\n if heap:\n returnitem = heap[0]\n heap[0] = lastelt\n _siftup(heap, 0)\n return returnitem\n return lastelt\n\ndef heapreplace(heap, item):\n \"\"\"Pop and return the current smallest value, and add the new item.\n\n This is more efficient than heappop() followed by heappush(), and can be\n more appropriate when using a fixed-size heap. Note that the value\n returned may be larger than item! That constrains reasonable uses of\n this routine unless written as part of a conditional replacement:\n\n if item > heap[0]:\n item = heapreplace(heap, item)\n \"\"\"\n returnitem = heap[0] # raises appropriate IndexError if heap is empty\n heap[0] = item\n _siftup(heap, 0)\n return returnitem\n\ndef heappushpop(heap, item):\n \"\"\"Fast version of a heappush followed by a heappop.\"\"\"\n if heap and heap[0] < item:\n item, heap[0] = heap[0], item\n _siftup(heap, 0)\n return item\n\ndef heapify(x):\n \"\"\"Transform list into a heap, in-place, in O(len(x)) time.\"\"\"\n n = len(x)\n # Transform bottom-up. The largest index there's any point to looking at\n # is the largest with a child index in-range, so must have 2*i + 1 < n,\n # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so\n # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is\n # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.\n for i in reversed(range(n//2)):\n _siftup(x, i)\n\n# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos\n# is the index of a leaf with a possibly out-of-order value. Restore the\n# heap invariant.\ndef _siftdown(heap, startpos, pos):\n newitem = heap[pos]\n # Follow the path to the root, moving parents down until finding a place\n # newitem fits.\n while pos > startpos:\n parentpos = (pos - 1) >> 1\n parent = heap[parentpos]\n if newitem < parent:\n heap[pos] = parent\n pos = parentpos\n continue\n break\n heap[pos] = newitem\n\ndef _siftup(heap, pos):\n endpos = len(heap)\n startpos = pos\n newitem = heap[pos]\n # Bubble up the smaller child until hitting a leaf.\n childpos = 2*pos + 1 # leftmost child position\n while childpos < endpos:\n # Set childpos to index of smaller child.\n rightpos = childpos + 1\n if rightpos < endpos and not heap[childpos] < heap[rightpos]:\n childpos = rightpos\n # Move the smaller child up.\n heap[pos] = heap[childpos]\n pos = childpos\n childpos = 2*pos + 1\n # The leaf at pos is empty now. Put newitem there, and bubble it up\n # to its final resting place (by sifting its parents down).\n heap[pos] = newitem\n _siftdown(heap, startpos, pos)"; +const char kPythonLibs_itertools[] = "from __builtins import next\n\ndef zip_longest(a, b):\n a = iter(a)\n b = iter(b)\n while True:\n ai = next(a)\n bi = next(b)\n if ai is StopIteration and bi is StopIteration:\n break\n if ai is StopIteration:\n ai = None\n if bi is StopIteration:\n bi = None\n yield ai, bi\n"; +const char kPythonLibs_operator[] = "# https://docs.python.org/3/library/operator.html#mapping-operators-to-functions\n\ndef le(a, b): return a <= b\ndef lt(a, b): return a < b\ndef ge(a, b): return a >= b\ndef gt(a, b): return a > b\ndef eq(a, b): return a == b\ndef ne(a, b): return a != b\n\ndef and_(a, b): return a & b\ndef or_(a, b): return a | b\ndef xor(a, b): return a ^ b\ndef invert(a): return ~a\ndef lshift(a, b): return a << b\ndef rshift(a, b): return a >> b\n\ndef is_(a, b): return a is b\ndef is_not(a, b): return a is not b\ndef not_(a): return not a\ndef truth(a): return bool(a)\ndef contains(a, b): return b in a\n\ndef add(a, b): return a + b\ndef sub(a, b): return a - b\ndef mul(a, b): return a * b\ndef truediv(a, b): return a / b\ndef floordiv(a, b): return a // b\ndef mod(a, b): return a % b\ndef pow(a, b): return a ** b\ndef neg(a): return -a\ndef matmul(a, b): return a @ b\n\ndef getitem(a, b): return a[b]\ndef setitem(a, b, c): a[b] = c\ndef delitem(a, b): del a[b]\n\ndef iadd(a, b): a += b; return a\ndef isub(a, b): a -= b; return a\ndef imul(a, b): a *= b; return a\ndef itruediv(a, b): a /= b; return a\ndef ifloordiv(a, b): a //= b; return a\ndef imod(a, b): a %= b; return a\n# def ipow(a, b): a **= b; return a\n# def imatmul(a, b): a @= b; return a\ndef iand(a, b): a &= b; return a\ndef ior(a, b): a |= b; return a\ndef ixor(a, b): a ^= b; return a\ndef ilshift(a, b): a <<= b; return a\ndef irshift(a, b): a >>= b; return a\n"; +const char kPythonLibs_pickle[] = "import json\nfrom c import struct\nimport builtins\n\n_BASIC_TYPES = [int, float, str, bool, type(None)]\n_MOD_T_SEP = \"@\"\n\ndef _find_class(path: str):\n if _MOD_T_SEP not in path:\n return builtins.__dict__[path]\n modpath, name = path.split(_MOD_T_SEP)\n return __import__(modpath).__dict__[name]\n\nclass _Pickler:\n def __init__(self, obj) -> None:\n self.obj = obj\n self.raw_memo = {} # id -> int\n self.memo = [] # int -> object\n\n @staticmethod\n def _type_id(t: type):\n assert type(t) is type\n name = t.__name__\n mod = t.__module__\n if mod is not None:\n name = mod.__path__ + _MOD_T_SEP + name\n return name\n\n def wrap(self, o):\n o_t = type(o)\n if o_t in _BASIC_TYPES:\n return o\n if o_t is type:\n return [\"type\", self._type_id(o)]\n\n index = self.raw_memo.get(id(o), None)\n if index is not None:\n return [index]\n \n ret = []\n index = len(self.memo)\n self.memo.append(ret)\n self.raw_memo[id(o)] = index\n\n if o_t is tuple:\n ret.append(\"tuple\")\n ret.append([self.wrap(i) for i in o])\n return [index]\n if o_t is bytes:\n ret.append(\"bytes\")\n ret.append([o[j] for j in range(len(o))])\n return [index]\n if o_t is list:\n ret.append(\"list\")\n ret.append([self.wrap(i) for i in o])\n return [index]\n if o_t is dict:\n ret.append(\"dict\")\n ret.append([[self.wrap(k), self.wrap(v)] for k,v in o.items()])\n return [index]\n \n _0 = self._type_id(o_t)\n\n if getattr(o_t, '__struct__', False):\n ret.append(_0)\n ret.append(o.tostruct().hex())\n return [index]\n\n if hasattr(o, \"__getnewargs__\"):\n _1 = o.__getnewargs__() # an iterable\n _1 = [self.wrap(i) for i in _1]\n else:\n _1 = None\n\n if o.__dict__ is None:\n _2 = None\n else:\n _2 = {k: self.wrap(v) for k,v in o.__dict__.items()}\n\n ret.append(_0) # type id\n ret.append(_1) # newargs\n ret.append(_2) # state\n return [index]\n \n def run_pipe(self):\n o = self.wrap(self.obj)\n return [o, self.memo]\n\n\n\nclass _Unpickler:\n def __init__(self, obj, memo: list) -> None:\n self.obj = obj\n self.memo = memo\n self._unwrapped = [None] * len(memo)\n\n def tag(self, index, o):\n assert self._unwrapped[index] is None\n self._unwrapped[index] = o\n\n def unwrap(self, o, index=None):\n if type(o) in _BASIC_TYPES:\n return o\n assert type(o) is list\n\n if o[0] == \"type\":\n return _find_class(o[1])\n\n # reference\n if type(o[0]) is int:\n assert index is None # index should be None\n index = o[0]\n if self._unwrapped[index] is None:\n o = self.memo[index]\n assert type(o) is list\n assert type(o[0]) is str\n self.unwrap(o, index)\n assert self._unwrapped[index] is not None\n return self._unwrapped[index]\n \n # concrete reference type\n if o[0] == \"tuple\":\n ret = tuple([self.unwrap(i) for i in o[1]])\n self.tag(index, ret)\n return ret\n if o[0] == \"bytes\":\n ret = bytes(o[1])\n self.tag(index, ret)\n return ret\n if o[0] == \"list\":\n ret = []\n self.tag(index, ret)\n for i in o[1]:\n ret.append(self.unwrap(i))\n return ret\n if o[0] == \"dict\":\n ret = {}\n self.tag(index, ret)\n for k,v in o[1]:\n ret[self.unwrap(k)] = self.unwrap(v)\n return ret\n \n # generic object\n cls = _find_class(o[0])\n if getattr(cls, '__struct__', False):\n inst = cls.fromstruct(struct.fromhex(o[1]))\n self.tag(index, inst)\n return inst\n else:\n _, newargs, state = o\n # create uninitialized instance\n new_f = getattr(cls, \"__new__\")\n if newargs is not None:\n newargs = [self.unwrap(i) for i in newargs]\n inst = new_f(cls, *newargs)\n else:\n inst = new_f(cls)\n self.tag(index, inst)\n # restore state\n if state is not None:\n for k,v in state.items():\n setattr(inst, k, self.unwrap(v))\n return inst\n\n def run_pipe(self):\n return self.unwrap(self.obj)\n\n\ndef _wrap(o):\n return _Pickler(o).run_pipe()\n\ndef _unwrap(packed: list):\n return _Unpickler(*packed).run_pipe()\n\ndef dumps(o) -> bytes:\n o = _wrap(o)\n return json.dumps(o).encode()\n\ndef loads(b) -> object:\n assert type(b) is bytes\n o = json.loads(b.decode())\n return _unwrap(o)"; +const char kPythonLibs_this[] = "print(\"\"\"The Zen of Python, by Tim Peters\n\nBeautiful is better than ugly.\nExplicit is better than implicit.\nSimple is better than complex.\nComplex is better than complicated.\nFlat is better than nested.\nSparse is better than dense.\nReadability counts.\nSpecial cases aren't special enough to break the rules.\nAlthough practicality beats purity.\nErrors should never pass silently.\nUnless explicitly silenced.\nIn the face of ambiguity, refuse the temptation to guess.\nThere should be one-- and preferably only one --obvious way to do it.\nAlthough that way may not be obvious at first unless you're Dutch.\nNow is better than never.\nAlthough never is often better than *right* now.\nIf the implementation is hard to explain, it's a bad idea.\nIf the implementation is easy to explain, it may be a good idea.\nNamespaces are one honking great idea -- let's do more of those!\"\"\")"; +const char kPythonLibs_typing[] = "class _Placeholder:\n def __init__(self, *args, **kwargs):\n pass\n def __getitem__(self, *args):\n return self\n def __call__(self, *args, **kwargs):\n return self\n def __and__(self, other):\n return self\n def __or__(self, other):\n return self\n def __xor__(self, other):\n return self\n\n\n_PLACEHOLDER = _Placeholder()\n\nList = _PLACEHOLDER\nDict = _PLACEHOLDER\nTuple = _PLACEHOLDER\nSet = _PLACEHOLDER\nAny = _PLACEHOLDER\nUnion = _PLACEHOLDER\nOptional = _PLACEHOLDER\nCallable = _PLACEHOLDER\nType = _PLACEHOLDER\nProtocol = _PLACEHOLDER\n\nLiteral = _PLACEHOLDER\nLiteralString = _PLACEHOLDER\n\nIterable = _PLACEHOLDER\nGenerator = _PLACEHOLDER\n\nHashable = _PLACEHOLDER\n\nTypeVar = _PLACEHOLDER\nSelf = _PLACEHOLDER\n\nclass Generic:\n pass\n\nTYPE_CHECKING = False\n\n# decorators\noverload = lambda x: x\nfinal = lambda x: x\n"; diff --git a/src/common/_generated.cpp b/src/common/_generated.cpp deleted file mode 100644 index 69a232fb..00000000 --- a/src/common/_generated.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// generated by prebuild.py -#include "pocketpy/common/_generated.hpp" - -namespace pkpy{ - const char kPythonLibs__enum[] = "class Enum:\n def __init__(self, name, value):\n self.name = name\n self.value = value\n\n def __str__(self):\n return f'{type(self).__name__}.{self.name}'\n \n def __repr__(self):\n return f'<{str(self)}: {self.value!r}>'\n \n"; - const char kPythonLibs__long[] = "# after v1.2.2, int is always 64-bit\nPyLong_SHIFT = 60//2 - 1\n\nPyLong_BASE = 2 ** PyLong_SHIFT\nPyLong_MASK = PyLong_BASE - 1\nPyLong_DECIMAL_SHIFT = 4\nPyLong_DECIMAL_BASE = 10 ** PyLong_DECIMAL_SHIFT\n\n##############################################################\n\ndef ulong_fromint(x: int):\n # return a list of digits and sign\n if x == 0: return [0], 1\n sign = 1 if x > 0 else -1\n if sign < 0: x = -x\n res = []\n while x:\n res.append(x & PyLong_MASK)\n x >>= PyLong_SHIFT\n return res, sign\n\ndef ulong_cmp(a: list, b: list) -> int:\n # return 1 if a>b, -1 if a len(b): return 1\n if len(a) < len(b): return -1\n for i in range(len(a)-1, -1, -1):\n if a[i] > b[i]: return 1\n if a[i] < b[i]: return -1\n return 0\n\ndef ulong_pad_(a: list, size: int):\n # pad leading zeros to have `size` digits\n delta = size - len(a)\n if delta > 0:\n a.extend([0] * delta)\n\ndef ulong_unpad_(a: list):\n # remove leading zeros\n while len(a)>1 and a[-1]==0:\n a.pop()\n\ndef ulong_add(a: list, b: list) -> list:\n res = [0] * max(len(a), len(b))\n ulong_pad_(a, len(res))\n ulong_pad_(b, len(res))\n carry = 0\n for i in range(len(res)):\n carry += a[i] + b[i]\n res[i] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n if carry > 0:\n res.append(carry)\n return res\n\ndef ulong_inc_(a: list):\n a[0] += 1\n for i in range(len(a)):\n if a[i] < PyLong_BASE: break\n a[i] -= PyLong_BASE\n if i+1 == len(a):\n a.append(1)\n else:\n a[i+1] += 1\n \n\ndef ulong_sub(a: list, b: list) -> list:\n # a >= b\n res = []\n borrow = 0\n for i in range(len(b)):\n tmp = a[i] - b[i] - borrow\n if tmp < 0:\n tmp += PyLong_BASE\n borrow = 1\n else:\n borrow = 0\n res.append(tmp)\n for i in range(len(b), len(a)):\n tmp = a[i] - borrow\n if tmp < 0:\n tmp += PyLong_BASE\n borrow = 1\n else:\n borrow = 0\n res.append(tmp)\n ulong_unpad_(res)\n return res\n\ndef ulong_divmodi(a: list, b: int):\n # b > 0\n res = []\n carry = 0\n for i in range(len(a)-1, -1, -1):\n carry <<= PyLong_SHIFT\n carry += a[i]\n res.append(carry // b)\n carry %= b\n res.reverse()\n ulong_unpad_(res)\n return res, carry\n\n\ndef ulong_divmod(a: list, b: list):\n\n if ulong_cmp(a, b) < 0:\n return [0], a\n\n if len(b) == 1:\n q, r = ulong_divmodi(a, b[0])\n r, _ = ulong_fromint(r)\n return q, r\n\n max = (len(a) - len(b)) * PyLong_SHIFT + \x5c\n (a[-1].bit_length() - b[-1].bit_length())\n\n low = [0]\n\n high = (max // PyLong_SHIFT) * [0] + \x5c\n [(2**(max % PyLong_SHIFT)) & PyLong_MASK]\n\n while ulong_cmp(low, high) < 0:\n ulong_inc_(high)\n mid, r = ulong_divmodi(ulong_add(low, high), 2)\n if ulong_cmp(a, ulong_mul(b, mid)) >= 0:\n low = mid\n else:\n high = ulong_sub(mid, [1])\n\n q = [0] * (len(a) - len(b) + 1)\n while ulong_cmp(a, ulong_mul(b, low)) >= 0:\n q = ulong_add(q, low)\n a = ulong_sub(a, ulong_mul(b, low))\n ulong_unpad_(q)\n return q, a\n\ndef ulong_floordivi(a: list, b: int):\n # b > 0\n return ulong_divmodi(a, b)[0]\n\ndef ulong_muli(a: list, b: int):\n # b >= 0\n res = [0] * len(a)\n carry = 0\n for i in range(len(a)):\n carry += a[i] * b\n res[i] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n if carry > 0:\n res.append(carry)\n return res\n\ndef ulong_mul(a: list, b: list):\n N = len(a) + len(b)\n # use grade-school multiplication\n res = [0] * N\n for i in range(len(a)):\n carry = 0\n for j in range(len(b)):\n carry += res[i+j] + a[i] * b[j]\n res[i+j] = carry & PyLong_MASK\n carry >>= PyLong_SHIFT\n res[i+len(b)] = carry\n ulong_unpad_(res)\n return res\n\ndef ulong_powi(a: list, b: int):\n # b >= 0\n if b == 0: return [1]\n res = [1]\n while b:\n if b & 1:\n res = ulong_mul(res, a)\n a = ulong_mul(a, a)\n b >>= 1\n return res\n\ndef ulong_repr(x: list) -> str:\n res = []\n while len(x)>1 or x[0]>0: # non-zero\n x, r = ulong_divmodi(x, PyLong_DECIMAL_BASE)\n res.append(str(r).zfill(PyLong_DECIMAL_SHIFT))\n res.reverse()\n s = ''.join(res)\n if len(s) == 0: return '0'\n if len(s) > 1: s = s.lstrip('0')\n return s\n\ndef ulong_fromstr(s: str):\n if s[-1] == 'L':\n s = s[:-1]\n res, base = [0], [1]\n if s[0] == '-':\n sign = -1\n s = s[1:]\n else:\n sign = 1\n s = s[::-1]\n for c in s:\n c = ord(c) - 48\n assert 0 <= c <= 9\n res = ulong_add(res, ulong_muli(base, c))\n base = ulong_muli(base, 10)\n return res, sign\n\nclass long:\n def __init__(self, x):\n if type(x) is tuple:\n self.digits, self.sign = x\n elif type(x) is int:\n self.digits, self.sign = ulong_fromint(x)\n elif type(x) is float:\n self.digits, self.sign = ulong_fromint(int(x))\n elif type(x) is str:\n self.digits, self.sign = ulong_fromstr(x)\n elif type(x) is long:\n self.digits, self.sign = x.digits.copy(), x.sign\n else:\n raise TypeError('expected int or str')\n \n def __len__(self):\n return len(self.digits)\n\n def __add__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign == other.sign:\n return long((ulong_add(self.digits, other.digits), self.sign))\n else:\n cmp = ulong_cmp(self.digits, other.digits)\n if cmp == 0:\n return long(0)\n if cmp > 0:\n return long((ulong_sub(self.digits, other.digits), self.sign))\n else:\n return long((ulong_sub(other.digits, self.digits), other.sign))\n \n def __radd__(self, other):\n return self.__add__(other)\n \n def __sub__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign != other.sign:\n return long((ulong_add(self.digits, other.digits), self.sign))\n cmp = ulong_cmp(self.digits, other.digits)\n if cmp == 0:\n return long(0)\n if cmp > 0:\n return long((ulong_sub(self.digits, other.digits), self.sign))\n else:\n return long((ulong_sub(other.digits, self.digits), -other.sign))\n \n def __rsub__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n return other.__sub__(self)\n \n def __mul__(self, other):\n if type(other) is int:\n return long((\n ulong_muli(self.digits, abs(other)),\n self.sign * (1 if other >= 0 else -1)\n ))\n elif type(other) is long:\n return long((\n ulong_mul(self.digits, other.digits),\n self.sign * other.sign\n ))\n return NotImplemented\n \n def __rmul__(self, other):\n return self.__mul__(other)\n \n #######################################################\n def __divmod__(self, other):\n if type(other) is int:\n assert self.sign == 1 and other > 0\n q, r = ulong_divmodi(self.digits, other)\n return long((q, 1)), r\n if type(other) is long:\n assert self.sign == 1 and other.sign == 1\n q, r = ulong_divmod(self.digits, other.digits)\n assert len(other)>1 or other.digits[0]>0\n return long((q, 1)), long((r, 1))\n raise NotImplementedError\n\n def __floordiv__(self, other):\n return self.__divmod__(other)[0]\n\n def __mod__(self, other):\n return self.__divmod__(other)[1]\n\n def __pow__(self, other: int):\n assert type(other) is int and other >= 0\n if self.sign == -1 and other & 1:\n sign = -1\n else:\n sign = 1\n return long((ulong_powi(self.digits, other), sign))\n \n def __lshift__(self, other: int):\n assert type(other) is int and other >= 0\n x = self.digits.copy()\n q, r = divmod(other, PyLong_SHIFT)\n x = [0]*q + x\n for _ in range(r): x = ulong_muli(x, 2)\n return long((x, self.sign))\n \n def __rshift__(self, other: int):\n assert type(other) is int and other >= 0\n x = self.digits.copy()\n q, r = divmod(other, PyLong_SHIFT)\n x = x[q:]\n if not x: return long(0)\n for _ in range(r): x = ulong_floordivi(x, 2)\n return long((x, self.sign))\n \n def __neg__(self):\n return long((self.digits, -self.sign))\n \n def __cmp__(self, other):\n if type(other) is int:\n other = long(other)\n elif type(other) is not long:\n return NotImplemented\n if self.sign > other.sign:\n return 1\n elif self.sign < other.sign:\n return -1\n else:\n return ulong_cmp(self.digits, other.digits)\n \n def __eq__(self, other):\n return self.__cmp__(other) == 0\n def __lt__(self, other):\n return self.__cmp__(other) < 0\n def __le__(self, other):\n return self.__cmp__(other) <= 0\n def __gt__(self, other):\n return self.__cmp__(other) > 0\n def __ge__(self, other):\n return self.__cmp__(other) >= 0\n \n def __repr__(self):\n prefix = '-' if self.sign < 0 else ''\n return prefix + ulong_repr(self.digits) + 'L'\n"; - const char kPythonLibs__set[] = "class set:\n def __init__(self, iterable=None):\n iterable = iterable or []\n self._a = {}\n self.update(iterable)\n\n def add(self, elem):\n self._a[elem] = None\n \n def discard(self, elem):\n self._a.pop(elem, None)\n\n def remove(self, elem):\n del self._a[elem]\n \n def clear(self):\n self._a.clear()\n\n def update(self, other):\n for elem in other:\n self.add(elem)\n\n def __len__(self):\n return len(self._a)\n \n def copy(self):\n return set(self._a.keys())\n \n def __and__(self, other):\n return {elem for elem in self if elem in other}\n\n def __sub__(self, other):\n return {elem for elem in self if elem not in other}\n \n def __or__(self, other):\n ret = self.copy()\n ret.update(other)\n return ret\n\n def __xor__(self, other): \n _0 = self - other\n _1 = other - self\n return _0 | _1\n\n def union(self, other):\n return self | other\n\n def intersection(self, other):\n return self & other\n\n def difference(self, other):\n return self - other\n\n def symmetric_difference(self, other): \n return self ^ other\n \n def __eq__(self, other):\n if not isinstance(other, set):\n return NotImplemented\n return len(self ^ other) == 0\n\n def isdisjoint(self, other):\n return len(self & other) == 0\n \n def issubset(self, other):\n return len(self - other) == 0\n \n def issuperset(self, other):\n return len(other - self) == 0\n\n def __contains__(self, elem):\n return elem in self._a\n \n def __repr__(self):\n if len(self) == 0:\n return 'set()'\n return '{'+ ', '.join([repr(i) for i in self._a.keys()]) + '}'\n \n def __iter__(self):\n return iter(self._a.keys())"; - const char kPythonLibs_bisect[] = "\"\"\"Bisection algorithms.\"\"\"\n\ndef insort_right(a, x, lo=0, hi=None):\n \"\"\"Insert item x in list a, and keep it sorted assuming a is sorted.\n\n If x is already in a, insert it to the right of the rightmost x.\n\n Optional args lo (default 0) and hi (default len(a)) bound the\n slice of a to be searched.\n \"\"\"\n\n lo = bisect_right(a, x, lo, hi)\n a.insert(lo, x)\n\ndef bisect_right(a, x, lo=0, hi=None):\n \"\"\"Return the index where to insert item x in list a, assuming a is sorted.\n\n The return value i is such that all e in a[:i] have e <= x, and all e in\n a[i:] have e > x. So if x already appears in the list, a.insert(x) will\n insert just after the rightmost x already there.\n\n Optional args lo (default 0) and hi (default len(a)) bound the\n slice of a to be searched.\n \"\"\"\n\n if lo < 0:\n raise ValueError('lo must be non-negative')\n if hi is None:\n hi = len(a)\n while lo < hi:\n mid = (lo+hi)//2\n if x < a[mid]: hi = mid\n else: lo = mid+1\n return lo\n\ndef insort_left(a, x, lo=0, hi=None):\n \"\"\"Insert item x in list a, and keep it sorted assuming a is sorted.\n\n If x is already in a, insert it to the left of the leftmost x.\n\n Optional args lo (default 0) and hi (default len(a)) bound the\n slice of a to be searched.\n \"\"\"\n\n lo = bisect_left(a, x, lo, hi)\n a.insert(lo, x)\n\n\ndef bisect_left(a, x, lo=0, hi=None):\n \"\"\"Return the index where to insert item x in list a, assuming a is sorted.\n\n The return value i is such that all e in a[:i] have e < x, and all e in\n a[i:] have e >= x. So if x already appears in the list, a.insert(x) will\n insert just before the leftmost x already there.\n\n Optional args lo (default 0) and hi (default len(a)) bound the\n slice of a to be searched.\n \"\"\"\n\n if lo < 0:\n raise ValueError('lo must be non-negative')\n if hi is None:\n hi = len(a)\n while lo < hi:\n mid = (lo+hi)//2\n if a[mid] < x: lo = mid+1\n else: hi = mid\n return lo\n\n# Create aliases\nbisect = bisect_right\ninsort = insort_right\n"; - const char kPythonLibs_builtins[] = "from __builtins import next as __builtins_next\n\ndef all(iterable):\n for i in iterable:\n if not i:\n return False\n return True\n\ndef any(iterable):\n for i in iterable:\n if i:\n return True\n return False\n\ndef enumerate(iterable, start=0):\n n = start\n for elem in iterable:\n yield n, elem\n n += 1\n\ndef sum(iterable):\n res = 0\n for i in iterable:\n res += i\n return res\n\ndef map(f, iterable):\n for i in iterable:\n yield f(i)\n\ndef filter(f, iterable):\n for i in iterable:\n if f(i):\n yield i\n\ndef zip(a, b):\n a = iter(a)\n b = iter(b)\n while True:\n ai = __builtins_next(a)\n bi = __builtins_next(b)\n if ai is StopIteration or bi is StopIteration:\n break\n yield ai, bi\n\ndef reversed(iterable):\n a = list(iterable)\n a.reverse()\n return a\n\ndef sorted(iterable, key=None, reverse=False):\n a = list(iterable)\n a.sort(key=key, reverse=reverse)\n return a\n\n##### str #####\ndef __format_string(self: str, *args, **kwargs) -> str:\n def tokenizeString(s: str):\n tokens = []\n L, R = 0,0\n \n mode = None\n curArg = 0\n # lookingForKword = False\n \n while(R int:\n n = 0\n for item in self:\n if item == x:\n n += 1\n return n\n \n def extend(self, iterable: Iterable[T]):\n for x in iterable:\n self.append(x)\n\n def extendleft(self, iterable: Iterable[T]):\n for x in iterable:\n self.appendleft(x)\n \n def pop(self) -> T:\n if self._head == self._tail:\n raise IndexError(\"pop from an empty deque\")\n self._tail = (self._tail - 1 + self._capacity) % self._capacity\n return self._data[self._tail]\n \n def popleft(self) -> T:\n if self._head == self._tail:\n raise IndexError(\"pop from an empty deque\")\n x = self._data[self._head]\n self._head = (self._head + 1) % self._capacity\n return x\n \n def clear(self):\n i = self._head\n while i != self._tail:\n self._data[i] = None\n i = (i + 1) % self._capacity\n self._head = 0\n self._tail = 0\n\n def rotate(self, n: int = 1):\n if len(self) == 0:\n return\n if n > 0:\n n = n % len(self)\n for _ in range(n):\n self.appendleft(self.pop())\n elif n < 0:\n n = -n % len(self)\n for _ in range(n):\n self.append(self.popleft())\n\n def __len__(self) -> int:\n return (self._tail - self._head + self._capacity) % self._capacity\n\n def __contains__(self, x: object) -> bool:\n for item in self:\n if item == x:\n return True\n return False\n \n def __iter__(self):\n i = self._head\n while i != self._tail:\n yield self._data[i]\n i = (i + 1) % self._capacity\n\n def __eq__(self, other: object) -> bool:\n if not isinstance(other, deque):\n return False\n if len(self) != len(other):\n return False\n for x, y in zip(self, other):\n if x != y:\n return False\n return True\n \n def __repr__(self) -> str:\n return f\"deque({list(self)!r})\"\n\n"; - const char kPythonLibs_colorsys[] = "\"\"\"Conversion functions between RGB and other color systems.\n\nThis modules provides two functions for each color system ABC:\n\n rgb_to_abc(r, g, b) --> a, b, c\n abc_to_rgb(a, b, c) --> r, g, b\n\nAll inputs and outputs are triples of floats in the range [0.0...1.0]\n(with the exception of I and Q, which covers a slightly larger range).\nInputs outside the valid range may cause exceptions or invalid outputs.\n\nSupported color systems:\nRGB: Red, Green, Blue components\nYIQ: Luminance, Chrominance (used by composite video signals)\nHLS: Hue, Luminance, Saturation\nHSV: Hue, Saturation, Value\n\"\"\"\n\n# References:\n# http://en.wikipedia.org/wiki/YIQ\n# http://en.wikipedia.org/wiki/HLS_color_space\n# http://en.wikipedia.org/wiki/HSV_color_space\n\n__all__ = [\"rgb_to_yiq\",\"yiq_to_rgb\",\"rgb_to_hls\",\"hls_to_rgb\",\n \"rgb_to_hsv\",\"hsv_to_rgb\"]\n\n# Some floating point constants\n\nONE_THIRD = 1.0/3.0\nONE_SIXTH = 1.0/6.0\nTWO_THIRD = 2.0/3.0\n\n# YIQ: used by composite video signals (linear combinations of RGB)\n# Y: perceived grey level (0.0 == black, 1.0 == white)\n# I, Q: color components\n#\n# There are a great many versions of the constants used in these formulae.\n# The ones in this library uses constants from the FCC version of NTSC.\n\ndef rgb_to_yiq(r, g, b):\n y = 0.30*r + 0.59*g + 0.11*b\n i = 0.74*(r-y) - 0.27*(b-y)\n q = 0.48*(r-y) + 0.41*(b-y)\n return (y, i, q)\n\ndef yiq_to_rgb(y, i, q):\n # r = y + (0.27*q + 0.41*i) / (0.74*0.41 + 0.27*0.48)\n # b = y + (0.74*q - 0.48*i) / (0.74*0.41 + 0.27*0.48)\n # g = y - (0.30*(r-y) + 0.11*(b-y)) / 0.59\n\n r = y + 0.9468822170900693*i + 0.6235565819861433*q\n g = y - 0.27478764629897834*i - 0.6356910791873801*q\n b = y - 1.1085450346420322*i + 1.7090069284064666*q\n\n if r < 0.0:\n r = 0.0\n if g < 0.0:\n g = 0.0\n if b < 0.0:\n b = 0.0\n if r > 1.0:\n r = 1.0\n if g > 1.0:\n g = 1.0\n if b > 1.0:\n b = 1.0\n return (r, g, b)\n\n\n# HLS: Hue, Luminance, Saturation\n# H: position in the spectrum\n# L: color lightness\n# S: color saturation\n\ndef rgb_to_hls(r, g, b):\n maxc = max(r, g, b)\n minc = min(r, g, b)\n sumc = (maxc+minc)\n rangec = (maxc-minc)\n l = sumc/2.0\n if minc == maxc:\n return 0.0, l, 0.0\n if l <= 0.5:\n s = rangec / sumc\n else:\n s = rangec / (2.0-maxc-minc) # Not always 2.0-sumc: gh-106498.\n rc = (maxc-r) / rangec\n gc = (maxc-g) / rangec\n bc = (maxc-b) / rangec\n if r == maxc:\n h = bc-gc\n elif g == maxc:\n h = 2.0+rc-bc\n else:\n h = 4.0+gc-rc\n # h = (h/6.0) % 1.0\n h = h / 6.0\n h = h - int(h)\n return h, l, s\n\ndef hls_to_rgb(h, l, s):\n if s == 0.0:\n return l, l, l\n if l <= 0.5:\n m2 = l * (1.0+s)\n else:\n m2 = l+s-(l*s)\n m1 = 2.0*l - m2\n return (_v(m1, m2, h+ONE_THIRD), _v(m1, m2, h), _v(m1, m2, h-ONE_THIRD))\n\ndef _v(m1, m2, hue):\n # hue = hue % 1.0\n hue = hue - int(hue)\n if hue < ONE_SIXTH:\n return m1 + (m2-m1)*hue*6.0\n if hue < 0.5:\n return m2\n if hue < TWO_THIRD:\n return m1 + (m2-m1)*(TWO_THIRD-hue)*6.0\n return m1\n\n\n# HSV: Hue, Saturation, Value\n# H: position in the spectrum\n# S: color saturation (\"purity\")\n# V: color brightness\n\ndef rgb_to_hsv(r, g, b):\n maxc = max(r, g, b)\n minc = min(r, g, b)\n rangec = (maxc-minc)\n v = maxc\n if minc == maxc:\n return 0.0, 0.0, v\n s = rangec / maxc\n rc = (maxc-r) / rangec\n gc = (maxc-g) / rangec\n bc = (maxc-b) / rangec\n if r == maxc:\n h = bc-gc\n elif g == maxc:\n h = 2.0+rc-bc\n else:\n h = 4.0+gc-rc\n # h = (h/6.0) % 1.0\n h = h / 6.0\n h = h - int(h)\n return h, s, v\n\ndef hsv_to_rgb(h, s, v):\n if s == 0.0:\n return v, v, v\n i = int(h*6.0) # XXX assume int() truncates!\n f = (h*6.0) - i\n p = v*(1.0 - s)\n q = v*(1.0 - s*f)\n t = v*(1.0 - s*(1.0-f))\n i = i%6\n if i == 0:\n return v, t, p\n if i == 1:\n return q, v, p\n if i == 2:\n return p, v, t\n if i == 3:\n return p, q, v\n if i == 4:\n return t, p, v\n if i == 5:\n return v, p, q\n # Cannot get here"; - const char kPythonLibs_datetime[] = "from time import localtime\n\nclass timedelta:\n def __init__(self, days=0, seconds=0):\n self.days = days\n self.seconds = seconds\n\n def __repr__(self):\n return f\"datetime.timedelta(days={self.days}, seconds={self.seconds})\"\n\n def __eq__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) == (other.days, other.seconds)\n\n def __lt__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) < (other.days, other.seconds)\n\n def __le__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) <= (other.days, other.seconds)\n\n def __gt__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) > (other.days, other.seconds)\n\n def __ge__(self, other: 'timedelta') -> bool:\n if type(other) is not timedelta:\n return NotImplemented\n return (self.days, self.seconds) >= (other.days, other.seconds)\n\n\nclass date:\n def __init__(self, year: int, month: int, day: int):\n self.year = year\n self.month = month\n self.day = day\n\n @staticmethod\n def today():\n t = localtime()\n return date(t.tm_year, t.tm_mon, t.tm_mday)\n\n def __eq__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) == (other.year, other.month, other.day)\n\n def __lt__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) < (other.year, other.month, other.day)\n\n def __le__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) <= (other.year, other.month, other.day)\n\n def __gt__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) > (other.year, other.month, other.day)\n\n def __ge__(self, other: 'date') -> bool:\n if type(other) is not date:\n return NotImplemented\n return (self.year, self.month, self.day) >= (other.year, other.month, other.day)\n\n def __str__(self):\n return f\"{self.year}-{self.month:02}-{self.day:02}\"\n\n def __repr__(self):\n return f\"datetime.date({self.year}, {self.month}, {self.day})\"\n\n\nclass datetime(date):\n def __init__(self, year: int, month: int, day: int, hour: int, minute: int, second: int):\n super().__init__(year, month, day)\n # Validate and set hour, minute, and second\n if not 0 <= hour <= 23:\n raise ValueError(\"Hour must be between 0 and 23\")\n self.hour = hour\n if not 0 <= minute <= 59:\n raise ValueError(\"Minute must be between 0 and 59\")\n self.minute = minute\n if not 0 <= second <= 59:\n raise ValueError(\"Second must be between 0 and 59\")\n self.second = second\n\n def date(self) -> date:\n return date(self.year, self.month, self.day)\n\n @staticmethod\n def now():\n t = localtime()\n tm_sec = t.tm_sec\n if tm_sec == 60:\n tm_sec = 59\n return datetime(t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, tm_sec)\n\n def __str__(self):\n return f\"{self.year}-{self.month:02}-{self.day:02} {self.hour:02}:{self.minute:02}:{self.second:02}\"\n\n def __repr__(self):\n return f\"datetime.datetime({self.year}, {self.month}, {self.day}, {self.hour}, {self.minute}, {self.second})\"\n\n def __eq__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) ==\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def __lt__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) <\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def __le__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) <=\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def __gt__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) >\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def __ge__(self, other) -> bool:\n if type(other) is not datetime:\n return NotImplemented\n return (self.year, self.month, self.day, self.hour, self.minute, self.second) >=\x5c\n (other.year, other.month, other.day,\n other.hour, other.minute, other.second)\n\n def timestamp(self) -> float:\n raise NotImplementedError\n\n"; - const char kPythonLibs_functools[] = "from __builtins import next\n\nclass cache:\n def __init__(self, f):\n self.f = f\n self.cache = {}\n\n def __call__(self, *args):\n if args not in self.cache:\n self.cache[args] = self.f(*args)\n return self.cache[args]\n \ndef reduce(function, sequence, initial=...):\n it = iter(sequence)\n if initial is ...:\n value = next(it)\n if value is StopIteration:\n raise TypeError(\"reduce() of empty iterable with no initial value\")\n else:\n value = initial\n for element in it:\n value = function(value, element)\n return value\n\nclass partial:\n def __init__(self, f, *args, **kwargs):\n self.f = f\n if not callable(f):\n raise TypeError(\"the first argument must be callable\")\n self.args = args\n self.kwargs = kwargs\n\n def __call__(self, *args, **kwargs):\n kwargs.update(self.kwargs)\n return self.f(*self.args, *args, **kwargs)\n\n"; - const char kPythonLibs_heapq[] = "# Heap queue algorithm (a.k.a. priority queue)\ndef heappush(heap, item):\n \"\"\"Push item onto heap, maintaining the heap invariant.\"\"\"\n heap.append(item)\n _siftdown(heap, 0, len(heap)-1)\n\ndef heappop(heap):\n \"\"\"Pop the smallest item off the heap, maintaining the heap invariant.\"\"\"\n lastelt = heap.pop() # raises appropriate IndexError if heap is empty\n if heap:\n returnitem = heap[0]\n heap[0] = lastelt\n _siftup(heap, 0)\n return returnitem\n return lastelt\n\ndef heapreplace(heap, item):\n \"\"\"Pop and return the current smallest value, and add the new item.\n\n This is more efficient than heappop() followed by heappush(), and can be\n more appropriate when using a fixed-size heap. Note that the value\n returned may be larger than item! That constrains reasonable uses of\n this routine unless written as part of a conditional replacement:\n\n if item > heap[0]:\n item = heapreplace(heap, item)\n \"\"\"\n returnitem = heap[0] # raises appropriate IndexError if heap is empty\n heap[0] = item\n _siftup(heap, 0)\n return returnitem\n\ndef heappushpop(heap, item):\n \"\"\"Fast version of a heappush followed by a heappop.\"\"\"\n if heap and heap[0] < item:\n item, heap[0] = heap[0], item\n _siftup(heap, 0)\n return item\n\ndef heapify(x):\n \"\"\"Transform list into a heap, in-place, in O(len(x)) time.\"\"\"\n n = len(x)\n # Transform bottom-up. The largest index there's any point to looking at\n # is the largest with a child index in-range, so must have 2*i + 1 < n,\n # or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so\n # j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is\n # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.\n for i in reversed(range(n//2)):\n _siftup(x, i)\n\n# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos\n# is the index of a leaf with a possibly out-of-order value. Restore the\n# heap invariant.\ndef _siftdown(heap, startpos, pos):\n newitem = heap[pos]\n # Follow the path to the root, moving parents down until finding a place\n # newitem fits.\n while pos > startpos:\n parentpos = (pos - 1) >> 1\n parent = heap[parentpos]\n if newitem < parent:\n heap[pos] = parent\n pos = parentpos\n continue\n break\n heap[pos] = newitem\n\ndef _siftup(heap, pos):\n endpos = len(heap)\n startpos = pos\n newitem = heap[pos]\n # Bubble up the smaller child until hitting a leaf.\n childpos = 2*pos + 1 # leftmost child position\n while childpos < endpos:\n # Set childpos to index of smaller child.\n rightpos = childpos + 1\n if rightpos < endpos and not heap[childpos] < heap[rightpos]:\n childpos = rightpos\n # Move the smaller child up.\n heap[pos] = heap[childpos]\n pos = childpos\n childpos = 2*pos + 1\n # The leaf at pos is empty now. Put newitem there, and bubble it up\n # to its final resting place (by sifting its parents down).\n heap[pos] = newitem\n _siftdown(heap, startpos, pos)"; - const char kPythonLibs_itertools[] = "from __builtins import next\n\ndef zip_longest(a, b):\n a = iter(a)\n b = iter(b)\n while True:\n ai = next(a)\n bi = next(b)\n if ai is StopIteration and bi is StopIteration:\n break\n if ai is StopIteration:\n ai = None\n if bi is StopIteration:\n bi = None\n yield ai, bi\n"; - const char kPythonLibs_operator[] = "# https://docs.python.org/3/library/operator.html#mapping-operators-to-functions\n\ndef le(a, b): return a <= b\ndef lt(a, b): return a < b\ndef ge(a, b): return a >= b\ndef gt(a, b): return a > b\ndef eq(a, b): return a == b\ndef ne(a, b): return a != b\n\ndef and_(a, b): return a & b\ndef or_(a, b): return a | b\ndef xor(a, b): return a ^ b\ndef invert(a): return ~a\ndef lshift(a, b): return a << b\ndef rshift(a, b): return a >> b\n\ndef is_(a, b): return a is b\ndef is_not(a, b): return a is not b\ndef not_(a): return not a\ndef truth(a): return bool(a)\ndef contains(a, b): return b in a\n\ndef add(a, b): return a + b\ndef sub(a, b): return a - b\ndef mul(a, b): return a * b\ndef truediv(a, b): return a / b\ndef floordiv(a, b): return a // b\ndef mod(a, b): return a % b\ndef pow(a, b): return a ** b\ndef neg(a): return -a\ndef matmul(a, b): return a @ b\n\ndef getitem(a, b): return a[b]\ndef setitem(a, b, c): a[b] = c\ndef delitem(a, b): del a[b]\n\ndef iadd(a, b): a += b; return a\ndef isub(a, b): a -= b; return a\ndef imul(a, b): a *= b; return a\ndef itruediv(a, b): a /= b; return a\ndef ifloordiv(a, b): a //= b; return a\ndef imod(a, b): a %= b; return a\n# def ipow(a, b): a **= b; return a\n# def imatmul(a, b): a @= b; return a\ndef iand(a, b): a &= b; return a\ndef ior(a, b): a |= b; return a\ndef ixor(a, b): a ^= b; return a\ndef ilshift(a, b): a <<= b; return a\ndef irshift(a, b): a >>= b; return a\n"; - const char kPythonLibs_pickle[] = "import json\nfrom c import struct\nimport builtins\n\n_BASIC_TYPES = [int, float, str, bool, type(None)]\n_MOD_T_SEP = \"@\"\n\ndef _find_class(path: str):\n if _MOD_T_SEP not in path:\n return builtins.__dict__[path]\n modpath, name = path.split(_MOD_T_SEP)\n return __import__(modpath).__dict__[name]\n\nclass _Pickler:\n def __init__(self, obj) -> None:\n self.obj = obj\n self.raw_memo = {} # id -> int\n self.memo = [] # int -> object\n\n @staticmethod\n def _type_id(t: type):\n assert type(t) is type\n name = t.__name__\n mod = t.__module__\n if mod is not None:\n name = mod.__path__ + _MOD_T_SEP + name\n return name\n\n def wrap(self, o):\n o_t = type(o)\n if o_t in _BASIC_TYPES:\n return o\n if o_t is type:\n return [\"type\", self._type_id(o)]\n\n index = self.raw_memo.get(id(o), None)\n if index is not None:\n return [index]\n \n ret = []\n index = len(self.memo)\n self.memo.append(ret)\n self.raw_memo[id(o)] = index\n\n if o_t is tuple:\n ret.append(\"tuple\")\n ret.append([self.wrap(i) for i in o])\n return [index]\n if o_t is bytes:\n ret.append(\"bytes\")\n ret.append([o[j] for j in range(len(o))])\n return [index]\n if o_t is list:\n ret.append(\"list\")\n ret.append([self.wrap(i) for i in o])\n return [index]\n if o_t is dict:\n ret.append(\"dict\")\n ret.append([[self.wrap(k), self.wrap(v)] for k,v in o.items()])\n return [index]\n \n _0 = self._type_id(o_t)\n\n if getattr(o_t, '__struct__', False):\n ret.append(_0)\n ret.append(o.tostruct().hex())\n return [index]\n\n if hasattr(o, \"__getnewargs__\"):\n _1 = o.__getnewargs__() # an iterable\n _1 = [self.wrap(i) for i in _1]\n else:\n _1 = None\n\n if o.__dict__ is None:\n _2 = None\n else:\n _2 = {k: self.wrap(v) for k,v in o.__dict__.items()}\n\n ret.append(_0) # type id\n ret.append(_1) # newargs\n ret.append(_2) # state\n return [index]\n \n def run_pipe(self):\n o = self.wrap(self.obj)\n return [o, self.memo]\n\n\n\nclass _Unpickler:\n def __init__(self, obj, memo: list) -> None:\n self.obj = obj\n self.memo = memo\n self._unwrapped = [None] * len(memo)\n\n def tag(self, index, o):\n assert self._unwrapped[index] is None\n self._unwrapped[index] = o\n\n def unwrap(self, o, index=None):\n if type(o) in _BASIC_TYPES:\n return o\n assert type(o) is list\n\n if o[0] == \"type\":\n return _find_class(o[1])\n\n # reference\n if type(o[0]) is int:\n assert index is None # index should be None\n index = o[0]\n if self._unwrapped[index] is None:\n o = self.memo[index]\n assert type(o) is list\n assert type(o[0]) is str\n self.unwrap(o, index)\n assert self._unwrapped[index] is not None\n return self._unwrapped[index]\n \n # concrete reference type\n if o[0] == \"tuple\":\n ret = tuple([self.unwrap(i) for i in o[1]])\n self.tag(index, ret)\n return ret\n if o[0] == \"bytes\":\n ret = bytes(o[1])\n self.tag(index, ret)\n return ret\n if o[0] == \"list\":\n ret = []\n self.tag(index, ret)\n for i in o[1]:\n ret.append(self.unwrap(i))\n return ret\n if o[0] == \"dict\":\n ret = {}\n self.tag(index, ret)\n for k,v in o[1]:\n ret[self.unwrap(k)] = self.unwrap(v)\n return ret\n \n # generic object\n cls = _find_class(o[0])\n if getattr(cls, '__struct__', False):\n inst = cls.fromstruct(struct.fromhex(o[1]))\n self.tag(index, inst)\n return inst\n else:\n _, newargs, state = o\n # create uninitialized instance\n new_f = getattr(cls, \"__new__\")\n if newargs is not None:\n newargs = [self.unwrap(i) for i in newargs]\n inst = new_f(cls, *newargs)\n else:\n inst = new_f(cls)\n self.tag(index, inst)\n # restore state\n if state is not None:\n for k,v in state.items():\n setattr(inst, k, self.unwrap(v))\n return inst\n\n def run_pipe(self):\n return self.unwrap(self.obj)\n\n\ndef _wrap(o):\n return _Pickler(o).run_pipe()\n\ndef _unwrap(packed: list):\n return _Unpickler(*packed).run_pipe()\n\ndef dumps(o) -> bytes:\n o = _wrap(o)\n return json.dumps(o).encode()\n\ndef loads(b) -> object:\n assert type(b) is bytes\n o = json.loads(b.decode())\n return _unwrap(o)"; - const char kPythonLibs_this[] = "print(\"\"\"The Zen of Python, by Tim Peters\n\nBeautiful is better than ugly.\nExplicit is better than implicit.\nSimple is better than complex.\nComplex is better than complicated.\nFlat is better than nested.\nSparse is better than dense.\nReadability counts.\nSpecial cases aren't special enough to break the rules.\nAlthough practicality beats purity.\nErrors should never pass silently.\nUnless explicitly silenced.\nIn the face of ambiguity, refuse the temptation to guess.\nThere should be one-- and preferably only one --obvious way to do it.\nAlthough that way may not be obvious at first unless you're Dutch.\nNow is better than never.\nAlthough never is often better than *right* now.\nIf the implementation is hard to explain, it's a bad idea.\nIf the implementation is easy to explain, it may be a good idea.\nNamespaces are one honking great idea -- let's do more of those!\"\"\")"; - const char kPythonLibs_typing[] = "class _Placeholder:\n def __init__(self, *args, **kwargs):\n pass\n def __getitem__(self, *args):\n return self\n def __call__(self, *args, **kwargs):\n return self\n def __and__(self, other):\n return self\n def __or__(self, other):\n return self\n def __xor__(self, other):\n return self\n\n\n_PLACEHOLDER = _Placeholder()\n\nList = _PLACEHOLDER\nDict = _PLACEHOLDER\nTuple = _PLACEHOLDER\nSet = _PLACEHOLDER\nAny = _PLACEHOLDER\nUnion = _PLACEHOLDER\nOptional = _PLACEHOLDER\nCallable = _PLACEHOLDER\nType = _PLACEHOLDER\nProtocol = _PLACEHOLDER\n\nLiteral = _PLACEHOLDER\nLiteralString = _PLACEHOLDER\n\nIterable = _PLACEHOLDER\nGenerator = _PLACEHOLDER\n\nHashable = _PLACEHOLDER\n\nTypeVar = _PLACEHOLDER\nSelf = _PLACEHOLDER\n\nclass Generic:\n pass\n\nTYPE_CHECKING = False\n\n# decorators\noverload = lambda x: x\nfinal = lambda x: x\n"; -} // namespace pkpy diff --git a/src/modules/modules.cpp b/src/modules/modules.cpp index c3cdca41..bd2a2388 100644 --- a/src/modules/modules.cpp +++ b/src/modules/modules.cpp @@ -3,7 +3,7 @@ #include "pocketpy/common/version.h" #include "pocketpy/common/export.h" -#include "pocketpy/common/_generated.hpp" +#include "pocketpy/common/_generated.h" #include #include diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index ba1456eb..6e1c4109 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -1,6 +1,6 @@ #include "pocketpy/pocketpy.hpp" -#include "pocketpy/common/_generated.hpp" +#include "pocketpy/common/_generated.h" #include "pocketpy/modules/array2d.hpp" #include "pocketpy/modules/base64.hpp" From 597abacc5820e97806ab8c67a7b4d67d2881a1bd Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 15 Jun 2024 14:20:21 +0800 Subject: [PATCH 60/60] some fix --- docs/bindings.md | 7 ------- include/pocketpy/common/config.h | 7 ------- include/pocketpy/objects/codeobject.hpp | 4 ---- 3 files changed, 18 deletions(-) diff --git a/docs/bindings.md b/docs/bindings.md index 257e659e..b1600bf6 100644 --- a/docs/bindings.md +++ b/docs/bindings.md @@ -92,13 +92,6 @@ vm->bind(obj, "f() -> int", [](VM* vm, ArgsView args){ }, x); // capture x ``` -The 3rd way is to change the macro `PK_ENABLE_STD_FUNCTION` in `config.h`: -```cpp -#define PK_ENABLE_STD_FUNCTION 0 // => 1 -``` - -Then you can use standard capture list in lambda. - ## Bind a class or struct Assume you have a struct `Point` declared as follows. diff --git a/include/pocketpy/common/config.h b/include/pocketpy/common/config.h index 547c4a50..598c65a2 100644 --- a/include/pocketpy/common/config.h +++ b/include/pocketpy/common/config.h @@ -24,13 +24,6 @@ #define PK_GC_MIN_THRESHOLD 16384 #endif -// Whether to use `pkpy::function<>` to do bindings or not -// By default, functions to be binded must be a C function pointer without capture -// However, someone thinks it's not convenient. -// By setting this to 1, capturing lambdas can be binded, -// but it's slower and may cause "code bloat", it also needs more time to compile. -#define PK_ENABLE_STD_FUNCTION 0 - /*************** debug settings ***************/ // Do not edit the following settings unless you know what you are doing #define PK_DEBUG_CEVAL_STEP 0 diff --git a/include/pocketpy/objects/codeobject.hpp b/include/pocketpy/objects/codeobject.hpp index d9afc2c9..7bf95080 100644 --- a/include/pocketpy/objects/codeobject.hpp +++ b/include/pocketpy/objects/codeobject.hpp @@ -7,11 +7,7 @@ namespace pkpy { -#if PK_ENABLE_STD_FUNCTION -using NativeFuncC = function; -#else typedef PyVar (*NativeFuncC)(VM*, ArgsView); -#endif enum class BindType { DEFAULT,