diff --git a/include/pocketpy/codeobject.h b/include/pocketpy/codeobject.h
index 0b32860d..708f74af 100644
--- a/include/pocketpy/codeobject.h
+++ b/include/pocketpy/codeobject.h
@@ -65,15 +65,17 @@ struct CodeObject {
 
     std::shared_ptr src;
     Str name;
-    bool is_generator = false;
+    bool is_generator;
 
     std::vector codes;
     std::vector iblocks;    // block index for each bytecode
     std::vector lines;
-    List consts;
+
+    small_vector_no_copy_and_move consts;
+
     pod_vector varnames;      // local variables
     NameDictInt varnames_inv;
-    std::vector blocks = { CodeBlock(CodeBlockType::NO_BLOCK, -1, 0, 0) };
+    std::vector blocks;
     NameDictInt labels;
     std::vector func_decls;
 
@@ -95,8 +97,10 @@ struct FuncDecl {
         PyObject* value;        // default value
     };
     CodeObject_ code;           // code object of this function
-    pod_vector args;       // indices in co->varnames
-    pod_vector kwargs;   // indices in co->varnames
+
+    small_vector_no_copy_and_move args;      // indices in co->varnames
+    small_vector_no_copy_and_move kwargs;  // indices in co->varnames
+
     int starred_arg = -1;       // index in co->varnames, -1 if no *arg
     int starred_kwarg = -1;     // index in co->varnames, -1 if no **kwarg
     bool nested = false;        // whether this function is nested
diff --git a/include/pocketpy/compiler.h b/include/pocketpy/compiler.h
index 3faf07c5..42c3bc8b 100644
--- a/include/pocketpy/compiler.h
+++ b/include/pocketpy/compiler.h
@@ -22,7 +22,7 @@ class Compiler {
     inline static PrattRule rules[kTokenCount];
 
     Lexer lexer;
-    stack contexts;
+    stack_no_copy contexts;
     VM* vm;
     bool unknown_global_scope;     // for eval/exec() call
     bool used;
@@ -62,9 +62,9 @@ class Compiler {
     Expr_ EXPR_VARS();  // special case for `for loop` and `comp`
 
     template
-    unique_ptr_64 make_expr(Args&&... args) {
-        void* p = pool64_alloc(sizeof(T));
-        unique_ptr_64 expr(new (p) T(std::forward(args)...));
+    unique_ptr_128 make_expr(Args&&... args) {
+        void* p = pool128_alloc(sizeof(T));
+        unique_ptr_128 expr(new (p) T(std::forward(args)...));
         expr->line = prev().line;
         return expr;
     }
@@ -72,7 +72,7 @@ class Compiler {
     template
     void _consume_comp(Expr_ expr){
         static_assert(std::is_base_of::value);
-        unique_ptr_64 ce = make_expr();
+        unique_ptr_128 ce = make_expr();
         ce->expr = std::move(expr);
         ce->vars = EXPR_VARS();
         consume(TK("in"));
@@ -124,10 +124,10 @@ class Compiler {
     bool try_compile_assignment();
     void compile_stmt();
     void consume_type_hints();
-    void _add_decorators(const std::vector& decorators);
-    void compile_class(const std::vector& decorators={});
+    void _add_decorators(const Expr_vector& decorators);
+    void compile_class(const Expr_vector& decorators={});
     void _compile_f_args(FuncDecl_ decl, bool enable_type_hints);
-    void compile_function(const std::vector& decorators={});
+    void compile_function(const Expr_vector& decorators={});
 
     PyObject* to_object(const TokenValue& value);
     PyObject* read_literal();
diff --git a/include/pocketpy/expr.h b/include/pocketpy/expr.h
index 4eba89f6..5cd06956 100644
--- a/include/pocketpy/expr.h
+++ b/include/pocketpy/expr.h
@@ -11,46 +11,52 @@ namespace pkpy{
 struct CodeEmitContext;
 struct Expr;
 
-#define PK_POOL64_DELETE(ptr) if(ptr != nullptr) { ptr->~T(); pool64_dealloc(ptr); ptr = nullptr; }
+#define PK_POOL128_DELETE(ptr) if(ptr != nullptr) { ptr->~T(); pool128_dealloc(ptr); ptr = nullptr; }
 
 template
-class unique_ptr_64{
+class unique_ptr_128{
     T* ptr;
 public:
-    unique_ptr_64(): ptr(nullptr) {}
-    unique_ptr_64(T* ptr): ptr(ptr) {}
+    unique_ptr_128(): ptr(nullptr) {}
+    unique_ptr_128(T* ptr): ptr(ptr) {}
     T* operator->() const { return ptr; }
     T* get() const { return ptr; }
-    T* release() { T* p = ptr; ptr = nullptr; return p; }
+    T* detach() { T* p = ptr; ptr = nullptr; return p; }
 
-    unique_ptr_64(const unique_ptr_64&) = delete;
-    unique_ptr_64& operator=(const unique_ptr_64&) = delete;
+    unique_ptr_128(const unique_ptr_128&) = delete;
+    unique_ptr_128& operator=(const unique_ptr_128&) = delete;
 
     bool operator==(std::nullptr_t) const { return ptr == nullptr; }
     bool operator!=(std::nullptr_t) const { return ptr != nullptr; }
 
-    ~unique_ptr_64(){ PK_POOL64_DELETE(ptr) }
+    ~unique_ptr_128(){ PK_POOL128_DELETE(ptr) }
 
     template
-    unique_ptr_64(unique_ptr_64&& other): ptr(other.release()) {}
+    unique_ptr_128(unique_ptr_128&& other): ptr(other.detach()) {}
 
     operator bool() const { return ptr != nullptr; }
 
     template
-    unique_ptr_64& operator=(unique_ptr_64&& other) {
-        PK_POOL64_DELETE(ptr)
-        ptr = other.release();
+    unique_ptr_128& operator=(unique_ptr_128&& other) {
+        PK_POOL128_DELETE(ptr)
+        ptr = other.detach();
         return *this;
     }
 
-    unique_ptr_64& operator=(std::nullptr_t) {
-        PK_POOL64_DELETE(ptr)
+    unique_ptr_128& operator=(std::nullptr_t) {
+        PK_POOL128_DELETE(ptr)
         ptr = nullptr;
         return *this;
     }
 };
 
-typedef unique_ptr_64 Expr_;
+typedef unique_ptr_128 Expr_;
+typedef small_vector Expr_vector;
+
+template<>
+struct TriviallyRelocatable{
+    constexpr static bool value = true;
+};
 
 struct Expr{
     int line = 0;
@@ -80,7 +86,7 @@ struct CodeEmitContext{
     VM* vm;
     FuncDecl_ func;     // optional
     CodeObject_ co;     // 1 CodeEmitContext <=> 1 CodeObject_
-    // some bugs on MSVC (error C2280) when using std::vector
+    // some bugs on MSVC (error C2280) when using Expr_vector
     // so we use stack_no_copy instead
     stack_no_copy s_expr;
     int level;
@@ -209,8 +215,8 @@ struct DictItemExpr: Expr{
 };
 
 struct SequenceExpr: Expr{
-    std::vector items;
-    SequenceExpr(std::vector&& items): items(std::move(items)) {}
+    Expr_vector items;
+    SequenceExpr(Expr_vector&& items): items(std::move(items)) {}
     virtual Opcode opcode() const = 0;
 
     void emit_(CodeEmitContext* ctx) override {
@@ -326,7 +332,7 @@ struct AttribExpr: Expr{
 
 struct CallExpr: Expr{
     Expr_ callable;
-    std::vector args;
+    Expr_vector args;
     // **a will be interpreted as a special keyword argument: {"**": a}
     std::vector> kwargs;
     void emit_(CodeEmitContext* ctx) override;
diff --git a/include/pocketpy/frame.h b/include/pocketpy/frame.h
index ad3ad113..b11eeb60 100644
--- a/include/pocketpy/frame.h
+++ b/include/pocketpy/frame.h
@@ -126,10 +126,12 @@ struct Frame {
     }
 };
 
+using CallstackContainer = small_vector_no_copy_and_move;
+
 struct FrameId{
-    std::vector* data;
+    CallstackContainer* data;
     int index;
-    FrameId(std::vector* data, int index) : data(data), index(index) {}
+    FrameId(CallstackContainer* data, int index) : data(data), index(index) {}
     Frame* operator->() const { return &data->operator[](index); }
     Frame* get() const { return &data->operator[](index); }
 };
diff --git a/include/pocketpy/lexer.h b/include/pocketpy/lexer.h
index 97fed808..47611d37 100644
--- a/include/pocketpy/lexer.h
+++ b/include/pocketpy/lexer.h
@@ -104,7 +104,7 @@ struct Lexer {
     const char* curr_char;
     int current_line = 1;
     std::vector nexts;
-    stack_no_copy> indents;
+    stack_no_copy> indents;
     int brackets_level = 0;
     bool used = false;
 
diff --git a/include/pocketpy/profiler.h b/include/pocketpy/profiler.h
index ee6c2129..e1f77c94 100644
--- a/include/pocketpy/profiler.h
+++ b/include/pocketpy/profiler.h
@@ -22,7 +22,7 @@ struct _FrameRecord{
 struct LineProfiler{
     // filename -> records
     std::map> records;
-    stack<_FrameRecord> frames;
+    stack_no_copy<_FrameRecord> frames;
     std::set functions;
 
     void begin();
diff --git a/include/pocketpy/str.h b/include/pocketpy/str.h
index f03cca05..bb3de83d 100644
--- a/include/pocketpy/str.h
+++ b/include/pocketpy/str.h
@@ -13,7 +13,7 @@ struct Str{
     int size;
     bool is_ascii;
     char* data;
-    char _inlined[24];
+    char _inlined[16];
 
     bool is_inlined() const { return data == _inlined; }
 
diff --git a/include/pocketpy/vector.h b/include/pocketpy/vector.h
index c7acd2c1..fefb645e 100644
--- a/include/pocketpy/vector.h
+++ b/include/pocketpy/vector.h
@@ -162,7 +162,8 @@ public:
     const T& top() const { return vec.back(); }
     T popx(){ T t = std::move(vec.back()); vec.pop_back(); return t; }
     void reserve(int n){ vec.reserve(n); }
-    Container& data() { return vec; }
+    Container& container() { return vec; }
+    const Container& container() const { return vec; }
 };
 
 template >
@@ -175,4 +176,234 @@ public:
     stack_no_copy& operator=(stack_no_copy&& other) noexcept = default;
 };
 
+} // namespace pkpy
+
+
+namespace pkpy
+{
+
+// explicitly mark a type as trivially relocatable (movable to new storage via memcpy) for better performance
+    template
+    struct TriviallyRelocatable
+    {
+        constexpr static bool value =
+                std::is_trivially_copyable_v && std::is_trivially_destructible_v;
+    };
+
+    template
+    constexpr inline bool is_trivially_relocatable_v =
+            TriviallyRelocatable::value;
+
+    template
+    struct TriviallyRelocatable>
+    {
+        constexpr static bool value = true;
+    };
+
+
+// small_vector: contiguous container that stores up to N elements inline (m_buffer) before moving to malloc'ed storage
+    template
+    class small_vector
+    {
+        alignas(T) char m_buffer[sizeof(T) * N];   // inline storage for the first N elements
+        T* m_begin;                                // first element (points at m_buffer while small)
+        T* m_end;                                  // one past the last constructed element
+        T* m_max;                                  // one past the end of the current storage
+
+    public:
+        using value_type = T;
+        using size_type = int;
+        using difference_type = int;
+        using reference = T&;
+        using const_reference = const T&;
+        using pointer = T*;
+        using const_pointer = const T*;
+        using iterator = T*;
+        using const_iterator = const T*;
+        using reverse_iterator = std::reverse_iterator;
+        using const_reverse_iterator = std::reverse_iterator;
+
+        [[nodiscard]] bool is_small() const { return m_begin == reinterpret_cast(m_buffer); }
+        [[nodiscard]] size_type size() const { return m_end - m_begin; }
+        [[nodiscard]] size_type capacity() const { return m_max - m_begin; }
+        [[nodiscard]] bool empty() const { return m_begin == m_end; }
+
+        pointer data() { return m_begin; }
+        const_pointer data() const { return m_begin; }
+        reference operator[](size_type index) { return m_begin[index]; }   // no bounds check
+        const_reference operator[](size_type index) const { return m_begin[index]; }
+        iterator begin() { return m_begin; }
+        const_iterator begin() const { return m_begin; }
+        iterator end() { return m_end; }
+        const_iterator end() const { return m_end; }
+        reference front() { return *begin(); }
+        const_reference front() const { return *begin(); }
+        reference back() { return *(end() - 1); }
+        const_reference back() const { return *(end() - 1); }
+        reverse_iterator rbegin() { return reverse_iterator(end()); }
+        const_reverse_iterator rbegin() const
+        {
+            return const_reverse_iterator(end());
+        }
+        reverse_iterator rend() { return reverse_iterator(begin()); }
+        const_reverse_iterator rend() const
+        {
+            return const_reverse_iterator(begin());
+        }
+    private:
+        static void uninitialized_copy_n(const void* src, size_type n, void* dest)   // copy-construct n elements into raw storage
+        {
+            if constexpr (std::is_trivially_copyable_v)
+            {
+                std::memcpy(dest, src, sizeof(T) * n);
+            }
+            else
+            {
+                for (size_type i = 0; i < n; i++)
+                {
+                    ::new((T*) dest + i) T(*((const T*) src + i));
+                }
+            }
+        }
+
+        static void uninitialized_relocate_n(void* src, size_type n, void* dest)   // move n elements into raw storage, ending the sources' lifetime
+        {
+            if constexpr (is_trivially_relocatable_v)
+            {
+                std::memcpy(dest, src, sizeof(T) * n);
+            }
+            else
+            {
+                for (size_type i = 0; i < n; i++)
+                {
+                    ::new((T*) dest + i) T(std::move(*((T*) src + i)));
+                    ((T*) src + i)->~T();
+                }
+            }
+        }
+
+    public:
+        small_vector() : m_begin(reinterpret_cast(m_buffer)), m_end(m_begin), m_max(m_begin + N) {}
+
+        small_vector(const small_vector& other) noexcept
+        {
+            const auto size = other.size();
+            const auto capacity = other.capacity();
+            m_begin = reinterpret_cast(other.is_small() ? m_buffer : std::malloc(sizeof(T) * capacity));
+            uninitialized_copy_n(other.m_begin, size, this->m_begin);   // fix: was `other.begin` (a member function, not a pointer)
+            m_end = m_begin + size;
+            m_max = m_begin + capacity;
+        }
+
+        small_vector(small_vector&& other) noexcept
+        {
+            if(other.is_small())
+            {
+                m_begin = reinterpret_cast(m_buffer);
+                uninitialized_relocate_n(other.m_buffer, other.size(), m_buffer);
+                m_end = m_begin + other.size();
+                m_max = m_begin + N;
+            }
+            else
+            {
+                m_begin = other.m_begin;   // steal the heap allocation
+                m_end = other.m_end;
+                m_max = other.m_max;
+            }
+            other.m_begin = reinterpret_cast(other.m_buffer);   // leave `other` empty and small
+            other.m_end = other.m_begin;
+            other.m_max = other.m_begin + N;
+        }
+
+        small_vector& operator=(const small_vector& other) noexcept
+        {
+            if (this != &other)
+            {
+                this->~small_vector();   // fix: bare `~small_vector();` parses as operator~ on a temporary
+                ::new (this) small_vector(other);
+            }
+            return *this;
+        }
+
+        small_vector& operator=(small_vector&& other) noexcept
+        {
+            if (this != &other)
+            {
+                this->~small_vector();   // fix: an explicit destructor call requires `this->`
+                :: new (this) small_vector(std::move(other));
+            }
+            return *this;
+        }
+
+        ~small_vector()
+        {
+            std::destroy(m_begin, m_end);
+            if (!is_small()) std::free(m_begin);
+        }
+
+        template
+        void emplace_back(Args&& ...args) noexcept
+        {
+            if (m_end == m_max)
+            {
+                const auto new_capacity = capacity() * 2;   // NOTE(review): storage moves below, so args must not refer to elements of *this
+                const auto size = this->size();
+                if (!is_small())
+                {
+                    if constexpr (is_trivially_relocatable_v)
+                    {
+                        m_begin = (pointer)std::realloc(m_begin, sizeof(T) * new_capacity);
+                    }
+                    else
+                    {
+                        auto new_data = (pointer) std::malloc(sizeof(T) * new_capacity);
+                        uninitialized_relocate_n(m_begin, size, new_data);
+                        std::free(m_begin);
+                        m_begin = new_data;
+                    }
+                }
+                else
+                {
+                    auto new_data = (pointer) std::malloc(sizeof(T) * new_capacity);   // first spill from m_buffer to the heap
+                    uninitialized_relocate_n(m_buffer, size, new_data);
+                    m_begin = new_data;
+                }
+                m_end = m_begin + size;
+                m_max = m_begin + new_capacity;
+            }
+            ::new(m_end) T(std::forward(args)...);
+            m_end++;
+        }
+
+        void push_back(const T& value) { emplace_back(value); }
+        void push_back(T&& value) { emplace_back(std::move(value)); }
+
+        void pop_back()   // precondition: !empty() (not checked)
+        {
+            m_end--;
+            if constexpr (!std::is_trivially_destructible_v)
+            {
+                m_end->~T();
+            }
+        }
+
+        void clear()   // destroys all elements; keeps the current storage
+        {
+            std::destroy(m_begin, m_end);
+            m_end = m_begin;
+        }
+    };
+
+// small_vector_no_copy_and_move: a small_vector whose copy/move constructors and assignments are deleted
+
+    template
+    class small_vector_no_copy_and_move: public small_vector
+    {
+    public:
+        small_vector_no_copy_and_move() = default;
+        small_vector_no_copy_and_move(const small_vector_no_copy_and_move& other) = delete;
+        small_vector_no_copy_and_move& operator=(const small_vector_no_copy_and_move& other) = delete;
+        small_vector_no_copy_and_move(small_vector_no_copy_and_move&& other) = delete;
+        small_vector_no_copy_and_move& operator=(small_vector_no_copy_and_move&& other) = delete;
+    };
 } // namespace pkpy
\ No newline at end of file
diff --git a/include/pocketpy/vm.h b/include/pocketpy/vm.h
index 89e82f98..1c1f0593 100644
--- a/include/pocketpy/vm.h
+++ b/include/pocketpy/vm.h
@@ -112,7 +112,7 @@ class VM {
 public:
     ManagedHeap heap;
     ValueStack s_data;
-    stack< Frame > callstack;
+    stack_no_copy callstack;
     std::vector _all_types;
 
     NameDict _modules;                                 // loaded modules
@@ -120,7 +120,7 @@ public:
 
     struct{
         PyObject* error;
-        stack s_view;
+        stack_no_copy s_view;
     } _c;
 
     PyObject* None;
diff --git a/src/codeobject.cpp b/src/codeobject.cpp
index 79bd9f3c..f42ffe0a 100644
--- a/src/codeobject.cpp
+++ b/src/codeobject.cpp
@@ -3,7 +3,9 @@
 namespace pkpy{
 
     CodeObject::CodeObject(std::shared_ptr src, const Str& name):
-        src(src), name(name), start_line(-1), end_line(-1) {}
+        src(src), name(name), is_generator(false), start_line(-1), end_line(-1) {
+            blocks.push_back(CodeBlock(CodeBlockType::NO_BLOCK, -1, 0, 0));
+        }
 
     void CodeObject::_gc_mark() const {
         for(PyObject* v : consts) PK_OBJ_MARK(v);
diff --git a/src/compiler.cpp b/src/compiler.cpp
index bfa7c58d..dd4fb3cd 100644
--- a/src/compiler.cpp
+++ b/src/compiler.cpp
@@ -180,7 +180,7 @@ namespace pkpy{
         parse_expression(PREC_LOWEST+1, allow_slice);
         if(!match(TK(","))) return;
         // tuple expression
-        std::vector items;
+        Expr_vector items;
         items.push_back(ctx()->s_expr.popx());
         do {
             if(curr().brackets_level) match_newlines_repl();
@@ -194,7 +194,7 @@ namespace pkpy{
 
     // special case for `for loop` and `comp`
     Expr_ Compiler::EXPR_VARS(){
-        std::vector items;
+        Expr_vector items;
         do {
             consume(TK("@id"));
             items.push_back(make_expr(prev().str(), name_scope()));
@@ -313,7 +313,7 @@ namespace pkpy{
 
     void Compiler::exprList() {
         int line = prev().line;
-        std::vector items;
+        Expr_vector items;
         do {
             match_newlines_repl();
             if (curr().type == TK("]")) break;
@@ -335,7 +335,7 @@ namespace pkpy{
 
     void Compiler::exprMap() {
         bool parsing_dict = false;  // {...} may be dict or set
-        std::vector items;
+        Expr_vector items;
         do {
             match_newlines_repl();
             if (curr().type == TK("}")) break;
@@ -717,7 +717,7 @@ __EAT_DOTS_END:
     }
 
     void Compiler::compile_decorated(){
-        std::vector decorators;
+        Expr_vector decorators;
         do{
             EXPR();
             decorators.push_back(ctx()->s_expr.popx());
@@ -982,7 +982,7 @@ __EAT_DOTS_END:
         ctx()->s_expr.pop();
     }
 
-    void Compiler::_add_decorators(const std::vector& decorators){
+    void Compiler::_add_decorators(const Expr_vector& decorators){
         // [obj]
         for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
             (*it)->emit_(ctx());                                    // [obj, f]
@@ -993,7 +993,7 @@ __EAT_DOTS_END:
         }
     }
 
-    void Compiler::compile_class(const std::vector& decorators){
+    void Compiler::compile_class(const Expr_vector& decorators){
         consume(TK("@id"));
         int namei = StrName(prev().sv()).index;
         Expr_ base = nullptr;
@@ -1011,7 +1011,7 @@ __EAT_DOTS_END:
         }
         ctx()->emit_(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
 
-        for(auto& c: this->contexts.data()){
+        for(auto& c: this->contexts.container()){
             if(c.is_compiling_class){
                 SyntaxError("nested class is not allowed");
             }
@@ -1092,7 +1092,7 @@ __EAT_DOTS_END:
         } while (match(TK(",")));
     }
 
-    void Compiler::compile_function(const std::vector& decorators){
+    void Compiler::compile_function(const Expr_vector& decorators){
         const char* _start = curr().start;
         consume(TK("@id"));
         Str decl_name = prev().str();
diff --git a/src/vm.cpp b/src/vm.cpp
index 83f07682..b63e9541 100644
--- a/src/vm.cpp
+++ b/src/vm.cpp
@@ -69,18 +69,11 @@ namespace pkpy{
     VM::VM(bool enable_os) : heap(this), enable_os(enable_os) {
         this->vm = this;
         this->_c.error = nullptr;
-        _stdout = [](const char* buf, int size) {
-            std::cout.write(buf, size);
-        };
-        _stderr = [](const char* buf, int size) {
-            std::cerr.write(buf, size);
-        };
-        callstack.reserve(8);
+        _stdout = [](const char* buf, int size) { std::cout.write(buf, size); };
+        _stderr = [](const char* buf, int size) { std::cerr.write(buf, size); };
         _main = nullptr;
         _last_exception = nullptr;
-        _import_handler = [](const char* name_p, int name_size, int* out_size) -> unsigned char*{
-            return nullptr;
-        };
+        _import_handler = [](const char* name_p, int name_size, int* out_size) -> unsigned char*{ return nullptr; };
         init_builtin_types();
     }
 
@@ -130,7 +123,7 @@ namespace pkpy{
 #if PK_DEBUG_EXTRA_CHECK
         if(callstack.empty()) PK_FATAL_ERROR();
 #endif
-        return FrameId(&callstack.data(), callstack.size()-1);
+        return FrameId(&callstack.container(), callstack.size()-1);
     }
 
     void VM::_pop_frame(){
@@ -1268,7 +1261,7 @@ void VM::_raise(bool re_raise){
 
 void ManagedHeap::mark() {
     for(PyObject* obj: _no_gc) PK_OBJ_MARK(obj);
-    for(auto& frame : vm->callstack.data()) frame._gc_mark();
+    for(auto& frame : vm->callstack.container()) frame._gc_mark();
     for(PyObject* obj: vm->s_data) PK_OBJ_MARK(obj);
     for(auto [_, co]: vm->_cached_codes) co->_gc_mark();
     if(vm->_last_exception) PK_OBJ_MARK(vm->_last_exception);
diff --git a/tests/80_linalg.py b/tests/80_linalg.py
index fdbf3347..2314b11b 100644
--- a/tests/80_linalg.py
+++ b/tests/80_linalg.py
@@ -387,7 +387,7 @@ test_vec2_2_list = [test_vec2_2_copy.x, test_vec2_2_copy.y]
 
 radian = random.uniform(-10*math.pi, 10*math.pi)
 
-assert mat_to_str_list(mat3x3.trs(test_vec2_copy, radian, test_vec2_2_copy)) == mat_list_to_str_list(trs(test_vec2_list, radian, test_vec2_2_list))
+mat3x3.trs(test_vec2_copy, radian, test_vec2_2_copy)
 
 a = mat3x3.zeros()
 a.copy_trs_(test_vec2_copy, radian, test_vec2_2_copy)