From 1afef9dcb98d357d2027756c2d0b0af4e6f763c5 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 14 Oct 2023 14:30:15 +0800 Subject: [PATCH] reduce bytecode size --- include/pocketpy/codeobject.h | 12 ++++++++---- include/pocketpy/config.h | 2 +- include/pocketpy/expr.h | 2 +- src/ceval.cpp | 9 +++++---- src/expr.cpp | 25 ++++++++++++------------- src/frame.cpp | 11 +++++------ src/vm.cpp | 2 +- 7 files changed, 33 insertions(+), 30 deletions(-) diff --git a/include/pocketpy/codeobject.h b/include/pocketpy/codeobject.h index db91cfa4..1505fc46 100644 --- a/include/pocketpy/codeobject.h +++ b/include/pocketpy/codeobject.h @@ -21,8 +21,7 @@ inline const char* OP_NAMES[] = { struct Bytecode{ uint8_t op; - uint16_t block; - int arg; + uint16_t arg; }; enum CodeBlockType { @@ -33,7 +32,7 @@ enum CodeBlockType { TRY_EXCEPT, }; -inline const int BC_NOARG = -1; +inline const uint8_t BC_NOARG = 0; inline const int BC_KEEPLINE = -1; struct CodeBlock { @@ -103,7 +102,8 @@ struct CodeObject { bool is_generator = false; std::vector codes; - std::vector lines; // line number for each bytecode + std::vector iblocks; // block index for each bytecode + std::vector lines; // line number for each bytecode List consts; std::vector varnames; // local variables NameDictInt varnames_inv; @@ -111,6 +111,10 @@ struct CodeObject { NameDictInt labels; std::vector func_decls; + const CodeBlock& _get_block_codei(int codei) const{ + return blocks[iblocks[codei]]; + } + CodeObject(std::shared_ptr src, const Str& name); void _gc_mark() const; void write(VM* vm, CodeObjectSerializer& ss) const; diff --git a/include/pocketpy/config.h b/include/pocketpy/config.h index 54723171..59fbba5f 100644 --- a/include/pocketpy/config.h +++ b/include/pocketpy/config.h @@ -52,7 +52,7 @@ // This is the maximum number of arguments in a function declaration // including positional arguments, keyword-only arguments, and varargs -// (not recommended to change this) +// (not recommended to change this / it should be less than 200) #define PK_MAX_CO_VARNAMES 32 namespace pkpy{ diff --git a/include/pocketpy/expr.h b/include/pocketpy/expr.h index 1283fa5c..79b0a4a0 100644 --- a/include/pocketpy/expr.h +++ b/include/pocketpy/expr.h @@ -60,7 +60,7 @@ struct CodeEmitContext{ void exit_block(); void emit_expr(); // clear the expression stack and generate bytecode std::string _log_s_expr(); - int emit(Opcode opcode, int arg, int line); + int emit(Opcode opcode, uint16_t arg, int line); void patch_jump(int index); bool add_label(StrName name); int add_varname(StrName name); diff --git a/src/ceval.cpp b/src/ceval.cpp index 2da10e26..627f6a0c 100644 --- a/src/ceval.cpp +++ b/src/ceval.cpp @@ -88,7 +88,7 @@ __NEXT_STEP:; TARGET(LOAD_NONE) PUSH(None); DISPATCH(); TARGET(LOAD_TRUE) PUSH(True); DISPATCH(); TARGET(LOAD_FALSE) PUSH(False); DISPATCH(); - TARGET(LOAD_INTEGER) PUSH(VAR(byte.arg)); DISPATCH(); + TARGET(LOAD_INTEGER) PUSH(VAR((int16_t)byte.arg)); DISPATCH(); TARGET(LOAD_ELLIPSIS) PUSH(Ellipsis); DISPATCH(); TARGET(LOAD_FUNCTION) { FuncDecl_ decl = co->func_decls[byte.arg]; @@ -511,8 +511,8 @@ __NEXT_STEP:; DISPATCH(); TARGET(CALL) _0 = vectorcall( - byte.arg & 0xFFFF, // ARGC - (byte.arg>>16) & 0xFFFF, // KWARGC + byte.arg & 0xFF, // ARGC + (byte.arg>>8) & 0xFF, // KWARGC true ); if(_0 == PY_OP_CALL) DISPATCH_OP_CALL(); @@ -600,7 +600,8 @@ __NEXT_STEP:; if(_0 != StopIteration){ PUSH(_0); }else{ - frame->jump_abs_break(co_blocks[byte.block].end); + // TODO: optimize this + frame->jump_abs_break(co->_get_block_codei(frame->_ip).end); } DISPATCH(); /*****************************************/ diff --git a/src/expr.cpp b/src/expr.cpp index 4a15dd11..68de4ab1 100644 --- a/src/expr.cpp +++ b/src/expr.cpp @@ -50,10 +50,9 @@ namespace pkpy{ return ss.str(); } - int CodeEmitContext::emit(Opcode opcode, int arg, int line) { - co->codes.push_back( - Bytecode{(uint8_t)opcode, (uint16_t)curr_block_i, arg} - ); + int CodeEmitContext::emit(Opcode opcode, uint16_t arg, int line) { + co->codes.push_back(Bytecode{(uint8_t)opcode, arg}); + co->iblocks.push_back(curr_block_i); co->lines.push_back(line); int i = co->codes.size() - 1; if(line==BC_KEEPLINE){ @@ -75,6 +74,7 @@ namespace pkpy{ } int CodeEmitContext::add_varname(StrName name){ + // PK_MAX_CO_VARNAMES will be checked when pop_context(), not here int index = co->varnames_inv.try_get(name); if(index >= 0) return index; co->varnames.push_back(name); @@ -143,8 +143,7 @@ namespace pkpy{ bool NameExpr::emit_store(CodeEmitContext* ctx) { if(ctx->is_compiling_class){ - int index = name.index; - ctx->emit(OP_STORE_CLASS_ATTR, index, line); + ctx->emit(OP_STORE_CLASS_ATTR, name.index, line); return true; } ctx->emit_store_name(scope, name, line); @@ -214,7 +213,7 @@ namespace pkpy{ if(std::holds_alternative(value)){ i64 _val = std::get(value); if(_val >= INT16_MIN && _val <= INT16_MAX){ - ctx->emit(OP_LOAD_INTEGER, (int)_val, line); + ctx->emit(OP_LOAD_INTEGER, (uint16_t)_val, line); return; } obj = VAR(_val); @@ -237,7 +236,7 @@ namespace pkpy{ if(std::holds_alternative(lit->value)){ i64 _val = -std::get(lit->value); if(_val >= INT16_MIN && _val <= INT16_MAX){ - ctx->emit(OP_LOAD_INTEGER, (int)_val, line); + ctx->emit(OP_LOAD_INTEGER, (uint16_t)_val, line); }else{ ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(_val)), line); } @@ -484,7 +483,7 @@ namespace pkpy{ if(vargs || vkwargs){ for(auto& item: args) item->emit(ctx); - ctx->emit(OP_BUILD_TUPLE_UNPACK, (int)args.size(), line); + ctx->emit(OP_BUILD_TUPLE_UNPACK, (uint16_t)args.size(), line); if(!kwargs.empty()){ for(auto& item: kwargs){ @@ -508,13 +507,13 @@ namespace pkpy{ // vectorcall protocal for(auto& item: args) item->emit(ctx); for(auto& item: kwargs){ - int index = StrName(item.first.sv()).index; + uint16_t index = StrName(item.first.sv()).index; ctx->emit(OP_LOAD_INTEGER, index, line); item.second->emit(ctx); } - int KWARGC = (int)kwargs.size(); - int ARGC = (int)args.size(); - ctx->emit(OP_CALL, (KWARGC<<16)|ARGC, line); + int KWARGC = kwargs.size(); + int ARGC = args.size(); + ctx->emit(OP_CALL, (KWARGC<<8)|ARGC, line); } } diff --git a/src/frame.cpp b/src/frame.cpp index 8beefaf4..74cc5039 100644 --- a/src/frame.cpp +++ b/src/frame.cpp @@ -25,7 +25,7 @@ namespace pkpy{ bool Frame::jump_to_exception_handler(){ // try to find a parent try block - int block = co->codes[_ip].block; + int block = co->iblocks[_ip]; while(block >= 0){ if(co->blocks[block].type == TRY_EXCEPT) break; block = co->blocks[block].parent; @@ -47,8 +47,7 @@ namespace pkpy{ } void Frame::jump_abs_break(int target){ - const Bytecode& prev = co->codes[_ip]; - int i = prev.block; + int i = co->iblocks[_ip]; _next_ip = target; if(_next_ip >= co->codes.size()){ while(i>=0) i = _exit_block(i); @@ -58,9 +57,9 @@ namespace pkpy{ // _ = 0 // # if there is no op here, the block check will fail // while i: --i - const Bytecode& next = co->codes[target]; - while(i>=0 && i!=next.block) i = _exit_block(i); - if(i!=next.block) throw std::runtime_error("invalid jump"); + int next_block = co->iblocks[target]; + while(i>=0 && i!=next_block) i = _exit_block(i); + if(i!=next_block) throw std::runtime_error("invalid jump"); } } diff --git a/src/vm.cpp b/src/vm.cpp index 97984ed5..2fe919f8 100644 --- a/src/vm.cpp +++ b/src/vm.cpp @@ -513,7 +513,7 @@ PyObject* VM::new_module(Str name, Str package) { } static std::string _opcode_argstr(VM* vm, Bytecode byte, const CodeObject* co){ - std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); + std::string argStr = byte.arg == BC_NOARG ? "" : std::to_string(byte.arg); switch(byte.op){ case OP_LOAD_CONST: case OP_FORMAT_STRING: case OP_IMPORT_PATH: if(vm != nullptr){