From ea8dd3f0e82fcc597f38ac6f37807e903b4169f4 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 24 May 2024 22:01:48 +0800 Subject: [PATCH] use relative jump --- include/pocketpy/codeobject.h | 9 +++++++ include/pocketpy/expr.h | 2 +- include/pocketpy/opcodes.h | 5 ++-- include/pocketpy/vm.h | 1 + src/ceval.cpp | 45 +++++++++++++++++++---------------- src/compiler.cpp | 26 ++++++++++---------- src/expr.cpp | 27 +++++++++++---------- src/vm.cpp | 42 ++++++++++++++++---------------- 8 files changed, 84 insertions(+), 73 deletions(-) diff --git a/include/pocketpy/codeobject.h b/include/pocketpy/codeobject.h index c01123e0..2d2aa468 100644 --- a/include/pocketpy/codeobject.h +++ b/include/pocketpy/codeobject.h @@ -17,6 +17,15 @@ enum Opcode: uint8_t { struct Bytecode{ uint8_t op; uint16_t arg; + + void set_signed_arg(int arg){ + if(arg < INT16_MIN || arg > INT16_MAX) throw std::runtime_error("byte.arg overflow"); + this->arg = (int16_t)arg; + } + + bool is_forward_jump() const{ + return op >= OP_JUMP_FORWARD && op <= OP_LOOP_BREAK; + } }; enum class CodeBlockType { diff --git a/include/pocketpy/expr.h b/include/pocketpy/expr.h index 082fec37..396aec58 100644 --- a/include/pocketpy/expr.h +++ b/include/pocketpy/expr.h @@ -93,7 +93,7 @@ struct CodeEmitContext{ std::set global_names; CodeEmitContext(VM* vm, CodeObject_ co, int level): vm(vm), co(co), level(level) {} - int curr_block_i = 0; + int curr_iblock = 0; bool is_compiling_class = false; int base_stack_size = 0; diff --git a/include/pocketpy/opcodes.h b/include/pocketpy/opcodes.h index b10fe130..883518f7 100644 --- a/include/pocketpy/opcodes.h +++ b/include/pocketpy/opcodes.h @@ -87,8 +87,7 @@ OPCODE(IS_OP) OPCODE(IS_NOT_OP) OPCODE(CONTAINS_OP) /**************************/ -OPCODE(JUMP_ABSOLUTE) -OPCODE(JUMP_ABSOLUTE_TOP) +OPCODE(JUMP_FORWARD) OPCODE(POP_JUMP_IF_FALSE) OPCODE(POP_JUMP_IF_TRUE) OPCODE(JUMP_IF_TRUE_OR_POP) @@ -96,6 +95,8 @@ OPCODE(JUMP_IF_FALSE_OR_POP) OPCODE(SHORTCUT_IF_FALSE_OR_POP) OPCODE(LOOP_CONTINUE) OPCODE(LOOP_BREAK) +/***/ +OPCODE(JUMP_ABSOLUTE_TOP) OPCODE(GOTO) /**************************/ OPCODE(FSTRING_EVAL) diff --git a/include/pocketpy/vm.h b/include/pocketpy/vm.h index cfd0de2b..154416a9 100644 --- a/include/pocketpy/vm.h +++ b/include/pocketpy/vm.h @@ -407,6 +407,7 @@ public: PyVar _t(PyVar obj){ return _all_types[_tp(obj)].obj; } PyVar _t(Type type){ return _all_types[type].obj; } + // equivalent to `obj == NotImplemented` but faster static bool is_not_implemented(PyVar obj){ return obj.type == tp_not_implemented; } #endif diff --git a/src/ceval.cpp b/src/ceval.cpp index b5fa3e2c..df032f38 100644 --- a/src/ceval.cpp +++ b/src/ceval.cpp @@ -95,7 +95,8 @@ bool VM::py_ge(PyVar _0, PyVar _1){ #endif #define DISPATCH() { frame->_ip++; goto __NEXT_STEP; } -#define DISPATCH_JUMP(__target) { frame->_ip=co_codes+__target; goto __NEXT_STEP; } +#define DISPATCH_JUMP(__offset) { frame->_ip+=__offset; goto __NEXT_STEP; } +#define DISPATCH_JUMP_ABSOLUTE(__target) { frame->_ip=co_codes+__target; goto __NEXT_STEP; } PyVar VM::__run_top_frame(){ Frame* frame = &callstack.top(); @@ -638,26 +639,24 @@ __NEXT_STEP: TOP() = VAR(static_cast((int)CAST(bool, _0) ^ byte.arg)); } DISPATCH() /*****************************************/ - case OP_JUMP_ABSOLUTE: - DISPATCH_JUMP(byte.arg) - case OP_JUMP_ABSOLUTE_TOP: - DISPATCH_JUMP(_CAST(int, POPX())) + case OP_JUMP_FORWARD: + DISPATCH_JUMP((int16_t)byte.arg) case OP_POP_JUMP_IF_FALSE: - if(!py_bool(POPX())) DISPATCH_JUMP(byte.arg) + if(!py_bool(POPX())) DISPATCH_JUMP((int16_t)byte.arg) DISPATCH() case OP_POP_JUMP_IF_TRUE: - if(py_bool(POPX())) DISPATCH_JUMP(byte.arg) + if(py_bool(POPX())) DISPATCH_JUMP((int16_t)byte.arg) DISPATCH() case OP_JUMP_IF_TRUE_OR_POP: if(py_bool(TOP())){ - DISPATCH_JUMP(byte.arg) + DISPATCH_JUMP((int16_t)byte.arg) }else{ POP(); DISPATCH() } case OP_JUMP_IF_FALSE_OR_POP: if(!py_bool(TOP())){ - DISPATCH_JUMP(byte.arg) + DISPATCH_JUMP((int16_t)byte.arg) }else{ POP(); DISPATCH() @@ -666,22 +665,26 @@ __NEXT_STEP: if(!py_bool(TOP())){ // [b, False] STACK_SHRINK(2); // [] PUSH(vm->False); // [False] - DISPATCH_JUMP(byte.arg) + DISPATCH_JUMP((int16_t)byte.arg) } else{ POP(); // [b] DISPATCH() } case OP_LOOP_CONTINUE: - DISPATCH_JUMP(byte.arg) - case OP_LOOP_BREAK: - frame->prepare_jump_break(&s_data, byte.arg); - DISPATCH_JUMP(byte.arg) + // just an alias of OP_JUMP_FORWARD + DISPATCH_JUMP((int16_t)byte.arg) + case OP_LOOP_BREAK: { + frame->prepare_jump_break(&s_data, frame->ip()+byte.arg); + DISPATCH_JUMP((int16_t)byte.arg) + } + case OP_JUMP_ABSOLUTE_TOP: + DISPATCH_JUMP_ABSOLUTE(_CAST(int, POPX())) case OP_GOTO: { StrName _name(byte.arg); int target = frame->co->labels.try_get_likely_found(_name); if(target < 0) RuntimeError(_S("label ", _name.escape(), " not found")); frame->prepare_jump_break(&s_data, target); - DISPATCH_JUMP(target) + DISPATCH_JUMP_ABSOLUTE(target) } /*****************************************/ case OP_FSTRING_EVAL:{ @@ -803,7 +806,7 @@ __NEXT_STEP: PyVar _0 = py_next(TOP()); if(_0 == StopIteration){ int target = frame->prepare_loop_break(&s_data); - DISPATCH_JUMP(target) + DISPATCH_JUMP_ABSOLUTE(target) } else{ PUSH(_0); DISPATCH() @@ -813,7 +816,7 @@ __NEXT_STEP: PyVar _0 = py_next(TOP()); if(_0 == StopIteration){ int target = frame->prepare_loop_break(&s_data); - DISPATCH_JUMP(target) + DISPATCH_JUMP_ABSOLUTE(target) }else{ frame->_locals[byte.arg] = _0; DISPATCH() @@ -823,7 +826,7 @@ __NEXT_STEP: PyVar _0 = py_next(TOP()); if(_0 == StopIteration){ int target = frame->prepare_loop_break(&s_data); - DISPATCH_JUMP(target) + DISPATCH_JUMP_ABSOLUTE(target) }else{ frame->f_globals().set(StrName(byte.arg), _0); DISPATCH() @@ -833,7 +836,7 @@ __NEXT_STEP: PyVar _0 = py_next(TOP()); if(_0 == StopIteration){ int target = frame->prepare_loop_break(&s_data); - DISPATCH_JUMP(target) + DISPATCH_JUMP_ABSOLUTE(target) }else{ PUSH(_0); return PY_OP_YIELD; @@ -847,7 +850,7 @@ __NEXT_STEP: if(n == 0){ // StopIteration int target = frame->prepare_loop_break(&s_data); - DISPATCH_JUMP(target) + DISPATCH_JUMP_ABSOLUTE(target) }else if(n == 1){ // UNPACK_SEQUENCE __op_unpack_sequence(byte.arg); @@ -865,7 +868,7 @@ __NEXT_STEP: __op_unpack_sequence(byte.arg); }else{ int target = frame->prepare_loop_break(&s_data); - DISPATCH_JUMP(target) + DISPATCH_JUMP_ABSOLUTE(target) } } } DISPATCH() diff --git a/src/compiler.cpp b/src/compiler.cpp index 34c96988..5fade046 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -47,17 +47,13 @@ namespace pkpy{ if(ctx()->co->consts.size() > 65530){ SyntaxError("maximum number of constants exceeded"); } - if(codes.size() > 65530 && ctx()->co->src->mode != JSON_MODE){ - // json mode does not contain jump instructions, so it is safe to ignore this check - SyntaxError("maximum number of opcodes exceeded"); - } // pre-compute LOOP_BREAK and LOOP_CONTINUE for(int i=0; ico->blocks[bc.arg].start; + bc.set_signed_arg(ctx()->co->blocks[bc.arg].start - i); }else if(bc.op == OP_LOOP_BREAK){ - bc.arg = ctx()->co->blocks[bc.arg].get_break_end(); + bc.set_signed_arg(ctx()->co->blocks[bc.arg].get_break_end() - i); } } // pre-compute func->is_simple @@ -655,12 +651,12 @@ __EAT_DOTS_END: int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); if (match(TK("elif"))) { - int exit_patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + int exit_patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, prev().line); ctx()->patch_jump(patch); compile_if_stmt(); ctx()->patch_jump(exit_patch); } else if (match(TK("else"))) { - int exit_patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + int exit_patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, prev().line); ctx()->patch_jump(patch); compile_block_body(); ctx()->patch_jump(exit_patch); @@ -691,7 +687,7 @@ __EAT_DOTS_END: EXPR_TUPLE(); ctx()->emit_expr(); ctx()->emit_(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); CodeBlock* block = ctx()->enter_block(CodeBlockType::FOR_LOOP); - int for_codei = ctx()->emit_(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); + int for_codei = ctx()->emit_(OP_FOR_ITER, ctx()->curr_iblock, BC_KEEPLINE); bool ok = vars->emit_store(ctx()); if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind ctx()->try_merge_for_iter_store(for_codei); @@ -710,7 +706,7 @@ __EAT_DOTS_END: compile_block_body(); small_vector_2 patches; patches.push_back( - ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) + ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE) ); ctx()->exit_block(); @@ -739,13 +735,13 @@ __EAT_DOTS_END: // pop the exception ctx()->emit_(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE); compile_block_body(); - patches.push_back(ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)); + patches.push_back(ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE)); ctx()->patch_jump(patch); }while(curr().type == TK("except")); } if(match(TK("finally"))){ - int patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE); + int patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE); finally_entry = ctx()->co->codes.size(); compile_block_body(); ctx()->emit_(OP_JUMP_ABSOLUTE_TOP, BC_NOARG, BC_KEEPLINE); @@ -755,7 +751,8 @@ __EAT_DOTS_END: if(finally_entry != -1){ i64 target = ctx()->co->codes.size()+2; ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE); - ctx()->emit_(OP_JUMP_ABSOLUTE, finally_entry, BC_KEEPLINE); + int i = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE); + ctx()->co->codes[i].set_signed_arg(finally_entry - i); } ctx()->emit_(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE); @@ -764,7 +761,8 @@ __EAT_DOTS_END: if(finally_entry != -1){ i64 target = ctx()->co->codes.size()+2; ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE); - ctx()->emit_(OP_JUMP_ABSOLUTE, finally_entry, BC_KEEPLINE); + int i = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE); + ctx()->co->codes[i].set_signed_arg(finally_entry - i); } } diff --git a/src/expr.cpp b/src/expr.cpp index 48413e54..4d5516a5 100644 --- a/src/expr.cpp +++ b/src/expr.cpp @@ -14,7 +14,7 @@ namespace pkpy{ } int CodeEmitContext::get_loop() const { - int index = curr_block_i; + int index = curr_iblock; while(index >= 0){ if(co->blocks[index].type == CodeBlockType::FOR_LOOP) break; if(co->blocks[index].type == CodeBlockType::WHILE_LOOP) break; @@ -26,18 +26,18 @@ namespace pkpy{ CodeBlock* CodeEmitContext::enter_block(CodeBlockType type){ if(type==CodeBlockType::FOR_LOOP || type==CodeBlockType::CONTEXT_MANAGER) base_stack_size++; co->blocks.push_back(CodeBlock( - type, curr_block_i, base_stack_size, (int)co->codes.size() + type, curr_iblock, base_stack_size, (int)co->codes.size() )); - curr_block_i = co->blocks.size()-1; - return &co->blocks[curr_block_i]; + curr_iblock = co->blocks.size()-1; + return &co->blocks[curr_iblock]; } void CodeEmitContext::exit_block(){ - auto curr_type = co->blocks[curr_block_i].type; + auto curr_type = co->blocks[curr_iblock].type; if(curr_type == CodeBlockType::FOR_LOOP || curr_type==CodeBlockType::CONTEXT_MANAGER) base_stack_size--; - co->blocks[curr_block_i].end = co->codes.size(); - curr_block_i = co->blocks[curr_block_i].parent; - if(curr_block_i < 0) PK_FATAL_ERROR(); + co->blocks[curr_iblock].end = co->codes.size(); + curr_iblock = co->blocks[curr_iblock].parent; + if(curr_iblock < 0) PK_FATAL_ERROR(); if(curr_type == CodeBlockType::FOR_LOOP){ // add a no op here to make block check work @@ -54,7 +54,7 @@ namespace pkpy{ int CodeEmitContext::emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual) { co->codes.push_back(Bytecode{(uint8_t)opcode, arg}); - co->lines.push_back(CodeObject::LineInfo{line, is_virtual, curr_block_i}); + co->lines.push_back(CodeObject::LineInfo{line, is_virtual, curr_iblock}); int i = co->codes.size() - 1; if(line == BC_KEEPLINE){ if(i >= 1) co->lines[i].lineno = co->lines[i-1].lineno; @@ -97,7 +97,7 @@ namespace pkpy{ void CodeEmitContext::patch_jump(int index) { int target = co->codes.size(); - co->codes[index].arg = target; + co->codes[index].set_signed_arg(target-index); } bool CodeEmitContext::add_label(StrName name){ @@ -399,7 +399,8 @@ namespace pkpy{ iter->emit_(ctx); ctx->emit_(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); ctx->enter_block(CodeBlockType::FOR_LOOP); - int for_codei = ctx->emit_(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); + int curr_iblock = ctx->curr_iblock; + int for_codei = ctx->emit_(OP_FOR_ITER, curr_iblock, BC_KEEPLINE); bool ok = vars->emit_store(ctx); // this error occurs in `vars` instead of this line, but...nevermind if(!ok) throw std::runtime_error("SyntaxError"); @@ -414,7 +415,7 @@ namespace pkpy{ expr->emit_(ctx); ctx->emit_(op1(), BC_NOARG, BC_KEEPLINE); } - ctx->emit_(OP_LOOP_CONTINUE, ctx->get_loop(), BC_KEEPLINE); + ctx->emit_(OP_LOOP_CONTINUE, curr_iblock, BC_KEEPLINE); ctx->exit_block(); } @@ -729,7 +730,7 @@ namespace pkpy{ cond->emit_(ctx); int patch = ctx->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, cond->line); true_expr->emit_(ctx); - int patch_2 = ctx->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, true_expr->line); + int patch_2 = ctx->emit_(OP_JUMP_FORWARD, BC_NOARG, true_expr->line); ctx->patch_jump(patch); false_expr->emit_(ctx); ctx->patch_jump(patch_2); diff --git a/src/vm.cpp b/src/vm.cpp index 5c9336f2..22e0e64a 100644 --- a/src/vm.cpp +++ b/src/vm.cpp @@ -710,29 +710,34 @@ PyVar VM::new_module(Str name, Str package) { return obj; } -static std::string _opcode_argstr(VM* vm, Bytecode byte, const CodeObject* co){ - std::string argStr = std::to_string(byte.arg); +static std::string _opcode_argstr(VM* vm, int i, Bytecode byte, const CodeObject* co){ + SStream ss; + if(byte.is_forward_jump()){ + std::string argStr = std::to_string((int16_t)byte.arg); + ss << (i64)(int16_t)byte.arg; + ss << " (to " << (i64)((int16_t)byte.arg + i) << ")"; + return ss.str().str(); + } + ss << (i64)byte.arg; switch(byte.op){ case OP_LOAD_CONST: case OP_FORMAT_STRING: case OP_IMPORT_PATH: - if(vm != nullptr){ - argStr += _S(" (", vm->py_repr(co->consts[byte.arg]), ")").sv(); - } + if(vm != nullptr) ss << " (" << vm->py_repr(co->consts[byte.arg]) << ")"; break; case OP_LOAD_NAME: case OP_LOAD_GLOBAL: case OP_LOAD_NONLOCAL: case OP_STORE_GLOBAL: case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR: case OP_BEGIN_CLASS: case OP_GOTO: case OP_DELETE_GLOBAL: case OP_INC_GLOBAL: case OP_DEC_GLOBAL: case OP_STORE_CLASS_ATTR: case OP_FOR_ITER_STORE_GLOBAL: - argStr += _S(" (", StrName(byte.arg).sv(), ")").sv(); + ss << " (" << StrName(byte.arg).sv() << ")"; break; case OP_LOAD_FAST: case OP_STORE_FAST: case OP_DELETE_FAST: case OP_INC_FAST: case OP_DEC_FAST: case OP_FOR_ITER_STORE_FAST: case OP_LOAD_SUBSCR_FAST: case OP_STORE_SUBSCR_FAST: - argStr += _S(" (", co->varnames[byte.arg].sv(), ")").sv(); + ss << " (" << co->varnames[byte.arg].sv() << ")"; break; case OP_LOAD_FUNCTION: - argStr += _S(" (", co->func_decls[byte.arg]->code->name, ")").sv(); + ss << " (" << co->func_decls[byte.arg]->code->name << ")"; break; } - return argStr; + return ss.str().str(); } Str VM::disassemble(CodeObject_ co){ @@ -742,14 +747,10 @@ Str VM::disassemble(CodeObject_ co){ }; std::vector jumpTargets; - for(auto byte : co->codes){ - if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE || byte.op == OP_SHORTCUT_IF_FALSE_OR_POP || byte.op == OP_LOOP_CONTINUE){ - jumpTargets.push_back(byte.arg); - } - if(byte.op == OP_GOTO){ - // TODO: pre-compute jump targets for OP_GOTO - int* target = co->labels.try_get_2_likely_found(StrName(byte.arg)); - if(target != nullptr) jumpTargets.push_back(*target); + for(int i=0; icodes.size(); i++){ + Bytecode byte = co->codes[i]; + if(byte.is_forward_jump()){ + jumpTargets.push_back((int16_t)byte.arg + i); } } SStream ss; @@ -773,11 +774,8 @@ Str VM::disassemble(CodeObject_ co){ std::string bc_name(OP_NAMES[byte.op]); if(co->lines[i].is_virtual) bc_name += '*'; ss << " " << pad(bc_name, 25) << " "; - // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5); - std::string argStr = _opcode_argstr(this, byte, co.get()); + std::string argStr = _opcode_argstr(this, i, byte, co.get()); ss << argStr; - // ss << pad(argStr, 40); // may overflow - // ss << co->blocks[byte.block].type; if(i != co->codes.size() - 1) ss << '\n'; } @@ -835,7 +833,7 @@ void VM::__log_s_data(const char* title) { output.pop_back(); output.pop_back(); } output.push_back(']'); - Bytecode byte = frame->co->codes[frame->_ip]; + Bytecode byte = *frame->_ip; std::cout << output << " " << OP_NAMES[byte.op] << " " << _opcode_argstr(nullptr, byte, frame->co) << std::endl; } #endif