From c607d11bd68ab76ac71db50a47dd5e626500e617 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 23:47:33 +0800 Subject: [PATCH] up up up up up --- src/codeobject.h | 20 -- src/common.h | 13 +- src/compiler.h | 756 ++++++++++++++++++++++++++++------------------- src/error.h | 15 +- src/expr.h | 149 ++++++++-- src/lexer.h | 10 +- src/main.cpp | 1 - src/vm.h | 46 +-- 8 files changed, 625 insertions(+), 385 deletions(-) diff --git a/src/codeobject.h b/src/codeobject.h index fcd503bb..b742dba6 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -92,26 +92,6 @@ struct CodeObject { void _mark() const { for(PyObject* v : consts) OBJ_MARK(v); } - - /************************************************/ - int _curr_block_i = 0; - int _rvalue = 0; - bool _is_compiling_class = false; - bool _is_curr_block_loop() const { - return blocks[_curr_block_i].type == FOR_LOOP || blocks[_curr_block_i].type == WHILE_LOOP; - } - - void _enter_block(CodeBlockType type){ - blocks.push_back(CodeBlock{type, _curr_block_i, (int)codes.size()}); - _curr_block_i = blocks.size()-1; - } - - void _exit_block(){ - blocks[_curr_block_i].end = codes.size(); - _curr_block_i = blocks[_curr_block_i].parent; - if(_curr_block_i < 0) UNREACHABLE(); - } - /************************************************/ }; diff --git a/src/common.h b/src/common.h index 1e022115..9ca65136 100644 --- a/src/common.h +++ b/src/common.h @@ -68,8 +68,7 @@ struct Type { operator int() const noexcept { return this->index; } }; -//#define THREAD_LOCAL thread_local -#define THREAD_LOCAL +#define THREAD_LOCAL // thread_local #define CPP_LAMBDA(x) ([](VM* vm, Args& args) { return x; }) #define CPP_NOT_IMPLEMENTED() ([](VM* vm, Args& args) { vm->NotImplementedError(); return vm->None; }) @@ -79,9 +78,9 @@ struct Type { #define UNREACHABLE() throw std::runtime_error( __FILE__ + std::string(":") + std::to_string(__LINE__) + " UNREACHABLE()!"); #endif -const float kLocalsLoadFactor = 0.67f; -const float kInstAttrLoadFactor = 0.67f; -const float kTypeAttrLoadFactor = 0.5f; +inline const float kLocalsLoadFactor = 0.67f; +inline const float kInstAttrLoadFactor = 0.67f; +inline const float kTypeAttrLoadFactor = 0.5f; static_assert(sizeof(i64) == sizeof(int*)); static_assert(sizeof(f64) == sizeof(int*)); @@ -114,7 +113,11 @@ public: size_t size() const { return vec.size(); } T& top(){ return vec.back(); } const T& top() const { return vec.back(); } + T popx(){ T t = std::move(vec.back()); vec.pop_back(); return t; } const std::vector& data() const { return vec; } }; +struct Expression; +typedef std::unique_ptr Expression_; + } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index 9510e316..bc58348f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -5,29 +5,82 @@ #include "lexer.h" #include "error.h" #include "ceval.h" +#include "expr.h" +#include "obj.h" +#include "str.h" namespace pkpy{ class Compiler; -typedef void (Compiler::*GrammarFn)(); -typedef void (Compiler::*CompilerAction)(); +typedef void (Compiler::*PrattCallback)(); -struct GrammarRule{ - GrammarFn prefix; - GrammarFn infix; +struct PrattRule{ + PrattCallback prefix; + PrattCallback infix; Precedence precedence; }; +struct CodeEmitContext{ + CodeObject_ co; + stack s_expr; + + CodeEmitContext(CodeObject_ co): co(co) {} + + int curr_block_i = 0; + bool is_compiling_class = false; + + bool is_curr_block_loop() const { + return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP; + } + + void enter_block(CodeBlockType type){ + co->blocks.push_back(CodeBlock{ + type, curr_block_i, (int)co->codes.size() + }); + curr_block_i = co->blocks.size()-1; + } + + void exit_block(){ + co->blocks[curr_block_i].end = co->codes.size(); + curr_block_i = co->blocks[curr_block_i].parent; + if(curr_block_i < 0) UNREACHABLE(); + } + + // clear the expression stack and generate bytecode + void emit_expr(){ + if(s_expr.size() != 1) UNREACHABLE(); + Expression_ expr = s_expr.popx(); + // emit + // ... + } +}; + class Compiler { std::unique_ptr lexer; - stack codes; + stack contexts; + std::map rules; bool used = false; VM* vm; - std::map rules; - CodeObject_ co() const{ return codes.top(); } + CodeObject* co() const{ return contexts.top().co.get(); } + CodeEmitContext* ctx() { return &contexts.top(); } CompileMode mode() const{ return lexer->src->mode; } - NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } + NameScope name_scope() const { return contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } + + template + CodeObject_ push_context(Args&&... args){ + CodeObject_ co = make_sp(std::forward(args)...); + contexts.push(CodeEmitContext(co)); + return co; + } + + void pop_context(){ + if(!ctx()->s_expr.empty()){ + ctx()->emit_expr(); + } + ctx()->co->optimize(vm); + contexts.pop(); + } public: Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ @@ -41,8 +94,8 @@ public: #define NO_INFIX nullptr, PREC_NONE for(TokenIndex i=0; i>=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("<<=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA }; + rules[TK(":")] = { nullptr, METHOD(exprSlice), PREC_SLICE }; rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; @@ -107,7 +161,7 @@ private: const Token& prev() { return tokens.at(i-1); } const Token& curr() { return tokens.at(i); } const Token& next() { return tokens.at(i+1); } - const Token& peek(int offset=0) { return tokens.at(i+offset); } + const Token& peek(int offset) { return tokens.at(i+offset); } void advance() { i++; } bool match(TokenIndex expected) { @@ -131,7 +185,7 @@ private: consumed = true; } if (repl_throw && curr().type == TK("@eof")){ - throw NeedMoreLines(co()->_is_compiling_class); + throw NeedMoreLines(ctx()->is_compiling_class); } return consumed; } @@ -159,238 +213,305 @@ private: } } - void exprLiteral() { - PyObject* value = get_value(prev()); - int index = co()->add_const(value); - emit(OP_LOAD_CONST, index); + void exprLiteral(){ + ctx()->s_expr.push( + std::make_unique(prev().value) + ); + // PyObject* value = get_value(prev()); + // int index = co()->add_const(value); + // emit(OP_LOAD_CONST, index); } - void exprFString() { - static const std::regex pattern(R"(\{(.*?)\})"); - PyObject* value = get_value(prev()); - Str s = CAST(Str, value); - std::sregex_iterator begin(s.begin(), s.end(), pattern); - std::sregex_iterator end; - int size = 0; - int i = 0; - for(auto it = begin; it != end; it++) { - std::smatch m = *it; - if (i < m.position()) { - std::string literal = s.substr(i, m.position() - i); - emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - size++; - } - emit(OP_LOAD_EVAL_FN); - emit(OP_LOAD_CONST, co()->add_const(VAR(m[1].str()))); - emit(OP_CALL, 1); - size++; - i = (int)(m.position() + m.length()); - } - if (i < s.size()) { - std::string literal = s.substr(i, s.size() - i); - emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - size++; - } - emit(OP_BUILD_STRING, size); + void exprFString(){ + ctx()->s_expr.push( + std::make_unique(std::get(prev().value)) + ); + // static const std::regex pattern(R"(\{(.*?)\})"); + // PyObject* value = get_value(prev()); + // Str s = CAST(Str, value); + // std::sregex_iterator begin(s.begin(), s.end(), pattern); + // std::sregex_iterator end; + // int size = 0; + // int i = 0; + // for(auto it = begin; it != end; it++) { + // std::smatch m = *it; + // if (i < m.position()) { + // std::string literal = s.substr(i, m.position() - i); + // emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); + // size++; + // } + // emit(OP_LOAD_EVAL_FN); + // emit(OP_LOAD_CONST, co()->add_const(VAR(m[1].str()))); + // emit(OP_CALL, 1); + // size++; + // i = (int)(m.position() + m.length()); + // } + // if (i < s.size()) { + // std::string literal = s.substr(i, s.size() - i); + // emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); + // size++; + // } + // emit(OP_BUILD_STRING, size); } - void exprLambda() { + void emit_expr(){} + + void exprLambda(){ Function func; func.name = ""; if(!match(TK(":"))){ _compile_f_args(func, false); consume(TK(":")); } - func.code = make_sp(lexer->src, func.name.str()); - this->codes.push(func.code); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; + func.code = push_context(lexer->src, func.name.str()); + EXPR(); + emit_expr(); emit(OP_RETURN_VALUE); - func.code->optimize(vm); - this->codes.pop(); - emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); - if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); + pop_context(); + + ctx()->s_expr.push( + std::make_unique(std::move(func), name_scope()) + ); + + // emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); + // if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); } - void exprAssign() { - if(co()->codes.empty()) UNREACHABLE(); - bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; - int _name_arg = co()->codes.back().arg; - // if the last op is OP_LOAD_NAME_REF, remove it - // because we will emit OP_STORE_NAME or OP_STORE_CLASS_ATTR - if(is_load_name_ref) co()->codes.pop_back(); - - co()->_rvalue += 1; + void exprAssign(){ + Expression_ lhs = ctx()->s_expr.popx(); TokenIndex op = prev().type; - if(op == TK("=")) { // a = (expr) - EXPR_TUPLE(); - if(is_load_name_ref){ - auto op = co()->_is_compiling_class ? OP_STORE_CLASS_ATTR : OP_STORE_NAME; - emit(op, _name_arg); - }else{ - if(co()->_is_compiling_class) SyntaxError(); - emit(OP_STORE_REF); - } - }else{ // a += (expr) -> a = a + (expr) - if(co()->_is_compiling_class) SyntaxError(); - if(is_load_name_ref){ - emit(OP_LOAD_NAME, _name_arg); - }else{ - emit(OP_DUP_TOP_VALUE); - } - EXPR(); - switch (op) { - case TK("+="): emit(OP_BINARY_OP, 0); break; - case TK("-="): emit(OP_BINARY_OP, 1); break; - case TK("*="): emit(OP_BINARY_OP, 2); break; - case TK("/="): emit(OP_BINARY_OP, 3); break; - case TK("//="): emit(OP_BINARY_OP, 4); break; - case TK("%="): emit(OP_BINARY_OP, 5); break; - case TK("<<="): emit(OP_BITWISE_OP, 0); break; - case TK(">>="): emit(OP_BITWISE_OP, 1); break; - case TK("&="): emit(OP_BITWISE_OP, 2); break; - case TK("|="): emit(OP_BITWISE_OP, 3); break; - case TK("^="): emit(OP_BITWISE_OP, 4); break; - default: UNREACHABLE(); - } - if(is_load_name_ref){ - emit(OP_STORE_NAME, _name_arg); - }else{ - emit(OP_STORE_REF); - } + EXPR_TUPLE(); + if(op == TK("=")){ + ctx()->s_expr.push( + std::make_unique(std::move(lhs), ctx()->s_expr.popx()) + ); + }else{ + // += -= ... + ctx()->s_expr.push( + std::make_unique(op, std::move(lhs), ctx()->s_expr.popx()) + ); } - co()->_rvalue -= 1; + + // if(co()->codes.empty()) UNREACHABLE(); + // bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; + // int _name_arg = co()->codes.back().arg; + // // if the last op is OP_LOAD_NAME_REF, remove it + // // because we will emit OP_STORE_NAME or OP_STORE_CLASS_ATTR + // if(is_load_name_ref) co()->codes.pop_back(); + + // co()->_rvalue += 1; + // TokenIndex op = prev().type; + // if(op == TK("=")) { // a = (expr) + // EXPR_TUPLE(); + // if(is_load_name_ref){ + // auto op = ctx()->is_compiling_class ? OP_STORE_CLASS_ATTR : OP_STORE_NAME; + // emit(op, _name_arg); + // }else{ + // if(ctx()->is_compiling_class) SyntaxError(); + // emit(OP_STORE_REF); + // } + // }else{ // a += (expr) -> a = a + (expr) + // if(ctx()->is_compiling_class) SyntaxError(); + // if(is_load_name_ref){ + // emit(OP_LOAD_NAME, _name_arg); + // }else{ + // emit(OP_DUP_TOP_VALUE); + // } + // EXPR(); + // switch (op) { + // case TK("+="): emit(OP_BINARY_OP, 0); break; + // case TK("-="): emit(OP_BINARY_OP, 1); break; + // case TK("*="): emit(OP_BINARY_OP, 2); break; + // case TK("/="): emit(OP_BINARY_OP, 3); break; + // case TK("//="): emit(OP_BINARY_OP, 4); break; + // case TK("%="): emit(OP_BINARY_OP, 5); break; + // case TK("<<="): emit(OP_BITWISE_OP, 0); break; + // case TK(">>="): emit(OP_BITWISE_OP, 1); break; + // case TK("&="): emit(OP_BITWISE_OP, 2); break; + // case TK("|="): emit(OP_BITWISE_OP, 3); break; + // case TK("^="): emit(OP_BITWISE_OP, 4); break; + // default: UNREACHABLE(); + // } + // if(is_load_name_ref){ + // emit(OP_STORE_NAME, _name_arg); + // }else{ + // emit(OP_STORE_REF); + // } + // } + // co()->_rvalue -= 1; } - void exprComma() { + void exprSlice(){ + } + + void exprComma(){ int size = 1; // an expr is in the stack now do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok size++; } while(match(TK(","))); - emit(co()->_rvalue ? OP_BUILD_TUPLE : OP_BUILD_TUPLE_REF, size); + std::vector items(size); + for(int i=size-1; i>=0; i--) items[i] = ctx()->s_expr.popx(); + ctx()->s_expr.push( + std::make_unique(std::move(items)) + ); + // emit(co()->_rvalue ? OP_BUILD_TUPLE : OP_BUILD_TUPLE_REF, size); } - void exprOr() { - int patch = emit(OP_JUMP_IF_TRUE_OR_POP); + void exprOr(){ + Expression_ lhs = ctx()->s_expr.popx(); parse_expression(PREC_LOGICAL_OR); - patch_jump(patch); + ctx()->s_expr.push( + std::make_unique(std::move(lhs), ctx()->s_expr.popx()) + ); + + // int patch = emit(OP_JUMP_IF_TRUE_OR_POP); + // parse_expression(PREC_LOGICAL_OR); + // patch_jump(patch); } - void exprAnd() { - int patch = emit(OP_JUMP_IF_FALSE_OR_POP); + void exprAnd(){ + Expression_ lhs = ctx()->s_expr.popx(); parse_expression(PREC_LOGICAL_AND); - patch_jump(patch); + ctx()->s_expr.push( + std::make_unique(std::move(lhs), ctx()->s_expr.popx()) + ); + // int patch = emit(OP_JUMP_IF_FALSE_OR_POP); + // parse_expression(PREC_LOGICAL_AND); + // patch_jump(patch); } - void exprTernary() { - int patch = emit(OP_POP_JUMP_IF_FALSE); + void exprTernary(){ + Expression_ cond = ctx()->s_expr.popx(); EXPR(); // if true - int patch2 = emit(OP_JUMP_ABSOLUTE); + Expression_ true_expr = ctx()->s_expr.popx(); consume(TK(":")); - patch_jump(patch); EXPR(); // if false - patch_jump(patch2); + Expression_ false_expr = ctx()->s_expr.popx(); + ctx()->s_expr.push( + std::make_unique(std::move(cond), std::move(true_expr), std::move(false_expr)) + ); + // int patch = emit(OP_POP_JUMP_IF_FALSE); + // EXPR(); // if true + // int patch2 = emit(OP_JUMP_ABSOLUTE); + // consume(TK(":")); + // patch_jump(patch); + // EXPR(); // if false + // patch_jump(patch2); } - void exprBinaryOp() { + void exprBinaryOp(){ TokenIndex op = prev().type; + Expression_ lhs = ctx()->s_expr.popx(); parse_expression((Precedence)(rules[op].precedence + 1)); + ctx()->s_expr.push( + std::make_unique(op, std::move(lhs), ctx()->s_expr.popx()) + ); + // switch (op) { + // case TK("+"): emit(OP_BINARY_OP, 0); break; + // case TK("-"): emit(OP_BINARY_OP, 1); break; + // case TK("*"): emit(OP_BINARY_OP, 2); break; + // case TK("/"): emit(OP_BINARY_OP, 3); break; + // case TK("//"): emit(OP_BINARY_OP, 4); break; + // case TK("%"): emit(OP_BINARY_OP, 5); break; + // case TK("**"): emit(OP_BINARY_OP, 6); break; - switch (op) { - case TK("+"): emit(OP_BINARY_OP, 0); break; - case TK("-"): emit(OP_BINARY_OP, 1); break; - case TK("*"): emit(OP_BINARY_OP, 2); break; - case TK("/"): emit(OP_BINARY_OP, 3); break; - case TK("//"): emit(OP_BINARY_OP, 4); break; - case TK("%"): emit(OP_BINARY_OP, 5); break; - case TK("**"): emit(OP_BINARY_OP, 6); break; + // case TK("<"): emit(OP_COMPARE_OP, 0); break; + // case TK("<="): emit(OP_COMPARE_OP, 1); break; + // case TK("=="): emit(OP_COMPARE_OP, 2); break; + // case TK("!="): emit(OP_COMPARE_OP, 3); break; + // case TK(">"): emit(OP_COMPARE_OP, 4); break; + // case TK(">="): emit(OP_COMPARE_OP, 5); break; + // case TK("in"): emit(OP_CONTAINS_OP, 0); break; + // case TK("not in"): emit(OP_CONTAINS_OP, 1); break; + // case TK("is"): emit(OP_IS_OP, 0); break; + // case TK("is not"): emit(OP_IS_OP, 1); break; - case TK("<"): emit(OP_COMPARE_OP, 0); break; - case TK("<="): emit(OP_COMPARE_OP, 1); break; - case TK("=="): emit(OP_COMPARE_OP, 2); break; - case TK("!="): emit(OP_COMPARE_OP, 3); break; - case TK(">"): emit(OP_COMPARE_OP, 4); break; - case TK(">="): emit(OP_COMPARE_OP, 5); break; - case TK("in"): emit(OP_CONTAINS_OP, 0); break; - case TK("not in"): emit(OP_CONTAINS_OP, 1); break; - case TK("is"): emit(OP_IS_OP, 0); break; - case TK("is not"): emit(OP_IS_OP, 1); break; - - case TK("<<"): emit(OP_BITWISE_OP, 0); break; - case TK(">>"): emit(OP_BITWISE_OP, 1); break; - case TK("&"): emit(OP_BITWISE_OP, 2); break; - case TK("|"): emit(OP_BITWISE_OP, 3); break; - case TK("^"): emit(OP_BITWISE_OP, 4); break; - default: UNREACHABLE(); - } + // case TK("<<"): emit(OP_BITWISE_OP, 0); break; + // case TK(">>"): emit(OP_BITWISE_OP, 1); break; + // case TK("&"): emit(OP_BITWISE_OP, 2); break; + // case TK("|"): emit(OP_BITWISE_OP, 3); break; + // case TK("^"): emit(OP_BITWISE_OP, 4); break; + // default: UNREACHABLE(); + // } } void exprNot() { parse_expression((Precedence)(PREC_LOGICAL_NOT + 1)); - emit(OP_UNARY_NOT); + ctx()->s_expr.push( + std::make_unique(ctx()->s_expr.popx()) + ); + // emit(OP_UNARY_NOT); } - void exprUnaryOp() { - TokenIndex op = prev().type; + void exprUnaryOp(){ + TokenIndex type = prev().type; parse_expression((Precedence)(PREC_UNARY + 1)); - switch (op) { - case TK("-"): emit(OP_UNARY_NEGATIVE); break; - case TK("*"): emit(OP_UNARY_STAR, co()->_rvalue); break; - default: UNREACHABLE(); - } + ctx()->s_expr.push( + std::make_unique(type, ctx()->s_expr.popx()) + ); + // switch (type) { + // case TK("-"): emit(OP_UNARY_NEGATIVE); break; + // case TK("*"): emit(OP_UNARY_STAR, co()->_rvalue); break; + // default: UNREACHABLE(); + // } } - void exprGrouping() { + // () is just for change precedence, so we don't need to push it into stack + void exprGroup(){ match_newlines(mode()==REPL_MODE); EXPR_TUPLE(); match_newlines(mode()==REPL_MODE); consume(TK(")")); } - void _consume_comp(Opcode op0, Opcode op1, int _patch, int _body_start){ - int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1); - int _body_end = co()->codes.size(); - co()->codes[_patch].op = OP_JUMP_ABSOLUTE; - co()->codes[_patch].arg = _body_end; - emit(op0, 0); - EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE(); - match_newlines(mode()==REPL_MODE); + // void _consume_comp(Opcode op0, Opcode op1, int _patch, int _body_start){ + // int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1); + // int _body_end = co()->codes.size(); + // co()->codes[_patch].op = OP_JUMP_ABSOLUTE; + // co()->codes[_patch].arg = _body_end; + // emit(op0, 0); + // EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE(); + // match_newlines(mode()==REPL_MODE); - int _skipPatch = emit(OP_JUMP_ABSOLUTE); - int _cond_start = co()->codes.size(); - int _cond_end_return = -1; - if(match(TK("if"))) { - EXPR_TUPLE(); - _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1); - } - patch_jump(_skipPatch); + // int _skipPatch = emit(OP_JUMP_ABSOLUTE); + // int _cond_start = co()->codes.size(); + // int _cond_end_return = -1; + // if(match(TK("if"))) { + // EXPR_TUPLE(); + // _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1); + // } + // patch_jump(_skipPatch); - emit(OP_GET_ITER); - co()->_enter_block(FOR_LOOP); - emit(OP_FOR_ITER); + // emit(OP_GET_ITER); + // co()->_enter_block(FOR_LOOP); + // emit(OP_FOR_ITER); - if(_cond_end_return != -1) { // there is an if condition - emit(OP_JUMP_ABSOLUTE, _cond_start); - patch_jump(_cond_end_return); - int ifpatch = emit(OP_POP_JUMP_IF_FALSE); - emit(OP_JUMP_ABSOLUTE, _body_start); - patch_jump(_body_end_return); - emit(op1); - patch_jump(ifpatch); - }else{ - emit(OP_JUMP_ABSOLUTE, _body_start); - patch_jump(_body_end_return); - emit(op1); - } + // if(_cond_end_return != -1) { // there is an if condition + // emit(OP_JUMP_ABSOLUTE, _cond_start); + // patch_jump(_cond_end_return); + // int ifpatch = emit(OP_POP_JUMP_IF_FALSE); + // emit(OP_JUMP_ABSOLUTE, _body_start); + // patch_jump(_body_end_return); + // emit(op1); + // patch_jump(ifpatch); + // }else{ + // emit(OP_JUMP_ABSOLUTE, _body_start); + // patch_jump(_body_end_return); + // emit(op1); + // } + + // emit(OP_LOOP_CONTINUE, -1, true); + // co()->_exit_block(); + // match_newlines(mode()==REPL_MODE); + // } + + template + void _consume_comp(){ - emit(OP_LOOP_CONTINUE, -1, true); - co()->_exit_block(); - match_newlines(mode()==REPL_MODE); } void exprList() { - int _patch = emit(OP_NO_OP); - int _body_start = co()->codes.size(); int ARGC = 0; do { match_newlines(mode()==REPL_MODE); @@ -398,19 +519,38 @@ private: EXPR(); ARGC++; match_newlines(mode()==REPL_MODE); if(ARGC == 1 && match(TK("for"))){ - _consume_comp(OP_BUILD_LIST, OP_LIST_APPEND, _patch, _body_start); + _consume_comp(); consume(TK("]")); return; } } while (match(TK(","))); match_newlines(mode()==REPL_MODE); consume(TK("]")); - emit(OP_BUILD_LIST, ARGC); + auto list_expr = std::make_unique(); + list_expr->items.resize(ARGC); + for(int i=ARGC-1; i>=0; i--) list_expr->items[i] = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(list_expr)); + + // int _patch = emit(OP_NO_OP); + // int _body_start = co()->codes.size(); + // int ARGC = 0; + // do { + // match_newlines(mode()==REPL_MODE); + // if (curr().type == TK("]")) break; + // EXPR(); ARGC++; + // match_newlines(mode()==REPL_MODE); + // if(ARGC == 1 && match(TK("for"))){ + // _consume_comp(OP_BUILD_LIST, OP_LIST_APPEND, _patch, _body_start); + // consume(TK("]")); + // return; + // } + // } while (match(TK(","))); + // match_newlines(mode()==REPL_MODE); + // consume(TK("]")); + // emit(OP_BUILD_LIST, ARGC); } void exprMap() { - int _patch = emit(OP_NO_OP); - int _body_start = co()->codes.size(); bool parsing_dict = false; int ARGC = 0; do { @@ -421,78 +561,113 @@ private: if(parsing_dict){ consume(TK(":")); EXPR(); + Expression_ value = ctx()->s_expr.popx(); + ctx()->s_expr.push( + std::make_unique(ctx()->s_expr.popx(), std::move(value)) + ); } ARGC++; match_newlines(mode()==REPL_MODE); if(ARGC == 1 && match(TK("for"))){ - if(parsing_dict) _consume_comp(OP_BUILD_MAP, OP_MAP_ADD, _patch, _body_start); - else _consume_comp(OP_BUILD_SET, OP_SET_ADD, _patch, _body_start); + if(parsing_dict) _consume_comp(); + else _consume_comp(); consume(TK("}")); return; } } while (match(TK(","))); consume(TK("}")); + if(ARGC == 0 || parsing_dict){ + auto e = std::make_unique(); + e->items.resize(ARGC); + for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + }else{ + auto e = std::make_unique(); + e->items.resize(ARGC); + for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + } + // int _patch = emit(OP_NO_OP); + // int _body_start = co()->codes.size(); + // bool parsing_dict = false; + // int ARGC = 0; + // do { + // match_newlines(mode()==REPL_MODE); + // if (curr().type == TK("}")) break; + // EXPR(); + // if(curr().type == TK(":")) parsing_dict = true; + // if(parsing_dict){ + // consume(TK(":")); + // EXPR(); + // } + // ARGC++; + // match_newlines(mode()==REPL_MODE); + // if(ARGC == 1 && match(TK("for"))){ + // if(parsing_dict) _consume_comp(OP_BUILD_MAP, OP_MAP_ADD, _patch, _body_start); + // else _consume_comp(OP_BUILD_SET, OP_SET_ADD, _patch, _body_start); + // consume(TK("}")); + // return; + // } + // } while (match(TK(","))); + // consume(TK("}")); - if(ARGC == 0 || parsing_dict) emit(OP_BUILD_MAP, ARGC); - else emit(OP_BUILD_SET, ARGC); + // if(ARGC == 0 || parsing_dict) emit(OP_BUILD_MAP, ARGC); + // else emit(OP_BUILD_SET, ARGC); } void exprCall() { - int ARGC = 0; - int KWARGC = 0; - bool need_unpack = false; + auto e = std::make_unique(); do { match_newlines(mode()==REPL_MODE); - if (curr().type == TK(")")) break; - if(curr().type == TK("@id") && next().type == TK("=")) { + if (curr().type==TK(")")) break; + if(curr().type==TK("@id") && next().type==TK("=")) { consume(TK("@id")); - const Str& key = prev().str(); - emit(OP_LOAD_CONST, co()->add_const(VAR(key))); + Str key = prev().str(); + // emit(OP_LOAD_CONST, co()->add_const(VAR(key))); consume(TK("=")); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; - KWARGC++; + EXPR(); + e->kwargs.push_back({key, ctx()->s_expr.popx()}); } else{ - if(KWARGC > 0) SyntaxError("positional argument follows keyword argument"); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; - if(co()->codes.back().op == OP_UNARY_STAR) need_unpack = true; - ARGC++; + if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument"); + EXPR(); + // if(co()->codes.back().op == OP_UNARY_STAR) need_unpack = true; + e->args.push_back(ctx()->s_expr.popx()); } match_newlines(mode()==REPL_MODE); } while (match(TK(","))); consume(TK(")")); - if(ARGC > 32767) SyntaxError("too many positional arguments"); - if(KWARGC > 32767) SyntaxError("too many keyword arguments"); - if(KWARGC > 0){ - emit(need_unpack ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC << 16) | ARGC); - }else{ - emit(need_unpack ? OP_CALL_UNPACK : OP_CALL, ARGC); - } + ctx()->s_expr.push(std::move(e)); + // if(ARGC > 32767) SyntaxError("too many positional arguments"); + // if(KWARGC > 32767) SyntaxError("too many keyword arguments"); + // if(KWARGC > 0){ + // emit(need_unpack ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC << 16) | ARGC); + // }else{ + // emit(need_unpack ? OP_CALL_UNPACK : OP_CALL, ARGC); + // } } - void exprName(){ _exprName(false); } - - void _exprName(bool force_lvalue) { - const Token& tkname = prev(); - int index = co()->add_name(tkname.str(), name_scope()); - bool fast_load = !force_lvalue && co()->_rvalue>0; - emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index); + void exprName(){ + ctx()->s_expr.push( + std::make_unique(prev().str(), name_scope()) + ); } void exprAttrib() { consume(TK("@id")); - const Str& name = prev().str(); - int index = co()->add_name(name, NAME_ATTR); - emit(co()->_rvalue ? OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index); + ctx()->s_expr.push( + std::make_unique(ctx()->s_expr.popx(), prev().str()) + ); } // [:], [:b] // [a], [a:], [a:b] - void exprSubscript() { + void exprSubscr() { + Expression_ a = nullptr; + Expression_ b = nullptr; if(match(TK(":"))){ - emit(OP_LOAD_NONE); - if(match(TK("]"))){ - emit(OP_LOAD_NONE); - }else{ + if(match(TK("]"))){ // [:] + + }else{ // [:b] EXPR_TUPLE(); consume(TK("]")); } @@ -500,36 +675,31 @@ private: }else{ EXPR_TUPLE(); if(match(TK(":"))){ - if(match(TK("]"))){ + if(match(TK("]"))){ // [a:] emit(OP_LOAD_NONE); - }else{ + }else{ // [a:b] EXPR_TUPLE(); consume(TK("]")); } emit(OP_BUILD_SLICE); - }else{ + }else{ // [a] consume(TK("]")); } } - emit(OP_BUILD_INDEX, (int)(co()->_rvalue>0)); + // emit(OP_BUILD_INDEX, (int)(co()->_rvalue>0)); } void exprValue() { - TokenIndex op = prev().type; - switch (op) { - case TK("None"): emit(OP_LOAD_NONE); break; - case TK("True"): emit(OP_LOAD_TRUE); break; - case TK("False"): emit(OP_LOAD_FALSE); break; - case TK("..."): emit(OP_LOAD_ELLIPSIS); break; - default: UNREACHABLE(); - } + ctx()->s_expr.push( + std::make_unique(prev().type) + ); } int emit(Opcode opcode, int arg=-1, bool keepline=false) { int line = prev().line; co()->codes.push_back( - Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line} + Bytecode{(uint8_t)opcode, (uint16_t)ctx()->curr_block_i, arg, line} ); int i = co()->codes.size() - 1; if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line; @@ -541,11 +711,10 @@ private: co()->codes[addr_index].arg = target; } - void compile_block_body(CompilerAction action=nullptr) { - if(action == nullptr) action = &Compiler::compile_stmt; + void compile_block_body() { consume(TK(":")); if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ - (this->*action)(); // inline block + compile_stmt(); // inline block return; } if(!match_newlines(mode()==REPL_MODE)){ @@ -554,7 +723,7 @@ private: consume(TK("@indent")); while (curr().type != TK("@dedent")) { match_newlines(); - (this->*action)(); + compile_stmt(); match_newlines(); } consume(TK("@dedent")); @@ -612,20 +781,17 @@ private: // a = 1 + 2 // ['a', '1', '2', '+', '='] // - void parse_expression(Precedence precedence) { + void parse_expression(Precedence precedence, bool allowslice=false) { advance(); - GrammarFn prefix = rules[prev().type].prefix; + PrattCallback prefix = rules[prev().type].prefix; if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); (this->*prefix)(); - bool meet_assign_token = false; + // rhs of = cannot be a AssignExpr or InplaceAssignExpr while (rules[curr().type].precedence >= precedence) { + TokenIndex op = curr().type; advance(); - TokenIndex op = prev().type; - if (op == TK("=")){ - if(meet_assign_token) SyntaxError(); - meet_assign_token = true; - } - GrammarFn infix = rules[op].infix; + if (op == TK(":") && !allowslice) SyntaxError(); + PrattCallback infix = rules[op].infix; if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true"); (this->*infix)(); } @@ -633,9 +799,8 @@ private: void compile_if_stmt() { match_newlines(); - co()->_rvalue += 1; - EXPR_TUPLE(); // condition - co()->_rvalue -= 1; + EXPR(); // condition + emit_expr(); int ifpatch = emit(OP_POP_JUMP_IF_FALSE); compile_block_body(); @@ -655,44 +820,45 @@ private: } void compile_while_loop() { - co()->_enter_block(WHILE_LOOP); - co()->_rvalue += 1; - EXPR_TUPLE(); // condition - co()->_rvalue -= 1; + ctx()->enter_block(WHILE_LOOP); + EXPR(); // condition + emit_expr(); int patch = emit(OP_POP_JUMP_IF_FALSE); compile_block_body(); emit(OP_LOOP_CONTINUE, -1, true); patch_jump(patch); - co()->_exit_block(); + ctx()->exit_block(); } void EXPR_FOR_VARS(){ int size = 0; do { consume(TK("@id")); - _exprName(true); size++; + int index = co()->add_name(prev().str(), name_scope()); + emit(OP_LOAD_NAME_REF, index); + size++; } while (match(TK(","))); if(size > 1) emit(OP_BUILD_TUPLE_REF, size); } void compile_for_loop() { EXPR_FOR_VARS();consume(TK("in")); - co()->_rvalue += 1; EXPR_TUPLE(); co()->_rvalue -= 1; + EXPR_TUPLE(); emit_expr(); emit(OP_GET_ITER); - co()->_enter_block(FOR_LOOP); + ctx()->enter_block(FOR_LOOP); emit(OP_FOR_ITER); compile_block_body(); emit(OP_LOOP_CONTINUE, -1, true); - co()->_exit_block(); + ctx()->exit_block(); } void compile_try_except() { - co()->_enter_block(TRY_EXCEPT); + ctx()->enter_block(TRY_EXCEPT); emit(OP_TRY_BLOCK_ENTER); compile_block_body(); emit(OP_TRY_BLOCK_EXIT); std::vector patches = { emit(OP_JUMP_ABSOLUTE) }; - co()->_exit_block(); + ctx()->exit_block(); do { consume(TK("except")); @@ -714,29 +880,25 @@ private: void compile_stmt() { if (match(TK("break"))) { - if (!co()->_is_curr_block_loop()) SyntaxError("'break' outside loop"); + if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop"); consume_end_stmt(); emit(OP_LOOP_BREAK); } else if (match(TK("continue"))) { - if (!co()->_is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); + if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); consume_end_stmt(); emit(OP_LOOP_CONTINUE); } else if (match(TK("yield"))) { - if (codes.size() == 1) SyntaxError("'yield' outside function"); - co()->_rvalue += 1; - EXPR_TUPLE(); - co()->_rvalue -= 1; + if (contexts.size() <= 1) SyntaxError("'yield' outside function"); + EXPR_TUPLE(); emit_expr(); consume_end_stmt(); co()->is_generator = true; emit(OP_YIELD_VALUE, -1, true); } else if (match(TK("return"))) { - if (codes.size() == 1) SyntaxError("'return' outside function"); + if (contexts.size() <= 1) SyntaxError("'return' outside function"); if(match_end_stmt()){ emit(OP_LOAD_NONE); }else{ - co()->_rvalue += 1; - EXPR_TUPLE(); // return value - co()->_rvalue -= 1; + EXPR_TUPLE(); emit_expr(); consume_end_stmt(); } emit(OP_RETURN_VALUE, -1, true); @@ -763,11 +925,11 @@ private: } else if (match(TK("try"))) { compile_try_except(); } else if(match(TK("assert"))) { - co()->_rvalue += 1; - EXPR(); - if (match(TK(","))) EXPR(); - else emit(OP_LOAD_CONST, co()->add_const(VAR(""))); - co()->_rvalue -= 1; + EXPR_TUPLE(); emit_expr(); + // OP_CODE needs to change + + // if (match(TK(","))) EXPR(); + // else emit(OP_LOAD_CONST, co()->add_const(VAR(""))); emit(OP_ASSERT); consume_end_stmt(); } else if(match(TK("with"))){ @@ -845,9 +1007,9 @@ private: if(super_cls_name_idx == -1) emit(OP_LOAD_NONE); else emit(OP_LOAD_NAME, super_cls_name_idx); emit(OP_BEGIN_CLASS, cls_name_idx); - co()->_is_compiling_class = true; + ctx()->is_compiling_class = true; compile_block_body(); - co()->_is_compiling_class = false; + ctx()->is_compiling_class = false; emit(OP_END_CLASS); } @@ -897,7 +1059,7 @@ private: StrName obj_name; consume(TK("@id")); func.name = prev().str(); - if(!co()->_is_compiling_class && match(TK("::"))){ + if(!ctx()->is_compiling_class && match(TK("::"))){ consume(TK("@id")); obj_name = func.name; func.name = prev().str(); @@ -910,14 +1072,12 @@ private: if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - func.code = make_sp(lexer->src, func.name.str()); - this->codes.push(func.code); + func.code = push_context(lexer->src, func.name.str()); compile_block_body(); - func.code->optimize(vm); - this->codes.pop(); + pop_context(); emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); - if(!co()->_is_compiling_class){ + if(!ctx()->is_compiling_class){ if(obj_name.empty()){ if(has_decorator) emit(OP_CALL, 1); emit(OP_STORE_NAME, co()->add_name(func.name, name_scope())); @@ -952,28 +1112,27 @@ private: void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); } void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); } + void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); } public: CodeObject_ compile(){ - // can only be called once if(used) UNREACHABLE(); used = true; tokens = lexer->run(); - // if(lexer->src->filename == "tests/01_int.py"){ + // if(lexer->src->filename == ""){ // for(auto& t: tokens) std::cout << t.info() << std::endl; // } - CodeObject_ code = make_sp(lexer->src, lexer->src->filename); - codes.push(code); + CodeObject_ code = push_context(lexer->src, lexer->src->filename); advance(); // skip @sof, so prev() is always valid - match_newlines(); // skip leading '\n' + match_newlines(); // skip possible leading '\n' if(mode()==EVAL_MODE) { EXPR_TUPLE(); consume(TK("@eof")); - code->optimize(vm); + pop_context(); return code; }else if(mode()==JSON_MODE){ PyObject* value = read_literal(); @@ -982,7 +1141,8 @@ public: else if(match(TK("["))) exprList(); else SyntaxError("expect a JSON object or array"); consume(TK("@eof")); - return code; // no need to optimize for JSON decoding + pop_context(); + return code; } while (!match(TK("@eof"))) { @@ -993,7 +1153,7 @@ public: } match_newlines(); } - code->optimize(vm); + pop_context(); return code; } }; diff --git a/src/error.h b/src/error.h index d0732b50..34d4874d 100644 --- a/src/error.h +++ b/src/error.h @@ -1,6 +1,7 @@ #pragma once #include "namedict.h" +#include "str.h" #include "tuplelist.h" namespace pkpy{ @@ -22,7 +23,7 @@ enum CompileMode { }; struct SourceData { - const char* source; + std::string source; Str filename; std::vector line_starts; CompileMode mode; @@ -38,11 +39,17 @@ struct SourceData { } SourceData(const char* source, Str filename, CompileMode mode) { - source = strdup(source); // Skip utf8 BOM if there is any. if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; + // Remove all '\r' + StrStream ss; + while(*source != '\0'){ + if(*source != '\r') ss << *source; + source++; + } + this->filename = filename; - this->source = source; + this->source = ss.str(); line_starts.push_back(source); this->mode = mode; } @@ -65,8 +72,6 @@ struct SourceData { } return ss.str(); } - - ~SourceData() { free((void*)source); } }; class Exception { diff --git a/src/expr.h b/src/expr.h index 5e21f629..0ae035bf 100644 --- a/src/expr.h +++ b/src/expr.h @@ -2,107 +2,200 @@ #include "codeobject.h" #include "common.h" -#include "parser.h" +#include "lexer.h" #include "error.h" #include "ceval.h" -#include namespace pkpy{ -struct Expression; -typedef std::unique_ptr Expression_; - struct Expression{ - std::vector children; virtual Str to_string() const = 0; }; struct NameExpr: Expression{ Str name; NameScope scope; - NameExpr(Str name, NameScope scope): name(name), scope(scope) {} + NameExpr(const Str& name, NameScope scope): name(name), scope(scope) {} + NameExpr(Str&& name, NameScope scope): name(std::move(name)), scope(scope) {} Str to_string() const override { return name; } }; -struct GroupExpr: Expression{ - Expression_ expr; - GroupExpr(Expression_ expr): expr(std::move(expr)) {} - Str to_string() const override { return "()"; } -}; - struct UnaryExpr: Expression{ TokenIndex op; - UnaryExpr(TokenIndex op): op(op) {} + Expression_ child; + UnaryExpr(TokenIndex op, Expression_&& child): op(op), child(std::move(child)) {} Str to_string() const override { return TK_STR(op); } }; struct NotExpr: Expression{ + Expression_ child; + NotExpr(Expression_&& child): child(std::move(child)) {} Str to_string() const override { return "not"; } }; struct AndExpr: Expression{ + Expression_ lhs; + Expression_ rhs; + AndExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} Str to_string() const override { return "and"; } }; struct OrExpr: Expression{ + Expression_ lhs; + Expression_ rhs; + OrExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} Str to_string() const override { return "or"; } }; -// None, True, False, ... -struct SpecialValueExpr: Expression{ +// [None, True, False, ...] +struct SpecialLiteralExpr: Expression{ TokenIndex token; - SpecialValueExpr(TokenIndex token): token(token) {} + SpecialLiteralExpr(TokenIndex token): token(token) {} Str to_string() const override { return TK_STR(token); } + + void gen(){ + // switch (token) { + // case TK("None"): emit(OP_LOAD_NONE); break; + // case TK("True"): emit(OP_LOAD_TRUE); break; + // case TK("False"): emit(OP_LOAD_FALSE); break; + // case TK("..."): emit(OP_LOAD_ELLIPSIS); break; + // default: UNREACHABLE(); + // } + } }; // @num, @str which needs to invoke OP_LOAD_CONST struct LiteralExpr: Expression{ - PyObject* value; - LiteralExpr(PyObject* value): value(value) {} + TokenValue value; + LiteralExpr(TokenValue value): value(value) {} Str to_string() const override { return "literal"; } }; +struct SliceExpr: Expression{ + Expression_ start; + Expression_ stop; + Expression_ step; + SliceExpr(Expression_&& start, Expression_&& stop, Expression_&& step): + start(std::move(start)), stop(std::move(stop)), step(std::move(step)) {} + Str to_string() const override { return "slice"; } +}; + struct ListExpr: Expression{ + std::vector items; Str to_string() const override { return "[]"; } }; struct DictExpr: Expression{ + std::vector items; // each item is a DictItemExpr Str to_string() const override { return "{}"; } }; +struct SetExpr: Expression{ + std::vector items; + Str to_string() const override { return "{}"; } +}; + + +struct TupleExpr: Expression{ + std::vector items; + TupleExpr(std::vector&& items): items(std::move(items)) {} + Str to_string() const override { return "(a, b, c)"; } +}; + +struct CompExpr: Expression{ + Expression_ expr; // loop expr + Expression_ vars; // loop vars + Expression_ iter; // loop iter + Expression_ cond; // optional if condition + virtual void emit_expr() = 0; +}; + +// a:b +struct DictItemExpr: Expression{ + Expression_ key; + Expression_ value; + DictItemExpr(Expression_&& key, Expression_&& value) + : key(std::move(key)), value(std::move(value)) {} + Str to_string() const override { return "dict item"; } +}; + +struct ListCompExpr: CompExpr{ +}; + +struct DictCompExpr: CompExpr{ +}; + +struct SetCompExpr: CompExpr{ +}; + struct LambdaExpr: Expression{ + Function func; + NameScope scope; + LambdaExpr(Function&& func, NameScope scope): func(std::move(func)), scope(scope) {} Str to_string() const override { return "lambda"; } }; struct FStringExpr: Expression{ + Str src; + FStringExpr(const Str& src): src(src) {} Str to_string() const override { return "@fstr"; } }; +struct SubscrExpr: Expression{ + Expression_ a; + Expression_ b; + SubscrExpr(Expression_&& a, Expression_&& b): a(std::move(a)), b(std::move(b)) {} + Str to_string() const override { return "a[b]"; } +}; + struct AttribExpr: Expression{ + Expression_ a; + Str b; + AttribExpr(Expression_ a, const Str& b): a(std::move(a)), b(b) {} + AttribExpr(Expression_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} Str to_string() const override { return "."; } }; +struct AssignExpr: Expression{ + Expression_ lhs; + Expression_ rhs; + AssignExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} + Str to_string() const override { return "="; } +}; + +struct InplaceAssignExpr: Expression{ + TokenIndex op; + Expression_ lhs; + Expression_ rhs; + InplaceAssignExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs) + : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} + Str to_string() const override { return TK_STR(op); } +}; + + struct CallExpr: Expression{ + std::vector args; + std::vector> kwargs; Str to_string() const override { return "()"; } }; struct BinaryExpr: Expression{ TokenIndex op; - BinaryExpr(TokenIndex op): op(op) {} + Expression_ lhs; + Expression_ rhs; + BinaryExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs) + : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} Str to_string() const override { return TK_STR(op); } }; struct TernaryExpr: Expression{ + Expression_ cond; + Expression_ true_expr; + Expression_ false_expr; + TernaryExpr(Expression_&& cond, Expression_&& true_expr, Expression_&& false_expr) + : cond(std::move(cond)), true_expr(std::move(true_expr)), false_expr(std::move(false_expr)) {} Str to_string() const override { return "?"; } }; -struct AssignExpr: Expression{ - Str to_string() const override { return "="; } -}; - -struct CommaExpr: Expression{ - Str to_string() const override { return ","; } -}; - } // namespace pkpy \ No newline at end of file diff --git a/src/lexer.h b/src/lexer.h index 3e20071b..8412aa4c 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -68,11 +68,12 @@ struct Token{ } }; -// https://docs.python.org/3/reference/expressions.html +// https://docs.python.org/3/reference/expressions.html#operator-precedence enum Precedence { PREC_NONE, PREC_ASSIGNMENT, // = PREC_COMMA, // , + PREC_SLICE, // : (only available inside a subscript expression) PREC_TERNARY, // ?: PREC_LOGICAL_OR, // or PREC_LOGICAL_AND, // and @@ -135,7 +136,7 @@ struct Lexer { if(brackets_level > 0) return true; int spaces = eat_spaces(); if(peekchar() == '#') skip_line_comment(); - if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true; + if(peekchar() == '\0' || peekchar() == '\n') return true; // https://docs.python.org/3/reference/lexical_analysis.html#indentation if(spaces > indents.top()){ indents.push(spaces); @@ -428,7 +429,6 @@ struct Lexer { add_token_2('=', TK("/"), TK("/=")); } return true; - case '\r': break; // just ignore '\r' case ' ': case '\t': eat_spaces(); break; case '\n': { add_token(TK("@eol")); @@ -493,8 +493,8 @@ struct Lexer { Lexer(shared_ptr src) { this->src = src; - this->token_start = src->source; - this->curr_char = src->source; + this->token_start = src->source.c_str(); + this->curr_char = src->source.c_str(); this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line}); this->indents.push(0); } diff --git a/src/main.cpp b/src/main.cpp index 3c96fd54..b3156c41 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -21,7 +21,6 @@ std::string getline(bool* eof=nullptr) { std::string output; output.resize(length); WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), &output[0], length, NULL, NULL); - if(!output.empty() && output.back() == '\r') output.pop_back(); return output; } diff --git a/src/vm.h b/src/vm.h index da1c1d05..383a00cf 100644 --- a/src/vm.h +++ b/src/vm.h @@ -357,30 +357,30 @@ inline void CodeObject::optimize(VM* vm){ perfect_locals_capacity = find_next_capacity(base_n); perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, keys); - for(int i=1; inum_negated(consts[pos]); - } + // for(int i=1; inum_negated(consts[pos]); + // } - if(i>=2 && codes[i].op == OP_BUILD_INDEX){ - const Bytecode& a = codes[i-1]; - const Bytecode& x = codes[i-2]; - if(codes[i].arg == 1){ - if(a.op == OP_LOAD_NAME && x.op == OP_LOAD_NAME){ - codes[i].op = OP_FAST_INDEX; - }else continue; - }else{ - if(a.op == OP_LOAD_NAME_REF && x.op == OP_LOAD_NAME_REF){ - codes[i].op = OP_FAST_INDEX_REF; - }else continue; - } - codes[i].arg = (a.arg << 16) | x.arg; - codes[i-1].op = OP_NO_OP; - codes[i-2].op = OP_NO_OP; - } - } + // if(i>=2 && codes[i].op == OP_BUILD_INDEX){ + // const Bytecode& a = codes[i-1]; + // const Bytecode& x = codes[i-2]; + // if(codes[i].arg == 1){ + // if(a.op == OP_LOAD_NAME && x.op == OP_LOAD_NAME){ + // codes[i].op = OP_FAST_INDEX; + // }else continue; + // }else{ + // if(a.op == OP_LOAD_NAME_REF && x.op == OP_LOAD_NAME_REF){ + // codes[i].op = OP_FAST_INDEX_REF; + // }else continue; + // } + // codes[i].arg = (a.arg << 16) | x.arg; + // codes[i-1].op = OP_NO_OP; + // codes[i-2].op = OP_NO_OP; + // } + // } // pre-compute sn in co_consts for(int i=0; i