diff --git a/src/codeobject.h b/src/codeobject.h index b742dba6..d8552f82 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -39,6 +39,9 @@ enum CodeBlockType { TRY_EXCEPT, }; +#define BC_NOARG -1 +#define BC_KEEPLINE -1 + struct CodeBlock { CodeBlockType type; int parent; // parent index in blocks @@ -68,27 +71,6 @@ struct CodeObject { void optimize(VM* vm); - bool add_label(StrName label){ - if(labels.count(label)) return false; - labels[label] = codes.size(); - return true; - } - - int add_name(StrName name, NameScope scope){ - if(scope == NAME_LOCAL && global_names.count(name)) scope = NAME_GLOBAL; - auto p = std::make_pair(name, scope); - for(int i=0; i& data() const { return vec; } }; -struct Expression; -typedef std::unique_ptr Expression_; +struct Expr; +typedef std::unique_ptr Expr_; } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index bc58348f..57d204a8 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -20,41 +20,6 @@ struct PrattRule{ Precedence precedence; }; -struct CodeEmitContext{ - CodeObject_ co; - stack s_expr; - - CodeEmitContext(CodeObject_ co): co(co) {} - - int curr_block_i = 0; - bool is_compiling_class = false; - - bool is_curr_block_loop() const { - return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP; - } - - void enter_block(CodeBlockType type){ - co->blocks.push_back(CodeBlock{ - type, curr_block_i, (int)co->codes.size() - }); - curr_block_i = co->blocks.size()-1; - } - - void exit_block(){ - co->blocks[curr_block_i].end = co->codes.size(); - curr_block_i = co->blocks[curr_block_i].parent; - if(curr_block_i < 0) UNREACHABLE(); - } - - // clear the expression stack and generate bytecode - void emit_expr(){ - if(s_expr.size() != 1) UNREACHABLE(); - Expression_ expr = s_expr.popx(); - // emit - // ... - } -}; - class Compiler { std::unique_ptr lexer; stack contexts; @@ -70,7 +35,7 @@ class Compiler { template CodeObject_ push_context(Args&&... args){ CodeObject_ co = make_sp(std::forward(args)...); - contexts.push(CodeEmitContext(co)); + contexts.push(CodeEmitContext(vm, co)); return co; } @@ -117,30 +82,29 @@ public: rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; - rules[TK("True")] = { METHOD(exprValue), NO_INFIX }; - rules[TK("False")] = { METHOD(exprValue), NO_INFIX }; + rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX }; rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX }; - rules[TK("None")] = { METHOD(exprValue), NO_INFIX }; - rules[TK("...")] = { METHOD(exprValue), NO_INFIX }; rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK(">>=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("<<=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA }; - rules[TK(":")] = { nullptr, METHOD(exprSlice), PREC_SLICE }; + rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("-=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT }; + rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE }; rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; @@ -150,7 +114,7 @@ public: #undef NO_INFIX #define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression -#define EXPR_TUPLE() parse_expression(PREC_COMMA) // no '=', but ',' is allowed +#define EXPR_TUPLE() parse_expression(PREC_TUPLE) // no '=', but ',' is allowed #define EXPR_ANY() parse_expression(PREC_ASSIGNMENT) } @@ -201,96 +165,55 @@ private: if (!match_end_stmt()) SyntaxError("expected statement end"); } - PyObject* get_value(const Token& token) { - switch (token.type) { - case TK("@num"): - if(std::holds_alternative(token.value)) return VAR(std::get(token.value)); - if(std::holds_alternative(token.value)) return VAR(std::get(token.value)); - UNREACHABLE(); - case TK("@str"): case TK("@fstr"): - return VAR(std::get(token.value)); - default: throw std::runtime_error(Str("invalid token type: ") + TK_STR(token.type)); - } - } - void exprLiteral(){ ctx()->s_expr.push( - std::make_unique(prev().value) + expr_prev_line(prev().value) ); - // PyObject* value = get_value(prev()); - // int index = co()->add_const(value); - // emit(OP_LOAD_CONST, index); } void exprFString(){ ctx()->s_expr.push( - std::make_unique(std::get(prev().value)) + expr_prev_line(std::get(prev().value)) ); - // static const std::regex pattern(R"(\{(.*?)\})"); - // PyObject* value = get_value(prev()); - // Str s = CAST(Str, value); - // std::sregex_iterator begin(s.begin(), s.end(), pattern); - // std::sregex_iterator end; - // int size = 0; - // int i = 0; - // for(auto it = begin; it != end; it++) { - // std::smatch m = *it; - // if (i < m.position()) { - // std::string literal = s.substr(i, m.position() - i); - // emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - // size++; - // } - // emit(OP_LOAD_EVAL_FN); - // emit(OP_LOAD_CONST, co()->add_const(VAR(m[1].str()))); - // emit(OP_CALL, 1); - // size++; - // i = (int)(m.position() + m.length()); - // } - // if (i < s.size()) { - // std::string literal = s.substr(i, s.size() - i); - // emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - // size++; - // } - // emit(OP_BUILD_STRING, size); } - void emit_expr(){} + template + std::unique_ptr expr_prev_line(Args&&... args) { + std::unique_ptr expr = std::make_unique(std::forward(args)...); + expr->line = prev().line; + return expr; + } void exprLambda(){ - Function func; - func.name = ""; + auto e = expr_prev_line(); + e->func.name = ""; + e->scope = name_scope(); if(!match(TK(":"))){ - _compile_f_args(func, false); + _compile_f_args(e->func, false); consume(TK(":")); } - func.code = push_context(lexer->src, func.name.str()); + e->func.code = push_context(lexer->src, ""); EXPR(); - emit_expr(); - emit(OP_RETURN_VALUE); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); pop_context(); + ctx()->s_expr.push(std::move(e)); + } - ctx()->s_expr.push( - std::make_unique(std::move(func), name_scope()) - ); - - // emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); - // if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); + void exprInplaceAssign(){ + auto e = expr_prev_line(); + e->op = prev().type; + e->lhs = ctx()->s_expr.popx(); + EXPR_TUPLE(); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprAssign(){ - Expression_ lhs = ctx()->s_expr.popx(); - TokenIndex op = prev().type; + auto e = expr_prev_line(); + e->lhs = ctx()->s_expr.popx(); EXPR_TUPLE(); - if(op == TK("=")){ - ctx()->s_expr.push( - std::make_unique(std::move(lhs), ctx()->s_expr.popx()) - ); - }else{ - // += -= ... - ctx()->s_expr.push( - std::make_unique(op, std::move(lhs), ctx()->s_expr.popx()) - ); - } + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); // if(co()->codes.empty()) UNREACHABLE(); // bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF; @@ -341,123 +264,73 @@ private: // co()->_rvalue -= 1; } - void exprSlice(){ - } - - void exprComma(){ - int size = 1; // an expr is in the stack now + void exprTuple(){ + auto e = expr_prev_line(); do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok - size++; + e->items.push_back(ctx()->s_expr.popx()); } while(match(TK(","))); - std::vector items(size); - for(int i=size-1; i>=0; i--) items[i] = ctx()->s_expr.popx(); - ctx()->s_expr.push( - std::make_unique(std::move(items)) - ); - // emit(co()->_rvalue ? OP_BUILD_TUPLE : OP_BUILD_TUPLE_REF, size); + ctx()->s_expr.push(std::move(e)); } void exprOr(){ - Expression_ lhs = ctx()->s_expr.popx(); - parse_expression(PREC_LOGICAL_OR); - ctx()->s_expr.push( - std::make_unique(std::move(lhs), ctx()->s_expr.popx()) - ); - - // int patch = emit(OP_JUMP_IF_TRUE_OR_POP); - // parse_expression(PREC_LOGICAL_OR); - // patch_jump(patch); + auto e = expr_prev_line(); + e->lhs = ctx()->s_expr.popx(); + parse_expression(PREC_LOGICAL_OR + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprAnd(){ - Expression_ lhs = ctx()->s_expr.popx(); - parse_expression(PREC_LOGICAL_AND); - ctx()->s_expr.push( - std::make_unique(std::move(lhs), ctx()->s_expr.popx()) - ); - // int patch = emit(OP_JUMP_IF_FALSE_OR_POP); - // parse_expression(PREC_LOGICAL_AND); - // patch_jump(patch); + auto e = expr_prev_line(); + e->lhs = ctx()->s_expr.popx(); + parse_expression(PREC_LOGICAL_AND + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprTernary(){ - Expression_ cond = ctx()->s_expr.popx(); + auto e = expr_prev_line(); + e->cond = ctx()->s_expr.popx(); EXPR(); // if true - Expression_ true_expr = ctx()->s_expr.popx(); + e->true_expr = ctx()->s_expr.popx(); consume(TK(":")); EXPR(); // if false - Expression_ false_expr = ctx()->s_expr.popx(); - ctx()->s_expr.push( - std::make_unique(std::move(cond), std::move(true_expr), std::move(false_expr)) - ); - // int patch = emit(OP_POP_JUMP_IF_FALSE); - // EXPR(); // if true - // int patch2 = emit(OP_JUMP_ABSOLUTE); - // consume(TK(":")); - // patch_jump(patch); - // EXPR(); // if false - // patch_jump(patch2); + e->false_expr = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprBinaryOp(){ - TokenIndex op = prev().type; - Expression_ lhs = ctx()->s_expr.popx(); - parse_expression((Precedence)(rules[op].precedence + 1)); - ctx()->s_expr.push( - std::make_unique(op, std::move(lhs), ctx()->s_expr.popx()) - ); - // switch (op) { - // case TK("+"): emit(OP_BINARY_OP, 0); break; - // case TK("-"): emit(OP_BINARY_OP, 1); break; - // case TK("*"): emit(OP_BINARY_OP, 2); break; - // case TK("/"): emit(OP_BINARY_OP, 3); break; - // case TK("//"): emit(OP_BINARY_OP, 4); break; - // case TK("%"): emit(OP_BINARY_OP, 5); break; - // case TK("**"): emit(OP_BINARY_OP, 6); break; - - // case TK("<"): emit(OP_COMPARE_OP, 0); break; - // case TK("<="): emit(OP_COMPARE_OP, 1); break; - // case TK("=="): emit(OP_COMPARE_OP, 2); break; - // case TK("!="): emit(OP_COMPARE_OP, 3); break; - // case TK(">"): emit(OP_COMPARE_OP, 4); break; - // case TK(">="): emit(OP_COMPARE_OP, 5); break; - // case TK("in"): emit(OP_CONTAINS_OP, 0); break; - // case TK("not in"): emit(OP_CONTAINS_OP, 1); break; - // case TK("is"): emit(OP_IS_OP, 0); break; - // case TK("is not"): emit(OP_IS_OP, 1); break; - - // case TK("<<"): emit(OP_BITWISE_OP, 0); break; - // case TK(">>"): emit(OP_BITWISE_OP, 1); break; - // case TK("&"): emit(OP_BITWISE_OP, 2); break; - // case TK("|"): emit(OP_BITWISE_OP, 3); break; - // case TK("^"): emit(OP_BITWISE_OP, 4); break; - // default: UNREACHABLE(); - // } + auto e = expr_prev_line(); + e->op = prev().type; + e->lhs = ctx()->s_expr.popx(); + parse_expression(rules[e->op].precedence + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } void exprNot() { - parse_expression((Precedence)(PREC_LOGICAL_NOT + 1)); + parse_expression(PREC_LOGICAL_NOT + 1); ctx()->s_expr.push( - std::make_unique(ctx()->s_expr.popx()) + expr_prev_line(ctx()->s_expr.popx()) ); - // emit(OP_UNARY_NOT); } void exprUnaryOp(){ TokenIndex type = prev().type; - parse_expression((Precedence)(PREC_UNARY + 1)); - ctx()->s_expr.push( - std::make_unique(type, ctx()->s_expr.popx()) - ); - // switch (type) { - // case TK("-"): emit(OP_UNARY_NEGATIVE); break; - // case TK("*"): emit(OP_UNARY_STAR, co()->_rvalue); break; - // default: UNREACHABLE(); - // } + parse_expression(PREC_UNARY + 1); + Expr_ e; + switch(type){ + case TK("-"): + e = expr_prev_line(ctx()->s_expr.popx()); + case TK("*"): + e = expr_prev_line(ctx()->s_expr.popx()); + default: UNREACHABLE(); + } + ctx()->s_expr.push(std::move(e)); } - // () is just for change precedence, so we don't need to push it into stack + // () is just for change precedence void exprGroup(){ match_newlines(mode()==REPL_MODE); EXPR_TUPLE(); @@ -507,52 +380,37 @@ private: // } template - void _consume_comp(){ - + void _consume_comp(Expr_ expr){ + static_assert(std::is_base_of::value); + std::unique_ptr ce = std::make_unique(); + ce->expr = std::move(expr); + // ... + ctx()->s_expr.push(std::move(ce)); } void exprList() { - int ARGC = 0; + auto e = expr_prev_line(); do { match_newlines(mode()==REPL_MODE); if (curr().type == TK("]")) break; - EXPR(); ARGC++; + EXPR(); + e->items.push_back(ctx()->s_expr.popx()); match_newlines(mode()==REPL_MODE); - if(ARGC == 1 && match(TK("for"))){ - _consume_comp(); + if(e->items.size()==1 && match(TK("for"))){ + _consume_comp(std::move(e->items[0])); consume(TK("]")); return; } } while (match(TK(","))); match_newlines(mode()==REPL_MODE); consume(TK("]")); - auto list_expr = std::make_unique(); - list_expr->items.resize(ARGC); - for(int i=ARGC-1; i>=0; i--) list_expr->items[i] = ctx()->s_expr.popx(); - ctx()->s_expr.push(std::move(list_expr)); - - // int _patch = emit(OP_NO_OP); - // int _body_start = co()->codes.size(); - // int ARGC = 0; - // do { - // match_newlines(mode()==REPL_MODE); - // if (curr().type == TK("]")) break; - // EXPR(); ARGC++; - // match_newlines(mode()==REPL_MODE); - // if(ARGC == 1 && match(TK("for"))){ - // _consume_comp(OP_BUILD_LIST, OP_LIST_APPEND, _patch, _body_start); - // consume(TK("]")); - // return; - // } - // } while (match(TK(","))); - // match_newlines(mode()==REPL_MODE); - // consume(TK("]")); - // emit(OP_BUILD_LIST, ARGC); + ctx()->s_expr.push(std::move(e)); } + // {...} may be dict or set void exprMap() { bool parsing_dict = false; - int ARGC = 0; + std::vector items; do { match_newlines(mode()==REPL_MODE); if (curr().type == TK("}")) break; @@ -561,62 +419,33 @@ private: if(parsing_dict){ consume(TK(":")); EXPR(); - Expression_ value = ctx()->s_expr.popx(); - ctx()->s_expr.push( - std::make_unique(ctx()->s_expr.popx(), std::move(value)) - ); + auto dict_item = expr_prev_line(); + dict_item->key = ctx()->s_expr.popx(); + dict_item->value = ctx()->s_expr.popx(); + items.push_back(std::move(dict_item)); + }else{ + items.push_back(ctx()->s_expr.popx()); } - ARGC++; match_newlines(mode()==REPL_MODE); - if(ARGC == 1 && match(TK("for"))){ - if(parsing_dict) _consume_comp(); - else _consume_comp(); + if(items.size()==1 && match(TK("for"))){ + if(parsing_dict) _consume_comp(std::move(items[0])); + else _consume_comp(std::move(items[0])); consume(TK("}")); return; } } while (match(TK(","))); consume(TK("}")); - if(ARGC == 0 || parsing_dict){ - auto e = std::make_unique(); - e->items.resize(ARGC); - for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx(); + if(items.size()==0 || parsing_dict){ + auto e = expr_prev_line(std::move(items)); ctx()->s_expr.push(std::move(e)); }else{ - auto e = std::make_unique(); - e->items.resize(ARGC); - for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx(); + auto e = expr_prev_line(std::move(items)); ctx()->s_expr.push(std::move(e)); } - // int _patch = emit(OP_NO_OP); - // int _body_start = co()->codes.size(); - // bool parsing_dict = false; - // int ARGC = 0; - // do { - // match_newlines(mode()==REPL_MODE); - // if (curr().type == TK("}")) break; - // EXPR(); - // if(curr().type == TK(":")) parsing_dict = true; - // if(parsing_dict){ - // consume(TK(":")); - // EXPR(); - // } - // ARGC++; - // match_newlines(mode()==REPL_MODE); - // if(ARGC == 1 && match(TK("for"))){ - // if(parsing_dict) _consume_comp(OP_BUILD_MAP, OP_MAP_ADD, _patch, _body_start); - // else _consume_comp(OP_BUILD_SET, OP_SET_ADD, _patch, _body_start); - // consume(TK("}")); - // return; - // } - // } while (match(TK(","))); - // consume(TK("}")); - - // if(ARGC == 0 || parsing_dict) emit(OP_BUILD_MAP, ARGC); - // else emit(OP_BUILD_SET, ARGC); } void exprCall() { - auto e = std::make_unique(); + auto e = _expr(); do { match_newlines(mode()==REPL_MODE); if (curr().type==TK(")")) break; @@ -648,69 +477,49 @@ private: void exprName(){ ctx()->s_expr.push( - std::make_unique(prev().str(), name_scope()) + expr_prev_line(prev().str(), name_scope()) ); } void exprAttrib() { consume(TK("@id")); ctx()->s_expr.push( - std::make_unique(ctx()->s_expr.popx(), prev().str()) + expr_prev_line(ctx()->s_expr.popx(), prev().str()) ); } - // [:], [:b] - // [a], [a:], [a:b] void exprSubscr() { - Expression_ a = nullptr; - Expression_ b = nullptr; - if(match(TK(":"))){ - if(match(TK("]"))){ // [:] - - }else{ // [:b] - EXPR_TUPLE(); - consume(TK("]")); - } - emit(OP_BUILD_SLICE); - }else{ + auto e = expr_prev_line(); + std::vector items; + do { EXPR_TUPLE(); - if(match(TK(":"))){ - if(match(TK("]"))){ // [a:] - emit(OP_LOAD_NONE); - }else{ // [a:b] - EXPR_TUPLE(); - consume(TK("]")); + items.push_back(ctx()->s_expr.popx()); + } while(match(TK(":"))); + consume(TK("]")); + switch(items.size()){ + case 1: + e->b = std::move(items[0]); + break; + case 2: case 3: { + auto slice = expr_prev_line(); + slice->start = std::move(items[0]); + slice->stop = std::move(items[1]); + if(items.size()==3){ + slice->step = std::move(items[2]); } - emit(OP_BUILD_SLICE); - }else{ // [a] - consume(TK("]")); - } + e->b = std::move(slice); + } break; + default: SyntaxError(); break; } - - // emit(OP_BUILD_INDEX, (int)(co()->_rvalue>0)); + ctx()->s_expr.push(std::move(e)); } - void exprValue() { + void exprLiteral0() { ctx()->s_expr.push( - std::make_unique(prev().type) + expr_prev_line(prev().type) ); } - int emit(Opcode opcode, int arg=-1, bool keepline=false) { - int line = prev().line; - co()->codes.push_back( - Bytecode{(uint8_t)opcode, (uint16_t)ctx()->curr_block_i, arg, line} - ); - int i = co()->codes.size() - 1; - if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line; - return i; - } - - inline void patch_jump(int addr_index) { - int target = co()->codes.size(); - co()->codes[addr_index].arg = target; - } - void compile_block_body() { consume(TK(":")); if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ @@ -778,10 +587,11 @@ private: consume_end_stmt(); } - // a = 1 + 2 - // ['a', '1', '2', '+', '='] - // - void parse_expression(Precedence precedence, bool allowslice=false) { + void parse_expression(int precedence){ + parse_expression((Precedence)precedence); + } + + void parse_expression(Precedence precedence) { advance(); PrattCallback prefix = rules[prev().type].prefix; if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); diff --git a/src/expr.h b/src/expr.h index 0ae035bf..4740b9f1 100644 --- a/src/expr.h +++ b/src/expr.h @@ -8,115 +8,262 @@ namespace pkpy{ -struct Expression{ - virtual Str to_string() const = 0; +struct CodeEmitContext; +struct Expr{ + int line = 0; + virtual Str str() const = 0; + ~Expr() = default; + virtual void emit(CodeEmitContext* ctx) = 0; }; -struct NameExpr: Expression{ +struct CodeEmitContext{ + CodeObject_ co; + VM* vm; + stack s_expr; + + CodeEmitContext(VM* vm, CodeObject_ co): co(co) {} + CodeEmitContext(const CodeEmitContext&) = delete; + CodeEmitContext& operator=(const CodeEmitContext&) = delete; + CodeEmitContext(CodeEmitContext&&) = delete; + CodeEmitContext& operator=(CodeEmitContext&&) = delete; + + int curr_block_i = 0; + bool is_compiling_class = false; + + bool is_curr_block_loop() const { + return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP; + } + + void enter_block(CodeBlockType type){ + co->blocks.push_back(CodeBlock{ + type, curr_block_i, (int)co->codes.size() + }); + curr_block_i = co->blocks.size()-1; + } + + void exit_block(){ + co->blocks[curr_block_i].end = co->codes.size(); + curr_block_i = co->blocks[curr_block_i].parent; + if(curr_block_i < 0) UNREACHABLE(); + } + + // clear the expression stack and generate bytecode + void emit_expr(){ + if(s_expr.size() != 1) UNREACHABLE(); + Expr_ expr = s_expr.popx(); + // emit + // ... + } + + int emit(Opcode opcode, int arg, int line) { + co->codes.push_back( + Bytecode{(uint8_t)opcode, (uint16_t)curr_block_i, arg, line} + ); + int i = co->codes.size() - 1; + if(line==BC_KEEPLINE && i>=1) co->codes[i].line = co->codes[i-1].line; + return i; + } + + void patch_jump(int index) { + int target = co->codes.size(); + co->codes[index].arg = target; + } + + bool add_label(StrName label){ + if(co->labels.count(label)) return false; + co->labels[label] = co->codes.size(); + return true; + } + + int add_name(StrName name, NameScope scope){ + if(scope == NAME_LOCAL && co->global_names.count(name)) scope = NAME_GLOBAL; + auto p = std::make_pair(name, scope); + for(int i=0; inames.size(); i++){ + if(co->names[i] == p) return i; + } + co->names.push_back(p); + return co->names.size() - 1; + } + + int add_const(PyObject* v){ + co->consts.push_back(v); + return co->consts.size() - 1; + } +}; + +struct NameExpr: Expr{ Str name; NameScope scope; NameExpr(const Str& name, NameScope scope): name(name), scope(scope) {} NameExpr(Str&& name, NameScope scope): name(std::move(name)), scope(scope) {} - Str to_string() const override { return name; } + + Str str() const override { return "$" + name; } + + void emit(CodeEmitContext* ctx) override { + int index = ctx->add_name(name, scope); + ctx->emit(OP_LOAD_NAME, index, line); + } }; -struct UnaryExpr: Expression{ - TokenIndex op; - Expression_ child; - UnaryExpr(TokenIndex op, Expression_&& child): op(op), child(std::move(child)) {} - Str to_string() const override { return TK_STR(op); } +struct StarredExpr: Expr{ + Expr_ child; + StarredExpr(Expr_&& child): child(std::move(child)) {} + Str str() const override { return "*"; } + + void emit(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_STAR, (int)false, line); + } }; -struct NotExpr: Expression{ - Expression_ child; - NotExpr(Expression_&& child): child(std::move(child)) {} - Str to_string() const override { return "not"; } +struct NegatedExpr: Expr{ + Expr_ child; + NegatedExpr(Expr_&& child): child(std::move(child)) {} + Str str() const override { return "-"; } + + void emit(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line); + } }; -struct AndExpr: Expression{ - Expression_ lhs; - Expression_ rhs; - AndExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return "and"; } +struct NotExpr: Expr{ + Expr_ child; + NotExpr(Expr_&& child): child(std::move(child)) {} + Str str() const override { return "not"; } + + void emit(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_NOT, BC_NOARG, line); + } }; -struct OrExpr: Expression{ - Expression_ lhs; - Expression_ rhs; - OrExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return "or"; } +struct AndExpr: Expr{ + Expr_ lhs; + Expr_ rhs; + Str str() const override { return "and"; } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + int patch = ctx->emit(OP_JUMP_IF_FALSE_OR_POP, BC_NOARG, line); + rhs->emit(ctx); + ctx->patch_jump(patch); + } +}; + +struct OrExpr: Expr{ + Expr_ lhs; + Expr_ rhs; + Str str() const override { return "or"; } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + int patch = ctx->emit(OP_JUMP_IF_TRUE_OR_POP, BC_NOARG, line); + rhs->emit(ctx); + ctx->patch_jump(patch); + } }; // [None, True, False, ...] -struct SpecialLiteralExpr: Expression{ +struct Literal0Expr: Expr{ TokenIndex token; - SpecialLiteralExpr(TokenIndex token): token(token) {} - Str to_string() const override { return TK_STR(token); } + Literal0Expr(TokenIndex token): token(token) {} + Str str() const override { return TK_STR(token); } - void gen(){ - // switch (token) { - // case TK("None"): emit(OP_LOAD_NONE); break; - // case TK("True"): emit(OP_LOAD_TRUE); break; - // case TK("False"): emit(OP_LOAD_FALSE); break; - // case TK("..."): emit(OP_LOAD_ELLIPSIS); break; - // default: UNREACHABLE(); - // } + void emit(CodeEmitContext* ctx) override { + switch (token) { + case TK("None"): ctx->emit(OP_LOAD_NONE, BC_NOARG, line); break; + case TK("True"): ctx->emit(OP_LOAD_TRUE, BC_NOARG, line); break; + case TK("False"): ctx->emit(OP_LOAD_FALSE, BC_NOARG, line); break; + case TK("..."): ctx->emit(OP_LOAD_ELLIPSIS, BC_NOARG, line); break; + default: UNREACHABLE(); + } } }; // @num, @str which needs to invoke OP_LOAD_CONST -struct LiteralExpr: Expression{ +struct LiteralExpr: Expr{ TokenValue value; LiteralExpr(TokenValue value): value(value) {} - Str to_string() const override { return "literal"; } + Str str() const override { + if(std::holds_alternative(value)){ + return std::to_string(std::get(value)); + } + + if(std::holds_alternative(value)){ + return std::to_string(std::get(value)); + } + + if(std::holds_alternative(value)){ + return std::get(value).escape(true); + } + + UNREACHABLE(); + } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + PyObject* obj = nullptr; + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + + if(!obj) UNREACHABLE(); + int index = ctx->add_const(obj); + ctx->emit(OP_LOAD_CONST, index, line); + } }; -struct SliceExpr: Expression{ - Expression_ start; - Expression_ stop; - Expression_ step; - SliceExpr(Expression_&& start, Expression_&& stop, Expression_&& step): - start(std::move(start)), stop(std::move(stop)), step(std::move(step)) {} - Str to_string() const override { return "slice"; } +struct SliceExpr: Expr{ + Expr_ start; + Expr_ stop; + Expr_ step; + Str str() const override { return "slice()"; } }; -struct ListExpr: Expression{ - std::vector items; - Str to_string() const override { return "[]"; } +struct ListExpr: Expr{ + std::vector items; + Str str() const override { return "[]"; } }; -struct DictExpr: Expression{ - std::vector items; // each item is a DictItemExpr - Str to_string() const override { return "{}"; } +struct DictExpr: Expr{ + std::vector items; // each item is a DictItemExpr + DictExpr(std::vector&& items): items(std::move(items)) {} + Str str() const override { return "{}"; } }; -struct SetExpr: Expression{ - std::vector items; - Str to_string() const override { return "{}"; } +struct SetExpr: Expr{ + std::vector items; + Set(std::vector&& items): items(std::move(items)) {} + Str str() const override { return "{}"; } }; - -struct TupleExpr: Expression{ - std::vector items; - TupleExpr(std::vector&& items): items(std::move(items)) {} - Str to_string() const override { return "(a, b, c)"; } +struct TupleExpr: Expr{ + std::vector items; + Str str() const override { return "tuple()"; } }; -struct CompExpr: Expression{ - Expression_ expr; // loop expr - Expression_ vars; // loop vars - Expression_ iter; // loop iter - Expression_ cond; // optional if condition +struct CompExpr: Expr{ + Expr_ expr; // loop expr + Expr_ vars; // loop vars + Expr_ iter; // loop iter + Expr_ cond; // optional if condition virtual void emit_expr() = 0; }; // a:b -struct DictItemExpr: Expression{ - Expression_ key; - Expression_ value; - DictItemExpr(Expression_&& key, Expression_&& value) - : key(std::move(key)), value(std::move(value)) {} - Str to_string() const override { return "dict item"; } +struct DictItemExpr: Expr{ + Expr_ key; + Expr_ value; + Str str() const override { return "k:v"; } }; struct ListCompExpr: CompExpr{ @@ -128,73 +275,144 @@ struct DictCompExpr: CompExpr{ struct SetCompExpr: CompExpr{ }; -struct LambdaExpr: Expression{ +struct LambdaExpr: Expr{ Function func; NameScope scope; - LambdaExpr(Function&& func, NameScope scope): func(std::move(func)), scope(scope) {} - Str to_string() const override { return "lambda"; } + Str str() const override { return ""; } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + ctx->emit(OP_LOAD_FUNCTION, ctx->add_const(VAR(func)), line); + if(scope == NAME_LOCAL) ctx->emit(OP_SETUP_CLOSURE, BC_NOARG, line); + } }; -struct FStringExpr: Expression{ +struct FStringExpr: Expr{ Str src; FStringExpr(const Str& src): src(src) {} - Str to_string() const override { return "@fstr"; } + Str str() const override { + return "f" + src.escape(true); + } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + static const std::regex pattern(R"(\{(.*?)\})"); + std::sregex_iterator begin(src.begin(), src.end(), pattern); + std::sregex_iterator end; + int size = 0; + int i = 0; + for(auto it = begin; it != end; it++) { + std::smatch m = *it; + if (i < m.position()) { + std::string literal = src.substr(i, m.position() - i); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); + size++; + } + ctx->emit(OP_LOAD_EVAL_FN, BC_NOARG, line); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(m[1].str())), line); + ctx->emit(OP_CALL, 1, line); + size++; + i = (int)(m.position() + m.length()); + } + if (i < src.size()) { + std::string literal = src.substr(i, src.size() - i); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); + size++; + } + ctx->emit(OP_BUILD_STRING, size, line); + } }; -struct SubscrExpr: Expression{ - Expression_ a; - Expression_ b; - SubscrExpr(Expression_&& a, Expression_&& b): a(std::move(a)), b(std::move(b)) {} - Str to_string() const override { return "a[b]"; } +struct SubscrExpr: Expr{ + Expr_ a; + Expr_ b; + Str str() const override { return "a[b]"; } }; -struct AttribExpr: Expression{ - Expression_ a; +struct AttribExpr: Expr{ + Expr_ a; Str b; - AttribExpr(Expression_ a, const Str& b): a(std::move(a)), b(b) {} - AttribExpr(Expression_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} - Str to_string() const override { return "."; } + AttribExpr(Expr_ a, const Str& b): a(std::move(a)), b(b) {} + AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} + Str str() const override { return "a.b"; } }; -struct AssignExpr: Expression{ - Expression_ lhs; - Expression_ rhs; - AssignExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return "="; } +struct AssignExpr: Expr{ + Expr_ lhs; + Expr_ rhs; + Str str() const override { return "="; } }; -struct InplaceAssignExpr: Expression{ +struct InplaceAssignExpr: Expr{ TokenIndex op; - Expression_ lhs; - Expression_ rhs; - InplaceAssignExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs) - : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return TK_STR(op); } + Expr_ lhs; + Expr_ rhs; + Str str() const override { return TK_STR(op); } }; - -struct CallExpr: Expression{ - std::vector args; - std::vector> kwargs; - Str to_string() const override { return "()"; } +struct CallExpr: Expr{ + std::vector args; + std::vector> kwargs; + Str str() const override { return "()"; } }; -struct BinaryExpr: Expression{ +struct BinaryExpr: Expr{ TokenIndex op; - Expression_ lhs; - Expression_ rhs; - BinaryExpr(TokenIndex op, Expression_&& lhs, Expression_&& rhs) - : op(op), lhs(std::move(lhs)), rhs(std::move(rhs)) {} - Str to_string() const override { return TK_STR(op); } + Expr_ lhs; + Expr_ rhs; + Str str() const override { return TK_STR(op); } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + rhs->emit(ctx); + switch (op) { + case TK("+"): ctx->emit(OP_BINARY_OP, 0, line); break; + case TK("-"): ctx->emit(OP_BINARY_OP, 1, line); break; + case TK("*"): ctx->emit(OP_BINARY_OP, 2, line); break; + case TK("/"): ctx->emit(OP_BINARY_OP, 3, line); break; + case TK("//"): ctx->emit(OP_BINARY_OP, 4, line); break; + case TK("%"): ctx->emit(OP_BINARY_OP, 5, line); break; + case TK("**"): ctx->emit(OP_BINARY_OP, 6, line); break; + + case TK("<"): ctx->emit(OP_COMPARE_OP, 0, line); break; + case TK("<="): ctx->emit(OP_COMPARE_OP, 1, line); break; + case TK("=="): ctx->emit(OP_COMPARE_OP, 2, line); break; + case TK("!="): ctx->emit(OP_COMPARE_OP, 3, line); break; + case TK(">"): ctx->emit(OP_COMPARE_OP, 4, line); break; + case TK(">="): ctx->emit(OP_COMPARE_OP, 5, line); break; + case TK("in"): ctx->emit(OP_CONTAINS_OP, 0, line); break; + case TK("not in"): ctx->emit(OP_CONTAINS_OP, 1, line); break; + case TK("is"): ctx->emit(OP_IS_OP, 0, line); break; + case TK("is not"): ctx->emit(OP_IS_OP, 1, line); break; + + case TK("<<"): ctx->emit(OP_BITWISE_OP, 0, line); break; + case TK(">>"): ctx->emit(OP_BITWISE_OP, 1, line); break; + case TK("&"): ctx->emit(OP_BITWISE_OP, 2, line); break; + case TK("|"): ctx->emit(OP_BITWISE_OP, 3, line); break; + case TK("^"): ctx->emit(OP_BITWISE_OP, 4, line); break; + default: UNREACHABLE(); + } + } }; -struct TernaryExpr: Expression{ - Expression_ cond; - Expression_ true_expr; - Expression_ false_expr; - TernaryExpr(Expression_&& cond, Expression_&& true_expr, Expression_&& false_expr) - : cond(std::move(cond)), true_expr(std::move(true_expr)), false_expr(std::move(false_expr)) {} - Str to_string() const override { return "?"; } +struct TernaryExpr: Expr{ + Expr_ cond; + Expr_ true_expr; + Expr_ false_expr; + + Str str() const override { + return "cond ? true_expr : false_expr"; + } + + void emit(CodeEmitContext* ctx) override { + cond->emit(ctx); + int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, cond->line); + true_expr->emit(ctx); + int patch_2 = ctx->emit(OP_JUMP_ABSOLUTE, BC_NOARG, true_expr->line); + ctx->patch_jump(patch); + false_expr->emit(ctx); + ctx->patch_jump(patch_2); + } }; diff --git a/src/lexer.h b/src/lexer.h index 8412aa4c..8b997ca5 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -72,8 +72,7 @@ struct Token{ enum Precedence { PREC_NONE, PREC_ASSIGNMENT, // = - PREC_COMMA, // , - PREC_SLICE, // : (only available inside a subscript expression) + PREC_TUPLE, // , PREC_TERNARY, // ?: PREC_LOGICAL_OR, // or PREC_LOGICAL_AND, // and