#pragma once #include "parser.h" #include "error.h" #include "ceval.h" namespace pkpy{ class Compiler; typedef void (Compiler::*GrammarFn)(); typedef void (Compiler::*CompilerAction)(); struct GrammarRule{ GrammarFn prefix; GrammarFn infix; Precedence precedence; }; enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; class Compiler { std::unique_ptr parser; std::stack codes; int lexing_count = 0; bool used = false; VM* vm; std::map rules; CodeObject_ co() const{ return codes.top(); } CompileMode mode() const{ return parser->src->mode; } NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } public: Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ this->vm = vm; this->parser = std::make_unique( make_sp(source, filename, mode) ); // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ #define METHOD(name) &Compiler::name #define NO_INFIX nullptr, PREC_NONE for(TokenIndex i=0; i")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY }; rules[TK("True")] = { METHOD(exprValue), NO_INFIX }; rules[TK("False")] = { METHOD(exprValue), NO_INFIX }; rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX }; rules[TK("None")] = { METHOD(exprValue), NO_INFIX }; rules[TK("...")] = { METHOD(exprValue), NO_INFIX }; rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK(">>=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("<<=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA }; rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR }; rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR }; #undef METHOD #undef NO_INFIX #define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression #define EXPR_TUPLE() parse_expression(PREC_COMMA) // no '=', but ',' is allowed #define EXPR_ANY() parse_expression(PREC_ASSIGNMENT) } private: Str eat_string_until(char quote, bool raw) { bool quote3 = parser->match_n_chars(2, quote); std::vector buff; while (true) { char c = parser->eatchar_include_newline(); if (c == quote){ if(quote3 && !parser->match_n_chars(2, quote)){ buff.push_back(c); continue; } break; } if (c == '\0'){ if(quote3 && parser->src->mode == REPL_MODE){ throw NeedMoreLines(false); } SyntaxError("EOL while scanning string literal"); } if (c == '\n'){ if(!quote3) SyntaxError("EOL while scanning string literal"); else{ buff.push_back(c); continue; } } if (!raw && c == '\\') { switch (parser->eatchar_include_newline()) { case '"': buff.push_back('"'); break; case '\'': buff.push_back('\''); break; case '\\': buff.push_back('\\'); break; case 'n': buff.push_back('\n'); break; case 'r': buff.push_back('\r'); break; case 't': buff.push_back('\t'); break; default: SyntaxError("invalid escape char"); } } else { buff.push_back(c); } } return Str(buff.data(), buff.size()); } void eat_string(char quote, StringType type) { Str s = eat_string_until(quote, type == RAW_STRING); if(type == F_STRING){ parser->set_next_token(TK("@fstr"), vm->PyStr(s)); }else{ parser->set_next_token(TK("@str"), vm->PyStr(s)); } } void eat_number() { static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?"); std::smatch m; const char* i = parser->token_start; while(*i != '\n' && *i != '\0') i++; std::string s = std::string(parser->token_start, i); try{ if (std::regex_search(s, m, pattern)) { // here is m.length()-1, since the first char was eaten by lex_token() for(int j=0; jeatchar(); int base = 10; size_t size; if (m[1].matched) base = 16; if (m[2].matched) { if(base == 16) SyntaxError("hex literal should not contain a dot"); parser->set_next_token(TK("@num"), py_object(vm, S_TO_FLOAT(m[0], &size))); } else { parser->set_next_token(TK("@num"), py_object(vm, S_TO_INT(m[0], &size, base))); } if (size != m.length()) UNREACHABLE(); } }catch(std::exception& _){ SyntaxError("invalid number literal"); } } void lex_token(){ lexing_count++; _lex_token(); lexing_count--; } // Lex the next token and set it as the next token. void _lex_token() { parser->prev = parser->curr; parser->curr = parser->next_token(); //std::cout << parser->curr.info() << std::endl; while (parser->peekchar() != '\0') { parser->token_start = parser->curr_char; char c = parser->eatchar_include_newline(); switch (c) { case '\'': case '"': eat_string(c, NORMAL_STRING); return; case '#': parser->skip_line_comment(); break; case '{': parser->set_next_token(TK("{")); return; case '}': parser->set_next_token(TK("}")); return; case ',': parser->set_next_token(TK(",")); return; case ':': parser->set_next_token_2(':', TK(":"), TK("::")); return; case ';': parser->set_next_token(TK(";")); return; case '(': parser->set_next_token(TK("(")); return; case ')': parser->set_next_token(TK(")")); return; case '[': parser->set_next_token(TK("[")); return; case ']': parser->set_next_token(TK("]")); return; case '@': parser->set_next_token(TK("@")); return; case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return; case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return; case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return; case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return; case '?': parser->set_next_token(TK("?")); return; case '.': { if(parser->matchchar('.')) { if(parser->matchchar('.')) { parser->set_next_token(TK("...")); } else { SyntaxError("invalid token '..'"); } } else { parser->set_next_token(TK(".")); } return; } case '=': parser->set_next_token_2('=', TK("="), TK("==")); return; case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return; case '>': { if(parser->matchchar('=')) parser->set_next_token(TK(">=")); else if(parser->matchchar('>')) parser->set_next_token_2('=', TK(">>"), TK(">>=")); else parser->set_next_token(TK(">")); return; } case '<': { if(parser->matchchar('=')) parser->set_next_token(TK("<=")); else if(parser->matchchar('<')) parser->set_next_token_2('=', TK("<<"), TK("<<=")); else parser->set_next_token(TK("<")); return; } case '-': { if(parser->matchchar('=')) parser->set_next_token(TK("-=")); else if(parser->matchchar('>')) parser->set_next_token(TK("->")); else parser->set_next_token(TK("-")); return; } case '!': if(parser->matchchar('=')) parser->set_next_token(TK("!=")); else SyntaxError("expected '=' after '!'"); break; case '*': if (parser->matchchar('*')) { parser->set_next_token(TK("**")); // '**' } else { parser->set_next_token_2('=', TK("*"), TK("*=")); } return; case '/': if(parser->matchchar('/')) { parser->set_next_token_2('=', TK("//"), TK("//=")); } else { parser->set_next_token_2('=', TK("/"), TK("/=")); } return; case '\r': break; // just ignore '\r' case ' ': case '\t': parser->eat_spaces(); break; case '\n': { parser->set_next_token(TK("@eol")); if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level"); return; } default: { if(c == 'f'){ if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;} if(parser->matchchar('"')) {eat_string('"', F_STRING); return;} }else if(c == 'r'){ if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;} if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;} } if (c >= '0' && c <= '9') { eat_number(); return; } switch (parser->eat_name()) { case 0: break; case 1: SyntaxError("invalid char: " + std::string(1, c)); case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); case 3: SyntaxError("@id contains invalid char"); break; case 4: SyntaxError("invalid JSON token"); break; default: UNREACHABLE(); } return; } } } parser->token_start = parser->curr_char; parser->set_next_token(TK("@eof")); } inline TokenIndex peek() { return parser->curr.type; } // not sure this will work TokenIndex peek_next() { if(parser->nexts.empty()) return TK("@eof"); return parser->nexts.front().type; } bool match(TokenIndex expected) { if (peek() != expected) return false; lex_token(); return true; } void consume(TokenIndex expected) { if (!match(expected)){ StrStream ss; ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'"; SyntaxError(ss.str()); } } bool match_newlines(bool repl_throw=false) { bool consumed = false; if (peek() == TK("@eol")) { while (peek() == TK("@eol")) lex_token(); consumed = true; } if (repl_throw && peek() == TK("@eof")){ throw NeedMoreLines(co()->_is_compiling_class); } return consumed; } bool match_end_stmt() { if (match(TK(";"))) { match_newlines(); return true; } if (match_newlines() || peek()==TK("@eof")) return true; if (peek() == TK("@dedent")) return true; return false; } void consume_end_stmt() { if (!match_end_stmt()) SyntaxError("expected statement end"); } void exprLiteral() { PyVar value = parser->prev.value; int index = co()->add_const(value); emit(OP_LOAD_CONST, index); } void exprFString() { static const std::regex pattern(R"(\{(.*?)\})"); PyVar value = parser->prev.value; Str s = vm->PyStr_AS_C(value); std::sregex_iterator begin(s.begin(), s.end(), pattern); std::sregex_iterator end; int size = 0; int i = 0; for(auto it = begin; it != end; it++) { std::smatch m = *it; if (i < m.position()) { std::string literal = s.substr(i, m.position() - i); emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal))); size++; } emit(OP_LOAD_EVAL_FN); emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(m[1].str()))); emit(OP_CALL, 1); size++; i = (int)(m.position() + m.length()); } if (i < s.size()) { std::string literal = s.substr(i, s.size() - i); emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal))); size++; } emit(OP_BUILD_STRING, size); } void exprLambda() { Function func; func.name = ""; if(!match(TK(":"))){ _compile_f_args(func, false); consume(TK(":")); } func.code = make_sp(parser->src, func.name.str()); this->codes.push(func.code); co()->_rvalue += 1; EXPR_TUPLE(); co()->_rvalue -= 1; emit(OP_RETURN_VALUE); func.code->optimize(vm); this->codes.pop(); emit(OP_LOAD_FUNCTION, co()->add_const(vm->PyFunction(func))); if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); } void exprAssign() { int lhs = co()->codes.empty() ? -1 : co()->codes.size() - 1; co()->_rvalue += 1; TokenIndex op = parser->prev.type; if(op == TK("=")) { // a = (expr) EXPR_TUPLE(); if(lhs!=-1 && co()->codes[lhs].op == OP_LOAD_NAME_REF){ if(co()->_is_compiling_class){ emit(OP_STORE_CLASS_ATTR, co()->codes[lhs].arg); }else{ emit(OP_STORE_NAME, co()->codes[lhs].arg); } co()->codes[lhs].op = OP_NO_OP; co()->codes[lhs].arg = -1; }else{ if(co()->_is_compiling_class) SyntaxError(); emit(OP_STORE_REF); } }else{ // a += (expr) -> a = a + (expr) if(co()->_is_compiling_class) SyntaxError(); EXPR(); switch (op) { case TK("+="): emit(OP_INPLACE_BINARY_OP, 0); break; case TK("-="): emit(OP_INPLACE_BINARY_OP, 1); break; case TK("*="): emit(OP_INPLACE_BINARY_OP, 2); break; case TK("/="): emit(OP_INPLACE_BINARY_OP, 3); break; case TK("//="): emit(OP_INPLACE_BINARY_OP, 4); break; case TK("%="): emit(OP_INPLACE_BINARY_OP, 5); break; case TK("<<="): emit(OP_INPLACE_BITWISE_OP, 0); break; case TK(">>="): emit(OP_INPLACE_BITWISE_OP, 1); break; case TK("&="): emit(OP_INPLACE_BITWISE_OP, 2); break; case TK("|="): emit(OP_INPLACE_BITWISE_OP, 3); break; case TK("^="): emit(OP_INPLACE_BITWISE_OP, 4); break; default: UNREACHABLE(); } } co()->_rvalue -= 1; } void exprComma() { int size = 1; // an expr is in the stack now do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok size++; } while(match(TK(","))); emit(co()->_rvalue ? OP_BUILD_TUPLE : OP_BUILD_TUPLE_REF, size); } void exprOr() { int patch = emit(OP_JUMP_IF_TRUE_OR_POP); parse_expression(PREC_LOGICAL_OR); patch_jump(patch); } void exprAnd() { int patch = emit(OP_JUMP_IF_FALSE_OR_POP); parse_expression(PREC_LOGICAL_AND); patch_jump(patch); } void exprTernary() { int patch = emit(OP_POP_JUMP_IF_FALSE); EXPR(); // if true int patch2 = emit(OP_JUMP_ABSOLUTE); consume(TK(":")); patch_jump(patch); EXPR(); // if false patch_jump(patch2); } void exprBinaryOp() { TokenIndex op = parser->prev.type; parse_expression((Precedence)(rules[op].precedence + 1)); switch (op) { case TK("+"): emit(OP_BINARY_OP, 0); break; case TK("-"): emit(OP_BINARY_OP, 1); break; case TK("*"): emit(OP_BINARY_OP, 2); break; case TK("/"): emit(OP_BINARY_OP, 3); break; case TK("//"): emit(OP_BINARY_OP, 4); break; case TK("%"): emit(OP_BINARY_OP, 5); break; case TK("**"): emit(OP_BINARY_OP, 6); break; case TK("<"): emit(OP_COMPARE_OP, 0); break; case TK("<="): emit(OP_COMPARE_OP, 1); break; case TK("=="): emit(OP_COMPARE_OP, 2); break; case TK("!="): emit(OP_COMPARE_OP, 3); break; case TK(">"): emit(OP_COMPARE_OP, 4); break; case TK(">="): emit(OP_COMPARE_OP, 5); break; case TK("in"): emit(OP_CONTAINS_OP, 0); break; case TK("not in"): emit(OP_CONTAINS_OP, 1); break; case TK("is"): emit(OP_IS_OP, 0); break; case TK("is not"): emit(OP_IS_OP, 1); break; case TK("<<"): emit(OP_BITWISE_OP, 0); break; case TK(">>"): emit(OP_BITWISE_OP, 1); break; case TK("&"): emit(OP_BITWISE_OP, 2); break; case TK("|"): emit(OP_BITWISE_OP, 3); break; case TK("^"): emit(OP_BITWISE_OP, 4); break; default: UNREACHABLE(); } } void exprUnaryOp() { TokenIndex op = parser->prev.type; parse_expression((Precedence)(PREC_UNARY + 1)); switch (op) { case TK("-"): emit(OP_UNARY_NEGATIVE); break; case TK("not"): emit(OP_UNARY_NOT); break; case TK("*"): emit(OP_UNARY_STAR, co()->_rvalue); break; default: UNREACHABLE(); } } void exprGrouping() { match_newlines(mode()==REPL_MODE); EXPR_TUPLE(); match_newlines(mode()==REPL_MODE); consume(TK(")")); } void exprList() { int _patch = emit(OP_NO_OP); int _body_start = co()->codes.size(); int ARGC = 0; do { match_newlines(mode()==REPL_MODE); if (peek() == TK("]")) break; EXPR(); ARGC++; match_newlines(mode()==REPL_MODE); if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP; } while (match(TK(","))); match_newlines(mode()==REPL_MODE); consume(TK("]")); emit(OP_BUILD_LIST, ARGC); return; __LISTCOMP: int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1); int _body_end = co()->codes.size(); co()->codes[_patch].op = OP_JUMP_ABSOLUTE; co()->codes[_patch].arg = _body_end; emit(OP_BUILD_LIST, 0); EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE(); match_newlines(mode()==REPL_MODE); int _skipPatch = emit(OP_JUMP_ABSOLUTE); int _cond_start = co()->codes.size(); int _cond_end_return = -1; if(match(TK("if"))) { EXPR_TUPLE(); _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1); } patch_jump(_skipPatch); emit(OP_GET_ITER); co()->_enter_block(FOR_LOOP); emit(OP_FOR_ITER); if(_cond_end_return != -1) { // there is an if condition emit(OP_JUMP_ABSOLUTE, _cond_start); patch_jump(_cond_end_return); int ifpatch = emit(OP_POP_JUMP_IF_FALSE); emit(OP_JUMP_ABSOLUTE, _body_start); patch_jump(_body_end_return); emit(OP_LIST_APPEND); patch_jump(ifpatch); }else{ emit(OP_JUMP_ABSOLUTE, _body_start); patch_jump(_body_end_return); emit(OP_LIST_APPEND); } emit(OP_LOOP_CONTINUE, -1, true); co()->_exit_block(); match_newlines(mode()==REPL_MODE); consume(TK("]")); } void exprMap() { bool parsing_dict = false; int size = 0; do { match_newlines(mode()==REPL_MODE); if (peek() == TK("}")) break; EXPR(); if(peek() == TK(":")) parsing_dict = true; if(parsing_dict){ consume(TK(":")); EXPR(); } size++; match_newlines(mode()==REPL_MODE); } while (match(TK(","))); consume(TK("}")); if(size == 0 || parsing_dict) emit(OP_BUILD_MAP, size); else emit(OP_BUILD_SET, size); } void exprCall() { int ARGC = 0; int KWARGC = 0; bool need_unpack = false; do { match_newlines(mode()==REPL_MODE); if (peek() == TK(")")) break; if(peek() == TK("@id") && peek_next() == TK("=")) { consume(TK("@id")); const Str& key = parser->prev.str(); emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(key))); consume(TK("=")); co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; KWARGC++; } else{ if(KWARGC > 0) SyntaxError("positional argument follows keyword argument"); co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; if(co()->codes.back().op == OP_UNARY_STAR) need_unpack = true; ARGC++; } match_newlines(mode()==REPL_MODE); } while (match(TK(","))); consume(TK(")")); if(ARGC > 32767) SyntaxError("too many positional arguments"); if(KWARGC > 32767) SyntaxError("too many keyword arguments"); if(KWARGC > 0){ emit(need_unpack ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC << 16) | ARGC); }else{ emit(need_unpack ? OP_CALL_UNPACK : OP_CALL, ARGC); } } void exprName(){ _exprName(false); } void _exprName(bool force_lvalue) { Token tkname = parser->prev; int index = co()->add_name(tkname.str(), name_scope()); bool fast_load = !force_lvalue && co()->_rvalue>0; emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index); } void exprAttrib() { consume(TK("@id")); const Str& name = parser->prev.str(); int index = co()->add_name(name, NAME_ATTR); emit(co()->_rvalue ? OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index); } // [:], [:b] // [a], [a:], [a:b] void exprSubscript() { if(match(TK(":"))){ emit(OP_LOAD_NONE); if(match(TK("]"))){ emit(OP_LOAD_NONE); }else{ EXPR_TUPLE(); consume(TK("]")); } emit(OP_BUILD_SLICE); }else{ EXPR_TUPLE(); if(match(TK(":"))){ if(match(TK("]"))){ emit(OP_LOAD_NONE); }else{ EXPR_TUPLE(); consume(TK("]")); } emit(OP_BUILD_SLICE); }else{ consume(TK("]")); } } emit(OP_BUILD_INDEX, (int)(co()->_rvalue>0)); } void exprValue() { TokenIndex op = parser->prev.type; switch (op) { case TK("None"): emit(OP_LOAD_NONE); break; case TK("True"): emit(OP_LOAD_TRUE); break; case TK("False"): emit(OP_LOAD_FALSE); break; case TK("..."): emit(OP_LOAD_ELLIPSIS); break; default: UNREACHABLE(); } } int emit(Opcode opcode, int arg=-1, bool keepline=false) { int line = parser->prev.line; co()->codes.push_back( Bytecode{(uint8_t)opcode, arg, line, (uint16_t)co()->_curr_block_i} ); int i = co()->codes.size() - 1; if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line; return i; } inline void patch_jump(int addr_index) { int target = co()->codes.size(); co()->codes[addr_index].arg = target; } void compile_block_body(CompilerAction action=nullptr) { if(action == nullptr) action = &Compiler::compile_stmt; consume(TK(":")); if(peek()!=TK("@eol") && peek()!=TK("@eof")){ (this->*action)(); // inline block return; } if(!match_newlines(mode()==REPL_MODE)){ SyntaxError("expected a new line after ':'"); } consume(TK("@indent")); while (peek() != TK("@dedent")) { match_newlines(); (this->*action)(); match_newlines(); } consume(TK("@dedent")); } Token _compile_import() { consume(TK("@id")); Token tkmodule = parser->prev; int index = co()->add_name(tkmodule.str(), NAME_SPECIAL); emit(OP_IMPORT_NAME, index); return tkmodule; } // import a as b void compile_normal_import() { do { Token tkmodule = _compile_import(); if (match(TK("as"))) { consume(TK("@id")); tkmodule = parser->prev; } int index = co()->add_name(tkmodule.str(), name_scope()); emit(OP_STORE_NAME, index); } while (match(TK(","))); consume_end_stmt(); } // from a import b as c, d as e void compile_from_import() { Token tkmodule = _compile_import(); consume(TK("import")); if (match(TK("*"))) { if(name_scope() != NAME_GLOBAL) SyntaxError("import * can only be used in global scope"); emit(OP_STORE_ALL_NAMES); consume_end_stmt(); return; } do { emit(OP_DUP_TOP_VALUE); consume(TK("@id")); Token tkname = parser->prev; int index = co()->add_name(tkname.str(), NAME_ATTR); emit(OP_BUILD_ATTR, index); if (match(TK("as"))) { consume(TK("@id")); tkname = parser->prev; } index = co()->add_name(tkname.str(), name_scope()); emit(OP_STORE_NAME, index); } while (match(TK(","))); emit(OP_POP_TOP); consume_end_stmt(); } void parse_expression(Precedence precedence) { lex_token(); GrammarFn prefix = rules[parser->prev.type].prefix; if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type)); (this->*prefix)(); bool meet_assign_token = false; while (rules[peek()].precedence >= precedence) { lex_token(); TokenIndex op = parser->prev.type; if (op == TK("=")){ if(meet_assign_token) SyntaxError(); meet_assign_token = true; } GrammarFn infix = rules[op].infix; if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true"); (this->*infix)(); } } void compile_if_stmt() { match_newlines(); co()->_rvalue += 1; EXPR_TUPLE(); // condition co()->_rvalue -= 1; int ifpatch = emit(OP_POP_JUMP_IF_FALSE); compile_block_body(); if (match(TK("elif"))) { int exit_jump = emit(OP_JUMP_ABSOLUTE); patch_jump(ifpatch); compile_if_stmt(); patch_jump(exit_jump); } else if (match(TK("else"))) { int exit_jump = emit(OP_JUMP_ABSOLUTE); patch_jump(ifpatch); compile_block_body(); patch_jump(exit_jump); } else { patch_jump(ifpatch); } } void compile_while_loop() { co()->_enter_block(WHILE_LOOP); co()->_rvalue += 1; EXPR_TUPLE(); // condition co()->_rvalue -= 1; int patch = emit(OP_POP_JUMP_IF_FALSE); compile_block_body(); emit(OP_LOOP_CONTINUE, -1, true); patch_jump(patch); co()->_exit_block(); } void EXPR_FOR_VARS(){ int size = 0; do { consume(TK("@id")); _exprName(true); size++; } while (match(TK(","))); if(size > 1) emit(OP_BUILD_TUPLE_REF, size); } void compile_for_loop() { EXPR_FOR_VARS();consume(TK("in")); co()->_rvalue += 1; EXPR_TUPLE(); co()->_rvalue -= 1; emit(OP_GET_ITER); co()->_enter_block(FOR_LOOP); emit(OP_FOR_ITER); compile_block_body(); emit(OP_LOOP_CONTINUE, -1, true); co()->_exit_block(); } void compile_try_except() { co()->_enter_block(TRY_EXCEPT); emit(OP_TRY_BLOCK_ENTER); compile_block_body(); emit(OP_TRY_BLOCK_EXIT); std::vector patches = { emit(OP_JUMP_ABSOLUTE) }; co()->_exit_block(); do { consume(TK("except")); if(match(TK("@id"))){ int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL); emit(OP_EXCEPTION_MATCH, name_idx); }else{ emit(OP_LOAD_TRUE); } int patch = emit(OP_POP_JUMP_IF_FALSE); emit(OP_POP_TOP); // pop the exception on match compile_block_body(); patches.push_back(emit(OP_JUMP_ABSOLUTE)); patch_jump(patch); }while(peek() == TK("except")); emit(OP_RE_RAISE); // no match, re-raise for (int patch : patches) patch_jump(patch); } void compile_stmt() { if (match(TK("break"))) { if (!co()->_is_curr_block_loop()) SyntaxError("'break' outside loop"); consume_end_stmt(); emit(OP_LOOP_BREAK); } else if (match(TK("continue"))) { if (!co()->_is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); consume_end_stmt(); emit(OP_LOOP_CONTINUE); } else if (match(TK("yield"))) { if (codes.size() == 1) SyntaxError("'yield' outside function"); co()->_rvalue += 1; EXPR_TUPLE(); co()->_rvalue -= 1; consume_end_stmt(); co()->is_generator = true; emit(OP_YIELD_VALUE, -1, true); } else if (match(TK("return"))) { if (codes.size() == 1) SyntaxError("'return' outside function"); if(match_end_stmt()){ emit(OP_LOAD_NONE); }else{ co()->_rvalue += 1; EXPR_TUPLE(); // return value co()->_rvalue -= 1; consume_end_stmt(); } emit(OP_RETURN_VALUE, -1, true); } else if (match(TK("if"))) { compile_if_stmt(); } else if (match(TK("while"))) { compile_while_loop(); } else if (match(TK("for"))) { compile_for_loop(); } else if (match(TK("import"))){ compile_normal_import(); } else if (match(TK("from"))){ compile_from_import(); } else if (match(TK("def"))){ compile_function(); } else if (match(TK("@"))){ EXPR(); if(!match_newlines(mode()==REPL_MODE)){ SyntaxError("expected a new line after '@'"); } emit(OP_SETUP_DECORATOR); consume(TK("def")); compile_function(); } else if (match(TK("try"))) { compile_try_except(); } else if(match(TK("assert"))) { co()->_rvalue += 1; EXPR(); if (match(TK(","))) EXPR(); else emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(""))); co()->_rvalue -= 1; emit(OP_ASSERT); consume_end_stmt(); } else if(match(TK("with"))){ EXPR(); consume(TK("as")); consume(TK("@id")); Token tkname = parser->prev; int index = co()->add_name(tkname.str(), name_scope()); emit(OP_STORE_NAME, index); emit(OP_LOAD_NAME_REF, index); emit(OP_WITH_ENTER); compile_block_body(); emit(OP_LOAD_NAME_REF, index); emit(OP_WITH_EXIT); } else if(match(TK("label"))){ if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); Str label = parser->prev.str(); bool ok = co()->add_label(label); if(!ok) SyntaxError("label '" + label + "' already exists"); consume_end_stmt(); } else if(match(TK("goto"))){ // https://entrian.com/goto/ if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL)); consume_end_stmt(); } else if(match(TK("raise"))){ consume(TK("@id")); int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL); if(match(TK("(")) && !match(TK(")"))){ EXPR(); consume(TK(")")); }else{ emit(OP_LOAD_NONE); } emit(OP_RAISE, dummy_t); consume_end_stmt(); } else if(match(TK("del"))){ EXPR_TUPLE(); emit(OP_DELETE_REF); consume_end_stmt(); } else if(match(TK("global"))){ do { consume(TK("@id")); co()->global_names[parser->prev.str()] = 1; } while (match(TK(","))); consume_end_stmt(); } else if(match(TK("pass"))){ consume_end_stmt(); } else { EXPR_ANY(); consume_end_stmt(); // If last op is not an assignment, pop the result. uint8_t last_op = co()->codes.back().op; if( last_op!=OP_STORE_NAME && last_op!=OP_STORE_REF && last_op!=OP_INPLACE_BINARY_OP && last_op!=OP_INPLACE_BITWISE_OP && last_op!=OP_STORE_ALL_NAMES && last_op!=OP_STORE_CLASS_ATTR){ if(last_op == OP_BUILD_TUPLE_REF) co()->codes.back().op = OP_BUILD_TUPLE; if(mode()==REPL_MODE && name_scope() == NAME_GLOBAL) emit(OP_PRINT_EXPR, -1, true); emit(OP_POP_TOP, -1, true); } } } void compile_class(){ consume(TK("@id")); int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); int super_cls_name_idx = -1; if(match(TK("(")) && match(TK("@id"))){ super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); consume(TK(")")); } if(super_cls_name_idx == -1) emit(OP_LOAD_NONE); else emit(OP_LOAD_NAME, super_cls_name_idx); emit(OP_BEGIN_CLASS, cls_name_idx); co()->_is_compiling_class = true; compile_block_body(); co()->_is_compiling_class = false; emit(OP_END_CLASS); } void _compile_f_args(Function& func, bool enable_type_hints){ int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs do { if(state == 3) SyntaxError("**kwargs should be the last argument"); match_newlines(); if(match(TK("*"))){ if(state < 1) state = 1; else SyntaxError("*args should be placed before **kwargs"); } else if(match(TK("**"))){ state = 3; } consume(TK("@id")); const Str& name = parser->prev.str(); if(func.has_name(name)) SyntaxError("duplicate argument name"); // eat type hints if(enable_type_hints && match(TK(":"))) consume(TK("@id")); if(state == 0 && peek() == TK("=")) state = 2; switch (state) { case 0: func.args.push_back(name); break; case 1: func.starred_arg = name; state+=1; break; case 2: { consume(TK("=")); PyVarOrNull value = read_literal(); if(value == nullptr){ SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type)); } func.kwargs.set(name, value); func.kwargs_order.push_back(name); } break; case 3: SyntaxError("**kwargs is not supported yet"); break; } } while (match(TK(","))); } void compile_function(){ bool has_decorator = !co()->codes.empty() && co()->codes.back().op == OP_SETUP_DECORATOR; Function func; StrName obj_name; consume(TK("@id")); func.name = parser->prev.str(); if(!co()->_is_compiling_class && match(TK("::"))){ consume(TK("@id")); obj_name = func.name; func.name = parser->prev.str(); } consume(TK("(")); if (!match(TK(")"))) { _compile_f_args(func, true); consume(TK(")")); } if(match(TK("->"))) consume(TK("@id")); // eat type hints func.code = make_sp(parser->src, func.name.str()); this->codes.push(func.code); compile_block_body(); func.code->optimize(vm); this->codes.pop(); emit(OP_LOAD_FUNCTION, co()->add_const(vm->PyFunction(func))); if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); if(!co()->_is_compiling_class){ if(obj_name.empty()){ if(has_decorator) emit(OP_CALL, 1); emit(OP_STORE_NAME, co()->add_name(func.name, name_scope())); } else { if(has_decorator) SyntaxError("decorator is not supported here"); emit(OP_LOAD_NAME, co()->add_name(obj_name, name_scope())); int index = co()->add_name(func.name, NAME_ATTR); emit(OP_BUILD_ATTR_REF, index); emit(OP_ROT_TWO); emit(OP_STORE_REF); } }else{ if(has_decorator) emit(OP_CALL, 1); emit(OP_STORE_CLASS_ATTR, co()->add_name(func.name, name_scope())); } } PyVarOrNull read_literal(){ if(match(TK("-"))){ consume(TK("@num")); PyVar val = parser->prev.value; return vm->num_negated(val); } if(match(TK("@num"))) return parser->prev.value; if(match(TK("@str"))) return parser->prev.value; if(match(TK("True"))) return vm->PyBool(true); if(match(TK("False"))) return vm->PyBool(false); if(match(TK("None"))) return vm->None; if(match(TK("..."))) return vm->Ellipsis; return nullptr; } /***** Error Reporter *****/ void throw_err(Str type, Str msg){ int lineno = parser->curr.line; const char* cursor = parser->curr.start; // if error occurs in lexing, lineno should be `parser->current_line` if(lexing_count > 0){ lineno = parser->current_line; cursor = parser->curr_char; } if(parser->peekchar() == '\n') lineno--; auto e = Exception("SyntaxError", msg); e.st_push(parser->src->snapshot(lineno, cursor)); throw e; } void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } void IndentationError(Str msg){ throw_err("IndentationError", msg); } public: CodeObject_ compile(){ // can only be called once if(used) UNREACHABLE(); used = true; CodeObject_ code = make_sp(parser->src, Str("")); codes.push(code); lex_token(); lex_token(); match_newlines(); if(mode()==EVAL_MODE) { EXPR_TUPLE(); consume(TK("@eof")); code->optimize(vm); return code; }else if(mode()==JSON_MODE){ PyVarOrNull value = read_literal(); if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value)); else if(match(TK("{"))) exprMap(); else if(match(TK("["))) exprList(); else SyntaxError("expect a JSON object or array"); consume(TK("@eof")); return code; // no need to optimize for JSON decoding } while (!match(TK("@eof"))) { if (match(TK("class"))) { compile_class(); } else { compile_stmt(); } match_newlines(); } code->optimize(vm); return code; } }; } // namespace pkpy