diff --git a/include/pocketpy/compiler/compiler.hpp b/include/pocketpy/compiler/compiler.hpp index 39132cc2..bdb06812 100644 --- a/include/pocketpy/compiler/compiler.hpp +++ b/include/pocketpy/compiler/compiler.hpp @@ -17,7 +17,7 @@ struct PrattRule { struct Compiler { PK_ALWAYS_PASS_BY_POINTER(Compiler) - static PrattRule rules[kTokenCount]; + static PrattRule rules[TK__COUNT__]; Lexer lexer; vector contexts; @@ -43,7 +43,7 @@ struct Compiler { printf("%s:%d %s %s\n", lexer.src.filename().c_str(), curr().line, - TK_STR(curr().type), + pk_TokenSymbols[curr().type], curr().str().escape().c_str() ); } diff --git a/include/pocketpy/compiler/lexer.h b/include/pocketpy/compiler/lexer.h index cd3dc35b..c3b49306 100644 --- a/include/pocketpy/compiler/lexer.h +++ b/include/pocketpy/compiler/lexer.h @@ -7,11 +7,38 @@ extern "C" { #endif +extern const char* pk_TokenSymbols[]; + typedef struct pkpy_TokenDeserializer { const char* curr; const char* source; } pkpy_TokenDeserializer; +enum TokenIndex{ + TK_EOF, TK_EOL, TK_SOF, + TK_ID, TK_NUM, TK_STR, TK_FSTR, TK_LONG, TK_BYTES, TK_IMAG, + TK_INDENT, TK_DEDENT, + /***************/ + TK_IS_NOT, TK_NOT_IN, TK_YIELD_FROM, + /***************/ + TK_ADD, TK_IADD, TK_SUB, TK_ISUB, + TK_MUL, TK_IMUL, TK_DIV, TK_IDIV, TK_FLOORDIV, TK_IFLOORDIV, TK_MOD, TK_IMOD, + TK_AND, TK_IAND, TK_OR, TK_IOR, TK_XOR, TK_IXOR, + TK_LSHIFT, TK_ILSHIFT, TK_RSHIFT, TK_IRSHIFT, + /***************/ + TK_LPAREN, TK_RPAREN, TK_LBRACKET, TK_RBRACKET, TK_LBRACE, TK_RBRACE, + TK_DOT, TK_DOTDOT, TK_DOTDOTDOT, TK_COMMA, TK_COLON, TK_SEMICOLON, + TK_POW, TK_ARROW, TK_HASH, TK_DECORATOR, + TK_GT, TK_LT, TK_ASSIGN, TK_EQ, TK_NE, TK_GE, TK_LE, TK_INVERT, + /***************/ + TK_FALSE, TK_NONE, TK_TRUE, TK_AND_KW, TK_AS, TK_ASSERT, TK_BREAK, TK_CLASS, TK_CONTINUE, + TK_DEF, TK_DEL, TK_ELIF, TK_ELSE, TK_EXCEPT, TK_FINALLY, TK_FOR, TK_FROM, TK_GLOBAL, + TK_IF, TK_IMPORT, TK_IN, TK_IS, TK_LAMBDA, TK_NOT_KW, TK_OR_KW, TK_PASS, TK_RAISE, TK_RETURN, + TK_TRY, TK_WHILE, 
TK_WITH, TK_YIELD, + /***************/ + TK__COUNT__ +}; + void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source); bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c); c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c); diff --git a/include/pocketpy/compiler/lexer.hpp b/include/pocketpy/compiler/lexer.hpp index c2301b18..955bc45c 100644 --- a/include/pocketpy/compiler/lexer.hpp +++ b/include/pocketpy/compiler/lexer.hpp @@ -2,58 +2,15 @@ #include "pocketpy/objects/error.hpp" #include "pocketpy/objects/sourcedata.h" +#include "pocketpy/compiler/lexer.h" #include namespace pkpy { -typedef uint8_t TokenIndex; - -// clang-format off -constexpr const char* kTokens[] = { - "@eof", "@eol", "@sof", - "@id", "@num", "@str", "@fstr", "@long", "@bytes", "@imag", - "@indent", "@dedent", - // These 3 are compound keywords which are generated on the fly - "is not", "not in", "yield from", - /*****************************************/ - "+", "+=", "-", "-=", // (INPLACE_OP - 1) can get '=' removed - "*", "*=", "/", "/=", "//", "//=", "%", "%=", - "&", "&=", "|", "|=", "^", "^=", - "<<", "<<=", ">>", ">>=", - /*****************************************/ - "(", ")", "[", "]", "{", "}", - ".", "..", "...", ",", ":", ";", - "**", "->", "#", "@", - ">", "<", "=", "==", "!=", ">=", "<=", "~", - /** KW_BEGIN **/ - // NOTE: These keywords should be sorted in ascending order!! 
- "False", "None", "True", "and", "as", "assert", "break", "class", "continue", - "def", "del", "elif", "else", "except", "finally", "for", "from", "global", - "if", "import", "in", "is", "lambda", "not", "or", "pass", "raise", "return", - "try", "while", "with", "yield", -}; -// clang-format on - using TokenValue = std::variant; -const int kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]); -constexpr TokenIndex TK(const char token[]) { - for(int k = 0; k < kTokenCount; k++) { - const char* i = kTokens[k]; - const char* j = token; - while(*i && *j && *i == *j) { - i++; - j++; - } - if(*i == *j) return k; - } - return 255; -} - -constexpr inline bool is_raw_string_used(TokenIndex t) noexcept{ return t == TK("@id") || t == TK("@long"); } - -#define TK_STR(t) kTokens[t] +constexpr inline bool is_raw_string_used(TokenIndex t) noexcept{ return t == TK_ID || t == TK_LONG; } struct Token { TokenIndex type; diff --git a/src/common/str.c b/src/common/str.c index 7b7705d4..ff6fec7d 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -406,6 +406,7 @@ bool c11__is_unicode_Lo_char(int c){ if(c == 0x1f955) return true; int index; c11__lower_bound(const int, kLoRangeA, 476, c, c11__less, &index); + if(index == 476) return false; if(c == kLoRangeA[index]) return true; index -= 1; if(index < 0) return false; diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 4572d897..56a21fc0 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -6,12 +6,12 @@ namespace pkpy { -#define consume(expected) if(!match(expected)) return SyntaxError("expected '%s', got '%s'", TK_STR(expected), TK_STR(curr().type)); +#define consume(expected) if(!match(expected)) return SyntaxError("expected '%s', got '%s'", pk_TokenSymbols[expected], pk_TokenSymbols[curr().type]); #define consume_end_stmt() if(!match_end_stmt()) return SyntaxError("expected statement end") #define check_newlines_repl() { bool __nml; match_newlines(&__nml); if(__nml) return NeedMoreLines(); } 
#define check(B) if((err = B)) return err -PrattRule Compiler::rules[kTokenCount]; +PrattRule Compiler::rules[TK__COUNT__]; NameScope Compiler::name_scope() const noexcept{ auto s = contexts.size() > 1 ? NAME_LOCAL : NAME_GLOBAL; @@ -44,7 +44,7 @@ Error* Compiler::pop_context() noexcept{ ctx()->emit_(OP_RETURN_VALUE, 1, BC_KEEPLINE, true); // find the last valid token int j = __i - 1; - while(tk(j).type == TK("@eol") || tk(j).type == TK("@dedent") || tk(j).type == TK("@eof")) + while(tk(j).type == TK_EOL || tk(j).type == TK_DEDENT || tk(j).type == TK_EOF) j--; ctx()->co->end_line = tk(j).line; @@ -115,52 +115,52 @@ void Compiler::init_pratt_rules() noexcept{ // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ #define PK_METHOD(name) &Compiler::name #define PK_NO_INFIX nullptr, PREC_LOWEST - for(TokenIndex i = 0; i < kTokenCount; i++) rules[i] = { nullptr, PK_NO_INFIX }; - rules[TK(".")] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY }; - rules[TK("(")] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY }; - rules[TK("[")] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY }; - rules[TK("{")] = { PK_METHOD(exprMap), PK_NO_INFIX }; - rules[TK("%")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK("+")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM }; - rules[TK("-")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM }; - rules[TK("*")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK("~")] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY }; - rules[TK("/")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK("//")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK("**")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT }; - rules[TK(">")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("==")] = { nullptr, 
PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("!=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK(">=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("is")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; - rules[TK(">>")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; - rules[TK("&")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND }; - rules[TK("|")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR }; - rules[TK("^")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR }; - rules[TK("@")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK("if")] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY }; - rules[TK("not in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("is not")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("and") ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND }; - rules[TK("or")] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR }; - rules[TK("not")] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; - rules[TK("True")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; - rules[TK("False")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; - rules[TK("None")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; - rules[TK("...")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; - rules[TK("lambda")] = { PK_METHOD(exprLambda), PK_NO_INFIX }; - rules[TK("@id")] = { PK_METHOD(exprName), PK_NO_INFIX }; - rules[TK("@num")] = { PK_METHOD(exprLiteral), PK_NO_INFIX }; - rules[TK("@str")] = { PK_METHOD(exprLiteral), PK_NO_INFIX }; - rules[TK("@fstr")] = { PK_METHOD(exprFString), PK_NO_INFIX }; - rules[TK("@long")] = { PK_METHOD(exprLong), PK_NO_INFIX }; - rules[TK("@imag")] = { PK_METHOD(exprImag), 
PK_NO_INFIX }; - rules[TK("@bytes")] = { PK_METHOD(exprBytes), PK_NO_INFIX }; - rules[TK(":")] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY }; + for(int i = 0; i < TK__COUNT__; i++) rules[i] = { nullptr, PK_NO_INFIX }; + rules[TK_DOT] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY }; + rules[TK_LPAREN] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY }; + rules[TK_LBRACKET] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY }; + rules[TK_LBRACE] = { PK_METHOD(exprMap), PK_NO_INFIX }; + rules[TK_MOD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK_ADD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM }; + rules[TK_SUB] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM }; + rules[TK_MUL] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK_INVERT] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY }; + rules[TK_DIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK_FLOORDIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK_POW] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT }; + rules[TK_GT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_LT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_EQ] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_NE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_GE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_LE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_IS] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_LSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; + rules[TK_RSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; + rules[TK_AND] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND }; + rules[TK_OR] = { nullptr, PK_METHOD(exprBinaryOp), 
PREC_BITWISE_OR }; + rules[TK_XOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR }; + rules[TK_DECORATOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK_IF] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY }; + rules[TK_NOT_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_IS_NOT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK_AND_KW ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND }; + rules[TK_OR_KW] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR }; + rules[TK_NOT_KW] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; + rules[TK_TRUE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; + rules[TK_FALSE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; + rules[TK_NONE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; + rules[TK_DOTDOTDOT] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; + rules[TK_LAMBDA] = { PK_METHOD(exprLambda), PK_NO_INFIX }; + rules[TK_ID] = { PK_METHOD(exprName), PK_NO_INFIX }; + rules[TK_NUM] = { PK_METHOD(exprLiteral), PK_NO_INFIX }; + rules[TK_STR] = { PK_METHOD(exprLiteral), PK_NO_INFIX }; + rules[TK_FSTR] = { PK_METHOD(exprFString), PK_NO_INFIX }; + rules[TK_LONG] = { PK_METHOD(exprLong), PK_NO_INFIX }; + rules[TK_IMAG] = { PK_METHOD(exprImag), PK_NO_INFIX }; + rules[TK_BYTES] = { PK_METHOD(exprBytes), PK_NO_INFIX }; + rules[TK_COLON] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY }; #undef PK_METHOD #undef PK_NO_INFIX @@ -175,30 +175,30 @@ bool Compiler::match(TokenIndex expected) noexcept{ bool Compiler::match_newlines(bool* need_more_lines) noexcept{ bool consumed = false; - if(curr().type == TK("@eol")) { - while(curr().type == TK("@eol")) advance(); + if(curr().type == TK_EOL) { + while(curr().type == TK_EOL) advance(); consumed = true; } if(need_more_lines) { - *need_more_lines = (mode() == REPL_MODE && curr().type == TK("@eof")); + *need_more_lines = (mode() == REPL_MODE && curr().type == TK_EOF); } return consumed; } bool Compiler::match_end_stmt() noexcept{ 
- if(match(TK(";"))) { + if(match(TK_SEMICOLON)) { match_newlines(); return true; } - if(match_newlines() || curr().type == TK("@eof")) return true; - if(curr().type == TK("@dedent")) return true; + if(match_newlines() || curr().type == TK_EOF) return true; + if(curr().type == TK_DEDENT) return true; return false; } Error* Compiler::EXPR_TUPLE(bool allow_slice) noexcept{ Error* err; check(parse_expression(PREC_LOWEST + 1, allow_slice)); - if(!match(TK(","))) return NULL; + if(!match(TK_COMMA)) return NULL; // tuple expression int count = 1; do { @@ -207,7 +207,7 @@ Error* Compiler::EXPR_TUPLE(bool allow_slice) noexcept{ check(parse_expression(PREC_LOWEST + 1, allow_slice)); count += 1; if(curr().brackets_level) check_newlines_repl(); - } while(match(TK(","))); + } while(match(TK_COMMA)); TupleExpr* e = make_expr(count); for(int i=count-1; i>=0; i--) e->items[i] = ctx()->s_popx(); @@ -218,10 +218,10 @@ Error* Compiler::EXPR_TUPLE(bool allow_slice) noexcept{ Error* Compiler::EXPR_VARS() noexcept{ int count = 0; do { - consume(TK("@id")); + consume(TK_ID); ctx()->s_push(make_expr(prev().str(), name_scope())); count += 1; - } while(match(TK(","))); + } while(match(TK_COMMA)); if(count > 1){ TupleExpr* e = make_expr(count); for(int i=count-1; i>=0; i--) @@ -260,9 +260,9 @@ Error* Compiler::exprLambda() noexcept{ Error* err; FuncDecl_ decl = push_f_context(""); int line = prev().line; // backup line - if(!match(TK(":"))) { + if(!match(TK_COLON)) { check(_compile_f_args(decl, false)); - consume(TK(":")); + consume(TK_COLON); } // https://github.com/pocketpy/pocketpy/issues/37 check(parse_expression(PREC_LAMBDA + 1)); @@ -304,7 +304,7 @@ Error* Compiler::exprTernary() noexcept{ Error* err; int line = prev().line; check(parse_expression(PREC_TERNARY + 1)); // [true_expr, cond] - consume(TK("else")); + consume(TK_ELSE); check(parse_expression(PREC_TERNARY + 1)); // [true_expr, cond, false_expr] auto e = make_expr(); e->line = line; @@ -341,10 +341,10 @@ Error* 
Compiler::exprUnaryOp() noexcept{ TokenIndex op = prev().type; check(parse_expression(PREC_UNARY + 1)); switch(op) { - case TK("-"): ctx()->s_push(make_expr(ctx()->s_popx())); break; - case TK("~"): ctx()->s_push(make_expr(ctx()->s_popx())); break; - case TK("*"): ctx()->s_push(make_expr(ctx()->s_popx(), 1)); break; - case TK("**"): ctx()->s_push(make_expr(ctx()->s_popx(), 2)); break; + case TK_SUB: ctx()->s_push(make_expr(ctx()->s_popx())); break; + case TK_INVERT: ctx()->s_push(make_expr(ctx()->s_popx())); break; + case TK_MUL: ctx()->s_push(make_expr(ctx()->s_popx(), 1)); break; + case TK_POW: ctx()->s_push(make_expr(ctx()->s_popx(), 2)); break; default: assert(false); } return NULL; @@ -355,7 +355,7 @@ Error* Compiler::exprGroup() noexcept{ check_newlines_repl() check(EXPR_TUPLE()); // () is just for change precedence check_newlines_repl() - consume(TK(")")); + consume(TK_RPAREN); if(ctx()->s_top()->is_tuple()) return NULL; Expr* g = make_expr(ctx()->s_popx()); ctx()->s_push(g); @@ -367,10 +367,10 @@ Error* Compiler::consume_comp(Opcode op0, Opcode op1) noexcept{ Error* err; bool has_cond = false; check(EXPR_VARS()); // [expr, vars] - consume(TK("in")); + consume(TK_IN); check(parse_expression(PREC_TERNARY + 1)); // [expr, vars, iter] check_newlines_repl() - if(match(TK("if"))) { + if(match(TK_IF)) { check(parse_expression(PREC_TERNARY + 1)); // [expr, vars, iter, cond] has_cond = true; } @@ -390,17 +390,17 @@ Error* Compiler::exprList() noexcept{ int count = 0; do { check_newlines_repl() - if(curr().type == TK("]")) break; + if(curr().type == TK_RBRACKET) break; check(EXPR()); count += 1; check_newlines_repl() - if(count == 1 && match(TK("for"))) { + if(count == 1 && match(TK_FOR)) { check(consume_comp(OP_BUILD_LIST, OP_LIST_APPEND)); - consume(TK("]")); + consume(TK_RBRACKET); return NULL; } check_newlines_repl() - } while(match(TK(","))); - consume(TK("]")); + } while(match(TK_COMMA)); + consume(TK_RBRACKET); ListExpr* e = make_expr(count); e->line = line; 
// override line for(int i=count-1; i>=0; i--) @@ -415,10 +415,10 @@ Error* Compiler::exprMap() noexcept{ int count = 0; do { check_newlines_repl() - if(curr().type == TK("}")) break; + if(curr().type == TK_RBRACE) break; check(EXPR()); // [key] int star_level = ctx()->s_top()->star_level(); - if(star_level == 2 || curr().type == TK(":")) { parsing_dict = true; } + if(star_level == 2 || curr().type == TK_COLON) { parsing_dict = true; } if(parsing_dict) { if(star_level == 2) { DictItemExpr* dict_item = make_expr(); @@ -426,7 +426,7 @@ Error* Compiler::exprMap() noexcept{ dict_item->value = ctx()->s_popx(); ctx()->s_push(dict_item); } else { - consume(TK(":")); + consume(TK_COLON); check(EXPR()); DictItemExpr* dict_item = make_expr(); dict_item->value = ctx()->s_popx(); @@ -436,18 +436,18 @@ Error* Compiler::exprMap() noexcept{ } count += 1; check_newlines_repl() - if(count == 1 && match(TK("for"))) { + if(count == 1 && match(TK_FOR)) { if(parsing_dict){ check(consume_comp(OP_BUILD_DICT, OP_DICT_ADD)); }else{ check(consume_comp(OP_BUILD_SET, OP_SET_ADD)); } - consume(TK("}")); + consume(TK_RBRACE); return NULL; } check_newlines_repl() - } while(match(TK(","))); - consume(TK("}")); + } while(match(TK_COMMA)); + consume(TK_RBRACE); SequenceExpr* se; if(count == 0 || parsing_dict) { @@ -468,11 +468,11 @@ Error* Compiler::exprCall() noexcept{ ctx()->s_push(e); // push onto the stack in advance do { check_newlines_repl() - if(curr().type == TK(")")) break; - if(curr().type == TK("@id") && next().type == TK("=")) { - consume(TK("@id")); + if(curr().type == TK_RPAREN) break; + if(curr().type == TK_ID && next().type == TK_ASSIGN) { + consume(TK_ID); StrName key(prev().sv()); - consume(TK("=")); + consume(TK_ASSIGN); check(EXPR()); e->kwargs.push_back({key, ctx()->s_popx()}); } else { @@ -487,8 +487,8 @@ Error* Compiler::exprCall() noexcept{ } } check_newlines_repl() - } while(match(TK(","))); - consume(TK(")")); + } while(match(TK_COMMA)); + consume(TK_RPAREN); return NULL; 
} @@ -501,7 +501,7 @@ Error* Compiler::exprName() noexcept{ } Error* Compiler::exprAttrib() noexcept{ - consume(TK("@id")); + consume(TK_ID); ctx()->s_push(make_expr(ctx()->s_popx(), StrName::get(prev().sv()))); return NULL; } @@ -514,11 +514,11 @@ Error* Compiler::exprSlice0() noexcept{ check(EXPR()); slice->stop = ctx()->s_popx(); // try optional step - if(match(TK(":"))) { // :: + if(match(TK_COLON)) { // :: check(EXPR()); slice->step = ctx()->s_popx(); } - } else if(match(TK(":"))) { + } else if(match(TK_COLON)) { if(is_expression()) { // :: check(EXPR()); slice->step = ctx()->s_popx(); @@ -536,11 +536,11 @@ Error* Compiler::exprSlice1() noexcept{ check(EXPR()); slice->stop = ctx()->s_popx(); // try optional step - if(match(TK(":"))) { // :: + if(match(TK_COLON)) { // :: check(EXPR()); slice->step = ctx()->s_popx(); } - } else if(match(TK(":"))) { // :: + } else if(match(TK_COLON)) { // :: check(EXPR()); slice->step = ctx()->s_popx(); } // else : @@ -553,7 +553,7 @@ Error* Compiler::exprSubscr() noexcept{ check_newlines_repl() check(EXPR_TUPLE(true)); check_newlines_repl() - consume(TK("]")); // [lhs, rhs] + consume(TK_RBRACKET); // [lhs, rhs] SubscrExpr* e = make_expr(); e->line = line; e->rhs = ctx()->s_popx(); // [lhs] @@ -570,12 +570,12 @@ Error* Compiler::exprLiteral0() noexcept{ Error* Compiler::compile_block_body(PrattCallback callback) noexcept{ Error* err; if(!callback) callback = &Compiler::compile_stmt; - consume(TK(":")); - if(curr().type != TK("@eol") && curr().type != TK("@eof")) { + consume(TK_COLON); + if(curr().type != TK_EOL && curr().type != TK_EOF) { while(true) { check(compile_stmt()); - bool possible = curr().type != TK("@eol") && curr().type != TK("@eof"); - if(prev().type != TK(";") || !possible) break; + bool possible = curr().type != TK_EOL && curr().type != TK_EOF; + if(prev().type != TK_SEMICOLON || !possible) break; } return NULL; } @@ -585,13 +585,13 @@ Error* Compiler::compile_block_body(PrattCallback callback) noexcept{ 
if(need_more_lines) return NeedMoreLines(); if(!consumed) return SyntaxError("expected a new line after ':'"); - consume(TK("@indent")); - while(curr().type != TK("@dedent")) { + consume(TK_INDENT); + while(curr().type != TK_DEDENT) { match_newlines(); check((this->*callback)()); match_newlines(); } - consume(TK("@dedent")); + consume(TK_DEDENT); return NULL; } @@ -599,15 +599,15 @@ Error* Compiler::compile_block_body(PrattCallback callback) noexcept{ // import a [as b], c [as d] Error* Compiler::compile_normal_import() noexcept{ do { - consume(TK("@id")); + consume(TK_ID); Str name = prev().str(); ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(name.sv()), prev().line); - if(match(TK("as"))) { - consume(TK("@id")); + if(match(TK_AS)) { + consume(TK_ID); name = prev().str(); } ctx()->emit_store_name(name_scope(), StrName(name), prev().line); - } while(match(TK(","))); + } while(match(TK_COMMA)); consume_end_stmt(); return NULL; } @@ -624,9 +624,9 @@ Error* Compiler::compile_from_import() noexcept{ while(true) { switch(curr().type) { - case TK("."): dots += 1; break; - case TK(".."): dots += 2; break; - case TK("..."): dots += 3; break; + case TK_DOT: dots += 1; break; + case TK_DOTDOT: dots += 2; break; + case TK_DOTDOTDOT: dots += 3; break; default: goto __EAT_DOTS_END; } advance(); @@ -638,27 +638,27 @@ __EAT_DOTS_END: if(dots > 0) { // @id is optional if dots > 0 - if(match(TK("@id"))) { + if(match(TK_ID)) { ss << prev().sv(); - while(match(TK("."))) { - consume(TK("@id")); + while(match(TK_DOT)) { + consume(TK_ID); ss << "." << prev().sv(); } } } else { // @id is required if dots == 0 - consume(TK("@id")); + consume(TK_ID); ss << prev().sv(); - while(match(TK("."))) { - consume(TK("@id")); + while(match(TK_DOT)) { + consume(TK_ID); ss << "." 
<< prev().sv(); } } ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(ss.str().sv()), prev().line); - consume(TK("import")); + consume(TK_IMPORT); - if(match(TK("*"))) { + if(match(TK_MUL)) { if(name_scope() != NAME_GLOBAL) return SyntaxError("from import * can only be used in global scope"); // pop the module and import __all__ ctx()->emit_(OP_POP_IMPORT_STAR, BC_NOARG, prev().line); @@ -668,15 +668,15 @@ __EAT_DOTS_END: do { ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE); - consume(TK("@id")); + consume(TK_ID); Str name = prev().str(); ctx()->emit_(OP_LOAD_ATTR, StrName(name).index, prev().line); - if(match(TK("as"))) { - consume(TK("@id")); + if(match(TK_AS)) { + consume(TK_ID); name = prev().str(); } ctx()->emit_store_name(name_scope(), StrName(name), prev().line); - } while(match(TK(","))); + } while(match(TK_COMMA)); ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); consume_end_stmt(); return NULL; @@ -684,18 +684,18 @@ __EAT_DOTS_END: bool Compiler::is_expression(bool allow_slice) noexcept{ PrattCallback prefix = rules[curr().type].prefix; - return prefix != nullptr && (allow_slice || curr().type != TK(":")); + return prefix != nullptr && (allow_slice || curr().type != TK_COLON); } Error* Compiler::parse_expression(int precedence, bool allow_slice) noexcept{ PrattCallback prefix = rules[curr().type].prefix; - if(prefix == nullptr || (curr().type == TK(":") && !allow_slice)) { - return SyntaxError("expected an expression, got %s", TK_STR(curr().type)); + if(prefix == nullptr || (curr().type == TK_COLON && !allow_slice)) { + return SyntaxError("expected an expression, got %s", pk_TokenSymbols[curr().type]); } advance(); Error* err; check((this->*prefix)()); - while(rules[curr().type].precedence >= precedence && (allow_slice || curr().type != TK(":"))) { + while(rules[curr().type].precedence >= precedence && (allow_slice || curr().type != TK_COLON)) { TokenIndex op = curr().type; advance(); PrattCallback infix = rules[op].infix; @@ -712,12 +712,12 @@ Error* 
Compiler::compile_if_stmt() noexcept{ int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); err = compile_block_body(); if(err) return err; - if(match(TK("elif"))) { + if(match(TK_ELIF)) { int exit_patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, prev().line); ctx()->patch_jump(patch); check(compile_if_stmt()); ctx()->patch_jump(exit_patch); - } else if(match(TK("else"))) { + } else if(match(TK_ELSE)) { int exit_patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, prev().line); ctx()->patch_jump(patch); check(compile_block_body()); @@ -739,7 +739,7 @@ Error* Compiler::compile_while_loop() noexcept{ ctx()->patch_jump(patch); ctx()->exit_block(); // optional else clause - if(match(TK("else"))) { + if(match(TK_ELSE)) { check(compile_block_body()); block->end2 = ctx()->co->codes.size(); } @@ -749,7 +749,7 @@ Error* Compiler::compile_while_loop() noexcept{ Error* Compiler::compile_for_loop() noexcept{ Error* err; check(EXPR_VARS()); // [vars] - consume(TK("in")); + consume(TK_IN); check(EXPR_TUPLE()); // [vars, iter] ctx()->s_emit_top(); // [vars] ctx()->emit_(OP_GET_ITER_NEW, BC_NOARG, BC_KEEPLINE); @@ -764,7 +764,7 @@ Error* Compiler::compile_for_loop() noexcept{ ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true); ctx()->exit_block(); // optional else clause - if(match(TK("else"))) { + if(match(TK_ELSE)) { check(compile_block_body()); block->end2 = ctx()->co->codes.size(); } @@ -781,16 +781,16 @@ Error* Compiler::compile_try_except() noexcept{ ctx()->exit_block(); int finally_entry = -1; - if(curr().type != TK("finally")) { + if(curr().type != TK_FINALLY) { do { StrName as_name; - consume(TK("except")); + consume(TK_EXCEPT); if(is_expression()) { check(EXPR()); // push assumed type on to the stack ctx()->s_emit_top(); ctx()->emit_(OP_EXCEPTION_MATCH, BC_NOARG, prev().line); - if(match(TK("as"))) { - consume(TK("@id")); + if(match(TK_AS)) { + consume(TK_ID); as_name = StrName(prev().sv()); } } else { @@ -807,10 +807,10 @@ Error* 
Compiler::compile_try_except() noexcept{ check(compile_block_body()); patches.push_back(ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE)); ctx()->patch_jump(patch); - } while(curr().type == TK("except")); + } while(curr().type == TK_EXCEPT); } - if(match(TK("finally"))) { + if(match(TK_FINALLY)) { int patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE); finally_entry = ctx()->co->codes.size(); check(compile_block_body()); @@ -848,12 +848,12 @@ Error* Compiler::compile_decorated() noexcept{ bool consumed = match_newlines(&need_more_lines); if(need_more_lines) return NeedMoreLines(); if(!consumed) return SyntaxError("expected a newline after '@'"); - } while(match(TK("@"))); + } while(match(TK_DECORATOR)); - if(match(TK("class"))) { + if(match(TK_CLASS)) { check(compile_class(count)); } else { - consume(TK("def")); + consume(TK_DEF); check(compile_function(count)); } return NULL; @@ -862,17 +862,17 @@ Error* Compiler::compile_decorated() noexcept{ Error* Compiler::try_compile_assignment(bool* is_assign) noexcept{ Error* err; switch(curr().type) { - case TK("+="): - case TK("-="): - case TK("*="): - case TK("/="): - case TK("//="): - case TK("%="): - case TK("<<="): - case TK(">>="): - case TK("&="): - case TK("|="): - case TK("^="): { + case TK_IADD: + case TK_ISUB: + case TK_IMUL: + case TK_IDIV: + case TK_IFLOORDIV: + case TK_IMOD: + case TK_ILSHIFT: + case TK_IRSHIFT: + case TK_IAND: + case TK_IOR: + case TK_IXOR: { if(ctx()->s_top()->is_starred()) return SyntaxError(); if(ctx()->is_compiling_class){ return SyntaxError("can't use inplace operator in class definition"); @@ -882,7 +882,7 @@ Error* Compiler::try_compile_assignment(bool* is_assign) noexcept{ // a.x += 1; a should be evaluated only once // -1 to remove =; inplace=true int line = prev().line; - TokenIndex op = prev().type-1; + TokenIndex op = (TokenIndex)(prev().type - 1); // [lhs] check(EXPR_TUPLE()); // [lhs, rhs] if(ctx()->s_top()->is_starred()) return SyntaxError(); @@ -897,9 +897,9 @@ 
Error* Compiler::try_compile_assignment(bool* is_assign) noexcept{ *is_assign = true; return NULL; } - case TK("="): { + case TK_ASSIGN: { int n = 0; - while(match(TK("="))) { + while(match(TK_ASSIGN)) { check(EXPR_TUPLE()); n += 1; } @@ -923,7 +923,7 @@ Error* Compiler::try_compile_assignment(bool* is_assign) noexcept{ Error* Compiler::compile_stmt() noexcept{ Error* err; - if(match(TK("class"))) { + if(match(TK_CLASS)) { check(compile_class()); return NULL; } @@ -931,24 +931,24 @@ Error* Compiler::compile_stmt() noexcept{ int kw_line = prev().line; // backup line number int curr_loop_block = ctx()->get_loop(); switch(prev().type) { - case TK("break"): + case TK_BREAK: if(curr_loop_block < 0) return SyntaxError("'break' outside loop"); ctx()->emit_(OP_LOOP_BREAK, curr_loop_block, kw_line); consume_end_stmt(); break; - case TK("continue"): + case TK_CONTINUE: if(curr_loop_block < 0) return SyntaxError("'continue' not properly in loop"); ctx()->emit_(OP_LOOP_CONTINUE, curr_loop_block, kw_line); consume_end_stmt(); break; - case TK("yield"): + case TK_YIELD: if(contexts.size() <= 1) return SyntaxError("'yield' outside function"); check(EXPR_TUPLE()); ctx()->s_emit_top(); ctx()->emit_(OP_YIELD_VALUE, BC_NOARG, kw_line); consume_end_stmt(); break; - case TK("yield from"): + case TK_YIELD_FROM: if(contexts.size() <= 1) return SyntaxError("'yield from' outside function"); check(EXPR_TUPLE()); ctx()->s_emit_top(); @@ -960,7 +960,7 @@ Error* Compiler::compile_stmt() noexcept{ ctx()->exit_block(); consume_end_stmt(); break; - case TK("return"): + case TK_RETURN: if(contexts.size() <= 1) return SyntaxError("'return' outside function"); if(match_end_stmt()) { ctx()->emit_(OP_RETURN_VALUE, 1, kw_line); @@ -972,22 +972,22 @@ Error* Compiler::compile_stmt() noexcept{ } break; /*************************************************/ - case TK("if"): check(compile_if_stmt()); break; - case TK("while"): check(compile_while_loop()); break; - case TK("for"): check(compile_for_loop()); 
break; - case TK("import"): check(compile_normal_import()); break; - case TK("from"): check(compile_from_import()); break; - case TK("def"): check(compile_function()); break; - case TK("@"): check(compile_decorated()); break; - case TK("try"): check(compile_try_except()); break; - case TK("pass"): consume_end_stmt(); break; + case TK_IF: check(compile_if_stmt()); break; + case TK_WHILE: check(compile_while_loop()); break; + case TK_FOR: check(compile_for_loop()); break; + case TK_IMPORT: check(compile_normal_import()); break; + case TK_FROM: check(compile_from_import()); break; + case TK_DEF: check(compile_function()); break; + case TK_DECORATOR: check(compile_decorated()); break; + case TK_TRY: check(compile_try_except()); break; + case TK_PASS: consume_end_stmt(); break; /*************************************************/ - case TK("assert"): { + case TK_ASSERT: { check(EXPR()); // condition ctx()->s_emit_top(); int index = ctx()->emit_(OP_POP_JUMP_IF_TRUE, BC_NOARG, kw_line); int has_msg = 0; - if(match(TK(","))) { + if(match(TK_COMMA)) { check(EXPR()); // message ctx()->s_emit_top(); has_msg = 1; @@ -997,32 +997,32 @@ Error* Compiler::compile_stmt() noexcept{ consume_end_stmt(); break; } - case TK("global"): + case TK_GLOBAL: do { - consume(TK("@id")); + consume(TK_ID); ctx()->global_names.push_back(StrName(prev().sv())); - } while(match(TK(","))); + } while(match(TK_COMMA)); consume_end_stmt(); break; - case TK("raise"): { + case TK_RAISE: { check(EXPR()); ctx()->s_emit_top(); ctx()->emit_(OP_RAISE, BC_NOARG, kw_line); consume_end_stmt(); } break; - case TK("del"): { + case TK_DEL: { check(EXPR_TUPLE()); if(!ctx()->s_top()->emit_del(ctx())) return SyntaxError(); ctx()->s_pop(); consume_end_stmt(); } break; - case TK("with"): { + case TK_WITH: { check(EXPR()); // [ ] ctx()->s_emit_top(); ctx()->enter_block(CodeBlockType::CONTEXT_MANAGER); Expr* as_name = nullptr; - if(match(TK("as"))) { - consume(TK("@id")); + if(match(TK_AS)) { + consume(TK_ID); as_name = 
make_expr(prev().str(), name_scope()); } ctx()->emit_(OP_WITH_ENTER, BC_NOARG, prev().line); @@ -1039,18 +1039,18 @@ Error* Compiler::compile_stmt() noexcept{ ctx()->exit_block(); } break; /*************************************************/ - case TK("=="): { - consume(TK("@id")); + case TK_EQ: { + consume(TK_ID); if(mode() != EXEC_MODE) return SyntaxError("'label' is only available in EXEC_MODE"); if(!ctx()->add_label(prev().str())) { Str escaped(prev().str().escape()); return SyntaxError("label %s already exists", escaped.c_str()); } - consume(TK("==")); + consume(TK_EQ); consume_end_stmt(); } break; - case TK("->"): - consume(TK("@id")); + case TK_ARROW: + consume(TK_ID); if(mode() != EXEC_MODE) return SyntaxError("'goto' is only available in EXEC_MODE"); ctx()->emit_(OP_GOTO, StrName(prev().sv()).index, prev().line); consume_end_stmt(); @@ -1064,7 +1064,7 @@ Error* Compiler::compile_stmt() noexcept{ bool is_typed_name = false; // e.g. x: int // eat variable's type hint if it is a single name if(ctx()->s_top()->is_name()) { - if(match(TK(":"))) { + if(match(TK_COLON)) { check(consume_type_hints()); is_typed_name = true; @@ -1107,15 +1107,15 @@ Error* Compiler::consume_type_hints() noexcept{ Error* Compiler::compile_class(int decorators) noexcept{ Error* err; - consume(TK("@id")); + consume(TK_ID); int namei = StrName(prev().sv()).index; bool has_base = false; - if(match(TK("("))) { + if(match(TK_LPAREN)) { if(is_expression()) { check(EXPR()); has_base = true; // [base] } - consume(TK(")")); + consume(TK_RPAREN); } if(!has_base) { ctx()->emit_(OP_LOAD_NONE, BC_NOARG, prev().line); @@ -1148,15 +1148,15 @@ Error* Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcep if(state > 3) return SyntaxError(); if(state == 3) return SyntaxError("**kwargs should be the last argument"); match_newlines(); - if(match(TK("*"))) { + if(match(TK_MUL)) { if(state < 1) state = 1; else return SyntaxError("*args should be placed before **kwargs"); - } else 
if(match(TK("**"))) { + } else if(match(TK_POW)) { state = 3; } - consume(TK("@id")); + consume(TK_ID); StrName name(prev().sv()); // check duplicate argument name @@ -1174,8 +1174,8 @@ Error* Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcep } // eat type hints - if(enable_type_hints && match(TK(":"))) check(consume_type_hints()); - if(state == 0 && curr().type == TK("=")) state = 2; + if(enable_type_hints && match(TK_COLON)) check(consume_type_hints()); + if(state == 0 && curr().type == TK_ASSIGN) state = 2; int index = ctx()->add_varname(name); switch(state) { case 0: decl->args.push_back(index); break; @@ -1184,7 +1184,7 @@ Error* Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcep state += 1; break; case 2: { - consume(TK("=")); + consume(TK_ASSIGN); PyVar value; check(read_literal(&value)); if(value == nullptr) return SyntaxError("default argument must be a literal"); @@ -1195,21 +1195,21 @@ Error* Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcep state += 1; break; } - } while(match(TK(","))); + } while(match(TK_COMMA)); return NULL; } Error* Compiler::compile_function(int decorators) noexcept{ Error* err; - consume(TK("@id")); + consume(TK_ID); Str decl_name = prev().str(); FuncDecl_ decl = push_f_context(decl_name); - consume(TK("(")); - if(!match(TK(")"))) { + consume(TK_LPAREN); + if(!match(TK_RPAREN)) { check(_compile_f_args(decl, true)); - consume(TK(")")); + consume(TK_RPAREN); } - if(match(TK("->"))) check(consume_type_hints()); + if(match(TK_ARROW)) check(consume_type_hints()); check(compile_block_body()); check(pop_context()); @@ -1251,29 +1251,29 @@ Error* Compiler::read_literal(PyVar* out) noexcept{ Error* err; advance(); switch(prev().type) { - case TK("-"): { - consume(TK("@num")); + case TK_SUB: { + consume(TK_NUM); PyVar val = to_object(prev().value); *out = vm->py_negate(val); return NULL; } - case TK("@num"): *out = to_object(prev().value); return NULL; - case TK("@str"): 
*out = to_object(prev().value); return NULL; - case TK("True"): *out = VAR(true); return NULL; - case TK("False"): *out = VAR(false); return NULL; - case TK("None"): *out = vm->None; return NULL; - case TK("..."): *out = vm->Ellipsis; return NULL; - case TK("("): { + case TK_NUM: *out = to_object(prev().value); return NULL; + case TK_STR: *out = to_object(prev().value); return NULL; + case TK_TRUE: *out = VAR(true); return NULL; + case TK_FALSE: *out = VAR(false); return NULL; + case TK_NONE: *out = vm->None; return NULL; + case TK_DOTDOTDOT: *out = vm->Ellipsis; return NULL; + case TK_LPAREN: { List cpnts; while(true) { PyVar elem; check(read_literal(&elem)); cpnts.push_back(elem); - if(curr().type == TK(")")) break; - consume(TK(",")); - if(curr().type == TK(")")) break; + if(curr().type == TK_RPAREN) break; + consume(TK_COMMA); + if(curr().type == TK_RPAREN) break; } - consume(TK(")")); + consume(TK_RPAREN); *out = VAR(cpnts.to_tuple()); return NULL; } @@ -1297,20 +1297,20 @@ Error* Compiler::compile(CodeObject_* out) noexcept{ // if(lexer.src.filename()[0] != '<'){ // printf("%s\n", lexer.src.filename().c_str()); // for(int i=0; is_emit_top(); - consume(TK("@eof")); + consume(TK_EOF); ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); check(pop_context()); *out = code; @@ -1319,7 +1319,7 @@ Error* Compiler::compile(CodeObject_* out) noexcept{ check(EXPR()); Expr* e = ctx()->s_popx(); if(!e->is_json_object()) return SyntaxError("expect a JSON object, literal or array"); - consume(TK("@eof")); + consume(TK_EOF); e->emit_(ctx()); ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); check(pop_context()); @@ -1327,7 +1327,7 @@ Error* Compiler::compile(CodeObject_* out) noexcept{ return NULL; } - while(!match(TK("@eof"))) { + while(!match(TK_EOF)) { check(compile_stmt()); match_newlines(); } diff --git a/src/compiler/expr.cpp b/src/compiler/expr.cpp index 18ca583f..aa09921d 100644 --- a/src/compiler/expr.cpp +++ b/src/compiler/expr.cpp @@ -231,10 +231,10 @@ void 
OrExpr::emit_(CodeEmitContext* ctx) { void Literal0Expr::emit_(CodeEmitContext* ctx) { switch(token) { - case TK("None"): ctx->emit_(OP_LOAD_NONE, BC_NOARG, line); break; - case TK("True"): ctx->emit_(OP_LOAD_TRUE, BC_NOARG, line); break; - case TK("False"): ctx->emit_(OP_LOAD_FALSE, BC_NOARG, line); break; - case TK("..."): ctx->emit_(OP_LOAD_ELLIPSIS, BC_NOARG, line); break; + case TK_NONE: ctx->emit_(OP_LOAD_NONE, BC_NOARG, line); break; + case TK_TRUE: ctx->emit_(OP_LOAD_TRUE, BC_NOARG, line); break; + case TK_FALSE: ctx->emit_(OP_LOAD_FALSE, BC_NOARG, line); break; + case TK_DOTDOTDOT: ctx->emit_(OP_LOAD_ELLIPSIS, BC_NOARG, line); break; default: assert(false); } } @@ -681,12 +681,12 @@ void CallExpr::emit_(CodeEmitContext* ctx) { bool BinaryExpr::is_compare() const { switch(op) { - case TK("<"): - case TK("<="): - case TK("=="): - case TK("!="): - case TK(">"): - case TK(">="): return true; + case TK_LT: + case TK_LE: + case TK_EQ: + case TK_NE: + case TK_GT: + case TK_GE: return true; default: return false; } } @@ -701,12 +701,12 @@ void BinaryExpr::_emit_compare(CodeEmitContext* ctx, small_vector_2& jmp ctx->emit_(OP_DUP_TOP, BC_NOARG, line); // [a, b, b] ctx->emit_(OP_ROT_THREE, BC_NOARG, line); // [b, a, b] switch(op) { - case TK("<"): ctx->emit_(OP_COMPARE_LT, BC_NOARG, line); break; - case TK("<="): ctx->emit_(OP_COMPARE_LE, BC_NOARG, line); break; - case TK("=="): ctx->emit_(OP_COMPARE_EQ, BC_NOARG, line); break; - case TK("!="): ctx->emit_(OP_COMPARE_NE, BC_NOARG, line); break; - case TK(">"): ctx->emit_(OP_COMPARE_GT, BC_NOARG, line); break; - case TK(">="): ctx->emit_(OP_COMPARE_GE, BC_NOARG, line); break; + case TK_LT: ctx->emit_(OP_COMPARE_LT, BC_NOARG, line); break; + case TK_LE: ctx->emit_(OP_COMPARE_LE, BC_NOARG, line); break; + case TK_EQ: ctx->emit_(OP_COMPARE_EQ, BC_NOARG, line); break; + case TK_NE: ctx->emit_(OP_COMPARE_NE, BC_NOARG, line); break; + case TK_GT: ctx->emit_(OP_COMPARE_GT, BC_NOARG, line); break; + case TK_GE: 
ctx->emit_(OP_COMPARE_GE, BC_NOARG, line); break; default: PK_UNREACHABLE() } // [b, RES] @@ -731,34 +731,34 @@ void BinaryExpr::emit_(CodeEmitContext* ctx) { rhs->emit_(ctx); switch(op) { - case TK("+"): ctx->emit_(OP_BINARY_ADD, BC_NOARG, line); break; - case TK("-"): ctx->emit_(OP_BINARY_SUB, BC_NOARG, line); break; - case TK("*"): ctx->emit_(OP_BINARY_MUL, BC_NOARG, line); break; - case TK("/"): ctx->emit_(OP_BINARY_TRUEDIV, BC_NOARG, line); break; - case TK("//"): ctx->emit_(OP_BINARY_FLOORDIV, BC_NOARG, line); break; - case TK("%"): ctx->emit_(OP_BINARY_MOD, BC_NOARG, line); break; - case TK("**"): ctx->emit_(OP_BINARY_POW, BC_NOARG, line); break; + case TK_ADD: ctx->emit_(OP_BINARY_ADD, BC_NOARG, line); break; + case TK_SUB: ctx->emit_(OP_BINARY_SUB, BC_NOARG, line); break; + case TK_MUL: ctx->emit_(OP_BINARY_MUL, BC_NOARG, line); break; + case TK_DIV: ctx->emit_(OP_BINARY_TRUEDIV, BC_NOARG, line); break; + case TK_FLOORDIV: ctx->emit_(OP_BINARY_FLOORDIV, BC_NOARG, line); break; + case TK_MOD: ctx->emit_(OP_BINARY_MOD, BC_NOARG, line); break; + case TK_POW: ctx->emit_(OP_BINARY_POW, BC_NOARG, line); break; - case TK("<"): ctx->emit_(OP_COMPARE_LT, BC_NOARG, line); break; - case TK("<="): ctx->emit_(OP_COMPARE_LE, BC_NOARG, line); break; - case TK("=="): ctx->emit_(OP_COMPARE_EQ, BC_NOARG, line); break; - case TK("!="): ctx->emit_(OP_COMPARE_NE, BC_NOARG, line); break; - case TK(">"): ctx->emit_(OP_COMPARE_GT, BC_NOARG, line); break; - case TK(">="): ctx->emit_(OP_COMPARE_GE, BC_NOARG, line); break; + case TK_LT: ctx->emit_(OP_COMPARE_LT, BC_NOARG, line); break; + case TK_LE: ctx->emit_(OP_COMPARE_LE, BC_NOARG, line); break; + case TK_EQ: ctx->emit_(OP_COMPARE_EQ, BC_NOARG, line); break; + case TK_NE: ctx->emit_(OP_COMPARE_NE, BC_NOARG, line); break; + case TK_GT: ctx->emit_(OP_COMPARE_GT, BC_NOARG, line); break; + case TK_GE: ctx->emit_(OP_COMPARE_GE, BC_NOARG, line); break; - case TK("in"): ctx->emit_(OP_CONTAINS_OP, 0, line); break; - case TK("not in"): 
ctx->emit_(OP_CONTAINS_OP, 1, line); break; - case TK("is"): ctx->emit_(OP_IS_OP, BC_NOARG, line); break; - case TK("is not"): ctx->emit_(OP_IS_NOT_OP, BC_NOARG, line); break; + case TK_IN: ctx->emit_(OP_CONTAINS_OP, 0, line); break; + case TK_NOT_IN: ctx->emit_(OP_CONTAINS_OP, 1, line); break; + case TK_IS: ctx->emit_(OP_IS_OP, BC_NOARG, line); break; + case TK_IS_NOT: ctx->emit_(OP_IS_NOT_OP, BC_NOARG, line); break; - case TK("<<"): ctx->emit_(OP_BITWISE_LSHIFT, BC_NOARG, line); break; - case TK(">>"): ctx->emit_(OP_BITWISE_RSHIFT, BC_NOARG, line); break; - case TK("&"): ctx->emit_(OP_BITWISE_AND, BC_NOARG, line); break; - case TK("|"): ctx->emit_(OP_BITWISE_OR, BC_NOARG, line); break; - case TK("^"): ctx->emit_(OP_BITWISE_XOR, BC_NOARG, line); break; + case TK_LSHIFT: ctx->emit_(OP_BITWISE_LSHIFT, BC_NOARG, line); break; + case TK_RSHIFT: ctx->emit_(OP_BITWISE_RSHIFT, BC_NOARG, line); break; + case TK_AND: ctx->emit_(OP_BITWISE_AND, BC_NOARG, line); break; + case TK_OR: ctx->emit_(OP_BITWISE_OR, BC_NOARG, line); break; + case TK_XOR: ctx->emit_(OP_BITWISE_XOR, BC_NOARG, line); break; - case TK("@"): ctx->emit_(OP_BINARY_MATMUL, BC_NOARG, line); break; - default: PK_FATAL_ERROR("unknown binary operator: %s\n", TK_STR(op)); + case TK_DECORATOR: ctx->emit_(OP_BINARY_MATMUL, BC_NOARG, line); break; + default: PK_FATAL_ERROR("unknown binary operator: %s\n", pk_TokenSymbols[op]); } for(int i: jmps) diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index 9294b9cb..6db8486e 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -3,6 +3,30 @@ #include "pocketpy/common/smallmap.h" #include "pocketpy/compiler/lexer.h" +const char* pk_TokenSymbols[] = { + "@eof", "@eol", "@sof", + "@id", "@num", "@str", "@fstr", "@long", "@bytes", "@imag", + "@indent", "@dedent", + // These 3 are compound keywords which are generated on the fly + "is not", "not in", "yield from", + /*****************************************/ + "+", "+=", "-", "-=", // (INPLACE_OP - 1) can 
get '=' removed + "*", "*=", "/", "/=", "//", "//=", "%", "%=", + "&", "&=", "|", "|=", "^", "^=", + "<<", "<<=", ">>", ">>=", + /*****************************************/ + "(", ")", "[", "]", "{", "}", + ".", "..", "...", ",", ":", ";", + "**", "->", "#", "@", + ">", "<", "=", "==", "!=", ">=", "<=", "~", + /** KW_BEGIN **/ + // NOTE: These keywords should be sorted in ascending order!! + "False", "None", "True", "and", "as", "assert", "break", "class", "continue", + "def", "del", "elif", "else", "except", "finally", "for", "from", "global", + "if", "import", "in", "is", "lambda", "not", "or", "pass", "raise", "return", + "try", "while", "with", "yield", +}; + void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source){ self->curr = source; self->source = source; diff --git a/src/compiler/lexer.cpp b/src/compiler/lexer.cpp index 40f66683..79a93ba5 100644 --- a/src/compiler/lexer.cpp +++ b/src/compiler/lexer.cpp @@ -62,11 +62,11 @@ bool Lexer::eat_indentation() noexcept{ // https://docs.python.org/3/reference/lexical_analysis.html#indentation if(spaces > indents.back()) { indents.push_back(spaces); - nexts.push_back(Token{TK("@indent"), token_start, 0, current_line, brackets_level, {}}); + nexts.push_back(Token{TK_INDENT, token_start, 0, current_line, brackets_level, {}}); } else if(spaces < indents.back()) { while(spaces < indents.back()) { indents.pop_back(); - nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line, brackets_level, {}}); + nexts.push_back(Token{TK_DEDENT, token_start, 0, current_line, brackets_level, {}}); } if(spaces != indents.back()) { return false; } } @@ -129,29 +129,32 @@ Error* Lexer::eat_name() noexcept{ int length = (int)(curr_char - token_start); if(length == 0) return SyntaxError("@id contains invalid char"); - std::string_view name(token_start, length); + c11_string name = {token_start, length}; if(src->mode == JSON_MODE) { - if(name == "true") { - add_token(TK("True")); - } else if(name == 
"false") { - add_token(TK("False")); - } else if(name == "null") { - add_token(TK("None")); + if(c11_string__cmp3(name, "true") == 0) { + add_token(TK_TRUE); + } else if(c11_string__cmp3(name, "false") == 0) { + add_token(TK_FALSE); + } else if(c11_string__cmp3(name, "null") == 0) { + add_token(TK_NONE); } else { return SyntaxError("invalid JSON token"); } return NULL; } - const auto KW_BEGIN = kTokens + TK("False"); - const auto KW_END = kTokens + kTokenCount; + const char** KW_BEGIN = pk_TokenSymbols + TK_FALSE; + int KW_COUNT = TK__COUNT__ - TK_FALSE; + #define less(a, b) (c11_string__cmp3(b, a) > 0) + int out; + c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out); + #undef less - auto it = lower_bound(KW_BEGIN, KW_END, name); - if(it != KW_END && *it == name) { - add_token(it - kTokens); + if(out != KW_COUNT && c11_string__cmp3(name, KW_BEGIN[out]) == 0) { + add_token((TokenIndex)(out + TK_FALSE)); } else { - add_token(TK("@id")); + add_token(TK_ID); } return NULL; } @@ -172,32 +175,33 @@ bool Lexer::matchchar(char c) noexcept{ void Lexer::add_token(TokenIndex type, TokenValue value) noexcept{ switch(type) { - case TK("{"): - case TK("["): - case TK("("): brackets_level++; break; - case TK(")"): - case TK("]"): - case TK("}"): brackets_level--; break; + case TK_LBRACE: + case TK_LBRACKET: + case TK_LPAREN: brackets_level++; break; + case TK_RPAREN: + case TK_RBRACKET: + case TK_RBRACE: brackets_level--; break; + default: break; } auto token = Token{type, token_start, (int)(curr_char - token_start), - current_line - ((type == TK("@eol")) ? 1 : 0), + current_line - ((type == TK_EOL) ? 
1 : 0), brackets_level, value}; // handle "not in", "is not", "yield from" if(!nexts.empty()) { auto& back = nexts.back(); - if(back.type == TK("not") && type == TK("in")) { - back.type = TK("not in"); + if(back.type == TK_NOT_KW && type == TK_IN) { + back.type = TK_NOT_IN; return; } - if(back.type == TK("is") && type == TK("not")) { - back.type = TK("is not"); + if(back.type == TK_IS && type == TK_NOT_KW) { + back.type = TK_IS_NOT; return; } - if(back.type == TK("yield") && type == TK("from")) { - back.type = TK("yield from"); + if(back.type == TK_YIELD && type == TK_FROM) { + back.type = TK_YIELD_FROM; return; } nexts.push_back(token); @@ -271,11 +275,11 @@ Error* Lexer::eat_string(char quote, StringType type) noexcept{ Error* err = eat_string_until(quote, type == StringType::RAW_STRING, &s); if(err) return err; if(type == StringType::F_STRING) { - add_token(TK("@fstr"), s); + add_token(TK_FSTR, s); }else if(type == StringType::NORMAL_BYTES) { - add_token(TK("@bytes"), s); + add_token(TK_BYTES, s); }else{ - add_token(TK("@str"), s); + add_token(TK_STR, s); } return NULL; } @@ -299,13 +303,13 @@ Error* Lexer::eat_number() noexcept{ if(text[0] != '.' 
&& !is_scientific_notation) { // try long if(i[-1] == 'L') { - add_token(TK("@long")); + add_token(TK_LONG); return NULL; } // try integer i64 int_out; switch(parse_uint(text, &int_out, -1)) { - case IntParsingResult::Success: add_token(TK("@num"), int_out); return NULL; + case IntParsingResult::Success: add_token(TK_NUM, int_out); return NULL; case IntParsingResult::Overflow: return SyntaxError("int literal is too large"); case IntParsingResult::Failure: break; // do nothing } @@ -321,12 +325,12 @@ Error* Lexer::eat_number() noexcept{ } if(p_end == text.data() + text.size()) { - add_token(TK("@num"), (f64)float_out); + add_token(TK_NUM, (f64)float_out); return NULL; } if(i[-1] == 'j' && p_end == text.data() + text.size() - 1) { - add_token(TK("@imag"), (f64)float_out); + add_token(TK_IMAG, (f64)float_out); return NULL; } @@ -346,17 +350,17 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{ return NULL; } case '#': skip_line_comment(); break; - case '~': add_token(TK("~")); return NULL; - case '{': add_token(TK("{")); return NULL; - case '}': add_token(TK("}")); return NULL; - case ',': add_token(TK(",")); return NULL; - case ':': add_token(TK(":")); return NULL; - case ';': add_token(TK(";")); return NULL; - case '(': add_token(TK("(")); return NULL; - case ')': add_token(TK(")")); return NULL; - case '[': add_token(TK("[")); return NULL; - case ']': add_token(TK("]")); return NULL; - case '@': add_token(TK("@")); return NULL; + case '~': add_token(TK_INVERT); return NULL; + case '{': add_token(TK_LBRACE); return NULL; + case '}': add_token(TK_RBRACE); return NULL; + case ',': add_token(TK_COMMA); return NULL; + case ':': add_token(TK_COLON); return NULL; + case ';': add_token(TK_SEMICOLON); return NULL; + case '(': add_token(TK_LPAREN); return NULL; + case ')': add_token(TK_RPAREN); return NULL; + case '[': add_token(TK_LBRACKET); return NULL; + case ']': add_token(TK_RBRACKET); return NULL; + case '@': add_token(TK_DECORATOR); return NULL; case '\\': { // line 
continuation character char c = eatchar_include_newline(); @@ -367,16 +371,16 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{ eat_spaces(); return NULL; } - case '%': add_token_2('=', TK("%"), TK("%=")); return NULL; - case '&': add_token_2('=', TK("&"), TK("&=")); return NULL; - case '|': add_token_2('=', TK("|"), TK("|=")); return NULL; - case '^': add_token_2('=', TK("^"), TK("^=")); return NULL; + case '%': add_token_2('=', TK_MOD, TK_IMOD); return NULL; + case '&': add_token_2('=', TK_AND, TK_IAND); return NULL; + case '|': add_token_2('=', TK_OR, TK_IOR); return NULL; + case '^': add_token_2('=', TK_XOR, TK_IXOR); return NULL; case '.': { if(matchchar('.')) { if(matchchar('.')) { - add_token(TK("...")); + add_token(TK_DOTDOTDOT); } else { - add_token(TK("..")); + add_token(TK_DOTDOT); } } else { char next_char = peekchar(); @@ -384,43 +388,43 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{ Error* err = eat_number(); if(err) return err; } else { - add_token(TK(".")); + add_token(TK_DOT); } } return NULL; } - case '=': add_token_2('=', TK("="), TK("==")); return NULL; - case '+': add_token_2('=', TK("+"), TK("+=")); return NULL; + case '=': add_token_2('=', TK_ASSIGN, TK_EQ); return NULL; + case '+': add_token_2('=', TK_ADD, TK_IADD); return NULL; case '>': { if(matchchar('=')) - add_token(TK(">=")); + add_token(TK_GE); else if(matchchar('>')) - add_token_2('=', TK(">>"), TK(">>=")); + add_token_2('=', TK_RSHIFT, TK_IRSHIFT); else - add_token(TK(">")); + add_token(TK_GT); return NULL; } case '<': { if(matchchar('=')) - add_token(TK("<=")); + add_token(TK_LE); else if(matchchar('<')) - add_token_2('=', TK("<<"), TK("<<=")); + add_token_2('=', TK_LSHIFT, TK_ILSHIFT); else - add_token(TK("<")); + add_token(TK_LT); return NULL; } case '-': { if(matchchar('=')) - add_token(TK("-=")); + add_token(TK_ISUB); else if(matchchar('>')) - add_token(TK("->")); + add_token(TK_ARROW); else - add_token(TK("-")); + add_token(TK_SUB); return NULL; } case '!': 
if(matchchar('=')){ - add_token(TK("!=")); + add_token(TK_NE); }else{ Error* err = SyntaxError("expected '=' after '!'"); if(err) return err; @@ -428,22 +432,22 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{ break; case '*': if(matchchar('*')) { - add_token(TK("**")); // '**' + add_token(TK_POW); // '**' } else { - add_token_2('=', TK("*"), TK("*=")); + add_token_2('=', TK_MUL, TK_IMUL); } return NULL; case '/': if(matchchar('/')) { - add_token_2('=', TK("//"), TK("//=")); + add_token_2('=', TK_FLOORDIV, TK_IFLOORDIV); } else { - add_token_2('=', TK("/"), TK("/=")); + add_token_2('=', TK_DIV, TK_IDIV); } return NULL; case ' ': case '\t': eat_spaces(); break; case '\n': { - add_token(TK("@eol")); + add_token(TK_EOL); if(!eat_indentation()){ return IndentationError("unindent does not match any outer indentation level"); } @@ -469,10 +473,10 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{ token_start = curr_char; while(indents.size() > 1) { indents.pop_back(); - add_token(TK("@dedent")); + add_token(TK_DEDENT); return NULL; } - add_token(TK("@eof")); + add_token(TK_EOF); *eof = true; return NULL; } @@ -496,7 +500,7 @@ Error* Lexer::_error(bool lexer_err, const char* type, const char* msg, va_list* if(args){ vsnprintf(err->msg, sizeof(err->msg), msg, *args); }else{ - std::strncpy(err->msg, msg, sizeof(err->msg)); + strncpy(err->msg, msg, sizeof(err->msg)); } err->userdata = userdata; return err; @@ -517,7 +521,7 @@ Error* Lexer::run() noexcept{ return from_precompiled(); } // push initial tokens - this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line, brackets_level, {}}); + this->nexts.push_back(Token{TK_SOF, token_start, 0, current_line, brackets_level, {}}); this->indents.push_back(0); bool eof = false; @@ -554,7 +558,7 @@ Error* Lexer::from_precompiled() noexcept{ count = pkpy_TokenDeserializer__read_count(&deserializer); for(int i = 0; i < count; i++) { Token t; - t.type = (unsigned char)pkpy_TokenDeserializer__read_uint(&deserializer, 
','); + t.type = (TokenIndex)pkpy_TokenDeserializer__read_uint(&deserializer, ','); if(is_raw_string_used(t.type)) { i64 index = pkpy_TokenDeserializer__read_uint(&deserializer, ','); pkpy_Str* p = c11__at(pkpy_Str, precompiled_tokens, index); diff --git a/src/modules/random.cpp b/src/modules/random.cpp index ad3942cd..02ac9bfc 100644 --- a/src/modules/random.cpp +++ b/src/modules/random.cpp @@ -201,6 +201,7 @@ struct Random { f64 key = self.gen.uniform(0.0, cum_weights[size - 1]); int index; c11__lower_bound(f64, cum_weights.begin(), cum_weights.size(), key, c11__less, &index); + assert(index != cum_weights.size()); result[i] = data[index]; } return VAR(std::move(result));