#pragma once #include "common.h" #include "error.h" #include "str.h" namespace pkpy{ typedef uint8_t TokenIndex; constexpr const char* kTokens[] = { "is not", "not in", "yield from", "@eof", "@eol", "@sof", "@id", "@num", "@str", "@fstr", "@long", "@indent", "@dedent", /*****************************************/ "+", "+=", "-", "-=", // (INPLACE_OP - 1) can get '=' removed "*", "*=", "/", "/=", "//", "//=", "%", "%=", "&", "&=", "|", "|=", "^", "^=", "<<", "<<=", ">>", ">>=", /*****************************************/ ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "**", "=", ">", "<", "...", "->", "?", "@", "==", "!=", ">=", "<=", "++", "--", "~", /** SPEC_BEGIN **/ "$goto", "$label", /** KW_BEGIN **/ "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally", "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise" }; using TokenValue = std::variant; const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]); constexpr TokenIndex TK(const char token[]) { for(int k=0; k kTokenKwMap = [](){ std::map map; for(int k=TK("class"); k <= >= != ==, in / is / is not / not in PREC_BITWISE_OR, // | PREC_BITWISE_XOR, // ^ PREC_BITWISE_AND, // & PREC_BITWISE_SHIFT, // << >> PREC_TERM, // + - PREC_FACTOR, // * / % // @ PREC_UNARY, // - not ~ PREC_EXPONENT, // ** PREC_CALL, // () PREC_SUBSCRIPT, // [] PREC_ATTRIB, // .index PREC_PRIMARY, }; enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; struct Lexer { shared_ptr src; const char* token_start; const char* curr_char; int current_line = 1; std::vector nexts; stack indents; int brackets_level = 0; bool used = false; char peekchar() const{ return *curr_char; } bool match_n_chars(int n, char c0); bool match_string(const char* s); int eat_spaces(); bool eat_indentation(); char eatchar(); char eatchar_include_newline(); int eat_name(); void skip_line_comment(); bool matchchar(char c); void add_token(TokenIndex type, TokenValue value={}); void add_token_2(char c, TokenIndex one, TokenIndex two); Str eat_string_until(char quote, bool raw); void eat_string(char quote, StringType type); void eat_number(); bool lex_one_token(); /***** Error Reporter *****/ void throw_err(Str type, Str msg); void throw_err(Str type, Str msg, int lineno, const char* cursor); void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } void IndentationError(Str msg){ throw_err("IndentationError", msg); } Lexer(shared_ptr src); std::vector run(); }; } // namespace pkpy