From e78aa44895972cf2b5ad9e4a64df49bda5b0fa2f Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 29 Mar 2023 23:33:45 +0800 Subject: [PATCH] update lexer --- amalgamate.py | 4 +- src/ceval.h | 4 +- src/common.h | 34 +--- src/compiler.h | 378 +++++++++--------------------------- src/expr.h | 108 +++++++++++ src/frame.h | 8 +- src/gc.h | 6 +- src/lexer.h | 510 +++++++++++++++++++++++++++++++++++++++++++++++++ src/parser.h | 302 ----------------------------- src/pocketpy.h | 2 + src/vm.h | 4 +- 11 files changed, 730 insertions(+), 630 deletions(-) create mode 100644 src/expr.h create mode 100644 src/lexer.h delete mode 100644 src/parser.h diff --git a/amalgamate.py b/amalgamate.py index af1a2a2e..8327b940 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -6,8 +6,8 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f: OPCODES_TEXT = f.read() pipeline = [ - ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h"], - ["obj.h", "parser.h", "codeobject.h", "frame.h"], + ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], + ["obj.h", "codeobject.h", "frame.h"], ["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"], ["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"] ] diff --git a/src/ceval.h b/src/ceval.h index 531c3ea6..30397335 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -7,7 +7,7 @@ namespace pkpy{ inline PyObject* VM::run_frame(Frame* frame){ while(frame->has_next_bytecode()){ - // heap._auto_collect(this); + heap._auto_collect(this); const Bytecode& byte = frame->next_bytecode(); switch (byte.op) @@ -325,7 +325,7 @@ inline PyObject* VM::run_frame(Frame* frame){ if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE"); return frame->pop_value(this); } -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE"); #endif return None; diff --git a/src/common.h b/src/common.h index 06530277..1e022115 100644 --- a/src/common.h +++ b/src/common.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -26,10 +25,13 @@ #include #include #include -#include +#include -#define PK_VERSION "0.9.5" -#define PK_EXTRA_CHECK 0 +#define PK_VERSION "0.9.6" + +// debug macros +#define DEBUG_NO_BUILTIN_MODULES 0 +#define DEBUG_EXTRA_CHECK 1 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 @@ -40,13 +42,13 @@ #if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__) typedef int32_t i64; typedef float f64; -#define S_TO_INT std::stoi -#define S_TO_FLOAT std::stof +#define S_TO_INT(...) static_cast(std::stoi(__VA_ARGS__)) +#define S_TO_FLOAT(...) static_cast(std::stof(__VA_ARGS__)) #else typedef int64_t i64; typedef double f64; -#define S_TO_INT std::stoll -#define S_TO_FLOAT std::stod +#define S_TO_INT(...) static_cast(std::stoll(__VA_ARGS__)) +#define S_TO_FLOAT(...) 
static_cast(std::stod(__VA_ARGS__)) #endif namespace pkpy{ @@ -100,22 +102,6 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept { return is_int(a) && is_int(b); } - -template -class queue{ - std::list list; -public: - void push(const T& t){ list.push_back(t); } - void push(T&& t){ list.push_back(std::move(t)); } - void pop(){ list.pop_front(); } - void clear(){ list.clear(); } - bool empty() const { return list.empty(); } - size_t size() const { return list.size(); } - T& front(){ return list.front(); } - const T& front() const { return list.front(); } - const std::list& data() const { return list; } -}; - template class stack{ std::vector vec; diff --git a/src/compiler.h b/src/compiler.h index 613fd970..9510e316 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -2,7 +2,7 @@ #include "codeobject.h" #include "common.h" -#include "parser.h" +#include "lexer.h" #include "error.h" #include "ceval.h" @@ -18,24 +18,21 @@ struct GrammarRule{ Precedence precedence; }; -enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; - class Compiler { - std::unique_ptr parser; + std::unique_ptr lexer; stack codes; - int lexing_count = 0; bool used = false; VM* vm; std::map rules; CodeObject_ co() const{ return codes.top(); } - CompileMode mode() const{ return parser->src->mode; } + CompileMode mode() const{ return lexer->src->mode; } NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } public: Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ this->vm = vm; - this->parser = std::make_unique( + this->lexer = std::make_unique( make_sp(source, filename, mode) ); @@ -104,239 +101,36 @@ public: } private: - Str eat_string_until(char quote, bool raw) { - bool quote3 = parser->match_n_chars(2, quote); - std::vector buff; - while (true) { - char c = parser->eatchar_include_newline(); - if (c == quote){ - if(quote3 && !parser->match_n_chars(2, quote)){ - buff.push_back(c); - continue; - } - break; - } - if (c == '\0'){ - if(quote3 && parser->src->mode == REPL_MODE){ - throw NeedMoreLines(false); - } - SyntaxError("EOL while scanning string literal"); - } - if (c == '\n'){ - if(!quote3) SyntaxError("EOL while scanning string literal"); - else{ - buff.push_back(c); - continue; - } - } - if (!raw && c == '\\') { - switch (parser->eatchar_include_newline()) { - case '"': buff.push_back('"'); break; - case '\'': buff.push_back('\''); break; - case '\\': buff.push_back('\\'); break; - case 'n': buff.push_back('\n'); break; - case 'r': buff.push_back('\r'); break; - case 't': buff.push_back('\t'); break; - default: SyntaxError("invalid escape char"); - } - } else { - buff.push_back(c); - } - } - return Str(buff.data(), buff.size()); - } + int i = 0; + std::vector tokens; - void eat_string(char quote, StringType type) { - Str s = eat_string_until(quote, type == RAW_STRING); - if(type == F_STRING){ - parser->set_next_token(TK("@fstr"), VAR(s)); - }else{ - parser->set_next_token(TK("@str"), VAR(s)); - } - } - - void eat_number() { - static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?"); - std::smatch m; - - const char* i = parser->token_start; - while(*i != '\n' && *i != '\0') i++; - std::string s = std::string(parser->token_start, i); - - try{ - if (std::regex_search(s, m, pattern)) { - // here is m.length()-1, since the first char was eaten by lex_token() - for(int j=0; jeatchar(); - - int base = 10; - size_t size; - if (m[1].matched) base = 16; - if (m[2].matched) { - if(base == 16) SyntaxError("hex literal should not contain a dot"); - 
parser->set_next_token(TK("@num"), VAR(S_TO_FLOAT(m[0], &size))); - } else { - parser->set_next_token(TK("@num"), VAR(S_TO_INT(m[0], &size, base))); - } - if (size != m.length()) UNREACHABLE(); - } - }catch(std::exception& _){ - SyntaxError("invalid number literal"); - } - } - - void lex_token(){ - lexing_count++; - _lex_token(); - lexing_count--; - } - - // Lex the next token and set it as the next token. - void _lex_token() { - parser->prev = parser->curr; - parser->curr = parser->next_token(); - //std::cout << parser->curr.info() << std::endl; - - while (parser->peekchar() != '\0') { - parser->token_start = parser->curr_char; - char c = parser->eatchar_include_newline(); - switch (c) { - case '\'': case '"': eat_string(c, NORMAL_STRING); return; - case '#': parser->skip_line_comment(); break; - case '{': parser->set_next_token(TK("{")); return; - case '}': parser->set_next_token(TK("}")); return; - case ',': parser->set_next_token(TK(",")); return; - case ':': parser->set_next_token_2(':', TK(":"), TK("::")); return; - case ';': parser->set_next_token(TK(";")); return; - case '(': parser->set_next_token(TK("(")); return; - case ')': parser->set_next_token(TK(")")); return; - case '[': parser->set_next_token(TK("[")); return; - case ']': parser->set_next_token(TK("]")); return; - case '@': parser->set_next_token(TK("@")); return; - case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return; - case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return; - case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return; - case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return; - case '?': parser->set_next_token(TK("?")); return; - case '.': { - if(parser->matchchar('.')) { - if(parser->matchchar('.')) { - parser->set_next_token(TK("...")); - } else { - SyntaxError("invalid token '..'"); - } - } else { - parser->set_next_token(TK(".")); - } - return; - } - case '=': parser->set_next_token_2('=', TK("="), TK("==")); return; - case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return; - case '>': { - if(parser->matchchar('=')) parser->set_next_token(TK(">=")); - else if(parser->matchchar('>')) parser->set_next_token_2('=', TK(">>"), TK(">>=")); - else parser->set_next_token(TK(">")); - return; - } - case '<': { - if(parser->matchchar('=')) parser->set_next_token(TK("<=")); - else if(parser->matchchar('<')) parser->set_next_token_2('=', TK("<<"), TK("<<=")); - else parser->set_next_token(TK("<")); - return; - } - case '-': { - if(parser->matchchar('=')) parser->set_next_token(TK("-=")); - else if(parser->matchchar('>')) parser->set_next_token(TK("->")); - else parser->set_next_token(TK("-")); - return; - } - case '!': - if(parser->matchchar('=')) parser->set_next_token(TK("!=")); - else SyntaxError("expected '=' after '!'"); - break; - case '*': - if (parser->matchchar('*')) { - parser->set_next_token(TK("**")); // '**' - } else { - parser->set_next_token_2('=', TK("*"), TK("*=")); - } - return; - case '/': - if(parser->matchchar('/')) { - parser->set_next_token_2('=', TK("//"), TK("//=")); - } else { - parser->set_next_token_2('=', TK("/"), TK("/=")); - } - return; - case '\r': break; // just ignore '\r' - case ' ': case '\t': parser->eat_spaces(); break; - case '\n': { - parser->set_next_token(TK("@eol")); - if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level"); - return; - } - default: { - if(c == 'f'){ - if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;} - if(parser->matchchar('"')) 
{eat_string('"', F_STRING); return;} - }else if(c == 'r'){ - if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;} - if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;} - } - - if (c >= '0' && c <= '9') { - eat_number(); - return; - } - - switch (parser->eat_name()) - { - case 0: break; - case 1: SyntaxError("invalid char: " + std::string(1, c)); - case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); - case 3: SyntaxError("@id contains invalid char"); break; - case 4: SyntaxError("invalid JSON token"); break; - default: UNREACHABLE(); - } - return; - } - } - } - - parser->token_start = parser->curr_char; - parser->set_next_token(TK("@eof")); - } - - TokenIndex peek() { - return parser->curr.type; - } - - // not sure this will work - TokenIndex peek_next() { - if(parser->nexts.empty()) return TK("@eof"); - return parser->nexts.front().type; - } + const Token& prev() { return tokens.at(i-1); } + const Token& curr() { return tokens.at(i); } + const Token& next() { return tokens.at(i+1); } + const Token& peek(int offset=0) { return tokens.at(i+offset); } + void advance() { i++; } bool match(TokenIndex expected) { - if (peek() != expected) return false; - lex_token(); + if (curr().type != expected) return false; + advance(); return true; } void consume(TokenIndex expected) { if (!match(expected)){ StrStream ss; - ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'"; + ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'"; SyntaxError(ss.str()); } } bool match_newlines(bool repl_throw=false) { bool consumed = false; - if (peek() == TK("@eol")) { - while (peek() == TK("@eol")) lex_token(); + if (curr().type == TK("@eol")) { + while (curr().type == TK("@eol")) advance(); consumed = true; } - if (repl_throw && peek() == TK("@eof")){ + if (repl_throw && curr().type == TK("@eof")){ throw NeedMoreLines(co()->_is_compiling_class); } return consumed; @@ -344,8 +138,8 @@ private: bool match_end_stmt() { if (match(TK(";"))) { match_newlines(); return true; } - if (match_newlines() || peek()==TK("@eof")) return true; - if (peek() == TK("@dedent")) return true; + if (match_newlines() || curr().type == TK("@eof")) return true; + if (curr().type == TK("@dedent")) return true; return false; } @@ -353,15 +147,27 @@ private: if (!match_end_stmt()) SyntaxError("expected statement end"); } + PyObject* get_value(const Token& token) { + switch (token.type) { + case TK("@num"): + if(std::holds_alternative(token.value)) return VAR(std::get(token.value)); + if(std::holds_alternative(token.value)) return VAR(std::get(token.value)); + UNREACHABLE(); + case TK("@str"): case TK("@fstr"): + return VAR(std::get(token.value)); + default: throw std::runtime_error(Str("invalid token type: ") + TK_STR(token.type)); + } + } + void exprLiteral() { - PyObject* value = parser->prev.value; + PyObject* value = get_value(prev()); int index = co()->add_const(value); emit(OP_LOAD_CONST, index); } void exprFString() { static const std::regex pattern(R"(\{(.*?)\})"); - PyObject* value = parser->prev.value; + PyObject* value = get_value(prev()); Str s = CAST(Str, value); std::sregex_iterator begin(s.begin(), s.end(), pattern); std::sregex_iterator end; @@ -395,7 +201,7 @@ private: _compile_f_args(func, false); consume(TK(":")); } - func.code = make_sp(parser->src, func.name.str()); + func.code = make_sp(lexer->src, func.name.str()); this->codes.push(func.code); co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; emit(OP_RETURN_VALUE); @@ 
-414,7 +220,7 @@ private: if(is_load_name_ref) co()->codes.pop_back(); co()->_rvalue += 1; - TokenIndex op = parser->prev.type; + TokenIndex op = prev().type; if(op == TK("=")) { // a = (expr) EXPR_TUPLE(); if(is_load_name_ref){ @@ -487,7 +293,7 @@ private: } void exprBinaryOp() { - TokenIndex op = parser->prev.type; + TokenIndex op = prev().type; parse_expression((Precedence)(rules[op].precedence + 1)); switch (op) { @@ -525,7 +331,7 @@ private: } void exprUnaryOp() { - TokenIndex op = parser->prev.type; + TokenIndex op = prev().type; parse_expression((Precedence)(PREC_UNARY + 1)); switch (op) { case TK("-"): emit(OP_UNARY_NEGATIVE); break; @@ -588,7 +394,7 @@ private: int ARGC = 0; do { match_newlines(mode()==REPL_MODE); - if (peek() == TK("]")) break; + if (curr().type == TK("]")) break; EXPR(); ARGC++; match_newlines(mode()==REPL_MODE); if(ARGC == 1 && match(TK("for"))){ @@ -609,9 +415,9 @@ private: int ARGC = 0; do { match_newlines(mode()==REPL_MODE); - if (peek() == TK("}")) break; + if (curr().type == TK("}")) break; EXPR(); - if(peek() == TK(":")) parsing_dict = true; + if(curr().type == TK(":")) parsing_dict = true; if(parsing_dict){ consume(TK(":")); EXPR(); @@ -637,10 +443,10 @@ private: bool need_unpack = false; do { match_newlines(mode()==REPL_MODE); - if (peek() == TK(")")) break; - if(peek() == TK("@id") && peek_next() == TK("=")) { + if (curr().type == TK(")")) break; + if(curr().type == TK("@id") && next().type == TK("=")) { consume(TK("@id")); - const Str& key = parser->prev.str(); + const Str& key = prev().str(); emit(OP_LOAD_CONST, co()->add_const(VAR(key))); consume(TK("=")); co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; @@ -666,7 +472,7 @@ private: void exprName(){ _exprName(false); } void _exprName(bool force_lvalue) { - Token tkname = parser->prev; + const Token& tkname = prev(); int index = co()->add_name(tkname.str(), name_scope()); bool fast_load = !force_lvalue && co()->_rvalue>0; emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index); @@ -674,7 +480,7 @@ private: void exprAttrib() { consume(TK("@id")); - const Str& name = parser->prev.str(); + const Str& name = prev().str(); int index = co()->add_name(name, NAME_ATTR); emit(co()->_rvalue ? 
OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index); } @@ -710,7 +516,7 @@ private: } void exprValue() { - TokenIndex op = parser->prev.type; + TokenIndex op = prev().type; switch (op) { case TK("None"): emit(OP_LOAD_NONE); break; case TK("True"): emit(OP_LOAD_TRUE); break; @@ -721,7 +527,7 @@ private: } int emit(Opcode opcode, int arg=-1, bool keepline=false) { - int line = parser->prev.line; + int line = prev().line; co()->codes.push_back( Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line} ); @@ -738,7 +544,7 @@ private: void compile_block_body(CompilerAction action=nullptr) { if(action == nullptr) action = &Compiler::compile_stmt; consume(TK(":")); - if(peek()!=TK("@eol") && peek()!=TK("@eof")){ + if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ (this->*action)(); // inline block return; } @@ -746,7 +552,7 @@ private: SyntaxError("expected a new line after ':'"); } consume(TK("@indent")); - while (peek() != TK("@dedent")) { + while (curr().type != TK("@dedent")) { match_newlines(); (this->*action)(); match_newlines(); @@ -756,7 +562,7 @@ private: Token _compile_import() { consume(TK("@id")); - Token tkmodule = parser->prev; + Token tkmodule = prev(); int index = co()->add_name(tkmodule.str(), NAME_SPECIAL); emit(OP_IMPORT_NAME, index); return tkmodule; @@ -768,7 +574,7 @@ private: Token tkmodule = _compile_import(); if (match(TK("as"))) { consume(TK("@id")); - tkmodule = parser->prev; + tkmodule = prev(); } int index = co()->add_name(tkmodule.str(), name_scope()); emit(OP_STORE_NAME, index); @@ -789,12 +595,12 @@ private: do { emit(OP_DUP_TOP_VALUE); consume(TK("@id")); - Token tkname = parser->prev; + Token tkname = prev(); int index = co()->add_name(tkname.str(), NAME_ATTR); emit(OP_BUILD_ATTR, index); if (match(TK("as"))) { consume(TK("@id")); - tkname = parser->prev; + tkname = prev(); } index = co()->add_name(tkname.str(), name_scope()); emit(OP_STORE_NAME, index); @@ -807,14 +613,14 @@ private: // ['a', '1', '2', '+', '='] // void parse_expression(Precedence precedence) { - lex_token(); - GrammarFn prefix = rules[parser->prev.type].prefix; - if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type)); + advance(); + GrammarFn prefix = rules[prev().type].prefix; + if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); (this->*prefix)(); bool meet_assign_token = false; - while (rules[peek()].precedence >= precedence) { - lex_token(); - TokenIndex op = parser->prev.type; + while (rules[curr().type].precedence >= precedence) { + advance(); + TokenIndex op = prev().type; if (op == TK("=")){ if(meet_assign_token) SyntaxError(); meet_assign_token = true; @@ -891,7 +697,7 @@ private: do { consume(TK("except")); if(match(TK("@id"))){ - int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL); + int name_idx = co()->add_name(prev().str(), NAME_SPECIAL); emit(OP_EXCEPTION_MATCH, name_idx); }else{ emit(OP_LOAD_TRUE); @@ -901,7 +707,7 @@ private: compile_block_body(); patches.push_back(emit(OP_JUMP_ABSOLUTE)); patch_jump(patch); - }while(peek() == TK("except")); + }while(curr().type == TK("except")); emit(OP_RE_RAISE); // no match, re-raise for (int patch : patches) patch_jump(patch); } @@ -968,7 +774,7 @@ private: EXPR(); consume(TK("as")); consume(TK("@id")); - Token tkname = parser->prev; + Token tkname = prev(); int index = co()->add_name(tkname.str(), name_scope()); emit(OP_STORE_NAME, index); emit(OP_LOAD_NAME_REF, index); @@ -979,18 +785,18 @@ private: } else 
if(match(TK("label"))){ if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); - Str label = parser->prev.str(); + Str label = prev().str(); bool ok = co()->add_label(label); if(!ok) SyntaxError("label '" + label + "' already exists"); consume_end_stmt(); } else if(match(TK("goto"))){ // https://entrian.com/goto/ if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); consume(TK(".")); consume(TK("@id")); - emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL)); + emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL)); consume_end_stmt(); } else if(match(TK("raise"))){ consume(TK("@id")); - int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL); + int dummy_t = co()->add_name(prev().str(), NAME_SPECIAL); if(match(TK("(")) && !match(TK(")"))){ EXPR(); consume(TK(")")); }else{ @@ -1005,7 +811,7 @@ private: } else if(match(TK("global"))){ do { consume(TK("@id")); - co()->global_names[parser->prev.str()] = 1; + co()->global_names[prev().str()] = 1; } while (match(TK(","))); consume_end_stmt(); } else if(match(TK("pass"))){ @@ -1030,10 +836,10 @@ private: void compile_class(){ consume(TK("@id")); - int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); + int cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL); int super_cls_name_idx = -1; if(match(TK("(")) && match(TK("@id"))){ - super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); + super_cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL); consume(TK(")")); } if(super_cls_name_idx == -1) emit(OP_LOAD_NONE); @@ -1059,13 +865,13 @@ private: } consume(TK("@id")); - const Str& name = parser->prev.str(); + const Str& name = prev().str(); if(func.has_name(name)) SyntaxError("duplicate argument name"); // eat type hints if(enable_type_hints && match(TK(":"))) consume(TK("@id")); - if(state == 0 && peek() == TK("=")) state = 2; + if(state == 0 && curr().type == TK("=")) state = 2; switch (state) { @@ -1075,7 +881,7 @@ private: consume(TK("=")); PyObject* value = read_literal(); if(value == nullptr){ - SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type)); + SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type)); } func.kwargs.set(name, value); func.kwargs_order.push_back(name); @@ -1090,11 +896,11 @@ private: Function func; StrName obj_name; consume(TK("@id")); - func.name = parser->prev.str(); + func.name = prev().str(); if(!co()->_is_compiling_class && match(TK("::"))){ consume(TK("@id")); obj_name = func.name; - func.name = parser->prev.str(); + func.name = prev().str(); } consume(TK("(")); if (!match(TK(")"))) { @@ -1104,7 +910,7 @@ private: if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - func.code = make_sp(parser->src, func.name.str()); + func.code = make_sp(lexer->src, func.name.str()); this->codes.push(func.code); compile_block_body(); func.code->optimize(vm); @@ -1132,11 +938,11 @@ private: PyObject* read_literal(){ if(match(TK("-"))){ consume(TK("@num")); - PyObject* val = parser->prev.value; + PyObject* val = get_value(prev()); return vm->num_negated(val); } - if(match(TK("@num"))) return parser->prev.value; - if(match(TK("@str"))) return parser->prev.value; + if(match(TK("@num"))) return get_value(prev()); + if(match(TK("@str"))) return get_value(prev()); if(match(TK("True"))) return VAR(true); if(match(TK("False"))) return VAR(false); if(match(TK("None"))) return vm->None; @@ -1144,23 +950,8 @@ private: return nullptr; } - /***** Error Reporter 
*****/ - void throw_err(Str type, Str msg){ - int lineno = parser->curr.line; - const char* cursor = parser->curr.start; - // if error occurs in lexing, lineno should be `parser->current_line` - if(lexing_count > 0){ - lineno = parser->current_line; - cursor = parser->curr_char; - } - if(parser->peekchar() == '\n') lineno--; - auto e = Exception("SyntaxError", msg); - e.st_push(parser->src->snapshot(lineno, cursor)); - throw e; - } - void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } - void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } - void IndentationError(Str msg){ throw_err("IndentationError", msg); } + void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); } + void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); } public: CodeObject_ compile(){ @@ -1168,11 +959,16 @@ public: if(used) UNREACHABLE(); used = true; - CodeObject_ code = make_sp(parser->src, Str("")); + tokens = lexer->run(); + // if(lexer->src->filename == "tests/01_int.py"){ + // for(auto& t: tokens) std::cout << t.info() << std::endl; + // } + + CodeObject_ code = make_sp(lexer->src, lexer->src->filename); codes.push(code); - lex_token(); lex_token(); - match_newlines(); + advance(); // skip @sof, so prev() is always valid + match_newlines(); // skip leading '\n' if(mode()==EVAL_MODE) { EXPR_TUPLE(); diff --git a/src/expr.h b/src/expr.h new file mode 100644 index 00000000..5e21f629 --- /dev/null +++ b/src/expr.h @@ -0,0 +1,108 @@ +#pragma once + +#include "codeobject.h" +#include "common.h" +#include "parser.h" +#include "error.h" +#include "ceval.h" +#include + +namespace pkpy{ + +struct Expression; +typedef std::unique_ptr Expression_; + +struct Expression{ + std::vector children; + virtual Str to_string() const = 0; +}; + +struct NameExpr: Expression{ + Str name; + NameScope scope; + NameExpr(Str name, NameScope scope): name(name), scope(scope) {} + Str to_string() const override { return name; } +}; + +struct GroupExpr: Expression{ + Expression_ expr; + GroupExpr(Expression_ expr): expr(std::move(expr)) {} + Str to_string() const override { return "()"; } +}; + +struct UnaryExpr: Expression{ + TokenIndex op; + UnaryExpr(TokenIndex op): op(op) {} + Str to_string() const override { return TK_STR(op); } +}; + +struct NotExpr: Expression{ + Str to_string() const override { return "not"; } +}; + +struct AndExpr: Expression{ + Str to_string() const override { return "and"; } +}; + +struct OrExpr: Expression{ + Str to_string() const override { return "or"; } +}; + +// None, True, False, ... 
+struct SpecialValueExpr: Expression{ + TokenIndex token; + SpecialValueExpr(TokenIndex token): token(token) {} + Str to_string() const override { return TK_STR(token); } +}; + +// @num, @str which needs to invoke OP_LOAD_CONST +struct LiteralExpr: Expression{ + PyObject* value; + LiteralExpr(PyObject* value): value(value) {} + Str to_string() const override { return "literal"; } +}; + +struct ListExpr: Expression{ + Str to_string() const override { return "[]"; } +}; + +struct DictExpr: Expression{ + Str to_string() const override { return "{}"; } +}; + +struct LambdaExpr: Expression{ + Str to_string() const override { return "lambda"; } +}; + +struct FStringExpr: Expression{ + Str to_string() const override { return "@fstr"; } +}; + +struct AttribExpr: Expression{ + Str to_string() const override { return "."; } +}; + +struct CallExpr: Expression{ + Str to_string() const override { return "()"; } +}; + +struct BinaryExpr: Expression{ + TokenIndex op; + BinaryExpr(TokenIndex op): op(op) {} + Str to_string() const override { return TK_STR(op); } +}; + +struct TernaryExpr: Expression{ + Str to_string() const override { return "?"; } +}; + +struct AssignExpr: Expression{ + Str to_string() const override { return "="; } +}; + +struct CommaExpr: Expression{ + Str to_string() const override { return ","; } +}; + + +} // namespace pkpy \ No newline at end of file diff --git a/src/frame.h b/src/frame.h index 4ab98631..e2b9bc13 100644 --- a/src/frame.h +++ b/src/frame.h @@ -58,7 +58,7 @@ struct Frame { } PyObject* pop(){ -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif PyObject* v = _data.back(); @@ -67,7 +67,7 @@ struct Frame { } void _pop(){ -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif _data.pop_back(); @@ -88,14 +88,14 @@ struct Frame { } PyObject*& top(){ -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif return _data.back(); } PyObject*& top_1(){ -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(_data.size() < 2) throw std::runtime_error("_data.size() < 2"); #endif return _data[_data.size()-2]; diff --git a/src/gc.h b/src/gc.h index 1e4d3f16..746bef60 100644 --- a/src/gc.h +++ b/src/gc.h @@ -67,9 +67,9 @@ struct ManagedHeap{ ~ManagedHeap(){ for(PyObject* obj: _no_gc) delete obj; - for(auto& [type, count]: deleted){ - std::cout << "GC: " << type << "=" << count << std::endl; - } + // for(auto& [type, count]: deleted){ + // std::cout << "GC: " << type << "=" << count << std::endl; + // } } int sweep(VM* vm){ diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 00000000..3e20071b --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,510 @@ +#pragma once + +#include "common.h" +#include "error.h" +#include "str.h" + +namespace pkpy{ + +typedef uint8_t TokenIndex; + +constexpr const char* kTokens[] = { + "@eof", "@eol", "@sof", + ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::", + "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->", + "<<", ">>", "&", "|", "^", "?", "@", + "==", "!=", ">=", "<=", + "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=", + /** KW_BEGIN **/ + "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", + "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally", + "goto", "label", // extended keywords, not available in cpython + "while", "for", "if", "elif", "else", 
"break", "continue", "return", "assert", "raise", + /** KW_END **/ + "is not", "not in", + "@id", "@num", "@str", "@fstr", + "@indent", "@dedent" +}; + +using TokenValue = std::variant; +const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]); + +constexpr TokenIndex TK(const char token[]) { + for(int k=0; k kTokenKwMap = [](){ + std::map map; + for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k; + return map; +}(); + + +struct Token{ + TokenIndex type; + const char* start; + int length; + int line; + TokenValue value; + + Str str() const { return Str(start, length);} + + Str info() const { + StrStream ss; + Str raw = str(); + if (raw == Str("\n")) raw = "\\n"; + ss << line << ": " << TK_STR(type) << " '" << raw << "'"; + return ss.str(); + } +}; + +// https://docs.python.org/3/reference/expressions.html +enum Precedence { + PREC_NONE, + PREC_ASSIGNMENT, // = + PREC_COMMA, // , + PREC_TERNARY, // ?: + PREC_LOGICAL_OR, // or + PREC_LOGICAL_AND, // and + PREC_LOGICAL_NOT, // not + PREC_EQUALITY, // == != + PREC_TEST, // in / is / is not / not in + PREC_COMPARISION, // < > <= >= + PREC_BITWISE_OR, // | + PREC_BITWISE_XOR, // ^ + PREC_BITWISE_AND, // & + PREC_BITWISE_SHIFT, // << >> + PREC_TERM, // + - + PREC_FACTOR, // * / % // + PREC_UNARY, // - not + PREC_EXPONENT, // ** + PREC_CALL, // () + PREC_SUBSCRIPT, // [] + PREC_ATTRIB, // .index + PREC_PRIMARY, +}; + +enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; + +struct Lexer { + shared_ptr src; + const char* token_start; + const char* curr_char; + int current_line = 1; + std::vector nexts; + stack indents; + int brackets_level = 0; + bool used = false; + + char peekchar() const{ return *curr_char; } + + bool match_n_chars(int n, char c0){ + const char* c = curr_char; + for(int i=0; i 0) return true; + int spaces = eat_spaces(); + if(peekchar() == '#') skip_line_comment(); + if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true; + // https://docs.python.org/3/reference/lexical_analysis.html#indentation + if(spaces > indents.top()){ + indents.push(spaces); + nexts.push_back(Token{TK("@indent"), token_start, 0, current_line}); + } else if(spaces < indents.top()){ + while(spaces < indents.top()){ + indents.pop(); + nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line}); + } + if(spaces != indents.top()){ + return false; + } + } + return true; + } + + char eatchar() { + char c = peekchar(); + if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline"); + curr_char++; + return c; + } + + char eatchar_include_newline() { + char c = peekchar(); + curr_char++; + if (c == '\n'){ + current_line++; + src->line_starts.push_back(curr_char); + } + return c; + } + + int eat_name() { + curr_char--; + while(true){ + uint8_t c = peekchar(); + int u8bytes = 0; + if((c & 0b10000000) == 0b00000000) u8bytes = 1; + else if((c & 0b11100000) == 0b11000000) u8bytes = 2; + else if((c & 0b11110000) == 0b11100000) u8bytes = 3; + else if((c & 0b11111000) == 0b11110000) u8bytes = 4; + else return 1; + if(u8bytes == 1){ + if(isalpha(c) || c=='_' || isdigit(c)) { + curr_char++; + continue; + }else{ + break; + } + } + // handle multibyte char + std::string u8str(curr_char, u8bytes); + if(u8str.size() != u8bytes) return 2; + uint32_t value = 0; + for(int k=0; k < u8bytes; k++){ + uint8_t b = u8str[k]; + if(k==0){ + if(u8bytes == 2) value = (b & 0b00011111) << 6; + else if(u8bytes == 3) value = (b & 0b00001111) << 12; + else if(u8bytes == 4) value = (b & 0b00000111) << 18; + }else{ + value 
|= (b & 0b00111111) << (6*(u8bytes-k-1)); + } + } + if(is_unicode_Lo_char(value)) curr_char += u8bytes; + else break; + } + + int length = (int)(curr_char - token_start); + if(length == 0) return 3; + std::string_view name(token_start, length); + + if(src->mode == JSON_MODE){ + if(name == "true"){ + add_token(TK("True")); + } else if(name == "false"){ + add_token(TK("False")); + } else if(name == "null"){ + add_token(TK("None")); + } else { + return 4; + } + return 0; + } + + if(kTokenKwMap.count(name)){ + if(name == "not"){ + if(strncmp(curr_char, " in", 3) == 0){ + curr_char += 3; + add_token(TK("not in")); + return 0; + } + }else if(name == "is"){ + if(strncmp(curr_char, " not", 4) == 0){ + curr_char += 4; + add_token(TK("is not")); + return 0; + } + } + add_token(kTokenKwMap.at(name)); + } else { + add_token(TK("@id")); + } + return 0; + } + + void skip_line_comment() { + char c; + while ((c = peekchar()) != '\0') { + if (c == '\n') return; + eatchar(); + } + } + + bool matchchar(char c) { + if (peekchar() != c) return false; + eatchar_include_newline(); + return true; + } + + void add_token(TokenIndex type, TokenValue value={}) { + switch(type){ + case TK("{"): case TK("["): case TK("("): brackets_level++; break; + case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; + } + nexts.push_back( Token{ + type, + token_start, + (int)(curr_char - token_start), + current_line - ((type == TK("@eol")) ? 1 : 0), + value + }); + } + + void add_token_2(char c, TokenIndex one, TokenIndex two) { + if (matchchar(c)) add_token(two); + else add_token(one); + } + + Str eat_string_until(char quote, bool raw) { + bool quote3 = match_n_chars(2, quote); + std::vector buff; + while (true) { + char c = eatchar_include_newline(); + if (c == quote){ + if(quote3 && !match_n_chars(2, quote)){ + buff.push_back(c); + continue; + } + break; + } + if (c == '\0'){ + if(quote3 && src->mode == REPL_MODE){ + throw NeedMoreLines(false); + } + SyntaxError("EOL while scanning string literal"); + } + if (c == '\n'){ + if(!quote3) SyntaxError("EOL while scanning string literal"); + else{ + buff.push_back(c); + continue; + } + } + if (!raw && c == '\\') { + switch (eatchar_include_newline()) { + case '"': buff.push_back('"'); break; + case '\'': buff.push_back('\''); break; + case '\\': buff.push_back('\\'); break; + case 'n': buff.push_back('\n'); break; + case 'r': buff.push_back('\r'); break; + case 't': buff.push_back('\t'); break; + default: SyntaxError("invalid escape char"); + } + } else { + buff.push_back(c); + } + } + return Str(buff.data(), buff.size()); + } + + void eat_string(char quote, StringType type) { + Str s = eat_string_until(quote, type == RAW_STRING); + if(type == F_STRING){ + add_token(TK("@fstr"), s); + }else{ + add_token(TK("@str"), s); + } + } + + void eat_number() { + static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?"); + std::smatch m; + + const char* i = token_start; + while(*i != '\n' && *i != '\0') i++; + std::string s = std::string(token_start, i); + + try{ + if (std::regex_search(s, m, pattern)) { + // here is m.length()-1, since the first char was eaten by lex_token() + for(int j=0; j=")); + else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>=")); + else add_token(TK(">")); + return true; + } + case '<': { + if(matchchar('=')) add_token(TK("<=")); + else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<=")); + else add_token(TK("<")); + return true; + } + case '-': { + if(matchchar('=')) add_token(TK("-=")); + else if(matchchar('>')) add_token(TK("->")); + 
else add_token(TK("-")); + return true; + } + case '!': + if(matchchar('=')) add_token(TK("!=")); + else SyntaxError("expected '=' after '!'"); + break; + case '*': + if (matchchar('*')) { + add_token(TK("**")); // '**' + } else { + add_token_2('=', TK("*"), TK("*=")); + } + return true; + case '/': + if(matchchar('/')) { + add_token_2('=', TK("//"), TK("//=")); + } else { + add_token_2('=', TK("/"), TK("/=")); + } + return true; + case '\r': break; // just ignore '\r' + case ' ': case '\t': eat_spaces(); break; + case '\n': { + add_token(TK("@eol")); + if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level"); + return true; + } + default: { + if(c == 'f'){ + if(matchchar('\'')) {eat_string('\'', F_STRING); return true;} + if(matchchar('"')) {eat_string('"', F_STRING); return true;} + }else if(c == 'r'){ + if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;} + if(matchchar('"')) {eat_string('"', RAW_STRING); return true;} + } + if (c >= '0' && c <= '9') { + eat_number(); + return true; + } + switch (eat_name()) + { + case 0: break; + case 1: SyntaxError("invalid char: " + std::string(1, c)); + case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); + case 3: SyntaxError("@id contains invalid char"); break; + case 4: SyntaxError("invalid JSON token"); break; + default: UNREACHABLE(); + } + return true; + } + } + } + + token_start = curr_char; + while(indents.size() > 1){ + indents.pop(); + add_token(TK("@dedent")); + return true; + } + add_token(TK("@eof")); + return false; + } + + /***** Error Reporter *****/ + void throw_err(Str type, Str msg){ + int lineno = current_line; + const char* cursor = curr_char; + if(peekchar() == '\n'){ + lineno--; + cursor--; + } + throw_err(type, msg, lineno, cursor); + } + + void throw_err(Str type, Str msg, int lineno, const char* cursor){ + auto e = Exception("SyntaxError", msg); + e.st_push(src->snapshot(lineno, cursor)); + throw e; + } + void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } + void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } + void IndentationError(Str msg){ throw_err("IndentationError", msg); } + + Lexer(shared_ptr src) { + this->src = src; + this->token_start = src->source; + this->curr_char = src->source; + this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line}); + this->indents.push(0); + } + + std::vector run() { + if(used) UNREACHABLE(); + used = true; + while (lex_one_token()); + return std::move(nexts); + } +}; + +} // namespace pkpy \ No newline at end of file diff --git a/src/parser.h b/src/parser.h deleted file mode 100644 index 60c280b2..00000000 --- a/src/parser.h +++ /dev/null @@ -1,302 +0,0 @@ -#pragma once - -#include "error.h" -#include "obj.h" - -namespace pkpy{ - -typedef uint8_t TokenIndex; - -constexpr const char* kTokens[] = { - "@error", "@eof", "@eol", "@sof", - ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::", - "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->", - "<<", ">>", "&", "|", "^", "?", "@", - "==", "!=", ">=", "<=", - "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=", - /** KW_BEGIN **/ - "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", - "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally", - "goto", "label", // extended keywords, not available in cpython - "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", - /** KW_END **/ - "is not", "not in", - 
"@id", "@num", "@str", "@fstr", - "@indent", "@dedent" -}; - -const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]); - -constexpr TokenIndex TK(const char token[]) { - for(int k=0; k kTokenKwMap = [](){ - std::map map; - for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k; - return map; -}(); - - -struct Token{ - TokenIndex type; - - const char* start; - int length; - int line; - PyObject* value; - - Str str() const { return Str(start, length);} - - Str info() const { - StrStream ss; - Str raw = str(); - if (raw == Str("\n")) raw = "\\n"; - ss << line << ": " << TK_STR(type) << " '" << raw << "'"; - return ss.str(); - } -}; - -// https://docs.python.org/3/reference/expressions.html -enum Precedence { - PREC_NONE, - PREC_ASSIGNMENT, // = - PREC_COMMA, // , - PREC_TERNARY, // ?: - PREC_LOGICAL_OR, // or - PREC_LOGICAL_AND, // and - PREC_LOGICAL_NOT, // not - PREC_EQUALITY, // == != - PREC_TEST, // in / is / is not / not in - PREC_COMPARISION, // < > <= >= - PREC_BITWISE_OR, // | - PREC_BITWISE_XOR, // ^ - PREC_BITWISE_AND, // & - PREC_BITWISE_SHIFT, // << >> - PREC_TERM, // + - - PREC_FACTOR, // * / % // - PREC_UNARY, // - not - PREC_EXPONENT, // ** - PREC_CALL, // () - PREC_SUBSCRIPT, // [] - PREC_ATTRIB, // .index - PREC_PRIMARY, -}; - -// The context of the parsing phase for the compiler. -struct Parser { - shared_ptr src; - - const char* token_start; - const char* curr_char; - int current_line = 1; - Token prev, curr; - queue nexts; - stack indents; - - int brackets_level = 0; - - Token next_token(){ - if(nexts.empty()){ - return Token{TK("@error"), token_start, (int)(curr_char - token_start), current_line}; - } - Token t = nexts.front(); - if(t.type == TK("@eof") && indents.size()>1){ - nexts.pop(); - indents.pop(); - return Token{TK("@dedent"), token_start, 0, current_line}; - } - nexts.pop(); - return t; - } - - char peekchar() const{ return *curr_char; } - - bool match_n_chars(int n, char c0){ - const char* c = curr_char; - for(int i=0; i 0) return true; - int spaces = eat_spaces(); - if(peekchar() == '#') skip_line_comment(); - if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true; - // https://docs.python.org/3/reference/lexical_analysis.html#indentation - if(spaces > indents.top()){ - indents.push(spaces); - nexts.push(Token{TK("@indent"), token_start, 0, current_line}); - } else if(spaces < indents.top()){ - while(spaces < indents.top()){ - indents.pop(); - nexts.push(Token{TK("@dedent"), token_start, 0, current_line}); - } - if(spaces != indents.top()){ - return false; - } - } - return true; - } - - char eatchar() { - char c = peekchar(); - if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline"); - curr_char++; - return c; - } - - char eatchar_include_newline() { - char c = peekchar(); - curr_char++; - if (c == '\n'){ - current_line++; - src->line_starts.push_back(curr_char); - } - return c; - } - - int eat_name() { - curr_char--; - while(true){ - uint8_t c = peekchar(); - int u8bytes = 0; - if((c & 0b10000000) == 0b00000000) u8bytes = 1; - else if((c & 0b11100000) == 0b11000000) u8bytes = 2; - else if((c & 0b11110000) == 0b11100000) u8bytes = 3; - else if((c & 0b11111000) == 0b11110000) u8bytes = 4; - else return 1; - if(u8bytes == 1){ - if(isalpha(c) || c=='_' || isdigit(c)) { - curr_char++; - continue; - }else{ - break; - } - } - // handle multibyte char - std::string u8str(curr_char, u8bytes); - if(u8str.size() != u8bytes) return 2; - uint32_t value = 0; - for(int k=0; k < u8bytes; k++){ - uint8_t b = 
u8str[k]; - if(k==0){ - if(u8bytes == 2) value = (b & 0b00011111) << 6; - else if(u8bytes == 3) value = (b & 0b00001111) << 12; - else if(u8bytes == 4) value = (b & 0b00000111) << 18; - }else{ - value |= (b & 0b00111111) << (6*(u8bytes-k-1)); - } - } - if(is_unicode_Lo_char(value)) curr_char += u8bytes; - else break; - } - - int length = (int)(curr_char - token_start); - if(length == 0) return 3; - std::string_view name(token_start, length); - - if(src->mode == JSON_MODE){ - if(name == "true"){ - set_next_token(TK("True")); - } else if(name == "false"){ - set_next_token(TK("False")); - } else if(name == "null"){ - set_next_token(TK("None")); - } else { - return 4; - } - return 0; - } - - if(kTokenKwMap.count(name)){ - if(name == "not"){ - if(strncmp(curr_char, " in", 3) == 0){ - curr_char += 3; - set_next_token(TK("not in")); - return 0; - } - }else if(name == "is"){ - if(strncmp(curr_char, " not", 4) == 0){ - curr_char += 4; - set_next_token(TK("is not")); - return 0; - } - } - set_next_token(kTokenKwMap.at(name)); - } else { - set_next_token(TK("@id")); - } - return 0; - } - - void skip_line_comment() { - char c; - while ((c = peekchar()) != '\0') { - if (c == '\n') return; - eatchar(); - } - } - - bool matchchar(char c) { - if (peekchar() != c) return false; - eatchar_include_newline(); - return true; - } - - void set_next_token(TokenIndex type, PyObject* value=nullptr) { - switch(type){ - case TK("{"): case TK("["): case TK("("): brackets_level++; break; - case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; - } - nexts.push( Token{ - type, - token_start, - (int)(curr_char - token_start), - current_line - ((type == TK("@eol")) ? 1 : 0), - value - }); - } - - void set_next_token_2(char c, TokenIndex one, TokenIndex two) { - if (matchchar(c)) set_next_token(two); - else set_next_token(one); - } - - Parser(shared_ptr src) { - this->src = src; - this->token_start = src->source; - this->curr_char = src->source; - this->nexts.push(Token{TK("@sof"), token_start, 0, current_line}); - this->indents.push(0); - } -}; - -} // namespace pkpy \ No newline at end of file diff --git a/src/pocketpy.h b/src/pocketpy.h index bc6a8109..f5fe5aac 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -760,6 +760,7 @@ inline void add_module_gc(VM* vm){ inline void VM::post_init(){ init_builtins(this); +#if !DEBUG_NO_BUILTIN_MODULES add_module_sys(this); add_module_time(this); add_module_json(this); @@ -793,6 +794,7 @@ inline void VM::post_init(){ const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])]; return VAR(info.name); })); +#endif } } // namespace pkpy diff --git a/src/vm.h b/src/vm.h index 749f321b..da1c1d05 100644 --- a/src/vm.h +++ b/src/vm.h @@ -93,7 +93,7 @@ public: } Frame* top_frame() const { -#if PK_EXTRA_CHECK +#if DEBUG_EXTRA_CHECK if(callstack.empty()) UNREACHABLE(); #endif return callstack.top().get(); @@ -166,7 +166,7 @@ public: if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); - if(_module == _main) std::cout << disassemble(code) << '\n'; + // if(_module == _main) std::cout << disassemble(code) << '\n'; return _exec(code, _module); }catch (const Exception& e){ *_stderr << e.summary() << '\n';
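
The core change in this patch is architectural: the old `Parser` produced tokens on demand (the compiler called `lex_token()` and peeked into a queue of pending tokens), whereas the new `Lexer` tokenizes the whole source up front — `Lexer::run()` returns a vector of `Token`s — and the `Compiler` then walks that vector by index through `prev()`/`curr()`/`next()`/`peek()`/`advance()`, with `match()`/`consume()` built on top. The standalone sketch below mirrors that two-phase shape with hypothetical minimal types (`TokenKind`, `run_lexer`, `MiniParser` are illustrative names, not pocketpy's API).

```cpp
// Illustrative sketch only: a minimal stand-in for the two-phase design in this
// patch (tokenize everything first, then navigate the token vector by index).
#include <cctype>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

enum class TokenKind { Number, Plus, Eof };

struct Token {
    TokenKind type;
    std::string text;
    int line;
};

// Phase 1: tokenize the whole input up front, like Lexer::run() in the patch.
std::vector<Token> run_lexer(const std::string& src) {
    std::vector<Token> out;
    size_t i = 0;
    int line = 1;
    while (i < src.size()) {
        char c = src[i];
        if (c == '\n') { line++; i++; continue; }
        if (std::isspace(static_cast<unsigned char>(c))) { i++; continue; }
        if (std::isdigit(static_cast<unsigned char>(c))) {
            size_t j = i;
            while (j < src.size() && std::isdigit(static_cast<unsigned char>(src[j]))) j++;
            out.push_back({TokenKind::Number, src.substr(i, j - i), line});
            i = j;
            continue;
        }
        if (c == '+') { out.push_back({TokenKind::Plus, "+", line}); i++; continue; }
        throw std::runtime_error("unexpected character");
    }
    out.push_back({TokenKind::Eof, "", line});
    return out;
}

// Phase 2: the consumer keeps an index into the token vector, mirroring the
// prev()/curr()/advance()/match() helpers the compiler gains in this patch.
struct MiniParser {
    std::vector<Token> tokens;
    int i = 0;

    const Token& prev() const { return tokens.at(i - 1); }
    const Token& curr() const { return tokens.at(i); }
    void advance() { i++; }

    bool match(TokenKind expected) {
        if (curr().type != expected) return false;
        advance();
        return true;
    }

    // Sum a "1 + 2 + 40" style input, just to exercise the navigation helpers.
    long parse_sum() {
        long total = 0;
        do {
            if (!match(TokenKind::Number)) throw std::runtime_error("expected number");
            total += std::stol(prev().text);
        } while (match(TokenKind::Plus));
        return total;
    }
};

int main() {
    MiniParser p;
    p.tokens = run_lexer("1 + 2 + 40");
    std::cout << p.parse_sum() << std::endl;  // prints 43
    return 0;
}
```

One payoff visible in the diff: arbitrary lookahead becomes a trivial index offset (the new `peek(int offset)` helper), replacing the old `peek_next()` over the pending-token queue that the previous code itself flagged with a "not sure this will work" comment.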
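
A second change is that `Token::value` is no longer a `PyObject*`: the new `lexer.h` includes only `common.h`, `error.h` and `str.h` (not `obj.h`), and stores literal values in a `TokenValue` variant that the compiler converts to a `PyObject*` lazily in `get_value()` when it emits `OP_LOAD_CONST`. Below is a minimal sketch of that pattern; the variant alternatives (`monostate` / `long long` / `double` / `std::string`) are stand-ins I chose for the example, while the real header uses pocketpy's own `i64`/`f64`/`Str` types.

```cpp
// Illustrative sketch of carrying lexed literal values in a tagged union, in the
// spirit of the TokenValue variant and the compiler's get_value() in this patch.
#include <iostream>
#include <string>
#include <variant>

// Assumed alternatives for the sketch; not the real TokenValue declaration.
using TokenValue = std::variant<std::monostate, long long, double, std::string>;

std::string describe(const TokenValue& v) {
    if (std::holds_alternative<long long>(v))
        return "int literal " + std::to_string(std::get<long long>(v));
    if (std::holds_alternative<double>(v))
        return "float literal " + std::to_string(std::get<double>(v));
    if (std::holds_alternative<std::string>(v))
        return "string literal \"" + std::get<std::string>(v) + "\"";
    return "no attached value";  // e.g. operators and keywords
}

int main() {
    std::cout << describe(TokenValue{42LL}) << "\n";
    std::cout << describe(TokenValue{3.14}) << "\n";
    std::cout << describe(TokenValue{std::string("hello")}) << "\n";
    std::cout << describe(TokenValue{}) << "\n";
    return 0;
}
```

Keeping `PyObject*` out of the lexer means tokenization no longer touches the VM heap at all, which also sidesteps any interaction with the garbage collector now that `heap._auto_collect(this)` is re-enabled in `ceval.h`.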
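
The token table and the constexpr `TK()` lookup move from `parser.h` to `lexer.h` largely unchanged (the `"@error"` entry is dropped and the `TokenValue` alias is added). For reference, here is a small self-contained sketch of the compile-time string-to-index lookup idea; the token list below is a hypothetical subset, and the comparison helper is my own rather than the exact loop in the header.

```cpp
// Illustrative sketch of a compile-time token lookup in the style of the
// kTokens / TK() pair in lexer.h. The table is a tiny made-up subset.
#include <cstdint>
#include <cstdio>

typedef uint8_t TokenIndex;

constexpr const char* kTokens[] = { "@eof", "@eol", "+", "-", "@num" };
constexpr TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);

// constexpr strcmp-style comparison so the search can run at compile time.
constexpr bool token_equals(const char* a, const char* b) {
    while (*a && *b && *a == *b) { a++; b++; }
    return *a == *b;
}

constexpr TokenIndex TK(const char token[]) {
    for (TokenIndex k = 0; k < kTokenCount; k++) {
        if (token_equals(kTokens[k], token)) return k;
    }
    return 255;  // not found; a real header could static_assert instead
}

static_assert(TK("+") == 2, "token index is computed at compile time");

int main() {
    std::printf("%d %d\n", TK("@num"), TK("@eof"));  // prints 4 0
    return 0;
}
```

Because `TK()` is constexpr, expressions such as `TK("@eol")` used throughout the lexer's switch statements reduce to plain integer constants at compile time.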