update lexer

This commit is contained in:
blueloveTH 2023-03-29 23:33:45 +08:00
parent 120773891a
commit e78aa44895
11 changed files with 730 additions and 630 deletions

View File

@ -6,8 +6,8 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f:
OPCODES_TEXT = f.read() OPCODES_TEXT = f.read()
pipeline = [ pipeline = [
["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h"], ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"],
["obj.h", "parser.h", "codeobject.h", "frame.h"], ["obj.h", "codeobject.h", "frame.h"],
["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"], ["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"],
["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"] ["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"]
] ]

View File

@ -7,7 +7,7 @@ namespace pkpy{
inline PyObject* VM::run_frame(Frame* frame){ inline PyObject* VM::run_frame(Frame* frame){
while(frame->has_next_bytecode()){ while(frame->has_next_bytecode()){
// heap._auto_collect(this); heap._auto_collect(this);
const Bytecode& byte = frame->next_bytecode(); const Bytecode& byte = frame->next_bytecode();
switch (byte.op) switch (byte.op)
@ -325,7 +325,7 @@ inline PyObject* VM::run_frame(Frame* frame){
if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE"); if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE");
return frame->pop_value(this); return frame->pop_value(this);
} }
#if PK_EXTRA_CHECK #if DEBUG_EXTRA_CHECK
if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE"); if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE");
#endif #endif
return None; return None;

View File

@ -10,7 +10,6 @@
#include <sstream> #include <sstream>
#include <regex> #include <regex>
#include <cmath> #include <cmath>
#include <cstdlib>
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#include <string> #include <string>
@ -26,10 +25,13 @@
#include <algorithm> #include <algorithm>
#include <random> #include <random>
#include <initializer_list> #include <initializer_list>
#include <list> #include <variant>
#define PK_VERSION "0.9.5" #define PK_VERSION "0.9.6"
#define PK_EXTRA_CHECK 0
// debug macros
#define DEBUG_NO_BUILTIN_MODULES 0
#define DEBUG_EXTRA_CHECK 1
#if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__)
#define PK_ENABLE_FILEIO 0 #define PK_ENABLE_FILEIO 0
@ -40,13 +42,13 @@
#if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__) #if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__)
typedef int32_t i64; typedef int32_t i64;
typedef float f64; typedef float f64;
#define S_TO_INT std::stoi #define S_TO_INT(...) static_cast<i64>(std::stoi(__VA_ARGS__))
#define S_TO_FLOAT std::stof #define S_TO_FLOAT(...) static_cast<f64>(std::stof(__VA_ARGS__))
#else #else
typedef int64_t i64; typedef int64_t i64;
typedef double f64; typedef double f64;
#define S_TO_INT std::stoll #define S_TO_INT(...) static_cast<i64>(std::stoll(__VA_ARGS__))
#define S_TO_FLOAT std::stod #define S_TO_FLOAT(...) static_cast<f64>(std::stod(__VA_ARGS__))
#endif #endif
namespace pkpy{ namespace pkpy{
@ -100,22 +102,6 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept {
return is_int(a) && is_int(b); return is_int(a) && is_int(b);
} }
template <typename T>
class queue{
std::list<T> list;
public:
void push(const T& t){ list.push_back(t); }
void push(T&& t){ list.push_back(std::move(t)); }
void pop(){ list.pop_front(); }
void clear(){ list.clear(); }
bool empty() const { return list.empty(); }
size_t size() const { return list.size(); }
T& front(){ return list.front(); }
const T& front() const { return list.front(); }
const std::list<T>& data() const { return list; }
};
template <typename T> template <typename T>
class stack{ class stack{
std::vector<T> vec; std::vector<T> vec;

View File

@ -2,7 +2,7 @@
#include "codeobject.h" #include "codeobject.h"
#include "common.h" #include "common.h"
#include "parser.h" #include "lexer.h"
#include "error.h" #include "error.h"
#include "ceval.h" #include "ceval.h"
@ -18,24 +18,21 @@ struct GrammarRule{
Precedence precedence; Precedence precedence;
}; };
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
class Compiler { class Compiler {
std::unique_ptr<Parser> parser; std::unique_ptr<Lexer> lexer;
stack<CodeObject_> codes; stack<CodeObject_> codes;
int lexing_count = 0;
bool used = false; bool used = false;
VM* vm; VM* vm;
std::map<TokenIndex, GrammarRule> rules; std::map<TokenIndex, GrammarRule> rules;
CodeObject_ co() const{ return codes.top(); } CodeObject_ co() const{ return codes.top(); }
CompileMode mode() const{ return parser->src->mode; } CompileMode mode() const{ return lexer->src->mode; }
NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; }
public: public:
Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
this->vm = vm; this->vm = vm;
this->parser = std::make_unique<Parser>( this->lexer = std::make_unique<Lexer>(
make_sp<SourceData>(source, filename, mode) make_sp<SourceData>(source, filename, mode)
); );
@ -104,239 +101,36 @@ public:
} }
private: private:
Str eat_string_until(char quote, bool raw) { int i = 0;
bool quote3 = parser->match_n_chars(2, quote); std::vector<Token> tokens;
std::vector<char> buff;
while (true) {
char c = parser->eatchar_include_newline();
if (c == quote){
if(quote3 && !parser->match_n_chars(2, quote)){
buff.push_back(c);
continue;
}
break;
}
if (c == '\0'){
if(quote3 && parser->src->mode == REPL_MODE){
throw NeedMoreLines(false);
}
SyntaxError("EOL while scanning string literal");
}
if (c == '\n'){
if(!quote3) SyntaxError("EOL while scanning string literal");
else{
buff.push_back(c);
continue;
}
}
if (!raw && c == '\\') {
switch (parser->eatchar_include_newline()) {
case '"': buff.push_back('"'); break;
case '\'': buff.push_back('\''); break;
case '\\': buff.push_back('\\'); break;
case 'n': buff.push_back('\n'); break;
case 'r': buff.push_back('\r'); break;
case 't': buff.push_back('\t'); break;
default: SyntaxError("invalid escape char");
}
} else {
buff.push_back(c);
}
}
return Str(buff.data(), buff.size());
}
void eat_string(char quote, StringType type) { const Token& prev() { return tokens.at(i-1); }
Str s = eat_string_until(quote, type == RAW_STRING); const Token& curr() { return tokens.at(i); }
if(type == F_STRING){ const Token& next() { return tokens.at(i+1); }
parser->set_next_token(TK("@fstr"), VAR(s)); const Token& peek(int offset=0) { return tokens.at(i+offset); }
}else{ void advance() { i++; }
parser->set_next_token(TK("@str"), VAR(s));
}
}
void eat_number() {
static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
std::smatch m;
const char* i = parser->token_start;
while(*i != '\n' && *i != '\0') i++;
std::string s = std::string(parser->token_start, i);
try{
if (std::regex_search(s, m, pattern)) {
// here is m.length()-1, since the first char was eaten by lex_token()
for(int j=0; j<m.length()-1; j++) parser->eatchar();
int base = 10;
size_t size;
if (m[1].matched) base = 16;
if (m[2].matched) {
if(base == 16) SyntaxError("hex literal should not contain a dot");
parser->set_next_token(TK("@num"), VAR(S_TO_FLOAT(m[0], &size)));
} else {
parser->set_next_token(TK("@num"), VAR(S_TO_INT(m[0], &size, base)));
}
if (size != m.length()) UNREACHABLE();
}
}catch(std::exception& _){
SyntaxError("invalid number literal");
}
}
void lex_token(){
lexing_count++;
_lex_token();
lexing_count--;
}
// Lex the next token and set it as the next token.
void _lex_token() {
parser->prev = parser->curr;
parser->curr = parser->next_token();
//std::cout << parser->curr.info() << std::endl;
while (parser->peekchar() != '\0') {
parser->token_start = parser->curr_char;
char c = parser->eatchar_include_newline();
switch (c) {
case '\'': case '"': eat_string(c, NORMAL_STRING); return;
case '#': parser->skip_line_comment(); break;
case '{': parser->set_next_token(TK("{")); return;
case '}': parser->set_next_token(TK("}")); return;
case ',': parser->set_next_token(TK(",")); return;
case ':': parser->set_next_token_2(':', TK(":"), TK("::")); return;
case ';': parser->set_next_token(TK(";")); return;
case '(': parser->set_next_token(TK("(")); return;
case ')': parser->set_next_token(TK(")")); return;
case '[': parser->set_next_token(TK("[")); return;
case ']': parser->set_next_token(TK("]")); return;
case '@': parser->set_next_token(TK("@")); return;
case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return;
case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return;
case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return;
case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return;
case '?': parser->set_next_token(TK("?")); return;
case '.': {
if(parser->matchchar('.')) {
if(parser->matchchar('.')) {
parser->set_next_token(TK("..."));
} else {
SyntaxError("invalid token '..'");
}
} else {
parser->set_next_token(TK("."));
}
return;
}
case '=': parser->set_next_token_2('=', TK("="), TK("==")); return;
case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return;
case '>': {
if(parser->matchchar('=')) parser->set_next_token(TK(">="));
else if(parser->matchchar('>')) parser->set_next_token_2('=', TK(">>"), TK(">>="));
else parser->set_next_token(TK(">"));
return;
}
case '<': {
if(parser->matchchar('=')) parser->set_next_token(TK("<="));
else if(parser->matchchar('<')) parser->set_next_token_2('=', TK("<<"), TK("<<="));
else parser->set_next_token(TK("<"));
return;
}
case '-': {
if(parser->matchchar('=')) parser->set_next_token(TK("-="));
else if(parser->matchchar('>')) parser->set_next_token(TK("->"));
else parser->set_next_token(TK("-"));
return;
}
case '!':
if(parser->matchchar('=')) parser->set_next_token(TK("!="));
else SyntaxError("expected '=' after '!'");
break;
case '*':
if (parser->matchchar('*')) {
parser->set_next_token(TK("**")); // '**'
} else {
parser->set_next_token_2('=', TK("*"), TK("*="));
}
return;
case '/':
if(parser->matchchar('/')) {
parser->set_next_token_2('=', TK("//"), TK("//="));
} else {
parser->set_next_token_2('=', TK("/"), TK("/="));
}
return;
case '\r': break; // just ignore '\r'
case ' ': case '\t': parser->eat_spaces(); break;
case '\n': {
parser->set_next_token(TK("@eol"));
if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level");
return;
}
default: {
if(c == 'f'){
if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;}
if(parser->matchchar('"')) {eat_string('"', F_STRING); return;}
}else if(c == 'r'){
if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;}
if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;}
}
if (c >= '0' && c <= '9') {
eat_number();
return;
}
switch (parser->eat_name())
{
case 0: break;
case 1: SyntaxError("invalid char: " + std::string(1, c));
case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
case 3: SyntaxError("@id contains invalid char"); break;
case 4: SyntaxError("invalid JSON token"); break;
default: UNREACHABLE();
}
return;
}
}
}
parser->token_start = parser->curr_char;
parser->set_next_token(TK("@eof"));
}
TokenIndex peek() {
return parser->curr.type;
}
// not sure this will work
TokenIndex peek_next() {
if(parser->nexts.empty()) return TK("@eof");
return parser->nexts.front().type;
}
bool match(TokenIndex expected) { bool match(TokenIndex expected) {
if (peek() != expected) return false; if (curr().type != expected) return false;
lex_token(); advance();
return true; return true;
} }
void consume(TokenIndex expected) { void consume(TokenIndex expected) {
if (!match(expected)){ if (!match(expected)){
StrStream ss; StrStream ss;
ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'"; ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'";
SyntaxError(ss.str()); SyntaxError(ss.str());
} }
} }
bool match_newlines(bool repl_throw=false) { bool match_newlines(bool repl_throw=false) {
bool consumed = false; bool consumed = false;
if (peek() == TK("@eol")) { if (curr().type == TK("@eol")) {
while (peek() == TK("@eol")) lex_token(); while (curr().type == TK("@eol")) advance();
consumed = true; consumed = true;
} }
if (repl_throw && peek() == TK("@eof")){ if (repl_throw && curr().type == TK("@eof")){
throw NeedMoreLines(co()->_is_compiling_class); throw NeedMoreLines(co()->_is_compiling_class);
} }
return consumed; return consumed;
@ -344,8 +138,8 @@ private:
bool match_end_stmt() { bool match_end_stmt() {
if (match(TK(";"))) { match_newlines(); return true; } if (match(TK(";"))) { match_newlines(); return true; }
if (match_newlines() || peek()==TK("@eof")) return true; if (match_newlines() || curr().type == TK("@eof")) return true;
if (peek() == TK("@dedent")) return true; if (curr().type == TK("@dedent")) return true;
return false; return false;
} }
@ -353,15 +147,27 @@ private:
if (!match_end_stmt()) SyntaxError("expected statement end"); if (!match_end_stmt()) SyntaxError("expected statement end");
} }
// Convert a literal token's payload into a runtime PyObject*.
// Only @num, @str and @fstr tokens carry a value (a std::variant); any
// other token type is a caller bug and raises std::runtime_error.
PyObject* get_value(const Token& token) {
switch (token.type) {
case TK("@num"):
// numeric literals store either an i64 or an f64 alternative
if(std::holds_alternative<i64>(token.value)) return VAR(std::get<i64>(token.value));
if(std::holds_alternative<f64>(token.value)) return VAR(std::get<f64>(token.value));
UNREACHABLE();
case TK("@str"): case TK("@fstr"):
// string / f-string literals store a Str alternative
return VAR(std::get<Str>(token.value));
default: throw std::runtime_error(Str("invalid token type: ") + TK_STR(token.type));
}
}
void exprLiteral() { void exprLiteral() {
PyObject* value = parser->prev.value; PyObject* value = get_value(prev());
int index = co()->add_const(value); int index = co()->add_const(value);
emit(OP_LOAD_CONST, index); emit(OP_LOAD_CONST, index);
} }
void exprFString() { void exprFString() {
static const std::regex pattern(R"(\{(.*?)\})"); static const std::regex pattern(R"(\{(.*?)\})");
PyObject* value = parser->prev.value; PyObject* value = get_value(prev());
Str s = CAST(Str, value); Str s = CAST(Str, value);
std::sregex_iterator begin(s.begin(), s.end(), pattern); std::sregex_iterator begin(s.begin(), s.end(), pattern);
std::sregex_iterator end; std::sregex_iterator end;
@ -395,7 +201,7 @@ private:
_compile_f_args(func, false); _compile_f_args(func, false);
consume(TK(":")); consume(TK(":"));
} }
func.code = make_sp<CodeObject>(parser->src, func.name.str()); func.code = make_sp<CodeObject>(lexer->src, func.name.str());
this->codes.push(func.code); this->codes.push(func.code);
co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1;
emit(OP_RETURN_VALUE); emit(OP_RETURN_VALUE);
@ -414,7 +220,7 @@ private:
if(is_load_name_ref) co()->codes.pop_back(); if(is_load_name_ref) co()->codes.pop_back();
co()->_rvalue += 1; co()->_rvalue += 1;
TokenIndex op = parser->prev.type; TokenIndex op = prev().type;
if(op == TK("=")) { // a = (expr) if(op == TK("=")) { // a = (expr)
EXPR_TUPLE(); EXPR_TUPLE();
if(is_load_name_ref){ if(is_load_name_ref){
@ -487,7 +293,7 @@ private:
} }
void exprBinaryOp() { void exprBinaryOp() {
TokenIndex op = parser->prev.type; TokenIndex op = prev().type;
parse_expression((Precedence)(rules[op].precedence + 1)); parse_expression((Precedence)(rules[op].precedence + 1));
switch (op) { switch (op) {
@ -525,7 +331,7 @@ private:
} }
void exprUnaryOp() { void exprUnaryOp() {
TokenIndex op = parser->prev.type; TokenIndex op = prev().type;
parse_expression((Precedence)(PREC_UNARY + 1)); parse_expression((Precedence)(PREC_UNARY + 1));
switch (op) { switch (op) {
case TK("-"): emit(OP_UNARY_NEGATIVE); break; case TK("-"): emit(OP_UNARY_NEGATIVE); break;
@ -588,7 +394,7 @@ private:
int ARGC = 0; int ARGC = 0;
do { do {
match_newlines(mode()==REPL_MODE); match_newlines(mode()==REPL_MODE);
if (peek() == TK("]")) break; if (curr().type == TK("]")) break;
EXPR(); ARGC++; EXPR(); ARGC++;
match_newlines(mode()==REPL_MODE); match_newlines(mode()==REPL_MODE);
if(ARGC == 1 && match(TK("for"))){ if(ARGC == 1 && match(TK("for"))){
@ -609,9 +415,9 @@ private:
int ARGC = 0; int ARGC = 0;
do { do {
match_newlines(mode()==REPL_MODE); match_newlines(mode()==REPL_MODE);
if (peek() == TK("}")) break; if (curr().type == TK("}")) break;
EXPR(); EXPR();
if(peek() == TK(":")) parsing_dict = true; if(curr().type == TK(":")) parsing_dict = true;
if(parsing_dict){ if(parsing_dict){
consume(TK(":")); consume(TK(":"));
EXPR(); EXPR();
@ -637,10 +443,10 @@ private:
bool need_unpack = false; bool need_unpack = false;
do { do {
match_newlines(mode()==REPL_MODE); match_newlines(mode()==REPL_MODE);
if (peek() == TK(")")) break; if (curr().type == TK(")")) break;
if(peek() == TK("@id") && peek_next() == TK("=")) { if(curr().type == TK("@id") && next().type == TK("=")) {
consume(TK("@id")); consume(TK("@id"));
const Str& key = parser->prev.str(); const Str& key = prev().str();
emit(OP_LOAD_CONST, co()->add_const(VAR(key))); emit(OP_LOAD_CONST, co()->add_const(VAR(key)));
consume(TK("=")); consume(TK("="));
co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1;
@ -666,7 +472,7 @@ private:
void exprName(){ _exprName(false); } void exprName(){ _exprName(false); }
void _exprName(bool force_lvalue) { void _exprName(bool force_lvalue) {
Token tkname = parser->prev; const Token& tkname = prev();
int index = co()->add_name(tkname.str(), name_scope()); int index = co()->add_name(tkname.str(), name_scope());
bool fast_load = !force_lvalue && co()->_rvalue>0; bool fast_load = !force_lvalue && co()->_rvalue>0;
emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index); emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index);
@ -674,7 +480,7 @@ private:
void exprAttrib() { void exprAttrib() {
consume(TK("@id")); consume(TK("@id"));
const Str& name = parser->prev.str(); const Str& name = prev().str();
int index = co()->add_name(name, NAME_ATTR); int index = co()->add_name(name, NAME_ATTR);
emit(co()->_rvalue ? OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index); emit(co()->_rvalue ? OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index);
} }
@ -710,7 +516,7 @@ private:
} }
void exprValue() { void exprValue() {
TokenIndex op = parser->prev.type; TokenIndex op = prev().type;
switch (op) { switch (op) {
case TK("None"): emit(OP_LOAD_NONE); break; case TK("None"): emit(OP_LOAD_NONE); break;
case TK("True"): emit(OP_LOAD_TRUE); break; case TK("True"): emit(OP_LOAD_TRUE); break;
@ -721,7 +527,7 @@ private:
} }
int emit(Opcode opcode, int arg=-1, bool keepline=false) { int emit(Opcode opcode, int arg=-1, bool keepline=false) {
int line = parser->prev.line; int line = prev().line;
co()->codes.push_back( co()->codes.push_back(
Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line} Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line}
); );
@ -738,7 +544,7 @@ private:
void compile_block_body(CompilerAction action=nullptr) { void compile_block_body(CompilerAction action=nullptr) {
if(action == nullptr) action = &Compiler::compile_stmt; if(action == nullptr) action = &Compiler::compile_stmt;
consume(TK(":")); consume(TK(":"));
if(peek()!=TK("@eol") && peek()!=TK("@eof")){ if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
(this->*action)(); // inline block (this->*action)(); // inline block
return; return;
} }
@ -746,7 +552,7 @@ private:
SyntaxError("expected a new line after ':'"); SyntaxError("expected a new line after ':'");
} }
consume(TK("@indent")); consume(TK("@indent"));
while (peek() != TK("@dedent")) { while (curr().type != TK("@dedent")) {
match_newlines(); match_newlines();
(this->*action)(); (this->*action)();
match_newlines(); match_newlines();
@ -756,7 +562,7 @@ private:
Token _compile_import() { Token _compile_import() {
consume(TK("@id")); consume(TK("@id"));
Token tkmodule = parser->prev; Token tkmodule = prev();
int index = co()->add_name(tkmodule.str(), NAME_SPECIAL); int index = co()->add_name(tkmodule.str(), NAME_SPECIAL);
emit(OP_IMPORT_NAME, index); emit(OP_IMPORT_NAME, index);
return tkmodule; return tkmodule;
@ -768,7 +574,7 @@ private:
Token tkmodule = _compile_import(); Token tkmodule = _compile_import();
if (match(TK("as"))) { if (match(TK("as"))) {
consume(TK("@id")); consume(TK("@id"));
tkmodule = parser->prev; tkmodule = prev();
} }
int index = co()->add_name(tkmodule.str(), name_scope()); int index = co()->add_name(tkmodule.str(), name_scope());
emit(OP_STORE_NAME, index); emit(OP_STORE_NAME, index);
@ -789,12 +595,12 @@ private:
do { do {
emit(OP_DUP_TOP_VALUE); emit(OP_DUP_TOP_VALUE);
consume(TK("@id")); consume(TK("@id"));
Token tkname = parser->prev; Token tkname = prev();
int index = co()->add_name(tkname.str(), NAME_ATTR); int index = co()->add_name(tkname.str(), NAME_ATTR);
emit(OP_BUILD_ATTR, index); emit(OP_BUILD_ATTR, index);
if (match(TK("as"))) { if (match(TK("as"))) {
consume(TK("@id")); consume(TK("@id"));
tkname = parser->prev; tkname = prev();
} }
index = co()->add_name(tkname.str(), name_scope()); index = co()->add_name(tkname.str(), name_scope());
emit(OP_STORE_NAME, index); emit(OP_STORE_NAME, index);
@ -807,14 +613,14 @@ private:
// ['a', '1', '2', '+', '='] // ['a', '1', '2', '+', '=']
// //
void parse_expression(Precedence precedence) { void parse_expression(Precedence precedence) {
lex_token(); advance();
GrammarFn prefix = rules[parser->prev.type].prefix; GrammarFn prefix = rules[prev().type].prefix;
if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type)); if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
(this->*prefix)(); (this->*prefix)();
bool meet_assign_token = false; bool meet_assign_token = false;
while (rules[peek()].precedence >= precedence) { while (rules[curr().type].precedence >= precedence) {
lex_token(); advance();
TokenIndex op = parser->prev.type; TokenIndex op = prev().type;
if (op == TK("=")){ if (op == TK("=")){
if(meet_assign_token) SyntaxError(); if(meet_assign_token) SyntaxError();
meet_assign_token = true; meet_assign_token = true;
@ -891,7 +697,7 @@ private:
do { do {
consume(TK("except")); consume(TK("except"));
if(match(TK("@id"))){ if(match(TK("@id"))){
int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL); int name_idx = co()->add_name(prev().str(), NAME_SPECIAL);
emit(OP_EXCEPTION_MATCH, name_idx); emit(OP_EXCEPTION_MATCH, name_idx);
}else{ }else{
emit(OP_LOAD_TRUE); emit(OP_LOAD_TRUE);
@ -901,7 +707,7 @@ private:
compile_block_body(); compile_block_body();
patches.push_back(emit(OP_JUMP_ABSOLUTE)); patches.push_back(emit(OP_JUMP_ABSOLUTE));
patch_jump(patch); patch_jump(patch);
}while(peek() == TK("except")); }while(curr().type == TK("except"));
emit(OP_RE_RAISE); // no match, re-raise emit(OP_RE_RAISE); // no match, re-raise
for (int patch : patches) patch_jump(patch); for (int patch : patches) patch_jump(patch);
} }
@ -968,7 +774,7 @@ private:
EXPR(); EXPR();
consume(TK("as")); consume(TK("as"));
consume(TK("@id")); consume(TK("@id"));
Token tkname = parser->prev; Token tkname = prev();
int index = co()->add_name(tkname.str(), name_scope()); int index = co()->add_name(tkname.str(), name_scope());
emit(OP_STORE_NAME, index); emit(OP_STORE_NAME, index);
emit(OP_LOAD_NAME_REF, index); emit(OP_LOAD_NAME_REF, index);
@ -979,18 +785,18 @@ private:
} else if(match(TK("label"))){ } else if(match(TK("label"))){
if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
consume(TK(".")); consume(TK("@id")); consume(TK(".")); consume(TK("@id"));
Str label = parser->prev.str(); Str label = prev().str();
bool ok = co()->add_label(label); bool ok = co()->add_label(label);
if(!ok) SyntaxError("label '" + label + "' already exists"); if(!ok) SyntaxError("label '" + label + "' already exists");
consume_end_stmt(); consume_end_stmt();
} else if(match(TK("goto"))){ // https://entrian.com/goto/ } else if(match(TK("goto"))){ // https://entrian.com/goto/
if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
consume(TK(".")); consume(TK("@id")); consume(TK(".")); consume(TK("@id"));
emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL)); emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL));
consume_end_stmt(); consume_end_stmt();
} else if(match(TK("raise"))){ } else if(match(TK("raise"))){
consume(TK("@id")); consume(TK("@id"));
int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL); int dummy_t = co()->add_name(prev().str(), NAME_SPECIAL);
if(match(TK("(")) && !match(TK(")"))){ if(match(TK("(")) && !match(TK(")"))){
EXPR(); consume(TK(")")); EXPR(); consume(TK(")"));
}else{ }else{
@ -1005,7 +811,7 @@ private:
} else if(match(TK("global"))){ } else if(match(TK("global"))){
do { do {
consume(TK("@id")); consume(TK("@id"));
co()->global_names[parser->prev.str()] = 1; co()->global_names[prev().str()] = 1;
} while (match(TK(","))); } while (match(TK(",")));
consume_end_stmt(); consume_end_stmt();
} else if(match(TK("pass"))){ } else if(match(TK("pass"))){
@ -1030,10 +836,10 @@ private:
void compile_class(){ void compile_class(){
consume(TK("@id")); consume(TK("@id"));
int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); int cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL);
int super_cls_name_idx = -1; int super_cls_name_idx = -1;
if(match(TK("(")) && match(TK("@id"))){ if(match(TK("(")) && match(TK("@id"))){
super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); super_cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL);
consume(TK(")")); consume(TK(")"));
} }
if(super_cls_name_idx == -1) emit(OP_LOAD_NONE); if(super_cls_name_idx == -1) emit(OP_LOAD_NONE);
@ -1059,13 +865,13 @@ private:
} }
consume(TK("@id")); consume(TK("@id"));
const Str& name = parser->prev.str(); const Str& name = prev().str();
if(func.has_name(name)) SyntaxError("duplicate argument name"); if(func.has_name(name)) SyntaxError("duplicate argument name");
// eat type hints // eat type hints
if(enable_type_hints && match(TK(":"))) consume(TK("@id")); if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
if(state == 0 && peek() == TK("=")) state = 2; if(state == 0 && curr().type == TK("=")) state = 2;
switch (state) switch (state)
{ {
@ -1075,7 +881,7 @@ private:
consume(TK("=")); consume(TK("="));
PyObject* value = read_literal(); PyObject* value = read_literal();
if(value == nullptr){ if(value == nullptr){
SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type)); SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
} }
func.kwargs.set(name, value); func.kwargs.set(name, value);
func.kwargs_order.push_back(name); func.kwargs_order.push_back(name);
@ -1090,11 +896,11 @@ private:
Function func; Function func;
StrName obj_name; StrName obj_name;
consume(TK("@id")); consume(TK("@id"));
func.name = parser->prev.str(); func.name = prev().str();
if(!co()->_is_compiling_class && match(TK("::"))){ if(!co()->_is_compiling_class && match(TK("::"))){
consume(TK("@id")); consume(TK("@id"));
obj_name = func.name; obj_name = func.name;
func.name = parser->prev.str(); func.name = prev().str();
} }
consume(TK("(")); consume(TK("("));
if (!match(TK(")"))) { if (!match(TK(")"))) {
@ -1104,7 +910,7 @@ private:
if(match(TK("->"))){ if(match(TK("->"))){
if(!match(TK("None"))) consume(TK("@id")); if(!match(TK("None"))) consume(TK("@id"));
} }
func.code = make_sp<CodeObject>(parser->src, func.name.str()); func.code = make_sp<CodeObject>(lexer->src, func.name.str());
this->codes.push(func.code); this->codes.push(func.code);
compile_block_body(); compile_block_body();
func.code->optimize(vm); func.code->optimize(vm);
@ -1132,11 +938,11 @@ private:
PyObject* read_literal(){ PyObject* read_literal(){
if(match(TK("-"))){ if(match(TK("-"))){
consume(TK("@num")); consume(TK("@num"));
PyObject* val = parser->prev.value; PyObject* val = get_value(prev());
return vm->num_negated(val); return vm->num_negated(val);
} }
if(match(TK("@num"))) return parser->prev.value; if(match(TK("@num"))) return get_value(prev());
if(match(TK("@str"))) return parser->prev.value; if(match(TK("@str"))) return get_value(prev());
if(match(TK("True"))) return VAR(true); if(match(TK("True"))) return VAR(true);
if(match(TK("False"))) return VAR(false); if(match(TK("False"))) return VAR(false);
if(match(TK("None"))) return vm->None; if(match(TK("None"))) return vm->None;
@ -1144,23 +950,8 @@ private:
return nullptr; return nullptr;
} }
/***** Error Reporter *****/ void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); }
void throw_err(Str type, Str msg){ void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); }
int lineno = parser->curr.line;
const char* cursor = parser->curr.start;
// if error occurs in lexing, lineno should be `parser->current_line`
if(lexing_count > 0){
lineno = parser->current_line;
cursor = parser->curr_char;
}
if(parser->peekchar() == '\n') lineno--;
auto e = Exception("SyntaxError", msg);
e.st_push(parser->src->snapshot(lineno, cursor));
throw e;
}
void SyntaxError(Str msg){ throw_err("SyntaxError", msg); }
void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); }
void IndentationError(Str msg){ throw_err("IndentationError", msg); }
public: public:
CodeObject_ compile(){ CodeObject_ compile(){
@ -1168,11 +959,16 @@ public:
if(used) UNREACHABLE(); if(used) UNREACHABLE();
used = true; used = true;
CodeObject_ code = make_sp<CodeObject>(parser->src, Str("<module>")); tokens = lexer->run();
// if(lexer->src->filename == "tests/01_int.py"){
// for(auto& t: tokens) std::cout << t.info() << std::endl;
// }
CodeObject_ code = make_sp<CodeObject>(lexer->src, lexer->src->filename);
codes.push(code); codes.push(code);
lex_token(); lex_token(); advance(); // skip @sof, so prev() is always valid
match_newlines(); match_newlines(); // skip leading '\n'
if(mode()==EVAL_MODE) { if(mode()==EVAL_MODE) {
EXPR_TUPLE(); EXPR_TUPLE();

108
src/expr.h Normal file
View File

@ -0,0 +1,108 @@
#pragma once

#include "codeobject.h"
#include "common.h"
// NOTE(review): this commit renames parser.h -> lexer.h everywhere else
// (compiler.h include, amalgamation pipeline); use lexer.h here too so the
// single-header build keeps working. Confirm Token/TokenIndex live in lexer.h.
#include "lexer.h"
#include "error.h"
#include "ceval.h"

#include <memory>
#include <vector>   // std::vector is used below; do not rely on transitive includes

namespace pkpy{

struct Expression;
typedef std::unique_ptr<Expression> Expression_;

// Base class of all AST expression nodes.
// Nodes are owned via Expression_ (unique_ptr<Expression>) and deleted
// through the base pointer, so the destructor MUST be virtual.
struct Expression{
    std::vector<Expression_> children;
    virtual ~Expression() = default;
    // Short human-readable tag for debugging/dumping the AST.
    virtual Str to_string() const = 0;
};

// A bare identifier together with the scope it resolves in.
struct NameExpr: Expression{
    Str name;
    NameScope scope;
    NameExpr(Str name, NameScope scope): name(name), scope(scope) {}
    Str to_string() const override { return name; }
};

// A parenthesized sub-expression.
struct GroupExpr: Expression{
    Expression_ expr;
    GroupExpr(Expression_ expr): expr(std::move(expr)) {}
    Str to_string() const override { return "()"; }
};

// A unary operator applied to its single child.
struct UnaryExpr: Expression{
    TokenIndex op;
    UnaryExpr(TokenIndex op): op(op) {}
    Str to_string() const override { return TK_STR(op); }
};

struct NotExpr: Expression{
    Str to_string() const override { return "not"; }
};

struct AndExpr: Expression{
    Str to_string() const override { return "and"; }
};

struct OrExpr: Expression{
    Str to_string() const override { return "or"; }
};

// None, True, False, ...
struct SpecialValueExpr: Expression{
    TokenIndex token;
    SpecialValueExpr(TokenIndex token): token(token) {}
    Str to_string() const override { return TK_STR(token); }
};

// @num, @str which needs to invoke OP_LOAD_CONST
struct LiteralExpr: Expression{
    PyObject* value;
    LiteralExpr(PyObject* value): value(value) {}
    Str to_string() const override { return "literal"; }
};

struct ListExpr: Expression{
    Str to_string() const override { return "[]"; }
};

struct DictExpr: Expression{
    Str to_string() const override { return "{}"; }
};

struct LambdaExpr: Expression{
    Str to_string() const override { return "lambda"; }
};

struct FStringExpr: Expression{
    Str to_string() const override { return "@fstr"; }
};

// Attribute access: `obj.name`.
struct AttribExpr: Expression{
    Str to_string() const override { return "."; }
};

// A call expression: `f(...)`.
struct CallExpr: Expression{
    Str to_string() const override { return "()"; }
};

// A binary operator applied to its two children.
struct BinaryExpr: Expression{
    TokenIndex op;
    BinaryExpr(TokenIndex op): op(op) {}
    Str to_string() const override { return TK_STR(op); }
};

struct TernaryExpr: Expression{
    Str to_string() const override { return "?"; }
};

struct AssignExpr: Expression{
    Str to_string() const override { return "="; }
};

struct CommaExpr: Expression{
    Str to_string() const override { return ","; }
};

} // namespace pkpy

View File

@ -58,7 +58,7 @@ struct Frame {
} }
PyObject* pop(){ PyObject* pop(){
#if PK_EXTRA_CHECK #if DEBUG_EXTRA_CHECK
if(_data.empty()) throw std::runtime_error("_data.empty() is true"); if(_data.empty()) throw std::runtime_error("_data.empty() is true");
#endif #endif
PyObject* v = _data.back(); PyObject* v = _data.back();
@ -67,7 +67,7 @@ struct Frame {
} }
void _pop(){ void _pop(){
#if PK_EXTRA_CHECK #if DEBUG_EXTRA_CHECK
if(_data.empty()) throw std::runtime_error("_data.empty() is true"); if(_data.empty()) throw std::runtime_error("_data.empty() is true");
#endif #endif
_data.pop_back(); _data.pop_back();
@ -88,14 +88,14 @@ struct Frame {
} }
PyObject*& top(){ PyObject*& top(){
#if PK_EXTRA_CHECK #if DEBUG_EXTRA_CHECK
if(_data.empty()) throw std::runtime_error("_data.empty() is true"); if(_data.empty()) throw std::runtime_error("_data.empty() is true");
#endif #endif
return _data.back(); return _data.back();
} }
PyObject*& top_1(){ PyObject*& top_1(){
#if PK_EXTRA_CHECK #if DEBUG_EXTRA_CHECK
if(_data.size() < 2) throw std::runtime_error("_data.size() < 2"); if(_data.size() < 2) throw std::runtime_error("_data.size() < 2");
#endif #endif
return _data[_data.size()-2]; return _data[_data.size()-2];

View File

@ -67,9 +67,9 @@ struct ManagedHeap{
~ManagedHeap(){ ~ManagedHeap(){
for(PyObject* obj: _no_gc) delete obj; for(PyObject* obj: _no_gc) delete obj;
for(auto& [type, count]: deleted){ // for(auto& [type, count]: deleted){
std::cout << "GC: " << type << "=" << count << std::endl; // std::cout << "GC: " << type << "=" << count << std::endl;
} // }
} }
int sweep(VM* vm){ int sweep(VM* vm){

510
src/lexer.h Normal file
View File

@ -0,0 +1,510 @@
#pragma once
#include "common.h"
#include "error.h"
#include "str.h"
namespace pkpy{
// Index into kTokens; uint8_t is sufficient because kTokenCount < 256.
typedef uint8_t TokenIndex;

// Master token-spelling table. ORDER MATTERS: a token's TokenIndex is its
// position in this array, and the keyword range is delimited by the
// KW_BEGIN/KW_END markers below (resolved via kTokenKwBegin/kTokenKwEnd).
constexpr const char* kTokens[] = {
    "@eof", "@eol", "@sof",
    ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::",
    "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->",
    "<<", ">>", "&", "|", "^", "?", "@",
    "==", "!=", ">=", "<=",
    "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=",
    /** KW_BEGIN **/
    "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
    "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
    "goto", "label",      // extended keywords, not available in cpython
    "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
    /** KW_END **/
    "is not", "not in",
    "@id", "@num", "@str", "@fstr",
    "@indent", "@dedent"
};

// Payload carried by a token: nothing, an integer, a float, or a string.
using TokenValue = std::variant<std::monostate, i64, f64, Str>;
const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);

// Compile-time lookup of a token spelling -> its TokenIndex.
// An unknown spelling reaches UNREACHABLE() (a hard error when evaluated
// in a constexpr context).
constexpr TokenIndex TK(const char token[]) {
    for(int k=0; k<kTokenCount; k++){
        const char* i = kTokens[k];
        const char* j = token;
        while(*i && *j && *i == *j) { i++; j++;}
        if(*i == *j) return k;
    }
    UNREACHABLE();
}

// Spelling of a TokenIndex (reverse of TK).
#define TK_STR(t) kTokens[t]

// Keyword sub-range of kTokens, plus a spelling -> index map the lexer uses
// to recognize keywords in eat_name().
const TokenIndex kTokenKwBegin = TK("class");
const TokenIndex kTokenKwEnd = TK("raise");
const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
    std::map<std::string_view, TokenIndex> map;
    for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k;
    return map;
}();
// One lexical token: its kind, a view into the source buffer, the line it
// started on, and an optional literal payload.
struct Token{
    TokenIndex type;
    const char* start;      // points into SourceData's buffer (not owned)
    int length;             // byte length of the lexeme
    int line;               // 1-based source line
    TokenValue value;       // literal payload for @num/@str/@fstr, else monostate

    // The raw lexeme as a string.
    Str str() const { return Str(start, length);}

    // Debug description: "<line>: <token-kind> '<lexeme>'".
    Str info() const {
        StrStream ss;
        Str raw = str();
        if (raw == Str("\n")) raw = "\\n";  // keep the dump on one line
        ss << line << ": " << TK_STR(type) << " '" << raw << "'";
        return ss.str();
    }
};
// Operator precedence levels, lowest binding first.
// https://docs.python.org/3/reference/expressions.html
enum Precedence {
  PREC_NONE,
  PREC_ASSIGNMENT,    // =
  PREC_COMMA,         // ,
  PREC_TERNARY,       // ?:
  PREC_LOGICAL_OR,    // or
  PREC_LOGICAL_AND,   // and
  PREC_LOGICAL_NOT,   // not
  PREC_EQUALITY,      // == !=
  PREC_TEST,          // in / is / is not / not in
  PREC_COMPARISION,   // < > <= >=
  PREC_BITWISE_OR,    // |
  PREC_BITWISE_XOR,   // ^
  PREC_BITWISE_AND,   // &
  PREC_BITWISE_SHIFT, // << >>
  PREC_TERM,          // + -
  PREC_FACTOR,        // * / % //
  PREC_UNARY,         // - not
  PREC_EXPONENT,      // **
  PREC_CALL,          // ()
  PREC_SUBSCRIPT,     // []
  PREC_ATTRIB,        // .index
  PREC_PRIMARY,
};

// How a string literal should be scanned: plain, raw (no escapes), or f-string.
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
// Hand-written single-pass lexer: turns a SourceData buffer into a flat
// std::vector<Token>. An instance is single-use; call run() exactly once.
struct Lexer {
    shared_ptr<SourceData> src;
    const char* token_start;    // first char of the token currently being scanned
    const char* curr_char;      // scan cursor into src->source
    int current_line = 1;
    std::vector<Token> nexts;   // tokens emitted so far (starts with @sof)
    stack<int> indents;         // indentation column stack; bottom is always 0
    int brackets_level = 0;     // >0 inside (), [], {}: indentation is ignored
    bool used = false;          // guards against calling run() twice

    char peekchar() const{ return *curr_char; }

    // Consume exactly n consecutive occurrences of c0.
    // On failure nothing is consumed and false is returned.
    bool match_n_chars(int n, char c0){
        const char* c = curr_char;
        for(int i=0; i<n; i++){
            if(*c == '\0') return false;
            if(*c != c0) return false;
            c++;
        }
        for(int i=0; i<n; i++) eatchar_include_newline();
        return true;
    }

    // Consume spaces/tabs and return the column width (a tab counts as 4).
    int eat_spaces(){
        int count = 0;
        while (true) {
            switch (peekchar()) {
                case ' ' : count+=1; break;
                case '\t': count+=4; break;
                default: return count;
            }
            eatchar();
        }
    }

    // Handle leading whitespace at the start of a logical line, emitting
    // @indent/@dedent tokens. Returns false on an inconsistent dedent.
    // https://docs.python.org/3/reference/lexical_analysis.html#indentation
    bool eat_indentation(){
        if(brackets_level > 0) return true;     // no indentation inside brackets
        int spaces = eat_spaces();
        if(peekchar() == '#') skip_line_comment();
        // blank/comment-only lines do not affect indentation
        if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true;
        if(spaces > indents.top()){
            indents.push(spaces);
            nexts.push_back(Token{TK("@indent"), token_start, 0, current_line});
        } else if(spaces < indents.top()){
            while(spaces < indents.top()){
                indents.pop();
                nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line});
            }
            // dedent must land exactly on an enclosing indentation level
            if(spaces != indents.top()){
                return false;
            }
        }
        return true;
    }

    // Consume one char; newlines must go through eatchar_include_newline()
    // so line bookkeeping stays correct.
    char eatchar() {
        char c = peekchar();
        if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline");
        curr_char++;
        return c;
    }

    // Consume one char, updating line number and line-start table on '\n'.
    char eatchar_include_newline() {
        char c = peekchar();
        curr_char++;
        if (c == '\n'){
            current_line++;
            src->line_starts.push_back(curr_char);
        }
        return c;
    }

    // Scan an identifier/keyword starting at token_start (the first char was
    // already consumed by lex_one_token, hence the initial curr_char--).
    // Accepts ASCII [A-Za-z0-9_] plus unicode Lo characters.
    // Returns 0 on success, or a nonzero error code mapped to a SyntaxError
    // by the caller (1: invalid char, 2: bad utf8, 3: empty, 4: bad JSON token).
    int eat_name() {
        curr_char--;
        while(true){
            uint8_t c = peekchar();
            int u8bytes = 0;
            // decode the utf-8 sequence length from the lead byte
            if((c & 0b10000000) == 0b00000000) u8bytes = 1;
            else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
            else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
            else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
            else return 1;
            if(u8bytes == 1){
                if(isalpha(c) || c=='_' || isdigit(c)) {
                    curr_char++;
                    continue;
                }else{
                    break;
                }
            }
            // handle multibyte char: decode the code point and accept it only
            // if it is a unicode "Letter, other" character
            std::string u8str(curr_char, u8bytes);
            if(u8str.size() != u8bytes) return 2;
            uint32_t value = 0;
            for(int k=0; k < u8bytes; k++){
                uint8_t b = u8str[k];
                if(k==0){
                    if(u8bytes == 2) value = (b & 0b00011111) << 6;
                    else if(u8bytes == 3) value = (b & 0b00001111) << 12;
                    else if(u8bytes == 4) value = (b & 0b00000111) << 18;
                }else{
                    value |= (b & 0b00111111) << (6*(u8bytes-k-1));
                }
            }
            if(is_unicode_Lo_char(value)) curr_char += u8bytes;
            else break;
        }
        int length = (int)(curr_char - token_start);
        if(length == 0) return 3;
        std::string_view name(token_start, length);
        if(src->mode == JSON_MODE){
            // JSON only allows these three bare words
            if(name == "true"){
                add_token(TK("True"));
            } else if(name == "false"){
                add_token(TK("False"));
            } else if(name == "null"){
                add_token(TK("None"));
            } else {
                return 4;
            }
            return 0;
        }
        if(kTokenKwMap.count(name)){
            // fuse the two-word operators "not in" / "is not" into one token
            if(name == "not"){
                if(strncmp(curr_char, " in", 3) == 0){
                    curr_char += 3;
                    add_token(TK("not in"));
                    return 0;
                }
            }else if(name == "is"){
                if(strncmp(curr_char, " not", 4) == 0){
                    curr_char += 4;
                    add_token(TK("is not"));
                    return 0;
                }
            }
            add_token(kTokenKwMap.at(name));
        } else {
            add_token(TK("@id"));
        }
        return 0;
    }

    // Consume up to (but not including) the next newline or EOF.
    void skip_line_comment() {
        char c;
        while ((c = peekchar()) != '\0') {
            if (c == '\n') return;
            eatchar();
        }
    }

    // Consume c if it is next; return whether it was consumed.
    bool matchchar(char c) {
        if (peekchar() != c) return false;
        eatchar_include_newline();
        return true;
    }

    // Emit a token spanning [token_start, curr_char), tracking bracket depth.
    void add_token(TokenIndex type, TokenValue value={}) {
        switch(type){
            case TK("{"): case TK("["): case TK("("): brackets_level++; break;
            case TK(")"): case TK("]"): case TK("}"): brackets_level--; break;
        }
        nexts.push_back( Token{
            type,
            token_start,
            (int)(curr_char - token_start),
            // @eol is attributed to the line it terminates, not the next one
            current_line - ((type == TK("@eol")) ? 1 : 0),
            value
        });
    }

    // Emit `two` if the next char is c, otherwise `one` (e.g. '=' : "+" / "+=").
    void add_token_2(char c, TokenIndex one, TokenIndex two) {
        if (matchchar(c)) add_token(two);
        else add_token(one);
    }

    // Scan a string body after its opening quote, handling triple quotes and
    // (unless raw) backslash escapes. Returns the decoded contents.
    Str eat_string_until(char quote, bool raw) {
        bool quote3 = match_n_chars(2, quote);
        std::vector<char> buff;
        while (true) {
            char c = eatchar_include_newline();
            if (c == quote){
                // inside a triple-quoted string a single quote char is literal
                if(quote3 && !match_n_chars(2, quote)){
                    buff.push_back(c);
                    continue;
                }
                break;
            }
            if (c == '\0'){
                // in the REPL an unterminated triple quote means "keep typing"
                if(quote3 && src->mode == REPL_MODE){
                    throw NeedMoreLines(false);
                }
                SyntaxError("EOL while scanning string literal");
            }
            if (c == '\n'){
                if(!quote3) SyntaxError("EOL while scanning string literal");
                else{
                    buff.push_back(c);
                    continue;
                }
            }
            if (!raw && c == '\\') {
                switch (eatchar_include_newline()) {
                    case '"':  buff.push_back('"');  break;
                    case '\'': buff.push_back('\''); break;
                    case '\\': buff.push_back('\\'); break;
                    case 'n':  buff.push_back('\n'); break;
                    case 'r':  buff.push_back('\r'); break;
                    case 't':  buff.push_back('\t'); break;
                    default: SyntaxError("invalid escape char");
                }
            } else {
                buff.push_back(c);
            }
        }
        return Str(buff.data(), buff.size());
    }

    // Scan a string literal and emit @str or @fstr with its decoded value.
    void eat_string(char quote, StringType type) {
        Str s = eat_string_until(quote, type == RAW_STRING);
        if(type == F_STRING){
            add_token(TK("@fstr"), s);
        }else{
            add_token(TK("@str"), s);
        }
    }

    // Scan a numeric literal (decimal int/float or 0x hex int) and emit @num.
    void eat_number() {
        static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
        std::smatch m;
        const char* i = token_start;
        while(*i != '\n' && *i != '\0') i++;
        std::string s = std::string(token_start, i);
        try{
            if (std::regex_search(s, m, pattern)) {
                // here is m.length()-1, since the first char was eaten by lex_token()
                for(int j=0; j<m.length()-1; j++) eatchar();
                int base = 10;
                size_t size;
                if (m[1].matched) base = 16;
                if (m[2].matched) {
                    if(base == 16) SyntaxError("hex literal should not contain a dot");
                    add_token(TK("@num"), S_TO_FLOAT(m[0], &size));
                } else {
                    add_token(TK("@num"), S_TO_INT(m[0], &size, base));
                }
                // the whole match must have been consumed by the conversion
                if (size != m.length()) UNREACHABLE();
            }
        }catch(std::exception& _){
            SyntaxError("invalid number literal");
        }
    }

    // Lex one token (plus any pending @dedent at EOF) into `nexts`.
    // Returns false exactly once, after @eof has been emitted.
    bool lex_one_token() {
        while (peekchar() != '\0') {
            token_start = curr_char;
            char c = eatchar_include_newline();
            switch (c) {
                case '\'': case '"': eat_string(c, NORMAL_STRING); return true;
                case '#': skip_line_comment(); break;
                case '{': add_token(TK("{")); return true;
                case '}': add_token(TK("}")); return true;
                case ',': add_token(TK(",")); return true;
                case ':': add_token_2(':', TK(":"), TK("::")); return true;
                case ';': add_token(TK(";")); return true;
                case '(': add_token(TK("(")); return true;
                case ')': add_token(TK(")")); return true;
                case '[': add_token(TK("[")); return true;
                case ']': add_token(TK("]")); return true;
                case '@': add_token(TK("@")); return true;
                case '%': add_token_2('=', TK("%"), TK("%=")); return true;
                case '&': add_token_2('=', TK("&"), TK("&=")); return true;
                case '|': add_token_2('=', TK("|"), TK("|=")); return true;
                case '^': add_token_2('=', TK("^"), TK("^=")); return true;
                case '?': add_token(TK("?")); return true;
                case '.': {
                    if(matchchar('.')) {
                        if(matchchar('.')) {
                            add_token(TK("..."));
                        } else {
                            SyntaxError("invalid token '..'");
                        }
                    } else {
                        add_token(TK("."));
                    }
                    return true;
                }
                case '=': add_token_2('=', TK("="), TK("==")); return true;
                case '+': add_token_2('=', TK("+"), TK("+=")); return true;
                case '>': {
                    if(matchchar('=')) add_token(TK(">="));
                    else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>="));
                    else add_token(TK(">"));
                    return true;
                }
                case '<': {
                    if(matchchar('=')) add_token(TK("<="));
                    else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<="));
                    else add_token(TK("<"));
                    return true;
                }
                case '-': {
                    if(matchchar('=')) add_token(TK("-="));
                    else if(matchchar('>')) add_token(TK("->"));
                    else add_token(TK("-"));
                    return true;
                }
                case '!':
                    if(matchchar('=')) add_token(TK("!="));
                    else SyntaxError("expected '=' after '!'");
                    break;
                case '*':
                    if (matchchar('*')) {
                        add_token(TK("**"));  // '**'
                    } else {
                        add_token_2('=', TK("*"), TK("*="));
                    }
                    return true;
                case '/':
                    if(matchchar('/')) {
                        add_token_2('=', TK("//"), TK("//="));
                    } else {
                        add_token_2('=', TK("/"), TK("/="));
                    }
                    return true;
                case '\r': break;       // just ignore '\r'
                case ' ': case '\t': eat_spaces(); break;
                case '\n': {
                    add_token(TK("@eol"));
                    if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level");
                    return true;
                }
                default: {
                    // string prefixes: f'' / f"" and r'' / r""
                    if(c == 'f'){
                        if(matchchar('\'')) {eat_string('\'', F_STRING); return true;}
                        if(matchchar('"')) {eat_string('"', F_STRING); return true;}
                    }else if(c == 'r'){
                        if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;}
                        if(matchchar('"')) {eat_string('"', RAW_STRING); return true;}
                    }
                    if (c >= '0' && c <= '9') {
                        eat_number();
                        return true;
                    }
                    switch (eat_name())     // error codes map to diagnostics; 1/2/4 throw
                    {
                        case 0: break;
                        case 1: SyntaxError("invalid char: " + std::string(1, c));
                        case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
                        case 3: SyntaxError("@id contains invalid char"); break;
                        case 4: SyntaxError("invalid JSON token"); break;
                        default: UNREACHABLE();
                    }
                    return true;
                }
            }
        }
        // at EOF: drain the indentation stack one @dedent per call, then @eof
        token_start = curr_char;
        while(indents.size() > 1){
            indents.pop();
            add_token(TK("@dedent"));
            return true;
        }
        add_token(TK("@eof"));
        return false;
    }

    /***** Error Reporter *****/
    // Report at the current position; if the cursor just passed a newline,
    // attribute the error to the line that just ended.
    void throw_err(Str type, Str msg){
        int lineno = current_line;
        const char* cursor = curr_char;
        if(peekchar() == '\n'){
            lineno--;
            cursor--;
        }
        throw_err(type, msg, lineno, cursor);
    }

    void throw_err(Str type, Str msg, int lineno, const char* cursor){
        // bugfix: use the requested exception type; previously this was
        // hard-coded to "SyntaxError", so IndentationError was mislabeled.
        auto e = Exception(type, msg);
        e.st_push(src->snapshot(lineno, cursor));
        throw e;
    }
    void SyntaxError(Str msg){ throw_err("SyntaxError", msg); }
    void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); }
    void IndentationError(Str msg){ throw_err("IndentationError", msg); }

    Lexer(shared_ptr<SourceData> src) {
        this->src = src;
        this->token_start = src->source;
        this->curr_char = src->source;
        this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line});
        this->indents.push(0);
    }

    // Tokenize the whole source. Single-use: a second call is a logic error.
    std::vector<Token> run() {
        if(used) UNREACHABLE();
        used = true;
        while (lex_one_token());
        return std::move(nexts);
    }
};
} // namespace pkpy

View File

@ -1,302 +0,0 @@
#pragma once
#include "error.h"
#include "obj.h"
namespace pkpy{
typedef uint8_t TokenIndex;
constexpr const char* kTokens[] = {
"@error", "@eof", "@eol", "@sof",
".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::",
"+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->",
"<<", ">>", "&", "|", "^", "?", "@",
"==", "!=", ">=", "<=",
"+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=",
/** KW_BEGIN **/
"class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
"None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
"goto", "label", // extended keywords, not available in cpython
"while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
/** KW_END **/
"is not", "not in",
"@id", "@num", "@str", "@fstr",
"@indent", "@dedent"
};
const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);
constexpr TokenIndex TK(const char token[]) {
for(int k=0; k<kTokenCount; k++){
const char* i = kTokens[k];
const char* j = token;
while(*i && *j && *i == *j) { i++; j++;}
if(*i == *j) return k;
}
UNREACHABLE();
}
#define TK_STR(t) kTokens[t]
const TokenIndex kTokenKwBegin = TK("class");
const TokenIndex kTokenKwEnd = TK("raise");
const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
std::map<std::string_view, TokenIndex> map;
for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k;
return map;
}();
struct Token{
TokenIndex type;
const char* start;
int length;
int line;
PyObject* value;
Str str() const { return Str(start, length);}
Str info() const {
StrStream ss;
Str raw = str();
if (raw == Str("\n")) raw = "\\n";
ss << line << ": " << TK_STR(type) << " '" << raw << "'";
return ss.str();
}
};
// https://docs.python.org/3/reference/expressions.html
enum Precedence {
PREC_NONE,
PREC_ASSIGNMENT, // =
PREC_COMMA, // ,
PREC_TERNARY, // ?:
PREC_LOGICAL_OR, // or
PREC_LOGICAL_AND, // and
PREC_LOGICAL_NOT, // not
PREC_EQUALITY, // == !=
PREC_TEST, // in / is / is not / not in
PREC_COMPARISION, // < > <= >=
PREC_BITWISE_OR, // |
PREC_BITWISE_XOR, // ^
PREC_BITWISE_AND, // &
PREC_BITWISE_SHIFT, // << >>
PREC_TERM, // + -
PREC_FACTOR, // * / % //
PREC_UNARY, // - not
PREC_EXPONENT, // **
PREC_CALL, // ()
PREC_SUBSCRIPT, // []
PREC_ATTRIB, // .index
PREC_PRIMARY,
};
// The context of the parsing phase for the compiler.
struct Parser {
shared_ptr<SourceData> src;
const char* token_start;
const char* curr_char;
int current_line = 1;
Token prev, curr;
queue<Token> nexts;
stack<int> indents;
int brackets_level = 0;
Token next_token(){
if(nexts.empty()){
return Token{TK("@error"), token_start, (int)(curr_char - token_start), current_line};
}
Token t = nexts.front();
if(t.type == TK("@eof") && indents.size()>1){
nexts.pop();
indents.pop();
return Token{TK("@dedent"), token_start, 0, current_line};
}
nexts.pop();
return t;
}
char peekchar() const{ return *curr_char; }
bool match_n_chars(int n, char c0){
const char* c = curr_char;
for(int i=0; i<n; i++){
if(*c == '\0') return false;
if(*c != c0) return false;
c++;
}
for(int i=0; i<n; i++) eatchar_include_newline();
return true;
}
int eat_spaces(){
int count = 0;
while (true) {
switch (peekchar()) {
case ' ' : count+=1; break;
case '\t': count+=4; break;
default: return count;
}
eatchar();
}
}
bool eat_indentation(){
if(brackets_level > 0) return true;
int spaces = eat_spaces();
if(peekchar() == '#') skip_line_comment();
if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true;
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
if(spaces > indents.top()){
indents.push(spaces);
nexts.push(Token{TK("@indent"), token_start, 0, current_line});
} else if(spaces < indents.top()){
while(spaces < indents.top()){
indents.pop();
nexts.push(Token{TK("@dedent"), token_start, 0, current_line});
}
if(spaces != indents.top()){
return false;
}
}
return true;
}
char eatchar() {
char c = peekchar();
if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline");
curr_char++;
return c;
}
char eatchar_include_newline() {
char c = peekchar();
curr_char++;
if (c == '\n'){
current_line++;
src->line_starts.push_back(curr_char);
}
return c;
}
int eat_name() {
curr_char--;
while(true){
uint8_t c = peekchar();
int u8bytes = 0;
if((c & 0b10000000) == 0b00000000) u8bytes = 1;
else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
else return 1;
if(u8bytes == 1){
if(isalpha(c) || c=='_' || isdigit(c)) {
curr_char++;
continue;
}else{
break;
}
}
// handle multibyte char
std::string u8str(curr_char, u8bytes);
if(u8str.size() != u8bytes) return 2;
uint32_t value = 0;
for(int k=0; k < u8bytes; k++){
uint8_t b = u8str[k];
if(k==0){
if(u8bytes == 2) value = (b & 0b00011111) << 6;
else if(u8bytes == 3) value = (b & 0b00001111) << 12;
else if(u8bytes == 4) value = (b & 0b00000111) << 18;
}else{
value |= (b & 0b00111111) << (6*(u8bytes-k-1));
}
}
if(is_unicode_Lo_char(value)) curr_char += u8bytes;
else break;
}
int length = (int)(curr_char - token_start);
if(length == 0) return 3;
std::string_view name(token_start, length);
if(src->mode == JSON_MODE){
if(name == "true"){
set_next_token(TK("True"));
} else if(name == "false"){
set_next_token(TK("False"));
} else if(name == "null"){
set_next_token(TK("None"));
} else {
return 4;
}
return 0;
}
if(kTokenKwMap.count(name)){
if(name == "not"){
if(strncmp(curr_char, " in", 3) == 0){
curr_char += 3;
set_next_token(TK("not in"));
return 0;
}
}else if(name == "is"){
if(strncmp(curr_char, " not", 4) == 0){
curr_char += 4;
set_next_token(TK("is not"));
return 0;
}
}
set_next_token(kTokenKwMap.at(name));
} else {
set_next_token(TK("@id"));
}
return 0;
}
void skip_line_comment() {
char c;
while ((c = peekchar()) != '\0') {
if (c == '\n') return;
eatchar();
}
}
bool matchchar(char c) {
if (peekchar() != c) return false;
eatchar_include_newline();
return true;
}
void set_next_token(TokenIndex type, PyObject* value=nullptr) {
switch(type){
case TK("{"): case TK("["): case TK("("): brackets_level++; break;
case TK(")"): case TK("]"): case TK("}"): brackets_level--; break;
}
nexts.push( Token{
type,
token_start,
(int)(curr_char - token_start),
current_line - ((type == TK("@eol")) ? 1 : 0),
value
});
}
void set_next_token_2(char c, TokenIndex one, TokenIndex two) {
if (matchchar(c)) set_next_token(two);
else set_next_token(one);
}
Parser(shared_ptr<SourceData> src) {
this->src = src;
this->token_start = src->source;
this->curr_char = src->source;
this->nexts.push(Token{TK("@sof"), token_start, 0, current_line});
this->indents.push(0);
}
};
} // namespace pkpy

View File

@ -760,6 +760,7 @@ inline void add_module_gc(VM* vm){
inline void VM::post_init(){ inline void VM::post_init(){
init_builtins(this); init_builtins(this);
#if !DEBUG_NO_BUILTIN_MODULES
add_module_sys(this); add_module_sys(this);
add_module_time(this); add_module_time(this);
add_module_json(this); add_module_json(this);
@ -793,6 +794,7 @@ inline void VM::post_init(){
const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])]; const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])];
return VAR(info.name); return VAR(info.name);
})); }));
#endif
} }
} // namespace pkpy } // namespace pkpy

View File

@ -93,7 +93,7 @@ public:
} }
Frame* top_frame() const { Frame* top_frame() const {
#if PK_EXTRA_CHECK #if DEBUG_EXTRA_CHECK
if(callstack.empty()) UNREACHABLE(); if(callstack.empty()) UNREACHABLE();
#endif #endif
return callstack.top().get(); return callstack.top().get();
@ -166,7 +166,7 @@ public:
if(_module == nullptr) _module = _main; if(_module == nullptr) _module = _main;
try { try {
CodeObject_ code = compile(source, filename, mode); CodeObject_ code = compile(source, filename, mode);
if(_module == _main) std::cout << disassemble(code) << '\n'; // if(_module == _main) std::cout << disassemble(code) << '\n';
return _exec(code, _module); return _exec(code, _module);
}catch (const Exception& e){ }catch (const Exception& e){
*_stderr << e.summary() << '\n'; *_stderr << e.summary() << '\n';