diff --git a/src/codeobject.h b/src/codeobject.h index 02bf9629..51e67fad 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -25,7 +25,15 @@ _Str pad(const _Str& s, const int n){ return s + _Str(n - s.size(), ' '); } +enum CompileMode { + EXEC_MODE, + EVAL_MODE, + SINGLE_MODE +}; + struct CodeObject { + CompileMode mode = EXEC_MODE; + std::vector co_code; _Str co_filename; _Str co_name; diff --git a/src/compiler.h b/src/compiler.h index 89c604da..7980eb7e 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -41,8 +41,6 @@ struct Loop { class Compiler { public: std::unique_ptr parser; - bool repl_mode; - std::stack<_Code> codes; std::stack loops; @@ -61,10 +59,13 @@ public: return loops.top(); } - Compiler(VM* vm, const char* source, _Code code, bool repl_mode){ + CompileMode mode() { + return getCode()->mode; + } + + Compiler(VM* vm, const char* source, _Code code){ this->vm = vm; this->codes.push(code); - this->repl_mode = repl_mode; if (!code->co_filename.empty()) path = code->co_filename; this->parser = std::make_unique(source); @@ -103,6 +104,7 @@ public: rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; + rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; @@ -118,9 +120,8 @@ public: #define EXPR_ANY() parsePrecedence(PREC_NONE) } - void eatString(bool single_quote) { + _Str eatStringUntil(char quote) { std::vector buff; - char quote = (single_quote) ? '\'' : '"'; while (true) { char c = parser->eatChar(); if (c == quote) break; @@ -134,16 +135,23 @@ public: case 'n': buff.push_back('\n'); break; case 'r': buff.push_back('\r'); break; case 't': buff.push_back('\t'); break; - case '\n': break; // Just ignore the next line. - case '\r': if (parser->matchChar('\n')) break; - default: throw SyntaxError(path, parser->makeErrToken(), "invalid syntax"); + case '\n': case '\r': break; + default: throw SyntaxError(path, parser->makeErrToken(), "invalid escape character"); } } else { buff.push_back(c); } } + return _Str(buff.data(), buff.size()); + } - parser->setNextToken(TK("@str"), vm->PyStr(_Str(buff.data(), buff.size()))); + void eatString(char quote, bool fstr) { + _Str s = eatStringUntil(quote); + if(fstr){ + parser->setNextToken(TK("@fstr"), vm->PyStr(s)); + }else{ + parser->setNextToken(TK("@str"), vm->PyStr(s)); + } } void eatNumber() { @@ -182,8 +190,7 @@ public: parser->token_start = parser->current_char; char c = parser->eatCharIncludeNewLine(); switch (c) { - case '"': eatString(false); return; - case '\'': eatString(true); return; + case '\'': case '"': eatString(c, false); return; case '#': parser->skipLineComment(); break; case '{': parser->setNextToken(TK("{")); return; case '}': parser->setNextToken(TK("}")); return; @@ -232,6 +239,10 @@ public: if (isdigit(c)) { eatNumber(); } else if (isalpha(c) || c=='_') { + if(c == 'f'){ + if(parser->matchChar('\'')) {eatString('\'', true); return;} + if(parser->matchChar('"')) {eatString('"', true); return;} + } parser->eatName(); } else { throw SyntaxError(path, parser->makeErrToken(), "unknown character: %c", c); @@ -297,6 +308,35 @@ public: emitCode(OP_LOAD_CONST, index); } + void exprFString() { + PyVar value = parser->previous.value; + std::string s = vm->PyStr_AS_C(value).str(); + std::regex pattern(R"(\{(.*?)\})"); + std::sregex_iterator begin(s.begin(), s.end(), pattern); + std::sregex_iterator end; + int size = 0; + int i = 0; + for(auto it = begin; it != end; it++) { + std::smatch m = *it; + if (i < m.position()) { + std::string literal = s.substr(i, m.position() - i); + emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal))); + size++; + } + emitCode(OP_LOAD_EVAL_FN); + emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str()))); + emitCode(OP_CALL, 1); + size++; + i = m.position() + m.length(); + } + if (i < s.size()) { + std::string literal = s.substr(i, s.size() - i); + emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal))); + size++; + } + emitCode(OP_BUILD_STRING, size); + } + void exprLambda() { throw SyntaxError(path, parser->previous, "lambda is not implemented yet"); } @@ -493,7 +533,7 @@ public: void __compileBlockBody(CompilerAction action) { consume(TK(":")); - if(!matchNewLines(repl_mode)){ + if(!matchNewLines(mode()==SINGLE_MODE)){ throw SyntaxError(path, parser->previous, "expected a new line after ':'"); } consume(TK("@indent")); @@ -657,7 +697,7 @@ public: // If last op is not an assignment, pop the result. uint8_t lastOp = getCode()->co_code.back().op; if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){ - if(repl_mode && parser->indents.top() == 0){ + if(mode()==SINGLE_MODE && parser->indents.top() == 0){ emitCode(OP_PRINT_EXPR); } emitCode(OP_POP_TOP); @@ -713,6 +753,8 @@ public: const _Str& name = parser->previous.str(); if(func.hasName(name)) throw SyntaxError(path, parser->previous, "duplicate argument name"); + if(state == 0 && peek() == TK("=")) state = 2; + switch (state) { case 0: func.args.push_back(name); break; @@ -740,7 +782,7 @@ public: if(match(TK("True"))) goto __LITERAL_EXIT; if(match(TK("False"))) goto __LITERAL_EXIT; if(match(TK("None"))) goto __LITERAL_EXIT; - throw SyntaxError(path, parser->previous, "expect a literal"); + throw SyntaxError(path, parser->previous, "expect a literal, not %s", TK_STR(parser->current.type)); __LITERAL_EXIT: return parser->previous.value; } @@ -757,26 +799,34 @@ __LITERAL_EXIT: } } + void __fillCode(){ + // Lex initial tokens. current <-- next. + lexToken(); + lexToken(); + matchNewLines(); + + if(mode() == EVAL_MODE) { + EXPR_TUPLE(); + consume(TK("@eof")); + return; + } + + while (!match(TK("@eof"))) { + compileTopLevelStatement(); + matchNewLines(); + } + } }; - -_Code compile(VM* vm, const char* source, _Str filename, bool repl_mode=false) { +_Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) { // Skip utf8 BOM if there is any. if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; _Code code = std::make_shared(); code->co_filename = filename; - Compiler compiler(vm, source, code, repl_mode); - - // Lex initial tokens. current <-- next. - compiler.lexToken(); - compiler.lexToken(); - compiler.matchNewLines(); - - while (!compiler.match(TK("@eof"))) { - compiler.compileTopLevelStatement(); - compiler.matchNewLines(); - } + code->mode = mode; + Compiler compiler(vm, source, code); + compiler.__fillCode(); return code; } \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 66f497c4..bd5d82f4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,6 +45,7 @@ void REPL(){ VM* vm = newVM(); while(true){ + CompileMode mode = SINGLE_MODE; vm->printFn(need_more_lines ? "... " : ">>> "); std::string line; std::getline(std::cin, line); @@ -56,6 +57,7 @@ void REPL(){ if(n>=2 && buffer[n-1]=='\n' && buffer[n-2]=='\n'){ need_more_lines = false; line = buffer; + mode = EXEC_MODE; // tmp set to EXEC_MODE buffer.clear(); }else{ continue; @@ -65,7 +67,7 @@ void REPL(){ if(line.empty()) continue; } try{ - _Code code = compile(vm, line.c_str(), "", true); + _Code code = compile(vm, line.c_str(), "", mode); vm->exec(code); #ifdef PK_DEBUG }catch(NeedMoreLines& e){ @@ -106,7 +108,7 @@ int main(int argc, char** argv){ std::string src((std::istreambuf_iterator(file)), std::istreambuf_iterator()); VM* vm = newVM(); Timer timer("编译时间"); - _Code code = compile(vm, src.c_str(), filename, false); + _Code code = compile(vm, src.c_str(), filename); timer.stop(); //std::cout << code->toString() << std::endl; Timer timer2("运行时间"); diff --git a/src/opcodes.h b/src/opcodes.h index e86b894c..6c22325f 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -33,6 +33,7 @@ OPCODE(JUMP_IF_FALSE_OR_POP) OPCODE(LOAD_NONE) OPCODE(LOAD_TRUE) OPCODE(LOAD_FALSE) +OPCODE(LOAD_EVAL_FN) // load eval() callable into stack OPCODE(ASSERT) OPCODE(RAISE_ERROR) @@ -48,5 +49,6 @@ OPCODE(STORE_PTR) // no arg, [ptr, expr] -> *ptr = expr OPCODE(DELETE_PTR) // no arg, [ptr] -> [] -> delete ptr OPCODE(BUILD_SMART_TUPLE) // if all elements are pointers, build a compound pointer, otherwise build a tuple +OPCODE(BUILD_STRING) // arg is the expr count, build a string from the top of the stack #endif \ No newline at end of file diff --git a/src/parser.h b/src/parser.h index f0eb238d..857e345d 100644 --- a/src/parser.h +++ b/src/parser.h @@ -20,7 +20,7 @@ constexpr const char* __TOKENS[] = { "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", /** KW_END **/ "is not", "not in", - "@id", "@num", "@str", + "@id", "@num", "@str", "@fstr", "@indent", "@dedent" }; diff --git a/src/pocketpy.h b/src/pocketpy.h index 4c5c6319..7c83eb3f 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -49,6 +49,14 @@ void __initializeBuiltinFunctions(VM* _vm) { return vm->None; }); + _vm->bindBuiltinFunc("eval", [](VM* vm, PyVarList args) { + if (args.size() != 1) vm->_error("TypeError", "eval() takes exactly one argument"); + if (!args[0]->isType(vm->_tp_str)) vm->_error("TypeError", "eval() argument must be a string"); + const _Str& expr = vm->PyStr_AS_C(args[0]); + _Code code = compile(vm, expr, "", EVAL_MODE); + return vm->exec(code); // not working in function + }); + _vm->bindBuiltinFunc("repr", [](VM* vm, PyVarList args) { return vm->asRepr(args.at(0)); }); @@ -93,6 +101,10 @@ void __initializeBuiltinFunctions(VM* _vm) { return vm->PyStr(s); }); + _vm->bindMethod("type", "__new__", [](VM* vm, PyVarList args) { + return args.at(1)->attribs["__class__"]; + }); + _vm->bindMethod("range", "__new__", [](VM* vm, PyVarList args) { _Range r; if( args.size() == 0 ) vm->_error("TypeError", "range expected 1 arguments, got 0"); diff --git a/src/str.h b/src/str.h index cea4e4e9..8d347d4c 100644 --- a/src/str.h +++ b/src/str.h @@ -3,10 +3,10 @@ #include #include #include +#include typedef std::stringstream _StrStream; - class _Str { private: mutable bool utf8_initialized = false; diff --git a/src/vm.h b/src/vm.h index 19d6d62c..fa6af833 100644 --- a/src/vm.h +++ b/src/vm.h @@ -210,6 +210,16 @@ public: pointers[i] = PyPointer_AS_C(items[i]); frame->push(PyPointer(std::make_shared(pointers))); } break; + case OP_BUILD_STRING: + { + PyVarList items = frame->popNValuesReversed(this, byte.arg); + _StrStream ss; + for(const auto& i : items) ss << PyStr_AS_C(asStr(i)); + frame->push(PyStr(ss)); + } break; + case OP_LOAD_EVAL_FN: { + frame->push(builtins->attribs["eval"]); + } break; case OP_STORE_FUNCTION: { PyVar obj = frame->popValue(this); @@ -381,6 +391,15 @@ public: break; } } + + if(frame->code->mode == EVAL_MODE) { + if(frame->stackSize() != 1) { + _error("SystemError", "stack size is not 1 in EVAL_MODE"); + } + return frame->popValue(this); + } + + if(frame->stackSize() != 0) _error("SystemError", "stack not empty in EXEC_MODE"); callstack.pop(); return None; }