impl f-string

This commit is contained in:
blueloveTH 2022-11-08 15:40:06 +08:00
parent 81516eafb5
commit d4d312fc36
8 changed files with 125 additions and 32 deletions

View File

@ -25,7 +25,15 @@ _Str pad(const _Str& s, const int n){
return s + _Str(n - s.size(), ' '); return s + _Str(n - s.size(), ' ');
} }
// Selects how a source string is compiled and how the VM finishes running it.
// Values are the implicit ones (0, 1, 2), spelled out for clarity.
enum CompileMode {
    EXEC_MODE   = 0,  // default mode: a sequence of top-level statements
    EVAL_MODE   = 1,  // a single expression (tuple) followed by end of input; yields a value
    SINGLE_MODE = 2   // interactive mode chosen by the REPL for each input line
};
struct CodeObject { struct CodeObject {
CompileMode mode = EXEC_MODE;
std::vector<ByteCode> co_code; std::vector<ByteCode> co_code;
_Str co_filename; _Str co_filename;
_Str co_name; _Str co_name;

View File

@ -41,8 +41,6 @@ struct Loop {
class Compiler { class Compiler {
public: public:
std::unique_ptr<Parser> parser; std::unique_ptr<Parser> parser;
bool repl_mode;
std::stack<_Code> codes; std::stack<_Code> codes;
std::stack<Loop> loops; std::stack<Loop> loops;
@ -61,10 +59,13 @@ public:
return loops.top(); return loops.top();
} }
Compiler(VM* vm, const char* source, _Code code, bool repl_mode){ CompileMode mode() {
return getCode()->mode;
}
Compiler(VM* vm, const char* source, _Code code){
this->vm = vm; this->vm = vm;
this->codes.push(code); this->codes.push(code);
this->repl_mode = repl_mode;
if (!code->co_filename.empty()) path = code->co_filename; if (!code->co_filename.empty()) path = code->co_filename;
this->parser = std::make_unique<Parser>(source); this->parser = std::make_unique<Parser>(source);
@ -103,6 +104,7 @@ public:
rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
@ -118,9 +120,8 @@ public:
#define EXPR_ANY() parsePrecedence(PREC_NONE) #define EXPR_ANY() parsePrecedence(PREC_NONE)
} }
void eatString(bool single_quote) { _Str eatStringUntil(char quote) {
std::vector<char> buff; std::vector<char> buff;
char quote = (single_quote) ? '\'' : '"';
while (true) { while (true) {
char c = parser->eatChar(); char c = parser->eatChar();
if (c == quote) break; if (c == quote) break;
@ -134,16 +135,23 @@ public:
case 'n': buff.push_back('\n'); break; case 'n': buff.push_back('\n'); break;
case 'r': buff.push_back('\r'); break; case 'r': buff.push_back('\r'); break;
case 't': buff.push_back('\t'); break; case 't': buff.push_back('\t'); break;
case '\n': break; // Just ignore the next line. case '\n': case '\r': break;
case '\r': if (parser->matchChar('\n')) break; default: throw SyntaxError(path, parser->makeErrToken(), "invalid escape character");
default: throw SyntaxError(path, parser->makeErrToken(), "invalid syntax");
} }
} else { } else {
buff.push_back(c); buff.push_back(c);
} }
} }
return _Str(buff.data(), buff.size());
}
parser->setNextToken(TK("@str"), vm->PyStr(_Str(buff.data(), buff.size()))); void eatString(char quote, bool fstr) {
_Str s = eatStringUntil(quote);
if(fstr){
parser->setNextToken(TK("@fstr"), vm->PyStr(s));
}else{
parser->setNextToken(TK("@str"), vm->PyStr(s));
}
} }
void eatNumber() { void eatNumber() {
@ -182,8 +190,7 @@ public:
parser->token_start = parser->current_char; parser->token_start = parser->current_char;
char c = parser->eatCharIncludeNewLine(); char c = parser->eatCharIncludeNewLine();
switch (c) { switch (c) {
case '"': eatString(false); return; case '\'': case '"': eatString(c, false); return;
case '\'': eatString(true); return;
case '#': parser->skipLineComment(); break; case '#': parser->skipLineComment(); break;
case '{': parser->setNextToken(TK("{")); return; case '{': parser->setNextToken(TK("{")); return;
case '}': parser->setNextToken(TK("}")); return; case '}': parser->setNextToken(TK("}")); return;
@ -232,6 +239,10 @@ public:
if (isdigit(c)) { if (isdigit(c)) {
eatNumber(); eatNumber();
} else if (isalpha(c) || c=='_') { } else if (isalpha(c) || c=='_') {
if(c == 'f'){
if(parser->matchChar('\'')) {eatString('\'', true); return;}
if(parser->matchChar('"')) {eatString('"', true); return;}
}
parser->eatName(); parser->eatName();
} else { } else {
throw SyntaxError(path, parser->makeErrToken(), "unknown character: %c", c); throw SyntaxError(path, parser->makeErrToken(), "unknown character: %c", c);
@ -297,6 +308,35 @@ public:
emitCode(OP_LOAD_CONST, index); emitCode(OP_LOAD_CONST, index);
} }
// Compiles an f-string literal (the previously consumed "@fstr" token) into bytecode.
//
// The literal's text is scanned for `{...}` fields using a non-greedy regex
// (no nesting, no `{{` escapes). For every part, one value is pushed:
//   * text between fields -> OP_LOAD_CONST of that text
//   * a field             -> OP_LOAD_EVAL_FN, OP_LOAD_CONST <field text>, OP_CALL 1
// A final OP_BUILD_STRING <count> joins all pushed parts.
//
// NOTE(review): an empty field `{}` is passed to eval as an empty string;
// consider rejecting it at compile time.
void exprFString() {
    // Built once: constructing a std::regex is costly and the pattern is constant.
    static const std::regex pattern(R"(\{(.*?)\})");

    PyVar value = parser->previous.value;
    const std::string s = vm->PyStr_AS_C(value).str();

    int size = 0;                   // number of parts pushed for OP_BUILD_STRING
    std::string::size_type i = 0;   // offset just past the last consumed field

    const std::sregex_iterator end;
    for (std::sregex_iterator it(s.begin(), s.end(), pattern); it != end; ++it) {
        const std::smatch& m = *it;
        const auto pos = static_cast<std::string::size_type>(m.position());
        const auto len = static_cast<std::string::size_type>(m.length());

        // Literal text preceding this field, if any.
        if (i < pos) {
            std::string literal = s.substr(i, pos - i);
            emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
            size++;
        }

        // The field itself: eval(<expression text>) at runtime.
        emitCode(OP_LOAD_EVAL_FN);
        emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
        emitCode(OP_CALL, 1);
        size++;

        i = pos + len;
    }

    // Trailing literal text after the last field (or the whole string if no field matched).
    if (i < s.size()) {
        std::string literal = s.substr(i);
        emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
        size++;
    }
    emitCode(OP_BUILD_STRING, size);
}
void exprLambda() { void exprLambda() {
throw SyntaxError(path, parser->previous, "lambda is not implemented yet"); throw SyntaxError(path, parser->previous, "lambda is not implemented yet");
} }
@ -493,7 +533,7 @@ public:
void __compileBlockBody(CompilerAction action) { void __compileBlockBody(CompilerAction action) {
consume(TK(":")); consume(TK(":"));
if(!matchNewLines(repl_mode)){ if(!matchNewLines(mode()==SINGLE_MODE)){
throw SyntaxError(path, parser->previous, "expected a new line after ':'"); throw SyntaxError(path, parser->previous, "expected a new line after ':'");
} }
consume(TK("@indent")); consume(TK("@indent"));
@ -657,7 +697,7 @@ public:
// If last op is not an assignment, pop the result. // If last op is not an assignment, pop the result.
uint8_t lastOp = getCode()->co_code.back().op; uint8_t lastOp = getCode()->co_code.back().op;
if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){ if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
if(repl_mode && parser->indents.top() == 0){ if(mode()==SINGLE_MODE && parser->indents.top() == 0){
emitCode(OP_PRINT_EXPR); emitCode(OP_PRINT_EXPR);
} }
emitCode(OP_POP_TOP); emitCode(OP_POP_TOP);
@ -713,6 +753,8 @@ public:
const _Str& name = parser->previous.str(); const _Str& name = parser->previous.str();
if(func.hasName(name)) throw SyntaxError(path, parser->previous, "duplicate argument name"); if(func.hasName(name)) throw SyntaxError(path, parser->previous, "duplicate argument name");
if(state == 0 && peek() == TK("=")) state = 2;
switch (state) switch (state)
{ {
case 0: func.args.push_back(name); break; case 0: func.args.push_back(name); break;
@ -740,7 +782,7 @@ public:
if(match(TK("True"))) goto __LITERAL_EXIT; if(match(TK("True"))) goto __LITERAL_EXIT;
if(match(TK("False"))) goto __LITERAL_EXIT; if(match(TK("False"))) goto __LITERAL_EXIT;
if(match(TK("None"))) goto __LITERAL_EXIT; if(match(TK("None"))) goto __LITERAL_EXIT;
throw SyntaxError(path, parser->previous, "expect a literal"); throw SyntaxError(path, parser->previous, "expect a literal, not %s", TK_STR(parser->current.type));
__LITERAL_EXIT: __LITERAL_EXIT:
return parser->previous.value; return parser->previous.value;
} }
@ -757,26 +799,34 @@ __LITERAL_EXIT:
} }
} }
// Drives compilation of the whole source into the current code object.
// In EVAL_MODE exactly one expression (tuple) is compiled and end of input is
// required; in every other mode top-level statements are compiled until "@eof".
void __fillCode(){
    // Lex the initial tokens twice so that `current` is filled from `next`.
    for (int primed = 0; primed < 2; ++primed) lexToken();
    matchNewLines();

    if (mode() != EVAL_MODE) {
        // Statement loop: stop as soon as the end-of-file token is matched.
        for (;;) {
            if (match(TK("@eof"))) break;
            compileTopLevelStatement();
            matchNewLines();
        }
        return;
    }

    // Expression-only input.
    EXPR_TUPLE();
    consume(TK("@eof"));
}
}; };
_Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) {
_Code compile(VM* vm, const char* source, _Str filename, bool repl_mode=false) {
// Skip utf8 BOM if there is any. // Skip utf8 BOM if there is any.
if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
_Code code = std::make_shared<CodeObject>(); _Code code = std::make_shared<CodeObject>();
code->co_filename = filename; code->co_filename = filename;
Compiler compiler(vm, source, code, repl_mode); code->mode = mode;
// Lex initial tokens. current <-- next.
compiler.lexToken();
compiler.lexToken();
compiler.matchNewLines();
while (!compiler.match(TK("@eof"))) {
compiler.compileTopLevelStatement();
compiler.matchNewLines();
}
Compiler compiler(vm, source, code);
compiler.__fillCode();
return code; return code;
} }

View File

@ -45,6 +45,7 @@ void REPL(){
VM* vm = newVM(); VM* vm = newVM();
while(true){ while(true){
CompileMode mode = SINGLE_MODE;
vm->printFn(need_more_lines ? "... " : ">>> "); vm->printFn(need_more_lines ? "... " : ">>> ");
std::string line; std::string line;
std::getline(std::cin, line); std::getline(std::cin, line);
@ -56,6 +57,7 @@ void REPL(){
if(n>=2 && buffer[n-1]=='\n' && buffer[n-2]=='\n'){ if(n>=2 && buffer[n-1]=='\n' && buffer[n-2]=='\n'){
need_more_lines = false; need_more_lines = false;
line = buffer; line = buffer;
mode = EXEC_MODE; // tmp set to EXEC_MODE
buffer.clear(); buffer.clear();
}else{ }else{
continue; continue;
@ -65,7 +67,7 @@ void REPL(){
if(line.empty()) continue; if(line.empty()) continue;
} }
try{ try{
_Code code = compile(vm, line.c_str(), "<stdin>", true); _Code code = compile(vm, line.c_str(), "<stdin>", mode);
vm->exec(code); vm->exec(code);
#ifdef PK_DEBUG #ifdef PK_DEBUG
}catch(NeedMoreLines& e){ }catch(NeedMoreLines& e){
@ -106,7 +108,7 @@ int main(int argc, char** argv){
std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
VM* vm = newVM(); VM* vm = newVM();
Timer timer("编译时间"); Timer timer("编译时间");
_Code code = compile(vm, src.c_str(), filename, false); _Code code = compile(vm, src.c_str(), filename);
timer.stop(); timer.stop();
//std::cout << code->toString() << std::endl; //std::cout << code->toString() << std::endl;
Timer timer2("运行时间"); Timer timer2("运行时间");

View File

@ -33,6 +33,7 @@ OPCODE(JUMP_IF_FALSE_OR_POP)
OPCODE(LOAD_NONE) OPCODE(LOAD_NONE)
OPCODE(LOAD_TRUE) OPCODE(LOAD_TRUE)
OPCODE(LOAD_FALSE) OPCODE(LOAD_FALSE)
OPCODE(LOAD_EVAL_FN) // load eval() callable into stack
OPCODE(ASSERT) OPCODE(ASSERT)
OPCODE(RAISE_ERROR) OPCODE(RAISE_ERROR)
@ -48,5 +49,6 @@ OPCODE(STORE_PTR) // no arg, [ptr, expr] -> *ptr = expr
OPCODE(DELETE_PTR) // no arg, [ptr] -> [] -> delete ptr OPCODE(DELETE_PTR) // no arg, [ptr] -> [] -> delete ptr
OPCODE(BUILD_SMART_TUPLE) // if all elements are pointers, build a compound pointer, otherwise build a tuple OPCODE(BUILD_SMART_TUPLE) // if all elements are pointers, build a compound pointer, otherwise build a tuple
OPCODE(BUILD_STRING) // arg is the expr count, build a string from the top of the stack
#endif #endif

View File

@ -20,7 +20,7 @@ constexpr const char* __TOKENS[] = {
"while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
/** KW_END **/ /** KW_END **/
"is not", "not in", "is not", "not in",
"@id", "@num", "@str", "@id", "@num", "@str", "@fstr",
"@indent", "@dedent" "@indent", "@dedent"
}; };

View File

@ -49,6 +49,14 @@ void __initializeBuiltinFunctions(VM* _vm) {
return vm->None; return vm->None;
}); });
// Builtin eval(source): compiles `source` in EVAL_MODE and returns whatever vm->exec yields for it.
// Reports a TypeError through vm->_error unless exactly one argument of str type is given.
_vm->bindBuiltinFunc("eval", [](VM* vm, PyVarList args) {
if (args.size() != 1) vm->_error("TypeError", "eval() takes exactly one argument");
if (!args[0]->isType(vm->_tp_str)) vm->_error("TypeError", "eval() argument must be a string");
const _Str& expr = vm->PyStr_AS_C(args[0]);
// The compiled code is always labelled "<f-string>" as its filename, whoever calls eval().
_Code code = compile(vm, expr, "<f-string>", EVAL_MODE);
return vm->exec(code); // author's note: not working in function (known limitation; cause is not visible in this hunk)
});
_vm->bindBuiltinFunc("repr", [](VM* vm, PyVarList args) { _vm->bindBuiltinFunc("repr", [](VM* vm, PyVarList args) {
return vm->asRepr(args.at(0)); return vm->asRepr(args.at(0));
}); });
@ -93,6 +101,10 @@ void __initializeBuiltinFunctions(VM* _vm) {
return vm->PyStr(s); return vm->PyStr(s);
}); });
// type.__new__: returns the "__class__" attribute of the second argument (args.at(1)).
// NOTE(review): presumably args[0] is the `type` object itself — confirm against the call convention.
_vm->bindMethod("type", "__new__", [](VM* vm, PyVarList args) {
return args.at(1)->attribs["__class__"];
});
_vm->bindMethod("range", "__new__", [](VM* vm, PyVarList args) { _vm->bindMethod("range", "__new__", [](VM* vm, PyVarList args) {
_Range r; _Range r;
if( args.size() == 0 ) vm->_error("TypeError", "range expected 1 arguments, got 0"); if( args.size() == 0 ) vm->_error("TypeError", "range expected 1 arguments, got 0");

View File

@ -3,10 +3,10 @@
#include <vector> #include <vector>
#include <string> #include <string>
#include <sstream> #include <sstream>
#include <regex>
typedef std::stringstream _StrStream; typedef std::stringstream _StrStream;
class _Str { class _Str {
private: private:
mutable bool utf8_initialized = false; mutable bool utf8_initialized = false;

View File

@ -210,6 +210,16 @@ public:
pointers[i] = PyPointer_AS_C(items[i]); pointers[i] = PyPointer_AS_C(items[i]);
frame->push(PyPointer(std::make_shared<CompoundPointer>(pointers))); frame->push(PyPointer(std::make_shared<CompoundPointer>(pointers)));
} break; } break;
// Takes byte.arg values off the frame stack, converts each one with asStr,
// concatenates them in the order returned by popNValuesReversed, and pushes the result as a string.
// NOTE(review): popNValuesReversed presumably restores original push order — confirm.
case OP_BUILD_STRING:
{
PyVarList items = frame->popNValuesReversed(this, byte.arg);
_StrStream ss;
for(const auto& i : items) ss << PyStr_AS_C(asStr(i));
frame->push(PyStr(ss));
} break;
// Pushes the "eval" entry of the builtins attribs onto the frame stack.
case OP_LOAD_EVAL_FN: {
frame->push(builtins->attribs["eval"]);
} break;
case OP_STORE_FUNCTION: case OP_STORE_FUNCTION:
{ {
PyVar obj = frame->popValue(this); PyVar obj = frame->popValue(this);
@ -381,6 +391,15 @@ public:
break; break;
} }
} }
// EVAL_MODE code must leave exactly one value on the frame's stack; that value is the result.
if(frame->code->mode == EVAL_MODE) {
if(frame->stackSize() != 1) {
_error("SystemError", "stack size is not 1 in EVAL_MODE");
}
// NOTE(review): this return bypasses the callstack.pop() that the fall-through path below performs —
// confirm the frame is not left on the callstack after an EVAL_MODE run.
return frame->popValue(this);
}
// Every other mode must finish with an empty stack.
// NOTE(review): this check is also reached for SINGLE_MODE, although the message names EXEC_MODE.
if(frame->stackSize() != 0) _error("SystemError", "stack not empty in EXEC_MODE");
callstack.pop(); callstack.pop();
return None; return None;
} }