impl f-string

This commit is contained in:
blueloveTH 2022-11-08 15:40:06 +08:00
parent 81516eafb5
commit d4d312fc36
8 changed files with 125 additions and 32 deletions

View File

@ -25,7 +25,15 @@ _Str pad(const _Str& s, const int n){
return s + _Str(n - s.size(), ' ');
}
enum CompileMode {
EXEC_MODE,
EVAL_MODE,
SINGLE_MODE
};
struct CodeObject {
CompileMode mode = EXEC_MODE;
std::vector<ByteCode> co_code;
_Str co_filename;
_Str co_name;

View File

@ -41,8 +41,6 @@ struct Loop {
class Compiler {
public:
std::unique_ptr<Parser> parser;
bool repl_mode;
std::stack<_Code> codes;
std::stack<Loop> loops;
@ -61,10 +59,13 @@ public:
return loops.top();
}
Compiler(VM* vm, const char* source, _Code code, bool repl_mode){
CompileMode mode() {
return getCode()->mode;
}
Compiler(VM* vm, const char* source, _Code code){
this->vm = vm;
this->codes.push(code);
this->repl_mode = repl_mode;
if (!code->co_filename.empty()) path = code->co_filename;
this->parser = std::make_unique<Parser>(source);
@ -103,6 +104,7 @@ public:
rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
@ -118,9 +120,8 @@ public:
#define EXPR_ANY() parsePrecedence(PREC_NONE)
}
void eatString(bool single_quote) {
_Str eatStringUntil(char quote) {
std::vector<char> buff;
char quote = (single_quote) ? '\'' : '"';
while (true) {
char c = parser->eatChar();
if (c == quote) break;
@ -134,16 +135,23 @@ public:
case 'n': buff.push_back('\n'); break;
case 'r': buff.push_back('\r'); break;
case 't': buff.push_back('\t'); break;
case '\n': break; // Just ignore the next line.
case '\r': if (parser->matchChar('\n')) break;
default: throw SyntaxError(path, parser->makeErrToken(), "invalid syntax");
case '\n': case '\r': break;
default: throw SyntaxError(path, parser->makeErrToken(), "invalid escape character");
}
} else {
buff.push_back(c);
}
}
return _Str(buff.data(), buff.size());
}
parser->setNextToken(TK("@str"), vm->PyStr(_Str(buff.data(), buff.size())));
void eatString(char quote, bool fstr) {
_Str s = eatStringUntil(quote);
if(fstr){
parser->setNextToken(TK("@fstr"), vm->PyStr(s));
}else{
parser->setNextToken(TK("@str"), vm->PyStr(s));
}
}
void eatNumber() {
@ -182,8 +190,7 @@ public:
parser->token_start = parser->current_char;
char c = parser->eatCharIncludeNewLine();
switch (c) {
case '"': eatString(false); return;
case '\'': eatString(true); return;
case '\'': case '"': eatString(c, false); return;
case '#': parser->skipLineComment(); break;
case '{': parser->setNextToken(TK("{")); return;
case '}': parser->setNextToken(TK("}")); return;
@ -232,6 +239,10 @@ public:
if (isdigit(c)) {
eatNumber();
} else if (isalpha(c) || c=='_') {
if(c == 'f'){
if(parser->matchChar('\'')) {eatString('\'', true); return;}
if(parser->matchChar('"')) {eatString('"', true); return;}
}
parser->eatName();
} else {
throw SyntaxError(path, parser->makeErrToken(), "unknown character: %c", c);
@ -297,6 +308,35 @@ public:
emitCode(OP_LOAD_CONST, index);
}
void exprFString() {
PyVar value = parser->previous.value;
std::string s = vm->PyStr_AS_C(value).str();
std::regex pattern(R"(\{(.*?)\})");
std::sregex_iterator begin(s.begin(), s.end(), pattern);
std::sregex_iterator end;
int size = 0;
int i = 0;
for(auto it = begin; it != end; it++) {
std::smatch m = *it;
if (i < m.position()) {
std::string literal = s.substr(i, m.position() - i);
emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
size++;
}
emitCode(OP_LOAD_EVAL_FN);
emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
emitCode(OP_CALL, 1);
size++;
i = m.position() + m.length();
}
if (i < s.size()) {
std::string literal = s.substr(i, s.size() - i);
emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
size++;
}
emitCode(OP_BUILD_STRING, size);
}
void exprLambda() {
throw SyntaxError(path, parser->previous, "lambda is not implemented yet");
}
@ -493,7 +533,7 @@ public:
void __compileBlockBody(CompilerAction action) {
consume(TK(":"));
if(!matchNewLines(repl_mode)){
if(!matchNewLines(mode()==SINGLE_MODE)){
throw SyntaxError(path, parser->previous, "expected a new line after ':'");
}
consume(TK("@indent"));
@ -657,7 +697,7 @@ public:
// If last op is not an assignment, pop the result.
uint8_t lastOp = getCode()->co_code.back().op;
if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
if(repl_mode && parser->indents.top() == 0){
if(mode()==SINGLE_MODE && parser->indents.top() == 0){
emitCode(OP_PRINT_EXPR);
}
emitCode(OP_POP_TOP);
@ -713,6 +753,8 @@ public:
const _Str& name = parser->previous.str();
if(func.hasName(name)) throw SyntaxError(path, parser->previous, "duplicate argument name");
if(state == 0 && peek() == TK("=")) state = 2;
switch (state)
{
case 0: func.args.push_back(name); break;
@ -740,7 +782,7 @@ public:
if(match(TK("True"))) goto __LITERAL_EXIT;
if(match(TK("False"))) goto __LITERAL_EXIT;
if(match(TK("None"))) goto __LITERAL_EXIT;
throw SyntaxError(path, parser->previous, "expect a literal");
throw SyntaxError(path, parser->previous, "expect a literal, not %s", TK_STR(parser->current.type));
__LITERAL_EXIT:
return parser->previous.value;
}
@ -757,26 +799,34 @@ __LITERAL_EXIT:
}
}
void __fillCode(){
// Lex initial tokens. current <-- next.
lexToken();
lexToken();
matchNewLines();
if(mode() == EVAL_MODE) {
EXPR_TUPLE();
consume(TK("@eof"));
return;
}
while (!match(TK("@eof"))) {
compileTopLevelStatement();
matchNewLines();
}
}
};
_Code compile(VM* vm, const char* source, _Str filename, bool repl_mode=false) {
_Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) {
// Skip utf8 BOM if there is any.
if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
_Code code = std::make_shared<CodeObject>();
code->co_filename = filename;
Compiler compiler(vm, source, code, repl_mode);
// Lex initial tokens. current <-- next.
compiler.lexToken();
compiler.lexToken();
compiler.matchNewLines();
while (!compiler.match(TK("@eof"))) {
compiler.compileTopLevelStatement();
compiler.matchNewLines();
}
code->mode = mode;
Compiler compiler(vm, source, code);
compiler.__fillCode();
return code;
}

View File

@ -45,6 +45,7 @@ void REPL(){
VM* vm = newVM();
while(true){
CompileMode mode = SINGLE_MODE;
vm->printFn(need_more_lines ? "... " : ">>> ");
std::string line;
std::getline(std::cin, line);
@ -56,6 +57,7 @@ void REPL(){
if(n>=2 && buffer[n-1]=='\n' && buffer[n-2]=='\n'){
need_more_lines = false;
line = buffer;
mode = EXEC_MODE; // tmp set to EXEC_MODE
buffer.clear();
}else{
continue;
@ -65,7 +67,7 @@ void REPL(){
if(line.empty()) continue;
}
try{
_Code code = compile(vm, line.c_str(), "<stdin>", true);
_Code code = compile(vm, line.c_str(), "<stdin>", mode);
vm->exec(code);
#ifdef PK_DEBUG
}catch(NeedMoreLines& e){
@ -106,7 +108,7 @@ int main(int argc, char** argv){
std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
VM* vm = newVM();
Timer timer("编译时间");
_Code code = compile(vm, src.c_str(), filename, false);
_Code code = compile(vm, src.c_str(), filename);
timer.stop();
//std::cout << code->toString() << std::endl;
Timer timer2("运行时间");

View File

@ -33,6 +33,7 @@ OPCODE(JUMP_IF_FALSE_OR_POP)
OPCODE(LOAD_NONE)
OPCODE(LOAD_TRUE)
OPCODE(LOAD_FALSE)
OPCODE(LOAD_EVAL_FN) // load eval() callable into stack
OPCODE(ASSERT)
OPCODE(RAISE_ERROR)
@ -48,5 +49,6 @@ OPCODE(STORE_PTR) // no arg, [ptr, expr] -> *ptr = expr
OPCODE(DELETE_PTR) // no arg, [ptr] -> [] -> delete ptr
OPCODE(BUILD_SMART_TUPLE) // if all elements are pointers, build a compound pointer, otherwise build a tuple
OPCODE(BUILD_STRING) // arg is the expr count, build a string from the top of the stack
#endif

View File

@ -20,7 +20,7 @@ constexpr const char* __TOKENS[] = {
"while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
/** KW_END **/
"is not", "not in",
"@id", "@num", "@str",
"@id", "@num", "@str", "@fstr",
"@indent", "@dedent"
};

View File

@ -49,6 +49,14 @@ void __initializeBuiltinFunctions(VM* _vm) {
return vm->None;
});
_vm->bindBuiltinFunc("eval", [](VM* vm, PyVarList args) {
if (args.size() != 1) vm->_error("TypeError", "eval() takes exactly one argument");
if (!args[0]->isType(vm->_tp_str)) vm->_error("TypeError", "eval() argument must be a string");
const _Str& expr = vm->PyStr_AS_C(args[0]);
_Code code = compile(vm, expr, "<f-string>", EVAL_MODE);
return vm->exec(code); // not working in function
});
_vm->bindBuiltinFunc("repr", [](VM* vm, PyVarList args) {
return vm->asRepr(args.at(0));
});
@ -93,6 +101,10 @@ void __initializeBuiltinFunctions(VM* _vm) {
return vm->PyStr(s);
});
_vm->bindMethod("type", "__new__", [](VM* vm, PyVarList args) {
return args.at(1)->attribs["__class__"];
});
_vm->bindMethod("range", "__new__", [](VM* vm, PyVarList args) {
_Range r;
if( args.size() == 0 ) vm->_error("TypeError", "range expected 1 arguments, got 0");

View File

@ -3,10 +3,10 @@
#include <vector>
#include <string>
#include <sstream>
#include <regex>
typedef std::stringstream _StrStream;
class _Str {
private:
mutable bool utf8_initialized = false;

View File

@ -210,6 +210,16 @@ public:
pointers[i] = PyPointer_AS_C(items[i]);
frame->push(PyPointer(std::make_shared<CompoundPointer>(pointers)));
} break;
case OP_BUILD_STRING:
{
PyVarList items = frame->popNValuesReversed(this, byte.arg);
_StrStream ss;
for(const auto& i : items) ss << PyStr_AS_C(asStr(i));
frame->push(PyStr(ss));
} break;
case OP_LOAD_EVAL_FN: {
frame->push(builtins->attribs["eval"]);
} break;
case OP_STORE_FUNCTION:
{
PyVar obj = frame->popValue(this);
@ -381,6 +391,15 @@ public:
break;
}
}
if(frame->code->mode == EVAL_MODE) {
if(frame->stackSize() != 1) {
_error("SystemError", "stack size is not 1 in EVAL_MODE");
}
return frame->popValue(this);
}
if(frame->stackSize() != 0) _error("SystemError", "stack not empty in EXEC_MODE");
callstack.pop();
return None;
}