This commit is contained in:
blueloveTH 2023-01-08 00:35:58 +08:00
parent 76d167437c
commit f0c89961cc
2 changed files with 162 additions and 124 deletions

View File

@ -3165,7 +3165,7 @@ constexpr const char* __TOKENS[] = {
"+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=",
/** KW_BEGIN **/ /** KW_BEGIN **/
"class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "class", "import", "as", "def", "lambda", "pass", "del", "from", "with",
"None", "in", "is", "and", "or", "not", "True", "False", "global", "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
"goto", "label", // extended keywords, not available in cpython "goto", "label", // extended keywords, not available in cpython
"while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
/** KW_END **/ /** KW_END **/
@ -3530,9 +3530,16 @@ enum Opcode {
#define OPCODE(name) OP_##name, #define OPCODE(name) OP_##name,
#ifdef OPCODE #ifdef OPCODE
// Do nothing
OPCODE(NO_OP) OPCODE(NO_OP)
// This op is a placeholder that should never be executed
OPCODE(DELETED_OP) OPCODE(DELETED_OP)
// Load a constant from the `co_consts`
// ARG: array index
OPCODE(LOAD_CONST) OPCODE(LOAD_CONST)
OPCODE(IMPORT_NAME) OPCODE(IMPORT_NAME)
OPCODE(PRINT_EXPR) OPCODE(PRINT_EXPR)
OPCODE(POP_TOP) OPCODE(POP_TOP)
@ -3595,7 +3602,10 @@ OPCODE(GOTO)
OPCODE(WITH_ENTER) OPCODE(WITH_ENTER)
OPCODE(WITH_EXIT) OPCODE(WITH_EXIT)
OPCODE(JUMP_RELATIVE) OPCODE(RAISE_VARARGS)
OPCODE(LOOP_BREAK)
OPCODE(LOOP_CONTINUE)
#endif #endif
#undef OPCODE #undef OPCODE
@ -3605,9 +3615,16 @@ static const char* OP_NAMES[] = {
#define OPCODE(name) #name, #define OPCODE(name) #name,
#ifdef OPCODE #ifdef OPCODE
// Do nothing
OPCODE(NO_OP) OPCODE(NO_OP)
// This op is a placeholder that should never be executed
OPCODE(DELETED_OP) OPCODE(DELETED_OP)
// Load a constant from the `co_consts`
// ARG: array index
OPCODE(LOAD_CONST) OPCODE(LOAD_CONST)
OPCODE(IMPORT_NAME) OPCODE(IMPORT_NAME)
OPCODE(PRINT_EXPR) OPCODE(PRINT_EXPR)
OPCODE(POP_TOP) OPCODE(POP_TOP)
@ -3670,7 +3687,10 @@ OPCODE(GOTO)
OPCODE(WITH_ENTER) OPCODE(WITH_ENTER)
OPCODE(WITH_EXIT) OPCODE(WITH_EXIT)
OPCODE(JUMP_RELATIVE) OPCODE(RAISE_VARARGS)
OPCODE(LOOP_BREAK)
OPCODE(LOOP_CONTINUE)
#endif #endif
#undef OPCODE #undef OPCODE
@ -3687,6 +3707,48 @@ _Str pad(const _Str& s, const int n){
return s + std::string(n - s.size(), ' '); return s + std::string(n - s.size(), ' ');
} }
// Kind of structured region that a run of bytecode belongs to.
// Deliberately a plain (unscoped) enum: the enumerators are used unqualified
// elsewhere in this file (e.g. `NO_BLOCK`, `FOR_LOOP`).
enum CodeBlockType {
    NO_BLOCK,
    FOR_LOOP,
    WHILE_LOOP,
    CONTEXT_MANAGER,
    TRY_EXCEPT,
};

// One entry of a code object's block table.
//
// NOTE: this type must stay an aggregate with this exact member order. It is
// brace-initialized positionally with trailing members omitted (for example
// `CodeBlock{NO_BLOCK, {}, -1}`), which value-initializes the omitted `start` /
// `end` members to 0.
struct CodeBlock {
    CodeBlockType type;     // what kind of construct opened this block
    std::vector<int> id;    // path-like identifier; one element per nesting level
    int parent;             // parent index in co_blocks (-1 means no parent)
    int start;              // start index of this block in co_code, inclusive
    int end;                // end index of this block in co_code, exclusive

    // Human-readable tag for this block.
    // Returns "" when `parent == -1`; otherwise "[a-b-c: T]" where `a-b-c` is
    // `id` joined with '-' and `T` is the numeric value of `type`.
    std::string toString() const {
        if(parent == -1) return "";

        std::string s = "[";
        bool first = true;
        // Range-for avoids the signed/unsigned index comparison against size().
        for(int part : id){
            if(!first) s += "-";
            s += std::to_string(part);
            first = false;
        }
        s += ": ";
        s += std::to_string(static_cast<int>(type));
        s += "]";
        return s;
    }

    // Equality against a bare id path. Only `id` takes part in the comparison,
    // which allows std::find over a container of CodeBlock with an id as the value.
    bool operator==(const std::vector<int>& other) const {
        return id == other;
    }

    // Negation of operator== above; again only `id` is compared.
    bool operator!=(const std::vector<int>& other) const {
        return id != other;
    }

    // Number of elements in `id`.
    int depth() const {
        return static_cast<int>(id.size());
    }
};
struct CodeObject { struct CodeObject {
_Source src; _Source src;
_Str name; _Str name;
@ -3705,48 +3767,36 @@ struct CodeObject {
std::vector<std::pair<_Str, NameScope>> co_names; std::vector<std::pair<_Str, NameScope>> co_names;
std::vector<_Str> co_global_names; std::vector<_Str> co_global_names;
std::vector<std::vector<int>> co_loops = {{}}; std::vector<CodeBlock> co_blocks = { CodeBlock{NO_BLOCK, {}, -1} };
int _currLoopIndex = 0;
std::string getBlockStr(int block){ // tmp variables
std::vector<int> loopId = co_loops[block]; int _currBlockIndex = 0;
std::string s = ""; bool __isCurrBlockLoop() const {
for(int i=0; i<loopId.size(); i++){ return co_blocks[_currBlockIndex].type == FOR_LOOP || co_blocks[_currBlockIndex].type == WHILE_LOOP;
s += std::to_string(loopId[i]);
if(i != loopId.size()-1) s += "-";
}
return s;
} }
void __enterLoop(int depth){ void __enterBlock(CodeBlockType type){
const std::vector<int>& prevLoopId = co_loops[_currLoopIndex]; const CodeBlock& currBlock = co_blocks[_currBlockIndex];
if(depth - prevLoopId.size() == 1){ std::vector<int> copy(currBlock.id);
std::vector<int> copy = prevLoopId; copy.push_back(-1);
copy.push_back(0); int t = 0;
int t = 0; while(true){
while(true){ copy[copy.size()-1] = t;
copy[copy.size()-1] = t; auto it = std::find(co_blocks.begin(), co_blocks.end(), copy);
auto it = std::find(co_loops.begin(), co_loops.end(), copy); if(it == co_blocks.end()) break;
if(it == co_loops.end()) break; t++;
t++;
}
co_loops.push_back(copy);
}else{
UNREACHABLE();
} }
_currLoopIndex = co_loops.size()-1; co_blocks.push_back(CodeBlock{type, copy, _currBlockIndex, (int)co_code.size()});
_currBlockIndex = co_blocks.size()-1;
} }
void __exitLoop(){ void __exitBlock(){
std::vector<int> copy = co_loops[_currLoopIndex]; co_blocks[_currBlockIndex].end = co_code.size();
copy.pop_back(); _currBlockIndex = co_blocks[_currBlockIndex].parent;
auto it = std::find(co_loops.begin(), co_loops.end(), copy); if(_currBlockIndex < 0) UNREACHABLE();
if(it == co_loops.end()) UNREACHABLE();
_currLoopIndex = it - co_loops.begin();
} }
// for goto use // for goto use
// note: some opcodes move the bytecode, such as listcomp
// goto/label should be put at toplevel statements // goto/label should be put at toplevel statements
emhash8::HashMap<_Str, int> co_labels; emhash8::HashMap<_Str, int> co_labels;
@ -3785,7 +3835,7 @@ public:
PyVar _module; PyVar _module;
PyVarDict f_locals; PyVarDict f_locals;
inline PyVarDict copy_f_locals(){ inline PyVarDict copy_f_locals() const {
return f_locals; return f_locals;
} }
@ -3850,31 +3900,25 @@ public:
this->ip = i; this->ip = i;
} }
inline void jumpRelative(int i){ void jumpAbsoluteSafe(int target){
this->ip += i;
}
void jumpAbsoluteSafe(int i){
const ByteCode& prev = code->co_code[this->ip]; const ByteCode& prev = code->co_code[this->ip];
const std::vector<int> prevLoopId = code->co_loops[prev.block]; int i = prev.block;
this->ip = i; this->ip = target;
if(isCodeEnd()){ if(isCodeEnd()){
for(int i=0; i<prevLoopId.size(); i++) __pop(); while(i>=0){
return; if(code->co_blocks[i].type == FOR_LOOP) __pop();
} i = code->co_blocks[i].parent;
const ByteCode& next = code->co_code[i];
const std::vector<int> nextLoopId = code->co_loops[next.block];
int sizeDelta = prevLoopId.size() - nextLoopId.size();
if(sizeDelta < 0){
throw std::runtime_error("invalid jump from " + code->getBlockStr(prev.block) + " to " + code->getBlockStr(next.block));
}else{
for(int i=0; i<nextLoopId.size(); i++){
if(nextLoopId[i] != prevLoopId[i]){
throw std::runtime_error("invalid jump from " + code->getBlockStr(prev.block) + " to " + code->getBlockStr(next.block));
}
} }
}else{
const ByteCode& next = code->co_code[target];
while(i>=0 && i!=next.block){
if(code->co_blocks[i].type == FOR_LOOP) __pop();
i = code->co_blocks[i].parent;
}
if(i!=next.block) throw std::runtime_error(
"invalid jump from " + code->co_blocks[prev.block].toString() + " to " + code->co_blocks[next.block].toString()
);
} }
for(int i=0; i<sizeDelta; i++) __pop();
} }
pkpy::ArgList popNValuesReversed(VM* vm, int n){ pkpy::ArgList popNValuesReversed(VM* vm, int n){
@ -4029,7 +4073,6 @@ protected:
setAttr(fn, __module__, frame->_module); setAttr(fn, __module__, frame->_module);
setAttr(cls, f->name, fn); setAttr(cls, f->name, fn);
} }
// frame->f_globals()[clsName] = cls;
} break; } break;
case OP_RETURN_VALUE: return frame->popValue(this); case OP_RETURN_VALUE: return frame->popValue(this);
case OP_PRINT_EXPR: case OP_PRINT_EXPR:
@ -4140,7 +4183,6 @@ protected:
frame->push(std::move(ret)); frame->push(std::move(ret));
} break; } break;
case OP_JUMP_ABSOLUTE: frame->jumpAbsolute(byte.arg); break; case OP_JUMP_ABSOLUTE: frame->jumpAbsolute(byte.arg); break;
case OP_JUMP_RELATIVE: frame->jumpRelative(byte.arg); break;
case OP_SAFE_JUMP_ABSOLUTE: frame->jumpAbsoluteSafe(byte.arg); break; case OP_SAFE_JUMP_ABSOLUTE: frame->jumpAbsoluteSafe(byte.arg); break;
case OP_GOTO: { case OP_GOTO: {
PyVar obj = frame->popValue(this); PyVar obj = frame->popValue(this);
@ -4171,10 +4213,20 @@ protected:
auto& it = PyIter_AS_C(frame->__top()); auto& it = PyIter_AS_C(frame->__top());
if(it->hasNext()){ if(it->hasNext()){
PyRef_AS_C(it->var)->set(this, frame, it->next()); PyRef_AS_C(it->var)->set(this, frame, it->next());
}else{
int blockEnd = frame->code->co_blocks[byte.block].end;
frame->jumpAbsoluteSafe(blockEnd);
} }
else{ } break;
frame->jumpAbsoluteSafe(byte.arg); case OP_LOOP_CONTINUE:
} {
int blockStart = frame->code->co_blocks[byte.block].start;
frame->jumpAbsolute(blockStart);
} break;
case OP_LOOP_BREAK:
{
int blockEnd = frame->code->co_blocks[byte.block].end;
frame->jumpAbsoluteSafe(blockEnd);
} break; } break;
case OP_JUMP_IF_FALSE_OR_POP: case OP_JUMP_IF_FALSE_OR_POP:
{ {
@ -4288,7 +4340,7 @@ public:
return asRepr(obj); return asRepr(obj);
} }
Frame* topFrame(){ inline Frame* topFrame() const {
if(callstack.size() == 0) UNREACHABLE(); if(callstack.size() == 0) UNREACHABLE();
return callstack.back().get(); return callstack.back().get();
} }
@ -4440,11 +4492,7 @@ public:
if(_module == nullptr) _module = _main; if(_module == nullptr) _module = _main;
try { try {
_Code code = compile(source, filename, mode); _Code code = compile(source, filename, mode);
// if(filename != "<builtins>") std::cout << disassemble(code) << std::endl;
// if(filename != "<builtins>"){
// std::cout << disassemble(code) << std::endl;
// }
return _exec(code, _module, {}); return _exec(code, _module, {});
}catch (const _Error& e){ }catch (const _Error& e){
*_stderr << e.what() << '\n'; *_stderr << e.what() << '\n';
@ -4667,7 +4715,6 @@ public:
int prev_line = -1; int prev_line = -1;
for(int i=0; i<code->co_code.size(); i++){ for(int i=0; i<code->co_code.size(); i++){
const ByteCode& byte = code->co_code[i]; const ByteCode& byte = code->co_code[i];
//if(byte.op == OP_NO_OP || byte.op == OP_DELETED_OP) continue;
_Str line = std::to_string(byte.line); _Str line = std::to_string(byte.line);
if(byte.line == prev_line) line = ""; if(byte.line == prev_line) line = "";
else{ else{
@ -4677,7 +4724,7 @@ public:
ss << pad(line, 12) << " " << pad(std::to_string(i), 3); ss << pad(line, 12) << " " << pad(std::to_string(i), 3);
ss << " " << pad(OP_NAMES[byte.op], 20) << " "; ss << " " << pad(OP_NAMES[byte.op], 20) << " ";
ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5); ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
ss << '[' << code->getBlockStr(byte.block) << ']'; ss << code->co_blocks[byte.block].toString();
if(i != code->co_code.size() - 1) ss << '\n'; if(i != code->co_code.size() - 1) ss << '\n';
} }
_StrStream consts; _StrStream consts;
@ -5129,19 +5176,12 @@ struct GrammarRule{
Precedence precedence; Precedence precedence;
}; };
struct Loop {
int start;
std::vector<int> breaks;
Loop(int start) : start(start) {}
};
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
class Compiler { class Compiler {
public: public:
pkpy::unique_ptr<Parser> parser; pkpy::unique_ptr<Parser> parser;
std::stack<_Code> codes; std::stack<_Code> codes;
std::stack<Loop> loops;
bool isCompilingClass = false; bool isCompilingClass = false;
int lexingCnt = 0; int lexingCnt = 0;
VM* vm; VM* vm;
@ -5156,10 +5196,6 @@ public:
return parser->src->mode; return parser->src->mode;
} }
Loop& getLoop() {
return loops.top();
}
Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){ Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
this->vm = vm; this->vm = vm;
this->parser = pkpy::make_unique<Parser>( this->parser = pkpy::make_unique<Parser>(
@ -5689,8 +5725,8 @@ __LISTCOMP:
patchJump(_skipPatch); patchJump(_skipPatch);
emitCode(OP_GET_ITER); emitCode(OP_GET_ITER);
Loop& loop = enterLoop(); getCode()->__enterBlock(FOR_LOOP);
int patch = emitCode(OP_FOR_ITER); emitCode(OP_FOR_ITER);
if(_cond_end_return != -1) { // there is an if condition if(_cond_end_return != -1) { // there is an if condition
emitCode(OP_JUMP_ABSOLUTE, _cond_start); emitCode(OP_JUMP_ABSOLUTE, _cond_start);
@ -5706,9 +5742,8 @@ __LISTCOMP:
emitCode(OP_LIST_APPEND); emitCode(OP_LIST_APPEND);
} }
emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine(); emitCode(OP_LOOP_CONTINUE); keepOpcodeLine();
patchJump(patch); getCode()->__exitBlock();
exitLoop();
matchNewLines(mode()==SINGLE_MODE); matchNewLines(mode()==SINGLE_MODE);
consume(TK("]")); consume(TK("]"));
} }
@ -5824,7 +5859,7 @@ __LISTCOMP:
int emitCode(Opcode opcode, int arg=-1) { int emitCode(Opcode opcode, int arg=-1) {
int line = parser->previous.line; int line = parser->previous.line;
getCode()->co_code.push_back( getCode()->co_code.push_back(
ByteCode{(uint8_t)opcode, arg, (uint16_t)line, (uint16_t)getCode()->_currLoopIndex} ByteCode{(uint8_t)opcode, arg, (uint16_t)line, (uint16_t)getCode()->_currBlockIndex}
); );
return getCode()->co_code.size() - 1; return getCode()->co_code.size() - 1;
} }
@ -5931,28 +5966,14 @@ __LISTCOMP:
} }
} }
Loop& enterLoop(){
getCode()->__enterLoop(loops.size()+1);
Loop lp((int)getCode()->co_code.size());
loops.push(lp);
return loops.top();
}
void exitLoop(){
getCode()->__exitLoop();
Loop& lp = loops.top();
for(int addr : lp.breaks) patchJump(addr);
loops.pop();
}
void compileWhileLoop() { void compileWhileLoop() {
Loop& loop = enterLoop(); getCode()->__enterBlock(WHILE_LOOP);
EXPR_TUPLE(); EXPR_TUPLE();
int patch = emitCode(OP_POP_JUMP_IF_FALSE); int patch = emitCode(OP_POP_JUMP_IF_FALSE);
compileBlockBody(); compileBlockBody();
emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine(); emitCode(OP_LOOP_CONTINUE); keepOpcodeLine();
patchJump(patch); patchJump(patch);
exitLoop(); getCode()->__exitBlock();
} }
void EXPR_FOR_VARS(){ void EXPR_FOR_VARS(){
@ -5965,26 +5986,40 @@ __LISTCOMP:
} }
void compileForLoop() { void compileForLoop() {
EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE(); EXPR_FOR_VARS();consume(TK("in")); EXPR_TUPLE();
emitCode(OP_GET_ITER); emitCode(OP_GET_ITER);
Loop& loop = enterLoop(); getCode()->__enterBlock(FOR_LOOP);
int patch = emitCode(OP_FOR_ITER); emitCode(OP_FOR_ITER);
compileBlockBody(); compileBlockBody();
emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine(); emitCode(OP_LOOP_CONTINUE); keepOpcodeLine();
getCode()->__exitBlock();
}
void compileTryExcept() {
getCode()->__enterBlock(TRY_EXCEPT);
compileBlockBody();
int patch = emitCode(OP_JUMP_ABSOLUTE);
getCode()->__exitBlock();
consume(TK("except"));
if(match(TK("@id"))){ // exception name
compileBlockBody();
}
if(match(TK("finally"))){
consume(TK(":"));
syntaxError("finally is not supported yet");
}
patchJump(patch); patchJump(patch);
exitLoop();
} }
void compileStatement() { void compileStatement() {
if (match(TK("break"))) { if (match(TK("break"))) {
if (loops.empty()) syntaxError("'break' outside loop"); if (!getCode()->__isCurrBlockLoop()) syntaxError("'break' outside loop");
consumeEndStatement(); consumeEndStatement();
int patch = emitCode(OP_SAFE_JUMP_ABSOLUTE); emitCode(OP_LOOP_BREAK);
getLoop().breaks.push_back(patch);
} else if (match(TK("continue"))) { } else if (match(TK("continue"))) {
if (loops.empty()) syntaxError("'continue' not properly in loop"); if (!getCode()->__isCurrBlockLoop()) syntaxError("'continue' not properly in loop");
consumeEndStatement(); consumeEndStatement();
emitCode(OP_JUMP_ABSOLUTE, getLoop().start); emitCode(OP_LOOP_CONTINUE);
} else if (match(TK("return"))) { } else if (match(TK("return"))) {
if (codes.size() == 1) if (codes.size() == 1)
syntaxError("'return' outside function"); syntaxError("'return' outside function");
@ -6001,7 +6036,9 @@ __LISTCOMP:
compileWhileLoop(); compileWhileLoop();
} else if (match(TK("for"))) { } else if (match(TK("for"))) {
compileForLoop(); compileForLoop();
} else if(match(TK("assert"))){ } else if (match(TK("try"))) {
compileTryExcept();
}else if(match(TK("assert"))){
EXPR(); EXPR();
emitCode(OP_ASSERT); emitCode(OP_ASSERT);
consumeEndStatement(); consumeEndStatement();
@ -6035,7 +6072,12 @@ __LISTCOMP:
} else if(match(TK("raise"))){ } else if(match(TK("raise"))){
consume(TK("@id")); // dummy exception type consume(TK("@id")); // dummy exception type
emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str()))); emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
consume(TK("("));EXPR();consume(TK(")")); if(match(TK("("))){
EXPR();
consume(TK(")"));
}else{
emitCode(OP_LOAD_NONE); // ...?
}
emitCode(OP_RAISE_ERROR); emitCode(OP_RAISE_ERROR);
consumeEndStatement(); consumeEndStatement();
} else if(match(TK("del"))){ } else if(match(TK("del"))){
@ -6053,13 +6095,10 @@ __LISTCOMP:
} else { } else {
EXPR_ANY(); EXPR_ANY();
consumeEndStatement(); consumeEndStatement();
// If last op is not an assignment, pop the result. // If last op is not an assignment, pop the result.
uint8_t lastOp = getCode()->co_code.back().op; uint8_t lastOp = getCode()->co_code.back().op;
if( lastOp != OP_STORE_NAME_REF && lastOp != OP_STORE_REF){ if( lastOp!=OP_STORE_NAME_REF && lastOp!=OP_STORE_REF){
if(mode()==SINGLE_MODE && parser->indents.top() == 0){ if(mode()==SINGLE_MODE && parser->indents.top()==0) emitCode(OP_PRINT_EXPR);
emitCode(OP_PRINT_EXPR);
}
emitCode(OP_POP_TOP); emitCode(OP_POP_TOP);
} }
} }
@ -6078,7 +6117,6 @@ __LISTCOMP:
isCompilingClass = true; isCompilingClass = true;
__compileBlockBody(&Compiler::compileFunction); __compileBlockBody(&Compiler::compileFunction);
isCompilingClass = false; isCompilingClass = false;
if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE); if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
else emitCode(OP_LOAD_NAME_REF, superClsNameIdx); else emitCode(OP_LOAD_NAME_REF, superClsNameIdx);
emitCode(OP_BUILD_CLASS, clsNameIdx); emitCode(OP_BUILD_CLASS, clsNameIdx);

@ -1 +1 @@
Subproject commit ebafc0182c61f6f6aa5d2a2a382e7dcafa8be3cd Subproject commit 2e0c419cbb8a9f6dfa9390c8c87593fece581925