From c6236465012fe5c7a0fbbd5c686e85637ea92e6b Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Mon, 7 Nov 2022 15:04:41 +0800 Subject: [PATCH] add pointer system --- amalgamate.py | 2 +- src/codeobject.h | 33 +++++------ src/compiler.h | 149 +++++++++++++++-------------------------------- src/obj.h | 4 +- src/opcodes.h | 19 +++--- src/parser.h | 1 + src/pointer.h | 47 +++++++++++++++ src/vm.h | 147 ++++++++++++++++++++++------------------------ 8 files changed, 192 insertions(+), 210 deletions(-) create mode 100644 src/pointer.h diff --git a/amalgamate.py b/amalgamate.py index f6a45b0b..ebbdcf68 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -3,7 +3,7 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f: pipeline = [ ["str.h", "builtins.h"], - ["obj.h", "iter.h", "parser.h", "codeobject.h"], + ["obj.h", "iter.h", "parser.h", "pointer.h", "codeobject.h"], ["error.h", "vm.h", "compiler.h"], ["pocketpy.h"] ] diff --git a/src/codeobject.h b/src/codeobject.h index 0cbda99c..a1bb45b1 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -1,6 +1,7 @@ #pragma once #include "obj.h" +#include "pointer.h" enum Opcode { #define OPCODE(name) OP_##name, @@ -31,28 +32,22 @@ public: _Str co_name; PyVarList co_consts; - std::vector<_Str> co_names; + std::vector co_name_ptrs; + + int addNamePtr(const _Str& name, NameScope scope){ + auto p = NamePointer(name, scope); + for(int i=0; icurrent, "expected statement end"); } - bool matchAssignment() { - if (match(TK("="))) return true; - if (match(TK("+="))) return true; - if (match(TK("-="))) return true; - if (match(TK("*="))) return true; - if (match(TK("/="))) return true; - if (match(TK("//="))) return true; - return false; - } - -#define OP_STORE_AUTO (codes.size()==1) ? OP_STORE_NAME : OP_STORE_FAST - void exprLiteral() { PyVar value = parser->previous.value; int index = getCode()->addConst(value); @@ -304,55 +298,41 @@ public: } - void exprName() { - Token tkname = parser->previous; - _Str name(tkname.start, tkname.length); - int index = getCode()->addName(name); - - if (l_value && matchAssignment()) { - _TokenType assignment = parser->previous.type; - matchNewLines(); - if (assignment == TK("=")) { // name = (expr); - compileExpressionTuple(); - } else { // name += / -= / *= ... = (expr); - emitCode(OP_LOAD_NAME, index); - compileExpression(); - emitAssignOp(assignment); + void exprAssign(){ + _TokenType op = parser->previous.type; + if(op == TK("=")) { // a = (expr) + parsePrecedence((Precedence)(rules[op].precedence + 1)); + emitCode(OP_STORE_PTR); + }else{ // a += (expr) -> a = a + (expr) + // TODO: optimization is needed for inplace operators + emitCode(OP_DUP_TOP); + parsePrecedence((Precedence)(rules[op].precedence + 1)); + switch (op) { + case TK("+="): emitCode(OP_BINARY_OP, 0); break; + case TK("-="): emitCode(OP_BINARY_OP, 1); break; + case TK("*="): emitCode(OP_BINARY_OP, 2); break; + case TK("/="): emitCode(OP_BINARY_OP, 3); break; + case TK("//="): emitCode(OP_BINARY_OP, 4); break; + default: UNREACHABLE(); } - emitCode(OP_STORE_AUTO, index); - } else { // Just the name and no assignment followed by. - emitCode(OP_LOAD_NAME, index); - } - } - - void emitAssignOp(_TokenType assignment){ - switch (assignment) { - case TK("+="): emitCode(OP_BINARY_OP, 0); break; - case TK("-="): emitCode(OP_BINARY_OP, 1); break; - case TK("*="): emitCode(OP_BINARY_OP, 2); break; - case TK("/="): emitCode(OP_BINARY_OP, 3); break; - case TK("//="): emitCode(OP_BINARY_OP, 4); break; - default: UNREACHABLE(); + emitCode(OP_STORE_PTR); } } void exprOr() { int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP); - matchNewLines(); parsePrecedence(PREC_LOGICAL_OR); patchJump(patch); } void exprAnd() { int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP); - matchNewLines(); parsePrecedence(PREC_LOGICAL_AND); patchJump(patch); } void exprBinaryOp() { _TokenType op = parser->previous.type; - matchNewLines(); parsePrecedence((Precedence)(rules[op].precedence + 1)); switch (op) { @@ -422,38 +402,25 @@ public: emitCode(OP_CALL, ARGC); } + void exprName() { + Token tkname = parser->previous; + int index = getCode()->addNamePtr( + tkname.str(), + codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL + ); + emitCode(OP_LOAD_NAME_PTR, index); + } + void exprAttrib() { consume(TK("@id")); const _Str& name = parser->previous.str(); - int index = getCode()->addName(name); - - if (match(TK("("))) { - emitCode(OP_LOAD_ATTR, index); - exprCall(); - return; - } - - if (l_value && matchAssignment()) { - _TokenType assignment = parser->previous.type; - matchNewLines(); - if (assignment == TK("=")) { - compileExpressionTuple(); - } else { // name += / -= / *= ... = (expr); - emitCode(OP_DUP_TOP); - emitCode(OP_LOAD_ATTR, index); - compileExpression(); - emitAssignOp(assignment); - } - emitCode(OP_STORE_ATTR, index); - } else { - emitCode(OP_LOAD_ATTR, index); - } + int index = getCode()->addNamePtr(name, NAME_ATTR); + emitCode(OP_BUILD_ATTR_PTR, index); } // [:], [:b] // [a], [a:], [a:b] void exprSubscript() { - bool slice = false; if(match(TK(":"))){ emitCode(OP_LOAD_NONE); if(match(TK("]"))){ @@ -463,7 +430,6 @@ public: consume(TK("]")); } emitCode(OP_BUILD_SLICE); - slice = true; }else{ compileExpression(); if(match(TK(":"))){ @@ -474,26 +440,12 @@ public: consume(TK("]")); } emitCode(OP_BUILD_SLICE); - slice = true; }else{ consume(TK("]")); } } - if (l_value && matchAssignment()) { - if(slice) throw SyntaxError(path, parser->previous, "can't assign to slice"); - _TokenType assignment = parser->previous.type; - matchNewLines(); - - if (assignment == TK("=")) { - compileExpressionTuple(); - } else { - UNREACHABLE(); - } - emitCode(OP_STORE_SUBSCR); - } else { - emitCode(OP_BINARY_SUBSCR); - } + emitCode(OP_BUILD_INDEX_PTR); } void exprValue() { @@ -569,7 +521,7 @@ public: Token compileImportPath() { consume(TK("@id")); Token tkmodule = parser->previous; - int index = getCode()->addName(tkmodule.str()); + int index = getCode()->addNamePtr(tkmodule.str(), NAME_GLOBAL); emitCode(OP_IMPORT_NAME, index); return tkmodule; } @@ -582,8 +534,8 @@ public: consume(TK("@id")); tkmodule = parser->previous; } - int index = getCode()->addName(tkmodule.str()); - emitCode(OP_STORE_NAME, index); + int index = getCode()->addNamePtr(tkmodule.str(), NAME_GLOBAL); + emitCode(OP_STORE_NAME_PTR, index); } while (match(TK(",")) && (matchNewLines(), true)); consumeEndStatement(); } @@ -650,14 +602,16 @@ public: void compileForStatement() { consume(TK("@id")); - const _Str& iterName = parser->previous.str(); - int iterIndex = getCode()->addName(iterName); + int iterIndex = getCode()->addNamePtr( + parser->previous.str(), + codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL + ); consume(TK("in")); compileExpressionTuple(); emitCode(OP_GET_ITER); Loop& loop = enterLoop(true); int patch = emitCode(OP_FOR_ITER); - emitCode(OP_STORE_AUTO, iterIndex); + emitCode(OP_STORE_NAME_PTR, iterIndex); compileBlockBody(); emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine(); patchJump(patch); @@ -704,15 +658,8 @@ public: emitCode(OP_RAISE_ERROR); consumeEndStatement(); } else if(match(TK("del"))){ - // TODO: The del implementation is problematic in some cases. compileExpression(); - ByteCode& lastCode = getCode()->co_code.back(); - if(lastCode.op == OP_BINARY_SUBSCR){ - lastCode.op = OP_DELETE_SUBSCR; - lastCode.arg = -1; - }else{ - throw SyntaxError(path, parser->previous, "you should use 'del a[b]' syntax"); - } + emitCode(OP_DELETE_PTR); consumeEndStatement(); } else if(match(TK("pass"))){ consumeEndStatement(); @@ -722,7 +669,7 @@ public: // If last op is not an assignment, pop the result. uint8_t lastOp = getCode()->co_code.back().op; - if( lastOp != OP_STORE_NAME && lastOp != OP_STORE_FAST && lastOp != OP_STORE_SUBSCR && lastOp != OP_STORE_ATTR){ + if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){ if(repl_mode && parser->indents.top() == 0){ emitCode(OP_PRINT_EXPR); } @@ -733,11 +680,11 @@ public: void compileClass(){ consume(TK("@id")); - int clsNameIdx = getCode()->addName(parser->previous.str()); + int clsNameIdx = getCode()->addNamePtr(parser->previous.str(), NAME_GLOBAL); int superClsNameIdx = -1; if(match(TK("("))){ consume(TK("@id")); - superClsNameIdx = getCode()->addName(parser->previous.str()); + superClsNameIdx = getCode()->addNamePtr(parser->previous.str(), NAME_GLOBAL); consume(TK(")")); } emitCode(OP_LOAD_NONE); @@ -746,7 +693,7 @@ public: isCompilingClass = false; if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE); - else emitCode(OP_LOAD_NAME, superClsNameIdx); + else emitCode(OP_LOAD_NAME_PTR, superClsNameIdx); emitCode(OP_BUILD_CLASS, clsNameIdx); } diff --git a/src/obj.h b/src/obj.h index cebaf7fd..36c29e9d 100644 --- a/src/obj.h +++ b/src/obj.h @@ -12,12 +12,14 @@ class PyObject; class CodeObject; +class BasePointer; class VM; typedef std::shared_ptr PyVar; typedef PyVar PyVarOrNull; typedef std::vector PyVarList; typedef std::unordered_map<_Str, PyVar> StlDict; +typedef std::shared_ptr _Pointer; typedef PyVar (*_CppFunc)(VM*, PyVarList); typedef std::shared_ptr _Code; @@ -60,7 +62,7 @@ public: _Iterator(PyVar _ref) : _ref(_ref) {} }; -typedef std::variant,BoundedMethod,_Range,_Slice> _Value; +typedef std::variant,BoundedMethod,_Range,_Slice,_Pointer> _Value; #define UNREACHABLE() throw std::runtime_error("Unreachable code") diff --git a/src/opcodes.h b/src/opcodes.h index c255bc37..4de76db2 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -1,12 +1,7 @@ #ifdef OPCODE OPCODE(LOAD_CONST) -OPCODE(LOAD_NAME) - OPCODE(IMPORT_NAME) -OPCODE(STORE_FAST) -OPCODE(STORE_NAME) - OPCODE(PRINT_EXPR) OPCODE(POP_TOP) OPCODE(CALL) @@ -28,13 +23,6 @@ OPCODE(BUILD_MAP) OPCODE(BUILD_SLICE) OPCODE(UNPACK_SEQUENCE) -OPCODE(BINARY_SUBSCR) -OPCODE(STORE_SUBSCR) -OPCODE(DELETE_SUBSCR) - -OPCODE(LOAD_ATTR) -OPCODE(STORE_ATTR) - OPCODE(GET_ITER) OPCODE(FOR_ITER) @@ -54,4 +42,11 @@ OPCODE(RAISE_ERROR) OPCODE(STORE_FUNCTION) OPCODE(BUILD_CLASS) +OPCODE(LOAD_NAME_PTR) // no arg +OPCODE(BUILD_ATTR_PTR) // arg for the name_ptr, [ptr, name_ptr] -> (*ptr).name_ptr +OPCODE(BUILD_INDEX_PTR) // no arg, [ptr, expr] -> (*ptr)[expr] +OPCODE(STORE_NAME_PTR) // arg for the name_ptr, [expr], directly store to the name_ptr without pushing it to the stack +OPCODE(STORE_PTR) // no arg, [ptr, expr] -> *ptr = expr +OPCODE(DELETE_PTR) // no arg, [ptr] -> [] -> delete ptr + #endif \ No newline at end of file diff --git a/src/parser.h b/src/parser.h index 64980c2a..265f68ad 100644 --- a/src/parser.h +++ b/src/parser.h @@ -66,6 +66,7 @@ struct Token{ enum Precedence { PREC_NONE, PREC_LOWEST, + PREC_ASSIGNMENT, // = PREC_LOGICAL_OR, // or PREC_LOGICAL_AND, // and PREC_EQUALITY, // == != diff --git a/src/pointer.h b/src/pointer.h new file mode 100644 index 00000000..4a0d4f23 --- /dev/null +++ b/src/pointer.h @@ -0,0 +1,47 @@ +#pragma once + +#include "obj.h" + +class Frame; + +struct BasePointer { + virtual PyVar get(VM*, Frame*) const = 0; + virtual void set(VM*, Frame*, PyVar) const = 0; +}; + +enum NameScope { + NAME_LOCAL = 0, + NAME_GLOBAL = 1, + NAME_ATTR = 2, +}; + +struct NamePointer : BasePointer { + const _Str name; + const NameScope scope; + NamePointer(const _Str& name, NameScope scope) : name(name), scope(scope) {} + + PyVar get(VM* vm, Frame* frame) const; + void set(VM* vm, Frame* frame, PyVar val) const; + + bool operator==(const NamePointer& other) const { + return name == other.name && scope == other.scope; + } +}; + +struct AttrPointer : BasePointer { + const _Pointer root; + const NamePointer* attr; + AttrPointer(const _Pointer& root, const NamePointer* attr) : root(root), attr(attr) {} + + PyVar get(VM* vm, Frame* frame) const; + void set(VM* vm, Frame* frame, PyVar val) const; +}; + +struct IndexPointer : BasePointer { + const _Pointer root; + const PyVar index; + IndexPointer(_Pointer root, PyVar index) : root(root), index(index) {} + + PyVar get(VM* vm, Frame* frame) const; + void set(VM* vm, Frame* frame, PyVar val) const; +}; diff --git a/src/vm.h b/src/vm.h index 4ffd20f1..6cfd28d2 100644 --- a/src/vm.h +++ b/src/vm.h @@ -136,46 +136,33 @@ public: switch (byte.op) { - case OP_LOAD_CONST: - frame->pushValue(frame->code->co_consts[byte.arg]); - break; - case OP_LOAD_NAME: - { - const _Str& name = frame->code->co_names[byte.arg]; - auto it = frame->f_locals.find(name); - if(it != frame->f_locals.end()){ - frame->pushValue(it->second); - break; - } - - it = frame->f_globals->find(name); - if(it != frame->f_globals->end()){ - frame->pushValue(it->second); - break; - } - - it = builtins->attribs.find(name); - if(it != builtins->attribs.end()){ - frame->pushValue(it->second); - break; - } - - nameError(name); - } break; - case OP_STORE_FAST: - { - const _Str& name = frame->code->co_names[byte.arg]; - frame->f_locals[name] = frame->popValue(); - } break; - case OP_STORE_NAME: - { - const _Str& name = frame->code->co_names[byte.arg]; - if(frame->f_locals.find(name) != frame->f_locals.end()){ - frame->f_locals[name] = frame->popValue(); - }else{ - frame->f_globals->operator[](name) = frame->popValue(); - } - } break; + case OP_LOAD_CONST: frame->pushValue(frame->code->co_consts[byte.arg]); break; + case OP_LOAD_NAME_PTR: { + const NamePointer* p = &frame->code->co_name_ptrs[byte.arg]; + frame->pushValue(PyPointer(_Pointer(p))); + } break; + case OP_STORE_NAME_PTR: { + const NamePointer& p = frame->code->co_name_ptrs[byte.arg]; + p.set(this, frame.get(), frame->popValue()); + } break; + case OP_BUILD_ATTR_PTR: { + const NamePointer* p = &frame->code->co_name_ptrs[byte.arg]; + _Pointer root = PyPointer_AS_C(frame->popValue()); + frame->pushValue(PyPointer( + std::make_shared(root, p) + )); + } break; + case OP_BUILD_INDEX_PTR: { + PyVar index = frame->popValue(); + _Pointer root = PyPointer_AS_C(frame->popValue()); + frame->pushValue(PyPointer( + std::make_shared(root, index) + )); + } break; + case OP_STORE_PTR: { + _Pointer p = PyPointer_AS_C(frame->popValue()); + p->set(this, frame.get(), frame->popValue()); + } break; case OP_STORE_FUNCTION: { PyVar obj = frame->popValue(); @@ -184,7 +171,7 @@ public: } break; case OP_BUILD_CLASS: { - _Str clsName = frame->code->co_names[byte.arg]; + const _Str& clsName = frame->code->co_name_ptrs[byte.arg].name; PyVar clsBase = frame->popValue(); if(clsBase == None) clsBase = _tp_object; __checkType(clsBase, _tp_type); @@ -248,19 +235,6 @@ public: PyVar obj_bool = asBool(obj); frame->pushValue(PyBool(!PyBool_AS_C(obj_bool))); } break; - case OP_LOAD_ATTR: - { - PyVar obj = frame->popValue(); - const _Str& name = frame->code->co_names[byte.arg]; - frame->pushValue(getAttr(obj, name)); - } break; - case OP_STORE_ATTR: - { - PyVar value = frame->popValue(); - PyVar obj = frame->popValue(); - const _Str& name = frame->code->co_names[byte.arg]; - setAttr(obj, name, value); - } break; case OP_POP_JUMP_IF_FALSE: if(!PyBool_AS_C(asBool(frame->popValue()))) frame->jumpTo(byte.arg); break; @@ -294,19 +268,6 @@ public: PyVarList items = frame->popNReversed(byte.arg); frame->pushValue(PyTuple(items)); } break; - case OP_BINARY_SUBSCR: - { - PyVar key = frame->popValue(); - PyVar obj = frame->popValue(); - frame->pushValue(call(obj, __getitem__, {key})); - } break; - case OP_STORE_SUBSCR: - { - PyVar value = frame->popValue(); - PyVar key = frame->popValue(); - PyVar obj = frame->popValue(); - call(obj, __setitem__, {key, value}); - } break; case OP_DUP_TOP: frame->pushValue(frame->topValue()); break; case OP_CALL: { @@ -363,7 +324,7 @@ public: } break; case OP_IMPORT_NAME: { - const _Str& name = frame->code->co_names[byte.arg]; + const _Str& name = frame->code->co_name_ptrs[byte.arg].name; auto it = _modules.find(name); if(it == _modules.end()){ _error("ImportError", "module '" + name + "' not found"); @@ -371,12 +332,6 @@ public: frame->pushValue(it->second); } } break; - case OP_DELETE_SUBSCR: - { - PyVar index = frame->popValue(); - PyVar obj = frame->popValue(); - call(obj, "__delitem__", {index}); - } break; default: _error("SystemError", _Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); break; @@ -525,7 +480,7 @@ public: PyVar _tp_object, _tp_type, _tp_int, _tp_float, _tp_bool, _tp_str; PyVar _tp_list, _tp_tuple; PyVar _tp_function, _tp_native_function, _tp_native_iterator, _tp_bounded_method; - PyVar _tp_slice, _tp_range, _tp_module; + PyVar _tp_slice, _tp_range, _tp_module, _tp_pointer; DEF_NATIVE(Int, int, _tp_int) DEF_NATIVE(Float, float, _tp_float) @@ -538,6 +493,7 @@ public: DEF_NATIVE(BoundedMethod, BoundedMethod, _tp_bounded_method) DEF_NATIVE(Range, _Range, _tp_range) DEF_NATIVE(Slice, _Slice, _tp_slice) + DEF_NATIVE(Pointer, _Pointer, _tp_pointer) inline bool PyBool_AS_C(PyVar obj){return obj == True;} inline PyVar PyBool(bool value){return value ? True : False;} @@ -558,6 +514,7 @@ public: _tp_slice = newClassType("slice"); _tp_range = newClassType("range"); _tp_module = newClassType("module"); + _tp_pointer = newClassType("_pointer"); newClassType("NoneType"); @@ -605,4 +562,42 @@ public: exec(code, {}, _m); _modules[name] = _m; } -}; \ No newline at end of file +}; + +/**************** Pointers' Impl ****************/ + +PyVar NamePointer::get(VM* vm, Frame* frame) const{ + switch(scope) { + case NAME_LOCAL: frame->f_locals[name] = frame->popValue(); break; + case NAME_GLOBAL: frame->f_globals->operator[](name) = frame->popValue(); break; + } + UNREACHABLE(); +} + +void NamePointer::set(VM* vm, Frame* frame, PyVar val) const{ + switch(scope) { + case NAME_LOCAL: frame->f_locals[name] = val; break; + case NAME_GLOBAL: frame->f_globals->operator[](name) = val; break; + } + UNREACHABLE(); +} + +PyVar AttrPointer::get(VM* vm, Frame* frame) const{ + PyVar obj = root->get(vm, frame); + return vm->getAttr(obj, attr->name); +} + +void AttrPointer::set(VM* vm, Frame* frame, PyVar val) const{ + PyVar obj = root->get(vm, frame); + vm->setAttr(obj, attr->name, val); +} + +PyVar IndexPointer::get(VM* vm, Frame* frame) const{ + PyVar obj = root->get(vm, frame); + return vm->call(obj, __getitem__, {index}); +} + +void IndexPointer::set(VM* vm, Frame* frame, PyVar val) const{ + PyVar obj = root->get(vm, frame); + vm->call(obj, __setitem__, {index, val}); +} \ No newline at end of file