diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c01fbb29..b23cb7b1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -5,12 +5,8 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v3 - - name: Setup Clang - uses: egor-tensin/setup-clang@v1 - with: - version: 15 - platform: x64 - - name: Compiling + - uses: ilammy/msvc-dev-cmd@v1 + - name: Compile shell: bash run: | python3 build.py windows @@ -25,32 +21,6 @@ jobs: run: python3 scripts/run_tests.py - name: Benchmark run: python3 scripts/run_tests.py benchmark - build_web: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Setup emsdk - uses: mymindstorm/setup-emsdk@v12 - with: - version: 3.1.25 - actions-cache-folder: 'emsdk-cache' - - name: Verify emsdk - run: emcc -v - - name: Compiling - run: | - mkdir -p output/web/lib - python3 build.py web - cp web/lib/* output/web/lib - - uses: crazy-max/ghaction-github-pages@v3 - with: - target_branch: gh-pages - build_dir: web - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - if: github.event_name == 'push' - - uses: actions/upload-artifact@v3 - with: - path: output build_linux: runs-on: ubuntu-latest steps: @@ -60,16 +30,17 @@ jobs: with: version: 15 platform: x64 - - name: Coverage Test - run: | - sudo apt install -y libc++-15-dev libc++1-15 libc++abi-15-dev libc++abi1-15 libclang-rt-15-dev - python3 preprocess.py - bash run_tests.sh - - uses: actions/upload-artifact@v3 - with: - name: coverage - path: .coverage - - name: Compiling + - name: Install libc++ + run: sudo apt install -y libc++-15-dev libc++1-15 libc++abi-15-dev libc++abi1-15 libclang-rt-15-dev + # - name: Coverage Test + # run: | + # python3 preprocess.py + # bash run_tests.sh + # - uses: actions/upload-artifact@v3 + # with: + # name: coverage + # path: .coverage + - name: Compile run: | python3 build.py linux python3 build.py linux -lib @@ -83,6 +54,19 @@ jobs: run: python3 scripts/run_tests.py - name: Benchmark run: python3 scripts/run_tests.py benchmark + build_macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v3 + - run: | + python3 amalgamate.py + cd plugins/macos/pocketpy + mkdir -p output/macos + xcodebuild clean build CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO + cp -r build/Release/pocketpy.bundle output/macos + - uses: actions/upload-artifact@v3 + with: + path: plugins/macos/pocketpy/output build_android: runs-on: ubuntu-latest steps: @@ -93,7 +77,7 @@ jobs: channel: 'stable' cache: true - run: flutter --version - - name: Compiling + - name: Compile run: | python3 amalgamate.py cd plugins/flutter/example @@ -114,16 +98,29 @@ jobs: - uses: actions/upload-artifact@v3 with: path: plugins/flutter/example/build/app/outputs/flutter-apk/output - build_macos: - runs-on: macos-latest - steps: - - uses: actions/checkout@v3 - - run: | - python3 amalgamate.py - cd plugins/macos/pocketpy - mkdir -p output/macos - xcodebuild clean build CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO - cp -r build/Release/pocketpy.bundle output/macos - - uses: actions/upload-artifact@v3 - with: - path: plugins/macos/pocketpy/output \ No newline at end of file + build_web: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Setup emsdk + uses: mymindstorm/setup-emsdk@v12 + with: + version: 3.1.25 + actions-cache-folder: 'emsdk-cache' + - name: Verify emsdk + run: emcc -v + - name: Compile + run: | + mkdir -p output/web/lib + python3 build.py web + cp web/lib/* output/web/lib + - uses: crazy-max/ghaction-github-pages@v3 + with: + target_branch: gh-pages + build_dir: web + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + - uses: actions/upload-artifact@v3 + with: + path: output \ No newline at end of file diff --git a/.gitignore b/.gitignore index 33c370e4..67e402d6 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ plugins/godot/godot-cpp/ src/_generated.h profile.sh test +tmp.rar diff --git a/amalgamate.py b/amalgamate.py index b2564c5b..9775e061 100644 --- a/amalgamate.py +++ b/amalgamate.py @@ -6,9 +6,9 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f: OPCODES_TEXT = f.read() pipeline = [ - ["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h"], - ["obj.h", "parser.h", "codeobject.h", "frame.h"], - ["vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"], + ["common.h", "memory.h", "vector.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"], + ["obj.h", "codeobject.h", "frame.h"], + ["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"], ["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"] ] diff --git a/benchmarks/fib.py b/benchmarks/fib.py index 377a1a87..1bb89670 100644 --- a/benchmarks/fib.py +++ b/benchmarks/fib.py @@ -3,4 +3,6 @@ def fib(n): return n return fib(n-1) + fib(n-2) -assert fib(32) == 2178309 \ No newline at end of file +assert fib(32) == 2178309 + +# 7049155 calls \ No newline at end of file diff --git a/build.py b/build.py index 42f3de6a..f186572f 100644 --- a/build.py +++ b/build.py @@ -20,7 +20,7 @@ def lib_pre_build(): def lib_post_build(): os.remove("src/tmp.cpp") -windows_common = "clang-cl.exe -std:c++17 /utf-8 -GR- -EHsc -O2 -Wno-deprecated-declarations" +windows_common = "CL -std:c++17 /utf-8 -GR- -EHsc -O2" windows_cmd = windows_common + " -Fe:pocketpy src/main.cpp" windows_lib_cmd = windows_common + " -LD -Fe:pocketpy src/tmp.cpp" diff --git a/preprocess.py b/preprocess.py index deffcff7..23ac599e 100644 --- a/preprocess.py +++ b/preprocess.py @@ -20,7 +20,7 @@ def generate_python_sources(): #include namespace pkpy{ - std::map kPythonLibs = { + inline static std::map kPythonLibs = { ''' for key, value in sources.items(): header += ' '*8 + '{"' + key + '", "' + value + '"},' diff --git a/python/dict.py b/python/_dict.py similarity index 94% rename from python/dict.py rename to python/_dict.py index 2a3b8137..24adfc9b 100644 --- a/python/dict.py +++ b/python/_dict.py @@ -1,8 +1,12 @@ class dict: - def __init__(self, capacity=13): - self._capacity = capacity + def __init__(self, mapping=None): + self._capacity = 16 self._a = [None] * self._capacity self._len = 0 + + if mapping is not None: + for k,v in mapping: + self[k] = v def __len__(self): return self._len diff --git a/python/set.py b/python/_set.py similarity index 100% rename from python/set.py rename to python/_set.py diff --git a/run_profile.sh b/run_profile.sh new file mode 100644 index 00000000..c7bea6c3 --- /dev/null +++ b/run_profile.sh @@ -0,0 +1,5 @@ +clang++ -pg -O2 -std=c++17 -fno-rtti -stdlib=libc++ -Wall -o pocketpy src/main.cpp +time ./pocketpy benchmarks/fib.py +mv benchmarks/gmon.out . +gprof pocketpy gmon.out > gprof.txt +rm gmon.out \ No newline at end of file diff --git a/run_profile_test.sh b/run_profile_test.sh new file mode 100644 index 00000000..9d7ccc71 --- /dev/null +++ b/run_profile_test.sh @@ -0,0 +1,10 @@ +clang++ -O2 -std=c++17 -fno-rtti --coverage -stdlib=libc++ -Wall -o pocketpy src/main.cpp +time ./pocketpy benchmarks/fib.py +rm -rf .coverage +mkdir -p .coverage +llvm-cov-15 gcov main.gc -r -s src/ >> .coverage/coverage.txt +mv *.gcov .coverage +rm main.gc* + +# -fprofile-instr-generate -fcoverage-mapping +# llvm-cov-15 show main.gc -instr-profile=default.profraw -format=html -output-dir .coverage \ No newline at end of file diff --git a/scripts/run_tests.py b/scripts/run_tests.py index 8641d221..f444f235 100644 --- a/scripts/run_tests.py +++ b/scripts/run_tests.py @@ -27,7 +27,11 @@ def test_dir(path): print(f' cpython: {_1 - _0:.6f}s (100%)') print(f' pocketpy: {_2 - _1:.6f}s ({(_2 - _1) / (_1 - _0) * 100:.2f}%)') else: - if not test_file(filepath): exit(1) + if not test_file(filepath): + print('-' * 50) + print("TEST FAILED! Press any key to continue...") + input() + if len(sys.argv) == 2: assert 'benchmark' in sys.argv[1] diff --git a/src/ceval.h b/src/ceval.h index 2eaed684..edad3433 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -1,350 +1,426 @@ #pragma once +#include "common.h" #include "vm.h" -#include "ref.h" namespace pkpy{ -Str _read_file_cwd(const Str& name, bool* ok); +#define DISPATCH() goto __NEXT_STEP -PyVar VM::run_frame(Frame* frame){ - while(frame->has_next_bytecode()){ - const Bytecode& byte = frame->next_bytecode(); - switch (byte.op) - { - case OP_NO_OP: continue; - case OP_SETUP_DECORATOR: continue; - case OP_LOAD_CONST: frame->push(frame->co->consts[byte.arg]); continue; - case OP_LOAD_FUNCTION: { - const PyVar obj = frame->co->consts[byte.arg]; - Function f = CAST(Function, obj); // copy - f._module = frame->_module; - frame->push(VAR(f)); - } continue; - case OP_SETUP_CLOSURE: { - Function& f = CAST(Function&, frame->top()); // reference - f._closure = frame->_locals; - } continue; - case OP_LOAD_NAME_REF: { - frame->push(PyRef(NameRef(frame->co->names[byte.arg]))); - } continue; - case OP_LOAD_NAME: { - frame->push(NameRef(frame->co->names[byte.arg]).get(this, frame)); - } continue; - case OP_STORE_NAME: { - auto& p = frame->co->names[byte.arg]; - NameRef(p).set(this, frame, frame->pop()); - } continue; - case OP_BUILD_ATTR_REF: case OP_BUILD_ATTR: { - auto& attr = frame->co->names[byte.arg]; - PyVar obj = frame->pop_value(this); - AttrRef ref = AttrRef(obj, NameRef(attr)); - if(byte.op == OP_BUILD_ATTR) frame->push(ref.get(this, frame)); - else frame->push(PyRef(ref)); - } continue; - case OP_BUILD_INDEX: { - PyVar index = frame->pop_value(this); - auto ref = IndexRef(frame->pop_value(this), index); - if(byte.arg > 0) frame->push(ref.get(this, frame)); - else frame->push(PyRef(ref)); - } continue; - case OP_FAST_INDEX: case OP_FAST_INDEX_REF: { - auto& a = frame->co->names[byte.arg & 0xFFFF]; - auto& x = frame->co->names[(byte.arg >> 16) & 0xFFFF]; - auto ref = IndexRef(NameRef(a).get(this, frame), NameRef(x).get(this, frame)); - if(byte.op == OP_FAST_INDEX) frame->push(ref.get(this, frame)); - else frame->push(PyRef(ref)); - } continue; - case OP_ROT_TWO: ::std::swap(frame->top(), frame->top_1()); continue; - case OP_STORE_REF: { - // PyVar obj = frame->pop_value(this); - // PyVarRef r = frame->pop(); - // PyRef_AS_C(r)->set(this, frame, std::move(obj)); - PyRef_AS_C(frame->top_1())->set(this, frame, frame->top_value(this)); - frame->_pop(); frame->_pop(); - } continue; - case OP_DELETE_REF: - PyRef_AS_C(frame->top())->del(this, frame); - frame->_pop(); - continue; - case OP_BUILD_TUPLE: { - Args items = frame->pop_n_values_reversed(this, byte.arg); - frame->push(VAR(std::move(items))); - } continue; - case OP_BUILD_TUPLE_REF: { - Args items = frame->pop_n_reversed(byte.arg); - frame->push(PyRef(TupleRef(std::move(items)))); - } continue; - case OP_BUILD_STRING: { - Args items = frame->pop_n_values_reversed(this, byte.arg); - StrStream ss; - for(int i=0; ipush(VAR(ss.str())); - } continue; - case OP_LOAD_EVAL_FN: frame->push(builtins->attr(m_eval)); continue; - case OP_BEGIN_CLASS: { - auto& name = frame->co->names[byte.arg]; - PyVar clsBase = frame->pop_value(this); - if(clsBase == None) clsBase = _t(tp_object); - check_type(clsBase, tp_type); - PyVar cls = new_type_object(frame->_module, name.first, OBJ_GET(Type, clsBase)); - frame->push(cls); - } continue; - case OP_END_CLASS: { - PyVar cls = frame->pop(); - cls->attr()._try_perfect_rehash(); - }; continue; - case OP_STORE_CLASS_ATTR: { - auto& name = frame->co->names[byte.arg]; - PyVar obj = frame->pop_value(this); - PyVar& cls = frame->top(); - cls->attr().set(name.first, std::move(obj)); - } continue; - case OP_RETURN_VALUE: return frame->pop_value(this); - case OP_PRINT_EXPR: { - const PyVar expr = frame->top_value(this); - if(expr != None) *_stdout << CAST(Str, asRepr(expr)) << '\n'; - } continue; - case OP_POP_TOP: frame->_pop(); continue; - case OP_BINARY_OP: { - Args args(2); - args[1] = frame->pop_value(this); - args[0] = frame->top_value(this); - frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - case OP_BITWISE_OP: { - Args args(2); - args[1] = frame->pop_value(this); - args[0] = frame->top_value(this); - frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - case OP_INPLACE_BINARY_OP: { - Args args(2); - args[1] = frame->pop(); - args[0] = frame->top_value(this); - PyVar ret = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); - PyRef_AS_C(frame->top())->set(this, frame, std::move(ret)); - frame->_pop(); - } continue; - case OP_INPLACE_BITWISE_OP: { - Args args(2); - args[1] = frame->pop_value(this); - args[0] = frame->top_value(this); - PyVar ret = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); - PyRef_AS_C(frame->top())->set(this, frame, std::move(ret)); - frame->_pop(); - } continue; - case OP_COMPARE_OP: { - Args args(2); - args[1] = frame->pop_value(this); - args[0] = frame->top_value(this); - frame->top() = fast_call(CMP_SPECIAL_METHODS[byte.arg], std::move(args)); - } continue; - case OP_IS_OP: { - PyVar rhs = frame->pop_value(this); - bool ret_c = rhs == frame->top_value(this); - if(byte.arg == 1) ret_c = !ret_c; - frame->top() = VAR(ret_c); - } continue; - case OP_CONTAINS_OP: { - PyVar rhs = frame->pop_value(this); - bool ret_c = CAST(bool, call(rhs, __contains__, one_arg(frame->pop_value(this)))); - if(byte.arg == 1) ret_c = !ret_c; - frame->push(VAR(ret_c)); - } continue; - case OP_UNARY_NEGATIVE: - frame->top() = num_negated(frame->top_value(this)); - continue; - case OP_UNARY_NOT: { - PyVar obj = frame->pop_value(this); - const PyVar& obj_bool = asBool(obj); - frame->push(VAR(!_CAST(bool, obj_bool))); - } continue; - case OP_POP_JUMP_IF_FALSE: - if(!_CAST(bool, asBool(frame->pop_value(this)))) frame->jump_abs(byte.arg); - continue; - case OP_LOAD_NONE: frame->push(None); continue; - case OP_LOAD_TRUE: frame->push(True); continue; - case OP_LOAD_FALSE: frame->push(False); continue; - case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); continue; - case OP_ASSERT: { - PyVar _msg = frame->pop_value(this); - Str msg = CAST(Str, asStr(_msg)); - PyVar expr = frame->pop_value(this); - if(asBool(expr) != True) _error("AssertionError", msg); - } continue; - case OP_EXCEPTION_MATCH: { - const auto& e = CAST(Exception&, frame->top()); - StrName name = frame->co->names[byte.arg].first; - frame->push(VAR(e.match_type(name))); - } continue; - case OP_RAISE: { - PyVar obj = frame->pop_value(this); - Str msg = obj == None ? "" : CAST(Str, asStr(obj)); - StrName type = frame->co->names[byte.arg].first; - _error(type, msg); - } continue; - case OP_RE_RAISE: _raise(); continue; - case OP_BUILD_LIST: - frame->push(VAR(frame->pop_n_values_reversed(this, byte.arg).move_to_list())); - continue; - case OP_BUILD_MAP: { - Args items = frame->pop_n_values_reversed(this, byte.arg*2); - PyVar obj = call(builtins->attr("dict")); - for(int i=0; ipush(obj); - } continue; - case OP_BUILD_SET: { - PyVar list = VAR( - frame->pop_n_values_reversed(this, byte.arg).move_to_list() - ); - PyVar obj = call(builtins->attr("set"), one_arg(list)); - frame->push(obj); - } continue; - case OP_LIST_APPEND: { - PyVar obj = frame->pop_value(this); - List& list = CAST(List&, frame->top_1()); - list.push_back(std::move(obj)); - } continue; - case OP_MAP_ADD: { - PyVar value = frame->pop_value(this); - PyVar key = frame->pop_value(this); - call(frame->top_1(), __setitem__, two_args(key, value)); - } continue; - case OP_SET_ADD: { - PyVar obj = frame->pop_value(this); - call(frame->top_1(), "add", one_arg(obj)); - } continue; - case OP_DUP_TOP_VALUE: frame->push(frame->top_value(this)); continue; - case OP_UNARY_STAR: { - if(byte.arg > 0){ // rvalue - frame->top() = VAR(StarWrapper(frame->top_value(this), true)); - }else{ - PyRef_AS_C(frame->top()); // check ref - frame->top() = VAR(StarWrapper(frame->top(), false)); - } - } continue; - case OP_CALL_KWARGS_UNPACK: case OP_CALL_KWARGS: { - int ARGC = byte.arg & 0xFFFF; - int KWARGC = (byte.arg >> 16) & 0xFFFF; - Args kwargs = frame->pop_n_values_reversed(this, KWARGC*2); - Args args = frame->pop_n_values_reversed(this, ARGC); - if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args); - PyVar callable = frame->pop_value(this); - PyVar ret = call(callable, std::move(args), kwargs, true); - if(ret == _py_op_call) return ret; - frame->push(std::move(ret)); - } continue; - case OP_CALL_UNPACK: case OP_CALL: { - Args args = frame->pop_n_values_reversed(this, byte.arg); - if(byte.op == OP_CALL_UNPACK) unpack_args(args); - PyVar callable = frame->pop_value(this); - PyVar ret = call(callable, std::move(args), no_arg(), true); - if(ret == _py_op_call) return ret; - frame->push(std::move(ret)); - } continue; - case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); continue; - case OP_SAFE_JUMP_ABSOLUTE: frame->jump_abs_safe(byte.arg); continue; - case OP_GOTO: { - StrName label = frame->co->names[byte.arg].first; - auto it = frame->co->labels.find(label); - if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found"); - frame->jump_abs_safe(it->second); - } continue; - case OP_GET_ITER: { - PyVar obj = frame->pop_value(this); - PyVar iter = asIter(obj); - check_type(frame->top(), tp_ref); - PyIter_AS_C(iter)->loop_var = frame->pop(); - frame->push(std::move(iter)); - } continue; - case OP_FOR_ITER: { - BaseIter* it = PyIter_AS_C(frame->top()); - PyVar obj = it->next(); - if(obj != nullptr){ - PyRef_AS_C(it->loop_var)->set(this, frame, std::move(obj)); - }else{ - int blockEnd = frame->co->blocks[byte.block].end; - frame->jump_abs_safe(blockEnd); - } - } continue; - case OP_LOOP_CONTINUE: { - int blockStart = frame->co->blocks[byte.block].start; - frame->jump_abs(blockStart); - } continue; - case OP_LOOP_BREAK: { - int blockEnd = frame->co->blocks[byte.block].end; - frame->jump_abs_safe(blockEnd); - } continue; - case OP_JUMP_IF_FALSE_OR_POP: { - const PyVar expr = frame->top_value(this); - if(asBool(expr)==False) frame->jump_abs(byte.arg); - else frame->pop_value(this); - } continue; - case OP_JUMP_IF_TRUE_OR_POP: { - const PyVar expr = frame->top_value(this); - if(asBool(expr)==True) frame->jump_abs(byte.arg); - else frame->pop_value(this); - } continue; - case OP_BUILD_SLICE: { - PyVar stop = frame->pop_value(this); - PyVar start = frame->pop_value(this); - Slice s; - if(start != None) { s.start = CAST(int, start);} - if(stop != None) { s.stop = CAST(int, stop);} - frame->push(VAR(s)); - } continue; - case OP_IMPORT_NAME: { - StrName name = frame->co->names[byte.arg].first; - PyVar* ext_mod = _modules.try_get(name); - if(ext_mod == nullptr){ - Str source; - auto it2 = _lazy_modules.find(name); - if(it2 == _lazy_modules.end()){ - bool ok = false; - source = _read_file_cwd(name.str() + ".py", &ok); - if(!ok) _error("ImportError", "module " + name.str().escape(true) + " not found"); - }else{ - source = it2->second; - _lazy_modules.erase(it2); - } - CodeObject_ code = compile(source, name.str(), EXEC_MODE); - PyVar new_mod = new_module(name); - _exec(code, new_mod); - frame->push(new_mod); - new_mod->attr()._try_perfect_rehash(); - }else{ - frame->push(*ext_mod); - } - } continue; - case OP_STORE_ALL_NAMES: { - PyVar obj = frame->pop_value(this); - for(auto& [name, value]: obj->attr().items()){ - Str s = name.str(); - if(s.empty() || s[0] == '_') continue; - frame->f_globals().set(name, value); - } - }; continue; - case OP_YIELD_VALUE: return _py_op_yield; - // TODO: using "goto" inside with block may cause __exit__ not called - case OP_WITH_ENTER: call(frame->pop_value(this), __enter__); continue; - case OP_WITH_EXIT: call(frame->pop_value(this), __exit__); continue; - case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); continue; - case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); continue; - default: throw std::runtime_error(Str("opcode ") + OP_NAMES[byte.op] + " is not implemented"); - } - } - - if(frame->co->src->mode == EVAL_MODE || frame->co->src->mode == JSON_MODE){ - if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE"); - return frame->pop_value(this); - } -#if PK_EXTRA_CHECK - if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE"); +inline PyObject* VM::run_frame(Frame* frame){ +__NEXT_STEP:; + /* NOTE: + * Be aware of accidental gc! + * DO NOT leave any strong reference of PyObject* in the C stack + * For example, frame->popx() returns a strong reference which may be dangerous + * `Args` containing strong references is safe if it is passed to `call` or `fast_call` + */ +#if !DEBUG_NO_AUTO_GC + heap._auto_collect(); #endif - return None; + + const Bytecode& byte = frame->next_bytecode(); +#if DEBUG_CEVAL_STEP + std::cout << frame->stack_info() << " " << OP_NAMES[byte.op] << std::endl; +#endif + + switch (byte.op) + { + case OP_NO_OP: DISPATCH(); + /*****************************************/ + case OP_POP_TOP: frame->pop(); DISPATCH(); + case OP_DUP_TOP: frame->push(frame->top()); DISPATCH(); + case OP_ROT_TWO: std::swap(frame->top(), frame->top_1()); DISPATCH(); + case OP_PRINT_EXPR: { + PyObject* obj = frame->top(); // use top() to avoid accidental gc + if(obj != None) *_stdout << CAST(Str&, asRepr(obj)) << '\n'; + frame->pop(); + } DISPATCH(); + /*****************************************/ + case OP_LOAD_CONST: frame->push(frame->co->consts[byte.arg]); DISPATCH(); + case OP_LOAD_NONE: frame->push(None); DISPATCH(); + case OP_LOAD_TRUE: frame->push(True); DISPATCH(); + case OP_LOAD_FALSE: frame->push(False); DISPATCH(); + case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); DISPATCH(); + case OP_LOAD_BUILTIN_EVAL: frame->push(builtins->attr(m_eval)); DISPATCH(); + case OP_LOAD_FUNCTION: { + FuncDecl_ decl = frame->co->func_decls[byte.arg]; + PyObject* obj = VAR(Function({decl, frame->_module, frame->_locals})); + frame->push(obj); + } DISPATCH(); + case OP_LOAD_NULL: frame->push(_py_null); DISPATCH(); + /*****************************************/ + case OP_LOAD_NAME: { + StrName name = frame->co->names[byte.arg]; + PyObject* val; + val = frame->f_locals().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + val = frame->f_closure_try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + val = frame->f_globals().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + val = vm->builtins->attr().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + vm->NameError(name); + } DISPATCH(); + case OP_LOAD_GLOBAL: { + StrName name = frame->co->names[byte.arg]; + PyObject* val = frame->f_globals().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + val = vm->builtins->attr().try_get(name); + if(val != nullptr) { frame->push(val); DISPATCH(); } + vm->NameError(name); + } DISPATCH(); + case OP_LOAD_ATTR: { + PyObject* a = frame->top(); + StrName name = frame->co->names[byte.arg]; + frame->top() = getattr(a, name); + } DISPATCH(); + case OP_LOAD_METHOD: { + PyObject* a = frame->top(); + StrName name = frame->co->names[byte.arg]; + PyObject* self; + frame->top() = get_unbound_method(a, name, &self, true, true); + frame->push(self); + } DISPATCH(); + case OP_LOAD_SUBSCR: { + Args args(2); + args[1] = frame->popx(); // b + args[0] = frame->top(); // a + frame->top() = fast_call(__getitem__, std::move(args)); + } DISPATCH(); + case OP_STORE_LOCAL: { + StrName name = frame->co->names[byte.arg]; + frame->f_locals().set(name, frame->popx()); + } DISPATCH(); + case OP_STORE_GLOBAL: { + StrName name = frame->co->names[byte.arg]; + frame->f_globals().set(name, frame->popx()); + } DISPATCH(); + case OP_STORE_ATTR: { + StrName name = frame->co->names[byte.arg]; + PyObject* a = frame->top(); + PyObject* val = frame->top_1(); + setattr(a, name, val); + frame->pop_n(2); + } DISPATCH(); + case OP_STORE_SUBSCR: { + Args args(3); + args[1] = frame->popx(); // b + args[0] = frame->popx(); // a + args[2] = frame->popx(); // val + fast_call(__setitem__, std::move(args)); + } DISPATCH(); + case OP_DELETE_LOCAL: { + StrName name = frame->co->names[byte.arg]; + if(frame->f_locals().contains(name)){ + frame->f_locals().erase(name); + }else{ + NameError(name); + } + } DISPATCH(); + case OP_DELETE_GLOBAL: { + StrName name = frame->co->names[byte.arg]; + if(frame->f_globals().contains(name)){ + frame->f_globals().erase(name); + }else{ + NameError(name); + } + } DISPATCH(); + case OP_DELETE_ATTR: { + PyObject* a = frame->popx(); + StrName name = frame->co->names[byte.arg]; + if(!a->is_attr_valid()) TypeError("cannot delete attribute"); + if(!a->attr().contains(name)) AttributeError(a, name); + a->attr().erase(name); + } DISPATCH(); + case OP_DELETE_SUBSCR: { + PyObject* b = frame->popx(); + PyObject* a = frame->popx(); + fast_call(__delitem__, Args{a, b}); + } DISPATCH(); + /*****************************************/ + case OP_BUILD_LIST: + frame->push(VAR(frame->popx_n_reversed(byte.arg).to_list())); + DISPATCH(); + case OP_BUILD_DICT: { + PyObject* t = VAR(frame->popx_n_reversed(byte.arg)); + PyObject* obj = call(builtins->attr(m_dict), Args{t}); + frame->push(obj); + } DISPATCH(); + case OP_BUILD_SET: { + PyObject* t = VAR(frame->popx_n_reversed(byte.arg)); + PyObject* obj = call(builtins->attr(m_set), Args{t}); + frame->push(obj); + } DISPATCH(); + case OP_BUILD_SLICE: { + PyObject* step = frame->popx(); + PyObject* stop = frame->popx(); + PyObject* start = frame->popx(); + Slice s; + if(start != None) s.start = CAST(int, start); + if(stop != None) s.stop = CAST(int, stop); + if(step != None) s.step = CAST(int, step); + frame->push(VAR(s)); + } DISPATCH(); + case OP_BUILD_TUPLE: { + Tuple items = frame->popx_n_reversed(byte.arg); + frame->push(VAR(std::move(items))); + } DISPATCH(); + case OP_BUILD_STRING: { + std::stringstream ss; // asStr() may run extra bytecode + for(int i=byte.arg-1; i>=0; i--) ss << CAST(Str&, asStr(frame->top_n(i))); + frame->pop_n(byte.arg); + frame->push(VAR(ss.str())); + } DISPATCH(); + /*****************************************/ + case OP_BINARY_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args)); + } DISPATCH(); + case OP_COMPARE_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(COMPARE_SPECIAL_METHODS[byte.arg], std::move(args)); + } DISPATCH(); + case OP_BITWISE_OP: { + Args args(2); + args[1] = frame->popx(); // lhs + args[0] = frame->top(); // rhs + frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args)); + } DISPATCH(); + case OP_IS_OP: { + PyObject* rhs = frame->popx(); + PyObject* lhs = frame->top(); + bool ret_c = lhs == rhs; + if(byte.arg == 1) ret_c = !ret_c; + frame->top() = VAR(ret_c); + } DISPATCH(); + case OP_CONTAINS_OP: { + Args args(2); + args[0] = frame->popx(); + args[1] = frame->top(); + PyObject* ret = fast_call(__contains__, std::move(args)); + bool ret_c = CAST(bool, ret); + if(byte.arg == 1) ret_c = !ret_c; + frame->top() = VAR(ret_c); + } DISPATCH(); + /*****************************************/ + case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); DISPATCH(); + case OP_POP_JUMP_IF_FALSE: + if(!asBool(frame->popx())) frame->jump_abs(byte.arg); + DISPATCH(); + case OP_JUMP_IF_TRUE_OR_POP: + if(asBool(frame->top()) == true) frame->jump_abs(byte.arg); + else frame->pop(); + DISPATCH(); + case OP_JUMP_IF_FALSE_OR_POP: + if(asBool(frame->top()) == false) frame->jump_abs(byte.arg); + else frame->pop(); + DISPATCH(); + case OP_LOOP_CONTINUE: { + int target = frame->co->blocks[byte.block].start; + frame->jump_abs(target); + } DISPATCH(); + case OP_LOOP_BREAK: { + int target = frame->co->blocks[byte.block].end; + frame->jump_abs_break(target); + } DISPATCH(); + case OP_GOTO: { + StrName label = frame->co->names[byte.arg]; + auto it = frame->co->labels.find(label); + if(it == frame->co->labels.end()) _error("KeyError", fmt("label ", label.escape(), " not found")); + frame->jump_abs_break(it->second); + } DISPATCH(); + /*****************************************/ + // TODO: examine this later + case OP_CALL: case OP_CALL_UNPACK: { + int ARGC = byte.arg; + + bool method_call = frame->top_n(ARGC) != _py_null; + if(method_call) ARGC++; // add self into args + Args args = frame->popx_n_reversed(ARGC); + if(!method_call) frame->pop(); + + if(byte.op == OP_CALL_UNPACK) unpack_args(args); + PyObject* callable = frame->popx(); + PyObject* ret = call(callable, std::move(args), no_arg(), true); + if(ret == _py_op_call) return ret; + frame->push(std::move(ret)); + } DISPATCH(); + case OP_CALL_KWARGS: case OP_CALL_KWARGS_UNPACK: { + int ARGC = byte.arg & 0xFFFF; + int KWARGC = (byte.arg >> 16) & 0xFFFF; + Args kwargs = frame->popx_n_reversed(KWARGC*2); + + bool method_call = frame->top_n(ARGC) != _py_null; + if(method_call) ARGC++; // add self into args + Args args = frame->popx_n_reversed(ARGC); + if(!method_call) frame->pop(); + + if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args); + PyObject* callable = frame->popx(); + PyObject* ret = call(callable, std::move(args), kwargs, true); + if(ret == _py_op_call) return ret; + frame->push(std::move(ret)); + } DISPATCH(); + case OP_RETURN_VALUE: return frame->popx(); + case OP_YIELD_VALUE: return _py_op_yield; + /*****************************************/ + case OP_LIST_APPEND: { + PyObject* obj = frame->popx(); + List& list = CAST(List&, frame->top_1()); + list.push_back(obj); + } DISPATCH(); + case OP_DICT_ADD: { + PyObject* kv = frame->popx(); + Tuple& t = CAST(Tuple& ,kv); + fast_call(__setitem__, Args{frame->top_1(), t[0], t[1]}); + } DISPATCH(); + case OP_SET_ADD: { + PyObject* obj = frame->popx(); + fast_call(m_add, Args{frame->top_1(), obj}); + } DISPATCH(); + /*****************************************/ + case OP_UNARY_NEGATIVE: + frame->top() = num_negated(frame->top()); + DISPATCH(); + case OP_UNARY_NOT: + frame->top() = VAR(!asBool(frame->top())); + DISPATCH(); + case OP_UNARY_STAR: + frame->top() = VAR(StarWrapper(frame->top())); + DISPATCH(); + /*****************************************/ + case OP_GET_ITER: + frame->top() = asIter(frame->top()); + DISPATCH(); + case OP_FOR_ITER: { + BaseIter* it = PyIter_AS_C(frame->top()); + PyObject* obj = it->next(); + if(obj != nullptr){ + frame->push(obj); + }else{ + int target = frame->co->blocks[byte.block].end; + frame->jump_abs_break(target); + } + } DISPATCH(); + /*****************************************/ + case OP_IMPORT_NAME: { + StrName name = frame->co->names[byte.arg]; + PyObject* ext_mod = _modules.try_get(name); + if(ext_mod == nullptr){ + Str source; + auto it = _lazy_modules.find(name); + if(it == _lazy_modules.end()){ + bool ok = false; + source = _read_file_cwd(fmt(name, ".py"), &ok); + if(!ok) _error("ImportError", fmt("module ", name.escape(), " not found")); + }else{ + source = it->second; + _lazy_modules.erase(it); + } + CodeObject_ code = compile(source, name.sv(), EXEC_MODE); + PyObject* new_mod = new_module(name); + _exec(code, new_mod); + new_mod->attr()._try_perfect_rehash(); + frame->push(new_mod); + }else{ + frame->push(ext_mod); + } + } DISPATCH(); + case OP_IMPORT_STAR: { + PyObject* obj = frame->popx(); + for(auto& [name, value]: obj->attr().items()){ + std::string_view s = name.sv(); + if(s.empty() || s[0] == '_') continue; + frame->f_globals().set(name, value); + } + }; DISPATCH(); + /*****************************************/ + case OP_UNPACK_SEQUENCE: case OP_UNPACK_EX: { + // asIter or iter->next may run bytecode, accidential gc may happen + auto _lock = heap.gc_scope_lock(); // lock the gc via RAII!! + PyObject* obj = asIter(frame->popx()); + BaseIter* iter = PyIter_AS_C(obj); + for(int i=0; inext(); + if(item == nullptr) ValueError("not enough values to unpack"); + frame->push(item); + } + // handle extra items + if(byte.op == OP_UNPACK_EX){ + List extras; + while(true){ + PyObject* item = iter->next(); + if(item == nullptr) break; + extras.push_back(item); + } + frame->push(VAR(extras)); + }else{ + if(iter->next() != nullptr) ValueError("too many values to unpack"); + } + }; DISPATCH(); + /*****************************************/ + case OP_BEGIN_CLASS: { + StrName name = frame->co->names[byte.arg]; + PyObject* super_cls = frame->popx(); + if(super_cls == None) super_cls = _t(tp_object); + check_type(super_cls, tp_type); + PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, super_cls)); + frame->push(cls); + } DISPATCH(); + case OP_END_CLASS: { + PyObject* cls = frame->popx(); + cls->attr()._try_perfect_rehash(); + }; DISPATCH(); + case OP_STORE_CLASS_ATTR: { + StrName name = frame->co->names[byte.arg]; + PyObject* obj = frame->popx(); + PyObject* cls = frame->top(); + cls->attr().set(name, obj); + } DISPATCH(); + /*****************************************/ + // // TODO: using "goto" inside with block may cause __exit__ not called + // case OP_WITH_ENTER: call(frame->pop_value(this), __enter__, no_arg()); DISPATCH(); + // case OP_WITH_EXIT: call(frame->pop_value(this), __exit__, no_arg()); DISPATCH(); + /*****************************************/ + case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); DISPATCH(); + case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); DISPATCH(); + /*****************************************/ + case OP_ASSERT: { + PyObject* obj = frame->top(); + Str msg; + if(is_type(obj, tp_tuple)){ + auto& t = CAST(Tuple&, obj); + if(t.size() != 2) ValueError("assert tuple must have 2 elements"); + obj = t[0]; + msg = CAST(Str&, asStr(t[1])); + } + bool ok = asBool(obj); + frame->pop(); + if(!ok) _error("AssertionError", msg); + } DISPATCH(); + case OP_EXCEPTION_MATCH: { + const auto& e = CAST(Exception&, frame->top()); + StrName name = frame->co->names[byte.arg]; + frame->push(VAR(e.match_type(name))); + } DISPATCH(); + case OP_RAISE: { + PyObject* obj = frame->popx(); + Str msg = obj == None ? "" : CAST(Str, asStr(obj)); + StrName type = frame->co->names[byte.arg]; + _error(type, msg); + } DISPATCH(); + case OP_RE_RAISE: _raise(); DISPATCH(); + default: throw std::runtime_error(fmt(OP_NAMES[byte.op], " is not implemented")); + } + UNREACHABLE(); } +#undef DISPATCH + } // namespace pkpy \ No newline at end of file diff --git a/src/cffi.h b/src/cffi.h index de589893..7e532117 100644 --- a/src/cffi.h +++ b/src/cffi.h @@ -2,8 +2,6 @@ #include "common.h" #include "vm.h" -#include -#include namespace pkpy { @@ -14,7 +12,7 @@ struct NativeProxyFunc { _Fp func; NativeProxyFunc(_Fp func) : func(func) {} - PyVar operator()(VM* vm, Args& args) { + PyObject* operator()(VM* vm, Args& args) { if (args.size() != N) { vm->TypeError("expected " + std::to_string(N) + " arguments, but got " + std::to_string(args.size())); } @@ -22,13 +20,13 @@ struct NativeProxyFunc { } template - std::enable_if_t, PyVar> call(VM* vm, Args& args, std::index_sequence) { + std::enable_if_t, PyObject*> call(VM* vm, Args& args, std::index_sequence) { func(py_cast(vm, args[Is])...); return vm->None; } template - std::enable_if_t, PyVar> call(VM* vm, Args& args, std::index_sequence) { + std::enable_if_t, PyObject*> call(VM* vm, Args& args, std::index_sequence) { __Ret ret = func(py_cast(vm, args[Is])...); return VAR(std::move(ret)); } @@ -41,7 +39,7 @@ struct NativeProxyMethod { _Fp func; NativeProxyMethod(_Fp func) : func(func) {} - PyVar operator()(VM* vm, Args& args) { + PyObject* operator()(VM* vm, Args& args) { int actual_size = args.size() - 1; if (actual_size != N) { vm->TypeError("expected " + std::to_string(N) + " arguments, but got " + std::to_string(actual_size)); @@ -50,14 +48,14 @@ struct NativeProxyMethod { } template - std::enable_if_t, PyVar> call(VM* vm, Args& args, std::index_sequence) { + std::enable_if_t, PyObject*> call(VM* vm, Args& args, std::index_sequence) { T& self = py_cast(vm, args[0]); (self.*func)(py_cast(vm, args[Is+1])...); return vm->None; } template - std::enable_if_t, PyVar> call(VM* vm, Args& args, std::index_sequence) { + std::enable_if_t, PyObject*> call(VM* vm, Args& args, std::index_sequence) { T& self = py_cast(vm, args[0]); __Ret ret = (self.*func)(py_cast(vm, args[Is+1])...); return VAR(std::move(ret)); @@ -133,14 +131,14 @@ struct TypeDB{ return index == 0 ? nullptr : &_by_index[index-1]; } - const TypeInfo* get(const char name[]) const { + const TypeInfo* get(std::string_view name) const { auto it = _by_name.find(name); if(it == _by_name.end()) return nullptr; return get(it->second); } const TypeInfo* get(const Str& s) const { - return get(s.c_str()); + return get(s.sv()); } template @@ -152,7 +150,7 @@ struct TypeDB{ static TypeDB _type_db; -auto _ = [](){ +inline static auto ___x = [](){ #define REGISTER_BASIC_TYPE(T) _type_db.register_type(#T, {}); _type_db.register_type("void", {}); REGISTER_BASIC_TYPE(char); @@ -200,12 +198,12 @@ struct Pointer{ return Pointer(ctype, level, ptr-offset*unit_size()); } - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<-1>(type, "__new__", CPP_NOT_IMPLEMENTED()); vm->bind_method<0>(type, "__repr__", [](VM* vm, Args& args) { Pointer& self = CAST(Pointer&, args[0]); - StrStream ss; + std::stringstream ss; ss << "<" << self.ctype->name; for(int i=0; i"; @@ -266,9 +264,9 @@ struct Pointer{ } template - inline T& ref() noexcept { return *reinterpret_cast(ptr); } + T& ref() noexcept { return *reinterpret_cast(ptr); } - PyVar get(VM* vm){ + PyObject* get(VM* vm){ if(level > 1) return VAR_T(Pointer, ctype, level-1, ref()); switch(ctype->index){ #define CASE(T) case type_index(): return VAR(ref()) @@ -291,7 +289,7 @@ struct Pointer{ return VAR_T(Pointer, *this); } - void set(VM* vm, const PyVar& val){ + void set(VM* vm, PyObject* val){ if(level > 1) { Pointer& p = CAST(Pointer&, val); ref() = p.ptr; // We don't check the type, just copy the underlying address @@ -321,7 +319,7 @@ struct Pointer{ Pointer _to(VM* vm, StrName name){ auto it = ctype->members.find(name); if(it == ctype->members.end()){ - vm->AttributeError(Str("struct '") + ctype->name + "' has no member " + name.str().escape(true)); + vm->AttributeError(fmt("struct '", ctype->name, "' has no member ", name.escape())); } const MemberInfo& info = it->second; return {info.type, level, ptr+info.offset}; @@ -359,7 +357,7 @@ struct Value { Value& operator=(const Value& other) = delete; Value(const Value& other) = delete; - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<-1>(type, "__new__", CPP_NOT_IMPLEMENTED()); vm->bind_method<0>(type, "ptr", [](VM* vm, Args& args) { @@ -388,11 +386,11 @@ struct CType{ CType() : type(_type_db.get()) {} CType(const TypeInfo* type) : type(type) {} - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<1>(type, "__new__", [](VM* vm, Args& args) { const Str& name = CAST(Str&, args[0]); const TypeInfo* type = _type_db.get(name); - if(type == nullptr) vm->TypeError("unknown type: " + name.escape(true)); + if(type == nullptr) vm->TypeError("unknown type: " + name.escape()); return VAR_T(CType, type); }); @@ -403,9 +401,9 @@ struct CType{ } }; -void add_module_c(VM* vm){ - PyVar mod = vm->new_module("c"); - PyVar ptr_t = Pointer::register_class(vm, mod); +inline void add_module_c(VM* vm){ + PyObject* mod = vm->new_module("c"); + Pointer::register_class(vm, mod); Value::register_class(vm, mod); CType::register_class(vm, mod); @@ -434,22 +432,22 @@ void add_module_c(VM* vm){ Pointer& self = CAST(Pointer&, args[0]); const Str& name = CAST(Str&, args[1]); int level = 0; - for(int i=name.size()-1; i>=0; i--){ + for(int i=name.length()-1; i>=0; i--){ if(name[i] == '*') level++; else break; } if(level == 0) vm->TypeError("expect a pointer type, such as 'int*'"); - Str type_s = name.substr(0, name.size()-level); + Str type_s = name.substr(0, name.length()-level); const TypeInfo* type = _type_db.get(type_s); - if(type == nullptr) vm->TypeError("unknown type: " + type_s.escape(true)); + if(type == nullptr) vm->TypeError("unknown type: " + type_s.escape()); return VAR_T(Pointer, type, level, self.ptr); }); vm->bind_func<1>(mod, "sizeof", [](VM* vm, Args& args) { const Str& name = CAST(Str&, args[0]); - if(name.find('*') != Str::npos) return VAR(sizeof(void*)); + if(name.index("*") != -1) return VAR(sizeof(void*)); const TypeInfo* type = _type_db.get(name); - if(type == nullptr) vm->TypeError("unknown type: " + name.escape(true)); + if(type == nullptr) vm->TypeError("unknown type: " + name.escape()); return VAR(type->size); }); @@ -462,11 +460,11 @@ void add_module_c(VM* vm){ }); } -PyVar py_var(VM* vm, void* p){ +inline PyObject* py_var(VM* vm, void* p){ return VAR_T(Pointer, _type_db.get(), (char*)p); } -PyVar py_var(VM* vm, char* p){ +inline PyObject* py_var(VM* vm, char* p){ return VAR_T(Pointer, _type_db.get(), (char*)p); } @@ -491,7 +489,7 @@ struct pointer { }; template -T py_pointer_cast(VM* vm, const PyVar& var){ +T py_pointer_cast(VM* vm, PyObject* var){ static_assert(std::is_pointer_v); Pointer& p = CAST(Pointer&, var); const TypeInfo* type = _type_db.get::baseT>(); @@ -503,14 +501,14 @@ T py_pointer_cast(VM* vm, const PyVar& var){ } template -T py_value_cast(VM* vm, const PyVar& var){ +T py_value_cast(VM* vm, PyObject* var){ static_assert(std::is_pod_v); Value& v = CAST(Value&, var); return *reinterpret_cast(v.data); } template -std::enable_if_t>, PyVar> +std::enable_if_t>, PyObject*> py_var(VM* vm, T p){ const TypeInfo* type = _type_db.get::baseT>(); if(type == nullptr) type = _type_db.get(); @@ -518,9 +516,9 @@ py_var(VM* vm, T p){ } template -std::enable_if_t>, PyVar> +std::enable_if_t>, PyObject*> py_var(VM* vm, T p){ - if constexpr(std::is_same_v) return p; + if constexpr(std::is_same_v) return p; const TypeInfo* type = _type_db.get(); return VAR_T(Value, type, &p); } diff --git a/src/codeobject.h b/src/codeobject.h index 97ad83b7..640cadbd 100644 --- a/src/codeobject.h +++ b/src/codeobject.h @@ -5,12 +5,7 @@ namespace pkpy{ -enum NameScope { - NAME_LOCAL = 0, - NAME_GLOBAL, - NAME_ATTR, - NAME_SPECIAL, -}; +enum NameScope { NAME_LOCAL, NAME_GLOBAL }; enum Opcode { #define OPCODE(name) OP_##name, @@ -18,24 +13,19 @@ enum Opcode { #undef OPCODE }; -static const char* OP_NAMES[] = { +inline const char* OP_NAMES[] = { #define OPCODE(name) #name, #include "opcodes.h" #undef OPCODE }; struct Bytecode{ - uint8_t op; + uint16_t op; + uint16_t block; int arg; int line; - uint16_t block; }; -Str pad(const Str& s, const int n){ - if(s.size() >= n) return s.substr(0, n); - return s + std::string(n - s.size(), ' '); -} - enum CodeBlockType { NO_BLOCK, FOR_LOOP, @@ -44,16 +34,14 @@ enum CodeBlockType { TRY_EXCEPT, }; +#define BC_NOARG -1 +#define BC_KEEPLINE -1 + struct CodeBlock { CodeBlockType type; int parent; // parent index in blocks int start; // start index of this block in codes, inclusive int end; // end index of this block in codes, exclusive - - std::string to_string() const { - if(parent == -1) return ""; - return "[B:" + std::to_string(type) + "]"; - } }; struct CodeObject { @@ -68,57 +56,22 @@ struct CodeObject { std::vector codes; List consts; - std::vector> names; - std::map global_names; + std::vector names; + std::set global_names; std::vector blocks = { CodeBlock{NO_BLOCK, -1} }; std::map labels; + std::vector func_decls; + // may be.. just use a large NameDict? uint32_t perfect_locals_capacity = 2; uint32_t perfect_hash_seed = 0; void optimize(VM* vm); - bool add_label(StrName label){ - if(labels.count(label)) return false; - labels[label] = codes.size(); - return true; + void _gc_mark() const { + for(PyObject* v : consts) OBJ_MARK(v); + for(auto& decl: func_decls) decl->_gc_mark(); } - - int add_name(StrName name, NameScope scope){ - if(scope == NAME_LOCAL && global_names.count(name)) scope = NAME_GLOBAL; - auto p = std::make_pair(name, scope); - for(int i=0; i #include -#include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -27,52 +25,60 @@ #include #include #include -#include +#include +#include +#include -#define PK_VERSION "0.9.5" -#define PK_EXTRA_CHECK 0 +#define PK_VERSION "0.9.7" + +// debug macros +#define DEBUG_NO_BUILTIN_MODULES 0 +#define DEBUG_EXTRA_CHECK 0 +#define DEBUG_DIS_EXEC 0 +#define DEBUG_DIS_EXEC_MIN 1 +#define DEBUG_CEVAL_STEP 0 +#define DEBUG_FULL_EXCEPTION 0 +#define DEBUG_MEMORY_POOL 0 +#define DEBUG_NO_MEMORY_POOL 0 +#define DEBUG_NO_AUTO_GC 0 +#define DEBUG_GC_STATS 0 #if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__) #define PK_ENABLE_FILEIO 0 #else -#define PK_ENABLE_FILEIO 1 +#define PK_ENABLE_FILEIO 0 // TODO: refactor this #endif #if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__) typedef int32_t i64; typedef float f64; -#define S_TO_INT std::stoi -#define S_TO_FLOAT std::stof +#define S_TO_INT(...) static_cast(std::stoi(__VA_ARGS__)) +#define S_TO_FLOAT(...) static_cast(std::stof(__VA_ARGS__)) #else typedef int64_t i64; typedef double f64; -#define S_TO_INT std::stoll -#define S_TO_FLOAT std::stod +#define S_TO_INT(...) static_cast(std::stoll(__VA_ARGS__)) +#define S_TO_FLOAT(...) static_cast(std::stod(__VA_ARGS__)) #endif namespace pkpy{ namespace std = ::std; -struct Dummy { }; -struct DummyInstance { }; +struct Dummy { }; +struct DummyInstance { }; struct DummyModule { }; -#define DUMMY_VAL Dummy() struct Type { int index; Type(): index(-1) {} Type(int index): index(index) {} - inline bool operator==(Type other) const noexcept { - return this->index == other.index; - } - inline bool operator!=(Type other) const noexcept { - return this->index != other.index; - } + bool operator==(Type other) const noexcept { return this->index == other.index; } + bool operator!=(Type other) const noexcept { return this->index != other.index; } + operator int() const noexcept { return this->index; } }; -//#define THREAD_LOCAL thread_local -#define THREAD_LOCAL +#define THREAD_LOCAL // thread_local #define CPP_LAMBDA(x) ([](VM* vm, Args& args) { return x; }) #define CPP_NOT_IMPLEMENTED() ([](VM* vm, Args& args) { vm->NotImplementedError(); return vm->None; }) @@ -82,7 +88,30 @@ struct Type { #define UNREACHABLE() throw std::runtime_error( __FILE__ + std::string(":") + std::to_string(__LINE__) + " UNREACHABLE()!"); #endif -const float kLocalsLoadFactor = 0.67f; -const float kInstAttrLoadFactor = 0.67f; -const float kTypeAttrLoadFactor = 0.5f; +inline const float kLocalsLoadFactor = 0.67f; +inline const float kInstAttrLoadFactor = 0.67f; +inline const float kTypeAttrLoadFactor = 0.5f; + +static_assert(sizeof(i64) == sizeof(int*)); +static_assert(sizeof(f64) == sizeof(int*)); +static_assert(std::numeric_limits::is_iec559); +static_assert(std::numeric_limits::is_iec559); + +struct PyObject; +#define BITS(p) (reinterpret_cast(p)) +inline bool is_tagged(PyObject* p) noexcept { return (BITS(p) & 0b11) != 0b00; } +inline bool is_int(PyObject* p) noexcept { return (BITS(p) & 0b11) == 0b01; } +inline bool is_float(PyObject* p) noexcept { return (BITS(p) & 0b11) == 0b10; } + +inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept { + return is_tagged(a) && is_tagged(b); +} + +inline bool is_both_int(PyObject* a, PyObject* b) noexcept { + return is_int(a) && is_int(b); +} + +struct Expr; +typedef std::unique_ptr Expr_; + } // namespace pkpy \ No newline at end of file diff --git a/src/compiler.h b/src/compiler.h index 3312d5be..77d33c3f 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -1,350 +1,143 @@ #pragma once #include "codeobject.h" -#include "parser.h" -#include "error.h" -#include "ceval.h" +#include "common.h" +#include "expr.h" namespace pkpy{ class Compiler; -typedef void (Compiler::*GrammarFn)(); -typedef void (Compiler::*CompilerAction)(); +typedef void (Compiler::*PrattCallback)(); -struct GrammarRule{ - GrammarFn prefix; - GrammarFn infix; +struct PrattRule{ + PrattCallback prefix; + PrattCallback infix; Precedence precedence; }; -enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; - class Compiler { - std::unique_ptr parser; - std::stack codes; - int lexing_count = 0; - bool used = false; + inline static PrattRule rules[kTokenCount]; + std::unique_ptr lexer; + stack contexts; VM* vm; - std::map rules; + bool used; + // for parsing token stream + int i = 0; + std::vector tokens; - CodeObject_ co() const{ return codes.top(); } - CompileMode mode() const{ return parser->src->mode; } - NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } + const Token& prev() { return tokens.at(i-1); } + const Token& curr() { return tokens.at(i); } + const Token& next() { return tokens.at(i+1); } + void advance(int delta=1) { i += delta; } -public: - Compiler(VM* vm, const char* source, Str filename, CompileMode mode){ - this->vm = vm; - this->parser = std::make_unique( - make_sp(source, filename, mode) - ); + CodeEmitContext* ctx() { return &contexts.top(); } + CompileMode mode() const{ return lexer->src->mode; } + NameScope name_scope() const { return contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL; } + template + CodeObject_ push_context(Args&&... args){ + CodeObject_ co = make_sp(std::forward(args)...); + contexts.push(CodeEmitContext(vm, co)); + return co; + } + + void pop_context(){ + if(!ctx()->s_expr.empty()){ + throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr()); + } + // if the last op does not return, add a default return None + if(ctx()->co->codes.empty() || ctx()->co->codes.back().op != OP_RETURN_VALUE){ + ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + } + ctx()->co->optimize(vm); + contexts.pop(); + } + + static void init_pratt_rules(){ + if(rules[TK(".")].precedence != PREC_NONE) return; // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ #define METHOD(name) &Compiler::name #define NO_INFIX nullptr, PREC_NONE for(TokenIndex i=0; i")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; - rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; - rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; - rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; - rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; - rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; - rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; - rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; - rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; - rules[TK("True")] = { METHOD(exprValue), NO_INFIX }; - rules[TK("False")] = { METHOD(exprValue), NO_INFIX }; - rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX }; - rules[TK("None")] = { METHOD(exprValue), NO_INFIX }; - rules[TK("...")] = { METHOD(exprValue), NO_INFIX }; - rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; - rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; - rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; - rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; - rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; - rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK(">>=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK("<<=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT }; - rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA }; + rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB }; + rules[TK("(")] = { METHOD(exprGroup), METHOD(exprCall), PREC_CALL }; + rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscr), PREC_SUBSCRIPT }; + rules[TK("{")] = { METHOD(exprMap), NO_INFIX }; + rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM }; + rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM }; + rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR }; + rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT }; + rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; + rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY }; + rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION }; + rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND }; rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR }; rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR }; + rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY }; + rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE }; + rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST }; + rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND }; + rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR }; + rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; + rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX }; + rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX }; + rules[TK("@id")] = { METHOD(exprName), NO_INFIX }; + rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX }; + rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX }; + rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX }; #undef METHOD #undef NO_INFIX - -#define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression -#define EXPR_TUPLE() parse_expression(PREC_COMMA) // no '=', but ',' is allowed -#define EXPR_ANY() parse_expression(PREC_ASSIGNMENT) - } - -private: - Str eat_string_until(char quote, bool raw) { - bool quote3 = parser->match_n_chars(2, quote); - std::vector buff; - while (true) { - char c = parser->eatchar_include_newline(); - if (c == quote){ - if(quote3 && !parser->match_n_chars(2, quote)){ - buff.push_back(c); - continue; - } - break; - } - if (c == '\0'){ - if(quote3 && parser->src->mode == REPL_MODE){ - throw NeedMoreLines(false); - } - SyntaxError("EOL while scanning string literal"); - } - if (c == '\n'){ - if(!quote3) SyntaxError("EOL while scanning string literal"); - else{ - buff.push_back(c); - continue; - } - } - if (!raw && c == '\\') { - switch (parser->eatchar_include_newline()) { - case '"': buff.push_back('"'); break; - case '\'': buff.push_back('\''); break; - case '\\': buff.push_back('\\'); break; - case 'n': buff.push_back('\n'); break; - case 'r': buff.push_back('\r'); break; - case 't': buff.push_back('\t'); break; - default: SyntaxError("invalid escape char"); - } - } else { - buff.push_back(c); - } - } - return Str(buff.data(), buff.size()); - } - - void eat_string(char quote, StringType type) { - Str s = eat_string_until(quote, type == RAW_STRING); - if(type == F_STRING){ - parser->set_next_token(TK("@fstr"), VAR(s)); - }else{ - parser->set_next_token(TK("@str"), VAR(s)); - } - } - - void eat_number() { - static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?"); - std::smatch m; - - const char* i = parser->token_start; - while(*i != '\n' && *i != '\0') i++; - std::string s = std::string(parser->token_start, i); - - try{ - if (std::regex_search(s, m, pattern)) { - // here is m.length()-1, since the first char was eaten by lex_token() - for(int j=0; jeatchar(); - - int base = 10; - size_t size; - if (m[1].matched) base = 16; - if (m[2].matched) { - if(base == 16) SyntaxError("hex literal should not contain a dot"); - parser->set_next_token(TK("@num"), VAR(S_TO_FLOAT(m[0], &size))); - } else { - parser->set_next_token(TK("@num"), VAR(S_TO_INT(m[0], &size, base))); - } - if (size != m.length()) UNREACHABLE(); - } - }catch(std::exception& _){ - SyntaxError("invalid number literal"); - } - } - - void lex_token(){ - lexing_count++; - _lex_token(); - lexing_count--; - } - - // Lex the next token and set it as the next token. - void _lex_token() { - parser->prev = parser->curr; - parser->curr = parser->next_token(); - //std::cout << parser->curr.info() << std::endl; - - while (parser->peekchar() != '\0') { - parser->token_start = parser->curr_char; - char c = parser->eatchar_include_newline(); - switch (c) { - case '\'': case '"': eat_string(c, NORMAL_STRING); return; - case '#': parser->skip_line_comment(); break; - case '{': parser->set_next_token(TK("{")); return; - case '}': parser->set_next_token(TK("}")); return; - case ',': parser->set_next_token(TK(",")); return; - case ':': parser->set_next_token_2(':', TK(":"), TK("::")); return; - case ';': parser->set_next_token(TK(";")); return; - case '(': parser->set_next_token(TK("(")); return; - case ')': parser->set_next_token(TK(")")); return; - case '[': parser->set_next_token(TK("[")); return; - case ']': parser->set_next_token(TK("]")); return; - case '@': parser->set_next_token(TK("@")); return; - case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return; - case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return; - case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return; - case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return; - case '?': parser->set_next_token(TK("?")); return; - case '.': { - if(parser->matchchar('.')) { - if(parser->matchchar('.')) { - parser->set_next_token(TK("...")); - } else { - SyntaxError("invalid token '..'"); - } - } else { - parser->set_next_token(TK(".")); - } - return; - } - case '=': parser->set_next_token_2('=', TK("="), TK("==")); return; - case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return; - case '>': { - if(parser->matchchar('=')) parser->set_next_token(TK(">=")); - else if(parser->matchchar('>')) parser->set_next_token_2('=', TK(">>"), TK(">>=")); - else parser->set_next_token(TK(">")); - return; - } - case '<': { - if(parser->matchchar('=')) parser->set_next_token(TK("<=")); - else if(parser->matchchar('<')) parser->set_next_token_2('=', TK("<<"), TK("<<=")); - else parser->set_next_token(TK("<")); - return; - } - case '-': { - if(parser->matchchar('=')) parser->set_next_token(TK("-=")); - else if(parser->matchchar('>')) parser->set_next_token(TK("->")); - else parser->set_next_token(TK("-")); - return; - } - case '!': - if(parser->matchchar('=')) parser->set_next_token(TK("!=")); - else SyntaxError("expected '=' after '!'"); - break; - case '*': - if (parser->matchchar('*')) { - parser->set_next_token(TK("**")); // '**' - } else { - parser->set_next_token_2('=', TK("*"), TK("*=")); - } - return; - case '/': - if(parser->matchchar('/')) { - parser->set_next_token_2('=', TK("//"), TK("//=")); - } else { - parser->set_next_token_2('=', TK("/"), TK("/=")); - } - return; - case '\r': break; // just ignore '\r' - case ' ': case '\t': parser->eat_spaces(); break; - case '\n': { - parser->set_next_token(TK("@eol")); - if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level"); - return; - } - default: { - if(c == 'f'){ - if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;} - if(parser->matchchar('"')) {eat_string('"', F_STRING); return;} - }else if(c == 'r'){ - if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;} - if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;} - } - - if (c >= '0' && c <= '9') { - eat_number(); - return; - } - - switch (parser->eat_name()) - { - case 0: break; - case 1: SyntaxError("invalid char: " + std::string(1, c)); - case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); - case 3: SyntaxError("@id contains invalid char"); break; - case 4: SyntaxError("invalid JSON token"); break; - default: UNREACHABLE(); - } - return; - } - } - } - - parser->token_start = parser->curr_char; - parser->set_next_token(TK("@eof")); - } - - inline TokenIndex peek() { - return parser->curr.type; - } - - // not sure this will work - TokenIndex peek_next() { - if(parser->nexts.empty()) return TK("@eof"); - return parser->nexts.front().type; } bool match(TokenIndex expected) { - if (peek() != expected) return false; - lex_token(); + if (curr().type != expected) return false; + advance(); return true; } void consume(TokenIndex expected) { if (!match(expected)){ - StrStream ss; - ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'"; - SyntaxError(ss.str()); + SyntaxError( + fmt("expected '", TK_STR(expected), "', but got '", TK_STR(curr().type), "'") + ); } } + bool match_newlines_repl(){ + return match_newlines(mode()==REPL_MODE); + } + bool match_newlines(bool repl_throw=false) { bool consumed = false; - if (peek() == TK("@eol")) { - while (peek() == TK("@eol")) lex_token(); + if (curr().type == TK("@eol")) { + while (curr().type == TK("@eol")) advance(); consumed = true; } - if (repl_throw && peek() == TK("@eof")){ - throw NeedMoreLines(co()->_is_compiling_class); + if (repl_throw && curr().type == TK("@eof")){ + throw NeedMoreLines(ctx()->is_compiling_class); } return consumed; } bool match_end_stmt() { if (match(TK(";"))) { match_newlines(); return true; } - if (match_newlines() || peek()==TK("@eof")) return true; - if (peek() == TK("@dedent")) return true; + if (match_newlines() || curr().type == TK("@eof")) return true; + if (curr().type == TK("@dedent")) return true; return false; } @@ -352,686 +145,682 @@ private: if (!match_end_stmt()) SyntaxError("expected statement end"); } - void exprLiteral() { - PyVar value = parser->prev.value; - int index = co()->add_const(value); - emit(OP_LOAD_CONST, index); + /*************************************************/ + + void EXPR(bool push_stack=true) { + parse_expression(PREC_TUPLE+1, push_stack); } - void exprFString() { - static const std::regex pattern(R"(\{(.*?)\})"); - PyVar value = parser->prev.value; - Str s = CAST(Str, value); - std::sregex_iterator begin(s.begin(), s.end(), pattern); - std::sregex_iterator end; - int size = 0; - int i = 0; - for(auto it = begin; it != end; it++) { - std::smatch m = *it; - if (i < m.position()) { - std::string literal = s.substr(i, m.position() - i); - emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - size++; - } - emit(OP_LOAD_EVAL_FN); - emit(OP_LOAD_CONST, co()->add_const(VAR(m[1].str()))); - emit(OP_CALL, 1); - size++; - i = (int)(m.position() + m.length()); - } - if (i < s.size()) { - std::string literal = s.substr(i, s.size() - i); - emit(OP_LOAD_CONST, co()->add_const(VAR(literal))); - size++; - } - emit(OP_BUILD_STRING, size); + void EXPR_TUPLE(bool push_stack=true) { + parse_expression(PREC_TUPLE, push_stack); } - void exprLambda() { - Function func; - func.name = ""; + // special case for `for loop` and `comp` + Expr_ EXPR_VARS(){ + std::vector items; + do { + consume(TK("@id")); + items.push_back(make_expr(prev().str(), name_scope())); + } while(match(TK(","))); + if(items.size()==1) return std::move(items[0]); + return make_expr(std::move(items)); + } + + template + std::unique_ptr make_expr(Args&&... args) { + std::unique_ptr expr = std::make_unique(std::forward(args)...); + expr->line = prev().line; + return expr; + } + + // PASS + void exprLiteral(){ + ctx()->s_expr.push(make_expr(prev().value)); + } + + // PASS + void exprFString(){ + ctx()->s_expr.push(make_expr(std::get(prev().value))); + } + + // PASS + void exprLambda(){ + auto e = make_expr(name_scope()); if(!match(TK(":"))){ - _compile_f_args(func, false); + _compile_f_args(e->decl, false); consume(TK(":")); } - func.code = make_sp(parser->src, func.name.str()); - this->codes.push(func.code); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; - emit(OP_RETURN_VALUE); - func.code->optimize(vm); - this->codes.pop(); - emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); - if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); + e->decl->code = push_context(lexer->src, e->decl->name.sv()); + EXPR(false); // https://github.com/blueloveTH/pocketpy/issues/37 + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + pop_context(); + ctx()->s_expr.push(std::move(e)); } - void exprAssign() { - int lhs = co()->codes.empty() ? -1 : co()->codes.size() - 1; - co()->_rvalue += 1; - TokenIndex op = parser->prev.type; - if(op == TK("=")) { // a = (expr) - EXPR_TUPLE(); - if(lhs!=-1 && co()->codes[lhs].op == OP_LOAD_NAME_REF){ - if(co()->_is_compiling_class){ - emit(OP_STORE_CLASS_ATTR, co()->codes[lhs].arg); - }else{ - emit(OP_STORE_NAME, co()->codes[lhs].arg); - } - co()->codes[lhs].op = OP_NO_OP; - co()->codes[lhs].arg = -1; - }else{ - if(co()->_is_compiling_class) SyntaxError(); - emit(OP_STORE_REF); - } - }else{ // a += (expr) -> a = a + (expr) - if(co()->_is_compiling_class) SyntaxError(); - EXPR(); - switch (op) { - case TK("+="): emit(OP_INPLACE_BINARY_OP, 0); break; - case TK("-="): emit(OP_INPLACE_BINARY_OP, 1); break; - case TK("*="): emit(OP_INPLACE_BINARY_OP, 2); break; - case TK("/="): emit(OP_INPLACE_BINARY_OP, 3); break; - case TK("//="): emit(OP_INPLACE_BINARY_OP, 4); break; - case TK("%="): emit(OP_INPLACE_BINARY_OP, 5); break; - case TK("<<="): emit(OP_INPLACE_BITWISE_OP, 0); break; - case TK(">>="): emit(OP_INPLACE_BITWISE_OP, 1); break; - case TK("&="): emit(OP_INPLACE_BITWISE_OP, 2); break; - case TK("|="): emit(OP_INPLACE_BITWISE_OP, 3); break; - case TK("^="): emit(OP_INPLACE_BITWISE_OP, 4); break; - default: UNREACHABLE(); - } - } - co()->_rvalue -= 1; - } - - void exprComma() { - int size = 1; // an expr is in the stack now + // PASS + void exprTuple(){ + std::vector items; + items.push_back(ctx()->s_expr.popx()); do { EXPR(); // NOTE: "1," will fail, "1,2" will be ok - size++; + items.push_back(ctx()->s_expr.popx()); } while(match(TK(","))); - emit(co()->_rvalue ? OP_BUILD_TUPLE : OP_BUILD_TUPLE_REF, size); + ctx()->s_expr.push(make_expr( + std::move(items) + )); } - void exprOr() { - int patch = emit(OP_JUMP_IF_TRUE_OR_POP); - parse_expression(PREC_LOGICAL_OR); - patch_jump(patch); + // PASS + void exprOr(){ + auto e = make_expr(); + e->lhs = ctx()->s_expr.popx(); + parse_expression(PREC_LOGICAL_OR + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } - void exprAnd() { - int patch = emit(OP_JUMP_IF_FALSE_OR_POP); - parse_expression(PREC_LOGICAL_AND); - patch_jump(patch); + // PASS + void exprAnd(){ + auto e = make_expr(); + e->lhs = ctx()->s_expr.popx(); + parse_expression(PREC_LOGICAL_AND + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } - void exprTernary() { - int patch = emit(OP_POP_JUMP_IF_FALSE); + // PASS + void exprTernary(){ + auto e = make_expr(); + e->cond = ctx()->s_expr.popx(); EXPR(); // if true - int patch2 = emit(OP_JUMP_ABSOLUTE); + e->true_expr = ctx()->s_expr.popx(); consume(TK(":")); - patch_jump(patch); EXPR(); // if false - patch_jump(patch2); + e->false_expr = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } - void exprBinaryOp() { - TokenIndex op = parser->prev.type; - parse_expression((Precedence)(rules[op].precedence + 1)); - - switch (op) { - case TK("+"): emit(OP_BINARY_OP, 0); break; - case TK("-"): emit(OP_BINARY_OP, 1); break; - case TK("*"): emit(OP_BINARY_OP, 2); break; - case TK("/"): emit(OP_BINARY_OP, 3); break; - case TK("//"): emit(OP_BINARY_OP, 4); break; - case TK("%"): emit(OP_BINARY_OP, 5); break; - case TK("**"): emit(OP_BINARY_OP, 6); break; - - case TK("<"): emit(OP_COMPARE_OP, 0); break; - case TK("<="): emit(OP_COMPARE_OP, 1); break; - case TK("=="): emit(OP_COMPARE_OP, 2); break; - case TK("!="): emit(OP_COMPARE_OP, 3); break; - case TK(">"): emit(OP_COMPARE_OP, 4); break; - case TK(">="): emit(OP_COMPARE_OP, 5); break; - case TK("in"): emit(OP_CONTAINS_OP, 0); break; - case TK("not in"): emit(OP_CONTAINS_OP, 1); break; - case TK("is"): emit(OP_IS_OP, 0); break; - case TK("is not"): emit(OP_IS_OP, 1); break; - - case TK("<<"): emit(OP_BITWISE_OP, 0); break; - case TK(">>"): emit(OP_BITWISE_OP, 1); break; - case TK("&"): emit(OP_BITWISE_OP, 2); break; - case TK("|"): emit(OP_BITWISE_OP, 3); break; - case TK("^"): emit(OP_BITWISE_OP, 4); break; - default: UNREACHABLE(); - } + // PASS + void exprBinaryOp(){ + auto e = make_expr(); + e->op = prev().type; + e->lhs = ctx()->s_expr.popx(); + parse_expression(rules[e->op].precedence + 1); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); } + // PASS void exprNot() { - parse_expression((Precedence)(PREC_LOGICAL_NOT + 1)); - emit(OP_UNARY_NOT); + parse_expression(PREC_LOGICAL_NOT + 1); + ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); } - void exprUnaryOp() { - TokenIndex op = parser->prev.type; - parse_expression((Precedence)(PREC_UNARY + 1)); - switch (op) { - case TK("-"): emit(OP_UNARY_NEGATIVE); break; - case TK("*"): emit(OP_UNARY_STAR, co()->_rvalue); break; + // PASS + void exprUnaryOp(){ + TokenIndex op = prev().type; + parse_expression(PREC_UNARY + 1); + switch(op){ + case TK("-"): + ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); + break; + case TK("*"): + ctx()->s_expr.push(make_expr(ctx()->s_expr.popx())); + break; default: UNREACHABLE(); } } - void exprGrouping() { - match_newlines(mode()==REPL_MODE); - EXPR_TUPLE(); - match_newlines(mode()==REPL_MODE); + // PASS + void exprGroup(){ + match_newlines_repl(); + EXPR_TUPLE(); // () is just for change precedence + match_newlines_repl(); consume(TK(")")); } - void _consume_comp(Opcode op0, Opcode op1, int _patch, int _body_start){ - int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1); - int _body_end = co()->codes.size(); - co()->codes[_patch].op = OP_JUMP_ABSOLUTE; - co()->codes[_patch].arg = _body_end; - emit(op0, 0); - EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE(); - match_newlines(mode()==REPL_MODE); - - int _skipPatch = emit(OP_JUMP_ABSOLUTE); - int _cond_start = co()->codes.size(); - int _cond_end_return = -1; - if(match(TK("if"))) { - EXPR_TUPLE(); - _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1); + // PASS + template + void _consume_comp(Expr_ expr){ + static_assert(std::is_base_of::value); + std::unique_ptr ce = make_expr(); + ce->expr = std::move(expr); + ce->vars = EXPR_VARS(); + consume(TK("in")); + EXPR(); + ce->iter = ctx()->s_expr.popx(); + match_newlines_repl(); + if(match(TK("if"))){ + EXPR(); + ce->cond = ctx()->s_expr.popx(); } - patch_jump(_skipPatch); - - emit(OP_GET_ITER); - co()->_enter_block(FOR_LOOP); - emit(OP_FOR_ITER); - - if(_cond_end_return != -1) { // there is an if condition - emit(OP_JUMP_ABSOLUTE, _cond_start); - patch_jump(_cond_end_return); - int ifpatch = emit(OP_POP_JUMP_IF_FALSE); - emit(OP_JUMP_ABSOLUTE, _body_start); - patch_jump(_body_end_return); - emit(op1); - patch_jump(ifpatch); - }else{ - emit(OP_JUMP_ABSOLUTE, _body_start); - patch_jump(_body_end_return); - emit(op1); - } - - emit(OP_LOOP_CONTINUE, -1, true); - co()->_exit_block(); - match_newlines(mode()==REPL_MODE); + ctx()->s_expr.push(std::move(ce)); + match_newlines_repl(); } + // PASS void exprList() { - int _patch = emit(OP_NO_OP); - int _body_start = co()->codes.size(); - int ARGC = 0; + int line = prev().line; + std::vector items; do { - match_newlines(mode()==REPL_MODE); - if (peek() == TK("]")) break; - EXPR(); ARGC++; - match_newlines(mode()==REPL_MODE); - if(ARGC == 1 && match(TK("for"))){ - _consume_comp(OP_BUILD_LIST, OP_LIST_APPEND, _patch, _body_start); + match_newlines_repl(); + if (curr().type == TK("]")) break; + EXPR(); + items.push_back(ctx()->s_expr.popx()); + match_newlines_repl(); + if(items.size()==1 && match(TK("for"))){ + _consume_comp(std::move(items[0])); consume(TK("]")); return; } + match_newlines_repl(); } while (match(TK(","))); - match_newlines(mode()==REPL_MODE); consume(TK("]")); - emit(OP_BUILD_LIST, ARGC); + auto e = make_expr(std::move(items)); + e->line = line; // override line + ctx()->s_expr.push(std::move(e)); } + // PASS void exprMap() { - int _patch = emit(OP_NO_OP); - int _body_start = co()->codes.size(); - bool parsing_dict = false; - int ARGC = 0; + bool parsing_dict = false; // {...} may be dict or set + std::vector items; do { - match_newlines(mode()==REPL_MODE); - if (peek() == TK("}")) break; + match_newlines_repl(); + if (curr().type == TK("}")) break; EXPR(); - if(peek() == TK(":")) parsing_dict = true; + if(curr().type == TK(":")) parsing_dict = true; if(parsing_dict){ consume(TK(":")); EXPR(); + auto dict_item = make_expr(); + dict_item->key = ctx()->s_expr.popx(); + dict_item->value = ctx()->s_expr.popx(); + items.push_back(std::move(dict_item)); + }else{ + items.push_back(ctx()->s_expr.popx()); } - ARGC++; - match_newlines(mode()==REPL_MODE); - if(ARGC == 1 && match(TK("for"))){ - if(parsing_dict) _consume_comp(OP_BUILD_MAP, OP_MAP_ADD, _patch, _body_start); - else _consume_comp(OP_BUILD_SET, OP_SET_ADD, _patch, _body_start); + match_newlines_repl(); + if(items.size()==1 && match(TK("for"))){ + if(parsing_dict) _consume_comp(std::move(items[0])); + else _consume_comp(std::move(items[0])); consume(TK("}")); return; } + match_newlines_repl(); } while (match(TK(","))); consume(TK("}")); - - if(ARGC == 0 || parsing_dict) emit(OP_BUILD_MAP, ARGC); - else emit(OP_BUILD_SET, ARGC); + if(items.size()==0 || parsing_dict){ + auto e = make_expr(std::move(items)); + ctx()->s_expr.push(std::move(e)); + }else{ + auto e = make_expr(std::move(items)); + ctx()->s_expr.push(std::move(e)); + } } + // PASS void exprCall() { - int ARGC = 0; - int KWARGC = 0; - bool need_unpack = false; + auto e = make_expr(); + e->callable = ctx()->s_expr.popx(); do { - match_newlines(mode()==REPL_MODE); - if (peek() == TK(")")) break; - if(peek() == TK("@id") && peek_next() == TK("=")) { + match_newlines_repl(); + if (curr().type==TK(")")) break; + if(curr().type==TK("@id") && next().type==TK("=")) { consume(TK("@id")); - const Str& key = parser->prev.str(); - emit(OP_LOAD_CONST, co()->add_const(VAR(key))); + Str key = prev().str(); consume(TK("=")); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; - KWARGC++; + EXPR(); + e->kwargs.push_back({key, ctx()->s_expr.popx()}); } else{ - if(KWARGC > 0) SyntaxError("positional argument follows keyword argument"); - co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1; - if(co()->codes.back().op == OP_UNARY_STAR) need_unpack = true; - ARGC++; + if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument"); + EXPR(); + e->args.push_back(ctx()->s_expr.popx()); } - match_newlines(mode()==REPL_MODE); + match_newlines_repl(); } while (match(TK(","))); consume(TK(")")); - if(ARGC > 32767) SyntaxError("too many positional arguments"); - if(KWARGC > 32767) SyntaxError("too many keyword arguments"); - if(KWARGC > 0){ - emit(need_unpack ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC << 16) | ARGC); - }else{ - emit(need_unpack ? OP_CALL_UNPACK : OP_CALL, ARGC); + if(e->args.size() > 32767) SyntaxError("too many positional arguments"); + if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments"); + ctx()->s_expr.push(std::move(e)); + } + + // PASS + void exprName(){ + Str name = prev().str(); + NameScope scope = name_scope(); + if(ctx()->co->global_names.count(name)){ + scope = NAME_GLOBAL; } + ctx()->s_expr.push(make_expr(name, scope)); } - void exprName(){ _exprName(false); } - - void _exprName(bool force_lvalue) { - Token tkname = parser->prev; - int index = co()->add_name(tkname.str(), name_scope()); - bool fast_load = !force_lvalue && co()->_rvalue>0; - emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index); - } - + // PASS void exprAttrib() { consume(TK("@id")); - const Str& name = parser->prev.str(); - int index = co()->add_name(name, NAME_ATTR); - emit(co()->_rvalue ? OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index); - } - - // [:], [:b] - // [a], [a:], [a:b] - void exprSubscript() { - if(match(TK(":"))){ - emit(OP_LOAD_NONE); - if(match(TK("]"))){ - emit(OP_LOAD_NONE); - }else{ - EXPR_TUPLE(); - consume(TK("]")); - } - emit(OP_BUILD_SLICE); - }else{ - EXPR_TUPLE(); - if(match(TK(":"))){ - if(match(TK("]"))){ - emit(OP_LOAD_NONE); - }else{ - EXPR_TUPLE(); - consume(TK("]")); - } - emit(OP_BUILD_SLICE); - }else{ - consume(TK("]")); - } - } - - emit(OP_BUILD_INDEX, (int)(co()->_rvalue>0)); - } - - void exprValue() { - TokenIndex op = parser->prev.type; - switch (op) { - case TK("None"): emit(OP_LOAD_NONE); break; - case TK("True"): emit(OP_LOAD_TRUE); break; - case TK("False"): emit(OP_LOAD_FALSE); break; - case TK("..."): emit(OP_LOAD_ELLIPSIS); break; - default: UNREACHABLE(); - } - } - - int emit(Opcode opcode, int arg=-1, bool keepline=false) { - int line = parser->prev.line; - co()->codes.push_back( - Bytecode{(uint8_t)opcode, arg, line, (uint16_t)co()->_curr_block_i} + ctx()->s_expr.push( + make_expr(ctx()->s_expr.popx(), prev().str()) ); - int i = co()->codes.size() - 1; - if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line; - return i; } - inline void patch_jump(int addr_index) { - int target = co()->codes.size(); - co()->codes[addr_index].arg = target; + // PASS + void exprSubscr() { + auto e = make_expr(); + e->a = ctx()->s_expr.popx(); + auto slice = make_expr(); + bool is_slice = false; + // a[<0> : state<3> : state<5>] + int state = 0; + do{ + switch(state){ + case 0: + if(match(TK(":"))){ + is_slice=true; + state=2; + break; + } + if(match(TK("]"))) SyntaxError(); + EXPR_TUPLE(); + slice->start = ctx()->s_expr.popx(); + state=1; + break; + case 1: + if(match(TK(":"))){ + is_slice=true; + state=2; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + SyntaxError("expected ':' or ']'"); + break; + case 2: + if(match(TK(":"))){ + state=4; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + EXPR_TUPLE(); + slice->stop = ctx()->s_expr.popx(); + state=3; + break; + case 3: + if(match(TK(":"))){ + state=4; + break; + } + if(match(TK("]"))) goto __SUBSCR_END; + SyntaxError("expected ':' or ']'"); + break; + case 4: + if(match(TK("]"))) goto __SUBSCR_END; + EXPR_TUPLE(); + slice->step = ctx()->s_expr.popx(); + state=5; + break; + case 5: consume(TK("]")); goto __SUBSCR_END; + } + }while(true); +__SUBSCR_END: + if(is_slice){ + e->b = std::move(slice); + }else{ + if(state != 1) UNREACHABLE(); + e->b = std::move(slice->start); + } + ctx()->s_expr.push(std::move(e)); } - void compile_block_body(CompilerAction action=nullptr) { - if(action == nullptr) action = &Compiler::compile_stmt; + // PASS + void exprLiteral0() { + ctx()->s_expr.push(make_expr(prev().type)); + } + + void compile_block_body() { consume(TK(":")); - if(peek()!=TK("@eol") && peek()!=TK("@eof")){ - (this->*action)(); // inline block + if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){ + compile_stmt(); // inline block return; } if(!match_newlines(mode()==REPL_MODE)){ SyntaxError("expected a new line after ':'"); } consume(TK("@indent")); - while (peek() != TK("@dedent")) { + while (curr().type != TK("@dedent")) { match_newlines(); - (this->*action)(); + compile_stmt(); match_newlines(); } consume(TK("@dedent")); } - Token _compile_import() { + Str _compile_import() { consume(TK("@id")); - Token tkmodule = parser->prev; - int index = co()->add_name(tkmodule.str(), NAME_SPECIAL); - emit(OP_IMPORT_NAME, index); - return tkmodule; + Str name = prev().str(); + int index = ctx()->add_name(name); + ctx()->emit(OP_IMPORT_NAME, index, prev().line); + return name; } // import a as b void compile_normal_import() { do { - Token tkmodule = _compile_import(); + Str name = _compile_import(); if (match(TK("as"))) { consume(TK("@id")); - tkmodule = parser->prev; + name = prev().str(); } - int index = co()->add_name(tkmodule.str(), name_scope()); - emit(OP_STORE_NAME, index); + int index = ctx()->add_name(name); + auto op = name_scope()==NAME_LOCAL ? OP_STORE_LOCAL : OP_STORE_GLOBAL; + ctx()->emit(op, index, prev().line); } while (match(TK(","))); consume_end_stmt(); } // from a import b as c, d as e void compile_from_import() { - Token tkmodule = _compile_import(); + _compile_import(); consume(TK("import")); if (match(TK("*"))) { - if(name_scope() != NAME_GLOBAL) SyntaxError("import * can only be used in global scope"); - emit(OP_STORE_ALL_NAMES); + if(name_scope() != NAME_GLOBAL) SyntaxError("import * should be used in global scope"); + ctx()->emit(OP_IMPORT_STAR, BC_NOARG, prev().line); consume_end_stmt(); return; } do { - emit(OP_DUP_TOP_VALUE); + ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE); consume(TK("@id")); - Token tkname = parser->prev; - int index = co()->add_name(tkname.str(), NAME_ATTR); - emit(OP_BUILD_ATTR, index); + Str name = prev().str(); + int index = ctx()->add_name(name); + ctx()->emit(OP_LOAD_ATTR, index, prev().line); if (match(TK("as"))) { consume(TK("@id")); - tkname = parser->prev; + name = prev().str(); } - index = co()->add_name(tkname.str(), name_scope()); - emit(OP_STORE_NAME, index); + index = ctx()->add_name(name); + auto op = name_scope()==NAME_LOCAL ? OP_STORE_LOCAL : OP_STORE_GLOBAL; + ctx()->emit(op, index, prev().line); } while (match(TK(","))); - emit(OP_POP_TOP); + ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); consume_end_stmt(); } - void parse_expression(Precedence precedence) { - lex_token(); - GrammarFn prefix = rules[parser->prev.type].prefix; - if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type)); + void parse_expression(int precedence, bool push_stack=true) { + advance(); + PrattCallback prefix = rules[prev().type].prefix; + if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type)); (this->*prefix)(); - bool meet_assign_token = false; - while (rules[peek()].precedence >= precedence) { - lex_token(); - TokenIndex op = parser->prev.type; - if (op == TK("=")){ - if(meet_assign_token) SyntaxError(); - meet_assign_token = true; - } - GrammarFn infix = rules[op].infix; + while (rules[curr().type].precedence >= precedence) { + TokenIndex op = curr().type; + advance(); + PrattCallback infix = rules[op].infix; if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true"); (this->*infix)(); } + if(!push_stack) ctx()->emit_expr(); } + // PASS void compile_if_stmt() { - match_newlines(); - co()->_rvalue += 1; - EXPR_TUPLE(); // condition - co()->_rvalue -= 1; - int ifpatch = emit(OP_POP_JUMP_IF_FALSE); + EXPR(false); // condition + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); - if (match(TK("elif"))) { - int exit_jump = emit(OP_JUMP_ABSOLUTE); - patch_jump(ifpatch); + int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + ctx()->patch_jump(patch); compile_if_stmt(); - patch_jump(exit_jump); + ctx()->patch_jump(exit_patch); } else if (match(TK("else"))) { - int exit_jump = emit(OP_JUMP_ABSOLUTE); - patch_jump(ifpatch); + int exit_patch = ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line); + ctx()->patch_jump(patch); compile_block_body(); - patch_jump(exit_jump); + ctx()->patch_jump(exit_patch); } else { - patch_jump(ifpatch); + ctx()->patch_jump(patch); } } + // PASS void compile_while_loop() { - co()->_enter_block(WHILE_LOOP); - co()->_rvalue += 1; - EXPR_TUPLE(); // condition - co()->_rvalue -= 1; - int patch = emit(OP_POP_JUMP_IF_FALSE); + ctx()->enter_block(WHILE_LOOP); + EXPR(false); // condition + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line); compile_block_body(); - emit(OP_LOOP_CONTINUE, -1, true); - patch_jump(patch); - co()->_exit_block(); - } - - void EXPR_FOR_VARS(){ - int size = 0; - do { - consume(TK("@id")); - _exprName(true); size++; - } while (match(TK(","))); - if(size > 1) emit(OP_BUILD_TUPLE_REF, size); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); + ctx()->patch_jump(patch); + ctx()->exit_block(); } + // PASS void compile_for_loop() { - EXPR_FOR_VARS();consume(TK("in")); - co()->_rvalue += 1; EXPR_TUPLE(); co()->_rvalue -= 1; - emit(OP_GET_ITER); - co()->_enter_block(FOR_LOOP); - emit(OP_FOR_ITER); + Expr_ vars = EXPR_VARS(); + consume(TK("in")); + EXPR(false); + ctx()->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); + ctx()->enter_block(FOR_LOOP); + ctx()->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); + bool ok = vars->emit_store(ctx()); + if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind compile_block_body(); - emit(OP_LOOP_CONTINUE, -1, true); - co()->_exit_block(); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); + ctx()->exit_block(); } void compile_try_except() { - co()->_enter_block(TRY_EXCEPT); - emit(OP_TRY_BLOCK_ENTER); + ctx()->enter_block(TRY_EXCEPT); + ctx()->emit(OP_TRY_BLOCK_ENTER, BC_NOARG, prev().line); compile_block_body(); - emit(OP_TRY_BLOCK_EXIT); - std::vector patches = { emit(OP_JUMP_ABSOLUTE) }; - co()->_exit_block(); - + ctx()->emit(OP_TRY_BLOCK_EXIT, BC_NOARG, BC_KEEPLINE); + std::vector patches = { + ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE) + }; + ctx()->exit_block(); do { consume(TK("except")); if(match(TK("@id"))){ - int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL); - emit(OP_EXCEPTION_MATCH, name_idx); + int namei = ctx()->add_name(prev().str()); + ctx()->emit(OP_EXCEPTION_MATCH, namei, prev().line); }else{ - emit(OP_LOAD_TRUE); + ctx()->emit(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE); } - int patch = emit(OP_POP_JUMP_IF_FALSE); - emit(OP_POP_TOP); // pop the exception on match + int patch = ctx()->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); + // pop the exception on match + ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); compile_block_body(); - patches.push_back(emit(OP_JUMP_ABSOLUTE)); - patch_jump(patch); - }while(peek() == TK("except")); - emit(OP_RE_RAISE); // no match, re-raise - for (int patch : patches) patch_jump(patch); + patches.push_back(ctx()->emit(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)); + ctx()->patch_jump(patch); + }while(curr().type == TK("except")); + // no match, re-raise + ctx()->emit(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE); + for (int patch : patches) ctx()->patch_jump(patch); + } + + void compile_decorated(){ + std::vector decorators; + do{ + EXPR(); + decorators.push_back(ctx()->s_expr.popx()); + if(!match_newlines_repl()) SyntaxError(); + }while(match(TK("@"))); + consume(TK("def")); + compile_function(decorators); + } + + bool try_compile_assignment(){ + Expr* lhs_p = ctx()->s_expr.top().get(); + bool inplace; + switch (curr().type) { + case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="): + case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): { + if(ctx()->is_compiling_class) SyntaxError(); + inplace = true; + advance(); + auto e = make_expr(); + e->op = prev().type - 1; // -1 to remove = + e->lhs = ctx()->s_expr.popx(); + EXPR_TUPLE(); + e->rhs = ctx()->s_expr.popx(); + ctx()->s_expr.push(std::move(e)); + } break; + case TK("="): + inplace = false; + advance(); + EXPR_TUPLE(); + break; + default: return false; + } + // std::cout << ctx()->_log_s_expr() << std::endl; + Expr_ rhs = ctx()->s_expr.popx(); + + if(lhs_p->is_starred() || rhs->is_starred()){ + SyntaxError("can't use starred expression here"); + } + + rhs->emit(ctx()); + bool ok = lhs_p->emit_store(ctx()); + if(!ok) SyntaxError(); + if(!inplace) ctx()->s_expr.pop(); + return true; } void compile_stmt() { - if (match(TK("break"))) { - if (!co()->_is_curr_block_loop()) SyntaxError("'break' outside loop"); - consume_end_stmt(); - emit(OP_LOOP_BREAK); - } else if (match(TK("continue"))) { - if (!co()->_is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); - consume_end_stmt(); - emit(OP_LOOP_CONTINUE); - } else if (match(TK("yield"))) { - if (codes.size() == 1) SyntaxError("'yield' outside function"); - co()->_rvalue += 1; - EXPR_TUPLE(); - co()->_rvalue -= 1; - consume_end_stmt(); - co()->is_generator = true; - emit(OP_YIELD_VALUE, -1, true); - } else if (match(TK("return"))) { - if (codes.size() == 1) SyntaxError("'return' outside function"); - if(match_end_stmt()){ - emit(OP_LOAD_NONE); - }else{ - co()->_rvalue += 1; - EXPR_TUPLE(); // return value - co()->_rvalue -= 1; + advance(); + int kw_line = prev().line; // backup line number + switch(prev().type){ + case TK("break"): + if (!ctx()->is_curr_block_loop()) SyntaxError("'break' outside loop"); + ctx()->emit(OP_LOOP_BREAK, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("continue"): + if (!ctx()->is_curr_block_loop()) SyntaxError("'continue' not properly in loop"); + ctx()->emit(OP_LOOP_CONTINUE, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("yield"): + if (contexts.size() <= 1) SyntaxError("'yield' outside function"); + EXPR_TUPLE(false); + // if yield present, mark the function as generator + ctx()->co->is_generator = true; + ctx()->emit(OP_YIELD_VALUE, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("return"): + if (contexts.size() <= 1) SyntaxError("'return' outside function"); + if(match_end_stmt()){ + ctx()->emit(OP_LOAD_NONE, BC_NOARG, kw_line); + }else{ + EXPR_TUPLE(false); + consume_end_stmt(); + } + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, kw_line); + break; + /*************************************************/ + case TK("if"): compile_if_stmt(); break; + case TK("while"): compile_while_loop(); break; + case TK("for"): compile_for_loop(); break; + case TK("import"): compile_normal_import(); break; + case TK("from"): compile_from_import(); break; + case TK("def"): compile_function(); break; + case TK("@"): compile_decorated(); break; + case TK("try"): compile_try_except(); break; + case TK("pass"): consume_end_stmt(); break; + /*************************************************/ + case TK("assert"): + EXPR_TUPLE(false); + ctx()->emit(OP_ASSERT, BC_NOARG, kw_line); + consume_end_stmt(); + break; + case TK("global"): + do { + consume(TK("@id")); + ctx()->co->global_names.insert(prev().str()); + } while (match(TK(","))); + consume_end_stmt(); + break; + case TK("raise"): { + consume(TK("@id")); + int dummy_t = ctx()->add_name(prev().str()); + if(match(TK("(")) && !match(TK(")"))){ + EXPR(false); consume(TK(")")); + }else{ + ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); + } + ctx()->emit(OP_RAISE, dummy_t, kw_line); + consume_end_stmt(); + } break; + case TK("del"): { + EXPR_TUPLE(); + Expr_ e = ctx()->s_expr.popx(); + bool ok = e->emit_del(ctx()); + if(!ok) SyntaxError(); + consume_end_stmt(); + } break; + case TK("with"): { + // TODO: reimpl this + EXPR(false); + ctx()->emit(OP_POP_TOP, BC_NOARG, prev().line); + consume(TK("as")); + consume(TK("@id")); + // int index = ctx()->add_name(prev().str()); + // emit(OP_STORE_NAME, index); + // emit(OP_LOAD_NAME_REF, index); + // emit(OP_WITH_ENTER); + compile_block_body(); + // emit(OP_LOAD_NAME_REF, index); + // emit(OP_WITH_EXIT); + } break; + /*************************************************/ + // TODO: refactor goto/label use special $ syntax + case TK("label"): { + if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); + consume(TK(".")); consume(TK("@id")); + bool ok = ctx()->add_label(prev().str()); + if(!ok) SyntaxError("label " + prev().str().escape() + " already exists"); + consume_end_stmt(); + } break; + case TK("goto"): + if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); + consume(TK(".")); consume(TK("@id")); + ctx()->emit(OP_GOTO, ctx()->add_name(prev().str()), prev().line); + consume_end_stmt(); + break; + /*************************************************/ + // handle dangling expression or assignment + default: { + advance(-1); // do revert since we have pre-called advance() at the beginning + EXPR_TUPLE(); + if(!try_compile_assignment()){ + ctx()->emit_expr(); + if(mode()==REPL_MODE && name_scope()==NAME_GLOBAL){ + ctx()->emit(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE); + }else{ + ctx()->emit(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); + } + } consume_end_stmt(); } - emit(OP_RETURN_VALUE, -1, true); - } else if (match(TK("if"))) { - compile_if_stmt(); - } else if (match(TK("while"))) { - compile_while_loop(); - } else if (match(TK("for"))) { - compile_for_loop(); - } else if (match(TK("import"))){ - compile_normal_import(); - } else if (match(TK("from"))){ - compile_from_import(); - } else if (match(TK("def"))){ - compile_function(); - } else if (match(TK("@"))){ - EXPR(); - if(!match_newlines(mode()==REPL_MODE)){ - SyntaxError("expected a new line after '@'"); - } - emit(OP_SETUP_DECORATOR); - consume(TK("def")); - compile_function(); - } else if (match(TK("try"))) { - compile_try_except(); - } else if(match(TK("assert"))) { - co()->_rvalue += 1; - EXPR(); - if (match(TK(","))) EXPR(); - else emit(OP_LOAD_CONST, co()->add_const(VAR(""))); - co()->_rvalue -= 1; - emit(OP_ASSERT); - consume_end_stmt(); - } else if(match(TK("with"))){ - EXPR(); - consume(TK("as")); - consume(TK("@id")); - Token tkname = parser->prev; - int index = co()->add_name(tkname.str(), name_scope()); - emit(OP_STORE_NAME, index); - emit(OP_LOAD_NAME_REF, index); - emit(OP_WITH_ENTER); - compile_block_body(); - emit(OP_LOAD_NAME_REF, index); - emit(OP_WITH_EXIT); - } else if(match(TK("label"))){ - if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); - consume(TK(".")); consume(TK("@id")); - Str label = parser->prev.str(); - bool ok = co()->add_label(label); - if(!ok) SyntaxError("label '" + label + "' already exists"); - consume_end_stmt(); - } else if(match(TK("goto"))){ // https://entrian.com/goto/ - if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); - consume(TK(".")); consume(TK("@id")); - emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL)); - consume_end_stmt(); - } else if(match(TK("raise"))){ - consume(TK("@id")); - int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL); - if(match(TK("(")) && !match(TK(")"))){ - EXPR(); consume(TK(")")); - }else{ - emit(OP_LOAD_NONE); - } - emit(OP_RAISE, dummy_t); - consume_end_stmt(); - } else if(match(TK("del"))){ - EXPR_TUPLE(); - emit(OP_DELETE_REF); - consume_end_stmt(); - } else if(match(TK("global"))){ - do { - consume(TK("@id")); - co()->global_names[parser->prev.str()] = 1; - } while (match(TK(","))); - consume_end_stmt(); - } else if(match(TK("pass"))){ - consume_end_stmt(); - } else { - int begin = co()->codes.size(); - EXPR_ANY(); - int end = co()->codes.size(); - consume_end_stmt(); - // If last op is not an assignment, pop the result. - uint8_t last_op = co()->codes.back().op; - if( last_op!=OP_STORE_NAME && last_op!=OP_STORE_REF && - last_op!=OP_INPLACE_BINARY_OP && last_op!=OP_INPLACE_BITWISE_OP && - last_op!=OP_STORE_ALL_NAMES && last_op!=OP_STORE_CLASS_ATTR){ - for(int i=begin; icodes[i].op==OP_BUILD_TUPLE_REF) co()->codes[i].op = OP_BUILD_TUPLE; - } - if(mode()==REPL_MODE && name_scope() == NAME_GLOBAL) emit(OP_PRINT_EXPR, -1, true); - emit(OP_POP_TOP, -1, true); - } } } + // PASS void compile_class(){ consume(TK("@id")); - int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); - int super_cls_name_idx = -1; + int namei = ctx()->add_name(prev().str()); + int super_namei = -1; if(match(TK("(")) && match(TK("@id"))){ - super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL); + super_namei = ctx()->add_name(prev().str()); consume(TK(")")); } - if(super_cls_name_idx == -1) emit(OP_LOAD_NONE); - else emit(OP_LOAD_NAME, super_cls_name_idx); - emit(OP_BEGIN_CLASS, cls_name_idx); - co()->_is_compiling_class = true; + if(super_namei == -1) ctx()->emit(OP_LOAD_NONE, BC_NOARG, prev().line); + else ctx()->emit(OP_LOAD_GLOBAL, super_namei, prev().line); + ctx()->emit(OP_BEGIN_CLASS, namei, BC_KEEPLINE); + ctx()->is_compiling_class = true; compile_block_body(); - co()->_is_compiling_class = false; - emit(OP_END_CLASS); + ctx()->is_compiling_class = false; + ctx()->emit(OP_END_CLASS, BC_NOARG, BC_KEEPLINE); } - void _compile_f_args(Function& func, bool enable_type_hints){ + void _compile_f_args(FuncDecl_ decl, bool enable_type_hints){ int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs do { if(state == 3) SyntaxError("**kwargs should be the last argument"); @@ -1045,134 +834,139 @@ private: } consume(TK("@id")); - const Str& name = parser->prev.str(); - if(func.has_name(name)) SyntaxError("duplicate argument name"); + const Str& name = prev().str(); + if(decl->has_name(name)) SyntaxError("duplicate argument name"); // eat type hints if(enable_type_hints && match(TK(":"))) consume(TK("@id")); - if(state == 0 && peek() == TK("=")) state = 2; - + if(state == 0 && curr().type == TK("=")) state = 2; switch (state) { - case 0: func.args.push_back(name); break; - case 1: func.starred_arg = name; state+=1; break; + case 0: decl->args.push_back(name); break; + case 1: decl->starred_arg = name; state+=1; break; case 2: { consume(TK("=")); - PyVarOrNull value = read_literal(); + PyObject* value = read_literal(); if(value == nullptr){ - SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type)); + SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type)); } - func.kwargs.set(name, value); - func.kwargs_order.push_back(name); + decl->kwargs.set(name, value); + decl->kwargs_order.push_back(name); } break; case 3: SyntaxError("**kwargs is not supported yet"); break; } } while (match(TK(","))); } - void compile_function(){ - bool has_decorator = !co()->codes.empty() && co()->codes.back().op == OP_SETUP_DECORATOR; - Function func; + void compile_function(const std::vector& decorators={}){ + FuncDecl_ decl = make_sp(); StrName obj_name; consume(TK("@id")); - func.name = parser->prev.str(); - if(!co()->_is_compiling_class && match(TK("::"))){ + decl->name = prev().str(); + if(!ctx()->is_compiling_class && match(TK("::"))){ consume(TK("@id")); - obj_name = func.name; - func.name = parser->prev.str(); + obj_name = decl->name; + decl->name = prev().str(); } consume(TK("(")); if (!match(TK(")"))) { - _compile_f_args(func, true); + _compile_f_args(decl, true); consume(TK(")")); } if(match(TK("->"))){ if(!match(TK("None"))) consume(TK("@id")); } - func.code = make_sp(parser->src, func.name.str()); - this->codes.push(func.code); + decl->code = push_context(lexer->src, decl->name.sv()); compile_block_body(); - func.code->optimize(vm); - this->codes.pop(); - emit(OP_LOAD_FUNCTION, co()->add_const(VAR(func))); - if(name_scope() == NAME_LOCAL) emit(OP_SETUP_CLOSURE); - if(!co()->_is_compiling_class){ + pop_context(); + ctx()->emit(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line); + // add decorators + for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){ + (*it)->emit(ctx()); + ctx()->emit(OP_ROT_TWO, BC_NOARG, (*it)->line); + ctx()->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_CALL, 1, (*it)->line); + } + if(!ctx()->is_compiling_class){ if(obj_name.empty()){ - if(has_decorator) emit(OP_CALL, 1); - emit(OP_STORE_NAME, co()->add_name(func.name, name_scope())); + auto e = make_expr(decl->name, name_scope()); + e->emit_store(ctx()); } else { - if(has_decorator) SyntaxError("decorator is not supported here"); - emit(OP_LOAD_NAME, co()->add_name(obj_name, name_scope())); - int index = co()->add_name(func.name, NAME_ATTR); - emit(OP_BUILD_ATTR_REF, index); - emit(OP_ROT_TWO); - emit(OP_STORE_REF); + ctx()->emit(OP_LOAD_GLOBAL, ctx()->add_name(obj_name), prev().line); + int index = ctx()->add_name(decl->name); + ctx()->emit(OP_STORE_ATTR, index, prev().line); } }else{ - if(has_decorator) emit(OP_CALL, 1); - emit(OP_STORE_CLASS_ATTR, co()->add_name(func.name, name_scope())); + int index = ctx()->add_name(decl->name); + ctx()->emit(OP_STORE_CLASS_ATTR, index, prev().line); } } - PyVarOrNull read_literal(){ - if(match(TK("-"))){ - consume(TK("@num")); - PyVar val = parser->prev.value; - return vm->num_negated(val); + PyObject* read_literal(){ + advance(); + switch(prev().type){ + case TK("-"): { + consume(TK("@num")); + PyObject* val = LiteralExpr(prev().value).to_object(ctx()); + return vm->num_negated(val); + } + case TK("@num"): return LiteralExpr(prev().value).to_object(ctx()); + case TK("@str"): return LiteralExpr(prev().value).to_object(ctx()); + case TK("True"): return VAR(true); + case TK("False"): return VAR(false); + case TK("None"): return vm->None; + case TK("..."): return vm->Ellipsis; + default: break; } - if(match(TK("@num"))) return parser->prev.value; - if(match(TK("@str"))) return parser->prev.value; - if(match(TK("True"))) return VAR(true); - if(match(TK("False"))) return VAR(false); - if(match(TK("None"))) return vm->None; - if(match(TK("..."))) return vm->Ellipsis; return nullptr; } - /***** Error Reporter *****/ - void throw_err(Str type, Str msg){ - int lineno = parser->curr.line; - const char* cursor = parser->curr.start; - // if error occurs in lexing, lineno should be `parser->current_line` - if(lexing_count > 0){ - lineno = parser->current_line; - cursor = parser->curr_char; - } - if(parser->peekchar() == '\n') lineno--; - auto e = Exception("SyntaxError", msg); - e.st_push(parser->src->snapshot(lineno, cursor)); - throw e; - } - void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } - void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } - void IndentationError(Str msg){ throw_err("IndentationError", msg); } + void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); } + void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); } + void IndentationError(Str msg){ lexer->throw_err("IndentationError", msg, curr().line, curr().start); } public: + Compiler(VM* vm, const Str& source, const Str& filename, CompileMode mode){ + this->vm = vm; + this->used = false; + this->lexer = std::make_unique( + make_sp(source, filename, mode) + ); + // TODO: check if already initialized + init_pratt_rules(); + } + CodeObject_ compile(){ - // can only be called once if(used) UNREACHABLE(); used = true; - CodeObject_ code = make_sp(parser->src, Str("")); - codes.push(code); + tokens = lexer->run(); + // if(lexer->src->filename == ""){ + // for(auto& t: tokens) std::cout << t.info() << std::endl; + // } - lex_token(); lex_token(); - match_newlines(); + CodeObject_ code = push_context(lexer->src, lexer->src->filename); + + advance(); // skip @sof, so prev() is always valid + match_newlines(); // skip possible leading '\n' if(mode()==EVAL_MODE) { - EXPR_TUPLE(); + EXPR_TUPLE(false); consume(TK("@eof")); - code->optimize(vm); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + pop_context(); return code; }else if(mode()==JSON_MODE){ - PyVarOrNull value = read_literal(); - if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value)); - else if(match(TK("{"))) exprMap(); - else if(match(TK("["))) exprList(); - else SyntaxError("expect a JSON object or array"); + EXPR(); + Expr_ e = ctx()->s_expr.popx(); + if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array"); consume(TK("@eof")); - return code; // no need to optimize for JSON decoding + e->emit(ctx()); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + pop_context(); + return code; } while (!match(TK("@eof"))) { @@ -1183,7 +977,7 @@ public: } match_newlines(); } - code->optimize(vm); + pop_context(); return code; } }; diff --git a/src/error.h b/src/error.h index 945e929c..bd7182cd 100644 --- a/src/error.h +++ b/src/error.h @@ -1,6 +1,7 @@ #pragma once #include "namedict.h" +#include "str.h" #include "tuplelist.h" namespace pkpy{ @@ -22,7 +23,7 @@ enum CompileMode { }; struct SourceData { - const char* source; + std::string source; Str filename; std::vector line_starts; CompileMode mode; @@ -37,25 +38,32 @@ struct SourceData { return {_start, i}; } - SourceData(const char* source, Str filename, CompileMode mode) { - source = strdup(source); + SourceData(const Str& source, const Str& filename, CompileMode mode) { + int index = 0; // Skip utf8 BOM if there is any. - if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; + if (strncmp(source.begin(), "\xEF\xBB\xBF", 3) == 0) index += 3; + // Remove all '\r' + std::stringstream ss; + while(index < source.length()){ + if(source[index] != '\r') ss << source[index]; + index++; + } + this->filename = filename; - this->source = source; - line_starts.push_back(source); + this->source = ss.str(); + line_starts.push_back(this->source.c_str()); this->mode = mode; } Str snapshot(int lineno, const char* cursor=nullptr){ - StrStream ss; + std::stringstream ss; ss << " " << "File \"" << filename << "\", line " << lineno << '\n'; std::pair pair = get_line(lineno); Str line = ""; int removed_spaces = 0; if(pair.first && pair.second){ line = Str(pair.first, pair.second-pair.first).lstrip(); - removed_spaces = pair.second - pair.first - line.size(); + removed_spaces = pair.second - pair.first - line.length(); if(line.empty()) line = ""; } ss << " " << line; @@ -65,14 +73,13 @@ struct SourceData { } return ss.str(); } - - ~SourceData() { free((void*)source); } }; class Exception { + using StackTrace = stack; StrName type; Str msg; - std::stack stacktrace; + StackTrace stacktrace; public: Exception(StrName type, Str msg): type(type), msg(msg) {} bool match_type(StrName type) const { return this->type == type;} @@ -84,12 +91,12 @@ public: } Str summary() const { - std::stack st(stacktrace); - StrStream ss; + StackTrace st(stacktrace); + std::stringstream ss; if(is_re) ss << "Traceback (most recent call last):\n"; while(!st.empty()) { ss << st.top() << '\n'; st.pop(); } - if (!msg.empty()) ss << type.str() << ": " << msg; - else ss << type.str(); + if (!msg.empty()) ss << type.sv() << ": " << msg; + else ss << type.sv(); return ss.str(); } }; diff --git a/src/expr.h b/src/expr.h new file mode 100644 index 00000000..6a337824 --- /dev/null +++ b/src/expr.h @@ -0,0 +1,701 @@ +#pragma once + +#include "codeobject.h" +#include "common.h" +#include "lexer.h" +#include "error.h" +#include "ceval.h" +#include "str.h" + +namespace pkpy{ + +struct CodeEmitContext; + +struct Expr{ + int line = 0; + virtual ~Expr() = default; + virtual void emit(CodeEmitContext* ctx) = 0; + virtual std::string str() const = 0; + + virtual bool is_starred() const { return false; } + virtual bool is_literal() const { return false; } + virtual bool is_json_object() const { return false; } + virtual bool is_attrib() const { return false; } + + // for OP_DELETE_XXX + [[nodiscard]] virtual bool emit_del(CodeEmitContext* ctx) { return false; } + + // for OP_STORE_XXX + [[nodiscard]] virtual bool emit_store(CodeEmitContext* ctx) { return false; } +}; + +struct CodeEmitContext{ + VM* vm; + CodeObject_ co; + stack s_expr; + CodeEmitContext(VM* vm, CodeObject_ co): vm(vm), co(co) {} + + int curr_block_i = 0; + bool is_compiling_class = false; + + bool is_curr_block_loop() const { + return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP; + } + + void enter_block(CodeBlockType type){ + co->blocks.push_back(CodeBlock{ + type, curr_block_i, (int)co->codes.size() + }); + curr_block_i = co->blocks.size()-1; + } + + void exit_block(){ + co->blocks[curr_block_i].end = co->codes.size(); + curr_block_i = co->blocks[curr_block_i].parent; + if(curr_block_i < 0) UNREACHABLE(); + } + + // clear the expression stack and generate bytecode + void emit_expr(){ + if(s_expr.size() != 1){ + throw std::runtime_error("s_expr.size() != 1\n" + _log_s_expr()); + } + Expr_ expr = s_expr.popx(); + expr->emit(this); + } + + std::string _log_s_expr(){ + std::stringstream ss; + for(auto& e: s_expr.data()) ss << e->str() << " "; + return ss.str(); + } + + int emit(Opcode opcode, int arg, int line) { + co->codes.push_back( + Bytecode{(uint16_t)opcode, (uint16_t)curr_block_i, arg, line} + ); + int i = co->codes.size() - 1; + if(line==BC_KEEPLINE){ + if(i>=1) co->codes[i].line = co->codes[i-1].line; + else co->codes[i].line = 1; + } + return i; + } + + void patch_jump(int index) { + int target = co->codes.size(); + co->codes[index].arg = target; + } + + bool add_label(StrName label){ + if(co->labels.count(label)) return false; + co->labels[label] = co->codes.size(); + return true; + } + + int add_name(StrName name){ + for(int i=0; inames.size(); i++){ + if(co->names[i] == name) return i; + } + co->names.push_back(name); + return co->names.size() - 1; + } + + int add_const(PyObject* v){ + co->consts.push_back(v); + return co->consts.size() - 1; + } + + int add_func_decl(FuncDecl_ decl){ + co->func_decls.push_back(decl); + return co->func_decls.size() - 1; + } +}; + +// PASS +struct NameExpr: Expr{ + StrName name; + NameScope scope; + NameExpr(StrName name, NameScope scope): name(name), scope(scope) {} + + std::string str() const override { return fmt("Name(", name.escape(), ")"); } + + void emit(CodeEmitContext* ctx) override { + int index = ctx->add_name(name); + ctx->emit(OP_LOAD_NAME, index, line); + } + + bool emit_del(CodeEmitContext* ctx) override { + int index = ctx->add_name(name); + switch(scope){ + case NAME_LOCAL: + ctx->emit(OP_DELETE_LOCAL, index, line); + break; + case NAME_GLOBAL: + ctx->emit(OP_DELETE_GLOBAL, index, line); + break; + default: UNREACHABLE(); break; + } + return true; + } + + bool emit_store(CodeEmitContext* ctx) override { + int index = ctx->add_name(name); + if(ctx->is_compiling_class){ + ctx->emit(OP_STORE_CLASS_ATTR, index, line); + return true; + } + switch(scope){ + case NAME_LOCAL: + ctx->emit(OP_STORE_LOCAL, index, line); + break; + case NAME_GLOBAL: + ctx->emit(OP_STORE_GLOBAL, index, line); + break; + default: UNREACHABLE(); break; + } + return true; + } +}; + +struct StarredExpr: Expr{ + Expr_ child; + StarredExpr(Expr_&& child): child(std::move(child)) {} + std::string str() const override { return "Starred()"; } + + bool is_starred() const override { return true; } + + void emit(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_STAR, BC_NOARG, line); + } + + bool emit_store(CodeEmitContext* ctx) override { + // simply proxy to child + return child->emit_store(ctx); + } +}; + +// PASS +struct NotExpr: Expr{ + Expr_ child; + NotExpr(Expr_&& child): child(std::move(child)) {} + std::string str() const override { return "Not()"; } + + void emit(CodeEmitContext* ctx) override { + child->emit(ctx); + ctx->emit(OP_UNARY_NOT, BC_NOARG, line); + } +}; + +// PASS +struct AndExpr: Expr{ + Expr_ lhs; + Expr_ rhs; + std::string str() const override { return "And()"; } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + int patch = ctx->emit(OP_JUMP_IF_FALSE_OR_POP, BC_NOARG, line); + rhs->emit(ctx); + ctx->patch_jump(patch); + } +}; + +// PASS +struct OrExpr: Expr{ + Expr_ lhs; + Expr_ rhs; + std::string str() const override { return "Or()"; } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + int patch = ctx->emit(OP_JUMP_IF_TRUE_OR_POP, BC_NOARG, line); + rhs->emit(ctx); + ctx->patch_jump(patch); + } +}; + +// [None, True, False, ...] +struct Literal0Expr: Expr{ + TokenIndex token; + Literal0Expr(TokenIndex token): token(token) {} + std::string str() const override { return TK_STR(token); } + + void emit(CodeEmitContext* ctx) override { + switch (token) { + case TK("None"): ctx->emit(OP_LOAD_NONE, BC_NOARG, line); break; + case TK("True"): ctx->emit(OP_LOAD_TRUE, BC_NOARG, line); break; + case TK("False"): ctx->emit(OP_LOAD_FALSE, BC_NOARG, line); break; + case TK("..."): ctx->emit(OP_LOAD_ELLIPSIS, BC_NOARG, line); break; + default: UNREACHABLE(); + } + } + + bool is_json_object() const override { return true; } +}; + +// @num, @str which needs to invoke OP_LOAD_CONST +struct LiteralExpr: Expr{ + TokenValue value; + LiteralExpr(TokenValue value): value(value) {} + std::string str() const override { + if(std::holds_alternative(value)){ + return std::to_string(std::get(value)); + } + + if(std::holds_alternative(value)){ + return std::to_string(std::get(value)); + } + + if(std::holds_alternative(value)){ + Str s = std::get(value).escape(); + return s.str(); + } + + UNREACHABLE(); + } + + PyObject* to_object(CodeEmitContext* ctx){ + VM* vm = ctx->vm; + PyObject* obj = nullptr; + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + if(std::holds_alternative(value)){ + obj = VAR(std::get(value)); + } + return obj; + } + + void emit(CodeEmitContext* ctx) override { + PyObject* obj = to_object(ctx); + if(obj == nullptr) UNREACHABLE(); + int index = ctx->add_const(obj); + ctx->emit(OP_LOAD_CONST, index, line); + } + + bool is_literal() const override { return true; } + bool is_json_object() const override { return true; } +}; + +// PASS +struct NegatedExpr: Expr{ + Expr_ child; + NegatedExpr(Expr_&& child): child(std::move(child)) {} + std::string str() const override { return "Negated()"; } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + // if child is a int of float, do constant folding + if(child->is_literal()){ + LiteralExpr* lit = static_cast(child.get()); + PyObject* obj = nullptr; + if(std::holds_alternative(lit->value)){ + obj = VAR(-std::get(lit->value)); + } + if(std::holds_alternative(lit->value)){ + obj = VAR(-std::get(lit->value)); + } + if(obj != nullptr){ + ctx->emit(OP_LOAD_CONST, ctx->add_const(obj), line); + return; + } + } + child->emit(ctx); + ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line); + } + + bool is_json_object() const override { + return child->is_literal(); + } +}; + +// PASS +struct SliceExpr: Expr{ + Expr_ start; + Expr_ stop; + Expr_ step; + std::string str() const override { return "Slice()"; } + + void emit(CodeEmitContext* ctx) override { + if(start){ + start->emit(ctx); + }else{ + ctx->emit(OP_LOAD_NONE, BC_NOARG, line); + } + + if(stop){ + stop->emit(ctx); + }else{ + ctx->emit(OP_LOAD_NONE, BC_NOARG, line); + } + + if(step){ + step->emit(ctx); + }else{ + ctx->emit(OP_LOAD_NONE, BC_NOARG, line); + } + + ctx->emit(OP_BUILD_SLICE, BC_NOARG, line); + } +}; + +struct DictItemExpr: Expr{ + Expr_ key; + Expr_ value; + std::string str() const override { return "DictItem()"; } + + void emit(CodeEmitContext* ctx) override { + value->emit(ctx); + key->emit(ctx); // reverse order + ctx->emit(OP_BUILD_TUPLE, 2, line); + } +}; + +struct SequenceExpr: Expr{ + std::vector items; + SequenceExpr(std::vector&& items): items(std::move(items)) {} + virtual Opcode opcode() const = 0; + + void emit(CodeEmitContext* ctx) override { + for(auto& item: items) item->emit(ctx); + ctx->emit(opcode(), items.size(), line); + } +}; + +struct ListExpr: SequenceExpr{ + using SequenceExpr::SequenceExpr; + std::string str() const override { return "List()"; } + Opcode opcode() const override { return OP_BUILD_LIST; } + + bool is_json_object() const override { return true; } +}; + +struct DictExpr: SequenceExpr{ + using SequenceExpr::SequenceExpr; + std::string str() const override { return "Dict()"; } + Opcode opcode() const override { return OP_BUILD_DICT; } + + bool is_json_object() const override { return true; } +}; + +struct SetExpr: SequenceExpr{ + using SequenceExpr::SequenceExpr; + std::string str() const override { return "Set()"; } + Opcode opcode() const override { return OP_BUILD_SET; } +}; + +struct TupleExpr: SequenceExpr{ + using SequenceExpr::SequenceExpr; + std::string str() const override { return "Tuple()"; } + Opcode opcode() const override { return OP_BUILD_TUPLE; } + + bool emit_store(CodeEmitContext* ctx) override { + // TOS is an iterable + // items may contain StarredExpr, we should check it + int starred_i = -1; + for(int i=0; iis_starred()) continue; + if(starred_i == -1) starred_i = i; + else return false; // multiple StarredExpr not allowed + } + + if(starred_i == -1){ + ctx->emit(OP_UNPACK_SEQUENCE, items.size(), line); + }else{ + // starred assignment target must be in a tuple + if(items.size() == 1) return false; + // starred assignment target must be the last one (differ from CPython) + if(starred_i != items.size()-1) return false; + // a,*b = [1,2,3] + // stack is [1,2,3] -> [1,[2,3]] + ctx->emit(OP_UNPACK_EX, items.size()-1, line); + } + // do reverse emit + for(int i=items.size()-1; i>=0; i--){ + bool ok = items[i]->emit_store(ctx); + if(!ok) return false; + } + return true; + } + + bool emit_del(CodeEmitContext* ctx) override{ + for(auto& e: items){ + bool ok = e->emit_del(ctx); + if(!ok) return false; + } + return true; + } +}; + +struct CompExpr: Expr{ + Expr_ expr; // loop expr + Expr_ vars; // loop vars + Expr_ iter; // loop iter + Expr_ cond; // optional if condition + + virtual Opcode op0() = 0; + virtual Opcode op1() = 0; + + void emit(CodeEmitContext* ctx){ + ctx->emit(op0(), 0, line); + iter->emit(ctx); + ctx->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE); + ctx->enter_block(FOR_LOOP); + ctx->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE); + bool ok = vars->emit_store(ctx); + // this error occurs in `vars` instead of this line, but...nevermind + if(!ok) UNREACHABLE(); // TODO: raise a SyntaxError instead + if(cond){ + cond->emit(ctx); + int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); + expr->emit(ctx); + ctx->emit(op1(), BC_NOARG, BC_KEEPLINE); + ctx->patch_jump(patch); + }else{ + expr->emit(ctx); + ctx->emit(op1(), BC_NOARG, BC_KEEPLINE); + } + ctx->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE); + ctx->exit_block(); + } +}; + +struct ListCompExpr: CompExpr{ + Opcode op0() override { return OP_BUILD_LIST; } + Opcode op1() override { return OP_LIST_APPEND; } + std::string str() const override { return "ListComp()"; } +}; + +struct DictCompExpr: CompExpr{ + Opcode op0() override { return OP_BUILD_DICT; } + Opcode op1() override { return OP_DICT_ADD; } + std::string str() const override { return "DictComp()"; } +}; + +struct SetCompExpr: CompExpr{ + Opcode op0() override { return OP_BUILD_SET; } + Opcode op1() override { return OP_SET_ADD; } + std::string str() const override { return "SetComp()"; } +}; + +struct LambdaExpr: Expr{ + FuncDecl_ decl; + NameScope scope; + std::string str() const override { return "Lambda()"; } + + LambdaExpr(NameScope scope){ + this->decl = make_sp(); + this->decl->name = ""; + this->scope = scope; + } + + void emit(CodeEmitContext* ctx) override { + int index = ctx->add_func_decl(decl); + ctx->emit(OP_LOAD_FUNCTION, index, line); + } +}; + +struct FStringExpr: Expr{ + Str src; + FStringExpr(const Str& src): src(src) {} + std::string str() const override { + return fmt("f", src.escape()); + } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + static const std::regex pattern(R"(\{(.*?)\})"); + std::cregex_iterator begin(src.begin(), src.end(), pattern); + std::cregex_iterator end; + int size = 0; + int i = 0; + for(auto it = begin; it != end; it++) { + std::cmatch m = *it; + if (i < m.position()) { + Str literal = src.substr(i, m.position() - i); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); + size++; + } + ctx->emit(OP_LOAD_BUILTIN_EVAL, BC_NOARG, line); + ctx->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(m[1].str())), line); + ctx->emit(OP_CALL, 1, line); + size++; + i = (int)(m.position() + m.length()); + } + if (i < src.length()) { + Str literal = src.substr(i, src.length() - i); + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line); + size++; + } + ctx->emit(OP_BUILD_STRING, size, line); + } +}; + +struct SubscrExpr: Expr{ + Expr_ a; + Expr_ b; + std::string str() const override { return "Subscr()"; } + + void emit(CodeEmitContext* ctx) override{ + a->emit(ctx); + b->emit(ctx); + ctx->emit(OP_LOAD_SUBSCR, BC_NOARG, line); + } + + bool emit_del(CodeEmitContext* ctx) override { + a->emit(ctx); + b->emit(ctx); + ctx->emit(OP_DELETE_SUBSCR, BC_NOARG, line); + return true; + } + + bool emit_store(CodeEmitContext* ctx) override { + a->emit(ctx); + b->emit(ctx); + ctx->emit(OP_STORE_SUBSCR, BC_NOARG, line); + return true; + } +}; + +struct AttribExpr: Expr{ + Expr_ a; + Str b; + AttribExpr(Expr_ a, const Str& b): a(std::move(a)), b(b) {} + AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {} + std::string str() const override { return "Attrib()"; } + + void emit(CodeEmitContext* ctx) override{ + a->emit(ctx); + int index = ctx->add_name(b); + ctx->emit(OP_LOAD_ATTR, index, line); + } + + bool emit_del(CodeEmitContext* ctx) override { + a->emit(ctx); + int index = ctx->add_name(b); + ctx->emit(OP_DELETE_ATTR, index, line); + return true; + } + + bool emit_store(CodeEmitContext* ctx) override { + a->emit(ctx); + int index = ctx->add_name(b); + ctx->emit(OP_STORE_ATTR, index, line); + return true; + } + + void emit_method(CodeEmitContext* ctx) { + a->emit(ctx); + int index = ctx->add_name(b); + ctx->emit(OP_LOAD_METHOD, index, line); + } + + bool is_attrib() const override { return true; } +}; + +// PASS +struct CallExpr: Expr{ + Expr_ callable; + std::vector args; + std::vector> kwargs; + std::string str() const override { return "Call()"; } + + bool need_unpack() const { + for(auto& item: args) if(item->is_starred()) return true; + return false; + } + + void emit(CodeEmitContext* ctx) override { + VM* vm = ctx->vm; + // TODO: if callable is a AttrExpr, we should try to use `fast_call` + // instead of use `boundmethod` proxy + if(callable->is_attrib()){ + auto p = static_cast(callable.get()); + p->emit_method(ctx); + }else{ + callable->emit(ctx); + ctx->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); + } + // emit args + for(auto& item: args) item->emit(ctx); + // emit kwargs + for(auto& item: kwargs){ + // TODO: optimize this + ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(item.first)), line); + item.second->emit(ctx); + } + int KWARGC = (int)kwargs.size(); + int ARGC = (int)args.size(); + if(KWARGC > 0){ + ctx->emit(need_unpack() ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC<<16)|ARGC, line); + }else{ + ctx->emit(need_unpack() ? OP_CALL_UNPACK : OP_CALL, ARGC, line); + } + } +}; + +struct BinaryExpr: Expr{ + TokenIndex op; + Expr_ lhs; + Expr_ rhs; + std::string str() const override { return TK_STR(op); } + + void emit(CodeEmitContext* ctx) override { + lhs->emit(ctx); + rhs->emit(ctx); + switch (op) { + case TK("+"): ctx->emit(OP_BINARY_OP, 0, line); break; + case TK("-"): ctx->emit(OP_BINARY_OP, 1, line); break; + case TK("*"): ctx->emit(OP_BINARY_OP, 2, line); break; + case TK("/"): ctx->emit(OP_BINARY_OP, 3, line); break; + case TK("//"): ctx->emit(OP_BINARY_OP, 4, line); break; + case TK("%"): ctx->emit(OP_BINARY_OP, 5, line); break; + case TK("**"): ctx->emit(OP_BINARY_OP, 6, line); break; + + case TK("<"): ctx->emit(OP_COMPARE_OP, 0, line); break; + case TK("<="): ctx->emit(OP_COMPARE_OP, 1, line); break; + case TK("=="): ctx->emit(OP_COMPARE_OP, 2, line); break; + case TK("!="): ctx->emit(OP_COMPARE_OP, 3, line); break; + case TK(">"): ctx->emit(OP_COMPARE_OP, 4, line); break; + case TK(">="): ctx->emit(OP_COMPARE_OP, 5, line); break; + case TK("in"): ctx->emit(OP_CONTAINS_OP, 0, line); break; + case TK("not in"): ctx->emit(OP_CONTAINS_OP, 1, line); break; + case TK("is"): ctx->emit(OP_IS_OP, 0, line); break; + case TK("is not"): ctx->emit(OP_IS_OP, 1, line); break; + + case TK("<<"): ctx->emit(OP_BITWISE_OP, 0, line); break; + case TK(">>"): ctx->emit(OP_BITWISE_OP, 1, line); break; + case TK("&"): ctx->emit(OP_BITWISE_OP, 2, line); break; + case TK("|"): ctx->emit(OP_BITWISE_OP, 3, line); break; + case TK("^"): ctx->emit(OP_BITWISE_OP, 4, line); break; + default: UNREACHABLE(); + } + } +}; + +// PASS +struct TernaryExpr: Expr{ + Expr_ cond; + Expr_ true_expr; + Expr_ false_expr; + std::string str() const override { return "Ternary()"; } + + void emit(CodeEmitContext* ctx) override { + cond->emit(ctx); + int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, cond->line); + true_expr->emit(ctx); + int patch_2 = ctx->emit(OP_JUMP_ABSOLUTE, BC_NOARG, true_expr->line); + ctx->patch_jump(patch); + false_expr->emit(ctx); + ctx->patch_jump(patch_2); + } +}; + + +} // namespace pkpy \ No newline at end of file diff --git a/src/frame.h b/src/frame.h index 4b456d33..752e7cac 100644 --- a/src/frame.h +++ b/src/frame.h @@ -1,38 +1,39 @@ #pragma once #include "codeobject.h" +#include "memory.h" +#include "vector.h" namespace pkpy{ -static THREAD_LOCAL uint64_t kFrameGlobalId = 0; +static THREAD_LOCAL i64 kFrameGlobalId = 0; + +using ValueStack = pod_vector; struct Frame { - std::vector _data; + ValueStack _data; int _ip = -1; int _next_ip = 0; const CodeObject* co; - PyVar _module; + PyObject* _module; NameDict_ _locals; NameDict_ _closure; - const uint64_t id; - std::vector>> s_try_block; + const i64 id; + std::vector> s_try_block; - inline NameDict& f_locals() noexcept { return _locals != nullptr ? *_locals : _module->attr(); } - inline NameDict& f_globals() noexcept { return _module->attr(); } - - inline PyVar* f_closure_try_get(StrName name) noexcept { + NameDict& f_locals() noexcept { return _locals!=nullptr ? *_locals : _module->attr(); } + NameDict& f_globals() noexcept { return _module->attr(); } + PyObject* f_closure_try_get(StrName name){ if(_closure == nullptr) return nullptr; return _closure->try_get(name); } - Frame(const CodeObject_& co, - const PyVar& _module, - const NameDict_& _locals=nullptr, - const NameDict_& _closure=nullptr) - : co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) { } + Frame(const CodeObject_& co, PyObject* _module, NameDict_ _locals=nullptr, NameDict_ _closure=nullptr) + : co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) { + } - inline const Bytecode& next_bytecode() { + const Bytecode& next_bytecode() { _ip = _next_ip++; return co->codes[_ip]; } @@ -42,82 +43,76 @@ struct Frame { return co->src->snapshot(line); } - // Str stack_info(){ - // StrStream ss; - // ss << "["; - // for(int i=0; i<_data.size(); i++){ - // ss << OBJ_TP_NAME(_data[i]); - // if(i != _data.size()-1) ss << ", "; - // } - // ss << "]"; - // return ss.str(); - // } - - inline bool has_next_bytecode() const { - return _next_ip < co->codes.size(); + std::string stack_info(){ + std::stringstream ss; + ss << id << " ["; + for(int i=0; i<_data.size(); i++){ + ss << (i64)_data[i]; + if(i != _data.size()-1) ss << ", "; + } + ss << "]"; + return ss.str(); } - inline PyVar pop(){ -#if PK_EXTRA_CHECK - if(_data.empty()) throw std::runtime_error("_data.empty() is true"); -#endif - PyVar v = std::move(_data.back()); - _data.pop_back(); - return v; - } - - inline void _pop(){ -#if PK_EXTRA_CHECK + void pop(){ +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif _data.pop_back(); } - inline void try_deref(VM*, PyVar&); - - inline PyVar pop_value(VM* vm){ - PyVar value = pop(); - try_deref(vm, value); - return value; + PyObject* popx(){ +#if DEBUG_EXTRA_CHECK + if(_data.empty()) throw std::runtime_error("_data.empty() is true"); +#endif + PyObject* ret = _data.back(); + _data.pop_back(); + return ret; } - inline PyVar top_value(VM* vm){ - PyVar value = top(); - try_deref(vm, value); - return value; - } - - inline PyVar& top(){ -#if PK_EXTRA_CHECK + PyObject*& top(){ +#if DEBUG_EXTRA_CHECK if(_data.empty()) throw std::runtime_error("_data.empty() is true"); #endif return _data.back(); } - inline PyVar& top_1(){ -#if PK_EXTRA_CHECK + PyObject*& top_1(){ +#if DEBUG_EXTRA_CHECK if(_data.size() < 2) throw std::runtime_error("_data.size() < 2"); #endif return _data[_data.size()-2]; } - template - inline void push(T&& obj){ _data.push_back(std::forward(obj)); } + PyObject*& top_n(int n){ + n += 1; +#if DEBUG_EXTRA_CHECK + if(_data.size() < n) throw std::runtime_error("_data.size() < n"); +#endif + return _data[_data.size()-n]; + } - inline void jump_abs(int i){ _next_ip = i; } - inline void jump_rel(int i){ _next_ip += i; } + void push(PyObject* obj){ +#if DEBUG_EXTRA_CHECK + if(obj == nullptr) throw std::runtime_error("obj == nullptr"); +#endif + _data.push_back(obj); + } - inline void on_try_block_enter(){ + void jump_abs(int i){ _next_ip = i; } + void jump_rel(int i){ _next_ip += i; } + + void on_try_block_enter(){ s_try_block.emplace_back(co->codes[_ip].block, _data); } - inline void on_try_block_exit(){ + void on_try_block_exit(){ s_try_block.pop_back(); } bool jump_to_exception_handler(){ if(s_try_block.empty()) return false; - PyVar obj = pop(); + PyObject* obj = popx(); auto& p = s_try_block.back(); _data = std::move(p.second); _data.push_back(obj); @@ -127,12 +122,12 @@ struct Frame { } int _exit_block(int i){ - if(co->blocks[i].type == FOR_LOOP) _pop(); + if(co->blocks[i].type == FOR_LOOP) pop(); else if(co->blocks[i].type == TRY_EXCEPT) on_try_block_exit(); return co->blocks[i].parent; } - void jump_abs_safe(int target){ + void jump_abs_break(int target){ const Bytecode& prev = co->codes[_ip]; int i = prev.block; _next_ip = target; @@ -145,20 +140,35 @@ struct Frame { } } - Args pop_n_values_reversed(VM* vm, int n){ + Args popx_n_reversed(int n){ Args v(n); - for(int i=n-1; i>=0; i--){ - v[i] = pop(); - try_deref(vm, v[i]); - } + for(int i=n-1; i>=0; i--) v[i] = popx(); return v; } - Args pop_n_reversed(int n){ - Args v(n); - for(int i=n-1; i>=0; i--) v[i] = pop(); - return v; + void pop_n(int n){ + _data.pop_back_n(n); + } + + void _gc_mark() const { + for(PyObject* obj : _data) OBJ_MARK(obj); + OBJ_MARK(_module); + if(_locals != nullptr) _locals->_gc_mark(); + if(_closure != nullptr) _closure->_gc_mark(); + for(auto& p : s_try_block){ + for(PyObject* obj : p.second) OBJ_MARK(obj); + } + co->_gc_mark(); } }; + +struct FrameDeleter{ + void operator()(Frame* frame) const { + frame->~Frame(); + pool128.dealloc(frame); + } +}; +using Frame_ = std::unique_ptr; + }; // namespace pkpy \ No newline at end of file diff --git a/src/gc.h b/src/gc.h new file mode 100644 index 00000000..752a18cb --- /dev/null +++ b/src/gc.h @@ -0,0 +1,151 @@ +#pragma once + +#include "common.h" +#include "memory.h" +#include "obj.h" +#include "codeobject.h" +#include "namedict.h" + +namespace pkpy { +struct ManagedHeap{ + std::vector _no_gc; + std::vector gen; + VM* vm; + ManagedHeap(VM* vm): vm(vm) {} + + static const int kMinGCThreshold = 3072; + int gc_threshold = kMinGCThreshold; + int gc_counter = 0; + + /********************/ + int _gc_lock_counter = 0; + struct ScopeLock{ + ManagedHeap* heap; + ScopeLock(ManagedHeap* heap): heap(heap){ + heap->_gc_lock_counter++; + } + ~ScopeLock(){ + heap->_gc_lock_counter--; + } + }; + + ScopeLock gc_scope_lock(){ + return ScopeLock(this); + } + /********************/ + + template + PyObject* gcnew(Type type, T&& val){ + using __T = Py_>; + PyObject* obj = new(pool64.alloc<__T>()) __T(type, std::forward(val)); + gen.push_back(obj); + gc_counter++; + return obj; + } + + template + PyObject* _new(Type type, T&& val){ + using __T = Py_>; + PyObject* obj = new(pool64.alloc<__T>()) __T(type, std::forward(val)); + obj->gc.enabled = false; + _no_gc.push_back(obj); + return obj; + } + +#if DEBUG_GC_STATS + inline static std::map deleted; +#endif + + ~ManagedHeap(){ + for(PyObject* obj: _no_gc) obj->~PyObject(), pool64.dealloc(obj); +#if DEBUG_GC_STATS + for(auto& [type, count]: deleted){ + std::cout << "GC: " << obj_type_name(vm, type) << "=" << count << std::endl; + } +#endif + } + + int sweep(){ + std::vector alive; + for(PyObject* obj: gen){ + if(obj->gc.marked){ + obj->gc.marked = false; + alive.push_back(obj); + }else{ +#if DEBUG_GC_STATS + deleted[obj->type] += 1; +#endif + obj->~PyObject(), pool64.dealloc(obj); + } + } + + // clear _no_gc marked flag + for(PyObject* obj: _no_gc) obj->gc.marked = false; + + int freed = gen.size() - alive.size(); + // std::cout << "GC: " << alive.size() << "/" << gen.size() << " (" << freed << " freed)" << std::endl; + gen.clear(); + gen.swap(alive); + return freed; + } + + void _auto_collect(){ + if(_gc_lock_counter > 0) return; + if(gc_counter < gc_threshold) return; + gc_counter = 0; + collect(); + gc_threshold = gen.size() * 2; + if(gc_threshold < kMinGCThreshold) gc_threshold = kMinGCThreshold; + } + + int collect(){ + if(_gc_lock_counter > 0) UNREACHABLE(); + mark(); + int freed = sweep(); + return freed; + } + + void mark(); +}; + +inline void NameDict::_gc_mark() const{ + for(uint16_t i=0; i<_capacity; i++){ + if(_items[i].first.empty()) continue; + OBJ_MARK(_items[i].second); + } +} + +inline void FuncDecl::_gc_mark() const{ + code->_gc_mark(); + kwargs._gc_mark(); +} + +template<> inline void _gc_mark(List& t){ + for(PyObject* obj: t) OBJ_MARK(obj); +} + +template<> inline void _gc_mark(Tuple& t){ + for(int i=0; i inline void _gc_mark(Function& t){ + t.decl->_gc_mark(); + if(t._module != nullptr) OBJ_MARK(t._module); + if(t._closure != nullptr) t._closure->_gc_mark(); +} + +template<> inline void _gc_mark(BoundMethod& t){ + OBJ_MARK(t.obj); + OBJ_MARK(t.method); +} + +template<> inline void _gc_mark(StarWrapper& t){ + OBJ_MARK(t.obj); +} + +template<> inline void _gc_mark(Super& t){ + OBJ_MARK(t.first); +} +// NOTE: std::function may capture some PyObject*, they can not be marked + +} // namespace pkpy \ No newline at end of file diff --git a/src/io.h b/src/io.h index 549a9c6c..a5fbb614 100644 --- a/src/io.h +++ b/src/io.h @@ -10,7 +10,7 @@ namespace pkpy{ -Str _read_file_cwd(const Str& name, bool* ok){ +inline Str _read_file_cwd(const Str& name, bool* ok){ std::filesystem::path path(name.c_str()); bool exists = std::filesystem::exists(path); if(!exists){ @@ -42,7 +42,7 @@ struct FileIO { if(!_fs.is_open()) vm->IOError(strerror(errno)); } - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<2>(type, "__new__", [](VM* vm, Args& args){ return VAR_T(FileIO, vm, CAST(Str, args[0]), CAST(Str, args[1]) @@ -78,16 +78,16 @@ struct FileIO { } }; -void add_module_io(VM* vm){ - PyVar mod = vm->new_module("io"); - PyVar type = FileIO::register_class(vm, mod); +inline void add_module_io(VM* vm){ + PyObject* mod = vm->new_module("io"); + PyObject* type = FileIO::register_class(vm, mod); vm->bind_builtin_func<2>("open", [type](VM* vm, const Args& args){ return vm->call(type, args); }); } -void add_module_os(VM* vm){ - PyVar mod = vm->new_module("os"); +inline void add_module_os(VM* vm){ + PyObject* mod = vm->new_module("os"); // Working directory is shared by all VMs!! vm->bind_func<0>(mod, "getcwd", [](VM* vm, const Args& args){ return VAR(std::filesystem::current_path().string()); @@ -157,10 +157,10 @@ void add_module_os(VM* vm){ #else namespace pkpy{ -void add_module_io(VM* vm){} -void add_module_os(VM* vm){} +inline void add_module_io(VM* vm){} +inline void add_module_os(VM* vm){} -Str _read_file_cwd(const Str& name, bool* ok){ +inline Str _read_file_cwd(const Str& name, bool* ok){ *ok = false; return Str(); } diff --git a/src/iter.h b/src/iter.h index 293602b4..bd048c05 100644 --- a/src/iter.h +++ b/src/iter.h @@ -6,18 +6,18 @@ namespace pkpy{ class RangeIter : public BaseIter { i64 current; - Range r; + Range r; // copy by value, so we don't need to keep ref public: - RangeIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) { - this->r = OBJ_GET(Range, _ref); + RangeIter(VM* vm, PyObject* ref) : BaseIter(vm) { + this->r = OBJ_GET(Range, ref); this->current = r.start; } - inline bool _has_next(){ + bool _has_next(){ return r.step > 0 ? current < r.stop : current > r.stop; } - PyVar next(){ + PyObject* next(){ if(!_has_next()) return nullptr; current += r.step; return VAR(current-r.step); @@ -26,43 +26,65 @@ public: template class ArrayIter : public BaseIter { - size_t index = 0; - const T* p; + PyObject* ref; + int index; public: - ArrayIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) { p = &OBJ_GET(T, _ref);} - PyVar next(){ + ArrayIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {} + + PyObject* next() override{ + const T* p = &OBJ_GET(T, ref); if(index == p->size()) return nullptr; return p->operator[](index++); } + + void _gc_mark() const override { + OBJ_MARK(ref); + } }; class StringIter : public BaseIter { - int index = 0; - Str* str; + PyObject* ref; + int index; public: - StringIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) { - str = &OBJ_GET(Str, _ref); - } + StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {} - PyVar next() { + PyObject* next() override{ + // TODO: optimize this to use iterator + // operator[] is O(n) complexity + Str* str = &OBJ_GET(Str, ref); if(index == str->u8_length()) return nullptr; return VAR(str->u8_getitem(index++)); } + + void _gc_mark() const override { + OBJ_MARK(ref); + } }; -PyVar Generator::next(){ +inline PyObject* Generator::next(){ if(state == 2) return nullptr; vm->callstack.push(std::move(frame)); - PyVar ret = vm->_exec(); + PyObject* ret = vm->_exec(); if(ret == vm->_py_op_yield){ frame = std::move(vm->callstack.top()); vm->callstack.pop(); state = 1; - return frame->pop_value(vm); + return frame->popx(); }else{ state = 2; return nullptr; } } +inline void Generator::_gc_mark() const{ + if(frame != nullptr) frame->_gc_mark(); +} + +template +void _gc_mark(T& t) { + if constexpr(std::is_base_of_v){ + t._gc_mark(); + } +} + } // namespace pkpy \ No newline at end of file diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 00000000..88698729 --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,504 @@ +#pragma once + +#include "common.h" +#include "error.h" +#include "str.h" + +namespace pkpy{ + +typedef uint8_t TokenIndex; + +constexpr const char* kTokens[] = { + "is not", "not in", + "@eof", "@eol", "@sof", + "@id", "@num", "@str", "@fstr", + "@indent", "@dedent", + /*****************************************/ + "+", "+=", "-", "-=", // (INPLACE_OP - 1) can get '=' removed + "*", "*=", "/", "/=", "//", "//=", "%", "%=", + "&", "&=", "|", "|=", "^", "^=", + "<<", "<<=", ">>", ">>=", + /*****************************************/ + ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "::", + "**", "=", ">", "<", "...", "->", "?", "@", "==", "!=", ">=", "<=", + /** KW_BEGIN **/ + "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", + "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally", + "goto", "label", // extended keywords, not available in cpython + "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise" +}; + +using TokenValue = std::variant; +const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]); + +constexpr TokenIndex TK(const char token[]) { + for(int k=0; k kTokenKwMap = [](){ + std::map map; + for(int k=TK("class"); k <= >= + PREC_BITWISE_OR, // | + PREC_BITWISE_XOR, // ^ + PREC_BITWISE_AND, // & + PREC_BITWISE_SHIFT, // << >> + PREC_TERM, // + - + PREC_FACTOR, // * / % // + PREC_UNARY, // - not + PREC_EXPONENT, // ** + PREC_CALL, // () + PREC_SUBSCRIPT, // [] + PREC_ATTRIB, // .index + PREC_PRIMARY, +}; + +enum StringType { NORMAL_STRING, RAW_STRING, F_STRING }; + +struct Lexer { + shared_ptr src; + const char* token_start; + const char* curr_char; + int current_line = 1; + std::vector nexts; + stack indents; + int brackets_level = 0; + bool used = false; + + char peekchar() const{ return *curr_char; } + + bool match_n_chars(int n, char c0){ + const char* c = curr_char; + for(int i=0; i 0) return true; + int spaces = eat_spaces(); + if(peekchar() == '#') skip_line_comment(); + if(peekchar() == '\0' || peekchar() == '\n') return true; + // https://docs.python.org/3/reference/lexical_analysis.html#indentation + if(spaces > indents.top()){ + indents.push(spaces); + nexts.push_back(Token{TK("@indent"), token_start, 0, current_line}); + } else if(spaces < indents.top()){ + while(spaces < indents.top()){ + indents.pop(); + nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line}); + } + if(spaces != indents.top()){ + return false; + } + } + return true; + } + + char eatchar() { + char c = peekchar(); + if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline"); + curr_char++; + return c; + } + + char eatchar_include_newline() { + char c = peekchar(); + curr_char++; + if (c == '\n'){ + current_line++; + src->line_starts.push_back(curr_char); + } + return c; + } + + int eat_name() { + curr_char--; + while(true){ + unsigned char c = peekchar(); + int u8bytes = utf8len(c, true); + if(u8bytes == 0) return 1; + if(u8bytes == 1){ + if(isalpha(c) || c=='_' || isdigit(c)) { + curr_char++; + continue; + }else{ + break; + } + } + // handle multibyte char + std::string u8str(curr_char, u8bytes); + if(u8str.size() != u8bytes) return 2; + uint32_t value = 0; + for(int k=0; k < u8bytes; k++){ + uint8_t b = u8str[k]; + if(k==0){ + if(u8bytes == 2) value = (b & 0b00011111) << 6; + else if(u8bytes == 3) value = (b & 0b00001111) << 12; + else if(u8bytes == 4) value = (b & 0b00000111) << 18; + }else{ + value |= (b & 0b00111111) << (6*(u8bytes-k-1)); + } + } + if(is_unicode_Lo_char(value)) curr_char += u8bytes; + else break; + } + + int length = (int)(curr_char - token_start); + if(length == 0) return 3; + std::string_view name(token_start, length); + + if(src->mode == JSON_MODE){ + if(name == "true"){ + add_token(TK("True")); + } else if(name == "false"){ + add_token(TK("False")); + } else if(name == "null"){ + add_token(TK("None")); + } else { + return 4; + } + return 0; + } + + if(kTokenKwMap.count(name)){ + if(name == "not"){ + if(strncmp(curr_char, " in", 3) == 0){ + curr_char += 3; + add_token(TK("not in")); + return 0; + } + }else if(name == "is"){ + if(strncmp(curr_char, " not", 4) == 0){ + curr_char += 4; + add_token(TK("is not")); + return 0; + } + } + add_token(kTokenKwMap.at(name)); + } else { + add_token(TK("@id")); + } + return 0; + } + + void skip_line_comment() { + char c; + while ((c = peekchar()) != '\0') { + if (c == '\n') return; + eatchar(); + } + } + + bool matchchar(char c) { + if (peekchar() != c) return false; + eatchar_include_newline(); + return true; + } + + void add_token(TokenIndex type, TokenValue value={}) { + switch(type){ + case TK("{"): case TK("["): case TK("("): brackets_level++; break; + case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; + } + nexts.push_back( Token{ + type, + token_start, + (int)(curr_char - token_start), + current_line - ((type == TK("@eol")) ? 1 : 0), + value + }); + } + + void add_token_2(char c, TokenIndex one, TokenIndex two) { + if (matchchar(c)) add_token(two); + else add_token(one); + } + + Str eat_string_until(char quote, bool raw) { + bool quote3 = match_n_chars(2, quote); + std::vector buff; + while (true) { + char c = eatchar_include_newline(); + if (c == quote){ + if(quote3 && !match_n_chars(2, quote)){ + buff.push_back(c); + continue; + } + break; + } + if (c == '\0'){ + if(quote3 && src->mode == REPL_MODE){ + throw NeedMoreLines(false); + } + SyntaxError("EOL while scanning string literal"); + } + if (c == '\n'){ + if(!quote3) SyntaxError("EOL while scanning string literal"); + else{ + buff.push_back(c); + continue; + } + } + if (!raw && c == '\\') { + switch (eatchar_include_newline()) { + case '"': buff.push_back('"'); break; + case '\'': buff.push_back('\''); break; + case '\\': buff.push_back('\\'); break; + case 'n': buff.push_back('\n'); break; + case 'r': buff.push_back('\r'); break; + case 't': buff.push_back('\t'); break; + default: SyntaxError("invalid escape char"); + } + } else { + buff.push_back(c); + } + } + return Str(buff.data(), buff.size()); + } + + void eat_string(char quote, StringType type) { + Str s = eat_string_until(quote, type == RAW_STRING); + if(type == F_STRING){ + add_token(TK("@fstr"), s); + }else{ + add_token(TK("@str"), s); + } + } + + void eat_number() { + static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?"); + std::smatch m; + + const char* i = token_start; + while(*i != '\n' && *i != '\0') i++; + std::string s = std::string(token_start, i); + + try{ + if (std::regex_search(s, m, pattern)) { + // here is m.length()-1, since the first char was eaten by lex_token() + for(int j=0; j=")); + else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>=")); + else add_token(TK(">")); + return true; + } + case '<': { + if(matchchar('=')) add_token(TK("<=")); + else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<=")); + else add_token(TK("<")); + return true; + } + case '-': { + if(matchchar('=')) add_token(TK("-=")); + else if(matchchar('>')) add_token(TK("->")); + else add_token(TK("-")); + return true; + } + case '!': + if(matchchar('=')) add_token(TK("!=")); + else SyntaxError("expected '=' after '!'"); + break; + case '*': + if (matchchar('*')) { + add_token(TK("**")); // '**' + } else { + add_token_2('=', TK("*"), TK("*=")); + } + return true; + case '/': + if(matchchar('/')) { + add_token_2('=', TK("//"), TK("//=")); + } else { + add_token_2('=', TK("/"), TK("/=")); + } + return true; + case ' ': case '\t': eat_spaces(); break; + case '\n': { + add_token(TK("@eol")); + if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level"); + return true; + } + default: { + if(c == 'f'){ + if(matchchar('\'')) {eat_string('\'', F_STRING); return true;} + if(matchchar('"')) {eat_string('"', F_STRING); return true;} + }else if(c == 'r'){ + if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;} + if(matchchar('"')) {eat_string('"', RAW_STRING); return true;} + } + if (c >= '0' && c <= '9') { + eat_number(); + return true; + } + switch (eat_name()) + { + case 0: break; + case 1: SyntaxError("invalid char: " + std::string(1, c)); + case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); + case 3: SyntaxError("@id contains invalid char"); break; + case 4: SyntaxError("invalid JSON token"); break; + default: UNREACHABLE(); + } + return true; + } + } + } + + token_start = curr_char; + while(indents.size() > 1){ + indents.pop(); + add_token(TK("@dedent")); + return true; + } + add_token(TK("@eof")); + return false; + } + + /***** Error Reporter *****/ + void throw_err(Str type, Str msg){ + int lineno = current_line; + const char* cursor = curr_char; + if(peekchar() == '\n'){ + lineno--; + cursor--; + } + throw_err(type, msg, lineno, cursor); + } + + void throw_err(Str type, Str msg, int lineno, const char* cursor){ + auto e = Exception("SyntaxError", msg); + e.st_push(src->snapshot(lineno, cursor)); + throw e; + } + void SyntaxError(Str msg){ throw_err("SyntaxError", msg); } + void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); } + void IndentationError(Str msg){ throw_err("IndentationError", msg); } + + Lexer(shared_ptr src) { + this->src = src; + this->token_start = src->source.c_str(); + this->curr_char = src->source.c_str(); + this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line}); + this->indents.push(0); + } + + std::vector run() { + if(used) UNREACHABLE(); + used = true; + while (lex_one_token()); + return std::move(nexts); + } +}; + +} // namespace pkpy \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 432cef8a..b3156c41 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -21,7 +21,6 @@ std::string getline(bool* eof=nullptr) { std::string output; output.resize(length); WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), &output[0], length, NULL, NULL); - if(!output.empty() && output.back() == '\r') output.pop_back(); return output; } @@ -66,16 +65,20 @@ int main(int argc, char** argv){ filepath = std::filesystem::absolute(filepath); if(!std::filesystem::exists(filepath)){ std::cerr << "File not found: " << argv_1 << std::endl; - return 1; + return 2; } std::ifstream file(filepath); - if(!file.is_open()) return 1; + if(!file.is_open()){ + std::cerr << "Failed to open file: " << argv_1 << std::endl; + return 3; + } std::string src((std::istreambuf_iterator(file)), std::istreambuf_iterator()); + file.close(); // set parent path as cwd std::filesystem::current_path(filepath.parent_path()); - pkpy::PyVarOrNull ret = nullptr; + pkpy::PyObject* ret = nullptr; ret = vm->exec(src.c_str(), argv_1, pkpy::EXEC_MODE); pkpy_delete(vm); return ret != nullptr ? 0 : 1; diff --git a/src/memory.h b/src/memory.h index 2e446528..fe84cbbe 100644 --- a/src/memory.h +++ b/src/memory.h @@ -4,31 +4,13 @@ namespace pkpy{ -struct PyObject; - -template -struct SpAllocator { - template - inline static int* alloc(){ - return (int*)malloc(sizeof(int) + sizeof(U)); - } - - inline static void dealloc(int* counter){ - ((T*)(counter + 1))->~T(); - free(counter); - } -}; - template struct shared_ptr { - union { - int* counter; - i64 bits; - }; + int* counter; -#define _t() (T*)(counter + 1) -#define _inc_counter() if(!is_tagged() && counter) ++(*counter) -#define _dec_counter() if(!is_tagged() && counter && --(*counter) == 0) SpAllocator::dealloc(counter) + T* _t() const noexcept { return (T*)(counter + 1); } + void _inc_counter() { if(counter) ++(*counter); } + void _dec_counter() { if(counter && --(*counter) == 0) {((T*)(counter + 1))->~T(); free(counter);} } public: shared_ptr() : counter(nullptr) {} @@ -69,7 +51,6 @@ public: T* get() const { return _t(); } int use_count() const { - if(is_tagged()) return 0; return counter ? *counter : 0; } @@ -77,78 +58,247 @@ public: _dec_counter(); counter = nullptr; } - - inline constexpr bool is_tagged() const { - if constexpr(!std::is_same_v) return false; - return (bits & 0b11) != 0b00; - } - inline bool is_tag_00() const { return (bits & 0b11) == 0b00; } - inline bool is_tag_01() const { return (bits & 0b11) == 0b01; } - inline bool is_tag_10() const { return (bits & 0b11) == 0b10; } - inline bool is_tag_11() const { return (bits & 0b11) == 0b11; } }; -#undef _t -#undef _inc_counter -#undef _dec_counter +template +shared_ptr make_sp(Args&&... args) { + int* p = (int*)malloc(sizeof(int) + sizeof(T)); + *p = 1; + new(p+1) T(std::forward(args)...); + return shared_ptr(p); +} - template - shared_ptr make_sp(Args&&... args) { - static_assert(std::is_base_of_v, "U must be derived from T"); - static_assert(std::has_virtual_destructor_v, "T must have virtual destructor"); - static_assert(!std::is_same_v || (!std::is_same_v && !std::is_same_v)); - int* p = SpAllocator::template alloc(); *p = 1; - new(p+1) U(std::forward(args)...); - return shared_ptr(p); +struct LinkedListNode{ + LinkedListNode* prev; + LinkedListNode* next; +}; + +template +struct DoubleLinkedList{ + static_assert(std::is_base_of_v); + int _size; + LinkedListNode head; + LinkedListNode tail; + + DoubleLinkedList(): _size(0){ + head.prev = nullptr; + head.next = &tail; + tail.prev = &head; + tail.next = nullptr; } - template - shared_ptr make_sp(Args&&... args) { - int* p = SpAllocator::template alloc(); *p = 1; - new(p+1) T(std::forward(args)...); - return shared_ptr(p); + void push_back(T* node){ + node->prev = tail.prev; + node->next = &tail; + tail.prev->next = node; + tail.prev = node; + _size++; } -static_assert(sizeof(i64) == sizeof(int*)); -static_assert(sizeof(f64) == sizeof(int*)); -static_assert(sizeof(shared_ptr) == sizeof(int*)); -static_assert(std::numeric_limits::is_iec559); -static_assert(std::numeric_limits::is_iec559); + void push_front(T* node){ + node->prev = &head; + node->next = head.next; + head.next->prev = node; + head.next = node; + _size++; + } -template -struct SmallArrayPool { - std::vector buckets[__Bucket+1]; + void pop_back(){ +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::pop_back() called on empty list"); +#endif + tail.prev->prev->next = &tail; + tail.prev = tail.prev->prev; + _size--; + } - T* alloc(int n){ - if(n == 0) return nullptr; - if(n > __Bucket || buckets[n].empty()){ - return new T[n]; - }else{ - T* p = buckets[n].back(); - buckets[n].pop_back(); - return p; + void pop_front(){ +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::pop_front() called on empty list"); +#endif + head.next->next->prev = &head; + head.next = head.next->next; + _size--; + } + + T* back() const { +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::back() called on empty list"); +#endif + return static_cast(tail.prev); + } + + T* front() const { +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::front() called on empty list"); +#endif + return static_cast(head.next); + } + + void erase(T* node){ +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("DoubleLinkedList::erase() called on empty list"); + LinkedListNode* n = head.next; + while(n != &tail){ + if(n == node) break; + n = n->next; } + if(n != node) throw std::runtime_error("DoubleLinkedList::erase() called on node not in the list"); +#endif + node->prev->next = node->next; + node->next->prev = node->prev; + _size--; } - void dealloc(T* p, int n){ - if(n == 0) return; - if(n > __Bucket || buckets[n].size() >= __BucketSize){ - delete[] p; - }else{ - buckets[n].push_back(p); + void move_all_back(DoubleLinkedList& other){ + if(other.empty()) return; + other.tail.prev->next = &tail; + tail.prev->next = other.head.next; + other.head.next->prev = tail.prev; + tail.prev = other.tail.prev; + _size += other._size; + other.head.next = &other.tail; + other.tail.prev = &other.head; + other._size = 0; + } + + bool empty() const { +#if DEBUG_MEMORY_POOL + if(size() == 0){ + if(head.next != &tail || tail.prev != &head){ + throw std::runtime_error("DoubleLinkedList::size() returned 0 but the list is not empty"); + } + return true; } +#endif + return _size == 0; } - ~SmallArrayPool(){ - for(int i=1; i<=__Bucket; i++){ - for(auto p: buckets[i]) delete[] p; + int size() const { return _size; } + + void apply(std::function func){ + LinkedListNode* p = head.next; + while(p != &tail){ + LinkedListNode* next = p->next; + func(static_cast(p)); + p = next; } } }; +template +struct MemoryPool{ + static const size_t __MaxBlocks = 256*1024 / __BlockSize; + struct Block{ + void* arena; + char data[__BlockSize]; + }; -typedef shared_ptr PyVar; -typedef PyVar PyVarOrNull; -typedef PyVar PyVarRef; + struct Arena: LinkedListNode{ + Block _blocks[__MaxBlocks]; + Block* _free_list[__MaxBlocks]; + int _free_list_size; + bool dirty; + + Arena(): _free_list_size(__MaxBlocks), dirty(false){ + for(int i=0; i<__MaxBlocks; i++){ + _blocks[i].arena = this; + _free_list[i] = &_blocks[i]; + } + } + + bool empty() const { return _free_list_size == 0; } + bool full() const { return _free_list_size == __MaxBlocks; } + + void tidy(){ +#if DEBUG_MEMORY_POOL + if(!full()) throw std::runtime_error("Arena::tidy() called on non-full arena"); +#endif + std::sort(_free_list, _free_list+__MaxBlocks); + } + + Block* alloc(){ +#if DEBUG_MEMORY_POOL + if(empty()) throw std::runtime_error("Arena::alloc() called on empty arena"); +#endif + _free_list_size--; + return _free_list[_free_list_size]; + } + + void dealloc(Block* block){ +#if DEBUG_MEMORY_POOL + if(full()) throw std::runtime_error("Arena::dealloc() called on full arena"); +#endif + _free_list[_free_list_size] = block; + _free_list_size++; + } + }; + + DoubleLinkedList _arenas; + DoubleLinkedList _empty_arenas; + + template + void* alloc() { return alloc(sizeof(__T)); } + + void* alloc(size_t size){ +#if DEBUG_NO_MEMORY_POOL + return malloc(size); +#endif + if(size > __BlockSize){ + void* p = malloc(sizeof(void*) + size); + memset(p, 0, sizeof(void*)); + return (char*)p + sizeof(void*); + } + + if(_arenas.empty()){ + // std::cout << _arenas.size() << ',' << _empty_arenas.size() << ',' << _full_arenas.size() << std::endl; + _arenas.push_back(new Arena()); + } + Arena* arena = _arenas.back(); + void* p = arena->alloc()->data; + if(arena->empty()){ + _arenas.pop_back(); + arena->dirty = true; + _empty_arenas.push_back(arena); + } + return p; + } + + void dealloc(void* p){ +#if DEBUG_NO_MEMORY_POOL + free(p); + return; +#endif +#if DEBUG_MEMORY_POOL + if(p == nullptr) throw std::runtime_error("MemoryPool::dealloc() called on nullptr"); +#endif + Block* block = (Block*)((char*)p - sizeof(void*)); + if(block->arena == nullptr){ + free(block); + }else{ + Arena* arena = (Arena*)block->arena; + if(arena->empty()){ + _empty_arenas.erase(arena); + _arenas.push_front(arena); + arena->dealloc(block); + }else{ + arena->dealloc(block); + if(arena->full() && arena->dirty){ + _arenas.erase(arena); + delete arena; + } + } + } + } + + ~MemoryPool(){ + _arenas.apply([](Arena* arena){ delete arena; }); + _empty_arenas.apply([](Arena* arena){ delete arena; }); + } +}; + +inline MemoryPool<64> pool64; +inline MemoryPool<128> pool128; +// inline MemoryPool<256> pool256; }; // namespace pkpy diff --git a/src/namedict.h b/src/namedict.h index 8b8a3516..5ea056cf 100644 --- a/src/namedict.h +++ b/src/namedict.h @@ -6,44 +6,9 @@ namespace pkpy{ -const int kNameDictNodeSize = sizeof(StrName) + sizeof(PyVar); - -template -struct DictArrayPool { - std::vector buckets[__Bucket+1]; - - StrName* alloc(uint16_t n){ - StrName* _keys; - if(n > __Bucket || buckets[n].empty()){ - _keys = (StrName*)malloc(kNameDictNodeSize * n); - memset((void*)_keys, 0, kNameDictNodeSize * n); - }else{ - _keys = buckets[n].back(); - memset((void*)_keys, 0, sizeof(StrName) * n); - buckets[n].pop_back(); - } - return _keys; - } - - void dealloc(StrName* head, uint16_t n){ - PyVar* _values = (PyVar*)(head + n); - if(n > __Bucket || buckets[n].size() >= __BucketSize){ - for(int i=0; i kHashSeeds = {9629, 43049, 13267, 59509, 39251, 1249, 35803, 54469, 27689, 9719, 34897, 18973, 30661, 19913, 27919, 32143, 3467, 28019, 1051, 39419, 1361, 28547, 48197, 2609, 24317, 22861, 41467, 17623, 52837, 59053, 33589, 32117}; -static DictArrayPool<32> _dict_pool; -uint16_t find_next_capacity(uint16_t n){ +inline static uint16_t find_next_capacity(uint16_t n){ uint16_t x = 2; while(x < n) x <<= 1; return x; @@ -51,7 +16,7 @@ uint16_t find_next_capacity(uint16_t n){ #define _hash(key, mask, hash_seed) ( ( (key).index * (hash_seed) >> 8 ) & (mask) ) -uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector& keys){ +inline static uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector& keys){ if(keys.empty()) return kHashSeeds[0]; std::set indices; std::pair best_score = {kHashSeeds[0], 0.0f}; @@ -68,77 +33,65 @@ uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector& k } struct NameDict { + using Item = std::pair; uint16_t _capacity; uint16_t _size; float _load_factor; uint16_t _hash_seed; uint16_t _mask; - StrName* _keys; + Item* _items; - inline PyVar& value(uint16_t i){ - return reinterpret_cast(_keys + _capacity)[i]; - } - - inline const PyVar& value(uint16_t i) const { - return reinterpret_cast(_keys + _capacity)[i]; + void _alloc(int cap){ + _items = (Item*)pool128.alloc(cap * sizeof(Item)); + memset(_items, 0, cap * sizeof(Item)); } NameDict(uint16_t capacity=2, float load_factor=0.67, uint16_t hash_seed=kHashSeeds[0]): _capacity(capacity), _size(0), _load_factor(load_factor), _hash_seed(hash_seed), _mask(capacity-1) { - _keys = _dict_pool.alloc(capacity); - } + _alloc(capacity); + } NameDict(const NameDict& other) { memcpy(this, &other, sizeof(NameDict)); - _keys = _dict_pool.alloc(_capacity); + _alloc(_capacity); for(int i=0; i<_capacity; i++){ - _keys[i] = other._keys[i]; - value(i) = other.value(i); + _items[i] = other._items[i]; } } NameDict& operator=(const NameDict& other) { - _dict_pool.dealloc(_keys, _capacity); + pool128.dealloc(_items); memcpy(this, &other, sizeof(NameDict)); - _keys = _dict_pool.alloc(_capacity); + _alloc(_capacity); for(int i=0; i<_capacity; i++){ - _keys[i] = other._keys[i]; - value(i) = other.value(i); + _items[i] = other._items[i]; } return *this; } - ~NameDict(){ _dict_pool.dealloc(_keys, _capacity); } + ~NameDict(){ pool128.dealloc(_items); } NameDict(NameDict&&) = delete; NameDict& operator=(NameDict&&) = delete; uint16_t size() const { return _size; } -#define HASH_PROBE(key, ok, i) \ -ok = false; \ -i = _hash(key, _mask, _hash_seed); \ -while(!_keys[i].empty()) { \ - if(_keys[i] == (key)) { ok = true; break; } \ - i = (i + 1) & _mask; \ +#define HASH_PROBE(key, ok, i) \ +ok = false; \ +i = _hash(key, _mask, _hash_seed); \ +while(!_items[i].first.empty()) { \ + if(_items[i].first == (key)) { ok = true; break; } \ + i = (i + 1) & _mask; \ } - const PyVar& operator[](StrName key) const { + PyObject* operator[](StrName key) const { bool ok; uint16_t i; HASH_PROBE(key, ok, i); - if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); - return value(i); + if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key)); + return _items[i].second; } - PyVar& get(StrName key){ - bool ok; uint16_t i; - HASH_PROBE(key, ok, i); - if(!ok) throw std::out_of_range("NameDict key not found: " + key.str()); - return value(i); - } - - template - void set(StrName key, T&& val){ + void set(StrName key, PyObject* val){ bool ok; uint16_t i; HASH_PROBE(key, ok, i); if(!ok) { @@ -147,29 +100,27 @@ while(!_keys[i].empty()) { \ _rehash(true); HASH_PROBE(key, ok, i); } - _keys[i] = key; + _items[i].first = key; } - value(i) = std::forward(val); + _items[i].second = val; } void _rehash(bool resize){ - StrName* old_keys = _keys; - PyVar* old_values = &value(0); + Item* old_items = _items; uint16_t old_capacity = _capacity; if(resize){ _capacity = find_next_capacity(_capacity * 2); _mask = _capacity - 1; } - _keys = _dict_pool.alloc(_capacity); + _alloc(_capacity); for(uint16_t i=0; i> items() const { - std::vector> v; + std::vector items() const { + std::vector v; for(uint16_t i=0; i<_capacity; i++){ - if(_keys[i].empty()) continue; - v.push_back(std::make_pair(_keys[i], value(i))); + if(_items[i].first.empty()) continue; + v.push_back(_items[i]); } return v; } @@ -225,11 +177,13 @@ while(!_keys[i].empty()) { \ std::vector keys() const { std::vector v; for(uint16_t i=0; i<_capacity; i++){ - if(_keys[i].empty()) continue; - v.push_back(_keys[i]); + if(_items[i].first.empty()) continue; + v.push_back(_items[i].first); } return v; } + + void _gc_mark() const; #undef HASH_PROBE #undef _hash }; diff --git a/src/obj.h b/src/obj.h index fb2a0e12..8d9fe55d 100644 --- a/src/obj.h +++ b/src/obj.h @@ -3,7 +3,6 @@ #include "common.h" #include "namedict.h" #include "tuplelist.h" -#include namespace pkpy { @@ -12,7 +11,7 @@ struct Frame; struct BaseRef; class VM; -typedef std::function NativeFuncRaw; +typedef std::function NativeFuncRaw; typedef shared_ptr CodeObject_; typedef shared_ptr NameDict_; @@ -22,10 +21,10 @@ struct NativeFunc { bool method; NativeFunc(NativeFuncRaw f, int argc, bool method) : f(f), argc(argc), method(method) {} - inline PyVar operator()(VM* vm, Args& args) const; + PyObject* operator()(VM* vm, Args& args) const; }; -struct Function { +struct FuncDecl { StrName name; CodeObject_ code; std::vector args; @@ -33,22 +32,28 @@ struct Function { NameDict kwargs; // empty if no k=v std::vector kwargs_order; - // runtime settings - PyVar _module = nullptr; - NameDict_ _closure = nullptr; - bool has_name(StrName val) const { bool _0 = std::find(args.begin(), args.end(), val) != args.end(); bool _1 = starred_arg == val; bool _2 = kwargs.contains(val); return _0 || _1 || _2; } + + void _gc_mark() const; +}; + +using FuncDecl_ = shared_ptr; + +struct Function{ + FuncDecl_ decl; + PyObject* _module; + NameDict_ _closure; }; struct BoundMethod { - PyVar obj; - PyVar method; - BoundMethod(const PyVar& obj, const PyVar& method) : obj(obj), method(method) {} + PyObject* obj; + PyObject* method; + BoundMethod(PyObject* obj, PyObject* method) : obj(obj), method(method) {} }; struct Range { @@ -58,14 +63,17 @@ struct Range { }; struct StarWrapper { - PyVar obj; - bool rvalue; - StarWrapper(const PyVar& obj, bool rvalue): obj(obj), rvalue(rvalue) {} + PyObject* obj; + StarWrapper(PyObject* obj): obj(obj) {} }; +using Super = std::pair; + +// TODO: re-examine the design of Slice struct Slice { int start = 0; - int stop = 0x7fffffff; + int stop = 0x7fffffff; + int step = 1; void normalize(int len){ if(start < 0) start += len; @@ -79,27 +87,37 @@ struct Slice { class BaseIter { protected: VM* vm; - PyVar _ref; // keep a reference to the object so it will not be deleted while iterating public: - virtual PyVar next() = 0; - PyVarRef loop_var; - BaseIter(VM* vm, PyVar _ref) : vm(vm), _ref(_ref) {} + BaseIter(VM* vm) : vm(vm) {} + virtual void _gc_mark() const {} + virtual PyObject* next() = 0; virtual ~BaseIter() = default; }; +struct GCHeader { + bool enabled; // whether this object is managed by GC + bool marked; // whether this object is marked + GCHeader() : enabled(true), marked(false) {} +}; + struct PyObject { + GCHeader gc; Type type; NameDict* _attr; - inline bool is_attr_valid() const noexcept { return _attr != nullptr; } - inline NameDict& attr() noexcept { return *_attr; } - inline const PyVar& attr(StrName name) const noexcept { return _attr->get(name); } + bool is_attr_valid() const noexcept { return _attr != nullptr; } + NameDict& attr() noexcept { return *_attr; } + PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; } virtual void* value() = 0; + virtual void _obj_gc_mark() = 0; PyObject(Type type) : type(type) {} virtual ~PyObject() { delete _attr; } }; +template +void _gc_mark(T& t); + template struct Py_ : PyObject { T _value; @@ -107,7 +125,7 @@ struct Py_ : PyObject { Py_(Type type, const T& val): PyObject(type), _value(val) { _init(); } Py_(Type type, T&& val): PyObject(type), _value(std::move(val)) { _init(); } - inline void _init() noexcept { + void _init() noexcept { if constexpr (std::is_same_v || std::is_same_v) { _attr = new NameDict(8, kTypeAttrLoadFactor); }else if constexpr(std::is_same_v){ @@ -119,75 +137,71 @@ struct Py_ : PyObject { } } void* value() override { return &_value; } + + void _obj_gc_mark() override { + if(gc.marked) return; + gc.marked = true; + if(_attr != nullptr) _attr->_gc_mark(); + pkpy::_gc_mark(_value); // handle PyObject* inside _value `T` + } }; -#define OBJ_GET(T, obj) (((Py_*)((obj).get()))->_value) +#define OBJ_GET(T, obj) (((Py_*)(obj))->_value) +#define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_obj_gc_mark() + +Str obj_type_name(VM* vm, Type type); + +#if DEBUG_NO_BUILTIN_MODULES +#define OBJ_NAME(obj) Str("") +#else #define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__)) +#endif const int kTpIntIndex = 2; const int kTpFloatIndex = 3; -inline bool is_type(const PyVar& obj, Type type) noexcept { +inline bool is_type(PyObject* obj, Type type) { +#if DEBUG_EXTRA_CHECK + if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr"); +#endif switch(type.index){ - case kTpIntIndex: return obj.is_tag_01(); - case kTpFloatIndex: return obj.is_tag_10(); - default: return !obj.is_tagged() && obj->type == type; + case kTpIntIndex: return is_int(obj); + case kTpFloatIndex: return is_float(obj); + default: return !is_tagged(obj) && obj->type == type; } } -inline bool is_both_int_or_float(const PyVar& a, const PyVar& b) noexcept { - return a.is_tagged() && b.is_tagged(); -} - -inline bool is_both_int(const PyVar& a, const PyVar& b) noexcept { - return (a.bits & b.bits & 0b11) == 0b01; -} - -inline bool is_int(const PyVar& obj) noexcept { - return obj.is_tag_01(); -} - -inline bool is_float(const PyVar& obj) noexcept { - return obj.is_tag_10(); -} - #define PY_CLASS(T, mod, name) \ static Type _type(VM* vm) { \ static const StrName __x0(#mod); \ static const StrName __x1(#name); \ return OBJ_GET(Type, vm->_modules[__x0]->attr(__x1)); \ } \ - static PyVar register_class(VM* vm, PyVar mod) { \ - PyVar type = vm->new_type_object(mod, #name, vm->tp_object); \ + static PyObject* register_class(VM* vm, PyObject* mod) { \ + PyObject* type = vm->new_type_object(mod, #name, vm->tp_object); \ if(OBJ_NAME(mod) != #mod) UNREACHABLE(); \ T::_register(vm, mod, type); \ type->attr()._try_perfect_rehash(); \ return type; \ } -union __8B { +union BitsCvt { i64 _int; f64 _float; - __8B(i64 val) : _int(val) {} - __8B(f64 val) : _float(val) {} + BitsCvt(i64 val) : _int(val) {} + BitsCvt(f64 val) : _float(val) {} }; -template struct is_py_class : std::false_type {}; +template struct is_py_class : std::false_type {}; template struct is_py_class> : std::true_type {}; -template -void _check_py_class(VM* vm, const PyVar& var); - -template -T py_pointer_cast(VM* vm, const PyVar& var); - -template -T py_value_cast(VM* vm, const PyVar& var); - -struct Discarded {}; +template void _check_py_class(VM*, PyObject*); +template T py_pointer_cast(VM*, PyObject*); +template T py_value_cast(VM*, PyObject*); +struct Discarded { }; template -__T py_cast(VM* vm, const PyVar& obj) { +__T py_cast(VM* vm, PyObject* obj) { using T = std::decay_t<__T>; if constexpr(std::is_pointer_v){ return py_pointer_cast(vm, obj); @@ -202,7 +216,7 @@ __T py_cast(VM* vm, const PyVar& obj) { } template -__T _py_cast(VM* vm, const PyVar& obj) { +__T _py_cast(VM* vm, PyObject* obj) { using T = std::decay_t<__T>; if constexpr(std::is_pointer_v<__T>){ return py_pointer_cast<__T>(vm, obj); @@ -214,7 +228,7 @@ __T _py_cast(VM* vm, const PyVar& obj) { } #define VAR(x) py_var(vm, x) -#define VAR_T(T, ...) vm->new_object(T::_type(vm), T(__VA_ARGS__)) +#define VAR_T(T, ...) vm->heap.gcnew(T::_type(vm), T(__VA_ARGS__)) #define CAST(T, x) py_cast(vm, x) #define _CAST(T, x) _py_cast(vm, x) diff --git a/src/opcodes.h b/src/opcodes.h index 1ad9dcad..87d9e88f 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -1,95 +1,95 @@ #ifdef OPCODE +/**************************/ OPCODE(NO_OP) +/**************************/ OPCODE(POP_TOP) -OPCODE(DUP_TOP_VALUE) -OPCODE(CALL) -OPCODE(CALL_UNPACK) -OPCODE(CALL_KWARGS) -OPCODE(CALL_KWARGS_UNPACK) -OPCODE(RETURN_VALUE) +OPCODE(DUP_TOP) OPCODE(ROT_TWO) +OPCODE(PRINT_EXPR) +/**************************/ +OPCODE(LOAD_CONST) +OPCODE(LOAD_NONE) +OPCODE(LOAD_TRUE) +OPCODE(LOAD_FALSE) +OPCODE(LOAD_ELLIPSIS) +OPCODE(LOAD_BUILTIN_EVAL) +OPCODE(LOAD_FUNCTION) +OPCODE(LOAD_NULL) +/**************************/ +OPCODE(LOAD_NAME) +OPCODE(LOAD_GLOBAL) +OPCODE(LOAD_ATTR) +OPCODE(LOAD_METHOD) +OPCODE(LOAD_SUBSCR) +OPCODE(STORE_LOCAL) +OPCODE(STORE_GLOBAL) +OPCODE(STORE_ATTR) +OPCODE(STORE_SUBSCR) + +OPCODE(DELETE_LOCAL) +OPCODE(DELETE_GLOBAL) +OPCODE(DELETE_ATTR) +OPCODE(DELETE_SUBSCR) +/**************************/ +OPCODE(BUILD_LIST) +OPCODE(BUILD_DICT) +OPCODE(BUILD_SET) +OPCODE(BUILD_SLICE) +OPCODE(BUILD_TUPLE) +OPCODE(BUILD_STRING) +/**************************/ OPCODE(BINARY_OP) OPCODE(COMPARE_OP) OPCODE(BITWISE_OP) OPCODE(IS_OP) OPCODE(CONTAINS_OP) - +/**************************/ +OPCODE(JUMP_ABSOLUTE) +OPCODE(POP_JUMP_IF_FALSE) +OPCODE(JUMP_IF_TRUE_OR_POP) +OPCODE(JUMP_IF_FALSE_OR_POP) +OPCODE(LOOP_CONTINUE) +OPCODE(LOOP_BREAK) +OPCODE(GOTO) +/**************************/ +OPCODE(CALL) +OPCODE(CALL_UNPACK) +OPCODE(CALL_KWARGS) +OPCODE(CALL_KWARGS_UNPACK) +OPCODE(RETURN_VALUE) +OPCODE(YIELD_VALUE) +/**************************/ +OPCODE(LIST_APPEND) +OPCODE(DICT_ADD) +OPCODE(SET_ADD) +/**************************/ OPCODE(UNARY_NEGATIVE) OPCODE(UNARY_NOT) OPCODE(UNARY_STAR) - -OPCODE(BUILD_LIST) -OPCODE(BUILD_MAP) -OPCODE(BUILD_SET) -OPCODE(BUILD_SLICE) -OPCODE(BUILD_TUPLE) -OPCODE(BUILD_TUPLE_REF) -OPCODE(BUILD_STRING) - -OPCODE(LIST_APPEND) -OPCODE(MAP_ADD) -OPCODE(SET_ADD) -OPCODE(IMPORT_NAME) -OPCODE(PRINT_EXPR) - +/**************************/ OPCODE(GET_ITER) OPCODE(FOR_ITER) - +/**************************/ +OPCODE(IMPORT_NAME) +OPCODE(IMPORT_STAR) +/**************************/ +OPCODE(UNPACK_SEQUENCE) +OPCODE(UNPACK_EX) +/**************************/ +OPCODE(BEGIN_CLASS) +OPCODE(END_CLASS) +OPCODE(STORE_CLASS_ATTR) +/**************************/ OPCODE(WITH_ENTER) OPCODE(WITH_EXIT) -OPCODE(LOOP_BREAK) -OPCODE(LOOP_CONTINUE) - -OPCODE(POP_JUMP_IF_FALSE) -OPCODE(JUMP_ABSOLUTE) -OPCODE(SAFE_JUMP_ABSOLUTE) -OPCODE(JUMP_IF_TRUE_OR_POP) -OPCODE(JUMP_IF_FALSE_OR_POP) - -OPCODE(GOTO) - -OPCODE(LOAD_CONST) -OPCODE(LOAD_NONE) -OPCODE(LOAD_TRUE) -OPCODE(LOAD_FALSE) -OPCODE(LOAD_EVAL_FN) -OPCODE(LOAD_FUNCTION) -OPCODE(LOAD_ELLIPSIS) -OPCODE(LOAD_NAME) -OPCODE(LOAD_NAME_REF) - +/**************************/ +OPCODE(TRY_BLOCK_ENTER) +OPCODE(TRY_BLOCK_EXIT) OPCODE(ASSERT) OPCODE(EXCEPTION_MATCH) OPCODE(RAISE) OPCODE(RE_RAISE) - -OPCODE(BUILD_INDEX) -OPCODE(BUILD_ATTR) -OPCODE(BUILD_ATTR_REF) -OPCODE(STORE_NAME) -OPCODE(STORE_FUNCTION) -OPCODE(STORE_REF) -OPCODE(DELETE_REF) - -OPCODE(TRY_BLOCK_ENTER) -OPCODE(TRY_BLOCK_EXIT) - -OPCODE(YIELD_VALUE) - -OPCODE(FAST_INDEX) // a[x] -OPCODE(FAST_INDEX_REF) // a[x] - -OPCODE(INPLACE_BINARY_OP) -OPCODE(INPLACE_BITWISE_OP) - -OPCODE(SETUP_CLOSURE) -OPCODE(SETUP_DECORATOR) -OPCODE(STORE_ALL_NAMES) - -OPCODE(BEGIN_CLASS) -OPCODE(END_CLASS) -OPCODE(STORE_CLASS_ATTR) - +/**************************/ #endif \ No newline at end of file diff --git a/src/parser.h b/src/parser.h deleted file mode 100644 index c867ea4a..00000000 --- a/src/parser.h +++ /dev/null @@ -1,302 +0,0 @@ -#pragma once - -#include "error.h" -#include "obj.h" - -namespace pkpy{ - -typedef uint8_t TokenIndex; - -constexpr const char* kTokens[] = { - "@error", "@eof", "@eol", "@sof", - ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::", - "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->", - "<<", ">>", "&", "|", "^", "?", "@", - "==", "!=", ">=", "<=", - "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=", - /** KW_BEGIN **/ - "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", - "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally", - "goto", "label", // extended keywords, not available in cpython - "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise", - /** KW_END **/ - "is not", "not in", - "@id", "@num", "@str", "@fstr", - "@indent", "@dedent" -}; - -const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]); - -constexpr TokenIndex TK(const char token[]) { - for(int k=0; k kTokenKwMap = [](){ - std::map map; - for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k; - return map; -}(); - - -struct Token{ - TokenIndex type; - - const char* start; - int length; - int line; - PyVar value; - - Str str() const { return Str(start, length);} - - Str info() const { - StrStream ss; - Str raw = str(); - if (raw == Str("\n")) raw = "\\n"; - ss << line << ": " << TK_STR(type) << " '" << raw << "'"; - return ss.str(); - } -}; - -// https://docs.python.org/3/reference/expressions.html -enum Precedence { - PREC_NONE, - PREC_ASSIGNMENT, // = - PREC_COMMA, // , - PREC_TERNARY, // ?: - PREC_LOGICAL_OR, // or - PREC_LOGICAL_AND, // and - PREC_LOGICAL_NOT, // not - PREC_EQUALITY, // == != - PREC_TEST, // in / is / is not / not in - PREC_COMPARISION, // < > <= >= - PREC_BITWISE_OR, // | - PREC_BITWISE_XOR, // ^ - PREC_BITWISE_AND, // & - PREC_BITWISE_SHIFT, // << >> - PREC_TERM, // + - - PREC_FACTOR, // * / % // - PREC_UNARY, // - not - PREC_EXPONENT, // ** - PREC_CALL, // () - PREC_SUBSCRIPT, // [] - PREC_ATTRIB, // .index - PREC_PRIMARY, -}; - -// The context of the parsing phase for the compiler. -struct Parser { - shared_ptr src; - - const char* token_start; - const char* curr_char; - int current_line = 1; - Token prev, curr; - std::queue nexts; - std::stack indents; - - int brackets_level = 0; - - Token next_token(){ - if(nexts.empty()){ - return Token{TK("@error"), token_start, (int)(curr_char - token_start), current_line}; - } - Token t = nexts.front(); - if(t.type == TK("@eof") && indents.size()>1){ - nexts.pop(); - indents.pop(); - return Token{TK("@dedent"), token_start, 0, current_line}; - } - nexts.pop(); - return t; - } - - inline char peekchar() const{ return *curr_char; } - - bool match_n_chars(int n, char c0){ - const char* c = curr_char; - for(int i=0; i 0) return true; - int spaces = eat_spaces(); - if(peekchar() == '#') skip_line_comment(); - if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true; - // https://docs.python.org/3/reference/lexical_analysis.html#indentation - if(spaces > indents.top()){ - indents.push(spaces); - nexts.push(Token{TK("@indent"), token_start, 0, current_line}); - } else if(spaces < indents.top()){ - while(spaces < indents.top()){ - indents.pop(); - nexts.push(Token{TK("@dedent"), token_start, 0, current_line}); - } - if(spaces != indents.top()){ - return false; - } - } - return true; - } - - char eatchar() { - char c = peekchar(); - if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline"); - curr_char++; - return c; - } - - char eatchar_include_newline() { - char c = peekchar(); - curr_char++; - if (c == '\n'){ - current_line++; - src->line_starts.push_back(curr_char); - } - return c; - } - - int eat_name() { - curr_char--; - while(true){ - uint8_t c = peekchar(); - int u8bytes = 0; - if((c & 0b10000000) == 0b00000000) u8bytes = 1; - else if((c & 0b11100000) == 0b11000000) u8bytes = 2; - else if((c & 0b11110000) == 0b11100000) u8bytes = 3; - else if((c & 0b11111000) == 0b11110000) u8bytes = 4; - else return 1; - if(u8bytes == 1){ - if(isalpha(c) || c=='_' || isdigit(c)) { - curr_char++; - continue; - }else{ - break; - } - } - // handle multibyte char - std::string u8str(curr_char, u8bytes); - if(u8str.size() != u8bytes) return 2; - uint32_t value = 0; - for(int k=0; k < u8bytes; k++){ - uint8_t b = u8str[k]; - if(k==0){ - if(u8bytes == 2) value = (b & 0b00011111) << 6; - else if(u8bytes == 3) value = (b & 0b00001111) << 12; - else if(u8bytes == 4) value = (b & 0b00000111) << 18; - }else{ - value |= (b & 0b00111111) << (6*(u8bytes-k-1)); - } - } - if(is_unicode_Lo_char(value)) curr_char += u8bytes; - else break; - } - - int length = (int)(curr_char - token_start); - if(length == 0) return 3; - std::string_view name(token_start, length); - - if(src->mode == JSON_MODE){ - if(name == "true"){ - set_next_token(TK("True")); - } else if(name == "false"){ - set_next_token(TK("False")); - } else if(name == "null"){ - set_next_token(TK("None")); - } else { - return 4; - } - return 0; - } - - if(kTokenKwMap.count(name)){ - if(name == "not"){ - if(strncmp(curr_char, " in", 3) == 0){ - curr_char += 3; - set_next_token(TK("not in")); - return 0; - } - }else if(name == "is"){ - if(strncmp(curr_char, " not", 4) == 0){ - curr_char += 4; - set_next_token(TK("is not")); - return 0; - } - } - set_next_token(kTokenKwMap.at(name)); - } else { - set_next_token(TK("@id")); - } - return 0; - } - - void skip_line_comment() { - char c; - while ((c = peekchar()) != '\0') { - if (c == '\n') return; - eatchar(); - } - } - - bool matchchar(char c) { - if (peekchar() != c) return false; - eatchar_include_newline(); - return true; - } - - void set_next_token(TokenIndex type, PyVar value=nullptr) { - switch(type){ - case TK("{"): case TK("["): case TK("("): brackets_level++; break; - case TK(")"): case TK("]"): case TK("}"): brackets_level--; break; - } - nexts.push( Token{ - type, - token_start, - (int)(curr_char - token_start), - current_line - ((type == TK("@eol")) ? 1 : 0), - value - }); - } - - void set_next_token_2(char c, TokenIndex one, TokenIndex two) { - if (matchchar(c)) set_next_token(two); - else set_next_token(one); - } - - Parser(shared_ptr src) { - this->src = src; - this->token_start = src->source; - this->curr_char = src->source; - this->nexts.push(Token{TK("@sof"), token_start, 0, current_line}); - this->indents.push(0); - } -}; - -} // namespace pkpy \ No newline at end of file diff --git a/src/pocketpy.h b/src/pocketpy.h index 0b105367..d1169135 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -2,6 +2,7 @@ #include "ceval.h" #include "compiler.h" +#include "obj.h" #include "repl.h" #include "iter.h" #include "cffi.h" @@ -10,12 +11,14 @@ namespace pkpy { -CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) { - Compiler compiler(this, source.c_str(), filename, mode); +inline CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) { + Compiler compiler(this, source, filename, mode); try{ return compiler.compile(); }catch(Exception& e){ - // std::cout << e.summary() << std::endl; +#if DEBUG_FULL_EXCEPTION + std::cerr << e.summary() << std::endl; +#endif _error(e); return nullptr; } @@ -42,7 +45,7 @@ CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) { }); -void init_builtins(VM* _vm) { +inline void init_builtins(VM* _vm) { BIND_NUM_ARITH_OPT(__add__, +) BIND_NUM_ARITH_OPT(__sub__, -) BIND_NUM_ARITH_OPT(__mul__, *) @@ -66,10 +69,12 @@ void init_builtins(VM* _vm) { vm->check_type(args[0], vm->tp_type); Type type = OBJ_GET(Type, args[0]); if(!vm->isinstance(args[1], type)){ - vm->TypeError("super(type, obj): obj must be an instance or subtype of type"); + Str _0 = obj_type_name(vm, OBJ_GET(Type, vm->_t(args[1]))); + Str _1 = obj_type_name(vm, type); + vm->TypeError("super(): " + _0.escape() + " is not an instance of " + _1.escape()); } - Type base = vm->_all_types[type.index].base; - return vm->new_object(vm->tp_super, Super(args[1], base)); + Type base = vm->_all_types[type].base; + return vm->heap.gcnew(vm->tp_super, Super(args[1], base)); }); _vm->bind_builtin_func<2>("isinstance", [](VM* vm, Args& args) { @@ -79,16 +84,16 @@ void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<1>("id", [](VM* vm, Args& args) { - const PyVar& obj = args[0]; - if(obj.is_tagged()) return VAR((i64)0); - return VAR(obj.bits); + PyObject* obj = args[0]; + if(is_tagged(obj)) return VAR((i64)0); + return VAR(BITS(obj)); }); _vm->bind_builtin_func<2>("divmod", [](VM* vm, Args& args) { i64 lhs = CAST(i64, args[0]); i64 rhs = CAST(i64, args[1]); if(rhs == 0) vm->ZeroDivisionError(); - return VAR(two_args(VAR(lhs/rhs), VAR(lhs%rhs))); + return VAR(Tuple({VAR(lhs/rhs), VAR(lhs%rhs)})); }); _vm->bind_builtin_func<1>("eval", [](VM* vm, Args& args) { @@ -110,7 +115,7 @@ void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<1>("repr", CPP_LAMBDA(vm->asRepr(args[0]))); - _vm->bind_builtin_func<1>("len", CPP_LAMBDA(vm->call(args[0], __len__, no_arg()))); + _vm->bind_builtin_func<1>("len", CPP_LAMBDA(vm->fast_call(__len__, Args{args[0]}))); _vm->bind_builtin_func<1>("hash", [](VM* vm, Args& args){ i64 value = vm->hash(args[0]); @@ -126,8 +131,8 @@ void init_builtins(VM* _vm) { _vm->bind_builtin_func<1>("ord", [](VM* vm, Args& args) { const Str& s = CAST(Str&, args[0]); - if (s.size() != 1) vm->TypeError("ord() expected an ASCII character"); - return VAR((i64)(s.c_str()[0])); + if (s.length()!=1) vm->TypeError("ord() expected an ASCII character"); + return VAR((i64)(s[0])); }); _vm->bind_builtin_func<2>("hasattr", [](VM* vm, Args& args) { @@ -164,17 +169,16 @@ void init_builtins(VM* _vm) { std::vector keys = t_attr.keys(); names.insert(keys.begin(), keys.end()); List ret; - for (StrName name : names) ret.push_back(VAR(name.str())); + for (StrName name : names) ret.push_back(VAR(name.sv())); return VAR(std::move(ret)); }); _vm->bind_method<0>("object", "__repr__", [](VM* vm, Args& args) { - PyVar self = args[0]; - std::uintptr_t addr = self.is_tagged() ? 0 : (uintptr_t)self.get(); - StrStream ss; - ss << std::hex << addr; - Str s = "<" + OBJ_NAME(vm->_t(self)) + " object at 0x" + ss.str() + ">"; - return VAR(s); + PyObject* self = args[0]; + if(is_tagged(self)) self = nullptr; + std::stringstream ss; + ss << "<" << OBJ_NAME(vm->_t(self)) << " object at " << std::hex << self << ">"; + return VAR(ss.str()); }); _vm->bind_method<1>("object", "__eq__", CPP_LAMBDA(VAR(args[0] == args[1]))); @@ -233,11 +237,11 @@ void init_builtins(VM* _vm) { const Str& s = CAST(Str&, args[0]); try{ size_t parsed = 0; - i64 val = S_TO_INT(s, &parsed, 10); - if(parsed != s.size()) throw std::invalid_argument(""); + i64 val = S_TO_INT(s.str(), &parsed, 10); + if(parsed != s.length()) throw std::invalid_argument(""); return VAR(val); }catch(std::invalid_argument&){ - vm->ValueError("invalid literal for int(): " + s.escape(true)); + vm->ValueError("invalid literal for int(): " + s.escape()); } } vm->TypeError("int() argument must be a int, float, bool or str"); @@ -280,7 +284,7 @@ void init_builtins(VM* _vm) { if(s == "inf") return VAR(INFINITY); if(s == "-inf") return VAR(-INFINITY); try{ - f64 val = S_TO_FLOAT(s); + f64 val = S_TO_FLOAT(s.str()); return VAR(val); }catch(std::invalid_argument&){ vm->ValueError("invalid literal for float(): '" + s + "'"); @@ -293,7 +297,7 @@ void init_builtins(VM* _vm) { _vm->bind_method<0>("float", "__repr__", [](VM* vm, Args& args) { f64 val = CAST(f64, args[0]); if(std::isinf(val) || std::isnan(val)) return VAR(std::to_string(val)); - StrStream ss; + std::stringstream ss; ss << std::setprecision(std::numeric_limits::max_digits10-1-2) << val; std::string s = ss.str(); if(std::all_of(s.begin()+1, s.end(), isdigit)) s += ".0"; @@ -323,7 +327,7 @@ void init_builtins(VM* _vm) { _vm->bind_method<1>("str", "__contains__", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); const Str& other = CAST(Str&, args[1]); - return VAR(self.find(other) != Str::npos); + return VAR(self.index(other) != -1); }); _vm->bind_method<0>("str", "__str__", CPP_LAMBDA(args[0])); @@ -331,7 +335,7 @@ void init_builtins(VM* _vm) { _vm->bind_method<0>("str", "__repr__", [](VM* vm, Args& args) { const Str& _self = CAST(Str&, args[0]); - return VAR(_self.escape(true)); + return VAR(_self.escape()); }); _vm->bind_method<0>("str", "__json__", [](VM* vm, Args& args) { @@ -357,7 +361,7 @@ void init_builtins(VM* _vm) { if(is_type(args[1], vm->tp_slice)){ Slice s = _CAST(Slice, args[1]); s.normalize(self.u8_length()); - return VAR(self.u8_substr(s.start, s.stop)); + return VAR(self.u8_slice(s.start, s.stop)); } int index = CAST(int, args[1]); @@ -378,34 +382,31 @@ void init_builtins(VM* _vm) { }); _vm->bind_method<2>("str", "replace", [](VM* vm, Args& args) { - const Str& _self = CAST(Str&, args[0]); - const Str& _old = CAST(Str&, args[1]); - const Str& _new = CAST(Str&, args[2]); - Str _copy = _self; - size_t pos = 0; - while ((pos = _copy.find(_old, pos)) != std::string::npos) { - _copy.replace(pos, _old.length(), _new); - pos += _new.length(); - } - return VAR(_copy); + const Str& self = CAST(Str&, args[0]); + const Str& old = CAST(Str&, args[1]); + const Str& new_ = CAST(Str&, args[2]); + return VAR(self.replace(old, new_)); }); _vm->bind_method<1>("str", "startswith", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); const Str& prefix = CAST(Str&, args[1]); - return VAR(self.find(prefix) == 0); + return VAR(self.index(prefix) == 0); }); _vm->bind_method<1>("str", "endswith", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); const Str& suffix = CAST(Str&, args[1]); - return VAR(self.rfind(suffix) == self.length() - suffix.length()); + int offset = self.length() - suffix.length(); + if(offset < 0) return vm->False; + bool ok = memcmp(self.data+offset, suffix.data, suffix.length()) == 0; + return VAR(ok); }); _vm->bind_method<1>("str", "join", [](VM* vm, Args& args) { const Str& self = CAST(Str&, args[0]); - StrStream ss; - PyVar obj = vm->asList(args[1]); + FastStrStream ss; + PyObject* obj = vm->asList(args[1]); const List& list = CAST(List&, obj); for (int i = 0; i < list.size(); ++i) { if (i > 0) ss << self; @@ -423,9 +424,9 @@ void init_builtins(VM* _vm) { _vm->bind_method<1>("list", "extend", [](VM* vm, Args& args) { List& self = CAST(List&, args[0]); - PyVar obj = vm->asList(args[1]); + PyObject* obj = vm->asList(args[1]); const List& list = CAST(List&, obj); - self.insert(self.end(), list.begin(), list.end()); + self.extend(list); return vm->None; }); @@ -440,7 +441,7 @@ void init_builtins(VM* _vm) { int n = CAST(int, args[1]); List result; result.reserve(self.size() * n); - for(int i = 0; i < n; i++) result.insert(result.end(), self.begin(), self.end()); + for(int i = 0; i < n; i++) result.extend(self); return VAR(std::move(result)); }); @@ -450,7 +451,7 @@ void init_builtins(VM* _vm) { if(index < 0) index += self.size(); if(index < 0) index = 0; if(index > self.size()) index = self.size(); - self.insert(self.begin() + index, args[2]); + self.insert(index, args[2]); return vm->None; }); @@ -463,10 +464,10 @@ void init_builtins(VM* _vm) { _vm->bind_method<1>("list", "__add__", [](VM* vm, Args& args) { const List& self = CAST(List&, args[0]); - const List& obj = CAST(List&, args[1]); - List new_list = self; - new_list.insert(new_list.end(), obj.begin(), obj.end()); - return VAR(new_list); + const List& other = CAST(List&, args[1]); + List new_list(self); // copy construct + new_list.extend(other); + return VAR(std::move(new_list)); }); _vm->bind_method<0>("list", "__len__", [](VM* vm, Args& args) { @@ -506,14 +507,14 @@ void init_builtins(VM* _vm) { List& self = CAST(List&, args[0]); int index = CAST(int, args[1]); index = vm->normalized_index(index, self.size()); - self.erase(self.begin() + index); + self.erase(index); return vm->None; }); /************ PyTuple ************/ _vm->bind_static_method<1>("tuple", "__new__", [](VM* vm, Args& args) { List list = CAST(List, vm->asList(args[0])); - return VAR(Tuple::from_list(std::move(list))); + return VAR(Tuple(std::move(list))); }); _vm->bind_method<0>("tuple", "__iter__", [](VM* vm, Args& args) { @@ -528,7 +529,7 @@ void init_builtins(VM* _vm) { s.normalize(self.size()); List new_list; for(size_t i = s.start; i < s.stop; i++) new_list.push_back(self[i]); - return VAR(Tuple::from_list(std::move(new_list))); + return VAR(Tuple(std::move(new_list))); } int index = CAST(int, args[1]); @@ -542,7 +543,7 @@ void init_builtins(VM* _vm) { }); /************ PyBool ************/ - _vm->bind_static_method<1>("bool", "__new__", CPP_LAMBDA(vm->asBool(args[0]))); + _vm->bind_static_method<1>("bool", "__new__", CPP_LAMBDA(VAR(vm->asBool(args[0])))); _vm->bind_method<0>("bool", "__repr__", [](VM* vm, Args& args) { bool val = CAST(bool, args[0]); @@ -564,50 +565,47 @@ void init_builtins(VM* _vm) { } #ifdef _WIN32 -#define __EXPORT __declspec(dllexport) +#define __EXPORT __declspec(dllexport) inline #elif __APPLE__ -#define __EXPORT __attribute__((visibility("default"))) __attribute__((used)) +#define __EXPORT __attribute__((visibility("default"))) __attribute__((used)) inline #elif __EMSCRIPTEN__ #include -#define __EXPORT EMSCRIPTEN_KEEPALIVE +#define __EXPORT EMSCRIPTEN_KEEPALIVE inline #else -#define __EXPORT +#define __EXPORT inline #endif -void add_module_time(VM* vm){ - PyVar mod = vm->new_module("time"); +inline void add_module_time(VM* vm){ + PyObject* mod = vm->new_module("time"); vm->bind_func<0>(mod, "time", [](VM* vm, Args& args) { auto now = std::chrono::high_resolution_clock::now(); return VAR(std::chrono::duration_cast(now.time_since_epoch()).count() / 1000000.0); }); } -void add_module_sys(VM* vm){ - PyVar mod = vm->new_module("sys"); +inline void add_module_sys(VM* vm){ + PyObject* mod = vm->new_module("sys"); vm->setattr(mod, "version", VAR(PK_VERSION)); - - vm->bind_func<1>(mod, "getrefcount", CPP_LAMBDA(VAR(args[0].use_count()))); vm->bind_func<0>(mod, "getrecursionlimit", CPP_LAMBDA(VAR(vm->recursionlimit))); - vm->bind_func<1>(mod, "setrecursionlimit", [](VM* vm, Args& args) { vm->recursionlimit = CAST(int, args[0]); return vm->None; }); } -void add_module_json(VM* vm){ - PyVar mod = vm->new_module("json"); +inline void add_module_json(VM* vm){ + PyObject* mod = vm->new_module("json"); vm->bind_func<1>(mod, "loads", [](VM* vm, Args& args) { const Str& expr = CAST(Str&, args[0]); CodeObject_ code = vm->compile(expr, "", JSON_MODE); return vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals); }); - vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->call(args[0], __json__))); + vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->fast_call(__json__, Args{args[0]}))); } -void add_module_math(VM* vm){ - PyVar mod = vm->new_module("math"); +inline void add_module_math(VM* vm){ + PyObject* mod = vm->new_module("math"); vm->setattr(mod, "pi", VAR(3.1415926535897932384)); vm->setattr(mod, "e" , VAR(2.7182818284590452354)); @@ -625,12 +623,12 @@ void add_module_math(VM* vm){ vm->bind_func<1>(mod, "sqrt", CPP_LAMBDA(VAR(std::sqrt(vm->num_to_float(args[0]))))); } -void add_module_dis(VM* vm){ - PyVar mod = vm->new_module("dis"); +inline void add_module_dis(VM* vm){ + PyObject* mod = vm->new_module("dis"); vm->bind_func<1>(mod, "dis", [](VM* vm, Args& args) { - PyVar f = args[0]; + PyObject* f = args[0]; if(is_type(f, vm->tp_bound_method)) f = CAST(BoundMethod, args[0]).method; - CodeObject_ code = CAST(Function, f).code; + CodeObject_ code = CAST(Function&, f).decl->code; (*vm->_stdout) << vm->disassemble(code); return vm->None; }); @@ -641,17 +639,17 @@ struct ReMatch { i64 start; i64 end; - std::smatch m; - ReMatch(i64 start, i64 end, std::smatch m) : start(start), end(end), m(m) {} + std::cmatch m; + ReMatch(i64 start, i64 end, std::cmatch m) : start(start), end(end), m(m) {} - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_method<-1>(type, "__init__", CPP_NOT_IMPLEMENTED()); vm->bind_method<0>(type, "start", CPP_LAMBDA(VAR(CAST(ReMatch&, args[0]).start))); vm->bind_method<0>(type, "end", CPP_LAMBDA(VAR(CAST(ReMatch&, args[0]).end))); vm->bind_method<0>(type, "span", [](VM* vm, Args& args) { auto& self = CAST(ReMatch&, args[0]); - return VAR(two_args(VAR(self.start), VAR(self.end))); + return VAR(Tuple({VAR(self.start), VAR(self.end)})); }); vm->bind_method<1>(type, "group", [](VM* vm, Args& args) { @@ -663,20 +661,20 @@ struct ReMatch { } }; -PyVar _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){ - std::regex re(pattern); - std::smatch m; - if(std::regex_search(string, m, re)){ - if(fromStart && m.position() != 0) return vm->None; - i64 start = string._to_u8_index(m.position()); - i64 end = string._to_u8_index(m.position() + m.length()); +inline PyObject* _regex_search(const Str& pattern, const Str& string, bool from_start, VM* vm){ + std::regex re(pattern.begin(), pattern.end()); + std::cmatch m; + if(std::regex_search(string.begin(), string.end(), m, re)){ + if(from_start && m.position() != 0) return vm->None; + i64 start = string._byte_index_to_unicode(m.position()); + i64 end = string._byte_index_to_unicode(m.position() + m.length()); return VAR_T(ReMatch, start, end, m); } return vm->None; }; -void add_module_re(VM* vm){ - PyVar mod = vm->new_module("re"); +inline void add_module_re(VM* vm){ + PyObject* mod = vm->new_module("re"); ReMatch::register_class(vm, mod); vm->bind_func<2>(mod, "match", [](VM* vm, Args& args) { @@ -695,16 +693,16 @@ void add_module_re(VM* vm){ const Str& pattern = CAST(Str&, args[0]); const Str& repl = CAST(Str&, args[1]); const Str& string = CAST(Str&, args[2]); - std::regex re(pattern); - return VAR(std::regex_replace(string, re, repl)); + std::regex re(pattern.begin(), pattern.end()); + return VAR(std::regex_replace(string.str(), re, repl.str())); }); vm->bind_func<2>(mod, "split", [](VM* vm, Args& args) { const Str& pattern = CAST(Str&, args[0]); const Str& string = CAST(Str&, args[1]); - std::regex re(pattern); - std::sregex_token_iterator it(string.begin(), string.end(), re, -1); - std::sregex_token_iterator end; + std::regex re(pattern.begin(), pattern.end()); + std::cregex_token_iterator it(string.begin(), string.end(), re, -1); + std::cregex_token_iterator end; List vec; for(; it != end; ++it){ vec.push_back(VAR(it->str())); @@ -740,7 +738,7 @@ struct Random{ gen.seed(seed); } - static void _register(VM* vm, PyVar mod, PyVar type){ + static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<0>(type, "__new__", CPP_LAMBDA(VAR_T(Random))); vm->bind_method<1>(type, "seed", native_proxy_callable(&Random::seed)); vm->bind_method<2>(type, "randint", native_proxy_callable(&Random::randint)); @@ -749,15 +747,21 @@ struct Random{ } }; -void add_module_random(VM* vm){ - PyVar mod = vm->new_module("random"); +inline void add_module_random(VM* vm){ + PyObject* mod = vm->new_module("random"); Random::register_class(vm, mod); CodeObject_ code = vm->compile(kPythonLibs["random"], "random.py", EXEC_MODE); vm->_exec(code, mod); } -void VM::post_init(){ +inline void add_module_gc(VM* vm){ + PyObject* mod = vm->new_module("gc"); + vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->heap.collect()))); +} + +inline void VM::post_init(){ init_builtins(this); +#if !DEBUG_NO_BUILTIN_MODULES add_module_sys(this); add_module_time(this); add_module_json(this); @@ -767,7 +771,8 @@ void VM::post_init(){ add_module_random(this); add_module_io(this); add_module_os(this); - add_module_c(this); + // add_module_c(this); + add_module_gc(this); for(const char* name: {"this", "functools", "collections", "heapq", "bisect"}){ _lazy_modules[name] = kPythonLibs[name]; @@ -775,21 +780,22 @@ void VM::post_init(){ CodeObject_ code = compile(kPythonLibs["builtins"], "", EXEC_MODE); this->_exec(code, this->builtins); - code = compile(kPythonLibs["dict"], "", EXEC_MODE); + code = compile(kPythonLibs["_dict"], "", EXEC_MODE); this->_exec(code, this->builtins); - code = compile(kPythonLibs["set"], "", EXEC_MODE); + code = compile(kPythonLibs["_set"], "", EXEC_MODE); this->_exec(code, this->builtins); // property is defined in builtins.py so we need to add it after builtins is loaded _t(tp_object)->attr().set(__class__, property(CPP_LAMBDA(vm->_t(args[0])))); _t(tp_type)->attr().set(__base__, property([](VM* vm, Args& args){ - const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0]).index]; - return info.base.index == -1 ? vm->None : vm->_all_types[info.base.index].obj; + const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])]; + return info.base.index == -1 ? vm->None : vm->_all_types[info.base].obj; })); _t(tp_type)->attr().set(__name__, property([](VM* vm, Args& args){ - const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0]).index]; + const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])]; return VAR(info.name); })); +#endif } } // namespace pkpy @@ -851,11 +857,11 @@ extern "C" { /// Return `__repr__` of the result. /// If the variable is not found, return `nullptr`. char* pkpy_vm_get_global(pkpy::VM* vm, const char* name){ - pkpy::PyVar* val = vm->_main->attr().try_get(name); + pkpy::PyObject* val = vm->_main->attr().try_get(name); if(val == nullptr) return nullptr; try{ - pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(*val)); - return strdup(repr.c_str()); + pkpy::Str repr = pkpy::CAST(pkpy::Str&, vm->asRepr(val)); + return repr.c_str_dup(); }catch(...){ return nullptr; } @@ -867,11 +873,11 @@ extern "C" { /// Return `__repr__` of the result. /// If there is any error, return `nullptr`. char* pkpy_vm_eval(pkpy::VM* vm, const char* source){ - pkpy::PyVarOrNull ret = vm->exec(source, "", pkpy::EVAL_MODE); + pkpy::PyObject* ret = vm->exec(source, "", pkpy::EVAL_MODE); if(ret == nullptr) return nullptr; try{ - pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(ret)); - return strdup(repr.c_str()); + pkpy::Str repr = pkpy::CAST(pkpy::Str&, vm->asRepr(ret)); + return repr.c_str_dup(); }catch(...){ return nullptr; } @@ -908,12 +914,12 @@ extern "C" { /// /// Return a json representing the result. char* pkpy_vm_read_output(pkpy::VM* vm){ - if(vm->use_stdio) return nullptr; - pkpy::StrStream* s_out = (pkpy::StrStream*)(vm->_stdout); - pkpy::StrStream* s_err = (pkpy::StrStream*)(vm->_stderr); + if(vm->is_stdio_used()) return nullptr; + std::stringstream* s_out = (std::stringstream*)(vm->_stdout); + std::stringstream* s_err = (std::stringstream*)(vm->_stderr); pkpy::Str _stdout = s_out->str(); pkpy::Str _stderr = s_err->str(); - pkpy::StrStream ss; + std::stringstream ss; ss << '{' << "\"stdout\": " << _stdout.escape(false); ss << ", " << "\"stderr\": " << _stderr.escape(false) << '}'; s_out->str(""); s_err->str(""); @@ -950,13 +956,13 @@ extern "C" { for(int i=0; mod[i]; i++) if(mod[i] == ' ') return nullptr; for(int i=0; name[i]; i++) if(name[i] == ' ') return nullptr; std::string f_header = std::string(mod) + '.' + name + '#' + std::to_string(kGlobalBindId++); - pkpy::PyVar obj = vm->_modules.contains(mod) ? vm->_modules[mod] : vm->new_module(mod); + pkpy::PyObject* obj = vm->_modules.contains(mod) ? vm->_modules[mod] : vm->new_module(mod); vm->bind_func<-1>(obj, name, [ret_code, f_header](pkpy::VM* vm, const pkpy::Args& args){ - pkpy::StrStream ss; + std::stringstream ss; ss << f_header; for(int i=0; icall(args[i], pkpy::__json__); + pkpy::PyObject* x = vm->fast_call(pkpy::__json__, pkpy::Args{args[i]}); ss << pkpy::CAST(pkpy::Str&, x); } char* packet = strdup(ss.str().c_str()); diff --git a/src/ref.h b/src/ref.h deleted file mode 100644 index 9719d218..00000000 --- a/src/ref.h +++ /dev/null @@ -1,171 +0,0 @@ -#pragma once - -#include "obj.h" -#include "vm.h" - -namespace pkpy { - -struct BaseRef { - virtual PyVar get(VM*, Frame*) const = 0; - virtual void set(VM*, Frame*, PyVar) const = 0; - virtual void del(VM*, Frame*) const = 0; - virtual ~BaseRef() = default; -}; - -struct NameRef : BaseRef { - const std::pair pair; - inline StrName name() const { return pair.first; } - inline NameScope scope() const { return pair.second; } - NameRef(const std::pair& pair) : pair(pair) {} - - PyVar get(VM* vm, Frame* frame) const{ - PyVar* val; - val = frame->f_locals().try_get(name()); - if(val != nullptr) return *val; - val = frame->f_closure_try_get(name()); - if(val != nullptr) return *val; - val = frame->f_globals().try_get(name()); - if(val != nullptr) return *val; - val = vm->builtins->attr().try_get(name()); - if(val != nullptr) return *val; - vm->NameError(name()); - return nullptr; - } - - void set(VM* vm, Frame* frame, PyVar val) const{ - switch(scope()) { - case NAME_LOCAL: frame->f_locals().set(name(), std::move(val)); break; - case NAME_GLOBAL: - if(frame->f_locals().try_set(name(), std::move(val))) return; - frame->f_globals().set(name(), std::move(val)); - break; - default: UNREACHABLE(); - } - } - - void del(VM* vm, Frame* frame) const{ - switch(scope()) { - case NAME_LOCAL: { - if(frame->f_locals().contains(name())){ - frame->f_locals().erase(name()); - }else{ - vm->NameError(name()); - } - } break; - case NAME_GLOBAL: - { - if(frame->f_locals().contains(name())){ - frame->f_locals().erase(name()); - }else{ - if(frame->f_globals().contains(name())){ - frame->f_globals().erase(name()); - }else{ - vm->NameError(name()); - } - } - } break; - default: UNREACHABLE(); - } - } -}; - -struct AttrRef : BaseRef { - mutable PyVar obj; - NameRef attr; - AttrRef(PyVar obj, NameRef attr) : obj(obj), attr(attr) {} - - PyVar get(VM* vm, Frame* frame) const{ - return vm->getattr(obj, attr.name()); - } - - void set(VM* vm, Frame* frame, PyVar val) const{ - vm->setattr(obj, attr.name(), std::move(val)); - } - - void del(VM* vm, Frame* frame) const{ - if(!obj->is_attr_valid()) vm->TypeError("cannot delete attribute"); - if(!obj->attr().contains(attr.name())) vm->AttributeError(obj, attr.name()); - obj->attr().erase(attr.name()); - } -}; - -struct IndexRef : BaseRef { - mutable PyVar obj; - PyVar index; - IndexRef(PyVar obj, PyVar index) : obj(obj), index(index) {} - - PyVar get(VM* vm, Frame* frame) const{ - return vm->fast_call(__getitem__, two_args(obj, index)); - } - - void set(VM* vm, Frame* frame, PyVar val) const{ - Args args(3); - args[0] = obj; args[1] = index; args[2] = std::move(val); - vm->fast_call(__setitem__, std::move(args)); - } - - void del(VM* vm, Frame* frame) const{ - vm->fast_call(__delitem__, two_args(obj, index)); - } -}; - -struct TupleRef : BaseRef { - Tuple objs; - TupleRef(Tuple&& objs) : objs(std::move(objs)) {} - - PyVar get(VM* vm, Frame* frame) const{ - Tuple args(objs.size()); - for (int i = 0; i < objs.size(); i++) { - args[i] = vm->PyRef_AS_C(objs[i])->get(vm, frame); - } - return VAR(std::move(args)); - } - - void set(VM* vm, Frame* frame, PyVar val) const{ - val = vm->asIter(val); - BaseIter* iter = vm->PyIter_AS_C(val); - for(int i=0; itp_star_wrapper)){ - auto& star = _CAST(StarWrapper&, objs[i]); - if(star.rvalue) vm->ValueError("can't use starred expression here"); - if(i != objs.size()-1) vm->ValueError("* can only be used at the end"); - auto ref = vm->PyRef_AS_C(star.obj); - List list; - while((x = iter->next()) != nullptr) list.push_back(x); - ref->set(vm, frame, VAR(std::move(list))); - return; - }else{ - x = iter->next(); - if(x == nullptr) vm->ValueError("not enough values to unpack"); - vm->PyRef_AS_C(objs[i])->set(vm, frame, x); - } - } - PyVarOrNull x = iter->next(); - if(x != nullptr) vm->ValueError("too many values to unpack"); - } - - void del(VM* vm, Frame* frame) const{ - for(int i=0; iPyRef_AS_C(objs[i])->del(vm, frame); - } -}; - - -template -PyVarRef VM::PyRef(P&& value) { - static_assert(std::is_base_of_v>); - return new_object(tp_ref, std::forward

(value)); -} - -const BaseRef* VM::PyRef_AS_C(const PyVar& obj) -{ - if(!is_type(obj, tp_ref)) TypeError("expected an l-value"); - return static_cast(obj->value()); -} - -/***** Frame's Impl *****/ -inline void Frame::try_deref(VM* vm, PyVar& v){ - if(is_type(v, vm->tp_ref)) v = vm->PyRef_AS_C(v)->get(vm, this); -} - -} // namespace pkpy \ No newline at end of file diff --git a/src/str.h b/src/str.h index 7c7ccd88..e458dad1 100644 --- a/src/str.h +++ b/src/str.h @@ -1,67 +1,187 @@ #pragma once #include "common.h" +#include "memory.h" +#include "vector.h" namespace pkpy { -typedef std::stringstream StrStream; +// TODO: check error if return 0 +inline int utf8len(unsigned char c, bool suppress=false){ + if((c & 0b10000000) == 0) return 1; + if((c & 0b11100000) == 0b11000000) return 2; + if((c & 0b11110000) == 0b11100000) return 3; + if((c & 0b11111000) == 0b11110000) return 4; + if((c & 0b11111100) == 0b11111000) return 5; + if((c & 0b11111110) == 0b11111100) return 6; + if(!suppress) throw std::runtime_error("invalid utf8 char: " + std::to_string(c)); + return 0; +} -class Str : public std::string { - mutable std::vector* _u8_index = nullptr; +struct Str{ + int size; + bool is_ascii; + char* data; - void utf8_lazy_init() const{ - if(_u8_index != nullptr) return; - _u8_index = new std::vector(); - _u8_index->reserve(size()); - if(size() > 65535) throw std::runtime_error("str has more than 65535 bytes."); - for(uint16_t i = 0; i < size(); i++){ - // https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80 - if((at(i) & 0xC0) != 0x80) _u8_index->push_back(i); + Str(): size(0), is_ascii(true), data(nullptr) {} + + Str(int size, bool is_ascii): size(size), is_ascii(is_ascii) { + data = (char*)pool64.alloc(size); + } + +#define STR_INIT() \ + data = (char*)pool64.alloc(size); \ + for(int i=0; i(*s._u8_index); - } - } - Str(Str&& s) : std::string(std::move(s)) { - delete _u8_index; - _u8_index = s._u8_index; - s._u8_index = nullptr; + Str(const std::string& s): size(s.size()), is_ascii(true) { + STR_INIT() } - i64 _to_u8_index(i64 index) const{ - utf8_lazy_init(); - auto p = std::lower_bound(_u8_index->begin(), _u8_index->end(), index); - if(p != _u8_index->end() && *p != index) UNREACHABLE(); - return p - _u8_index->begin(); + Str(std::string_view s): size(s.size()), is_ascii(true) { + STR_INIT() } - int u8_length() const { - utf8_lazy_init(); - return _u8_index->size(); + Str(const char* s): size(strlen(s)), is_ascii(true) { + STR_INIT() } - Str u8_getitem(int i) const{ - return u8_substr(i, i+1); + Str(const char* s, int len): size(len), is_ascii(true) { + STR_INIT() } - Str u8_substr(int start, int end) const{ - utf8_lazy_init(); - if(start >= end) return Str(); - int c_end = end >= _u8_index->size() ? size() : _u8_index->at(end); - return substr(_u8_index->at(start), c_end - _u8_index->at(start)); +#undef STR_INIT + + Str(const Str& other): size(other.size), is_ascii(other.is_ascii) { + data = (char*)pool64.alloc(size); + memcpy(data, other.data, size); + } + + Str(Str&& other): size(other.size), is_ascii(other.is_ascii), data(other.data) { + other.data = nullptr; + other.size = 0; + } + + const char* begin() const { return data; } + const char* end() const { return data + size; } + char operator[](int idx) const { return data[idx]; } + int length() const { return size; } + bool empty() const { return size == 0; } + size_t hash() const{ return std::hash()(sv()); } + + Str& operator=(const Str& other){ + if(data!=nullptr) pool64.dealloc(data); + size = other.size; + is_ascii = other.is_ascii; + data = (char*)pool64.alloc(size); + memcpy(data, other.data, size); + return *this; + } + + Str& operator=(Str&& other) noexcept{ + if(data!=nullptr) pool64.dealloc(data); + size = other.size; + is_ascii = other.is_ascii; + data = other.data; + other.data = nullptr; + return *this; + } + + ~Str(){ + if(data!=nullptr) pool64.dealloc(data); + } + + Str operator+(const Str& other) const { + Str ret(size + other.size, is_ascii && other.is_ascii); + memcpy(ret.data, data, size); + memcpy(ret.data + size, other.data, other.size); + return ret; + } + + Str operator+(const char* p) const { + Str other(p); + return *this + other; + } + + friend Str operator+(const char* p, const Str& str){ + Str other(p); + return other + str; + } + + friend std::ostream& operator<<(std::ostream& os, const Str& str){ + if(str.data!=nullptr) os.write(str.data, str.size); + return os; + } + + bool operator==(const Str& other) const { + if(size != other.size) return false; + return memcmp(data, other.data, size) == 0; + } + + bool operator!=(const Str& other) const { + if(size != other.size) return true; + return memcmp(data, other.data, size) != 0; + } + + bool operator<(const Str& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret < 0; + return size < other.size; + } + + bool operator<(const std::string_view& other) const { + int ret = strncmp(data, other.data(), std::min(size, (int)other.size())); + if(ret != 0) return ret < 0; + return size < (int)other.size(); + } + + friend bool operator<(const std::string_view& other, const Str& str){ + return str > other; + } + + bool operator>(const Str& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret > 0; + return size > other.size; + } + + bool operator<=(const Str& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret < 0; + return size <= other.size; + } + + bool operator>=(const Str& other) const { + int ret = strncmp(data, other.data, std::min(size, other.size)); + if(ret != 0) return ret > 0; + return size >= other.size; + } + + Str substr(int start, int len) const { + Str ret(len, is_ascii); + memcpy(ret.data, data + start, len); + return ret; + } + + char* c_str_dup() const { + char* p = (char*)malloc(size + 1); + memcpy(p, data, size); + p[size] = 0; + return p; + } + + std::string_view sv() const { + return std::string_view(data, size); + } + + std::string str() const { + return std::string(data, size); } Str lstrip() const { - Str copy(*this); + std::string copy(data, size); copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) { // std::isspace(c) does not working on windows (Debug) return c != ' ' && c != '\t' && c != '\r' && c != '\n'; @@ -69,12 +189,8 @@ public: return Str(copy); } - size_t hash() const { - return std::hash()(*this); - } - - Str escape(bool single_quote) const { - StrStream ss; + Str escape(bool single_quote=true) const { + std::stringstream ss; ss << (single_quote ? '\'' : '"'); for (int i=0; ioperator[](i); @@ -104,30 +220,78 @@ public: return ss.str(); } - Str& operator=(const Str& s){ - this->std::string::operator=(s); - delete _u8_index; - if(s._u8_index != nullptr){ - _u8_index = new std::vector(*s._u8_index); + int index(const Str& sub, int start=0) const { + auto p = std::search(data + start, data + size, sub.data, sub.data + sub.size); + if(p == data + size) return -1; + return p - data; + } + + Str replace(const Str& old, const Str& new_) const { + std::stringstream ss; + int start = 0; + while(true){ + int i = index(old, start); + if(i == -1){ + ss << substr(start, size - start); + break; + } + ss << substr(start, i - start); + ss << new_; + start = i + old.size; } - return *this; + return ss.str(); } - Str& operator=(Str&& s){ - this->std::string::operator=(std::move(s)); - delete _u8_index; - this->_u8_index = s._u8_index; - s._u8_index = nullptr; - return *this; + /*************unicode*************/ + + // TODO: check error + int _unicode_index_to_byte(int i) const{ + if(is_ascii) return i; + int j = 0; + while(i > 0){ + j += utf8len(data[j]); + i--; + } + return j; } - ~Str(){ delete _u8_index;} + int _byte_index_to_unicode(int n) const{ + if(is_ascii) return n; + int cnt = 0; + for(int i=0; i +inline std::string fmt(Args&&... args) { + std::stringstream ss; + (ss << ... << args); + return ss.str(); +} + const uint32_t kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70303,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560}; const uint32_t kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,70301,70312,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101}; -bool is_unicode_Lo_char(uint32_t c) { +inline bool is_unicode_Lo_char(uint32_t c) { auto index = std::lower_bound(kLoRangeA, kLoRangeA + 476, c) - kLoRangeA; if(c == kLoRangeA[index]) return true; index -= 1; @@ -142,15 +306,19 @@ struct StrName { StrName(uint16_t index): index(index) {} StrName(const char* s): index(get(s).index) {} StrName(const Str& s){ - if(s._cached_sn_index != 0){ - index = s._cached_sn_index; - } else { - index = get(s).index; - } + index = get(s.sv()).index; } - const Str& str() const { return _r_interned[index-1]; } + std::string_view sv() const { return _r_interned[index-1].sv(); } bool empty() const { return index == 0; } + friend std::ostream& operator<<(std::ostream& os, const StrName& sn){ + return os << sn.sv(); + } + + Str escape() const { + return _r_interned[index-1].escape(); + } + bool operator==(const StrName& other) const noexcept { return this->index == other.index; } @@ -170,11 +338,7 @@ struct StrName { static std::map> _interned; static std::vector _r_interned; - static StrName get(const Str& s){ - return get(s.c_str()); - } - - static StrName get(const char* s){ + static StrName get(std::string_view s){ auto it = _interned.find(s); if(it != _interned.end()) return StrName(it->second); uint16_t index = (uint16_t)(_r_interned.size() + 1); @@ -184,8 +348,33 @@ struct StrName { } }; -std::map> StrName::_interned; -std::vector StrName::_r_interned; +struct FastStrStream{ + pod_vector parts; + + FastStrStream& operator<<(const Str& s){ + parts.push_back(&s); + return *this; + } + + Str str() const{ + int len = 0; + bool is_ascii = true; + for(auto& s: parts){ + len += s->length(); + is_ascii &= s->is_ascii; + } + Str result(len, is_ascii); + char* p = result.data; + for(auto& s: parts){ + memcpy(p, s->data, s->length()); + p += s->length(); + } + return result; + } +}; + +inline std::map> StrName::_interned; +inline std::vector StrName::_r_interned; const StrName __class__ = StrName::get("__class__"); const StrName __base__ = StrName::get("__base__"); @@ -209,10 +398,13 @@ const StrName __call__ = StrName::get("__call__"); const StrName m_eval = StrName::get("eval"); const StrName m_self = StrName::get("self"); +const StrName m_dict = StrName::get("dict"); +const StrName m_set = StrName::get("set"); +const StrName m_add = StrName::get("add"); const StrName __enter__ = StrName::get("__enter__"); const StrName __exit__ = StrName::get("__exit__"); -const StrName CMP_SPECIAL_METHODS[] = { +const StrName COMPARE_SPECIAL_METHODS[] = { StrName::get("__lt__"), StrName::get("__le__"), StrName::get("__eq__"), StrName::get("__ne__"), StrName::get("__gt__"), StrName::get("__ge__") }; diff --git a/src/tuplelist.h b/src/tuplelist.h index 5594a563..0abc993f 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -3,108 +3,84 @@ #include "common.h" #include "memory.h" #include "str.h" +#include "vector.h" namespace pkpy { - using List = std::vector; - class Args { - static THREAD_LOCAL SmallArrayPool _pool; +using List = pod_vector; - PyVar* _args; - int _size; +class Args { + PyObject** _args; + int _size; - inline void _alloc(int n){ - this->_args = _pool.alloc(n); - this->_size = n; - } + void _alloc(int n){ + this->_args = (n==0) ? nullptr : (PyObject**)pool64.alloc(n * sizeof(void*)); + this->_size = n; + } - public: - Args(int n){ _alloc(n); } +public: + Args(int n){ _alloc(n); } - Args(const Args& other){ - _alloc(other._size); - for(int i=0; i<_size; i++) _args[i] = other._args[i]; - } + Args(const Args& other){ + _alloc(other._size); + for(int i=0; i<_size; i++) _args[i] = other._args[i]; + } - Args(Args&& other) noexcept { - this->_args = other._args; - this->_size = other._size; - other._args = nullptr; - other._size = 0; - } + Args(Args&& other) noexcept { + this->_args = other._args; + this->_size = other._size; + other._args = nullptr; + other._size = 0; + } - static pkpy::Args from_list(List&& other) noexcept { - Args ret(other.size()); - memcpy((void*)ret._args, (void*)other.data(), sizeof(PyVar)*ret.size()); - memset((void*)other.data(), 0, sizeof(PyVar)*ret.size()); - other.clear(); - return ret; - } + Args(std::initializer_list list) : Args(list.size()){ + int i = 0; + for(PyObject* p : list) _args[i++] = p; + } - PyVar& operator[](int i){ return _args[i]; } - const PyVar& operator[](int i) const { return _args[i]; } + Args(List&& other) noexcept : Args(other.size()){ + for(int i=0; i<_size; i++) _args[i] = other[i]; + other.clear(); + } - Args& operator=(Args&& other) noexcept { - _pool.dealloc(_args, _size); - this->_args = other._args; - this->_size = other._size; - other._args = nullptr; - other._size = 0; - return *this; - } + PyObject*& operator[](int i){ return _args[i]; } + PyObject* operator[](int i) const { return _args[i]; } - inline int size() const { return _size; } + Args& operator=(Args&& other) noexcept { + if(_args!=nullptr) pool64.dealloc(_args); + this->_args = other._args; + this->_size = other._size; + other._args = nullptr; + other._size = 0; + return *this; + } - List move_to_list() noexcept { - List ret(_size); - memcpy((void*)ret.data(), (void*)_args, sizeof(PyVar)*_size); - memset((void*)_args, 0, sizeof(PyVar)*_size); - return ret; - } + int size() const { return _size; } - void extend_self(const PyVar& self){ - static_assert(std::is_standard_layout_v); - PyVar* old_args = _args; - int old_size = _size; - _alloc(old_size+1); - _args[0] = self; - if(old_size == 0) return; + List to_list() noexcept { + List ret(_size); + // TODO: use move/memcpy + for(int i=0; i<_size; i++) ret[i] = _args[i]; + return ret; + } - memcpy((void*)(_args+1), (void*)old_args, sizeof(PyVar)*old_size); - memset((void*)old_args, 0, sizeof(PyVar)*old_size); - _pool.dealloc(old_args, old_size); - } + void extend_self(PyObject* self){ + PyObject** old_args = _args; + int old_size = _size; + _alloc(old_size+1); + _args[0] = self; + for(int i=0; i - Args one_arg(T&& a) { - Args ret(1); - ret[0] = std::forward(a); - return ret; - } +typedef Args Tuple; - template - Args two_args(T1&& a, T2&& b) { - Args ret(2); - ret[0] = std::forward(a); - ret[1] = std::forward(b); - return ret; - } - - template - Args three_args(T1&& a, T2&& b, T3&& c) { - Args ret(3); - ret[0] = std::forward(a); - ret[1] = std::forward(b); - ret[2] = std::forward(c); - return ret; - } - - typedef Args Tuple; - THREAD_LOCAL SmallArrayPool Args::_pool; } // namespace pkpy \ No newline at end of file diff --git a/src/vector.h b/src/vector.h new file mode 100644 index 00000000..8caeb362 --- /dev/null +++ b/src/vector.h @@ -0,0 +1,126 @@ +#pragma once + +#include "common.h" +#include "memory.h" + +namespace pkpy{ + +template +struct pod_vector{ + static_assert(128 % sizeof(T) == 0); + static_assert(std::is_pod_v); + static constexpr int N = 128 / sizeof(T); + static_assert(N > 4); + int _size; + int _capacity; + T* _data; + + pod_vector(): _size(0), _capacity(N) { + _data = (T*)pool128.alloc(_capacity * sizeof(T)); + } + + pod_vector(int size): _size(size), _capacity(std::max(N, size)) { + _data = (T*)pool128.alloc(_capacity * sizeof(T)); + } + + pod_vector(const pod_vector& other): _size(other._size), _capacity(other._capacity) { + _data = (T*)pool128.alloc(_capacity * sizeof(T)); + memcpy(_data, other._data, sizeof(T) * _size); + } + + pod_vector(pod_vector&& other) noexcept { + _size = other._size; + _capacity = other._capacity; + _data = other._data; + other._data = nullptr; + } + + pod_vector& operator=(pod_vector&& other) noexcept { + if(_data!=nullptr) pool128.dealloc(_data); + _size = other._size; + _capacity = other._capacity; + _data = other._data; + other._data = nullptr; + return *this; + } + + // remove copy assignment + pod_vector& operator=(const pod_vector& other) = delete; + + template + void push_back(__ValueT&& t) { + if (_size == _capacity) reserve(_capacity*2); + _data[_size++] = std::forward<__ValueT>(t); + } + + void reserve(int cap){ + if(cap < _capacity) return; + _capacity = cap; + T* old_data = _data; + _data = (T*)pool128.alloc(_capacity * sizeof(T)); + if(old_data!=nullptr){ + memcpy(_data, old_data, sizeof(T) * _size); + pool128.dealloc(old_data); + } + } + + void pop_back() { _size--; } + void extend(const pod_vector& other){ + for(int i=0; i + void insert(int i, __ValueT&& val){ + if (_size == _capacity) reserve(_capacity*2); + for(int j=_size; j>i; j--) _data[j] = _data[j-1]; + _data[i] = std::forward<__ValueT>(val); + _size++; + } + + void erase(int i){ + for(int j=i; j<_size-1; j++) _data[j] = _data[j+1]; + _size--; + } + + ~pod_vector() { + if(_data!=nullptr) pool128.dealloc(_data); + } +}; + + +template > +class stack{ + Container vec; +public: + void push(const T& t){ vec.push_back(t); } + void push(T&& t){ vec.push_back(std::move(t)); } + void pop(){ vec.pop_back(); } + void clear(){ vec.clear(); } + bool empty() const { return vec.empty(); } + size_t size() const { return vec.size(); } + T& top(){ return vec.back(); } + const T& top() const { return vec.back(); } + T popx(){ T t = std::move(vec.back()); vec.pop_back(); return t; } + const Container& data() const { return vec; } +}; + +template +using pod_stack = stack>; +} // namespace pkpy \ No newline at end of file diff --git a/src/vm.h b/src/vm.h index 22523e45..f4eb40e0 100644 --- a/src/vm.h +++ b/src/vm.h @@ -1,40 +1,50 @@ #pragma once +#include "common.h" #include "frame.h" #include "error.h" +#include "gc.h" +#include "memory.h" +#include "obj.h" +#include "str.h" +#include namespace pkpy{ +Str _read_file_cwd(const Str& name, bool* ok); + #define DEF_NATIVE_2(ctype, ptype) \ - template<> ctype py_cast(VM* vm, const PyVar& obj) { \ + template<> inline ctype py_cast(VM* vm, PyObject* obj) { \ vm->check_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype _py_cast(VM* vm, const PyVar& obj) { \ + template<> inline ctype _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype& py_cast(VM* vm, const PyVar& obj) { \ + template<> inline ctype& py_cast(VM* vm, PyObject* obj) { \ vm->check_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ - template<> ctype& _py_cast(VM* vm, const PyVar& obj) { \ + template<> inline ctype& _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ - PyVar py_var(VM* vm, const ctype& value) { return vm->new_object(vm->ptype, value);} \ - PyVar py_var(VM* vm, ctype&& value) { return vm->new_object(vm->ptype, std::move(value));} + inline PyObject* py_var(VM* vm, const ctype& value) { return vm->heap.gcnew(vm->ptype, value);} \ + inline PyObject* py_var(VM* vm, ctype&& value) { return vm->heap.gcnew(vm->ptype, std::move(value));} + class Generator: public BaseIter { - std::unique_ptr frame; + Frame_ frame; int state; // 0,1,2 public: - Generator(VM* vm, std::unique_ptr&& frame) - : BaseIter(vm, nullptr), frame(std::move(frame)), state(0) {} + Generator(VM* vm, Frame_&& frame) + : BaseIter(vm), frame(std::move(frame)), state(0) {} - PyVar next(); + PyObject* next() override; + void _gc_mark() const override; }; struct PyTypeInfo{ - PyVar obj; + PyObject* obj; Type base; Str name; }; @@ -42,161 +52,161 @@ struct PyTypeInfo{ class VM { VM* vm; // self reference for simplify code public: - std::stack< std::unique_ptr > callstack; - PyVar _py_op_call; - PyVar _py_op_yield; + ManagedHeap heap; + stack< Frame_ > callstack; std::vector _all_types; - PyVar run_frame(Frame* frame); + PyObject* run_frame(Frame* frame); - NameDict _modules; // loaded modules - std::map _lazy_modules; // lazy loaded modules - PyVar None, True, False, Ellipsis; + NameDict _modules; // loaded modules + std::map _lazy_modules; // lazy loaded modules - bool use_stdio; + PyObject* _py_op_call; + PyObject* _py_op_yield; + PyObject* _py_null; + PyObject* None; + PyObject* True; + PyObject* False; + PyObject* Ellipsis; + PyObject* builtins; // builtins module + PyObject* _main; // __main__ module + + std::stringstream _stdout_buffer; + std::stringstream _stderr_buffer; std::ostream* _stdout; std::ostream* _stderr; - - PyVar builtins; // builtins module - PyVar _main; // __main__ module - int recursionlimit = 1000; - VM(bool use_stdio){ + // for quick access + Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str; + Type tp_list, tp_tuple; + Type tp_function, tp_native_function, tp_iterator, tp_bound_method; + Type tp_slice, tp_range, tp_module; + Type tp_super, tp_exception, tp_star_wrapper; + + VM(bool use_stdio) : heap(this){ this->vm = this; - this->use_stdio = use_stdio; - if(use_stdio){ - this->_stdout = &std::cout; - this->_stderr = &std::cerr; - }else{ - this->_stdout = new StrStream(); - this->_stderr = new StrStream(); - } - + this->_stdout = use_stdio ? &std::cout : &_stdout_buffer; + this->_stderr = use_stdio ? &std::cerr : &_stderr_buffer; init_builtin_types(); - // for(int i=0; i<128; i++) _ascii_str_pool[i] = new_object(tp_str, std::string(1, (char)i)); } - PyVar asStr(const PyVar& obj){ - PyVarOrNull f = getattr(obj, __str__, false, true); - if(f != nullptr) return call(f); - return asRepr(obj); - } + bool is_stdio_used() const { return _stdout == &std::cout; } - inline Frame* top_frame() const { -#if PK_EXTRA_CHECK + Frame* top_frame() const { +#if DEBUG_EXTRA_CHECK if(callstack.empty()) UNREACHABLE(); #endif return callstack.top().get(); } - PyVar asIter(const PyVar& obj){ - if(is_type(obj, tp_native_iterator)) return obj; - PyVarOrNull iter_f = getattr(obj, __iter__, false, true); - if(iter_f != nullptr) return call(iter_f); - TypeError(OBJ_NAME(_t(obj)).escape(true) + " object is not iterable"); + PyObject* asStr(PyObject* obj){ + PyObject* self; + PyObject* f = get_unbound_method(obj, __str__, &self, false); + if(self != _py_null) return call(f, Args{self}); + return asRepr(obj); + } + + PyObject* asIter(PyObject* obj){ + if(is_type(obj, tp_iterator)) return obj; + PyObject* self; + PyObject* iter_f = get_unbound_method(obj, __iter__, &self, false); + if(self != _py_null) return call(iter_f, Args{self}); + TypeError(OBJ_NAME(_t(obj)).escape() + " object is not iterable"); return nullptr; } - PyVar asList(const PyVar& iterable){ + PyObject* asList(PyObject* iterable){ if(is_type(iterable, tp_list)) return iterable; - return call(_t(tp_list), one_arg(iterable)); + return call(_t(tp_list), Args{iterable}); } - PyVar* find_name_in_mro(PyObject* cls, StrName name){ - PyVar* val; + PyObject* find_name_in_mro(PyObject* cls, StrName name){ + PyObject* val; do{ val = cls->attr().try_get(name); if(val != nullptr) return val; - Type cls_t = static_cast*>(cls)->_value; - Type base = _all_types[cls_t.index].base; + Type cls_t = OBJ_GET(Type, cls); + Type base = _all_types[cls_t].base; if(base.index == -1) break; - cls = _all_types[base.index].obj.get(); + cls = _all_types[base].obj; }while(true); return nullptr; } - bool isinstance(const PyVar& obj, Type cls_t){ + bool isinstance(PyObject* obj, Type cls_t){ Type obj_t = OBJ_GET(Type, _t(obj)); do{ if(obj_t == cls_t) return true; - Type base = _all_types[obj_t.index].base; + Type base = _all_types[obj_t].base; if(base.index == -1) break; obj_t = base; }while(true); return false; } - PyVar fast_call(StrName name, Args&& args){ - PyVar* val = find_name_in_mro(_t(args[0]).get(), name); - if(val != nullptr) return call(*val, std::move(args)); + PyObject* fast_call(StrName name, Args&& args){ + PyObject* val = find_name_in_mro(_t(args[0]), name); + if(val != nullptr) return call(val, std::move(args)); AttributeError(args[0], name); return nullptr; } - inline PyVar call(const PyVar& _callable){ - return call(_callable, no_arg(), no_arg(), false); - } - template - inline std::enable_if_t, Args>, PyVar> - call(const PyVar& _callable, ArgT&& args){ - return call(_callable, std::forward(args), no_arg(), false); + std::enable_if_t, Args>, PyObject*> + call(PyObject* callable, ArgT&& args){ + return call(callable, std::forward(args), no_arg(), false); } - template - inline std::enable_if_t, Args>, PyVar> - call(const PyVar& obj, const StrName name, ArgT&& args){ - return call(getattr(obj, name, true, true), std::forward(args), no_arg(), false); - } - - inline PyVar call(const PyVar& obj, StrName name){ - return call(getattr(obj, name, true, true), no_arg(), no_arg(), false); - } - - - // repl mode is only for setting `frame->id` to 0 - PyVarOrNull exec(Str source, Str filename, CompileMode mode, PyVar _module=nullptr){ + PyObject* exec(Str source, Str filename, CompileMode mode, PyObject* _module=nullptr){ if(_module == nullptr) _module = _main; try { CodeObject_ code = compile(source, filename, mode); +#if DEBUG_DIS_EXEC + if(_module == _main) std::cout << disassemble(code) << '\n'; +#endif return _exec(code, _module); }catch (const Exception& e){ *_stderr << e.summary() << '\n'; - }catch (const std::exception& e) { + + } +#if !DEBUG_FULL_EXCEPTION + catch (const std::exception& e) { *_stderr << "An std::exception occurred! It could be a bug.\n"; *_stderr << e.what() << '\n'; } +#endif callstack = {}; return nullptr; } template - inline std::unique_ptr _new_frame(Args&&... args){ + Frame_ _new_frame(Args&&... args){ if(callstack.size() > recursionlimit){ _error("RecursionError", "maximum recursion depth exceeded"); } - return std::make_unique(std::forward(args)...); + Frame* frame = new(pool128.alloc()) Frame(std::forward(args)...); + return Frame_(frame); } template - inline PyVar _exec(Args&&... args){ + PyObject* _exec(Args&&... args){ callstack.push(_new_frame(std::forward(args)...)); return _exec(); } - PyVar property(NativeFuncRaw fget){ - PyVar p = builtins->attr("property"); - PyVar method = new_object(tp_native_function, NativeFunc(fget, 1, false)); - return call(p, one_arg(method)); + PyObject* property(NativeFuncRaw fget){ + PyObject* p = builtins->attr("property"); + PyObject* method = heap.gcnew(tp_native_function, NativeFunc(fget, 1, false)); + return call(p, Args{method}); } - PyVar new_type_object(PyVar mod, StrName name, Type base){ - PyVar obj = make_sp>(tp_type, _all_types.size()); + PyObject* new_type_object(PyObject* mod, StrName name, Type base){ + PyObject* obj = heap._new(tp_type, _all_types.size()); PyTypeInfo info{ - .obj = obj, - .base = base, - .name = (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.str()): name.str() + obj, + base, + (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.sv()): name.sv() }; if(mod != nullptr) mod->attr().set(name, obj); _all_types.push_back(info); @@ -204,41 +214,17 @@ public: } Type _new_type_object(StrName name, Type base=0) { - PyVar obj = new_type_object(nullptr, name, base); + PyObject* obj = new_type_object(nullptr, name, base); return OBJ_GET(Type, obj); } - template - inline PyVar new_object(const PyVar& type, const T& _value) { -#if PK_EXTRA_CHECK - if(!is_type(type, tp_type)) UNREACHABLE(); -#endif - return make_sp>>(OBJ_GET(Type, type), _value); - } - template - inline PyVar new_object(const PyVar& type, T&& _value) { -#if PK_EXTRA_CHECK - if(!is_type(type, tp_type)) UNREACHABLE(); -#endif - return make_sp>>(OBJ_GET(Type, type), std::move(_value)); - } - - template - inline PyVar new_object(Type type, const T& _value) { - return make_sp>>(type, _value); - } - template - inline PyVar new_object(Type type, T&& _value) { - return make_sp>>(type, std::move(_value)); - } - - PyVar _find_type(const Str& type){ - PyVar* obj = builtins->attr().try_get(type); - if(!obj){ + PyObject* _find_type(const Str& type){ + PyObject* obj = builtins->attr().try_get(type); + if(obj == nullptr){ for(auto& t: _all_types) if(t.name == type) return t.obj; - throw std::runtime_error("type not found: " + type); + throw std::runtime_error(fmt("type not found: ", type)); } - return *obj; + return obj; } template @@ -274,22 +260,15 @@ public: return index; } - // for quick access - Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str; - Type tp_list, tp_tuple; - Type tp_function, tp_native_function, tp_native_iterator, tp_bound_method; - Type tp_slice, tp_range, tp_module, tp_ref; - Type tp_super, tp_exception, tp_star_wrapper; - template - inline PyVar PyIter(P&& value) { + PyObject* PyIter(P&& value) { static_assert(std::is_base_of_v>); - return new_object(tp_native_iterator, std::forward

(value)); + return heap.gcnew

(tp_iterator, std::forward

(value)); } - inline BaseIter* PyIter_AS_C(const PyVar& obj) + BaseIter* PyIter_AS_C(PyObject* obj) { - check_type(obj, tp_native_iterator); + check_type(obj, tp_iterator); return static_cast(obj->value()); } @@ -304,79 +283,63 @@ public: else throw UnhandledException(); } -public: void IOError(const Str& msg) { _error("IOError", msg); } void NotImplementedError(){ _error("NotImplementedError", ""); } void TypeError(const Str& msg){ _error("TypeError", msg); } void ZeroDivisionError(){ _error("ZeroDivisionError", "division by zero"); } void IndexError(const Str& msg){ _error("IndexError", msg); } void ValueError(const Str& msg){ _error("ValueError", msg); } - void NameError(StrName name){ _error("NameError", "name " + name.str().escape(true) + " is not defined"); } + void NameError(StrName name){ _error("NameError", fmt("name ", name.escape() + " is not defined")); } - void AttributeError(PyVar obj, StrName name){ - _error("AttributeError", "type " + OBJ_NAME(_t(obj)).escape(true) + " has no attribute " + name.str().escape(true)); + void AttributeError(PyObject* obj, StrName name){ + // OBJ_NAME calls getattr, which may lead to a infinite recursion + _error("AttributeError", fmt("type ", OBJ_NAME(_t(obj)).escape(), " has no attribute ", name.escape())); } void AttributeError(Str msg){ _error("AttributeError", msg); } - inline void check_type(const PyVar& obj, Type type){ + void check_type(PyObject* obj, Type type){ if(is_type(obj, type)) return; - TypeError("expected " + OBJ_NAME(_t(type)).escape(true) + ", but got " + OBJ_NAME(_t(obj)).escape(true)); + TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", but got " + OBJ_NAME(_t(obj)).escape()); } - inline PyVar& _t(Type t){ + PyObject* _t(Type t){ return _all_types[t.index].obj; } - inline PyVar& _t(const PyVar& obj){ + PyObject* _t(PyObject* obj){ if(is_int(obj)) return _t(tp_int); if(is_float(obj)) return _t(tp_float); return _all_types[OBJ_GET(Type, _t(obj->type)).index].obj; } - ~VM() { - if(!use_stdio){ - delete _stdout; - delete _stderr; - } - } - - inline PyVarOrNull getattr(const PyVar& obj, StrName name, bool throw_err=true, bool class_only=false){ - return getattr(&obj, name, throw_err, class_only); - } - template - inline void setattr(PyVar& obj, StrName name, T&& value){ - setattr(&obj, name, std::forward(value)); - } + ~VM() { heap.collect(); } CodeObject_ compile(Str source, Str filename, CompileMode mode); - void post_init(); - PyVar num_negated(const PyVar& obj); - f64 num_to_float(const PyVar& obj); - const PyVar& asBool(const PyVar& obj); - i64 hash(const PyVar& obj); - PyVar asRepr(const PyVar& obj); - PyVar new_module(StrName name); + PyObject* num_negated(PyObject* obj); + f64 num_to_float(PyObject* obj); + bool asBool(PyObject* obj); + i64 hash(PyObject* obj); + PyObject* asRepr(PyObject* obj); + PyObject* new_module(StrName name); Str disassemble(CodeObject_ co); void init_builtin_types(); - PyVar call(const PyVar& _callable, Args args, const Args& kwargs, bool opCall); + PyObject* call(PyObject* callable, Args args, const Args& kwargs, bool opCall); void unpack_args(Args& args); - PyVarOrNull getattr(const PyVar* obj, StrName name, bool throw_err=true, bool class_only=false); + PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true); + PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err=true, bool fallback=false); template - void setattr(PyVar* obj, StrName name, T&& value); + void setattr(PyObject* obj, StrName name, T&& value); template - void bind_method(PyVar obj, Str funcName, NativeFuncRaw fn); + void bind_method(PyObject*, Str, NativeFuncRaw); template - void bind_func(PyVar obj, Str funcName, NativeFuncRaw fn); - void _error(Exception e); - PyVar _exec(); - - template - PyVarRef PyRef(P&& value); - const BaseRef* PyRef_AS_C(const PyVar& obj); + void bind_func(PyObject*, Str, NativeFuncRaw); + void _error(Exception); + PyObject* _exec(); + void post_init(); }; -PyVar NativeFunc::operator()(VM* vm, Args& args) const{ +inline PyObject* NativeFunc::operator()(VM* vm, Args& args) const{ int args_size = args.size() - (int)method; // remove self if(argc != -1 && args_size != argc) { vm->TypeError("expected " + std::to_string(argc) + " arguments, but got " + std::to_string(args_size)); @@ -384,45 +347,12 @@ PyVar NativeFunc::operator()(VM* vm, Args& args) const{ return f(vm, args); } -void CodeObject::optimize(VM* vm){ - std::vector keys; - for(auto& p: names) if(p.second == NAME_LOCAL) keys.push_back(p.first); - uint32_t base_n = (uint32_t)(keys.size() / kLocalsLoadFactor + 0.5); +inline void CodeObject::optimize(VM* vm){ + // here we simple pass all names, but only some of them are NAME_LOCAL + // TODO: ... + uint32_t base_n = (uint32_t)(names.size() / kLocalsLoadFactor + 0.5); perfect_locals_capacity = find_next_capacity(base_n); - perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, keys); - - for(int i=1; inum_negated(consts[pos]); - } - - if(i>=2 && codes[i].op == OP_BUILD_INDEX){ - const Bytecode& a = codes[i-1]; - const Bytecode& x = codes[i-2]; - if(codes[i].arg == 1){ - if(a.op == OP_LOAD_NAME && x.op == OP_LOAD_NAME){ - codes[i].op = OP_FAST_INDEX; - }else continue; - }else{ - if(a.op == OP_LOAD_NAME_REF && x.op == OP_LOAD_NAME_REF){ - codes[i].op = OP_FAST_INDEX_REF; - }else continue; - } - codes[i].arg = (a.arg << 16) | x.arg; - codes[i-1].op = OP_NO_OP; - codes[i-2].op = OP_NO_OP; - } - } - - // pre-compute sn in co_consts - for(int i=0; itp_str)){ - Str& s = OBJ_GET(Str, consts[i]); - s._cached_sn_index = StrName::get(s.c_str()).index; - } - } + perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, names); } DEF_NATIVE_2(Str, tp_str) @@ -436,13 +366,13 @@ DEF_NATIVE_2(Slice, tp_slice) DEF_NATIVE_2(Exception, tp_exception) DEF_NATIVE_2(StarWrapper, tp_star_wrapper) -#define PY_CAST_INT(T) \ -template<> T py_cast(VM* vm, const PyVar& obj){ \ - vm->check_type(obj, vm->tp_int); \ - return (T)(obj.bits >> 2); \ -} \ -template<> T _py_cast(VM* vm, const PyVar& obj){ \ - return (T)(obj.bits >> 2); \ +#define PY_CAST_INT(T) \ +template<> inline T py_cast(VM* vm, PyObject* obj){ \ + vm->check_type(obj, vm->tp_int); \ + return (T)(BITS(obj) >> 2); \ +} \ +template<> inline T _py_cast(VM* vm, PyObject* obj){ \ + return (T)(BITS(obj) >> 2); \ } PY_CAST_INT(char) @@ -457,38 +387,38 @@ PY_CAST_INT(unsigned long) PY_CAST_INT(unsigned long long) -template<> float py_cast(VM* vm, const PyVar& obj){ +template<> inline float py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_float); - i64 bits = obj.bits; + i64 bits = BITS(obj); bits = (bits >> 2) << 2; - return __8B(bits)._float; + return BitsCvt(bits)._float; } -template<> float _py_cast(VM* vm, const PyVar& obj){ - i64 bits = obj.bits; +template<> inline float _py_cast(VM* vm, PyObject* obj){ + i64 bits = BITS(obj); bits = (bits >> 2) << 2; - return __8B(bits)._float; + return BitsCvt(bits)._float; } -template<> double py_cast(VM* vm, const PyVar& obj){ +template<> inline double py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_float); - i64 bits = obj.bits; + i64 bits = BITS(obj); bits = (bits >> 2) << 2; - return __8B(bits)._float; + return BitsCvt(bits)._float; } -template<> double _py_cast(VM* vm, const PyVar& obj){ - i64 bits = obj.bits; +template<> inline double _py_cast(VM* vm, PyObject* obj){ + i64 bits = BITS(obj); bits = (bits >> 2) << 2; - return __8B(bits)._float; + return BitsCvt(bits)._float; } -#define PY_VAR_INT(T) \ - PyVar py_var(VM* vm, T _val){ \ - i64 val = static_cast(_val); \ - if(((val << 2) >> 2) != val){ \ +#define PY_VAR_INT(T) \ + inline PyObject* py_var(VM* vm, T _val){ \ + i64 val = static_cast(_val); \ + if(((val << 2) >> 2) != val){ \ vm->_error("OverflowError", std::to_string(val) + " is out of range"); \ } \ val = (val << 2) | 0b01; \ - return PyVar(reinterpret_cast(val)); \ + return reinterpret_cast(val); \ } PY_VAR_INT(char) @@ -502,77 +432,82 @@ PY_VAR_INT(unsigned int) PY_VAR_INT(unsigned long) PY_VAR_INT(unsigned long long) -#define PY_VAR_FLOAT(T) \ - PyVar py_var(VM* vm, T _val){ \ - f64 val = static_cast(_val); \ - i64 bits = __8B(val)._int; \ - bits = (bits >> 2) << 2; \ - bits |= 0b10; \ - return PyVar(reinterpret_cast(bits)); \ +#define PY_VAR_FLOAT(T) \ + inline PyObject* py_var(VM* vm, T _val){ \ + f64 val = static_cast(_val); \ + i64 bits = BitsCvt(val)._int; \ + bits = (bits >> 2) << 2; \ + bits |= 0b10; \ + return reinterpret_cast(bits); \ } PY_VAR_FLOAT(float) PY_VAR_FLOAT(double) -const PyVar& py_var(VM* vm, bool val){ +inline PyObject* py_var(VM* vm, bool val){ return val ? vm->True : vm->False; } -template<> bool py_cast(VM* vm, const PyVar& obj){ +template<> inline bool py_cast(VM* vm, PyObject* obj){ vm->check_type(obj, vm->tp_bool); return obj == vm->True; } -template<> bool _py_cast(VM* vm, const PyVar& obj){ +template<> inline bool _py_cast(VM* vm, PyObject* obj){ return obj == vm->True; } -PyVar py_var(VM* vm, const char val[]){ +inline PyObject* py_var(VM* vm, const char val[]){ return VAR(Str(val)); } -PyVar py_var(VM* vm, std::string val){ +inline PyObject* py_var(VM* vm, std::string val){ return VAR(Str(std::move(val))); } +inline PyObject* py_var(VM* vm, std::string_view val){ + return VAR(Str(val)); +} + template -void _check_py_class(VM* vm, const PyVar& obj){ +void _check_py_class(VM* vm, PyObject* obj){ vm->check_type(obj, T::_type(vm)); } -PyVar VM::num_negated(const PyVar& obj){ +inline PyObject* VM::num_negated(PyObject* obj){ if (is_int(obj)){ return VAR(-CAST(i64, obj)); }else if(is_float(obj)){ return VAR(-CAST(f64, obj)); } - TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape(true)); + TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape()); return nullptr; } -f64 VM::num_to_float(const PyVar& obj){ +inline f64 VM::num_to_float(PyObject* obj){ if(is_float(obj)){ return CAST(f64, obj); } else if (is_int(obj)){ return (f64)CAST(i64, obj); } - TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape(true)); + TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape()); return 0; } -const PyVar& VM::asBool(const PyVar& obj){ - if(is_type(obj, tp_bool)) return obj; - if(obj == None) return False; - if(is_type(obj, tp_int)) return VAR(CAST(i64, obj) != 0); - if(is_type(obj, tp_float)) return VAR(CAST(f64, obj) != 0.0); - PyVarOrNull len_fn = getattr(obj, __len__, false, true); - if(len_fn != nullptr){ - PyVar ret = call(len_fn); - return VAR(CAST(i64, ret) > 0); +inline bool VM::asBool(PyObject* obj){ + if(is_type(obj, tp_bool)) return obj == True; + if(obj == None) return false; + if(is_type(obj, tp_int)) return CAST(i64, obj) != 0; + if(is_type(obj, tp_float)) return CAST(f64, obj) != 0.0; + PyObject* self; + PyObject* len_f = get_unbound_method(obj, __len__, &self, false); + if(self != _py_null){ + PyObject* ret = call(len_f, Args{self}); + return CAST(i64, ret) > 0; } - return True; + return true; } -i64 VM::hash(const PyVar& obj){ +inline i64 VM::hash(PyObject* obj){ if (is_type(obj, tp_str)) return CAST(Str&, obj).hash(); if (is_int(obj)) return CAST(i64, obj); if (is_type(obj, tp_tuple)) { @@ -580,45 +515,52 @@ i64 VM::hash(const PyVar& obj){ const Tuple& items = CAST(Tuple&, obj); for (int i=0; i> 2)); // recommended by Github Copilot + // recommended by Github Copilot + x = x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2)); } return x; } - if (is_type(obj, tp_type)) return obj.bits; + if (is_type(obj, tp_type)) return BITS(obj); if (is_type(obj, tp_bool)) return _CAST(bool, obj) ? 1 : 0; if (is_float(obj)){ f64 val = CAST(f64, obj); return (i64)std::hash()(val); } - TypeError("unhashable type: " + OBJ_NAME(_t(obj)).escape(true)); + TypeError("unhashable type: " + OBJ_NAME(_t(obj)).escape()); return 0; } -PyVar VM::asRepr(const PyVar& obj){ - return call(obj, __repr__); +inline PyObject* VM::asRepr(PyObject* obj){ + // TODO: fastcall does not take care of super() proxy! + return fast_call(__repr__, Args{obj}); } -PyVar VM::new_module(StrName name) { - PyVar obj = new_object(tp_module, DummyModule()); - obj->attr().set(__name__, VAR(name.str())); +inline PyObject* VM::new_module(StrName name) { + PyObject* obj = heap._new(tp_module, DummyModule()); + obj->attr().set(__name__, VAR(name.sv())); + // we do not allow override in order to avoid memory leak + // it is because Module objects are not garbage collected + if(_modules.contains(name)) UNREACHABLE(); _modules.set(name, obj); return obj; } -Str VM::disassemble(CodeObject_ co){ +inline Str VM::disassemble(CodeObject_ co){ + auto pad = [](const Str& s, const int n){ + if(s.length() >= n) return s.substr(0, n); + return s + std::string(n - s.length(), ' '); + }; + std::vector jumpTargets; for(auto byte : co->codes){ - if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_SAFE_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ + if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){ jumpTargets.push_back(byte.arg); } } - StrStream ss; - ss << std::string(54, '-') << '\n'; - ss << co->name << ":\n"; + std::stringstream ss; int prev_line = -1; for(int i=0; icodes.size(); i++){ const Bytecode& byte = co->codes[i]; - if(byte.op == OP_NO_OP) continue; Str line = std::to_string(byte.line); if(byte.line == prev_line) line = ""; else{ @@ -636,50 +578,56 @@ Str VM::disassemble(CodeObject_ co){ ss << " " << pad(OP_NAMES[byte.op], 20) << " "; // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5); std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); - if(byte.op == OP_LOAD_CONST){ - argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")"; + switch(byte.op){ + case OP_LOAD_CONST: + argStr += fmt(" (", CAST(Str, asRepr(co->consts[byte.arg])), ")"); + break; + case OP_LOAD_NAME: case OP_LOAD_GLOBAL: + case OP_STORE_LOCAL: case OP_STORE_GLOBAL: + case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR: + case OP_IMPORT_NAME: case OP_BEGIN_CLASS: + case OP_DELETE_LOCAL: case OP_DELETE_GLOBAL: + argStr += fmt(" (", co->names[byte.arg].sv(), ")"); + break; + case OP_BINARY_OP: + argStr += fmt(" (", BINARY_SPECIAL_METHODS[byte.arg], ")"); + break; + case OP_COMPARE_OP: + argStr += fmt(" (", COMPARE_SPECIAL_METHODS[byte.arg], ")"); + break; + case OP_BITWISE_OP: + argStr += fmt(" (", BITWISE_SPECIAL_METHODS[byte.arg], ")"); + break; } - if(byte.op == OP_LOAD_NAME_REF || byte.op == OP_LOAD_NAME || byte.op == OP_RAISE || byte.op == OP_STORE_NAME){ - argStr += " (" + co->names[byte.arg].first.str().escape(true) + ")"; - } - if(byte.op == OP_FAST_INDEX || byte.op == OP_FAST_INDEX_REF){ - auto& a = co->names[byte.arg & 0xFFFF]; - auto& x = co->names[(byte.arg >> 16) & 0xFFFF]; - argStr += " (" + a.first.str() + '[' + x.first.str() + "])"; - } - ss << pad(argStr, 20); // may overflow - ss << co->blocks[byte.block].to_string(); + ss << pad(argStr, 40); // may overflow + ss << co->blocks[byte.block].type; if(i != co->codes.size() - 1) ss << '\n'; } - StrStream consts; - consts << "co_consts: "; - consts << CAST(Str, asRepr(VAR(co->consts))); - StrStream names; +#if !DEBUG_DIS_EXEC_MIN + std::stringstream consts; + consts << "co_consts: "; + consts << CAST(Str&, asRepr(VAR(co->consts))); + + std::stringstream names; names << "co_names: "; List list; for(int i=0; inames.size(); i++){ - list.push_back(VAR(co->names[i].first.str())); + list.push_back(VAR(co->names[i].sv())); } names << CAST(Str, asRepr(VAR(list))); - ss << '\n' << consts.str() << '\n' << names.str() << '\n'; - - for(int i=0; iconsts.size(); i++){ - PyVar obj = co->consts[i]; - if(is_type(obj, tp_function)){ - const auto& f = CAST(Function&, obj); - ss << disassemble(f.code); - } + ss << '\n' << consts.str() << '\n' << names.str(); +#endif + for(auto& decl: co->func_decls){ + ss << "\n\n" << "Disassembly of " << decl->name << ":\n"; + ss << disassemble(decl->code); } return Str(ss.str()); } -void VM::init_builtin_types(){ - // Py_(Type type, T&& val) - PyVar _tp_object = make_sp>(Type(1), Type(0)); - PyVar _tp_type = make_sp>(Type(1), Type(1)); - _all_types.push_back({.obj = _tp_object, .base = -1, .name = "object"}); - _all_types.push_back({.obj = _tp_type, .base = 0, .name = "type"}); +inline void VM::init_builtin_types(){ + _all_types.push_back({heap._new(Type(1), Type(0)), -1, "object"}); + _all_types.push_back({heap._new(Type(1), Type(1)), 0, "type"}); tp_object = 0; tp_type = 1; tp_int = _new_type_object("int"); @@ -693,22 +641,22 @@ void VM::init_builtin_types(){ tp_slice = _new_type_object("slice"); tp_range = _new_type_object("range"); tp_module = _new_type_object("module"); - tp_ref = _new_type_object("_ref"); tp_star_wrapper = _new_type_object("_star_wrapper"); - tp_function = _new_type_object("function"); tp_native_function = _new_type_object("native_function"); - tp_native_iterator = _new_type_object("native_iterator"); + tp_iterator = _new_type_object("iterator"); tp_bound_method = _new_type_object("bound_method"); tp_super = _new_type_object("super"); tp_exception = _new_type_object("Exception"); - this->None = new_object(_new_type_object("NoneType"), DUMMY_VAL); - this->Ellipsis = new_object(_new_type_object("ellipsis"), DUMMY_VAL); - this->True = new_object(tp_bool, true); - this->False = new_object(tp_bool, false); - this->_py_op_call = new_object(_new_type_object("_py_op_call"), DUMMY_VAL); - this->_py_op_yield = new_object(_new_type_object("_py_op_yield"), DUMMY_VAL); + this->None = heap._new(_new_type_object("NoneType"), {}); + this->Ellipsis = heap._new(_new_type_object("ellipsis"), {}); + this->True = heap._new(tp_bool, {}); + this->False = heap._new(tp_bool, {}); + this->_py_null = heap._new(_new_type_object("_py_null"), {}); + this->_py_op_call = heap._new(_new_type_object("_py_op_call"), {}); + this->_py_op_yield = heap._new(_new_type_object("_py_op_yield"), {}); + this->builtins = new_module("builtins"); this->_main = new_module("__main__"); @@ -725,64 +673,65 @@ void VM::init_builtin_types(){ post_init(); for(int i=0; i<_all_types.size(); i++){ - auto& t = _all_types[i]; - t.obj->attr()._try_perfect_rehash(); + _all_types[i].obj->attr()._try_perfect_rehash(); } for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); } -PyVar VM::call(const PyVar& _callable, Args args, const Args& kwargs, bool opCall){ - if(is_type(_callable, tp_type)){ - PyVar* new_f = _callable->attr().try_get(__new__); - PyVar obj; +// TODO: callable/args here may be garbage collected accidentally +inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCall){ + if(is_type(callable, tp_type)){ + PyObject* new_f = callable->attr().try_get(__new__); + PyObject* obj; if(new_f != nullptr){ - obj = call(*new_f, std::move(args), kwargs, false); + obj = call(new_f, std::move(args), kwargs, false); }else{ - obj = new_object(_callable, DummyInstance()); - PyVarOrNull init_f = getattr(obj, __init__, false, true); - if (init_f != nullptr) call(init_f, std::move(args), kwargs, false); + obj = heap.gcnew(OBJ_GET(Type, callable), {}); + PyObject* self; + PyObject* init_f = get_unbound_method(obj, __init__, &self, false); + args.extend_self(self); + if (self != _py_null) call(init_f, std::move(args), kwargs, false); } return obj; } - const PyVar* callable = &_callable; - if(is_type(*callable, tp_bound_method)){ - auto& bm = CAST(BoundMethod&, *callable); - callable = &bm.method; // get unbound method + if(is_type(callable, tp_bound_method)){ + auto& bm = CAST(BoundMethod&, callable); + callable = bm.method; // get unbound method args.extend_self(bm.obj); } - if(is_type(*callable, tp_native_function)){ - const auto& f = OBJ_GET(NativeFunc, *callable); + if(is_type(callable, tp_native_function)){ + const auto& f = OBJ_GET(NativeFunc, callable); if(kwargs.size() != 0) TypeError("native_function does not accept keyword arguments"); return f(this, args); - } else if(is_type(*callable, tp_function)){ - const Function& fn = CAST(Function&, *callable); + } else if(is_type(callable, tp_function)){ + const Function& fn = CAST(Function&, callable); NameDict_ locals = make_sp( - fn.code->perfect_locals_capacity, + fn.decl->code->perfect_locals_capacity, kLocalsLoadFactor, - fn.code->perfect_hash_seed + fn.decl->code->perfect_hash_seed ); int i = 0; - for(StrName name : fn.args){ + for(StrName name : fn.decl->args){ if(i < args.size()){ - locals->set(name, std::move(args[i++])); + locals->set(name, args[i++]); continue; } - TypeError("missing positional argument " + name.str().escape(true)); + TypeError(fmt("missing positional argument ", name.escape())); } - locals->update(fn.kwargs); + locals->update(fn.decl->kwargs); - if(!fn.starred_arg.empty()){ + if(!fn.decl->starred_arg.empty()){ List vargs; // handle *args - while(i < args.size()) vargs.push_back(std::move(args[i++])); - locals->set(fn.starred_arg, VAR(Tuple::from_list(std::move(vargs)))); + while(i < args.size()) vargs.push_back(args[i++]); + locals->set(fn.decl->starred_arg, VAR(Tuple(std::move(vargs)))); }else{ - for(StrName key : fn.kwargs_order){ + for(StrName key : fn.decl->kwargs_order){ if(i < args.size()){ - locals->set(key, std::move(args[i++])); + locals->set(key, args[i++]); }else{ break; } @@ -792,115 +741,153 @@ PyVar VM::call(const PyVar& _callable, Args args, const Args& kwargs, bool opCal for(int i=0; ikwargs.contains(key)){ + TypeError(fmt(key.escape(), " is an invalid keyword argument for ", fn.decl->name, "()")); } locals->set(key, kwargs[i+1]); } - const PyVar& _module = fn._module != nullptr ? fn._module : top_frame()->_module; - auto _frame = _new_frame(fn.code, _module, locals, fn._closure); - if(fn.code->is_generator) return PyIter(Generator(this, std::move(_frame))); + PyObject* _module = fn._module != nullptr ? fn._module : top_frame()->_module; + auto _frame = _new_frame(fn.decl->code, _module, locals, fn._closure); + if(fn.decl->code->is_generator) return PyIter(Generator(this, std::move(_frame))); callstack.push(std::move(_frame)); if(opCall) return _py_op_call; return _exec(); } - PyVarOrNull call_f = getattr(_callable, __call__, false, true); - if(call_f != nullptr){ + PyObject* self; + PyObject* call_f = get_unbound_method(callable, __call__, &self, false); + if(self != _py_null){ + args.extend_self(self); return call(call_f, std::move(args), kwargs, false); } - TypeError(OBJ_NAME(_t(*callable)).escape(true) + " object is not callable"); + TypeError(OBJ_NAME(_t(callable)).escape() + " object is not callable"); return None; } -void VM::unpack_args(Args& args){ +inline void VM::unpack_args(Args& args){ List unpacked; for(int i=0; i; - // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance -PyVarOrNull VM::getattr(const PyVar* obj, StrName name, bool throw_err, bool class_only){ - PyObject* objtype = _t(*obj).get(); - if(is_type(*obj, tp_super)){ - const Super& super = OBJ_GET(Super, *obj); - obj = &super.first; - objtype = _t(super.second).get(); +inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){ + // TODO: class_only impl may not be correct + PyObject* objtype = _t(obj); + // handle super() proxy + if(is_type(obj, tp_super)){ + const Super& super = OBJ_GET(Super, obj); + obj = super.first; + objtype = _t(super.second); } - PyVar* cls_var = find_name_in_mro(objtype, name); + PyObject* cls_var = find_name_in_mro(objtype, name); if(cls_var != nullptr){ // handle descriptor - PyVar* descr_get = _t(*cls_var)->attr().try_get(__get__); - if(descr_get != nullptr) return call(*descr_get, two_args(*cls_var, *obj)); + PyObject* descr_get = _t(cls_var)->attr().try_get(__get__); + if(descr_get != nullptr) return call(descr_get, Args{cls_var, obj}); } // handle instance __dict__ - if(!class_only && !(*obj).is_tagged() && (*obj)->is_attr_valid()){ - PyVar* val = (*obj)->attr().try_get(name); - if(val != nullptr) return *val; + if(!is_tagged(obj) && obj->is_attr_valid()){ + PyObject* val = obj->attr().try_get(name); + if(val != nullptr) return val; } if(cls_var != nullptr){ // bound method is non-data descriptor - if(is_type(*cls_var, tp_function) || is_type(*cls_var, tp_native_function)){ - return VAR(BoundMethod(*obj, *cls_var)); + if(is_type(cls_var, tp_function) || is_type(cls_var, tp_native_function)){ + return VAR(BoundMethod(obj, cls_var)); } - return *cls_var; + return cls_var; } - if(throw_err) AttributeError(*obj, name); + if(throw_err) AttributeError(obj, name); + return nullptr; +} + +// used by OP_LOAD_METHOD +// try to load a unbound method (fallback to `getattr` if not found) +inline PyObject* VM::get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err, bool fallback){ + *self = _py_null; + // TODO: class_only impl may not be correct + PyObject* objtype = _t(obj); + // handle super() proxy + if(is_type(obj, tp_super)){ + const Super& super = OBJ_GET(Super, obj); + obj = super.first; + objtype = _t(super.second); + } + PyObject* cls_var = find_name_in_mro(objtype, name); + + if(fallback){ + if(cls_var != nullptr){ + // handle descriptor + PyObject* descr_get = _t(cls_var)->attr().try_get(__get__); + if(descr_get != nullptr) return call(descr_get, Args{cls_var, obj}); + } + // handle instance __dict__ + if(!is_tagged(obj) && obj->is_attr_valid()){ + PyObject* val = obj->attr().try_get(name); + if(val != nullptr) return val; + } + } + + if(cls_var != nullptr){ + if(is_type(cls_var, tp_function) || is_type(cls_var, tp_native_function)){ + *self = obj; + } + return cls_var; + } + if(throw_err) AttributeError(obj, name); return nullptr; } template -void VM::setattr(PyVar* obj, StrName name, T&& value){ - static_assert(std::is_same_v, PyVar>); - PyObject* objtype = _t(*obj).get(); - if(is_type(*obj, tp_super)){ - Super& super = OBJ_GET(Super, *obj); - obj = &super.first; - objtype = _t(super.second).get(); +inline void VM::setattr(PyObject* obj, StrName name, T&& value){ + static_assert(std::is_same_v, PyObject*>); + PyObject* objtype = _t(obj); + // handle super() proxy + if(is_type(obj, tp_super)){ + Super& super = OBJ_GET(Super, obj); + obj = super.first; + objtype = _t(super.second); } - PyVar* cls_var = find_name_in_mro(objtype, name); + PyObject* cls_var = find_name_in_mro(objtype, name); if(cls_var != nullptr){ // handle descriptor - const PyVar& cls_var_t = _t(*cls_var); + PyObject* cls_var_t = _t(cls_var); if(cls_var_t->attr().contains(__get__)){ - PyVar* descr_set = cls_var_t->attr().try_get(__set__); + PyObject* descr_set = cls_var_t->attr().try_get(__set__); if(descr_set != nullptr){ - call(*descr_set, three_args(*cls_var, *obj, std::forward(value))); + call(descr_set, Args{cls_var, obj, std::forward(value)}); }else{ - TypeError("readonly attribute: " + name.str().escape(true)); + TypeError(fmt("readonly attribute: ", name.escape())); } return; } } // handle instance __dict__ - if((*obj).is_tagged() || !(*obj)->is_attr_valid()) TypeError("cannot set attribute"); - (*obj)->attr().set(name, std::forward(value)); + if(is_tagged(obj) || !obj->is_attr_valid()) TypeError("cannot set attribute"); + obj->attr().set(name, std::forward(value)); } template -void VM::bind_method(PyVar obj, Str name, NativeFuncRaw fn) { +void VM::bind_method(PyObject* obj, Str name, NativeFuncRaw fn) { check_type(obj, tp_type); obj->attr().set(name, VAR(NativeFunc(fn, ARGC, true))); } template -void VM::bind_func(PyVar obj, Str name, NativeFuncRaw fn) { +void VM::bind_func(PyObject* obj, Str name, NativeFuncRaw fn) { obj->attr().set(name, VAR(NativeFunc(fn, ARGC, false))); } -void VM::_error(Exception e){ +inline void VM::_error(Exception e){ if(callstack.empty()){ e.is_re = false; throw e; @@ -909,17 +896,16 @@ void VM::_error(Exception e){ _raise(); } -PyVar VM::_exec(){ +inline PyObject* VM::_exec(){ Frame* frame = top_frame(); - i64 base_id = frame->id; - PyVar ret = nullptr; + const i64 base_id = frame->id; bool need_raise = false; while(true){ if(frame->id < base_id) UNREACHABLE(); try{ if(need_raise){ need_raise = false; _raise(); } - ret = run_frame(frame); + PyObject* ret = run_frame(frame); if(ret == _py_op_yield) return _py_op_yield; if(ret != _py_op_call){ if(frame->id == base_id){ // [ frameBase<- ] @@ -936,11 +922,16 @@ PyVar VM::_exec(){ }catch(HandledException& e){ continue; }catch(UnhandledException& e){ - PyVar obj = frame->pop(); + PyObject* obj = frame->popx(); Exception& _e = CAST(Exception&, obj); _e.st_push(frame->snapshot()); callstack.pop(); - if(callstack.empty()) throw _e; + if(callstack.empty()){ +#if DEBUG_FULL_EXCEPTION + std::cerr << _e.summary() << std::endl; +#endif + throw _e; + } frame = callstack.top().get(); frame->push(obj); if(frame->id < base_id) throw ToBeRaisedException(); @@ -951,4 +942,13 @@ PyVar VM::_exec(){ } } +inline void ManagedHeap::mark() { + for(PyObject* obj: _no_gc) OBJ_MARK(obj); + for(auto& frame : vm->callstack.data()) frame->_gc_mark(); +} + +inline Str obj_type_name(VM *vm, Type type){ + return vm->_all_types[type].name; +} + } // namespace pkpy \ No newline at end of file diff --git a/tests/07_dict.py b/tests/07_dict.py index 9c3826fc..50f21cff 100644 --- a/tests/07_dict.py +++ b/tests/07_dict.py @@ -42,4 +42,7 @@ d1 = {1:2, 3:4} d2 = {3:4, 1:2} d3 = {1:2, 3:4, 5:6} assert d1 == d2 -assert d1 != d3 \ No newline at end of file +assert d1 != d3 + +a = dict([(1, 2), (3, 4)]) +assert a == {1: 2, 3: 4} \ No newline at end of file diff --git a/tests/25_rawstring.py b/tests/25_rawstring.py index 2e6d8db2..98aa4e99 100644 --- a/tests/25_rawstring.py +++ b/tests/25_rawstring.py @@ -17,6 +17,8 @@ asds1321321321测试\测试''' assert s == 'asdasd\nasds1321321321测试\\测试' +assert f'123{2*2}56789' == '123456789' + s = f'''->->{s}<-<- {123} ''' diff --git a/tests/70_random.py b/tests/70_random.py index 93d576ed..85bfc2b0 100644 --- a/tests/70_random.py +++ b/tests/70_random.py @@ -11,21 +11,21 @@ r.shuffle(a) r.choice(a) r.choice(b) -from sys import version as v +# from sys import version as v -assert type(v) is str +# assert type(v) is str -class Context: - def __init__(self): - self.x = 0 +# class Context: +# def __init__(self): +# self.x = 0 - def __enter__(self): - self.x = 1 +# def __enter__(self): +# self.x = 1 - def __exit__(self): - self.x = 2 +# def __exit__(self): +# self.x = 2 -with Context() as c: - assert c.x == 1 +# with Context() as c: +# assert c.x == 1 -assert c.x == 2 \ No newline at end of file +# assert c.x == 2 \ No newline at end of file diff --git a/tests/80_json.py b/tests/80_json.py index b285faed..6a589c32 100644 --- a/tests/80_json.py +++ b/tests/80_json.py @@ -3,12 +3,12 @@ a = { 'b': 2, 'c': None, 'd': [1, 2, 3], - # 'e': { - # 'a': 1, - # 'b': 2, - # 'c': None, - # 'd': [1, 2, 3], - # }, + 'e': { + 'a': 1, + 'b': 2, + 'c': None, + 'd': [1, 2, 3], + }, "f": 'This is a string', 'g': [True, False, None], 'h': False