Merge pull request #52 from blueloveTH/dev

a major refactor
This commit is contained in:
BLUELOVETH 2023-04-09 17:00:23 +08:00 committed by GitHub
commit facc873856
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 4231 additions and 3010 deletions

View File

@ -5,12 +5,8 @@ jobs:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- name: Setup Clang
uses: egor-tensin/setup-clang@v1
with:
version: 15
platform: x64
- name: Compiling
- uses: ilammy/msvc-dev-cmd@v1
- name: Compile
shell: bash
run: |
python3 build.py windows
@ -25,32 +21,6 @@ jobs:
run: python3 scripts/run_tests.py
- name: Benchmark
run: python3 scripts/run_tests.py benchmark
build_web:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Setup emsdk
uses: mymindstorm/setup-emsdk@v12
with:
version: 3.1.25
actions-cache-folder: 'emsdk-cache'
- name: Verify emsdk
run: emcc -v
- name: Compiling
run: |
mkdir -p output/web/lib
python3 build.py web
cp web/lib/* output/web/lib
- uses: crazy-max/ghaction-github-pages@v3
with:
target_branch: gh-pages
build_dir: web
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
if: github.event_name == 'push'
- uses: actions/upload-artifact@v3
with:
path: output
build_linux:
runs-on: ubuntu-latest
steps:
@ -60,16 +30,17 @@ jobs:
with:
version: 15
platform: x64
- name: Coverage Test
run: |
sudo apt install -y libc++-15-dev libc++1-15 libc++abi-15-dev libc++abi1-15 libclang-rt-15-dev
python3 preprocess.py
bash run_tests.sh
- uses: actions/upload-artifact@v3
with:
name: coverage
path: .coverage
- name: Compiling
- name: Install libc++
run: sudo apt install -y libc++-15-dev libc++1-15 libc++abi-15-dev libc++abi1-15 libclang-rt-15-dev
# - name: Coverage Test
# run: |
# python3 preprocess.py
# bash run_tests.sh
# - uses: actions/upload-artifact@v3
# with:
# name: coverage
# path: .coverage
- name: Compile
run: |
python3 build.py linux
python3 build.py linux -lib
@ -83,6 +54,19 @@ jobs:
run: python3 scripts/run_tests.py
- name: Benchmark
run: python3 scripts/run_tests.py benchmark
build_macos:
runs-on: macos-latest
steps:
- uses: actions/checkout@v3
- run: |
python3 amalgamate.py
cd plugins/macos/pocketpy
mkdir -p output/macos
xcodebuild clean build CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO
cp -r build/Release/pocketpy.bundle output/macos
- uses: actions/upload-artifact@v3
with:
path: plugins/macos/pocketpy/output
build_android:
runs-on: ubuntu-latest
steps:
@ -93,7 +77,7 @@ jobs:
channel: 'stable'
cache: true
- run: flutter --version
- name: Compiling
- name: Compile
run: |
python3 amalgamate.py
cd plugins/flutter/example
@ -114,16 +98,29 @@ jobs:
- uses: actions/upload-artifact@v3
with:
path: plugins/flutter/example/build/app/outputs/flutter-apk/output
build_macos:
runs-on: macos-latest
steps:
- uses: actions/checkout@v3
- run: |
python3 amalgamate.py
cd plugins/macos/pocketpy
mkdir -p output/macos
xcodebuild clean build CODE_SIGN_IDENTITY="" CODE_SIGNING_REQUIRED=NO CODE_SIGNING_ALLOWED=NO
cp -r build/Release/pocketpy.bundle output/macos
- uses: actions/upload-artifact@v3
with:
path: plugins/macos/pocketpy/output
build_web:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Setup emsdk
uses: mymindstorm/setup-emsdk@v12
with:
version: 3.1.25
actions-cache-folder: 'emsdk-cache'
- name: Verify emsdk
run: emcc -v
- name: Compile
run: |
mkdir -p output/web/lib
python3 build.py web
cp web/lib/* output/web/lib
- uses: crazy-max/ghaction-github-pages@v3
with:
target_branch: gh-pages
build_dir: web
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
- uses: actions/upload-artifact@v3
with:
path: output

1
.gitignore vendored
View File

@ -23,3 +23,4 @@ plugins/godot/godot-cpp/
src/_generated.h
profile.sh
test
tmp.rar

View File

@ -6,9 +6,9 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f:
OPCODES_TEXT = f.read()
pipeline = [
["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h"],
["obj.h", "parser.h", "codeobject.h", "frame.h"],
["vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"],
["common.h", "memory.h", "vector.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"],
["obj.h", "codeobject.h", "frame.h"],
["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"],
["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"]
]

View File

@ -3,4 +3,6 @@ def fib(n):
return n
return fib(n-1) + fib(n-2)
assert fib(32) == 2178309
assert fib(32) == 2178309
# 7049155 calls

View File

@ -20,7 +20,7 @@ def lib_pre_build():
def lib_post_build():
os.remove("src/tmp.cpp")
windows_common = "clang-cl.exe -std:c++17 /utf-8 -GR- -EHsc -O2 -Wno-deprecated-declarations"
windows_common = "CL -std:c++17 /utf-8 -GR- -EHsc -O2"
windows_cmd = windows_common + " -Fe:pocketpy src/main.cpp"
windows_lib_cmd = windows_common + " -LD -Fe:pocketpy src/tmp.cpp"

View File

@ -20,7 +20,7 @@ def generate_python_sources():
#include <string>
namespace pkpy{
std::map<std::string, const char*> kPythonLibs = {
inline static std::map<std::string, const char*> kPythonLibs = {
'''
for key, value in sources.items():
header += ' '*8 + '{"' + key + '", "' + value + '"},'

View File

@ -1,8 +1,12 @@
class dict:
def __init__(self, capacity=13):
self._capacity = capacity
def __init__(self, mapping=None):
self._capacity = 16
self._a = [None] * self._capacity
self._len = 0
if mapping is not None:
for k,v in mapping:
self[k] = v
def __len__(self):
return self._len

5
run_profile.sh Normal file
View File

@ -0,0 +1,5 @@
# Profile the interpreter with gprof while it runs the fib benchmark.
# -pg makes the binary write gprof profiling data (gmon.out) when it exits.
clang++ -pg -O2 -std=c++17 -fno-rtti -stdlib=libc++ -Wall -o pocketpy src/main.cpp
# `time` additionally reports how long the benchmark run takes.
time ./pocketpy benchmarks/fib.py
# The profile is picked up from benchmarks/ rather than the current directory;
# presumably the interpreter changes into the script's directory -- TODO confirm.
mv benchmarks/gmon.out .
# Convert the raw profile into a human-readable report.
gprof pocketpy gmon.out > gprof.txt
# The raw profile is not needed once gprof.txt has been written.
rm gmon.out

10
run_profile_test.sh Normal file
View File

@ -0,0 +1,10 @@
# Build the interpreter with --coverage instrumentation, run the fib benchmark,
# and gather the resulting gcov reports under .coverage/.
clang++ -O2 -std=c++17 -fno-rtti --coverage -stdlib=libc++ -Wall -o pocketpy src/main.cpp
time ./pocketpy benchmarks/fib.py
# Recreate the report directory so results from earlier runs are discarded.
rm -rf .coverage
mkdir -p .coverage
# The textual summary is captured in coverage.txt; the per-source *.gcov files
# produced in the current directory are moved next to it.
# NOTE(review): "main.gc" presumably resolves to main.gcno/main.gcda -- confirm.
llvm-cov-15 gcov main.gc -r -s src/ >> .coverage/coverage.txt
mv *.gcov .coverage
# Remove the main.gc* coverage data files left in the working directory.
rm main.gc*
# Unused alternative kept for reference: source-based coverage with HTML output.
# -fprofile-instr-generate -fcoverage-mapping
# llvm-cov-15 show main.gc -instr-profile=default.profraw -format=html -output-dir .coverage

View File

@ -27,7 +27,11 @@ def test_dir(path):
print(f' cpython: {_1 - _0:.6f}s (100%)')
print(f' pocketpy: {_2 - _1:.6f}s ({(_2 - _1) / (_1 - _0) * 100:.2f}%)')
else:
if not test_file(filepath): exit(1)
if not test_file(filepath):
print('-' * 50)
print("TEST FAILED! Press any key to continue...")
input()
if len(sys.argv) == 2:
assert 'benchmark' in sys.argv[1]

View File

@ -1,350 +1,426 @@
#pragma once
#include "common.h"
#include "vm.h"
#include "ref.h"
namespace pkpy{
Str _read_file_cwd(const Str& name, bool* ok);
#define DISPATCH() goto __NEXT_STEP
PyVar VM::run_frame(Frame* frame){
while(frame->has_next_bytecode()){
const Bytecode& byte = frame->next_bytecode();
switch (byte.op)
{
case OP_NO_OP: continue;
case OP_SETUP_DECORATOR: continue;
case OP_LOAD_CONST: frame->push(frame->co->consts[byte.arg]); continue;
case OP_LOAD_FUNCTION: {
const PyVar obj = frame->co->consts[byte.arg];
Function f = CAST(Function, obj); // copy
f._module = frame->_module;
frame->push(VAR(f));
} continue;
case OP_SETUP_CLOSURE: {
Function& f = CAST(Function&, frame->top()); // reference
f._closure = frame->_locals;
} continue;
case OP_LOAD_NAME_REF: {
frame->push(PyRef(NameRef(frame->co->names[byte.arg])));
} continue;
case OP_LOAD_NAME: {
frame->push(NameRef(frame->co->names[byte.arg]).get(this, frame));
} continue;
case OP_STORE_NAME: {
auto& p = frame->co->names[byte.arg];
NameRef(p).set(this, frame, frame->pop());
} continue;
case OP_BUILD_ATTR_REF: case OP_BUILD_ATTR: {
auto& attr = frame->co->names[byte.arg];
PyVar obj = frame->pop_value(this);
AttrRef ref = AttrRef(obj, NameRef(attr));
if(byte.op == OP_BUILD_ATTR) frame->push(ref.get(this, frame));
else frame->push(PyRef(ref));
} continue;
case OP_BUILD_INDEX: {
PyVar index = frame->pop_value(this);
auto ref = IndexRef(frame->pop_value(this), index);
if(byte.arg > 0) frame->push(ref.get(this, frame));
else frame->push(PyRef(ref));
} continue;
case OP_FAST_INDEX: case OP_FAST_INDEX_REF: {
auto& a = frame->co->names[byte.arg & 0xFFFF];
auto& x = frame->co->names[(byte.arg >> 16) & 0xFFFF];
auto ref = IndexRef(NameRef(a).get(this, frame), NameRef(x).get(this, frame));
if(byte.op == OP_FAST_INDEX) frame->push(ref.get(this, frame));
else frame->push(PyRef(ref));
} continue;
case OP_ROT_TWO: ::std::swap(frame->top(), frame->top_1()); continue;
case OP_STORE_REF: {
// PyVar obj = frame->pop_value(this);
// PyVarRef r = frame->pop();
// PyRef_AS_C(r)->set(this, frame, std::move(obj));
PyRef_AS_C(frame->top_1())->set(this, frame, frame->top_value(this));
frame->_pop(); frame->_pop();
} continue;
case OP_DELETE_REF:
PyRef_AS_C(frame->top())->del(this, frame);
frame->_pop();
continue;
case OP_BUILD_TUPLE: {
Args items = frame->pop_n_values_reversed(this, byte.arg);
frame->push(VAR(std::move(items)));
} continue;
case OP_BUILD_TUPLE_REF: {
Args items = frame->pop_n_reversed(byte.arg);
frame->push(PyRef(TupleRef(std::move(items))));
} continue;
case OP_BUILD_STRING: {
Args items = frame->pop_n_values_reversed(this, byte.arg);
StrStream ss;
for(int i=0; i<items.size(); i++) ss << CAST(Str, asStr(items[i]));
frame->push(VAR(ss.str()));
} continue;
case OP_LOAD_EVAL_FN: frame->push(builtins->attr(m_eval)); continue;
case OP_BEGIN_CLASS: {
auto& name = frame->co->names[byte.arg];
PyVar clsBase = frame->pop_value(this);
if(clsBase == None) clsBase = _t(tp_object);
check_type(clsBase, tp_type);
PyVar cls = new_type_object(frame->_module, name.first, OBJ_GET(Type, clsBase));
frame->push(cls);
} continue;
case OP_END_CLASS: {
PyVar cls = frame->pop();
cls->attr()._try_perfect_rehash();
}; continue;
case OP_STORE_CLASS_ATTR: {
auto& name = frame->co->names[byte.arg];
PyVar obj = frame->pop_value(this);
PyVar& cls = frame->top();
cls->attr().set(name.first, std::move(obj));
} continue;
case OP_RETURN_VALUE: return frame->pop_value(this);
case OP_PRINT_EXPR: {
const PyVar expr = frame->top_value(this);
if(expr != None) *_stdout << CAST(Str, asRepr(expr)) << '\n';
} continue;
case OP_POP_TOP: frame->_pop(); continue;
case OP_BINARY_OP: {
Args args(2);
args[1] = frame->pop_value(this);
args[0] = frame->top_value(this);
frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args));
} continue;
case OP_BITWISE_OP: {
Args args(2);
args[1] = frame->pop_value(this);
args[0] = frame->top_value(this);
frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args));
} continue;
case OP_INPLACE_BINARY_OP: {
Args args(2);
args[1] = frame->pop();
args[0] = frame->top_value(this);
PyVar ret = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args));
PyRef_AS_C(frame->top())->set(this, frame, std::move(ret));
frame->_pop();
} continue;
case OP_INPLACE_BITWISE_OP: {
Args args(2);
args[1] = frame->pop_value(this);
args[0] = frame->top_value(this);
PyVar ret = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args));
PyRef_AS_C(frame->top())->set(this, frame, std::move(ret));
frame->_pop();
} continue;
case OP_COMPARE_OP: {
Args args(2);
args[1] = frame->pop_value(this);
args[0] = frame->top_value(this);
frame->top() = fast_call(CMP_SPECIAL_METHODS[byte.arg], std::move(args));
} continue;
case OP_IS_OP: {
PyVar rhs = frame->pop_value(this);
bool ret_c = rhs == frame->top_value(this);
if(byte.arg == 1) ret_c = !ret_c;
frame->top() = VAR(ret_c);
} continue;
case OP_CONTAINS_OP: {
PyVar rhs = frame->pop_value(this);
bool ret_c = CAST(bool, call(rhs, __contains__, one_arg(frame->pop_value(this))));
if(byte.arg == 1) ret_c = !ret_c;
frame->push(VAR(ret_c));
} continue;
case OP_UNARY_NEGATIVE:
frame->top() = num_negated(frame->top_value(this));
continue;
case OP_UNARY_NOT: {
PyVar obj = frame->pop_value(this);
const PyVar& obj_bool = asBool(obj);
frame->push(VAR(!_CAST(bool, obj_bool)));
} continue;
case OP_POP_JUMP_IF_FALSE:
if(!_CAST(bool, asBool(frame->pop_value(this)))) frame->jump_abs(byte.arg);
continue;
case OP_LOAD_NONE: frame->push(None); continue;
case OP_LOAD_TRUE: frame->push(True); continue;
case OP_LOAD_FALSE: frame->push(False); continue;
case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); continue;
case OP_ASSERT: {
PyVar _msg = frame->pop_value(this);
Str msg = CAST(Str, asStr(_msg));
PyVar expr = frame->pop_value(this);
if(asBool(expr) != True) _error("AssertionError", msg);
} continue;
case OP_EXCEPTION_MATCH: {
const auto& e = CAST(Exception&, frame->top());
StrName name = frame->co->names[byte.arg].first;
frame->push(VAR(e.match_type(name)));
} continue;
case OP_RAISE: {
PyVar obj = frame->pop_value(this);
Str msg = obj == None ? "" : CAST(Str, asStr(obj));
StrName type = frame->co->names[byte.arg].first;
_error(type, msg);
} continue;
case OP_RE_RAISE: _raise(); continue;
case OP_BUILD_LIST:
frame->push(VAR(frame->pop_n_values_reversed(this, byte.arg).move_to_list()));
continue;
case OP_BUILD_MAP: {
Args items = frame->pop_n_values_reversed(this, byte.arg*2);
PyVar obj = call(builtins->attr("dict"));
for(int i=0; i<items.size(); i+=2){
call(obj, __setitem__, two_args(items[i], items[i+1]));
}
frame->push(obj);
} continue;
case OP_BUILD_SET: {
PyVar list = VAR(
frame->pop_n_values_reversed(this, byte.arg).move_to_list()
);
PyVar obj = call(builtins->attr("set"), one_arg(list));
frame->push(obj);
} continue;
case OP_LIST_APPEND: {
PyVar obj = frame->pop_value(this);
List& list = CAST(List&, frame->top_1());
list.push_back(std::move(obj));
} continue;
case OP_MAP_ADD: {
PyVar value = frame->pop_value(this);
PyVar key = frame->pop_value(this);
call(frame->top_1(), __setitem__, two_args(key, value));
} continue;
case OP_SET_ADD: {
PyVar obj = frame->pop_value(this);
call(frame->top_1(), "add", one_arg(obj));
} continue;
case OP_DUP_TOP_VALUE: frame->push(frame->top_value(this)); continue;
case OP_UNARY_STAR: {
if(byte.arg > 0){ // rvalue
frame->top() = VAR(StarWrapper(frame->top_value(this), true));
}else{
PyRef_AS_C(frame->top()); // check ref
frame->top() = VAR(StarWrapper(frame->top(), false));
}
} continue;
case OP_CALL_KWARGS_UNPACK: case OP_CALL_KWARGS: {
int ARGC = byte.arg & 0xFFFF;
int KWARGC = (byte.arg >> 16) & 0xFFFF;
Args kwargs = frame->pop_n_values_reversed(this, KWARGC*2);
Args args = frame->pop_n_values_reversed(this, ARGC);
if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args);
PyVar callable = frame->pop_value(this);
PyVar ret = call(callable, std::move(args), kwargs, true);
if(ret == _py_op_call) return ret;
frame->push(std::move(ret));
} continue;
case OP_CALL_UNPACK: case OP_CALL: {
Args args = frame->pop_n_values_reversed(this, byte.arg);
if(byte.op == OP_CALL_UNPACK) unpack_args(args);
PyVar callable = frame->pop_value(this);
PyVar ret = call(callable, std::move(args), no_arg(), true);
if(ret == _py_op_call) return ret;
frame->push(std::move(ret));
} continue;
case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); continue;
case OP_SAFE_JUMP_ABSOLUTE: frame->jump_abs_safe(byte.arg); continue;
case OP_GOTO: {
StrName label = frame->co->names[byte.arg].first;
auto it = frame->co->labels.find(label);
if(it == frame->co->labels.end()) _error("KeyError", "label " + label.str().escape(true) + " not found");
frame->jump_abs_safe(it->second);
} continue;
case OP_GET_ITER: {
PyVar obj = frame->pop_value(this);
PyVar iter = asIter(obj);
check_type(frame->top(), tp_ref);
PyIter_AS_C(iter)->loop_var = frame->pop();
frame->push(std::move(iter));
} continue;
case OP_FOR_ITER: {
BaseIter* it = PyIter_AS_C(frame->top());
PyVar obj = it->next();
if(obj != nullptr){
PyRef_AS_C(it->loop_var)->set(this, frame, std::move(obj));
}else{
int blockEnd = frame->co->blocks[byte.block].end;
frame->jump_abs_safe(blockEnd);
}
} continue;
case OP_LOOP_CONTINUE: {
int blockStart = frame->co->blocks[byte.block].start;
frame->jump_abs(blockStart);
} continue;
case OP_LOOP_BREAK: {
int blockEnd = frame->co->blocks[byte.block].end;
frame->jump_abs_safe(blockEnd);
} continue;
case OP_JUMP_IF_FALSE_OR_POP: {
const PyVar expr = frame->top_value(this);
if(asBool(expr)==False) frame->jump_abs(byte.arg);
else frame->pop_value(this);
} continue;
case OP_JUMP_IF_TRUE_OR_POP: {
const PyVar expr = frame->top_value(this);
if(asBool(expr)==True) frame->jump_abs(byte.arg);
else frame->pop_value(this);
} continue;
case OP_BUILD_SLICE: {
PyVar stop = frame->pop_value(this);
PyVar start = frame->pop_value(this);
Slice s;
if(start != None) { s.start = CAST(int, start);}
if(stop != None) { s.stop = CAST(int, stop);}
frame->push(VAR(s));
} continue;
case OP_IMPORT_NAME: {
StrName name = frame->co->names[byte.arg].first;
PyVar* ext_mod = _modules.try_get(name);
if(ext_mod == nullptr){
Str source;
auto it2 = _lazy_modules.find(name);
if(it2 == _lazy_modules.end()){
bool ok = false;
source = _read_file_cwd(name.str() + ".py", &ok);
if(!ok) _error("ImportError", "module " + name.str().escape(true) + " not found");
}else{
source = it2->second;
_lazy_modules.erase(it2);
}
CodeObject_ code = compile(source, name.str(), EXEC_MODE);
PyVar new_mod = new_module(name);
_exec(code, new_mod);
frame->push(new_mod);
new_mod->attr()._try_perfect_rehash();
}else{
frame->push(*ext_mod);
}
} continue;
case OP_STORE_ALL_NAMES: {
PyVar obj = frame->pop_value(this);
for(auto& [name, value]: obj->attr().items()){
Str s = name.str();
if(s.empty() || s[0] == '_') continue;
frame->f_globals().set(name, value);
}
}; continue;
case OP_YIELD_VALUE: return _py_op_yield;
// TODO: using "goto" inside with block may cause __exit__ not called
case OP_WITH_ENTER: call(frame->pop_value(this), __enter__); continue;
case OP_WITH_EXIT: call(frame->pop_value(this), __exit__); continue;
case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); continue;
case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); continue;
default: throw std::runtime_error(Str("opcode ") + OP_NAMES[byte.op] + " is not implemented");
}
}
if(frame->co->src->mode == EVAL_MODE || frame->co->src->mode == JSON_MODE){
if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE");
return frame->pop_value(this);
}
#if PK_EXTRA_CHECK
if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE");
inline PyObject* VM::run_frame(Frame* frame){
__NEXT_STEP:;
/* NOTE:
* Be aware of accidental gc!
* DO NOT leave any strong reference of PyObject* in the C stack
* For example, frame->popx() returns a strong reference which may be dangerous
* `Args` containing strong references is safe if it is passed to `call` or `fast_call`
*/
#if !DEBUG_NO_AUTO_GC
heap._auto_collect();
#endif
return None;
const Bytecode& byte = frame->next_bytecode();
#if DEBUG_CEVAL_STEP
std::cout << frame->stack_info() << " " << OP_NAMES[byte.op] << std::endl;
#endif
switch (byte.op)
{
case OP_NO_OP: DISPATCH();
/*****************************************/
case OP_POP_TOP: frame->pop(); DISPATCH();
case OP_DUP_TOP: frame->push(frame->top()); DISPATCH();
case OP_ROT_TWO: std::swap(frame->top(), frame->top_1()); DISPATCH();
case OP_PRINT_EXPR: {
PyObject* obj = frame->top(); // use top() to avoid accidental gc
if(obj != None) *_stdout << CAST(Str&, asRepr(obj)) << '\n';
frame->pop();
} DISPATCH();
/*****************************************/
case OP_LOAD_CONST: frame->push(frame->co->consts[byte.arg]); DISPATCH();
case OP_LOAD_NONE: frame->push(None); DISPATCH();
case OP_LOAD_TRUE: frame->push(True); DISPATCH();
case OP_LOAD_FALSE: frame->push(False); DISPATCH();
case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); DISPATCH();
case OP_LOAD_BUILTIN_EVAL: frame->push(builtins->attr(m_eval)); DISPATCH();
case OP_LOAD_FUNCTION: {
FuncDecl_ decl = frame->co->func_decls[byte.arg];
PyObject* obj = VAR(Function({decl, frame->_module, frame->_locals}));
frame->push(obj);
} DISPATCH();
case OP_LOAD_NULL: frame->push(_py_null); DISPATCH();
/*****************************************/
case OP_LOAD_NAME: {
StrName name = frame->co->names[byte.arg];
PyObject* val;
val = frame->f_locals().try_get(name);
if(val != nullptr) { frame->push(val); DISPATCH(); }
val = frame->f_closure_try_get(name);
if(val != nullptr) { frame->push(val); DISPATCH(); }
val = frame->f_globals().try_get(name);
if(val != nullptr) { frame->push(val); DISPATCH(); }
val = vm->builtins->attr().try_get(name);
if(val != nullptr) { frame->push(val); DISPATCH(); }
vm->NameError(name);
} DISPATCH();
case OP_LOAD_GLOBAL: {
StrName name = frame->co->names[byte.arg];
PyObject* val = frame->f_globals().try_get(name);
if(val != nullptr) { frame->push(val); DISPATCH(); }
val = vm->builtins->attr().try_get(name);
if(val != nullptr) { frame->push(val); DISPATCH(); }
vm->NameError(name);
} DISPATCH();
case OP_LOAD_ATTR: {
PyObject* a = frame->top();
StrName name = frame->co->names[byte.arg];
frame->top() = getattr(a, name);
} DISPATCH();
case OP_LOAD_METHOD: {
PyObject* a = frame->top();
StrName name = frame->co->names[byte.arg];
PyObject* self;
frame->top() = get_unbound_method(a, name, &self, true, true);
frame->push(self);
} DISPATCH();
case OP_LOAD_SUBSCR: {
Args args(2);
args[1] = frame->popx(); // b
args[0] = frame->top(); // a
frame->top() = fast_call(__getitem__, std::move(args));
} DISPATCH();
case OP_STORE_LOCAL: {
StrName name = frame->co->names[byte.arg];
frame->f_locals().set(name, frame->popx());
} DISPATCH();
case OP_STORE_GLOBAL: {
StrName name = frame->co->names[byte.arg];
frame->f_globals().set(name, frame->popx());
} DISPATCH();
case OP_STORE_ATTR: {
StrName name = frame->co->names[byte.arg];
PyObject* a = frame->top();
PyObject* val = frame->top_1();
setattr(a, name, val);
frame->pop_n(2);
} DISPATCH();
case OP_STORE_SUBSCR: {
Args args(3);
args[1] = frame->popx(); // b
args[0] = frame->popx(); // a
args[2] = frame->popx(); // val
fast_call(__setitem__, std::move(args));
} DISPATCH();
case OP_DELETE_LOCAL: {
StrName name = frame->co->names[byte.arg];
if(frame->f_locals().contains(name)){
frame->f_locals().erase(name);
}else{
NameError(name);
}
} DISPATCH();
case OP_DELETE_GLOBAL: {
StrName name = frame->co->names[byte.arg];
if(frame->f_globals().contains(name)){
frame->f_globals().erase(name);
}else{
NameError(name);
}
} DISPATCH();
case OP_DELETE_ATTR: {
PyObject* a = frame->popx();
StrName name = frame->co->names[byte.arg];
if(!a->is_attr_valid()) TypeError("cannot delete attribute");
if(!a->attr().contains(name)) AttributeError(a, name);
a->attr().erase(name);
} DISPATCH();
case OP_DELETE_SUBSCR: {
PyObject* b = frame->popx();
PyObject* a = frame->popx();
fast_call(__delitem__, Args{a, b});
} DISPATCH();
/*****************************************/
case OP_BUILD_LIST:
frame->push(VAR(frame->popx_n_reversed(byte.arg).to_list()));
DISPATCH();
case OP_BUILD_DICT: {
PyObject* t = VAR(frame->popx_n_reversed(byte.arg));
PyObject* obj = call(builtins->attr(m_dict), Args{t});
frame->push(obj);
} DISPATCH();
case OP_BUILD_SET: {
PyObject* t = VAR(frame->popx_n_reversed(byte.arg));
PyObject* obj = call(builtins->attr(m_set), Args{t});
frame->push(obj);
} DISPATCH();
case OP_BUILD_SLICE: {
PyObject* step = frame->popx();
PyObject* stop = frame->popx();
PyObject* start = frame->popx();
Slice s;
if(start != None) s.start = CAST(int, start);
if(stop != None) s.stop = CAST(int, stop);
if(step != None) s.step = CAST(int, step);
frame->push(VAR(s));
} DISPATCH();
case OP_BUILD_TUPLE: {
Tuple items = frame->popx_n_reversed(byte.arg);
frame->push(VAR(std::move(items)));
} DISPATCH();
case OP_BUILD_STRING: {
std::stringstream ss; // asStr() may run extra bytecode
for(int i=byte.arg-1; i>=0; i--) ss << CAST(Str&, asStr(frame->top_n(i)));
frame->pop_n(byte.arg);
frame->push(VAR(ss.str()));
} DISPATCH();
/*****************************************/
case OP_BINARY_OP: {
Args args(2);
args[1] = frame->popx(); // lhs
args[0] = frame->top(); // rhs
frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args));
} DISPATCH();
case OP_COMPARE_OP: {
Args args(2);
args[1] = frame->popx(); // lhs
args[0] = frame->top(); // rhs
frame->top() = fast_call(COMPARE_SPECIAL_METHODS[byte.arg], std::move(args));
} DISPATCH();
case OP_BITWISE_OP: {
Args args(2);
args[1] = frame->popx(); // lhs
args[0] = frame->top(); // rhs
frame->top() = fast_call(BITWISE_SPECIAL_METHODS[byte.arg], std::move(args));
} DISPATCH();
case OP_IS_OP: {
PyObject* rhs = frame->popx();
PyObject* lhs = frame->top();
bool ret_c = lhs == rhs;
if(byte.arg == 1) ret_c = !ret_c;
frame->top() = VAR(ret_c);
} DISPATCH();
case OP_CONTAINS_OP: {
Args args(2);
args[0] = frame->popx();
args[1] = frame->top();
PyObject* ret = fast_call(__contains__, std::move(args));
bool ret_c = CAST(bool, ret);
if(byte.arg == 1) ret_c = !ret_c;
frame->top() = VAR(ret_c);
} DISPATCH();
/*****************************************/
case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); DISPATCH();
case OP_POP_JUMP_IF_FALSE:
if(!asBool(frame->popx())) frame->jump_abs(byte.arg);
DISPATCH();
case OP_JUMP_IF_TRUE_OR_POP:
if(asBool(frame->top()) == true) frame->jump_abs(byte.arg);
else frame->pop();
DISPATCH();
case OP_JUMP_IF_FALSE_OR_POP:
if(asBool(frame->top()) == false) frame->jump_abs(byte.arg);
else frame->pop();
DISPATCH();
case OP_LOOP_CONTINUE: {
int target = frame->co->blocks[byte.block].start;
frame->jump_abs(target);
} DISPATCH();
case OP_LOOP_BREAK: {
int target = frame->co->blocks[byte.block].end;
frame->jump_abs_break(target);
} DISPATCH();
case OP_GOTO: {
StrName label = frame->co->names[byte.arg];
auto it = frame->co->labels.find(label);
if(it == frame->co->labels.end()) _error("KeyError", fmt("label ", label.escape(), " not found"));
frame->jump_abs_break(it->second);
} DISPATCH();
/*****************************************/
// TODO: examine this later
case OP_CALL: case OP_CALL_UNPACK: {
int ARGC = byte.arg;
bool method_call = frame->top_n(ARGC) != _py_null;
if(method_call) ARGC++; // add self into args
Args args = frame->popx_n_reversed(ARGC);
if(!method_call) frame->pop();
if(byte.op == OP_CALL_UNPACK) unpack_args(args);
PyObject* callable = frame->popx();
PyObject* ret = call(callable, std::move(args), no_arg(), true);
if(ret == _py_op_call) return ret;
frame->push(std::move(ret));
} DISPATCH();
case OP_CALL_KWARGS: case OP_CALL_KWARGS_UNPACK: {
int ARGC = byte.arg & 0xFFFF;
int KWARGC = (byte.arg >> 16) & 0xFFFF;
Args kwargs = frame->popx_n_reversed(KWARGC*2);
bool method_call = frame->top_n(ARGC) != _py_null;
if(method_call) ARGC++; // add self into args
Args args = frame->popx_n_reversed(ARGC);
if(!method_call) frame->pop();
if(byte.op == OP_CALL_KWARGS_UNPACK) unpack_args(args);
PyObject* callable = frame->popx();
PyObject* ret = call(callable, std::move(args), kwargs, true);
if(ret == _py_op_call) return ret;
frame->push(std::move(ret));
} DISPATCH();
case OP_RETURN_VALUE: return frame->popx();
case OP_YIELD_VALUE: return _py_op_yield;
/*****************************************/
case OP_LIST_APPEND: {
PyObject* obj = frame->popx();
List& list = CAST(List&, frame->top_1());
list.push_back(obj);
} DISPATCH();
case OP_DICT_ADD: {
PyObject* kv = frame->popx();
Tuple& t = CAST(Tuple& ,kv);
fast_call(__setitem__, Args{frame->top_1(), t[0], t[1]});
} DISPATCH();
case OP_SET_ADD: {
PyObject* obj = frame->popx();
fast_call(m_add, Args{frame->top_1(), obj});
} DISPATCH();
/*****************************************/
case OP_UNARY_NEGATIVE:
frame->top() = num_negated(frame->top());
DISPATCH();
case OP_UNARY_NOT:
frame->top() = VAR(!asBool(frame->top()));
DISPATCH();
case OP_UNARY_STAR:
frame->top() = VAR(StarWrapper(frame->top()));
DISPATCH();
/*****************************************/
case OP_GET_ITER:
frame->top() = asIter(frame->top());
DISPATCH();
case OP_FOR_ITER: {
BaseIter* it = PyIter_AS_C(frame->top());
PyObject* obj = it->next();
if(obj != nullptr){
frame->push(obj);
}else{
int target = frame->co->blocks[byte.block].end;
frame->jump_abs_break(target);
}
} DISPATCH();
/*****************************************/
case OP_IMPORT_NAME: {
StrName name = frame->co->names[byte.arg];
PyObject* ext_mod = _modules.try_get(name);
if(ext_mod == nullptr){
Str source;
auto it = _lazy_modules.find(name);
if(it == _lazy_modules.end()){
bool ok = false;
source = _read_file_cwd(fmt(name, ".py"), &ok);
if(!ok) _error("ImportError", fmt("module ", name.escape(), " not found"));
}else{
source = it->second;
_lazy_modules.erase(it);
}
CodeObject_ code = compile(source, name.sv(), EXEC_MODE);
PyObject* new_mod = new_module(name);
_exec(code, new_mod);
new_mod->attr()._try_perfect_rehash();
frame->push(new_mod);
}else{
frame->push(ext_mod);
}
} DISPATCH();
case OP_IMPORT_STAR: {
PyObject* obj = frame->popx();
for(auto& [name, value]: obj->attr().items()){
std::string_view s = name.sv();
if(s.empty() || s[0] == '_') continue;
frame->f_globals().set(name, value);
}
}; DISPATCH();
/*****************************************/
case OP_UNPACK_SEQUENCE: case OP_UNPACK_EX: {
// asIter or iter->next may run bytecode, so an accidental gc may happen
auto _lock = heap.gc_scope_lock(); // lock the gc via RAII!!
PyObject* obj = asIter(frame->popx());
BaseIter* iter = PyIter_AS_C(obj);
for(int i=0; i<byte.arg; i++){
PyObject* item = iter->next();
if(item == nullptr) ValueError("not enough values to unpack");
frame->push(item);
}
// handle extra items
if(byte.op == OP_UNPACK_EX){
List extras;
while(true){
PyObject* item = iter->next();
if(item == nullptr) break;
extras.push_back(item);
}
frame->push(VAR(extras));
}else{
if(iter->next() != nullptr) ValueError("too many values to unpack");
}
}; DISPATCH();
/*****************************************/
case OP_BEGIN_CLASS: {
StrName name = frame->co->names[byte.arg];
PyObject* super_cls = frame->popx();
if(super_cls == None) super_cls = _t(tp_object);
check_type(super_cls, tp_type);
PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, super_cls));
frame->push(cls);
} DISPATCH();
case OP_END_CLASS: {
PyObject* cls = frame->popx();
cls->attr()._try_perfect_rehash();
}; DISPATCH();
case OP_STORE_CLASS_ATTR: {
StrName name = frame->co->names[byte.arg];
PyObject* obj = frame->popx();
PyObject* cls = frame->top();
cls->attr().set(name, obj);
} DISPATCH();
/*****************************************/
// // TODO: using "goto" inside with block may cause __exit__ not called
// case OP_WITH_ENTER: call(frame->pop_value(this), __enter__, no_arg()); DISPATCH();
// case OP_WITH_EXIT: call(frame->pop_value(this), __exit__, no_arg()); DISPATCH();
/*****************************************/
case OP_TRY_BLOCK_ENTER: frame->on_try_block_enter(); DISPATCH();
case OP_TRY_BLOCK_EXIT: frame->on_try_block_exit(); DISPATCH();
/*****************************************/
case OP_ASSERT: {
PyObject* obj = frame->top();
Str msg;
if(is_type(obj, tp_tuple)){
auto& t = CAST(Tuple&, obj);
if(t.size() != 2) ValueError("assert tuple must have 2 elements");
obj = t[0];
msg = CAST(Str&, asStr(t[1]));
}
bool ok = asBool(obj);
frame->pop();
if(!ok) _error("AssertionError", msg);
} DISPATCH();
case OP_EXCEPTION_MATCH: {
const auto& e = CAST(Exception&, frame->top());
StrName name = frame->co->names[byte.arg];
frame->push(VAR(e.match_type(name)));
} DISPATCH();
case OP_RAISE: {
PyObject* obj = frame->popx();
Str msg = obj == None ? "" : CAST(Str, asStr(obj));
StrName type = frame->co->names[byte.arg];
_error(type, msg);
} DISPATCH();
case OP_RE_RAISE: _raise(); DISPATCH();
default: throw std::runtime_error(fmt(OP_NAMES[byte.op], " is not implemented"));
}
UNREACHABLE();
}
#undef DISPATCH
} // namespace pkpy

View File

@ -2,8 +2,6 @@
#include "common.h"
#include "vm.h"
#include <type_traits>
#include <vector>
namespace pkpy {
@ -14,7 +12,7 @@ struct NativeProxyFunc {
_Fp func;
NativeProxyFunc(_Fp func) : func(func) {}
PyVar operator()(VM* vm, Args& args) {
PyObject* operator()(VM* vm, Args& args) {
if (args.size() != N) {
vm->TypeError("expected " + std::to_string(N) + " arguments, but got " + std::to_string(args.size()));
}
@ -22,13 +20,13 @@ struct NativeProxyFunc {
}
template<typename __Ret, size_t... Is>
std::enable_if_t<std::is_void_v<__Ret>, PyVar> call(VM* vm, Args& args, std::index_sequence<Is...>) {
std::enable_if_t<std::is_void_v<__Ret>, PyObject*> call(VM* vm, Args& args, std::index_sequence<Is...>) {
func(py_cast<Params>(vm, args[Is])...);
return vm->None;
}
template<typename __Ret, size_t... Is>
std::enable_if_t<!std::is_void_v<__Ret>, PyVar> call(VM* vm, Args& args, std::index_sequence<Is...>) {
std::enable_if_t<!std::is_void_v<__Ret>, PyObject*> call(VM* vm, Args& args, std::index_sequence<Is...>) {
__Ret ret = func(py_cast<Params>(vm, args[Is])...);
return VAR(std::move(ret));
}
@ -41,7 +39,7 @@ struct NativeProxyMethod {
_Fp func;
NativeProxyMethod(_Fp func) : func(func) {}
PyVar operator()(VM* vm, Args& args) {
PyObject* operator()(VM* vm, Args& args) {
int actual_size = args.size() - 1;
if (actual_size != N) {
vm->TypeError("expected " + std::to_string(N) + " arguments, but got " + std::to_string(actual_size));
@ -50,14 +48,14 @@ struct NativeProxyMethod {
}
template<typename __Ret, size_t... Is>
std::enable_if_t<std::is_void_v<__Ret>, PyVar> call(VM* vm, Args& args, std::index_sequence<Is...>) {
std::enable_if_t<std::is_void_v<__Ret>, PyObject*> call(VM* vm, Args& args, std::index_sequence<Is...>) {
T& self = py_cast<T&>(vm, args[0]);
(self.*func)(py_cast<Params>(vm, args[Is+1])...);
return vm->None;
}
template<typename __Ret, size_t... Is>
std::enable_if_t<!std::is_void_v<__Ret>, PyVar> call(VM* vm, Args& args, std::index_sequence<Is...>) {
std::enable_if_t<!std::is_void_v<__Ret>, PyObject*> call(VM* vm, Args& args, std::index_sequence<Is...>) {
T& self = py_cast<T&>(vm, args[0]);
__Ret ret = (self.*func)(py_cast<Params>(vm, args[Is+1])...);
return VAR(std::move(ret));
@ -133,14 +131,14 @@ struct TypeDB{
return index == 0 ? nullptr : &_by_index[index-1];
}
const TypeInfo* get(const char name[]) const {
const TypeInfo* get(std::string_view name) const {
auto it = _by_name.find(name);
if(it == _by_name.end()) return nullptr;
return get(it->second);
}
const TypeInfo* get(const Str& s) const {
return get(s.c_str());
return get(s.sv());
}
template<typename T>
@ -152,7 +150,7 @@ struct TypeDB{
static TypeDB _type_db;
auto _ = [](){
inline static auto ___x = [](){
#define REGISTER_BASIC_TYPE(T) _type_db.register_type<T>(#T, {});
_type_db.register_type<void>("void", {});
REGISTER_BASIC_TYPE(char);
@ -200,12 +198,12 @@ struct Pointer{
return Pointer(ctype, level, ptr-offset*unit_size());
}
static void _register(VM* vm, PyVar mod, PyVar type){
static void _register(VM* vm, PyObject* mod, PyObject* type){
vm->bind_static_method<-1>(type, "__new__", CPP_NOT_IMPLEMENTED());
vm->bind_method<0>(type, "__repr__", [](VM* vm, Args& args) {
Pointer& self = CAST(Pointer&, args[0]);
StrStream ss;
std::stringstream ss;
ss << "<" << self.ctype->name;
for(int i=0; i<self.level; i++) ss << "*";
ss << " at " << (i64)self.ptr << ">";
@ -266,9 +264,9 @@ struct Pointer{
}
template<typename T>
inline T& ref() noexcept { return *reinterpret_cast<T*>(ptr); }
T& ref() noexcept { return *reinterpret_cast<T*>(ptr); }
PyVar get(VM* vm){
PyObject* get(VM* vm){
if(level > 1) return VAR_T(Pointer, ctype, level-1, ref<char*>());
switch(ctype->index){
#define CASE(T) case type_index<T>(): return VAR(ref<T>())
@ -291,7 +289,7 @@ struct Pointer{
return VAR_T(Pointer, *this);
}
void set(VM* vm, const PyVar& val){
void set(VM* vm, PyObject* val){
if(level > 1) {
Pointer& p = CAST(Pointer&, val);
ref<char*>() = p.ptr; // We don't check the type, just copy the underlying address
@ -321,7 +319,7 @@ struct Pointer{
Pointer _to(VM* vm, StrName name){
auto it = ctype->members.find(name);
if(it == ctype->members.end()){
vm->AttributeError(Str("struct '") + ctype->name + "' has no member " + name.str().escape(true));
vm->AttributeError(fmt("struct '", ctype->name, "' has no member ", name.escape()));
}
const MemberInfo& info = it->second;
return {info.type, level, ptr+info.offset};
@ -359,7 +357,7 @@ struct Value {
Value& operator=(const Value& other) = delete;
Value(const Value& other) = delete;
static void _register(VM* vm, PyVar mod, PyVar type){
static void _register(VM* vm, PyObject* mod, PyObject* type){
vm->bind_static_method<-1>(type, "__new__", CPP_NOT_IMPLEMENTED());
vm->bind_method<0>(type, "ptr", [](VM* vm, Args& args) {
@ -388,11 +386,11 @@ struct CType{
CType() : type(_type_db.get<void>()) {}
CType(const TypeInfo* type) : type(type) {}
static void _register(VM* vm, PyVar mod, PyVar type){
static void _register(VM* vm, PyObject* mod, PyObject* type){
vm->bind_static_method<1>(type, "__new__", [](VM* vm, Args& args) {
const Str& name = CAST(Str&, args[0]);
const TypeInfo* type = _type_db.get(name);
if(type == nullptr) vm->TypeError("unknown type: " + name.escape(true));
if(type == nullptr) vm->TypeError("unknown type: " + name.escape());
return VAR_T(CType, type);
});
@ -403,9 +401,9 @@ struct CType{
}
};
void add_module_c(VM* vm){
PyVar mod = vm->new_module("c");
PyVar ptr_t = Pointer::register_class(vm, mod);
inline void add_module_c(VM* vm){
PyObject* mod = vm->new_module("c");
Pointer::register_class(vm, mod);
Value::register_class(vm, mod);
CType::register_class(vm, mod);
@ -434,22 +432,22 @@ void add_module_c(VM* vm){
Pointer& self = CAST(Pointer&, args[0]);
const Str& name = CAST(Str&, args[1]);
int level = 0;
for(int i=name.size()-1; i>=0; i--){
for(int i=name.length()-1; i>=0; i--){
if(name[i] == '*') level++;
else break;
}
if(level == 0) vm->TypeError("expect a pointer type, such as 'int*'");
Str type_s = name.substr(0, name.size()-level);
Str type_s = name.substr(0, name.length()-level);
const TypeInfo* type = _type_db.get(type_s);
if(type == nullptr) vm->TypeError("unknown type: " + type_s.escape(true));
if(type == nullptr) vm->TypeError("unknown type: " + type_s.escape());
return VAR_T(Pointer, type, level, self.ptr);
});
vm->bind_func<1>(mod, "sizeof", [](VM* vm, Args& args) {
const Str& name = CAST(Str&, args[0]);
if(name.find('*') != Str::npos) return VAR(sizeof(void*));
if(name.index("*") != -1) return VAR(sizeof(void*));
const TypeInfo* type = _type_db.get(name);
if(type == nullptr) vm->TypeError("unknown type: " + name.escape(true));
if(type == nullptr) vm->TypeError("unknown type: " + name.escape());
return VAR(type->size);
});
@ -462,11 +460,11 @@ void add_module_c(VM* vm){
});
}
PyVar py_var(VM* vm, void* p){
inline PyObject* py_var(VM* vm, void* p){
return VAR_T(Pointer, _type_db.get<void>(), (char*)p);
}
PyVar py_var(VM* vm, char* p){
inline PyObject* py_var(VM* vm, char* p){
return VAR_T(Pointer, _type_db.get<char>(), (char*)p);
}
@ -491,7 +489,7 @@ struct pointer {
};
template<typename T>
T py_pointer_cast(VM* vm, const PyVar& var){
T py_pointer_cast(VM* vm, PyObject* var){
static_assert(std::is_pointer_v<T>);
Pointer& p = CAST(Pointer&, var);
const TypeInfo* type = _type_db.get<typename pointer<T>::baseT>();
@ -503,14 +501,14 @@ T py_pointer_cast(VM* vm, const PyVar& var){
}
template<typename T>
T py_value_cast(VM* vm, const PyVar& var){
T py_value_cast(VM* vm, PyObject* var){
static_assert(std::is_pod_v<T>);
Value& v = CAST(Value&, var);
return *reinterpret_cast<T*>(v.data);
}
template<typename T>
std::enable_if_t<std::is_pointer_v<std::decay_t<T>>, PyVar>
std::enable_if_t<std::is_pointer_v<std::decay_t<T>>, PyObject*>
py_var(VM* vm, T p){
const TypeInfo* type = _type_db.get<typename pointer<T>::baseT>();
if(type == nullptr) type = _type_db.get<void>();
@ -518,9 +516,9 @@ py_var(VM* vm, T p){
}
template<typename T>
std::enable_if_t<!std::is_pointer_v<std::decay_t<T>>, PyVar>
std::enable_if_t<!std::is_pointer_v<std::decay_t<T>>, PyObject*>
py_var(VM* vm, T p){
if constexpr(std::is_same_v<T, PyVar>) return p;
if constexpr(std::is_same_v<T, PyObject*>) return p;
const TypeInfo* type = _type_db.get<T>();
return VAR_T(Value, type, &p);
}

View File

@ -5,12 +5,7 @@
namespace pkpy{
enum NameScope {
NAME_LOCAL = 0,
NAME_GLOBAL,
NAME_ATTR,
NAME_SPECIAL,
};
enum NameScope { NAME_LOCAL, NAME_GLOBAL };
enum Opcode {
#define OPCODE(name) OP_##name,
@ -18,24 +13,19 @@ enum Opcode {
#undef OPCODE
};
static const char* OP_NAMES[] = {
inline const char* OP_NAMES[] = {
#define OPCODE(name) #name,
#include "opcodes.h"
#undef OPCODE
};
struct Bytecode{
uint8_t op;
uint16_t op;
uint16_t block;
int arg;
int line;
uint16_t block;
};
Str pad(const Str& s, const int n){
if(s.size() >= n) return s.substr(0, n);
return s + std::string(n - s.size(), ' ');
}
enum CodeBlockType {
NO_BLOCK,
FOR_LOOP,
@ -44,16 +34,14 @@ enum CodeBlockType {
TRY_EXCEPT,
};
#define BC_NOARG -1
#define BC_KEEPLINE -1
struct CodeBlock {
CodeBlockType type;
int parent; // parent index in blocks
int start; // start index of this block in codes, inclusive
int end; // end index of this block in codes, exclusive
std::string to_string() const {
if(parent == -1) return "";
return "[B:" + std::to_string(type) + "]";
}
};
struct CodeObject {
@ -68,57 +56,22 @@ struct CodeObject {
std::vector<Bytecode> codes;
List consts;
std::vector<std::pair<StrName, NameScope>> names;
std::map<StrName, int> global_names;
std::vector<StrName> names;
std::set<Str> global_names;
std::vector<CodeBlock> blocks = { CodeBlock{NO_BLOCK, -1} };
std::map<StrName, int> labels;
std::vector<FuncDecl_> func_decls;
// may be.. just use a large NameDict?
uint32_t perfect_locals_capacity = 2;
uint32_t perfect_hash_seed = 0;
void optimize(VM* vm);
bool add_label(StrName label){
if(labels.count(label)) return false;
labels[label] = codes.size();
return true;
void _gc_mark() const {
for(PyObject* v : consts) OBJ_MARK(v);
for(auto& decl: func_decls) decl->_gc_mark();
}
int add_name(StrName name, NameScope scope){
if(scope == NAME_LOCAL && global_names.count(name)) scope = NAME_GLOBAL;
auto p = std::make_pair(name, scope);
for(int i=0; i<names.size(); i++){
if(names[i] == p) return i;
}
names.push_back(p);
return names.size() - 1;
}
int add_const(PyVar v){
consts.push_back(v);
return consts.size() - 1;
}
/************************************************/
int _curr_block_i = 0;
int _rvalue = 0;
bool _is_compiling_class = false;
bool _is_curr_block_loop() const {
return blocks[_curr_block_i].type == FOR_LOOP || blocks[_curr_block_i].type == WHILE_LOOP;
}
void _enter_block(CodeBlockType type){
blocks.push_back(CodeBlock{type, _curr_block_i, (int)codes.size()});
_curr_block_i = blocks.size()-1;
}
void _exit_block(){
blocks[_curr_block_i].end = codes.size();
_curr_block_i = blocks[_curr_block_i].parent;
if(_curr_block_i < 0) UNREACHABLE();
}
/************************************************/
};
} // namespace pkpy

View File

@ -3,22 +3,20 @@
#ifdef _MSC_VER
#pragma warning (disable:4267)
#pragma warning (disable:4101)
#pragma warning (disable:4244)
#define _CRT_NONSTDC_NO_DEPRECATE
#define strdup _strdup
#endif
#include <sstream>
#include <regex>
#include <stack>
#include <cmath>
#include <cstdlib>
#include <stdexcept>
#include <vector>
#include <string>
#include <cstring>
#include <chrono>
#include <string_view>
#include <queue>
#include <iomanip>
#include <memory>
#include <functional>
@ -27,52 +25,60 @@
#include <set>
#include <algorithm>
#include <random>
#include <chrono>
#include <initializer_list>
#include <variant>
#include <type_traits>
#define PK_VERSION "0.9.5"
#define PK_EXTRA_CHECK 0
#define PK_VERSION "0.9.7"
// debug macros
#define DEBUG_NO_BUILTIN_MODULES 0
#define DEBUG_EXTRA_CHECK 0
#define DEBUG_DIS_EXEC 0
#define DEBUG_DIS_EXEC_MIN 1
#define DEBUG_CEVAL_STEP 0
#define DEBUG_FULL_EXCEPTION 0
#define DEBUG_MEMORY_POOL 0
#define DEBUG_NO_MEMORY_POOL 0
#define DEBUG_NO_AUTO_GC 0
#define DEBUG_GC_STATS 0
#if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__)
#define PK_ENABLE_FILEIO 0
#else
#define PK_ENABLE_FILEIO 1
#define PK_ENABLE_FILEIO 0 // TODO: refactor this
#endif
#if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__)
typedef int32_t i64;
typedef float f64;
#define S_TO_INT std::stoi
#define S_TO_FLOAT std::stof
#define S_TO_INT(...) static_cast<i64>(std::stoi(__VA_ARGS__))
#define S_TO_FLOAT(...) static_cast<f64>(std::stof(__VA_ARGS__))
#else
typedef int64_t i64;
typedef double f64;
#define S_TO_INT std::stoll
#define S_TO_FLOAT std::stod
#define S_TO_INT(...) static_cast<i64>(std::stoll(__VA_ARGS__))
#define S_TO_FLOAT(...) static_cast<f64>(std::stod(__VA_ARGS__))
#endif
namespace pkpy{
namespace std = ::std;
struct Dummy { };
struct DummyInstance { };
struct Dummy { };
struct DummyInstance { };
struct DummyModule { };
#define DUMMY_VAL Dummy()
struct Type {
int index;
Type(): index(-1) {}
Type(int index): index(index) {}
inline bool operator==(Type other) const noexcept {
return this->index == other.index;
}
inline bool operator!=(Type other) const noexcept {
return this->index != other.index;
}
bool operator==(Type other) const noexcept { return this->index == other.index; }
bool operator!=(Type other) const noexcept { return this->index != other.index; }
operator int() const noexcept { return this->index; }
};
//#define THREAD_LOCAL thread_local
#define THREAD_LOCAL
#define THREAD_LOCAL // thread_local
#define CPP_LAMBDA(x) ([](VM* vm, Args& args) { return x; })
#define CPP_NOT_IMPLEMENTED() ([](VM* vm, Args& args) { vm->NotImplementedError(); return vm->None; })
@ -82,7 +88,30 @@ struct Type {
#define UNREACHABLE() throw std::runtime_error( __FILE__ + std::string(":") + std::to_string(__LINE__) + " UNREACHABLE()!");
#endif
const float kLocalsLoadFactor = 0.67f;
const float kInstAttrLoadFactor = 0.67f;
const float kTypeAttrLoadFactor = 0.5f;
inline const float kLocalsLoadFactor = 0.67f;
inline const float kInstAttrLoadFactor = 0.67f;
inline const float kTypeAttrLoadFactor = 0.5f;
static_assert(sizeof(i64) == sizeof(int*));
static_assert(sizeof(f64) == sizeof(int*));
static_assert(std::numeric_limits<float>::is_iec559);
static_assert(std::numeric_limits<double>::is_iec559);
struct PyObject;
#define BITS(p) (reinterpret_cast<i64>(p))
inline bool is_tagged(PyObject* p) noexcept { return (BITS(p) & 0b11) != 0b00; }
inline bool is_int(PyObject* p) noexcept { return (BITS(p) & 0b11) == 0b01; }
inline bool is_float(PyObject* p) noexcept { return (BITS(p) & 0b11) == 0b10; }
inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept {
return is_tagged(a) && is_tagged(b);
}
inline bool is_both_int(PyObject* a, PyObject* b) noexcept {
return is_int(a) && is_int(b);
}
struct Expr;
typedef std::unique_ptr<Expr> Expr_;
} // namespace pkpy

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
#pragma once
#include "namedict.h"
#include "str.h"
#include "tuplelist.h"
namespace pkpy{
@ -22,7 +23,7 @@ enum CompileMode {
};
struct SourceData {
const char* source;
std::string source;
Str filename;
std::vector<const char*> line_starts;
CompileMode mode;
@ -37,25 +38,32 @@ struct SourceData {
return {_start, i};
}
SourceData(const char* source, Str filename, CompileMode mode) {
source = strdup(source);
SourceData(const Str& source, const Str& filename, CompileMode mode) {
int index = 0;
// Skip utf8 BOM if there is any.
if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
if (strncmp(source.begin(), "\xEF\xBB\xBF", 3) == 0) index += 3;
// Remove all '\r'
std::stringstream ss;
while(index < source.length()){
if(source[index] != '\r') ss << source[index];
index++;
}
this->filename = filename;
this->source = source;
line_starts.push_back(source);
this->source = ss.str();
line_starts.push_back(this->source.c_str());
this->mode = mode;
}
Str snapshot(int lineno, const char* cursor=nullptr){
StrStream ss;
std::stringstream ss;
ss << " " << "File \"" << filename << "\", line " << lineno << '\n';
std::pair<const char*,const char*> pair = get_line(lineno);
Str line = "<?>";
int removed_spaces = 0;
if(pair.first && pair.second){
line = Str(pair.first, pair.second-pair.first).lstrip();
removed_spaces = pair.second - pair.first - line.size();
removed_spaces = pair.second - pair.first - line.length();
if(line.empty()) line = "<?>";
}
ss << " " << line;
@ -65,14 +73,13 @@ struct SourceData {
}
return ss.str();
}
~SourceData() { free((void*)source); }
};
class Exception {
using StackTrace = stack<Str>;
StrName type;
Str msg;
std::stack<Str> stacktrace;
StackTrace stacktrace;
public:
Exception(StrName type, Str msg): type(type), msg(msg) {}
bool match_type(StrName type) const { return this->type == type;}
@ -84,12 +91,12 @@ public:
}
Str summary() const {
std::stack<Str> st(stacktrace);
StrStream ss;
StackTrace st(stacktrace);
std::stringstream ss;
if(is_re) ss << "Traceback (most recent call last):\n";
while(!st.empty()) { ss << st.top() << '\n'; st.pop(); }
if (!msg.empty()) ss << type.str() << ": " << msg;
else ss << type.str();
if (!msg.empty()) ss << type.sv() << ": " << msg;
else ss << type.sv();
return ss.str();
}
};

701
src/expr.h Normal file
View File

@ -0,0 +1,701 @@
#pragma once
#include "codeobject.h"
#include "common.h"
#include "lexer.h"
#include "error.h"
#include "ceval.h"
#include "str.h"
namespace pkpy{
struct CodeEmitContext;
// Abstract base of every expression node produced by the compiler.
// A node knows how to emit bytecode for loading its value and, optionally,
// for being used as a deletion or assignment target.
struct Expr{
// Source line passed to CodeEmitContext::emit for the generated bytecode.
int line = 0;
virtual ~Expr() = default;
// Emit the bytecode that evaluates this expression.
virtual void emit(CodeEmitContext* ctx) = 0;
// Short textual form of the node (used by CodeEmitContext::_log_s_expr).
virtual std::string str() const = 0;
// Node-kind predicates; each defaults to false and is overridden by the
// concrete node types that qualify.
virtual bool is_starred() const { return false; }
virtual bool is_literal() const { return false; }
virtual bool is_json_object() const { return false; }
virtual bool is_attrib() const { return false; }
// for OP_DELETE_XXX
// Returns false when the node cannot be a deletion target (the default).
[[nodiscard]] virtual bool emit_del(CodeEmitContext* ctx) { return false; }
// for OP_STORE_XXX
// Returns false when the node cannot be an assignment target (the default).
[[nodiscard]] virtual bool emit_store(CodeEmitContext* ctx) { return false; }
};
struct CodeEmitContext{
VM* vm;
CodeObject_ co;
stack<Expr_> s_expr;
CodeEmitContext(VM* vm, CodeObject_ co): vm(vm), co(co) {}
int curr_block_i = 0;
bool is_compiling_class = false;
bool is_curr_block_loop() const {
return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP;
}
void enter_block(CodeBlockType type){
co->blocks.push_back(CodeBlock{
type, curr_block_i, (int)co->codes.size()
});
curr_block_i = co->blocks.size()-1;
}
void exit_block(){
co->blocks[curr_block_i].end = co->codes.size();
curr_block_i = co->blocks[curr_block_i].parent;
if(curr_block_i < 0) UNREACHABLE();
}
// clear the expression stack and generate bytecode
void emit_expr(){
if(s_expr.size() != 1){
throw std::runtime_error("s_expr.size() != 1\n" + _log_s_expr());
}
Expr_ expr = s_expr.popx();
expr->emit(this);
}
std::string _log_s_expr(){
std::stringstream ss;
for(auto& e: s_expr.data()) ss << e->str() << " ";
return ss.str();
}
int emit(Opcode opcode, int arg, int line) {
co->codes.push_back(
Bytecode{(uint16_t)opcode, (uint16_t)curr_block_i, arg, line}
);
int i = co->codes.size() - 1;
if(line==BC_KEEPLINE){
if(i>=1) co->codes[i].line = co->codes[i-1].line;
else co->codes[i].line = 1;
}
return i;
}
void patch_jump(int index) {
int target = co->codes.size();
co->codes[index].arg = target;
}
bool add_label(StrName label){
if(co->labels.count(label)) return false;
co->labels[label] = co->codes.size();
return true;
}
int add_name(StrName name){
for(int i=0; i<co->names.size(); i++){
if(co->names[i] == name) return i;
}
co->names.push_back(name);
return co->names.size() - 1;
}
int add_const(PyObject* v){
co->consts.push_back(v);
return co->consts.size() - 1;
}
int add_func_decl(FuncDecl_ decl){
co->func_decls.push_back(decl);
return co->func_decls.size() - 1;
}
};
// PASS
// A bare identifier together with the scope it was resolved to.
struct NameExpr: Expr{
    StrName name;
    NameScope scope;
    NameExpr(StrName name, NameScope scope): name(name), scope(scope) {}

    std::string str() const override { return fmt("Name(", name.escape(), ")"); }

    // Loading does not depend on `scope`: a single OP_LOAD_NAME is emitted.
    void emit(CodeEmitContext* ctx) override {
        ctx->emit(OP_LOAD_NAME, ctx->add_name(name), line);
    }

    // Emit OP_DELETE_LOCAL or OP_DELETE_GLOBAL according to `scope`.
    bool emit_del(CodeEmitContext* ctx) override {
        const int index = ctx->add_name(name);
        if(scope == NAME_LOCAL){
            ctx->emit(OP_DELETE_LOCAL, index, line);
        }else if(scope == NAME_GLOBAL){
            ctx->emit(OP_DELETE_GLOBAL, index, line);
        }else{
            UNREACHABLE();
        }
        return true;
    }

    // Inside a class body the store becomes OP_STORE_CLASS_ATTR regardless
    // of `scope`; otherwise OP_STORE_LOCAL / OP_STORE_GLOBAL is chosen.
    bool emit_store(CodeEmitContext* ctx) override {
        const int index = ctx->add_name(name);
        if(ctx->is_compiling_class){
            ctx->emit(OP_STORE_CLASS_ATTR, index, line);
        }else if(scope == NAME_LOCAL){
            ctx->emit(OP_STORE_LOCAL, index, line);
        }else if(scope == NAME_GLOBAL){
            ctx->emit(OP_STORE_GLOBAL, index, line);
        }else{
            UNREACHABLE();
        }
        return true;
    }
};
// `*child`: loads the child and applies OP_UNARY_STAR.
struct StarredExpr: Expr{
    Expr_ child;
    StarredExpr(Expr_&& child): child(std::move(child)) {}

    bool is_starred() const override { return true; }
    std::string str() const override { return "Starred()"; }

    void emit(CodeEmitContext* ctx) override {
        child->emit(ctx);
        ctx->emit(OP_UNARY_STAR, BC_NOARG, line);
    }

    // Storing into `*x` is storing into `x`: delegate to the child.
    bool emit_store(CodeEmitContext* ctx) override {
        const bool stored = child->emit_store(ctx);
        return stored;
    }
};
// PASS
// `not child`: loads the child and applies OP_UNARY_NOT.
struct NotExpr: Expr{
    Expr_ child;
    NotExpr(Expr_&& child): child(std::move(child)) {}

    std::string str() const override {
        return "Not()";
    }

    void emit(CodeEmitContext* ctx) override {
        child->emit(ctx);   // operand first
        ctx->emit(OP_UNARY_NOT, BC_NOARG, line);
    }
};
// PASS
// `lhs and rhs`: rhs is only evaluated when the jump after lhs is not taken.
struct AndExpr: Expr{
    Expr_ lhs;
    Expr_ rhs;

    std::string str() const override {
        return "And()";
    }

    void emit(CodeEmitContext* ctx) override {
        lhs->emit(ctx);
        // Jump target is unknown until rhs has been emitted; patch it afterwards.
        const int jump_at = ctx->emit(OP_JUMP_IF_FALSE_OR_POP, BC_NOARG, line);
        rhs->emit(ctx);
        ctx->patch_jump(jump_at);
    }
};
// PASS
// `lhs or rhs`: rhs is only evaluated when the jump after lhs is not taken.
struct OrExpr: Expr{
    Expr_ lhs;
    Expr_ rhs;

    std::string str() const override {
        return "Or()";
    }

    void emit(CodeEmitContext* ctx) override {
        lhs->emit(ctx);
        // Jump target is unknown until rhs has been emitted; patch it afterwards.
        const int jump_at = ctx->emit(OP_JUMP_IF_TRUE_OR_POP, BC_NOARG, line);
        rhs->emit(ctx);
        ctx->patch_jump(jump_at);
    }
};
// [None, True, False, ...]
struct Literal0Expr: Expr{
TokenIndex token;
Literal0Expr(TokenIndex token): token(token) {}
std::string str() const override { return TK_STR(token); }
void emit(CodeEmitContext* ctx) override {
switch (token) {
case TK("None"): ctx->emit(OP_LOAD_NONE, BC_NOARG, line); break;
case TK("True"): ctx->emit(OP_LOAD_TRUE, BC_NOARG, line); break;
case TK("False"): ctx->emit(OP_LOAD_FALSE, BC_NOARG, line); break;
case TK("..."): ctx->emit(OP_LOAD_ELLIPSIS, BC_NOARG, line); break;
default: UNREACHABLE();
}
}
bool is_json_object() const override { return true; }
};
// @num, @str which needs to invoke OP_LOAD_CONST
struct LiteralExpr: Expr{
TokenValue value;
LiteralExpr(TokenValue value): value(value) {}
std::string str() const override {
if(std::holds_alternative<i64>(value)){
return std::to_string(std::get<i64>(value));
}
if(std::holds_alternative<f64>(value)){
return std::to_string(std::get<f64>(value));
}
if(std::holds_alternative<Str>(value)){
Str s = std::get<Str>(value).escape();
return s.str();
}
UNREACHABLE();
}
PyObject* to_object(CodeEmitContext* ctx){
VM* vm = ctx->vm;
PyObject* obj = nullptr;
if(std::holds_alternative<i64>(value)){
obj = VAR(std::get<i64>(value));
}
if(std::holds_alternative<f64>(value)){
obj = VAR(std::get<f64>(value));
}
if(std::holds_alternative<Str>(value)){
obj = VAR(std::get<Str>(value));
}
return obj;
}
void emit(CodeEmitContext* ctx) override {
PyObject* obj = to_object(ctx);
if(obj == nullptr) UNREACHABLE();
int index = ctx->add_const(obj);
ctx->emit(OP_LOAD_CONST, index, line);
}
bool is_literal() const override { return true; }
bool is_json_object() const override { return true; }
};
// PASS
struct NegatedExpr: Expr{
Expr_ child;
NegatedExpr(Expr_&& child): child(std::move(child)) {}
std::string str() const override { return "Negated()"; }
void emit(CodeEmitContext* ctx) override {
VM* vm = ctx->vm;
// if child is a int of float, do constant folding
if(child->is_literal()){
LiteralExpr* lit = static_cast<LiteralExpr*>(child.get());
PyObject* obj = nullptr;
if(std::holds_alternative<i64>(lit->value)){
obj = VAR(-std::get<i64>(lit->value));
}
if(std::holds_alternative<f64>(lit->value)){
obj = VAR(-std::get<f64>(lit->value));
}
if(obj != nullptr){
ctx->emit(OP_LOAD_CONST, ctx->add_const(obj), line);
return;
}
}
child->emit(ctx);
ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line);
}
bool is_json_object() const override {
return child->is_literal();
}
};
// PASS
struct SliceExpr: Expr{
Expr_ start;
Expr_ stop;
Expr_ step;
std::string str() const override { return "Slice()"; }
void emit(CodeEmitContext* ctx) override {
if(start){
start->emit(ctx);
}else{
ctx->emit(OP_LOAD_NONE, BC_NOARG, line);
}
if(stop){
stop->emit(ctx);
}else{
ctx->emit(OP_LOAD_NONE, BC_NOARG, line);
}
if(step){
step->emit(ctx);
}else{
ctx->emit(OP_LOAD_NONE, BC_NOARG, line);
}
ctx->emit(OP_BUILD_SLICE, BC_NOARG, line);
}
};
// A single `key: value` entry, emitted as a 2-tuple.
struct DictItemExpr: Expr{
    Expr_ key;
    Expr_ value;

    std::string str() const override {
        return "DictItem()";
    }

    void emit(CodeEmitContext* ctx) override {
        // reverse order: the value is emitted before the key
        value->emit(ctx);
        key->emit(ctx);
        ctx->emit(OP_BUILD_TUPLE, 2, line);
    }
};
struct SequenceExpr: Expr{
std::vector<Expr_> items;
SequenceExpr(std::vector<Expr_>&& items): items(std::move(items)) {}
virtual Opcode opcode() const = 0;
void emit(CodeEmitContext* ctx) override {
for(auto& item: items) item->emit(ctx);
ctx->emit(opcode(), items.size(), line);
}
};
// `[a, b, ...]`
struct ListExpr: SequenceExpr{
    using SequenceExpr::SequenceExpr;

    Opcode opcode() const override { return OP_BUILD_LIST; }
    bool is_json_object() const override { return true; }
    std::string str() const override { return "List()"; }
};
// `{k: v, ...}`
struct DictExpr: SequenceExpr{
    using SequenceExpr::SequenceExpr;

    Opcode opcode() const override { return OP_BUILD_DICT; }
    bool is_json_object() const override { return true; }
    std::string str() const override { return "Dict()"; }
};
// `{a, b, ...}`
struct SetExpr: SequenceExpr{
    using SequenceExpr::SequenceExpr;

    Opcode opcode() const override { return OP_BUILD_SET; }
    std::string str() const override { return "Set()"; }
};
// `a, b, ...`; also usable as an unpacking target and as a `del` target.
struct TupleExpr: SequenceExpr{
    using SequenceExpr::SequenceExpr;

    Opcode opcode() const override { return OP_BUILD_TUPLE; }
    std::string str() const override { return "Tuple()"; }

    // TOS is an iterable. Unpack it, then store into each item.
    // Returns false when the target list is not supported.
    bool emit_store(CodeEmitContext* ctx) override {
        const int count = (int)items.size();

        // items may contain StarredExpr, we should check it
        int starred_i = -1;
        for(int i = 0; i < count; i++){
            if(items[i]->is_starred()){
                if(starred_i != -1) return false;   // multiple StarredExpr not allowed
                starred_i = i;
            }
        }

        if(starred_i < 0){
            ctx->emit(OP_UNPACK_SEQUENCE, count, line);
        }else{
            // starred assignment target must be in a tuple
            if(count == 1) return false;
            // starred assignment target must be the last one (differ from CPython)
            if(starred_i != count - 1) return false;
            // a,*b = [1,2,3]
            // stack is [1,2,3] -> [1,[2,3]]
            ctx->emit(OP_UNPACK_EX, count - 1, line);
        }

        // do reverse emit
        for(int i = count - 1; i >= 0; i--){
            if(!items[i]->emit_store(ctx)) return false;
        }
        return true;
    }

    // Delete every item in order; stops at the first one that cannot be deleted.
    bool emit_del(CodeEmitContext* ctx) override{
        for(size_t k = 0; k < items.size(); ++k){
            if(!items[k]->emit_del(ctx)) return false;
        }
        return true;
    }
};
struct CompExpr: Expr{
Expr_ expr; // loop expr
Expr_ vars; // loop vars
Expr_ iter; // loop iter
Expr_ cond; // optional if condition
virtual Opcode op0() = 0;
virtual Opcode op1() = 0;
void emit(CodeEmitContext* ctx){
ctx->emit(op0(), 0, line);
iter->emit(ctx);
ctx->emit(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
ctx->enter_block(FOR_LOOP);
ctx->emit(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
bool ok = vars->emit_store(ctx);
// this error occurs in `vars` instead of this line, but...nevermind
if(!ok) UNREACHABLE(); // TODO: raise a SyntaxError instead
if(cond){
cond->emit(ctx);
int patch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
expr->emit(ctx);
ctx->emit(op1(), BC_NOARG, BC_KEEPLINE);
ctx->patch_jump(patch);
}else{
expr->emit(ctx);
ctx->emit(op1(), BC_NOARG, BC_KEEPLINE);
}
ctx->emit(OP_LOOP_CONTINUE, BC_NOARG, BC_KEEPLINE);
ctx->exit_block();
}
};
// `[expr for vars in iter ...]`
struct ListCompExpr: CompExpr{
    std::string str() const override { return "ListComp()"; }

    Opcode op0() override { return OP_BUILD_LIST; }     // create the list
    Opcode op1() override { return OP_LIST_APPEND; }    // add one element
};
// `{k: v for vars in iter ...}`
struct DictCompExpr: CompExpr{
    std::string str() const override { return "DictComp()"; }

    Opcode op0() override { return OP_BUILD_DICT; }     // create the dict
    Opcode op1() override { return OP_DICT_ADD; }       // add one entry
};
// `{expr for vars in iter ...}`
struct SetCompExpr: CompExpr{
    std::string str() const override { return "SetComp()"; }

    Opcode op0() override { return OP_BUILD_SET; }      // create the set
    Opcode op1() override { return OP_SET_ADD; }        // add one element
};
// A lambda: owns a fresh function declaration named "<lambda>" and loads
// it with OP_LOAD_FUNCTION.
struct LambdaExpr: Expr{
    FuncDecl_ decl;
    NameScope scope;

    LambdaExpr(NameScope scope): decl(make_sp<FuncDecl>()), scope(scope) {
        decl->name = "<lambda>";
    }

    std::string str() const override { return "Lambda()"; }

    void emit(CodeEmitContext* ctx) override {
        const int decl_index = ctx->add_func_decl(decl);
        ctx->emit(OP_LOAD_FUNCTION, decl_index, line);
    }
};
// An f-string. Each `{...}` field is evaluated at runtime by calling the
// builtin eval on its text; the pieces are joined with OP_BUILD_STRING.
struct FStringExpr: Expr{
    Str src;
    FStringExpr(const Str& src): src(src) {}

    std::string str() const override {
        return fmt("f", src.escape());
    }

    void emit(CodeEmitContext* ctx) override {
        VM* vm = ctx->vm;   // required by the VAR macro
        static const std::regex pattern(R"(\{(.*?)\})");
        const std::cregex_iterator no_more;

        int pieces = 0;     // number of values pushed for OP_BUILD_STRING
        int cursor = 0;     // first character of src not yet consumed
        for(std::cregex_iterator it(src.begin(), src.end(), pattern); it != no_more; it++) {
            std::cmatch m = *it;
            const int field_start = (int)m.position();
            // Literal text between the previous field and this one.
            if (cursor < field_start) {
                Str literal = src.substr(cursor, field_start - cursor);
                ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line);
                pieces++;
            }
            // eval("<field text>")
            ctx->emit(OP_LOAD_BUILTIN_EVAL, BC_NOARG, line);
            ctx->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE);
            ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(m[1].str())), line);
            ctx->emit(OP_CALL, 1, line);
            pieces++;
            cursor = (int)(m.position() + m.length());
        }
        // Literal text after the last field (or the whole string if no field).
        if (cursor < src.length()) {
            Str literal = src.substr(cursor, src.length() - cursor);
            ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line);
            pieces++;
        }
        ctx->emit(OP_BUILD_STRING, pieces, line);
    }
};
struct SubscrExpr: Expr{
Expr_ a;
Expr_ b;
std::string str() const override { return "Subscr()"; }
void emit(CodeEmitContext* ctx) override{
a->emit(ctx);
b->emit(ctx);
ctx->emit(OP_LOAD_SUBSCR, BC_NOARG, line);
}
bool emit_del(CodeEmitContext* ctx) override {
a->emit(ctx);
b->emit(ctx);
ctx->emit(OP_DELETE_SUBSCR, BC_NOARG, line);
return true;
}
bool emit_store(CodeEmitContext* ctx) override {
a->emit(ctx);
b->emit(ctx);
ctx->emit(OP_STORE_SUBSCR, BC_NOARG, line);
return true;
}
};
struct AttribExpr: Expr{
Expr_ a;
Str b;
AttribExpr(Expr_ a, const Str& b): a(std::move(a)), b(b) {}
AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {}
std::string str() const override { return "Attrib()"; }
void emit(CodeEmitContext* ctx) override{
a->emit(ctx);
int index = ctx->add_name(b);
ctx->emit(OP_LOAD_ATTR, index, line);
}
bool emit_del(CodeEmitContext* ctx) override {
a->emit(ctx);
int index = ctx->add_name(b);
ctx->emit(OP_DELETE_ATTR, index, line);
return true;
}
bool emit_store(CodeEmitContext* ctx) override {
a->emit(ctx);
int index = ctx->add_name(b);
ctx->emit(OP_STORE_ATTR, index, line);
return true;
}
void emit_method(CodeEmitContext* ctx) {
a->emit(ctx);
int index = ctx->add_name(b);
ctx->emit(OP_LOAD_METHOD, index, line);
}
bool is_attrib() const override { return true; }
};
// PASS
struct CallExpr: Expr{
Expr_ callable;
std::vector<Expr_> args;
std::vector<std::pair<Str, Expr_>> kwargs;
std::string str() const override { return "Call()"; }
bool need_unpack() const {
for(auto& item: args) if(item->is_starred()) return true;
return false;
}
void emit(CodeEmitContext* ctx) override {
VM* vm = ctx->vm;
// TODO: if callable is a AttrExpr, we should try to use `fast_call`
// instead of use `boundmethod` proxy
if(callable->is_attrib()){
auto p = static_cast<AttribExpr*>(callable.get());
p->emit_method(ctx);
}else{
callable->emit(ctx);
ctx->emit(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE);
}
// emit args
for(auto& item: args) item->emit(ctx);
// emit kwargs
for(auto& item: kwargs){
// TODO: optimize this
ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(item.first)), line);
item.second->emit(ctx);
}
int KWARGC = (int)kwargs.size();
int ARGC = (int)args.size();
if(KWARGC > 0){
ctx->emit(need_unpack() ? OP_CALL_KWARGS_UNPACK : OP_CALL_KWARGS, (KWARGC<<16)|ARGC, line);
}else{
ctx->emit(need_unpack() ? OP_CALL_UNPACK : OP_CALL, ARGC, line);
}
}
};
// Binary operator expression `lhs <op> rhs`.
struct BinaryExpr: Expr{
    TokenIndex op;
    Expr_ lhs;
    Expr_ rhs;

    std::string str() const override { return TK_STR(op); }

    // Emits both operands (left first) and then a single opcode whose argument
    // selects the concrete operator inside its family.
    void emit(CodeEmitContext* ctx) override {
        lhs->emit(ctx);
        rhs->emit(ctx);

        // One small emitter per opcode family; the argument is the operator index.
        auto binary   = [&](int arg){ ctx->emit(OP_BINARY_OP,   arg, line); };
        auto compare  = [&](int arg){ ctx->emit(OP_COMPARE_OP,  arg, line); };
        auto contains = [&](int arg){ ctx->emit(OP_CONTAINS_OP, arg, line); };
        auto is_op    = [&](int arg){ ctx->emit(OP_IS_OP,       arg, line); };
        auto bitwise  = [&](int arg){ ctx->emit(OP_BITWISE_OP,  arg, line); };

        switch (op) {
            case TK("+"):       binary(0);   break;
            case TK("-"):       binary(1);   break;
            case TK("*"):       binary(2);   break;
            case TK("/"):       binary(3);   break;
            case TK("//"):      binary(4);   break;
            case TK("%"):       binary(5);   break;
            case TK("**"):      binary(6);   break;

            case TK("<"):       compare(0);  break;
            case TK("<="):      compare(1);  break;
            case TK("=="):      compare(2);  break;
            case TK("!="):      compare(3);  break;
            case TK(">"):       compare(4);  break;
            case TK(">="):      compare(5);  break;

            case TK("in"):      contains(0); break;
            case TK("not in"):  contains(1); break;

            case TK("is"):      is_op(0);    break;
            case TK("is not"):  is_op(1);    break;

            case TK("<<"):      bitwise(0);  break;
            case TK(">>"):      bitwise(1);  break;
            case TK("&"):       bitwise(2);  break;
            case TK("|"):       bitwise(3);  break;
            case TK("^"):       bitwise(4);  break;

            default: UNREACHABLE();
        }
    }
};
// PASS
// Conditional expression: evaluates `cond`, then exactly one of
// `true_expr` / `false_expr`.
struct TernaryExpr: Expr{
    Expr_ cond;
    Expr_ true_expr;
    Expr_ false_expr;

    std::string str() const override { return "Ternary()"; }

    void emit(CodeEmitContext* ctx) override {
        cond->emit(ctx);
        // Jump emitted with a placeholder target; patched once the false branch position is known.
        int skip_true_branch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, cond->line);

        true_expr->emit(ctx);
        // After the true branch, jump over the false branch (target patched below).
        int skip_false_branch = ctx->emit(OP_JUMP_ABSOLUTE, BC_NOARG, true_expr->line);

        ctx->patch_jump(skip_true_branch);
        false_expr->emit(ctx);
        ctx->patch_jump(skip_false_branch);
    }
};
} // namespace pkpy

View File

@ -1,38 +1,39 @@
#pragma once
#include "codeobject.h"
#include "memory.h"
#include "vector.h"
namespace pkpy{
static THREAD_LOCAL uint64_t kFrameGlobalId = 0;
static THREAD_LOCAL i64 kFrameGlobalId = 0;
using ValueStack = pod_vector<PyObject*>;
struct Frame {
std::vector<PyVar> _data;
ValueStack _data;
int _ip = -1;
int _next_ip = 0;
const CodeObject* co;
PyVar _module;
PyObject* _module;
NameDict_ _locals;
NameDict_ _closure;
const uint64_t id;
std::vector<std::pair<int, std::vector<PyVar>>> s_try_block;
const i64 id;
std::vector<std::pair<int, ValueStack>> s_try_block;
inline NameDict& f_locals() noexcept { return _locals != nullptr ? *_locals : _module->attr(); }
inline NameDict& f_globals() noexcept { return _module->attr(); }
inline PyVar* f_closure_try_get(StrName name) noexcept {
NameDict& f_locals() noexcept { return _locals!=nullptr ? *_locals : _module->attr(); }
NameDict& f_globals() noexcept { return _module->attr(); }
PyObject* f_closure_try_get(StrName name){
if(_closure == nullptr) return nullptr;
return _closure->try_get(name);
}
Frame(const CodeObject_& co,
const PyVar& _module,
const NameDict_& _locals=nullptr,
const NameDict_& _closure=nullptr)
: co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) { }
Frame(const CodeObject_& co, PyObject* _module, NameDict_ _locals=nullptr, NameDict_ _closure=nullptr)
: co(co.get()), _module(_module), _locals(_locals), _closure(_closure), id(kFrameGlobalId++) {
}
inline const Bytecode& next_bytecode() {
const Bytecode& next_bytecode() {
_ip = _next_ip++;
return co->codes[_ip];
}
@ -42,82 +43,76 @@ struct Frame {
return co->src->snapshot(line);
}
// Str stack_info(){
// StrStream ss;
// ss << "[";
// for(int i=0; i<_data.size(); i++){
// ss << OBJ_TP_NAME(_data[i]);
// if(i != _data.size()-1) ss << ", ";
// }
// ss << "]";
// return ss.str();
// }
inline bool has_next_bytecode() const {
return _next_ip < co->codes.size();
std::string stack_info(){
std::stringstream ss;
ss << id << " [";
for(int i=0; i<_data.size(); i++){
ss << (i64)_data[i];
if(i != _data.size()-1) ss << ", ";
}
ss << "]";
return ss.str();
}
inline PyVar pop(){
#if PK_EXTRA_CHECK
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
#endif
PyVar v = std::move(_data.back());
_data.pop_back();
return v;
}
inline void _pop(){
#if PK_EXTRA_CHECK
void pop(){
#if DEBUG_EXTRA_CHECK
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
#endif
_data.pop_back();
}
inline void try_deref(VM*, PyVar&);
inline PyVar pop_value(VM* vm){
PyVar value = pop();
try_deref(vm, value);
return value;
PyObject* popx(){
#if DEBUG_EXTRA_CHECK
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
#endif
PyObject* ret = _data.back();
_data.pop_back();
return ret;
}
inline PyVar top_value(VM* vm){
PyVar value = top();
try_deref(vm, value);
return value;
}
inline PyVar& top(){
#if PK_EXTRA_CHECK
PyObject*& top(){
#if DEBUG_EXTRA_CHECK
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
#endif
return _data.back();
}
inline PyVar& top_1(){
#if PK_EXTRA_CHECK
PyObject*& top_1(){
#if DEBUG_EXTRA_CHECK
if(_data.size() < 2) throw std::runtime_error("_data.size() < 2");
#endif
return _data[_data.size()-2];
}
template<typename T>
inline void push(T&& obj){ _data.push_back(std::forward<T>(obj)); }
PyObject*& top_n(int n){
n += 1;
#if DEBUG_EXTRA_CHECK
if(_data.size() < n) throw std::runtime_error("_data.size() < n");
#endif
return _data[_data.size()-n];
}
inline void jump_abs(int i){ _next_ip = i; }
inline void jump_rel(int i){ _next_ip += i; }
void push(PyObject* obj){
#if DEBUG_EXTRA_CHECK
if(obj == nullptr) throw std::runtime_error("obj == nullptr");
#endif
_data.push_back(obj);
}
inline void on_try_block_enter(){
void jump_abs(int i){ _next_ip = i; }
void jump_rel(int i){ _next_ip += i; }
void on_try_block_enter(){
s_try_block.emplace_back(co->codes[_ip].block, _data);
}
inline void on_try_block_exit(){
void on_try_block_exit(){
s_try_block.pop_back();
}
bool jump_to_exception_handler(){
if(s_try_block.empty()) return false;
PyVar obj = pop();
PyObject* obj = popx();
auto& p = s_try_block.back();
_data = std::move(p.second);
_data.push_back(obj);
@ -127,12 +122,12 @@ struct Frame {
}
int _exit_block(int i){
if(co->blocks[i].type == FOR_LOOP) _pop();
if(co->blocks[i].type == FOR_LOOP) pop();
else if(co->blocks[i].type == TRY_EXCEPT) on_try_block_exit();
return co->blocks[i].parent;
}
void jump_abs_safe(int target){
void jump_abs_break(int target){
const Bytecode& prev = co->codes[_ip];
int i = prev.block;
_next_ip = target;
@ -145,20 +140,35 @@ struct Frame {
}
}
Args pop_n_values_reversed(VM* vm, int n){
Args popx_n_reversed(int n){
Args v(n);
for(int i=n-1; i>=0; i--){
v[i] = pop();
try_deref(vm, v[i]);
}
for(int i=n-1; i>=0; i--) v[i] = popx();
return v;
}
Args pop_n_reversed(int n){
Args v(n);
for(int i=n-1; i>=0; i--) v[i] = pop();
return v;
void pop_n(int n){
_data.pop_back_n(n);
}
void _gc_mark() const {
for(PyObject* obj : _data) OBJ_MARK(obj);
OBJ_MARK(_module);
if(_locals != nullptr) _locals->_gc_mark();
if(_closure != nullptr) _closure->_gc_mark();
for(auto& p : s_try_block){
for(PyObject* obj : p.second) OBJ_MARK(obj);
}
co->_gc_mark();
}
};
// Deleter used by Frame_: runs the Frame destructor explicitly and hands
// the storage back to pool128 instead of calling operator delete.
struct FrameDeleter{
    void operator()(Frame* frame) const {
        frame->~Frame();            // destroy in place
        pool128.dealloc(frame);     // then release the storage to the pool
    }
};
using Frame_ = std::unique_ptr<Frame, FrameDeleter>;
}; // namespace pkpy

151
src/gc.h Normal file
View File

@ -0,0 +1,151 @@
#pragma once
#include "common.h"
#include "memory.h"
#include "obj.h"
#include "codeobject.h"
#include "namedict.h"
namespace pkpy {
struct ManagedHeap{
std::vector<PyObject*> _no_gc;
std::vector<PyObject*> gen;
VM* vm;
ManagedHeap(VM* vm): vm(vm) {}
static const int kMinGCThreshold = 3072;
int gc_threshold = kMinGCThreshold;
int gc_counter = 0;
/********************/
int _gc_lock_counter = 0;
struct ScopeLock{
ManagedHeap* heap;
ScopeLock(ManagedHeap* heap): heap(heap){
heap->_gc_lock_counter++;
}
~ScopeLock(){
heap->_gc_lock_counter--;
}
};
ScopeLock gc_scope_lock(){
return ScopeLock(this);
}
/********************/
template<typename T>
PyObject* gcnew(Type type, T&& val){
using __T = Py_<std::decay_t<T>>;
PyObject* obj = new(pool64.alloc<__T>()) __T(type, std::forward<T>(val));
gen.push_back(obj);
gc_counter++;
return obj;
}
template<typename T>
PyObject* _new(Type type, T&& val){
using __T = Py_<std::decay_t<T>>;
PyObject* obj = new(pool64.alloc<__T>()) __T(type, std::forward<T>(val));
obj->gc.enabled = false;
_no_gc.push_back(obj);
return obj;
}
#if DEBUG_GC_STATS
inline static std::map<Type, int> deleted;
#endif
~ManagedHeap(){
for(PyObject* obj: _no_gc) obj->~PyObject(), pool64.dealloc(obj);
#if DEBUG_GC_STATS
for(auto& [type, count]: deleted){
std::cout << "GC: " << obj_type_name(vm, type) << "=" << count << std::endl;
}
#endif
}
int sweep(){
std::vector<PyObject*> alive;
for(PyObject* obj: gen){
if(obj->gc.marked){
obj->gc.marked = false;
alive.push_back(obj);
}else{
#if DEBUG_GC_STATS
deleted[obj->type] += 1;
#endif
obj->~PyObject(), pool64.dealloc(obj);
}
}
// clear _no_gc marked flag
for(PyObject* obj: _no_gc) obj->gc.marked = false;
int freed = gen.size() - alive.size();
// std::cout << "GC: " << alive.size() << "/" << gen.size() << " (" << freed << " freed)" << std::endl;
gen.clear();
gen.swap(alive);
return freed;
}
void _auto_collect(){
if(_gc_lock_counter > 0) return;
if(gc_counter < gc_threshold) return;
gc_counter = 0;
collect();
gc_threshold = gen.size() * 2;
if(gc_threshold < kMinGCThreshold) gc_threshold = kMinGCThreshold;
}
int collect(){
if(_gc_lock_counter > 0) UNREACHABLE();
mark();
int freed = sweep();
return freed;
}
void mark();
};
// Marks the value of every slot whose key is non-empty.
inline void NameDict::_gc_mark() const{
    for(uint16_t slot=0; slot<_capacity; slot++){
        if(!_items[slot].first.empty()){
            OBJ_MARK(_items[slot].second);
        }
    }
}
// Marks the code object and the keyword-default dictionary of a declaration.
inline void FuncDecl::_gc_mark() const{
    code->_gc_mark();
    kwargs._gc_mark();
}

// --- per-type mark hooks: each one marks the PyObject* values the payload holds ---

template<> inline void _gc_mark<List>(List& t){
    for(PyObject* item: t){
        OBJ_MARK(item);
    }
}

template<> inline void _gc_mark<Tuple>(Tuple& t){
    for(int idx=0; idx<t.size(); idx++){
        OBJ_MARK(t[idx]);
    }
}

template<> inline void _gc_mark<Function>(Function& t){
    t.decl->_gc_mark();
    // module and closure are optional
    if(t._module != nullptr){
        OBJ_MARK(t._module);
    }
    if(t._closure != nullptr){
        t._closure->_gc_mark();
    }
}

template<> inline void _gc_mark<BoundMethod>(BoundMethod& t){
    OBJ_MARK(t.obj);
    OBJ_MARK(t.method);
}

template<> inline void _gc_mark<StarWrapper>(StarWrapper& t){
    OBJ_MARK(t.obj);
}

// Only `first` is marked here.
template<> inline void _gc_mark<Super>(Super& t){
    OBJ_MARK(t.first);
}
// NOTE: std::function may capture some PyObject*, they can not be marked
} // namespace pkpy

View File

@ -10,7 +10,7 @@
namespace pkpy{
Str _read_file_cwd(const Str& name, bool* ok){
inline Str _read_file_cwd(const Str& name, bool* ok){
std::filesystem::path path(name.c_str());
bool exists = std::filesystem::exists(path);
if(!exists){
@ -42,7 +42,7 @@ struct FileIO {
if(!_fs.is_open()) vm->IOError(strerror(errno));
}
static void _register(VM* vm, PyVar mod, PyVar type){
static void _register(VM* vm, PyObject* mod, PyObject* type){
vm->bind_static_method<2>(type, "__new__", [](VM* vm, Args& args){
return VAR_T(FileIO,
vm, CAST(Str, args[0]), CAST(Str, args[1])
@ -78,16 +78,16 @@ struct FileIO {
}
};
void add_module_io(VM* vm){
PyVar mod = vm->new_module("io");
PyVar type = FileIO::register_class(vm, mod);
inline void add_module_io(VM* vm){
PyObject* mod = vm->new_module("io");
PyObject* type = FileIO::register_class(vm, mod);
vm->bind_builtin_func<2>("open", [type](VM* vm, const Args& args){
return vm->call(type, args);
});
}
void add_module_os(VM* vm){
PyVar mod = vm->new_module("os");
inline void add_module_os(VM* vm){
PyObject* mod = vm->new_module("os");
// Working directory is shared by all VMs!!
vm->bind_func<0>(mod, "getcwd", [](VM* vm, const Args& args){
return VAR(std::filesystem::current_path().string());
@ -157,10 +157,10 @@ void add_module_os(VM* vm){
#else
namespace pkpy{
void add_module_io(VM* vm){}
void add_module_os(VM* vm){}
inline void add_module_io(VM* vm){}
inline void add_module_os(VM* vm){}
Str _read_file_cwd(const Str& name, bool* ok){
inline Str _read_file_cwd(const Str& name, bool* ok){
*ok = false;
return Str();
}

View File

@ -6,18 +6,18 @@ namespace pkpy{
class RangeIter : public BaseIter {
i64 current;
Range r;
Range r; // copy by value, so we don't need to keep ref
public:
RangeIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) {
this->r = OBJ_GET(Range, _ref);
RangeIter(VM* vm, PyObject* ref) : BaseIter(vm) {
this->r = OBJ_GET(Range, ref);
this->current = r.start;
}
inline bool _has_next(){
bool _has_next(){
return r.step > 0 ? current < r.stop : current > r.stop;
}
PyVar next(){
PyObject* next(){
if(!_has_next()) return nullptr;
current += r.step;
return VAR(current-r.step);
@ -26,43 +26,65 @@ public:
template <typename T>
class ArrayIter : public BaseIter {
size_t index = 0;
const T* p;
PyObject* ref;
int index;
public:
ArrayIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) { p = &OBJ_GET(T, _ref);}
PyVar next(){
ArrayIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {}
PyObject* next() override{
const T* p = &OBJ_GET(T, ref);
if(index == p->size()) return nullptr;
return p->operator[](index++);
}
void _gc_mark() const override {
OBJ_MARK(ref);
}
};
class StringIter : public BaseIter {
int index = 0;
Str* str;
PyObject* ref;
int index;
public:
StringIter(VM* vm, PyVar _ref) : BaseIter(vm, _ref) {
str = &OBJ_GET(Str, _ref);
}
StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {}
PyVar next() {
PyObject* next() override{
// TODO: optimize this to use iterator
// operator[] is O(n) complexity
Str* str = &OBJ_GET(Str, ref);
if(index == str->u8_length()) return nullptr;
return VAR(str->u8_getitem(index++));
}
void _gc_mark() const override {
OBJ_MARK(ref);
}
};
PyVar Generator::next(){
inline PyObject* Generator::next(){
if(state == 2) return nullptr;
vm->callstack.push(std::move(frame));
PyVar ret = vm->_exec();
PyObject* ret = vm->_exec();
if(ret == vm->_py_op_yield){
frame = std::move(vm->callstack.top());
vm->callstack.pop();
state = 1;
return frame->pop_value(vm);
return frame->popx();
}else{
state = 2;
return nullptr;
}
}
inline void Generator::_gc_mark() const{
if(frame != nullptr) frame->_gc_mark();
}
template<typename T>
void _gc_mark(T& t) {
if constexpr(std::is_base_of_v<BaseIter, T>){
t._gc_mark();
}
}
} // namespace pkpy

504
src/lexer.h Normal file
View File

@ -0,0 +1,504 @@
#pragma once
#include "common.h"
#include "error.h"
#include "str.h"
namespace pkpy{
typedef uint8_t TokenIndex;
// Spelling of every token kind; a token's TokenIndex is its position in
// this array (TK() finds it by string comparison, TK_STR() maps it back).
// The order is significant:
//   * each in-place operator directly follows its plain operator;
//   * kTokenKwMap registers every entry from "class" to the end of the
//     array as a keyword, so keywords must stay after the KW_BEGIN marker.
constexpr const char* kTokens[] = {
"is not", "not in",
"@eof", "@eol", "@sof",
"@id", "@num", "@str", "@fstr",
"@indent", "@dedent",
/*****************************************/
"+", "+=", "-", "-=", // (INPLACE_OP - 1) can get '=' removed
"*", "*=", "/", "/=", "//", "//=", "%", "%=",
"&", "&=", "|", "|=", "^", "^=",
"<<", "<<=", ">>", ">>=",
/*****************************************/
".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "::",
"**", "=", ">", "<", "...", "->", "?", "@", "==", "!=", ">=", "<=",
/** KW_BEGIN **/
"class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
"None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
"goto", "label", // extended keywords, not available in cpython
"while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise"
};
using TokenValue = std::variant<std::monostate, i64, f64, Str>;
const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);
// Returns the index of `token` inside kTokens. Usable in constant
// expressions (e.g. case labels); an unknown spelling hits UNREACHABLE().
constexpr TokenIndex TK(const char token[]) {
    for(int idx=0; idx<kTokenCount; idx++){
        const char* known = kTokens[idx];
        const char* wanted = token;
        // advance over the common prefix
        for(; *known && *wanted && *known == *wanted; known++, wanted++) {}
        // equal only when both strings ended at the same position
        if(*known == *wanted) return idx;
    }
    UNREACHABLE();
}
#define TK_STR(t) kTokens[t]
// Keyword spelling -> token index, built once from the tail of kTokens
// (every entry from "class" onwards).
const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
    std::map<std::string_view, TokenIndex> keywords;
    const int first_keyword = TK("class");
    for(int idx=first_keyword; idx<kTokenCount; idx++){
        keywords[kTokens[idx]] = idx;
    }
    return keywords;
}();
// One lexed token. Kept as a plain aggregate: the lexer builds it with
// brace initialization, so the member order must not change.
struct Token{
    TokenIndex type;        // index into kTokens
    const char* start;      // first character of the token text (points into the source buffer)
    int length;             // number of characters starting at `start`
    int line;               // line number recorded by the lexer
    TokenValue value;       // literal payload, if any

    // Token text as an owning string.
    Str str() const { return Str(start, length); }

    // Token text as a non-owning view.
    std::string_view sv() const { return std::string_view(start, length); }

    // Debug description: "<line>: <token kind> '<text>'", with a lone
    // newline shown as the two characters \n.
    std::string info() const {
        std::string_view text = sv();
        if(text == "\n") text = "\\n";
        std::stringstream ss;
        ss << line << ": " << TK_STR(type) << " '" << text << "'";
        return ss.str();
    }
};
// https://docs.python.org/3/reference/expressions.html#operator-precedence
// Operator precedence levels, listed in ascending enumerator value.
// NOTE(review): presumably later entries bind tighter; the parser that
// consumes these values is not visible here -- confirm before reordering.
enum Precedence {
PREC_NONE,
PREC_TUPLE, // ,
PREC_TERNARY, // ?:
PREC_LOGICAL_OR, // or
PREC_LOGICAL_AND, // and
PREC_LOGICAL_NOT, // not
PREC_EQUALITY, // == !=
PREC_TEST, // in / is / is not / not in
PREC_COMPARISION, // < > <= >=
PREC_BITWISE_OR, // |
PREC_BITWISE_XOR, // ^
PREC_BITWISE_AND, // &
PREC_BITWISE_SHIFT, // << >>
PREC_TERM, // + -
PREC_FACTOR, // * / % //
PREC_UNARY, // - not
PREC_EXPONENT, // **
PREC_CALL, // ()
PREC_SUBSCRIPT, // []
PREC_ATTRIB, // .index
PREC_PRIMARY,
};
// Kind of string literal being lexed: Lexer::eat_string() disables escape
// processing for RAW_STRING and produces an "@fstr" token for F_STRING
// ("@str" otherwise).
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
struct Lexer {
shared_ptr<SourceData> src;
const char* token_start;
const char* curr_char;
int current_line = 1;
std::vector<Token> nexts;
stack<int> indents;
int brackets_level = 0;
bool used = false;
// Character under the cursor, without consuming it.
char peekchar() const{
    return *curr_char;
}

// Consumes the next `n` characters only when every one of them equals `c0`.
// Nothing is consumed when the look-ahead fails or reaches the end of input.
bool match_n_chars(int n, char c0){
    for(int offset=0; offset<n; offset++){
        const char ahead = curr_char[offset];
        if(ahead == '\0' || ahead != c0) return false;
    }
    for(int consumed=0; consumed<n; consumed++){
        eatchar_include_newline();
    }
    return true;
}
// Consumes a run of spaces and tabs and returns its indentation width:
// a space counts as 1, a tab as 4.
int eat_spaces(){
    int width = 0;
    for(;;){
        const char c = peekchar();
        if(c == ' ') width += 1;
        else if(c == '\t') width += 4;
        else return width;
        eatchar();
    }
}
bool eat_indentation(){
if(brackets_level > 0) return true;
int spaces = eat_spaces();
if(peekchar() == '#') skip_line_comment();
if(peekchar() == '\0' || peekchar() == '\n') return true;
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
if(spaces > indents.top()){
indents.push(spaces);
nexts.push_back(Token{TK("@indent"), token_start, 0, current_line});
} else if(spaces < indents.top()){
while(spaces < indents.top()){
indents.pop();
nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line});
}
if(spaces != indents.top()){
return false;
}
}
return true;
}
// Consumes and returns the current character. Newlines must go through
// eatchar_include_newline() so the line bookkeeping stays correct; asking
// this function to consume one throws std::runtime_error.
char eatchar() {
    const char c = *curr_char;
    if(c == '\n'){
        throw std::runtime_error("eatchar() cannot consume a newline");
    }
    ++curr_char;
    return c;
}
// Consumes and returns the current character. When it is a newline, the
// line counter is advanced and the start of the following line is recorded
// in src->line_starts.
char eatchar_include_newline() {
    const char c = *curr_char++;
    if(c != '\n') return c;
    current_line++;
    src->line_starts.push_back(curr_char);
    return c;
}
// Lexes an identifier or keyword. The caller has already consumed the first
// character, so the cursor is stepped back before scanning.
//
// Returns a status code that lex_one_token() turns into an error message:
//   0  a token was added
//   1  utf8len() rejected the leading byte
//   2  a multi-byte UTF-8 sequence is cut short by the end of input
//   3  no identifier character could be consumed
//   4  JSON mode: the name is not one of true / false / null
//
// Changes from the previous version:
//   * "not" / "is" are only fused with a following " in" / " not" when that
//     word really ends there. Before, `not inside` was lexed as `not in`
//     followed by `side`, and `is nothing` as `is not` followed by `hing`.
//   * a truncated multi-byte sequence is detected before it is copied; the
//     old size check could never fire and the copy read past the terminator.
int eat_name() {
    curr_char--;
    while(true){
        unsigned char c = peekchar();
        int u8bytes = utf8len(c, true);
        if(u8bytes == 0) return 1;
        if(u8bytes == 1){
            if(isalpha(c) || c=='_' || isdigit(c)) {
                curr_char++;
                continue;
            }else{
                break;
            }
        }
        // handle multibyte char: every continuation byte must exist
        for(int k=1; k<u8bytes; k++){
            if(curr_char[k] == '\0') return 2;
        }
        uint32_t value = 0;
        for(int k=0; k < u8bytes; k++){
            uint8_t b = curr_char[k];
            if(k==0){
                if(u8bytes == 2) value = (b & 0b00011111) << 6;
                else if(u8bytes == 3) value = (b & 0b00001111) << 12;
                else if(u8bytes == 4) value = (b & 0b00000111) << 18;
            }else{
                value |= (b & 0b00111111) << (6*(u8bytes-k-1));
            }
        }
        if(is_unicode_Lo_char(value)) curr_char += u8bytes;
        else break;
    }

    int length = (int)(curr_char - token_start);
    if(length == 0) return 3;
    std::string_view name(token_start, length);

    if(src->mode == JSON_MODE){
        if(name == "true"){
            add_token(TK("True"));
        } else if(name == "false"){
            add_token(TK("False"));
        } else if(name == "null"){
            add_token(TK("None"));
        } else {
            return 4;
        }
        return 0;
    }

    // True when the character at `p` cannot continue an identifier, i.e. the
    // word in front of it is complete. Bytes >= 0x80 are treated as possible
    // identifier characters.
    auto word_ends_at = [](const char* p){
        unsigned char next = *p;
        if(next >= 0x80) return false;
        return !(isalpha(next) || isdigit(next) || next == '_');
    };

    if(kTokenKwMap.count(name)){
        if(name == "not"){
            // strncmp matched 3 non-NUL characters, so curr_char[3] is readable
            if(strncmp(curr_char, " in", 3) == 0 && word_ends_at(curr_char + 3)){
                curr_char += 3;
                add_token(TK("not in"));
                return 0;
            }
        }else if(name == "is"){
            if(strncmp(curr_char, " not", 4) == 0 && word_ends_at(curr_char + 4)){
                curr_char += 4;
                add_token(TK("is not"));
                return 0;
            }
        }
        add_token(kTokenKwMap.at(name));
    } else {
        add_token(TK("@id"));
    }
    return 0;
}
// Consumes characters up to, but not including, the next newline or the
// end of input.
void skip_line_comment() {
    for(char c = peekchar(); c != '\0' && c != '\n'; c = peekchar()){
        eatchar();
    }
}
// Consumes the current character when it equals `c`; reports whether it did.
bool matchchar(char c) {
    const bool matched = (peekchar() == c);
    if(matched) eatchar_include_newline();
    return matched;
}
// Appends a token spanning [token_start, curr_char) to `nexts` and keeps
// the bracket nesting depth up to date. An "@eol" token is attributed to
// the previous line, because the newline has already bumped current_line.
void add_token(TokenIndex type, TokenValue value={}) {
    if(type == TK("{") || type == TK("[") || type == TK("(")){
        brackets_level++;
    }else if(type == TK(")") || type == TK("]") || type == TK("}")){
        brackets_level--;
    }

    int token_line = current_line;
    if(type == TK("@eol")) token_line -= 1;

    const int token_length = (int)(curr_char - token_start);
    nexts.push_back(Token{type, token_start, token_length, token_line, value});
}
// Adds `two` when the next character is `c` (which is then consumed),
// otherwise adds `one`.
void add_token_2(char c, TokenIndex one, TokenIndex two) {
    const bool has_second_char = matchchar(c);
    add_token(has_second_char ? two : one);
}
// Reads the body of a string literal whose opening quote has already been
// consumed, and returns its contents.
//   quote  the quote character that opened the literal
//   raw    when true, backslashes are copied verbatim instead of being
//          interpreted as escape sequences
// Two more `quote` characters right at the start switch to triple-quoted
// mode, in which newlines are allowed and only three quotes in a row close
// the literal. Raises a SyntaxError on an unterminated literal or an unknown
// escape; in REPL mode an unterminated triple-quoted literal throws
// NeedMoreLines instead.
//
// Change from the previous version: when the terminating NUL of the source
// has been consumed, the cursor is stepped back onto it before an error is
// raised. Otherwise the error reporter's peekchar() would read one byte past
// the end of the source buffer.
Str eat_string_until(char quote, bool raw) {
    bool quote3 = match_n_chars(2, quote);
    std::vector<char> buff;
    while (true) {
        char c = eatchar_include_newline();
        if (c == quote){
            if(quote3 && !match_n_chars(2, quote)){
                // a lone quote inside a triple-quoted literal is ordinary text
                buff.push_back(c);
                continue;
            }
            break;
        }
        if (c == '\0'){
            curr_char--;    // stay on the terminator
            if(quote3 && src->mode == REPL_MODE){
                throw NeedMoreLines(false);
            }
            SyntaxError("EOL while scanning string literal");
        }
        if (c == '\n'){
            if(!quote3) SyntaxError("EOL while scanning string literal");
            else{
                buff.push_back(c);
                continue;
            }
        }
        if (!raw && c == '\\') {
            char escaped = eatchar_include_newline();
            switch (escaped) {
                case '"': buff.push_back('"'); break;
                case '\'': buff.push_back('\''); break;
                case '\\': buff.push_back('\\'); break;
                case 'n': buff.push_back('\n'); break;
                case 'r': buff.push_back('\r'); break;
                case 't': buff.push_back('\t'); break;
                default:
                    // a backslash right before the end of input consumed the terminator
                    if(escaped == '\0') curr_char--;
                    SyntaxError("invalid escape char");
            }
        } else {
            buff.push_back(c);
        }
    }
    return Str(buff.data(), buff.size());
}
void eat_string(char quote, StringType type) {
Str s = eat_string_until(quote, type == RAW_STRING);
if(type == F_STRING){
add_token(TK("@fstr"), s);
}else{
add_token(TK("@str"), s);
}
}
// Lexes a numeric literal starting at token_start (its first character has
// already been consumed by lex_one_token) and adds an "@num" token holding
// either an integer or a float. Any std::exception raised while matching or
// converting is reported as "invalid number literal".
void eat_number() {
    static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
    std::smatch m;

    // the regex is applied to the rest of the current line only
    const char* line_end = token_start;
    while(*line_end != '\n' && *line_end != '\0') line_end++;
    std::string rest = std::string(token_start, line_end);

    try{
        if (!std::regex_search(rest, m, pattern)) return;

        // here is m.length()-1, since the first char was eaten by lex_token()
        for(int j=0; j<m.length()-1; j++) eatchar();

        const bool is_hex = m[1].matched;           // "0x" prefix present
        const bool has_fraction = m[2].matched;     // ".digits" part present
        int base = 10;
        if (is_hex) base = 16;

        size_t size;    // number of characters the conversion consumed
        if (has_fraction) {
            if(base == 16) SyntaxError("hex literal should not contain a dot");
            add_token(TK("@num"), S_TO_FLOAT(m[0], &size));
        } else {
            add_token(TK("@num"), S_TO_INT(m[0], &size, base));
        }
        // the conversion must have used the whole match
        if (size != m.length()) UNREACHABLE();
    }catch(std::exception& _){
        SyntaxError("invalid number literal");
    }
}
// Lexes forward from the cursor until at least one token has been appended
// to `nexts` (a newline may append "@eol" plus indent/dedent tokens).
// Whitespace and comments produce no token and are skipped.
// Returns true while there is more to lex. At the end of input it emits one
// pending "@dedent" per call and finally "@eof", for which it returns false.
//
// Changes from the previous version (the produced token stream is the same):
//   * the "!=" path now returns like every other token-producing case,
//     instead of falling out of the switch and lexing a second token;
//   * the end-of-input dedent used a `while` that returned on its first
//     iteration; it is now the `if` it always behaved as.
bool lex_one_token() {
    while (peekchar() != '\0') {
        token_start = curr_char;
        char c = eatchar_include_newline();
        switch (c) {
            case '\'': case '"': eat_string(c, NORMAL_STRING); return true;
            case '#': skip_line_comment(); break;
            case '{': add_token(TK("{")); return true;
            case '}': add_token(TK("}")); return true;
            case ',': add_token(TK(",")); return true;
            case ':': add_token_2(':', TK(":"), TK("::")); return true;
            case ';': add_token(TK(";")); return true;
            case '(': add_token(TK("(")); return true;
            case ')': add_token(TK(")")); return true;
            case '[': add_token(TK("[")); return true;
            case ']': add_token(TK("]")); return true;
            case '@': add_token(TK("@")); return true;
            case '%': add_token_2('=', TK("%"), TK("%=")); return true;
            case '&': add_token_2('=', TK("&"), TK("&=")); return true;
            case '|': add_token_2('=', TK("|"), TK("|=")); return true;
            case '^': add_token_2('=', TK("^"), TK("^=")); return true;
            case '?': add_token(TK("?")); return true;
            case '.': {
                if(matchchar('.')) {
                    if(matchchar('.')) {
                        add_token(TK("..."));
                    } else {
                        SyntaxError("invalid token '..'");
                    }
                } else {
                    add_token(TK("."));
                }
                return true;
            }
            case '=': add_token_2('=', TK("="), TK("==")); return true;
            case '+': add_token_2('=', TK("+"), TK("+=")); return true;
            case '>': {
                if(matchchar('=')) add_token(TK(">="));
                else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>="));
                else add_token(TK(">"));
                return true;
            }
            case '<': {
                if(matchchar('=')) add_token(TK("<="));
                else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<="));
                else add_token(TK("<"));
                return true;
            }
            case '-': {
                if(matchchar('=')) add_token(TK("-="));
                else if(matchchar('>')) add_token(TK("->"));
                else add_token(TK("-"));
                return true;
            }
            case '!':
                // a lone '!' is not a token
                if(matchchar('=')) add_token(TK("!="));
                else SyntaxError("expected '=' after '!'");
                return true;
            case '*':
                if (matchchar('*')) {
                    add_token(TK("**")); // '**'
                } else {
                    add_token_2('=', TK("*"), TK("*="));
                }
                return true;
            case '/':
                if(matchchar('/')) {
                    add_token_2('=', TK("//"), TK("//="));
                } else {
                    add_token_2('=', TK("/"), TK("/="));
                }
                return true;
            case ' ': case '\t': eat_spaces(); break;
            case '\n': {
                add_token(TK("@eol"));
                if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level");
                return true;
            }
            default: {
                // string prefixes: f'...' / f"..." and r'...' / r"..."
                if(c == 'f'){
                    if(matchchar('\'')) {eat_string('\'', F_STRING); return true;}
                    if(matchchar('"')) {eat_string('"', F_STRING); return true;}
                }else if(c == 'r'){
                    if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;}
                    if(matchchar('"')) {eat_string('"', RAW_STRING); return true;}
                }
                if (c >= '0' && c <= '9') {
                    eat_number();
                    return true;
                }
                // everything else must be an identifier or keyword;
                // each SyntaxError below throws
                switch (eat_name())
                {
                    case 0: break;
                    case 1: SyntaxError("invalid char: " + std::string(1, c)); break;
                    case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); break;
                    case 3: SyntaxError("@id contains invalid char"); break;
                    case 4: SyntaxError("invalid JSON token"); break;
                    default: UNREACHABLE();
                }
                return true;
            }
        }
    }

    // end of input: close the open indentation levels one per call, then finish
    token_start = curr_char;
    if(indents.size() > 1){
        indents.pop();
        add_token(TK("@dedent"));
        return true;
    }
    add_token(TK("@eof"));
    return false;
}
/***** Error Reporter *****/
// Raises an error located at the current cursor. When the cursor sits on a
// newline, the error is attributed to the previous line number and the
// cursor is moved back by one character.
void throw_err(Str type, Str msg){
    const bool at_newline = (peekchar() == '\n');
    const int lineno = at_newline ? current_line - 1 : current_line;
    const char* cursor = at_newline ? curr_char - 1 : curr_char;
    throw_err(type, msg, lineno, cursor);
}
// Builds an Exception of the requested `type` carrying `msg`, attaches a
// source snapshot for (lineno, cursor) and throws it.
//
// Change from the previous version: the exception type used to be
// hard-coded to "SyntaxError", so the `type` argument was ignored and
// IndentationError() raised a SyntaxError. The name is passed as a C string,
// exactly like the literal it replaces, so the same Exception constructor
// is selected.
void throw_err(Str type, Str msg, int lineno, const char* cursor){
    auto e = Exception(type.c_str(), msg);
    e.st_push(src->snapshot(lineno, cursor));
    throw e;
}
// Convenience wrappers around throw_err(); each of them always throws.
void SyntaxError(Str msg){
    throw_err("SyntaxError", msg);
}

void SyntaxError(){
    throw_err("SyntaxError", "invalid syntax");
}

void IndentationError(Str msg){
    throw_err("IndentationError", msg);
}
// Prepares a lexer over `src`: the cursor starts at the beginning of the
// source text, the token list starts with "@sof" and the indentation stack
// with level 0.
Lexer(shared_ptr<SourceData> src) {
    this->src = src;
    const char* begin = src->source.c_str();
    token_start = begin;
    curr_char = begin;
    nexts.push_back(Token{TK("@sof"), token_start, 0, current_line});
    indents.push(0);
}
// Lexes the whole source and returns the token list. A Lexer is single-use:
// a second call hits UNREACHABLE().
std::vector<Token> run() {
    if(used) UNREACHABLE();
    used = true;
    for(;;){
        if(!lex_one_token()) break;
    }
    return std::move(nexts);
}
};
} // namespace pkpy

View File

@ -21,7 +21,6 @@ std::string getline(bool* eof=nullptr) {
std::string output;
output.resize(length);
WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), &output[0], length, NULL, NULL);
if(!output.empty() && output.back() == '\r') output.pop_back();
return output;
}
@ -66,16 +65,20 @@ int main(int argc, char** argv){
filepath = std::filesystem::absolute(filepath);
if(!std::filesystem::exists(filepath)){
std::cerr << "File not found: " << argv_1 << std::endl;
return 1;
return 2;
}
std::ifstream file(filepath);
if(!file.is_open()) return 1;
if(!file.is_open()){
std::cerr << "Failed to open file: " << argv_1 << std::endl;
return 3;
}
std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
file.close();
// set parent path as cwd
std::filesystem::current_path(filepath.parent_path());
pkpy::PyVarOrNull ret = nullptr;
pkpy::PyObject* ret = nullptr;
ret = vm->exec(src.c_str(), argv_1, pkpy::EXEC_MODE);
pkpy_delete(vm);
return ret != nullptr ? 0 : 1;

View File

@ -4,31 +4,13 @@
namespace pkpy{
struct PyObject;
template<typename T>
struct SpAllocator {
template<typename U>
inline static int* alloc(){
return (int*)malloc(sizeof(int) + sizeof(U));
}
inline static void dealloc(int* counter){
((T*)(counter + 1))->~T();
free(counter);
}
};
template <typename T>
struct shared_ptr {
union {
int* counter;
i64 bits;
};
int* counter;
#define _t() (T*)(counter + 1)
#define _inc_counter() if(!is_tagged() && counter) ++(*counter)
#define _dec_counter() if(!is_tagged() && counter && --(*counter) == 0) SpAllocator<T>::dealloc(counter)
T* _t() const noexcept { return (T*)(counter + 1); }
void _inc_counter() { if(counter) ++(*counter); }
void _dec_counter() { if(counter && --(*counter) == 0) {((T*)(counter + 1))->~T(); free(counter);} }
public:
shared_ptr() : counter(nullptr) {}
@ -69,7 +51,6 @@ public:
T* get() const { return _t(); }
int use_count() const {
if(is_tagged()) return 0;
return counter ? *counter : 0;
}
@ -77,78 +58,247 @@ public:
_dec_counter();
counter = nullptr;
}
inline constexpr bool is_tagged() const {
if constexpr(!std::is_same_v<T, PyObject>) return false;
return (bits & 0b11) != 0b00;
}
inline bool is_tag_00() const { return (bits & 0b11) == 0b00; }
inline bool is_tag_01() const { return (bits & 0b11) == 0b01; }
inline bool is_tag_10() const { return (bits & 0b11) == 0b10; }
inline bool is_tag_11() const { return (bits & 0b11) == 0b11; }
};
#undef _t
#undef _inc_counter
#undef _dec_counter
template <typename T, typename... Args>
shared_ptr<T> make_sp(Args&&... args) {
int* p = (int*)malloc(sizeof(int) + sizeof(T));
*p = 1;
new(p+1) T(std::forward<Args>(args)...);
return shared_ptr<T>(p);
}
template <typename T, typename U, typename... Args>
shared_ptr<T> make_sp(Args&&... args) {
static_assert(std::is_base_of_v<T, U>, "U must be derived from T");
static_assert(std::has_virtual_destructor_v<T>, "T must have virtual destructor");
static_assert(!std::is_same_v<T, PyObject> || (!std::is_same_v<U, i64> && !std::is_same_v<U, f64>));
int* p = SpAllocator<T>::template alloc<U>(); *p = 1;
new(p+1) U(std::forward<Args>(args)...);
return shared_ptr<T>(p);
struct LinkedListNode{
LinkedListNode* prev;
LinkedListNode* next;
};
template<typename T>
struct DoubleLinkedList{
static_assert(std::is_base_of_v<LinkedListNode, T>);
int _size;
LinkedListNode head;
LinkedListNode tail;
DoubleLinkedList(): _size(0){
head.prev = nullptr;
head.next = &tail;
tail.prev = &head;
tail.next = nullptr;
}
template <typename T, typename... Args>
shared_ptr<T> make_sp(Args&&... args) {
int* p = SpAllocator<T>::template alloc<T>(); *p = 1;
new(p+1) T(std::forward<Args>(args)...);
return shared_ptr<T>(p);
void push_back(T* node){
node->prev = tail.prev;
node->next = &tail;
tail.prev->next = node;
tail.prev = node;
_size++;
}
static_assert(sizeof(i64) == sizeof(int*));
static_assert(sizeof(f64) == sizeof(int*));
static_assert(sizeof(shared_ptr<PyObject>) == sizeof(int*));
static_assert(std::numeric_limits<float>::is_iec559);
static_assert(std::numeric_limits<double>::is_iec559);
void push_front(T* node){
node->prev = &head;
node->next = head.next;
head.next->prev = node;
head.next = node;
_size++;
}
template<typename T, int __Bucket, int __BucketSize=32>
struct SmallArrayPool {
std::vector<T*> buckets[__Bucket+1];
void pop_back(){
#if DEBUG_MEMORY_POOL
if(empty()) throw std::runtime_error("DoubleLinkedList::pop_back() called on empty list");
#endif
tail.prev->prev->next = &tail;
tail.prev = tail.prev->prev;
_size--;
}
T* alloc(int n){
if(n == 0) return nullptr;
if(n > __Bucket || buckets[n].empty()){
return new T[n];
}else{
T* p = buckets[n].back();
buckets[n].pop_back();
return p;
void pop_front(){
#if DEBUG_MEMORY_POOL
if(empty()) throw std::runtime_error("DoubleLinkedList::pop_front() called on empty list");
#endif
head.next->next->prev = &head;
head.next = head.next->next;
_size--;
}
T* back() const {
#if DEBUG_MEMORY_POOL
if(empty()) throw std::runtime_error("DoubleLinkedList::back() called on empty list");
#endif
return static_cast<T*>(tail.prev);
}
T* front() const {
#if DEBUG_MEMORY_POOL
if(empty()) throw std::runtime_error("DoubleLinkedList::front() called on empty list");
#endif
return static_cast<T*>(head.next);
}
void erase(T* node){
#if DEBUG_MEMORY_POOL
if(empty()) throw std::runtime_error("DoubleLinkedList::erase() called on empty list");
LinkedListNode* n = head.next;
while(n != &tail){
if(n == node) break;
n = n->next;
}
if(n != node) throw std::runtime_error("DoubleLinkedList::erase() called on node not in the list");
#endif
node->prev->next = node->next;
node->next->prev = node->prev;
_size--;
}
void dealloc(T* p, int n){
if(n == 0) return;
if(n > __Bucket || buckets[n].size() >= __BucketSize){
delete[] p;
}else{
buckets[n].push_back(p);
// Splices every element of `other` onto the end of this list, preserving
// their order, and leaves `other` empty. No nodes are copied or freed.
void move_all_back(DoubleLinkedList<T>& other){
    if(other.empty()) return;
    LinkedListNode* moved_first = other.head.next;
    LinkedListNode* moved_last = other.tail.prev;
    LinkedListNode* my_last = tail.prev;

    // stitch [moved_first .. moved_last] between my_last and our tail sentinel
    moved_last->next = &tail;
    my_last->next = moved_first;
    moved_first->prev = my_last;
    tail.prev = moved_last;
    _size += other._size;

    // reset `other` to the empty state (sentinels pointing at each other)
    other.head.next = &other.tail;
    other.tail.prev = &other.head;
    other._size = 0;
}
bool empty() const {
#if DEBUG_MEMORY_POOL
if(size() == 0){
if(head.next != &tail || tail.prev != &head){
throw std::runtime_error("DoubleLinkedList::size() returned 0 but the list is not empty");
}
return true;
}
#endif
return _size == 0;
}
~SmallArrayPool(){
for(int i=1; i<=__Bucket; i++){
for(auto p: buckets[i]) delete[] p;
int size() const { return _size; }
// Calls `func` on every element, from first to last.
// The successor is read before `func` runs, so `func` may unlink or delete
// the element it is given.
void apply(std::function<void(T*)> func){
    for(LinkedListNode* cur = head.next; cur != &tail; ){
        LinkedListNode* following = cur->next;
        func(static_cast<T*>(cur));
        cur = following;
    }
}
};
template<int __BlockSize=128>
struct MemoryPool{
static const size_t __MaxBlocks = 256*1024 / __BlockSize;
struct Block{
void* arena;
char data[__BlockSize];
};
typedef shared_ptr<PyObject> PyVar;
typedef PyVar PyVarOrNull;
typedef PyVar PyVarRef;
struct Arena: LinkedListNode{
Block _blocks[__MaxBlocks];
Block* _free_list[__MaxBlocks];
int _free_list_size;
bool dirty;
Arena(): _free_list_size(__MaxBlocks), dirty(false){
for(int i=0; i<__MaxBlocks; i++){
_blocks[i].arena = this;
_free_list[i] = &_blocks[i];
}
}
bool empty() const { return _free_list_size == 0; }
bool full() const { return _free_list_size == __MaxBlocks; }
void tidy(){
#if DEBUG_MEMORY_POOL
if(!full()) throw std::runtime_error("Arena::tidy() called on non-full arena");
#endif
std::sort(_free_list, _free_list+__MaxBlocks);
}
Block* alloc(){
#if DEBUG_MEMORY_POOL
if(empty()) throw std::runtime_error("Arena::alloc() called on empty arena");
#endif
_free_list_size--;
return _free_list[_free_list_size];
}
void dealloc(Block* block){
#if DEBUG_MEMORY_POOL
if(full()) throw std::runtime_error("Arena::dealloc() called on full arena");
#endif
_free_list[_free_list_size] = block;
_free_list_size++;
}
};
DoubleLinkedList<Arena> _arenas;
DoubleLinkedList<Arena> _empty_arenas;
// Convenience overload: allocates sizeof(__T) bytes by forwarding to
// alloc(size_t). It returns raw storage only; no __T is constructed here.
template<typename __T>
void* alloc() { return alloc(sizeof(__T)); }
// Allocates `size` bytes and returns a pointer to uninitialized storage.
//
// - With DEBUG_NO_MEMORY_POOL set, this is a plain malloc().
// - Requests larger than __BlockSize bypass the arenas: they are malloc'ed
//   with a pointer-sized header in front of the payload. The header is
//   zeroed so that dealloc() sees a null `arena` and knows to free() it.
// - Everything else is served from the last arena in `_arenas`; a new arena
//   is created when none has a free block.
//
// Throws std::runtime_error if the oversized malloc() fails. Previously the
// null result was passed straight to memset().
void* alloc(size_t size){
#if DEBUG_NO_MEMORY_POOL
    return malloc(size);
#endif
    if(size > __BlockSize){
        void* p = malloc(sizeof(void*) + size);
        if(p == nullptr) throw std::runtime_error("MemoryPool::alloc() failed: out of memory");
        memset(p, 0, sizeof(void*));        // null arena marks a malloc'ed block
        return (char*)p + sizeof(void*);
    }

    if(_arenas.empty()){
        _arenas.push_back(new Arena());
    }
    Arena* arena = _arenas.back();
    void* p = arena->alloc()->data;
    if(arena->empty()){
        // That was the arena's last free block: park it on the exhausted list
        // until one of its blocks is returned.
        _arenas.pop_back();
        arena->dirty = true;
        _empty_arenas.push_back(arena);
    }
    return p;
}
// Returns storage obtained from alloc().
//
// The pointer-sized header in front of `p` says where the storage came from:
// a null `arena` means it was malloc'ed directly and is free()'d here,
// otherwise the block is handed back to its owning arena.
void dealloc(void* p){
#if DEBUG_NO_MEMORY_POOL
    free(p);
    return;
#endif
#if DEBUG_MEMORY_POOL
    if(p == nullptr) throw std::runtime_error("MemoryPool::dealloc() called on nullptr");
#endif
    Block* block = (Block*)((char*)p - sizeof(void*));
    if(block->arena == nullptr){
        free(block);
        return;
    }

    Arena* arena = (Arena*)block->arena;
    const bool was_exhausted = arena->empty();
    if(was_exhausted){
        // The arena is about to regain a free block: make it allocatable again.
        _empty_arenas.erase(arena);
        _arenas.push_front(arena);
    }
    arena->dealloc(block);
    if(!was_exhausted && arena->full() && arena->dirty){
        // Every block is free again and the arena was exhausted at least once:
        // release the whole arena.
        _arenas.erase(arena);
        delete arena;
    }
}
// Deletes every arena still owned by the pool, on both lists.
~MemoryPool(){
    const auto destroy = [](Arena* arena){ delete arena; };
    _arenas.apply(destroy);
    _empty_arenas.apply(destroy);
}
};
inline MemoryPool<64> pool64;
inline MemoryPool<128> pool128;
// inline MemoryPool<256> pool256;
}; // namespace pkpy

View File

@ -6,44 +6,9 @@
namespace pkpy{
const int kNameDictNodeSize = sizeof(StrName) + sizeof(PyVar);
// Recycling allocator for NameDict storage.
// Each allocation is one raw buffer of kNameDictNodeSize * n bytes: n StrName
// keys followed by n PyVar values (dealloc() locates the values at head + n).
// Freed buffers of capacity n <= __Bucket are cached in buckets[n], at most
// __BucketSize per capacity.
template<int __Bucket, int __BucketSize=32>
struct DictArrayPool {
std::vector<StrName*> buckets[__Bucket+1];
// Returns a buffer for n entries.
// A freshly malloc'ed buffer is zeroed entirely; a recycled buffer only has
// its key region (the first sizeof(StrName) * n bytes) zeroed, so the value
// region keeps whatever the previous owner left there.
// NOTE(review): the malloc() result is not checked for null before memset().
StrName* alloc(uint16_t n){
StrName* _keys;
if(n > __Bucket || buckets[n].empty()){
_keys = (StrName*)malloc(kNameDictNodeSize * n);
memset((void*)_keys, 0, kNameDictNodeSize * n);
}else{
_keys = buckets[n].back();
memset((void*)_keys, 0, sizeof(StrName) * n);
buckets[n].pop_back();
}
return _keys;
}
// Releases a buffer of capacity n previously returned by alloc(n).
// The PyVar values are destroyed only when the buffer is actually free()'d;
// a buffer that goes back into a bucket keeps its values alive.
// NOTE(review): presumably those stale values are released when the next
// owner assigns over them -- confirm against NameDict.
void dealloc(StrName* head, uint16_t n){
PyVar* _values = (PyVar*)(head + n);
if(n > __Bucket || buckets[n].size() >= __BucketSize){
for(int i=0; i<n; i++) _values[i].~PyVar();
free(head);
}else{
buckets[n].push_back(head);
}
}
~DictArrayPool(){
// let it leak, since this object is static
}
};
const std::vector<uint16_t> kHashSeeds = {9629, 43049, 13267, 59509, 39251, 1249, 35803, 54469, 27689, 9719, 34897, 18973, 30661, 19913, 27919, 32143, 3467, 28019, 1051, 39419, 1361, 28547, 48197, 2609, 24317, 22861, 41467, 17623, 52837, 59053, 33589, 32117};
static DictArrayPool<32> _dict_pool;
uint16_t find_next_capacity(uint16_t n){
// Returns the smallest power of two that is >= n, with a minimum of 2.
//
// Throws std::length_error when n > 0x8000: the next power of two (0x10000)
// does not fit in uint16_t. Previously the doubling wrapped to 0 in that case
// and the loop never terminated.
inline static uint16_t find_next_capacity(uint16_t n){
    constexpr uint16_t kMaxCapacity = 0x8000;
    if(n > kMaxCapacity) throw std::length_error("NameDict capacity overflow");
    uint16_t x = 2;
    while(x < n) x <<= 1;
    return x;
}
@ -51,7 +16,7 @@ uint16_t find_next_capacity(uint16_t n){
#define _hash(key, mask, hash_seed) ( ( (key).index * (hash_seed) >> 8 ) & (mask) )
uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys){
inline static uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& keys){
if(keys.empty()) return kHashSeeds[0];
std::set<uint16_t> indices;
std::pair<uint16_t, float> best_score = {kHashSeeds[0], 0.0f};
@ -68,77 +33,65 @@ uint16_t find_perfect_hash_seed(uint16_t capacity, const std::vector<StrName>& k
}
struct NameDict {
using Item = std::pair<StrName, PyObject*>;
uint16_t _capacity;
uint16_t _size;
float _load_factor;
uint16_t _hash_seed;
uint16_t _mask;
StrName* _keys;
Item* _items;
inline PyVar& value(uint16_t i){
return reinterpret_cast<PyVar*>(_keys + _capacity)[i];
}
inline const PyVar& value(uint16_t i) const {
return reinterpret_cast<const PyVar*>(_keys + _capacity)[i];
void _alloc(int cap){
_items = (Item*)pool128.alloc(cap * sizeof(Item));
memset(_items, 0, cap * sizeof(Item));
}
NameDict(uint16_t capacity=2, float load_factor=0.67, uint16_t hash_seed=kHashSeeds[0]):
_capacity(capacity), _size(0), _load_factor(load_factor),
_hash_seed(hash_seed), _mask(capacity-1) {
_keys = _dict_pool.alloc(capacity);
}
_alloc(capacity);
}
NameDict(const NameDict& other) {
memcpy(this, &other, sizeof(NameDict));
_keys = _dict_pool.alloc(_capacity);
_alloc(_capacity);
for(int i=0; i<_capacity; i++){
_keys[i] = other._keys[i];
value(i) = other.value(i);
_items[i] = other._items[i];
}
}
NameDict& operator=(const NameDict& other) {
_dict_pool.dealloc(_keys, _capacity);
pool128.dealloc(_items);
memcpy(this, &other, sizeof(NameDict));
_keys = _dict_pool.alloc(_capacity);
_alloc(_capacity);
for(int i=0; i<_capacity; i++){
_keys[i] = other._keys[i];
value(i) = other.value(i);
_items[i] = other._items[i];
}
return *this;
}
~NameDict(){ _dict_pool.dealloc(_keys, _capacity); }
~NameDict(){ pool128.dealloc(_items); }
NameDict(NameDict&&) = delete;
NameDict& operator=(NameDict&&) = delete;
uint16_t size() const { return _size; }
#define HASH_PROBE(key, ok, i) \
ok = false; \
i = _hash(key, _mask, _hash_seed); \
while(!_keys[i].empty()) { \
if(_keys[i] == (key)) { ok = true; break; } \
i = (i + 1) & _mask; \
#define HASH_PROBE(key, ok, i) \
ok = false; \
i = _hash(key, _mask, _hash_seed); \
while(!_items[i].first.empty()) { \
if(_items[i].first == (key)) { ok = true; break; } \
i = (i + 1) & _mask; \
}
const PyVar& operator[](StrName key) const {
PyObject* operator[](StrName key) const {
bool ok; uint16_t i;
HASH_PROBE(key, ok, i);
if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
return value(i);
if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
return _items[i].second;
}
PyVar& get(StrName key){
bool ok; uint16_t i;
HASH_PROBE(key, ok, i);
if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
return value(i);
}
template<typename T>
void set(StrName key, T&& val){
void set(StrName key, PyObject* val){
bool ok; uint16_t i;
HASH_PROBE(key, ok, i);
if(!ok) {
@ -147,29 +100,27 @@ while(!_keys[i].empty()) { \
_rehash(true);
HASH_PROBE(key, ok, i);
}
_keys[i] = key;
_items[i].first = key;
}
value(i) = std::forward<T>(val);
_items[i].second = val;
}
void _rehash(bool resize){
StrName* old_keys = _keys;
PyVar* old_values = &value(0);
Item* old_items = _items;
uint16_t old_capacity = _capacity;
if(resize){
_capacity = find_next_capacity(_capacity * 2);
_mask = _capacity - 1;
}
_keys = _dict_pool.alloc(_capacity);
_alloc(_capacity);
for(uint16_t i=0; i<old_capacity; i++){
if(old_keys[i].empty()) continue;
if(old_items[i].first.empty()) continue;
bool ok; uint16_t j;
HASH_PROBE(old_keys[i], ok, j);
HASH_PROBE(old_items[i].first, ok, j);
if(ok) UNREACHABLE();
_keys[j] = old_keys[i];
value(j) = old_values[i]; // std::move makes a segfault
_items[j] = old_items[i];
}
_dict_pool.dealloc(old_keys, old_capacity);
pool128.dealloc(old_items);
}
void _try_perfect_rehash(){
@ -177,22 +128,22 @@ while(!_keys[i].empty()) { \
_rehash(false); // do not resize
}
inline PyVar* try_get(StrName key){
PyObject* try_get(StrName key) const{
bool ok; uint16_t i;
HASH_PROBE(key, ok, i);
if(!ok) return nullptr;
return &value(i);
return _items[i].second;
}
inline bool try_set(StrName key, PyVar&& val){
bool try_set(StrName key, PyObject* val){
bool ok; uint16_t i;
HASH_PROBE(key, ok, i);
if(!ok) return false;
value(i) = std::move(val);
_items[i].second = val;
return true;
}
inline bool contains(StrName key) const {
bool contains(StrName key) const {
bool ok; uint16_t i;
HASH_PROBE(key, ok, i);
return ok;
@ -200,24 +151,25 @@ while(!_keys[i].empty()) { \
void update(const NameDict& other){
for(uint16_t i=0; i<other._capacity; i++){
if(other._keys[i].empty()) continue;
set(other._keys[i], other.value(i));
auto& item = other._items[i];
if(!item.first.empty()) set(item.first, item.second);
}
}
void erase(StrName key){
bool ok; uint16_t i;
HASH_PROBE(key, ok, i);
if(!ok) throw std::out_of_range("NameDict key not found: " + key.str());
_keys[i] = StrName(); value(i).reset();
if(!ok) throw std::out_of_range(fmt("NameDict key not found: ", key));
_items[i].first = StrName();
_items[i].second = nullptr;
_size--;
}
std::vector<std::pair<StrName, PyVar>> items() const {
std::vector<std::pair<StrName, PyVar>> v;
std::vector<Item> items() const {
std::vector<Item> v;
for(uint16_t i=0; i<_capacity; i++){
if(_keys[i].empty()) continue;
v.push_back(std::make_pair(_keys[i], value(i)));
if(_items[i].first.empty()) continue;
v.push_back(_items[i]);
}
return v;
}
@ -225,11 +177,13 @@ while(!_keys[i].empty()) { \
std::vector<StrName> keys() const {
std::vector<StrName> v;
for(uint16_t i=0; i<_capacity; i++){
if(_keys[i].empty()) continue;
v.push_back(_keys[i]);
if(_items[i].first.empty()) continue;
v.push_back(_items[i].first);
}
return v;
}
void _gc_mark() const;
#undef HASH_PROBE
#undef _hash
};

140
src/obj.h
View File

@ -3,7 +3,6 @@
#include "common.h"
#include "namedict.h"
#include "tuplelist.h"
#include <type_traits>
namespace pkpy {
@ -12,7 +11,7 @@ struct Frame;
struct BaseRef;
class VM;
typedef std::function<PyVar(VM*, Args&)> NativeFuncRaw;
typedef std::function<PyObject*(VM*, Args&)> NativeFuncRaw;
typedef shared_ptr<CodeObject> CodeObject_;
typedef shared_ptr<NameDict> NameDict_;
@ -22,10 +21,10 @@ struct NativeFunc {
bool method;
NativeFunc(NativeFuncRaw f, int argc, bool method) : f(f), argc(argc), method(method) {}
inline PyVar operator()(VM* vm, Args& args) const;
PyObject* operator()(VM* vm, Args& args) const;
};
struct Function {
struct FuncDecl {
StrName name;
CodeObject_ code;
std::vector<StrName> args;
@ -33,22 +32,28 @@ struct Function {
NameDict kwargs; // empty if no k=v
std::vector<StrName> kwargs_order;
// runtime settings
PyVar _module = nullptr;
NameDict_ _closure = nullptr;
bool has_name(StrName val) const {
bool _0 = std::find(args.begin(), args.end(), val) != args.end();
bool _1 = starred_arg == val;
bool _2 = kwargs.contains(val);
return _0 || _1 || _2;
}
void _gc_mark() const;
};
using FuncDecl_ = shared_ptr<FuncDecl>;
// A function object at runtime: the shared, immutable declaration plus the
// per-instance runtime bindings.
struct Function{
    FuncDecl_ decl;
    // Runtime settings. Both default to null so that a default-initialized
    // Function never carries an indeterminate raw pointer.
    PyObject* _module = nullptr;
    NameDict_ _closure = nullptr;
};
struct BoundMethod {
PyVar obj;
PyVar method;
BoundMethod(const PyVar& obj, const PyVar& method) : obj(obj), method(method) {}
PyObject* obj;
PyObject* method;
BoundMethod(PyObject* obj, PyObject* method) : obj(obj), method(method) {}
};
struct Range {
@ -58,14 +63,17 @@ struct Range {
};
struct StarWrapper {
PyVar obj;
bool rvalue;
StarWrapper(const PyVar& obj, bool rvalue): obj(obj), rvalue(rvalue) {}
PyObject* obj;
StarWrapper(PyObject* obj): obj(obj) {}
};
using Super = std::pair<PyObject*, Type>;
// TODO: re-examine the design of Slice
struct Slice {
int start = 0;
int stop = 0x7fffffff;
int stop = 0x7fffffff;
int step = 1;
void normalize(int len){
if(start < 0) start += len;
@ -79,27 +87,37 @@ struct Slice {
class BaseIter {
protected:
VM* vm;
PyVar _ref; // keep a reference to the object so it will not be deleted while iterating
public:
virtual PyVar next() = 0;
PyVarRef loop_var;
BaseIter(VM* vm, PyVar _ref) : vm(vm), _ref(_ref) {}
BaseIter(VM* vm) : vm(vm) {}
virtual void _gc_mark() const {}
virtual PyObject* next() = 0;
virtual ~BaseIter() = default;
};
// Per-object garbage-collector bookkeeping.
// A new header starts out enabled and unmarked.
struct GCHeader {
bool enabled; // whether this object is managed by GC
bool marked; // whether this object is marked
GCHeader() : enabled(true), marked(false) {}
};
struct PyObject {
GCHeader gc;
Type type;
NameDict* _attr;
inline bool is_attr_valid() const noexcept { return _attr != nullptr; }
inline NameDict& attr() noexcept { return *_attr; }
inline const PyVar& attr(StrName name) const noexcept { return _attr->get(name); }
bool is_attr_valid() const noexcept { return _attr != nullptr; }
NameDict& attr() noexcept { return *_attr; }
PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; }
virtual void* value() = 0;
virtual void _obj_gc_mark() = 0;
PyObject(Type type) : type(type) {}
virtual ~PyObject() { delete _attr; }
};
template<typename T>
void _gc_mark(T& t);
template <typename T>
struct Py_ : PyObject {
T _value;
@ -107,7 +125,7 @@ struct Py_ : PyObject {
Py_(Type type, const T& val): PyObject(type), _value(val) { _init(); }
Py_(Type type, T&& val): PyObject(type), _value(std::move(val)) { _init(); }
inline void _init() noexcept {
void _init() noexcept {
if constexpr (std::is_same_v<T, Type> || std::is_same_v<T, DummyModule>) {
_attr = new NameDict(8, kTypeAttrLoadFactor);
}else if constexpr(std::is_same_v<T, DummyInstance>){
@ -119,75 +137,71 @@ struct Py_ : PyObject {
}
}
void* value() override { return &_value; }
void _obj_gc_mark() override {
if(gc.marked) return;
gc.marked = true;
if(_attr != nullptr) _attr->_gc_mark();
pkpy::_gc_mark<T>(_value); // handle PyObject* inside _value `T`
}
};
#define OBJ_GET(T, obj) (((Py_<T>*)((obj).get()))->_value)
#define OBJ_GET(T, obj) (((Py_<T>*)(obj))->_value)
#define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_obj_gc_mark()
Str obj_type_name(VM* vm, Type type);
#if DEBUG_NO_BUILTIN_MODULES
#define OBJ_NAME(obj) Str("<?>")
#else
#define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__))
#endif
const int kTpIntIndex = 2;
const int kTpFloatIndex = 3;
inline bool is_type(const PyVar& obj, Type type) noexcept {
inline bool is_type(PyObject* obj, Type type) {
#if DEBUG_EXTRA_CHECK
if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr");
#endif
switch(type.index){
case kTpIntIndex: return obj.is_tag_01();
case kTpFloatIndex: return obj.is_tag_10();
default: return !obj.is_tagged() && obj->type == type;
case kTpIntIndex: return is_int(obj);
case kTpFloatIndex: return is_float(obj);
default: return !is_tagged(obj) && obj->type == type;
}
}
inline bool is_both_int_or_float(const PyVar& a, const PyVar& b) noexcept {
return a.is_tagged() && b.is_tagged();
}
inline bool is_both_int(const PyVar& a, const PyVar& b) noexcept {
return (a.bits & b.bits & 0b11) == 0b01;
}
inline bool is_int(const PyVar& obj) noexcept {
return obj.is_tag_01();
}
inline bool is_float(const PyVar& obj) noexcept {
return obj.is_tag_10();
}
#define PY_CLASS(T, mod, name) \
static Type _type(VM* vm) { \
static const StrName __x0(#mod); \
static const StrName __x1(#name); \
return OBJ_GET(Type, vm->_modules[__x0]->attr(__x1)); \
} \
static PyVar register_class(VM* vm, PyVar mod) { \
PyVar type = vm->new_type_object(mod, #name, vm->tp_object); \
static PyObject* register_class(VM* vm, PyObject* mod) { \
PyObject* type = vm->new_type_object(mod, #name, vm->tp_object); \
if(OBJ_NAME(mod) != #mod) UNREACHABLE(); \
T::_register(vm, mod, type); \
type->attr()._try_perfect_rehash(); \
return type; \
}
union __8B {
union BitsCvt {
i64 _int;
f64 _float;
__8B(i64 val) : _int(val) {}
__8B(f64 val) : _float(val) {}
BitsCvt(i64 val) : _int(val) {}
BitsCvt(f64 val) : _float(val) {}
};
template <typename, typename = void> struct is_py_class : std::false_type {};
template <typename, typename=void> struct is_py_class : std::false_type {};
template <typename T> struct is_py_class<T, std::void_t<decltype(T::_type)>> : std::true_type {};
template<typename T>
void _check_py_class(VM* vm, const PyVar& var);
template<typename T>
T py_pointer_cast(VM* vm, const PyVar& var);
template<typename T>
T py_value_cast(VM* vm, const PyVar& var);
struct Discarded {};
template<typename T> void _check_py_class(VM*, PyObject*);
template<typename T> T py_pointer_cast(VM*, PyObject*);
template<typename T> T py_value_cast(VM*, PyObject*);
struct Discarded { };
template<typename __T>
__T py_cast(VM* vm, const PyVar& obj) {
__T py_cast(VM* vm, PyObject* obj) {
using T = std::decay_t<__T>;
if constexpr(std::is_pointer_v<T>){
return py_pointer_cast<T>(vm, obj);
@ -202,7 +216,7 @@ __T py_cast(VM* vm, const PyVar& obj) {
}
template<typename __T>
__T _py_cast(VM* vm, const PyVar& obj) {
__T _py_cast(VM* vm, PyObject* obj) {
using T = std::decay_t<__T>;
if constexpr(std::is_pointer_v<__T>){
return py_pointer_cast<__T>(vm, obj);
@ -214,7 +228,7 @@ __T _py_cast(VM* vm, const PyVar& obj) {
}
#define VAR(x) py_var(vm, x)
#define VAR_T(T, ...) vm->new_object(T::_type(vm), T(__VA_ARGS__))
#define VAR_T(T, ...) vm->heap.gcnew<T>(T::_type(vm), T(__VA_ARGS__))
#define CAST(T, x) py_cast<T>(vm, x)
#define _CAST(T, x) _py_cast<T>(vm, x)

View File

@ -1,95 +1,95 @@
#ifdef OPCODE
/**************************/
OPCODE(NO_OP)
/**************************/
OPCODE(POP_TOP)
OPCODE(DUP_TOP_VALUE)
OPCODE(CALL)
OPCODE(CALL_UNPACK)
OPCODE(CALL_KWARGS)
OPCODE(CALL_KWARGS_UNPACK)
OPCODE(RETURN_VALUE)
OPCODE(DUP_TOP)
OPCODE(ROT_TWO)
OPCODE(PRINT_EXPR)
/**************************/
OPCODE(LOAD_CONST)
OPCODE(LOAD_NONE)
OPCODE(LOAD_TRUE)
OPCODE(LOAD_FALSE)
OPCODE(LOAD_ELLIPSIS)
OPCODE(LOAD_BUILTIN_EVAL)
OPCODE(LOAD_FUNCTION)
OPCODE(LOAD_NULL)
/**************************/
OPCODE(LOAD_NAME)
OPCODE(LOAD_GLOBAL)
OPCODE(LOAD_ATTR)
OPCODE(LOAD_METHOD)
OPCODE(LOAD_SUBSCR)
OPCODE(STORE_LOCAL)
OPCODE(STORE_GLOBAL)
OPCODE(STORE_ATTR)
OPCODE(STORE_SUBSCR)
OPCODE(DELETE_LOCAL)
OPCODE(DELETE_GLOBAL)
OPCODE(DELETE_ATTR)
OPCODE(DELETE_SUBSCR)
/**************************/
OPCODE(BUILD_LIST)
OPCODE(BUILD_DICT)
OPCODE(BUILD_SET)
OPCODE(BUILD_SLICE)
OPCODE(BUILD_TUPLE)
OPCODE(BUILD_STRING)
/**************************/
OPCODE(BINARY_OP)
OPCODE(COMPARE_OP)
OPCODE(BITWISE_OP)
OPCODE(IS_OP)
OPCODE(CONTAINS_OP)
/**************************/
OPCODE(JUMP_ABSOLUTE)
OPCODE(POP_JUMP_IF_FALSE)
OPCODE(JUMP_IF_TRUE_OR_POP)
OPCODE(JUMP_IF_FALSE_OR_POP)
OPCODE(LOOP_CONTINUE)
OPCODE(LOOP_BREAK)
OPCODE(GOTO)
/**************************/
OPCODE(CALL)
OPCODE(CALL_UNPACK)
OPCODE(CALL_KWARGS)
OPCODE(CALL_KWARGS_UNPACK)
OPCODE(RETURN_VALUE)
OPCODE(YIELD_VALUE)
/**************************/
OPCODE(LIST_APPEND)
OPCODE(DICT_ADD)
OPCODE(SET_ADD)
/**************************/
OPCODE(UNARY_NEGATIVE)
OPCODE(UNARY_NOT)
OPCODE(UNARY_STAR)
OPCODE(BUILD_LIST)
OPCODE(BUILD_MAP)
OPCODE(BUILD_SET)
OPCODE(BUILD_SLICE)
OPCODE(BUILD_TUPLE)
OPCODE(BUILD_TUPLE_REF)
OPCODE(BUILD_STRING)
OPCODE(LIST_APPEND)
OPCODE(MAP_ADD)
OPCODE(SET_ADD)
OPCODE(IMPORT_NAME)
OPCODE(PRINT_EXPR)
/**************************/
OPCODE(GET_ITER)
OPCODE(FOR_ITER)
/**************************/
OPCODE(IMPORT_NAME)
OPCODE(IMPORT_STAR)
/**************************/
OPCODE(UNPACK_SEQUENCE)
OPCODE(UNPACK_EX)
/**************************/
OPCODE(BEGIN_CLASS)
OPCODE(END_CLASS)
OPCODE(STORE_CLASS_ATTR)
/**************************/
OPCODE(WITH_ENTER)
OPCODE(WITH_EXIT)
OPCODE(LOOP_BREAK)
OPCODE(LOOP_CONTINUE)
OPCODE(POP_JUMP_IF_FALSE)
OPCODE(JUMP_ABSOLUTE)
OPCODE(SAFE_JUMP_ABSOLUTE)
OPCODE(JUMP_IF_TRUE_OR_POP)
OPCODE(JUMP_IF_FALSE_OR_POP)
OPCODE(GOTO)
OPCODE(LOAD_CONST)
OPCODE(LOAD_NONE)
OPCODE(LOAD_TRUE)
OPCODE(LOAD_FALSE)
OPCODE(LOAD_EVAL_FN)
OPCODE(LOAD_FUNCTION)
OPCODE(LOAD_ELLIPSIS)
OPCODE(LOAD_NAME)
OPCODE(LOAD_NAME_REF)
/**************************/
OPCODE(TRY_BLOCK_ENTER)
OPCODE(TRY_BLOCK_EXIT)
OPCODE(ASSERT)
OPCODE(EXCEPTION_MATCH)
OPCODE(RAISE)
OPCODE(RE_RAISE)
OPCODE(BUILD_INDEX)
OPCODE(BUILD_ATTR)
OPCODE(BUILD_ATTR_REF)
OPCODE(STORE_NAME)
OPCODE(STORE_FUNCTION)
OPCODE(STORE_REF)
OPCODE(DELETE_REF)
OPCODE(TRY_BLOCK_ENTER)
OPCODE(TRY_BLOCK_EXIT)
OPCODE(YIELD_VALUE)
OPCODE(FAST_INDEX) // a[x]
OPCODE(FAST_INDEX_REF) // a[x]
OPCODE(INPLACE_BINARY_OP)
OPCODE(INPLACE_BITWISE_OP)
OPCODE(SETUP_CLOSURE)
OPCODE(SETUP_DECORATOR)
OPCODE(STORE_ALL_NAMES)
OPCODE(BEGIN_CLASS)
OPCODE(END_CLASS)
OPCODE(STORE_CLASS_ATTR)
/**************************/
#endif

View File

@ -1,302 +0,0 @@
#pragma once
#include "error.h"
#include "obj.h"
namespace pkpy{
typedef uint8_t TokenIndex;
constexpr const char* kTokens[] = {
"@error", "@eof", "@eol", "@sof",
".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::",
"+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->",
"<<", ">>", "&", "|", "^", "?", "@",
"==", "!=", ">=", "<=",
"+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=",
/** KW_BEGIN **/
"class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
"None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
"goto", "label", // extended keywords, not available in cpython
"while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
/** KW_END **/
"is not", "not in",
"@id", "@num", "@str", "@fstr",
"@indent", "@dedent"
};
const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);
// Maps a token spelling to its index in kTokens by exact string comparison.
// Hits UNREACHABLE() when `token` is not in the table.
constexpr TokenIndex TK(const char token[]) {
    for(int idx = 0; idx < kTokenCount; ++idx){
        const char* lhs = kTokens[idx];
        const char* rhs = token;
        // advance over the common prefix
        for(; *lhs != '\0' && *rhs != '\0' && *lhs == *rhs; ++lhs, ++rhs){}
        // equal here means both strings ended together, i.e. an exact match
        if(*lhs == *rhs) return idx;
    }
    UNREACHABLE();
}
#define TK_STR(t) kTokens[t]
const TokenIndex kTokenKwBegin = TK("class");
const TokenIndex kTokenKwEnd = TK("raise");
const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
std::map<std::string_view, TokenIndex> map;
for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k;
return map;
}();
// One lexed token: its kind, the slice of source text it covers, the line it
// was recorded on, and an optional attached value.
struct Token{
    TokenIndex type;
    const char* start;
    int length;
    int line;
    PyVar value;

    // The raw source text of the token.
    Str str() const { return Str(start, length);}

    // Debug description: "<line>: <token kind> '<text>'", with a lone newline
    // shown as the two characters \n.
    Str info() const {
        Str raw = str();
        if (raw == Str("\n")) raw = "\\n";
        StrStream ss;
        ss << line << ": ";
        ss << TK_STR(type);
        ss << " '" << raw << "'";
        return ss.str();
    }
};
// https://docs.python.org/3/reference/expressions.html
// Operator precedence levels, listed from weakest binding (PREC_NONE) to
// strongest (PREC_PRIMARY). The trailing comment on each level names the
// operators it covers.
enum Precedence {
PREC_NONE,
PREC_ASSIGNMENT, // =
PREC_COMMA, // ,
PREC_TERNARY, // ?:
PREC_LOGICAL_OR, // or
PREC_LOGICAL_AND, // and
PREC_LOGICAL_NOT, // not
PREC_EQUALITY, // == !=
PREC_TEST, // in / is / is not / not in
PREC_COMPARISION, // < > <= >=
PREC_BITWISE_OR, // |
PREC_BITWISE_XOR, // ^
PREC_BITWISE_AND, // &
PREC_BITWISE_SHIFT, // << >>
PREC_TERM, // + -
PREC_FACTOR, // * / % //
PREC_UNARY, // - not
PREC_EXPONENT, // **
PREC_CALL, // ()
PREC_SUBSCRIPT, // []
PREC_ATTRIB, // .index
PREC_PRIMARY,
};
// The context of the parsing phase for the compiler.
struct Parser {
shared_ptr<SourceData> src;
const char* token_start;
const char* curr_char;
int current_line = 1;
Token prev, curr;
std::queue<Token> nexts;
std::stack<int> indents;
int brackets_level = 0;
// Pops and returns the next queued token.
// - With an empty queue, returns an "@error" token spanning the text between
//   token_start and curr_char.
// - If the queued token is "@eof" while indentation levels are still open,
//   that "@eof" is consumed, one indentation level is closed, and a
//   zero-length "@dedent" is returned in its place.
Token next_token(){
    if(nexts.empty()){
        const int span = (int)(curr_char - token_start);
        return Token{TK("@error"), token_start, span, current_line};
    }
    Token head_tok = nexts.front();
    nexts.pop();
    const bool pending_dedent = head_tok.type == TK("@eof") && indents.size()>1;
    if(!pending_dedent) return head_tok;
    indents.pop();
    return Token{TK("@dedent"), token_start, 0, current_line};
}
// Returns the character at the cursor without consuming it.
inline char peekchar() const{ return *curr_char; }
// If the next `n` characters are all `c0`, consumes them and returns true;
// otherwise consumes nothing and returns false. The scan stops at the first
// mismatch or NUL, so it does not read beyond the terminator.
bool match_n_chars(int n, char c0){
    for(int k = 0; k < n; ++k){
        const char ch = curr_char[k];
        if(ch == '\0' || ch != c0) return false;
    }
    int remaining = n;
    while(remaining-- > 0) eatchar_include_newline();
    return true;
}
// Consumes a run of spaces and tabs and returns its indentation width:
// a space counts as 1 column, a tab as 4.
int eat_spaces(){
    int width = 0;
    for(;;){
        const char ch = peekchar();
        if(ch == ' ') width += 1;
        else if(ch == '\t') width += 4;
        else return width;
        eatchar();
    }
}
// Processes the leading whitespace of a line and queues "@indent"/"@dedent"
// tokens as the indentation level changes.
// Returns false only when a dedent lands on a column that matches no open
// indentation level. Indentation is ignored inside brackets and on lines that
// are blank or hold only a comment.
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
bool eat_indentation(){
    if(brackets_level > 0) return true;
    const int spaces = eat_spaces();
    if(peekchar() == '#') skip_line_comment();
    const char ch = peekchar();
    if(ch == '\0' || ch == '\n' || ch == '\r') return true;

    if(spaces > indents.top()){
        indents.push(spaces);
        nexts.push(Token{TK("@indent"), token_start, 0, current_line});
        return true;
    }
    // close every level deeper than the current column
    while(spaces < indents.top()){
        indents.pop();
        nexts.push(Token{TK("@dedent"), token_start, 0, current_line});
    }
    return spaces == indents.top();
}
// Consumes and returns the character at the cursor.
// Refuses to consume '\n': newlines must go through eatchar_include_newline()
// so that line tracking stays correct.
char eatchar() {
    const char c = *curr_char;
    if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline");
    ++curr_char;
    return c;
}
// Consumes and returns the character at the cursor. When it is a newline,
// also bumps the line counter and records where the next line starts.
char eatchar_include_newline() {
    const char c = *curr_char++;
    if(c != '\n') return c;
    current_line++;
    src->line_starts.push_back(curr_char);
    return c;
}
// Lexes an identifier or keyword that begins one character before the cursor
// and queues the resulting token.
//
// Accepted characters are ASCII letters, digits, '_' and any multibyte UTF-8
// code point for which is_unicode_Lo_char() returns true.
//
// Returns 0 on success, otherwise an error code:
//   1 - invalid UTF-8 lead byte
//   2 - malformed or truncated multibyte sequence
//   3 - empty name
//   4 - JSON mode: name is not one of true / false / null
//
// Fixes relative to the previous version:
//   * Continuation bytes are validated one at a time. The old code copied
//     `u8bytes` bytes into a std::string, which could read past the
//     terminating NUL on a truncated sequence, and its size check could never
//     fail.
//   * "not in" / "is not" are only fused when the second word ends at a word
//     boundary, so `not inside` is no longer lexed as `not in` + `side`.
int eat_name() {
    // True when `ch` could continue an identifier (any non-ASCII byte counts).
    auto is_name_byte = [](char ch){
        uint8_t b = (uint8_t)ch;
        return b >= 0x80 || isalpha(b) || isdigit(b) || b == '_';
    };

    curr_char--;
    while(true){
        uint8_t c = peekchar();
        int u8bytes = 0;
        if((c & 0b10000000) == 0b00000000) u8bytes = 1;
        else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
        else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
        else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
        else return 1;
        if(u8bytes == 1){
            if(isalpha(c) || c=='_' || isdigit(c)) {
                curr_char++;
                continue;
            }else{
                break;
            }
        }
        // handle multibyte char: payload bits of the lead byte first
        uint32_t value = 0;
        if(u8bytes == 2) value = (c & 0b00011111) << 6;
        else if(u8bytes == 3) value = (c & 0b00001111) << 12;
        else value = (c & 0b00000111) << 18;
        for(int k=1; k < u8bytes; k++){
            uint8_t b = (uint8_t)curr_char[k];
            // A NUL or any non-continuation byte ends the scan here, so we
            // never look beyond the terminator.
            if((b & 0b11000000) != 0b10000000) return 2;
            value |= (uint32_t)(b & 0b00111111) << (6*(u8bytes-k-1));
        }
        if(is_unicode_Lo_char(value)) curr_char += u8bytes;
        else break;
    }

    int length = (int)(curr_char - token_start);
    if(length == 0) return 3;
    std::string_view name(token_start, length);

    if(src->mode == JSON_MODE){
        if(name == "true"){
            set_next_token(TK("True"));
        } else if(name == "false"){
            set_next_token(TK("False"));
        } else if(name == "null"){
            set_next_token(TK("None"));
        } else {
            return 4;
        }
        return 0;
    }

    if(kTokenKwMap.count(name)){
        // strncmp() matching k non-NUL characters guarantees curr_char[k] is
        // readable (at worst it is the terminator).
        if(name == "not"){
            if(strncmp(curr_char, " in", 3) == 0 && !is_name_byte(curr_char[3])){
                curr_char += 3;
                set_next_token(TK("not in"));
                return 0;
            }
        }else if(name == "is"){
            if(strncmp(curr_char, " not", 4) == 0 && !is_name_byte(curr_char[4])){
                curr_char += 4;
                set_next_token(TK("is not"));
                return 0;
            }
        }
        set_next_token(kTokenKwMap.at(name));
    } else {
        set_next_token(TK("@id"));
    }
    return 0;
}
// Advances the cursor to the end of the current line. The terminating
// newline (or NUL) is left unconsumed.
void skip_line_comment() {
    for(char c = peekchar(); c != '\0' && c != '\n'; c = peekchar()){
        eatchar();
    }
}
// Consumes the next character only if it equals `c`; reports whether it did.
bool matchchar(char c) {
    const bool hit = (peekchar() == c);
    if(hit) eatchar_include_newline();
    return hit;
}
// Queues a token of kind `type` covering [token_start, curr_char).
// Opening and closing brackets adjust brackets_level. An "@eol" token is
// attributed to the previous line (current_line - 1).
void set_next_token(TokenIndex type, PyVar value=nullptr) {
    if(type == TK("{") || type == TK("[") || type == TK("(")){
        brackets_level++;
    }else if(type == TK(")") || type == TK("]") || type == TK("}")){
        brackets_level--;
    }
    const int length = (int)(curr_char - token_start);
    const int line = current_line - ((type == TK("@eol")) ? 1 : 0);
    nexts.push( Token{type, token_start, length, line, value} );
}
// Queues `two` if the next character is `c` (consuming it), otherwise `one`.
// Used for operators with an optional second character.
void set_next_token_2(char c, TokenIndex one, TokenIndex two) {
    const bool has_second = matchchar(c);
    set_next_token(has_second ? two : one);
}
// Starts lexing at the beginning of `src`: queues the "@sof" token and opens
// the base indentation level 0.
Parser(shared_ptr<SourceData> src)
    : src(src), token_start(src->source), curr_char(src->source) {
    nexts.push(Token{TK("@sof"), token_start, 0, current_line});
    indents.push(0);
}
};
} // namespace pkpy

View File

@ -2,6 +2,7 @@
#include "ceval.h"
#include "compiler.h"
#include "obj.h"
#include "repl.h"
#include "iter.h"
#include "cffi.h"
@ -10,12 +11,14 @@
namespace pkpy {
CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) {
Compiler compiler(this, source.c_str(), filename, mode);
inline CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) {
Compiler compiler(this, source, filename, mode);
try{
return compiler.compile();
}catch(Exception& e){
// std::cout << e.summary() << std::endl;
#if DEBUG_FULL_EXCEPTION
std::cerr << e.summary() << std::endl;
#endif
_error(e);
return nullptr;
}
@ -42,7 +45,7 @@ CodeObject_ VM::compile(Str source, Str filename, CompileMode mode) {
});
void init_builtins(VM* _vm) {
inline void init_builtins(VM* _vm) {
BIND_NUM_ARITH_OPT(__add__, +)
BIND_NUM_ARITH_OPT(__sub__, -)
BIND_NUM_ARITH_OPT(__mul__, *)
@ -66,10 +69,12 @@ void init_builtins(VM* _vm) {
vm->check_type(args[0], vm->tp_type);
Type type = OBJ_GET(Type, args[0]);
if(!vm->isinstance(args[1], type)){
vm->TypeError("super(type, obj): obj must be an instance or subtype of type");
Str _0 = obj_type_name(vm, OBJ_GET(Type, vm->_t(args[1])));
Str _1 = obj_type_name(vm, type);
vm->TypeError("super(): " + _0.escape() + " is not an instance of " + _1.escape());
}
Type base = vm->_all_types[type.index].base;
return vm->new_object(vm->tp_super, Super(args[1], base));
Type base = vm->_all_types[type].base;
return vm->heap.gcnew(vm->tp_super, Super(args[1], base));
});
_vm->bind_builtin_func<2>("isinstance", [](VM* vm, Args& args) {
@ -79,16 +84,16 @@ void init_builtins(VM* _vm) {
});
_vm->bind_builtin_func<1>("id", [](VM* vm, Args& args) {
const PyVar& obj = args[0];
if(obj.is_tagged()) return VAR((i64)0);
return VAR(obj.bits);
PyObject* obj = args[0];
if(is_tagged(obj)) return VAR((i64)0);
return VAR(BITS(obj));
});
_vm->bind_builtin_func<2>("divmod", [](VM* vm, Args& args) {
i64 lhs = CAST(i64, args[0]);
i64 rhs = CAST(i64, args[1]);
if(rhs == 0) vm->ZeroDivisionError();
return VAR(two_args(VAR(lhs/rhs), VAR(lhs%rhs)));
return VAR(Tuple({VAR(lhs/rhs), VAR(lhs%rhs)}));
});
_vm->bind_builtin_func<1>("eval", [](VM* vm, Args& args) {
@ -110,7 +115,7 @@ void init_builtins(VM* _vm) {
});
_vm->bind_builtin_func<1>("repr", CPP_LAMBDA(vm->asRepr(args[0])));
_vm->bind_builtin_func<1>("len", CPP_LAMBDA(vm->call(args[0], __len__, no_arg())));
_vm->bind_builtin_func<1>("len", CPP_LAMBDA(vm->fast_call(__len__, Args{args[0]})));
_vm->bind_builtin_func<1>("hash", [](VM* vm, Args& args){
i64 value = vm->hash(args[0]);
@ -126,8 +131,8 @@ void init_builtins(VM* _vm) {
_vm->bind_builtin_func<1>("ord", [](VM* vm, Args& args) {
const Str& s = CAST(Str&, args[0]);
if (s.size() != 1) vm->TypeError("ord() expected an ASCII character");
return VAR((i64)(s.c_str()[0]));
if (s.length()!=1) vm->TypeError("ord() expected an ASCII character");
return VAR((i64)(s[0]));
});
_vm->bind_builtin_func<2>("hasattr", [](VM* vm, Args& args) {
@ -164,17 +169,16 @@ void init_builtins(VM* _vm) {
std::vector<StrName> keys = t_attr.keys();
names.insert(keys.begin(), keys.end());
List ret;
for (StrName name : names) ret.push_back(VAR(name.str()));
for (StrName name : names) ret.push_back(VAR(name.sv()));
return VAR(std::move(ret));
});
_vm->bind_method<0>("object", "__repr__", [](VM* vm, Args& args) {
PyVar self = args[0];
std::uintptr_t addr = self.is_tagged() ? 0 : (uintptr_t)self.get();
StrStream ss;
ss << std::hex << addr;
Str s = "<" + OBJ_NAME(vm->_t(self)) + " object at 0x" + ss.str() + ">";
return VAR(s);
PyObject* self = args[0];
if(is_tagged(self)) self = nullptr;
std::stringstream ss;
ss << "<" << OBJ_NAME(vm->_t(self)) << " object at " << std::hex << self << ">";
return VAR(ss.str());
});
_vm->bind_method<1>("object", "__eq__", CPP_LAMBDA(VAR(args[0] == args[1])));
@ -233,11 +237,11 @@ void init_builtins(VM* _vm) {
const Str& s = CAST(Str&, args[0]);
try{
size_t parsed = 0;
i64 val = S_TO_INT(s, &parsed, 10);
if(parsed != s.size()) throw std::invalid_argument("<?>");
i64 val = S_TO_INT(s.str(), &parsed, 10);
if(parsed != s.length()) throw std::invalid_argument("<?>");
return VAR(val);
}catch(std::invalid_argument&){
vm->ValueError("invalid literal for int(): " + s.escape(true));
vm->ValueError("invalid literal for int(): " + s.escape());
}
}
vm->TypeError("int() argument must be a int, float, bool or str");
@ -280,7 +284,7 @@ void init_builtins(VM* _vm) {
if(s == "inf") return VAR(INFINITY);
if(s == "-inf") return VAR(-INFINITY);
try{
f64 val = S_TO_FLOAT(s);
f64 val = S_TO_FLOAT(s.str());
return VAR(val);
}catch(std::invalid_argument&){
vm->ValueError("invalid literal for float(): '" + s + "'");
@ -293,7 +297,7 @@ void init_builtins(VM* _vm) {
_vm->bind_method<0>("float", "__repr__", [](VM* vm, Args& args) {
f64 val = CAST(f64, args[0]);
if(std::isinf(val) || std::isnan(val)) return VAR(std::to_string(val));
StrStream ss;
std::stringstream ss;
ss << std::setprecision(std::numeric_limits<f64>::max_digits10-1-2) << val;
std::string s = ss.str();
if(std::all_of(s.begin()+1, s.end(), isdigit)) s += ".0";
@ -323,7 +327,7 @@ void init_builtins(VM* _vm) {
_vm->bind_method<1>("str", "__contains__", [](VM* vm, Args& args) {
const Str& self = CAST(Str&, args[0]);
const Str& other = CAST(Str&, args[1]);
return VAR(self.find(other) != Str::npos);
return VAR(self.index(other) != -1);
});
_vm->bind_method<0>("str", "__str__", CPP_LAMBDA(args[0]));
@ -331,7 +335,7 @@ void init_builtins(VM* _vm) {
_vm->bind_method<0>("str", "__repr__", [](VM* vm, Args& args) {
const Str& _self = CAST(Str&, args[0]);
return VAR(_self.escape(true));
return VAR(_self.escape());
});
_vm->bind_method<0>("str", "__json__", [](VM* vm, Args& args) {
@ -357,7 +361,7 @@ void init_builtins(VM* _vm) {
if(is_type(args[1], vm->tp_slice)){
Slice s = _CAST(Slice, args[1]);
s.normalize(self.u8_length());
return VAR(self.u8_substr(s.start, s.stop));
return VAR(self.u8_slice(s.start, s.stop));
}
int index = CAST(int, args[1]);
@ -378,34 +382,31 @@ void init_builtins(VM* _vm) {
});
_vm->bind_method<2>("str", "replace", [](VM* vm, Args& args) {
const Str& _self = CAST(Str&, args[0]);
const Str& _old = CAST(Str&, args[1]);
const Str& _new = CAST(Str&, args[2]);
Str _copy = _self;
size_t pos = 0;
while ((pos = _copy.find(_old, pos)) != std::string::npos) {
_copy.replace(pos, _old.length(), _new);
pos += _new.length();
}
return VAR(_copy);
const Str& self = CAST(Str&, args[0]);
const Str& old = CAST(Str&, args[1]);
const Str& new_ = CAST(Str&, args[2]);
return VAR(self.replace(old, new_));
});
_vm->bind_method<1>("str", "startswith", [](VM* vm, Args& args) {
const Str& self = CAST(Str&, args[0]);
const Str& prefix = CAST(Str&, args[1]);
return VAR(self.find(prefix) == 0);
return VAR(self.index(prefix) == 0);
});
_vm->bind_method<1>("str", "endswith", [](VM* vm, Args& args) {
const Str& self = CAST(Str&, args[0]);
const Str& suffix = CAST(Str&, args[1]);
return VAR(self.rfind(suffix) == self.length() - suffix.length());
int offset = self.length() - suffix.length();
if(offset < 0) return vm->False;
bool ok = memcmp(self.data+offset, suffix.data, suffix.length()) == 0;
return VAR(ok);
});
_vm->bind_method<1>("str", "join", [](VM* vm, Args& args) {
const Str& self = CAST(Str&, args[0]);
StrStream ss;
PyVar obj = vm->asList(args[1]);
FastStrStream ss;
PyObject* obj = vm->asList(args[1]);
const List& list = CAST(List&, obj);
for (int i = 0; i < list.size(); ++i) {
if (i > 0) ss << self;
@ -423,9 +424,9 @@ void init_builtins(VM* _vm) {
_vm->bind_method<1>("list", "extend", [](VM* vm, Args& args) {
List& self = CAST(List&, args[0]);
PyVar obj = vm->asList(args[1]);
PyObject* obj = vm->asList(args[1]);
const List& list = CAST(List&, obj);
self.insert(self.end(), list.begin(), list.end());
self.extend(list);
return vm->None;
});
@ -440,7 +441,7 @@ void init_builtins(VM* _vm) {
int n = CAST(int, args[1]);
List result;
result.reserve(self.size() * n);
for(int i = 0; i < n; i++) result.insert(result.end(), self.begin(), self.end());
for(int i = 0; i < n; i++) result.extend(self);
return VAR(std::move(result));
});
@ -450,7 +451,7 @@ void init_builtins(VM* _vm) {
if(index < 0) index += self.size();
if(index < 0) index = 0;
if(index > self.size()) index = self.size();
self.insert(self.begin() + index, args[2]);
self.insert(index, args[2]);
return vm->None;
});
@ -463,10 +464,10 @@ void init_builtins(VM* _vm) {
_vm->bind_method<1>("list", "__add__", [](VM* vm, Args& args) {
const List& self = CAST(List&, args[0]);
const List& obj = CAST(List&, args[1]);
List new_list = self;
new_list.insert(new_list.end(), obj.begin(), obj.end());
return VAR(new_list);
const List& other = CAST(List&, args[1]);
List new_list(self); // copy construct
new_list.extend(other);
return VAR(std::move(new_list));
});
_vm->bind_method<0>("list", "__len__", [](VM* vm, Args& args) {
@ -506,14 +507,14 @@ void init_builtins(VM* _vm) {
List& self = CAST(List&, args[0]);
int index = CAST(int, args[1]);
index = vm->normalized_index(index, self.size());
self.erase(self.begin() + index);
self.erase(index);
return vm->None;
});
/************ PyTuple ************/
_vm->bind_static_method<1>("tuple", "__new__", [](VM* vm, Args& args) {
List list = CAST(List, vm->asList(args[0]));
return VAR(Tuple::from_list(std::move(list)));
return VAR(Tuple(std::move(list)));
});
_vm->bind_method<0>("tuple", "__iter__", [](VM* vm, Args& args) {
@ -528,7 +529,7 @@ void init_builtins(VM* _vm) {
s.normalize(self.size());
List new_list;
for(size_t i = s.start; i < s.stop; i++) new_list.push_back(self[i]);
return VAR(Tuple::from_list(std::move(new_list)));
return VAR(Tuple(std::move(new_list)));
}
int index = CAST(int, args[1]);
@ -542,7 +543,7 @@ void init_builtins(VM* _vm) {
});
/************ PyBool ************/
_vm->bind_static_method<1>("bool", "__new__", CPP_LAMBDA(vm->asBool(args[0])));
_vm->bind_static_method<1>("bool", "__new__", CPP_LAMBDA(VAR(vm->asBool(args[0]))));
_vm->bind_method<0>("bool", "__repr__", [](VM* vm, Args& args) {
bool val = CAST(bool, args[0]);
@ -564,50 +565,47 @@ void init_builtins(VM* _vm) {
}
#ifdef _WIN32
#define __EXPORT __declspec(dllexport)
#define __EXPORT __declspec(dllexport) inline
#elif __APPLE__
#define __EXPORT __attribute__((visibility("default"))) __attribute__((used))
#define __EXPORT __attribute__((visibility("default"))) __attribute__((used)) inline
#elif __EMSCRIPTEN__
#include <emscripten.h>
#define __EXPORT EMSCRIPTEN_KEEPALIVE
#define __EXPORT EMSCRIPTEN_KEEPALIVE inline
#else
#define __EXPORT
#define __EXPORT inline
#endif
// Creates the "time" module on `vm` and binds a zero-argument `time()` function to it.
void add_module_time(VM* vm){
PyVar mod = vm->new_module("time");
inline void add_module_time(VM* vm){
PyObject* mod = vm->new_module("time");
vm->bind_func<0>(mod, "time", [](VM* vm, Args& args) {
// NOTE(review): the epoch of high_resolution_clock is implementation-defined,
// so this is not guaranteed to be Unix time -- confirm that is acceptable.
auto now = std::chrono::high_resolution_clock::now();
// Microseconds since the clock's epoch, divided by 1e6 to yield fractional seconds.
return VAR(std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count() / 1000000.0);
});
}
// Creates the "sys" module on `vm`: a `version` attribute plus functions for
// reading and changing the VM's recursion limit.
void add_module_sys(VM* vm){
PyVar mod = vm->new_module("sys");
inline void add_module_sys(VM* vm){
PyObject* mod = vm->new_module("sys");
// `sys.version` is set from the PK_VERSION constant.
vm->setattr(mod, "version", VAR(PK_VERSION));
// `sys.getrefcount(x)` reports use_count() of the argument handle.
vm->bind_func<1>(mod, "getrefcount", CPP_LAMBDA(VAR(args[0].use_count())));
vm->bind_func<0>(mod, "getrecursionlimit", CPP_LAMBDA(VAR(vm->recursionlimit)));
vm->bind_func<1>(mod, "setrecursionlimit", [](VM* vm, Args& args) {
// The new limit is stored as-is; no range validation is performed here.
vm->recursionlimit = CAST(int, args[0]);
return vm->None;
});
}
// Creates the "json" module on `vm` with `loads(str)` and `dumps(obj)`.
void add_module_json(VM* vm){
PyVar mod = vm->new_module("json");
inline void add_module_json(VM* vm){
PyObject* mod = vm->new_module("json");
vm->bind_func<1>(mod, "loads", [](VM* vm, Args& args) {
const Str& expr = CAST(Str&, args[0]);
// The text is compiled in JSON_MODE and then executed against the module and
// locals of the current top frame; the execution result is the return value.
CodeObject_ code = vm->compile(expr, "<json>", JSON_MODE);
return vm->_exec(code, vm->top_frame()->_module, vm->top_frame()->_locals);
});
// `dumps` delegates to the argument's `__json__` method.
vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->call(args[0], __json__)));
vm->bind_func<1>(mod, "dumps", CPP_LAMBDA(vm->fast_call(__json__, Args{args[0]})));
}
void add_module_math(VM* vm){
PyVar mod = vm->new_module("math");
inline void add_module_math(VM* vm){
PyObject* mod = vm->new_module("math");
vm->setattr(mod, "pi", VAR(3.1415926535897932384));
vm->setattr(mod, "e" , VAR(2.7182818284590452354));
@ -625,12 +623,12 @@ void add_module_math(VM* vm){
vm->bind_func<1>(mod, "sqrt", CPP_LAMBDA(VAR(std::sqrt(vm->num_to_float(args[0])))));
}
void add_module_dis(VM* vm){
PyVar mod = vm->new_module("dis");
inline void add_module_dis(VM* vm){
PyObject* mod = vm->new_module("dis");
vm->bind_func<1>(mod, "dis", [](VM* vm, Args& args) {
PyVar f = args[0];
PyObject* f = args[0];
if(is_type(f, vm->tp_bound_method)) f = CAST(BoundMethod, args[0]).method;
CodeObject_ code = CAST(Function, f).code;
CodeObject_ code = CAST(Function&, f).decl->code;
(*vm->_stdout) << vm->disassemble(code);
return vm->None;
});
@ -641,17 +639,17 @@ struct ReMatch {
i64 start;
i64 end;
std::smatch m;
ReMatch(i64 start, i64 end, std::smatch m) : start(start), end(end), m(m) {}
std::cmatch m;
ReMatch(i64 start, i64 end, std::cmatch m) : start(start), end(end), m(m) {}
static void _register(VM* vm, PyVar mod, PyVar type){
static void _register(VM* vm, PyObject* mod, PyObject* type){
vm->bind_method<-1>(type, "__init__", CPP_NOT_IMPLEMENTED());
vm->bind_method<0>(type, "start", CPP_LAMBDA(VAR(CAST(ReMatch&, args[0]).start)));
vm->bind_method<0>(type, "end", CPP_LAMBDA(VAR(CAST(ReMatch&, args[0]).end)));
vm->bind_method<0>(type, "span", [](VM* vm, Args& args) {
auto& self = CAST(ReMatch&, args[0]);
return VAR(two_args(VAR(self.start), VAR(self.end)));
return VAR(Tuple({VAR(self.start), VAR(self.end)}));
});
vm->bind_method<1>(type, "group", [](VM* vm, Args& args) {
@ -663,20 +661,20 @@ struct ReMatch {
}
};
// Searches `string` for the first match of the regular expression `pattern`.
// Returns a ReMatch object holding the match's start/end (converted from byte
// offsets by the Str index helper) and the match results, or vm->None when
// nothing matches. When the from-start flag is set, a match that does not
// begin at byte position 0 is also reported as vm->None.
PyVar _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){
std::regex re(pattern);
std::smatch m;
if(std::regex_search(string, m, re)){
if(fromStart && m.position() != 0) return vm->None;
i64 start = string._to_u8_index(m.position());
i64 end = string._to_u8_index(m.position() + m.length());
inline PyObject* _regex_search(const Str& pattern, const Str& string, bool from_start, VM* vm){
std::regex re(pattern.begin(), pattern.end());
std::cmatch m;
if(std::regex_search(string.begin(), string.end(), m, re)){
if(from_start && m.position() != 0) return vm->None;
// m.position()/m.length() are byte offsets into the searched text.
i64 start = string._byte_index_to_unicode(m.position());
i64 end = string._byte_index_to_unicode(m.position() + m.length());
return VAR_T(ReMatch, start, end, m);
}
return vm->None;
};
void add_module_re(VM* vm){
PyVar mod = vm->new_module("re");
inline void add_module_re(VM* vm){
PyObject* mod = vm->new_module("re");
ReMatch::register_class(vm, mod);
vm->bind_func<2>(mod, "match", [](VM* vm, Args& args) {
@ -695,16 +693,16 @@ void add_module_re(VM* vm){
const Str& pattern = CAST(Str&, args[0]);
const Str& repl = CAST(Str&, args[1]);
const Str& string = CAST(Str&, args[2]);
std::regex re(pattern);
return VAR(std::regex_replace(string, re, repl));
std::regex re(pattern.begin(), pattern.end());
return VAR(std::regex_replace(string.str(), re, repl.str()));
});
vm->bind_func<2>(mod, "split", [](VM* vm, Args& args) {
const Str& pattern = CAST(Str&, args[0]);
const Str& string = CAST(Str&, args[1]);
std::regex re(pattern);
std::sregex_token_iterator it(string.begin(), string.end(), re, -1);
std::sregex_token_iterator end;
std::regex re(pattern.begin(), pattern.end());
std::cregex_token_iterator it(string.begin(), string.end(), re, -1);
std::cregex_token_iterator end;
List vec;
for(; it != end; ++it){
vec.push_back(VAR(it->str()));
@ -740,7 +738,7 @@ struct Random{
gen.seed(seed);
}
static void _register(VM* vm, PyVar mod, PyVar type){
static void _register(VM* vm, PyObject* mod, PyObject* type){
vm->bind_static_method<0>(type, "__new__", CPP_LAMBDA(VAR_T(Random)));
vm->bind_method<1>(type, "seed", native_proxy_callable(&Random::seed));
vm->bind_method<2>(type, "randint", native_proxy_callable(&Random::randint));
@ -749,15 +747,21 @@ struct Random{
}
};
// Creates the "random" module on `vm`: registers the native Random class in it,
// then compiles and executes the bundled "random" Python source inside the module.
void add_module_random(VM* vm){
PyVar mod = vm->new_module("random");
inline void add_module_random(VM* vm){
PyObject* mod = vm->new_module("random");
Random::register_class(vm, mod);
// kPythonLibs maps a library name to its embedded Python source text.
CodeObject_ code = vm->compile(kPythonLibs["random"], "random.py", EXEC_MODE);
vm->_exec(code, mod);
}
void VM::post_init(){
// Creates the "gc" module on `vm` and exposes `collect()`, which forwards to
// vm->heap.collect() and returns its result wrapped as a Python value.
inline void add_module_gc(VM* vm){
    PyObject* gc_module = vm->new_module("gc");
    vm->bind_func<0>(
        gc_module,
        "collect",
        CPP_LAMBDA(VAR(vm->heap.collect()))
    );
}
inline void VM::post_init(){
init_builtins(this);
#if !DEBUG_NO_BUILTIN_MODULES
add_module_sys(this);
add_module_time(this);
add_module_json(this);
@ -767,7 +771,8 @@ void VM::post_init(){
add_module_random(this);
add_module_io(this);
add_module_os(this);
add_module_c(this);
// add_module_c(this);
add_module_gc(this);
for(const char* name: {"this", "functools", "collections", "heapq", "bisect"}){
_lazy_modules[name] = kPythonLibs[name];
@ -775,21 +780,22 @@ void VM::post_init(){
CodeObject_ code = compile(kPythonLibs["builtins"], "<builtins>", EXEC_MODE);
this->_exec(code, this->builtins);
code = compile(kPythonLibs["dict"], "<builtins>", EXEC_MODE);
code = compile(kPythonLibs["_dict"], "<builtins>", EXEC_MODE);
this->_exec(code, this->builtins);
code = compile(kPythonLibs["set"], "<builtins>", EXEC_MODE);
code = compile(kPythonLibs["_set"], "<builtins>", EXEC_MODE);
this->_exec(code, this->builtins);
// property is defined in builtins.py so we need to add it after builtins is loaded
_t(tp_object)->attr().set(__class__, property(CPP_LAMBDA(vm->_t(args[0]))));
_t(tp_type)->attr().set(__base__, property([](VM* vm, Args& args){
const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0]).index];
return info.base.index == -1 ? vm->None : vm->_all_types[info.base.index].obj;
const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])];
return info.base.index == -1 ? vm->None : vm->_all_types[info.base].obj;
}));
_t(tp_type)->attr().set(__name__, property([](VM* vm, Args& args){
const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0]).index];
const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])];
return VAR(info.name);
}));
#endif
}
} // namespace pkpy
@ -851,11 +857,11 @@ extern "C" {
/// Return `__repr__` of the result.
/// If the variable is not found, return `nullptr`.
char* pkpy_vm_get_global(pkpy::VM* vm, const char* name){
pkpy::PyVar* val = vm->_main->attr().try_get(name);
pkpy::PyObject* val = vm->_main->attr().try_get(name);
if(val == nullptr) return nullptr;
try{
pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(*val));
return strdup(repr.c_str());
pkpy::Str repr = pkpy::CAST(pkpy::Str&, vm->asRepr(val));
return repr.c_str_dup();
}catch(...){
return nullptr;
}
@ -867,11 +873,11 @@ extern "C" {
/// Return `__repr__` of the result.
/// If there is any error, return `nullptr`.
char* pkpy_vm_eval(pkpy::VM* vm, const char* source){
pkpy::PyVarOrNull ret = vm->exec(source, "<eval>", pkpy::EVAL_MODE);
pkpy::PyObject* ret = vm->exec(source, "<eval>", pkpy::EVAL_MODE);
if(ret == nullptr) return nullptr;
try{
pkpy::Str repr = pkpy::CAST(pkpy::Str, vm->asRepr(ret));
return strdup(repr.c_str());
pkpy::Str repr = pkpy::CAST(pkpy::Str&, vm->asRepr(ret));
return repr.c_str_dup();
}catch(...){
return nullptr;
}
@ -908,12 +914,12 @@ extern "C" {
///
/// Return a json representing the result.
char* pkpy_vm_read_output(pkpy::VM* vm){
if(vm->use_stdio) return nullptr;
pkpy::StrStream* s_out = (pkpy::StrStream*)(vm->_stdout);
pkpy::StrStream* s_err = (pkpy::StrStream*)(vm->_stderr);
if(vm->is_stdio_used()) return nullptr;
std::stringstream* s_out = (std::stringstream*)(vm->_stdout);
std::stringstream* s_err = (std::stringstream*)(vm->_stderr);
pkpy::Str _stdout = s_out->str();
pkpy::Str _stderr = s_err->str();
pkpy::StrStream ss;
std::stringstream ss;
ss << '{' << "\"stdout\": " << _stdout.escape(false);
ss << ", " << "\"stderr\": " << _stderr.escape(false) << '}';
s_out->str(""); s_err->str("");
@ -950,13 +956,13 @@ extern "C" {
for(int i=0; mod[i]; i++) if(mod[i] == ' ') return nullptr;
for(int i=0; name[i]; i++) if(name[i] == ' ') return nullptr;
std::string f_header = std::string(mod) + '.' + name + '#' + std::to_string(kGlobalBindId++);
pkpy::PyVar obj = vm->_modules.contains(mod) ? vm->_modules[mod] : vm->new_module(mod);
pkpy::PyObject* obj = vm->_modules.contains(mod) ? vm->_modules[mod] : vm->new_module(mod);
vm->bind_func<-1>(obj, name, [ret_code, f_header](pkpy::VM* vm, const pkpy::Args& args){
pkpy::StrStream ss;
std::stringstream ss;
ss << f_header;
for(int i=0; i<args.size(); i++){
ss << ' ';
pkpy::PyVar x = vm->call(args[i], pkpy::__json__);
pkpy::PyObject* x = vm->fast_call(pkpy::__json__, pkpy::Args{args[i]});
ss << pkpy::CAST(pkpy::Str&, x);
}
char* packet = strdup(ss.str().c_str());

171
src/ref.h
View File

@ -1,171 +0,0 @@
#pragma once
#include "obj.h"
#include "vm.h"
namespace pkpy {
// Abstract interface for an assignable reference (l-value): a slot that can be
// read, written, or deleted in the context of a VM and a frame.
struct BaseRef {
    // Reads the value currently held by the referenced slot.
    virtual PyVar get(VM* vm, Frame* frame) const = 0;

    // Stores `value` into the referenced slot.
    virtual void set(VM* vm, Frame* frame, PyVar value) const = 0;

    // Removes the referenced slot.
    virtual void del(VM* vm, Frame* frame) const = 0;

    virtual ~BaseRef() = default;
};
struct NameRef : BaseRef {
const std::pair<StrName, NameScope> pair;
inline StrName name() const { return pair.first; }
inline NameScope scope() const { return pair.second; }
NameRef(const std::pair<StrName, NameScope>& pair) : pair(pair) {}
PyVar get(VM* vm, Frame* frame) const{
PyVar* val;
val = frame->f_locals().try_get(name());
if(val != nullptr) return *val;
val = frame->f_closure_try_get(name());
if(val != nullptr) return *val;
val = frame->f_globals().try_get(name());
if(val != nullptr) return *val;
val = vm->builtins->attr().try_get(name());
if(val != nullptr) return *val;
vm->NameError(name());
return nullptr;
}
void set(VM* vm, Frame* frame, PyVar val) const{
switch(scope()) {
case NAME_LOCAL: frame->f_locals().set(name(), std::move(val)); break;
case NAME_GLOBAL:
if(frame->f_locals().try_set(name(), std::move(val))) return;
frame->f_globals().set(name(), std::move(val));
break;
default: UNREACHABLE();
}
}
void del(VM* vm, Frame* frame) const{
switch(scope()) {
case NAME_LOCAL: {
if(frame->f_locals().contains(name())){
frame->f_locals().erase(name());
}else{
vm->NameError(name());
}
} break;
case NAME_GLOBAL:
{
if(frame->f_locals().contains(name())){
frame->f_locals().erase(name());
}else{
if(frame->f_globals().contains(name())){
frame->f_globals().erase(name());
}else{
vm->NameError(name());
}
}
} break;
default: UNREACHABLE();
}
}
};
// Reference to an attribute of an object (`obj.attr`).
struct AttrRef : BaseRef {
    mutable PyVar obj;
    NameRef attr;

    AttrRef(PyVar obj, NameRef attr) : obj(obj), attr(attr) {}

    // Reads the attribute through the VM's getattr.
    PyVar get(VM* vm, Frame* frame) const{
        return vm->getattr(obj, attr.name());
    }

    // Writes the attribute through the VM's setattr.
    void set(VM* vm, Frame* frame, PyVar val) const{
        vm->setattr(obj, attr.name(), std::move(val));
    }

    // Deletes the attribute directly from the object's attribute dict.
    // TypeError if the object has no valid attribute dict; AttributeError if
    // the attribute is not present.
    void del(VM* vm, Frame* frame) const{
        if(!obj->is_attr_valid()){
            vm->TypeError("cannot delete attribute");
        }
        auto&& attrs = obj->attr();
        if(!attrs.contains(attr.name())){
            vm->AttributeError(obj, attr.name());
        }
        attrs.erase(attr.name());
    }
};
struct IndexRef : BaseRef {
mutable PyVar obj;
PyVar index;
IndexRef(PyVar obj, PyVar index) : obj(obj), index(index) {}
PyVar get(VM* vm, Frame* frame) const{
return vm->fast_call(__getitem__, two_args(obj, index));
}
void set(VM* vm, Frame* frame, PyVar val) const{
Args args(3);
args[0] = obj; args[1] = index; args[2] = std::move(val);
vm->fast_call(__setitem__, std::move(args));
}
void del(VM* vm, Frame* frame) const{
vm->fast_call(__delitem__, two_args(obj, index));
}
};
// Reference made of several sub-references (the target list of an unpacking
// assignment such as `a, b = ...`). Each element of `objs` must itself be a
// reference object (or a star wrapper around one, for `set`).
struct TupleRef : BaseRef {
Tuple objs;
TupleRef(Tuple&& objs) : objs(std::move(objs)) {}
// Reads every sub-reference and returns the values packed into a new tuple.
PyVar get(VM* vm, Frame* frame) const{
Tuple args(objs.size());
for (int i = 0; i < objs.size(); i++) {
args[i] = vm->PyRef_AS_C(objs[i])->get(vm, frame);
}
return VAR(std::move(args));
}
// Unpacks `val` (converted to an iterator) into the sub-references in order.
// Raises ValueError when the iterator yields too few or too many values.
void set(VM* vm, Frame* frame, PyVar val) const{
val = vm->asIter(val);
BaseIter* iter = vm->PyIter_AS_C(val);
for(int i=0; i<objs.size(); i++){
PyVarOrNull x;
if(is_type(objs[i], vm->tp_star_wrapper)){
// Starred target: only allowed as the last target and only as an l-value.
auto& star = _CAST(StarWrapper&, objs[i]);
if(star.rvalue) vm->ValueError("can't use starred expression here");
if(i != objs.size()-1) vm->ValueError("* can only be used at the end");
auto ref = vm->PyRef_AS_C(star.obj);
// Drain everything that is left in the iterator into a list.
List list;
while((x = iter->next()) != nullptr) list.push_back(x);
ref->set(vm, frame, VAR(std::move(list)));
// The iterator is exhausted, so the "too many values" check below is skipped.
return;
}else{
x = iter->next();
if(x == nullptr) vm->ValueError("not enough values to unpack");
vm->PyRef_AS_C(objs[i])->set(vm, frame, x);
}
}
// All targets were filled; any leftover value is an error.
PyVarOrNull x = iter->next();
if(x != nullptr) vm->ValueError("too many values to unpack");
}
// Deletes every sub-reference in order.
void del(VM* vm, Frame* frame) const{
for(int i=0; i<objs.size(); i++) vm->PyRef_AS_C(objs[i])->del(vm, frame);
}
};
// Wraps a BaseRef-derived value in a new VM object of type tp_ref.
// Fails to compile when P (after decay) does not derive from BaseRef.
template<typename P>
PyVarRef VM::PyRef(P&& value) {
    using RefType = std::decay_t<P>;
    static_assert(std::is_base_of_v<BaseRef, RefType>);
    return new_object(tp_ref, std::forward<P>(value));
}
// Returns the BaseRef stored inside `obj`.
// Reports TypeError when `obj` is not of type tp_ref.
const BaseRef* VM::PyRef_AS_C(const PyVar& obj)
{
    const bool holds_ref = is_type(obj, tp_ref);
    if(!holds_ref){
        TypeError("expected an l-value");
    }
    auto payload = obj->value();
    return static_cast<const BaseRef*>(payload);
}
/***** Frame's Impl *****/
// If `v` holds a reference object, replaces it in place with the value the
// reference resolves to in this frame; otherwise leaves `v` untouched.
inline void Frame::try_deref(VM* vm, PyVar& v){
    if(!is_type(v, vm->tp_ref)) return;
    const BaseRef* ref = vm->PyRef_AS_C(v);
    v = ref->get(vm, this);
}
} // namespace pkpy

346
src/str.h
View File

@ -1,67 +1,187 @@
#pragma once
#include "common.h"
#include "memory.h"
#include "vector.h"
namespace pkpy {
typedef std::stringstream StrStream;
// Returns the total byte length (1-6) of the UTF-8 sequence introduced by the
// lead byte `c`. A byte that cannot start a sequence (a continuation byte
// 0x80-0xBF, or 0xFE/0xFF) throws std::runtime_error, unless `suppress` is
// true, in which case 0 is returned instead.
// TODO: callers should check for a 0 return.
inline int utf8len(unsigned char c, bool suppress=false){
    if(c <= 0x7F) return 1;         // 0xxxxxxx
    if(c >= 0xC0){
        if(c <= 0xDF) return 2;     // 110xxxxx
        if(c <= 0xEF) return 3;     // 1110xxxx
        if(c <= 0xF7) return 4;     // 11110xxx
        if(c <= 0xFB) return 5;     // 111110xx
        if(c <= 0xFD) return 6;     // 1111110x
    }
    if(suppress) return 0;
    throw std::runtime_error("invalid utf8 char: " + std::to_string(c));
}
class Str : public std::string {
mutable std::vector<uint16_t>* _u8_index = nullptr;
struct Str{
int size;
bool is_ascii;
char* data;
void utf8_lazy_init() const{
if(_u8_index != nullptr) return;
_u8_index = new std::vector<uint16_t>();
_u8_index->reserve(size());
if(size() > 65535) throw std::runtime_error("str has more than 65535 bytes.");
for(uint16_t i = 0; i < size(); i++){
// https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80
if((at(i) & 0xC0) != 0x80) _u8_index->push_back(i);
Str(): size(0), is_ascii(true), data(nullptr) {}
Str(int size, bool is_ascii): size(size), is_ascii(is_ascii) {
data = (char*)pool64.alloc(size);
}
#define STR_INIT() \
data = (char*)pool64.alloc(size); \
for(int i=0; i<size; i++){ \
data[i] = s[i]; \
if(!isascii(s[i])) is_ascii = false; \
}
}
public:
uint16_t _cached_sn_index = 0;
Str() : std::string() {}
Str(const char* s) : std::string(s) {}
Str(const char* s, size_t n) : std::string(s, n) {}
Str(const std::string& s) : std::string(s) {}
Str(const Str& s) : std::string(s) {
if(s._u8_index != nullptr){
_u8_index = new std::vector<uint16_t>(*s._u8_index);
}
}
Str(Str&& s) : std::string(std::move(s)) {
delete _u8_index;
_u8_index = s._u8_index;
s._u8_index = nullptr;
Str(const std::string& s): size(s.size()), is_ascii(true) {
STR_INIT()
}
i64 _to_u8_index(i64 index) const{
utf8_lazy_init();
auto p = std::lower_bound(_u8_index->begin(), _u8_index->end(), index);
if(p != _u8_index->end() && *p != index) UNREACHABLE();
return p - _u8_index->begin();
Str(std::string_view s): size(s.size()), is_ascii(true) {
STR_INIT()
}
int u8_length() const {
utf8_lazy_init();
return _u8_index->size();
Str(const char* s): size(strlen(s)), is_ascii(true) {
STR_INIT()
}
Str u8_getitem(int i) const{
return u8_substr(i, i+1);
Str(const char* s, int len): size(len), is_ascii(true) {
STR_INIT()
}
Str u8_substr(int start, int end) const{
utf8_lazy_init();
if(start >= end) return Str();
int c_end = end >= _u8_index->size() ? size() : _u8_index->at(end);
return substr(_u8_index->at(start), c_end - _u8_index->at(start));
#undef STR_INIT
Str(const Str& other): size(other.size), is_ascii(other.is_ascii) {
data = (char*)pool64.alloc(size);
memcpy(data, other.data, size);
}
Str(Str&& other): size(other.size), is_ascii(other.is_ascii), data(other.data) {
other.data = nullptr;
other.size = 0;
}
const char* begin() const { return data; }
const char* end() const { return data + size; }
char operator[](int idx) const { return data[idx]; }
int length() const { return size; }
bool empty() const { return size == 0; }
size_t hash() const{ return std::hash<std::string_view>()(sv()); }
Str& operator=(const Str& other){
if(data!=nullptr) pool64.dealloc(data);
size = other.size;
is_ascii = other.is_ascii;
data = (char*)pool64.alloc(size);
memcpy(data, other.data, size);
return *this;
}
Str& operator=(Str&& other) noexcept{
if(data!=nullptr) pool64.dealloc(data);
size = other.size;
is_ascii = other.is_ascii;
data = other.data;
other.data = nullptr;
return *this;
}
~Str(){
if(data!=nullptr) pool64.dealloc(data);
}
Str operator+(const Str& other) const {
Str ret(size + other.size, is_ascii && other.is_ascii);
memcpy(ret.data, data, size);
memcpy(ret.data + size, other.data, other.size);
return ret;
}
Str operator+(const char* p) const {
Str other(p);
return *this + other;
}
friend Str operator+(const char* p, const Str& str){
Str other(p);
return other + str;
}
friend std::ostream& operator<<(std::ostream& os, const Str& str){
if(str.data!=nullptr) os.write(str.data, str.size);
return os;
}
bool operator==(const Str& other) const {
if(size != other.size) return false;
return memcmp(data, other.data, size) == 0;
}
bool operator!=(const Str& other) const {
if(size != other.size) return true;
return memcmp(data, other.data, size) != 0;
}
bool operator<(const Str& other) const {
int ret = strncmp(data, other.data, std::min(size, other.size));
if(ret != 0) return ret < 0;
return size < other.size;
}
bool operator<(const std::string_view& other) const {
int ret = strncmp(data, other.data(), std::min(size, (int)other.size()));
if(ret != 0) return ret < 0;
return size < (int)other.size();
}
friend bool operator<(const std::string_view& other, const Str& str){
return str > other;
}
bool operator>(const Str& other) const {
int ret = strncmp(data, other.data, std::min(size, other.size));
if(ret != 0) return ret > 0;
return size > other.size;
}
bool operator<=(const Str& other) const {
int ret = strncmp(data, other.data, std::min(size, other.size));
if(ret != 0) return ret < 0;
return size <= other.size;
}
bool operator>=(const Str& other) const {
int ret = strncmp(data, other.data, std::min(size, other.size));
if(ret != 0) return ret > 0;
return size >= other.size;
}
Str substr(int start, int len) const {
Str ret(len, is_ascii);
memcpy(ret.data, data + start, len);
return ret;
}
char* c_str_dup() const {
char* p = (char*)malloc(size + 1);
memcpy(p, data, size);
p[size] = 0;
return p;
}
std::string_view sv() const {
return std::string_view(data, size);
}
std::string str() const {
return std::string(data, size);
}
Str lstrip() const {
Str copy(*this);
std::string copy(data, size);
copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) {
// std::isspace(c) does not working on windows (Debug)
return c != ' ' && c != '\t' && c != '\r' && c != '\n';
@ -69,12 +189,8 @@ public:
return Str(copy);
}
size_t hash() const {
return std::hash<std::string>()(*this);
}
Str escape(bool single_quote) const {
StrStream ss;
Str escape(bool single_quote=true) const {
std::stringstream ss;
ss << (single_quote ? '\'' : '"');
for (int i=0; i<length(); i++) {
char c = this->operator[](i);
@ -104,30 +220,78 @@ public:
return ss.str();
}
Str& operator=(const Str& s){
this->std::string::operator=(s);
delete _u8_index;
if(s._u8_index != nullptr){
_u8_index = new std::vector<uint16_t>(*s._u8_index);
int index(const Str& sub, int start=0) const {
auto p = std::search(data + start, data + size, sub.data, sub.data + sub.size);
if(p == data + size) return -1;
return p - data;
}
Str replace(const Str& old, const Str& new_) const {
std::stringstream ss;
int start = 0;
while(true){
int i = index(old, start);
if(i == -1){
ss << substr(start, size - start);
break;
}
ss << substr(start, i - start);
ss << new_;
start = i + old.size;
}
return *this;
return ss.str();
}
Str& operator=(Str&& s){
this->std::string::operator=(std::move(s));
delete _u8_index;
this->_u8_index = s._u8_index;
s._u8_index = nullptr;
return *this;
/*************unicode*************/
// TODO: check error
int _unicode_index_to_byte(int i) const{
if(is_ascii) return i;
int j = 0;
while(i > 0){
j += utf8len(data[j]);
i--;
}
return j;
}
~Str(){ delete _u8_index;}
int _byte_index_to_unicode(int n) const{
if(is_ascii) return n;
int cnt = 0;
for(int i=0; i<n; i++){
if((data[i] & 0xC0) != 0x80) cnt++;
}
return cnt;
}
Str u8_getitem(int i) const{
i = _unicode_index_to_byte(i);
return substr(i, utf8len(data[i]));
}
Str u8_slice(int start, int end) const{
// TODO: optimize this
start = _unicode_index_to_byte(start);
end = _unicode_index_to_byte(end);
return substr(start, end - start);
}
int u8_length() const {
return _byte_index_to_unicode(size);
}
};
// Streams every argument, left to right, into a string stream with operator<<
// and returns the concatenated text. With no arguments the result is empty.
template<typename... Args>
inline std::string fmt(Args&&... args) {
    std::stringstream out;
    ((out << args), ...);
    return out.str();
}
const uint32_t kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,7030
3,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560};
const uint32_t kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,70301,7031
2,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101};
bool is_unicode_Lo_char(uint32_t c) {
inline bool is_unicode_Lo_char(uint32_t c) {
auto index = std::lower_bound(kLoRangeA, kLoRangeA + 476, c) - kLoRangeA;
if(c == kLoRangeA[index]) return true;
index -= 1;
@ -142,15 +306,19 @@ struct StrName {
StrName(uint16_t index): index(index) {}
StrName(const char* s): index(get(s).index) {}
StrName(const Str& s){
if(s._cached_sn_index != 0){
index = s._cached_sn_index;
} else {
index = get(s).index;
}
index = get(s.sv()).index;
}
const Str& str() const { return _r_interned[index-1]; }
std::string_view sv() const { return _r_interned[index-1].sv(); }
bool empty() const { return index == 0; }
friend std::ostream& operator<<(std::ostream& os, const StrName& sn){
return os << sn.sv();
}
Str escape() const {
return _r_interned[index-1].escape();
}
bool operator==(const StrName& other) const noexcept {
return this->index == other.index;
}
@ -170,11 +338,7 @@ struct StrName {
static std::map<Str, uint16_t, std::less<>> _interned;
static std::vector<Str> _r_interned;
static StrName get(const Str& s){
return get(s.c_str());
}
static StrName get(const char* s){
static StrName get(std::string_view s){
auto it = _interned.find(s);
if(it != _interned.end()) return StrName(it->second);
uint16_t index = (uint16_t)(_r_interned.size() + 1);
@ -184,8 +348,33 @@ struct StrName {
}
};
std::map<Str, uint16_t, std::less<>> StrName::_interned;
std::vector<Str> StrName::_r_interned;
// Collects pointers to Str objects and joins them into one Str on demand.
// Only the addresses are stored (no copy is made), so every Str streamed in
// must still be alive when str() is called.
struct FastStrStream{
    pod_vector<const Str*> parts;

    // Record the address of `s`; returns *this so calls can be chained.
    FastStrStream& operator<<(const Str& s){
        parts.push_back(&s);
        return *this;
    }

    // Build the concatenation of all recorded parts, in insertion order.
    Str str() const{
        // First pass: total byte length, and whether every part is ASCII.
        int total = 0;
        bool all_ascii = true;
        for(const Str* part: parts){
            total += part->length();
            all_ascii &= part->is_ascii;
        }

        // Second pass: copy each part's bytes back-to-back into the result.
        // NOTE(review): assumes Str(len, is_ascii) provides `len` writable bytes at .data — confirm in str.h.
        Str joined(total, all_ascii);
        char* cursor = joined.data;
        for(const Str* part: parts){
            auto n = part->length();
            memcpy(cursor, part->data, n);
            cursor += n;
        }
        return joined;
    }
};
inline std::map<Str, uint16_t, std::less<>> StrName::_interned;
inline std::vector<Str> StrName::_r_interned;
const StrName __class__ = StrName::get("__class__");
const StrName __base__ = StrName::get("__base__");
@ -209,10 +398,13 @@ const StrName __call__ = StrName::get("__call__");
const StrName m_eval = StrName::get("eval");
const StrName m_self = StrName::get("self");
const StrName m_dict = StrName::get("dict");
const StrName m_set = StrName::get("set");
const StrName m_add = StrName::get("add");
const StrName __enter__ = StrName::get("__enter__");
const StrName __exit__ = StrName::get("__exit__");
const StrName CMP_SPECIAL_METHODS[] = {
const StrName COMPARE_SPECIAL_METHODS[] = {
StrName::get("__lt__"), StrName::get("__le__"), StrName::get("__eq__"),
StrName::get("__ne__"), StrName::get("__gt__"), StrName::get("__ge__")
};

View File

@ -3,108 +3,84 @@
#include "common.h"
#include "memory.h"
#include "str.h"
#include "vector.h"
namespace pkpy {
using List = std::vector<PyVar>;
class Args {
static THREAD_LOCAL SmallArrayPool<PyVar, 10> _pool;
using List = pod_vector<PyObject*>;
PyVar* _args;
int _size;
class Args {
PyObject** _args;
int _size;
inline void _alloc(int n){
this->_args = _pool.alloc(n);
this->_size = n;
}
void _alloc(int n){
this->_args = (n==0) ? nullptr : (PyObject**)pool64.alloc(n * sizeof(void*));
this->_size = n;
}
public:
Args(int n){ _alloc(n); }
public:
Args(int n){ _alloc(n); }
Args(const Args& other){
_alloc(other._size);
for(int i=0; i<_size; i++) _args[i] = other._args[i];
}
Args(const Args& other){
_alloc(other._size);
for(int i=0; i<_size; i++) _args[i] = other._args[i];
}
Args(Args&& other) noexcept {
this->_args = other._args;
this->_size = other._size;
other._args = nullptr;
other._size = 0;
}
Args(Args&& other) noexcept {
this->_args = other._args;
this->_size = other._size;
other._args = nullptr;
other._size = 0;
}
static pkpy::Args from_list(List&& other) noexcept {
Args ret(other.size());
memcpy((void*)ret._args, (void*)other.data(), sizeof(PyVar)*ret.size());
memset((void*)other.data(), 0, sizeof(PyVar)*ret.size());
other.clear();
return ret;
}
Args(std::initializer_list<PyObject*> list) : Args(list.size()){
int i = 0;
for(PyObject* p : list) _args[i++] = p;
}
PyVar& operator[](int i){ return _args[i]; }
const PyVar& operator[](int i) const { return _args[i]; }
Args(List&& other) noexcept : Args(other.size()){
for(int i=0; i<_size; i++) _args[i] = other[i];
other.clear();
}
Args& operator=(Args&& other) noexcept {
_pool.dealloc(_args, _size);
this->_args = other._args;
this->_size = other._size;
other._args = nullptr;
other._size = 0;
return *this;
}
PyObject*& operator[](int i){ return _args[i]; }
PyObject* operator[](int i) const { return _args[i]; }
inline int size() const { return _size; }
Args& operator=(Args&& other) noexcept {
if(_args!=nullptr) pool64.dealloc(_args);
this->_args = other._args;
this->_size = other._size;
other._args = nullptr;
other._size = 0;
return *this;
}
List move_to_list() noexcept {
List ret(_size);
memcpy((void*)ret.data(), (void*)_args, sizeof(PyVar)*_size);
memset((void*)_args, 0, sizeof(PyVar)*_size);
return ret;
}
int size() const { return _size; }
void extend_self(const PyVar& self){
static_assert(std::is_standard_layout_v<PyVar>);
PyVar* old_args = _args;
int old_size = _size;
_alloc(old_size+1);
_args[0] = self;
if(old_size == 0) return;
List to_list() noexcept {
List ret(_size);
// TODO: use move/memcpy
for(int i=0; i<_size; i++) ret[i] = _args[i];
return ret;
}
memcpy((void*)(_args+1), (void*)old_args, sizeof(PyVar)*old_size);
memset((void*)old_args, 0, sizeof(PyVar)*old_size);
_pool.dealloc(old_args, old_size);
}
void extend_self(PyObject* self){
PyObject** old_args = _args;
int old_size = _size;
_alloc(old_size+1);
_args[0] = self;
for(int i=0; i<old_size; i++) _args[i+1] = old_args[i];
if(old_args!=nullptr) pool64.dealloc(old_args);
}
~Args(){ _pool.dealloc(_args, _size); }
};
~Args(){ if(_args!=nullptr) pool64.dealloc(_args); }
};
inline const Args& no_arg() {
static const Args _zero(0);
inline const Args& no_arg() { return _zero; }
return _zero;
}
template<typename T>
Args one_arg(T&& a) {
Args ret(1);
ret[0] = std::forward<T>(a);
return ret;
}
typedef Args Tuple;
template<typename T1, typename T2>
Args two_args(T1&& a, T2&& b) {
Args ret(2);
ret[0] = std::forward<T1>(a);
ret[1] = std::forward<T2>(b);
return ret;
}
template<typename T1, typename T2, typename T3>
Args three_args(T1&& a, T2&& b, T3&& c) {
Args ret(3);
ret[0] = std::forward<T1>(a);
ret[1] = std::forward<T2>(b);
ret[2] = std::forward<T3>(c);
return ret;
}
typedef Args Tuple;
THREAD_LOCAL SmallArrayPool<PyVar, 10> Args::_pool;
} // namespace pkpy

126
src/vector.h Normal file
View File

@ -0,0 +1,126 @@
#pragma once
#include "common.h"
#include "memory.h"
namespace pkpy{
// Minimal growable array for POD element types. Storage is obtained from and
// returned to the `pool128` allocator, and elements are moved around with
// memcpy / plain assignment (which is why T must be POD).
//
// The initial capacity is N = 128 / sizeof(T) elements; capacity doubles when
// push_back/insert find the buffer full. No bounds checking is performed by
// operator[], back(), pop_back(), pop_back_n(), insert() or erase().
template<typename T>
struct pod_vector{
    static_assert(128 % sizeof(T) == 0);
    static_assert(std::is_pod_v<T>);
    static constexpr int N = 128 / sizeof(T);   // default capacity, in elements
    static_assert(N > 4);

    int _size;
    int _capacity;
    T* _data;       // nullptr only in the moved-from state

    // Empty vector with the default capacity.
    pod_vector(): _size(0), _capacity(N) {
        _data = (T*)pool128.alloc(_capacity * sizeof(T));
    }

    // Vector of `size` elements. The elements are NOT initialized.
    pod_vector(int size): _size(size), _capacity(std::max(N, size)) {
        _data = (T*)pool128.alloc(_capacity * sizeof(T));
    }

    // Deep copy. The capacity is clamped to at least N so that copying a
    // moved-from vector (capacity 0) still yields a usable buffer.
    pod_vector(const pod_vector& other): _size(other._size), _capacity(std::max(N, other._capacity)) {
        _data = (T*)pool128.alloc(_capacity * sizeof(T));
        // Skip memcpy for empty sources: other._data may be nullptr then.
        if(_size > 0) memcpy(_data, other._data, sizeof(T) * _size);
    }

    // Steal the buffer; `other` becomes an empty vector with no storage.
    pod_vector(pod_vector&& other) noexcept
        : _size(other._size), _capacity(other._capacity), _data(other._data) {
        other._reset();
    }

    pod_vector& operator=(pod_vector&& other) noexcept {
        // Self-move must be a no-op, otherwise we would free our own buffer
        // and then lose it.
        if(this == &other) return *this;
        if(_data!=nullptr) pool128.dealloc(_data);
        _size = other._size;
        _capacity = other._capacity;
        _data = other._data;
        other._reset();
        return *this;
    }

    // remove copy assignment
    pod_vector& operator=(const pod_vector& other) = delete;

    // Put this object into the "no storage" state without freeing anything.
    // Used on the source of a move after its buffer has been taken over.
    void _reset() noexcept {
        _size = 0;
        _capacity = 0;
        _data = nullptr;
    }

    // Append one element, doubling the capacity when full.
    template<typename ValueT>
    void push_back(ValueT&& t) {
        // Copy first: `t` may refer to an element of this vector, and
        // reserve() frees the old buffer.
        T value = std::forward<ValueT>(t);
        if (_size == _capacity) reserve(_capacity*2);
        _data[_size++] = value;
    }

    // Ensure room for at least `cap` elements. Never shrinks. A vector in
    // the moved-from state always gets a fresh buffer of at least N elements.
    void reserve(int cap){
        if(_data != nullptr && cap <= _capacity) return;
        _capacity = std::max(cap, N);
        T* old_data = _data;
        _data = (T*)pool128.alloc(_capacity * sizeof(T));
        if(old_data!=nullptr){
            memcpy(_data, old_data, sizeof(T) * _size);
            pool128.dealloc(old_data);
        }
    }

    void pop_back() { _size--; }

    // Append a copy of every element of `other`. `other` may be *this.
    void extend(const pod_vector& other){
        // Snapshot the count: when other is *this, its size changes below.
        const int count = other._size;
        if(count == 0) return;
        const int needed = _size + count;
        if(needed > _capacity) reserve(std::max(needed, _capacity*2));
        // other._data is read after reserve() on purpose: for self-extend it
        // now points at the new buffer, and the two ranges do not overlap.
        memcpy(_data + _size, other._data, sizeof(T) * count);
        _size = needed;
    }

    T& operator[](int index) { return _data[index]; }
    const T& operator[](int index) const { return _data[index]; }

    T* begin() { return _data; }
    T* end() { return _data + _size; }
    const T* begin() const { return _data; }
    const T* end() const { return _data + _size; }
    T& back() { return _data[_size - 1]; }
    const T& back() const { return _data[_size - 1]; }

    bool empty() const { return _size == 0; }
    int size() const { return _size; }
    T* data() { return _data; }
    const T* data() const { return _data; }
    void pop_back_n(int n) { _size -= n; }
    // Drops all elements but keeps the allocated buffer.
    void clear() { _size=0; }

    // Insert `val` before position `i`, shifting later elements up by one.
    template<typename ValueT>
    void insert(int i, ValueT&& val){
        // Copy first: `val` may alias an element that is about to be
        // shifted or whose buffer is about to be reallocated.
        T value = std::forward<ValueT>(val);
        if (_size == _capacity) reserve(_capacity*2);
        for(int j=_size; j>i; j--) _data[j] = _data[j-1];
        _data[i] = value;
        _size++;
    }

    // Remove the element at position `i`, shifting later elements down.
    void erase(int i){
        for(int j=i; j<_size-1; j++) _data[j] = _data[j+1];
        _size--;
    }

    ~pod_vector() {
        if(_data!=nullptr) pool128.dealloc(_data);
    }
};
// LIFO adaptor over a sequence container. The container must provide
// push_back, pop_back, back, clear, empty and size; the top of the stack is
// the container's last element.
template <typename T, typename Container=std::vector<T>>
class stack{
    Container items_;
public:
    // --- modifiers ---
    void push(const T& t){
        items_.push_back(t);
    }

    void push(T&& t){
        items_.push_back(std::move(t));
    }

    void pop(){
        items_.pop_back();
    }

    // Remove the top element and return it by value.
    T popx(){
        T taken = std::move(items_.back());
        items_.pop_back();
        return taken;
    }

    void clear(){
        items_.clear();
    }

    // --- observers ---
    T& top(){
        return items_.back();
    }

    const T& top() const {
        return items_.back();
    }

    bool empty() const {
        return items_.empty();
    }

    size_t size() const {
        return items_.size();
    }

    // Read-only view of the underlying container (bottom of stack first).
    const Container& data() const {
        return items_;
    }
};
// Convenience alias: a stack whose backing container is pod_vector<T>
// instead of the default std::vector<T>.
template <typename T>
using pod_stack = stack<T, pod_vector<T>>;
} // namespace pkpy

802
src/vm.h

File diff suppressed because it is too large Load Diff

View File

@ -42,4 +42,7 @@ d1 = {1:2, 3:4}
d2 = {3:4, 1:2}
d3 = {1:2, 3:4, 5:6}
assert d1 == d2
assert d1 != d3
assert d1 != d3
a = dict([(1, 2), (3, 4)])
assert a == {1: 2, 3: 4}

View File

@ -17,6 +17,8 @@ asds1321321321测试\测试'''
assert s == 'asdasd\nasds1321321321测试\\测试'
assert f'123{2*2}56789' == '123456789'
s = f'''->->{s}<-<-
{123}
'''

View File

@ -11,21 +11,21 @@ r.shuffle(a)
r.choice(a)
r.choice(b)
from sys import version as v
# from sys import version as v
assert type(v) is str
# assert type(v) is str
class Context:
def __init__(self):
self.x = 0
# class Context:
# def __init__(self):
# self.x = 0
def __enter__(self):
self.x = 1
# def __enter__(self):
# self.x = 1
def __exit__(self):
self.x = 2
# def __exit__(self):
# self.x = 2
with Context() as c:
assert c.x == 1
# with Context() as c:
# assert c.x == 1
assert c.x == 2
# assert c.x == 2

View File

@ -3,12 +3,12 @@ a = {
'b': 2,
'c': None,
'd': [1, 2, 3],
# 'e': {
# 'a': 1,
# 'b': 2,
# 'c': None,
# 'd': [1, 2, 3],
# },
'e': {
'a': 1,
'b': 2,
'c': None,
'd': [1, 2, 3],
},
"f": 'This is a string',
'g': [True, False, None],
'h': False