From 849c6aabb5605a6a474f55344411cd1d7775952b Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 3 May 2023 14:20:18 +0800 Subject: [PATCH 01/13] ... --- docs/features/basic.md | 1 + docs/features/goto.md | 5 +---- docs/features/ub.md | 10 ++++++++++ src/ceval.h | 5 +++-- src/frame.h | 5 +++-- src/obj.h | 1 + src/vm.h | 17 ++++++++--------- 7 files changed, 27 insertions(+), 17 deletions(-) create mode 100644 docs/features/ub.md diff --git a/docs/features/basic.md b/docs/features/basic.md index 376c552c..04312fa0 100644 --- a/docs/features/basic.md +++ b/docs/features/basic.md @@ -1,6 +1,7 @@ --- icon: dot order: 100 +title: Basic Features --- # basic diff --git a/docs/features/goto.md b/docs/features/goto.md index 1f338691..e694ae43 100644 --- a/docs/features/goto.md +++ b/docs/features/goto.md @@ -1,5 +1,6 @@ --- icon: dot +title: Goto Statement --- # goto/label @@ -25,7 +26,3 @@ for i in range(10): label .exit ``` - -!!! -If we detect an illegal divert, you will get an `UnexpectedError` or the behaviour is undefined. -!!! diff --git a/docs/features/ub.md b/docs/features/ub.md new file mode 100644 index 00000000..6eadedac --- /dev/null +++ b/docs/features/ub.md @@ -0,0 +1,10 @@ +--- +icon: dot +title: Undefined Behaviour +--- + +These are the undefined behaviours of pkpy. The behaviour of pkpy is undefined if you do the following things. + +1. Delete a builtin object. For example, `del int.__add__`. +2. Call an unbound method with the wrong type of `self`. For example, `int.__add__('1', 2)`. +3. Use goto statement to jump out of a context block. \ No newline at end of file diff --git a/src/ceval.h b/src/ceval.h index f14a5373..bd5a4197 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -83,11 +83,12 @@ __NEXT_STEP:; TARGET(LOAD_FUNCTION) { FuncDecl_ decl = co->func_decls[byte.arg]; bool is_simple = decl->starred_arg==-1 && decl->kwargs.size()==0 && !decl->code->is_generator; + int argc = decl->args.size(); PyObject* obj; if(decl->nested){ - obj = VAR(Function({decl, is_simple, frame->_module, frame->_locals.to_namedict()})); + obj = VAR(Function({decl, is_simple, argc, frame->_module, frame->_locals.to_namedict()})); }else{ - obj = VAR(Function({decl, is_simple, frame->_module})); + obj = VAR(Function({decl, is_simple, argc, frame->_module})); } PUSH(obj); } DISPATCH(); diff --git a/src/frame.h b/src/frame.h index c5f0164a..5a3228e8 100644 --- a/src/frame.h +++ b/src/frame.h @@ -63,8 +63,9 @@ struct ValueStackImpl { // We allocate extra MAX_SIZE/128 places to keep `_sp` valid when `is_overflow() == true`. PyObject* _begin[MAX_SIZE + MAX_SIZE/128]; PyObject** _sp; + PyObject** _max_end; - ValueStackImpl(): _sp(_begin) {} + ValueStackImpl(): _sp(_begin), _max_end(_begin + MAX_SIZE) {} PyObject*& top(){ return _sp[-1]; } PyObject* top() const { return _sp[-1]; } @@ -90,7 +91,7 @@ struct ValueStackImpl { _sp = sp; } void clear() { _sp = _begin; } - bool is_overflow() const { return _sp >= _begin + MAX_SIZE; } + bool is_overflow() const { return _sp >= _max_end; } ValueStackImpl(const ValueStackImpl&) = delete; ValueStackImpl(ValueStackImpl&&) = delete; diff --git a/src/obj.h b/src/obj.h index a504b597..7dce1a56 100644 --- a/src/obj.h +++ b/src/obj.h @@ -47,6 +47,7 @@ using FuncDecl_ = shared_ptr; struct Function{ FuncDecl_ decl; bool is_simple; + int argc; // cached argc PyObject* _module; NameDict_ _closure; }; diff --git a/src/vm.h b/src/vm.h index f4e88bc3..682b1569 100644 --- a/src/vm.h +++ b/src/vm.h @@ -1001,12 +1001,11 @@ inline PyObject* VM::_py_call(PyObject** p0, PyObject* callable, ArgsView args, const Function& fn = CAST(Function&, callable); const CodeObject* co = fn.decl->code.get(); - PyObject* _module = fn._module != nullptr ? fn._module : callstack.top()._module; - if(args.size() < fn.decl->args.size()){ + if(args.size() < fn.argc){ vm->TypeError(fmt( "expected ", - fn.decl->args.size(), + fn.argc, " positional arguments, but got ", args.size(), " (", fn.decl->code->name, ')' @@ -1015,11 +1014,11 @@ inline PyObject* VM::_py_call(PyObject** p0, PyObject* callable, ArgsView args, // if this function is simple, a.k.a, no kwargs and no *args and not a generator // we can use a fast path to avoid using buffer copy - if(fn.is_simple && kwargs.size()==0){ - if(args.size() > fn.decl->args.size()) TypeError("too many positional arguments"); - int spaces = co->varnames.size() - fn.decl->args.size(); + if(fn.is_simple){ + if(args.size() > fn.argc) TypeError("too many positional arguments"); + int spaces = co->varnames.size() - fn.argc; for(int j=0; jis_generator){ PyObject* ret = PyIter(Generator( this, - Frame(&s_data, nullptr, co, _module, callable), + Frame(&s_data, nullptr, co, fn._module, callable), ArgsView(buffer, buffer + co->varnames.size()) )); return ret; @@ -1070,7 +1069,7 @@ inline PyObject* VM::_py_call(PyObject** p0, PyObject* callable, ArgsView args, // copy buffer to stack for(int i=0; ivarnames.size(); i++) PUSH(buffer[i]); - callstack.emplace(&s_data, p0, co, _module, callable); + callstack.emplace(&s_data, p0, co, fn._module, callable); return nullptr; } From f60cd8a21e5e4354c1ee5059eab3823425c789d9 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 3 May 2023 16:09:37 +0800 Subject: [PATCH 02/13] fix a bug --- src/compiler.h | 10 +++++----- tests/99_bugs.py | 9 ++++++++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index f1928a6b..8a98486b 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -62,11 +62,11 @@ class Compiler { if(!ctx()->s_expr.empty()){ throw std::runtime_error("!ctx()->s_expr.empty()\n" + ctx()->_log_s_expr()); } - // if the last op does not return, add a default return None - if(ctx()->co->codes.empty() || ctx()->co->codes.back().op != OP_RETURN_VALUE){ - ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); - ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); - } + // add a `return None` in the end as a guard + // previously, we only do this if the last opcode is not a return + // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return + ctx()->emit(OP_LOAD_NONE, BC_NOARG, BC_KEEPLINE); + ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); ctx()->co->optimize(vm); if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){ SyntaxError("maximum number of local variables exceeded"); diff --git a/tests/99_bugs.py b/tests/99_bugs.py index 9332374c..4c1eda34 100644 --- a/tests/99_bugs.py +++ b/tests/99_bugs.py @@ -4,4 +4,11 @@ mp = map(lambda x: x**2, [1, 2, 3, 4, 5] ) assert list(mp) == [1, 4, 9, 16, 25] -assert not 3>4 \ No newline at end of file +assert not 3>4 + +def f(x): + if x>1: + return 1 + +assert f(2) == 1 +assert f(0) == None \ No newline at end of file From 2f5e2a20f5b9e2e1a391c3b01689b98713f3c019 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 3 May 2023 16:41:35 +0800 Subject: [PATCH 03/13] ... --- python/builtins.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/builtins.py b/python/builtins.py index 34a931da..ddce6c49 100644 --- a/python/builtins.py +++ b/python/builtins.py @@ -136,14 +136,16 @@ def list@remove(self, value): for i in range(len(self)): if self[i] == value: del self[i] - return True - return False + return + value = repr(value) + raise ValueError(f'{value} is not in list') def list@index(self, value): for i in range(len(self)): if self[i] == value: return i - return -1 + value = repr(value) + raise ValueError(f'{value} is not in list') def list@pop(self, i=-1): res = self[i] From 66052fadd548fb43bdb48515c40a9e545b308661 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 3 May 2023 16:49:26 +0800 Subject: [PATCH 04/13] ... --- src/obj.h | 7 +++++++ src/pocketpy.h | 12 ++++++++++++ tests/99_bugs.py | 9 ++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/obj.h b/src/obj.h index 7dce1a56..4ce5b395 100644 --- a/src/obj.h +++ b/src/obj.h @@ -56,6 +56,13 @@ struct BoundMethod { PyObject* self; PyObject* func; BoundMethod(PyObject* self, PyObject* func) : self(self), func(func) {} + + bool operator==(const BoundMethod& rhs) const noexcept { + return self == rhs.self && func == rhs.func; + } + bool operator!=(const BoundMethod& rhs) const noexcept { + return self != rhs.self || func != rhs.func; + } }; struct Range { diff --git a/src/pocketpy.h b/src/pocketpy.h index 0bfa339b..aee2d6c1 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -972,6 +972,18 @@ inline void VM::post_init(){ return CAST(BoundMethod&, args[0]).func; })); + vm->bind_method<1>(_t(tp_bound_method), "__eq__", [](VM* vm, ArgsView args){ + if(!is_non_tagged_type(args[1], vm->tp_bound_method)) return vm->False; + bool ok = _CAST(BoundMethod&, args[0]) == _CAST(BoundMethod&, args[1]); + return VAR(ok); + }); + + vm->bind_method<1>(_t(tp_bound_method), "__ne__", [](VM* vm, ArgsView args){ + if(!is_non_tagged_type(args[1], vm->tp_bound_method)) return vm->True; + bool ok = _CAST(BoundMethod&, args[0]) != _CAST(BoundMethod&, args[1]); + return VAR(ok); + }); + _t(tp_slice)->attr().set("start", property([](VM* vm, ArgsView args){ return CAST(Slice&, args[0]).start; })); diff --git a/tests/99_bugs.py b/tests/99_bugs.py index 4c1eda34..4eeb827a 100644 --- a/tests/99_bugs.py +++ b/tests/99_bugs.py @@ -11,4 +11,11 @@ def f(x): return 1 assert f(2) == 1 -assert f(0) == None \ No newline at end of file +assert f(0) == None + +a = [1, 2] +b = [3, 4] +assert a.append == a.append +assert a.append is not a.append +assert a.append is not b.append +assert a.append != b.append \ No newline at end of file From 744e5c20b01dd25a95d1dcdc45620d36161f5b17 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 3 May 2023 20:22:36 +0800 Subject: [PATCH 05/13] ... --- docs/quick-start/interop.md | 3 ++- src/ceval.h | 2 +- src/pocketpy.h | 4 ++-- src/vm.h | 31 +++++++++++++++++++++++-------- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/docs/quick-start/interop.md b/docs/quick-start/interop.md index b98b9819..0183b5c0 100644 --- a/docs/quick-start/interop.md +++ b/docs/quick-start/interop.md @@ -39,4 +39,5 @@ std::cout << CAST(Str, i); // abc + `is_type(PyObject* obj, Type type)` + `is_non_tagged_type(PyObject* obj, Type type)` -+ `VM::check_type(PyObject* obj, Type type)` \ No newline at end of file ++ `VM::check_type(PyObject* obj, Type type)` ++ `VM::check_non_tagged_type(PyObject* obj, Type type)` \ No newline at end of file diff --git a/src/ceval.h b/src/ceval.h index bd5a4197..969a886a 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -487,7 +487,7 @@ __NEXT_STEP:; StrName name(byte.arg); PyObject* super_cls = POPX(); if(super_cls == None) super_cls = _t(tp_object); - check_type(super_cls, tp_type); + check_non_tagged_type(super_cls, tp_type); PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, super_cls)); PUSH(cls); } DISPATCH(); diff --git a/src/pocketpy.h b/src/pocketpy.h index aee2d6c1..2dd32c80 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -68,7 +68,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<2>("super", [](VM* vm, ArgsView args) { - vm->check_type(args[0], vm->tp_type); + vm->check_non_tagged_type(args[0], vm->tp_type); Type type = OBJ_GET(Type, args[0]); if(!vm->isinstance(args[1], type)){ Str _0 = obj_type_name(vm, OBJ_GET(Type, vm->_t(args[1]))); @@ -80,7 +80,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<2>("isinstance", [](VM* vm, ArgsView args) { - vm->check_type(args[1], vm->tp_type); + vm->check_non_tagged_type(args[1], vm->tp_type); Type type = OBJ_GET(Type, args[1]); return VAR(vm->isinstance(args[0], type)); }); diff --git a/src/vm.h b/src/vm.h index 682b1569..ceb6579d 100644 --- a/src/vm.h +++ b/src/vm.h @@ -30,14 +30,14 @@ inline int set_read_file_cwd(ReadFileCwdFunc func) { _read_file_cwd = func; retu #define DEF_NATIVE_2(ctype, ptype) \ template<> inline ctype py_cast(VM* vm, PyObject* obj) { \ - vm->check_type(obj, vm->ptype); \ + vm->check_non_tagged_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ template<> inline ctype _py_cast(VM* vm, PyObject* obj) { \ return OBJ_GET(ctype, obj); \ } \ template<> inline ctype& py_cast(VM* vm, PyObject* obj) { \ - vm->check_type(obj, vm->ptype); \ + vm->check_non_tagged_type(obj, vm->ptype); \ return OBJ_GET(ctype, obj); \ } \ template<> inline ctype& _py_cast(VM* vm, PyObject* obj) { \ @@ -362,6 +362,21 @@ public: TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", but got " + OBJ_NAME(_t(obj)).escape()); } + void check_non_tagged_type(PyObject* obj, Type type){ + if(is_non_tagged_type(obj, type)) return; + TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", but got " + OBJ_NAME(_t(obj)).escape()); + } + + void check_int(PyObject* obj){ + if(is_int(obj)) return; + check_type(obj, tp_int); + } + + void check_float(PyObject* obj){ + if(is_float(obj)) return; + check_type(obj, tp_float); + } + PyObject* _t(Type t){ return _all_types[t.index].obj; } @@ -434,7 +449,7 @@ DEF_NATIVE_2(MappingProxy, tp_mappingproxy) #define PY_CAST_INT(T) \ template<> inline T py_cast(VM* vm, PyObject* obj){ \ - vm->check_type(obj, vm->tp_int); \ + vm->check_int(obj); \ return (T)(BITS(obj) >> 2); \ } \ template<> inline T _py_cast(VM* vm, PyObject* obj){ \ @@ -454,7 +469,7 @@ PY_CAST_INT(unsigned long long) template<> inline float py_cast(VM* vm, PyObject* obj){ - vm->check_type(obj, vm->tp_float); + vm->check_float(obj); i64 bits = BITS(obj); bits = (bits >> 2) << 2; return BitsCvt(bits)._float; @@ -465,7 +480,7 @@ template<> inline float _py_cast(VM* vm, PyObject* obj){ return BitsCvt(bits)._float; } template<> inline double py_cast(VM* vm, PyObject* obj){ - vm->check_type(obj, vm->tp_float); + vm->check_float(obj); i64 bits = BITS(obj); bits = (bits >> 2) << 2; return BitsCvt(bits)._float; @@ -515,7 +530,7 @@ inline PyObject* py_var(VM* vm, bool val){ } template<> inline bool py_cast(VM* vm, PyObject* obj){ - vm->check_type(obj, vm->tp_bool); + vm->check_non_tagged_type(obj, vm->tp_bool); return obj == vm->True; } template<> inline bool _py_cast(VM* vm, PyObject* obj){ @@ -536,7 +551,7 @@ inline PyObject* py_var(VM* vm, std::string_view val){ template void _check_py_class(VM* vm, PyObject* obj){ - vm->check_type(obj, T::_type(vm)); + vm->check_non_tagged_type(obj, T::_type(vm)); } inline PyObject* VM::num_negated(PyObject* obj){ @@ -1169,7 +1184,7 @@ inline void VM::setattr(PyObject* obj, StrName name, PyObject* value){ template void VM::bind_method(PyObject* obj, Str name, NativeFuncC fn) { - check_type(obj, tp_type); + check_non_tagged_type(obj, tp_type); obj->attr().set(name, VAR(NativeFunc(fn, ARGC, true))); } From cdbd884398348f2e502beedc1bb52075ae14717b Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 3 May 2023 20:52:18 +0800 Subject: [PATCH 06/13] ... --- src/frame.h | 3 + src/pocketpy.h | 168 ++++++++++++++++++++++++++++--------------------- src/vm.h | 5 -- 3 files changed, 99 insertions(+), 77 deletions(-) diff --git a/src/frame.h b/src/frame.h index 5a3228e8..85cc698d 100644 --- a/src/frame.h +++ b/src/frame.h @@ -133,6 +133,9 @@ struct Frame { Bytecode next_bytecode() { _ip = _next_ip++; +#if DEBUG_EXTRA_CHECK + if(_ip >= co->codes.size()) FATAL_ERROR(); +#endif return co->codes[_ip]; } diff --git a/src/pocketpy.h b/src/pocketpy.h index 2dd32c80..ccc1934d 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -27,23 +27,33 @@ inline CodeObject_ VM::compile(Str source, Str filename, CompileMode mode, bool } #define BIND_NUM_ARITH_OPT(name, op) \ - _vm->_bind_methods<1>({"int","float"}, #name, [](VM* vm, ArgsView args){ \ - if(is_both_int(args[0], args[1])){ \ + _vm->bind_method<1>("int", #name, [](VM* vm, ArgsView args){ \ + if(is_int(args[1])){ \ return VAR(_CAST(i64, args[0]) op _CAST(i64, args[1])); \ }else{ \ return VAR(vm->num_to_float(args[0]) op vm->num_to_float(args[1])); \ } \ + }); \ + _vm->bind_method<1>("float", #name, [](VM* vm, ArgsView args){ \ + return VAR(_CAST(f64, args[0]) op vm->num_to_float(args[1])); \ }); + + #define BIND_NUM_LOGICAL_OPT(name, op, is_eq) \ - _vm->_bind_methods<1>({"int","float"}, #name, [](VM* vm, ArgsView args){ \ - if(is_both_int(args[0], args[1])) \ - return VAR(_CAST(i64, args[0]) op _CAST(i64, args[1])); \ - if(!is_both_int_or_float(args[0], args[1])){ \ - if constexpr(is_eq) return VAR(args[0] op args[1]); \ - vm->TypeError("unsupported operand type(s) for " #op ); \ - } \ - return VAR(vm->num_to_float(args[0]) op vm->num_to_float(args[1])); \ + _vm->bind_method<1>("int", #name, [](VM* vm, ArgsView args){ \ + if(is_int(args[1])) return VAR(_CAST(i64, args[0]) op _CAST(i64, args[1])); \ + if(is_float(args[1])) return VAR(vm->num_to_float(args[0]) op _CAST(f64, args[1])); \ + if constexpr(is_eq) return VAR(args[0] op args[1]); \ + vm->TypeError("unsupported operand type(s) for " #op ); \ + return vm->None; \ + }); \ + _vm->bind_method<1>("float", #name, [](VM* vm, ArgsView args){ \ + if(is_float(args[1])) return VAR(_CAST(f64, args[0]) op _CAST(f64, args[1])); \ + if(is_int(args[1])) return VAR(_CAST(f64, args[0]) op _CAST(i64, args[1])); \ + if constexpr(is_eq) return VAR(args[0] op args[1]); \ + vm->TypeError("unsupported operand type(s) for " #op ); \ + return vm->None; \ }); @@ -85,6 +95,11 @@ inline void init_builtins(VM* _vm) { return VAR(vm->isinstance(args[0], type)); }); + _vm->bind_builtin_func<0>("globals", [](VM* vm, ArgsView args) { + PyObject* mod = vm->top_frame()->_module; + return VAR(MappingProxy(mod)); + }); + _vm->bind_builtin_func<1>("id", [](VM* vm, ArgsView args) { PyObject* obj = args[0]; if(is_tagged(obj)) return VAR((i64)0); @@ -213,13 +228,19 @@ inline void init_builtins(VM* _vm) { _vm->bind_method<0>("NoneType", "__repr__", CPP_LAMBDA(VAR("None"))); _vm->bind_method<0>("NoneType", "__json__", CPP_LAMBDA(VAR("null"))); - _vm->_bind_methods<1>({"int", "float"}, "__truediv__", [](VM* vm, ArgsView args) { + _vm->bind_method<1>("int", "__truediv__", [](VM* vm, ArgsView args) { f64 rhs = vm->num_to_float(args[1]); if (rhs == 0) vm->ZeroDivisionError(); - return VAR(vm->num_to_float(args[0]) / rhs); + return VAR(_CAST(i64, args[0]) / rhs); }); - _vm->_bind_methods<1>({"int", "float"}, "__pow__", [](VM* vm, ArgsView args) { + _vm->bind_method<1>("float", "__truediv__", [](VM* vm, ArgsView args) { + f64 rhs = vm->num_to_float(args[1]); + if (rhs == 0) vm->ZeroDivisionError(); + return VAR(_CAST(f64, args[0]) / rhs); + }); + + auto py_number_pow = [](VM* vm, ArgsView args) { if(is_both_int(args[0], args[1])){ i64 lhs = _CAST(i64, args[0]); i64 rhs = _CAST(i64, args[1]); @@ -236,12 +257,15 @@ inline void init_builtins(VM* _vm) { }else{ return VAR((f64)std::pow(vm->num_to_float(args[0]), vm->num_to_float(args[1]))); } - }); + }; + + _vm->bind_method<1>("int", "__pow__", py_number_pow); + _vm->bind_method<1>("float", "__pow__", py_number_pow); /************ PyInt ************/ _vm->bind_static_method<1>("int", "__new__", [](VM* vm, ArgsView args) { - if (is_type(args[0], vm->tp_int)) return args[0]; if (is_type(args[0], vm->tp_float)) return VAR((i64)CAST(f64, args[0])); + if (is_type(args[0], vm->tp_int)) return args[0]; if (is_type(args[0], vm->tp_bool)) return VAR(_CAST(bool, args[0]) ? 1 : 0); if (is_type(args[0], vm->tp_str)) { const Str& s = CAST(Str&, args[0]); @@ -324,18 +348,18 @@ inline void init_builtins(VM* _vm) { _vm->bind_static_method<1>("str", "__new__", CPP_LAMBDA(vm->asStr(args[0]))); _vm->bind_method<1>("str", "__add__", [](VM* vm, ArgsView args) { - const Str& lhs = CAST(Str&, args[0]); + const Str& lhs = _CAST(Str&, args[0]); const Str& rhs = CAST(Str&, args[1]); return VAR(lhs + rhs); }); _vm->bind_method<0>("str", "__len__", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); return VAR(self.u8_length()); }); _vm->bind_method<1>("str", "__contains__", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); const Str& other = CAST(Str&, args[1]); return VAR(self.index(other) != -1); }); @@ -344,29 +368,29 @@ inline void init_builtins(VM* _vm) { _vm->bind_method<0>("str", "__iter__", CPP_LAMBDA(vm->PyIter(StringIter(vm, args[0])))); _vm->bind_method<0>("str", "__repr__", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); return VAR(self.escape()); }); _vm->bind_method<0>("str", "__json__", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); return VAR(self.escape(false)); }); _vm->bind_method<1>("str", "__eq__", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); if(!is_type(args[1], vm->tp_str)) return VAR(false); return VAR(self == CAST(Str&, args[1])); }); _vm->bind_method<1>("str", "__ne__", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); if(!is_type(args[1], vm->tp_str)) return VAR(true); return VAR(self != CAST(Str&, args[1])); }); _vm->bind_method<1>("str", "__getitem__", [](VM* vm, ArgsView args) { - const Str& self (CAST(Str&, args[0])); + const Str& self = _CAST(Str&, args[0]); if(is_type(args[1], vm->tp_slice)){ const Slice& s = _CAST(Slice&, args[1]); @@ -381,20 +405,20 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<1>("str", "__gt__", [](VM* vm, ArgsView args) { - const Str& self (CAST(Str&, args[0])); - const Str& obj (CAST(Str&, args[1])); + const Str& self = _CAST(Str&, args[0]); + const Str& obj = CAST(Str&, args[1]); return VAR(self > obj); }); _vm->bind_method<1>("str", "__lt__", [](VM* vm, ArgsView args) { - const Str& self (CAST(Str&, args[0])); - const Str& obj (CAST(Str&, args[1])); + const Str& self = _CAST(Str&, args[0]); + const Str& obj = CAST(Str&, args[1]); return VAR(self < obj); }); _vm->bind_method<-1>("str", "replace", [](VM* vm, ArgsView args) { if(args.size() != 1+2 && args.size() != 1+3) vm->TypeError("replace() takes 2 or 3 arguments"); - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); const Str& old = CAST(Str&, args[1]); const Str& new_ = CAST(Str&, args[2]); int count = args.size()==1+3 ? CAST(int, args[3]) : -1; @@ -402,7 +426,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<1>("str", "index", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); const Str& sub = CAST(Str&, args[1]); int index = self.index(sub); if(index == -1) vm->ValueError("substring not found"); @@ -410,13 +434,13 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<1>("str", "startswith", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); const Str& prefix = CAST(Str&, args[1]); return VAR(self.index(prefix) == 0); }); _vm->bind_method<1>("str", "endswith", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); const Str& suffix = CAST(Str&, args[1]); int offset = self.length() - suffix.length(); if(offset < 0) return vm->False; @@ -425,14 +449,14 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<0>("str", "encode", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); std::vector buffer(self.length()); memcpy(buffer.data(), self.data, self.length()); return VAR(Bytes(std::move(buffer))); }); _vm->bind_method<1>("str", "join", [](VM* vm, ArgsView args) { - const Str& self = CAST(Str&, args[0]); + const Str& self = _CAST(Str&, args[0]); FastStrStream ss; PyObject* obj = vm->asList(args[1]); const List& list = CAST(List&, obj); @@ -445,13 +469,13 @@ inline void init_builtins(VM* _vm) { /************ PyList ************/ _vm->bind_method<1>("list", "append", [](VM* vm, ArgsView args) { - List& self = CAST(List&, args[0]); + List& self = _CAST(List&, args[0]); self.push_back(args[1]); return vm->None; }); _vm->bind_method<1>("list", "extend", [](VM* vm, ArgsView args) { - List& self = CAST(List&, args[0]); + List& self = _CAST(List&, args[0]); PyObject* obj = vm->asList(args[1]); const List& list = CAST(List&, obj); self.extend(list); @@ -459,13 +483,13 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<0>("list", "reverse", [](VM* vm, ArgsView args) { - List& self = CAST(List&, args[0]); + List& self = _CAST(List&, args[0]); std::reverse(self.begin(), self.end()); return vm->None; }); _vm->bind_method<1>("list", "__mul__", [](VM* vm, ArgsView args) { - const List& self = CAST(List&, args[0]); + const List& self = _CAST(List&, args[0]); int n = CAST(int, args[1]); List result; result.reserve(self.size() * n); @@ -474,7 +498,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<2>("list", "insert", [](VM* vm, ArgsView args) { - List& self = CAST(List&, args[0]); + List& self = _CAST(List&, args[0]); int index = CAST(int, args[1]); if(index < 0) index += self.size(); if(index < 0) index = 0; @@ -484,14 +508,14 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<0>("list", "clear", [](VM* vm, ArgsView args) { - CAST(List&, args[0]).clear(); + _CAST(List&, args[0]).clear(); return vm->None; }); - _vm->bind_method<0>("list", "copy", CPP_LAMBDA(VAR(CAST(List, args[0])))); + _vm->bind_method<0>("list", "copy", CPP_LAMBDA(VAR(_CAST(List, args[0])))); _vm->bind_method<1>("list", "__add__", [](VM* vm, ArgsView args) { - const List& self = CAST(List&, args[0]); + const List& self = _CAST(List&, args[0]); const List& other = CAST(List&, args[1]); List new_list(self); // copy construct new_list.extend(other); @@ -499,7 +523,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<0>("list", "__len__", [](VM* vm, ArgsView args) { - const List& self = CAST(List&, args[0]); + const List& self = _CAST(List&, args[0]); return VAR(self.size()); }); @@ -508,7 +532,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<1>("list", "__getitem__", [](VM* vm, ArgsView args) { - const List& self = CAST(List&, args[0]); + const List& self = _CAST(List&, args[0]); if(is_type(args[1], vm->tp_slice)){ const Slice& s = _CAST(Slice&, args[1]); @@ -525,7 +549,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<2>("list", "__setitem__", [](VM* vm, ArgsView args) { - List& self = CAST(List&, args[0]); + List& self = _CAST(List&, args[0]); int index = CAST(int, args[1]); index = vm->normalized_index(index, self.size()); self[index] = args[2]; @@ -533,7 +557,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<1>("list", "__delitem__", [](VM* vm, ArgsView args) { - List& self = CAST(List&, args[0]); + List& self = _CAST(List&, args[0]); int index = CAST(int, args[1]); index = vm->normalized_index(index, self.size()); self.erase(index); @@ -551,7 +575,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<1>("tuple", "__getitem__", [](VM* vm, ArgsView args) { - const Tuple& self = CAST(Tuple&, args[0]); + const Tuple& self = _CAST(Tuple&, args[0]); if(is_type(args[1], vm->tp_slice)){ const Slice& s = _CAST(Slice&, args[1]); @@ -568,7 +592,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<0>("tuple", "__len__", [](VM* vm, ArgsView args) { - const Tuple& self = CAST(Tuple&, args[0]); + const Tuple& self = _CAST(Tuple&, args[0]); return VAR(self.size()); }); @@ -576,17 +600,17 @@ inline void init_builtins(VM* _vm) { _vm->bind_static_method<1>("bool", "__new__", CPP_LAMBDA(VAR(vm->asBool(args[0])))); _vm->bind_method<0>("bool", "__repr__", [](VM* vm, ArgsView args) { - bool val = CAST(bool, args[0]); + bool val = _CAST(bool, args[0]); return VAR(val ? "True" : "False"); }); _vm->bind_method<0>("bool", "__json__", [](VM* vm, ArgsView args) { - bool val = CAST(bool, args[0]); + bool val = _CAST(bool, args[0]); return VAR(val ? "true" : "false"); }); _vm->bind_method<1>("bool", "__xor__", [](VM* vm, ArgsView args) { - bool self = CAST(bool, args[0]); + bool self = _CAST(bool, args[0]); bool other = CAST(bool, args[1]); return VAR(self ^ other); }); @@ -606,14 +630,14 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<1>("bytes", "__getitem__", [](VM* vm, ArgsView args) { - const Bytes& self = CAST(Bytes&, args[0]); + const Bytes& self = _CAST(Bytes&, args[0]); int index = CAST(int, args[1]); index = vm->normalized_index(index, self.size()); return VAR(self[index]); }); _vm->bind_method<0>("bytes", "__repr__", [](VM* vm, ArgsView args) { - const Bytes& self = CAST(Bytes&, args[0]); + const Bytes& self = _CAST(Bytes&, args[0]); std::stringstream ss; ss << "b'"; for(int i=0; ibind_method<0>("bytes", "__len__", [](VM* vm, ArgsView args) { - const Bytes& self = CAST(Bytes&, args[0]); + const Bytes& self = _CAST(Bytes&, args[0]); return VAR(self.size()); }); _vm->bind_method<0>("bytes", "decode", [](VM* vm, ArgsView args) { - const Bytes& self = CAST(Bytes&, args[0]); + const Bytes& self = _CAST(Bytes&, args[0]); // TODO: check encoding is utf-8 return VAR(Str(self.str())); }); _vm->bind_method<1>("bytes", "__eq__", [](VM* vm, ArgsView args) { - const Bytes& self = CAST(Bytes&, args[0]); + const Bytes& self = _CAST(Bytes&, args[0]); if(!is_type(args[1], vm->tp_bytes)) return VAR(false); const Bytes& other = CAST(Bytes&, args[1]); return VAR(self == other); }); _vm->bind_method<1>("bytes", "__ne__", [](VM* vm, ArgsView args) { - const Bytes& self = CAST(Bytes&, args[0]); + const Bytes& self = _CAST(Bytes&, args[0]); if(!is_type(args[1], vm->tp_bytes)) return VAR(true); const Bytes& other = CAST(Bytes&, args[1]); return VAR(self != other); @@ -654,7 +678,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<0>("slice", "__repr__", [](VM* vm, ArgsView args) { - const Slice& self = CAST(Slice&, args[0]); + const Slice& self = _CAST(Slice&, args[0]); std::stringstream ss; ss << "slice("; ss << CAST(Str, vm->asRepr(self.start)) << ", "; @@ -665,21 +689,21 @@ inline void init_builtins(VM* _vm) { /************ MappingProxy ************/ _vm->bind_method<0>("mappingproxy", "keys", [](VM* vm, ArgsView args) { - MappingProxy& self = CAST(MappingProxy&, args[0]); + MappingProxy& self = _CAST(MappingProxy&, args[0]); List keys; for(StrName name : self.attr().keys()) keys.push_back(VAR(name.sv())); return VAR(std::move(keys)); }); _vm->bind_method<0>("mappingproxy", "values", [](VM* vm, ArgsView args) { - MappingProxy& self = CAST(MappingProxy&, args[0]); + MappingProxy& self = _CAST(MappingProxy&, args[0]); List values; for(auto& item : self.attr().items()) values.push_back(item.second); return VAR(std::move(values)); }); _vm->bind_method<0>("mappingproxy", "items", [](VM* vm, ArgsView args) { - MappingProxy& self = CAST(MappingProxy&, args[0]); + MappingProxy& self = _CAST(MappingProxy&, args[0]); List items; for(auto& item : self.attr().items()){ PyObject* t = VAR(Tuple({VAR(item.first.sv()), item.second})); @@ -689,12 +713,12 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<0>("mappingproxy", "__len__", [](VM* vm, ArgsView args) { - MappingProxy& self = CAST(MappingProxy&, args[0]); + MappingProxy& self = _CAST(MappingProxy&, args[0]); return VAR(self.attr().size()); }); _vm->bind_method<1>("mappingproxy", "__getitem__", [](VM* vm, ArgsView args) { - MappingProxy& self = CAST(MappingProxy&, args[0]); + MappingProxy& self = _CAST(MappingProxy&, args[0]); StrName key = CAST(Str&, args[1]); PyObject* ret = self.attr().try_get(key); if(ret == nullptr) vm->AttributeError(key.sv()); @@ -702,7 +726,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<0>("mappingproxy", "__repr__", [](VM* vm, ArgsView args) { - MappingProxy& self = CAST(MappingProxy&, args[0]); + MappingProxy& self = _CAST(MappingProxy&, args[0]); std::stringstream ss; ss << "mappingproxy({"; bool first = true; @@ -716,7 +740,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_method<1>("mappingproxy", "__contains__", [](VM* vm, ArgsView args) { - MappingProxy& self = CAST(MappingProxy&, args[0]); + MappingProxy& self = _CAST(MappingProxy&, args[0]); StrName key = CAST(Str&, args[1]); return VAR(self.attr().contains(key)); }); @@ -809,16 +833,16 @@ struct ReMatch { static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_method<-1>(type, "__init__", CPP_NOT_IMPLEMENTED()); - vm->bind_method<0>(type, "start", CPP_LAMBDA(VAR(CAST(ReMatch&, args[0]).start))); - vm->bind_method<0>(type, "end", CPP_LAMBDA(VAR(CAST(ReMatch&, args[0]).end))); + vm->bind_method<0>(type, "start", CPP_LAMBDA(VAR(_CAST(ReMatch&, args[0]).start))); + vm->bind_method<0>(type, "end", CPP_LAMBDA(VAR(_CAST(ReMatch&, args[0]).end))); vm->bind_method<0>(type, "span", [](VM* vm, ArgsView args) { - auto& self = CAST(ReMatch&, args[0]); + auto& self = _CAST(ReMatch&, args[0]); return VAR(Tuple({VAR(self.start), VAR(self.end)})); }); vm->bind_method<1>(type, "group", [](VM* vm, ArgsView args) { - auto& self = CAST(ReMatch&, args[0]); + auto& self = _CAST(ReMatch&, args[0]); int index = CAST(int, args[1]); index = vm->normalized_index(index, self.m.size()); return VAR(self.m[index].str()); @@ -888,13 +912,13 @@ struct Random{ vm->bind_static_method<0>(type, "__new__", CPP_LAMBDA(VAR_T(Random))); vm->bind_method<1>(type, "seed", [](VM* vm, ArgsView args) { - Random& self = CAST(Random&, args[0]); + Random& self = _CAST(Random&, args[0]); self.gen.seed(CAST(i64, args[1])); return vm->None; }); vm->bind_method<2>(type, "randint", [](VM* vm, ArgsView args) { - Random& self = CAST(Random&, args[0]); + Random& self = _CAST(Random&, args[0]); i64 a = CAST(i64, args[1]); i64 b = CAST(i64, args[2]); std::uniform_int_distribution dis(a, b); @@ -902,13 +926,13 @@ struct Random{ }); vm->bind_method<0>(type, "random", [](VM* vm, ArgsView args) { - Random& self = CAST(Random&, args[0]); + Random& self = _CAST(Random&, args[0]); std::uniform_real_distribution dis(0.0, 1.0); return VAR(dis(self.gen)); }); vm->bind_method<2>(type, "uniform", [](VM* vm, ArgsView args) { - Random& self = CAST(Random&, args[0]); + Random& self = _CAST(Random&, args[0]); f64 a = CAST(f64, args[1]); f64 b = CAST(f64, args[2]); std::uniform_real_distribution dis(a, b); diff --git a/src/vm.h b/src/vm.h index ceb6579d..1bac852e 100644 --- a/src/vm.h +++ b/src/vm.h @@ -298,11 +298,6 @@ public: bind_func(std::forward(args)...); } - template - void _bind_methods(std::vector types, Str name, NativeFuncC fn) { - for(auto& type: types) bind_method(type, name, fn); - } - template void bind_builtin_func(Str name, NativeFuncC fn) { bind_func(builtins, name, fn); From 06068ab3e07a66d9017b97da28754ceee13d062a Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Wed, 3 May 2023 21:43:37 +0800 Subject: [PATCH 07/13] ... --- docs/features/basic.md | 4 +--- docs/features/goto.md | 10 ++++------ python/builtins.py | 4 +++- src/ceval.h | 2 +- src/compiler.h | 9 ++++----- src/lexer.h | 26 +++++++++++++++++++------- src/pocketpy.h | 20 +++++++++++++++----- tests/27_goto.py | 8 ++++---- 8 files changed, 51 insertions(+), 32 deletions(-) diff --git a/docs/features/basic.md b/docs/features/basic.md index 04312fa0..3f8b3fd0 100644 --- a/docs/features/basic.md +++ b/docs/features/basic.md @@ -1,11 +1,9 @@ --- icon: dot -order: 100 title: Basic Features +order: 100 --- -# basic - The following table shows the basic features of pkpy with respect to [cpython](https://github.com/python/cpython). The features marked with `YES` are supported, and the features marked with `NO` are not supported. diff --git a/docs/features/goto.md b/docs/features/goto.md index e694ae43..1bbc8f3d 100644 --- a/docs/features/goto.md +++ b/docs/features/goto.md @@ -3,8 +3,6 @@ icon: dot title: Goto Statement --- -# goto/label - pkpy supports `goto` and `label` just like C. You are allowed to change the control flow unconditionally. ## Syntax @@ -12,8 +10,8 @@ pkpy supports `goto` and `label` just like C. You are allowed to change the cont Labels are named a dot `.` and an identifier. ``` -goto . -label . +$goto +$label ``` ## Example @@ -22,7 +20,7 @@ label . for i in range(10): for j in range(10): for k in range(10): - goto .exit + $goto exit -label .exit +$label exit ``` diff --git a/python/builtins.py b/python/builtins.py index ddce6c49..a3275a6d 100644 --- a/python/builtins.py +++ b/python/builtins.py @@ -1,6 +1,8 @@ +import sys as _sys + def print(*args, sep=' ', end='\n'): s = sep.join([str(i) for i in args]) - __sys_stdout_write(s + end) + _sys.stdout.write(s + end) def round(x, ndigits=0): assert ndigits >= 0 diff --git a/src/ceval.h b/src/ceval.h index 969a886a..7b1d08e0 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -501,7 +501,7 @@ __NEXT_STEP:; TOP()->attr().set(_name, _0); DISPATCH(); /*****************************************/ - // // TODO: using "goto" inside with block may cause __exit__ not called + // TODO: using "goto" inside with block may cause __exit__ not called TARGET(WITH_ENTER) call_method(POPX(), __enter__); DISPATCH(); diff --git a/src/compiler.h b/src/compiler.h index 8a98486b..6e930394 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -795,17 +795,16 @@ __SUBSCR_END: ctx()->emit(OP_WITH_EXIT, BC_NOARG, prev().line); } break; /*************************************************/ - // TODO: refactor goto/label use special $ syntax - case TK("label"): { + case TK("$label"): { if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE"); - consume(TK(".")); consume(TK("@id")); + consume(TK("@id")); bool ok = ctx()->add_label(prev().str()); if(!ok) SyntaxError("label " + prev().str().escape() + " already exists"); consume_end_stmt(); } break; - case TK("goto"): + case TK("$goto"): if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE"); - consume(TK(".")); consume(TK("@id")); + consume(TK("@id")); ctx()->emit(OP_GOTO, StrName(prev().str()).index, prev().line); consume_end_stmt(); break; diff --git a/src/lexer.h b/src/lexer.h index 270c7c1d..db105087 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -21,10 +21,11 @@ constexpr const char* kTokens[] = { /*****************************************/ ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "**", "=", ">", "<", "...", "->", "?", "@", "==", "!=", ">=", "<=", + /** SPEC_BEGIN **/ + "$goto", "$label", /** KW_BEGIN **/ "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally", - "goto", "label", // extended keywords, not available in cpython "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise" }; @@ -38,13 +39,7 @@ constexpr TokenIndex TK(const char token[]) { while(*i && *j && *i == *j) { i++; j++;} if(*i == *j) return k; } -#ifdef __GNUC__ - // for old version of gcc, it is not smart enough to ignore FATAL_ERROR() - // so we must do a normal return return 255; -#else - FATAL_ERROR(); -#endif } #define TK_STR(t) kTokens[t] @@ -125,6 +120,13 @@ struct Lexer { return true; } + bool match_string(const char* s){ + int s_len = strlen(s); + bool ok = strncmp(curr_char, s, s_len) == 0; + if(ok) for(int i=0; i_stdout) << (need_more_lines ? "... " : ">>> "); + vm->_stdout(vm, need_more_lines ? "... " : ">>> "); bool eof = false; std::string line = pkpy::getline(&eof); if(eof) break; diff --git a/src/pocketpy.h b/src/pocketpy.h index a8a9b5ab..70c0451e 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -770,12 +770,12 @@ inline void add_module_sys(VM* vm){ vm->setattr(mod, "stderr", stderr_); vm->bind_func<1>(stdout_, "write", [](VM* vm, ArgsView args) { - (*vm->_stdout) << CAST(Str&, args[0]).sv(); + vm->_stdout(vm, CAST(Str&, args[0])); return vm->None; }); vm->bind_func<1>(stderr_, "write", [](VM* vm, ArgsView args) { - (*vm->_stderr) << CAST(Str&, args[0]).sv(); + vm->_stderr(vm, CAST(Str&, args[0])); return vm->None; }); } @@ -828,7 +828,7 @@ inline void add_module_dis(VM* vm){ PyObject* f = args[0]; if(is_type(f, vm->tp_bound_method)) f = CAST(BoundMethod, args[0]).func; CodeObject_ code = CAST(Function&, f).decl->code; - (*vm->_stdout) << vm->disassemble(code); + vm->_stdout(vm, vm->disassemble(code)); return vm->None; }); } @@ -1105,15 +1105,9 @@ extern "C" { } __EXPORT - pkpy::VM* pkpy_new_vm(bool use_stdio=true, bool enable_os=true){ - pkpy::VM* p = new pkpy::VM(use_stdio, enable_os); + pkpy::VM* pkpy_new_vm(bool enable_os=true){ + pkpy::VM* p = new pkpy::VM(enable_os); _pk_deleter_map[p] = [](void* p){ delete (pkpy::VM*)p; }; return p; } - - __EXPORT - char* pkpy_vm_read_output(pkpy::VM* vm){ - std::string json = vm->read_output(); - return strdup(json.c_str()); - } } \ No newline at end of file diff --git a/src/repl.h b/src/repl.h index f766257a..3932755f 100644 --- a/src/repl.h +++ b/src/repl.h @@ -49,10 +49,10 @@ protected: VM* vm; public: REPL(VM* vm) : vm(vm){ - (*vm->_stdout) << ("pocketpy " PK_VERSION " (" __DATE__ ", " __TIME__ ") "); - (*vm->_stdout) << "[" << std::to_string(sizeof(void*) * 8) << " bit]" "\n"; - (*vm->_stdout) << ("https://github.com/blueloveTH/pocketpy" "\n"); - (*vm->_stdout) << ("Type \"exit()\" to exit." "\n"); + vm->_stdout(vm, "pocketpy " PK_VERSION " (" __DATE__ ", " __TIME__ ") "); + vm->_stdout(vm, fmt("[", sizeof(void*)*8, " bit]" "\n")); + vm->_stdout(vm, "https://github.com/blueloveTH/pocketpy" "\n"); + vm->_stdout(vm, "Type \"exit()\" to exit." "\n"); } bool input(std::string line){ diff --git a/src/vm.h b/src/vm.h index 1bac852e..ead7b57b 100644 --- a/src/vm.h +++ b/src/vm.h @@ -73,6 +73,8 @@ struct FrameId{ Frame* operator->() const { return &data->operator[](index); } }; +typedef void(*PrintFunc)(VM*, const Str&); + class VM { VM* vm; // self reference for simplify code public: @@ -93,10 +95,8 @@ public: PyObject* StopIteration; PyObject* _main; // __main__ module - std::stringstream _stdout_buffer; - std::stringstream _stderr_buffer; - std::ostream* _stdout; - std::ostream* _stderr; + PrintFunc _stdout; + PrintFunc _stderr; bool _initialized; @@ -109,31 +109,16 @@ public: const bool enable_os; - VM(bool use_stdio=true, bool enable_os=true) : heap(this), enable_os(enable_os) { + VM(bool enable_os=true) : heap(this), enable_os(enable_os) { this->vm = this; - this->_stdout = use_stdio ? &std::cout : &_stdout_buffer; - this->_stderr = use_stdio ? &std::cerr : &_stderr_buffer; + _stdout = [](VM* vm, const Str& s) { std::cout << s; }; + _stderr = [](VM* vm, const Str& s) { std::cerr << s; }; callstack.reserve(8); _initialized = false; init_builtin_types(); _initialized = true; } - bool is_stdio_used() const { return _stdout == &std::cout; } - - std::string read_output(){ - if(is_stdio_used()) UNREACHABLE(); - std::stringstream* s_out = (std::stringstream*)(vm->_stdout); - std::stringstream* s_err = (std::stringstream*)(vm->_stderr); - pkpy::Str _stdout = s_out->str(); - pkpy::Str _stderr = s_err->str(); - std::stringstream ss; - ss << '{' << "\"stdout\": " << _stdout.escape(false); - ss << ", " << "\"stderr\": " << _stderr.escape(false) << '}'; - s_out->str(""); s_err->str(""); - return ss.str(); - } - FrameId top_frame() { #if DEBUG_EXTRA_CHECK if(callstack.empty()) FATAL_ERROR(); @@ -195,13 +180,13 @@ public: #endif return _exec(code, _module); }catch (const Exception& e){ - *_stderr << e.summary() << '\n'; - + _stderr(this, e.summary() + "\n"); } #if !DEBUG_FULL_EXCEPTION catch (const std::exception& e) { - *_stderr << "An std::exception occurred! It could be a bug.\n"; - *_stderr << e.what() << '\n'; + _stderr(this, "An std::exception occurred! It could be a bug.\n"); + _stderr(this, e.what()); + _stderr(this, "\n"); } #endif callstack.clear(); From db2492829bfc6cd6af6516083a9cbee5dcdd8975 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 4 May 2023 16:44:45 +0800 Subject: [PATCH 11/13] ... --- docs/C-API/vm.md | 12 ++---------- docs/quick-start/installation.md | 6 ++---- web/index.js | 2 +- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/docs/C-API/vm.md b/docs/C-API/vm.md index bf08c691..89a6fb37 100644 --- a/docs/C-API/vm.md +++ b/docs/C-API/vm.md @@ -3,7 +3,7 @@ title: VM icon: dot order: 10 --- -#### `VM* pkpy_new_vm(bool use_stdio)` +#### `VM* pkpy_new_vm()` Create a virtual machine. @@ -27,12 +27,4 @@ Run a given source on a virtual machine. Get a global variable of a virtual machine. Return `__repr__` of the result. -If the variable is not found, return `nullptr`. - -#### `char* pkpy_vm_read_output(VM* vm)` - -Read the standard output and standard error as string of a virtual machine. -The `vm->use_stdio` should be `false`. -After this operation, both stream will be cleared. - -Return a json representing the result. \ No newline at end of file +If the variable is not found, return `nullptr`. \ No newline at end of file diff --git a/docs/quick-start/installation.md b/docs/quick-start/installation.md index 48cc4ba2..f58780e3 100644 --- a/docs/quick-start/installation.md +++ b/docs/quick-start/installation.md @@ -56,14 +56,12 @@ You need to use the C++ `new` operator to create a `VM` instance. VM* vm = new VM(); ``` -The constructor can take 2 extra parameters. +The constructor can take 1 extra parameters. -#### `VM(bool use_stdio=true, bool enable_os=true)` +#### `VM(bool enable_os=true)` -+ `use_stdio`, if `true`, the `print()` function outputs string to `stdout`. Error messages will be send to `stderr`; If `false`, they will be sent to an internal buffer. In the latter case, you need to read them via `read_output` manually. + `enable_os`, whether to enable OS-related features or not. This setting controls the availability of some priviledged modules such os `io` and `os` as well as builtin function `open`. - When you are done with the `VM` instance, you need to use the C++ `delete` operator to free the memory. ```cpp diff --git a/web/index.js b/web/index.js index 8fd21e2c..2621b788 100644 --- a/web/index.js +++ b/web/index.js @@ -113,7 +113,7 @@ var Module = { term.write(text + "\r\n"); }, 'onRuntimeInitialized': function(text) { - var vm = Module.ccall('pkpy_new_vm', 'number', ['boolean', 'boolean'], [true, true]); + var vm = Module.ccall('pkpy_new_vm', 'number', ['boolean'], [true]); repl = Module.ccall('pkpy_new_repl', 'number', ['number'], [vm]); term.write(need_more_lines ? "... " : ">>> "); }, From 3429068d6393864eaf507c5607e729c3fe68b246 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 4 May 2023 16:51:44 +0800 Subject: [PATCH 12/13] ... --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index b3ef2a6c..50499eb2 100644 --- a/build.py +++ b/build.py @@ -50,7 +50,7 @@ if "web" in sys.argv: os.system(r''' rm -rf web/lib/ mkdir -p web/lib/ -em++ src/main.cpp -fno-rtti -fexceptions -O3 -sEXPORTED_FUNCTIONS=_pkpy_delete,_pkpy_new_repl,_pkpy_repl_input,_pkpy_new_vm,_pkpy_vm_add_module,_pkpy_vm_eval,_pkpy_vm_exec,_pkpy_vm_get_global,_pkpy_vm_read_output -sEXPORTED_RUNTIME_METHODS=ccall -o web/lib/pocketpy.js +em++ src/main.cpp -fno-rtti -fexceptions -O3 -sEXPORTED_FUNCTIONS=_pkpy_delete,_pkpy_new_repl,_pkpy_repl_input,_pkpy_new_vm,_pkpy_vm_add_module,_pkpy_vm_eval,_pkpy_vm_exec,_pkpy_vm_get_global -sEXPORTED_RUNTIME_METHODS=ccall -o web/lib/pocketpy.js ''') DONE() From 8489bbb2c5e2ef55b77f194491a8df9d564ce94a Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 4 May 2023 21:50:01 +0800 Subject: [PATCH 13/13] ... --- src/io.h | 63 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/src/io.h b/src/io.h index 0c055718..2e1dfe30 100644 --- a/src/io.h +++ b/src/io.h @@ -6,8 +6,8 @@ #if PK_ENABLE_OS -#include #include +#include namespace pkpy{ @@ -15,9 +15,14 @@ inline int _ = set_read_file_cwd([](const Str& name){ std::filesystem::path path(name.sv()); bool exists = std::filesystem::exists(path); if(!exists) return Bytes(); - std::ifstream ifs(path, std::ios::binary); - std::vector buffer(std::istreambuf_iterator(ifs), {}); - ifs.close(); + std::string cname = name.str(); + FILE* fp = fopen(cname.c_str(), "rb"); + if(!fp) return Bytes(); + fseek(fp, 0, SEEK_END); + std::vector buffer(ftell(fp)); + fseek(fp, 0, SEEK_SET); + fread(buffer.data(), 1, buffer.size(), fp); + fclose(fp); return Bytes(std::move(buffer)); }); @@ -26,42 +31,34 @@ struct FileIO { Str file; Str mode; - std::fstream _fs; + FILE* fp; bool is_text() const { return mode != "rb" && mode != "wb" && mode != "ab"; } - FileIO(VM* vm, Str file, Str mode): file(file), mode(mode) { - std::ios_base::openmode extra = static_cast(0); - if(mode == "rb" || mode == "wb" || mode == "ab"){ - extra |= std::ios::binary; - } - if(mode == "rt" || mode == "r" || mode == "rb"){ - _fs.open(file.str(), std::ios::in | extra); - }else if(mode == "wt" || mode == "w" || mode == "wb"){ - _fs.open(file.str(), std::ios::out | extra); - }else if(mode == "at" || mode == "a" || mode == "ab"){ - _fs.open(file.str(), std::ios::app | extra); - }else{ - vm->ValueError("invalid mode"); - } - if(!_fs.is_open()) vm->IOError(strerror(errno)); + FileIO(VM* vm, std::string file, std::string mode): file(file), mode(mode) { + fp = fopen(file.c_str(), mode.c_str()); + if(!fp) vm->IOError(strerror(errno)); + } + + void close(){ + if(fp == nullptr) return; + fclose(fp); + fp = nullptr; } static void _register(VM* vm, PyObject* mod, PyObject* type){ vm->bind_static_method<2>(type, "__new__", [](VM* vm, ArgsView args){ return VAR_T(FileIO, - vm, CAST(Str, args[0]), CAST(Str, args[1]) + vm, CAST(Str&, args[0]).str(), CAST(Str&, args[1]).str() ); }); vm->bind_method<0>(type, "read", [](VM* vm, ArgsView args){ FileIO& io = CAST(FileIO&, args[0]); - std::vector buffer; - while(true){ - char c = io._fs.get(); - if(io._fs.eof()) break; - buffer.push_back(c); - } + fseek(io.fp, 0, SEEK_END); + std::vector buffer(ftell(io.fp)); + fseek(io.fp, 0, SEEK_SET); + fread(buffer.data(), 1, buffer.size(), io.fp); Bytes b(std::move(buffer)); if(io.is_text()) return VAR(Str(b.str())); return VAR(std::move(b)); @@ -69,23 +66,25 @@ struct FileIO { vm->bind_method<1>(type, "write", [](VM* vm, ArgsView args){ FileIO& io = CAST(FileIO&, args[0]); - if(io.is_text()) io._fs << CAST(Str&, args[1]); - else{ + if(io.is_text()){ + Str& s = CAST(Str&, args[1]); + fwrite(s.data, 1, s.length(), io.fp); + }else{ Bytes& buffer = CAST(Bytes&, args[1]); - io._fs.write(buffer.data(), buffer.size()); + fwrite(buffer.data(), 1, buffer.size(), io.fp); } return vm->None; }); vm->bind_method<0>(type, "close", [](VM* vm, ArgsView args){ FileIO& io = CAST(FileIO&, args[0]); - io._fs.close(); + io.close(); return vm->None; }); vm->bind_method<0>(type, "__exit__", [](VM* vm, ArgsView args){ FileIO& io = CAST(FileIO&, args[0]); - io._fs.close(); + io.close(); return vm->None; });