From 8bb3cefb347a55695632403a18994b8b5ad19c81 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Fri, 14 Apr 2023 20:33:54 +0800 Subject: [PATCH] add a fast path test --- run_profile.sh | 2 +- src/ceval.h | 18 +++++++-- src/frame.h | 4 ++ src/tuplelist.h | 36 ++++++++++++----- src/vm.h | 105 ++++++++++++++++++++++++++---------------------- 5 files changed, 103 insertions(+), 62 deletions(-) diff --git a/run_profile.sh b/run_profile.sh index 388c44b3..c7bea6c3 100644 --- a/run_profile.sh +++ b/run_profile.sh @@ -1,5 +1,5 @@ clang++ -pg -O2 -std=c++17 -fno-rtti -stdlib=libc++ -Wall -o pocketpy src/main.cpp -time ./pocketpy benchmarks/primes.py +time ./pocketpy benchmarks/fib.py mv benchmarks/gmon.out . gprof pocketpy gmon.out > gprof.txt rm gmon.out \ No newline at end of file diff --git a/src/ceval.h b/src/ceval.h index db0f2b2e..9005a173 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -343,14 +343,24 @@ __NEXT_STEP:; TARGET(CALL) TARGET(CALL_UNPACK) { int ARGC = byte.arg; - + PyObject* callable = frame->top_n(ARGC+1); bool method_call = frame->top_n(ARGC) != _py_null; - if(method_call) ARGC++; // add self into args - Args args = frame->popx_n_reversed(ARGC); + + // fast path + if(byte.op==OP_CALL && is_type(callable, tp_function)){ + ArgsView args = frame->top_n_view(ARGC + int(method_call)); + PyObject* ret = _py_call(callable, args, {}); + frame->pop_n(ARGC + 2); + if(ret == nullptr) goto __PY_OP_CALL; + else frame->push(ret); // a generator + DISPATCH(); + } + + Args args = frame->popx_n_reversed(ARGC + int(method_call)); if(!method_call) frame->pop(); if(byte.op == OP_CALL_UNPACK) unpack_args(args); - PyObject* callable = frame->popx(); + frame->pop(); PyObject* ret = call(callable, std::move(args), no_arg(), true); if(ret == _py_op_call) { __ret=ret; goto __PY_OP_CALL; } frame->push(ret); diff --git a/src/frame.h b/src/frame.h index 84edd3f8..44c6b327 100644 --- a/src/frame.h +++ b/src/frame.h @@ -254,6 +254,10 @@ struct Frame { _data.pop_back_n(n); } + ArgsView top_n_view(int n){ + return ArgsView(_data.end()-n, _data.end()); + } + void _gc_mark() const { // do return if this frame has been moved if(_data._data == nullptr) return; diff --git a/src/tuplelist.h b/src/tuplelist.h index 0abc993f..dc1ad3c6 100644 --- a/src/tuplelist.h +++ b/src/tuplelist.h @@ -9,7 +9,7 @@ namespace pkpy { using List = pod_vector; -class Args { +class Tuple { PyObject** _args; int _size; @@ -19,26 +19,26 @@ class Args { } public: - Args(int n){ _alloc(n); } + Tuple(int n){ _alloc(n); } - Args(const Args& other){ + Tuple(const Tuple& other){ _alloc(other._size); for(int i=0; i<_size; i++) _args[i] = other._args[i]; } - Args(Args&& other) noexcept { + Tuple(Tuple&& other) noexcept { this->_args = other._args; this->_size = other._size; other._args = nullptr; other._size = 0; } - Args(std::initializer_list list) : Args(list.size()){ + Tuple(std::initializer_list list) : Tuple(list.size()){ int i = 0; for(PyObject* p : list) _args[i++] = p; } - Args(List&& other) noexcept : Args(other.size()){ + Tuple(List&& other) noexcept : Tuple(other.size()){ for(int i=0; i<_size; i++) _args[i] = other[i]; other.clear(); } @@ -46,7 +46,7 @@ public: PyObject*& operator[](int i){ return _args[i]; } PyObject* operator[](int i) const { return _args[i]; } - Args& operator=(Args&& other) noexcept { + Tuple& operator=(Tuple&& other) noexcept { if(_args!=nullptr) pool64.dealloc(_args); this->_args = other._args; this->_size = other._size; @@ -57,6 +57,9 @@ public: int size() const { return _size; } + PyObject** begin() const { return _args; } + PyObject** end() const { return _args + _size; } + List to_list() noexcept { List ret(_size); // TODO: use move/memcpy @@ -73,14 +76,29 @@ public: if(old_args!=nullptr) pool64.dealloc(old_args); } - ~Args(){ if(_args!=nullptr) pool64.dealloc(_args); } + ~Tuple(){ if(_args!=nullptr) pool64.dealloc(_args); } }; +using Args = Tuple; inline const Args& no_arg() { static const Args _zero(0); return _zero; } -typedef Args Tuple; +// a lightweight view for function args, it does not own the memory +struct ArgsView{ + PyObject** _begin; + PyObject** _end; + + ArgsView(PyObject** begin, PyObject** end) : _begin(begin), _end(end) {} + ArgsView(const Tuple& t) : _begin(t.begin()), _end(t.end()) {} + ArgsView(): _begin(nullptr), _end(nullptr) {} + + PyObject** begin() const { return _begin; } + PyObject** end() const { return _end; } + int size() const { return _end - _begin; } + bool empty() const { return _begin == _end; } + PyObject* operator[](int i) const { return _begin[i]; } +}; } // namespace pkpy \ No newline at end of file diff --git a/src/vm.h b/src/vm.h index 0abeb635..68764d70 100644 --- a/src/vm.h +++ b/src/vm.h @@ -338,6 +338,7 @@ public: Str disassemble(CodeObject_ co); void init_builtin_types(); PyObject* call(PyObject* callable, Args args, const Args& kwargs, bool opCall); + PyObject* _py_call(PyObject* callable, ArgsView args, ArgsView kwargs); void unpack_args(Args& args); PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true); PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err=true, bool fallback=false); @@ -687,6 +688,59 @@ inline void VM::init_builtin_types(){ for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash(); } +inline PyObject* VM::_py_call(PyObject* callable, ArgsView args, ArgsView kwargs){ + // callable is a `function` object + const Function& fn = CAST(Function&, callable); + const CodeObject* co = fn.decl->code.get(); + FastLocals locals(co); + + int i = 0; + for(int index: fn.decl->args){ + if(i < args.size()){ + locals[index] = args[i++]; + }else{ + StrName name = co->varnames[index]; + TypeError(fmt("missing positional argument ", name.escape())); + } + } + + // prepare kwdefaults + for(auto& kv: fn.decl->kwargs) locals[kv.key] = kv.value; + + // handle *args + if(fn.decl->starred_arg != -1){ + List vargs; // handle *args + while(i < args.size()) vargs.push_back(args[i++]); + locals[fn.decl->starred_arg] = VAR(Tuple(std::move(vargs))); + }else{ + // kwdefaults override + for(auto& kv: fn.decl->kwargs){ + if(i < args.size()){ + locals[kv.key] = args[i++]; + }else{ + break; + } + } + if(i < args.size()) TypeError("too many arguments"); + } + + for(int i=0; iname, "()")); + } + } + PyObject* _module = fn._module != nullptr ? fn._module : top_frame()->_module; + if(co->is_generator){ + return PyIter(Generator(this, Frame(co, _module, std::move(locals), fn._closure))); + } + _push_new_frame(co, _module, std::move(locals), fn._closure); + return nullptr; +} + // TODO: callable/args here may be garbage collected accidentally inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCall){ if(is_type(callable, tp_bound_method)){ @@ -700,54 +754,9 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo if(kwargs.size() != 0) TypeError("native_function does not accept keyword arguments"); return f(this, args); } else if(is_type(callable, tp_function)){ - const Function& fn = CAST(Function&, callable); - const CodeObject* co = fn.decl->code.get(); - FastLocals locals(co); - - int i = 0; - for(int index: fn.decl->args){ - if(i < args.size()){ - locals[index] = args[i++]; - }else{ - StrName name = co->varnames[index]; - TypeError(fmt("missing positional argument ", name.escape())); - } - } - - // prepare kwdefaults - for(auto& kv: fn.decl->kwargs) locals[kv.key] = kv.value; - - // handle *args - if(fn.decl->starred_arg != -1){ - List vargs; // handle *args - while(i < args.size()) vargs.push_back(args[i++]); - locals[fn.decl->starred_arg] = VAR(Tuple(std::move(vargs))); - }else{ - // kwdefaults override - for(auto& kv: fn.decl->kwargs){ - if(i < args.size()){ - locals[kv.key] = args[i++]; - }else{ - break; - } - } - if(i < args.size()) TypeError("too many arguments"); - } - - for(int i=0; iname, "()")); - } - } - PyObject* _module = fn._module != nullptr ? fn._module : top_frame()->_module; - if(co->is_generator){ - return PyIter(Generator(this, Frame(co, _module, std::move(locals), fn._closure))); - } - _push_new_frame(co, _module, std::move(locals), fn._closure); + // ret is nullptr or a generator + PyObject* ret = _py_call(callable, args, kwargs); + if(ret != nullptr) return ret; if(opCall) return _py_op_call; return _run_top_frame(); }