diff --git a/docs/C-API/vm.md b/docs/C-API/vm.md
index 4164f294..cccd1a98 100644
--- a/docs/C-API/vm.md
+++ b/docs/C-API/vm.md
@@ -30,4 +30,11 @@ Delete a virtual machine.
 
 #### `void pkpy_delete_repl(REPL* repl)`
 
-Delete a REPL.
\ No newline at end of file
+Delete a REPL.
+
+#### `void pkpy_vm_compile(VM* vm, const char* source, const char* filename, int mode, bool* ok, char** res)`
+
+Compile a source into bytecode and serialize it into a string.
+
++ `ok`: set to whether the compilation is successful.
++ `res`: if `*ok` is true, `*res` is the bytecode string, otherwise it is the error message.
\ No newline at end of file
diff --git a/docs/LuaC-API/variables.md b/docs/LuaC-API/variables.md
index d8ecb99e..ec471ea4 100644
--- a/docs/LuaC-API/variables.md
+++ b/docs/LuaC-API/variables.md
@@ -24,3 +24,7 @@ A wrapper of `OP_LOAD_ATTR` bytecode.
 #### `bool pkpy_setattr(pkpy_vm*, const char* name)`
 
 A wrapper of `OP_STORE_ATTR` bytecode.
+
+#### `bool pkpy_eval(pkpy_vm*, const char* code)`
+
+Evaluate the code and push the result to the top of the stack.
\ No newline at end of file
diff --git a/src/codeobject.h b/src/codeobject.h
index 6919fc02..f7614226 100644
--- a/src/codeobject.h
+++ b/src/codeobject.h
@@ -47,6 +47,115 @@ struct CodeBlock {
         type(type), parent(parent), for_loop_depth(for_loop_depth), start(start), end(-1) {}
 };
 
+
+// Serializes a CodeObject into a line-oriented text format.
+// Every record is a one-character tag followed by its payload and END.
+// Names referenced while writing are collected in `names` and emitted as a
+// trailing index -> string table by str().
+struct CodeObjectSerializer{
+    std::string buffer;         // serialized output accumulated so far
+    int depth = 0;              // current nesting level of '[' ... ']' marks
+
+    std::set<StrName> names;    // names to append to the table in str()
+
+    static const char END = '\n';   // record terminator
+
+    // The stream always starts with the PK_VERSION string record.
+    CodeObjectSerializer(){
+        write_str(PK_VERSION);
+    }
+
+    // 'i' record: decimal integer.
+    void write_int(i64 v){
+        buffer += 'i';
+        buffer += std::to_string(v);
+        buffer += END;
+    }
+
+    // 'f' record: floating point number.
+    // NOTE(review): std::to_string formats like "%f" (6 fractional digits),
+    // so small or high-precision values do not survive a round trip.
+    void write_float(f64 v){
+        buffer += 'f';
+        buffer += std::to_string(v);
+        buffer += END;
+    }
+
+    // 's' record: escaped string.
+    void write_str(const Str& v){
+        buffer += 's';
+        buffer += v.escape(false).str();
+        buffer += END;
+    }
+
+    // 'b' record: '1' for true, '0' for false.
+    void write_bool(bool v){
+        buffer += 'b';
+        buffer += v ? '1' : '0';
+        buffer += END;
+    }
+
+    // '[' record: opens a nested group.
+    void write_begin_mark(){
+        buffer += '[';
+        buffer += END;
+        depth++;
+    }
+
+    // 'n' record: interned name index; the name is also remembered for the table.
+    void write_name(StrName name){
+        PK_ASSERT(StrName::is_valid(name.index));
+        buffer += 'n';
+        buffer += std::to_string(name.index);
+        buffer += END;
+        names.insert(name);
+    }
+
+    // ']' record: closes the innermost group opened by write_begin_mark().
+    void write_end_mark(){
+        buffer += ']';
+        buffer += END;
+        depth--;
+        PK_ASSERT(depth >= 0);
+    }
+
+    // 'x' record: the object representation of `v` as lowercase hex digits.
+    // Raw bytes must not be appended directly: they may contain END, which
+    // would corrupt the record framing, or '\0', which would truncate the
+    // result once it is handed out as a C string.
+    template<typename T>
+    void write_bytes(T v){
+        static_assert(std::is_trivially_copyable<T>::value);
+        static const char HEX[] = "0123456789abcdef";
+        const unsigned char* p = reinterpret_cast<const unsigned char*>(&v);
+        buffer += 'x';
+        for(size_t i=0; i<sizeof(T); i++){
+            buffer += HEX[p[i] >> 4];
+            buffer += HEX[p[i] & 0xf];
+        }
+        buffer += END;
+    }
+
+    void write_object(VM* vm, PyObject* obj);
+    void write_code(VM* vm, const CodeObject* co);
+
+    // Appends the name table (an 'n' record followed by an 's' record per
+    // collected name) and returns the finished buffer.
+    // The buffer is moved out, so the serializer must not be reused afterwards.
+    std::string str(){
+        PK_ASSERT(depth == 0);
+        // write_name() re-inserts a name that is already in the set, which
+        // leaves the set unchanged, so iterating while calling it is safe.
+        for(auto name: names){
+            PK_ASSERT(StrName::is_valid(name.index));
+            write_name(name);
+            write_str(name.sv());
+        }
+        return std::move(buffer);
+    }
+};
+
+
 struct CodeObject {
     shared_ptr<SourceData> src;
     Str name;
@@ -68,6 +177,90 @@ struct CodeObject {
         for(PyObject* v : consts) OBJ_MARK(v);
         for(auto& decl: func_decls) decl->_gc_mark();
     }
+
+    // Writes this code object as a sequence of records: source info, name,
+    // generator flag, bytecodes, line numbers, constants, variable names,
+    // code blocks, labels and nested function declarations.
+    void write(VM* vm, CodeObjectSerializer& ss) const{
+        ss.write_begin_mark();          // [
+            ss.write_str(src->source);      // src->source
+            ss.write_str(src->filename);    // src->filename
+            ss.write_int(src->mode);        // src->mode
+        ss.write_end_mark();            // ]
+        ss.write_str(name);             // name
+        ss.write_bool(is_generator);    // is_generator
+        ss.write_begin_mark();          // [
+            for(Bytecode bc: codes){
+                // The opcode is not inspected here: every arg that happens to
+                // be a valid name index is added to the name table.
+                if(StrName::is_valid(bc.arg)){
+                    ss.names.insert(StrName(bc.arg));
+                }
+                ss.write_bytes(bc);
+            }
+        ss.write_end_mark();            // ]
+        ss.write_begin_mark();          // [
+            for(int line: lines){
+                ss.write_int(line);         // line
+            }
+        ss.write_end_mark();            // ]
+        ss.write_begin_mark();          // [
+            for(PyObject* o: consts){
+                ss.write_object(vm, o);
+            }
+        ss.write_end_mark();            // ]
+        ss.write_begin_mark();          // [
+            for(StrName varname: varnames){
+                ss.write_name(varname);     // varname
+            }
+        ss.write_end_mark();            // ]
+        ss.write_begin_mark();          // [
+            for(CodeBlock block: blocks){
+                ss.write_bytes(block);      // block
+            }
+        ss.write_end_mark();            // ]
+        ss.write_begin_mark();          // [
+            for(auto& label: labels.items()){
+                ss.write_name(label.first);     // label.first
+                ss.write_int(label.second);     // label.second
+            }
+        ss.write_end_mark();            // ]
+        ss.write_begin_mark();          // [
+            for(auto& decl: func_decls){
+                ss.write_code(vm, decl->code.get()); // decl->code
+                ss.write_begin_mark();      // [
+                    for(int arg: decl->args) ss.write_int(arg);
+                ss.write_end_mark();        // ]
+
+                ss.write_begin_mark();      // [
+                    for(auto kw: decl->kwargs){
+                        ss.write_int(kw.key);           // kw.key
+                        ss.write_object(vm, kw.value);  // kw.value
+                    }
+                ss.write_end_mark();        // ]
+
+                ss.write_int(decl->starred_arg);
+                ss.write_int(decl->starred_kwarg);
+                ss.write_bool(decl->nested);
+            }
+        ss.write_end_mark();            // ]
+    }
+
+    // Serializes this code object (followed by the name table) into a string.
+    Str serialize(VM* vm) const{
+        CodeObjectSerializer ss;
+        ss.write_code(vm, this);
+        return ss.str();
+    }
 };
 
+// Writes `co` wrapped between a '(' record and a ')' record.
+inline void CodeObjectSerializer::write_code(VM* vm, const CodeObject* co){
+    buffer += '(';
+    buffer += END;
+    co->write(vm, *this);
+    buffer += ')';
+    buffer += END;
+}
+
 } // namespace pkpy
\ No newline at end of file
diff --git a/src/pocketpy.h b/src/pocketpy.h
index f45c6d9e..b3093c91 100644
--- a/src/pocketpy.h
+++ b/src/pocketpy.h
@@ -1329,19 +1329,30 @@ inline void add_module_traceback(VM* vm){
 
 inline void add_module_dis(VM* vm){
     PyObject* mod = vm->new_module("dis");
-    vm->bind_func<1>(mod, "dis", [](VM* vm, ArgsView args) {
-        if(is_type(args[0], vm->tp_str)){
-            const Str& source = CAST(Str, args[0]);
-            CodeObject_ code = vm->compile(source, "<dis>", EXEC_MODE);
-            vm->_stdout(vm, vm->disassemble(code));
-            return vm->None;
+
+    // Resolves the argument to a code object: a str is compiled as source,
+    // otherwise it is treated as a function (bound methods are unwrapped).
+    static const auto get_code = [](VM* vm, PyObject* obj)->CodeObject_{
+        if(is_type(obj, vm->tp_str)){
+            const Str& source = CAST(Str, obj);
+            return vm->compile(source, "<dis>", EXEC_MODE);
         }
-        PyObject* f = args[0];
-        if(is_type(f, vm->tp_bound_method)) f = CAST(BoundMethod, args[0]).func;
-        CodeObject_ code = CAST(Function&, f).decl->code;
+        PyObject* f = obj;
+        if(is_type(f, vm->tp_bound_method)) f = CAST(BoundMethod, obj).func;
+        return CAST(Function&, f).decl->code;
+    };
+
+    vm->bind_func<1>(mod, "dis", [](VM* vm, ArgsView args) {
+        CodeObject_ code = get_code(vm, args[0]);
         vm->_stdout(vm, vm->disassemble(code));
         return vm->None;
     });
+
+    // dis._s(obj): returns the serialized code object as a str.
+    vm->bind_func<1>(mod, "_s", [](VM* vm, ArgsView args) {
+        CodeObject_ code = get_code(vm, args[0]);
+        return VAR(code->serialize(vm));
+    });
 }
 
 inline void add_module_gc(VM* vm){
@@ -1454,6 +1465,27 @@ extern "C" {
         vm->exec(source, filename, (pkpy::CompileMode)mode, mod);
     }
 
+    // Compiles `source` and serializes the resulting code object.
+    // On success `*ok` is true and `*res` is the serialized bytecode; on
+    // failure `*ok` is false and `*res` is an error message. In both cases
+    // `*res` is a duplicated C string (c_str_dup / strdup) given to the caller.
+    // `res` must point to the caller's `char*`: assigning to a by-value
+    // `char*` parameter would never reach the caller and would leak the string.
+    PK_LEGACY_EXPORT
+    void pkpy_vm_compile(pkpy::VM* vm, const char* source, const char* filename, int mode, bool* ok, char** res){
+        try{
+            pkpy::CodeObject_ code = vm->compile(source, filename, (pkpy::CompileMode)mode);
+            *res = code->serialize(vm).c_str_dup();
+            *ok = true;
+        }catch(pkpy::Exception& e){
+            *ok = false;
+            *res = e.summary().c_str_dup();
+        }catch(...){
+            *ok = false;
+            *res = strdup("unknown error");
+        }
+    }
+
     PK_LEGACY_EXPORT
     pkpy::REPL* pkpy_new_repl(pkpy::VM* vm){
         pkpy::REPL* p = new pkpy::REPL(vm);
diff --git a/src/str.h b/src/str.h
index d5ac7bf6..b0ef48e1 100644
--- a/src/str.h
+++ b/src/str.h
@@ -355,6 +355,13 @@ struct StrName {
         return os << sn.sv();
     }
 
+    // Whether `index` refers to an existing entry, i.e. whether
+    // _r_interned[index-1] may be accessed.
+    static bool is_valid(int index) {
+        // `index > 0` is checked first, so the cast cannot wrap around.
+        return index > 0 && static_cast<size_t>(index) <= _r_interned.size();
+    }
+
     Str escape() const {
         return _r_interned[index-1].escape();
     }
diff --git a/src/vm.h b/src/vm.h
index 3955f2d3..8155df61 100644
--- a/src/vm.h
+++ b/src/vm.h
@@ -1564,4 +1564,13 @@ inline void Dict::_probe(PyObject *key, bool &ok, int &i) const{
     }
 }
 
+// 'o' record: the tag, then the object's repr written as an 's' record,
+// then one more END.
+inline void CodeObjectSerializer::write_object(VM *vm, PyObject *obj){
+    buffer += 'o';
+    PyObject* s = vm->py_repr(obj);
+    write_str(CAST(Str&, s));
+    buffer += END;
+}
+
 } // namespace pkpy
\ No newline at end of file