refactor importer

This commit is contained in:
blueloveTH 2023-08-28 04:05:24 +08:00
parent eac3230a71
commit 0c80a626ba
8 changed files with 139 additions and 103 deletions

View File

@ -106,8 +106,7 @@ OPCODE(UNARY_INVERT)
OPCODE(GET_ITER) OPCODE(GET_ITER)
OPCODE(FOR_ITER) OPCODE(FOR_ITER)
/**************************/ /**************************/
OPCODE(IMPORT_NAME) OPCODE(IMPORT_PATH)
OPCODE(IMPORT_NAME_REL)
OPCODE(POP_IMPORT_STAR) OPCODE(POP_IMPORT_STAR)
/**************************/ /**************************/
OPCODE(UNPACK_SEQUENCE) OPCODE(UNPACK_SEQUENCE)

View File

@ -72,8 +72,9 @@ struct Str{
Str upper() const; Str upper() const;
Str escape(bool single_quote=true) const; Str escape(bool single_quote=true) const;
int index(const Str& sub, int start=0) const; int index(const Str& sub, int start=0) const;
Str replace(char old, char new_) const;
Str replace(const Str& old, const Str& new_, int count=-1) const; Str replace(const Str& old, const Str& new_, int count=-1) const;
std::vector<std::string_view> split(const Str& sep) const; std::vector<std::string_view> split(const Str& sep, bool remove_empty) const;
/*************unicode*************/ /*************unicode*************/
int _unicode_index_to_byte(int i) const; int _unicode_index_to_byte(int i) const;
@ -186,6 +187,7 @@ const StrName __enter__ = StrName::get("__enter__");
const StrName __exit__ = StrName::get("__exit__"); const StrName __exit__ = StrName::get("__exit__");
const StrName __name__ = StrName::get("__name__"); const StrName __name__ = StrName::get("__name__");
const StrName __all__ = StrName::get("__all__"); const StrName __all__ = StrName::get("__all__");
const StrName __package__ = StrName::get("__package__");
const StrName pk_id_add = StrName::get("add"); const StrName pk_id_add = StrName::get("add");
const StrName pk_id_set = StrName::get("set"); const StrName pk_id_set = StrName::get("set");

View File

@ -368,6 +368,7 @@ public:
void UnboundLocalError(StrName name){ _error("UnboundLocalError", fmt("local variable ", name.escape() + " referenced before assignment")); } void UnboundLocalError(StrName name){ _error("UnboundLocalError", fmt("local variable ", name.escape() + " referenced before assignment")); }
void KeyError(PyObject* obj){ _error("KeyError", PK_OBJ_GET(Str, py_repr(obj))); } void KeyError(PyObject* obj){ _error("KeyError", PK_OBJ_GET(Str, py_repr(obj))); }
void BinaryOptError(const char* op) { TypeError(fmt("unsupported operand type(s) for ", op)); } void BinaryOptError(const char* op) { TypeError(fmt("unsupported operand type(s) for ", op)); }
void ImportError(const Str& msg){ _error("ImportError", msg); }
void AttributeError(PyObject* obj, StrName name){ void AttributeError(PyObject* obj, StrName name){
// OBJ_NAME calls getattr, which may lead to a infinite recursion // OBJ_NAME calls getattr, which may lead to a infinite recursion
@ -409,31 +410,21 @@ public:
} }
struct ImportContext{ struct ImportContext{
// 0: normal; 1: __init__.py; 2: relative std::vector<StrName> pending;
std::vector<std::pair<StrName, int>> pending;
struct Temp{ struct Temp{
VM* vm; ImportContext* ctx;
StrName name; StrName name;
Temp(ImportContext* ctx, StrName name) : ctx(ctx), name(name){
Temp(VM* vm, StrName name, int type): vm(vm), name(name){ ctx->pending.push_back(name);
ImportContext* ctx = &vm->_import_context;
ctx->pending.emplace_back(name, type);
}
~Temp(){
ImportContext* ctx = &vm->_import_context;
ctx->pending.pop_back();
} }
~Temp(){ ctx->pending.pop_back(); }
}; };
Temp temp(VM* vm, StrName name, int type){ Temp scope(StrName name){ return {this, name}; }
return Temp(vm, name, type);
}
}; };
ImportContext _import_context; ImportContext _import_context;
PyObject* py_import(Str path, bool relative=false); PyObject* py_import(Str path, PyObject* _module);
~VM(); ~VM();
#if PK_DEBUG_CEVAL_STEP #if PK_DEBUG_CEVAL_STEP
@ -447,7 +438,7 @@ public:
bool py_bool(PyObject* obj); bool py_bool(PyObject* obj);
i64 py_hash(PyObject* obj); i64 py_hash(PyObject* obj);
PyObject* py_list(PyObject*); PyObject* py_list(PyObject*);
PyObject* new_module(StrName name); PyObject* new_module(Str name, Str package="");
Str disassemble(CodeObject_ co); Str disassemble(CodeObject_ co);
void init_builtin_types(); void init_builtin_types();
PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true); PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true);

View File

@ -590,13 +590,9 @@ __NEXT_STEP:;
} }
DISPATCH(); DISPATCH();
/*****************************************/ /*****************************************/
TARGET(IMPORT_NAME) TARGET(IMPORT_PATH)
_0 = co_consts[byte.arg]; _0 = co_consts[byte.arg];
PUSH(py_import(CAST(Str&, _0))); PUSH(py_import(CAST(Str&, _0), frame->_module));
DISPATCH();
TARGET(IMPORT_NAME_REL)
_0 = co_consts[byte.arg];
PUSH(py_import(CAST(Str&, _0), true));
DISPATCH(); DISPATCH();
TARGET(POP_IMPORT_STAR) { TARGET(POP_IMPORT_STAR) {
_0 = POPX(); // pop the module _0 = POPX(); // pop the module
@ -606,7 +602,7 @@ __NEXT_STEP:;
_name = StrName::get(CAST(Str&, key).sv()); _name = StrName::get(CAST(Str&, key).sv());
PyObject* value = _0->attr().try_get(_name); PyObject* value = _0->attr().try_get(_name);
if(value == nullptr){ if(value == nullptr){
_error("ImportError", fmt("cannot import name ", _name.escape())); ImportError(fmt("cannot import name ", _name.escape()));
}else{ }else{
frame->f_globals().set(_name, value); frame->f_globals().set(_name, value);
} }

View File

@ -479,7 +479,7 @@ __SUBSCR_END:
do { do {
consume(TK("@id")); consume(TK("@id"));
Str name = prev().str(); Str name = prev().str();
ctx()->emit(OP_IMPORT_NAME, ctx()->add_const(VAR(name)), prev().line); ctx()->emit(OP_IMPORT_PATH, ctx()->add_const(VAR(name)), prev().line);
if (match(TK("as"))) { if (match(TK("as"))) {
consume(TK("@id")); consume(TK("@id"));
name = prev().str(); name = prev().str();
@ -493,37 +493,45 @@ __SUBSCR_END:
// from a.b import c [as d] // from a.b import c [as d]
// from . import a [as b] // from . import a [as b]
// from .a import b [as c] // from .a import b [as c]
// from ..a import b [as c]
// from .a.b import c [as d] // from .a.b import c [as d]
// from xxx import * // from xxx import *
void Compiler::compile_from_import() { void Compiler::compile_from_import() {
if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope"); if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope");
Opcode op = OP_IMPORT_NAME; int dots = 0;
if(match(TK("."))) op = OP_IMPORT_NAME_REL;
std::vector<Str> parts;
if(op == OP_IMPORT_NAME_REL){ while(true){
switch(curr().type){
case TK("."): dots++; break;
case TK("..."): dots+=3; break;
default: goto __EAT_DOTS_END;
}
advance();
}
__EAT_DOTS_END:
std::stringstream ss;
for(int i=0; i<dots; i++) ss << '.';
if(dots > 0){
// @id is optional if dots > 0
if(match(TK("@id"))){ if(match(TK("@id"))){
parts.push_back(prev().str()); ss << prev().str();
while (match(TK("."))) { while (match(TK("."))) {
consume(TK("@id")); consume(TK("@id"));
parts.push_back(prev().str()); ss << "." << prev().str();
} }
} }
}else{ }else{
// @id is required if dots == 0
consume(TK("@id")); consume(TK("@id"));
parts.push_back(prev().str()); ss << prev().str();
while (match(TK("."))) { while (match(TK("."))) {
consume(TK("@id")); consume(TK("@id"));
parts.push_back(prev().str()); ss << "." << prev().str();
} }
} }
FastStrStream ss; ctx()->emit(OP_IMPORT_PATH, ctx()->add_const(VAR(ss.str())), prev().line);
for (int i=0; i<parts.size(); i++) {
if(i > 0) ss << ".";
ss << parts[i];
}
ctx()->emit(op, ctx()->add_const(VAR(ss.str())), prev().line);
consume(TK("import")); consume(TK("import"));
if (match(TK("*"))) { if (match(TK("*"))) {
@ -537,7 +545,6 @@ __SUBSCR_END:
ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE); ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
consume(TK("@id")); consume(TK("@id"));
Str name = prev().str(); Str name = prev().str();
// module's __getattr__ should be customized or use a new opcode...
ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line); ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line);
if (match(TK("as"))) { if (match(TK("as"))) {
consume(TK("@id")); consume(TK("@id"));

View File

@ -171,18 +171,18 @@ void init_builtins(VM* _vm) {
if(ext == ".so" || ext == ".dll" || ext == ".dylib"){ if(ext == ".so" || ext == ".dll" || ext == ".dylib"){
dylib_entry_t entry = load_dylib(name.c_str()); dylib_entry_t entry = load_dylib(name.c_str());
if(!entry){ if(!entry){
vm->_error("ImportError", "cannot load dynamic library: " + name.escape()); vm->ImportError("cannot load dynamic library: " + name.escape());
} }
vm->_c.s_view.push(ArgsView(vm->s_data.end(), vm->s_data.end())); vm->_c.s_view.push(ArgsView(vm->s_data.end(), vm->s_data.end()));
const char* name = entry(vm, PK_VERSION); const char* name = entry(vm, PK_VERSION);
vm->_c.s_view.pop(); vm->_c.s_view.pop();
if(name == nullptr){ if(name == nullptr){
vm->_error("ImportError", "module initialization failed: " + Str(name).escape()); vm->ImportError("module initialization failed: " + Str(name).escape());
} }
return vm->_modules[name]; return vm->_modules[name];
} }
} }
return vm->py_import(name); return vm->py_import(name, vm->top_frame()->_module);
}); });
_vm->bind_builtin_func<2>("divmod", [](VM* vm, ArgsView args) { _vm->bind_builtin_func<2>("divmod", [](VM* vm, ArgsView args) {
@ -560,7 +560,7 @@ void init_builtins(VM* _vm) {
_vm->bind(_vm->_t(_vm->tp_str), "split(self, sep=' ')", [](VM* vm, ArgsView args) { _vm->bind(_vm->_t(_vm->tp_str), "split(self, sep=' ')", [](VM* vm, ArgsView args) {
const Str& self = _CAST(Str&, args[0]); const Str& self = _CAST(Str&, args[0]);
std::vector<std::string_view> parts = self.split(CAST(Str&, args[1])); std::vector<std::string_view> parts = self.split(CAST(Str&, args[1]), false);
List ret(parts.size()); List ret(parts.size());
for(int i=0; i<parts.size(); i++) ret[i] = VAR(Str(parts[i])); for(int i=0; i<parts.size(); i++) ret[i] = VAR(Str(parts[i]));
return VAR(std::move(ret)); return VAR(std::move(ret));
@ -1225,6 +1225,16 @@ void init_builtins(VM* _vm) {
} }
return vm->True; return vm->True;
}); });
_vm->bind__repr__(_vm->tp_module, [](VM* vm, PyObject* obj) {
const Str& package = CAST(Str&, obj->attr(__package__));
Str name = CAST(Str&, obj->attr(__name__));
if(!package.empty()){
name = package + "." + name;
}
return VAR(fmt("<module ", name.escape(), ">"));
});
/************ property ************/ /************ property ************/
_vm->bind_constructor<-1>("property", [](VM* vm, ArgsView args) { _vm->bind_constructor<-1>("property", [](VM* vm, ArgsView args) {
if(args.size() == 1+1){ if(args.size() == 1+1){

View File

@ -254,6 +254,14 @@ int utf8len(unsigned char c, bool suppress){
return p - data; return p - data;
} }
Str Str::replace(char old, char new_) const{
Str copied = *this;
for(int i=0; i<copied.size; i++){
if(copied.data[i] == old) copied.data[i] = new_;
}
return copied;
}
Str Str::replace(const Str& old, const Str& new_, int count) const { Str Str::replace(const Str& old, const Str& new_, int count) const {
std::stringstream ss; std::stringstream ss;
int start = 0; int start = 0;
@ -308,16 +316,19 @@ int utf8len(unsigned char c, bool suppress){
return _byte_index_to_unicode(size); return _byte_index_to_unicode(size);
} }
std::vector<std::string_view> Str::split(const Str& sep) const{ std::vector<std::string_view> Str::split(const Str& sep, bool remove_empty) const{
std::vector<std::string_view> result; std::vector<std::string_view> result;
std::string_view tmp;
int start = 0; int start = 0;
while(true){ while(true){
int i = index(sep, start); int i = index(sep, start);
if(i == -1) break; if(i == -1) break;
result.push_back(sv().substr(start, i - start)); tmp = sv().substr(start, i - start);
if(!remove_empty || !tmp.empty()) result.push_back(tmp);
start = i + sep.size; start = i + sep.size;
} }
result.push_back(sv().substr(start, size - start)); tmp = sv().substr(start, size - start);
if(!remove_empty || !tmp.empty()) result.push_back(tmp);
return result; return result;
} }

View File

@ -215,57 +215,73 @@ namespace pkpy{
return call_method(obj, __next__); return call_method(obj, __next__);
} }
PyObject* VM::py_import(Str name, bool relative){ PyObject* VM::py_import(Str path, PyObject* _module){
// path is '.' separated if(path.empty()) vm->ValueError("empty module name");
Str filename;
int type; auto f_join = [](const std::vector<std::string_view>& cpnts){
if(relative){ std::stringstream ss;
ImportContext* ctx = &_import_context; for(int i=0; i<cpnts.size(); i++){
type = 2; if(i != 0) ss << ".";
for(auto it=ctx->pending.rbegin(); it!=ctx->pending.rend(); ++it){ ss << cpnts[i];
if(it->second == 2) continue;
if(it->second == 1){
filename = fmt(it->first, kPlatformSep, name, ".py");
name = fmt(it->first, '.', name).c_str();
break;
}
} }
if(filename.length() == 0) _error("ImportError", "relative import outside of package"); return Str(ss.str());
}else{ };
type = 0;
filename = fmt(name, ".py"); if(path[0] == '.'){
} Str _mod_name = CAST(Str&, _module->attr(__name__));
for(auto& [k, v]: _import_context.pending){ Str _mod_package = CAST(Str&, _module->attr(__package__));
if(k == name){ // get _module's fullname
vm->_error("ImportError", fmt("circular import ", name.escape())); if(!_mod_package.empty()) _mod_name = _mod_package + "." + _mod_name;
// convert relative path to absolute path
std::vector<std::string_view> cpnts = _mod_name.split(".", true);
int prefix = 0; // how many dots in the prefix
for(int i=0; i<path.length(); i++){
if(path[i] == '.') prefix++;
else break;
} }
if(prefix > cpnts.size()) ImportError("attempted relative import beyond top-level package");
path = path.substr(prefix); // remove prefix
for(int i=1; i<prefix; i++) cpnts.pop_back();
cpnts.push_back(path.sv());
path = f_join(cpnts);
} }
StrName name(path); // path to StrName
// check circular import
for(StrName pending_name: _import_context.pending){
if(pending_name == name) ImportError(fmt("circular import ", name.escape()));
}
PyObject* ext_mod = _modules.try_get(name); PyObject* ext_mod = _modules.try_get(name);
if(ext_mod == nullptr){ if(ext_mod != nullptr) return ext_mod;
Str source;
auto it = _lazy_modules.find(name); // try import
if(it == _lazy_modules.end()){ Str filename = path.replace('.', kPlatformSep) + ".py";
Bytes b = _import_handler(filename); Str source;
if(!relative && !b){ auto it = _lazy_modules.find(name);
filename = fmt(name, kPlatformSep, "__init__.py"); if(it == _lazy_modules.end()){
b = _import_handler(filename); Bytes b = _import_handler(filename);
if(b) type = 1; if(!b){
} filename = path.replace('.', kPlatformSep).str() + kPlatformSep + "__init__.py";
if(!b) _error("ImportError", fmt("module ", name.escape(), " not found")); b = _import_handler(filename);
source = Str(b.str());
}else{
source = it->second;
_lazy_modules.erase(it);
} }
auto _ = _import_context.temp(this, name, type); if(!b) ImportError(fmt("module ", path.escape(), " not found"));
CodeObject_ code = compile(source, filename, EXEC_MODE); source = Str(b.str());
PyObject* new_mod = new_module(name);
_exec(code, new_mod);
new_mod->attr()._try_perfect_rehash();
return new_mod;
}else{ }else{
return ext_mod; source = it->second;
_lazy_modules.erase(it);
} }
auto _ = _import_context.scope(name);
CodeObject_ code = compile(source, filename, EXEC_MODE);
auto all_cpnts = path.split(".", true);
Str name_cpnt = all_cpnts.back();
all_cpnts.pop_back();
PyObject* new_mod = new_module(name_cpnt, f_join(all_cpnts));
_exec(code, new_mod);
new_mod->attr()._try_perfect_rehash();
return new_mod;
} }
VM::~VM() { VM::~VM() {
@ -471,12 +487,15 @@ PyObject* VM::format(Str spec, PyObject* obj){
return VAR(ret); return VAR(ret);
} }
PyObject* VM::new_module(StrName name) { PyObject* VM::new_module(Str name, Str package) {
PyObject* obj = heap._new<DummyModule>(tp_module); PyObject* obj = heap._new<DummyModule>(tp_module);
obj->attr().set("__name__", VAR(name.sv())); obj->attr().set(__name__, VAR(name));
obj->attr().set(__package__, VAR(package));
// we do not allow override in order to avoid memory leak // we do not allow override in order to avoid memory leak
// it is because Module objects are not garbage collected // it is because Module objects are not garbage collected
if(_modules.contains(name)) throw std::runtime_error("module already exists"); if(_modules.contains(name)) throw std::runtime_error("module already exists");
// convert to fullname and set it into _modules
if(!package.empty()) name = package + "." + name;
_modules.set(name, obj); _modules.set(name, obj);
return obj; return obj;
} }
@ -484,14 +503,14 @@ PyObject* VM::new_module(StrName name) {
static std::string _opcode_argstr(VM* vm, Bytecode byte, const CodeObject* co){ static std::string _opcode_argstr(VM* vm, Bytecode byte, const CodeObject* co){
std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg); std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
switch(byte.op){ switch(byte.op){
case OP_LOAD_CONST: case OP_FORMAT_STRING: case OP_LOAD_CONST: case OP_FORMAT_STRING: case OP_IMPORT_PATH:
if(vm != nullptr){ if(vm != nullptr){
argStr += fmt(" (", CAST(Str, vm->py_repr(co->consts[byte.arg])), ")"); argStr += fmt(" (", CAST(Str, vm->py_repr(co->consts[byte.arg])), ")");
} }
break; break;
case OP_LOAD_NAME: case OP_LOAD_GLOBAL: case OP_LOAD_NONLOCAL: case OP_STORE_GLOBAL: case OP_LOAD_NAME: case OP_LOAD_GLOBAL: case OP_LOAD_NONLOCAL: case OP_STORE_GLOBAL:
case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR: case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR:
case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_RAISE: case OP_BEGIN_CLASS: case OP_RAISE:
case OP_DELETE_GLOBAL: case OP_INC_GLOBAL: case OP_DEC_GLOBAL: case OP_STORE_CLASS_ATTR: case OP_DELETE_GLOBAL: case OP_INC_GLOBAL: case OP_DEC_GLOBAL: case OP_STORE_CLASS_ATTR:
argStr += fmt(" (", StrName(byte.arg).sv(), ")"); argStr += fmt(" (", StrName(byte.arg).sv(), ")");
break; break;
@ -884,10 +903,6 @@ PyObject* VM::vectorcall(int ARGC, int KWARGC, bool op_call){
return nullptr; return nullptr;
} }
// https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance
PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){ PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){
PyObject* objtype; PyObject* objtype;
@ -919,6 +934,11 @@ PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){
} }
return cls_var; return cls_var;
} }
if(is_non_tagged_type(obj, tp_module)){
// try import and cache it!
}
if(throw_err) AttributeError(obj, name); if(throw_err) AttributeError(obj, name);
return nullptr; return nullptr;
} }