refactor importer

This commit is contained in:
blueloveTH 2023-08-28 04:05:24 +08:00
parent eac3230a71
commit 0c80a626ba
8 changed files with 139 additions and 103 deletions

View File

@ -106,8 +106,7 @@ OPCODE(UNARY_INVERT)
OPCODE(GET_ITER)
OPCODE(FOR_ITER)
/**************************/
OPCODE(IMPORT_NAME)
OPCODE(IMPORT_NAME_REL)
OPCODE(IMPORT_PATH)
OPCODE(POP_IMPORT_STAR)
/**************************/
OPCODE(UNPACK_SEQUENCE)

View File

@ -72,8 +72,9 @@ struct Str{
Str upper() const;
Str escape(bool single_quote=true) const;
int index(const Str& sub, int start=0) const;
Str replace(char old, char new_) const;
Str replace(const Str& old, const Str& new_, int count=-1) const;
std::vector<std::string_view> split(const Str& sep) const;
std::vector<std::string_view> split(const Str& sep, bool remove_empty) const;
/*************unicode*************/
int _unicode_index_to_byte(int i) const;
@ -186,6 +187,7 @@ const StrName __enter__ = StrName::get("__enter__");
const StrName __exit__ = StrName::get("__exit__");
const StrName __name__ = StrName::get("__name__");
const StrName __all__ = StrName::get("__all__");
const StrName __package__ = StrName::get("__package__");
const StrName pk_id_add = StrName::get("add");
const StrName pk_id_set = StrName::get("set");

View File

@ -368,6 +368,7 @@ public:
void UnboundLocalError(StrName name){ _error("UnboundLocalError", fmt("local variable ", name.escape() + " referenced before assignment")); }
void KeyError(PyObject* obj){ _error("KeyError", PK_OBJ_GET(Str, py_repr(obj))); }
void BinaryOptError(const char* op) { TypeError(fmt("unsupported operand type(s) for ", op)); }
void ImportError(const Str& msg){ _error("ImportError", msg); }
void AttributeError(PyObject* obj, StrName name){
// OBJ_NAME calls getattr, which may lead to an infinite recursion
@ -409,31 +410,21 @@ public:
}
struct ImportContext{
// 0: normal; 1: __init__.py; 2: relative
std::vector<std::pair<StrName, int>> pending;
std::vector<StrName> pending;
struct Temp{
VM* vm;
ImportContext* ctx;
StrName name;
Temp(VM* vm, StrName name, int type): vm(vm), name(name){
ImportContext* ctx = &vm->_import_context;
ctx->pending.emplace_back(name, type);
}
~Temp(){
ImportContext* ctx = &vm->_import_context;
ctx->pending.pop_back();
Temp(ImportContext* ctx, StrName name) : ctx(ctx), name(name){
ctx->pending.push_back(name);
}
~Temp(){ ctx->pending.pop_back(); }
};
Temp temp(VM* vm, StrName name, int type){
return Temp(vm, name, type);
}
Temp scope(StrName name){ return {this, name}; }
};
ImportContext _import_context;
PyObject* py_import(Str path, bool relative=false);
PyObject* py_import(Str path, PyObject* _module);
~VM();
#if PK_DEBUG_CEVAL_STEP
@ -447,7 +438,7 @@ public:
bool py_bool(PyObject* obj);
i64 py_hash(PyObject* obj);
PyObject* py_list(PyObject*);
PyObject* new_module(StrName name);
PyObject* new_module(Str name, Str package="");
Str disassemble(CodeObject_ co);
void init_builtin_types();
PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true);

View File

@ -590,13 +590,9 @@ __NEXT_STEP:;
}
DISPATCH();
/*****************************************/
TARGET(IMPORT_NAME)
TARGET(IMPORT_PATH)
_0 = co_consts[byte.arg];
PUSH(py_import(CAST(Str&, _0)));
DISPATCH();
TARGET(IMPORT_NAME_REL)
_0 = co_consts[byte.arg];
PUSH(py_import(CAST(Str&, _0), true));
PUSH(py_import(CAST(Str&, _0), frame->_module));
DISPATCH();
TARGET(POP_IMPORT_STAR) {
_0 = POPX(); // pop the module
@ -606,7 +602,7 @@ __NEXT_STEP:;
_name = StrName::get(CAST(Str&, key).sv());
PyObject* value = _0->attr().try_get(_name);
if(value == nullptr){
_error("ImportError", fmt("cannot import name ", _name.escape()));
ImportError(fmt("cannot import name ", _name.escape()));
}else{
frame->f_globals().set(_name, value);
}

View File

@ -479,7 +479,7 @@ __SUBSCR_END:
do {
consume(TK("@id"));
Str name = prev().str();
ctx()->emit(OP_IMPORT_NAME, ctx()->add_const(VAR(name)), prev().line);
ctx()->emit(OP_IMPORT_PATH, ctx()->add_const(VAR(name)), prev().line);
if (match(TK("as"))) {
consume(TK("@id"));
name = prev().str();
@ -493,37 +493,45 @@ __SUBSCR_END:
// from a.b import c [as d]
// from . import a [as b]
// from .a import b [as c]
// from ..a import b [as c]
// from .a.b import c [as d]
// from xxx import *
void Compiler::compile_from_import() {
if(name_scope() != NAME_GLOBAL) SyntaxError("import statement should be used in global scope");
Opcode op = OP_IMPORT_NAME;
if(match(TK("."))) op = OP_IMPORT_NAME_REL;
std::vector<Str> parts;
int dots = 0;
if(op == OP_IMPORT_NAME_REL){
while(true){
switch(curr().type){
case TK("."): dots++; break;
case TK("..."): dots+=3; break;
default: goto __EAT_DOTS_END;
}
advance();
}
__EAT_DOTS_END:
std::stringstream ss;
for(int i=0; i<dots; i++) ss << '.';
if(dots > 0){
// @id is optional if dots > 0
if(match(TK("@id"))){
parts.push_back(prev().str());
ss << prev().str();
while (match(TK("."))) {
consume(TK("@id"));
parts.push_back(prev().str());
ss << "." << prev().str();
}
}
}else{
// @id is required if dots == 0
consume(TK("@id"));
parts.push_back(prev().str());
ss << prev().str();
while (match(TK("."))) {
consume(TK("@id"));
parts.push_back(prev().str());
ss << "." << prev().str();
}
}
FastStrStream ss;
for (int i=0; i<parts.size(); i++) {
if(i > 0) ss << ".";
ss << parts[i];
}
ctx()->emit(op, ctx()->add_const(VAR(ss.str())), prev().line);
ctx()->emit(OP_IMPORT_PATH, ctx()->add_const(VAR(ss.str())), prev().line);
consume(TK("import"));
if (match(TK("*"))) {
@ -537,7 +545,6 @@ __SUBSCR_END:
ctx()->emit(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
consume(TK("@id"));
Str name = prev().str();
// module's __getattr__ should be customized or use a new opcode...
ctx()->emit(OP_LOAD_ATTR, StrName(name).index, prev().line);
if (match(TK("as"))) {
consume(TK("@id"));

View File

@ -171,18 +171,18 @@ void init_builtins(VM* _vm) {
if(ext == ".so" || ext == ".dll" || ext == ".dylib"){
dylib_entry_t entry = load_dylib(name.c_str());
if(!entry){
vm->_error("ImportError", "cannot load dynamic library: " + name.escape());
vm->ImportError("cannot load dynamic library: " + name.escape());
}
vm->_c.s_view.push(ArgsView(vm->s_data.end(), vm->s_data.end()));
const char* name = entry(vm, PK_VERSION);
vm->_c.s_view.pop();
if(name == nullptr){
vm->_error("ImportError", "module initialization failed: " + Str(name).escape());
vm->ImportError("module initialization failed: " + Str(name).escape());
}
return vm->_modules[name];
}
}
return vm->py_import(name);
return vm->py_import(name, vm->top_frame()->_module);
});
_vm->bind_builtin_func<2>("divmod", [](VM* vm, ArgsView args) {
@ -560,7 +560,7 @@ void init_builtins(VM* _vm) {
_vm->bind(_vm->_t(_vm->tp_str), "split(self, sep=' ')", [](VM* vm, ArgsView args) {
const Str& self = _CAST(Str&, args[0]);
std::vector<std::string_view> parts = self.split(CAST(Str&, args[1]));
std::vector<std::string_view> parts = self.split(CAST(Str&, args[1]), false);
List ret(parts.size());
for(int i=0; i<parts.size(); i++) ret[i] = VAR(Str(parts[i]));
return VAR(std::move(ret));
@ -1225,6 +1225,16 @@ void init_builtins(VM* _vm) {
}
return vm->True;
});
// repr(module) -> "<module 'pkg.name'>"; the package prefix is omitted when
// the module's __package__ attribute is an empty string.
_vm->bind__repr__(_vm->tp_module, [](VM* vm, PyObject* obj) {
    const Str& pkg = CAST(Str&, obj->attr(__package__));
    const Str& short_name = CAST(Str&, obj->attr(__name__));
    // top-level module: the short name is already the full name
    if(pkg.empty()) return VAR(fmt("<module ", short_name.escape(), ">"));
    Str qualified = pkg + "." + short_name;
    return VAR(fmt("<module ", qualified.escape(), ">"));
});
/************ property ************/
_vm->bind_constructor<-1>("property", [](VM* vm, ArgsView args) {
if(args.size() == 1+1){

View File

@ -254,6 +254,14 @@ int utf8len(unsigned char c, bool suppress){
return p - data;
}
// Returns a copy of this string in which every byte equal to `old` has been
// overwritten with `new_`. The receiver itself is left untouched.
Str Str::replace(char old, char new_) const{
    Str result = *this;
    char* const stop = result.data + result.size;
    for(char* cursor = result.data; cursor != stop; ++cursor){
        if(*cursor == old) *cursor = new_;
    }
    return result;
}
Str Str::replace(const Str& old, const Str& new_, int count) const {
std::stringstream ss;
int start = 0;
@ -308,16 +316,19 @@ int utf8len(unsigned char c, bool suppress){
return _byte_index_to_unicode(size);
}
std::vector<std::string_view> Str::split(const Str& sep) const{
std::vector<std::string_view> Str::split(const Str& sep, bool remove_empty) const{
std::vector<std::string_view> result;
std::string_view tmp;
int start = 0;
while(true){
int i = index(sep, start);
if(i == -1) break;
result.push_back(sv().substr(start, i - start));
tmp = sv().substr(start, i - start);
if(!remove_empty || !tmp.empty()) result.push_back(tmp);
start = i + sep.size;
}
result.push_back(sv().substr(start, size - start));
tmp = sv().substr(start, size - start);
if(!remove_empty || !tmp.empty()) result.push_back(tmp);
return result;
}

View File

@ -215,57 +215,73 @@ namespace pkpy{
return call_method(obj, __next__);
}
// Imports the module designated by `path` and returns its module object.
//
// path    - '.'-separated module path. A path that starts with one or more
//           dots is relative and is resolved against the full name
//           (__package__ + "." + __name__) of `_module`: one dot keeps that
//           full name as the base, every further dot drops one trailing
//           component of it.
// _module - the importing module; only read for relative paths.
//
// An already registered module is returned as is. Otherwise the source is
// taken from `_lazy_modules`, or fetched through `_import_handler` as
// "<path>.py" and then "<path>/__init__.py" (with '.' mapped to kPlatformSep),
// compiled, and executed inside a freshly created module.
//
// Errors (reported through the VM error helpers): ValueError for an empty
// path; ImportError for a relative import beyond the top level, a circular
// import, or a module whose source cannot be found.
PyObject* VM::py_import(Str path, PyObject* _module){
    if(path.empty()) vm->ValueError("empty module name");

    // joins components back into a '.'-separated path
    auto f_join = [](const std::vector<std::string_view>& cpnts){
        std::stringstream ss;
        for(int i=0; i<cpnts.size(); i++){
            if(i != 0) ss << ".";
            ss << cpnts[i];
        }
        return Str(ss.str());
    };

    if(path[0] == '.'){
        Str _mod_name = CAST(Str&, _module->attr(__name__));
        Str _mod_package = CAST(Str&, _module->attr(__package__));
        // get _module's fullname
        if(!_mod_package.empty()) _mod_name = _mod_package + "." + _mod_name;
        // convert relative path to absolute path
        std::vector<std::string_view> cpnts = _mod_name.split(".", true);
        int prefix = 0;     // how many dots in the prefix
        for(int i=0; i<path.length(); i++){
            if(path[i] == '.') prefix++;
            else break;
        }
        if(prefix > static_cast<int>(cpnts.size())){
            ImportError("attempted relative import beyond top-level package");
        }
        path = path.substr(prefix);     // remove prefix
        for(int i=1; i<prefix; i++) cpnts.pop_back();
        // A dots-only path ("from . import a") leaves nothing behind the
        // prefix; pushing that empty piece would produce a module name with
        // a trailing '.', so the base itself becomes the target instead.
        if(!path.empty()) cpnts.push_back(path.sv());
        // f_join() finishes reading the views into `path` before the assignment
        path = f_join(cpnts);
    }

    StrName name(path);     // path to StrName

    // check circular import
    for(StrName pending_name: _import_context.pending){
        if(pending_name == name) ImportError(fmt("circular import ", name.escape()));
    }

    PyObject* ext_mod = _modules.try_get(name);
    if(ext_mod != nullptr) return ext_mod;

    // try import
    Str filename = path.replace('.', kPlatformSep) + ".py";
    Str source;
    auto it = _lazy_modules.find(name);
    if(it == _lazy_modules.end()){
        Bytes b = _import_handler(filename);
        if(!b){
            // not a plain module file: fall back to the package initializer
            filename = path.replace('.', kPlatformSep).str() + kPlatformSep + "__init__.py";
            b = _import_handler(filename);
        }
        if(!b) ImportError(fmt("module ", path.escape(), " not found"));
        source = Str(b.str());
    }else{
        // a lazy module is consumed on first import
        source = it->second;
        _lazy_modules.erase(it);
    }

    // keep `name` marked as pending until this function returns or throws
    auto _ = _import_context.scope(name);
    CodeObject_ code = compile(source, filename, EXEC_MODE);

    // last component -> __name__, everything before it -> __package__
    auto all_cpnts = path.split(".", true);
    Str name_cpnt = all_cpnts.back();
    all_cpnts.pop_back();
    PyObject* new_mod = new_module(name_cpnt, f_join(all_cpnts));
    _exec(code, new_mod);
    new_mod->attr()._try_perfect_rehash();
    return new_mod;
}
VM::~VM() {
@ -471,12 +487,15 @@ PyObject* VM::format(Str spec, PyObject* obj){
return VAR(ret);
}
// Creates a module object and registers it in `_modules`.
//
// name    - the module's own (last) name component, stored as __name__.
// package - '.'-separated name of the enclosing package, stored as
//           __package__; empty for a top-level module.
//
// The registry key is the full name: `package + "." + name`, or just `name`
// when `package` is empty.
// Throws std::runtime_error if a module with that full name already exists.
PyObject* VM::new_module(Str name, Str package) {
    // The duplicate check must use the same key the module is stored under;
    // testing the short name would reject "pkg.x" because an unrelated
    // top-level "x" exists, and would let a second "pkg.x" slip through.
    Str fullname = package.empty() ? name : package + "." + name;
    // we do not allow override in order to avoid memory leak
    // it is because Module objects are not garbage collected
    if(_modules.contains(fullname)) throw std::runtime_error("module already exists");

    PyObject* obj = heap._new<DummyModule>(tp_module);
    obj->attr().set(__name__, VAR(name));
    obj->attr().set(__package__, VAR(package));
    _modules.set(fullname, obj);
    return obj;
}
@ -484,14 +503,14 @@ PyObject* VM::new_module(StrName name) {
static std::string _opcode_argstr(VM* vm, Bytecode byte, const CodeObject* co){
std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
switch(byte.op){
case OP_LOAD_CONST: case OP_FORMAT_STRING:
case OP_LOAD_CONST: case OP_FORMAT_STRING: case OP_IMPORT_PATH:
if(vm != nullptr){
argStr += fmt(" (", CAST(Str, vm->py_repr(co->consts[byte.arg])), ")");
}
break;
case OP_LOAD_NAME: case OP_LOAD_GLOBAL: case OP_LOAD_NONLOCAL: case OP_STORE_GLOBAL:
case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR:
case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_RAISE:
case OP_BEGIN_CLASS: case OP_RAISE:
case OP_DELETE_GLOBAL: case OP_INC_GLOBAL: case OP_DEC_GLOBAL: case OP_STORE_CLASS_ATTR:
argStr += fmt(" (", StrName(byte.arg).sv(), ")");
break;
@ -884,10 +903,6 @@ PyObject* VM::vectorcall(int ARGC, int KWARGC, bool op_call){
return nullptr;
}
// https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance
PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){
PyObject* objtype;
@ -919,6 +934,11 @@ PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){
}
return cls_var;
}
if(is_non_tagged_type(obj, tp_module)){
// try import and cache it!
}
if(throw_err) AttributeError(obj, name);
return nullptr;
}