diff --git a/python/_dict.py b/python/_dict.py deleted file mode 100644 index 97a3515f..00000000 --- a/python/_dict.py +++ /dev/null @@ -1,119 +0,0 @@ -class dict: - def __init__(self, mapping=None): - self._capacity = 16 - self._a = [None] * self._capacity - self._len = 0 - - if mapping is not None: - for k,v in mapping: - self[k] = v - - def __len__(self): - return self._len - - def __probe(self, key): - i = hash(key) % self._capacity - while self._a[i] is not None: - if self._a[i][0] == key: - return True, i - i = (i + 1) % self._capacity - return False, i - - def __getitem__(self, key): - ok, i = self.__probe(key) - if not ok: - raise KeyError(repr(key)) - return self._a[i][1] - - def __contains__(self, key): - ok, i = self.__probe(key) - return ok - - def __setitem__(self, key, value): - ok, i = self.__probe(key) - if ok: - self._a[i][1] = value - else: - self._a[i] = [key, value] - self._len += 1 - if self._len > self._capacity * 0.67: - self._capacity *= 2 - self.__rehash() - - def __delitem__(self, key): - ok, i = self.__probe(key) - if not ok: - raise KeyError(repr(key)) - self._a[i] = None - self._len -= 1 - - def __rehash(self): - old_a = self._a - self._a = [None] * self._capacity - self._len = 0 - for kv in old_a: - if kv is not None: - self[kv[0]] = kv[1] - - def get(self, key, default=None): - ok, i = self.__probe(key) - if ok: - return self._a[i][1] - return default - - def keys(self): - for kv in self._a: - if kv is not None: - yield kv[0] - - def values(self): - for kv in self._a: - if kv is not None: - yield kv[1] - - def items(self): - for kv in self._a: - if kv is not None: - yield kv[0], kv[1] - - def clear(self): - self._a = [None] * self._capacity - self._len = 0 - - def update(self, other): - for k, v in other.items(): - self[k] = v - - def copy(self): - d = dict() - for kv in self._a: - if kv is not None: - d[kv[0]] = kv[1] - return d - - def __repr__(self): - a = [repr(k)+': '+repr(v) for k,v in self.items()] - return '{'+ ', '.join(a) + '}' - - def __json__(self): - a = [] - for k,v in self.items(): - if type(k) is not str: - raise TypeError('json keys must be strings, got ' + repr(k) ) - a.append(k.__json__()+': '+v.__json__()) - return '{'+ ', '.join(a) + '}' - - def __eq__(self, __o: object) -> bool: - if type(__o) is not dict: - return False - if len(self) != len(__o): - return False - for k in self.keys(): - if k not in __o: - return False - if self[k] != __o[k]: - return False - return True - - def __ne__(self, __o: object) -> bool: - return not self.__eq__(__o) \ No newline at end of file diff --git a/src/ceval.h b/src/ceval.h index 262ef246..a1c3dfb1 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -368,6 +368,7 @@ __NEXT_STEP:; DISPATCH(); #undef BINARY_OP_SPECIAL +#undef PREDICT_INT_OP TARGET(IS_OP) _1 = POPX(); // rhs @@ -398,14 +399,14 @@ __NEXT_STEP:; frame->jump_abs(byte.arg); DISPATCH(); TARGET(POP_JUMP_IF_FALSE) - if(!asBool(POPX())) frame->jump_abs(byte.arg); + if(!py_bool(POPX())) frame->jump_abs(byte.arg); DISPATCH(); TARGET(JUMP_IF_TRUE_OR_POP) - if(asBool(TOP()) == true) frame->jump_abs(byte.arg); + if(py_bool(TOP()) == true) frame->jump_abs(byte.arg); else POP(); DISPATCH(); TARGET(JUMP_IF_FALSE_OR_POP) - if(asBool(TOP()) == false) frame->jump_abs(byte.arg); + if(py_bool(TOP()) == false) frame->jump_abs(byte.arg); else POP(); DISPATCH(); TARGET(LOOP_CONTINUE) @@ -461,10 +462,10 @@ __NEXT_STEP:; DISPATCH(); /*****************************************/ TARGET(UNARY_NEGATIVE) - TOP() = num_negated(TOP()); + TOP() = py_negate(TOP()); DISPATCH(); TARGET(UNARY_NOT) - TOP() = VAR(!asBool(TOP())); + TOP() = VAR(!py_bool(TOP())); DISPATCH(); /*****************************************/ TARGET(GET_ITER) @@ -578,7 +579,7 @@ __NEXT_STEP:; _0 = t[0]; msg = CAST(Str&, py_str(t[1])); } - bool ok = asBool(_0); + bool ok = py_bool(_0); POP(); if(!ok) _error("AssertionError", msg); } DISPATCH(); diff --git a/src/compiler.h b/src/compiler.h index 083b1ba2..02b69f4a 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -980,7 +980,7 @@ __SUBSCR_END: case TK("-"): { consume(TK("@num")); PyObject* val = to_object(prev().value); - return vm->num_negated(val); + return vm->py_negate(val); } case TK("@num"): return to_object(prev().value); case TK("@str"): return to_object(prev().value); diff --git a/src/dict.h b/src/dict.h index 33026235..f0c39eb0 100644 --- a/src/dict.h +++ b/src/dict.h @@ -70,6 +70,34 @@ struct Dict{ return _items[i].second; } + bool contains(PyObject* key) const{ + bool ok; int i; + _probe(key, ok, i); + return ok; + } + + void erase(PyObject* key){ + bool ok; int i; + _probe(key, ok, i); + if(!ok) return; + _items[i].first = nullptr; + _size--; + } + + std::vector items() const { + std::vector v; + for(uint16_t i=0; i<_capacity; i++){ + if(_items[i].first == nullptr) continue; + v.push_back(_items[i]); + } + return v; + } + + void clear(){ + memset(_items, 0, _capacity * sizeof(Item)); + _size = 0; + } + ~Dict(){ pool128.dealloc(_items); } }; diff --git a/src/pocketpy.h b/src/pocketpy.h index f4d4f366..58be1cb5 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -2,6 +2,7 @@ #include "ceval.h" #include "compiler.h" +#include "dict.h" #include "obj.h" #include "repl.h" #include "iter.h" @@ -139,7 +140,7 @@ inline void init_builtins(VM* _vm) { }); _vm->bind_builtin_func<1>("hash", [](VM* vm, ArgsView args){ - i64 value = vm->hash(args[0]); + i64 value = vm->py_hash(args[0]); if(((value << 2) >> 2) != value) value >>= 2; return VAR(value); }); @@ -262,7 +263,7 @@ inline void init_builtins(VM* _vm) { _vm->bind__pow__(_vm->tp_int, py_number_pow); _vm->bind__pow__(_vm->tp_float, py_number_pow); - /************ PyInt ************/ + /************ int ************/ _vm->bind_constructor<2>("int", [](VM* vm, ArgsView args) { if (is_type(args[1], vm->tp_float)) return VAR((i64)CAST(f64, args[1])); if (is_type(args[1], vm->tp_int)) return args[1]; @@ -295,6 +296,8 @@ inline void init_builtins(VM* _vm) { _vm->bind__repr__(_vm->tp_int, [](VM* vm, PyObject* obj) { return VAR(std::to_string(_CAST(i64, obj))); }); _vm->bind__json__(_vm->tp_int, [](VM* vm, PyObject* obj) { return VAR(std::to_string(_CAST(i64, obj))); }); + _vm->bind__neg__(_vm->tp_int, [](VM* vm, PyObject* obj) { return VAR(-_CAST(i64, obj)); }); + _vm->bind__hash__(_vm->tp_int, [](VM* vm, PyObject* obj) { return _CAST(i64, obj); }); #define INT_BITWISE_OP(name, op) \ @@ -310,7 +313,7 @@ inline void init_builtins(VM* _vm) { #undef INT_BITWISE_OP - /************ PyFloat ************/ + /************ float ************/ _vm->bind_constructor<2>("float", [](VM* vm, ArgsView args) { if (is_type(args[1], vm->tp_int)) return VAR((f64)CAST(i64, args[1])); if (is_type(args[1], vm->tp_float)) return args[1]; @@ -335,6 +338,8 @@ inline void init_builtins(VM* _vm) { return (i64)std::hash()(val); }); + _vm->bind__neg__(_vm->tp_float, [](VM* vm, PyObject* obj) { return VAR(-_CAST(f64, obj)); }); + _vm->bind__repr__(_vm->tp_float, [](VM* vm, PyObject* obj) { f64 val = _CAST(f64, obj); if(std::isinf(val) || std::isnan(val)) return VAR(std::to_string(val)); @@ -350,7 +355,7 @@ inline void init_builtins(VM* _vm) { return VAR(std::to_string(val)); }); - /************ PyString ************/ + /************ str ************/ _vm->bind_constructor<2>("str", CPP_LAMBDA(vm->py_str(args[1]))); _vm->bind__hash__(_vm->tp_str, [](VM* vm, PyObject* obj) { @@ -475,9 +480,9 @@ inline void init_builtins(VM* _vm) { return VAR(Str(p)); }); - /************ PyList ************/ + /************ list ************/ _vm->bind_constructor<2>("list", [](VM* vm, ArgsView args) { - return vm->asList(args[1]); + return vm->py_list(args[1]); }); _vm->bind_method<1>("list", "append", [](VM* vm, ArgsView args) { @@ -563,9 +568,9 @@ inline void init_builtins(VM* _vm) { self.erase(i); }); - /************ PyTuple ************/ + /************ tuple ************/ _vm->bind_constructor<2>("tuple", [](VM* vm, ArgsView args) { - List list = CAST(List, vm->asList(args[1])); + List list = CAST(List, vm->py_list(args[1])); return VAR(Tuple(std::move(list))); }); @@ -573,7 +578,7 @@ inline void init_builtins(VM* _vm) { i64 x = 1000003; const Tuple& items = CAST(Tuple&, obj); for (int i=0; ihash(items[i]); + i64 y = vm->py_hash(items[i]); // recommended by Github Copilot x = x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2)); } @@ -590,7 +595,7 @@ inline void init_builtins(VM* _vm) { }); /************ bool ************/ - _vm->bind_constructor<2>("bool", CPP_LAMBDA(VAR(vm->asBool(args[1])))); + _vm->bind_constructor<2>("bool", CPP_LAMBDA(VAR(vm->py_bool(args[1])))); _vm->bind__hash__(_vm->tp_bool, [](VM* vm, PyObject* obj) { return (i64)_CAST(bool, obj); }); @@ -758,6 +763,165 @@ inline void init_builtins(VM* _vm) { MappingProxy& self = _CAST(MappingProxy&, obj); return self.attr().contains(CAST(Str&, key)); }); + + /************ dict ************/ + _vm->bind_constructor<-1>("dict", [](VM* vm, ArgsView args){ + return VAR(Dict(vm)); + }); + + _vm->bind_method<-1>("dict", "__init__", [](VM* vm, ArgsView args){ + if(args.size() == 1+0) return vm->None; + if(args.size() == 1+1){ + auto _lock = vm->heap.gc_scope_lock(); + Dict& self = _CAST(Dict&, args[0]); + List& list = CAST(List&, vm->py_list(args[1])); + for(PyObject* item : list){ + Tuple& t = CAST(Tuple&, item); + if(t.size() != 2){ + vm->ValueError("dict() takes an iterable of tuples (key, value)"); + return vm->None; + } + self.set(t[0], t[1]); + } + } + vm->TypeError("dict() takes at most 1 argument"); + return vm->None; + }); + + _vm->bind__len__(_vm->tp_dict, [](VM* vm, PyObject* obj) { + return (i64)_CAST(Dict&, obj).size(); + }); + + _vm->bind__getitem__(_vm->tp_dict, [](VM* vm, PyObject* obj, PyObject* index) { + Dict& self = _CAST(Dict&, obj); + PyObject* ret = self.try_get(index); + if(ret == nullptr) vm->KeyError(index); + return ret; + }); + + _vm->bind__setitem__(_vm->tp_dict, [](VM* vm, PyObject* obj, PyObject* key, PyObject* value) { + Dict& self = _CAST(Dict&, obj); + self.set(key, value); + }); + + _vm->bind__delitem__(_vm->tp_dict, [](VM* vm, PyObject* obj, PyObject* key) { + Dict& self = _CAST(Dict&, obj); + if(!self.contains(key)) vm->KeyError(key); + self.erase(key); + }); + + _vm->bind__contains__(_vm->tp_dict, [](VM* vm, PyObject* obj, PyObject* key) { + Dict& self = _CAST(Dict&, obj); + return self.contains(key); + }); + + _vm->bind_method<-1>("dict", "get", [](VM* vm, ArgsView args) { + Dict& self = _CAST(Dict&, args[0]); + if(args.size() == 1+1){ + PyObject* ret = self.try_get(args[1]); + if(ret != nullptr) return ret; + return vm->None; + }else if(args.size() == 1+2){ + PyObject* ret = self.try_get(args[1]); + if(ret != nullptr) return ret; + return args[2]; + } + vm->TypeError("get() takes at most 2 arguments"); + return vm->None; + }); + + _vm->bind_method<0>("dict", "keys", [](VM* vm, ArgsView args) { + Dict& self = _CAST(Dict&, args[0]); + List keys; + for(auto& item : self.items()) keys.push_back(item.first); + return VAR(std::move(keys)); + }); + + _vm->bind_method<0>("dict", "values", [](VM* vm, ArgsView args) { + Dict& self = _CAST(Dict&, args[0]); + List values; + for(auto& item : self.items()) values.push_back(item.second); + return VAR(std::move(values)); + }); + + _vm->bind_method<0>("dict", "items", [](VM* vm, ArgsView args) { + Dict& self = _CAST(Dict&, args[0]); + List items; + for(auto& item : self.items()){ + PyObject* t = VAR(Tuple({item.first, item.second})); + items.push_back(std::move(t)); + } + return VAR(std::move(items)); + }); + + _vm->bind_method<-1>("dict", "update", [](VM* vm, ArgsView args) { + Dict& self = _CAST(Dict&, args[0]); + Dict& other = CAST(Dict&, args[1]); + for(auto& item : other.items()) self.set(item.first, item.second); + return vm->None; + }); + + _vm->bind_method<0>("dict", "copy", [](VM* vm, ArgsView args) { + Dict& self = _CAST(Dict&, args[0]); + Dict copy(vm); + for(auto& item : self.items()) copy.set(item.first, item.second); + return VAR(std::move(copy)); + }); + + _vm->bind_method<0>("dict", "clear", [](VM* vm, ArgsView args) { + Dict& self = _CAST(Dict&, args[0]); + self.clear(); + return vm->None; + }); + + _vm->bind__repr__(_vm->tp_dict, [](VM* vm, PyObject* obj) { + Dict& self = _CAST(Dict&, obj); + std::stringstream ss; + ss << "{"; + bool first = true; + for(auto& item : self.items()){ + if(!first) ss << ", "; + first = false; + Str key = CAST(Str&, vm->py_repr(item.first)); + Str value = CAST(Str&, vm->py_repr(item.second)); + ss << key << ": " << value; + } + ss << "}"; + return VAR(ss.str()); + }); + + _vm->bind__json__(_vm->tp_dict, [](VM* vm, PyObject* obj) { + Dict& self = _CAST(Dict&, obj); + std::stringstream ss; + ss << "{"; + bool first = true; + for(auto& item : self.items()){ + if(!first) ss << ", "; + first = false; + Str key = CAST(Str&, item.first); + Str value = CAST(Str&, vm->py_json(item.second)); + ss << key << ": " << value; + } + ss << "}"; + return VAR(ss.str()); + }); + + _vm->bind__eq__(_vm->tp_dict, [](VM* vm, PyObject* a, PyObject* b) { + Dict& self = _CAST(Dict&, a); + if(!is_non_tagged_type(b, vm->tp_dict)) return false; + Dict& other = _CAST(Dict&, b); + if(self.size() != other.size()) return false; + for(auto& item : self.items()){ + PyObject* value = other.try_get(item.first); + if(value == nullptr) return false; + if(!vm->py_equals(item.second, value)) return false; + } + return true; + }); + + _vm->bind__ne__(_vm->tp_dict, [](VM* vm, PyObject* a, PyObject* b) { + return !vm->py_equals(a, b); + }); } #ifdef _WIN32 @@ -819,9 +983,7 @@ inline void add_module_json(VM* vm){ }); vm->bind_func<1>(mod, "dumps", [](VM* vm, ArgsView args) { - const PyTypeInfo* ti = vm->_inst_type_info(args[0]); - if(ti->m__json__) return ti->m__json__(vm, args[0]); - return vm->call_method(args[0], __json__); + return vm->py_json(args[0]); }); } @@ -1049,8 +1211,6 @@ inline void VM::post_init(){ CodeObject_ code = compile(kPythonLibs["builtins"], "", EXEC_MODE); this->_exec(code, this->builtins); - code = compile(kPythonLibs["_dict"], "", EXEC_MODE); - this->_exec(code, this->builtins); code = compile(kPythonLibs["_set"], "", EXEC_MODE); this->_exec(code, this->builtins); diff --git a/src/vm.h b/src/vm.h index ac673392..dd6435f5 100644 --- a/src/vm.h +++ b/src/vm.h @@ -9,7 +9,7 @@ #include "obj.h" #include "str.h" #include "tuplelist.h" -// #include "dict.h" +#include "dict.h" namespace pkpy{ @@ -147,6 +147,7 @@ public: Type tp_function, tp_native_func, tp_iterator, tp_bound_method; Type tp_slice, tp_range, tp_module; Type tp_super, tp_exception, tp_bytes, tp_mappingproxy; + Type tp_dict; const bool enable_os; @@ -181,6 +182,12 @@ public: return call_method(obj, __repr__); } + PyObject* py_json(PyObject* obj){ + const PyTypeInfo* ti = _inst_type_info(obj); + if(ti->m__json__) return ti->m__json__(this, obj); + return call_method(obj, __json__); + } + PyObject* py_iter(PyObject* obj){ if(is_type(obj, tp_iterator)) return obj; const PyTypeInfo* ti = _inst_type_info(obj); @@ -521,7 +528,7 @@ public: void IndexError(const Str& msg){ _error("IndexError", msg); } void ValueError(const Str& msg){ _error("ValueError", msg); } void NameError(StrName name){ _error("NameError", fmt("name ", name.escape() + " is not defined")); } - void KeyError(const Str& msg){ _error("KeyError", msg); } + void KeyError(PyObject* obj){ _error("KeyError", OBJ_GET(Str, py_repr(obj))); } void AttributeError(PyObject* obj, StrName name){ // OBJ_NAME calls getattr, which may lead to a infinite recursion @@ -567,15 +574,16 @@ public: _modules.clear(); _lazy_modules.clear(); } - +#if DEBUG_CEVAL_STEP void _log_s_data(const char* title = nullptr); +#endif PyObject* vectorcall(int ARGC, int KWARGC=0, bool op_call=false); CodeObject_ compile(Str source, Str filename, CompileMode mode, bool unknown_global_scope=false); - PyObject* num_negated(PyObject* obj); + PyObject* py_negate(PyObject* obj); f64 num_to_float(PyObject* obj); - bool asBool(PyObject* obj); - i64 hash(PyObject* obj); - PyObject* asList(PyObject*); + bool py_bool(PyObject* obj); + i64 py_hash(PyObject* obj); + PyObject* py_list(PyObject*); PyObject* new_module(StrName name); Str disassemble(CodeObject_ co); void init_builtin_types(); @@ -622,6 +630,7 @@ DEF_NATIVE_2(Slice, tp_slice) DEF_NATIVE_2(Exception, tp_exception) DEF_NATIVE_2(Bytes, tp_bytes) DEF_NATIVE_2(MappingProxy, tp_mappingproxy) +DEF_NATIVE_2(Dict, tp_dict) #define PY_CAST_INT(T) \ template<> inline T py_cast(VM* vm, PyObject* obj){ \ @@ -733,14 +742,10 @@ inline PyObject* py_var(VM* vm, PyObject* val){ return val; } -inline PyObject* VM::num_negated(PyObject* obj){ - if (is_int(obj)){ - return VAR(-CAST(i64, obj)); - }else if(is_float(obj)){ - return VAR(-CAST(f64, obj)); - } - TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape()); - return nullptr; +inline PyObject* VM::py_negate(PyObject* obj){ + const PyTypeInfo* ti = _inst_type_info(obj); + if(ti->m__neg__) return ti->m__neg__(this, obj); + return call_method(obj, __neg__); } inline f64 VM::num_to_float(PyObject* obj){ @@ -753,7 +758,7 @@ inline f64 VM::num_to_float(PyObject* obj){ return 0; } -inline bool VM::asBool(PyObject* obj){ +inline bool VM::py_bool(PyObject* obj){ if(is_non_tagged_type(obj, tp_bool)) return obj == True; if(obj == None) return false; if(is_int(obj)) return _CAST(i64, obj) != 0; @@ -767,7 +772,7 @@ inline bool VM::asBool(PyObject* obj){ return true; } -inline PyObject* VM::asList(PyObject* it){ +inline PyObject* VM::py_list(PyObject* it){ auto _lock = heap.gc_scope_lock(); it = py_iter(it); List list; @@ -821,7 +826,7 @@ inline void VM::parse_int_slice(const Slice& s, int length, int& start, int& sto } } -inline i64 VM::hash(PyObject* obj){ +inline i64 VM::py_hash(PyObject* obj){ const PyTypeInfo* ti = _inst_type_info(obj); if(ti->m__hash__) return ti->m__hash__(this, obj); PyObject* ret = call_method(obj, __hash__); @@ -974,6 +979,7 @@ inline Str VM::disassemble(CodeObject_ co){ return Str(ss.str()); } +#if DEBUG_CEVAL_STEP inline void VM::_log_s_data(const char* title) { if(_main == nullptr) return; if(callstack.empty()) return; @@ -1023,6 +1029,7 @@ inline void VM::_log_s_data(const char* title) { Bytecode byte = frame->co->codes[frame->_ip]; std::cout << output << " " << OP_NAMES[byte.op] << " " << _opcode_argstr(nullptr, byte, frame->co) << std::endl; } +#endif inline void VM::init_builtin_types(){ _all_types.push_back({heap._new(Type(1), Type(0)), -1, "object", true}); @@ -1048,6 +1055,7 @@ inline void VM::init_builtin_types(){ tp_exception = _new_type_object("Exception"); tp_bytes = _new_type_object("bytes"); tp_mappingproxy = _new_type_object("mappingproxy"); + tp_dict = _new_type_object("dict"); this->None = heap._new(_new_type_object("NoneType"), {}); this->Ellipsis = heap._new(_new_type_object("ellipsis"), {}); @@ -1068,6 +1076,7 @@ inline void VM::init_builtin_types(){ builtins->attr().set("tuple", _t(tp_tuple)); builtins->attr().set("range", _t(tp_range)); builtins->attr().set("bytes", _t(tp_bytes)); + builtins->attr().set("dict", _t(tp_dict)); builtins->attr().set("StopIteration", StopIteration); builtins->attr().set("slice", _t(tp_slice)); @@ -1444,13 +1453,13 @@ inline void VM::bind__len__(Type type, i64 (*f)(VM*, PyObject*)){ } -// inline void Dict::_probe(PyObject *key, bool &ok, int &i) const{ -// ok = false; -// i = vm->hash(key) & _mask; -// while(_items[i].first != nullptr) { -// if(vm->py_equals(_items[i].first, key)) { ok = true; break; } -// i = (i + 1) & _mask; -// } -// } +inline void Dict::_probe(PyObject *key, bool &ok, int &i) const{ + ok = false; + i = vm->py_hash(key) & _mask; + while(_items[i].first != nullptr) { + if(vm->py_equals(_items[i].first, key)) { ok = true; break; } + i = (i + 1) & _mask; + } +} } // namespace pkpy \ No newline at end of file