From 43ee77e4b0a43f2b4d9c8f9d5ae3beb41056c2d4 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 21 Sep 2023 23:26:12 +0800 Subject: [PATCH] make `int` always 64-bit --- docs/features/differences.md | 2 +- docs/features/long.md | 4 +- include/pocketpy/common.h | 24 +---------- include/pocketpy/obj.h | 80 +++++++++++++++++++++++++----------- include/pocketpy/vm.h | 53 +++++++++++------------- src/ceval.cpp | 8 +++- src/lexer.cpp | 4 +- src/pocketpy.cpp | 4 +- src/vm.cpp | 12 ++---- tests/99_builtin_func.py | 27 ------------ 10 files changed, 99 insertions(+), 119 deletions(-) diff --git a/docs/features/differences.md b/docs/features/differences.md index b39ba772..c55c912a 100644 --- a/docs/features/differences.md +++ b/docs/features/differences.md @@ -35,7 +35,7 @@ The easiest way to test a feature is to [try it on your browser](https://pocketp 2. When a generator is exhausted, `StopIteration` is returned instead of raised. 3. `++i` and `--j` is an increment/decrement statement, not an expression. 4. `int` does not derive from `bool`. -5. `int` is not of unlimited precision. In 32 bit system, `int` and `float` is 30 bit; in 64 bit system, they are both 62 bit. You can use `long` type explicitly for arbitrary sized integers. +5. `int` is 64-bit. You can use `long` type explicitly for arbitrary sized integers. 6. `__ne__` is not required. Define `__eq__` is enough. 7. Raw string cannot have boundary quotes in it, even escaped. See [#55](https://github.com/blueloveTH/pocketpy/issues/55). 8. In a starred unpacked assignment, e.g. `a, b, *c = x`, the starred variable can only be presented in the last position. `a, *b, c = x` is not supported. diff --git a/docs/features/long.md b/docs/features/long.md index 0ca6e58c..62827e10 100644 --- a/docs/features/long.md +++ b/docs/features/long.md @@ -3,9 +3,7 @@ icon: dot title: Arbitrary Sized Integers --- -Unlike cpython, pkpy's `int` is of limited precision. -In 32 bit platforms, it is 30 bit; -in 64 bit platforms, it is 62 bit. +Unlike cpython, pkpy's `int` is of limited precision (64-bit). For arbitrary sized integers, we provide a builtin `long` type, just like python2's `long`. `long` is implemented via pure python in [_long.py](https://github.com/blueloveTH/pocketpy/blob/main/python/_long.py). diff --git a/include/pocketpy/common.h b/include/pocketpy/common.h index eb13dbbf..f783c888 100644 --- a/include/pocketpy/common.h +++ b/include/pocketpy/common.h @@ -74,8 +74,6 @@ struct NumberTraits<4> { using int_t = int32_t; using float_t = float; - template - static int_t stoi(Args&&... args) { return std::stoi(std::forward(args)...); } template static float_t stof(Args&&... args) { return std::stof(std::forward(args)...); } @@ -89,8 +87,6 @@ struct NumberTraits<8> { using int_t = int64_t; using float_t = double; - template - static int_t stoi(Args&&... args) { return std::stoll(std::forward(args)...); } template static float_t stof(Args&&... args) { return std::stod(std::forward(args)...); } @@ -100,10 +96,10 @@ struct NumberTraits<8> { }; using Number = NumberTraits; -using i64 = Number::int_t; +using i64 = int64_t; using f64 = Number::float_t; -static_assert(sizeof(i64) == sizeof(void*)); +static_assert(sizeof(i64) == 8); static_assert(sizeof(f64) == sizeof(void*)); static_assert(std::numeric_limits::is_iec559); @@ -136,22 +132,6 @@ struct Type { struct PyObject; #define PK_BITS(p) (reinterpret_cast(p)) -inline bool is_tagged(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) != 0b00; } -inline bool is_int(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b01; } -inline bool is_float(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b10; } -inline bool is_special(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b11; } - -inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept { - return is_tagged(a) && is_tagged(b); -} - -inline bool is_both_int(PyObject* a, PyObject* b) noexcept { - return is_int(a) && is_int(b); -} - -inline bool is_both_float(PyObject* a, PyObject* b) noexcept { - return is_float(a) && is_float(b); -} // special singals, is_tagged() for them is true inline PyObject* const PY_NULL = (PyObject*)0b000011; // tagged null diff --git a/include/pocketpy/obj.h b/include/pocketpy/obj.h index 74fcf0c7..be627187 100644 --- a/include/pocketpy/obj.h +++ b/include/pocketpy/obj.h @@ -123,6 +123,44 @@ struct PyObject{ } }; +const int kTpIntIndex = 2; +const int kTpFloatIndex = 3; + +inline bool is_tagged(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) != 0b00; } +inline bool is_small_int(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b01; } +inline bool is_heap_int(PyObject* p) noexcept { return !is_tagged(p) && p->type.index == kTpIntIndex; } +inline bool is_float(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b10; } +inline bool is_special(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b11; } +inline bool is_int(PyObject* p) noexcept { return is_small_int(p) || is_heap_int(p); } + +inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept { + return is_tagged(a) && is_tagged(b); +} + +inline bool is_both_float(PyObject* a, PyObject* b) noexcept { + return is_float(a) && is_float(b); +} + +inline bool is_type(PyObject* obj, Type type) { +#if PK_DEBUG_EXTRA_CHECK + if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr"); + if(is_special(obj)) throw std::runtime_error("is_type() called with special object"); +#endif + switch(type.index){ + case kTpIntIndex: return is_int(obj); + case kTpFloatIndex: return is_float(obj); + default: return !is_tagged(obj) && obj->type == type; + } +} + +inline bool is_non_tagged_type(PyObject* obj, Type type) { +#if PK_DEBUG_EXTRA_CHECK + if(obj == nullptr) throw std::runtime_error("is_non_tagged_type() called with nullptr"); + if(is_special(obj)) throw std::runtime_error("is_non_tagged_type() called with special object"); +#endif + return !is_tagged(obj) && obj->type == type; +} + template struct has_gc_marker : std::false_type {}; template struct has_gc_marker> : std::true_type {}; @@ -170,29 +208,6 @@ Str obj_type_name(VM* vm, Type type); #define OBJ_NAME(obj) PK_OBJ_GET(Str, vm->getattr(obj, __name__)) #endif -const int kTpIntIndex = 2; -const int kTpFloatIndex = 3; - -inline bool is_type(PyObject* obj, Type type) { -#if PK_DEBUG_EXTRA_CHECK - if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr"); - if(is_special(obj)) throw std::runtime_error("is_type() called with special object"); -#endif - switch(type.index){ - case kTpIntIndex: return is_int(obj); - case kTpFloatIndex: return is_float(obj); - default: return !is_tagged(obj) && obj->type == type; - } -} - -inline bool is_non_tagged_type(PyObject* obj, Type type) { -#if PK_DEBUG_EXTRA_CHECK - if(obj == nullptr) throw std::runtime_error("is_non_tagged_type() called with nullptr"); - if(is_special(obj)) throw std::runtime_error("is_non_tagged_type() called with special object"); -#endif - return !is_tagged(obj) && obj->type == type; -} - union BitsCvt { i64 _int; f64 _float; @@ -247,6 +262,25 @@ __T _py_cast(VM* vm, PyObject* obj) { #define CAST_DEFAULT(T, x, default_value) (x != vm->None) ? py_cast(vm, x) : (default_value) /*****************************************************************/ +template<> +struct Py_ final: PyObject { + i64 _value; + Py_(Type type, i64 val): PyObject(type), _value(val) {} + void _obj_gc_mark() override {} +}; + +inline bool try_cast_int(PyObject* obj, i64* val) noexcept { + if(is_small_int(obj)){ + *val = PK_BITS(obj) >> 2; + return true; + }else if(is_heap_int(obj)){ + *val = PK_OBJ_GET(i64, obj); + return true; + }else{ + return false; + } +} + template<> struct Py_ final: PyObject { List _value; diff --git a/include/pocketpy/vm.h b/include/pocketpy/vm.h index 74c81134..89613e86 100644 --- a/include/pocketpy/vm.h +++ b/include/pocketpy/vm.h @@ -393,13 +393,6 @@ public: TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", got " + OBJ_NAME(_t(obj)).escape()); } - void check_int(PyObject* obj){ - if(is_int(obj)) return; - check_type(obj, tp_int); // if failed, redirect to check_type to raise TypeError - } - - void check_int_or_float(PyObject* obj); - PyObject* _t(Type t){ return _all_types[t.index].obj; } @@ -487,12 +480,15 @@ DEF_NATIVE_2(StarWrapper, tp_star_wrapper) #define PY_CAST_INT(T) \ template<> inline T py_cast(VM* vm, PyObject* obj){ \ - vm->check_int(obj); \ - return (T)(PK_BITS(obj) >> 2); \ -} \ -template<> inline T _py_cast(VM* vm, PyObject* obj){ \ - PK_UNUSED(vm); \ - return (T)(PK_BITS(obj) >> 2); \ + if(is_small_int(obj)) return (T)(PK_BITS(obj) >> 2); \ + if(is_heap_int(obj)) return (T)PK_OBJ_GET(i64, obj); \ + vm->check_type(obj, vm->tp_int); \ + return 0; \ +} \ +template<> inline T _py_cast(VM* vm, PyObject* obj){ \ + PK_UNUSED(vm); \ + if(is_small_int(obj)) return (T)(PK_BITS(obj) >> 2); \ + return (T)PK_OBJ_GET(i64, obj); \ } PY_CAST_INT(char) @@ -507,43 +503,44 @@ PY_CAST_INT(unsigned long) PY_CAST_INT(unsigned long long) template<> inline float py_cast(VM* vm, PyObject* obj){ + i64 bits; if(is_float(obj)){ - i64 bits = PK_BITS(obj) & Number::c1; + bits = PK_BITS(obj) & Number::c1; return BitsCvt(bits)._float; } - if(is_int(obj)){ - return (float)_py_cast(vm, obj); - } - vm->check_int_or_float(obj); // error! + if(try_cast_int(obj, &bits)) return (float)bits; + vm->TypeError("expected 'int' or 'float', got " + OBJ_NAME(vm->_t(obj)).escape()); return 0; } template<> inline float _py_cast(VM* vm, PyObject* obj){ return py_cast(vm, obj); } template<> inline double py_cast(VM* vm, PyObject* obj){ + i64 bits; if(is_float(obj)){ - i64 bits = PK_BITS(obj) & Number::c1; + bits = PK_BITS(obj) & Number::c1; return BitsCvt(bits)._float; } - if(is_int(obj)){ - return (float)_py_cast(vm, obj); - } - vm->check_int_or_float(obj); // error! + if(try_cast_int(obj, &bits)) return (float)bits; + vm->TypeError("expected 'int' or 'float', got " + OBJ_NAME(vm->_t(obj)).escape()); return 0; } template<> inline double _py_cast(VM* vm, PyObject* obj){ return py_cast(vm, obj); } +const i64 kMaxSmallInt = (1ll << 28) - 1; +const i64 kMinSmallInt = -(1ll << 28); #define PY_VAR_INT(T) \ inline PyObject* py_var(VM* vm, T _val){ \ i64 val = static_cast(_val); \ - if(((val << 2) >> 2) != val){ \ - vm->_error("OverflowError", std::to_string(val) + " is out of range"); \ - } \ - val = (val << 2) | 0b01; \ - return reinterpret_cast(val); \ + if(val >= kMinSmallInt && val <= kMaxSmallInt){ \ + val = (val << 2) | 0b01; \ + return reinterpret_cast(val); \ + }else{ \ + return vm->heap.gcnew(vm->tp_int, val); \ + } \ } PY_VAR_INT(char) diff --git a/src/ceval.cpp b/src/ceval.cpp index 49f26f18..2458f9a5 100644 --- a/src/ceval.cpp +++ b/src/ceval.cpp @@ -2,6 +2,10 @@ namespace pkpy{ +static i64 _py_sint(PyObject* obj) noexcept { + return (i64)(PK_BITS(obj) >> 2); +} + PyObject* VM::_run_top_frame(){ FrameId frame = top_frame(); const int base_id = frame.index; @@ -323,10 +327,10 @@ __NEXT_STEP:; } DISPATCH(); /*****************************************/ #define PREDICT_INT_OP(op) \ - if(is_both_int(TOP(), SECOND())){ \ + if(is_small_int(TOP()) && is_small_int(SECOND())){ \ _1 = POPX(); \ _0 = TOP(); \ - TOP() = VAR(_CAST(i64, _0) op _CAST(i64, _1)); \ + TOP() = VAR(_py_sint(_0) op _py_sint(_1)); \ DISPATCH(); \ } diff --git a/src/lexer.cpp b/src/lexer.cpp index 765c4dea..d4df00a4 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -230,7 +230,7 @@ static bool is_unicode_Lo_char(uint32_t c) { size_t parsed; char code; try{ - code = (char)Number::stoi(hex, &parsed, 16); + code = (char)std::stoi(hex, &parsed, 16); }catch(...){ SyntaxError("invalid hex char"); } @@ -289,7 +289,7 @@ static bool is_unicode_Lo_char(uint32_t c) { PK_ASSERT(base == 10); add_token(TK("@num"), Number::stof(m[0], &size)); } else { - add_token(TK("@num"), Number::stoi(m[0], &size, base)); + add_token(TK("@num"), std::stoll(m[0], &size, base)); } PK_ASSERT((int)size == (int)m.length()); }catch(...){ diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 1174fcdd..a6b12301 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -379,7 +379,7 @@ void init_builtins(VM* _vm) { }); auto py_number_pow = [](VM* vm, PyObject* lhs_, PyObject* rhs_) { - if(is_both_int(lhs_, rhs_)){ + if(is_int(lhs_) && is_int(rhs_)){ i64 lhs = _CAST(i64, lhs_); i64 rhs = _CAST(i64, rhs_); bool flag = false; @@ -417,7 +417,7 @@ void init_builtins(VM* _vm) { const Str& s = CAST(Str&, args[1]); try{ size_t parsed = 0; - i64 val = Number::stoi(s.str(), &parsed, base); + i64 val = std::stoll(s.str(), &parsed, base); PK_ASSERT(parsed == s.length()); return VAR(val); }catch(...){ diff --git a/src/vm.cpp b/src/vm.cpp index a536d451..ff034d2b 100644 --- a/src/vm.cpp +++ b/src/vm.cpp @@ -309,12 +309,6 @@ PyObject* VM::py_negate(PyObject* obj){ return call_method(obj, __neg__); } -void VM::check_int_or_float(PyObject *obj){ - if(!is_tagged(obj)){ - TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape()); - } -} - bool VM::py_bool(PyObject* obj){ if(is_non_tagged_type(obj, tp_bool)) return obj == True; if(obj == None) return false; @@ -454,11 +448,11 @@ PyObject* VM::format(Str spec, PyObject* obj){ if(dot == 0){ width = -1; }else{ - width = Number::stoi(spec.substr(0, dot).str()); + width = std::stoi(spec.substr(0, dot).str()); } - precision = Number::stoi(spec.substr(dot+1).str()); + precision = std::stoi(spec.substr(dot+1).str()); }else{ - width = Number::stoi(spec.str()); + width = std::stoi(spec.str()); precision = -1; } }catch(...){ diff --git a/tests/99_builtin_func.py b/tests/99_builtin_func.py index d1a3ad71..adb82e9b 100644 --- a/tests/99_builtin_func.py +++ b/tests/99_builtin_func.py @@ -320,33 +320,6 @@ except: pass # /************ int ************/ -# 未完全测试准确性----------------------------------------------- -# 172: 367: _vm->bind_constructor<-1>("int", [](VM* vm, ArgsView args) { -# 28: 368: if(args.size() == 1+0) return VAR(0); -# 28: 369: if(args.size() == 1+1){ -# 26: 370: if (is_type(args[1], vm->tp_float)) return VAR((i64)CAST(f64, args[1])); -# 2: 371: if (is_type(args[1], vm->tp_int)) return args[1]; -# 1: 372: if (is_type(args[1], vm->tp_bool)) return VAR(_CAST(bool, args[1]) ? 1 : 0); -# -: 373: } -# 3: 374: if(args.size() > 1+2) vm->TypeError("int() takes at most 2 arguments"); -# 3: 375: if (is_type(args[1], vm->tp_str)) { -# 3: 376: int base = 10; -# 3: 377: if(args.size() == 1+2) base = CAST(i64, args[2]); -# 3: 378: const Str& s = CAST(Str&, args[1]); -# -: 379: try{ -# 3: 380: size_t parsed = 0; -# 3: 381: i64 val = Number::stoi(s.str(), &parsed, base); -# 3: 382: PK_ASSERT(parsed == s.length()); -# 3: 383: return VAR(val); -# 3: 384: }catch(...){ -# #####: 385: vm->ValueError("invalid literal for int(): " + s.escape()); -# #####: 386: } -# 3: 387: } -# #####: 388: vm->TypeError("invalid arguments for int()"); -# #####: 389: return vm->None; -# 28: 390: }); -# test int: - try: int('asad') print('未能拦截错误, 在测试 int')