make int always 64-bit

2025-12-06 18:20:17 +00:00 · 2023-09-21 23:26:12 +08:00 · 2023-09-21 23:26:12 +08:00 · 43ee77e4b0
commit 43ee77e4b0
parent ef6b3db24e
10 changed files with 99 additions and 119 deletions
--- a/docs/features/differences.md
+++ b/docs/features/differences.md
@ -35,7 +35,7 @@ The easiest way to test a feature is to [try it on your browser](https://pocketp
 2. When a generator is exhausted, `StopIteration` is returned instead of raised.
 3. `++i` and `--j` is an increment/decrement statement, not an expression.
 4. `int` does not derive from `bool`.
-5. `int` is not of unlimited precision. In 32 bit system, `int` and `float` is 30 bit; in 64 bit system, they are both 62 bit. You can use `long` type explicitly for arbitrary sized integers.
+5. `int` is a 64-bit signed integer, not arbitrary precision. Use the `long` type explicitly for arbitrary-sized integers.
 6. `__ne__` is not required. Define `__eq__` is enough.
 7. Raw string cannot have boundary quotes in it, even escaped. See [#55](https://github.com/blueloveTH/pocketpy/issues/55).
 8. In a starred unpacked assignment, e.g. `a, b, *c = x`, the starred variable can only be presented in the last position. `a, *b, c = x` is not supported.
--- a/docs/features/long.md
+++ b/docs/features/long.md
@ -3,9 +3,7 @@ icon: dot
 title: Arbitrary Sized Integers
 ---

-Unlike cpython, pkpy's `int` is of limited precision.
-In 32 bit platforms, it is 30 bit;
-in 64 bit platforms, it is 62 bit.
+Unlike CPython, pkpy's `int` has limited precision (64-bit).

 For arbitrary sized integers, we provide a builtin `long` type, just like python2's `long`.
 `long` is implemented via pure python in [_long.py](https://github.com/blueloveTH/pocketpy/blob/main/python/_long.py).
--- a/include/pocketpy/common.h
+++ b/include/pocketpy/common.h
@ -74,8 +74,6 @@ struct NumberTraits<4> {
 	using int_t = int32_t;
 	using float_t = float;

-	template<typename... Args>
-	static int_t stoi(Args&&... args) { return std::stoi(std::forward<Args>(args)...); }
 	template<typename... Args>
 	static float_t stof(Args&&... args) { return std::stof(std::forward<Args>(args)...); }

@ -89,8 +87,6 @@ struct NumberTraits<8> {
 	using int_t = int64_t;
 	using float_t = double;

-	template<typename... Args>
-	static int_t stoi(Args&&... args) { return std::stoll(std::forward<Args>(args)...); }
 	template<typename... Args>
 	static float_t stof(Args&&... args) { return std::stod(std::forward<Args>(args)...); }

@ -100,10 +96,10 @@ struct NumberTraits<8> {
 };

 using Number = NumberTraits<sizeof(void*)>;
-using i64 = Number::int_t;
+using i64 = int64_t;
 using f64 = Number::float_t;

-static_assert(sizeof(i64) == sizeof(void*));
+static_assert(sizeof(i64) == 8);
 static_assert(sizeof(f64) == sizeof(void*));
 static_assert(std::numeric_limits<f64>::is_iec559);

@ -136,22 +132,6 @@ struct Type {

 struct PyObject;
 #define PK_BITS(p) (reinterpret_cast<i64>(p))
-inline bool is_tagged(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) != 0b00; }
-inline bool is_int(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b01; }
-inline bool is_float(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b10; }
-inline bool is_special(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b11; }
-
-inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept {
-    return is_tagged(a) && is_tagged(b);
-}
-
-inline bool is_both_int(PyObject* a, PyObject* b) noexcept {
-    return is_int(a) && is_int(b);
-}
-
-inline bool is_both_float(PyObject* a, PyObject* b) noexcept {
-	return is_float(a) && is_float(b);
-}

 // special singals, is_tagged() for them is true
 inline PyObject* const PY_NULL = (PyObject*)0b000011;		// tagged null
--- a/include/pocketpy/obj.h
+++ b/include/pocketpy/obj.h
@ -123,6 +123,44 @@ struct PyObject{
    }
 };

+const int kTpIntIndex = 2;      // type index that is_heap_int() and is_type() treat as `int`
+const int kTpFloatIndex = 3;    // type index that is_type() treats as `float`
+
+inline bool is_tagged(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) != 0b00; }  // any non-zero low-two-bit tag
+inline bool is_small_int(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b01; }  // tag 0b01: integer stored in the pointer bits (value = bits >> 2)
+inline bool is_heap_int(PyObject* p) noexcept { return !is_tagged(p) && p->type.index == kTpIntIndex; }  // untagged pointer whose type index is int; dereferences p, so p must not be nullptr
+inline bool is_float(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b10; }  // tag 0b10
+inline bool is_special(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b11; }  // tag 0b11 (e.g. PY_NULL)
+inline bool is_int(PyObject* p) noexcept { return is_small_int(p) || is_heap_int(p); }  // either int representation
+
+inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept {
+    return is_tagged(a) && is_tagged(b);
+}
+
+inline bool is_both_float(PyObject* a, PyObject* b) noexcept {
+	return is_float(a) && is_float(b);
+}
+
+inline bool is_type(PyObject* obj, Type type) {   // true when obj's type equals `type` (plain equality, no subclass walk)
+#if PK_DEBUG_EXTRA_CHECK
+    if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr");
+    if(is_special(obj)) throw std::runtime_error("is_type() called with special object");
+#endif
+    switch(type.index){
+        case kTpIntIndex: return is_int(obj);       // accepts both the tagged small int and the heap int
+        case kTpFloatIndex: return is_float(obj);   // floats are recognised by tag alone
+        default: return !is_tagged(obj) && obj->type == type;   // tag test first so a tagged value is never dereferenced
+    }
+}
+
+inline bool is_non_tagged_type(PyObject* obj, Type type) {   // type test with no int/float special-casing
+#if PK_DEBUG_EXTRA_CHECK
+    if(obj == nullptr) throw std::runtime_error("is_non_tagged_type() called with nullptr");
+    if(is_special(obj)) throw std::runtime_error("is_non_tagged_type() called with special object");
+#endif
+    return !is_tagged(obj) && obj->type == type;   // always false for tagged values (small ints, floats); tag test guards the dereference
+}
+
 template <typename, typename=void> struct has_gc_marker : std::false_type {};
 template <typename T> struct has_gc_marker<T, std::void_t<decltype(&T::_gc_mark)>> : std::true_type {};

@ -170,29 +208,6 @@ Str obj_type_name(VM* vm, Type type);
 #define OBJ_NAME(obj) PK_OBJ_GET(Str, vm->getattr(obj, __name__))
 #endif

-const int kTpIntIndex = 2;
-const int kTpFloatIndex = 3;
-
-inline bool is_type(PyObject* obj, Type type) {
-#if PK_DEBUG_EXTRA_CHECK
-    if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr");
-    if(is_special(obj)) throw std::runtime_error("is_type() called with special object");
-#endif
-    switch(type.index){
-        case kTpIntIndex: return is_int(obj);
-        case kTpFloatIndex: return is_float(obj);
-        default: return !is_tagged(obj) && obj->type == type;
-    }
-}
-
-inline bool is_non_tagged_type(PyObject* obj, Type type) {
-#if PK_DEBUG_EXTRA_CHECK
-    if(obj == nullptr) throw std::runtime_error("is_non_tagged_type() called with nullptr");
-    if(is_special(obj)) throw std::runtime_error("is_non_tagged_type() called with special object");
-#endif
-    return !is_tagged(obj) && obj->type == type;
-}
-
 union BitsCvt {
    i64 _int;
    f64 _float;
@ -247,6 +262,25 @@ __T _py_cast(VM* vm, PyObject* obj) {
 #define CAST_DEFAULT(T, x, default_value) (x != vm->None) ? py_cast<T>(vm, x) : (default_value)

 /*****************************************************************/
+template<>
+struct Py_<i64> final: PyObject {   // heap box for an int; presumably what PY_VAR_INT's gcnew<i64> creates for values outside the small-int range
+    i64 _value;                     // the 64-bit integer payload
+    Py_(Type type, i64 val): PyObject(type), _value(val) {}
+    void _obj_gc_mark() override {}   // intentionally empty: the only member is a plain i64
+};
+
+inline bool try_cast_int(PyObject* obj, i64* val) noexcept {   // stores obj's integer value in *val and returns true; returns false (leaving *val unwritten) when obj is not an int
+    if(is_small_int(obj)){
+        *val = PK_BITS(obj) >> 2;        // shift out the two tag bits to recover the inline value
+        return true;
+    }else if(is_heap_int(obj)){
+        *val = PK_OBJ_GET(i64, obj);     // presumably reads Py_<i64>::_value (PK_OBJ_GET is defined elsewhere)
+        return true;
+    }else{
+        return false;
+    }
+}
+
 template<>
 struct Py_<List> final: PyObject {
    List _value;
--- a/include/pocketpy/vm.h
+++ b/include/pocketpy/vm.h
@ -393,13 +393,6 @@ public:
        TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", got " + OBJ_NAME(_t(obj)).escape());
    }

-    void check_int(PyObject* obj){
-        if(is_int(obj)) return;
-        check_type(obj, tp_int);    // if failed, redirect to check_type to raise TypeError
-    }
-
-    void check_int_or_float(PyObject* obj);
-
    PyObject* _t(Type t){
        return _all_types[t.index].obj;
    }
@ -487,12 +480,15 @@ DEF_NATIVE_2(StarWrapper, tp_star_wrapper)

 #define PY_CAST_INT(T)                                  \
 template<> inline T py_cast<T>(VM* vm, PyObject* obj){  \
-    vm->check_int(obj);                                 \
-    return (T)(PK_BITS(obj) >> 2);                         \
+    if(is_small_int(obj)) return (T)(PK_BITS(obj) >> 2);    \
+    if(is_heap_int(obj)) return (T)PK_OBJ_GET(i64, obj);    \
+    vm->check_type(obj, vm->tp_int);                        \
+    return 0;                                               \
 }                                                           \
 template<> inline T _py_cast<T>(VM* vm, PyObject* obj){     \
    PK_UNUSED(vm);                                          \
-    return (T)(PK_BITS(obj) >> 2);                         \
+    if(is_small_int(obj)) return (T)(PK_BITS(obj) >> 2);    \
+    return (T)PK_OBJ_GET(i64, obj);                         \
 }

 PY_CAST_INT(char)
@ -507,43 +503,44 @@ PY_CAST_INT(unsigned long)
 PY_CAST_INT(unsigned long long)

 template<> inline float py_cast<float>(VM* vm, PyObject* obj){
+    i64 bits;
    if(is_float(obj)){
-        i64 bits = PK_BITS(obj) & Number::c1;
+        bits = PK_BITS(obj) & Number::c1;
        return BitsCvt(bits)._float;
    }
-    if(is_int(obj)){
-        return (float)_py_cast<i64>(vm, obj);
-    }
-    vm->check_int_or_float(obj);       // error!
+    if(try_cast_int(obj, &bits)) return (float)bits;
+    vm->TypeError("expected 'int' or 'float', got " + OBJ_NAME(vm->_t(obj)).escape());
    return 0;
 }
 template<> inline float _py_cast<float>(VM* vm, PyObject* obj){
    return py_cast<float>(vm, obj);
 }
 template<> inline double py_cast<double>(VM* vm, PyObject* obj){
+    i64 bits;
     if(is_float(obj)){
-        i64 bits = PK_BITS(obj) & Number::c1;
+        bits = PK_BITS(obj) & Number::c1;
         return BitsCvt(bits)._float;
     }
-    if(is_int(obj)){
-        return (float)_py_cast<i64>(vm, obj);
-    }
-    vm->check_int_or_float(obj);       // error!
+    if(try_cast_int(obj, &bits)) return (double)bits;   // cast to double, not float: a float cast rounds ints above 2^24 before widening
+    vm->TypeError("expected 'int' or 'float', got " + OBJ_NAME(vm->_t(obj)).escape());   // obj is neither float nor int
     return 0;
 }
 template<> inline double _py_cast<double>(VM* vm, PyObject* obj){
    return py_cast<double>(vm, obj);
 }

+const i64 kMaxSmallInt = (1ll << 28) - 1;   // largest value PY_VAR_INT encodes inline as a tagged pointer
+const i64 kMinSmallInt = -(1ll << 28);      // smallest such value; anything outside [min, max] is heap-allocated via gcnew<i64>

 #define PY_VAR_INT(T)                                       \
    inline PyObject* py_var(VM* vm, T _val){                \
        i64 val = static_cast<i64>(_val);                   \
-        if(((val << 2) >> 2) != val){                       \
-            vm->_error("OverflowError", std::to_string(val) + " is out of range");  \
-        }                                                                           \
+        if(val >= kMinSmallInt && val <= kMaxSmallInt){     \
            val = (val << 2) | 0b01;                        \
            return reinterpret_cast<PyObject*>(val);        \
+        }else{                                              \
+            return vm->heap.gcnew<i64>(vm->tp_int, val);    \
+        }                                                   \
    }

 PY_VAR_INT(char)
--- a/src/ceval.cpp
+++ b/src/ceval.cpp
@ -2,6 +2,10 @@

 namespace pkpy{

+static i64 _py_sint(PyObject* obj) noexcept {   // decodes a small int with no tag check; caller must have verified is_small_int(obj)
+    return (i64)(PK_BITS(obj) >> 2);             // shift out the two tag bits
+}
+
 PyObject* VM::_run_top_frame(){
    FrameId frame = top_frame();
    const int base_id = frame.index;
@ -323,10 +327,10 @@ __NEXT_STEP:;
    } DISPATCH();
    /*****************************************/
 #define PREDICT_INT_OP(op)                              \
-    if(is_both_int(TOP(), SECOND())){                   \
+    if(is_small_int(TOP()) && is_small_int(SECOND())){  \
        _1 = POPX();                                    \
        _0 = TOP();                                     \
-        TOP() = VAR(_CAST(i64, _0) op _CAST(i64, _1));  \
+        TOP() = VAR(_py_sint(_0) op _py_sint(_1));      \
        DISPATCH();                                     \
    }

--- a/src/lexer.cpp
+++ b/src/lexer.cpp
@ -230,7 +230,7 @@ static bool is_unicode_Lo_char(uint32_t c) {
                        size_t parsed;
                        char code;
                        try{
-                            code = (char)Number::stoi(hex, &parsed, 16);
+                            code = (char)std::stoi(hex, &parsed, 16);
                        }catch(...){
                            SyntaxError("invalid hex char");
                        }
@ -289,7 +289,7 @@ static bool is_unicode_Lo_char(uint32_t c) {
                PK_ASSERT(base == 10);
                add_token(TK("@num"), Number::stof(m[0], &size));
            } else {
-                add_token(TK("@num"), Number::stoi(m[0], &size, base));
+                add_token(TK("@num"), std::stoll(m[0], &size, base));
            }
            PK_ASSERT((int)size == (int)m.length());
        }catch(...){
--- a/src/pocketpy.cpp
+++ b/src/pocketpy.cpp
@ -379,7 +379,7 @@ void init_builtins(VM* _vm) {
    });

    auto py_number_pow = [](VM* vm, PyObject* lhs_, PyObject* rhs_) {
-        if(is_both_int(lhs_, rhs_)){
+        if(is_int(lhs_) && is_int(rhs_)){
            i64 lhs = _CAST(i64, lhs_);
            i64 rhs = _CAST(i64, rhs_);
            bool flag = false;
@ -417,7 +417,7 @@ void init_builtins(VM* _vm) {
            const Str& s = CAST(Str&, args[1]);
            try{
                size_t parsed = 0;
-                i64 val = Number::stoi(s.str(), &parsed, base);
+                i64 val = std::stoll(s.str(), &parsed, base);
                PK_ASSERT(parsed == s.length());
                return VAR(val);
            }catch(...){
--- a/src/vm.cpp
+++ b/src/vm.cpp
@ -309,12 +309,6 @@ PyObject* VM::py_negate(PyObject* obj){
    return call_method(obj, __neg__);
 }

-void VM::check_int_or_float(PyObject *obj){
-    if(!is_tagged(obj)){
-        TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape());
-    }
-}
-
 bool VM::py_bool(PyObject* obj){
    if(is_non_tagged_type(obj, tp_bool)) return obj == True;
    if(obj == None) return false;
@ -454,11 +448,11 @@ PyObject* VM::format(Str spec, PyObject* obj){
            if(dot == 0){
                width = -1;
            }else{
-                width = Number::stoi(spec.substr(0, dot).str());
+                width = std::stoi(spec.substr(0, dot).str());
            }
-            precision = Number::stoi(spec.substr(dot+1).str());
+            precision = std::stoi(spec.substr(dot+1).str());
        }else{
-            width = Number::stoi(spec.str());
+            width = std::stoi(spec.str());
            precision = -1;
        }
    }catch(...){
--- a/tests/99_builtin_func.py
+++ b/tests/99_builtin_func.py
@ -320,33 +320,6 @@ except:
    pass

 # /************ int ************/
-# 未完全测试准确性-----------------------------------------------
-#       172:  367:    _vm->bind_constructor<-1>("int", [](VM* vm, ArgsView args) {
-#        28:  368:        if(args.size() == 1+0) return VAR(0);
-#        28:  369:        if(args.size() == 1+1){
-#        26:  370:            if (is_type(args[1], vm->tp_float)) return VAR((i64)CAST(f64, args[1]));
-#         2:  371:            if (is_type(args[1], vm->tp_int)) return args[1];
-#         1:  372:            if (is_type(args[1], vm->tp_bool)) return VAR(_CAST(bool, args[1]) ? 1 : 0);
-#         -:  373:        }
-#         3:  374:        if(args.size() > 1+2) vm->TypeError("int() takes at most 2 arguments");
-#         3:  375:        if (is_type(args[1], vm->tp_str)) {
-#         3:  376:            int base = 10;
-#         3:  377:            if(args.size() == 1+2) base = CAST(i64, args[2]);
-#         3:  378:            const Str& s = CAST(Str&, args[1]);
-#         -:  379:            try{
-#         3:  380:                size_t parsed = 0;
-#         3:  381:                i64 val = Number::stoi(s.str(), &parsed, base);
-#         3:  382:                PK_ASSERT(parsed == s.length());
-#         3:  383:                return VAR(val);
-#         3:  384:            }catch(...){
-#     #####:  385:                vm->ValueError("invalid literal for int(): " + s.escape());
-#     #####:  386:            }
-#         3:  387:        }
-#     #####:  388:        vm->TypeError("invalid arguments for int()");
-#     #####:  389:        return vm->None;
-#        28:  390:    });
-# test int:
-
 try:
    int('asad')
    print('未能拦截错误, 在测试 int')