From 43ee77e4b0a43f2b4d9c8f9d5ae3beb41056c2d4 Mon Sep 17 00:00:00 2001
From: blueloveTH <blueloveTH@foxmail.com>
Date: Thu, 21 Sep 2023 23:26:12 +0800
Subject: [PATCH] make `int` always 64-bit

---
 docs/features/differences.md |  2 +-
 docs/features/long.md        |  4 +-
 include/pocketpy/common.h    | 24 +----------
 include/pocketpy/obj.h       | 80 +++++++++++++++++++++++++-----------
 include/pocketpy/vm.h        | 53 +++++++++++-------------
 src/ceval.cpp                |  8 +++-
 src/lexer.cpp                |  4 +-
 src/pocketpy.cpp             |  4 +-
 src/vm.cpp                   | 12 ++----
 tests/99_builtin_func.py     | 27 ------------
 10 files changed, 99 insertions(+), 119 deletions(-)
diff --git a/docs/features/differences.md b/docs/features/differences.md
index b39ba772..c55c912a 100644
--- a/docs/features/differences.md
+++ b/docs/features/differences.md
@@ -35,7 +35,7 @@ The easiest way to test a feature is to [try it on your browser](https://pocketp
 2. When a generator is exhausted, `StopIteration` is returned instead of raised.
 3. `++i` and `--j` is an increment/decrement statement, not an expression.
 4. `int` does not derive from `bool`.
-5. `int` is not of unlimited precision. In 32 bit system, `int` and `float` is 30 bit; in 64 bit system, they are both 62 bit. You can use `long` type explicitly for arbitrary sized integers.
+5. `int` is 64-bit. You can use `long` type explicitly for arbitrary sized integers.
 6. `__ne__` is not required. Define `__eq__` is enough.
 7. Raw string cannot have boundary quotes in it, even escaped. See [#55](https://github.com/blueloveTH/pocketpy/issues/55).
 8. In a starred unpacked assignment, e.g. `a, b, *c = x`, the starred variable can only be presented in the last position. `a, *b, c = x` is not supported.
diff --git a/docs/features/long.md b/docs/features/long.md
index 0ca6e58c..62827e10 100644
--- a/docs/features/long.md
+++ b/docs/features/long.md
@@ -3,9 +3,7 @@ icon: dot
 title: Arbitrary Sized Integers
 ---
 
-Unlike cpython, pkpy's `int` is of limited precision.
-In 32 bit platforms, it is 30 bit;
-in 64 bit platforms, it is 62 bit.
+Unlike cpython, pkpy's `int` is of limited precision (64-bit).
 
 For arbitrary sized integers, we provide a builtin `long` type, just like python2's `long`.
 `long` is implemented via pure python in [_long.py](https://github.com/blueloveTH/pocketpy/blob/main/python/_long.py).
diff --git a/include/pocketpy/common.h b/include/pocketpy/common.h
index eb13dbbf..f783c888 100644
--- a/include/pocketpy/common.h
+++ b/include/pocketpy/common.h
@@ -74,8 +74,6 @@ struct NumberTraits<4> {
 	using int_t = int32_t;
 	using float_t = float;
 
-	template<typename... Args>
-	static int_t stoi(Args&&... args) { return std::stoi(std::forward<Args>(args)...); }
 	template<typename... Args>
 	static float_t stof(Args&&... args) { return std::stof(std::forward<Args>(args)...); }
 
@@ -89,8 +87,6 @@ struct NumberTraits<8> {
 	using int_t = int64_t;
 	using float_t = double;
 
-	template<typename... Args>
-	static int_t stoi(Args&&... args) { return std::stoll(std::forward<Args>(args)...); }
 	template<typename... Args>
 	static float_t stof(Args&&... args) { return std::stod(std::forward<Args>(args)...); }
 
@@ -100,10 +96,10 @@ struct NumberTraits<8> {
 };
 
 using Number = NumberTraits<sizeof(void*)>;
-using i64 = Number::int_t;
+using i64 = int64_t;
 using f64 = Number::float_t;
 
-static_assert(sizeof(i64) == sizeof(void*));
+static_assert(sizeof(i64) == 8);
 static_assert(sizeof(f64) == sizeof(void*));
 static_assert(std::numeric_limits<f64>::is_iec559);
 
@@ -136,22 +132,6 @@ struct Type {
 
 struct PyObject;
 #define PK_BITS(p) (reinterpret_cast<i64>(p))
-inline bool is_tagged(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) != 0b00; }
-inline bool is_int(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b01; }
-inline bool is_float(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b10; }
-inline bool is_special(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b11; }
-
-inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept {
-    return is_tagged(a) && is_tagged(b);
-}
-
-inline bool is_both_int(PyObject* a, PyObject* b) noexcept {
-    return is_int(a) && is_int(b);
-}
-
-inline bool is_both_float(PyObject* a, PyObject* b) noexcept {
-	return is_float(a) && is_float(b);
-}
 
 // special singals, is_tagged() for them is true
 inline PyObject* const PY_NULL = (PyObject*)0b000011;		// tagged null
diff --git a/include/pocketpy/obj.h b/include/pocketpy/obj.h
index 74fcf0c7..be627187 100644
--- a/include/pocketpy/obj.h
+++ b/include/pocketpy/obj.h
@@ -123,6 +123,44 @@ struct PyObject{
     }
 };
 
+const int kTpIntIndex = 2;
+const int kTpFloatIndex = 3;
+
+inline bool is_tagged(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) != 0b00; }
+inline bool is_small_int(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b01; }
+inline bool is_heap_int(PyObject* p) noexcept { return !is_tagged(p) && p->type.index == kTpIntIndex; }
+inline bool is_float(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b10; }
+inline bool is_special(PyObject* p) noexcept { return (PK_BITS(p) & 0b11) == 0b11; }
+inline bool is_int(PyObject* p) noexcept { return is_small_int(p) || is_heap_int(p); }
+
+inline bool is_both_int_or_float(PyObject* a, PyObject* b) noexcept {
+    return is_tagged(a) && is_tagged(b);
+}
+
+inline bool is_both_float(PyObject* a, PyObject* b) noexcept {
+	return is_float(a) && is_float(b);
+}
+
+inline bool is_type(PyObject* obj, Type type) {
+#if PK_DEBUG_EXTRA_CHECK
+    if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr");
+    if(is_special(obj)) throw std::runtime_error("is_type() called with special object");
+#endif
+    switch(type.index){
+        case kTpIntIndex: return is_int(obj);
+        case kTpFloatIndex: return is_float(obj);
+        default: return !is_tagged(obj) && obj->type == type;
+    }
+}
+
+inline bool is_non_tagged_type(PyObject* obj, Type type) {
+#if PK_DEBUG_EXTRA_CHECK
+    if(obj == nullptr) throw std::runtime_error("is_non_tagged_type() called with nullptr");
+    if(is_special(obj)) throw std::runtime_error("is_non_tagged_type() called with special object");
+#endif
+    return !is_tagged(obj) && obj->type == type;
+}
+
 template <typename, typename=void> struct has_gc_marker : std::false_type {};
 template <typename T> struct has_gc_marker<T, std::void_t<decltype(&T::_gc_mark)>> : std::true_type {};
 
@@ -170,29 +208,6 @@ Str obj_type_name(VM* vm, Type type);
 #define OBJ_NAME(obj) PK_OBJ_GET(Str, vm->getattr(obj, __name__))
 #endif
 
-const int kTpIntIndex = 2;
-const int kTpFloatIndex = 3;
-
-inline bool is_type(PyObject* obj, Type type) {
-#if PK_DEBUG_EXTRA_CHECK
-    if(obj == nullptr) throw std::runtime_error("is_type() called with nullptr");
-    if(is_special(obj)) throw std::runtime_error("is_type() called with special object");
-#endif
-    switch(type.index){
-        case kTpIntIndex: return is_int(obj);
-        case kTpFloatIndex: return is_float(obj);
-        default: return !is_tagged(obj) && obj->type == type;
-    }
-}
-
-inline bool is_non_tagged_type(PyObject* obj, Type type) {
-#if PK_DEBUG_EXTRA_CHECK
-    if(obj == nullptr) throw std::runtime_error("is_non_tagged_type() called with nullptr");
-    if(is_special(obj)) throw std::runtime_error("is_non_tagged_type() called with special object");
-#endif
-    return !is_tagged(obj) && obj->type == type;
-}
-
 union BitsCvt {
     i64 _int;
     f64 _float;
@@ -247,6 +262,25 @@ __T _py_cast(VM* vm, PyObject* obj) {
 #define CAST_DEFAULT(T, x, default_value) (x != vm->None) ? py_cast<T>(vm, x) : (default_value)
 
 /*****************************************************************/
+template<>
+struct Py_<i64> final: PyObject {
+    i64 _value;
+    Py_(Type type, i64 val): PyObject(type), _value(val) {}
+    void _obj_gc_mark() override {}
+};
+
+inline bool try_cast_int(PyObject* obj, i64* val) noexcept {
+    if(is_small_int(obj)){
+        *val = PK_BITS(obj) >> 2;
+        return true;
+    }else if(is_heap_int(obj)){
+        *val = PK_OBJ_GET(i64, obj);
+        return true;
+    }else{
+        return false;
+    }
+}
+
 template<>
 struct Py_<List> final: PyObject {
     List _value;
diff --git a/include/pocketpy/vm.h b/include/pocketpy/vm.h
index 74c81134..89613e86 100644
--- a/include/pocketpy/vm.h
+++ b/include/pocketpy/vm.h
@@ -393,13 +393,6 @@ public:
         TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", got " + OBJ_NAME(_t(obj)).escape());
     }
 
-    void check_int(PyObject* obj){
-        if(is_int(obj)) return;
-        check_type(obj, tp_int);    // if failed, redirect to check_type to raise TypeError
-    }
-
-    void check_int_or_float(PyObject* obj);
-
     PyObject* _t(Type t){
         return _all_types[t.index].obj;
     }
@@ -487,12 +480,15 @@ DEF_NATIVE_2(StarWrapper, tp_star_wrapper)
 
 #define PY_CAST_INT(T)                                  \
 template<> inline T py_cast<T>(VM* vm, PyObject* obj){  \
-    vm->check_int(obj);                                 \
-    return (T)(PK_BITS(obj) >> 2);                         \
-}                                                       \
-template<> inline T _py_cast<T>(VM* vm, PyObject* obj){ \
-    PK_UNUSED(vm);                                      \
-    return (T)(PK_BITS(obj) >> 2);                         \
+    if(is_small_int(obj)) return (T)(PK_BITS(obj) >> 2);    \
+    if(is_heap_int(obj)) return (T)PK_OBJ_GET(i64, obj);    \
+    vm->check_type(obj, vm->tp_int);                        \
+    return 0;                                               \
+}                                                           \
+template<> inline T _py_cast<T>(VM* vm, PyObject* obj){     \
+    PK_UNUSED(vm);                                          \
+    if(is_small_int(obj)) return (T)(PK_BITS(obj) >> 2);    \
+    return (T)PK_OBJ_GET(i64, obj);                         \
 }
 
 PY_CAST_INT(char)
@@ -507,43 +503,44 @@ PY_CAST_INT(unsigned long)
 PY_CAST_INT(unsigned long long)
 
 template<> inline float py_cast<float>(VM* vm, PyObject* obj){
+    i64 bits;
     if(is_float(obj)){
-        i64 bits = PK_BITS(obj) & Number::c1;
+        bits = PK_BITS(obj) & Number::c1;
         return BitsCvt(bits)._float;
     }
-    if(is_int(obj)){
-        return (float)_py_cast<i64>(vm, obj);
-    }
-    vm->check_int_or_float(obj);       // error!
+    if(try_cast_int(obj, &bits)) return (float)bits;
+    vm->TypeError("expected 'int' or 'float', got " + OBJ_NAME(vm->_t(obj)).escape());
     return 0;
 }
 template<> inline float _py_cast<float>(VM* vm, PyObject* obj){
     return py_cast<float>(vm, obj);
 }
 template<> inline double py_cast<double>(VM* vm, PyObject* obj){
+    i64 bits;
     if(is_float(obj)){
-        i64 bits = PK_BITS(obj) & Number::c1;
+        bits = PK_BITS(obj) & Number::c1;
         return BitsCvt(bits)._float;
     }
-    if(is_int(obj)){
-        return (float)_py_cast<i64>(vm, obj);
-    }
-    vm->check_int_or_float(obj);       // error!
+    if(try_cast_int(obj, &bits)) return (float)bits;
+    vm->TypeError("expected 'int' or 'float', got " + OBJ_NAME(vm->_t(obj)).escape());
     return 0;
 }
 template<> inline double _py_cast<double>(VM* vm, PyObject* obj){
     return py_cast<double>(vm, obj);
 }
 
+const i64 kMaxSmallInt = (1ll << 28) - 1;
+const i64 kMinSmallInt = -(1ll << 28);
 
 #define PY_VAR_INT(T)                                       \
     inline PyObject* py_var(VM* vm, T _val){                \
         i64 val = static_cast<i64>(_val);                   \
-        if(((val << 2) >> 2) != val){                       \
-            vm->_error("OverflowError", std::to_string(val) + " is out of range");  \
-        }                                                                           \
-        val = (val << 2) | 0b01;                                                    \
-        return reinterpret_cast<PyObject*>(val);                                    \
+        if(val >= kMinSmallInt && val <= kMaxSmallInt){     \
+            val = (val << 2) | 0b01;                        \
+            return reinterpret_cast<PyObject*>(val);        \
+        }else{                                              \
+            return vm->heap.gcnew<i64>(vm->tp_int, val);    \
+        }                                                   \
     }
 
 PY_VAR_INT(char)
diff --git a/src/ceval.cpp b/src/ceval.cpp
index 49f26f18..2458f9a5 100644
--- a/src/ceval.cpp
+++ b/src/ceval.cpp
@@ -2,6 +2,10 @@
 
 namespace pkpy{
 
+static i64 _py_sint(PyObject* obj) noexcept {
+    return (i64)(PK_BITS(obj) >> 2);
+}
+
 PyObject* VM::_run_top_frame(){
     FrameId frame = top_frame();
     const int base_id = frame.index;
@@ -323,10 +327,10 @@ __NEXT_STEP:;
     } DISPATCH();
     /*****************************************/
 #define PREDICT_INT_OP(op)                              \
-    if(is_both_int(TOP(), SECOND())){                   \
+    if(is_small_int(TOP()) && is_small_int(SECOND())){  \
         _1 = POPX();                                    \
         _0 = TOP();                                     \
-        TOP() = VAR(_CAST(i64, _0) op _CAST(i64, _1));  \
+        TOP() = VAR(_py_sint(_0) op _py_sint(_1));      \
         DISPATCH();                                     \
     }
 
diff --git a/src/lexer.cpp b/src/lexer.cpp
index 765c4dea..d4df00a4 100644
--- a/src/lexer.cpp
+++ b/src/lexer.cpp
@@ -230,7 +230,7 @@ static bool is_unicode_Lo_char(uint32_t c) {
                         size_t parsed;
                         char code;
                         try{
-                            code = (char)Number::stoi(hex, &parsed, 16);
+                            code = (char)std::stoi(hex, &parsed, 16);
                         }catch(...){
                             SyntaxError("invalid hex char");
                         }
@@ -289,7 +289,7 @@ static bool is_unicode_Lo_char(uint32_t c) {
                 PK_ASSERT(base == 10);
                 add_token(TK("@num"), Number::stof(m[0], &size));
             } else {
-                add_token(TK("@num"), Number::stoi(m[0], &size, base));
+                add_token(TK("@num"), std::stoll(m[0], &size, base));
             }
             PK_ASSERT((int)size == (int)m.length());
         }catch(...){
diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp
index 1174fcdd..a6b12301 100644
--- a/src/pocketpy.cpp
+++ b/src/pocketpy.cpp
@@ -379,7 +379,7 @@ void init_builtins(VM* _vm) {
     });
 
     auto py_number_pow = [](VM* vm, PyObject* lhs_, PyObject* rhs_) {
-        if(is_both_int(lhs_, rhs_)){
+        if(is_int(lhs_) && is_int(rhs_)){
             i64 lhs = _CAST(i64, lhs_);
             i64 rhs = _CAST(i64, rhs_);
             bool flag = false;
@@ -417,7 +417,7 @@ void init_builtins(VM* _vm) {
             const Str& s = CAST(Str&, args[1]);
             try{
                 size_t parsed = 0;
-                i64 val = Number::stoi(s.str(), &parsed, base);
+                i64 val = std::stoll(s.str(), &parsed, base);
                 PK_ASSERT(parsed == s.length());
                 return VAR(val);
             }catch(...){
diff --git a/src/vm.cpp b/src/vm.cpp
index a536d451..ff034d2b 100644
--- a/src/vm.cpp
+++ b/src/vm.cpp
@@ -309,12 +309,6 @@ PyObject* VM::py_negate(PyObject* obj){
     return call_method(obj, __neg__);
 }
 
-void VM::check_int_or_float(PyObject *obj){
-    if(!is_tagged(obj)){
-        TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape());
-    }
-}
-
 bool VM::py_bool(PyObject* obj){
     if(is_non_tagged_type(obj, tp_bool)) return obj == True;
     if(obj == None) return false;
@@ -454,11 +448,11 @@ PyObject* VM::format(Str spec, PyObject* obj){
             if(dot == 0){
                 width = -1;
             }else{
-                width = Number::stoi(spec.substr(0, dot).str());
+                width = std::stoi(spec.substr(0, dot).str());
             }
-            precision = Number::stoi(spec.substr(dot+1).str());
+            precision = std::stoi(spec.substr(dot+1).str());
         }else{
-            width = Number::stoi(spec.str());
+            width = std::stoi(spec.str());
             precision = -1;
         }
     }catch(...){
diff --git a/tests/99_builtin_func.py b/tests/99_builtin_func.py
index d1a3ad71..adb82e9b 100644
--- a/tests/99_builtin_func.py
+++ b/tests/99_builtin_func.py
@@ -320,33 +320,6 @@ except:
     pass
 
 # /************ int ************/
-# 未完全测试准确性-----------------------------------------------
-#       172:  367:    _vm->bind_constructor<-1>("int", [](VM* vm, ArgsView args) {
-#        28:  368:        if(args.size() == 1+0) return VAR(0);
-#        28:  369:        if(args.size() == 1+1){
-#        26:  370:            if (is_type(args[1], vm->tp_float)) return VAR((i64)CAST(f64, args[1]));
-#         2:  371:            if (is_type(args[1], vm->tp_int)) return args[1];
-#         1:  372:            if (is_type(args[1], vm->tp_bool)) return VAR(_CAST(bool, args[1]) ? 1 : 0);
-#         -:  373:        }
-#         3:  374:        if(args.size() > 1+2) vm->TypeError("int() takes at most 2 arguments");
-#         3:  375:        if (is_type(args[1], vm->tp_str)) {
-#         3:  376:            int base = 10;
-#         3:  377:            if(args.size() == 1+2) base = CAST(i64, args[2]);
-#         3:  378:            const Str& s = CAST(Str&, args[1]);
-#         -:  379:            try{
-#         3:  380:                size_t parsed = 0;
-#         3:  381:                i64 val = Number::stoi(s.str(), &parsed, base);
-#         3:  382:                PK_ASSERT(parsed == s.length());
-#         3:  383:                return VAR(val);
-#         3:  384:            }catch(...){
-#     #####:  385:                vm->ValueError("invalid literal for int(): " + s.escape());
-#     #####:  386:            }
-#         3:  387:        }
-#     #####:  388:        vm->TypeError("invalid arguments for int()");
-#     #####:  389:        return vm->None;
-#        28:  390:    });
-# test int:
-
 try:
     int('asad')
     print('未能拦截错误, 在测试 int')