diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index f2efbcf7..9c288c61 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -29,8 +29,10 @@ typedef struct c11_bytes{ } c11_bytes; int c11_sv__cmp(c11_sv self, c11_sv other); -int c11_sv__cmp2(c11_sv self, const char* other, int size); -int c11_sv__cmp3(c11_sv self, const char* other); +int c11_sv__cmp2(c11_sv self, const char* other); + +bool c11__streq(const char* a, const char* b); +bool c11__sveq(c11_sv a, const char* b); c11_string* c11_string__new(const char* data); c11_string* c11_string__new2(const char* data, int size); @@ -66,6 +68,14 @@ int c11__byte_index_to_unicode(const char* data, int n); bool c11__is_unicode_Lo_char(int c); int c11__u8_header(unsigned char c, bool suppress); +typedef enum IntParsingResult{ + IntParsing_SUCCESS, + IntParsing_FAILURE, + IntParsing_OVERFLOW, +} IntParsingResult; + +IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base); + #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/include/pocketpy/compiler/lexer.h b/include/pocketpy/compiler/lexer.h index 95b665f2..8369561d 100644 --- a/include/pocketpy/compiler/lexer.h +++ b/include/pocketpy/compiler/lexer.h @@ -87,14 +87,6 @@ enum Precedence { PREC_HIGHEST, }; -typedef enum IntParsingResult{ - IntParsing_SUCCESS, - IntParsing_FAILURE, - IntParsing_OVERFLOW, -} IntParsingResult; - -IntParsingResult parse_uint(c11_sv text, int64_t* out, int base); - typedef struct Error Error; typedef c11_array pk_TokenArray; diff --git a/include/pocketpy/pocketpy.h b/include/pocketpy/pocketpy.h index bdfef599..c95ea60b 100644 --- a/include/pocketpy/pocketpy.h +++ b/include/pocketpy/pocketpy.h @@ -311,6 +311,10 @@ py_GlobalRef py_tpobject(py_Type type); /// Get the type name. const char* py_tpname(py_Type type); +/// Check if the object is an instance of the given type. +/// Re +bool py_checktype(const py_Ref self, py_Type type); + /// Python favored string formatting. /// %d: int /// %i: py_i64 (int64_t) diff --git a/src/common/str.c b/src/common/str.c index 6a88a34f..c760ad64 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -8,7 +8,6 @@ #include #include - c11_string* c11_string__new(const char* data) { return c11_string__new2(data, strlen(data)); } c11_string* c11_string__new2(const char* data, int size) { @@ -35,13 +34,9 @@ c11_string* c11_string__copy(c11_string* self) { return retval; } -void c11_string__delete(c11_string* self) { - free(self); -} +void c11_string__delete(c11_string* self) { free(self); } -c11_sv c11_string__sv(c11_string* self) { - return (c11_sv){self->data, self->size}; -} +c11_sv c11_string__sv(c11_string* self) { return (c11_sv){self->data, self->size}; } c11_string* c11_string__replace(c11_string* self, char old, char new_) { c11_string* retval = c11_string__copy(self); @@ -119,9 +114,7 @@ void c11_sv__upper(c11_sv sv, c11_vector* buf) { } } -c11_sv c11_sv__slice(c11_sv sv, int start) { - return c11_sv__slice2(sv, start, sv.size); -} +c11_sv c11_sv__slice(c11_sv sv, int start) { return c11_sv__slice2(sv, start, sv.size); } c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) { if(stop < start) stop = start; @@ -233,19 +226,25 @@ int c11__byte_index_to_unicode(const char* data, int n) { ////////////// int c11_sv__cmp(c11_sv self, c11_sv other) { - return c11_sv__cmp2(self, other.data, other.size); + int res = strncmp(self.data, other.data, PK_MIN(self.size, other.size)); + if(res != 0) return res; + return self.size - other.size; } -int c11_sv__cmp2(c11_sv self, const char* other, int size) { +int c11_sv__cmp2(c11_sv self, const char* other) { + int size = strlen(other); int res = strncmp(self.data, other, PK_MIN(self.size, size)); if(res != 0) return res; return self.size - size; } -int c11_sv__cmp3(c11_sv self, const char* other) { - return c11_sv__cmp2(self, other, strlen(other)); -} +bool c11__streq(const char* a, const char* b) { return strcmp(a, b) == 0; } +bool c11__sveq(c11_sv a, const char* b) { + int size = strlen(b); + if(a.size != size) return false; + return memcmp(a.data, b, size) == 0; +} // clang-format off static const int kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70303,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560}; @@ -273,4 +272,95 @@ int c11__u8_header(unsigned char c, bool suppress) { if((c & 0b11111110) == 0b11111100) return 6; if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n") return 0; +} + +IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) { + *out = 0; + + c11_sv prefix = {.data = text.data, .size = PK_MIN(2, text.size)}; + if(base == -1) { + if(c11__sveq(prefix, "0b")) + base = 2; + else if(c11__sveq(prefix, "0o")) + base = 8; + else if(c11__sveq(prefix, "0x")) + base = 16; + else + base = 10; + } + + if(base == 10) { + // 10-base 12334 + if(text.size == 0) return IntParsing_FAILURE; + for(int i = 0; i < text.size; i++) { + char c = text.data[i]; + if(c >= '0' && c <= '9') { + *out = (*out * 10) + (c - '0'); + } else { + return IntParsing_FAILURE; + } + } + // "9223372036854775807".__len__() == 19 + if(text.size > 19) return IntParsing_OVERFLOW; + return IntParsing_SUCCESS; + } else if(base == 2) { + // 2-base 0b101010 + if(c11__sveq(prefix, "0b")) { + // text.remove_prefix(2); + text = (c11_sv){text.data + 2, text.size - 2}; + } + if(text.size == 0) return IntParsing_FAILURE; + for(int i = 0; i < text.size; i++) { + char c = text.data[i]; + if(c == '0' || c == '1') { + *out = (*out << 1) | (c - '0'); + } else { + return IntParsing_FAILURE; + } + } + // "111111111111111111111111111111111111111111111111111111111111111".__len__() == 63 + if(text.size > 63) return IntParsing_OVERFLOW; + return IntParsing_SUCCESS; + } else if(base == 8) { + // 8-base 0o123 + if(c11__sveq(prefix, "0o")) { + // text.remove_prefix(2); + text = (c11_sv){text.data + 2, text.size - 2}; + } + if(text.size == 0) return IntParsing_FAILURE; + for(int i = 0; i < text.size; i++) { + char c = text.data[i]; + if(c >= '0' && c <= '7') { + *out = (*out << 3) | (c - '0'); + } else { + return IntParsing_FAILURE; + } + } + // "777777777777777777777".__len__() == 21 + if(text.size > 21) return IntParsing_OVERFLOW; + return IntParsing_SUCCESS; + } else if(base == 16) { + // 16-base 0x123 + if(c11__sveq(prefix, "0x")) { + // text.remove_prefix(2); + text = (c11_sv){text.data + 2, text.size - 2}; + } + if(text.size == 0) return IntParsing_FAILURE; + for(int i = 0; i < text.size; i++) { + char c = text.data[i]; + if(c >= '0' && c <= '9') { + *out = (*out << 4) | (c - '0'); + } else if(c >= 'a' && c <= 'f') { + *out = (*out << 4) | (c - 'a' + 10); + } else if(c >= 'A' && c <= 'F') { + *out = (*out << 4) | (c - 'A' + 10); + } else { + return IntParsing_FAILURE; + } + } + // "7fffffffffffffff".__len__() == 16 + if(text.size > 16) return IntParsing_OVERFLOW; + return IntParsing_SUCCESS; + } + return IntParsing_FAILURE; } \ No newline at end of file diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index 2cb1a704..ae4a2e7a 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -246,11 +246,11 @@ static Error* eat_name(pk_Lexer* self){ c11_sv name = {self->token_start, length}; if(self->src->mode == JSON_MODE) { - if(c11_sv__cmp3(name, "true") == 0) { + if(c11__sveq(name, "true")) { add_token(self, TK_TRUE); - } else if(c11_sv__cmp3(name, "false") == 0) { + } else if(c11__sveq(name, "false")) { add_token(self, TK_FALSE); - } else if(c11_sv__cmp3(name, "null") == 0) { + } else if(c11__sveq(name, "null")) { add_token(self, TK_NONE); } else { return SyntaxError("invalid JSON token"); @@ -260,12 +260,12 @@ static Error* eat_name(pk_Lexer* self){ const char** KW_BEGIN = pk_TokenSymbols + TK_FALSE; int KW_COUNT = TK__COUNT__ - TK_FALSE; - #define less(a, b) (c11_sv__cmp3(b, a) > 0) + #define less(a, b) (c11_sv__cmp2(b, a) > 0) int out; c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out); #undef less - if(out != KW_COUNT && c11_sv__cmp3(name, KW_BEGIN[out]) == 0) { + if(out != KW_COUNT && c11__sveq(name, KW_BEGIN[out])) { add_token(self, (TokenIndex)(out + TK_FALSE)); } else { add_token(self, TK_ID); @@ -372,7 +372,7 @@ static Error* eat_number(pk_Lexer* self){ } // try integer TokenValue value = {.index = TokenValue_I64}; - switch(parse_uint(text, &value._i64, -1)) { + switch(c11__parse_uint(text, &value._i64, -1)) { case IntParsing_SUCCESS: add_token_with_value(self, TK_NUM, value); return NULL; @@ -554,7 +554,7 @@ static Error* from_precompiled(pk_Lexer* self) { deserializer.curr += 5; // skip "pkpy:" c11_sv version = TokenDeserializer__read_string(&deserializer, '\n'); - if(c11_sv__cmp3(version, PK_VERSION) != 0) { + if(c11_sv__cmp2(version, PK_VERSION) != 0) { return SyntaxError("precompiled version mismatch"); } if(TokenDeserializer__read_uint(&deserializer, '\n') != (int64_t)self->src->mode){ @@ -618,97 +618,6 @@ static Error* from_precompiled(pk_Lexer* self) { return NULL; } -IntParsingResult parse_uint(c11_sv text, int64_t* out, int base) { - *out = 0; - - c11_sv prefix = {.data = text.data, .size = PK_MIN(2, text.size)}; - if(base == -1) { - if(c11_sv__cmp3(prefix, "0b") == 0) - base = 2; - else if(c11_sv__cmp3(prefix, "0o") == 0) - base = 8; - else if(c11_sv__cmp3(prefix, "0x") == 0) - base = 16; - else - base = 10; - } - - if(base == 10) { - // 10-base 12334 - if(text.size == 0) return IntParsing_FAILURE; - for(int i = 0; i < text.size; i++) { - char c = text.data[i]; - if(c >= '0' && c <= '9') { - *out = (*out * 10) + (c - '0'); - } else { - return IntParsing_FAILURE; - } - } - // "9223372036854775807".__len__() == 19 - if(text.size > 19) return IntParsing_OVERFLOW; - return IntParsing_SUCCESS; - } else if(base == 2) { - // 2-base 0b101010 - if(c11_sv__cmp3(prefix, "0b") == 0) { - // text.remove_prefix(2); - text = (c11_sv){text.data + 2, text.size - 2}; - } - if(text.size == 0) return IntParsing_FAILURE; - for(int i = 0; i < text.size; i++) { - char c = text.data[i]; - if(c == '0' || c == '1') { - *out = (*out << 1) | (c - '0'); - } else { - return IntParsing_FAILURE; - } - } - // "111111111111111111111111111111111111111111111111111111111111111".__len__() == 63 - if(text.size > 63) return IntParsing_OVERFLOW; - return IntParsing_SUCCESS; - } else if(base == 8) { - // 8-base 0o123 - if(c11_sv__cmp3(prefix, "0o") == 0) { - // text.remove_prefix(2); - text = (c11_sv){text.data + 2, text.size - 2}; - } - if(text.size == 0) return IntParsing_FAILURE; - for(int i = 0; i < text.size; i++) { - char c = text.data[i]; - if(c >= '0' && c <= '7') { - *out = (*out << 3) | (c - '0'); - } else { - return IntParsing_FAILURE; - } - } - // "777777777777777777777".__len__() == 21 - if(text.size > 21) return IntParsing_OVERFLOW; - return IntParsing_SUCCESS; - } else if(base == 16) { - // 16-base 0x123 - if(c11_sv__cmp3(prefix, "0x") == 0) { - // text.remove_prefix(2); - text = (c11_sv){text.data + 2, text.size - 2}; - } - if(text.size == 0) return IntParsing_FAILURE; - for(int i = 0; i < text.size; i++) { - char c = text.data[i]; - if(c >= '0' && c <= '9') { - *out = (*out << 4) | (c - '0'); - } else if(c >= 'a' && c <= 'f') { - *out = (*out << 4) | (c - 'a' + 10); - } else if(c >= 'A' && c <= 'F') { - *out = (*out << 4) | (c - 'A' + 10); - } else { - return IntParsing_FAILURE; - } - } - // "7fffffffffffffff".__len__() == 16 - if(text.size > 16) return IntParsing_OVERFLOW; - return IntParsing_SUCCESS; - } - return IntParsing_FAILURE; -} - Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens){ pk_Lexer lexer; pk_Lexer__ctor(&lexer, src); diff --git a/src/interpreter/py_number.c b/src/interpreter/py_number.c index 68a33408..f0cc3b90 100644 --- a/src/interpreter/py_number.c +++ b/src/interpreter/py_number.c @@ -1,4 +1,5 @@ #include "pocketpy/interpreter/vm.h" +#include "pocketpy/pocketpy.h" #include @@ -43,6 +44,8 @@ DEF_NUM_BINARY_OP(__ge__, >=, py_newbool, py_newbool) #undef DEF_NUM_BINARY_OP +static bool ValueError(const char* fmt, ...) { return false; } + static bool _py_int__neg__(int argc, py_Ref argv) { py_checkargc(1); int64_t val = py_toint(&argv[0]); @@ -182,6 +185,161 @@ DEF_INT_BITWISE_OP(__rshift__, >>) #undef DEF_INT_BITWISE_OP +static bool _py_int__repr__(int argc, py_Ref argv) { + py_checkargc(1); + int64_t val = py_toint(&argv[0]); + char buf[32]; + int size = snprintf(buf, sizeof(buf), "%lld", (long long)val); + py_newstrn(py_retval(), buf, size); + return true; +} + +static bool _py_float__repr__(int argc, py_Ref argv) { + py_checkargc(1); + double val = py_tofloat(&argv[0]); + char buf[32]; + int size = snprintf(buf, sizeof(buf), "%f", val); + py_newstrn(py_retval(), buf, size); + return true; +} + +union c11_8bytes { + py_i64 _i64; + py_f64 _f64; + + union { + uint32_t upper; + uint32_t lower; + } bits; +}; + +static py_i64 c11_8bytes__hash(union c11_8bytes u) { + // https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key + const uint32_t C = 2654435761; + u.bits.upper *= C; + u.bits.lower *= C; + return u._i64; +} + +static bool _py_int__hash__(int argc, py_Ref argv) { + py_checkargc(1); + int64_t val = py_toint(&argv[0]); + union c11_8bytes u = {._i64 = val}; + py_newint(py_retval(), c11_8bytes__hash(u)); + return true; +} + +static bool _py_float__hash__(int argc, py_Ref argv) { + py_checkargc(1); + double val = py_tofloat(&argv[0]); + union c11_8bytes u = {._f64 = val}; + py_newint(py_retval(), c11_8bytes__hash(u)); + return true; +} + +static bool _py_int__new__(int argc, py_Ref argv) { + if(argc == 1 + 0) { + // int() == 0 + py_newint(py_retval(), 0); + return true; + } + // 1 arg + if(argc == 1 + 1) { + switch(argv[1].type) { + case tp_float: { + // int(1.1) == 1 + py_newint(py_retval(), (int64_t)py_tofloat(&argv[1])); + return true; + } + case tp_int: { + // int(1) == 1 + *py_retval() = argv[1]; + return true; + } + case tp_bool: { + // int(True) == 1 + py_newint(py_retval(), (int64_t)py_tobool(&argv[1])); + return true; + } + case tp_str: break; // leave to the next block + default: return TypeError("invalid arguments for int()"); + } + } + // 2+ args -> error + if(argc > 1 + 2) return TypeError("int() takes at most 2 arguments"); + // 1 or 2 args with str + int base = 10; + if(argc == 1 + 2) { + if(!py_checktype(py_arg(2), tp_int)) return false; + base = py_toint(py_arg(2)); + } + + if(!py_checktype(py_arg(1), tp_str)) return false; + int size; + const char* data = py_tostrn(py_arg(1), &size); + bool negative = false; + if(size && (data[0] == '+' || data[0] == '-')) { + negative = data[0] == '-'; + data++; + size--; + } + py_i64 val; + if(c11__parse_uint((c11_sv){data, size}, &val, base) != IntParsing_SUCCESS) { + return ValueError("invalid literal for int() with base %d: %q", base, data); + } + py_newint(py_retval(), negative ? -val : val); + return true; +} + +static bool _py_float__new__(int argc, py_Ref argv) { + if(argc == 1 + 0) { + // float() == 0.0 + py_newfloat(py_retval(), 0.0); + return true; + } + if(argc > 1 + 1) return TypeError("float() takes at most 1 argument"); + // 1 arg + switch(argv[1].type) { + case tp_int: { + // float(1) == 1.0 + py_newfloat(py_retval(), py_toint(&argv[1])); + return true; + } + case tp_float: { + // float(1.1) == 1.1 + *py_retval() = argv[1]; + return true; + } + case tp_bool: { + // float(True) == 1.0 + py_newfloat(py_retval(), py_tobool(&argv[1])); + return true; + } + case tp_str: break; // leave to the next block + default: return TypeError("invalid arguments for float()"); + } + // str to float + int size; + const char* data = py_tostrn(py_arg(1), &size); + + if(c11__streq(data, "inf")){ + py_newfloat(py_retval(), INFINITY); + return true; + } + if(c11__streq(data, "-inf")){ + py_newfloat(py_retval(), -INFINITY); + return true; + } + + char* p_end; + py_f64 float_out = strtod(data, &p_end); + if(p_end != data + size){ + return ValueError("invalid literal for float(): %q", data); + } + py_newfloat(py_retval(), float_out); + return true; +} + void pk_VM__init_builtins(pk_VM* self) { /****** tp_int & tp_float ******/ py_bindmagic(tp_int, __add__, _py_int__add__); @@ -208,7 +366,17 @@ void pk_VM__init_builtins(pk_VM* self) { py_bindmagic(tp_int, __neg__, _py_int__neg__); py_bindmagic(tp_float, __neg__, _py_float__neg__); - // TODO: __repr__, __new__, __hash__ + // __repr__ + py_bindmagic(tp_int, __repr__, _py_int__repr__); + py_bindmagic(tp_float, __repr__, _py_float__repr__); + + // __hash__ + py_bindmagic(tp_int, __hash__, _py_int__hash__); + py_bindmagic(tp_float, __hash__, _py_float__hash__); + + // __new__ + py_bindmagic(tp_int, __new__, _py_int__new__); + py_bindmagic(tp_float, __new__, _py_float__new__); // __truediv__ py_bindmagic(tp_int, __truediv__, _py_int__truediv__); diff --git a/src/public/cast.c b/src/public/cast.c index a4972b49..f9748924 100644 --- a/src/public/cast.c +++ b/src/public/cast.c @@ -40,3 +40,11 @@ void* py_touserdata(const py_Ref self) { } bool py_istype(const py_Ref self, py_Type type) { return self->type == type; } + +bool py_checktype(const py_Ref self, py_Type type) { + if(self->type != type) { + // py_raise(PyExc_TypeError, "expected %s, got %s", py_typename(type), py_typename(self->type)); + return false; + } + return true; +} \ No newline at end of file