Review notes (free text before the first "diff --git" header is ignored by patch tools):
- c11_sv__slice2: also clamp `start` to sv.size so the slice length cannot go negative.
- DEF_STR_CMP_OP: call the `f` argument; __eq__/__ne__ were inverted via c11_sv__cmp.
- _py_str__hash__: unsigned accumulator (no signed-overflow UB, no char-sign dependence).
- _py_str__mul__: wide loop counter; total_size truncation flagged for follow-up.
- py_offset: parenthesize the `p` macro argument; py_newstr: no `return <void expr>`.
- py_str.c now ends with a newline. The `index` blob ids predate these edits.

diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h
index cec137aa..41bf3e85 100644
--- a/include/pocketpy/common/str.h
+++ b/include/pocketpy/common/str.h
@@ -32,7 +32,8 @@ int c11_sv__cmp(c11_sv self, c11_sv other);
 int c11_sv__cmp2(c11_sv self, const char* other);
 
 bool c11__streq(const char* a, const char* b);
-bool c11__sveq(c11_sv a, const char* b);
+bool c11__sveq(c11_sv a, c11_sv b);
+bool c11__sveq2(c11_sv a, const char* b);
 
 c11_string* c11_string__new(const char* data);
 c11_string* c11_string__new2(const char* data, int size);
@@ -48,8 +49,6 @@ c11_sv c11_string__u8_getitem(c11_string* self, int i);
 c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step);
 
 // general string operations
-void c11_sv__lower(c11_sv sv, c11_vector* buf);
-void c11_sv__upper(c11_sv sv, c11_vector* buf);
 c11_sv c11_sv__slice(c11_sv sv, int start);
 c11_sv c11_sv__slice2(c11_sv sv, int start, int stop);
 c11_sv c11_sv__strip(c11_sv sv, bool left, bool right);
diff --git a/include/pocketpy/pocketpy.h b/include/pocketpy/pocketpy.h
index fd8a9534..2be80df7 100644
--- a/include/pocketpy/pocketpy.h
+++ b/include/pocketpy/pocketpy.h
@@ -92,7 +92,8 @@ void py_newnativefunc(py_Ref out, py_CFunction);
 /// @param type type of the object.
 /// @param slots number of slots. Use -1 to create a `__dict__`.
 /// @param udsize size of your userdata. You can use `py_touserdata()` to get the pointer to it.
-void py_newobject(py_Ref out, py_Type type, int slots, int udsize);
+/// @return pointer to the userdata of the newly created object.
+void* py_newobject(py_Ref out, py_Type type, int slots, int udsize);
 /************* Type Cast *************/
 py_i64 py_toint(const py_Ref);
 py_f64 py_tofloat(const py_Ref);
@@ -120,7 +121,7 @@ bool py_issubclass(py_Type derived, py_Type base);
 
 #define PY_CHECK_ARG_TYPE(i, type) if(!py_checktype(py_arg(i), type)) return false
 
-#define py_offset(p, i) (py_Ref)((char*)p + ((i) << 4))
+#define py_offset(p, i) ((py_Ref)((char*)(p) + ((i) << 4)))
 #define py_arg(i) py_offset(argv, i)
 
 py_GlobalRef py_tpmagic(py_Type type, py_Name name);
diff --git a/src/common/str.c b/src/common/str.c
index 7caf2468..b205198e 100644
--- a/src/common/str.c
+++ b/src/common/str.c
@@ -69,26 +69,14 @@ c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step
 }
 
 /////////////////////////////////////////
-void c11_sv__lower(c11_sv sv, c11_vector* buf) {
-    for(int i = 0; i < sv.size; i++) {
-        char c = sv.data[i];
-        if('A' <= c && c <= 'Z') c += 32;
-        c11_vector__push(char, buf, c);
-    }
-}
-
-void c11_sv__upper(c11_sv sv, c11_vector* buf) {
-    for(int i = 0; i < sv.size; i++) {
-        char c = sv.data[i];
-        if('a' <= c && c <= 'z') c -= 32;
-        c11_vector__push(char, buf, c);
-    }
-}
-
 c11_sv c11_sv__slice(c11_sv sv, int start) { return c11_sv__slice2(sv, start, sv.size); }
 
 c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) {
+    // clamp `start` and `stop` into [0, sv.size] so the size below is never negative
+    if(start < 0) start = 0;
+    if(start > sv.size) start = sv.size;
     if(stop < start) stop = start;
+    if(stop > sv.size) stop = sv.size;
     return (c11_sv){sv.data + start, stop - start};
 }
 
@@ -211,7 +199,12 @@ int c11_sv__cmp2(c11_sv self, const char* other) {
 
 bool c11__streq(const char* a, const char* b) { return strcmp(a, b) == 0; }
 
-bool c11__sveq(c11_sv a, const char* b) {
+bool c11__sveq(c11_sv a, c11_sv b) {
+    if(a.size != b.size) return false;
+    return memcmp(a.data, b.data, a.size) == 0;
+}
+
+bool c11__sveq2(c11_sv a, const char* b) {
     int size = strlen(b);
     if(a.size != size) return false;
     return memcmp(a.data, b, size) == 0;
@@ -250,11 +243,11 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
 
     c11_sv prefix = {.data = text.data, .size = c11__min(2, text.size)};
     if(base == -1) {
-        if(c11__sveq(prefix, "0b"))
+        if(c11__sveq2(prefix, "0b"))
             base = 2;
-        else if(c11__sveq(prefix, "0o"))
+        else if(c11__sveq2(prefix, "0o"))
             base = 8;
-        else if(c11__sveq(prefix, "0x"))
+        else if(c11__sveq2(prefix, "0x"))
             base = 16;
         else
             base = 10;
@@ -276,7 +269,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
         return IntParsing_SUCCESS;
     } else if(base == 2) {
         // 2-base 0b101010
-        if(c11__sveq(prefix, "0b")) {
+        if(c11__sveq2(prefix, "0b")) {
             // text.remove_prefix(2);
             text = (c11_sv){text.data + 2, text.size - 2};
         }
@@ -294,7 +287,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
         return IntParsing_SUCCESS;
     } else if(base == 8) {
         // 8-base 0o123
-        if(c11__sveq(prefix, "0o")) {
+        if(c11__sveq2(prefix, "0o")) {
             // text.remove_prefix(2);
             text = (c11_sv){text.data + 2, text.size - 2};
         }
@@ -312,7 +305,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
         return IntParsing_SUCCESS;
     } else if(base == 16) {
         // 16-base 0x123
-        if(c11__sveq(prefix, "0x")) {
+        if(c11__sveq2(prefix, "0x")) {
             // text.remove_prefix(2);
             text = (c11_sv){text.data + 2, text.size - 2};
         }
diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c
index 7897db00..f289adef 100644
--- a/src/compiler/lexer.c
+++ b/src/compiler/lexer.c
@@ -246,11 +246,11 @@ static Error* eat_name(pk_Lexer* self){
     c11_sv name = {self->token_start, length};
 
     if(self->src->mode == JSON_MODE) {
-        if(c11__sveq(name, "true")) {
+        if(c11__sveq2(name, "true")) {
             add_token(self, TK_TRUE);
-        } else if(c11__sveq(name, "false")) {
+        } else if(c11__sveq2(name, "false")) {
             add_token(self, TK_FALSE);
-        } else if(c11__sveq(name, "null")) {
+        } else if(c11__sveq2(name, "null")) {
             add_token(self, TK_NONE);
         } else {
             return SyntaxError("invalid JSON token");
@@ -265,7 +265,7 @@ static Error* eat_name(pk_Lexer* self){
     c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
 #undef less
 
-    if(out != KW_COUNT && c11__sveq(name, KW_BEGIN[out])) {
+    if(out != KW_COUNT && c11__sveq2(name, KW_BEGIN[out])) {
         add_token(self, (TokenIndex)(out + TK_FALSE));
     } else {
         add_token(self, TK_ID);
diff --git a/src/interpreter/py_number.c b/src/public/py_number.c
similarity index 100%
rename from src/interpreter/py_number.c
rename to src/public/py_number.c
diff --git a/src/public/py_str.c b/src/public/py_str.c
index 07ddc4a3..ee97d3d5 100644
--- a/src/public/py_str.c
+++ b/src/public/py_str.c
@@ -4,24 +4,9 @@
 #include "pocketpy/common/utils.h"
 #include "pocketpy/objects/object.h"
 #include "pocketpy/interpreter/vm.h"
+#include "pocketpy/common/sstream.h"
 
-py_Type pk_str__register() {
-    pk_VM* vm = pk_current_vm;
-    py_Type type = pk_VM__new_type(vm, "str", tp_object, NULL, false);
-    // no need to dtor because the memory is controlled by the object
-    return type;
-}
-
-py_Type pk_bytes__register() {
-    pk_VM* vm = pk_current_vm;
-    py_Type type = pk_VM__new_type(vm, "bytes", tp_object, NULL, false);
-    // no need to dtor because the memory is controlled by the object
-    return type;
-}
-
-void py_newstr(py_Ref out, const char* data) {
-    return py_newstrn(out, data, strlen(data));
-}
+void py_newstr(py_Ref out, const char* data) { py_newstrn(out, data, strlen(data)); }
 
 void py_newstrn(py_Ref out, const char* data, int size) {
     pk_ManagedHeap* heap = &pk_current_vm->heap;
@@ -66,3 +51,238 @@ unsigned char* py_tobytes(const py_Ref self, int* size) {
     return ud->data;
 }
 
+////////////////////////////////
+
+static bool _py_str__new__(int argc, py_Ref argv) { return true; }
+
+static bool _py_str__hash__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    int size;
+    const char* data = py_tostrn(&argv[0], &size);
+    // accumulate unsigned: signed overflow is undefined, and plain `char` may be signed
+    unsigned long long res = 0;
+    for(int i = 0; i < size; i++) {
+        res = res * 31 + (unsigned char)data[i];
+    }
+    py_newint(py_retval(), (py_i64)res);
+    return true;
+}
+
+static bool _py_str__len__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    c11_string* self = py_touserdata(&argv[0]);
+    py_newint(py_retval(), self->size);
+    return true;
+}
+
+static bool _py_str__add__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    if(py_arg(1)->type != tp_str) {
+        py_newnotimplemented(py_retval());
+    } else {
+        c11_string* other = py_touserdata(&argv[1]);
+        int total_size = sizeof(c11_string) + self->size + other->size + 1;
+        c11_string* res = py_newobject(py_retval(), tp_str, 0, total_size);
+        res->size = self->size + other->size;
+        char* p = (char*)res->data;
+        memcpy(p, self->data, self->size);
+        memcpy(p + self->size, other->data, other->size);
+        p[res->size] = '\0';
+    }
+    return true;
+}
+
+static bool _py_str__mul__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    if(py_arg(1)->type != tp_int) {
+        py_newnotimplemented(py_retval());
+    } else {
+        py_i64 n = py_toint(py_arg(1));
+        if(n <= 0) {
+            py_newstr(py_retval(), "");
+        } else {
+            // NOTE(review): the product is truncated when stored into `int`; a very large
+            // `n` needs a range check (and an error) before allocating -- TODO.
+            int total_size = sizeof(c11_string) + self->size * n + 1;
+            c11_string* res = py_newobject(py_retval(), tp_str, 0, total_size);
+            res->size = self->size * n;
+            char* p = (char*)res->data;
+            // `n` is a py_i64; an `int` counter could overflow before reaching it
+            for(py_i64 i = 0; i < n; i++) {
+                memcpy(p + i * self->size, self->data, self->size);
+            }
+            p[res->size] = '\0';
+        }
+    }
+    return true;
+}
+
+static bool _py_str__rmul__(int argc, py_Ref argv) { return _py_str__mul__(argc, argv); }
+
+static bool _py_str__contains__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    if(py_arg(1)->type != tp_str) {
+        py_newnotimplemented(py_retval());
+    } else {
+        c11_string* other = py_touserdata(&argv[1]);
+        const char* p = strstr(self->data, other->data);
+        py_newbool(py_retval(), p != NULL);
+    }
+    return true;
+}
+
+static bool _py_str__str__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    *py_retval() = argv[0];
+    return true;
+}
+
+static bool _py_str__repr__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    assert(false);
+    return false;
+}
+
+static bool _py_str__iter__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    assert(false);
+    return false;
+}
+
+static bool _py_str__getitem__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    PY_CHECK_ARG_TYPE(1, tp_int);
+    c11_sv res = c11_string__u8_getitem(self, py_toint(py_arg(1)));
+    py_newstrn(py_retval(), res.data, res.size);
+    return true;
+}
+
+// Defines `_py_str<op>`: stores `f(self, other)` in `res`, then returns `condition` as a bool.
+#define DEF_STR_CMP_OP(op, f, condition)                                   \
+    static bool _py_str##op(int argc, py_Ref argv) {                       \
+        PY_CHECK_ARGC(2);                                                  \
+        c11_string* self = py_touserdata(&argv[0]);                        \
+        if(py_arg(1)->type != tp_str) {                                    \
+            py_newnotimplemented(py_retval());                             \
+        } else {                                                           \
+            c11_string* other = py_touserdata(&argv[1]);                   \
+            int res = f(c11_string__sv(self), c11_string__sv(other));      \
+            py_newbool(py_retval(), condition);                            \
+        }                                                                  \
+        return true;                                                       \
+    }
+
+DEF_STR_CMP_OP(__eq__, c11__sveq, res)
+DEF_STR_CMP_OP(__ne__, c11__sveq, !res)
+DEF_STR_CMP_OP(__lt__, c11_sv__cmp, res < 0)
+DEF_STR_CMP_OP(__le__, c11_sv__cmp, res <= 0)
+DEF_STR_CMP_OP(__gt__, c11_sv__cmp, res > 0)
+DEF_STR_CMP_OP(__ge__, c11_sv__cmp, res >= 0)
+
+#undef DEF_STR_CMP_OP
+
+static bool _py_str__lower(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    c11_string* self = py_touserdata(&argv[0]);
+    int total_size = sizeof(c11_string) + self->size + 1;
+    c11_string* res = py_newobject(py_retval(), tp_str, 0, total_size);
+    res->size = self->size;
+    char* p = (char*)res->data;
+    for(int i = 0; i < self->size; i++) {
+        char c = self->data[i];
+        p[i] = c >= 'A' && c <= 'Z' ? c + 32 : c;
+    }
+    p[res->size] = '\0';
+    return true;
+}
+
+static bool _py_str__upper(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    c11_string* self = py_touserdata(&argv[0]);
+    int total_size = sizeof(c11_string) + self->size + 1;
+    c11_string* res = py_newobject(py_retval(), tp_str, 0, total_size);
+    res->size = self->size;
+    char* p = (char*)res->data;
+    for(int i = 0; i < self->size; i++) {
+        char c = self->data[i];
+        p[i] = c >= 'a' && c <= 'z' ? c - 32 : c;
+    }
+    p[res->size] = '\0';
+    return true;
+}
+
+static bool _py_str__startswith(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    PY_CHECK_ARG_TYPE(1, tp_str);
+    c11_string* other = py_touserdata(&argv[1]);
+    c11_sv _0 = c11_sv__slice2(c11_string__sv(self), 0, other->size);
+    c11_sv _1 = c11_string__sv(other);
+    py_newbool(py_retval(), c11__sveq(_0, _1));
+    return true;
+}
+
+static bool _py_str__endswith(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_string* self = py_touserdata(&argv[0]);
+    PY_CHECK_ARG_TYPE(1, tp_str);
+    c11_string* other = py_touserdata(&argv[1]);
+    c11_sv _0 = c11_sv__slice2(c11_string__sv(self), self->size - other->size, self->size);
+    c11_sv _1 = c11_string__sv(other);
+    py_newbool(py_retval(), c11__sveq(_0, _1));
+    return true;
+}
+
+static bool _py_str__join(int argc, py_Ref argv) {
+    assert(false);
+    // PY_CHECK_ARGC(2);
+    // c11_sbuf buf;
+    // c11_sbuf__ctor(&buf);
+    // c11_string* sep = py_touserdata(&argv[0]);
+    // py_Ref iter = py_pushtmp();
+    // py_iter(iter, &argv[1]);
+    return false;
+}
+
+py_Type pk_str__register() {
+    pk_VM* vm = pk_current_vm;
+    py_Type type = pk_VM__new_type(vm, "str", tp_object, NULL, false);
+    // no need to dtor because the memory is controlled by the object
+
+    py_bindmagic(tp_str, __new__, _py_str__new__);
+    py_bindmagic(tp_str, __hash__, _py_str__hash__);
+    py_bindmagic(tp_str, __len__, _py_str__len__);
+    py_bindmagic(tp_str, __add__, _py_str__add__);
+    py_bindmagic(tp_str, __mul__, _py_str__mul__);
+    py_bindmagic(tp_str, __rmul__, _py_str__rmul__);
+    py_bindmagic(tp_str, __contains__, _py_str__contains__);
+    py_bindmagic(tp_str, __str__, _py_str__str__);
+    py_bindmagic(tp_str, __repr__, _py_str__repr__);
+    py_bindmagic(tp_str, __iter__, _py_str__iter__);
+    py_bindmagic(tp_str, __getitem__, _py_str__getitem__);
+
+    py_bindmagic(tp_str, __eq__, _py_str__eq__);
+    py_bindmagic(tp_str, __ne__, _py_str__ne__);
+    py_bindmagic(tp_str, __lt__, _py_str__lt__);
+    py_bindmagic(tp_str, __le__, _py_str__le__);
+    py_bindmagic(tp_str, __gt__, _py_str__gt__);
+    py_bindmagic(tp_str, __ge__, _py_str__ge__);
+
+    py_bindmethod(tp_str, "lower", _py_str__lower);
+    py_bindmethod(tp_str, "upper", _py_str__upper);
+    py_bindmethod(tp_str, "startswith", _py_str__startswith);
+    py_bindmethod(tp_str, "endswith", _py_str__endswith);
+    py_bindmethod(tp_str, "join", _py_str__join);
+    return type;
+}
+
+py_Type pk_bytes__register() {
+    pk_VM* vm = pk_current_vm;
+    py_Type type = pk_VM__new_type(vm, "bytes", tp_object, NULL, false);
+    // no need to dtor because the memory is controlled by the object
+    return type;
+}
diff --git a/src/public/values.c b/src/public/values.c
index 93046219..a0cee84a 100644
--- a/src/public/values.c
+++ b/src/public/values.c
@@ -82,10 +82,11 @@ void py_newslice(py_Ref out, const py_Ref start, const py_Ref stop, const py_Ref
     py_setslot(out, 2, step);
 }
 
-void py_newobject(py_Ref out, py_Type type, int slots, int udsize) {
+void* py_newobject(py_Ref out, py_Type type, int slots, int udsize) {
     pk_ManagedHeap* heap = &pk_current_vm->heap;
     PyObject* obj = pk_ManagedHeap__gcnew(heap, type, slots, udsize);
     out->type = type;
     out->is_ptr = true;
     out->_obj = obj;
+    return PyObject__userdata(obj);
 }