add some string bindings

This commit is contained in:
blueloveTH 2024-07-05 22:55:24 +08:00
parent a86c134377
commit d74ca31f68
7 changed files with 256 additions and 50 deletions

View File

@ -32,7 +32,8 @@ int c11_sv__cmp(c11_sv self, c11_sv other);
int c11_sv__cmp2(c11_sv self, const char* other); int c11_sv__cmp2(c11_sv self, const char* other);
bool c11__streq(const char* a, const char* b); bool c11__streq(const char* a, const char* b);
bool c11__sveq(c11_sv a, const char* b); bool c11__sveq(c11_sv a, c11_sv b);
bool c11__sveq2(c11_sv a, const char* b);
c11_string* c11_string__new(const char* data); c11_string* c11_string__new(const char* data);
c11_string* c11_string__new2(const char* data, int size); c11_string* c11_string__new2(const char* data, int size);
@ -48,8 +49,6 @@ c11_sv c11_string__u8_getitem(c11_string* self, int i);
c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step); c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step);
// general string operations // general string operations
void c11_sv__lower(c11_sv sv, c11_vector* buf);
void c11_sv__upper(c11_sv sv, c11_vector* buf);
c11_sv c11_sv__slice(c11_sv sv, int start); c11_sv c11_sv__slice(c11_sv sv, int start);
c11_sv c11_sv__slice2(c11_sv sv, int start, int stop); c11_sv c11_sv__slice2(c11_sv sv, int start, int stop);
c11_sv c11_sv__strip(c11_sv sv, bool left, bool right); c11_sv c11_sv__strip(c11_sv sv, bool left, bool right);

View File

@ -92,7 +92,7 @@ void py_newnativefunc(py_Ref out, py_CFunction);
/// @param type type of the object. /// @param type type of the object.
/// @param slots number of slots. Use -1 to create a `__dict__`. /// @param slots number of slots. Use -1 to create a `__dict__`.
/// @param udsize size of your userdata. You can use `py_touserdata()` to get the pointer to it. /// @param udsize size of your userdata. You can use `py_touserdata()` to get the pointer to it.
void py_newobject(py_Ref out, py_Type type, int slots, int udsize); void* py_newobject(py_Ref out, py_Type type, int slots, int udsize);
/************* Type Cast *************/ /************* Type Cast *************/
py_i64 py_toint(const py_Ref); py_i64 py_toint(const py_Ref);
py_f64 py_tofloat(const py_Ref); py_f64 py_tofloat(const py_Ref);
@ -120,7 +120,7 @@ bool py_issubclass(py_Type derived, py_Type base);
#define PY_CHECK_ARG_TYPE(i, type) if(!py_checktype(py_arg(i), type)) return false #define PY_CHECK_ARG_TYPE(i, type) if(!py_checktype(py_arg(i), type)) return false
#define py_offset(p, i) (py_Ref)((char*)p + ((i) << 4)) #define py_offset(p, i) ((py_Ref)((char*)p + ((i) << 4)))
#define py_arg(i) py_offset(argv, i) #define py_arg(i) py_offset(argv, i)
py_GlobalRef py_tpmagic(py_Type type, py_Name name); py_GlobalRef py_tpmagic(py_Type type, py_Name name);

View File

@ -69,26 +69,12 @@ c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step
} }
///////////////////////////////////////// /////////////////////////////////////////
/// Append an ASCII-lowercased copy of `sv` to `buf`, one `char` per push.
/// Bytes in 'A'..'Z' are shifted by 32; every other byte is copied unchanged.
void c11_sv__lower(c11_sv sv, c11_vector* buf) {
    int pos = 0;
    while(pos < sv.size) {
        char ch = sv.data[pos];
        bool is_upper = ch >= 'A' && ch <= 'Z';
        if(is_upper) ch += 32;
        c11_vector__push(char, buf, ch);
        pos++;
    }
}
/// Append an ASCII-uppercased copy of `sv` to `buf`, one `char` per push.
/// Bytes in 'a'..'z' are shifted by -32; every other byte is copied unchanged.
void c11_sv__upper(c11_sv sv, c11_vector* buf) {
    int pos = 0;
    while(pos < sv.size) {
        char ch = sv.data[pos];
        bool is_lower = ch >= 'a' && ch <= 'z';
        if(is_lower) ch -= 32;
        c11_vector__push(char, buf, ch);
        pos++;
    }
}
c11_sv c11_sv__slice(c11_sv sv, int start) { return c11_sv__slice2(sv, start, sv.size); } c11_sv c11_sv__slice(c11_sv sv, int start) { return c11_sv__slice2(sv, start, sv.size); }
c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) { c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) {
if(start < 0) start = 0;
if(stop < start) stop = start; if(stop < start) stop = start;
if(stop > sv.size) stop = sv.size;
return (c11_sv){sv.data + start, stop - start}; return (c11_sv){sv.data + start, stop - start};
} }
@ -211,7 +197,12 @@ int c11_sv__cmp2(c11_sv self, const char* other) {
bool c11__streq(const char* a, const char* b) { return strcmp(a, b) == 0; } bool c11__streq(const char* a, const char* b) { return strcmp(a, b) == 0; }
bool c11__sveq(c11_sv a, const char* b) { bool c11__sveq(c11_sv a, c11_sv b) {
if(a.size != b.size) return false;
return memcmp(a.data, b.data, a.size) == 0;
}
bool c11__sveq2(c11_sv a, const char* b) {
int size = strlen(b); int size = strlen(b);
if(a.size != size) return false; if(a.size != size) return false;
return memcmp(a.data, b, size) == 0; return memcmp(a.data, b, size) == 0;
@ -250,11 +241,11 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
c11_sv prefix = {.data = text.data, .size = c11__min(2, text.size)}; c11_sv prefix = {.data = text.data, .size = c11__min(2, text.size)};
if(base == -1) { if(base == -1) {
if(c11__sveq(prefix, "0b")) if(c11__sveq2(prefix, "0b"))
base = 2; base = 2;
else if(c11__sveq(prefix, "0o")) else if(c11__sveq2(prefix, "0o"))
base = 8; base = 8;
else if(c11__sveq(prefix, "0x")) else if(c11__sveq2(prefix, "0x"))
base = 16; base = 16;
else else
base = 10; base = 10;
@ -276,7 +267,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
return IntParsing_SUCCESS; return IntParsing_SUCCESS;
} else if(base == 2) { } else if(base == 2) {
// 2-base 0b101010 // 2-base 0b101010
if(c11__sveq(prefix, "0b")) { if(c11__sveq2(prefix, "0b")) {
// text.remove_prefix(2); // text.remove_prefix(2);
text = (c11_sv){text.data + 2, text.size - 2}; text = (c11_sv){text.data + 2, text.size - 2};
} }
@ -294,7 +285,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
return IntParsing_SUCCESS; return IntParsing_SUCCESS;
} else if(base == 8) { } else if(base == 8) {
// 8-base 0o123 // 8-base 0o123
if(c11__sveq(prefix, "0o")) { if(c11__sveq2(prefix, "0o")) {
// text.remove_prefix(2); // text.remove_prefix(2);
text = (c11_sv){text.data + 2, text.size - 2}; text = (c11_sv){text.data + 2, text.size - 2};
} }
@ -312,7 +303,7 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
return IntParsing_SUCCESS; return IntParsing_SUCCESS;
} else if(base == 16) { } else if(base == 16) {
// 16-base 0x123 // 16-base 0x123
if(c11__sveq(prefix, "0x")) { if(c11__sveq2(prefix, "0x")) {
// text.remove_prefix(2); // text.remove_prefix(2);
text = (c11_sv){text.data + 2, text.size - 2}; text = (c11_sv){text.data + 2, text.size - 2};
} }

View File

@ -246,11 +246,11 @@ static Error* eat_name(pk_Lexer* self){
c11_sv name = {self->token_start, length}; c11_sv name = {self->token_start, length};
if(self->src->mode == JSON_MODE) { if(self->src->mode == JSON_MODE) {
if(c11__sveq(name, "true")) { if(c11__sveq2(name, "true")) {
add_token(self, TK_TRUE); add_token(self, TK_TRUE);
} else if(c11__sveq(name, "false")) { } else if(c11__sveq2(name, "false")) {
add_token(self, TK_FALSE); add_token(self, TK_FALSE);
} else if(c11__sveq(name, "null")) { } else if(c11__sveq2(name, "null")) {
add_token(self, TK_NONE); add_token(self, TK_NONE);
} else { } else {
return SyntaxError("invalid JSON token"); return SyntaxError("invalid JSON token");
@ -265,7 +265,7 @@ static Error* eat_name(pk_Lexer* self){
c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out); c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
#undef less #undef less
if(out != KW_COUNT && c11__sveq(name, KW_BEGIN[out])) { if(out != KW_COUNT && c11__sveq2(name, KW_BEGIN[out])) {
add_token(self, (TokenIndex)(out + TK_FALSE)); add_token(self, (TokenIndex)(out + TK_FALSE));
} else { } else {
add_token(self, TK_ID); add_token(self, TK_ID);

View File

@ -4,24 +4,9 @@
#include "pocketpy/common/utils.h" #include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.h" #include "pocketpy/objects/object.h"
#include "pocketpy/interpreter/vm.h" #include "pocketpy/interpreter/vm.h"
#include "pocketpy/common/sstream.h"
py_Type pk_str__register() { void py_newstr(py_Ref out, const char* data) { return py_newstrn(out, data, strlen(data)); }
pk_VM* vm = pk_current_vm;
py_Type type = pk_VM__new_type(vm, "str", tp_object, NULL, false);
// no need to dtor because the memory is controlled by the object
return type;
}
/// Create the `bytes` type (derived from `tp_object`) on the current VM and return it.
py_Type pk_bytes__register() {
    // no need to dtor because the memory is controlled by the object
    return pk_VM__new_type(pk_current_vm, "bytes", tp_object, NULL, false);
}
/// Build a string value in `out` from the NUL-terminated C string `data`.
/// The length is measured with `strlen` and the work is forwarded to `py_newstrn`.
void py_newstr(py_Ref out, const char* data) {
    // A `void` function must not `return` an expression (C11 6.8.6.4),
    // so call the helper as a plain statement.
    py_newstrn(out, data, (int)strlen(data));
}
void py_newstrn(py_Ref out, const char* data, int size) { void py_newstrn(py_Ref out, const char* data, int size) {
pk_ManagedHeap* heap = &pk_current_vm->heap; pk_ManagedHeap* heap = &pk_current_vm->heap;
@ -66,3 +51,233 @@ unsigned char* py_tobytes(const py_Ref self, int* size) {
return ud->data; return ud->data;
} }
////////////////////////////////
// `str.__new__` placeholder: ignores `argc`/`argv`, writes nothing to py_retval(), and reports success.
// NOTE(review): presumably real construction/conversion is still to be implemented — confirm.
static bool _py_str__new__(int argc, py_Ref argv) { return true; }
/// `str.__hash__`: multiplicative hash (factor 31) over the bytes returned by `py_tostrn`.
/// Stores the hash in `py_retval()` as an int and returns true.
static bool _py_str__hash__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    int size;
    const char* data = py_tostrn(&argv[0], &size);
    // Accumulate in an unsigned type. The previous signed `res * 31 + data[i]` overflows
    // `py_i64` after only a handful of characters, which is undefined behavior in C.
    // Unsigned arithmetic wraps instead and, on two's-complement targets, yields the
    // same bit pattern the signed version produced.
    unsigned long long acc = 0;
    for(int i = 0; i < size; i++) {
        // Go through `long long` first so a negative `char` contributes the same
        // (sign-extended) value it did before.
        acc = acc * 31u + (unsigned long long)(long long)data[i];
    }
    py_newint(py_retval(), (py_i64)acc);
    return true;
}
/// `str.__len__`: stores the `size` field of the string userdata in `py_retval()` as an int.
static bool _py_str__len__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    c11_string* str = py_touserdata(argv);
    py_Ref out = py_retval();
    py_newint(out, str->size);
    return true;
}
/// `str.__add__`: concatenates two strings into a new `tp_str` object in `py_retval()`.
/// If the right operand is not a `tp_str`, NotImplemented is stored instead.
static bool _py_str__add__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(2);
    c11_string* lhs = py_touserdata(argv);
    if(py_arg(1)->type != tp_str) {
        py_newnotimplemented(py_retval());
        return true;
    }

    c11_string* rhs = py_touserdata(argv + 1);
    // header + both payloads + trailing NUL
    int alloc_size = sizeof(c11_string) + lhs->size + rhs->size + 1;
    c11_string* joined = py_newobject(py_retval(), tp_str, 0, alloc_size);
    joined->size = lhs->size + rhs->size;

    char* dst = (char*)joined->data;
    memcpy(dst, lhs->data, lhs->size);
    memcpy(dst + lhs->size, rhs->data, rhs->size);
    dst[joined->size] = '\0';
    return true;
}
/// `str.__mul__`: repeats the string `n` times into a new `tp_str` object in `py_retval()`.
/// - If the right operand is not a `tp_int`, NotImplemented is stored instead.
/// - If `n <= 0` or the string is empty, the result is the empty string.
static bool _py_str__mul__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(2);
    c11_string* self = py_touserdata(&argv[0]);
    if(py_arg(1)->type != tp_int) {
        py_newnotimplemented(py_retval());
        return true;
    }

    py_i64 n = py_toint(py_arg(1));
    // Repeating an empty string is always empty. Answering here avoids spinning
    // through `n` zero-length copies (n may be astronomically large).
    if(n <= 0 || self->size == 0) {
        py_newstr(py_retval(), "");
        return true;
    }

    // NOTE(review): `self->size * n` is still narrowed to `int` here; a product that does
    // not fit in `int` under-allocates. Rejecting it needs an error-raising API that is
    // not visible from this block — add a range check once one is available.
    int total_size = sizeof(c11_string) + self->size * n + 1;
    c11_string* res = py_newobject(py_retval(), tp_str, 0, total_size);
    res->size = self->size * n;

    char* p = (char*)res->data;
    // Use a counter as wide as `n` and advance the write cursor, rather than
    // comparing an `int` against a 64-bit bound and multiplying `int * int`.
    for(py_i64 i = 0; i < n; i++) {
        memcpy(p, self->data, self->size);
        p += self->size;
    }
    *p = '\0';
    return true;
}
/// `str.__rmul__`: handled exactly like `__mul__`, with the same arguments passed through.
static bool _py_str__rmul__(int argc, py_Ref argv) {
    bool ok = _py_str__mul__(argc, argv);
    return ok;
}
/// `str.__contains__`: stores a bool in `py_retval()` telling whether the second
/// string occurs inside the first. If the second operand is not a `tp_str`,
/// NotImplemented is stored instead.
static bool _py_str__contains__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(2);
    c11_string* self = py_touserdata(&argv[0]);
    if(py_arg(1)->type != tp_str) {
        py_newnotimplemented(py_retval());
        return true;
    }

    c11_string* other = py_touserdata(&argv[1]);
    // Search by explicit length instead of `strstr`: `strstr` stops at the first NUL,
    // so it mishandles strings that contain embedded '\0' bytes.
    // An empty needle matches at offset 0; a needle longer than the haystack never matches.
    bool found = false;
    int last_start = self->size - other->size;
    for(int i = 0; i <= last_start; i++) {
        if(memcmp(self->data + i, other->data, other->size) == 0) {
            found = true;
            break;
        }
    }
    py_newbool(py_retval(), found);
    return true;
}
/// `str.__str__`: copies the receiver itself into `py_retval()`.
static bool _py_str__str__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    py_Ref out = py_retval();
    *out = *argv;
    return true;
}
// `str.__repr__` stub: not implemented in this revision.
// After the argument-count check it hits `assert(false)`; when assertions are
// compiled out (NDEBUG) it falls through and returns false.
static bool _py_str__repr__(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
assert(false);
return false;
}
// `str.__iter__` stub: not implemented in this revision.
// After the argument-count check it hits `assert(false)`; when assertions are
// compiled out (NDEBUG) it falls through and returns false.
static bool _py_str__iter__(int argc, py_Ref argv) {
PY_CHECK_ARGC(1);
assert(false);
return false;
}
/// `str.__getitem__` for integer indices: fetches the item via `c11_string__u8_getitem`
/// and stores it in `py_retval()` as a new string. The index must be a `tp_int`.
/// NOTE(review): no range check is done here — presumably `c11_string__u8_getitem`
/// expects a valid, non-negative index; confirm.
static bool _py_str__getitem__(int argc, py_Ref argv) {
    PY_CHECK_ARGC(2);
    c11_string* str = py_touserdata(argv);
    PY_CHECK_ARG_TYPE(1, tp_int);
    py_Ref index_arg = py_arg(1);
    c11_sv item = c11_string__u8_getitem(str, py_toint(index_arg));
    py_newstrn(py_retval(), item.data, item.size);
    return true;
}
// Generates the comparison magic `_py_str<op>`.
//   op        - magic name suffix, e.g. `__eq__`
//   f         - comparator called as f(c11_sv, c11_sv); its result is stored in `int res`
//   condition - expression over `res` that becomes the boolean result
// If the right operand is not a `tp_str`, NotImplemented is stored instead.
//
// The comparator `f` must actually be invoked: `__eq__`/`__ne__` pass `c11__sveq`
// (non-zero when equal), so `res` / `!res` are only correct with that function.
// Previously `c11_sv__cmp` was hard-coded (zero when equal), which inverted both results.
#define DEF_STR_CMP_OP(op, f, condition)                                                           \
    static bool _py_str##op(int argc, py_Ref argv) {                                               \
        PY_CHECK_ARGC(2);                                                                          \
        c11_string* self = py_touserdata(&argv[0]);                                                \
        if(py_arg(1)->type != tp_str) {                                                            \
            py_newnotimplemented(py_retval());                                                     \
        } else {                                                                                   \
            c11_string* other = py_touserdata(&argv[1]);                                           \
            int res = f(c11_string__sv(self), c11_string__sv(other));                              \
            py_newbool(py_retval(), condition);                                                    \
        }                                                                                          \
        return true;                                                                               \
    }

DEF_STR_CMP_OP(__eq__, c11__sveq, res)
DEF_STR_CMP_OP(__ne__, c11__sveq, !res)
DEF_STR_CMP_OP(__lt__, c11_sv__cmp, res < 0)
DEF_STR_CMP_OP(__le__, c11_sv__cmp, res <= 0)
DEF_STR_CMP_OP(__gt__, c11_sv__cmp, res > 0)
DEF_STR_CMP_OP(__ge__, c11_sv__cmp, res >= 0)

#undef DEF_STR_CMP_OP
/// `str.lower()`: stores a new `tp_str` in `py_retval()` in which every byte in
/// 'A'..'Z' is shifted by 32; all other bytes are copied unchanged.
static bool _py_str__lower(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    c11_string* src = py_touserdata(argv);
    // header + payload + trailing NUL
    int alloc_size = sizeof(c11_string) + src->size + 1;
    c11_string* dst = py_newobject(py_retval(), tp_str, 0, alloc_size);
    dst->size = src->size;

    char* out = (char*)dst->data;
    int pos = 0;
    while(pos < src->size) {
        char ch = src->data[pos];
        if(ch >= 'A' && ch <= 'Z') ch += 32;
        out[pos] = ch;
        pos++;
    }
    out[dst->size] = '\0';
    return true;
}
/// `str.upper()`: stores a new `tp_str` in `py_retval()` in which every byte in
/// 'a'..'z' is shifted by -32; all other bytes are copied unchanged.
static bool _py_str__upper(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    c11_string* src = py_touserdata(argv);
    // header + payload + trailing NUL
    int alloc_size = sizeof(c11_string) + src->size + 1;
    c11_string* dst = py_newobject(py_retval(), tp_str, 0, alloc_size);
    dst->size = src->size;

    char* out = (char*)dst->data;
    int pos = 0;
    while(pos < src->size) {
        char ch = src->data[pos];
        if(ch >= 'a' && ch <= 'z') ch -= 32;
        out[pos] = ch;
        pos++;
    }
    out[dst->size] = '\0';
    return true;
}
/// `str.startswith(prefix)`: compares the leading `prefix->size` bytes of the receiver
/// with `prefix` and stores the boolean outcome in `py_retval()`. `prefix` must be a `tp_str`.
/// NOTE(review): when the prefix is longer than the receiver this relies on
/// `c11_sv__slice2` bounding `stop` to the view size — confirm.
static bool _py_str__startswith(int argc, py_Ref argv) {
    PY_CHECK_ARGC(2);
    c11_string* self = py_touserdata(argv);
    PY_CHECK_ARG_TYPE(1, tp_str);
    c11_string* prefix = py_touserdata(argv + 1);

    c11_sv head = c11_sv__slice2(c11_string__sv(self), 0, prefix->size);
    c11_sv wanted = c11_string__sv(prefix);
    bool matches = c11__sveq(head, wanted);
    py_newbool(py_retval(), matches);
    return true;
}
/// `str.endswith(suffix)`: compares the trailing `suffix->size` bytes of the receiver
/// with `suffix` and stores the boolean outcome in `py_retval()`. `suffix` must be a `tp_str`.
/// NOTE(review): when the suffix is longer than the receiver the start offset is negative;
/// this relies on `c11_sv__slice2` bounding `start` at 0 — confirm.
static bool _py_str__endswith(int argc, py_Ref argv) {
    PY_CHECK_ARGC(2);
    c11_string* self = py_touserdata(argv);
    PY_CHECK_ARG_TYPE(1, tp_str);
    c11_string* suffix = py_touserdata(argv + 1);

    int tail_start = self->size - suffix->size;
    c11_sv tail = c11_sv__slice2(c11_string__sv(self), tail_start, self->size);
    c11_sv wanted = c11_string__sv(suffix);
    bool matches = c11__sveq(tail, wanted);
    py_newbool(py_retval(), matches);
    return true;
}
// `str.join` stub: not implemented in this revision.
// It hits `assert(false)` immediately; when assertions are compiled out (NDEBUG)
// it returns false. The commented-out lines are a sketch of the intended body.
static bool _py_str__join(int argc, py_Ref argv) {
assert(false);
// PY_CHECK_ARGC(2);
// c11_sbuf buf;
// c11_sbuf__ctor(&buf);
// c11_string* sep = py_touserdata(&argv[0]);
// py_Ref iter = py_pushtmp();
// py_iter(iter, &argv[1]);
return false;
}
// Creates the `str` type (derived from `tp_object`) on the current VM, attaches its
// magic methods and named methods, and returns the type created by pk_VM__new_type.
py_Type pk_str__register() {
pk_VM* vm = pk_current_vm;
py_Type type = pk_VM__new_type(vm, "str", tp_object, NULL, false);
// no need to dtor because the memory is controlled by the object
// NOTE(review): all bindings below target the constant `tp_str`, not the freshly
// created `type` — this presumes the two are equal; confirm against registration order.
// construction, hashing and length
py_bindmagic(tp_str, __new__, _py_str__new__);
py_bindmagic(tp_str, __hash__, _py_str__hash__);
py_bindmagic(tp_str, __len__, _py_str__len__);
// concatenation, repetition and membership
py_bindmagic(tp_str, __add__, _py_str__add__);
py_bindmagic(tp_str, __mul__, _py_str__mul__);
py_bindmagic(tp_str, __rmul__, _py_str__rmul__);
py_bindmagic(tp_str, __contains__, _py_str__contains__);
// conversion, iteration and indexing (__repr__ and __iter__ are still assert(false) stubs)
py_bindmagic(tp_str, __str__, _py_str__str__);
py_bindmagic(tp_str, __repr__, _py_str__repr__);
py_bindmagic(tp_str, __iter__, _py_str__iter__);
py_bindmagic(tp_str, __getitem__, _py_str__getitem__);
// comparisons generated by DEF_STR_CMP_OP
py_bindmagic(tp_str, __eq__, _py_str__eq__);
py_bindmagic(tp_str, __ne__, _py_str__ne__);
py_bindmagic(tp_str, __lt__, _py_str__lt__);
py_bindmagic(tp_str, __le__, _py_str__le__);
py_bindmagic(tp_str, __gt__, _py_str__gt__);
py_bindmagic(tp_str, __ge__, _py_str__ge__);
// named methods (`join` is still an assert(false) stub)
py_bindmethod(tp_str, "lower", _py_str__lower);
py_bindmethod(tp_str, "upper", _py_str__upper);
py_bindmethod(tp_str, "startswith", _py_str__startswith);
py_bindmethod(tp_str, "endswith", _py_str__endswith);
py_bindmethod(tp_str, "join", _py_str__join);
return type;
}
/// Create the `bytes` type (derived from `tp_object`) on the current VM and return it.
py_Type pk_bytes__register() {
    // no need to dtor because the memory is controlled by the object
    return pk_VM__new_type(pk_current_vm, "bytes", tp_object, NULL, false);
}

View File

@ -82,10 +82,11 @@ void py_newslice(py_Ref out, const py_Ref start, const py_Ref stop, const py_Ref
py_setslot(out, 2, step); py_setslot(out, 2, step);
} }
void py_newobject(py_Ref out, py_Type type, int slots, int udsize) { void* py_newobject(py_Ref out, py_Type type, int slots, int udsize) {
pk_ManagedHeap* heap = &pk_current_vm->heap; pk_ManagedHeap* heap = &pk_current_vm->heap;
PyObject* obj = pk_ManagedHeap__gcnew(heap, type, slots, udsize); PyObject* obj = pk_ManagedHeap__gcnew(heap, type, slots, udsize);
out->type = type; out->type = type;
out->is_ptr = true; out->is_ptr = true;
out->_obj = obj; out->_obj = obj;
return PyObject__userdata(obj);
} }