From ceb49d832babf8ed5224458fedb32cdf4f591549 Mon Sep 17 00:00:00 2001
From: blueloveTH
Date: Tue, 6 Aug 2024 12:46:16 +0800
Subject: [PATCH] add `bytes`

Add the basic `bytes` operations and the helpers they rely on:

- c11_bytes__eq(): equality for c11_bytes (size check, then memcmp).
- builtins chr() / ord(): chr() accepts integers in range(128); ord()
  accepts a str whose c11_sv size is exactly 1.
- str.encode() / bytes.decode(): copy the underlying bytes between a
  str and a bytes object.
- bytes magic methods: __repr__, __getitem__ (int index or slice),
  __eq__, __ne__ and __add__.

Also collapse the one-line str strip/just wrappers onto single lines
and rename tests/68_bytes.py to tests/46_bytes.py.
---
 include/pocketpy/common/str.h      |   2 +
 src/common/str.c                   |   4 +
 src/public/modules.c               |  25 ++++++
 src/public/py_str.c                | 125 +++++++++++++++++++++++++----
 tests/{68_bytes.py => 46_bytes.py} |   3 +-
 5 files changed, 142 insertions(+), 17 deletions(-)
 rename tests/{68_bytes.py => 46_bytes.py} (93%)

diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h
index 5b9eb776..59d01c78 100644
--- a/include/pocketpy/common/str.h
+++ b/include/pocketpy/common/str.h
@@ -21,6 +21,8 @@ typedef struct c11_bytes{
     unsigned char data[]; // flexible array member
 } c11_bytes;
 
+bool c11_bytes__eq(c11_bytes* self, c11_bytes* other);
+
 int c11_sv__cmp(c11_sv self, c11_sv other);
 int c11_sv__cmp2(c11_sv self, const char* other);
 
diff --git a/src/common/str.c b/src/common/str.c
index a8b885b2..88dee8a8 100644
--- a/src/common/str.c
+++ b/src/common/str.c
@@ -232,6 +232,10 @@ int c11__byte_index_to_unicode(const char* data, int n) {
 }
 
 //////////////
+bool c11_bytes__eq(c11_bytes* self, c11_bytes* other) {
+    if(self->size != other->size) return false;
+    return memcmp(self->data, other->data, self->size) == 0;
+}
 
 int c11_sv__cmp(c11_sv self, c11_sv other) {
     int res = strncmp(self.data, other.data, c11__min(self.size, other.size));
diff --git a/src/public/modules.c b/src/public/modules.c
index 5c1a7b13..b173d262 100644
--- a/src/public/modules.c
+++ b/src/public/modules.c
@@ -409,6 +409,28 @@ static bool builtins_delattr(int argc, py_Ref argv) {
     return py_delattr(py_arg(0), name);
 }
 
+static bool builtins_chr(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    PY_CHECK_ARG_TYPE(0, tp_int);
+    py_i64 val = py_toint(py_arg(0));
+    if(val < 0 || val >= 128) {  // valid inputs are exactly range(128)
+        return ValueError("chr() arg not in range(128)");
+    }
+    py_newstrn(py_retval(), &(char){(char)val}, 1);  // one byte, independent of host endianness
+    return true;
+}
+
+static bool builtins_ord(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    PY_CHECK_ARG_TYPE(0, tp_str);
+    c11_sv sv = py_tosv(py_arg(0));
+    if(sv.size != 1) {
+        return TypeError("ord() expected a character, but string of length %d found", sv.size);
+    }
+    py_newint(py_retval(), (unsigned char)sv.data[0]);  // plain char may be signed
+    return true;
+}
+
 py_TValue pk_builtins__register() {
     py_Ref builtins = py_newmodule("builtins");
     py_bindfunc(builtins, "repr", builtins_repr);
@@ -436,6 +458,9 @@ py_TValue pk_builtins__register() {
     py_bindfunc(builtins, "hasattr", builtins_hasattr);
     py_bindfunc(builtins, "delattr", builtins_delattr);
 
+    py_bindfunc(builtins, "chr", builtins_chr);
+    py_bindfunc(builtins, "ord", builtins_ord);
+
     // None __repr__
     py_bindmagic(tp_NoneType, __repr__, NoneType__repr__);
     return *builtins;
diff --git a/src/public/py_str.c b/src/public/py_str.c
index 0f68144a..11e2596b 100644
--- a/src/public/py_str.c
+++ b/src/public/py_str.c
@@ -190,7 +190,7 @@ static bool str__getitem__(int argc, py_Ref argv) {
 }
 
 #define DEF_STR_CMP_OP(op, __f, __cond)                                                            \
-    static bool str##op(int argc, py_Ref argv) {                                                  \
+    static bool str##op(int argc, py_Ref argv) {                                                   \
         PY_CHECK_ARGC(2);                                                                          \
         c11_string* self = py_touserdata(&argv[0]);                                                \
         if(py_arg(1)->type != tp_str) {                                                            \
@@ -355,17 +355,11 @@ static bool str__strip_impl(bool left, bool right, int argc, py_Ref argv) {
     return true;
 }
 
-static bool str_strip(int argc, py_Ref argv) {
-    return str__strip_impl(true, true, argc, argv);
-}
+static bool str_strip(int argc, py_Ref argv) { return str__strip_impl(true, true, argc, argv); }
 
-static bool str_lstrip(int argc, py_Ref argv) {
-    return str__strip_impl(true, false, argc, argv);
-}
+static bool str_lstrip(int argc, py_Ref argv) { return str__strip_impl(true, false, argc, argv); }
 
-static bool str_rstrip(int argc, py_Ref argv) {
-    return str__strip_impl(false, true, argc, argv);
-}
+static bool str_rstrip(int argc, py_Ref argv) { return str__strip_impl(false, true, argc, argv); }
 
 static bool str_zfill(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
@@ -423,13 +417,9 @@ static bool str__widthjust_impl(bool left, int argc, py_Ref argv) {
     return true;
 }
 
-static bool str_ljust(int argc, py_Ref argv) {
-    return str__widthjust_impl(true, argc, argv);
-}
+static bool str_ljust(int argc, py_Ref argv) { return str__widthjust_impl(true, argc, argv); }
 
-static bool str_rjust(int argc, py_Ref argv) {
-    return str__widthjust_impl(false, argc, argv);
-}
+static bool str_rjust(int argc, py_Ref argv) { return str__widthjust_impl(false, argc, argv); }
 
 static bool str_find(int argc, py_Ref argv) {
     if(argc > 3) return TypeError("find() takes at most 3 arguments");
@@ -453,6 +443,15 @@ static bool str_index(int argc, py_Ref argv) {
     return true;
 }
 
+static bool str_encode(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    int size;
+    const char* data = py_tostrn(argv, &size);
+    unsigned char* p = py_newbytes(py_retval(), size);
+    memcpy(p, data, size);
+    return true;
+}
+
 py_Type pk_str__register() {
     py_Type type = pk_newtype("str", tp_object, NULL, NULL, false, true);
     // no need to dtor because the memory is controlled by the object
@@ -492,6 +491,7 @@ py_Type pk_str__register() {
     py_bindmethod(tp_str, "rjust", str_rjust);
     py_bindmethod(tp_str, "find", str_find);
     py_bindmethod(tp_str, "index", str_index);
+    py_bindmethod(tp_str, "encode", str_encode);
     return type;
 }
 
@@ -522,9 +522,102 @@ py_Type pk_str_iterator__register() {
     return type;
 }
 
+static bool bytes__repr__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    c11_bytes* self = py_touserdata(&argv[0]);
+    c11_sbuf buf;
+    c11_sbuf__ctor(&buf);
+    c11_sbuf__write_char(&buf, 'b');
+    c11_sbuf__write_quoted(&buf, (c11_sv){(const char*)self->data, self->size}, '\'');
+    c11_sbuf__py_submit(&buf, py_retval());
+    return true;
+}
+
+static bool bytes__getitem__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    int size;
+    unsigned char* data = py_tobytes(&argv[0], &size);
+    py_Ref _1 = py_arg(1);
+    if(_1->type == tp_int) {
+        int index = py_toint(_1);
+        if(!pk__normalize_index(&index, size)) return false;
+        py_newint(py_retval(), data[index]);
+        return true;
+    } else if(_1->type == tp_slice) {
+        int start, stop, step;
+        bool ok = pk__parse_int_slice(_1, size, &start, &stop, &step);
+        if(!ok) return false;
+        c11_vector res;
+        c11_vector__ctor(&res, sizeof(unsigned char));
+        for(int i = start; step > 0 ? i < stop : i > stop; i += step) {
+            c11_vector__push(unsigned char, &res, data[i]);
+        }
+        unsigned char* p = py_newbytes(py_retval(), res.count);
+        memcpy(p, res.data, res.count);
+        c11_vector__dtor(&res);
+        return true;
+    } else {
+        return TypeError("bytes indices must be integers");
+    }
+}
+
+static bool bytes__eq__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_bytes* self = py_touserdata(&argv[0]);
+    if(!py_istype(&argv[1], tp_bytes)) {
+        py_newnotimplemented(py_retval());
+    } else {
+        c11_bytes* other = py_touserdata(&argv[1]);
+        py_newbool(py_retval(), c11_bytes__eq(self, other));
+    }
+    return true;
+}
+
+static bool bytes__ne__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_bytes* self = py_touserdata(&argv[0]);
+    if(!py_istype(&argv[1], tp_bytes)) {
+        py_newnotimplemented(py_retval());
+    } else {
+        c11_bytes* other = py_touserdata(&argv[1]);
+        py_newbool(py_retval(), !c11_bytes__eq(self, other));
+    }
+    return true;
+}
+
+static bool bytes__add__(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_bytes* self = py_touserdata(&argv[0]);
+    if(py_arg(1)->type != tp_bytes) {
+        py_newnotimplemented(py_retval());
+    } else {
+        c11_bytes* other = py_touserdata(&argv[1]);
+        unsigned char* p = py_newbytes(py_retval(), self->size + other->size);
+        memcpy(p, self->data, self->size);
+        memcpy(p + self->size, other->data, other->size);
+    }
+    return true;
+}
+
+static bool bytes_decode(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    int size;
+    unsigned char* data = py_tobytes(&argv[0], &size);
+    py_newstrn(py_retval(), (const char*)data, size);
+    return true;
+}
+
 py_Type pk_bytes__register() {
     py_Type type = pk_newtype("bytes", tp_object, NULL, NULL, false, true);
     // no need to dtor because the memory is controlled by the object
+
+    py_bindmagic(tp_bytes, __repr__, bytes__repr__);
+    py_bindmagic(tp_bytes, __getitem__, bytes__getitem__);
+    py_bindmagic(tp_bytes, __eq__, bytes__eq__);
+    py_bindmagic(tp_bytes, __ne__, bytes__ne__);
+    py_bindmagic(tp_bytes, __add__, bytes__add__);
+
+    py_bindmethod(tp_bytes, "decode", bytes_decode);
     return type;
 }
 
diff --git a/tests/68_bytes.py b/tests/46_bytes.py
similarity index 93%
rename from tests/68_bytes.py
rename to tests/46_bytes.py
index bd854a6c..985a415d 100644
--- a/tests/68_bytes.py
+++ b/tests/46_bytes.py
@@ -13,7 +13,8 @@ assert b'\xff\xee' == b'\xff\xee'
 
 a = '测试123'
 assert a == a.encode().decode()
-
+assert chr(0) == '\x00'
+assert ord('\x00') == 0
 
 # test slice
 s = b"football"