Reviewed revision of the pocketpy string patch (sv-based u8 helpers, strip(chars), join, zfill).
Text before the first "diff --git" header is not part of any hunk.
Changes relative to the patch as submitted:
  * str.__getitem__ normalizes an int index against the code-point count, not the byte size.
  * str.join no longer asks an empty list/tuple for element 0.
  * str.zfill keeps a leading '+'/'-' in front of the zero padding (CPython behaviour).
  * doc comments added to c11_sv__strip, _py_str__strip_impl, _py_str__join, _py_str__zfill.
The "index" lines still carry the blob ids of the submitted patch.

diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h
index c1fee572..55fa12aa 100644
--- a/include/pocketpy/common/str.h
+++ b/include/pocketpy/common/str.h
@@ -43,14 +43,14 @@ c11_string* c11_string__copy(c11_string* self);
 void c11_string__delete(c11_string* self);
 c11_sv c11_string__sv(c11_string* self);
 
-int c11_string__u8_length(c11_string* self);
-c11_sv c11_string__u8_getitem(c11_string* self, int i);
-c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step);
+int c11_sv__u8_length(c11_sv self);
+c11_sv c11_sv__u8_getitem(c11_sv self, int i);
+c11_string* c11_sv__u8_slice(c11_sv self, int start, int stop, int step);
 
 // general string operations
 c11_sv c11_sv__slice(c11_sv sv, int start);
 c11_sv c11_sv__slice2(c11_sv sv, int start, int stop);
-c11_sv c11_sv__strip(c11_sv sv, bool left, bool right);
+c11_sv c11_sv__strip(c11_sv sv, c11_sv chars, bool left, bool right);
 int c11_sv__index(c11_sv self, char c);
 int c11_sv__index2(c11_sv self, c11_sv sub, int start);
 int c11_sv__count(c11_sv self, c11_sv sub);
diff --git a/src/common/str.c b/src/common/str.c
index 9441112a..db279e4e 100644
--- a/src/common/str.c
+++ b/src/common/str.c
@@ -47,7 +47,7 @@ c11_string* c11_sv__replace(c11_sv self, char old, char new_) {
     return retval;
 }
 
-c11_string* c11_sv__replace2(c11_sv self, c11_sv old, c11_sv new_){
+c11_string* c11_sv__replace2(c11_sv self, c11_sv old, c11_sv new_) {
     c11_sbuf buf;
     c11_sbuf__ctor(&buf);
     int start = 0;
@@ -64,22 +64,20 @@ c11_string* c11_sv__replace2(c11_sv self, c11_sv old, c11_sv new_){
     return c11_sbuf__submit(&buf);
 }
 
-int c11_string__u8_length(c11_string* self) {
-    return c11__byte_index_to_unicode(self->data, self->size);
+int c11_sv__u8_length(c11_sv sv) { return c11__byte_index_to_unicode(sv.data, sv.size); }
+
+c11_sv c11_sv__u8_getitem(c11_sv sv, int i) {
+    i = c11__unicode_index_to_byte(sv.data, i);
+    int size = c11__u8_header(sv.data[i], false);
+    return c11_sv__slice2(sv, i, i + size);
 }
 
-c11_sv c11_string__u8_getitem(c11_string* self, int i) {
-    i = c11__unicode_index_to_byte(self->data, i);
-    int size = c11__u8_header(self->data[i], false);
-    return c11_sv__slice2(c11_string__sv(self), i, i + size);
-}
-
-c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step) {
+c11_string* c11_sv__u8_slice(c11_sv sv, int start, int stop, int step) {
     c11_sbuf ss;
     c11_sbuf__ctor(&ss);
     assert(step != 0);
     for(int i = start; step > 0 ? i < stop : i > stop; i += step) {
-        c11_sv unicode = c11_string__u8_getitem(self, i);
+        c11_sv unicode = c11_sv__u8_getitem(sv, i);
         c11_sbuf__write_sv(&ss, unicode);
     }
     return c11_sbuf__submit(&ss);
@@ -95,20 +93,32 @@ c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) {
     return (c11_sv){sv.data + start, stop - start};
 }
 
-c11_sv c11_sv__strip(c11_sv sv, bool left, bool right) {
+// Strip the characters found in `chars` from the left and/or right end of `sv`.
+// L and R are code-point indices; they are converted back to byte offsets before slicing.
+// NOTE(review): every c11_sv__u8_getitem call converts an index to a byte offset again;
+// if that conversion scans from the start, a long strippable run costs quadratic time.
+c11_sv c11_sv__strip(c11_sv sv, c11_sv chars, bool left, bool right) {
     int L = 0;
-    int R = sv.size;
-    const char* data = sv.data;
+    int R = c11_sv__u8_length(sv);
     if(left) {
-        while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r'))
+        while(L < R) {
+            c11_sv tmp = c11_sv__u8_getitem(sv, L);
+            bool found = c11_sv__index2(chars, tmp, 0) != -1;
+            if(!found) break;
             L++;
+        }
     }
     if(right) {
-        while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' ||
-                        data[R - 1] == '\r'))
+        while(L < R) {
+            c11_sv tmp = c11_sv__u8_getitem(sv, R - 1);
+            bool found = c11_sv__index2(chars, tmp, 0) != -1;
+            if(!found) break;
             R--;
+        }
     }
-    return c11_sv__slice2(sv, L, R);
+    int start = c11__unicode_index_to_byte(sv.data, L);
+    int stop = c11__unicode_index_to_byte(sv.data, R);
+    return c11_sv__slice2(sv, start, stop);
 }
 
 int c11_sv__index(c11_sv self, char c) {
diff --git a/src/public/py_str.c b/src/public/py_str.c
index 5d4cb87e..ea4bdaac 100644
--- a/src/public/py_str.c
+++ b/src/public/py_str.c
@@ -164,18 +164,18 @@ static bool _py_str__iter__(int argc, py_Ref argv) {
 
 static bool _py_str__getitem__(int argc, py_Ref argv) {
     PY_CHECK_ARGC(2);
-    c11_string* self = py_touserdata(&argv[0]);
+    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
     py_Ref _1 = py_arg(1);
     if(_1->type == tp_int) {
         int index = py_toint(py_arg(1));
-        pk__normalize_index(&index, self->size);
-        c11_sv res = c11_string__u8_getitem(self, index);
+        pk__normalize_index(&index, c11_sv__u8_length(self));
+        c11_sv res = c11_sv__u8_getitem(self, index);
         py_newstrn(py_retval(), res.data, res.size);
     } else if(_1->type == tp_slice) {
         int start, stop, step;
-        bool ok = pk__parse_int_slice(_1, c11_string__u8_length(self), &start, &stop, &step);
+        bool ok = pk__parse_int_slice(_1, c11_sv__u8_length(self), &start, &stop, &step);
         if(!ok) return false;
-        c11_string* res = c11_string__u8_slice(self, start, stop, step);
+        c11_string* res = c11_sv__u8_slice(self, start, stop, step);
         py_newstrn(py_retval(), res->data, res->size);
         c11_string__delete(res);
         return true;
@@ -261,14 +261,40 @@ static bool _py_str__endswith(int argc, py_Ref argv) {
 }
 
 static bool _py_str__join(int argc, py_Ref argv) {
-    assert(false);
-    // PY_CHECK_ARGC(2);
-    // c11_sbuf buf;
-    // c11_sbuf__ctor(&buf);
-    // c11_string* sep = py_touserdata(&argv[0]);
-    // py_Ref iter = py_pushtmp();
-    // py_iter(iter, &argv[1]);
-    return false;
+    // str.join(seq): concatenate the items of a list or tuple, writing self between them.
+    // Returns false if seq is neither (TypeError) or if py_checkstr rejects an item.
+    PY_CHECK_ARGC(2);
+    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    py_Ref _1 = py_arg(1);
+    // join a list or tuple
+    py_TValue* p = NULL;
+    int length;
+    if(py_istype(_1, tp_list)) {
+        length = py_list__len(_1);
+        // an empty sequence has no element 0 to point at; p stays NULL and is never read
+        if(length > 0) p = py_list__getitem(_1, 0);
+    } else if(py_istype(_1, tp_tuple)) {
+        length = py_tuple__len(_1);
+        if(length > 0) p = py_tuple__getitem(_1, 0);
+    } else {
+        return TypeError("join() argument must be a list or tuple");
+    }
+
+    c11_sbuf buf;
+    c11_sbuf__ctor(&buf);
+    for(int i = 0; i < length; i++) {
+        if(i > 0) c11_sbuf__write_sv(&buf, self);
+        if(!py_checkstr(&p[i])) {
+            c11_sbuf__dtor(&buf);
+            return false;
+        }
+        c11_string* item = py_touserdata(&p[i]);
+        c11_sbuf__write_cstrn(&buf, item->data, item->size);
+    }
+    c11_string* res = c11_sbuf__submit(&buf);
+    py_newstrn(py_retval(), res->data, res->size);
+    c11_string__delete(res);
+    return true;
 }
 
 static bool _py_str__replace(int argc, py_Ref argv) {
@@ -318,27 +344,64 @@ static bool _py_str__count(int argc, py_Ref argv) {
     return true;
 }
 
-static bool _py_str__strip(int argc, py_Ref argv) {
-    PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
-    c11_sv res = c11_sv__strip(c11_string__sv(self), true, true);
+// Shared body of str.strip/lstrip/rstrip. With no argument the set " \t\n\r" is stripped;
+// otherwise argv[1] must be a str whose characters form the set to strip.
+static bool _py_str__strip_impl(bool left, bool right, int argc, py_Ref argv) {
+    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    c11_sv chars;
+    if(argc == 1) {
+        chars = (c11_sv){" \t\n\r", 4};
+    } else if(argc == 2) {
+        if(!py_checkstr(&argv[1])) return false;
+        chars = c11_string__sv(py_touserdata(&argv[1]));
+    } else {
+        return TypeError("strip() takes at most 2 arguments");
+    }
+    c11_sv res = c11_sv__strip(self, chars, left, right);
     py_newstrn(py_retval(), res.data, res.size);
     return true;
 }
 
+static bool _py_str__strip(int argc, py_Ref argv) {
+    return _py_str__strip_impl(true, true, argc, argv);
+}
+
 static bool _py_str__lstrip(int argc, py_Ref argv) {
-    PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
-    c11_sv res = c11_sv__strip(c11_string__sv(self), true, false);
-    py_newstrn(py_retval(), res.data, res.size);
-    return true;
+    return _py_str__strip_impl(true, false, argc, argv);
 }
 
 static bool _py_str__rstrip(int argc, py_Ref argv) {
-    PY_CHECK_ARGC(1);
-    c11_string* self = py_touserdata(&argv[0]);
-    c11_sv res = c11_sv__strip(c11_string__sv(self), false, true);
-    py_newstrn(py_retval(), res.data, res.size);
+    return _py_str__strip_impl(false, true, argc, argv);
+}
+
+// str.zfill(width): left-pad with ASCII '0' until the string is `width` code points long.
+// As in CPython, a leading '+' or '-' stays in front of the padding.
+static bool _py_str__zfill(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(2);
+    c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
+    PY_CHECK_ARG_TYPE(1, tp_int);
+    int width = py_toint(py_arg(1));
+    int delta = width - c11_sv__u8_length(self);
+    if(delta <= 0) {
+        // already wide enough: hand back the receiver itself
+        *py_retval() = argv[0];
+        return true;
+    }
+    c11_sbuf buf;
+    c11_sbuf__ctor(&buf);
+    // keep a leading sign in front of the padding: '-42'.zfill(5) == '-0042'
+    c11_sv rest = self;
+    if(self.size > 0 && (self.data[0] == '+' || self.data[0] == '-')) {
+        c11_sbuf__write_char(&buf, self.data[0]);
+        rest = c11_sv__slice(self, 1);
+    }
+    for(int i = 0; i < delta; i++) {
+        c11_sbuf__write_char(&buf, '0');
+    }
+    c11_sbuf__write_sv(&buf, rest);
+    c11_string* res = c11_sbuf__submit(&buf);
+    py_newstrn(py_retval(), res->data, res->size);
+    c11_string__delete(res);
     return true;
 }
 
@@ -377,6 +440,7 @@ py_Type pk_str__register() {
     py_bindmethod(tp_str, "strip", _py_str__strip);
     py_bindmethod(tp_str, "lstrip", _py_str__lstrip);
     py_bindmethod(tp_str, "rstrip", _py_str__rstrip);
+    py_bindmethod(tp_str, "zfill", _py_str__zfill);
     return type;
 }
 
diff --git a/tests/04_str.py b/tests/04_str.py
index e9581b76..14e1efaa 100644
--- a/tests/04_str.py
+++ b/tests/04_str.py
@@ -101,10 +101,8 @@ assert s2.join( seq ) == "runoob"
 assert 'x'.zfill(5) == '0000x'
 assert '568'.zfill(1) == '568'
+assert '-42'.zfill(5) == '-0042'
+assert '+7'.zfill(3) == '+07'
 
-def test(*seq):
-    return s1.join(seq)
-assert test("r", "u", "n", "o", "o", "b") == "r-u-n-o-o-b"
-
 num = 6
 assert str(num) == '6'
 
@@ -178,6 +176,10 @@ assert list(a) == ['b']
 a = '测'
 assert list(a) == ['测']
 
+def test(*seq):
+    return s1.join(seq)
+assert test("r", "u", "n", "o", "o", "b") == "r-u-n-o-o-b"
+
 # test format()
 assert "Hello, {}!".format("World") == "Hello, World!"
 assert "{} {} {}".format("I", "love", "Python") == "I love Python"