From 104785c94b26a20c5d89fadbfd8e14d069ef947c Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Thu, 14 Nov 2024 19:52:23 +0800 Subject: [PATCH] fix `str.split` --- src/common/str.c | 15 +++++++-------- src/public/py_str.c | 12 ++++++++---- tests/04_str.py | 19 ++++++++++++++----- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/common/str.c b/src/common/str.c index 8c7c3994..3e17a74b 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -188,15 +188,13 @@ c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep) { int i = 0; for(int j = 0; j < self.size; j++) { if(data[j] == sep) { - if(j > i) { - c11_sv tmp = {data + i, j - i}; - c11_vector__push(c11_sv, &retval, tmp); - } + assert(j >= i); + c11_sv tmp = {data + i, j - i}; + c11_vector__push(c11_sv, &retval, tmp); i = j + 1; - continue; } } - if(self.size > i) { + if(i <= self.size) { c11_sv tmp = {data + i, self.size - i}; c11_vector__push(c11_sv, &retval, tmp); } @@ -204,6 +202,7 @@ c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep) { } c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep) { + if(sep.size == 1) return c11_sv__split(self, sep.data[0]); c11_vector retval; c11_vector__ctor(&retval, sizeof(c11_sv)); int start = 0; @@ -212,11 +211,11 @@ c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep) { int i = c11_sv__index2(self, sep, start); if(i == -1) break; c11_sv tmp = {data + start, i - start}; - if(tmp.size != 0) c11_vector__push(c11_sv, &retval, tmp); + c11_vector__push(c11_sv, &retval, tmp); start = i + sep.size; } c11_sv tmp = {data + start, self.size - start}; - if(tmp.size != 0) c11_vector__push(c11_sv, &retval, tmp); + c11_vector__push(c11_sv, &retval, tmp); return retval; } diff --git a/src/public/py_str.c b/src/public/py_str.c index 769c12dd..1255be3e 100644 --- a/src/public/py_str.c +++ b/src/public/py_str.c @@ -317,21 +317,25 @@ static bool str_replace(int argc, py_Ref argv) { static bool str_split(int argc, py_Ref argv) { c11_sv self = c11_string__sv(py_touserdata(&argv[0])); c11_vector res; + bool discard_empty = false; if(argc > 2) return TypeError("split() takes at most 2 arguments"); if(argc == 1) { - // sep = ' ' + // sep = None res = c11_sv__split(self, ' '); + discard_empty = true; } if(argc == 2) { // sep = argv[1] if(!py_checkstr(&argv[1])) return false; c11_sv sep = c11_string__sv(py_touserdata(&argv[1])); + if(sep.size == 0) return ValueError("empty separator"); res = c11_sv__split2(self, sep); } - py_newlistn(py_retval(), res.length); + py_newlist(py_retval()); for(int i = 0; i < res.length; i++) { - c11_sv item = c11__getitem(c11_sv, &res, i); - py_newstrv(py_list_getitem(py_retval(), i), item); + c11_sv part = c11__getitem(c11_sv, &res, i); + if(discard_empty && part.size == 0) continue; + py_newstrv(py_list_emplace(py_retval()), part); } c11_vector__dtor(&res); return true; diff --git a/tests/04_str.py b/tests/04_str.py index 7adad4b4..fd4af45a 100644 --- a/tests/04_str.py +++ b/tests/04_str.py @@ -9,7 +9,10 @@ assert 'testing5' >= 'test' + 'ing1' assert 'abc' + 'def' == 'abcdef' assert 'abc' * 3 == 'abcabcabc' -assert repr('\\\n\t\'\r\b\x48') == r"'\\\n\t\'\r\bH'" +assert repr('\\\n\t\'\r\b\x48') in [ + r"'\\\n\t\'\r\bH'", + '"\\\\\\n\\t\'\\r\\x08H"', +] a = '' b = 'test' @@ -46,13 +49,19 @@ assert t.startswith('this') == True; assert t.split('w') == ['this is string example....', 'o', '!!!'] assert "a,b,c".split(',') == ['a', 'b', 'c'] -assert 'a,'.split(',') == ['a'] +assert 'a,'.split(',') == ['a', ''] assert 'foo!!bar!!baz'.split('!!') == ['foo', 'bar', 'baz'] assert ' 4 3 '.split() == ['4', '3'] -assert ' 4 3 '.split(' ') == ['4', '3'] +assert ' 4 3 '.split(' ') == ['', '', '4', '3', '', ''] +assert 'aa bb cccc'.split('cc') == ['aa bb ', '', ''] +assert '.a.b.'.split('.') == ['', 'a', 'b', ''] +assert '.a...b.'.split('.') == ['', 'a', '', '', 'b', ''] -x = 'aa bb cccc' -assert x.split('cc') == ['aa bb '] +try: + 'a'.split('') + exit(1) +except ValueError: + pass assert '111'.count('1') == 3 assert '111'.count('11') == 1