fix str.split

This commit is contained in:
blueloveTH 2024-11-14 19:52:23 +08:00
parent 630a5d04fc
commit 104785c94b
3 changed files with 29 additions and 17 deletions

View File

@ -188,15 +188,13 @@ c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep) {
int i = 0;
for(int j = 0; j < self.size; j++) {
if(data[j] == sep) {
if(j > i) {
c11_sv tmp = {data + i, j - i};
c11_vector__push(c11_sv, &retval, tmp);
}
assert(j >= i);
c11_sv tmp = {data + i, j - i};
c11_vector__push(c11_sv, &retval, tmp);
i = j + 1;
continue;
}
}
if(self.size > i) {
if(i <= self.size) {
c11_sv tmp = {data + i, self.size - i};
c11_vector__push(c11_sv, &retval, tmp);
}
@ -204,6 +202,7 @@ c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep) {
}
c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep) {
if(sep.size == 1) return c11_sv__split(self, sep.data[0]);
c11_vector retval;
c11_vector__ctor(&retval, sizeof(c11_sv));
int start = 0;
@ -212,11 +211,11 @@ c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep) {
int i = c11_sv__index2(self, sep, start);
if(i == -1) break;
c11_sv tmp = {data + start, i - start};
if(tmp.size != 0) c11_vector__push(c11_sv, &retval, tmp);
c11_vector__push(c11_sv, &retval, tmp);
start = i + sep.size;
}
c11_sv tmp = {data + start, self.size - start};
if(tmp.size != 0) c11_vector__push(c11_sv, &retval, tmp);
c11_vector__push(c11_sv, &retval, tmp);
return retval;
}

View File

@ -317,21 +317,25 @@ static bool str_replace(int argc, py_Ref argv) {
static bool str_split(int argc, py_Ref argv) {
c11_sv self = c11_string__sv(py_touserdata(&argv[0]));
c11_vector res;
bool discard_empty = false;
if(argc > 2) return TypeError("split() takes at most 2 arguments");
if(argc == 1) {
// sep = ' '
// sep = None
res = c11_sv__split(self, ' ');
discard_empty = true;
}
if(argc == 2) {
// sep = argv[1]
if(!py_checkstr(&argv[1])) return false;
c11_sv sep = c11_string__sv(py_touserdata(&argv[1]));
if(sep.size == 0) return ValueError("empty separator");
res = c11_sv__split2(self, sep);
}
py_newlistn(py_retval(), res.length);
py_newlist(py_retval());
for(int i = 0; i < res.length; i++) {
c11_sv item = c11__getitem(c11_sv, &res, i);
py_newstrv(py_list_getitem(py_retval(), i), item);
c11_sv part = c11__getitem(c11_sv, &res, i);
if(discard_empty && part.size == 0) continue;
py_newstrv(py_list_emplace(py_retval()), part);
}
c11_vector__dtor(&res);
return true;

View File

@ -9,7 +9,10 @@ assert 'testing5' >= 'test' + 'ing1'
assert 'abc' + 'def' == 'abcdef'
assert 'abc' * 3 == 'abcabcabc'
assert repr('\\\n\t\'\r\b\x48') == r"'\\\n\t\'\r\bH'"
assert repr('\\\n\t\'\r\b\x48') in [
r"'\\\n\t\'\r\bH'",
'"\\\\\\n\\t\'\\r\\x08H"',
]
a = ''
b = 'test'
@ -46,13 +49,19 @@ assert t.startswith('this') == True;
assert t.split('w') == ['this is string example....', 'o', '!!!']
assert "a,b,c".split(',') == ['a', 'b', 'c']
assert 'a,'.split(',') == ['a']
assert 'a,'.split(',') == ['a', '']
assert 'foo!!bar!!baz'.split('!!') == ['foo', 'bar', 'baz']
assert ' 4 3 '.split() == ['4', '3']
assert ' 4 3 '.split(' ') == ['4', '3']
assert ' 4 3 '.split(' ') == ['', '', '4', '3', '', '']
assert 'aa bb cccc'.split('cc') == ['aa bb ', '', '']
assert '.a.b.'.split('.') == ['', 'a', 'b', '']
assert '.a...b.'.split('.') == ['', 'a', '', '', 'b', '']
x = 'aa bb cccc'
assert x.split('cc') == ['aa bb ']
try:
'a'.split('')
exit(1)
except ValueError:
pass
assert '111'.count('1') == 3
assert '111'.count('11') == 1