From d5a511ad7ceb301ff9dafc4a63b41949a0b26f25 Mon Sep 17 00:00:00 2001
From: blueloveTH
Date: Thu, 3 Jul 2025 18:04:36 +0800
Subject: [PATCH] fix https://github.com/pocketpy/pocketpy/issues/378

---
 include/pocketpy/common/str.h |  1 +
 src/common/str.c              | 26 ++++++++++++++++++++++++++
 src/public/py_str.c           |  2 +-
 tests/04_str.py               |  3 +++
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h
index 83fcf20f..5ccaf5f7 100644
--- a/include/pocketpy/common/str.h
+++ b/include/pocketpy/common/str.h
@@ -67,6 +67,7 @@ c11_string* c11_sv__replace2(c11_sv self, c11_sv old, c11_sv new_);
 
 c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep);
 c11_vector /* T=c11_sv */ c11_sv__split2(c11_sv self, c11_sv sep);
+c11_vector /* T=c11_sv */ c11_sv__splitwhitespace(c11_sv self);
 
 // misc
 int c11__unicode_index_to_byte(const char* data, int i);
diff --git a/src/common/str.c b/src/common/str.c
index ca2e9305..642883fc 100644
--- a/src/common/str.c
+++ b/src/common/str.c
@@ -2,6 +2,7 @@
 #include "pocketpy/common/sstream.h"
 #include "pocketpy/common/utils.h"
 
+#include
 #include
 #include
 #include
@@ -188,6 +189,31 @@ uint64_t c11_sv__hash(c11_sv self) {
     return hash;
 }
 
+// Splits `self` at every byte for which isspace() is true.
+// Each piece is a view into `self.data`; runs of whitespace and leading/trailing
+// whitespace produce empty pieces, which are returned as-is (not filtered here).
+c11_vector /* T=c11_sv */ c11_sv__splitwhitespace(c11_sv self) {
+    c11_vector retval;
+    c11_vector__ctor(&retval, sizeof(c11_sv));
+    const char* data = self.data;
+    int i = 0;  // start offset of the piece currently being scanned
+    for(int j = 0; j < self.size; j++) {
+        // <ctype.h> requires an unsigned-char value; a negative plain char is UB
+        if(isspace((unsigned char)data[j])) {
+            assert(j >= i);
+            c11_sv tmp = {data + i, j - i};
+            c11_vector__push(c11_sv, &retval, tmp);
+            i = j + 1;
+        }
+    }
+    // i never exceeds self.size, so the final (possibly empty) piece is always pushed
+    if(i <= self.size) {
+        c11_sv tmp = {data + i, self.size - i};
+        c11_vector__push(c11_sv, &retval, tmp);
+    }
+    return retval;
+}
+
 c11_vector /* T=c11_sv */ c11_sv__split(c11_sv self, char sep) {
     c11_vector retval;
     c11_vector__ctor(&retval, sizeof(c11_sv));
diff --git a/src/public/py_str.c b/src/public/py_str.c
index 3265ef04..ee57974b 100644
--- a/src/public/py_str.c
+++ b/src/public/py_str.c
@@ -327,7 +327,7 @@ static bool str_split(int argc, py_Ref argv) {
     if(argc > 2) return TypeError("split() takes at most 2 arguments");
     if(argc == 1) {
         // sep = None
-        res = c11_sv__split(self, ' ');
+        res = c11_sv__splitwhitespace(self);
         discard_empty = true;
     }
     if(argc == 2) {
diff --git a/tests/04_str.py b/tests/04_str.py
index dfe8a984..d18aff6a 100644
--- a/tests/04_str.py
+++ b/tests/04_str.py
@@ -57,6 +57,9 @@ assert 'aa bb cccc'.split('cc') == ['aa bb ', '', '']
 assert '.a.b.'.split('.') == ['', 'a', 'b', '']
 assert '.a...b.'.split('.') == ['', 'a', '', '', 'b', '']
 
+# https://github.com/pocketpy/pocketpy/issues/378
+assert "a b \n c\td".split() == ['a', 'b', 'c', 'd']
+
 try:
     'a'.split('')
     exit(1)