From 8f11ce04669012cdacab81d894fe0a0044c1f77c Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Mon, 5 Feb 2024 14:48:00 +0800 Subject: [PATCH] move `strip` methods into cpp --- include/pocketpy/str.h | 6 ++-- python/builtins.py | 57 ++--------------------------------- src/pocketpy.cpp | 68 ++++++++++++++++++++++++++++++++++++++++++ src/str.cpp | 40 +++++++++++++++---------- tests/04_str.py | 20 +++++++++++++ 5 files changed, 119 insertions(+), 72 deletions(-) diff --git a/include/pocketpy/str.h b/include/pocketpy/str.h index 8cf8418f..af1dac6e 100644 --- a/include/pocketpy/str.h +++ b/include/pocketpy/str.h @@ -65,8 +65,10 @@ struct Str{ const char* c_str() const; std::string_view sv() const; std::string str() const; - Str lstrip() const; - Str strip() const; + Str strip(bool left, bool right, const Str& chars) const; + Str strip(bool left=true, bool right=true) const; + Str lstrip() const { return strip(true, false); } + Str rstrip() const { return strip(false, true); } Str lower() const; Str upper() const; Str escape(bool single_quote=true) const; diff --git a/python/builtins.py b/python/builtins.py index 27010099..564ba236 100644 --- a/python/builtins.py +++ b/python/builtins.py @@ -86,7 +86,7 @@ def sorted(iterable, key=None, reverse=False): return a ##### str ##### -def __f(self: str, *args, **kwargs) -> str: +def __format_string(self: str, *args, **kwargs) -> str: def tokenizeString(s: str): tokens = [] L, R = 0,0 @@ -195,59 +195,8 @@ def __f(self: str, *args, **kwargs) -> str: return ''.join(final_tokens) -str.format = __f - -def __f(self, chars=None): - chars = chars or ' \t\n\r' - i = 0 - while i < len(self) and self[i] in chars: - ++i - return self[i:] -str.lstrip = __f - -def __f(self, chars=None): - chars = chars or ' \t\n\r' - j = len(self) - 1 - while j >= 0 and self[j] in chars: - --j - return self[:j+1] -str.rstrip = __f - -def __f(self, chars=None): - chars = chars or ' \t\n\r' - i = 0 - while i < len(self) and self[i] in chars: - ++i - j = len(self) - 1 - while j >= 0 and self[j] in chars: - --j - return self[i:j+1] -str.strip = __f - -def __f(self, width: int): - delta = width - len(self) - if delta <= 0: - return self - return '0' * delta + self -str.zfill = __f - -def __f(self, width: int, fillchar=' '): - delta = width - len(self) - if delta <= 0: - return self - assert len(fillchar) == 1 - return fillchar * delta + self -str.rjust = __f - -def __f(self, width: int, fillchar=' '): - delta = width - len(self) - if delta <= 0: - return self - assert len(fillchar) == 1 - return self + fillchar * delta -str.ljust = __f - -del __f +str.format = __format_string +del __format_string def help(obj): diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 85042607..046580ba 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -641,6 +641,74 @@ void init_builtins(VM* _vm) { return VAR(self.upper()); }); + _vm->bind(_vm->_t(VM::tp_str), "strip(self, chars=None)", [](VM* vm, ArgsView args) { + const Str& self = _CAST(Str&, args[0]); + if(args[1] == vm->None){ + return VAR(self.strip()); + }else{ + const Str& chars = CAST(Str&, args[1]); + return VAR(self.strip(true, true, chars)); + } + }); + + _vm->bind(_vm->_t(VM::tp_str), "lstrip(self, chars=None)", [](VM* vm, ArgsView args) { + const Str& self = _CAST(Str&, args[0]); + if(args[1] == vm->None){ + return VAR(self.lstrip()); + }else{ + const Str& chars = CAST(Str&, args[1]); + return VAR(self.strip(true, false, chars)); + } + }); + + _vm->bind(_vm->_t(VM::tp_str), "rstrip(self, chars=None)", [](VM* vm, ArgsView args) { + const Str& self = _CAST(Str&, args[0]); + if(args[1] == vm->None){ + return VAR(self.rstrip()); + }else{ + const Str& chars = CAST(Str&, args[1]); + return VAR(self.strip(false, true, chars)); + } + }); + + // zfill + _vm->bind(_vm->_t(VM::tp_str), "zfill(self, width)", [](VM* vm, ArgsView args) { + const Str& self = _CAST(Str&, args[0]); + int width = CAST(int, args[1]); + int delta = width - self.u8_length(); + if(delta <= 0) return args[0]; + SStream ss; + for(int i=0; ibind(_vm->_t(VM::tp_str), "ljust(self, width, fillchar=' ')", [](VM* vm, ArgsView args) { + const Str& self = _CAST(Str&, args[0]); + int width = CAST(int, args[1]); + int delta = width - self.u8_length(); + if(delta <= 0) return args[0]; + const Str& fillchar = CAST(Str&, args[2]); + SStream ss; + ss << self; + for(int i=0; ibind(_vm->_t(VM::tp_str), "rjust(self, width, fillchar=' ')", [](VM* vm, ArgsView args) { + const Str& self = _CAST(Str&, args[0]); + int width = CAST(int, args[1]); + int delta = width - self.u8_length(); + if(delta <= 0) return args[0]; + const Str& fillchar = CAST(Str&, args[2]); + SStream ss; + for(int i=0; ibind(_vm->_t(VM::tp_list), "sort(self, key=None, reverse=False)", [](VM* vm, ArgsView args) { List& self = _CAST(List&, args[0]); diff --git a/src/str.cpp b/src/str.cpp index 4f7c6eb3..c1cbe5ab 100644 --- a/src/str.cpp +++ b/src/str.cpp @@ -194,24 +194,32 @@ int utf8len(unsigned char c, bool suppress){ return std::string(data, size); } - Str Str::lstrip() const { - std::string copy(data, size); - copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) { - // std::isspace(c) does not working on windows (Debug) - return c != ' ' && c != '\t' && c != '\r' && c != '\n'; - })); - return Str(copy); + Str Str::strip(bool left, bool right, const Str& chars) const { + int L = 0; + int R = u8_length(); + if(left){ + while(L < R && chars.index(u8_getitem(L)) != -1) L++; + } + if(right){ + while(L < R && chars.index(u8_getitem(R-1)) != -1) R--; + } + return u8_slice(L, R, 1); } - Str Str::strip() const { - std::string copy(data, size); - copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) { - return c != ' ' && c != '\t' && c != '\r' && c != '\n'; - })); - copy.erase(std::find_if(copy.rbegin(), copy.rend(), [](char c) { - return c != ' ' && c != '\t' && c != '\r' && c != '\n'; - }).base(), copy.end()); - return Str(copy); + Str Str::strip(bool left, bool right) const { + if(is_ascii){ + int L = 0; + int R = size; + if(left){ + while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r')) L++; + } + if(right){ + while(L < R && (data[R-1] == ' ' || data[R-1] == '\t' || data[R-1] == '\n' || data[R-1] == '\r')) R--; + } + return substr(L, R - L); + }else{ + return strip(left, right, " \t\n\r"); + } } Str Str::lower() const{ diff --git a/tests/04_str.py b/tests/04_str.py index 51ffacaa..27314b7f 100644 --- a/tests/04_str.py +++ b/tests/04_str.py @@ -69,6 +69,26 @@ assert s.strip( '12' ) == "3abcrunoob3" assert t.strip( '*' ) == "this is **string** example....wow!!!" assert s.strip( '12' ) == "3abcrunoob3" +assert '测试123'.strip('测试') == '123' +assert '测试123测试'.strip('测试') == '123' +assert '123测试'.strip('2') == '123测试' +assert '测试123'.strip('测') == '试123' +assert '测试123'.strip('试') == '测试123' + +assert '测试123测试'.lstrip('测试') == '123测试' +assert '测试123测试'.rstrip('测试') == '测试123' + +assert 'abc'.lstrip('a') == 'bc' +assert 'abc'.lstrip('b') == 'abc' +assert 'abc'.lstrip('c') == 'abc' +assert 'abc'.rstrip('a') == 'abc' +assert 'abc'.rstrip('b') == 'abc' +assert 'abc'.rstrip('c') == 'ab' + +assert 'abc'.lstrip('abc') == '' +assert 'abc'.rstrip('abc') == '' +assert 'abc'.strip('abc') == '' + s = ' asd\n asd \n' assert s.strip() == 'asd\n asd'