From f9c0a9523727fc95d1e5d628450237bdfa007e82 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Tue, 22 Aug 2023 23:55:11 +0800 Subject: [PATCH] optimize `str`'s iterator --- include/pocketpy/iter.h | 4 ++-- src/iter.cpp | 8 +++++--- tests/04_str.py | 19 ++++++++++++++++++- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/include/pocketpy/iter.h b/include/pocketpy/iter.h index d465cd87..262b40ec 100644 --- a/include/pocketpy/iter.h +++ b/include/pocketpy/iter.h @@ -33,7 +33,7 @@ struct StringIter{ PY_CLASS(StringIter, builtins, "_string_iterator") PyObject* ref; Str* str; - int index; + int index; // byte index StringIter(PyObject* ref) : ref(ref), str(&PK_OBJ_GET(Str, ref)), index(0) {} @@ -43,7 +43,7 @@ struct StringIter{ }; struct Generator{ - PY_CLASS(Generator, builtins, "_generator") + PY_CLASS(Generator, builtins, "generator") Frame frame; int state; // 0,1,2 List s_backup; diff --git a/src/iter.cpp b/src/iter.cpp index 773dea60..4cc1e796 100644 --- a/src/iter.cpp +++ b/src/iter.cpp @@ -32,9 +32,11 @@ namespace pkpy{ vm->bind__iter__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* obj){ return obj; }); vm->bind__next__(PK_OBJ_GET(Type, type), [](VM* vm, PyObject* obj){ StringIter& self = _CAST(StringIter&, obj); - // TODO: optimize this... operator[] is of O(n) complexity - if(self.index == self.str->u8_length()) return vm->StopIteration; - return VAR(self.str->u8_getitem(self.index++)); + if(self.index == self.str->size) return vm->StopIteration; + int start = self.index; + int len = utf8len(self.str->data[self.index]); + self.index += len; + return VAR(self.str->substr(start, len)); }); } diff --git a/tests/04_str.py b/tests/04_str.py index fea011fa..e447f5fc 100644 --- a/tests/04_str.py +++ b/tests/04_str.py @@ -117,4 +117,21 @@ assert a.rjust(5, '0') == '00123' assert a.ljust(5) == '123 ' assert a.ljust(5, '0') == '12300' -assert '\x30\x31\x32' == '012' \ No newline at end of file +assert '\x30\x31\x32' == '012' + +a = 'abcd' +assert list(a) == ['a', 'b', 'c', 'd'] +a = '测试' +assert list(a) == ['测', '试'] +a = 'a测b试c' +assert list(a) == ['a', '测', 'b', '试', 'c'] +a = 'a测b试' +assert list(a) == ['a', '测', 'b', '试'] +a = '测b试c' +assert list(a) == ['测', 'b', '试', 'c'] +a = '测b' +assert list(a) == ['测', 'b'] +a = 'b' +assert list(a) == ['b'] +a = '测' +assert list(a) == ['测']