diff --git a/python/builtins.py b/python/builtins.py index 0c359574..3fbdfce5 100644 --- a/python/builtins.py +++ b/python/builtins.py @@ -71,7 +71,7 @@ def sorted(iterable, reverse=False): str.__mul__ = lambda self, n: ''.join([self for _ in range(n)]) -def str::split(self, sep): +def str@split(self, sep): if sep == "": return list(self) res = [] @@ -86,7 +86,7 @@ def str::split(self, sep): res.append(self) return res -def str::format(self, *args): +def str@format(self, *args): if '{}' in self: for i in range(len(args)): self = self.replace('{}', str(args[i]), 1) @@ -95,7 +95,7 @@ def str::format(self, *args): self = self.replace('{'+str(i)+'}', str(args[i])) return self -def str::strip(self, chars=None): +def str@strip(self, chars=None): chars = chars or ' \t\n\r' i = 0 while i < len(self) and self[i] in chars: @@ -127,30 +127,30 @@ def __qsort(a: list, L: int, R: int): __qsort(a, L, j) __qsort(a, i, R) -def list::sort(self, reverse=False): +def list@sort(self, reverse=False): __qsort(self, 0, len(self)-1) if reverse: self.reverse() -def list::remove(self, value): +def list@remove(self, value): for i in range(len(self)): if self[i] == value: del self[i] return True return False -def list::index(self, value): +def list@index(self, value): for i in range(len(self)): if self[i] == value: return i return -1 -def list::pop(self, i=-1): +def list@pop(self, i=-1): res = self[i] del self[i] return res -def list::__eq__(self, other): +def list@__eq__(self, other): if type(self) is not type(other): return False if len(self) != len(other): @@ -163,7 +163,7 @@ tuple.__eq__ = list.__eq__ list.__ne__ = lambda self, other: not self.__eq__(other) tuple.__ne__ = lambda self, other: not self.__eq__(other) -def list::count(self, x): +def list@count(self, x): res = 0 for i in self: if i == x: @@ -171,7 +171,7 @@ def list::count(self, x): return res tuple.count = list.count -def list::__contains__(self, item): +def list@__contains__(self, item): for i in self: if i == item: return 
True @@ -202,5 +202,5 @@ class staticmethod: def __call__(self, *args): return self.f(*args) -def type::__repr__(self): +def type@__repr__(self): return "" \ No newline at end of file diff --git a/src/ceval.h b/src/ceval.h index 5a9beeaa..f3242ae9 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -227,16 +227,12 @@ __NEXT_STEP:; STACK_SHRINK(byte.arg); PUSH(obj); } DISPATCH(); - TARGET(BUILD_SLICE) { - _2 = POPX(); - _1 = POPX(); - _0 = POPX(); - Slice s; - if(_0 != None) s.start = CAST(int, _0); - if(_1 != None) s.stop = CAST(int, _1); - if(_2 != None) s.step = CAST(int, _2); - PUSH(VAR(s)); - } DISPATCH(); + TARGET(BUILD_SLICE) + _2 = POPX(); // step + _1 = POPX(); // stop + _0 = POPX(); // start + PUSH(VAR(Slice(_0, _1, _2))); + DISPATCH(); TARGET(BUILD_TUPLE) _0 = VAR(STACK_VIEW(byte.arg).to_tuple()); STACK_SHRINK(byte.arg); @@ -352,14 +348,11 @@ __NEXT_STEP:; if(asBool(TOP()) == false) frame->jump_abs(byte.arg); else POP(); DISPATCH(); - TARGET(LOOP_CONTINUE) { - int target = co_blocks[byte.block].start; - frame->jump_abs(target); - } DISPATCH(); + TARGET(LOOP_CONTINUE) + frame->jump_abs(co_blocks[byte.block].start); + DISPATCH(); TARGET(LOOP_BREAK) - frame->jump_abs_break( - co_blocks[byte.block].end - ); + frame->jump_abs_break(co_blocks[byte.block].end); DISPATCH(); TARGET(GOTO) { StrName name(byte.arg); @@ -393,11 +386,10 @@ __NEXT_STEP:; TARGET(YIELD_VALUE) return PY_OP_YIELD; /*****************************************/ - TARGET(LIST_APPEND) { - PyObject* obj = POPX(); - List& list = CAST(List&, SECOND()); - list.push_back(obj); - } DISPATCH(); + TARGET(LIST_APPEND) + _0 = POPX(); + CAST(List&, SECOND()).push_back(_0); + DISPATCH(); TARGET(DICT_ADD) { _0 = POPX(); Tuple& t = CAST(Tuple&, _0); @@ -509,16 +501,15 @@ __NEXT_STEP:; PyObject* cls = new_type_object(frame->_module, name, OBJ_GET(Type, super_cls)); PUSH(cls); } DISPATCH(); - TARGET(END_CLASS) { - PyObject* cls = POPX(); - cls->attr()._try_perfect_rehash(); - }; DISPATCH(); - TARGET(STORE_CLASS_ATTR) 
{ - StrName name(byte.arg); - PyObject* obj = POPX(); - PyObject* cls = TOP(); - cls->attr().set(name, obj); - } DISPATCH(); + TARGET(END_CLASS) + _0 = POPX(); + _0->attr()._try_perfect_rehash(); + DISPATCH(); + TARGET(STORE_CLASS_ATTR) + _name = StrName(byte.arg); + _0 = POPX(); + TOP()->attr().set(_name, _0); + DISPATCH(); /*****************************************/ // // TODO: using "goto" inside with block may cause __exit__ not called TARGET(WITH_ENTER) @@ -543,8 +534,8 @@ __NEXT_STEP:; } DISPATCH(); TARGET(EXCEPTION_MATCH) { const auto& e = CAST(Exception&, TOP()); - StrName name(byte.arg); - PUSH(VAR(e.match_type(name))); + _name = StrName(byte.arg); + PUSH(VAR(e.match_type(_name))); } DISPATCH(); TARGET(RAISE) { PyObject* obj = POPX(); diff --git a/src/compiler.h b/src/compiler.h index e6c906da..799f1cea 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -906,7 +906,7 @@ __SUBSCR_END: Str decl_name; consume(TK("@id")); decl_name = prev().str(); - if(!ctx()->is_compiling_class && match(TK("::"))){ + if(!ctx()->is_compiling_class && match(TK("@"))){ consume(TK("@id")); obj_name = decl_name; decl_name = prev().str(); diff --git a/src/gc.h b/src/gc.h index f759f44a..4ae39deb 100644 --- a/src/gc.h +++ b/src/gc.h @@ -141,6 +141,12 @@ template<> inline void gc_mark(BoundMethod& t){ OBJ_MARK(t.func); } +template<> inline void gc_mark(Slice& t){ + OBJ_MARK(t.start); + OBJ_MARK(t.stop); + OBJ_MARK(t.step); +} + template<> inline void gc_mark(Function& t){ t.decl->_gc_mark(); if(t._module != nullptr) OBJ_MARK(t._module); diff --git a/src/lexer.h b/src/lexer.h index 31b6c839..ffe5d5f3 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -19,7 +19,7 @@ constexpr const char* kTokens[] = { "&", "&=", "|", "|=", "^", "^=", "<<", "<<=", ">>", ">>=", /*****************************************/ - ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "::", + ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "**", "=", ">", "<", "...", "->", "?", "@", "==", "!=", ">=", "<=", 
/** KW_BEGIN **/ "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield", @@ -368,7 +368,7 @@ struct Lexer { case '{': add_token(TK("{")); return true; case '}': add_token(TK("}")); return true; case ',': add_token(TK(",")); return true; - case ':': add_token_2(':', TK(":"), TK("::")); return true; + case ':': add_token(TK(":")); return true; case ';': add_token(TK(";")); return true; case '(': add_token(TK("(")); return true; case ')': add_token(TK(")")); return true; diff --git a/src/obj.h b/src/obj.h index 660476c0..8d36346a 100644 --- a/src/obj.h +++ b/src/obj.h @@ -81,19 +81,11 @@ struct Bytes{ using Super = std::pair; -// TODO: re-examine the design of Slice struct Slice { - int start = 0; - int stop = 0x7fffffff; - int step = 1; - - void normalize(int len){ - if(start < 0) start += len; - if(stop < 0) stop += len; - if(start < 0) start = 0; - if(stop > len) stop = len; - if(stop < start) stop = start; - } + PyObject* start; + PyObject* stop; + PyObject* step; + Slice(PyObject* start, PyObject* stop, PyObject* step) : start(start), stop(stop), step(step) {} }; class BaseIter { diff --git a/src/pocketpy.h b/src/pocketpy.h index 6eeeceb7..944f2571 100644 --- a/src/pocketpy.h +++ b/src/pocketpy.h @@ -368,9 +368,10 @@ inline void init_builtins(VM* _vm) { const Str& self (CAST(Str&, args[0])); if(is_type(args[1], vm->tp_slice)){ - Slice s = _CAST(Slice, args[1]); - s.normalize(self.u8_length()); - return VAR(self.u8_slice(s.start, s.stop)); + const Slice& s = _CAST(Slice&, args[1]); + int start, stop, step; + vm->parse_int_slice(s, self.u8_length(), start, stop, step); + return VAR(self.u8_slice(start, stop, step)); } int index = CAST(int, args[1]); @@ -509,10 +510,11 @@ inline void init_builtins(VM* _vm) { const List& self = CAST(List&, args[0]); if(is_type(args[1], vm->tp_slice)){ - Slice s = _CAST(Slice, args[1]); - s.normalize(self.size()); + const Slice& s = _CAST(Slice&, args[1]); + int start, stop, step; + vm->parse_int_slice(s, 
self.size(), start, stop, step); List new_list; - for(size_t i = s.start; i < s.stop; i++) new_list.push_back(self[i]); + for(int i=start; step>0?i<stop:i>stop; i+=step) new_list.push_back(self[i]); return VAR(std::move(new_list)); } @@ -551,10 +553,11 @@ inline void init_builtins(VM* _vm) { const Tuple& self = CAST(Tuple&, args[0]); if(is_type(args[1], vm->tp_slice)){ - Slice s = _CAST(Slice, args[1]); - s.normalize(self.size()); + const Slice& s = _CAST(Slice&, args[1]); + int start, stop, step; + vm->parse_int_slice(s, self.size(), start, stop, step); List new_list; - for(size_t i = s.start; i < s.stop; i++) new_list.push_back(self[i]); + for(int i=start; step>0?i<stop:i>stop; i+=step) new_list.push_back(self[i]); return VAR(Tuple(std::move(new_list))); } @@ -877,6 +880,16 @@ inline void VM::post_init(){ _t(tp_bound_method)->attr().set("__func__", property([](VM* vm, ArgsView args){ return CAST(BoundMethod&, args[0]).func; })); + + _t(tp_slice)->attr().set("start", property([](VM* vm, ArgsView args){ + return CAST(Slice&, args[0]).start; + })); + _t(tp_slice)->attr().set("stop", property([](VM* vm, ArgsView args){ + return CAST(Slice&, args[0]).stop; + })); + _t(tp_slice)->attr().set("step", property([](VM* vm, ArgsView args){ + return CAST(Slice&, args[0]).step; + })); #endif } diff --git a/src/str.h b/src/str.h index f71a518a..63dac092 100644 --- a/src/str.h +++ b/src/str.h @@ -265,11 +265,14 @@ struct Str{ return substr(i, utf8len(data[i])); } - Str u8_slice(int start, int end) const{ - // TODO: optimize this - start = _unicode_index_to_byte(start); - end = _unicode_index_to_byte(end); - return substr(start, end - start); + Str u8_slice(int start, int stop, int step) const{ + std::stringstream ss; + if(is_ascii){ + for(int i=start; step>0?i<stop:i>stop; i+=step) ss << data[i]; + }else{ + for(int i=start; step>0?i<stop:i>stop; i+=step) ss << u8_getitem(i); + } + return ss.str(); } int u8_length() const { diff --git a/src/vm.h b/src/vm.h index 292048bd..d0f1a545 100644 --- a/src/vm.h +++
b/src/vm.h @@ -381,6 +381,7 @@ public: PyObject* _py_call(PyObject** sp_base, PyObject* callable, ArgsView args, ArgsView kwargs); PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true); PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err=true, bool fallback=false); + void parse_int_slice(const Slice& s, int length, int& start, int& stop, int& step); void setattr(PyObject* obj, StrName name, PyObject* value); template void bind_method(PyObject*, Str, NativeFuncC); @@ -557,6 +558,48 @@ inline bool VM::asBool(PyObject* obj){ return true; } +inline void VM::parse_int_slice(const Slice& s, int length, int& start, int& stop, int& step){ + auto clip = [](int value, int min, int max){ + if(value < min) return min; + if(value > max) return max; + return value; + }; + if(s.step == None) step = 1; + else step = CAST(int, s.step); + if(step == 0) ValueError("slice step cannot be zero"); + if(step > 0){ + if(s.start == None){ + start = 0; + }else{ + start = CAST(int, s.start); + if(start < 0) start += length; + start = clip(start, 0, length); + } + if(s.stop == None){ + stop = length; + }else{ + stop = CAST(int, s.stop); + if(stop < 0) stop += length; + stop = clip(stop, 0, length); + } + }else{ + if(s.start == None){ + start = length - 1; + }else{ + start = CAST(int, s.start); + if(start < 0) start += length; + start = clip(start, -1, length - 1); + } + if(s.stop == None){ + stop = -1; + }else{ + stop = CAST(int, s.stop); + if(stop < 0) stop += length; + stop = clip(stop, -1, length - 1); + } + } +} + inline i64 VM::hash(PyObject* obj){ if (is_non_tagged_type(obj, tp_str)) return CAST(Str&, obj).hash(); if (is_int(obj)) return CAST(i64, obj); diff --git a/tests/04_str.py b/tests/04_str.py index 4d48217e..8021d442 100644 --- a/tests/04_str.py +++ b/tests/04_str.py @@ -81,4 +81,20 @@ assert "Hello, {}!".format("World") == "Hello, World!" 
assert "{} {} {}".format("I", "love", "Python") == "I love Python" assert "{0} {1} {2}".format("I", "love", "Python") == "I love Python" assert "{2} {1} {0}".format("I", "love", "Python") == "Python love I" -assert "{0}{1}{0}".format("abra", "cad") == "abracadabra" \ No newline at end of file +assert "{0}{1}{0}".format("abra", "cad") == "abracadabra" + +# 3rd slice +a = "Hello, World!" +assert a[::-1] == "!dlroW ,olleH" +assert a[::2] == "Hlo ol!" +assert a[2:5:2] == "lo" +assert a[5:2:-1] == ",ol" +assert a[5:2:-2] == ",l" + +b = list(a) +assert b == ['H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!'] +assert b[::-1] == ['!', 'd', 'l', 'r', 'o', 'W', ' ', ',', 'o', 'l', 'l', 'e', 'H'] +assert b[::2] == ['H', 'l', 'o', ' ', 'o', 'l', '!'] +assert b[2:5:2] == ['l', 'o'] +assert b[5:2:-1] == [',', 'o', 'l'] +assert b[5:2:-2] == [',', 'l'] \ No newline at end of file