diff --git a/src/__stl__.h b/src/__stl__.h
index d1a2f739..d9728d1c 100644
--- a/src/__stl__.h
+++ b/src/__stl__.h
@@ -30,7 +30,7 @@
 #define UNREACHABLE() throw std::runtime_error( __FILE__ + std::string(":") + std::to_string(__LINE__) + " UNREACHABLE()!");
 #endif
 
-#define PK_VERSION "0.5.1"
+#define PK_VERSION "0.5.2"
 
 //#define PKPY_NO_TYPE_CHECK
 //#define PKPY_NO_INDEX_CHECK
\ No newline at end of file
diff --git a/src/pocketpy.h b/src/pocketpy.h
index 47996432..99d93f65 100644
--- a/src/pocketpy.h
+++ b/src/pocketpy.h
@@ -701,6 +701,105 @@ void __addModuleMath(VM* vm){
     });
 }
 
+// Search `string` for the first match of `pattern` and wrap it in a `re.Match` object.
+// When `fromStart` is true, a match that does not begin at offset 0 is rejected.
+// Returns vm->None when there is no acceptable match.
+PyVar __regex_search(const _Str& pattern, const _Str& string, bool fromStart, VM* vm){
+    std::regex re(pattern);
+    std::smatch m;
+    if(std::regex_search(string, m, re)){
+        if(fromStart && m.position() != 0){
+            return vm->None;
+        }
+        PyVar ret = vm->newObject(vm->_userTypes["re.Match"], (_Int)1);
+        // std::smatch reports offsets into the underlying char sequence; translate them with __to_u8_index
+        vm->setAttr(ret, "_start", vm->PyInt(
+            string.__to_u8_index(m.position())
+        ));
+        vm->setAttr(ret, "_end", vm->PyInt(
+            string.__to_u8_index(m.position() + m.length())
+        ));
+        PyVarList groups(m.size());
+        for(size_t i = 0; i < m.size(); ++i){
+            groups[i] = vm->PyStr(m[i].str());
+        }
+        vm->setAttr(ret, "_groups", vm->PyTuple(groups));
+        return ret;
+    }
+    return vm->None;
+}
+
+// Register the `re` module: the `re.Match` type plus the match/search/sub/split functions.
+void __addModuleRe(VM* vm){
+    PyVar mod = vm->newModule("re");
+    vm->newUserClassType(mod, "Match", vm->_tp_object);
+
+    vm->bindMethod("re.Match", "start", [](VM* vm, const pkpy::ArgList& args) {
+        vm->__checkArgSize(args, 1, true);
+        PyVar self = args[0];
+        return vm->getAttr(self, "_start");
+    });
+
+    vm->bindMethod("re.Match", "end", [](VM* vm, const pkpy::ArgList& args) {
+        vm->__checkArgSize(args, 1, true);
+        PyVar self = args[0];
+        return vm->getAttr(self, "_end");
+    });
+
+    vm->bindMethod("re.Match", "span", [](VM* vm, const pkpy::ArgList& args) {
+        vm->__checkArgSize(args, 1, true);
+        PyVar self = args[0];
+        PyVarList vec = { vm->getAttr(self, "_start"), vm->getAttr(self, "_end") };
+        return vm->PyTuple(vec);
+    });
+
+    vm->bindMethod("re.Match", "group", [](VM* vm, const pkpy::ArgList& args) {
+        vm->__checkArgSize(args, 2, true);
+        _Int index = vm->PyInt_AS_C(args[1]);
+        const auto& vec = vm->PyTuple_AS_C(vm->getAttr(args[0], "_groups"));
+        // NOTE(review): assumes normalizedIndex returns the adjusted index - confirm against vm.h
+        index = vm->normalizedIndex(index, vec.size());
+        return vec[index];
+    });
+
+    vm->bindFunc(mod, "match", [](VM* vm, const pkpy::ArgList& args) {
+        vm->__checkArgSize(args, 2);
+        const _Str& pattern = vm->PyStr_AS_C(args[0]);
+        const _Str& string = vm->PyStr_AS_C(args[1]);
+        return __regex_search(pattern, string, true, vm);
+    });
+
+    vm->bindFunc(mod, "search", [](VM* vm, const pkpy::ArgList& args) {
+        vm->__checkArgSize(args, 2);
+        const _Str& pattern = vm->PyStr_AS_C(args[0]);
+        const _Str& string = vm->PyStr_AS_C(args[1]);
+        return __regex_search(pattern, string, false, vm);
+    });
+
+    vm->bindFunc(mod, "sub", [](VM* vm, const pkpy::ArgList& args) {
+        vm->__checkArgSize(args, 3);
+        const _Str& pattern = vm->PyStr_AS_C(args[0]);
+        const _Str& repl = vm->PyStr_AS_C(args[1]);
+        const _Str& string = vm->PyStr_AS_C(args[2]);
+        std::regex re(pattern);
+        return vm->PyStr(std::regex_replace(string, re, repl));
+    });
+
+    vm->bindFunc(mod, "split", [](VM* vm, const pkpy::ArgList& args) {
+        vm->__checkArgSize(args, 2);
+        const _Str& pattern = vm->PyStr_AS_C(args[0]);
+        const _Str& string = vm->PyStr_AS_C(args[1]);
+        std::regex re(pattern);
+        std::sregex_token_iterator it(string.begin(), string.end(), re, -1);
+        std::sregex_token_iterator end;
+        PyVarList vec;
+        for(; it != end; ++it){
+            vec.push_back(vm->PyStr(it->str()));
+        }
+        return vm->PyList(vec);
+    });
+}
+
 class _PkExported{
 public:
     virtual ~_PkExported() = default;
@@ -826,6 +925,7 @@ extern "C" {
         __addModuleTime(vm);
         __addModuleJson(vm);
         __addModuleMath(vm);
+        __addModuleRe(vm);
 
         _Code code = compile(vm, __BUILTINS_CODE, "<builtins>");
         if(code == nullptr) exit(1);
diff --git a/src/safestl.h b/src/safestl.h
index b070e378..5f50cf30 100644
--- a/src/safestl.h
+++ b/src/safestl.h
@@ -39,25 +39,6 @@ public:
 #include "hash_table8.hpp"
 
 class PyVarDict: public emhash8::HashMap<_Str, PyVar> {
-    PyVar& at(const _Str&) = delete;
-
-public:
-
-#ifndef PKPY_NO_INDEX_CHECK
-    PyVar& operator[](const _Str& key) {
-        return emhash8::HashMap<_Str, PyVar>::operator[](key);
-    }
-
-    const PyVar& operator[](const _Str& key) const {
-        auto it = find(key);
-        if (it == end()){
-            auto msg = "map key not found, '" + key + "'";
-            throw std::out_of_range(msg);
-        }
-        return it->second;
-    }
-#endif
-
     using emhash8::HashMap<_Str, PyVar>::HashMap;
 };
 
diff --git a/src/str.h b/src/str.h
index 10d4db87..30ec86bc 100644
--- a/src/str.h
+++ b/src/str.h
@@ -52,6 +52,21 @@ public:
         return _hash;
     }
 
+    // Return the position within _u8_index of the entry equal to `index`,
+    // or _u8_index->size() when `index` equals size(). Any other value hits UNREACHABLE().
+    // NOTE(review): relies on _Str exposing std::string::size() - confirm.
+    int __to_u8_index(int index) const{
+        utf8_lazy_init();
+        auto p = std::lower_bound(_u8_index->begin(), _u8_index->end(), index);
+        // lower_bound yields end() when index is past every stored entry; it must not be dereferenced
+        if(p == _u8_index->end()){
+            if(index != (int)size()) UNREACHABLE();
+            return _u8_index->size();
+        }
+        if(*p != index) UNREACHABLE();
+        return p - _u8_index->begin();
+    }
+
     int u8_length() const {
         utf8_lazy_init();
         return _u8_index->size();
diff --git a/src/vm.h b/src/vm.h
index 70cba670..82648d59 100644
--- a/src/vm.h
+++ b/src/vm.h
@@ -128,7 +128,7 @@ protected:
                     PyVar clsBase = frame->popValue(this);
                     if(clsBase == None) clsBase = _tp_object;
                     __checkType(clsBase, _tp_type);
-                    PyVar cls = newUserClassType(clsName, clsBase);
+                    PyVar cls = newUserClassType(frame->_module, clsName, clsBase);
                     while(true){
                         PyVar fn = frame->popValue(this);
                         if(fn == None) break;
@@ -136,7 +136,7 @@ protected:
                         setAttr(fn, __module__, frame->_module);
                         setAttr(cls, f->name, fn);
                     }
-                    frame->f_globals()[clsName] = cls;
+                    // no f_globals() store here: newUserClassType already sets the class on frame->_module
                 } break;
             case OP_RETURN_VALUE: return frame->popValue(this);
             case OP_PRINT_EXPR:
@@ -342,6 +342,7 @@ protected:
 
 public:
     PyVarDict _types;
+    PyVarDict _userTypes;
     PyVar None, True, False, Ellipsis;
 
     bool use_stdio;
@@ -597,10 +598,14 @@ public:
         return ret;
     }
 
-    PyVar newUserClassType(_Str name, PyVar base){
-        PyVar obj = newClassType(name, base);
-        setAttr(obj, __name__, PyStr(name));
-        _types.erase(name);
+    // Create a class object named UNION_NAME(mod) + "." + name, record it in _userTypes under that name, and set it as attribute `name` on mod.
+    PyVar newUserClassType(PyVar mod, _Str name, PyVar base){
+        PyVar obj = pkpy::make_shared<PyObject, Py_<_Int>>((_Int)1, _tp_type);
+        setAttr(obj, __base__, base);
+        _Str fullName = UNION_NAME(mod) + "." +name;
+        setAttr(obj, __name__, PyStr(fullName));
+        _userTypes[fullName] = obj;
+        setAttr(mod, name, obj);
         return obj;
     }
 
@@ -695,9 +700,11 @@ public:
     }
 
     void bindMethod(_Str typeName, _Str funcName, _CppFunc fn) {
-        PyVar type = _types[typeName];
+        PyVar* type = _types.try_get(typeName);
+        if(type == nullptr) type = _userTypes.try_get(typeName);
+        if(type == nullptr) UNREACHABLE();
         PyVar func = PyNativeFunction(fn);
-        setAttr(type, funcName, func);
+        setAttr(*type, funcName, func);
     }
 
     void bindMethodMulti(std::vector<_Str> typeNames, _Str funcName, _CppFunc fn) {
diff --git a/tests/_re.py b/tests/_re.py
new file mode 100644
index 00000000..5ea32032
--- /dev/null
+++ b/tests/_re.py
@@ -0,0 +1,20 @@
+import re
+
+# test match, search, sub, split
+
+m = re.search('测试','123测试测试')
+assert m.span() == (3,5)
+assert m.group(0) == '测试'
+
+# a match that ends exactly at the end of the string
+assert re.search('测试','123测试').span() == (3,5)
+
+assert re.match('测试','123测试测试') is None
+assert re.sub('测试','xxx','123测试12321测试') == '123xxx12321xxx'
+
+# this is different from cpython, the last empty string is not included
+assert re.split('测试','测试123测试12321测试') == ['', '123', '12321']
+
+assert re.split(',','123,456,789,10') == ['123', '456', '789', '10']
+assert re.split(',',',123,456,789,10') == ['', '123', '456', '789', '10']
+assert re.split(',','123,456,789,10,') == ['123', '456', '789', '10']
\ No newline at end of file