mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-20 11:30:18 +00:00
impl re.match | re.search | re.split | re.sub
This commit is contained in:
parent
49b19160b4
commit
f227c61f2a
@ -30,7 +30,7 @@
|
||||
#define UNREACHABLE() throw std::runtime_error( __FILE__ + std::string(":") + std::to_string(__LINE__) + " UNREACHABLE()!");
|
||||
#endif
|
||||
|
||||
#define PK_VERSION "0.5.1"
|
||||
#define PK_VERSION "0.5.2"
|
||||
|
||||
//#define PKPY_NO_TYPE_CHECK
|
||||
//#define PKPY_NO_INDEX_CHECK
|
@ -701,6 +701,99 @@ void __addModuleMath(VM* vm){
|
||||
});
|
||||
}
|
||||
|
||||
PyVar __regex_search(const _Str& pattern, const _Str& string, bool fromStart, VM* vm){
|
||||
std::regex re(pattern);
|
||||
std::smatch m;
|
||||
if(std::regex_search(string, m, re)){
|
||||
if(fromStart && m.position() != 0){
|
||||
return vm->None;
|
||||
}
|
||||
PyVar ret = vm->newObject(vm->_userTypes["re.Match"], (_Int)1);
|
||||
vm->setAttr(ret, "_start", vm->PyInt(
|
||||
string.__to_u8_index(m.position())
|
||||
));
|
||||
vm->setAttr(ret, "_end", vm->PyInt(
|
||||
string.__to_u8_index(m.position() + m.length())
|
||||
));
|
||||
PyVarList groups(m.size());
|
||||
for(size_t i = 0; i < m.size(); ++i){
|
||||
groups[i] = vm->PyStr(m[i].str());
|
||||
}
|
||||
vm->setAttr(ret, "_groups", vm->PyTuple(groups));
|
||||
return ret;
|
||||
}
|
||||
return vm->None;
|
||||
};
|
||||
|
||||
void __addModuleRe(VM* vm){
|
||||
PyVar mod = vm->newModule("re");
|
||||
PyVar _tp_match = vm->newUserClassType(mod, "Match", vm->_tp_object);
|
||||
|
||||
vm->bindMethod("re.Match", "start", [](VM* vm, const pkpy::ArgList& args) {
|
||||
vm->__checkArgSize(args, 1, true);
|
||||
PyVar self = args[0];
|
||||
return vm->getAttr(self, "_start");
|
||||
});
|
||||
|
||||
vm->bindMethod("re.Match", "end", [](VM* vm, const pkpy::ArgList& args) {
|
||||
vm->__checkArgSize(args, 1, true);
|
||||
PyVar self = args[0];
|
||||
return vm->getAttr(self, "_end");
|
||||
});
|
||||
|
||||
vm->bindMethod("re.Match", "span", [](VM* vm, const pkpy::ArgList& args) {
|
||||
vm->__checkArgSize(args, 1, true);
|
||||
PyVar self = args[0];
|
||||
PyVarList vec = { vm->getAttr(self, "_start"), vm->getAttr(self, "_end") };
|
||||
return vm->PyTuple(vec);
|
||||
});
|
||||
|
||||
vm->bindMethod("re.Match", "group", [](VM* vm, const pkpy::ArgList& args) {
|
||||
vm->__checkArgSize(args, 2, true);
|
||||
_Int index = vm->PyInt_AS_C(args[1]);
|
||||
const auto& vec = vm->PyTuple_AS_C(vm->getAttr(args[0], "_groups"));
|
||||
vm->normalizedIndex(index, vec.size());
|
||||
return vec[index];
|
||||
});
|
||||
|
||||
vm->bindFunc(mod, "match", [](VM* vm, const pkpy::ArgList& args) {
|
||||
vm->__checkArgSize(args, 2);
|
||||
const _Str& pattern = vm->PyStr_AS_C(args[0]);
|
||||
const _Str& string = vm->PyStr_AS_C(args[1]);
|
||||
return __regex_search(pattern, string, true, vm);
|
||||
});
|
||||
|
||||
vm->bindFunc(mod, "search", [](VM* vm, const pkpy::ArgList& args) {
|
||||
vm->__checkArgSize(args, 2);
|
||||
const _Str& pattern = vm->PyStr_AS_C(args[0]);
|
||||
const _Str& string = vm->PyStr_AS_C(args[1]);
|
||||
return __regex_search(pattern, string, false, vm);
|
||||
});
|
||||
|
||||
vm->bindFunc(mod, "sub", [](VM* vm, const pkpy::ArgList& args) {
|
||||
vm->__checkArgSize(args, 3);
|
||||
const _Str& pattern = vm->PyStr_AS_C(args[0]);
|
||||
const _Str& repl = vm->PyStr_AS_C(args[1]);
|
||||
const _Str& string = vm->PyStr_AS_C(args[2]);
|
||||
std::regex re(pattern);
|
||||
return vm->PyStr(std::regex_replace(string, re, repl));
|
||||
});
|
||||
|
||||
vm->bindFunc(mod, "split", [](VM* vm, const pkpy::ArgList& args) {
|
||||
vm->__checkArgSize(args, 2);
|
||||
const _Str& pattern = vm->PyStr_AS_C(args[0]);
|
||||
const _Str& string = vm->PyStr_AS_C(args[1]);
|
||||
std::regex re(pattern);
|
||||
std::sregex_token_iterator it(string.begin(), string.end(), re, -1);
|
||||
std::sregex_token_iterator end;
|
||||
PyVarList vec;
|
||||
for(; it != end; ++it){
|
||||
vec.push_back(vm->PyStr(it->str()));
|
||||
}
|
||||
return vm->PyList(vec);
|
||||
});
|
||||
}
|
||||
|
||||
class _PkExported{
|
||||
public:
|
||||
virtual ~_PkExported() = default;
|
||||
@ -826,6 +919,7 @@ extern "C" {
|
||||
__addModuleTime(vm);
|
||||
__addModuleJson(vm);
|
||||
__addModuleMath(vm);
|
||||
__addModuleRe(vm);
|
||||
|
||||
_Code code = compile(vm, __BUILTINS_CODE, "<builtins>");
|
||||
if(code == nullptr) exit(1);
|
||||
|
@ -39,25 +39,6 @@ public:
|
||||
#include "hash_table8.hpp"
|
||||
|
||||
class PyVarDict: public emhash8::HashMap<_Str, PyVar> {
|
||||
PyVar& at(const _Str&) = delete;
|
||||
|
||||
public:
|
||||
|
||||
#ifndef PKPY_NO_INDEX_CHECK
|
||||
PyVar& operator[](const _Str& key) {
|
||||
return emhash8::HashMap<_Str, PyVar>::operator[](key);
|
||||
}
|
||||
|
||||
const PyVar& operator[](const _Str& key) const {
|
||||
auto it = find(key);
|
||||
if (it == end()){
|
||||
auto msg = "map key not found, '" + key + "'";
|
||||
throw std::out_of_range(msg);
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
#endif
|
||||
|
||||
using emhash8::HashMap<_Str, PyVar>::HashMap;
|
||||
};
|
||||
|
||||
|
@ -52,6 +52,13 @@ public:
|
||||
return _hash;
|
||||
}
|
||||
|
||||
int __to_u8_index(int index) const{
|
||||
utf8_lazy_init();
|
||||
auto p = std::lower_bound(_u8_index->begin(), _u8_index->end(), index);
|
||||
if(*p != index) UNREACHABLE();
|
||||
return p - _u8_index->begin();
|
||||
}
|
||||
|
||||
int u8_length() const {
|
||||
utf8_lazy_init();
|
||||
return _u8_index->size();
|
||||
|
22
src/vm.h
22
src/vm.h
@ -128,7 +128,7 @@ protected:
|
||||
PyVar clsBase = frame->popValue(this);
|
||||
if(clsBase == None) clsBase = _tp_object;
|
||||
__checkType(clsBase, _tp_type);
|
||||
PyVar cls = newUserClassType(clsName, clsBase);
|
||||
PyVar cls = newUserClassType(frame->_module, clsName, clsBase);
|
||||
while(true){
|
||||
PyVar fn = frame->popValue(this);
|
||||
if(fn == None) break;
|
||||
@ -136,7 +136,7 @@ protected:
|
||||
setAttr(fn, __module__, frame->_module);
|
||||
setAttr(cls, f->name, fn);
|
||||
}
|
||||
frame->f_globals()[clsName] = cls;
|
||||
// frame->f_globals()[clsName] = cls;
|
||||
} break;
|
||||
case OP_RETURN_VALUE: return frame->popValue(this);
|
||||
case OP_PRINT_EXPR:
|
||||
@ -342,6 +342,7 @@ protected:
|
||||
|
||||
public:
|
||||
PyVarDict _types;
|
||||
PyVarDict _userTypes;
|
||||
PyVar None, True, False, Ellipsis;
|
||||
|
||||
bool use_stdio;
|
||||
@ -597,10 +598,13 @@ public:
|
||||
return ret;
|
||||
}
|
||||
|
||||
PyVar newUserClassType(_Str name, PyVar base){
|
||||
PyVar obj = newClassType(name, base);
|
||||
setAttr(obj, __name__, PyStr(name));
|
||||
_types.erase(name);
|
||||
PyVar newUserClassType(PyVar mod, _Str name, PyVar base){
|
||||
PyVar obj = pkpy::make_shared<PyObject, Py_<_Int>>((_Int)1, _tp_type);
|
||||
setAttr(obj, __base__, base);
|
||||
_Str fullName = UNION_NAME(mod) + "." +name;
|
||||
setAttr(obj, __name__, PyStr(fullName));
|
||||
_userTypes[fullName] = obj;
|
||||
setAttr(mod, name, obj);
|
||||
return obj;
|
||||
}
|
||||
|
||||
@ -695,9 +699,11 @@ public:
|
||||
}
|
||||
|
||||
void bindMethod(_Str typeName, _Str funcName, _CppFunc fn) {
|
||||
PyVar type = _types[typeName];
|
||||
PyVar* type = _types.try_get(typeName);
|
||||
if(type == nullptr) type = _userTypes.try_get(typeName);
|
||||
if(type == nullptr) UNREACHABLE();
|
||||
PyVar func = PyNativeFunction(fn);
|
||||
setAttr(type, funcName, func);
|
||||
setAttr(*type, funcName, func);
|
||||
}
|
||||
|
||||
void bindMethodMulti(std::vector<_Str> typeNames, _Str funcName, _CppFunc fn) {
|
||||
|
17
tests/_re.py
Normal file
17
tests/_re.py
Normal file
@ -0,0 +1,17 @@
|
||||
import re
|
||||
|
||||
# test match, search, sub, split
|
||||
|
||||
m = re.search('测试','123测试测试')
|
||||
assert m.span() == (3,5)
|
||||
assert m.group(0) == '测试'
|
||||
|
||||
assert re.match('测试','123测试测试') is None
|
||||
assert re.sub('测试','xxx','123测试12321测试') == '123xxx12321xxx'
|
||||
|
||||
# this is different from cpython, the last empty string is not included
|
||||
assert re.split('测试','测试123测试12321测试') == ['', '123', '12321']
|
||||
|
||||
assert re.split(',','123,456,789,10') == ['123', '456', '789', '10']
|
||||
assert re.split(',',',123,456,789,10') == ['', '123', '456', '789', '10']
|
||||
assert re.split(',','123,456,789,10,') == ['123', '456', '789', '10']
|
Loading…
x
Reference in New Issue
Block a user