mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-20 03:20:18 +00:00
up
This commit is contained in:
parent
2e46449117
commit
7324f897b5
@ -6,7 +6,7 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f:
|
||||
OPCODES_TEXT = f.read()
|
||||
|
||||
pipeline = [
|
||||
["common.h", "vector.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"],
|
||||
["common.h", "memory.h", "vector.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"],
|
||||
["obj.h", "codeobject.h", "frame.h"],
|
||||
["gc.h", "vm.h", "ceval.h", "expr.h", "compiler.h", "repl.h"],
|
||||
["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"]
|
||||
|
@ -49,6 +49,8 @@ public:
|
||||
StringIter(VM* vm, PyObject* ref) : BaseIter(vm), ref(ref), index(0) {}
|
||||
|
||||
PyObject* next() override{
|
||||
// TODO: optimize this to use iterator
|
||||
// operator[] is O(n) complexity
|
||||
Str* str = &OBJ_GET(Str, ref);
|
||||
if(index == str->u8_length()) return nullptr;
|
||||
return VAR(str->u8_getitem(index++));
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
#include "common.h"
|
||||
#include "error.h"
|
||||
#include "new_str.h"
|
||||
#include "str.h"
|
||||
|
||||
namespace pkpy{
|
||||
|
@ -73,7 +73,6 @@ struct LinkedListNode{
|
||||
LinkedListNode* next;
|
||||
};
|
||||
|
||||
|
||||
template<typename T>
|
||||
struct DoubleLinkedList{
|
||||
static_assert(std::is_base_of_v<LinkedListNode, T>);
|
||||
|
177
src/new_str.h
177
src/new_str.h
@ -1,177 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include "memory.h"
|
||||
|
||||
namespace pkpy{
|
||||
|
||||
/// Byte length of the UTF-8 sequence introduced by lead byte `c`.
///
/// Returns 1 for a 7-bit byte, 2..6 when `c` starts with that many one-bits
/// followed by a zero-bit (the legacy 5- and 6-byte forms are accepted), and
/// 0 when `c` is not a lead byte at all (continuation byte 10xxxxxx, 0xFE, 0xFF).
inline int utf8len(unsigned char c){
    if(c < 0x80) return 1;
    // Count the run of one-bits at the top of the byte.
    int ones = 0;
    for(unsigned char mask = 0x80; (c & mask) != 0; mask >>= 1) ones++;
    // One leading one-bit is a continuation byte; seven or eight are invalid.
    return (ones >= 2 && ones <= 6) ? ones : 0;
}
|
||||
|
||||
struct String{
|
||||
int size;
|
||||
bool is_ascii;
|
||||
char* data;
|
||||
|
||||
String(): size(0), is_ascii(true), data((char*)pool64.alloc(0)) {}
|
||||
|
||||
String(int size, bool is_ascii): size(size), is_ascii(is_ascii) {
|
||||
data = (char*)pool64.alloc(size);
|
||||
}
|
||||
|
||||
String(const char* str): size(strlen(str)), is_ascii(true) {
|
||||
data = (char*)pool64.alloc(size);
|
||||
for(int i=0; i<size; i++){
|
||||
data[i] = str[i];
|
||||
if(!isascii(str[i])) is_ascii = false;
|
||||
}
|
||||
}
|
||||
|
||||
String(const String& other): size(other.size), is_ascii(other.is_ascii) {
|
||||
data = (char*)pool64.alloc(size);
|
||||
memcpy(data, other.data, size);
|
||||
}
|
||||
|
||||
String(String&& other): size(other.size), is_ascii(other.is_ascii), data(other.data) {
|
||||
other.data = nullptr;
|
||||
other.size = 0;
|
||||
}
|
||||
|
||||
String& operator=(const String& other){
|
||||
if(data!=nullptr) pool64.dealloc(data);
|
||||
size = other.size;
|
||||
is_ascii = other.is_ascii;
|
||||
data = (char*)pool64.alloc(size);
|
||||
memcpy(data, other.data, size);
|
||||
return *this;
|
||||
}
|
||||
|
||||
String& operator=(String&& other){
|
||||
if(data!=nullptr) pool64.dealloc(data);
|
||||
size = other.size;
|
||||
is_ascii = other.is_ascii;
|
||||
data = other.data;
|
||||
other.data = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
~String(){
|
||||
if(data!=nullptr) pool64.dealloc(data);
|
||||
}
|
||||
|
||||
char operator[](int idx) const {
|
||||
return data[idx];
|
||||
}
|
||||
|
||||
int length() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
String operator+(const String& other) const {
|
||||
String ret(size + other.size, is_ascii && other.is_ascii);
|
||||
memcpy(ret.data, data, size);
|
||||
memcpy(ret.data + size, other.data, other.size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const String& str){
|
||||
os.write(str.data, str.size);
|
||||
return os;
|
||||
}
|
||||
|
||||
bool operator==(const String& other) const {
|
||||
if(size != other.size) return false;
|
||||
return memcmp(data, other.data, size) == 0;
|
||||
}
|
||||
|
||||
bool operator!=(const String& other) const {
|
||||
if(size != other.size) return true;
|
||||
return memcmp(data, other.data, size) != 0;
|
||||
}
|
||||
|
||||
bool operator<(const String& other) const {
|
||||
int ret = strncmp(data, other.data, std::min(size, other.size));
|
||||
if(ret != 0) return ret < 0;
|
||||
return size < other.size;
|
||||
}
|
||||
|
||||
bool operator>(const String& other) const {
|
||||
int ret = strncmp(data, other.data, std::min(size, other.size));
|
||||
if(ret != 0) return ret > 0;
|
||||
return size > other.size;
|
||||
}
|
||||
|
||||
bool operator<=(const String& other) const {
|
||||
int ret = strncmp(data, other.data, std::min(size, other.size));
|
||||
if(ret != 0) return ret < 0;
|
||||
return size <= other.size;
|
||||
}
|
||||
|
||||
bool operator>=(const String& other) const {
|
||||
int ret = strncmp(data, other.data, std::min(size, other.size));
|
||||
if(ret != 0) return ret > 0;
|
||||
return size >= other.size;
|
||||
}
|
||||
|
||||
String substr(int start, int len) const {
|
||||
String ret(len, is_ascii);
|
||||
memcpy(ret.data, data + start, len);
|
||||
return ret;
|
||||
}
|
||||
|
||||
char* dup_c_str() const {
|
||||
char* p = (char*)malloc(size + 1);
|
||||
memcpy(p, data, size);
|
||||
p[size] = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
std::string_view view() const {
|
||||
return std::string_view(data, size);
|
||||
}
|
||||
|
||||
std::string str() const {
|
||||
return std::string(data, size);
|
||||
}
|
||||
|
||||
String lstrip() const {
|
||||
std::string copy = str();
|
||||
copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) {
|
||||
// std::isspace(c) does not working on windows (Debug)
|
||||
return c != ' ' && c != '\t' && c != '\r' && c != '\n';
|
||||
}));
|
||||
return String(copy.c_str());
|
||||
}
|
||||
|
||||
/*************unicode*************/
|
||||
|
||||
int _u8_index(int i) const{
|
||||
if(is_ascii) return i;
|
||||
int j = 0;
|
||||
while(i > 0){
|
||||
j += utf8len(data[j]);
|
||||
i--;
|
||||
}
|
||||
return j;
|
||||
}
|
||||
|
||||
String u8_getitem(int i) const {
|
||||
i = _u8_index(i);
|
||||
return substr(i, utf8len(data[i]));
|
||||
}
|
||||
|
||||
String u8_slice(int start, int end) const{
|
||||
start = _u8_index(start);
|
||||
end = _u8_index(end);
|
||||
return substr(start, end - start);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace pkpy
|
@ -131,8 +131,8 @@ inline void init_builtins(VM* _vm) {
|
||||
|
||||
_vm->bind_builtin_func<1>("ord", [](VM* vm, Args& args) {
|
||||
const Str& s = CAST(Str&, args[0]);
|
||||
if (s.size() != 1) vm->TypeError("ord() expected an ASCII character");
|
||||
return VAR((i64)(s.c_str()[0]));
|
||||
if (s.length()!=1) vm->TypeError("ord() expected an ASCII character");
|
||||
return VAR((i64)(s[0]));
|
||||
});
|
||||
|
||||
_vm->bind_builtin_func<2>("hasattr", [](VM* vm, Args& args) {
|
||||
@ -237,8 +237,8 @@ inline void init_builtins(VM* _vm) {
|
||||
const Str& s = CAST(Str&, args[0]);
|
||||
try{
|
||||
size_t parsed = 0;
|
||||
i64 val = S_TO_INT(s, &parsed, 10);
|
||||
if(parsed != s.size()) throw std::invalid_argument("<?>");
|
||||
i64 val = S_TO_INT(s.str(), &parsed, 10);
|
||||
if(parsed != s.length()) throw std::invalid_argument("<?>");
|
||||
return VAR(val);
|
||||
}catch(std::invalid_argument&){
|
||||
vm->ValueError("invalid literal for int(): " + s.escape(true));
|
||||
@ -284,7 +284,7 @@ inline void init_builtins(VM* _vm) {
|
||||
if(s == "inf") return VAR(INFINITY);
|
||||
if(s == "-inf") return VAR(-INFINITY);
|
||||
try{
|
||||
f64 val = S_TO_FLOAT(s);
|
||||
f64 val = S_TO_FLOAT(s.str());
|
||||
return VAR(val);
|
||||
}catch(std::invalid_argument&){
|
||||
vm->ValueError("invalid literal for float(): '" + s + "'");
|
||||
@ -327,7 +327,7 @@ inline void init_builtins(VM* _vm) {
|
||||
_vm->bind_method<1>("str", "__contains__", [](VM* vm, Args& args) {
|
||||
const Str& self = CAST(Str&, args[0]);
|
||||
const Str& other = CAST(Str&, args[1]);
|
||||
return VAR(self.find(other) != Str::npos);
|
||||
return VAR(self.index(other) != -1);
|
||||
});
|
||||
|
||||
_vm->bind_method<0>("str", "__str__", CPP_LAMBDA(args[0]));
|
||||
@ -361,7 +361,7 @@ inline void init_builtins(VM* _vm) {
|
||||
if(is_type(args[1], vm->tp_slice)){
|
||||
Slice s = _CAST(Slice, args[1]);
|
||||
s.normalize(self.u8_length());
|
||||
return VAR(self.u8_substr(s.start, s.stop));
|
||||
return VAR(self.u8_slice(s.start, s.stop));
|
||||
}
|
||||
|
||||
int index = CAST(int, args[1]);
|
||||
@ -382,28 +382,25 @@ inline void init_builtins(VM* _vm) {
|
||||
});
|
||||
|
||||
_vm->bind_method<2>("str", "replace", [](VM* vm, Args& args) {
|
||||
const Str& _self = CAST(Str&, args[0]);
|
||||
const Str& _old = CAST(Str&, args[1]);
|
||||
const Str& _new = CAST(Str&, args[2]);
|
||||
Str _copy = _self;
|
||||
size_t pos = 0;
|
||||
while ((pos = _copy.find(_old, pos)) != std::string::npos) {
|
||||
_copy.replace(pos, _old.length(), _new);
|
||||
pos += _new.length();
|
||||
}
|
||||
return VAR(_copy);
|
||||
const Str& self = CAST(Str&, args[0]);
|
||||
const Str& old = CAST(Str&, args[1]);
|
||||
const Str& new_ = CAST(Str&, args[2]);
|
||||
return VAR(self.replace(old, new_));
|
||||
});
|
||||
|
||||
_vm->bind_method<1>("str", "startswith", [](VM* vm, Args& args) {
|
||||
const Str& self = CAST(Str&, args[0]);
|
||||
const Str& prefix = CAST(Str&, args[1]);
|
||||
return VAR(self.find(prefix) == 0);
|
||||
return VAR(self.index(prefix) == 0);
|
||||
});
|
||||
|
||||
_vm->bind_method<1>("str", "endswith", [](VM* vm, Args& args) {
|
||||
const Str& self = CAST(Str&, args[0]);
|
||||
const Str& suffix = CAST(Str&, args[1]);
|
||||
return VAR(self.rfind(suffix) == self.length() - suffix.length());
|
||||
int offset = self.length() - suffix.length();
|
||||
if(offset < 0) return vm->False;
|
||||
bool ok = memcmp(self.data+offset, suffix.data, suffix.length()) == 0;
|
||||
return VAR(ok);
|
||||
});
|
||||
|
||||
_vm->bind_method<1>("str", "join", [](VM* vm, Args& args) {
|
||||
@ -664,13 +661,15 @@ struct ReMatch {
|
||||
}
|
||||
};
|
||||
|
||||
inline PyObject* _regex_search(const Str& pattern, const Str& string, bool fromStart, VM* vm){
|
||||
inline PyObject* _regex_search(const Str& _pattern, const Str& _string, bool fromStart, VM* vm){
|
||||
std::string pattern = _pattern.str();
|
||||
std::string string = _string.str();
|
||||
std::regex re(pattern);
|
||||
std::smatch m;
|
||||
if(std::regex_search(string, m, re)){
|
||||
if(fromStart && m.position() != 0) return vm->None;
|
||||
i64 start = string._to_u8_index(m.position());
|
||||
i64 end = string._to_u8_index(m.position() + m.length());
|
||||
i64 start = _string._u8_index(m.position());
|
||||
i64 end = _string._u8_index(m.position() + m.length());
|
||||
return VAR_T(ReMatch, start, end, m);
|
||||
}
|
||||
return vm->None;
|
||||
@ -695,14 +694,15 @@ inline void add_module_re(VM* vm){
|
||||
vm->bind_func<3>(mod, "sub", [](VM* vm, Args& args) {
|
||||
const Str& pattern = CAST(Str&, args[0]);
|
||||
const Str& repl = CAST(Str&, args[1]);
|
||||
const Str& string = CAST(Str&, args[2]);
|
||||
std::regex re(pattern);
|
||||
const Str& _string = CAST(Str&, args[2]);
|
||||
std::regex re(pattern.str());
|
||||
std::string string = _string.str();
|
||||
return VAR(std::regex_replace(string, re, repl));
|
||||
});
|
||||
|
||||
vm->bind_func<2>(mod, "split", [](VM* vm, Args& args) {
|
||||
const Str& pattern = CAST(Str&, args[0]);
|
||||
const Str& string = CAST(Str&, args[1]);
|
||||
std::string pattern = CAST(Str&, args[0]).str();
|
||||
std::string string = CAST(Str&, args[1]).str();
|
||||
std::regex re(pattern);
|
||||
std::sregex_token_iterator it(string.begin(), string.end(), re, -1);
|
||||
std::sregex_token_iterator end;
|
||||
|
275
src/str.h
275
src/str.h
@ -1,67 +1,182 @@
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include "memory.h"
|
||||
|
||||
namespace pkpy {
|
||||
|
||||
typedef std::stringstream StrStream;
|
||||
|
||||
class Str : public std::string {
|
||||
mutable std::vector<uint16_t>* _u8_index = nullptr;
|
||||
/// Number of bytes in the UTF-8 sequence that starts with lead byte `c`.
///
/// 0xxxxxxx gives 1; a lead byte with n one-bits followed by a zero-bit gives n
/// for n in 2..6 (the legacy 5- and 6-byte forms are accepted). Any other
/// byte (continuation byte, 0xFE, 0xFF) gives 0.
inline int utf8len(unsigned char c){
    if((c & 0x80) == 0) return 1;
    for(int n = 2; n <= 6; n++){
        const int mask = (0xFF << (7 - n)) & 0xFF;      // selects the top n+1 bits
        const int pattern = (0xFF << (8 - n)) & 0xFF;   // n one-bits followed by a zero-bit
        if((c & mask) == pattern) return n;
    }
    return 0;
}
|
||||
|
||||
void utf8_lazy_init() const{
|
||||
if(_u8_index != nullptr) return;
|
||||
_u8_index = new std::vector<uint16_t>();
|
||||
_u8_index->reserve(size());
|
||||
if(size() > 65535) throw std::runtime_error("str has more than 65535 bytes.");
|
||||
for(uint16_t i = 0; i < size(); i++){
|
||||
// https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80
|
||||
if((at(i) & 0xC0) != 0x80) _u8_index->push_back(i);
|
||||
}
|
||||
}
|
||||
public:
|
||||
struct Str{
|
||||
int size;
|
||||
bool is_ascii;
|
||||
char* data;
|
||||
uint16_t _cached_sn_index = 0;
|
||||
|
||||
Str() : std::string() {}
|
||||
Str(const char* s) : std::string(s) {}
|
||||
Str(const char* s, size_t n) : std::string(s, n) {}
|
||||
Str(const std::string& s) : std::string(s) {}
|
||||
Str(const Str& s) : std::string(s) {
|
||||
if(s._u8_index != nullptr){
|
||||
_u8_index = new std::vector<uint16_t>(*s._u8_index);
|
||||
}
|
||||
}
|
||||
Str(Str&& s) : std::string(std::move(s)) {
|
||||
delete _u8_index;
|
||||
_u8_index = s._u8_index;
|
||||
s._u8_index = nullptr;
|
||||
Str(): size(0), is_ascii(true), data((char*)pool64.alloc(0)) {}
|
||||
|
||||
Str(int size, bool is_ascii): size(size), is_ascii(is_ascii) {
|
||||
data = (char*)pool64.alloc(size);
|
||||
}
|
||||
|
||||
i64 _to_u8_index(i64 index) const{
|
||||
utf8_lazy_init();
|
||||
auto p = std::lower_bound(_u8_index->begin(), _u8_index->end(), index);
|
||||
if(p != _u8_index->end() && *p != index) UNREACHABLE();
|
||||
return p - _u8_index->begin();
|
||||
#define STR_INIT() \
|
||||
data = (char*)pool64.alloc(size); \
|
||||
for(int i=0; i<size; i++){ \
|
||||
data[i] = s[i]; \
|
||||
if(!isascii(s[i])) is_ascii = false; \
|
||||
}
|
||||
|
||||
int u8_length() const {
|
||||
utf8_lazy_init();
|
||||
return _u8_index->size();
|
||||
Str(const std::string& s): size(s.size()), is_ascii(true) {
|
||||
STR_INIT()
|
||||
}
|
||||
|
||||
Str u8_getitem(int i) const{
|
||||
return u8_substr(i, i+1);
|
||||
Str(std::string_view s): size(s.size()), is_ascii(true) {
|
||||
STR_INIT()
|
||||
}
|
||||
|
||||
Str u8_substr(int start, int end) const{
|
||||
utf8_lazy_init();
|
||||
if(start >= end) return Str();
|
||||
int c_end = end >= _u8_index->size() ? size() : _u8_index->at(end);
|
||||
return substr(_u8_index->at(start), c_end - _u8_index->at(start));
|
||||
Str(const char* s): size(strlen(s)), is_ascii(true) {
|
||||
STR_INIT()
|
||||
}
|
||||
|
||||
Str(const char* s, int len): size(len), is_ascii(true) {
|
||||
STR_INIT()
|
||||
}
|
||||
|
||||
#undef STR_INIT
|
||||
|
||||
Str(const Str& other): size(other.size), is_ascii(other.is_ascii) {
|
||||
data = (char*)pool64.alloc(size);
|
||||
memcpy(data, other.data, size);
|
||||
}
|
||||
|
||||
Str(Str&& other): size(other.size), is_ascii(other.is_ascii), data(other.data) {
|
||||
other.data = nullptr;
|
||||
other.size = 0;
|
||||
}
|
||||
|
||||
/// Copy assignment: releases the current buffer and takes a fresh copy of `other`'s bytes.
Str& operator=(const Str& other){
    // Self-assignment would free `data` and then memcpy from the freed buffer.
    if(this == &other) return *this;
    if(data!=nullptr) pool64.dealloc(data);
    size = other.size;
    is_ascii = other.is_ascii;
    // The cached StrName index describes the content, so it must follow the
    // content; keeping the old value would make StrName(const Str&) resolve
    // this string to the name of its previous content.
    _cached_sn_index = other._cached_sn_index;
    data = (char*)pool64.alloc(size);
    // `other.data` may be nullptr when `other` was moved from (size is 0 then).
    if(size > 0) memcpy(data, other.data, size);
    return *this;
}
|
||||
|
||||
/// Move assignment: releases the current buffer and steals `other`'s.
/// `other` is left with `data == nullptr` and `size == 0`, matching the move constructor.
Str& operator=(Str&& other) noexcept{
    // Self-move would free the live buffer and then null out our own pointer.
    if(this == &other) return *this;
    if(data!=nullptr) pool64.dealloc(data);
    size = other.size;
    is_ascii = other.is_ascii;
    // The cached StrName index belongs to the content being moved in.
    _cached_sn_index = other._cached_sn_index;
    data = other.data;
    other.data = nullptr;
    other.size = 0;             // previously left stale, so length() disagreed with the null buffer
    other._cached_sn_index = 0; // the emptied source no longer names anything
    return *this;
}
|
||||
|
||||
~Str(){
|
||||
if(data!=nullptr) pool64.dealloc(data);
|
||||
}
|
||||
|
||||
char operator[](int idx) const {
|
||||
return data[idx];
|
||||
}
|
||||
|
||||
int length() const {
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t hash() const{
|
||||
return std::hash<std::string_view>()(sv());
|
||||
}
|
||||
|
||||
Str operator+(const Str& other) const {
|
||||
Str ret(size + other.size, is_ascii && other.is_ascii);
|
||||
memcpy(ret.data, data, size);
|
||||
memcpy(ret.data + size, other.data, other.size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
Str operator+(const char* p) const {
|
||||
Str other(p);
|
||||
return *this + other;
|
||||
}
|
||||
|
||||
friend Str operator+(const char* p, const Str& str){
|
||||
Str other(p);
|
||||
return other + str;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const Str& str){
|
||||
os.write(str.data, str.size);
|
||||
return os;
|
||||
}
|
||||
|
||||
bool operator==(const Str& other) const {
|
||||
if(size != other.size) return false;
|
||||
return memcmp(data, other.data, size) == 0;
|
||||
}
|
||||
|
||||
bool operator!=(const Str& other) const {
|
||||
if(size != other.size) return true;
|
||||
return memcmp(data, other.data, size) != 0;
|
||||
}
|
||||
|
||||
bool operator<(const Str& other) const {
|
||||
int ret = strncmp(data, other.data, std::min(size, other.size));
|
||||
if(ret != 0) return ret < 0;
|
||||
return size < other.size;
|
||||
}
|
||||
|
||||
bool operator>(const Str& other) const {
|
||||
int ret = strncmp(data, other.data, std::min(size, other.size));
|
||||
if(ret != 0) return ret > 0;
|
||||
return size > other.size;
|
||||
}
|
||||
|
||||
bool operator<=(const Str& other) const {
|
||||
int ret = strncmp(data, other.data, std::min(size, other.size));
|
||||
if(ret != 0) return ret < 0;
|
||||
return size <= other.size;
|
||||
}
|
||||
|
||||
bool operator>=(const Str& other) const {
|
||||
int ret = strncmp(data, other.data, std::min(size, other.size));
|
||||
if(ret != 0) return ret > 0;
|
||||
return size >= other.size;
|
||||
}
|
||||
|
||||
Str substr(int start, int len) const {
|
||||
Str ret(len, is_ascii);
|
||||
memcpy(ret.data, data + start, len);
|
||||
return ret;
|
||||
}
|
||||
|
||||
char* c_str_dup() const {
|
||||
char* p = (char*)malloc(size + 1);
|
||||
memcpy(p, data, size);
|
||||
p[size] = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
std::string_view sv() const {
|
||||
return std::string_view(data, size);
|
||||
}
|
||||
|
||||
std::string str() const {
|
||||
return std::string(data, size);
|
||||
}
|
||||
|
||||
Str lstrip() const {
|
||||
Str copy(*this);
|
||||
std::string copy = str();
|
||||
copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) {
|
||||
// std::isspace(c) does not working on windows (Debug)
|
||||
return c != ' ' && c != '\t' && c != '\r' && c != '\n';
|
||||
@ -69,10 +184,6 @@ public:
|
||||
return Str(copy);
|
||||
}
|
||||
|
||||
size_t hash() const {
|
||||
return std::hash<std::string>()(*this);
|
||||
}
|
||||
|
||||
Str escape(bool single_quote) const {
|
||||
StrStream ss;
|
||||
ss << (single_quote ? '\'' : '"');
|
||||
@ -104,24 +215,60 @@ public:
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
Str& operator=(const Str& s){
|
||||
this->std::string::operator=(s);
|
||||
delete _u8_index;
|
||||
if(s._u8_index != nullptr){
|
||||
_u8_index = new std::vector<uint16_t>(*s._u8_index);
|
||||
}
|
||||
return *this;
|
||||
int index(const Str& sub) const {
|
||||
auto p = std::search(data, data + size, sub.data, sub.data + sub.size);
|
||||
if(p == data + size) return -1;
|
||||
return p - data;
|
||||
}
|
||||
|
||||
Str& operator=(Str&& s){
|
||||
this->std::string::operator=(std::move(s));
|
||||
delete _u8_index;
|
||||
this->_u8_index = s._u8_index;
|
||||
s._u8_index = nullptr;
|
||||
return *this;
|
||||
/// Returns a copy of this string with every non-overlapping occurrence of
/// `old` replaced by `new_`, scanning left to right.
///
/// An empty `old` matches nothing and the string is returned unchanged.
///
/// Fixes over the previous version:
///  - the search restarted from offset 0 on every iteration (`index(old)`),
///    so after the first replacement the match offset was behind the cursor
///    and `substr` was called with a negative length;
///  - an empty `old` never advanced the cursor and looped forever.
Str replace(const Str& old, const Str& new_) const {
    if(old.size == 0) return *this;
    StrStream ss;
    const char* const stop = data + size;
    int i = 0;  // first byte not yet copied to the output
    while(i < size){
        // Search only the part of the string that has not been consumed yet.
        const char* hit = std::search(data + i, stop, old.data, old.data + old.size);
        if(hit == stop){
            ss.write(data + i, size - i);   // no further match: copy the tail
            break;
        }
        const int j = (int)(hit - data);
        ss.write(data + i, j - i);          // text between the previous match and this one
        ss << new_;
        i = j + old.size;
    }
    return ss.str();
}
|
||||
|
||||
~Str(){ delete _u8_index;}
|
||||
/*************unicode*************/
|
||||
|
||||
int _u8_index(int i) const{
|
||||
if(is_ascii) return i;
|
||||
int j = 0;
|
||||
while(i > 0){
|
||||
j += utf8len(data[j]);
|
||||
i--;
|
||||
}
|
||||
return j;
|
||||
}
|
||||
|
||||
/// Returns the `i`-th unicode character as its own Str.
/// The character index is first mapped to a byte offset, then the lead byte
/// at that offset decides how many bytes are copied. No range checking is done.
Str u8_getitem(int i) const{
    const int byte_pos = _u8_index(i);
    const int n_bytes = utf8len(data[byte_pos]);
    return substr(byte_pos, n_bytes);
}
|
||||
|
||||
/// Returns the characters in the half-open character range [start, end).
/// Both bounds are translated to byte offsets independently, each by its own
/// `_u8_index` call.
Str u8_slice(int start, int end) const{
    // TODO: optimize this
    const int first_byte = _u8_index(start);
    const int last_byte = _u8_index(end);
    return substr(first_byte, last_byte - first_byte);
}
|
||||
|
||||
int u8_length() const {
|
||||
if(is_ascii) return size;
|
||||
int ret = 0;
|
||||
for(int i=0; i<size; i++){
|
||||
if((data[i] & 0xC0) != 0x80) ret++;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
const uint32_t kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,7030
3,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560};
|
||||
@ -145,7 +292,7 @@ struct StrName {
|
||||
if(s._cached_sn_index != 0){
|
||||
index = s._cached_sn_index;
|
||||
} else {
|
||||
index = get(s).index;
|
||||
index = get(s.sv()).index;
|
||||
}
|
||||
}
|
||||
const Str& str() const { return _r_interned[index-1]; }
|
||||
@ -170,11 +317,7 @@ struct StrName {
|
||||
static std::map<Str, uint16_t, std::less<>> _interned;
|
||||
static std::vector<Str> _r_interned;
|
||||
|
||||
static StrName get(const Str& s){
|
||||
return get(s.c_str());
|
||||
}
|
||||
|
||||
static StrName get(const char* s){
|
||||
static StrName get(std::string_view s){
|
||||
auto it = _interned.find(s);
|
||||
if(it != _interned.end()) return StrName(it->second);
|
||||
uint16_t index = (uint16_t)(_r_interned.size() + 1);
|
||||
|
6
src/vm.h
6
src/vm.h
@ -561,8 +561,8 @@ inline PyObject* VM::new_module(StrName name) {
|
||||
|
||||
inline Str VM::disassemble(CodeObject_ co){
|
||||
auto pad = [](const Str& s, const int n){
|
||||
if(s.size() >= n) return s.substr(0, n);
|
||||
return s + std::string(n - s.size(), ' ');
|
||||
if(s.length() >= n) return s.substr(0, n);
|
||||
return s + std::string(n - s.length(), ' ');
|
||||
};
|
||||
|
||||
std::vector<int> jumpTargets;
|
||||
@ -591,7 +591,7 @@ inline Str VM::disassemble(CodeObject_ co){
|
||||
ss << pad(line, 8) << pointer << pad(std::to_string(i), 3);
|
||||
ss << " " << pad(OP_NAMES[byte.op], 20) << " ";
|
||||
// ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
|
||||
Str argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
|
||||
std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
|
||||
switch(byte.op){
|
||||
case OP_LOAD_CONST:
|
||||
argStr += " (" + CAST(Str, asRepr(co->consts[byte.arg])) + ")";
|
||||
|
Loading…
x
Reference in New Issue
Block a user