From 2e464491177b11016cc4a085624dc7b145cb77f6 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 8 Apr 2023 22:41:10 +0800 Subject: [PATCH] up --- src/ceval.h | 2 -- src/lexer.h | 11 +++----- src/new_str.h | 78 ++++++++++++++++++++++++++++++++++++--------------- 3 files changed, 60 insertions(+), 31 deletions(-) diff --git a/src/ceval.h b/src/ceval.h index c04b29f7..d54a7fff 100644 --- a/src/ceval.h +++ b/src/ceval.h @@ -277,8 +277,6 @@ __NEXT_STEP:; } DISPATCH(); case OP_DICT_ADD: { PyObject* kv = frame->popx(); - // we do copy here to avoid accidental gc in `kv` - // TODO: optimize to avoid copy Tuple& t = CAST(Tuple& ,kv); fast_call(__setitem__, Args{frame->top_1(), t[0], t[1]}); } DISPATCH(); diff --git a/src/lexer.h b/src/lexer.h index 74c8f96b..cf96cd8e 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -2,6 +2,7 @@ #include "common.h" #include "error.h" +#include "new_str.h" #include "str.h" namespace pkpy{ @@ -170,13 +171,9 @@ struct Lexer { int eat_name() { curr_char--; while(true){ - uint8_t c = peekchar(); - int u8bytes = 0; - if((c & 0b10000000) == 0b00000000) u8bytes = 1; - else if((c & 0b11100000) == 0b11000000) u8bytes = 2; - else if((c & 0b11110000) == 0b11100000) u8bytes = 3; - else if((c & 0b11111000) == 0b11110000) u8bytes = 4; - else return 1; + unsigned char c = peekchar(); + int u8bytes = utf8len(c); + if(u8bytes == 0) return 1; if(u8bytes == 1){ if(isalpha(c) || c=='_' || isdigit(c)) { curr_char++; diff --git a/src/new_str.h b/src/new_str.h index 73d96d1d..031c8433 100644 --- a/src/new_str.h +++ b/src/new_str.h @@ -2,33 +2,52 @@ #include "common.h" #include "memory.h" -#include namespace pkpy{ -struct String{ - char* data; - int size; +inline int utf8len(unsigned char c){ + if((c & 0b10000000) == 0) return 1; + if((c & 0b11100000) == 0b11000000) return 2; + if((c & 0b11110000) == 0b11100000) return 3; + if((c & 0b11111000) == 0b11110000) return 4; + if((c & 0b11111100) == 0b11111000) return 5; + if((c & 0b11111110) == 0b11111100) return 6; + return 0; +} - String(): data((char*)pool64.alloc(0)), size(0) {} - String(int size): data((char*)pool64.alloc(size)), size(size) {} - String(const char* str) { - size = strlen(str); +struct String{ + int size; + bool is_ascii; + char* data; + + String(): size(0), is_ascii(true), data((char*)pool64.alloc(0)) {} + + String(int size, bool is_ascii): size(size), is_ascii(is_ascii) { data = (char*)pool64.alloc(size); - memcpy(data, str, size); } - String(const String& other): data((char*)pool64.alloc(other.size)), size(other.size) { + String(const char* str): size(strlen(str)), is_ascii(true) { + data = (char*)pool64.alloc(size); + for(int i=0; i 0){ + j += utf8len(data[j]); + i--; + } + return j; + } + + String u8_getitem(int i) const { + i = _u8_index(i); + return substr(i, utf8len(data[i])); + } + + String u8_slice(int start, int end) const{ + start = _u8_index(start); + end = _u8_index(end); + return substr(start, end - start); + } }; -struct UnicodeString: String{ - -}; - - } // namespace pkpy \ No newline at end of file