diff --git a/include/pocketpy/common/smallmap.h b/include/pocketpy/common/smallmap.h index 44483fc5..d339d384 100644 --- a/include/pocketpy/common/smallmap.h +++ b/include/pocketpy/common/smallmap.h @@ -17,7 +17,7 @@ extern "C" { #define SMALLMAP_T__HEADER -#define K c11_string +#define K c11_stringview #define V uint16_t #define NAME c11_smallmap_s2n #define less(a, b) (c11_string__cmp((a), (b)) < 0) diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h index b679f80f..ac920e9a 100644 --- a/include/pocketpy/common/sstream.h +++ b/include/pocketpy/common/sstream.h @@ -15,21 +15,19 @@ typedef struct pk_SStream { } pk_SStream; void pk_SStream__ctor(pk_SStream* self); -void pk_SStream__ctor2(pk_SStream* self, int capacity); void pk_SStream__dtor(pk_SStream* self); void pk_SStream__write_int(pk_SStream* self, int); void pk_SStream__write_i64(pk_SStream* self, int64_t); void pk_SStream__write_f64(pk_SStream* self, double, int precision); void pk_SStream__write_char(pk_SStream* self, char); -void pk_SStream__write_Str(pk_SStream* self, const py_Str*); -void pk_SStream__write_sv(pk_SStream* self, c11_string); +void pk_SStream__write_sv(pk_SStream* self, c11_stringview); void pk_SStream__write_cstr(pk_SStream* self, const char*); void pk_SStream__write_cstrn(pk_SStream* self, const char*, int); void pk_SStream__write_hex(pk_SStream* self, unsigned char, bool non_zero); void pk_SStream__write_ptr(pk_SStream* self, void*); // Submit the stream and return the final string. 
The stream becomes invalid after this call -py_Str pk_SStream__submit(pk_SStream* self); +c11_string* pk_SStream__submit(pk_SStream* self); void pk_vsprintf(pk_SStream* ss, const char* fmt, va_list args); void pk_sprintf(pk_SStream* ss, const char* fmt, ...); diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index 21a2b7b1..78c3b5bd 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -10,66 +10,48 @@ extern "C" { #endif /* string_view */ -typedef struct c11_string{ +typedef struct c11_stringview{ const char* data; int size; -} c11_string; +} c11_stringview; -int c11_string__cmp(c11_string self, c11_string other); -int c11_string__cmp2(c11_string self, const char* other, int size); -int c11_string__cmp3(c11_string self, const char* other); -int c11_string__index(c11_string self, char c); +int c11_string__cmp(c11_stringview self, c11_stringview other); +int c11_string__cmp2(c11_stringview self, const char* other, int size); +int c11_string__cmp3(c11_stringview self, const char* other); -typedef struct py_Str{ - int size; - bool is_ascii; - bool is_sso; - union{ - char* _ptr; - char _inlined[16]; - }; -} py_Str; +// int size | char[] | '\0' +typedef const char c11_string; -PK_INLINE const char* py_Str__data(const py_Str* self){ - return self->is_sso ? 
self->_inlined : self->_ptr; -} +c11_string* c11_string__new(const char* data); +c11_string* c11_string__new2(const char* data, int size); +c11_string* c11_string__copy(c11_string* self); +void c11_string__delete(c11_string* self); +int c11_string__len(c11_string* self); +c11_stringview c11_string__view(c11_string* self); +c11_string* c11_string__replace(c11_string* self, char old, char new_); -PK_INLINE c11_string py_Str__sv(const py_Str* self){ - c11_string retval; - retval.data = py_Str__data(self); - retval.size = self->size; - return retval; -} +int c11_string__u8_length(const c11_string* self); +c11_stringview c11_string__u8_getitem(c11_string* self, int i); +c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step); -void py_Str__ctor(py_Str* self, const char* data); -void py_Str__ctor2(py_Str* self, const char* data, int size); -void py_Str__dtor(py_Str* self); -py_Str py_Str__copy(const py_Str* self); -py_Str py_Str__concat(const py_Str* self, const py_Str* other); -py_Str py_Str__concat2(const py_Str* self, const char* other, int size); -py_Str py_Str__slice(const py_Str* self, int start); -py_Str py_Str__slice2(const py_Str* self, int start, int stop); -py_Str py_Str__lower(const py_Str* self); -py_Str py_Str__upper(const py_Str* self); -py_Str py_Str__escape(const py_Str* self, char quote); -py_Str py_Str__strip(const py_Str* self, bool left, bool right); -py_Str py_Str__strip2(const py_Str* self, bool left, bool right, const py_Str* chars); -py_Str py_Str__replace(const py_Str* self, char old, char new_); -py_Str py_Str__replace2(const py_Str* self, const py_Str* old, const py_Str* new_); -py_Str py_Str__u8_getitem(const py_Str* self, int i); -py_Str py_Str__u8_slice(const py_Str* self, int start, int stop, int step); -int py_Str__u8_length(const py_Str* self); -int py_Str__cmp(const py_Str* self, const py_Str* other); -int py_Str__cmp2(const py_Str* self, const char* other, int size); -int py_Str__cmp3(const py_Str* self, const 
char* other); -int py_Str__unicode_index_to_byte(const py_Str* self, int i); -int py_Str__byte_index_to_unicode(const py_Str* self, int n); -int py_Str__index(const py_Str* self, const py_Str* sub, int start); -int py_Str__count(const py_Str* self, const py_Str* sub); -c11_vector/* T=c11_string */ py_Str__split(const py_Str* self, char sep); -c11_vector/* T=c11_string */ py_Str__split2(const py_Str* self, const py_Str* sep); +// general string operations +void c11_sv__quote(c11_stringview sv, char quote, c11_vector* buf); +void c11_sv__lower(c11_stringview sv, c11_vector* buf); +void c11_sv__upper(c11_stringview sv, c11_vector* buf); +c11_stringview c11_sv__slice(c11_stringview sv, int start); +c11_stringview c11_sv__slice2(c11_stringview sv, int start, int stop); +c11_stringview c11_sv__strip(c11_stringview sv, bool left, bool right); +int c11_sv__index(c11_stringview self, char c); +int c11_sv__index2(c11_stringview self, c11_stringview sub, int start); +int c11_sv__count(c11_stringview self, c11_stringview sub); + +c11_vector/* T=c11_stringview */ c11_sv__split(c11_stringview self, char sep); +c11_vector/* T=c11_stringview */ c11_sv__split2(c11_stringview self, c11_stringview sep); + +// misc +int c11__unicode_index_to_byte(const char* data, int i); +int c11__byte_index_to_unicode(const char* data, int n); -bool c11__isascii(const char* p, int size); bool c11__is_unicode_Lo_char(int c); int c11__u8_header(unsigned char c, bool suppress); diff --git a/include/pocketpy/common/strname.h b/include/pocketpy/common/strname.h index 08f43fc6..2ef4b88f 100644 --- a/include/pocketpy/common/strname.h +++ b/include/pocketpy/common/strname.h @@ -11,9 +11,9 @@ extern "C" { typedef uint16_t StrName; uint16_t pk_StrName__map(const char*); -uint16_t pk_StrName__map2(c11_string); +uint16_t pk_StrName__map2(c11_stringview); const char* pk_StrName__rmap(uint16_t index); -c11_string pk_StrName__rmap2(uint16_t index); +c11_stringview pk_StrName__rmap2(uint16_t index); void 
pk_StrName__initialize(); void pk_StrName__finalize(); diff --git a/include/pocketpy/compiler/lexer.h b/include/pocketpy/compiler/lexer.h index 28ad0712..93888056 100644 --- a/include/pocketpy/compiler/lexer.h +++ b/include/pocketpy/compiler/lexer.h @@ -46,9 +46,9 @@ enum TokenValueIndex{ typedef struct TokenValue { enum TokenValueIndex index; // 0: empty union { - int64_t _i64; // 1 - double _f64; // 2 - py_Str _str; // 3 + int64_t _i64; // 1 + double _f64; // 2 + c11_string* _str; // 3 }; } TokenValue; @@ -93,17 +93,17 @@ typedef enum IntParsingResult{ IntParsing_OVERFLOW, } IntParsingResult; -IntParsingResult parse_uint(c11_string text, int64_t* out, int base); +IntParsingResult parse_uint(c11_stringview text, int64_t* out, int base); typedef struct Error Error; typedef c11_array pk_TokenArray; Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens); -Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out_string); +Error* pk_Lexer__process_and_dump(pk_SourceData_ src, c11_string** out_string); void pk_TokenArray__dtor(pk_TokenArray* self); -#define Token__sv(self) (c11_string){(self)->start, (self)->length} +#define Token__sv(self) (c11_stringview){(self)->start, (self)->length} #ifdef __cplusplus } diff --git a/include/pocketpy/objects/codeobject.h b/include/pocketpy/objects/codeobject.h index 8531ee5c..3c8e6256 100644 --- a/include/pocketpy/objects/codeobject.h +++ b/include/pocketpy/objects/codeobject.h @@ -68,7 +68,7 @@ typedef struct BytecodeEx { typedef struct CodeObject { pk_SourceData_ src; - py_Str name; + c11_string* name; c11_vector/*T=Bytecode*/ codes; c11_vector/*T=CodeObjectByteCodeEx*/ codes_ex; @@ -87,7 +87,7 @@ typedef struct CodeObject { int end_line; } CodeObject; -void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_string name); +void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_stringview name); void CodeObject__dtor(CodeObject* self); void CodeObject__gc_mark(const CodeObject* self); @@ 
-116,7 +116,7 @@ typedef struct FuncDecl { typedef FuncDecl* FuncDecl_; -FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_string name); +FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_stringview name); void FuncDecl__dtor(FuncDecl* self); void FuncDecl__add_kwarg(FuncDecl* self, int index, uint16_t key, const py_TValue* value); void FuncDecl__gc_mark(const FuncDecl* self); diff --git a/include/pocketpy/objects/error.h b/include/pocketpy/objects/error.h index dec2d402..d5fdc976 100644 --- a/include/pocketpy/objects/error.h +++ b/include/pocketpy/objects/error.h @@ -9,30 +9,30 @@ extern "C" { #endif -typedef struct pkpy_ExceptionFrame { - pk_SourceData_ src; - int lineno; - const char* cursor; - py_Str name; -} pkpy_ExceptionFrame; +// typedef struct pkpy_ExceptionFrame { +// pk_SourceData_ src; +// int lineno; +// const char* cursor; +// py_Str name; +// } pkpy_ExceptionFrame; -typedef struct pkpy_Exception { - StrName type; - py_Str msg; - bool is_re; +// typedef struct pkpy_Exception { +// StrName type; +// py_Str msg; +// bool is_re; - int _ip_on_error; - void* _code_on_error; +// int _ip_on_error; +// void* _code_on_error; - PyObject* self; // weak reference +// PyObject* self; // weak reference - c11_vector/*T=pkpy_ExceptionFrame*/ stacktrace; -} pkpy_Exception; +// c11_vector/*T=pkpy_ExceptionFrame*/ stacktrace; +// } pkpy_Exception; -void pkpy_Exception__ctor(pkpy_Exception* self, StrName type); -void pkpy_Exception__dtor(pkpy_Exception* self); -void pkpy_Exception__stpush(pkpy_Exception* self, pk_SourceData_ src, int lineno, const char* cursor, const char* name); -py_Str pkpy_Exception__summary(pkpy_Exception* self); +// void pkpy_Exception__ctor(pkpy_Exception* self, StrName type); +// void pkpy_Exception__dtor(pkpy_Exception* self); +// void pkpy_Exception__stpush(pkpy_Exception* self, pk_SourceData_ src, int lineno, const char* cursor, const char* name); +// py_Str pkpy_Exception__summary(pkpy_Exception* self); struct Error{ const char* type; diff 
--git a/include/pocketpy/objects/sourcedata.h b/include/pocketpy/objects/sourcedata.h index b1a88ba0..0d25364f 100644 --- a/include/pocketpy/objects/sourcedata.h +++ b/include/pocketpy/objects/sourcedata.h @@ -16,18 +16,18 @@ struct pk_SourceData { bool is_precompiled; bool is_dynamic; // for exec() and eval() - py_Str filename; - py_Str source; + c11_string* filename; + c11_string* source; c11_vector/*T=const char* */ line_starts; - c11_vector/*T=py_Str*/ _precompiled_tokens; + c11_vector/*T=c11_string* */ _precompiled_tokens; }; typedef struct pk_SourceData* pk_SourceData_; pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode, bool is_dynamic); bool pk_SourceData__get_line(const struct pk_SourceData* self, int lineno, const char** st, const char** ed); -py_Str pk_SourceData__snapshot(const struct pk_SourceData *self, int lineno, const char *cursor, const char *name); +c11_string* pk_SourceData__snapshot(const struct pk_SourceData *self, int lineno, const char *cursor, const char *name); #ifdef __cplusplus } diff --git a/include/pocketpy/pocketpy.h b/include/pocketpy/pocketpy.h index f2401dc7..872f1a30 100644 --- a/include/pocketpy/pocketpy.h +++ b/include/pocketpy/pocketpy.h @@ -9,7 +9,6 @@ typedef struct py_TValue py_TValue; typedef struct pk_VM pk_VM; typedef uint16_t py_Name; typedef int16_t py_Type; -typedef struct py_Str py_Str; typedef int64_t py_i64; typedef double py_f64; diff --git a/src/common/smallmap.c b/src/common/smallmap.c index dc5c8cfa..bbff6a69 100644 --- a/src/common/smallmap.c +++ b/src/common/smallmap.c @@ -9,7 +9,7 @@ #define SMALLMAP_T__SOURCE -#define K c11_string +#define K c11_stringview #define V uint16_t #define NAME c11_smallmap_s2n #define less(a, b) (c11_string__cmp((a), (b)) < 0) diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c index 957f90cb..6862b7f2 100644 --- a/src/common/sourcedata.c +++ b/src/common/sourcedata.c @@ -9,10 +9,10 @@ static void 
pk_SourceData__ctor(struct pk_SourceData* self, const char* filename, enum CompileMode mode, bool is_dynamic) { - py_Str__ctor(&self->filename, filename); + self->filename = c11_string__new(filename); self->mode = mode; c11_vector__ctor(&self->line_starts, sizeof(const char*)); - c11_vector__ctor(&self->_precompiled_tokens, sizeof(py_Str)); + c11_vector__ctor(&self->_precompiled_tokens, sizeof(c11_string*)); // Skip utf8 BOM if there is any. if(strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3; @@ -26,18 +26,18 @@ static void pk_SourceData__ctor(struct pk_SourceData* self, source++; } self->source = pk_SStream__submit(&ss); - source = py_Str__data(&self->source); - self->is_precompiled = (strncmp(source, "pkpy:", 5) == 0); - c11_vector__push(const char*, &self->line_starts, source); + self->is_precompiled = (strncmp(self->source, "pkpy:", 5) == 0); /* inspect the stored copy: the raw `source` cursor was already advanced above */ + c11_vector__push(const char*, &self->line_starts, self->source); } static void pk_SourceData__dtor(struct pk_SourceData* self) { - py_Str__dtor(&self->filename); - py_Str__dtor(&self->source); + c11_string__delete(self->filename); + c11_string__delete(self->source); + c11_vector__dtor(&self->line_starts); for(int i = 0; i < self->_precompiled_tokens.count; i++) { - py_Str__dtor(c11__at(py_Str, &self->_precompiled_tokens, i)); + c11_string__delete(c11__getitem(c11_string*, &self->_precompiled_tokens, i)); } c11_vector__dtor(&self->_precompiled_tokens); } @@ -70,7 +70,7 @@ bool pk_SourceData__get_line(const struct pk_SourceData* self, return true; } -py_Str pk_SourceData__snapshot(const struct pk_SourceData* self, +c11_string* pk_SourceData__snapshot(const struct pk_SourceData* self, int lineno, const char* cursor, const char* name) { diff --git a/src/common/sstream.c b/src/common/sstream.c index d5eb6bda..d62ef355 100644 --- a/src/common/sstream.c +++ b/src/common/sstream.c @@ -9,11 +9,12 @@ #include #include -void pk_SStream__ctor(pk_SStream* self) { c11_vector__ctor(&self->data, sizeof(char)); } +static const int C11_STRING_HEADER_SIZE = sizeof(int); /* bytes reserved in front of the text for the int length prefix */ -void 
pk_SStream__ctor2(pk_SStream* self, int capacity) { +void pk_SStream__ctor(pk_SStream* self) { c11_vector__ctor(&self->data, sizeof(char)); - c11_vector__reserve(&self->data, capacity); + c11_vector__reserve(&self->data, 100 + C11_STRING_HEADER_SIZE); + self->data.count = C11_STRING_HEADER_SIZE; } void pk_SStream__dtor(pk_SStream* self) { c11_vector__dtor(&self->data); } @@ -21,30 +22,17 @@ void pk_SStream__dtor(pk_SStream* self) { c11_vector__dtor(&self->data); } void pk_SStream__write_char(pk_SStream* self, char c) { c11_vector__push(char, &self->data, c); } void pk_SStream__write_int(pk_SStream* self, int i) { - char buf[12]; // sign + 10 digits + null terminator - snprintf(buf, sizeof(buf), "%d", i); - pk_SStream__write_cstr(self, buf); + // len('-2147483648') == 11 + c11_vector__reserve(&self->data, self->data.count + 11 + 1); + int n = snprintf((char*)self->data.data + self->data.count, 11 + 1, "%d", i); /* format at the write cursor; offset 0 would clobber earlier output */ + self->data.count += n; } void pk_SStream__write_i64(pk_SStream* self, int64_t val) { - // sign + 21 digits + null terminator - // str(-2**64).__len__() == 21 - c11_vector__reserve(&self->data, self->data.count + 23); - if(val == 0) { - pk_SStream__write_char(self, '0'); - return; - } - if(val < 0) { - pk_SStream__write_char(self, '-'); - val = -val; - } - int start = self->data.count; - while(val) { - c11_vector__push(char, &self->data, '0' + val % 10); - val /= 10; - } - int end = self->data.count - 1; - c11_vector__reverse(char, &self->data, start, end); + // len('-9223372036854775808') == 20 + c11_vector__reserve(&self->data, self->data.count + 20 + 1); + int n = snprintf((char*)self->data.data + self->data.count, 20 + 1, "%lld", (long long)val); /* format at the write cursor; offset 0 would clobber earlier output */ + self->data.count += n; } void pk_SStream__write_f64(pk_SStream* self, double val, int precision) { @@ -76,11 +64,7 @@ void pk_SStream__write_f64(pk_SStream* self, double val, int precision) { if(all_is_digit) pk_SStream__write_cstr(self, ".0"); } -void pk_SStream__write_Str(pk_SStream* self, const py_Str* str) { - pk_SStream__write_cstrn(self, py_Str__data(str), str->size); -} - 
-void pk_SStream__write_sv(pk_SStream* self, c11_string sv) { +void pk_SStream__write_sv(pk_SStream* self, c11_stringview sv) { pk_SStream__write_cstrn(self, sv.data, sv.size); } @@ -119,15 +103,12 @@ void pk_SStream__write_ptr(pk_SStream* self, void* p) { } } -py_Str pk_SStream__submit(pk_SStream* self) { +c11_string* pk_SStream__submit(pk_SStream* self) { c11_vector__push(char, &self->data, '\0'); - c11_array a = c11_vector__submit(&self->data); - // TODO: optimize c11__isascii - py_Str retval = {.size = a.count - 1, - .is_ascii = c11__isascii((char*)a.data, a.count), - .is_sso = false, - ._ptr = (char*)a.data}; - return retval; + c11_array arr = c11_vector__submit(&self->data); + int* p = arr.data; + *p = arr.count - C11_STRING_HEADER_SIZE - 1; + return (c11_string*)(p + 1); } void pk_vsprintf(pk_SStream* ss, const char* fmt, va_list args) { @@ -165,17 +146,8 @@ void pk_vsprintf(pk_SStream* ss, const char* fmt, va_list args) { } case 'q': { const char* s = va_arg(args, const char*); - py_Str tmp, tmp2; - py_Str__ctor(&tmp, s); - tmp2 = py_Str__escape(&tmp, '\''); - pk_SStream__write_Str(ss, &tmp2); - py_Str__dtor(&tmp); - py_Str__dtor(&tmp2); - break; - } - case 'S': { - const py_Str* s = va_arg(args, const py_Str*); - pk_SStream__write_Str(ss, s); + c11_stringview sv = {s, strlen(s)}; + c11_sv__quote(sv, '\'', &ss->data); break; } case 'c': { @@ -218,7 +190,7 @@ void pk_sprintf(pk_SStream* ss, const char* fmt, ...) { const char* py_fmt(const char* fmt, ...) 
{ PK_THREAD_LOCAL pk_SStream ss; if(ss.data.elem_size == 0) { - pk_SStream__ctor2(&ss, 256); + pk_SStream__ctor(&ss); } else { c11_vector__clear(&ss.data); } diff --git a/src/common/str.c b/src/common/str.c index bdc144ea..faa265c3 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -1,4 +1,5 @@ #include "pocketpy/common/str.h" +#include "pocketpy/common/sstream.h" #include "pocketpy/common/utils.h" #include @@ -7,409 +8,252 @@ #include #include -void py_Str__ctor(py_Str *self, const char *data){ - py_Str__ctor2(self, data, strlen(data)); -} +c11_string* c11_string__new(const char* data) { return c11_string__new2(data, strlen(data)); } -void py_Str__ctor2(py_Str *self, const char *data, int size){ - self->size = size; - self->is_ascii = c11__isascii(data, size); - self->is_sso = size < sizeof(self->_inlined); - char* p; - if(self->is_sso){ - p = self->_inlined; - }else{ - self->_ptr = (char*)malloc(size + 1); - p = self->_ptr; - } +c11_string* c11_string__new2(const char* data, int size) { + int* p = malloc(sizeof(int) + size + 1); + *p++ = size; memcpy(p, data, size); - p[size] = '\0'; + ((char*)p)[size] = '\0'; + return (c11_string*)p; } -void py_Str__dtor(py_Str *self){ - if(!self->is_sso){ - free(self->_ptr); - self->is_sso = true; - self->size = 0; - } +c11_string* c11_string__copy(c11_string* self) { + int* p = (int*)self - 1; + int total_size = sizeof(int) + *p + 1; + int* q = malloc(total_size); + memcpy(q, p, total_size); + return (c11_string*)(q + 1); } -py_Str py_Str__copy(const py_Str *self){ - py_Str retval = *self; - if(!self->is_sso){ - retval._ptr = (char*)malloc(self->size + 1); - // '\0' is copied - memcpy(retval._ptr, self->_ptr, self->size + 1); - } - return retval; +void c11_string__delete(c11_string* self) { + int* p = (int*)self - 1; + free(p); } -py_Str py_Str__concat(const py_Str *self, const py_Str *other){ - py_Str retval = { - .size = self->size + other->size, - .is_ascii = self->is_ascii && other->is_ascii, - .is_sso = self->size + 
other->size < sizeof(retval._inlined), - }; - char* p; - if(retval.is_sso){ - p = retval._inlined; - }else{ - retval._ptr = (char*)malloc(retval.size + 1); - p = retval._ptr; - } - memcpy(p, py_Str__data(self), self->size); - memcpy(p + self->size, py_Str__data(other), other->size); - p[retval.size] = '\0'; - return retval; +int c11_string__len(c11_string* self) { + int* p = (int*)self - 1; + return *p; } -py_Str py_Str__concat2(const py_Str *self, const char *other, int size){ - py_Str tmp; - py_Str__ctor2(&tmp, other, size); - py_Str retval = py_Str__concat(self, &tmp); - py_Str__dtor(&tmp); - return retval; +c11_stringview c11_string__view(c11_string* self) { + int* p = (int*)self - 1; + return (c11_stringview){self, *p}; } -py_Str py_Str__slice(const py_Str *self, int start){ - return py_Str__slice2(self, start, self->size); -} - -py_Str py_Str__slice2(const py_Str *self, int start, int stop){ - py_Str retval; - if(stop < start) stop = start; - py_Str__ctor2(&retval, py_Str__data(self) + start, stop - start); - return retval; -} - -py_Str py_Str__lower(const py_Str *self){ - py_Str retval = py_Str__copy(self); - char* p = (char*)py_Str__data(&retval); - for(int i = 0; i < retval.size; i++){ - if('A' <= p[i] && p[i] <= 'Z') p[i] += 32; - } - return retval; -} - -py_Str py_Str__upper(const py_Str *self){ - py_Str retval = py_Str__copy(self); - char* p = (char*)py_Str__data(&retval); - for(int i = 0; i < retval.size; i++){ - if('a' <= p[i] && p[i] <= 'z') p[i] -= 32; - } - return retval; -} - -py_Str py_Str__escape(const py_Str* self, char quote){ - assert(quote == '"' || quote == '\''); - c11_vector buffer; - c11_vector__ctor(&buffer, sizeof(char)); - c11_vector__reserve(&buffer, self->size); - c11_vector__push(char, &buffer, quote); - const char* data = py_Str__data(self); - for(int i = 0; i < self->size; i++) { - char c = data[i]; - switch(c) { - case '"': case '\'': - if(c == quote) c11_vector__push(char, &buffer, '\\'); - c11_vector__push(char, &buffer, c); - 
break; - case '\\': - c11_vector__push(char, &buffer, '\\'); - c11_vector__push(char, &buffer, '\\'); - break; - case '\n': - c11_vector__push(char, &buffer, '\\'); - c11_vector__push(char, &buffer, 'n'); - break; - case '\r': - c11_vector__push(char, &buffer, '\\'); - c11_vector__push(char, &buffer, 'r'); - break; - case '\t': - c11_vector__push(char, &buffer, '\\'); - c11_vector__push(char, &buffer, 't'); - break; - case '\b': - c11_vector__push(char, &buffer, '\\'); - c11_vector__push(char, &buffer, 'b'); - break; - default: - if('\x00' <= c && c <= '\x1f') { - c11_vector__push(char, &buffer, '\\'); - c11_vector__push(char, &buffer, 'x'); - c11_vector__push(char, &buffer, PK_HEX_TABLE[c >> 4]); - c11_vector__push(char, &buffer, PK_HEX_TABLE[c & 0xf]); - } else { - c11_vector__push(char, &buffer, c); - } - } - } - c11_vector__push(char, &buffer, quote); - c11_vector__push(char, &buffer, '\0'); - py_Str retval = { - .size = buffer.count - 1, - .is_ascii = self->is_ascii, - .is_sso = false, - ._ptr = (char*)buffer.data, - }; - return retval; -} - -py_Str py_Str__strip(const py_Str *self, bool left, bool right){ - const char* data = py_Str__data(self); - if(self->is_ascii) { - int L = 0; - int R = self->size; - if(left) { - while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r')) - L++; - } - if(right) { - while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' || data[R - 1] == '\r')) - R--; - } - return py_Str__slice2(self, L, R); - } else { - py_Str tmp; - py_Str__ctor(&tmp, " \t\n\r"); - py_Str retval = py_Str__strip2(self, left, right, &tmp); - py_Str__dtor(&tmp); - return retval; - } -} - -py_Str py_Str__strip2(const py_Str *self, bool left, bool right, const py_Str *chars){ - int L = 0; - int R = py_Str__u8_length(self); - if(left) { - while(L < R){ - py_Str tmp = py_Str__u8_getitem(self, L); - bool found = py_Str__index(chars, &tmp, 0) != -1; - py_Str__dtor(&tmp); - if(!found) break; - L++; - } - } - 
if(right) { - while(L < R){ - py_Str tmp = py_Str__u8_getitem(self, R - 1); - bool found = py_Str__index(chars, &tmp, 0) != -1; - py_Str__dtor(&tmp); - if(!found) break; - R--; - } - } - return py_Str__u8_slice(self, L, R, 1); -} - -py_Str py_Str__replace(const py_Str *self, char old, char new_){ - py_Str retval = py_Str__copy(self); - char* p = (char*)py_Str__data(&retval); - for(int i = 0; i < retval.size; i++){ +c11_string* c11_string__replace(c11_string* self, char old, char new_) { + c11_string* retval = c11_string__copy(self); + char* p = (char*)retval; + int size = c11_string__len(retval); + for(int i = 0; i < size; i++) { if(p[i] == old) p[i] = new_; } return retval; } -py_Str py_Str__replace2(const py_Str *self, const py_Str *old, const py_Str *new_){ - c11_vector buffer; - c11_vector__ctor(&buffer, sizeof(char)); - int start = 0; - while(true) { - int i = py_Str__index(self, old, start); - if(i == -1) break; - py_Str tmp = py_Str__slice2(self, start, i); - c11_vector__extend(char, &buffer, py_Str__data(&tmp), tmp.size); - py_Str__dtor(&tmp); - c11_vector__extend(char, &buffer, py_Str__data(new_), new_->size); - start = i + old->size; +int c11_string__u8_length(c11_string* self) { + int size = c11_string__len(self); + return c11__byte_index_to_unicode(self, size); +} + +c11_stringview c11_string__u8_getitem(c11_string* self, int i) { + i = c11__unicode_index_to_byte(self, i); + int size = c11__u8_header(self[i], false); + return c11_sv__slice2(c11_string__view(self), i, i + size); +} + +c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step) { + pk_SStream ss; + pk_SStream__ctor(&ss); + assert(step != 0); + for(int i = start; step > 0 ? 
i < stop : i > stop; i += step) { + c11_stringview unicode = c11_string__u8_getitem(self, i); + pk_SStream__write_sv(&ss, unicode); } - py_Str tmp = py_Str__slice2(self, start, self->size); - c11_vector__extend(char, &buffer, py_Str__data(&tmp), tmp.size); - py_Str__dtor(&tmp); - c11_vector__push(char, &buffer, '\0'); - py_Str retval = { - .size = buffer.count - 1, - .is_ascii = self->is_ascii && old->is_ascii && new_->is_ascii, - .is_sso = false, - ._ptr = (char*)buffer.data, - }; - return retval; + return pk_SStream__submit(&ss); } -int c11_string__cmp(c11_string self, c11_string other){ - return c11_string__cmp2(self, other.data, other.size); +///////////////////////////////////////// +void c11_sv__quote(c11_stringview sv, char quote, c11_vector* buf) { + assert(quote == '"' || quote == '\''); + c11_vector__push(char, buf, quote); + for(int i = 0; i < sv.size; i++) { + char c = sv.data[i]; + switch(c) { + case '"': + case '\'': + if(c == quote) c11_vector__push(char, buf, '\\'); + c11_vector__push(char, buf, c); + break; + case '\\': c11_vector__extend(char, buf, "\\\\", 2); break; + case '\n': c11_vector__extend(char, buf, "\\n", 2); break; + case '\r': c11_vector__extend(char, buf, "\\r", 2); break; + case '\t': c11_vector__extend(char, buf, "\\t", 2); break; + case '\b': c11_vector__extend(char, buf, "\\b", 2); break; + default: + if('\x00' <= c && c <= '\x1f') { + c11_vector__extend(char, buf, "\\x", 2); + c11_vector__push(char, buf, PK_HEX_TABLE[c >> 4]); + c11_vector__push(char, buf, PK_HEX_TABLE[c & 0xf]); + } else { + c11_vector__push(char, buf, c); + } + } + } + c11_vector__push(char, buf, quote); } -int c11_string__cmp2(c11_string self, const char *other, int size){ - int res = strncmp(self.data, other, PK_MIN(self.size, size)); - if(res != 0) return res; - return self.size - size; +void c11_sv__lower(c11_stringview sv, c11_vector* buf) { + for(int i = 0; i < sv.size; i++) { + char c = sv.data[i]; + if('A' <= c && c <= 'Z') c += 32; + 
c11_vector__push(char, buf, c); + } } -int c11_string__cmp3(c11_string self, const char *other){ - return c11_string__cmp2(self, other, strlen(other)); +void c11_sv__upper(c11_stringview sv, c11_vector* buf) { + for(int i = 0; i < sv.size; i++) { + char c = sv.data[i]; + if('a' <= c && c <= 'z') c -= 32; + c11_vector__push(char, buf, c); + } } -int c11_string__index(c11_string self, char c){ - for(int i=0; isize); -} - -int py_Str__cmp2(const py_Str *self, const char *other, int size){ - int res = strncmp(py_Str__data(self), other, PK_MIN(self->size, size)); - if(res != 0) return res; - return self->size - size; -} - -int py_Str__cmp3(const py_Str *self, const char *other){ - return strcmp(py_Str__data(self), other); -} - -py_Str py_Str__u8_getitem(const py_Str *self, int i){ - i = py_Str__unicode_index_to_byte(self, i); - int size = c11__u8_header(py_Str__data(self)[i], false); - return py_Str__slice2(self, i, i + size); -} - -py_Str py_Str__u8_slice(const py_Str *self, int start, int stop, int step){ - c11_vector buffer; - c11_vector__ctor(&buffer, sizeof(char)); - assert(step != 0); - if(self->is_ascii){ - const char* p = py_Str__data(self); - for (int i=start; step>0 ? istop; i+=step) { - c11_vector__push(char, &buffer, p[i]); - } - }else{ - for (int i=start; step>0 ? 
istop; i+=step) { - py_Str unicode = py_Str__u8_getitem(self, i); - const char* p = py_Str__data(&unicode); - for(int j = 0; j < unicode.size; j++){ - c11_vector__push(char, &buffer, p[j]); - } - py_Str__dtor(&unicode); - } - } - c11_vector__push(char, &buffer, '\0'); - py_Str retval = { - .size = buffer.count - 1, - .is_ascii = self->is_ascii, - .is_sso = false, - ._ptr = (char*)buffer.data, - }; - return retval; -} - -int py_Str__u8_length(const py_Str *self){ - return py_Str__byte_index_to_unicode(self, self->size); -} - -int py_Str__unicode_index_to_byte(const py_Str* self, int i) { - if(self->is_ascii) return i; - const char* p = py_Str__data(self); - int j = 0; - while(i > 0) { - j += c11__u8_header(p[j], false); - i--; - } - return j; -} - -int py_Str__byte_index_to_unicode(const py_Str* self, int n) { - if(self->is_ascii) return n; - const char* p = py_Str__data(self); - int cnt = 0; - for(int i = 0; i < n; i++) { - if((p[i] & 0xC0) != 0x80) cnt++; - } - return cnt; -} - -int py_Str__index(const py_Str *self, const py_Str *sub, int start){ - if(sub->size == 0) return start; - int max_end = self->size - sub->size; - const char* self_data = py_Str__data(self); - const char* sub_data = py_Str__data(sub); - for(int i=start; i<=max_end; i++){ - int res = memcmp(self_data + i, sub_data, sub->size); +int c11_sv__index2(c11_stringview self, c11_stringview sub, int start) { + if(sub.size == 0) return start; + int max_end = self.size - sub.size; + for(int i = start; i <= max_end; i++) { + int res = memcmp(self.data + i, sub.data, sub.size); if(res == 0) return i; } return -1; } -int py_Str__count(const py_Str *self, const py_Str *sub){ - if(sub->size == 0) return self->size + 1; +int c11_sv__count(c11_stringview self, c11_stringview sub) { + if(sub.size == 0) return self.size + 1; int cnt = 0; int start = 0; while(true) { - int i = py_Str__index(self, sub, start); + int i = c11_sv__index2(self, sub, start); if(i == -1) break; cnt++; - start = i + sub->size; + start = 
i + sub.size; } return cnt; } -c11_vector/* T=c11_string */ py_Str__split(const py_Str *self, char sep){ +c11_vector /* T=c11_stringview */ c11_sv__split(c11_stringview self, char sep) { c11_vector retval; - c11_vector__ctor(&retval, sizeof(c11_string)); - const char* data = py_Str__data(self); + c11_vector__ctor(&retval, sizeof(c11_stringview)); + const char* data = self.data; int i = 0; - for(int j = 0; j < self->size; j++) { + for(int j = 0; j < self.size; j++) { if(data[j] == sep) { - if(j > i){ - c11_string tmp = {data + i, j - i}; - c11_vector__push(c11_string, &retval, tmp); + if(j > i) { + c11_stringview tmp = {data + i, j - i}; + c11_vector__push(c11_stringview, &retval, tmp); } i = j + 1; continue; } } - if(self->size > i){ - c11_string tmp = {data + i, self->size - i}; - c11_vector__push(c11_string, &retval, tmp); + if(self.size > i) { + c11_stringview tmp = {data + i, self.size - i}; + c11_vector__push(c11_stringview, &retval, tmp); } return retval; } -c11_vector/* T=c11_string */ py_Str__split2(const py_Str *self, const py_Str *sep){ +c11_vector /* T=c11_stringview */ c11_sv__split2(c11_stringview self, c11_stringview sep) { c11_vector retval; - c11_vector__ctor(&retval, sizeof(c11_string)); + c11_vector__ctor(&retval, sizeof(c11_stringview)); int start = 0; - const char* data = py_Str__data(self); + const char* data = self.data; while(true) { - int i = py_Str__index(self, sep, start); + int i = c11_sv__index2(self, sep, start); if(i == -1) break; - c11_string tmp = {data + start, i - start}; - if(tmp.size != 0) c11_vector__push(c11_string, &retval, tmp); - start = i + sep->size; + c11_stringview tmp = {data + start, i - start}; + if(tmp.size != 0) c11_vector__push(c11_stringview, &retval, tmp); + start = i + sep.size; } - c11_string tmp = {data + start, self->size - start}; - if(tmp.size != 0) c11_vector__push(c11_string, &retval, tmp); + c11_stringview tmp = {data + start, self.size - start}; + if(tmp.size != 0) c11_vector__push(c11_stringview, 
&retval, tmp); return retval; } -bool c11__isascii(const char* p, int size){ - for(int i = 0; i < size; i++) - if((unsigned char)p[i] > 127) - return false; - return true; +int c11__unicode_index_to_byte(const char* data, int i) { + int j = 0; + while(i > 0) { + j += c11__u8_header(data[j], false); + i--; + } + return j; } +int c11__byte_index_to_unicode(const char* data, int n) { + int cnt = 0; + for(int i = 0; i < n; i++) { + if((data[i] & 0xC0) != 0x80) cnt++; + } + return cnt; +} + +////////////// + +int c11_string__cmp(c11_stringview self, c11_stringview other) { + return c11_string__cmp2(self, other.data, other.size); +} + +int c11_string__cmp2(c11_stringview self, const char* other, int size) { + int res = strncmp(self.data, other, PK_MIN(self.size, size)); + if(res != 0) return res; + return self.size - size; +} + +int c11_string__cmp3(c11_stringview self, const char* other) { + return c11_string__cmp2(self, other, strlen(other)); +} + + // clang-format off static const int kLoRangeA[] = 
{170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70303,70320,70405,70415,70419,704
42,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560}; static const int kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518
,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,70301,70312,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101}; + // clang-format on -bool c11__is_unicode_Lo_char(int c){ +bool c11__is_unicode_Lo_char(int c) { if(c == 0x1f955) return true; int index; c11__lower_bound(const int, kLoRangeA, 476, c, c11__less, &index); diff --git a/src/common/strname.c b/src/common/strname.c index 041e5656..199399b1 100644 --- a/src/common/strname.c +++ b/src/common/strname.c @@ -17,7 +17,7 @@ void pk_StrName__initialize() { for(int i = 0; i < _r_interned.count; i++) { free(c11__at(char*, &_r_interned, i)); } - c11_vector__ctor(&_r_interned, sizeof(c11_string)); + 
c11_vector__ctor(&_r_interned, sizeof(c11_stringview)); _initialized = true; #define MAGIC_METHOD(x) x = pk_StrName__map(#x); @@ -46,10 +46,10 @@ void pk_StrName__finalize() { } uint16_t pk_StrName__map(const char* name) { - return pk_StrName__map2((c11_string){name, strlen(name)}); + return pk_StrName__map2((c11_stringview){name, strlen(name)}); } -uint16_t pk_StrName__map2(c11_string name) { +uint16_t pk_StrName__map2(c11_stringview name) { // TODO: PK_GLOBAL_SCOPE_LOCK() if(!_initialized) { pk_StrName__initialize(); // lazy init @@ -65,7 +65,7 @@ uint16_t pk_StrName__map2(c11_string name) { c11_vector__push(char*, &_r_interned, p); index = _r_interned.count; // 1-based // save to _interned - c11_smallmap_s2n__set(&_interned, (c11_string){p, name.size}, index); + c11_smallmap_s2n__set(&_interned, (c11_stringview){p, name.size}, index); assert(_interned.count == _r_interned.count); return index; } @@ -76,9 +76,9 @@ const char* pk_StrName__rmap(uint16_t index) { return c11__getitem(char*, &_r_interned, index - 1); } -c11_string pk_StrName__rmap2(uint16_t index) { +c11_stringview pk_StrName__rmap2(uint16_t index) { const char* p = pk_StrName__rmap(index); - return (c11_string){p, strlen(p)}; + return (c11_stringview){p, strlen(p)}; } py_Name py_name(const char* name) { diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c index 20a889d5..93317651 100644 --- a/src/compiler/compiler.c +++ b/src/compiler/compiler.c @@ -83,7 +83,7 @@ void Ctx__patch_jump(Ctx* self, int index); bool Ctx__add_label(Ctx* self, StrName name); int Ctx__add_varname(Ctx* self, StrName name); int Ctx__add_const(Ctx* self, py_Ref); -int Ctx__add_const_string(Ctx* self, c11_string); +int Ctx__add_const_string(Ctx* self, c11_stringview); void Ctx__emit_store_name(Ctx* self, NameScope scope, StrName name, int line); void Ctx__try_merge_for_iter_store(Ctx* self, int); void Ctx__s_emit_top(Ctx*); // emit top -> pop -> delete @@ -222,7 +222,7 @@ UnaryExpr* UnaryExpr__new(int line, Expr* 
child, Opcode opcode) { typedef struct RawStringExpr { EXPR_COMMON_HEADER - c11_string value; + c11_stringview value; Opcode opcode; } RawStringExpr; @@ -233,7 +233,7 @@ void RawStringExpr__emit_(Expr* self_, Ctx* ctx) { Ctx__emit_(ctx, self->opcode, BC_NOARG, self->line); } -RawStringExpr* RawStringExpr__new(int line, c11_string value, Opcode opcode) { +RawStringExpr* RawStringExpr__new(int line, c11_stringview value, Opcode opcode) { const static ExprVt Vt = {.emit_ = RawStringExpr__emit_}; static_assert_expr_size(RawStringExpr); RawStringExpr* self = PoolExpr_alloc(); @@ -289,7 +289,7 @@ void LiteralExpr__emit_(Expr* self_, Ctx* ctx) { break; } case TokenValue_STR: { - c11_string sv = py_Str__sv(&self->value->_str); + c11_stringview sv = c11_string__view(self->value->_str); int index = Ctx__add_const_string(ctx, sv); Ctx__emit_(ctx, OP_LOAD_CONST, index, self->line); break; @@ -585,7 +585,7 @@ LambdaExpr* LambdaExpr__new(int line, int index) { typedef struct FStringExpr { EXPR_COMMON_HEADER - c11_string src; + c11_stringview src; } FStringExpr; static bool is_fmt_valid_char(char c) { @@ -601,7 +601,7 @@ static bool is_fmt_valid_char(char c) { } } -static bool is_identifier(c11_string s) { +static bool is_identifier(c11_stringview s) { if(s.size == 0) return false; if(!isalpha(s.data[0]) && s.data[0] != '_') return false; for(int i = 0; i < s.size; i++) { @@ -611,7 +611,7 @@ static bool is_identifier(c11_string s) { return true; } -static void _load_simple_expr(Ctx* ctx, c11_string expr, int line) { +static void _load_simple_expr(Ctx* ctx, c11_stringview expr, int line) { bool repr = false; const char* expr_end = expr.data + expr.size; if(expr.size >= 2 && expr_end[-2] == '!') { @@ -634,10 +634,10 @@ static void _load_simple_expr(Ctx* ctx, c11_string expr, int line) { Ctx__emit_(ctx, OP_LOAD_NAME, pk_StrName__map2(expr), line); is_fastpath = true; } else { - int dot = c11_string__index(expr, '.'); + int dot = c11_sv__index(expr, '.'); if(dot > 0) { - c11_string a 
= {expr.data, dot}; // expr[:dot] - c11_string b = {expr.data + (dot + 1), expr.size - (dot + 1)}; // expr[dot+1:] + c11_stringview a = {expr.data, dot}; // expr[:dot] + c11_stringview b = {expr.data + (dot + 1), expr.size - (dot + 1)}; // expr[dot+1:] if(is_identifier(a) && is_identifier(b)) { Ctx__emit_(ctx, OP_LOAD_NAME, pk_StrName__map2(a), line); Ctx__emit_(ctx, OP_LOAD_ATTR, pk_StrName__map2(b), line); @@ -666,11 +666,11 @@ static void FStringExpr__emit_(Expr* self_, Ctx* ctx) { if(flag) { if(src[j] == '}') { // add expression - c11_string expr = {src + i, j - i}; // src[i:j] + c11_stringview expr = {src + i, j - i}; // src[i:j] // BUG: ':' is not a format specifier in f"{stack[2:]}" - int conon = c11_string__index(expr, ':'); + int conon = c11_sv__index(expr, ':'); if(conon >= 0) { - c11_string spec = {expr.data + (conon + 1), + c11_stringview spec = {expr.data + (conon + 1), expr.size - (conon + 1)}; // expr[conon+1:] // filter some invalid spec bool ok = true; @@ -707,7 +707,7 @@ static void FStringExpr__emit_(Expr* self_, Ctx* ctx) { j++; Ctx__emit_(ctx, OP_LOAD_CONST, - Ctx__add_const_string(ctx, (c11_string){"{", 1}), + Ctx__add_const_string(ctx, (c11_stringview){"{", 1}), self->line); count++; } else { @@ -722,7 +722,7 @@ static void FStringExpr__emit_(Expr* self_, Ctx* ctx) { j++; Ctx__emit_(ctx, OP_LOAD_CONST, - Ctx__add_const_string(ctx, (c11_string){"}", 1}), + Ctx__add_const_string(ctx, (c11_stringview){"}", 1}), self->line); count++; } else { @@ -735,7 +735,7 @@ static void FStringExpr__emit_(Expr* self_, Ctx* ctx) { i = j; while(j < self->src.size && src[j] != '{' && src[j] != '}') j++; - c11_string literal = {src + i, j - i}; // src[i:j] + c11_stringview literal = {src + i, j - i}; // src[i:j] Ctx__emit_(ctx, OP_LOAD_CONST, Ctx__add_const_string(ctx, literal), self->line); count++; continue; // skip j++ @@ -746,14 +746,14 @@ static void FStringExpr__emit_(Expr* self_, Ctx* ctx) { if(flag) { // literal - c11_string literal = {src + i, 
self->src.size - i}; // src[i:] + c11_stringview literal = {src + i, self->src.size - i}; // src[i:] Ctx__emit_(ctx, OP_LOAD_CONST, Ctx__add_const_string(ctx, literal), self->line); count++; } Ctx__emit_(ctx, OP_BUILD_STRING, count, self->line); } -FStringExpr* FStringExpr__new(int line, c11_string src) { +FStringExpr* FStringExpr__new(int line, c11_stringview src) { const static ExprVt Vt = {.emit_ = FStringExpr__emit_}; static_assert_expr_size(FStringExpr); FStringExpr* self = PoolExpr_alloc(); @@ -1344,7 +1344,7 @@ int Ctx__add_varname(Ctx* self, StrName name) { return index; } -int Ctx__add_const_string(Ctx* self, c11_string key) { +int Ctx__add_const_string(Ctx* self, c11_stringview key) { uint16_t* val = c11_smallmap_s2n__try_get(&self->co_consts_string_dedup_map, key); if(val) { return *val; @@ -1354,7 +1354,7 @@ int Ctx__add_const_string(Ctx* self, c11_string key) { c11_vector__push(py_TValue, &self->co->consts, tmp); int index = self->co->consts.count - 1; c11_smallmap_s2n__set(&self->co_consts_string_dedup_map, - py_Str__sv(PyObject__value(tmp._obj)), + c11_string__view(PyObject__value(tmp._obj)), index); return index; } @@ -1659,19 +1659,19 @@ static Error* exprLiteral(Compiler* self) { } static Error* exprLong(Compiler* self) { - c11_string sv = Token__sv(prev()); + c11_stringview sv = Token__sv(prev()); Ctx__s_push(ctx(), (Expr*)RawStringExpr__new(prev()->line, sv, OP_BUILD_LONG)); return NULL; } static Error* exprBytes(Compiler* self) { - c11_string sv = py_Str__sv(&prev()->value._str); + c11_stringview sv = c11_string__view(prev()->value._str); Ctx__s_push(ctx(), (Expr*)RawStringExpr__new(prev()->line, sv, OP_BUILD_BYTES)); return NULL; } static Error* exprFString(Compiler* self) { - c11_string sv = py_Str__sv(&prev()->value._str); + c11_stringview sv = c11_string__view(prev()->value._str); Ctx__s_push(ctx(), (Expr*)FStringExpr__new(prev()->line, sv)); return NULL; } @@ -2054,18 +2054,17 @@ Error* pk_compile(pk_SourceData_ src, CodeObject* out) { 
if(err) return err; Token* data = (Token*)tokens.data; - printf("%s\n", py_Str__data(&src->filename)); + printf("%s\n", src->filename); for(int i = 0; i < tokens.count; i++) { Token* t = data + i; - py_Str tmp; - py_Str__ctor2(&tmp, t->start, t->length); - printf("[%d] %s: %s\n", t->line, pk_TokenSymbols[t->type], py_Str__data(&tmp)); - py_Str__dtor(&tmp); + c11_string* tmp = c11_string__new2(t->start, t->length); + printf("[%d] %s: %s\n", t->line, pk_TokenSymbols[t->type], tmp); + c11_string__delete(tmp); } Compiler compiler; Compiler__ctor(&compiler, src, tokens); - CodeObject__ctor(out, src, py_Str__sv(&src->filename)); + CodeObject__ctor(out, src, c11_string__view(src->filename)); err = Compiler__compile(&compiler, out); if(err) { // if error occurs, dispose the code object diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index c0ac3554..6ceeb1f2 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -28,7 +28,7 @@ CodeObject* Compiler::push_global_context() noexcept{ return co; } -FuncDecl_ Compiler::push_f_context(c11_string name, int* out_index) noexcept{ +FuncDecl_ Compiler::push_f_context(c11_stringview name, int* out_index) noexcept{ FuncDecl_ decl = FuncDecl__rcnew(lexer.src, name); decl->code->start_line = __i == 0 ? 
1 : prev().line; decl->nested = name_scope() == NAME_LOCAL; diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index 1c8f2d46..86593783 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -28,8 +28,8 @@ typedef struct TokenDeserializer { void TokenDeserializer__ctor(TokenDeserializer* self, const char* source); bool TokenDeserializer__match_char(TokenDeserializer* self, char c); -c11_string TokenDeserializer__read_string(TokenDeserializer* self, char c); -py_Str TokenDeserializer__read_string_from_hex(TokenDeserializer* self, char c); +c11_stringview TokenDeserializer__read_string(TokenDeserializer* self, char c); +c11_string* TokenDeserializer__read_string_from_hex(TokenDeserializer* self, char c); int TokenDeserializer__read_count(TokenDeserializer* self); int64_t TokenDeserializer__read_uint(TokenDeserializer* self, char c); double TokenDeserializer__read_float(TokenDeserializer* self, char c); @@ -40,7 +40,7 @@ const static TokenValue EmptyTokenValue; static void pk_Lexer__ctor(pk_Lexer* self, pk_SourceData_ src){ PK_INCREF(src); self->src = src; - self->curr_char = self->token_start = py_Str__data(&src->source); + self->curr_char = self->token_start = src->source; self->current_line = 1; self->brackets_level = 0; c11_vector__ctor(&self->nexts, sizeof(Token)); @@ -220,15 +220,9 @@ static Error* eat_name(pk_Lexer* self){ } } // handle multibyte char - py_Str u8str; - py_Str__ctor2(&u8str, self->curr_char, u8bytes); - if(u8str.size != u8bytes){ - py_Str__dtor(&u8str); - return SyntaxError("invalid utf8 sequence: %s", py_Str__data(&u8str)); - } uint32_t value = 0; for(int k = 0; k < u8bytes; k++) { - uint8_t b = py_Str__data(&u8str)[k]; + uint8_t b = self->curr_char[k]; if(k == 0) { if(u8bytes == 2) value = (b & 0b00011111) << 6; @@ -240,15 +234,16 @@ static Error* eat_name(pk_Lexer* self){ value |= (b & 0b00111111) << (6 * (u8bytes - k - 1)); } } - if(c11__is_unicode_Lo_char(value)) + if(c11__is_unicode_Lo_char(value)){ self->curr_char += 
u8bytes; - else + }else{ break; + } } int length = (int)(self->curr_char - self->token_start); if(length == 0) return SyntaxError("@id contains invalid char"); - c11_string name = {self->token_start, length}; + c11_stringview name = {self->token_start, length}; if(self->src->mode == JSON_MODE) { if(c11_string__cmp3(name, "true") == 0) { @@ -278,7 +273,7 @@ static Error* eat_name(pk_Lexer* self){ return NULL; } -static Error* eat_string_until(pk_Lexer* self, char quote, bool raw, py_Str* out) { +static Error* eat_string_until(pk_Lexer* self, char quote, bool raw, c11_string** out) { // previous char is quote bool quote3 = match_n_chars(self, 2, quote); pk_SStream buff; @@ -341,7 +336,7 @@ enum StringType { }; static Error* eat_string(pk_Lexer* self, char quote, enum StringType type){ - py_Str s; + c11_string* s; Error* err = eat_string_until(self, quote, type == RAW_STRING, &s); if(err) return err; TokenValue value = {TokenValue_STR, ._str = s}; @@ -366,7 +361,7 @@ static Error* eat_number(pk_Lexer* self){ is_scientific_notation = true; } - c11_string text = {self->token_start, i - self->token_start}; + c11_stringview text = {self->token_start, i - self->token_start}; self->curr_char = i; if(text.data[0] != '.' 
&& !is_scientific_notation) { @@ -554,10 +549,10 @@ static Error* lex_one_token(pk_Lexer* self, bool* eof){ static Error* from_precompiled(pk_Lexer* self) { TokenDeserializer deserializer; - TokenDeserializer__ctor(&deserializer, py_Str__data(&self->src->source)); + TokenDeserializer__ctor(&deserializer, self->src->source); deserializer.curr += 5; // skip "pkpy:" - c11_string version = TokenDeserializer__read_string(&deserializer, '\n'); + c11_stringview version = TokenDeserializer__read_string(&deserializer, '\n'); if(c11_string__cmp3(version, PK_VERSION) != 0) { return SyntaxError("precompiled version mismatch"); @@ -569,10 +564,9 @@ static Error* from_precompiled(pk_Lexer* self) { int count = TokenDeserializer__read_count(&deserializer); c11_vector* precompiled_tokens = &self->src->_precompiled_tokens; for(int i = 0; i < count; i++) { - c11_string item = TokenDeserializer__read_string(&deserializer, '\n'); - py_Str copied_item; - py_Str__ctor2(&copied_item, item.data, item.size); - c11_vector__push(py_Str, precompiled_tokens, copied_item); + c11_stringview item = TokenDeserializer__read_string(&deserializer, '\n'); + c11_string* copied_item = c11_string__new2(item.data, item.size); + c11_vector__push(c11_string*, precompiled_tokens, copied_item); } count = TokenDeserializer__read_count(&deserializer); @@ -581,9 +575,9 @@ static Error* from_precompiled(pk_Lexer* self) { t.type = (TokenIndex)TokenDeserializer__read_uint(&deserializer, ','); if(is_raw_string_used(t.type)) { int64_t index = TokenDeserializer__read_uint(&deserializer, ','); - py_Str* p = c11__at(py_Str, precompiled_tokens, index); - t.start = py_Str__data(p); - t.length = c11__getitem(py_Str, precompiled_tokens, index).size; + c11_string* p = c11__getitem(c11_string*, precompiled_tokens, index); + t.start = p; + t.length = c11_string__len(p); } else { t.start = NULL; t.length = 0; @@ -612,7 +606,7 @@ static Error* from_precompiled(pk_Lexer* self) { t.value = (TokenValue){TokenValue_F64, ._f64 = res}; 
} break; case 'S': { - py_Str res = TokenDeserializer__read_string_from_hex(&deserializer, '\n'); + c11_string* res = TokenDeserializer__read_string_from_hex(&deserializer, '\n'); t.value = (TokenValue){TokenValue_STR, ._str = res}; } break; default: @@ -624,10 +618,10 @@ static Error* from_precompiled(pk_Lexer* self) { return NULL; } -IntParsingResult parse_uint(c11_string text, int64_t* out, int base) { +IntParsingResult parse_uint(c11_stringview text, int64_t* out, int base) { *out = 0; - c11_string prefix = {.data = text.data, .size = PK_MIN(2, text.size)}; + c11_stringview prefix = {.data = text.data, .size = PK_MIN(2, text.size)}; if(base == -1) { if(c11_string__cmp3(prefix, "0b") == 0) base = 2; @@ -657,7 +651,7 @@ IntParsingResult parse_uint(c11_string text, int64_t* out, int base) { // 2-base 0b101010 if(c11_string__cmp3(prefix, "0b") == 0) { // text.remove_prefix(2); - text = (c11_string){text.data + 2, text.size - 2}; + text = (c11_stringview){text.data + 2, text.size - 2}; } if(text.size == 0) return IntParsing_FAILURE; for(int i = 0; i < text.size; i++) { @@ -675,7 +669,7 @@ IntParsingResult parse_uint(c11_string text, int64_t* out, int base) { // 8-base 0o123 if(c11_string__cmp3(prefix, "0o") == 0) { // text.remove_prefix(2); - text = (c11_string){text.data + 2, text.size - 2}; + text = (c11_stringview){text.data + 2, text.size - 2}; } if(text.size == 0) return IntParsing_FAILURE; for(int i = 0; i < text.size; i++) { @@ -693,7 +687,7 @@ IntParsingResult parse_uint(c11_string text, int64_t* out, int base) { // 16-base 0x123 if(c11_string__cmp3(prefix, "0x") == 0) { // text.remove_prefix(2); - text = (c11_string){text.data + 2, text.size - 2}; + text = (c11_stringview){text.data + 2, text.size - 2}; } if(text.size == 0) return IntParsing_FAILURE; for(int i = 0; i < text.size; i++) { @@ -745,7 +739,7 @@ Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens){ return NULL; } -Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* 
out) { +Error* pk_Lexer__process_and_dump(pk_SourceData_ src, c11_string** out) { assert(!src->is_precompiled); pk_TokenArray nexts; // output tokens Error* err = pk_Lexer__process(src, &nexts); @@ -765,7 +759,7 @@ Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) { c11__foreach(Token, &nexts, token) { if(is_raw_string_used(token->type)) { - c11_string token_sv = {token->start, token->length}; + c11_stringview token_sv = {token->start, token->length}; if(!c11_smallmap_s2n__contains(&token_indices, token_sv)) { c11_smallmap_s2n__set(&token_indices, token_sv, 0); } @@ -796,7 +790,7 @@ Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) { if(is_raw_string_used(token->type)) { uint16_t *p = c11_smallmap_s2n__try_get( - &token_indices, (c11_string){token->start, token->length}); + &token_indices, (c11_stringview){token->start, token->length}); assert(p != NULL); pk_SStream__write_int(&ss, (int)*p); pk_SStream__write_char(&ss, ','); @@ -827,7 +821,7 @@ Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) { break; case TokenValue_STR: { pk_SStream__write_char(&ss, 'S'); - c11_string sv = py_Str__sv(&token->value._str); + c11_stringview sv = c11_string__view(token->value._str); for(int i=0; idata; for(int i=0; icount; i++){ if(data[i].value.index == TokenValue_STR){ - py_Str__dtor(&data[i].value._str); + c11_string__delete(data[i].value._str); } } c11_array__dtor(self); @@ -888,19 +882,20 @@ bool TokenDeserializer__match_char(TokenDeserializer* self, char c){ return false; } -c11_string TokenDeserializer__read_string(TokenDeserializer* self, char c){ +c11_stringview TokenDeserializer__read_string(TokenDeserializer* self, char c){ const char* start = self->curr; while(*self->curr != c) self->curr++; - c11_string retval = {start, (int)(self->curr-start)}; + c11_stringview retval = {start, (int)(self->curr-start)}; self->curr++; // skip the delimiter return retval; } -py_Str 
TokenDeserializer__read_string_from_hex(TokenDeserializer* self, char c){ - c11_string sv = TokenDeserializer__read_string(self, c); +c11_string* TokenDeserializer__read_string_from_hex(TokenDeserializer* self, char c){ + c11_stringview sv = TokenDeserializer__read_string(self, c); const char* s = sv.data; - char* buffer = (char*)malloc(sv.size / 2 + 1); + pk_SStream ss; + pk_SStream__ctor(&ss); for(int i = 0; i < sv.size; i += 2) { char c = 0; if(s[i] >= '0' && s[i] <= '9') @@ -916,15 +911,9 @@ py_Str TokenDeserializer__read_string_from_hex(TokenDeserializer* self, char c){ c += s[i + 1] - 'a' + 10; else assert(false); - buffer[i / 2] = c; + pk_SStream__write_char(&ss, c); } - buffer[sv.size / 2] = 0; - return (py_Str){ - .size = sv.size / 2, - .is_ascii = c11__isascii(buffer, sv.size / 2), - .is_sso = false, - ._ptr = buffer - }; + return pk_SStream__submit(&ss); } int TokenDeserializer__read_count(TokenDeserializer* self){ @@ -944,12 +933,12 @@ int64_t TokenDeserializer__read_uint(TokenDeserializer* self, char c){ } double TokenDeserializer__read_float(TokenDeserializer* self, char c){ - c11_string sv = TokenDeserializer__read_string(self, c); - py_Str nullterm; - py_Str__ctor2(&nullterm, sv.data, sv.size); + c11_stringview sv = TokenDeserializer__read_string(self, c); + // TODO: optimize this + c11_string* nullterm = c11_string__new2(sv.data, sv.size); char* end; - double retval = strtod(py_Str__data(&nullterm), &end); - py_Str__dtor(&nullterm); + double retval = strtod(nullterm, &end); + c11_string__delete(nullterm); assert(*end == 0); return retval; } diff --git a/src/error.c b/src/error.c index 39155d57..e9f336f4 100644 --- a/src/error.c +++ b/src/error.c @@ -1,59 +1,59 @@ -#include "pocketpy/objects/error.h" -#include "pocketpy/common/strname.h" -#include "pocketpy/common/sstream.h" +// #include "pocketpy/objects/error.h" +// #include "pocketpy/common/strname.h" +// #include "pocketpy/common/sstream.h" -void pkpy_Exception__ctor(pkpy_Exception* self, 
StrName type){ - self->type = type; - self->is_re = true; - self->_ip_on_error = -1; - self->_code_on_error = NULL; - self->self = NULL; +// void pkpy_Exception__ctor(pkpy_Exception* self, StrName type){ +// self->type = type; +// self->is_re = true; +// self->_ip_on_error = -1; +// self->_code_on_error = NULL; +// self->self = NULL; - py_Str__ctor(&self->msg, ""); - c11_vector__ctor(&self->stacktrace, sizeof(pkpy_ExceptionFrame)); -} +// py_Str__ctor(&self->msg, ""); +// c11_vector__ctor(&self->stacktrace, sizeof(pkpy_ExceptionFrame)); +// } -void pkpy_Exception__dtor(pkpy_Exception* self){ - for(int i=0; istacktrace.count; i++){ - pkpy_ExceptionFrame* frame = c11__at(pkpy_ExceptionFrame, &self->stacktrace, i); - PK_DECREF(frame->src); - py_Str__dtor(&frame->name); - } - py_Str__dtor(&self->msg); - c11_vector__dtor(&self->stacktrace); -} +// void pkpy_Exception__dtor(pkpy_Exception* self){ +// for(int i=0; istacktrace.count; i++){ +// pkpy_ExceptionFrame* frame = c11__at(pkpy_ExceptionFrame, &self->stacktrace, i); +// PK_DECREF(frame->src); +// py_Str__dtor(&frame->name); +// } +// py_Str__dtor(&self->msg); +// c11_vector__dtor(&self->stacktrace); +// } -void pkpy_Exception__stpush(pkpy_Exception* self, pk_SourceData_ src, int lineno, const char* cursor, const char* name){ - if(self->stacktrace.count >= 7) return; - PK_INCREF(src); - pkpy_ExceptionFrame* frame = c11_vector__emplace(&self->stacktrace); - frame->src = src; - frame->lineno = lineno; - frame->cursor = cursor; - py_Str__ctor(&frame->name, name); -} +// void pkpy_Exception__stpush(pkpy_Exception* self, pk_SourceData_ src, int lineno, const char* cursor, const char* name){ +// if(self->stacktrace.count >= 7) return; +// PK_INCREF(src); +// pkpy_ExceptionFrame* frame = c11_vector__emplace(&self->stacktrace); +// frame->src = src; +// frame->lineno = lineno; +// frame->cursor = cursor; +// py_Str__ctor(&frame->name, name); +// } -py_Str pkpy_Exception__summary(pkpy_Exception* self){ - pk_SStream ss; - 
pk_SStream__ctor(&ss); +// py_Str pkpy_Exception__summary(pkpy_Exception* self){ +// pk_SStream ss; +// pk_SStream__ctor(&ss); - if(self->is_re){ - pk_SStream__write_cstr(&ss, "Traceback (most recent call last):\n"); - } - for(int i=self->stacktrace.count-1; i >= 0; i--) { - pkpy_ExceptionFrame* frame = c11__at(pkpy_ExceptionFrame, &self->stacktrace, i); - py_Str s = pk_SourceData__snapshot(frame->src, frame->lineno, frame->cursor, py_Str__data(&frame->name)); - pk_SStream__write_Str(&ss, &s); - py_Str__dtor(&s); - pk_SStream__write_cstr(&ss, "\n"); - } +// if(self->is_re){ +// pk_SStream__write_cstr(&ss, "Traceback (most recent call last):\n"); +// } +// for(int i=self->stacktrace.count-1; i >= 0; i--) { +// pkpy_ExceptionFrame* frame = c11__at(pkpy_ExceptionFrame, &self->stacktrace, i); +// py_Str s = pk_SourceData__snapshot(frame->src, frame->lineno, frame->cursor, py_Str__data(&frame->name)); +// pk_SStream__write_Str(&ss, &s); +// py_Str__dtor(&s); +// pk_SStream__write_cstr(&ss, "\n"); +// } - const char* name = pk_StrName__rmap(self->type); - pk_SStream__write_cstr(&ss, name); +// const char* name = pk_StrName__rmap(self->type); +// pk_SStream__write_cstr(&ss, name); - if(self->msg.size > 0){ - pk_SStream__write_cstr(&ss, ": "); - pk_SStream__write_Str(&ss, &self->msg); - } - return pk_SStream__submit(&ss); -} \ No newline at end of file +// if(self->msg.size > 0){ +// pk_SStream__write_cstr(&ss, ": "); +// pk_SStream__write_Str(&ss, &self->msg); +// } +// return pk_SStream__submit(&ss); +// } \ No newline at end of file diff --git a/src/interpreter/ceval.c b/src/interpreter/ceval.c index 1c7197a4..2e02e33c 100644 --- a/src/interpreter/ceval.c +++ b/src/interpreter/ceval.c @@ -13,8 +13,6 @@ int NameError(py_Name name) { return -1; } static bool stack_binaryop(pk_VM* self, py_Name op, py_Name rop); -// private -void py_newStr_(py_Ref, py_Str); #define DISPATCH() \ do { \ @@ -426,10 +424,11 @@ pk_FrameResult pk_VM__run_top_frame(pk_VM* self) { DISPATCH(); } 
case OP_BUILD_BYTES: { - py_Str* s = py_touserdata(TOP()); - unsigned char* p = (unsigned char*)malloc(s->size); - memcpy(p, py_Str__data(s), s->size); - py_newbytes(TOP(), p, s->size); + int size; + const char* data = py_tostrn(TOP(), &size); + unsigned char* p = (unsigned char*)malloc(size); + memcpy(p, data, size); + py_newbytes(TOP(), p, size); DISPATCH(); } case OP_BUILD_TUPLE: { @@ -490,11 +489,14 @@ pk_FrameResult pk_VM__run_top_frame(pk_VM* self) { pk_SStream__ctor(&ss); for(int i = 0; i < byte.arg; i++) { if(!py_str(begin + i)) goto __ERROR; - py_Str* item = py_touserdata(&self->last_retval); - pk_SStream__write_Str(&ss, item); + int size; + const char* data = py_tostrn(&self->last_retval, &size); + pk_SStream__write_cstrn(&ss, data, size); } SP() = begin; - py_newStr_(SP()++, pk_SStream__submit(&ss)); + c11_string* res = pk_SStream__submit(&ss); + py_newstrn(SP()++, res, c11_string__len(res)); + c11_string__delete(res); DISPATCH(); } /*****************************/ diff --git a/src/interpreter/py_number.c b/src/interpreter/py_number.c index 6e097b1a..68a33408 100644 --- a/src/interpreter/py_number.c +++ b/src/interpreter/py_number.c @@ -2,56 +2,19 @@ #include -// static int _py_print(const py_Ref args, int argc){ -// int length = py_tuple__len(args+0); -// py_Str* sep; -// py_Str* end; - -// int err; -// err = py_tostr(args+1, &sep); -// if(err) return err; -// err = py_tostr(args+2, &end); -// if(err) return err; - -// pk_SStream ss; -// pk_SStream__ctor(&ss); - -// for(int i=0; i_stdout(py_Str__data(&out)); -// py_Str__dtor(&out); -// return 0; -// } - #define DEF_NUM_BINARY_OP(name, op, rint, rfloat) \ static bool _py_int##name(int argc, py_Ref argv) { \ py_checkargc(2); \ if(py_isint(&argv[1])) { \ int64_t lhs = py_toint(&argv[0]); \ int64_t rhs = py_toint(&argv[1]); \ - rint(py_retval(), lhs op rhs); \ + rint(py_retval(), lhs op rhs); \ } else if(py_isfloat(&argv[1])) { \ int64_t lhs = py_toint(&argv[0]); \ double rhs = py_tofloat(&argv[1]); \ - 
rfloat(py_retval(), lhs op rhs); \ + rfloat(py_retval(), lhs op rhs); \ } else { \ - py_newnotimplemented(py_retval()); \ + py_newnotimplemented(py_retval()); \ } \ return true; \ } \ @@ -60,9 +23,9 @@ double lhs = py_tofloat(&argv[0]); \ double rhs; \ if(py_castfloat(&argv[1], &rhs)) { \ - rfloat(py_retval(), lhs op rhs); \ + rfloat(py_retval(), lhs op rhs); \ } else { \ - py_newnotimplemented(py_retval()); \ + py_newnotimplemented(py_retval()); \ } \ return true; \ } @@ -204,9 +167,9 @@ static bool _py_int__bit_length(int argc, py_Ref argv) { int64_t lhs = py_toint(&argv[0]); \ if(py_isint(&argv[1])) { \ int64_t rhs = py_toint(&argv[1]); \ - py_newint(py_retval(), lhs op rhs); \ + py_newint(py_retval(), lhs op rhs); \ } else { \ - py_newnotimplemented(py_retval()); \ + py_newnotimplemented(py_retval()); \ } \ return true; \ } @@ -270,11 +233,4 @@ void pk_VM__init_builtins(pk_VM* self) { // int.bit_length py_bindmethod(tp_int, "bit_length", _py_int__bit_length); - - // py_Ref builtins = py_getmodule("builtins"); - // py_newfunction(py_reg(0), _py_print, - // "print(*args, sep=' ', end='\\n')", - // BindType_FUNCTION - // ); - // py_setdict(builtins, py_name("hello"), py_reg(0)); } \ No newline at end of file diff --git a/src/objects/codeobject.c b/src/objects/codeobject.c index 6487d45c..031e1ed3 100644 --- a/src/objects/codeobject.c +++ b/src/objects/codeobject.c @@ -13,7 +13,7 @@ bool Bytecode__is_forward_jump(const Bytecode* self) { return self->op >= OP_JUMP_FORWARD && self->op <= OP_LOOP_BREAK; } -FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_string name) { +FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_stringview name) { FuncDecl* self = malloc(sizeof(FuncDecl)); self->rc.count = 1; self->rc.dtor = (void (*)(void*))FuncDecl__dtor; @@ -46,10 +46,10 @@ void FuncDecl__add_kwarg(FuncDecl* self, int index, uint16_t key, const py_TValu c11_vector__push(FuncDeclKwArg, &self->kwargs, item); } -void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, 
c11_string name) { +void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_stringview name) { self->src = src; PK_INCREF(src); - py_Str__ctor2(&self->name, name.data, name.size); + self->name = c11_string__new2(name.data, name.size); c11_vector__ctor(&self->codes, sizeof(Bytecode)); c11_vector__ctor(&self->codes_ex, sizeof(BytecodeEx)); @@ -73,7 +73,7 @@ void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_string name) { void CodeObject__dtor(CodeObject* self) { PK_DECREF(self->src); - py_Str__dtor(&self->name); + c11_string__delete(self->name); c11_vector__dtor(&self->codes); c11_vector__dtor(&self->codes_ex); diff --git a/src/public/cast.c b/src/public/cast.c index 6634d7d1..36ba3bb4 100644 --- a/src/public/cast.c +++ b/src/public/cast.c @@ -40,15 +40,15 @@ py_Type py_totype(const py_Ref self){ const char* py_tostr(const py_Ref self){ assert(self->type == tp_str); - py_Str* ud = PyObject__value(self->_obj); - return py_Str__data(ud); + int* p = PyObject__value(self->_obj); + return (const char*)(p+1); } const char* py_tostrn(const py_Ref self, int* size){ assert(self->type == tp_str); - py_Str* ud = PyObject__value(self->_obj); - *size = ud->size; - return py_Str__data(ud); + int* p = PyObject__value(self->_obj); + *size = *p; + return (const char*)(p+1); } const unsigned char* py_tobytes(const py_Ref self, int* size){ diff --git a/src/public/values.c b/src/public/values.c index 327cc6fc..02af220b 100644 --- a/src/public/values.c +++ b/src/public/values.c @@ -41,28 +41,15 @@ void py_newellipsis(py_Ref out) { void py_newnull(py_Ref out) { out->type = 0; } void py_newstr(py_Ref out, const char* data) { - pk_ManagedHeap* heap = &pk_current_vm->heap; - PyObject* obj = pk_ManagedHeap__gcnew(heap, tp_str, 0, sizeof(py_Str)); - py_Str__ctor(PyObject__value(obj), data); - out->type = tp_str; - out->is_ptr = true; - out->_obj = obj; + return py_newstrn(out, data, strlen(data)); } void py_newstrn(py_Ref out, const char* data, int size) { pk_ManagedHeap* 
heap = &pk_current_vm->heap; - PyObject* obj = pk_ManagedHeap__gcnew(heap, tp_str, 0, sizeof(py_Str)); - py_Str__ctor2((py_Str*)PyObject__value(obj), data, size); - out->type = tp_str; - out->is_ptr = true; - out->_obj = obj; -} - -void py_newStr_(py_Ref out, py_Str input) { - pk_ManagedHeap* heap = &pk_current_vm->heap; - PyObject* obj = pk_ManagedHeap__gcnew(heap, tp_str, 0, sizeof(py_Str)); - py_Str* userdata = PyObject__value(obj); - *userdata = input; + int total_size = sizeof(int) + size + 1; + PyObject* obj = pk_ManagedHeap__gcnew(heap, tp_str, 0, total_size); + int* p = PyObject__value(obj); + *p = size; out->type = tp_str; out->is_ptr = true; out->_obj = obj;