From c4bb16e3906315d59a75d787f56fde89cf11457e Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Sat, 15 Jun 2024 21:41:59 +0800 Subject: [PATCH] some fix --- include/pocketpy/compiler/lexer.h | 25 ++++++ include/pocketpy/compiler/lexer.hpp | 23 ------ src/common/sourcedata.c | 4 + src/compiler/lexer.c | 83 ++++++++++++++++++++ src/compiler/lexer.cpp | 116 ++++++++++------------------ 5 files changed, 151 insertions(+), 100 deletions(-) create mode 100644 include/pocketpy/compiler/lexer.h create mode 100644 src/compiler/lexer.c diff --git a/include/pocketpy/compiler/lexer.h b/include/pocketpy/compiler/lexer.h new file mode 100644 index 00000000..abe8885f --- /dev/null +++ b/include/pocketpy/compiler/lexer.h @@ -0,0 +1,25 @@ +#pragma once + +#include <stdbool.h> +#include "pocketpy/common/str.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct pkpy_TokenDeserializer { + const char* curr; + const char* source; +} pkpy_TokenDeserializer; + +void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source); +bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c); +c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c); +pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c); +int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self); +int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c); +double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c); + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/include/pocketpy/compiler/lexer.hpp b/include/pocketpy/compiler/lexer.hpp index b8e9ba4a..96ff80f5 100644 --- a/include/pocketpy/compiler/lexer.hpp +++ b/include/pocketpy/compiler/lexer.hpp @@ -146,27 +146,4 @@ enum class IntParsingResult { IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept; -struct TokenDeserializer { - const char* curr; - const char* source; - 
- TokenDeserializer(const char* source) noexcept: curr(source), source(source){} - - char read_char() noexcept{ return *curr++; } - - bool match_char(char c) noexcept{ - if(*curr == c) { - curr++; - return true; - } - return false; - } - - std::string_view read_string(char c) noexcept; - Str read_string_from_hex(char c) noexcept; - int read_count() noexcept; - i64 read_uint(char c) noexcept; - f64 read_float(char c) noexcept; -}; - } // namespace pkpy diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c index b37a1810..66ba607d 100644 --- a/src/common/sourcedata.c +++ b/src/common/sourcedata.c @@ -33,6 +33,10 @@ void pkpy_SourceData__dtor(struct pkpy_SourceData* self) { pkpy_Str__dtor(&self->filename); pkpy_Str__dtor(&self->source); c11_vector__dtor(&self->line_starts); + + for(int i=0; i<self->_precompiled_tokens.count; i++){ + pkpy_Str__dtor(c11__at(pkpy_Str, &self->_precompiled_tokens, i)); + } c11_vector__dtor(&self->_precompiled_tokens); } diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c new file mode 100644 index 00000000..cf7d2784 --- /dev/null +++ b/src/compiler/lexer.c @@ -0,0 +1,83 @@ +#include "pocketpy/common/config.h" +#include "pocketpy/common/str.h" +#include "pocketpy/common/smallmap.h" +#include "pocketpy/compiler/lexer.h" + +void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source){ + self->curr = source; + self->source = source; +} + +bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c){ + if(*self->curr == c) { + self->curr++; + return true; + } + return false; +} + +c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c){ + const char* start = self->curr; + while(*self->curr != c) + self->curr++; + c11_string retval = {start, self->curr - start}; + self->curr++; // skip the delimiter + return retval; +} + +pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c){ + c11_string sv = 
pkpy_TokenDeserializer__read_string(self, c); + const char* s = sv.data; + char* buffer = (char*)malloc(sv.size / 2 + 1); + for(int i = 0; i < sv.size; i += 2) { + char c = 0; + if(s[i] >= '0' && s[i] <= '9') + c += s[i] - '0'; + else if(s[i] >= 'a' && s[i] <= 'f') + c += s[i] - 'a' + 10; + else + assert(false); + c <<= 4; + if(s[i + 1] >= '0' && s[i + 1] <= '9') + c += s[i + 1] - '0'; + else if(s[i + 1] >= 'a' && s[i + 1] <= 'f') + c += s[i + 1] - 'a' + 10; + else + assert(false); + buffer[i / 2] = c; + } + buffer[sv.size / 2] = 0; + return (pkpy_Str){ + .size = sv.size / 2, + .is_ascii = c11__isascii(buffer, sv.size / 2), + .is_sso = false, + ._ptr = buffer + }; +} + +int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self){ + assert(*self->curr == '='); + self->curr++; + return pkpy_TokenDeserializer__read_uint(self, '\n'); +} + +int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c){ + int64_t out = 0; + while(*self->curr != c) { + out = out * 10 + (*self->curr - '0'); + self->curr++; + } + self->curr++; // skip the delimiter + return out; +} + +double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c){ + c11_string sv = pkpy_TokenDeserializer__read_string(self, c); + pkpy_Str nullterm; + pkpy_Str__ctor2(&nullterm, sv.data, sv.size); + char* end; + double retval = strtod(pkpy_Str__data(&nullterm), &end); + pkpy_Str__dtor(&nullterm); + assert(*end == 0); + return retval; +} diff --git a/src/compiler/lexer.cpp b/src/compiler/lexer.cpp index e5573512..1ac3c5dc 100644 --- a/src/compiler/lexer.cpp +++ b/src/compiler/lexer.cpp @@ -2,6 +2,7 @@ #include "pocketpy/common/config.h" #include "pocketpy/common/str.h" #include "pocketpy/common/smallmap.h" +#include "pocketpy/compiler/lexer.h" #include @@ -542,54 +543,69 @@ Error* Lexer::run() noexcept{ } Error* Lexer::from_precompiled() noexcept{ - TokenDeserializer deserializer(pkpy_Str__data(&src->source)); - deserializer.curr += 5; // skip "pkpy:" - 
std::string_view version = deserializer.read_string('\n'); + pkpy_TokenDeserializer deserializer; + pkpy_TokenDeserializer__ctor(&deserializer, pkpy_Str__data(&src->source)); - if(version != PK_VERSION){ + deserializer.curr += 5; // skip "pkpy:" + c11_string version = pkpy_TokenDeserializer__read_string(&deserializer, '\n'); + + if(c11_string__cmp3(version, PK_VERSION) != 0) { return SyntaxError("precompiled version mismatch"); } - if(deserializer.read_uint('\n') != (i64)src->mode){ + if(pkpy_TokenDeserializer__read_uint(&deserializer, '\n') != (i64)src->mode){ return SyntaxError("precompiled mode mismatch"); } - int count = deserializer.read_count(); - auto precompiled_tokens = &src->_precompiled_tokens; + int count = pkpy_TokenDeserializer__read_count(&deserializer); + c11_vector* precompiled_tokens = &src->_precompiled_tokens; for(int i = 0; i < count; i++) { - c11_vector__push(Str, precompiled_tokens, Str(deserializer.read_string('\n'))); + c11_string item = pkpy_TokenDeserializer__read_string(&deserializer, '\n'); + pkpy_Str copied_item; + pkpy_Str__ctor2(&copied_item, item.data, item.size); + c11_vector__push(pkpy_Str, precompiled_tokens, copied_item); } - count = deserializer.read_count(); + count = pkpy_TokenDeserializer__read_count(&deserializer); for(int i = 0; i < count; i++) { Token t; - t.type = (unsigned char)deserializer.read_uint(','); + t.type = (unsigned char)pkpy_TokenDeserializer__read_uint(&deserializer, ','); if(is_raw_string_used(t.type)) { - i64 index = deserializer.read_uint(','); - t.start = c11__getitem(Str, precompiled_tokens, index).c_str(); - t.length = c11__getitem(Str, precompiled_tokens, index).size; + i64 index = pkpy_TokenDeserializer__read_uint(&deserializer, ','); + pkpy_Str* p = c11__at(pkpy_Str, precompiled_tokens, index); + t.start = pkpy_Str__data(p); + t.length = c11__getitem(pkpy_Str, precompiled_tokens, index).size; } else { - t.start = nullptr; + t.start = NULL; t.length = 0; } - if(deserializer.match_char(',')) { + 
if(pkpy_TokenDeserializer__match_char(&deserializer, ',')) { t.line = nexts.back().line; } else { - t.line = (int)deserializer.read_uint(','); + t.line = (int)pkpy_TokenDeserializer__read_uint(&deserializer, ','); } - if(deserializer.match_char(',')) { + if(pkpy_TokenDeserializer__match_char(&deserializer, ',')) { t.brackets_level = nexts.back().brackets_level; } else { - t.brackets_level = (int)deserializer.read_uint(','); + t.brackets_level = (int)pkpy_TokenDeserializer__read_uint(&deserializer, ','); } - char type = deserializer.read_char(); + char type = (*deserializer.curr++); // read_char switch(type) { - case 'I': t.value = deserializer.read_uint('\n'); break; - case 'F': t.value = deserializer.read_float('\n'); break; - case 'S': t.value = deserializer.read_string_from_hex('\n'); break; - default: t.value = {}; break; + case 'I': + t.value = pkpy_TokenDeserializer__read_uint(&deserializer, '\n'); + break; + case 'F': + t.value = pkpy_TokenDeserializer__read_float(&deserializer, '\n'); + break; + case 'S': { + pkpy_Str res = pkpy_TokenDeserializer__read_string_from_hex(&deserializer, '\n'); + t.value = Str(std::move(res)); + } break; + default: + t.value = {}; + break; } nexts.push_back(t); } @@ -665,60 +681,6 @@ Error* Lexer::precompile(Str* out) noexcept{ return NULL; } -std::string_view TokenDeserializer::read_string(char c) noexcept{ - const char* start = curr; - while(*curr != c) - curr++; - std::string_view retval(start, curr - start); - curr++; // skip the delimiter - return retval; -} - -Str TokenDeserializer::read_string_from_hex(char c) noexcept{ - std::string_view s = read_string(c); - char* buffer = (char*)std::malloc(s.size() / 2 + 1); - for(int i = 0; i < s.size(); i += 2) { - char c = 0; - if(s[i] >= '0' && s[i] <= '9') - c += s[i] - '0'; - else if(s[i] >= 'a' && s[i] <= 'f') - c += s[i] - 'a' + 10; - else - assert(false); - c <<= 4; - if(s[i + 1] >= '0' && s[i + 1] <= '9') - c += s[i + 1] - '0'; - else if(s[i + 1] >= 'a' && s[i + 1] <= 'f') - 
c += s[i + 1] - 'a' + 10; - else - assert(false); - buffer[i / 2] = c; - } - buffer[s.size() / 2] = 0; - return pair(buffer, s.size() / 2); -} - -int TokenDeserializer::read_count() noexcept{ - assert(*curr == '='); - curr++; - return read_uint('\n'); -} - -i64 TokenDeserializer::read_uint(char c) noexcept{ - i64 out = 0; - while(*curr != c) { - out = out * 10 + (*curr - '0'); - curr++; - } - curr++; // skip the delimiter - return out; -} - -f64 TokenDeserializer::read_float(char c) noexcept{ - std::string_view sv = read_string(c); - return std::stod(std::string(sv)); -} - IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept{ *out = 0;