diff --git a/include/pocketpy/compiler/lexer.h b/include/pocketpy/compiler/lexer.h
index 730155a1..e357c125 100644
--- a/include/pocketpy/compiler/lexer.h
+++ b/include/pocketpy/compiler/lexer.h
@@ -87,7 +87,6 @@ enum Precedence {
 typedef c11_array TokenArray;
 
 Error* Lexer__process(SourceData_ src, TokenArray* out_tokens);
-Error* Lexer__process_and_dump(SourceData_ src, c11_string** out_string);
 void TokenArray__dtor(TokenArray* self);
 
 #define Token__sv(self) (c11_sv){(self)->start, (self)->length}
diff --git a/include/pocketpy/objects/sourcedata.h b/include/pocketpy/objects/sourcedata.h
index a31a3bdc..925b4e64 100644
--- a/include/pocketpy/objects/sourcedata.h
+++ b/include/pocketpy/objects/sourcedata.h
@@ -9,14 +9,12 @@
 struct SourceData {
     RefCounted rc;
     enum py_CompileMode mode;
-    bool is_precompiled;
     bool is_dynamic;  // for exec() and eval()
 
     c11_string* filename;
     c11_string* source;
 
     c11_vector /*T=const char* */ line_starts;
-    c11_vector /*T=c11_string* */ _precompiled_tokens;
 };
 
 typedef struct SourceData* SourceData_;
diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c
index 2f032dad..4ec0e921 100644
--- a/src/common/sourcedata.c
+++ b/src/common/sourcedata.c
@@ -5,14 +5,13 @@
 #include <string.h>
 
 static void SourceData__ctor(struct SourceData* self,
-                                const char* source,
-                                const char* filename,
-                                enum py_CompileMode mode,
-                                bool is_dynamic) {
+                             const char* source,
+                             const char* filename,
+                             enum py_CompileMode mode,
+                             bool is_dynamic) {
     self->filename = c11_string__new(filename);
     self->mode = mode;
     c11_vector__ctor(&self->line_starts, sizeof(const char*));
-    c11_vector__ctor(&self->_precompiled_tokens, sizeof(c11_string*));
 
     // Skip utf8 BOM if there is any.
     if(strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
@@ -26,7 +25,6 @@ static void SourceData__ctor(struct SourceData* self,
         source++;
     }
     self->source = c11_sbuf__submit(&ss);
-    self->is_precompiled = (strncmp(source, "pkpy:", 5) == 0);
     self->is_dynamic = is_dynamic;
     c11_vector__push(const char*, &self->line_starts, self->source->data);
 }
@@ -34,19 +32,13 @@ static void SourceData__ctor(struct SourceData* self,
 
 static void SourceData__dtor(struct SourceData* self) {
     c11_string__delete(self->filename);
     c11_string__delete(self->source);
-    c11_vector__dtor(&self->line_starts);
-
-    for(int i = 0; i < self->_precompiled_tokens.count; i++) {
-        c11_string__delete(c11__getitem(c11_string*, &self->_precompiled_tokens, i));
-    }
-    c11_vector__dtor(&self->_precompiled_tokens);
 }
 
 SourceData_ SourceData__rcnew(const char* source,
-                                 const char* filename,
-                                 enum py_CompileMode mode,
-                                 bool is_dynamic) {
+                              const char* filename,
+                              enum py_CompileMode mode,
+                              bool is_dynamic) {
     SourceData_ self = malloc(sizeof(struct SourceData));
     SourceData__ctor(self, source, filename, mode, is_dynamic);
     self->rc.count = 1;
@@ -55,10 +47,10 @@ SourceData_ SourceData__rcnew(const char* source,
 }
 
 bool SourceData__get_line(const struct SourceData* self,
-                             int lineno,
-                             const char** st,
-                             const char** ed) {
-    if(self->is_precompiled || lineno == -1) { return false; }
+                          int lineno,
+                          const char** st,
+                          const char** ed) {
+    if(lineno < 0) return false;
     lineno -= 1;
     if(lineno < 0) lineno = 0;
     const char* _start = c11__getitem(const char*, &self->line_starts, lineno);
@@ -72,10 +64,10 @@ bool SourceData__get_line(const struct SourceData* self,
 }
 
 void SourceData__snapshot(const struct SourceData* self,
-                             c11_sbuf* ss,
-                             int lineno,
-                             const char* cursor,
-                             const char* name) {
+                          c11_sbuf* ss,
+                          int lineno,
+                          const char* cursor,
+                          const char* name) {
     pk_sprintf(ss, "  File \"%s\", line %d", self->filename->data, lineno);
\"%s\", line %d", self->filename->data, lineno); if(name && *name) { @@ -83,26 +75,24 @@ void SourceData__snapshot(const struct SourceData* self, c11_sbuf__write_cstr(ss, name); } - if(!self->is_precompiled) { - c11_sbuf__write_char(ss, '\n'); - const char *st = NULL, *ed; - if(SourceData__get_line(self, lineno, &st, &ed)) { - while(st < ed && isblank(*st)) - ++st; - if(st < ed) { - c11_sbuf__write_cstr(ss, " "); - c11_sbuf__write_cstrn(ss, st, ed - st); - if(cursor && st <= cursor && cursor <= ed) { - c11_sbuf__write_cstr(ss, "\n "); - for(int i = 0; i < (cursor - st); ++i) - c11_sbuf__write_char(ss, ' '); - c11_sbuf__write_cstr(ss, "^"); - } - } else { - st = NULL; + c11_sbuf__write_char(ss, '\n'); + const char *st = NULL, *ed; + if(SourceData__get_line(self, lineno, &st, &ed)) { + while(st < ed && isblank(*st)) + ++st; + if(st < ed) { + c11_sbuf__write_cstr(ss, " "); + c11_sbuf__write_cstrn(ss, st, ed - st); + if(cursor && st <= cursor && cursor <= ed) { + c11_sbuf__write_cstr(ss, "\n "); + for(int i = 0; i < (cursor - st); ++i) + c11_sbuf__write_char(ss, ' '); + c11_sbuf__write_cstr(ss, "^"); } + } else { + st = NULL; } - - if(!st) { c11_sbuf__write_cstr(ss, " "); } } + + if(!st) { c11_sbuf__write_cstr(ss, " "); } } diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index aabaabba..437a9e7d 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -267,7 +267,11 @@ static Error* eat_name(Lexer* self) { return NULL; } -static Error* eat_string_until(Lexer* self, char quote, bool raw, c11_string** out) { +enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES }; + +static Error* eat_string(Lexer* self, char quote, enum StringType type) { + bool raw = type == RAW_STRING; + // previous char is quote bool quote3 = match_n_chars(self, 2, quote); c11_sbuf buff; @@ -313,17 +317,9 @@ static Error* eat_string_until(Lexer* self, char quote, bool raw, c11_string** o c11_sbuf__write_char(&buff, c); } } - *out = c11_sbuf__submit(&buff); - return NULL; -} -enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES }; - -static Error* eat_string(Lexer* self, char quote, enum StringType type) { - c11_string* s; - Error* err = eat_string_until(self, quote, type == RAW_STRING, &s); - if(err) return err; - TokenValue value = {TokenValue_STR, ._str = s}; + c11_string* res = c11_sbuf__submit(&buff); + TokenValue value = {TokenValue_STR, ._str = res}; if(type == F_STRING) { add_token_with_value(self, TK_FSTR, value); } else if(type == NORMAL_BYTES) { @@ -468,8 +464,7 @@ static Error* lex_one_token(Lexer* self, bool* eof) { if(matchchar(self, '=')) { add_token(self, TK_NE); } else { - Error* err = SyntaxError(self, "expected '=' after '!'"); - if(err) return err; + return SyntaxError(self, "expected '=' after '!'"); } break; case '*': @@ -523,85 +518,10 @@ static Error* lex_one_token(Lexer* self, bool* eof) { return NULL; } -static Error* from_precompiled(Lexer* self) { - TokenDeserializer deserializer; - TokenDeserializer__ctor(&deserializer, self->src->source->data); - - deserializer.curr += 5; // skip "pkpy:" - c11_sv version = TokenDeserializer__read_string(&deserializer, '\n'); - - if(c11_sv__cmp2(version, PK_VERSION) != 0) { - return SyntaxError(self, "precompiled version mismatch"); - } - if(TokenDeserializer__read_uint(&deserializer, '\n') != (int64_t)self->src->mode) { - return SyntaxError(self, "precompiled mode mismatch"); - } - - int count = TokenDeserializer__read_count(&deserializer); - c11_vector* precompiled_tokens = &self->src->_precompiled_tokens; - for(int 
-    for(int i = 0; i < count; i++) {
-        c11_sv item = TokenDeserializer__read_string(&deserializer, '\n');
-        c11_string* copied_item = c11_string__new2(item.data, item.size);
-        c11_vector__push(c11_string*, precompiled_tokens, copied_item);
-    }
-
-    count = TokenDeserializer__read_count(&deserializer);
-    for(int i = 0; i < count; i++) {
-        Token t;
-        t.type = (TokenIndex)TokenDeserializer__read_uint(&deserializer, ',');
-        if(is_raw_string_used(t.type)) {
-            int64_t index = TokenDeserializer__read_uint(&deserializer, ',');
-            c11_string* p = c11__getitem(c11_string*, precompiled_tokens, index);
-            t.start = p->data;
-            t.length = p->size;
-        } else {
-            t.start = NULL;
-            t.length = 0;
-        }
-
-        if(TokenDeserializer__match_char(&deserializer, ',')) {
-            t.line = c11_vector__back(Token, &self->nexts).line;
-        } else {
-            t.line = (int)TokenDeserializer__read_uint(&deserializer, ',');
-        }
-
-        if(TokenDeserializer__match_char(&deserializer, ',')) {
-            t.brackets_level = c11_vector__back(Token, &self->nexts).brackets_level;
-        } else {
-            t.brackets_level = (int)TokenDeserializer__read_uint(&deserializer, ',');
-        }
-
-        char type = (*deserializer.curr++);  // read_char
-        switch(type) {
-            case 'I': {
-                int64_t res = TokenDeserializer__read_uint(&deserializer, '\n');
-                t.value = (TokenValue){TokenValue_I64, ._i64 = res};
-            } break;
-            case 'F': {
-                double res = TokenDeserializer__read_float(&deserializer, '\n');
-                t.value = (TokenValue){TokenValue_F64, ._f64 = res};
-            } break;
-            case 'S': {
-                c11_string* res = TokenDeserializer__read_string_from_hex(&deserializer, '\n');
-                t.value = (TokenValue){TokenValue_STR, ._str = res};
-            } break;
-            default: t.value = EmptyTokenValue; break;
-        }
-        c11_vector__push(Token, &self->nexts, t);
-    }
-    return NULL;
-}
-
 Error* Lexer__process(SourceData_ src, TokenArray* out_tokens) {
     Lexer lexer;
     Lexer__ctor(&lexer, src);
 
-    if(src->is_precompiled) {
-        Error* err = from_precompiled(&lexer);
-        // TODO: set out tokens
-        Lexer__dtor(&lexer);
-        return err;
-    }
     // push initial tokens
     Token sof = {
         TK_SOF, lexer.token_start, 0, lexer.current_line, lexer.brackets_level, EmptyTokenValue};
@@ -623,102 +543,6 @@ Error* Lexer__process(SourceData_ src, TokenArray* out_tokens) {
     return NULL;
 }
 
-Error* Lexer__process_and_dump(SourceData_ src, c11_string** out) {
-    assert(!src->is_precompiled);
-    TokenArray nexts;  // output tokens
-    Error* err = Lexer__process(src, &nexts);
-    if(err) return err;
-
-    c11_sbuf ss;
-    c11_sbuf__ctor(&ss);
-
-    // L1: version string
-    c11_sbuf__write_cstr(&ss, "pkpy:" PK_VERSION "\n");
-    // L2: mode
-    c11_sbuf__write_int(&ss, (int)src->mode);
-    c11_sbuf__write_char(&ss, '\n');
-
-    c11_smallmap_s2n token_indices;
-    c11_smallmap_s2n__ctor(&token_indices);
-
-    c11__foreach(Token, &nexts, token) {
-        if(is_raw_string_used(token->type)) {
-            c11_sv token_sv = {token->start, token->length};
-            if(!c11_smallmap_s2n__contains(&token_indices, token_sv)) {
-                c11_smallmap_s2n__set(&token_indices, token_sv, 0);
-            }
-        }
-    }
-    // L3: raw string count
-    c11_sbuf__write_char(&ss, '=');
-    c11_sbuf__write_int(&ss, token_indices.count);
-    c11_sbuf__write_char(&ss, '\n');
-
-    uint16_t index = 0;
-    for(int i = 0; i < token_indices.count; i++) {
-        c11_smallmap_s2n_KV* kv = c11__at(c11_smallmap_s2n_KV, &token_indices, i);
-        // L4: raw strings
-        c11_sbuf__write_cstrn(&ss, kv->key.data, kv->key.size);
-        kv->value = index++;
-    }
-
-    // L5: token count
-    c11_sbuf__write_char(&ss, '=');
-    c11_sbuf__write_int(&ss, nexts.count);
-    c11_sbuf__write_char(&ss, '\n');
-
-    for(int i = 0; i < nexts.count; i++) {
-        const Token* token = c11__at(Token, &nexts, i);
-        c11_sbuf__write_int(&ss, (int)token->type);
-        c11_sbuf__write_char(&ss, ',');
-
-        if(is_raw_string_used(token->type)) {
-            uint16_t* p =
-                c11_smallmap_s2n__try_get(&token_indices, (c11_sv){token->start, token->length});
-            assert(p != NULL);
-            c11_sbuf__write_int(&ss, (int)*p);
-            c11_sbuf__write_char(&ss, ',');
-        }
-        if(i > 0 && c11__getitem(Token, &nexts, i - 1).line == token->line) {
-            c11_sbuf__write_char(&ss, ',');
-        } else {
-            c11_sbuf__write_int(&ss, token->line);
-            c11_sbuf__write_char(&ss, ',');
-        }
-
-        if(i > 0 && c11__getitem(Token, &nexts, i - 1).brackets_level == token->brackets_level) {
-            c11_sbuf__write_char(&ss, ',');
-        } else {
-            c11_sbuf__write_int(&ss, token->brackets_level);
-            c11_sbuf__write_char(&ss, ',');
-        }
-        // visit token value
-        switch(token->value.index) {
-            case TokenValue_EMPTY: break;
-            case TokenValue_I64:
-                c11_sbuf__write_char(&ss, 'I');
-                c11_sbuf__write_int(&ss, token->value._i64);
-                break;
-            case TokenValue_F64:
-                c11_sbuf__write_char(&ss, 'F');
-                c11_sbuf__write_f64(&ss, token->value._f64, -1);
-                break;
-            case TokenValue_STR: {
-                c11_sbuf__write_char(&ss, 'S');
-                c11_sv sv = c11_string__sv(token->value._str);
-                for(int i = 0; i < sv.size; i++) {
-                    c11_sbuf__write_hex(&ss, sv.data[i], false);
-                }
-                break;
-            }
-        }
-        c11_sbuf__write_char(&ss, '\n');
-    }
-    *out = c11_sbuf__submit(&ss);
-    c11_smallmap_s2n__dtor(&token_indices);
-    return NULL;
-}
-
 void TokenArray__dtor(TokenArray* self) {
     Token* data = self->data;
     for(int i = 0; i < self->count; i++) {