This commit is contained in:
blueloveTH 2024-06-15 21:41:59 +08:00
parent 225f634f33
commit c4bb16e390
5 changed files with 151 additions and 100 deletions

View File

@ -0,0 +1,25 @@
#pragma once
#include <stdbool.h>
#include "pocketpy/common/str.h"
#ifdef __cplusplus
extern "C" {
#endif
// Reader state used to walk the text of a precompiled token stream.
typedef struct pkpy_TokenDeserializer {
// Current read position; advanced by the match/read functions.
const char* curr;
// Start of the input, as passed to pkpy_TokenDeserializer__ctor.
const char* source;
} pkpy_TokenDeserializer;
// Initializes `self` so that both `curr` and `source` point at `source`.
void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source);
// If the current character equals `c`, steps past it and returns true;
// otherwise returns false and leaves the read position unchanged.
bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c);
// Returns a view of the characters from the current position up to (not
// including) the delimiter `c`, then steps past the delimiter.
// The view points into the input; nothing is copied.
c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c);
// Reads up to the delimiter `c` and decodes each pair of hex digits
// ('0'-'9', 'a'-'f') into one byte. The result is a non-SSO pkpy_Str whose
// buffer was allocated with malloc.
pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c);
// Expects '=' at the current position (asserted), skips it, and reads the
// decimal number that follows up to the next '\n'.
int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self);
// Reads decimal digits up to the delimiter `c`, steps past the delimiter and
// returns the accumulated value. Characters are not validated as digits.
int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c);
// Reads up to the delimiter `c` and converts that text with strtod.
double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c);
#ifdef __cplusplus
}
#endif

View File

@ -146,27 +146,4 @@ enum class IntParsingResult {
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept; IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept;
/// Cursor-style reader over the text of a precompiled token stream.
struct TokenDeserializer {
    const char* curr;    // current read position
    const char* source;  // start of the input

    TokenDeserializer(const char* source) noexcept: curr(source), source(source){}

    /// Returns the character under the cursor and steps past it.
    char read_char() noexcept{
        const char ch = *curr;
        ++curr;
        return ch;
    }

    /// Consumes the current character only when it equals `c`.
    /// Returns whether a character was consumed.
    bool match_char(char c) noexcept{
        if(*curr != c) return false;
        ++curr;
        return true;
    }

    // Delimiter-terminated field readers; defined out of line.
    std::string_view read_string(char c) noexcept;
    Str read_string_from_hex(char c) noexcept;
    int read_count() noexcept;
    i64 read_uint(char c) noexcept;
    f64 read_float(char c) noexcept;
};
} // namespace pkpy } // namespace pkpy

View File

@ -33,6 +33,10 @@ void pkpy_SourceData__dtor(struct pkpy_SourceData* self) {
pkpy_Str__dtor(&self->filename); pkpy_Str__dtor(&self->filename);
pkpy_Str__dtor(&self->source); pkpy_Str__dtor(&self->source);
c11_vector__dtor(&self->line_starts); c11_vector__dtor(&self->line_starts);
for(int i=0; i<self->_precompiled_tokens.count; i++){
pkpy_Str__dtor(c11__at(pkpy_Str, &self->_precompiled_tokens, i));
}
c11_vector__dtor(&self->_precompiled_tokens); c11_vector__dtor(&self->_precompiled_tokens);
} }

83
src/compiler/lexer.c Normal file
View File

@ -0,0 +1,83 @@
#include "pocketpy/common/config.h"
#include "pocketpy/common/str.h"
#include "pocketpy/common/smallmap.h"
#include "pocketpy/compiler/lexer.h"
// Prepare `self` to read from the beginning of `source`.
void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source){
    // Remember where the input starts, then place the cursor there.
    self->source = source;
    self->curr = self->source;
}
// Consume the current character if, and only if, it equals `c`.
// Returns true when a character was consumed, false otherwise.
bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c){
    const bool matched = (*self->curr == c);
    if(matched) self->curr += 1;
    return matched;
}
// Read one field terminated by the delimiter `c`.
//
// Returns a view (not a copy) of the characters from the current position up
// to, but not including, the delimiter, and leaves the cursor just past the
// delimiter.
//
// If the delimiter is missing, the scan stops at the terminating NUL instead
// of running off the end of the buffer; the cursor is then left on the NUL so
// that later reads see an empty field.
// NOTE(review): this assumes the input is NUL-terminated, as it is when it
// comes from pkpy_Str__data -- confirm for any other caller.
c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c){
    const char* start = self->curr;
    while(*self->curr != c && *self->curr != '\0')
        self->curr++;
    c11_string retval = {start, self->curr - start};
    if(*self->curr == c)
        self->curr++;  // skip the delimiter
    return retval;
}
// Decode a single hex digit ('0'-'9' or 'a'-'f') into its value 0..15.
// Any other character is a format error: it asserts in debug builds and
// contributes 0 otherwise.
static int pkpy_TokenDeserializer__hex_nibble(char ch){
    if(ch >= '0' && ch <= '9') return ch - '0';
    if(ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
    assert(false);
    return 0;
}

// Read one field terminated by the delimiter `c` and decode it from hex.
//
// Every two hex digits of the field become one byte of the result. The bytes
// are written into a malloc'ed buffer (with a trailing 0) that is handed to
// the returned pkpy_Str as its non-SSO storage.
//
// The field is expected to have an even number of digits; a trailing unpaired
// digit is a format error (asserted) and is never read as part of a pair.
pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c){
    c11_string sv = pkpy_TokenDeserializer__read_string(self, c);
    const char* s = sv.data;
    assert(sv.size % 2 == 0);
    const int n_bytes = sv.size / 2;
    char* buffer = (char*)malloc(n_bytes + 1);
    for(int i = 0; i < n_bytes; i++) {
        int hi = pkpy_TokenDeserializer__hex_nibble(s[2 * i]);
        int lo = pkpy_TokenDeserializer__hex_nibble(s[2 * i + 1]);
        buffer[i] = (char)((hi << 4) | lo);
    }
    buffer[n_bytes] = 0;
    return (pkpy_Str){
        .size = n_bytes,
        .is_ascii = c11__isascii(buffer, n_bytes),
        .is_sso = false,
        ._ptr = buffer
    };
}
// Read a section header of the form "=<decimal>\n" and return the number.
// The leading '=' is asserted, then skipped.
int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self){
    assert(*self->curr == '=');
    self->curr += 1;  // step over '='
    int64_t count = pkpy_TokenDeserializer__read_uint(self, '\n');
    return (int)count;
}
// Read a base-10 unsigned number terminated by the delimiter `c`.
//
// Accumulates digits up to the delimiter, leaves the cursor just past the
// delimiter and returns the value. Characters are not validated as digits.
//
// If the delimiter is missing, the scan stops at the terminating NUL instead
// of running off the end of the buffer; the cursor is then left on the NUL.
// NOTE(review): this assumes the input is NUL-terminated, as it is when it
// comes from pkpy_Str__data -- confirm for any other caller.
int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c){
    int64_t out = 0;
    while(*self->curr != c && *self->curr != '\0') {
        out = out * 10 + (*self->curr - '0');
        self->curr++;
    }
    if(*self->curr == c)
        self->curr++;  // skip the delimiter
    return out;
}
// Read one field terminated by the delimiter `c` and parse it as a double.
//
// strtod needs a NUL-terminated string, so the field is first copied into a
// temporary pkpy_Str (presumably NUL-terminated by pkpy_Str__ctor2 -- the
// original code relies on the same thing).
double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c){
    c11_string sv = pkpy_TokenDeserializer__read_string(self, c);
    pkpy_Str nullterm;
    pkpy_Str__ctor2(&nullterm, sv.data, sv.size);
    char* end;
    double retval = strtod(pkpy_Str__data(&nullterm), &end);
    // `end` points into nullterm's storage, so the "whole field was consumed"
    // check must run before that storage is released.
    assert(*end == 0);
    pkpy_Str__dtor(&nullterm);
    return retval;
}

View File

@ -2,6 +2,7 @@
#include "pocketpy/common/config.h" #include "pocketpy/common/config.h"
#include "pocketpy/common/str.h" #include "pocketpy/common/str.h"
#include "pocketpy/common/smallmap.h" #include "pocketpy/common/smallmap.h"
#include "pocketpy/compiler/lexer.h"
#include <cstdarg> #include <cstdarg>
@ -542,54 +543,69 @@ Error* Lexer::run() noexcept{
} }
Error* Lexer::from_precompiled() noexcept{ Error* Lexer::from_precompiled() noexcept{
TokenDeserializer deserializer(pkpy_Str__data(&src->source)); pkpy_TokenDeserializer deserializer;
deserializer.curr += 5; // skip "pkpy:" pkpy_TokenDeserializer__ctor(&deserializer, pkpy_Str__data(&src->source));
std::string_view version = deserializer.read_string('\n');
if(version != PK_VERSION){ deserializer.curr += 5; // skip "pkpy:"
c11_string version = pkpy_TokenDeserializer__read_string(&deserializer, '\n');
if(c11_string__cmp3(version, PK_VERSION) != 0) {
return SyntaxError("precompiled version mismatch"); return SyntaxError("precompiled version mismatch");
} }
if(deserializer.read_uint('\n') != (i64)src->mode){ if(pkpy_TokenDeserializer__read_uint(&deserializer, '\n') != (i64)src->mode){
return SyntaxError("precompiled mode mismatch"); return SyntaxError("precompiled mode mismatch");
} }
int count = deserializer.read_count(); int count = pkpy_TokenDeserializer__read_count(&deserializer);
auto precompiled_tokens = &src->_precompiled_tokens; c11_vector* precompiled_tokens = &src->_precompiled_tokens;
for(int i = 0; i < count; i++) { for(int i = 0; i < count; i++) {
c11_vector__push(Str, precompiled_tokens, Str(deserializer.read_string('\n'))); c11_string item = pkpy_TokenDeserializer__read_string(&deserializer, '\n');
pkpy_Str copied_item;
pkpy_Str__ctor2(&copied_item, item.data, item.size);
c11_vector__push(pkpy_Str, precompiled_tokens, copied_item);
} }
count = deserializer.read_count(); count = pkpy_TokenDeserializer__read_count(&deserializer);
for(int i = 0; i < count; i++) { for(int i = 0; i < count; i++) {
Token t; Token t;
t.type = (unsigned char)deserializer.read_uint(','); t.type = (unsigned char)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
if(is_raw_string_used(t.type)) { if(is_raw_string_used(t.type)) {
i64 index = deserializer.read_uint(','); i64 index = pkpy_TokenDeserializer__read_uint(&deserializer, ',');
t.start = c11__getitem(Str, precompiled_tokens, index).c_str(); pkpy_Str* p = c11__at(pkpy_Str, precompiled_tokens, index);
t.length = c11__getitem(Str, precompiled_tokens, index).size; t.start = pkpy_Str__data(p);
t.length = c11__getitem(pkpy_Str, precompiled_tokens, index).size;
} else { } else {
t.start = nullptr; t.start = NULL;
t.length = 0; t.length = 0;
} }
if(deserializer.match_char(',')) { if(pkpy_TokenDeserializer__match_char(&deserializer, ',')) {
t.line = nexts.back().line; t.line = nexts.back().line;
} else { } else {
t.line = (int)deserializer.read_uint(','); t.line = (int)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
} }
if(deserializer.match_char(',')) { if(pkpy_TokenDeserializer__match_char(&deserializer, ',')) {
t.brackets_level = nexts.back().brackets_level; t.brackets_level = nexts.back().brackets_level;
} else { } else {
t.brackets_level = (int)deserializer.read_uint(','); t.brackets_level = (int)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
} }
char type = deserializer.read_char(); char type = (*deserializer.curr++); // read_char
switch(type) { switch(type) {
case 'I': t.value = deserializer.read_uint('\n'); break; case 'I':
case 'F': t.value = deserializer.read_float('\n'); break; t.value = pkpy_TokenDeserializer__read_uint(&deserializer, '\n');
case 'S': t.value = deserializer.read_string_from_hex('\n'); break; break;
default: t.value = {}; break; case 'F':
t.value = pkpy_TokenDeserializer__read_float(&deserializer, '\n');
break;
case 'S': {
pkpy_Str res = pkpy_TokenDeserializer__read_string_from_hex(&deserializer, '\n');
t.value = Str(std::move(res));
} break;
default:
t.value = {};
break;
} }
nexts.push_back(t); nexts.push_back(t);
} }
@ -665,60 +681,6 @@ Error* Lexer::precompile(Str* out) noexcept{
return NULL; return NULL;
} }
/// Reads one field terminated by the delimiter `c`.
///
/// Returns a view (not a copy) of the characters up to, but not including,
/// the delimiter and leaves the cursor just past the delimiter.
///
/// If the delimiter is missing, the scan stops at the terminating NUL instead
/// of running off the end of the buffer; the cursor then stays on the NUL.
/// NOTE(review): assumes the input is NUL-terminated -- confirm against callers.
std::string_view TokenDeserializer::read_string(char c) noexcept{
    const char* start = curr;
    while(*curr != c && *curr != '\0')
        curr++;
    std::string_view retval(start, curr - start);
    if(*curr == c)
        curr++;  // skip the delimiter
    return retval;
}
/// Reads one field terminated by the delimiter `c` and decodes it from hex.
///
/// Every two hex digits ('0'-'9', 'a'-'f') become one byte of the result. The
/// bytes are written into a malloc'ed buffer (with a trailing 0) which is
/// returned together with its length.
///
/// The field is expected to have an even number of digits; a trailing
/// unpaired digit is a format error (asserted) and is never indexed.
Str TokenDeserializer::read_string_from_hex(char c) noexcept{
    // Value of one hex digit; any other character asserts in debug builds and
    // contributes 0 otherwise.
    auto nibble = [](char ch) -> int {
        if(ch >= '0' && ch <= '9') return ch - '0';
        if(ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        assert(false);
        return 0;
    };

    std::string_view s = read_string(c);
    assert(s.size() % 2 == 0);
    const int n_bytes = (int)(s.size() / 2);
    char* buffer = (char*)std::malloc(n_bytes + 1);
    for(int i = 0; i < n_bytes; i++) {
        buffer[i] = (char)((nibble(s[2 * i]) << 4) | nibble(s[2 * i + 1]));
    }
    buffer[n_bytes] = 0;
    return pair<char*, int>(buffer, s.size() / 2);
}
int TokenDeserializer::read_count() noexcept{
assert(*curr == '=');
curr++;
return read_uint('\n');
}
/// Reads a base-10 unsigned number terminated by the delimiter `c`.
///
/// Accumulates digits up to the delimiter, leaves the cursor just past the
/// delimiter and returns the value. Characters are not validated as digits.
///
/// If the delimiter is missing, the scan stops at the terminating NUL instead
/// of running off the end of the buffer; the cursor then stays on the NUL.
/// NOTE(review): assumes the input is NUL-terminated -- confirm against callers.
i64 TokenDeserializer::read_uint(char c) noexcept{
    i64 out = 0;
    while(*curr != c && *curr != '\0') {
        out = out * 10 + (*curr - '0');
        curr++;
    }
    if(*curr == c)
        curr++;  // skip the delimiter
    return out;
}
/// Reads one field terminated by the delimiter `c` and parses it as a double.
///
/// Uses std::strtod rather than std::stod: this function is noexcept, and
/// std::stod throws on unparsable or out-of-range text, which would terminate
/// the program. A field that is not fully consumed by the parse is a format
/// error and is asserted in debug builds.
f64 TokenDeserializer::read_float(char c) noexcept{
    std::string_view sv = read_string(c);
    // strtod needs a NUL-terminated string, so take an owning copy.
    const std::string text(sv);
    char* end = nullptr;
    const double retval = std::strtod(text.c_str(), &end);
    // `end` points into `text`, which is still alive here.
    assert(*end == 0);
    return retval;
}
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept{ IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept{
*out = 0; *out = 0;