mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-21 03:50:16 +00:00
some fix
This commit is contained in:
parent
225f634f33
commit
c4bb16e390
25
include/pocketpy/compiler/lexer.h
Normal file
25
include/pocketpy/compiler/lexer.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "pocketpy/common/str.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Cursor over a serialized token stream stored as a C string.
// The struct only stores two pointers into caller-provided text.
typedef struct pkpy_TokenDeserializer {
|
||||
// Current read position within the text.
const char* curr;
|
||||
// Start of the text being read.
const char* source;
|
||||
} pkpy_TokenDeserializer;
|
||||
|
||||
// Points both `curr` and `source` of `self` at `source`.
void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source);
|
||||
// If the character at the cursor equals `c`, consumes it and returns true;
// otherwise returns false and leaves the cursor where it was.
bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c);
|
||||
// Returns the text from the cursor up to (not including) the next `c`
// and moves the cursor past that delimiter.
c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c);
|
||||
// Reads text up to the delimiter `c` and decodes it as pairs of lowercase
// hexadecimal digits into a newly allocated string.
pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c);
|
||||
// Expects '=' at the cursor, then reads a decimal number terminated by '\n'.
int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self);
|
||||
// Reads decimal digits up to the delimiter `c` and returns their value;
// the cursor ends up past the delimiter.
// NOTE(review): int64_t needs <stdint.h>; presumably pulled in via
// "pocketpy/common/str.h" -- confirm.
int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c);
|
||||
// Reads text up to the delimiter `c` and converts it to a double.
double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
@ -146,27 +146,4 @@ enum class IntParsingResult {
|
||||
|
||||
// Declaration only. Presumably parses `text` as an unsigned integer in the
// given `base`, writing the value to `*out` and reporting the outcome as an
// IntParsingResult -- confirm against the definition.
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept;
|
||||
|
||||
struct TokenDeserializer {
|
||||
const char* curr;
|
||||
const char* source;
|
||||
|
||||
TokenDeserializer(const char* source) noexcept: curr(source), source(source){}
|
||||
|
||||
char read_char() noexcept{ return *curr++; }
|
||||
|
||||
bool match_char(char c) noexcept{
|
||||
if(*curr == c) {
|
||||
curr++;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string_view read_string(char c) noexcept;
|
||||
Str read_string_from_hex(char c) noexcept;
|
||||
int read_count() noexcept;
|
||||
i64 read_uint(char c) noexcept;
|
||||
f64 read_float(char c) noexcept;
|
||||
};
|
||||
|
||||
} // namespace pkpy
|
||||
|
@ -33,6 +33,10 @@ void pkpy_SourceData__dtor(struct pkpy_SourceData* self) {
|
||||
pkpy_Str__dtor(&self->filename);
|
||||
pkpy_Str__dtor(&self->source);
|
||||
c11_vector__dtor(&self->line_starts);
|
||||
|
||||
for(int i=0; i<self->_precompiled_tokens.count; i++){
|
||||
pkpy_Str__dtor(c11__at(pkpy_Str, &self->_precompiled_tokens, i));
|
||||
}
|
||||
c11_vector__dtor(&self->_precompiled_tokens);
|
||||
}
|
||||
|
||||
|
83
src/compiler/lexer.c
Normal file
83
src/compiler/lexer.c
Normal file
@ -0,0 +1,83 @@
|
||||
#include "pocketpy/common/config.h"
|
||||
#include "pocketpy/common/str.h"
|
||||
#include "pocketpy/common/smallmap.h"
|
||||
#include "pocketpy/compiler/lexer.h"
|
||||
|
||||
// Prepares `self` for reading: the start marker and the read cursor both
// refer to the first character of `source`.
void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source){
    self->source = source;
    self->curr = self->source;
}
|
||||
|
||||
// Consumes the character under the cursor only when it equals `c`.
// Returns true if a character was consumed, false otherwise.
bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c){
    const bool matched = (*self->curr == c);
    if(matched) self->curr++;
    return matched;
}
|
||||
|
||||
// Reads characters from the cursor up to (not including) the next `c` and
// returns them as a view into the source text (no copy is made).
// On return the cursor sits just past the delimiter.
//
// The scan also stops at a NUL byte, so input that lacks the delimiter yields
// the remaining text instead of reading past the end of the buffer; in that
// case the cursor is left on the NUL.
// Assumes the text handed to the constructor is NUL-terminated -- confirm.
c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c){
    const char* start = self->curr;
    while(*self->curr != c && *self->curr != '\0')
        self->curr++;
    c11_string retval = {start, self->curr - start};
    if(*self->curr == c) self->curr++;  // skip the delimiter (only if one was found)
    return retval;
}
|
||||
|
||||
// Value (0..15) of one lowercase hexadecimal digit.
// Any other character trips an assertion; with assertions disabled it counts as 0.
static unsigned char pkpy_TokenDeserializer__hex_digit(char ch){
    if(ch >= '0' && ch <= '9') return (unsigned char)(ch - '0');
    if(ch >= 'a' && ch <= 'f') return (unsigned char)(ch - 'a' + 10);
    assert(false);
    return 0;
}

// Reads text up to the delimiter `c` and decodes it as a sequence of
// two-digit lowercase hexadecimal bytes.
//
// The decoded bytes go into a malloc'ed buffer that is NUL-terminated and
// handed to the returned pkpy_Str (`is_sso` is false, `_ptr` is the buffer).
// A well-formed payload has an even number of digits; only complete pairs are
// decoded, so a stray trailing digit is never paired with the delimiter.
pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c){
    c11_string sv = pkpy_TokenDeserializer__read_string(self, c);
    const char* s = sv.data;
    assert(sv.size % 2 == 0);
    const int n = sv.size / 2;  // number of decoded bytes
    char* buffer = (char*)malloc(n + 1);
    for(int i = 0; i < n; i++) {
        // Combine the nibbles in unsigned arithmetic to avoid shifting a signed char.
        unsigned char hi = pkpy_TokenDeserializer__hex_digit(s[2 * i]);
        unsigned char lo = pkpy_TokenDeserializer__hex_digit(s[2 * i + 1]);
        buffer[i] = (char)((hi << 4) | lo);
    }
    buffer[n] = 0;
    return (pkpy_Str){
        .size = n,
        .is_ascii = c11__isascii(buffer, n),
        .is_sso = false,
        ._ptr = buffer
    };
}
|
||||
|
||||
// Reads a count field: a '=' marker followed by a decimal number that is
// terminated by '\n'. The marker is only checked by an assertion.
int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self){
    assert(*self->curr == '=');
    self->curr += 1;  // step over the '=' marker
    const int64_t count = pkpy_TokenDeserializer__read_uint(self, '\n');
    return (int)count;
}
|
||||
|
||||
// Reads a decimal number from the cursor up to the delimiter `c` and returns
// its value; the cursor ends up just past the delimiter.
//
// The scan also stops at a NUL byte, so input that lacks the delimiter cannot
// run past the end of the buffer; in that case the cursor is left on the NUL.
// Characters are not checked to be digits and overflow is not detected.
int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c){
    int64_t out = 0;
    while(*self->curr != c && *self->curr != '\0') {
        out = out * 10 + (*self->curr - '0');
        self->curr++;
    }
    if(*self->curr == c) self->curr++;  // skip the delimiter (only if one was found)
    return out;
}
|
||||
|
||||
// Reads text up to the delimiter `c` and converts it to a double with strtod.
//
// The text is first copied into a temporary pkpy_Str (presumably
// NUL-terminated by pkpy_Str__ctor2) because the view returned by
// read_string is not terminated at the delimiter.
double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c){
    c11_string sv = pkpy_TokenDeserializer__read_string(self, c);
    pkpy_Str nullterm;
    pkpy_Str__ctor2(&nullterm, sv.data, sv.size);
    char* end;
    double retval = strtod(pkpy_Str__data(&nullterm), &end);
    // `end` points into nullterm's storage, so it must be inspected before
    // the temporary is destroyed.
    assert(*end == 0);
    pkpy_Str__dtor(&nullterm);
    return retval;
}
|
@ -2,6 +2,7 @@
|
||||
#include "pocketpy/common/config.h"
|
||||
#include "pocketpy/common/str.h"
|
||||
#include "pocketpy/common/smallmap.h"
|
||||
#include "pocketpy/compiler/lexer.h"
|
||||
|
||||
#include <cstdarg>
|
||||
|
||||
@ -542,54 +543,69 @@ Error* Lexer::run() noexcept{
|
||||
}
|
||||
|
||||
Error* Lexer::from_precompiled() noexcept{
|
||||
TokenDeserializer deserializer(pkpy_Str__data(&src->source));
|
||||
deserializer.curr += 5; // skip "pkpy:"
|
||||
std::string_view version = deserializer.read_string('\n');
|
||||
pkpy_TokenDeserializer deserializer;
|
||||
pkpy_TokenDeserializer__ctor(&deserializer, pkpy_Str__data(&src->source));
|
||||
|
||||
if(version != PK_VERSION){
|
||||
deserializer.curr += 5; // skip "pkpy:"
|
||||
c11_string version = pkpy_TokenDeserializer__read_string(&deserializer, '\n');
|
||||
|
||||
if(c11_string__cmp3(version, PK_VERSION) != 0) {
|
||||
return SyntaxError("precompiled version mismatch");
|
||||
}
|
||||
if(deserializer.read_uint('\n') != (i64)src->mode){
|
||||
if(pkpy_TokenDeserializer__read_uint(&deserializer, '\n') != (i64)src->mode){
|
||||
return SyntaxError("precompiled mode mismatch");
|
||||
}
|
||||
|
||||
int count = deserializer.read_count();
|
||||
auto precompiled_tokens = &src->_precompiled_tokens;
|
||||
int count = pkpy_TokenDeserializer__read_count(&deserializer);
|
||||
c11_vector* precompiled_tokens = &src->_precompiled_tokens;
|
||||
for(int i = 0; i < count; i++) {
|
||||
c11_vector__push(Str, precompiled_tokens, Str(deserializer.read_string('\n')));
|
||||
c11_string item = pkpy_TokenDeserializer__read_string(&deserializer, '\n');
|
||||
pkpy_Str copied_item;
|
||||
pkpy_Str__ctor2(&copied_item, item.data, item.size);
|
||||
c11_vector__push(pkpy_Str, precompiled_tokens, copied_item);
|
||||
}
|
||||
|
||||
count = deserializer.read_count();
|
||||
count = pkpy_TokenDeserializer__read_count(&deserializer);
|
||||
for(int i = 0; i < count; i++) {
|
||||
Token t;
|
||||
t.type = (unsigned char)deserializer.read_uint(',');
|
||||
t.type = (unsigned char)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
|
||||
if(is_raw_string_used(t.type)) {
|
||||
i64 index = deserializer.read_uint(',');
|
||||
t.start = c11__getitem(Str, precompiled_tokens, index).c_str();
|
||||
t.length = c11__getitem(Str, precompiled_tokens, index).size;
|
||||
i64 index = pkpy_TokenDeserializer__read_uint(&deserializer, ',');
|
||||
pkpy_Str* p = c11__at(pkpy_Str, precompiled_tokens, index);
|
||||
t.start = pkpy_Str__data(p);
|
||||
t.length = c11__getitem(pkpy_Str, precompiled_tokens, index).size;
|
||||
} else {
|
||||
t.start = nullptr;
|
||||
t.start = NULL;
|
||||
t.length = 0;
|
||||
}
|
||||
|
||||
if(deserializer.match_char(',')) {
|
||||
if(pkpy_TokenDeserializer__match_char(&deserializer, ',')) {
|
||||
t.line = nexts.back().line;
|
||||
} else {
|
||||
t.line = (int)deserializer.read_uint(',');
|
||||
t.line = (int)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
|
||||
}
|
||||
|
||||
if(deserializer.match_char(',')) {
|
||||
if(pkpy_TokenDeserializer__match_char(&deserializer, ',')) {
|
||||
t.brackets_level = nexts.back().brackets_level;
|
||||
} else {
|
||||
t.brackets_level = (int)deserializer.read_uint(',');
|
||||
t.brackets_level = (int)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
|
||||
}
|
||||
|
||||
char type = deserializer.read_char();
|
||||
char type = (*deserializer.curr++); // read_char
|
||||
switch(type) {
|
||||
case 'I': t.value = deserializer.read_uint('\n'); break;
|
||||
case 'F': t.value = deserializer.read_float('\n'); break;
|
||||
case 'S': t.value = deserializer.read_string_from_hex('\n'); break;
|
||||
default: t.value = {}; break;
|
||||
case 'I':
|
||||
t.value = pkpy_TokenDeserializer__read_uint(&deserializer, '\n');
|
||||
break;
|
||||
case 'F':
|
||||
t.value = pkpy_TokenDeserializer__read_float(&deserializer, '\n');
|
||||
break;
|
||||
case 'S': {
|
||||
pkpy_Str res = pkpy_TokenDeserializer__read_string_from_hex(&deserializer, '\n');
|
||||
t.value = Str(std::move(res));
|
||||
} break;
|
||||
default:
|
||||
t.value = {};
|
||||
break;
|
||||
}
|
||||
nexts.push_back(t);
|
||||
}
|
||||
@ -665,60 +681,6 @@ Error* Lexer::precompile(Str* out) noexcept{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Returns a view of the text from the cursor up to (not including) the next
// `c`, then moves the cursor past that delimiter.
//
// The scan also stops at a NUL byte, so input that lacks the delimiter yields
// the remaining text instead of reading past the end of the buffer; in that
// case the cursor is left on the NUL.
std::string_view TokenDeserializer::read_string(char c) noexcept{
    const char* start = curr;
    while(*curr != c && *curr != '\0')
        curr++;
    std::string_view retval(start, curr - start);
    if(*curr == c) curr++;  // skip the delimiter (only if one was found)
    return retval;
}
|
||||
|
||||
// Reads text up to the delimiter `c` and decodes it as a sequence of
// two-digit lowercase hexadecimal bytes.
//
// The decoded bytes are written to a malloc'ed, NUL-terminated buffer that is
// returned together with its length. A well-formed payload has an even number
// of digits; only complete pairs are decoded, so a stray trailing digit never
// causes a read beyond the view.
Str TokenDeserializer::read_string_from_hex(char c) noexcept{
    std::string_view s = read_string(c);
    assert(s.size() % 2 == 0);
    const int n = (int)(s.size() / 2);  // number of decoded bytes
    char* buffer = (char*)std::malloc(n + 1);

    // Value of one lowercase hex digit; anything else trips an assertion
    // (and counts as 0 when assertions are disabled).
    auto hex_digit = [](char ch) -> unsigned char {
        if(ch >= '0' && ch <= '9') return (unsigned char)(ch - '0');
        if(ch >= 'a' && ch <= 'f') return (unsigned char)(ch - 'a' + 10);
        assert(false);
        return 0;
    };

    for(int i = 0; i < n; i++) {
        buffer[i] = (char)((hex_digit(s[2 * i]) << 4) | hex_digit(s[2 * i + 1]));
    }
    buffer[n] = 0;
    return pair<char*, int>(buffer, n);
}
|
||||
|
||||
int TokenDeserializer::read_count() noexcept{
|
||||
assert(*curr == '=');
|
||||
curr++;
|
||||
return read_uint('\n');
|
||||
}
|
||||
|
||||
// Reads a decimal number from the cursor up to the delimiter `c` and returns
// its value; the cursor ends up just past the delimiter.
//
// The scan also stops at a NUL byte, so input that lacks the delimiter cannot
// run past the end of the buffer; in that case the cursor is left on the NUL.
// Characters are not checked to be digits and overflow is not detected.
i64 TokenDeserializer::read_uint(char c) noexcept{
    i64 out = 0;
    while(*curr != c && *curr != '\0') {
        out = out * 10 + (*curr - '0');
        curr++;
    }
    if(*curr == c) curr++;  // skip the delimiter (only if one was found)
    return out;
}
|
||||
|
||||
// Reads text up to the delimiter `c` and converts it to a floating-point value.
// The view is copied into a std::string because std::stod takes a std::string.
// NOTE(review): std::stod throws on unparsable or out-of-range text; inside
// this noexcept function that would end in std::terminate.
f64 TokenDeserializer::read_float(char c) noexcept{
    const std::string text(read_string(c));
    return std::stod(text);
}
|
||||
|
||||
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept{
|
||||
*out = 0;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user