add precompile

This commit is contained in:
blueloveTH 2024-04-13 22:55:49 +08:00
parent 9855f4e681
commit 0d41d40456
6 changed files with 113 additions and 26 deletions

View File

@ -170,4 +170,6 @@ inline constexpr bool is_integral_v = std::is_same_v<T, char>
// True exactly when T is `float` or `double`; every other type (including
// cv-qualified variants and `long double`) yields false.
template<typename T>
inline constexpr bool is_floating_point_v =
    std::disjunction_v<std::is_same<T, float>, std::is_same<T, double>>;
// Lowercase hexadecimal digits indexed by nibble value: PK_HEX_TABLE[n] is the
// digit for n in [0, 15]. Declared constexpr so the pointer itself cannot be
// reseated at runtime.
inline constexpr const char* PK_HEX_TABLE = "0123456789abcdef";
} // namespace pkpy

View File

@ -121,8 +121,22 @@ class Compiler {
public:
Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope=false);
void precompile();
Str precompile();
void from_precompiled(const char* source);
CodeObject_ compile();
};
// Sequential reader over a character buffer: `curr` is the read position and
// `source` keeps the pointer the reader was constructed with.
struct TokenDeserializer{
    const char* curr;       // next character to be consumed
    const char* source;     // beginning of the buffer being read

    TokenDeserializer(const char* source): curr(source), source(source) {}

    // Returns the character under the cursor and advances the cursor by one.
    char read_char(){
        const char ch = *curr;
        ++curr;
        return ch;
    }

    // Each reader below takes the delimiter character `c` that ends the field.
    std::string_view read_string(char c);
    Str read_string_from_hex(char c);
    i64 read_int(char c);
    f64 read_float(char c);
};
} // namespace pkpy

View File

@ -31,6 +31,7 @@ struct SourceData {
Str source;
pod_vector<const char*> line_starts;
bool is_precompiled;
SourceData(std::string_view source, const Str& filename, CompileMode mode);
SourceData(const Str& filename, CompileMode mode);

View File

@ -1225,40 +1225,67 @@ __EAT_DOTS_END:
init_pratt_rules();
}
// Serializes the lexer's token stream into text.
// NOTE(review): this span shows removed and added diff lines interleaved (two
// signatures, two header sections, two loops, and a print next to a return).
// The notes below describe the `Str`-returning variant — confirm against the
// committed file before relying on them.
// Layout written by that variant:
//   "pkpy:" PK_VERSION '\n'
//   '=' <token count> '\n'
//   one line per token: <type>,<line>,<brackets_level>,<value>'\n'
// where <value> is 'I' + integer, 'F' + float, 'S' + the string's bytes written
// through write_hex, or nothing for any other value alternative.
void Compiler::precompile(){
Str Compiler::precompile(){
auto tokens = lexer.run();
SStream ss;
ss << PK_VERSION << '\n'; // L1: version string
ss << lexer.src->filename << '\n'; // L2: filename
ss << mode() << '\n'; // L3: compile mode
ss << (int)unknown_global_scope << '\n'; // L4: unknown global scope
ss << '=' << (int)tokens.size() << '\n'; // L5: token count
for(auto token: lexer.run()){
ss << (int)token.type << '\n';
int offset = token.start - lexer.src->source.c_str();
ss << offset << '\n';
ss << token.length << '\n';
ss << token.line << '\n';
ss << token.brackets_level << '\n';
// The "pkpy:" prefix is the 5-character tag that from_precompiled() skips.
ss << "pkpy:" PK_VERSION << '\n'; // L1: version string
ss << "=" << (int)tokens.size() << '\n'; // L5: token count
for(auto token: tokens){
ss << (int)token.type << ',';
ss << token.line << ',';
ss << token.brackets_level << ',';
// visit token value
std::visit([&ss](auto&& arg){
using T = std::decay_t<decltype(arg)>;
if constexpr(std::is_same_v<T, i64>){
ss << 'i' << arg << '\n';
ss << 'I' << arg;
}else if constexpr(std::is_same_v<T, f64>){
ss << 'f' << arg << '\n';
ss << 'F' << arg;
}else if constexpr(std::is_same_v<T, Str>){
ss << 's' << arg.escape() << '\n';
ss << 'S';
// Hex-encode every byte so the payload cannot contain ',' or '\n'.
for(char c: arg) ss.write_hex((unsigned char)c);
}
// The record terminator is written for every alternative, including ones with no payload.
ss << '\n';
}, token.value);
}
std::cout << ss.str() << std::endl;
return ss.str();
}
void Compiler::from_precompiled(const char* source){
TokenDeserializer deserializer(source);
deserializer.curr += 5; // skip "pkpy:"
std::string_view version = deserializer.read_string('\n');
if(version != PK_VERSION) SyntaxError(_S("precompiled version mismatch: ", version, "!=" PK_VERSION));
deserializer.curr += 1; // skip '='
i64 count = deserializer.read_int('\n');
const char* null_start = lexer.src->source.c_str();
for(int i=0; i<count; i++){
Token t;
t.type = (unsigned char)deserializer.read_int(',');
t.start = null_start;
t.length = 0;
t.line = (int)deserializer.read_int(',');
t.brackets_level = (int)deserializer.read_int(',');
char type = deserializer.read_char();
switch(type){
case 'I': t.value = deserializer.read_int('\n'); break;
case 'F': t.value = deserializer.read_float('\n'); break;
case 'S': t.value = deserializer.read_string_from_hex('\n'); break;
default: t.value = {}; break;
}
tokens.push_back(t);
}
}
CodeObject_ Compiler::compile(){
PK_ASSERT(i == 0) // make sure it is the first time to compile
tokens = lexer.run();
if(lexer.src->is_precompiled){
from_precompiled(lexer.src->source.c_str());
}else{
this->tokens = lexer.run();
}
CodeObject_ code = push_global_context();
advance(); // skip @sof, so prev() is always valid
@ -1296,4 +1323,42 @@ __EAT_DOTS_END:
e.st_push(src, lineno, cursor, "");
throw e;
}
// Returns the characters from the cursor up to (not including) the delimiter
// `c`, as a view into the underlying buffer, and moves the cursor just past
// the delimiter.
// The scan also stops at a NUL byte: if the delimiter is missing (truncated or
// malformed input) the remaining text is returned and the cursor stays on the
// NUL instead of running past the end of the buffer. This assumes the buffer
// is NUL-terminated, as it is when from_precompiled() passes a c_str().
std::string_view TokenDeserializer::read_string(char c){
    const char* start = curr;
    while(*curr != c && *curr != '\0') curr++;
    std::string_view retval(start, curr-start);
    if(*curr == c) curr++;  // skip the delimiter only when it was actually found
    return retval;
}
// Reads a field terminated by `c` that holds lowercase hex digits, two per
// byte, and returns the decoded bytes as a Str built from a pool64-allocated
// buffer. Calls PK_FATAL_ERROR() when the field has an odd number of digits or
// contains a character outside [0-9a-f].
Str TokenDeserializer::read_string_from_hex(char c){
    std::string_view s = read_string(c);
    // Two digits encode one byte, so an odd length can only come from corrupt
    // input; reject it before the decoder would read one past the field.
    if(s.size() % 2 != 0) PK_FATAL_ERROR();
    const int size = (int)(s.size() / 2);
    char* buffer = (char*)pool64_alloc(size + 1);

    // Maps one hex digit to its 4-bit value.
    auto nibble = [](char h) -> unsigned char {
        if(h >= '0' && h <= '9') return (unsigned char)(h - '0');
        if(h >= 'a' && h <= 'f') return (unsigned char)(h - 'a' + 10);
        PK_FATAL_ERROR();
        return 0;   // only reached if PK_FATAL_ERROR() returns
    };

    for(int i = 0; i < size; i++){
        const unsigned char high = nibble(s[2*i]);
        const unsigned char low = nibble(s[2*i + 1]);
        buffer[i] = (char)((high << 4) | low);
    }
    // The extra byte was allocated but never written; terminate the buffer so
    // it can be read as a C string.
    buffer[size] = '\0';
    return std::pair<char*, int>(buffer, size);
}
// Reads the field terminated by `c` and parses it as a base-10 integer.
// Asserts (PK_ASSERT) that parse_int reports success.
i64 TokenDeserializer::read_int(char c){
    i64 value;
    const std::string_view digits = read_string(c);
    const IntParsingResult status = parse_int(digits, &value, 10);
    PK_ASSERT(status == IntParsingResult::Success);
    return value;
}
// Reads the field terminated by `c` and converts it with std::stod.
// The view is copied into a std::string first because std::stod needs an
// owning, terminated string.
f64 TokenDeserializer::read_float(char c){
    const std::string text(read_string(c));
    return std::stod(text);
}
} // namespace pkpy

View File

@ -13,6 +13,11 @@ namespace pkpy{
index++;
}
this->source = ss.str();
if(this->source.sv().substr(5) == "pkpy:"){
this->is_precompiled = true;
}else{
this->is_precompiled = false;
}
line_starts.push_back(this->source.c_str());
}
@ -35,7 +40,7 @@ namespace pkpy{
SStream ss;
ss << " " << "File \"" << filename << "\", line " << lineno;
if(!name.empty()) ss << ", in " << name;
if(!source.empty()){
if(!source.empty() && !is_precompiled){
ss << '\n';
std::pair<const char*,const char*> pair = _get_line(lineno);
Str line = "<?>";

View File

@ -255,8 +255,8 @@ int utf8len(unsigned char c, bool suppress){
default:
if ('\x00' <= c && c <= '\x1f') {
ss << "\\x"; // << std::hex << std::setw(2) << std::setfill('0') << (int)c;
ss << "0123456789abcdef"[c >> 4];
ss << "0123456789abcdef"[c & 0xf];
ss << PK_HEX_TABLE[c >> 4];
ss << PK_HEX_TABLE[c & 0xf];
} else {
ss << c;
}
@ -495,11 +495,11 @@ int utf8len(unsigned char c, bool suppress){
unsigned char high = c >> 4;
unsigned char low = c & 0xf;
if(non_zero){
if(high) (*this) << "0123456789abcdef"[high];
if(high || low) (*this) << "0123456789abcdef"[low];
if(high) (*this) << PK_HEX_TABLE[high];
if(high || low) (*this) << PK_HEX_TABLE[low];
}else{
(*this) << "0123456789abcdef"[high];
(*this) << "0123456789abcdef"[low];
(*this) << PK_HEX_TABLE[high];
(*this) << PK_HEX_TABLE[low];
}
}