diff --git a/include/pocketpy/common.h b/include/pocketpy/common.h index 29c80f3a..c1057e8f 100644 --- a/include/pocketpy/common.h +++ b/include/pocketpy/common.h @@ -170,4 +170,6 @@ inline constexpr bool is_integral_v = std::is_same_v template inline constexpr bool is_floating_point_v = std::is_same_v || std::is_same_v; +inline constexpr const char* PK_HEX_TABLE = "0123456789abcdef"; /* lowercase hex digits, indexed by nibble value */ + } // namespace pkpy diff --git a/include/pocketpy/compiler.h b/include/pocketpy/compiler.h index 4065c4c4..6daa785b 100644 --- a/include/pocketpy/compiler.h +++ b/include/pocketpy/compiler.h @@ -121,8 +121,22 @@ class Compiler { public: Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope=false); - void precompile(); + Str precompile(); + void from_precompiled(const char* source); CodeObject_ compile(); }; +struct TokenDeserializer{ + const char* curr; + const char* source; + + TokenDeserializer(const char* source): curr(source), source(source) {} + char read_char(){ return *curr++; } + + std::string_view read_string(char c); + Str read_string_from_hex(char c); + i64 read_int(char c); + f64 read_float(char c); +}; + } // namespace pkpy \ No newline at end of file diff --git a/include/pocketpy/error.h b/include/pocketpy/error.h index 2061402e..00ebf97b 100644 --- a/include/pocketpy/error.h +++ b/include/pocketpy/error.h @@ -31,6 +31,7 @@ struct SourceData { Str source; pod_vector line_starts; + bool is_precompiled = false; /* defaulted so constructors that never assign it still leave a defined value */ SourceData(std::string_view source, const Str& filename, CompileMode mode); SourceData(const Str& filename, CompileMode mode); diff --git a/src/compiler.cpp b/src/compiler.cpp index d9b7cd6f..fdbae655 100644 --- a/src/compiler.cpp +++ b/src/compiler.cpp @@ -1225,40 +1225,67 @@ __EAT_DOTS_END: init_pratt_rules(); } - void Compiler::precompile(){ + Str Compiler::precompile(){ auto tokens = lexer.run(); SStream ss; - ss << PK_VERSION << '\n'; // L1: version string - ss << lexer.src->filename << '\n'; // L2: filename - ss << mode() 
<< '\n'; // L3: compile mode - ss << (int)unknown_global_scope << '\n'; // L4: unknown global scope - ss << '=' << (int)tokens.size() << '\n'; // L5: token count - for(auto token: lexer.run()){ - ss << (int)token.type << '\n'; - int offset = token.start - lexer.src->source.c_str(); - ss << offset << '\n'; - ss << token.length << '\n'; - ss << token.line << '\n'; - ss << token.brackets_level << '\n'; + ss << "pkpy:" PK_VERSION << '\n'; // L1: version string + ss << "=" << (int)tokens.size() << '\n'; // L5: token count + for(auto token: tokens){ + ss << (int)token.type << ','; + ss << token.line << ','; + ss << token.brackets_level << ','; // visit token value std::visit([&ss](auto&& arg){ using T = std::decay_t; if constexpr(std::is_same_v){ - ss << 'i' << arg << '\n'; + ss << 'I' << arg; }else if constexpr(std::is_same_v){ - ss << 'f' << arg << '\n'; + ss << 'F' << arg; }else if constexpr(std::is_same_v){ - ss << 's' << arg.escape() << '\n'; + ss << 'S'; + for(char c: arg) ss.write_hex((unsigned char)c); } + ss << '\n'; }, token.value); } - std::cout << ss.str() << std::endl; + return ss.str(); + } + + void Compiler::from_precompiled(const char* source){ + TokenDeserializer deserializer(source); + deserializer.curr += 5; // skip "pkpy:" + std::string_view version = deserializer.read_string('\n'); + if(version != PK_VERSION) SyntaxError(_S("precompiled version mismatch: ", version, "!=" PK_VERSION)); + deserializer.curr += 1; // skip '=' + i64 count = deserializer.read_int('\n'); + const char* null_start = lexer.src->source.c_str(); + for(int i=0; iis_precompiled){ + from_precompiled(lexer.src->source.c_str()); + }else{ + this->tokens = lexer.run(); + } + CodeObject_ code = push_global_context(); advance(); // skip @sof, so prev() is always valid @@ -1296,4 +1323,42 @@ __EAT_DOTS_END: e.st_push(src, lineno, cursor, ""); throw e; } + + std::string_view TokenDeserializer::read_string(char c){ + const char* start = curr; + while(*curr != c) curr++; + std::string_view 
retval(start, curr-start); + curr++; // skip the delimiter + return retval; + } + + Str TokenDeserializer::read_string_from_hex(char c){ + std::string_view s = read_string(c); + char* buffer = (char*)pool64_alloc(s.size()/2 + 1); + for(int i=0; i='0' && s[i]<='9') c += s[i]-'0'; + else if(s[i]>='a' && s[i]<='f') c += s[i]-'a'+10; + else PK_FATAL_ERROR(); + c <<= 4; + if(s[i+1]>='0' && s[i+1]<='9') c += s[i+1]-'0'; + else if(s[i+1]>='a' && s[i+1]<='f') c += s[i+1]-'a'+10; + else PK_FATAL_ERROR(); + buffer[i/2] = c; + } + return std::pair(buffer, s.size()/2); + } + + i64 TokenDeserializer::read_int(char c){ + std::string_view sv = read_string(c); + i64 out; + IntParsingResult res = parse_int(sv, &out, 10); + PK_ASSERT(res == IntParsingResult::Success); + return out; + } + + f64 TokenDeserializer::read_float(char c){ + std::string_view sv = read_string(c); + return std::stod(std::string(sv)); + } } // namespace pkpy \ No newline at end of file diff --git a/src/error.cpp b/src/error.cpp index f3333fa0..3b7820c4 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -13,6 +13,11 @@ namespace pkpy{ index++; } this->source = ss.str(); + if(this->source.sv().substr(0, 5) == "pkpy:"){ /* prefix test: precompiled blobs start with the "pkpy:" tag written by Compiler::precompile */ + this->is_precompiled = true; + }else{ + this->is_precompiled = false; + } line_starts.push_back(this->source.c_str()); } @@ -35,7 +40,7 @@ namespace pkpy{ SStream ss; ss << " " << "File \"" << filename << "\", line " << lineno; if(!name.empty()) ss << ", in " << name; - if(!source.empty()){ + if(!source.empty() && !is_precompiled){ ss << '\n'; std::pair pair = _get_line(lineno); Str line = ""; diff --git a/src/str.cpp b/src/str.cpp index b0308ffb..195d1d68 100644 --- a/src/str.cpp +++ b/src/str.cpp @@ -255,8 +255,8 @@ int utf8len(unsigned char c, bool suppress){ default: if ('\x00' <= c && c <= '\x1f') { ss << "\\x"; // << std::hex << std::setw(2) << std::setfill('0') << (int)c; - ss << "0123456789abcdef"[c >> 4]; - ss << "0123456789abcdef"[c & 0xf]; + ss << PK_HEX_TABLE[c >> 4]; + ss << 
PK_HEX_TABLE[c & 0xf]; } else { ss << c; } @@ -495,11 +495,11 @@ int utf8len(unsigned char c, bool suppress){ unsigned char high = c >> 4; unsigned char low = c & 0xf; if(non_zero){ - if(high) (*this) << "0123456789abcdef"[high]; - if(high || low) (*this) << "0123456789abcdef"[low]; + if(high) (*this) << PK_HEX_TABLE[high]; + if(high || low) (*this) << PK_HEX_TABLE[low]; }else{ - (*this) << "0123456789abcdef"[high]; - (*this) << "0123456789abcdef"[low]; + (*this) << PK_HEX_TABLE[high]; + (*this) << PK_HEX_TABLE[low]; } }