mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-20 11:30:18 +00:00
use error code
This commit is contained in:
parent
783547a481
commit
81c4853f04
@ -15,6 +15,6 @@ struct GIL {
|
||||
#define PK_GLOBAL_SCOPE_LOCK() GIL _lock;
|
||||
|
||||
#else
|
||||
#define PK_THREAD_LOCAL
|
||||
#define PK_THREAD_LOCAL static
|
||||
#define PK_GLOBAL_SCOPE_LOCK()
|
||||
#endif
|
||||
|
@ -25,7 +25,7 @@ struct Str {
|
||||
Str(std::string_view s);
|
||||
Str(const char* s);
|
||||
Str(const char* s, int len);
|
||||
Str(std::pair<char*, int>);
|
||||
Str(pair<char*, int>); // take ownership
|
||||
Str(const Str& other);
|
||||
Str(Str&& other);
|
||||
|
||||
|
@ -15,6 +15,13 @@ struct explicit_copy_t {
|
||||
explicit explicit_copy_t() = default;
|
||||
};
|
||||
|
||||
// Minimal two-member value holder with public `first` / `second` fields.
// Other hunks of this diff switch code from std::pair to this type.
template <typename K, typename V>
|
||||
struct pair {
|
||||
K first;
|
||||
V second;
|
||||
// Takes both arguments by value and copy-initializes the members from them.
pair(K first, V second) : first(first), second(second) {}
|
||||
};
|
||||
|
||||
// Dummy types
|
||||
struct DummyInstance {};
|
||||
|
||||
|
@ -13,6 +13,8 @@ namespace pkpy {
|
||||
|
||||
template <typename T>
|
||||
struct array {
|
||||
static_assert(is_pod_v<T>);
|
||||
|
||||
T* _data;
|
||||
int _size;
|
||||
|
||||
@ -39,10 +41,7 @@ struct array {
|
||||
array(T* data, int size) : _data(data), _size(size) {}
|
||||
|
||||
array& operator= (array&& other) noexcept {
|
||||
if(_data) {
|
||||
std::destroy(begin(), end());
|
||||
std::free(_data);
|
||||
}
|
||||
if(_data) std::free(_data);
|
||||
_data = other._data;
|
||||
_size = other._size;
|
||||
other._data = nullptr;
|
||||
@ -70,18 +69,15 @@ struct array {
|
||||
|
||||
T* data() const { return _data; }
|
||||
|
||||
std::pair<T*, int> detach() noexcept {
|
||||
std::pair<T*, int> retval(_data, _size);
|
||||
pair<T*, int> detach() noexcept {
|
||||
pair<T*, int> retval(_data, _size);
|
||||
_data = nullptr;
|
||||
_size = 0;
|
||||
return retval;
|
||||
}
|
||||
|
||||
~array() {
|
||||
if(_data) {
|
||||
std::destroy(begin(), end());
|
||||
std::free(_data);
|
||||
}
|
||||
if(_data) std::free(_data);
|
||||
}
|
||||
};
|
||||
|
||||
@ -260,8 +256,8 @@ struct vector {
|
||||
return retval;
|
||||
}
|
||||
|
||||
std::pair<T*, int> detach() noexcept {
|
||||
std::pair<T*, int> retval(_data, _size);
|
||||
pair<T*, int> detach() noexcept {
|
||||
pair<T*, int> retval(_data, _size);
|
||||
_data = nullptr;
|
||||
_capacity = 0;
|
||||
_size = 0;
|
||||
|
@ -1,11 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include "pocketpy/compiler/expr.hpp"
|
||||
#include "pocketpy/objects/error.hpp"
|
||||
|
||||
namespace pkpy {
|
||||
|
||||
class Compiler;
|
||||
typedef void (Compiler::*PrattCallback)();
|
||||
struct Compiler;
|
||||
typedef Error* (Compiler::*PrattCallback)() noexcept;
|
||||
|
||||
struct PrattRule {
|
||||
PrattCallback prefix;
|
||||
@ -13,7 +14,7 @@ struct PrattRule {
|
||||
Precedence precedence;
|
||||
};
|
||||
|
||||
class Compiler {
|
||||
struct Compiler {
|
||||
PK_ALWAYS_PASS_BY_POINTER(Compiler)
|
||||
|
||||
static PrattRule rules[kTokenCount];
|
||||
@ -24,47 +25,41 @@ class Compiler {
|
||||
bool unknown_global_scope; // for eval/exec() call
|
||||
// for parsing token stream
|
||||
int i = 0;
|
||||
vector<Token> tokens;
|
||||
|
||||
const Token& prev() const { return tokens[i - 1]; }
|
||||
const Token& tk(int i) const noexcept{ return lexer.nexts[i]; }
|
||||
const Token& prev() const noexcept{ return tk(i - 1); }
|
||||
const Token& curr() const noexcept{ return tk(i); }
|
||||
const Token& next() const noexcept{ return tk(i + 1); }
|
||||
|
||||
const Token& curr() const { return tokens[i]; }
|
||||
|
||||
const Token& next() const { return tokens[i + 1]; }
|
||||
|
||||
const Token& err() const {
|
||||
if(i >= tokens.size()) return prev();
|
||||
const Token& err() const noexcept{
|
||||
if(i >= lexer.nexts.size()) return prev();
|
||||
return curr();
|
||||
}
|
||||
|
||||
void advance(int delta = 1) { i += delta; }
|
||||
void advance(int delta = 1) noexcept{ i += delta; }
|
||||
|
||||
CodeEmitContext* ctx() { return &contexts.back(); }
|
||||
CodeEmitContext* ctx() noexcept{ return &contexts.back(); }
|
||||
vector<Expr*>& s_expr() noexcept{ return ctx()->s_expr; }
|
||||
|
||||
CompileMode mode() const { return lexer.src->mode; }
|
||||
CompileMode mode() const noexcept{ return lexer.src->mode; }
|
||||
|
||||
NameScope name_scope() const;
|
||||
CodeObject_ push_global_context();
|
||||
FuncDecl_ push_f_context(Str name);
|
||||
void pop_context();
|
||||
NameScope name_scope() const noexcept;
|
||||
CodeObject_ push_global_context() noexcept;
|
||||
FuncDecl_ push_f_context(Str name) noexcept;
|
||||
|
||||
static void init_pratt_rules();
|
||||
|
||||
bool match(TokenIndex expected);
|
||||
void consume(TokenIndex expected);
|
||||
bool match_newlines_repl();
|
||||
|
||||
bool match_newlines(bool repl_throw = false);
|
||||
bool match_end_stmt();
|
||||
void consume_end_stmt();
|
||||
static void init_pratt_rules() noexcept;
|
||||
|
||||
bool match(TokenIndex expected) noexcept;
|
||||
bool match_newlines_repl() noexcept{ return match_newlines(mode() == REPL_MODE); }
|
||||
bool match_newlines(bool repl_throw = false) noexcept;
|
||||
bool match_end_stmt() noexcept;
|
||||
/*************************************************/
|
||||
void EXPR();
|
||||
void EXPR_TUPLE(bool allow_slice = false);
|
||||
Expr* EXPR_VARS(); // special case for `for loop` and `comp`
|
||||
[[nodiscard]] Error* EXPR() noexcept{ return parse_expression(PREC_LOWEST + 1); }
|
||||
[[nodiscard]] Error* EXPR_TUPLE(bool allow_slice = false) noexcept;
|
||||
[[nodiscard]] Error* EXPR_VARS() noexcept; // special case for `for loop` and `comp`
|
||||
|
||||
template <typename T, typename... Args>
|
||||
T* make_expr(Args&&... args) {
|
||||
T* make_expr(Args&&... args) noexcept{
|
||||
static_assert(sizeof(T) <= kPoolExprBlockSize);
|
||||
static_assert(std::is_base_of_v<Expr, T>);
|
||||
void* p = PoolExpr_alloc();
|
||||
@ -73,87 +68,63 @@ class Compiler {
|
||||
return expr;
|
||||
}
|
||||
|
||||
void consume_comp(CompExpr* ce, Expr* expr);
|
||||
[[nodiscard]] Error* consume_comp(Opcode op0, Opcode op1) noexcept;
|
||||
[[nodiscard]] Error* pop_context() noexcept;
|
||||
|
||||
void exprLiteral();
|
||||
void exprLong();
|
||||
void exprImag();
|
||||
void exprBytes();
|
||||
void exprFString();
|
||||
void exprLambda();
|
||||
void exprOr();
|
||||
void exprAnd();
|
||||
void exprTernary();
|
||||
void exprBinaryOp();
|
||||
void exprNot();
|
||||
void exprUnaryOp();
|
||||
void exprGroup();
|
||||
void exprList();
|
||||
void exprMap();
|
||||
void exprCall();
|
||||
void exprName();
|
||||
void exprAttrib();
|
||||
void exprSlice0();
|
||||
void exprSlice1();
|
||||
void exprSubscr();
|
||||
void exprLiteral0();
|
||||
Error* exprLiteral() noexcept;
|
||||
Error* exprLong() noexcept;
|
||||
Error* exprImag() noexcept;
|
||||
Error* exprBytes() noexcept;
|
||||
Error* exprFString() noexcept;
|
||||
Error* exprLambda() noexcept;
|
||||
Error* exprOr() noexcept;
|
||||
Error* exprAnd() noexcept;
|
||||
Error* exprTernary() noexcept;
|
||||
Error* exprBinaryOp() noexcept;
|
||||
Error* exprNot() noexcept;
|
||||
Error* exprUnaryOp() noexcept;
|
||||
Error* exprGroup() noexcept;
|
||||
Error* exprList() noexcept;
|
||||
Error* exprMap() noexcept;
|
||||
Error* exprCall() noexcept;
|
||||
Error* exprName() noexcept;
|
||||
Error* exprAttrib() noexcept;
|
||||
Error* exprSlice0() noexcept;
|
||||
Error* exprSlice1() noexcept;
|
||||
Error* exprSubscr() noexcept;
|
||||
Error* exprLiteral0() noexcept;
|
||||
|
||||
void compile_block_body(void (Compiler::*callback)() = nullptr);
|
||||
void compile_normal_import();
|
||||
void compile_from_import();
|
||||
bool is_expression(bool allow_slice = false);
|
||||
void parse_expression(int precedence, bool allow_slice = false);
|
||||
void compile_if_stmt();
|
||||
void compile_while_loop();
|
||||
void compile_for_loop();
|
||||
void compile_try_except();
|
||||
void compile_decorated();
|
||||
bool is_expression(bool allow_slice = false) noexcept;
|
||||
|
||||
bool try_compile_assignment();
|
||||
void compile_stmt();
|
||||
void consume_type_hints();
|
||||
void _add_decorators(const Expr_vector& decorators);
|
||||
void compile_class(const Expr_vector& decorators = {});
|
||||
void _compile_f_args(FuncDecl_ decl, bool enable_type_hints);
|
||||
void compile_function(const Expr_vector& decorators = {});
|
||||
[[nodiscard]] Error* compile_block_body(PrattCallback callback = NULL) noexcept;
|
||||
[[nodiscard]] Error* compile_normal_import() noexcept;
|
||||
[[nodiscard]] Error* compile_from_import() noexcept;
|
||||
[[nodiscard]] Error* parse_expression(int precedence, bool allow_slice = false) noexcept;
|
||||
[[nodiscard]] Error* compile_if_stmt() noexcept;
|
||||
[[nodiscard]] Error* compile_while_loop() noexcept;
|
||||
[[nodiscard]] Error* compile_for_loop() noexcept;
|
||||
[[nodiscard]] Error* compile_try_except() noexcept;
|
||||
[[nodiscard]] Error* compile_decorated() noexcept;
|
||||
|
||||
PyVar to_object(const TokenValue& value);
|
||||
PyVar read_literal();
|
||||
[[nodiscard]] Error* try_compile_assignment(bool* is_assign) noexcept;
|
||||
[[nodiscard]] Error* compile_stmt() noexcept;
|
||||
[[nodiscard]] Error* consume_type_hints() noexcept;
|
||||
[[nodiscard]] Error* _compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcept;
|
||||
[[nodiscard]] Error* compile_function(int decorators = 0) noexcept;
|
||||
[[nodiscard]] Error* compile_class(int decorators = 0) noexcept;
|
||||
|
||||
void SyntaxError(Str msg) { lexer.throw_err("SyntaxError", msg, err().line, err().start); }
|
||||
PyVar to_object(const TokenValue& value) noexcept;
|
||||
|
||||
void SyntaxError() { lexer.throw_err("SyntaxError", "invalid syntax", err().line, err().start); }
|
||||
[[nodiscard]] Error* read_literal(PyVar* out) noexcept;
|
||||
|
||||
void IndentationError(Str msg) { lexer.throw_err("IndentationError", msg, err().line, err().start); }
|
||||
[[nodiscard]] Error* SyntaxError(const char* msg = "invalid syntax", ...) noexcept;
|
||||
[[nodiscard]] Error* IndentationError(const char* msg) noexcept{ return lexer._error(false, "IndentationError", msg, {}); }
|
||||
[[nodiscard]] Error* NeedMoreLines() noexcept{ return lexer._error(false, "NeedMoreLines", "", {}, (i64)ctx()->is_compiling_class); }
|
||||
|
||||
public:
|
||||
Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope = false);
|
||||
Str precompile();
|
||||
void from_precompiled(const char* source);
|
||||
CodeObject_ compile();
|
||||
};
|
||||
|
||||
struct TokenDeserializer {
|
||||
const char* curr;
|
||||
const char* source;
|
||||
|
||||
TokenDeserializer(const char* source) : curr(source), source(source) {}
|
||||
|
||||
char read_char() { return *curr++; }
|
||||
|
||||
bool match_char(char c) {
|
||||
if(*curr == c) {
|
||||
curr++;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string_view read_string(char c);
|
||||
Str read_string_from_hex(char c);
|
||||
int read_count();
|
||||
i64 read_uint(char c);
|
||||
f64 read_float(char c);
|
||||
Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope = false) noexcept;
|
||||
[[nodiscard]] Error* compile(CodeObject_* out) noexcept;
|
||||
~Compiler();
|
||||
};
|
||||
|
||||
} // namespace pkpy
|
||||
|
@ -56,7 +56,7 @@ inline void delete_expr(Expr* p){
|
||||
PoolExpr_dealloc(p);
|
||||
}
|
||||
|
||||
struct CodeEmitContext {
|
||||
struct CodeEmitContext{
|
||||
VM* vm;
|
||||
FuncDecl_ func; // optional
|
||||
CodeObject_ co; // 1 CodeEmitContext <=> 1 CodeObject_
|
||||
@ -72,21 +72,22 @@ struct CodeEmitContext {
|
||||
small_map<PyVar, int> _co_consts_nonstring_dedup_map;
|
||||
small_map<std::string_view, int> _co_consts_string_dedup_map;
|
||||
|
||||
int get_loop() const;
|
||||
CodeBlock* enter_block(CodeBlockType type);
|
||||
void exit_block();
|
||||
void emit_expr(bool emit = true); // clear the expression stack and generate bytecode
|
||||
int emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual = false);
|
||||
void revert_last_emit_();
|
||||
int emit_int(i64 value, int line);
|
||||
void patch_jump(int index);
|
||||
bool add_label(StrName name);
|
||||
int add_varname(StrName name);
|
||||
int add_const(PyVar);
|
||||
int add_const_string(std::string_view);
|
||||
int add_func_decl(FuncDecl_ decl);
|
||||
void emit_store_name(NameScope scope, StrName name, int line);
|
||||
void try_merge_for_iter_store(int);
|
||||
int get_loop() const noexcept;
|
||||
CodeBlock* enter_block(CodeBlockType type) noexcept;
|
||||
void exit_block() noexcept;
|
||||
void emit_expr(bool emit = true) noexcept; // clear the expression stack and generate bytecode
|
||||
void emit_decorators(int count) noexcept;
|
||||
int emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual = false) noexcept;
|
||||
void revert_last_emit_() noexcept;
|
||||
int emit_int(i64 value, int line) noexcept;
|
||||
void patch_jump(int index) noexcept;
|
||||
bool add_label(StrName name) noexcept;
|
||||
int add_varname(StrName name) noexcept;
|
||||
int add_const(PyVar) noexcept;
|
||||
int add_const_string(std::string_view) noexcept;
|
||||
int add_func_decl(FuncDecl_ decl) noexcept;
|
||||
void emit_store_name(NameScope scope, StrName name, int line) noexcept;
|
||||
void try_merge_for_iter_store(int) noexcept;
|
||||
};
|
||||
|
||||
struct NameExpr : Expr {
|
||||
@ -236,15 +237,14 @@ struct DictItemExpr : Expr {
|
||||
};
|
||||
|
||||
struct SequenceExpr : Expr {
|
||||
Expr_vector items;
|
||||
array<Expr*> items;
|
||||
|
||||
SequenceExpr(Expr_vector&& items) : items(std::move(items)) {}
|
||||
SequenceExpr(int count) : items(count) {}
|
||||
|
||||
virtual Opcode opcode() const = 0;
|
||||
|
||||
void emit_(CodeEmitContext* ctx) override {
|
||||
for(auto& item: items)
|
||||
item->emit_(ctx);
|
||||
for(auto& item: items) item->emit_(ctx);
|
||||
ctx->emit_(opcode(), items.size(), line);
|
||||
}
|
||||
|
||||
@ -308,8 +308,10 @@ struct CompExpr : Expr {
|
||||
Expr* iter = nullptr; // loop iter
|
||||
Expr* cond = nullptr; // optional if condition
|
||||
|
||||
virtual Opcode op0() = 0;
|
||||
virtual Opcode op1() = 0;
|
||||
Opcode op0;
|
||||
Opcode op1;
|
||||
|
||||
CompExpr(Opcode op0, Opcode op1) : op0(op0), op1(op1) {}
|
||||
|
||||
void emit_(CodeEmitContext* ctx) override;
|
||||
|
||||
@ -321,24 +323,6 @@ struct CompExpr : Expr {
|
||||
}
|
||||
};
|
||||
|
||||
struct ListCompExpr : CompExpr {
|
||||
Opcode op0() override { return OP_BUILD_LIST; }
|
||||
|
||||
Opcode op1() override { return OP_LIST_APPEND; }
|
||||
};
|
||||
|
||||
struct DictCompExpr : CompExpr {
|
||||
Opcode op0() override { return OP_BUILD_DICT; }
|
||||
|
||||
Opcode op1() override { return OP_DICT_ADD; }
|
||||
};
|
||||
|
||||
struct SetCompExpr : CompExpr {
|
||||
Opcode op0() override { return OP_BUILD_SET; }
|
||||
|
||||
Opcode op1() override { return OP_SET_ADD; }
|
||||
};
|
||||
|
||||
struct LambdaExpr : Expr {
|
||||
FuncDecl_ decl;
|
||||
|
||||
@ -391,7 +375,7 @@ struct CallExpr : Expr {
|
||||
Expr* callable;
|
||||
Expr_vector args;
|
||||
// **a will be interpreted as a special keyword argument: {"**": a}
|
||||
vector<std::pair<StrName, Expr*>> kwargs;
|
||||
vector<pair<StrName, Expr*>> kwargs;
|
||||
void emit_(CodeEmitContext* ctx) override;
|
||||
|
||||
~CallExpr() {
|
||||
|
@ -92,7 +92,7 @@ enum Precedence {
|
||||
PREC_HIGHEST,
|
||||
};
|
||||
|
||||
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES };
|
||||
enum class StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES };
|
||||
|
||||
struct Lexer {
|
||||
VM* vm;
|
||||
@ -104,38 +104,38 @@ struct Lexer {
|
||||
small_vector_2<int, 8> indents;
|
||||
int brackets_level = 0;
|
||||
|
||||
char peekchar() const { return *curr_char; }
|
||||
char peekchar() const noexcept { return *curr_char; }
|
||||
|
||||
bool match_n_chars(int n, char c0);
|
||||
bool match_string(const char* s);
|
||||
int eat_spaces();
|
||||
bool match_n_chars(int n, char c0) noexcept;
|
||||
bool match_string(const char* s) noexcept;
|
||||
int eat_spaces() noexcept;
|
||||
|
||||
bool eat_indentation();
|
||||
char eatchar();
|
||||
char eatchar_include_newline();
|
||||
int eat_name();
|
||||
void skip_line_comment();
|
||||
bool matchchar(char c);
|
||||
void add_token(TokenIndex type, TokenValue value = {});
|
||||
void add_token_2(char c, TokenIndex one, TokenIndex two);
|
||||
Str eat_string_until(char quote, bool raw);
|
||||
void eat_string(char quote, StringType type);
|
||||
bool eat_indentation() noexcept;
|
||||
char eatchar() noexcept;
|
||||
char eatchar_include_newline() noexcept;
|
||||
void skip_line_comment() noexcept;
|
||||
bool matchchar(char c) noexcept;
|
||||
void add_token(TokenIndex type, TokenValue value = {}) noexcept;
|
||||
void add_token_2(char c, TokenIndex one, TokenIndex two) noexcept;
|
||||
|
||||
void eat_number();
|
||||
bool lex_one_token();
|
||||
[[nodiscard]] Error* eat_name() noexcept;
|
||||
[[nodiscard]] Error* eat_string_until(char quote, bool raw, Str* out) noexcept;
|
||||
[[nodiscard]] Error* eat_string(char quote, StringType type) noexcept;
|
||||
[[nodiscard]] Error* eat_number() noexcept;
|
||||
[[nodiscard]] Error* lex_one_token(bool* eof) noexcept;
|
||||
|
||||
/***** Error Reporter *****/
|
||||
[[noreturn]] void throw_err(StrName type, Str msg);
|
||||
[[noreturn]] void throw_err(StrName type, Str msg, int lineno, const char* cursor);
|
||||
[[nodiscard]] Error* _error(bool lexer_err, const char* type, const char* msg, va_list args, i64 userdata=0) noexcept;
|
||||
[[nodiscard]] Error* SyntaxError(const char* fmt, ...) noexcept;
|
||||
[[nodiscard]] Error* IndentationError(const char* msg) noexcept { return _error(true, "IndentationError", msg, {}); }
|
||||
[[nodiscard]] Error* NeedMoreLines() noexcept { return _error(true, "NeedMoreLines", "", {}, 0); }
|
||||
|
||||
[[noreturn]] void SyntaxError(Str msg) { throw_err("SyntaxError", msg); }
|
||||
Lexer(VM* vm, std::shared_ptr<SourceData> src) noexcept;
|
||||
|
||||
[[noreturn]] void SyntaxError() { throw_err("SyntaxError", "invalid syntax"); }
|
||||
[[nodiscard]] Error* run() noexcept;
|
||||
|
||||
[[noreturn]] void IndentationError(Str msg) { throw_err("IndentationError", msg); }
|
||||
|
||||
Lexer(VM* vm, std::shared_ptr<SourceData> src);
|
||||
vector<Token> run();
|
||||
void from_precompiled();
|
||||
[[nodiscard]] Error* precompile(Str* out);
|
||||
};
|
||||
|
||||
enum class IntParsingResult {
|
||||
@ -144,6 +144,29 @@ enum class IntParsingResult {
|
||||
Overflow,
|
||||
};
|
||||
|
||||
IntParsingResult parse_uint(std::string_view text, i64* out, int base);
|
||||
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept;
|
||||
|
||||
// Forward-only character cursor over a C string.
// NOTE(review): nothing here bounds-checks `curr`; presumably the buffer is
// NUL-terminated and callers stop before the end -- confirm against callers.
struct TokenDeserializer {
|
||||
// Current read position; advanced by read_char() / match_char().
const char* curr;
|
||||
// Pointer passed to the constructor; not modified by the inline members.
const char* source;
|
||||
|
||||
// Starts reading at the beginning of `source`.
TokenDeserializer(const char* source) : curr(source), source(source) {}
|
||||
|
||||
// Returns the character at the cursor, then advances the cursor by one.
char read_char() { return *curr++; }
|
||||
|
||||
// If the character at the cursor equals `c`, consumes it and returns true;
// otherwise leaves the cursor untouched and returns false.
bool match_char(char c) {
|
||||
if(*curr == c) {
|
||||
curr++;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Declarations only; definitions are outside this view.
// NOTE(review): `c` looks like a terminating delimiter for each reader -- confirm.
std::string_view read_string(char c);
|
||||
Str read_string_from_hex(char c);
|
||||
int read_count();
|
||||
i64 read_uint(char c);
|
||||
f64 read_float(char c);
|
||||
};
|
||||
|
||||
} // namespace pkpy
|
||||
|
@ -38,7 +38,7 @@ struct VoidP {
|
||||
|
||||
#define POINTER_VAR(Tp, NAME) \
|
||||
inline PyVar py_var(VM* vm, Tp val) { \
|
||||
const static std::pair<StrName, StrName> P("c", NAME); \
|
||||
const static pair<StrName, StrName> P("c", NAME); \
|
||||
PyVar type = vm->_modules[P.first]->attr(P.second); \
|
||||
return vm->new_object<VoidP>(type->as<Type>(), val); \
|
||||
}
|
||||
|
@ -463,7 +463,6 @@ public:
|
||||
vm->s_data.emplace(p->type, p);
|
||||
}
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
template <typename T>
|
||||
Type _find_type_in_cxx_typeid_map() {
|
||||
@ -500,31 +499,26 @@ public:
|
||||
[[noreturn]] void __builtin_error(StrName type);
|
||||
[[noreturn]] void __builtin_error(StrName type, PyVar arg);
|
||||
[[noreturn]] void __builtin_error(StrName type, const Str& msg);
|
||||
[[noreturn]] void __compile_error(Error* err);
|
||||
void __init_builtin_types();
|
||||
void __post_init_builtin_types();
|
||||
|
||||
void __push_varargs() {}
|
||||
|
||||
void __push_varargs(PyVar _0) { PUSH(_0); }
|
||||
|
||||
void __push_varargs(PyVar _0, PyVar _1) {
|
||||
PUSH(_0);
|
||||
PUSH(_1);
|
||||
}
|
||||
|
||||
void __push_varargs(PyVar _0, PyVar _1, PyVar _2) {
|
||||
PUSH(_0);
|
||||
PUSH(_1);
|
||||
PUSH(_2);
|
||||
}
|
||||
|
||||
void __push_varargs(PyVar _0, PyVar _1, PyVar _2, PyVar _3) {
|
||||
PUSH(_0);
|
||||
PUSH(_1);
|
||||
PUSH(_2);
|
||||
PUSH(_3);
|
||||
}
|
||||
|
||||
PyVar __pack_next_retval(unsigned);
|
||||
PyVar __minmax_reduce(bool (VM::*op)(PyVar, PyVar), PyVar args, PyVar key);
|
||||
bool __py_bool_non_trivial(PyVar);
|
||||
@ -539,95 +533,26 @@ constexpr inline bool is_immutable_v =
|
||||
std::is_same_v<T, Bytes> || std::is_same_v<T, bool> || std::is_same_v<T, Range> || std::is_same_v<T, Slice> ||
|
||||
std::is_pointer_v<T> || std::is_enum_v<T>;
|
||||
|
||||
template <typename T>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map() {
|
||||
return Type();
|
||||
}
|
||||
// Compile-time map from a C++ type to its built-in VM type id.
// The primary template returns a default-constructed Type(); each explicit
// specialization below returns the matching VM::tp_* constant.
template<typename T> constexpr Type _tp_builtin() { return Type(); }
|
||||
template<> constexpr Type _tp_builtin<Str>() { return VM::tp_str; }
|
||||
template<> constexpr Type _tp_builtin<List>() { return VM::tp_list; }
|
||||
template<> constexpr Type _tp_builtin<Tuple>() { return VM::tp_tuple; }
|
||||
template<> constexpr Type _tp_builtin<Function>() { return VM::tp_function; }
|
||||
template<> constexpr Type _tp_builtin<NativeFunc>() { return VM::tp_native_func; }
|
||||
template<> constexpr Type _tp_builtin<BoundMethod>() { return VM::tp_bound_method; }
|
||||
template<> constexpr Type _tp_builtin<Range>() { return VM::tp_range; }
|
||||
template<> constexpr Type _tp_builtin<Slice>() { return VM::tp_slice; }
|
||||
template<> constexpr Type _tp_builtin<Exception>() { return VM::tp_exception; }
|
||||
template<> constexpr Type _tp_builtin<Bytes>() { return VM::tp_bytes; }
|
||||
template<> constexpr Type _tp_builtin<MappingProxy>() { return VM::tp_mappingproxy; }
|
||||
template<> constexpr Type _tp_builtin<Dict>() { return VM::tp_dict; }
|
||||
template<> constexpr Type _tp_builtin<Property>() { return VM::tp_property; }
|
||||
template<> constexpr Type _tp_builtin<StarWrapper>() { return VM::tp_star_wrapper; }
|
||||
template<> constexpr Type _tp_builtin<StaticMethod>() { return VM::tp_staticmethod; }
|
||||
template<> constexpr Type _tp_builtin<ClassMethod>() { return VM::tp_classmethod; }
|
||||
template<> constexpr Type _tp_builtin<StackMemory>() { return VM::tp_stack_memory; }
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Str>() {
|
||||
return VM::tp_str;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<List>() {
|
||||
return VM::tp_list;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Tuple>() {
|
||||
return VM::tp_tuple;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Function>() {
|
||||
return VM::tp_function;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<NativeFunc>() {
|
||||
return VM::tp_native_func;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<BoundMethod>() {
|
||||
return VM::tp_bound_method;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Range>() {
|
||||
return VM::tp_range;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Slice>() {
|
||||
return VM::tp_slice;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Exception>() {
|
||||
return VM::tp_exception;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Bytes>() {
|
||||
return VM::tp_bytes;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<MappingProxy>() {
|
||||
return VM::tp_mappingproxy;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Dict>() {
|
||||
return VM::tp_dict;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<Property>() {
|
||||
return VM::tp_property;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<StarWrapper>() {
|
||||
return VM::tp_star_wrapper;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<StaticMethod>() {
|
||||
return VM::tp_staticmethod;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<ClassMethod>() {
|
||||
return VM::tp_classmethod;
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr Type _find_type_in_const_cxx_typeid_map<StackMemory>() {
|
||||
return VM::tp_stack_memory;
|
||||
}
|
||||
// clang-format on
|
||||
|
||||
template <typename __T>
|
||||
PyVar py_var(VM* vm, __T&& value) {
|
||||
@ -654,7 +579,7 @@ PyVar py_var(VM* vm, __T&& value) {
|
||||
} else if constexpr(std::is_pointer_v<T>) {
|
||||
return from_void_p(vm, (void*)value);
|
||||
} else {
|
||||
constexpr Type const_type = _find_type_in_const_cxx_typeid_map<T>();
|
||||
constexpr Type const_type = _tp_builtin<T>();
|
||||
if constexpr((bool)const_type) {
|
||||
if constexpr(is_sso_v<T>)
|
||||
return PyVar(const_type, value);
|
||||
@ -715,7 +640,7 @@ __T _py_cast__internal(VM* vm, PyVar obj) {
|
||||
static_assert(!std::is_reference_v<__T>);
|
||||
return to_void_p<T>(vm, obj);
|
||||
} else {
|
||||
constexpr Type const_type = _find_type_in_const_cxx_typeid_map<T>();
|
||||
constexpr Type const_type = _tp_builtin<T>();
|
||||
if constexpr((bool)const_type) {
|
||||
if constexpr(with_check) {
|
||||
if constexpr(std::is_same_v<T, Exception>) {
|
||||
|
@ -33,14 +33,14 @@ struct Exception {
|
||||
PyObject* _self; // weak reference
|
||||
|
||||
struct Frame {
|
||||
std::shared_ptr<SourceData> src;
|
||||
SourceData* src; // weak ref
|
||||
int lineno;
|
||||
const char* cursor;
|
||||
std::string name;
|
||||
|
||||
Str snapshot() const { return src->snapshot(lineno, cursor, name); }
|
||||
|
||||
Frame(std::shared_ptr<SourceData> src, int lineno, const char* cursor, std::string_view name) :
|
||||
Frame(SourceData* src, int lineno, const char* cursor, std::string_view name) :
|
||||
src(src), lineno(lineno), cursor(cursor), name(name) {}
|
||||
};
|
||||
|
||||
@ -77,4 +77,13 @@ struct TopLevelException : std::exception {
|
||||
}
|
||||
};
|
||||
|
||||
// Plain error record with no member functions.
struct Error{
|
||||
// Error kind as a C string.
const char* type;
|
||||
// Source the error refers to, held as a raw pointer.
// NOTE(review): presumably non-owning -- confirm the lifetime guarantee.
SourceData* src;
|
||||
int lineno;
|
||||
const char* cursor;
|
||||
// Fixed 100-byte message buffer.
// NOTE(review): how longer messages are handled is not visible here -- confirm truncation.
char msg[100];
|
||||
// Extra integer payload; meaning depends on the error kind (not shown here).
i64 userdata;
|
||||
};
|
||||
|
||||
} // namespace pkpy
|
||||
|
@ -10,9 +10,9 @@ namespace pkpy {
|
||||
struct NameDict {
|
||||
PK_ALWAYS_PASS_BY_POINTER(NameDict)
|
||||
|
||||
using Item = std::pair<StrName, PyVar>;
|
||||
using Item = pair<StrName, PyVar>;
|
||||
|
||||
constexpr static uint16_t kInitialCapacity = 16;
|
||||
static_assert(is_pod_v<PyVar>);
|
||||
|
||||
float _load_factor;
|
||||
uint16_t _size;
|
||||
|
@ -21,7 +21,7 @@ struct SourceData {
|
||||
|
||||
SourceData(std::string_view source, const Str& filename, CompileMode mode);
|
||||
SourceData(const Str& filename, CompileMode mode);
|
||||
std::pair<const char*, const char*> _get_line(int lineno) const;
|
||||
pair<const char*, const char*> _get_line(int lineno) const;
|
||||
std::string_view get_line(int lineno) const;
|
||||
Str snapshot(int lineno, const char* cursor, std::string_view name) const;
|
||||
};
|
||||
|
@ -52,7 +52,7 @@ Str::Str(int size, bool is_ascii) :
|
||||
Str::Str(const char* s, int len) :
|
||||
size(len), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
|
||||
|
||||
Str::Str(std::pair<char*, int> detached) : size(detached.second), is_ascii(true) {
|
||||
Str::Str(pair<char*, int> detached) : size(detached.second), is_ascii(true) {
|
||||
this->data = detached.first;
|
||||
for(int i = 0; i < size; i++) {
|
||||
if(!isascii(data[i])) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -13,7 +13,7 @@ inline bool is_identifier(std::string_view s) {
|
||||
|
||||
// True when `value` lies in the closed interval [INT16_MIN, INT16_MAX].
inline bool is_small_int(i64 value) {
    if(value < INT16_MIN) return false;
    return value <= INT16_MAX;
}
|
||||
|
||||
int CodeEmitContext::get_loop() const {
|
||||
int CodeEmitContext::get_loop() const noexcept{
|
||||
int index = curr_iblock;
|
||||
while(index >= 0) {
|
||||
if(co->blocks[index].type == CodeBlockType::FOR_LOOP) break;
|
||||
@ -23,13 +23,13 @@ int CodeEmitContext::get_loop() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
// Appends a new CodeBlock of `type` to co->blocks, built from the current block
// index and the current size of co->codes, makes it the current block, and
// returns a pointer to it.
// (The two signature rows are the pre- and post-change forms shown by the diff view.)
CodeBlock* CodeEmitContext::enter_block(CodeBlockType type) {
|
||||
CodeBlock* CodeEmitContext::enter_block(CodeBlockType type) noexcept{
|
||||
co->blocks.push_back(CodeBlock(type, curr_iblock, (int)co->codes.size()));
|
||||
// The block just pushed is the last element.
curr_iblock = co->blocks.size() - 1;
|
||||
return &co->blocks[curr_iblock];
|
||||
}
|
||||
|
||||
void CodeEmitContext::exit_block() {
|
||||
void CodeEmitContext::exit_block() noexcept{
|
||||
auto curr_type = co->blocks[curr_iblock].type;
|
||||
co->blocks[curr_iblock].end = co->codes.size();
|
||||
curr_iblock = co->blocks[curr_iblock].parent;
|
||||
@ -41,14 +41,27 @@ void CodeEmitContext::exit_block() {
|
||||
}
|
||||
|
||||
// Pops one expression from the back of s_expr, emits its bytecode when `emit`
// is true, then releases it with delete_expr().
// (Pre- and post-change rows are both shown by the diff view: the old form
// asserted that s_expr held exactly one entry; the new form comments that out.)
void CodeEmitContext::emit_expr(bool emit) {
|
||||
assert(s_expr.size() == 1);
|
||||
void CodeEmitContext::emit_expr(bool emit) noexcept{
|
||||
// assert(s_expr.size() == 1);
|
||||
Expr* e = s_expr.popx_back();
|
||||
if(emit) e->emit_(this);
|
||||
delete_expr(e);
|
||||
}
|
||||
|
||||
int CodeEmitContext::emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual) {
|
||||
// Pops `count` decorator expressions from the back of s_expr. For each one it
// emits the decorator, then ROT_TWO / LOAD_NULL / ROT_TWO / CALL 1, and finally
// frees the expression. The trailing comments track the value stack.
void CodeEmitContext::emit_decorators(int count) noexcept{
|
||||
// [obj]
|
||||
for(int i=0; i<count; i++) {
|
||||
Expr* deco = s_expr.popx_back();
|
||||
deco->emit_(this); // [obj, f]
|
||||
emit_(OP_ROT_TWO, BC_NOARG, deco->line); // [f, obj]
|
||||
emit_(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); // [f, obj, NULL]
|
||||
emit_(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); // [f, NULL, obj] (assuming ROT_TWO swaps the top two entries -- confirm)
|
||||
emit_(OP_CALL, 1, deco->line); // [obj]
|
||||
delete_expr(deco);
|
||||
}
|
||||
}
|
||||
|
||||
int CodeEmitContext::emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual) noexcept{
|
||||
co->codes.push_back(Bytecode{(uint8_t)opcode, arg});
|
||||
co->lines.push_back(CodeObject::LineInfo{line, is_virtual, curr_iblock});
|
||||
int i = co->codes.size() - 1;
|
||||
@ -61,12 +74,12 @@ int CodeEmitContext::emit_(Opcode opcode, uint16_t arg, int line, bool is_virtua
|
||||
return i;
|
||||
}
|
||||
|
||||
// Removes the last entry from both co->codes and co->lines.
// NOTE(review): no emptiness check is visible -- presumably callers only
// invoke this right after an emit; confirm.
// (The two signature rows are the pre- and post-change forms shown by the diff view.)
void CodeEmitContext::revert_last_emit_() {
|
||||
void CodeEmitContext::revert_last_emit_() noexcept{
|
||||
co->codes.pop_back();
|
||||
co->lines.pop_back();
|
||||
}
|
||||
|
||||
void CodeEmitContext::try_merge_for_iter_store(int i) {
|
||||
void CodeEmitContext::try_merge_for_iter_store(int i) noexcept{
|
||||
// [FOR_ITER, STORE_?, ]
|
||||
if(co->codes[i].op != OP_FOR_ITER) return;
|
||||
if(co->codes.size() - i != 2) return;
|
||||
@ -85,7 +98,7 @@ void CodeEmitContext::try_merge_for_iter_store(int i) {
|
||||
}
|
||||
}
|
||||
|
||||
int CodeEmitContext::emit_int(i64 value, int line) {
|
||||
int CodeEmitContext::emit_int(i64 value, int line) noexcept{
|
||||
if(is_small_int(value)) {
|
||||
return emit_(OP_LOAD_SMALL_INT, (uint16_t)value, line);
|
||||
} else {
|
||||
@ -93,18 +106,18 @@ int CodeEmitContext::emit_int(i64 value, int line) {
|
||||
}
|
||||
}
|
||||
|
||||
void CodeEmitContext::patch_jump(int index) {
|
||||
void CodeEmitContext::patch_jump(int index) noexcept{
|
||||
int target = co->codes.size();
|
||||
co->codes[index].set_signed_arg(target - index);
|
||||
}
|
||||
|
||||
bool CodeEmitContext::add_label(StrName name) {
|
||||
bool CodeEmitContext::add_label(StrName name) noexcept{
|
||||
if(co->labels.contains(name)) return false;
|
||||
co->labels.insert(name, co->codes.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
int CodeEmitContext::add_varname(StrName name) {
|
||||
int CodeEmitContext::add_varname(StrName name) noexcept{
|
||||
// PK_MAX_CO_VARNAMES will be checked when pop_context(), not here
|
||||
int index = co->varnames_inv.get(name, -1);
|
||||
if(index >= 0) return index;
|
||||
@ -115,7 +128,7 @@ int CodeEmitContext::add_varname(StrName name) {
|
||||
return index;
|
||||
}
|
||||
|
||||
int CodeEmitContext::add_const_string(std::string_view key) {
|
||||
int CodeEmitContext::add_const_string(std::string_view key) noexcept{
|
||||
int* val = _co_consts_string_dedup_map.try_get(key);
|
||||
if(val) {
|
||||
return *val;
|
||||
@ -128,7 +141,7 @@ int CodeEmitContext::add_const_string(std::string_view key) {
|
||||
}
|
||||
}
|
||||
|
||||
int CodeEmitContext::add_const(PyVar v) {
|
||||
int CodeEmitContext::add_const(PyVar v) noexcept{
|
||||
assert(!is_type(v, VM::tp_str));
|
||||
// non-string deduplication
|
||||
int* val = _co_consts_nonstring_dedup_map.try_get(v);
|
||||
@ -142,12 +155,12 @@ int CodeEmitContext::add_const(PyVar v) {
|
||||
}
|
||||
}
|
||||
|
||||
int CodeEmitContext::add_func_decl(FuncDecl_ decl) {
|
||||
int CodeEmitContext::add_func_decl(FuncDecl_ decl) noexcept{
|
||||
co->func_decls.push_back(decl);
|
||||
return co->func_decls.size() - 1;
|
||||
}
|
||||
|
||||
void CodeEmitContext::emit_store_name(NameScope scope, StrName name, int line) {
|
||||
void CodeEmitContext::emit_store_name(NameScope scope, StrName name, int line) noexcept{
|
||||
switch(scope) {
|
||||
case NAME_LOCAL: emit_(OP_STORE_FAST, add_varname(name), line); break;
|
||||
case NAME_GLOBAL: emit_(OP_STORE_GLOBAL, StrName(name).index, line); break;
|
||||
@ -321,8 +334,8 @@ void DictItemExpr::emit_(CodeEmitContext* ctx) {
|
||||
assert(key == nullptr);
|
||||
value->emit_(ctx);
|
||||
} else {
|
||||
key->emit_(ctx);
|
||||
value->emit_(ctx);
|
||||
key->emit_(ctx); // reverse order
|
||||
ctx->emit_(OP_BUILD_TUPLE, 2, line);
|
||||
}
|
||||
}
|
||||
@ -378,7 +391,7 @@ bool TupleExpr::emit_del(CodeEmitContext* ctx) {
|
||||
}
|
||||
|
||||
void CompExpr::emit_(CodeEmitContext* ctx) {
|
||||
ctx->emit_(op0(), 0, line);
|
||||
ctx->emit_(op0, 0, line);
|
||||
iter->emit_(ctx);
|
||||
ctx->emit_(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
|
||||
ctx->enter_block(CodeBlockType::FOR_LOOP);
|
||||
@ -392,11 +405,11 @@ void CompExpr::emit_(CodeEmitContext* ctx) {
|
||||
cond->emit_(ctx);
|
||||
int patch = ctx->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
|
||||
expr->emit_(ctx);
|
||||
ctx->emit_(op1(), BC_NOARG, BC_KEEPLINE);
|
||||
ctx->emit_(op1, BC_NOARG, BC_KEEPLINE);
|
||||
ctx->patch_jump(patch);
|
||||
} else {
|
||||
expr->emit_(ctx);
|
||||
ctx->emit_(op1(), BC_NOARG, BC_KEEPLINE);
|
||||
ctx->emit_(op1, BC_NOARG, BC_KEEPLINE);
|
||||
}
|
||||
ctx->emit_(OP_LOOP_CONTINUE, curr_iblock, BC_KEEPLINE);
|
||||
ctx->exit_block();
|
||||
|
@ -1,4 +1,6 @@
|
||||
#include "pocketpy/compiler/lexer.hpp"
|
||||
#include "pocketpy/common/gil.hpp"
|
||||
#include "pocketpy/common/version.h"
|
||||
|
||||
namespace pkpy {
|
||||
|
||||
@ -7,7 +9,7 @@ static const uint32_t kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,164
|
||||
static const uint32_t kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,703
01,70312,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101};
|
||||
// clang-format on
|
||||
|
||||
static bool is_possible_number_char(char c) {
|
||||
static bool is_possible_number_char(char c) noexcept{
|
||||
switch(c) {
|
||||
// clang-format off
|
||||
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
||||
@ -20,7 +22,7 @@ static bool is_possible_number_char(char c) {
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_unicode_Lo_char(uint32_t c) {
|
||||
static bool is_unicode_Lo_char(uint32_t c) noexcept{
|
||||
// open a hole for carrot
|
||||
if(c == U'🥕') return true;
|
||||
auto index = std::lower_bound(kLoRangeA, kLoRangeA + 476, c) - kLoRangeA;
|
||||
@ -30,7 +32,7 @@ static bool is_unicode_Lo_char(uint32_t c) {
|
||||
return c >= kLoRangeA[index] && c <= kLoRangeB[index];
|
||||
}
|
||||
|
||||
bool Lexer::match_n_chars(int n, char c0) {
|
||||
bool Lexer::match_n_chars(int n, char c0) noexcept{
|
||||
const char* c = curr_char;
|
||||
for(int i = 0; i < n; i++) {
|
||||
if(*c == '\0') return false;
|
||||
@ -42,7 +44,7 @@ bool Lexer::match_n_chars(int n, char c0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Lexer::match_string(const char* s) {
|
||||
bool Lexer::match_string(const char* s) noexcept{
|
||||
int s_len = strlen(s);
|
||||
bool ok = strncmp(curr_char, s, s_len) == 0;
|
||||
if(ok)
|
||||
@ -51,7 +53,7 @@ bool Lexer::match_string(const char* s) {
|
||||
return ok;
|
||||
}
|
||||
|
||||
int Lexer::eat_spaces() {
|
||||
int Lexer::eat_spaces() noexcept{
|
||||
int count = 0;
|
||||
while(true) {
|
||||
switch(peekchar()) {
|
||||
@ -63,7 +65,7 @@ int Lexer::eat_spaces() {
|
||||
}
|
||||
}
|
||||
|
||||
bool Lexer::eat_indentation() {
|
||||
bool Lexer::eat_indentation() noexcept{
|
||||
if(brackets_level > 0) return true;
|
||||
int spaces = eat_spaces();
|
||||
if(peekchar() == '#') skip_line_comment();
|
||||
@ -82,14 +84,14 @@ bool Lexer::eat_indentation() {
|
||||
return true;
|
||||
}
|
||||
|
||||
char Lexer::eatchar() {
|
||||
char Lexer::eatchar() noexcept{
|
||||
char c = peekchar();
|
||||
assert(c != '\n'); // eatchar() cannot consume a newline
|
||||
curr_char++;
|
||||
return c;
|
||||
}
|
||||
|
||||
char Lexer::eatchar_include_newline() {
|
||||
char Lexer::eatchar_include_newline() noexcept{
|
||||
char c = peekchar();
|
||||
curr_char++;
|
||||
if(c == '\n') {
|
||||
@ -99,12 +101,12 @@ char Lexer::eatchar_include_newline() {
|
||||
return c;
|
||||
}
|
||||
|
||||
int Lexer::eat_name() {
|
||||
Error* Lexer::eat_name() noexcept{
|
||||
curr_char--;
|
||||
while(true) {
|
||||
unsigned char c = peekchar();
|
||||
int u8bytes = utf8len(c, true);
|
||||
if(u8bytes == 0) return 1;
|
||||
if(u8bytes == 0) return SyntaxError("invalid char: %c", c);
|
||||
if(u8bytes == 1) {
|
||||
if(isalpha(c) || c == '_' || isdigit(c)) {
|
||||
curr_char++;
|
||||
@ -115,7 +117,7 @@ int Lexer::eat_name() {
|
||||
}
|
||||
// handle multibyte char
|
||||
Str u8str(curr_char, u8bytes);
|
||||
if(u8str.size != u8bytes) return 2;
|
||||
if(u8str.size != u8bytes) return SyntaxError("invalid utf8 sequence: %s", u8str.c_str());
|
||||
uint32_t value = 0;
|
||||
for(int k = 0; k < u8bytes; k++) {
|
||||
uint8_t b = u8str[k];
|
||||
@ -137,7 +139,7 @@ int Lexer::eat_name() {
|
||||
}
|
||||
|
||||
int length = (int)(curr_char - token_start);
|
||||
if(length == 0) return 3;
|
||||
if(length == 0) return SyntaxError("@id contains invalid char");
|
||||
std::string_view name(token_start, length);
|
||||
|
||||
if(src->mode == JSON_MODE) {
|
||||
@ -148,9 +150,9 @@ int Lexer::eat_name() {
|
||||
} else if(name == "null") {
|
||||
add_token(TK("None"));
|
||||
} else {
|
||||
return 4;
|
||||
return SyntaxError("invalid JSON token");
|
||||
}
|
||||
return 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const auto KW_BEGIN = kTokens + TK("False");
|
||||
@ -162,10 +164,10 @@ int Lexer::eat_name() {
|
||||
} else {
|
||||
add_token(TK("@id"));
|
||||
}
|
||||
return 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Lexer::skip_line_comment() {
|
||||
void Lexer::skip_line_comment() noexcept{
|
||||
char c;
|
||||
while((c = peekchar()) != '\0') {
|
||||
if(c == '\n') return;
|
||||
@ -173,13 +175,13 @@ void Lexer::skip_line_comment() {
|
||||
}
|
||||
}
|
||||
|
||||
bool Lexer::matchchar(char c) {
|
||||
bool Lexer::matchchar(char c) noexcept{
|
||||
if(peekchar() != c) return false;
|
||||
eatchar_include_newline();
|
||||
return true;
|
||||
}
|
||||
|
||||
void Lexer::add_token(TokenIndex type, TokenValue value) {
|
||||
void Lexer::add_token(TokenIndex type, TokenValue value) noexcept{
|
||||
switch(type) {
|
||||
case TK("{"):
|
||||
case TK("["):
|
||||
@ -213,14 +215,14 @@ void Lexer::add_token(TokenIndex type, TokenValue value) {
|
||||
}
|
||||
}
|
||||
|
||||
void Lexer::add_token_2(char c, TokenIndex one, TokenIndex two) {
|
||||
void Lexer::add_token_2(char c, TokenIndex one, TokenIndex two) noexcept{
|
||||
if(matchchar(c))
|
||||
add_token(two);
|
||||
else
|
||||
add_token(one);
|
||||
}
|
||||
|
||||
Str Lexer::eat_string_until(char quote, bool raw) {
|
||||
Error* Lexer::eat_string_until(char quote, bool raw, Str* out) noexcept{
|
||||
bool quote3 = match_n_chars(2, quote);
|
||||
small_vector_2<char, 32> buff;
|
||||
while(true) {
|
||||
@ -233,12 +235,12 @@ Str Lexer::eat_string_until(char quote, bool raw) {
|
||||
break;
|
||||
}
|
||||
if(c == '\0') {
|
||||
if(quote3 && src->mode == REPL_MODE) { throw NeedMoreLines(false); }
|
||||
SyntaxError("EOL while scanning string literal");
|
||||
if(quote3 && src->mode == REPL_MODE) return NeedMoreLines();
|
||||
return SyntaxError("EOL while scanning string literal");
|
||||
}
|
||||
if(c == '\n') {
|
||||
if(!quote3)
|
||||
SyntaxError("EOL while scanning string literal");
|
||||
return SyntaxError("EOL while scanning string literal");
|
||||
else {
|
||||
buff.push_back(c);
|
||||
continue;
|
||||
@ -259,33 +261,37 @@ Str Lexer::eat_string_until(char quote, bool raw) {
|
||||
char code;
|
||||
try {
|
||||
code = (char)std::stoi(hex, &parsed, 16);
|
||||
} catch(...) { SyntaxError("invalid hex char"); }
|
||||
if(parsed != 2) SyntaxError("invalid hex char");
|
||||
} catch(...) {
|
||||
return SyntaxError("invalid hex char");
|
||||
}
|
||||
if(parsed != 2) return SyntaxError("invalid hex char");
|
||||
buff.push_back(code);
|
||||
} break;
|
||||
default: SyntaxError("invalid escape char");
|
||||
default: return SyntaxError("invalid escape char");
|
||||
}
|
||||
} else {
|
||||
buff.push_back(c);
|
||||
}
|
||||
}
|
||||
return Str(buff.data(), buff.size());
|
||||
*out = Str(buff.data(), buff.size());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void Lexer::eat_string(char quote, StringType type) {
|
||||
Str s = eat_string_until(quote, type == RAW_STRING);
|
||||
if(type == F_STRING) {
|
||||
Error* Lexer::eat_string(char quote, StringType type) noexcept{
|
||||
Str s;
|
||||
Error* err = eat_string_until(quote, type == StringType::RAW_STRING, &s);
|
||||
if(err) return err;
|
||||
if(type == StringType::F_STRING) {
|
||||
add_token(TK("@fstr"), s);
|
||||
return;
|
||||
}
|
||||
if(type == NORMAL_BYTES) {
|
||||
}else if(type == StringType::NORMAL_BYTES) {
|
||||
add_token(TK("@bytes"), s);
|
||||
return;
|
||||
}
|
||||
}else{
|
||||
add_token(TK("@str"), s);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Lexer::eat_number() {
|
||||
Error* Lexer::eat_number() noexcept{
|
||||
const char* i = token_start;
|
||||
while(is_possible_number_char(*i))
|
||||
i++;
|
||||
@ -305,13 +311,13 @@ void Lexer::eat_number() {
|
||||
// try long
|
||||
if(i[-1] == 'L') {
|
||||
add_token(TK("@long"));
|
||||
return;
|
||||
return NULL;
|
||||
}
|
||||
// try integer
|
||||
i64 int_out;
|
||||
switch(parse_uint(text, &int_out, -1)) {
|
||||
case IntParsingResult::Success: add_token(TK("@num"), int_out); return;
|
||||
case IntParsingResult::Overflow: SyntaxError("int literal is too large"); return;
|
||||
case IntParsingResult::Success: add_token(TK("@num"), int_out); return NULL;
|
||||
case IntParsingResult::Overflow: return SyntaxError("int literal is too large");
|
||||
case IntParsingResult::Failure: break; // do nothing
|
||||
}
|
||||
}
|
||||
@ -321,54 +327,61 @@ void Lexer::eat_number() {
|
||||
char* p_end;
|
||||
try {
|
||||
float_out = std::strtod(text.data(), &p_end);
|
||||
} catch(...) { SyntaxError("invalid number literal"); }
|
||||
} catch(...) {
|
||||
return SyntaxError("invalid number literal");
|
||||
}
|
||||
|
||||
if(p_end == text.data() + text.size()) {
|
||||
add_token(TK("@num"), (f64)float_out);
|
||||
return;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(i[-1] == 'j' && p_end == text.data() + text.size() - 1) {
|
||||
add_token(TK("@imag"), (f64)float_out);
|
||||
return;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SyntaxError("invalid number literal");
|
||||
return SyntaxError("invalid number literal");
|
||||
}
|
||||
|
||||
bool Lexer::lex_one_token() {
|
||||
Error* Lexer::lex_one_token(bool* eof) noexcept{
|
||||
*eof = false;
|
||||
while(peekchar() != '\0') {
|
||||
token_start = curr_char;
|
||||
char c = eatchar_include_newline();
|
||||
switch(c) {
|
||||
case '\'':
|
||||
case '"': eat_string(c, NORMAL_STRING); return true;
|
||||
case '"': {
|
||||
Error* err = eat_string(c, StringType::NORMAL_STRING);
|
||||
if(err) return err;
|
||||
return NULL;
|
||||
}
|
||||
case '#': skip_line_comment(); break;
|
||||
case '~': add_token(TK("~")); return true;
|
||||
case '{': add_token(TK("{")); return true;
|
||||
case '}': add_token(TK("}")); return true;
|
||||
case ',': add_token(TK(",")); return true;
|
||||
case ':': add_token(TK(":")); return true;
|
||||
case ';': add_token(TK(";")); return true;
|
||||
case '(': add_token(TK("(")); return true;
|
||||
case ')': add_token(TK(")")); return true;
|
||||
case '[': add_token(TK("[")); return true;
|
||||
case ']': add_token(TK("]")); return true;
|
||||
case '@': add_token(TK("@")); return true;
|
||||
case '~': add_token(TK("~")); return NULL;
|
||||
case '{': add_token(TK("{")); return NULL;
|
||||
case '}': add_token(TK("}")); return NULL;
|
||||
case ',': add_token(TK(",")); return NULL;
|
||||
case ':': add_token(TK(":")); return NULL;
|
||||
case ';': add_token(TK(";")); return NULL;
|
||||
case '(': add_token(TK("(")); return NULL;
|
||||
case ')': add_token(TK(")")); return NULL;
|
||||
case '[': add_token(TK("[")); return NULL;
|
||||
case ']': add_token(TK("]")); return NULL;
|
||||
case '@': add_token(TK("@")); return NULL;
|
||||
case '\\': {
|
||||
// line continuation character
|
||||
char c = eatchar_include_newline();
|
||||
if(c != '\n') {
|
||||
if(src->mode == REPL_MODE && c == '\0') throw NeedMoreLines(false);
|
||||
SyntaxError("expected newline after line continuation character");
|
||||
if(src->mode == REPL_MODE && c == '\0') return NeedMoreLines();
|
||||
return SyntaxError("expected newline after line continuation character");
|
||||
}
|
||||
eat_spaces();
|
||||
return true;
|
||||
return NULL;
|
||||
}
|
||||
case '%': add_token_2('=', TK("%"), TK("%=")); return true;
|
||||
case '&': add_token_2('=', TK("&"), TK("&=")); return true;
|
||||
case '|': add_token_2('=', TK("|"), TK("|=")); return true;
|
||||
case '^': add_token_2('=', TK("^"), TK("^=")); return true;
|
||||
case '%': add_token_2('=', TK("%"), TK("%=")); return NULL;
|
||||
case '&': add_token_2('=', TK("&"), TK("&=")); return NULL;
|
||||
case '|': add_token_2('=', TK("|"), TK("|=")); return NULL;
|
||||
case '^': add_token_2('=', TK("^"), TK("^=")); return NULL;
|
||||
case '.': {
|
||||
if(matchchar('.')) {
|
||||
if(matchchar('.')) {
|
||||
@ -379,21 +392,22 @@ bool Lexer::lex_one_token() {
|
||||
} else {
|
||||
char next_char = peekchar();
|
||||
if(next_char >= '0' && next_char <= '9') {
|
||||
eat_number();
|
||||
Error* err = eat_number();
|
||||
if(err) return err;
|
||||
} else {
|
||||
add_token(TK("."));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return NULL;
|
||||
}
|
||||
case '=': add_token_2('=', TK("="), TK("==")); return true;
|
||||
case '=': add_token_2('=', TK("="), TK("==")); return NULL;
|
||||
case '+':
|
||||
if(matchchar('+')) {
|
||||
add_token(TK("++"));
|
||||
} else {
|
||||
add_token_2('=', TK("+"), TK("+="));
|
||||
}
|
||||
return true;
|
||||
return NULL;
|
||||
case '>': {
|
||||
if(matchchar('='))
|
||||
add_token(TK(">="));
|
||||
@ -401,7 +415,7 @@ bool Lexer::lex_one_token() {
|
||||
add_token_2('=', TK(">>"), TK(">>="));
|
||||
else
|
||||
add_token(TK(">"));
|
||||
return true;
|
||||
return NULL;
|
||||
}
|
||||
case '<': {
|
||||
if(matchchar('='))
|
||||
@ -410,7 +424,7 @@ bool Lexer::lex_one_token() {
|
||||
add_token_2('=', TK("<<"), TK("<<="));
|
||||
else
|
||||
add_token(TK("<"));
|
||||
return true;
|
||||
return NULL;
|
||||
}
|
||||
case '-': {
|
||||
if(matchchar('-')) {
|
||||
@ -423,13 +437,15 @@ bool Lexer::lex_one_token() {
|
||||
else
|
||||
add_token(TK("-"));
|
||||
}
|
||||
return true;
|
||||
return NULL;
|
||||
}
|
||||
case '!':
|
||||
if(matchchar('='))
|
||||
if(matchchar('=')){
|
||||
add_token(TK("!="));
|
||||
else
|
||||
SyntaxError("expected '=' after '!'");
|
||||
}else{
|
||||
Error* err = SyntaxError("expected '=' after '!'");
|
||||
if(err) return err;
|
||||
}
|
||||
break;
|
||||
case '*':
|
||||
if(matchchar('*')) {
|
||||
@ -437,63 +453,36 @@ bool Lexer::lex_one_token() {
|
||||
} else {
|
||||
add_token_2('=', TK("*"), TK("*="));
|
||||
}
|
||||
return true;
|
||||
return NULL;
|
||||
case '/':
|
||||
if(matchchar('/')) {
|
||||
add_token_2('=', TK("//"), TK("//="));
|
||||
} else {
|
||||
add_token_2('=', TK("/"), TK("/="));
|
||||
}
|
||||
return true;
|
||||
return NULL;
|
||||
case ' ':
|
||||
case '\t': eat_spaces(); break;
|
||||
case '\n': {
|
||||
add_token(TK("@eol"));
|
||||
if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level");
|
||||
return true;
|
||||
if(!eat_indentation()){
|
||||
return IndentationError("unindent does not match any outer indentation level");
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
default: {
|
||||
if(c == 'f') {
|
||||
if(matchchar('\'')) {
|
||||
eat_string('\'', F_STRING);
|
||||
return true;
|
||||
}
|
||||
if(matchchar('"')) {
|
||||
eat_string('"', F_STRING);
|
||||
return true;
|
||||
}
|
||||
if(matchchar('\'')) return eat_string('\'', StringType::F_STRING);
|
||||
if(matchchar('"')) return eat_string('"', StringType::F_STRING);
|
||||
} else if(c == 'r') {
|
||||
if(matchchar('\'')) {
|
||||
eat_string('\'', RAW_STRING);
|
||||
return true;
|
||||
}
|
||||
if(matchchar('"')) {
|
||||
eat_string('"', RAW_STRING);
|
||||
return true;
|
||||
}
|
||||
if(matchchar('\'')) return eat_string('\'', StringType::RAW_STRING);
|
||||
if(matchchar('"')) return eat_string('"', StringType::RAW_STRING);
|
||||
} else if(c == 'b') {
|
||||
if(matchchar('\'')) {
|
||||
eat_string('\'', NORMAL_BYTES);
|
||||
return true;
|
||||
if(matchchar('\'')) return eat_string('\'', StringType::NORMAL_BYTES);
|
||||
if(matchchar('"')) return eat_string('"', StringType::NORMAL_BYTES);
|
||||
}
|
||||
if(matchchar('"')) {
|
||||
eat_string('"', NORMAL_BYTES);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if(c >= '0' && c <= '9') {
|
||||
eat_number();
|
||||
return true;
|
||||
}
|
||||
switch(eat_name()) {
|
||||
case 0: break;
|
||||
case 1: SyntaxError("invalid char: " + std::string(1, c)); break;
|
||||
case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); break;
|
||||
case 3: SyntaxError("@id contains invalid char"); break;
|
||||
case 4: SyntaxError("invalid JSON token"); break;
|
||||
default: assert(false);
|
||||
}
|
||||
return true;
|
||||
if(c >= '0' && c <= '9') return eat_number();
|
||||
return eat_name();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -502,38 +491,227 @@ bool Lexer::lex_one_token() {
|
||||
while(indents.size() > 1) {
|
||||
indents.pop_back();
|
||||
add_token(TK("@dedent"));
|
||||
return true;
|
||||
return NULL;
|
||||
}
|
||||
add_token(TK("@eof"));
|
||||
return false;
|
||||
*eof = true;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Lexer::throw_err(StrName type, Str msg) {
|
||||
int lineno = current_line;
|
||||
const char* cursor = curr_char;
|
||||
if(peekchar() == '\n') {
|
||||
lineno--;
|
||||
cursor--;
|
||||
Error* Lexer::_error(bool lexer_err, const char* type, const char* msg, va_list args, i64 userdata) noexcept{
|
||||
PK_THREAD_LOCAL Error err;
|
||||
err.type = type;
|
||||
err.src = src.get();
|
||||
if(lexer_err){
|
||||
err.lineno = current_line;
|
||||
err.cursor = curr_char;
|
||||
if(*curr_char == '\n') {
|
||||
err.lineno--;
|
||||
err.cursor--;
|
||||
}
|
||||
throw_err(type, msg, lineno, cursor);
|
||||
}else{
|
||||
err.lineno = -1;
|
||||
err.cursor = NULL;
|
||||
}
|
||||
vsnprintf(err.msg, sizeof(err.msg), msg, args);
|
||||
err.userdata = userdata;
|
||||
return &err;
|
||||
}
|
||||
|
||||
Lexer::Lexer(VM* vm, std::shared_ptr<SourceData> src) : vm(vm), src(src) {
|
||||
Error* Lexer::SyntaxError(const char* fmt, ...) noexcept{
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
Error* err = _error(true, "SyntaxError", fmt, args);
|
||||
va_end(args);
|
||||
return err;
|
||||
}
|
||||
|
||||
Lexer::Lexer(VM* vm, std::shared_ptr<SourceData> src) noexcept : vm(vm), src(src){
|
||||
this->token_start = src->source.c_str();
|
||||
this->curr_char = src->source.c_str();
|
||||
this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line, brackets_level, {}});
|
||||
this->indents.push_back(0);
|
||||
}
|
||||
|
||||
vector<Token> Lexer::run() {
|
||||
Error* Lexer::run() noexcept{
|
||||
if(src->is_precompiled) {
|
||||
from_precompiled();
|
||||
return NULL;
|
||||
}
|
||||
assert(curr_char == src->source.c_str());
|
||||
while(lex_one_token())
|
||||
;
|
||||
return std::move(nexts);
|
||||
bool eof = false;
|
||||
while(!eof) {
|
||||
Error* err = lex_one_token(&eof);
|
||||
if(err) return err;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void Lexer::from_precompiled() {
|
||||
TokenDeserializer deserializer(src->source.c_str());
|
||||
deserializer.curr += 5; // skip "pkpy:"
|
||||
std::string_view version = deserializer.read_string('\n');
|
||||
|
||||
IntParsingResult parse_uint(std::string_view text, i64* out, int base) {
|
||||
assert(version == PK_VERSION);
|
||||
assert(deserializer.read_uint('\n') == (i64)src->mode);
|
||||
|
||||
int count = deserializer.read_count();
|
||||
vector<Str>& precompiled_tokens = src->_precompiled_tokens;
|
||||
for(int i = 0; i < count; i++) {
|
||||
precompiled_tokens.push_back(deserializer.read_string('\n'));
|
||||
}
|
||||
|
||||
count = deserializer.read_count();
|
||||
for(int i = 0; i < count; i++) {
|
||||
Token t;
|
||||
t.type = (unsigned char)deserializer.read_uint(',');
|
||||
if(is_raw_string_used(t.type)) {
|
||||
i64 index = deserializer.read_uint(',');
|
||||
t.start = precompiled_tokens[index].c_str();
|
||||
t.length = precompiled_tokens[index].size;
|
||||
} else {
|
||||
t.start = nullptr;
|
||||
t.length = 0;
|
||||
}
|
||||
|
||||
if(deserializer.match_char(',')) {
|
||||
t.line = nexts.back().line;
|
||||
} else {
|
||||
t.line = (int)deserializer.read_uint(',');
|
||||
}
|
||||
|
||||
if(deserializer.match_char(',')) {
|
||||
t.brackets_level = nexts.back().brackets_level;
|
||||
} else {
|
||||
t.brackets_level = (int)deserializer.read_uint(',');
|
||||
}
|
||||
|
||||
char type = deserializer.read_char();
|
||||
switch(type) {
|
||||
case 'I': t.value = deserializer.read_uint('\n'); break;
|
||||
case 'F': t.value = deserializer.read_float('\n'); break;
|
||||
case 'S': t.value = deserializer.read_string_from_hex('\n'); break;
|
||||
default: t.value = {}; break;
|
||||
}
|
||||
nexts.push_back(t);
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexes the (not yet precompiled) source and serializes the resulting token
/// stream into `*out` as text.
///
/// @param out receives the serialized stream on success.
/// @return the error reported by `run()`, or nullptr on success
///         (`*out` is left untouched when an error is returned).
Error* Lexer::precompile(Str* out) {
    assert(!src->is_precompiled);
    Error* err = run();
    if(err) return err;

    SStream ss;
    ss << "pkpy:" PK_VERSION << '\n';  // line 1: version string
    ss << (int)src->mode << '\n';      // line 2: compile mode

    // Collect each distinct raw-string payload once; indices are assigned below.
    // Iterate by const reference: copying a Token also copies its value payload.
    small_map<std::string_view, int> token_indices;
    for(const auto& token: nexts) {
        if(!is_raw_string_used(token.type)) continue;
        if(token_indices.contains(token.sv())) continue;
        token_indices.insert(token.sv(), 0);
        // the output is newline-delimited, so a raw string must not contain '\n'
        assert(token.sv().find('\n') == std::string_view::npos);
    }

    ss << "=" << (int)token_indices.size() << '\n';  // raw string count
    int index = 0;
    for(auto& kv: token_indices) {
        ss << kv.first << '\n';  // raw strings, one per line
        kv.second = index++;     // the position in this list is the index tokens refer to
    }

    ss << "=" << (int)nexts.size() << '\n';  // token count
    for(int i = 0; i < nexts.size(); i++) {
        const Token& token = nexts[i];
        ss << (int)token.type << ',';
        if(is_raw_string_used(token.type)) { ss << token_indices[token.sv()] << ','; }

        // line / brackets level are written as an empty field when unchanged
        // from the previous token
        if(i > 0 && nexts[i - 1].line == token.line)
            ss << ',';
        else
            ss << token.line << ',';
        if(i > 0 && nexts[i - 1].brackets_level == token.brackets_level)
            ss << ',';
        else
            ss << token.brackets_level << ',';

        // value payload: 'I' + integer, 'F' + float, 'S' + hex-encoded bytes,
        // or nothing at all for any other alternative
        std::visit(
            [&ss](auto&& arg) {
                using T = std::decay_t<decltype(arg)>;
                if constexpr(std::is_same_v<T, i64>) {
                    ss << 'I' << arg;
                } else if constexpr(std::is_same_v<T, f64>) {
                    ss << 'F' << arg;
                } else if constexpr(std::is_same_v<T, Str>) {
                    ss << 'S';
                    for(char c: arg)
                        ss.write_hex((unsigned char)c);
                }
                ss << '\n';
            },
            token.value);
    }
    *out = ss.str();
    return nullptr;
}
|
||||
|
||||
/// Returns the characters from the cursor up to (not including) the next
/// occurrence of `c`, then moves the cursor past that delimiter.
///
/// If the NUL terminator is reached first, the scan stops there instead of
/// running off the end of the buffer: the remaining text is returned and the
/// cursor is left on the terminator.
std::string_view TokenDeserializer::read_string(char c) {
    const char* start = curr;
    while(*curr != c && *curr != '\0')
        curr++;
    std::string_view retval(start, curr - start);
    if(*curr == c) curr++;  // skip the delimiter (only if one was actually found)
    return retval;
}
|
||||
|
||||
/// Reads a field terminated by `c` and decodes it from lowercase hex
/// (two digits per byte) into a newly allocated, NUL-terminated buffer
/// that is handed over to the returned Str.
///
/// A trailing unpaired digit is ignored rather than paired with whatever
/// byte follows the field.
Str TokenDeserializer::read_string_from_hex(char c) {
    // Value of one lowercase hex digit. Anything else is a format violation;
    // when asserts are compiled out it contributes 0.
    auto nibble = [](char ch) -> int {
        if(ch >= '0' && ch <= '9') return ch - '0';
        if(ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        assert(false);
        return 0;
    };

    std::string_view s = read_string(c);
    assert(s.size() % 2 == 0);
    const int len = (int)(s.size() / 2);

    char* buffer = (char*)std::malloc(len + 1);
    assert(buffer != nullptr);
    for(int i = 0; i < len; i++) {
        const int hi = nibble(s[2 * i]);
        const int lo = nibble(s[2 * i + 1]);
        buffer[i] = (char)((hi << 4) | lo);
    }
    buffer[len] = 0;
    return pair<char*, int>(buffer, len);  // Str takes ownership of `buffer`
}
|
||||
|
||||
int TokenDeserializer::read_count() {
|
||||
assert(*curr == '=');
|
||||
curr++;
|
||||
return read_uint('\n');
|
||||
}
|
||||
|
||||
/// Parses an unsigned decimal integer that ends at delimiter `c` and moves
/// the cursor past the delimiter.
///
/// Scanning also stops at the NUL terminator, so a missing delimiter can no
/// longer walk off the end of the buffer; in that case the cursor stays on
/// the terminator. Non-digit characters are a format violation (asserted).
i64 TokenDeserializer::read_uint(char c) {
    i64 out = 0;
    while(*curr != c && *curr != '\0') {
        assert(*curr >= '0' && *curr <= '9');
        out = out * 10 + (*curr - '0');
        curr++;
    }
    if(*curr == c) curr++;  // skip the delimiter (only if one was actually found)
    return out;
}
|
||||
|
||||
/// Reads the field terminated by `c` and converts its text to a float
/// with std::stod.
f64 TokenDeserializer::read_float(char c) {
    // std::stod needs an owning std::string, so copy the view first
    const std::string text(read_string(c));
    return std::stod(text);
}
|
||||
|
||||
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept{
|
||||
*out = 0;
|
||||
|
||||
if(base == -1) {
|
||||
|
@ -1455,7 +1455,7 @@ void VM::__raise_exc(bool re_raise) {
|
||||
int current_line = frame->co->lines[actual_ip].lineno; // current line
|
||||
auto current_f_name = frame->co->name.sv(); // current function name
|
||||
if(frame->_callable == nullptr) current_f_name = ""; // not in a function
|
||||
e.st_push(frame->co->src, current_line, nullptr, current_f_name);
|
||||
e.st_push(frame->co->src.get(), current_line, nullptr, current_f_name);
|
||||
|
||||
if(next_ip >= 0) {
|
||||
throw InternalException(InternalExceptionType::Handled, next_ip);
|
||||
|
@ -25,7 +25,7 @@ SourceData::SourceData(const Str& filename, CompileMode mode) : filename(filenam
|
||||
line_starts.push_back(this->source.c_str());
|
||||
}
|
||||
|
||||
std::pair<const char*, const char*> SourceData::_get_line(int lineno) const {
|
||||
pair<const char*, const char*> SourceData::_get_line(int lineno) const {
|
||||
if(is_precompiled || lineno == -1) return {nullptr, nullptr};
|
||||
lineno -= 1;
|
||||
if(lineno < 0) lineno = 0;
|
||||
@ -49,7 +49,7 @@ Str SourceData::snapshot(int lineno, const char* cursor, std::string_view name)
|
||||
if(!name.empty()) ss << ", in " << name;
|
||||
if(!is_precompiled) {
|
||||
ss << '\n';
|
||||
std::pair<const char*, const char*> pair = _get_line(lineno);
|
||||
pair<const char*, const char*> pair = _get_line(lineno);
|
||||
Str line = "<?>";
|
||||
int removed_spaces = 0;
|
||||
if(pair.first && pair.second) {
|
||||
|
@ -1723,22 +1723,29 @@ void VM::__post_init_builtin_types() {
|
||||
|
||||
CodeObject_ VM::compile(std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope) {
|
||||
Compiler compiler(this, source, filename, mode, unknown_global_scope);
|
||||
try {
|
||||
return compiler.compile();
|
||||
} catch(TopLevelException e) {
|
||||
_error(e.ptr->self());
|
||||
return nullptr;
|
||||
}
|
||||
CodeObject_ code;
|
||||
Error* err = compiler.compile(&code);
|
||||
if(err) __compile_error(err);
|
||||
return code;
|
||||
}
|
||||
|
||||
void VM::__compile_error(Error* err){
|
||||
assert(err != nullptr);
|
||||
__last_exception = vm->call(
|
||||
vm->builtins->attr(err->type),
|
||||
VAR((const char*)err->msg)
|
||||
).get();
|
||||
Exception& e = __last_exception->as<Exception>();
|
||||
e.st_push(err->src, err->lineno, err->cursor, "");
|
||||
_error(__last_exception);
|
||||
}
|
||||
|
||||
Str VM::precompile(std::string_view source, const Str& filename, CompileMode mode) {
|
||||
Compiler compiler(this, source, filename, mode, false);
|
||||
try {
|
||||
return compiler.precompile();
|
||||
} catch(TopLevelException e) {
|
||||
_error(e.ptr->self());
|
||||
return nullptr;
|
||||
}
|
||||
Str out;
|
||||
Error* err = compiler.lexer.precompile(&out);
|
||||
if(err) __compile_error(err);
|
||||
return out;
|
||||
}
|
||||
|
||||
} // namespace pkpy
|
||||
|
Loading…
x
Reference in New Issue
Block a user