use error code

This commit is contained in:
blueloveTH 2024-06-09 01:39:05 +08:00
parent 783547a481
commit 81c4853f04
19 changed files with 1047 additions and 975 deletions

View File

@ -15,6 +15,6 @@ struct GIL {
#define PK_GLOBAL_SCOPE_LOCK() GIL _lock;
#else
#define PK_THREAD_LOCAL
#define PK_THREAD_LOCAL static
#define PK_GLOBAL_SCOPE_LOCK()
#endif

View File

@ -25,7 +25,7 @@ struct Str {
Str(std::string_view s);
Str(const char* s);
Str(const char* s, int len);
Str(std::pair<char*, int>);
Str(pair<char*, int>); // take ownership
Str(const Str& other);
Str(Str&& other);

View File

@ -15,6 +15,13 @@ struct explicit_copy_t {
explicit explicit_copy_t() = default;
};
// Lightweight two-member record with public `first` / `second` fields.
// Both members are copy-initialized from the constructor arguments;
// there is no default constructor.
template <typename K, typename V>
struct pair {
    K first;
    V second;

    pair(K k, V v) : first(k), second(v) {}
};
// Dummy types
struct DummyInstance {};

View File

@ -13,6 +13,8 @@ namespace pkpy {
template <typename T>
struct array {
static_assert(is_pod_v<T>);
T* _data;
int _size;
@ -39,10 +41,7 @@ struct array {
array(T* data, int size) : _data(data), _size(size) {}
array& operator= (array&& other) noexcept {
if(_data) {
std::destroy(begin(), end());
std::free(_data);
}
if(_data) std::free(_data);
_data = other._data;
_size = other._size;
other._data = nullptr;
@ -70,18 +69,15 @@ struct array {
T* data() const { return _data; }
std::pair<T*, int> detach() noexcept {
std::pair<T*, int> retval(_data, _size);
pair<T*, int> detach() noexcept {
pair<T*, int> retval(_data, _size);
_data = nullptr;
_size = 0;
return retval;
}
~array() {
if(_data) {
std::destroy(begin(), end());
std::free(_data);
}
if(_data) std::free(_data);
}
};
@ -260,8 +256,8 @@ struct vector {
return retval;
}
std::pair<T*, int> detach() noexcept {
std::pair<T*, int> retval(_data, _size);
pair<T*, int> detach() noexcept {
pair<T*, int> retval(_data, _size);
_data = nullptr;
_capacity = 0;
_size = 0;

View File

@ -1,11 +1,12 @@
#pragma once
#include "pocketpy/compiler/expr.hpp"
#include "pocketpy/objects/error.hpp"
namespace pkpy {
class Compiler;
typedef void (Compiler::*PrattCallback)();
struct Compiler;
typedef Error* (Compiler::*PrattCallback)() noexcept;
struct PrattRule {
PrattCallback prefix;
@ -13,7 +14,7 @@ struct PrattRule {
Precedence precedence;
};
class Compiler {
struct Compiler {
PK_ALWAYS_PASS_BY_POINTER(Compiler)
static PrattRule rules[kTokenCount];
@ -24,47 +25,41 @@ class Compiler {
bool unknown_global_scope; // for eval/exec() call
// for parsing token stream
int i = 0;
vector<Token> tokens;
const Token& prev() const { return tokens[i - 1]; }
const Token& tk(int i) const noexcept{ return lexer.nexts[i]; }
const Token& prev() const noexcept{ return tk(i - 1); }
const Token& curr() const noexcept{ return tk(i); }
const Token& next() const noexcept{ return tk(i + 1); }
const Token& curr() const { return tokens[i]; }
const Token& next() const { return tokens[i + 1]; }
const Token& err() const {
if(i >= tokens.size()) return prev();
const Token& err() const noexcept{
if(i >= lexer.nexts.size()) return prev();
return curr();
}
void advance(int delta = 1) { i += delta; }
void advance(int delta = 1) noexcept{ i += delta; }
CodeEmitContext* ctx() { return &contexts.back(); }
CodeEmitContext* ctx() noexcept{ return &contexts.back(); }
vector<Expr*>& s_expr() noexcept{ return ctx()->s_expr; }
CompileMode mode() const { return lexer.src->mode; }
CompileMode mode() const noexcept{ return lexer.src->mode; }
NameScope name_scope() const;
CodeObject_ push_global_context();
FuncDecl_ push_f_context(Str name);
void pop_context();
NameScope name_scope() const noexcept;
CodeObject_ push_global_context() noexcept;
FuncDecl_ push_f_context(Str name) noexcept;
static void init_pratt_rules();
bool match(TokenIndex expected);
void consume(TokenIndex expected);
bool match_newlines_repl();
bool match_newlines(bool repl_throw = false);
bool match_end_stmt();
void consume_end_stmt();
static void init_pratt_rules() noexcept;
bool match(TokenIndex expected) noexcept;
bool match_newlines_repl() noexcept{ return match_newlines(mode() == REPL_MODE); }
bool match_newlines(bool repl_throw = false) noexcept;
bool match_end_stmt() noexcept;
/*************************************************/
void EXPR();
void EXPR_TUPLE(bool allow_slice = false);
Expr* EXPR_VARS(); // special case for `for loop` and `comp`
[[nodiscard]] Error* EXPR() noexcept{ return parse_expression(PREC_LOWEST + 1); }
[[nodiscard]] Error* EXPR_TUPLE(bool allow_slice = false) noexcept;
[[nodiscard]] Error* EXPR_VARS() noexcept; // special case for `for loop` and `comp`
template <typename T, typename... Args>
T* make_expr(Args&&... args) {
T* make_expr(Args&&... args) noexcept{
static_assert(sizeof(T) <= kPoolExprBlockSize);
static_assert(std::is_base_of_v<Expr, T>);
void* p = PoolExpr_alloc();
@ -73,87 +68,63 @@ class Compiler {
return expr;
}
void consume_comp(CompExpr* ce, Expr* expr);
[[nodiscard]] Error* consume_comp(Opcode op0, Opcode op1) noexcept;
[[nodiscard]] Error* pop_context() noexcept;
void exprLiteral();
void exprLong();
void exprImag();
void exprBytes();
void exprFString();
void exprLambda();
void exprOr();
void exprAnd();
void exprTernary();
void exprBinaryOp();
void exprNot();
void exprUnaryOp();
void exprGroup();
void exprList();
void exprMap();
void exprCall();
void exprName();
void exprAttrib();
void exprSlice0();
void exprSlice1();
void exprSubscr();
void exprLiteral0();
Error* exprLiteral() noexcept;
Error* exprLong() noexcept;
Error* exprImag() noexcept;
Error* exprBytes() noexcept;
Error* exprFString() noexcept;
Error* exprLambda() noexcept;
Error* exprOr() noexcept;
Error* exprAnd() noexcept;
Error* exprTernary() noexcept;
Error* exprBinaryOp() noexcept;
Error* exprNot() noexcept;
Error* exprUnaryOp() noexcept;
Error* exprGroup() noexcept;
Error* exprList() noexcept;
Error* exprMap() noexcept;
Error* exprCall() noexcept;
Error* exprName() noexcept;
Error* exprAttrib() noexcept;
Error* exprSlice0() noexcept;
Error* exprSlice1() noexcept;
Error* exprSubscr() noexcept;
Error* exprLiteral0() noexcept;
void compile_block_body(void (Compiler::*callback)() = nullptr);
void compile_normal_import();
void compile_from_import();
bool is_expression(bool allow_slice = false);
void parse_expression(int precedence, bool allow_slice = false);
void compile_if_stmt();
void compile_while_loop();
void compile_for_loop();
void compile_try_except();
void compile_decorated();
bool is_expression(bool allow_slice = false) noexcept;
bool try_compile_assignment();
void compile_stmt();
void consume_type_hints();
void _add_decorators(const Expr_vector& decorators);
void compile_class(const Expr_vector& decorators = {});
void _compile_f_args(FuncDecl_ decl, bool enable_type_hints);
void compile_function(const Expr_vector& decorators = {});
[[nodiscard]] Error* compile_block_body(PrattCallback callback = NULL) noexcept;
[[nodiscard]] Error* compile_normal_import() noexcept;
[[nodiscard]] Error* compile_from_import() noexcept;
[[nodiscard]] Error* parse_expression(int precedence, bool allow_slice = false) noexcept;
[[nodiscard]] Error* compile_if_stmt() noexcept;
[[nodiscard]] Error* compile_while_loop() noexcept;
[[nodiscard]] Error* compile_for_loop() noexcept;
[[nodiscard]] Error* compile_try_except() noexcept;
[[nodiscard]] Error* compile_decorated() noexcept;
PyVar to_object(const TokenValue& value);
PyVar read_literal();
[[nodiscard]] Error* try_compile_assignment(bool* is_assign) noexcept;
[[nodiscard]] Error* compile_stmt() noexcept;
[[nodiscard]] Error* consume_type_hints() noexcept;
[[nodiscard]] Error* _compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcept;
[[nodiscard]] Error* compile_function(int decorators = 0) noexcept;
[[nodiscard]] Error* compile_class(int decorators = 0) noexcept;
void SyntaxError(Str msg) { lexer.throw_err("SyntaxError", msg, err().line, err().start); }
PyVar to_object(const TokenValue& value) noexcept;
void SyntaxError() { lexer.throw_err("SyntaxError", "invalid syntax", err().line, err().start); }
[[nodiscard]] Error* read_literal(PyVar* out) noexcept;
void IndentationError(Str msg) { lexer.throw_err("IndentationError", msg, err().line, err().start); }
[[nodiscard]] Error* SyntaxError(const char* msg = "invalid syntax", ...) noexcept;
[[nodiscard]] Error* IndentationError(const char* msg) noexcept{ return lexer._error(false, "IndentationError", msg, {}); }
[[nodiscard]] Error* NeedMoreLines() noexcept{ return lexer._error(false, "NeedMoreLines", "", {}, (i64)ctx()->is_compiling_class); }
public:
Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope = false);
Str precompile();
void from_precompiled(const char* source);
CodeObject_ compile();
};
struct TokenDeserializer {
const char* curr;
const char* source;
TokenDeserializer(const char* source) : curr(source), source(source) {}
char read_char() { return *curr++; }
bool match_char(char c) {
if(*curr == c) {
curr++;
return true;
}
return false;
}
std::string_view read_string(char c);
Str read_string_from_hex(char c);
int read_count();
i64 read_uint(char c);
f64 read_float(char c);
Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope = false) noexcept;
[[nodiscard]] Error* compile(CodeObject_* out) noexcept;
~Compiler();
};
} // namespace pkpy

View File

@ -56,7 +56,7 @@ inline void delete_expr(Expr* p){
PoolExpr_dealloc(p);
}
struct CodeEmitContext {
struct CodeEmitContext{
VM* vm;
FuncDecl_ func; // optional
CodeObject_ co; // 1 CodeEmitContext <=> 1 CodeObject_
@ -72,21 +72,22 @@ struct CodeEmitContext {
small_map<PyVar, int> _co_consts_nonstring_dedup_map;
small_map<std::string_view, int> _co_consts_string_dedup_map;
int get_loop() const;
CodeBlock* enter_block(CodeBlockType type);
void exit_block();
void emit_expr(bool emit = true); // clear the expression stack and generate bytecode
int emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual = false);
void revert_last_emit_();
int emit_int(i64 value, int line);
void patch_jump(int index);
bool add_label(StrName name);
int add_varname(StrName name);
int add_const(PyVar);
int add_const_string(std::string_view);
int add_func_decl(FuncDecl_ decl);
void emit_store_name(NameScope scope, StrName name, int line);
void try_merge_for_iter_store(int);
int get_loop() const noexcept;
CodeBlock* enter_block(CodeBlockType type) noexcept;
void exit_block() noexcept;
void emit_expr(bool emit = true) noexcept; // clear the expression stack and generate bytecode
void emit_decorators(int count) noexcept;
int emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual = false) noexcept;
void revert_last_emit_() noexcept;
int emit_int(i64 value, int line) noexcept;
void patch_jump(int index) noexcept;
bool add_label(StrName name) noexcept;
int add_varname(StrName name) noexcept;
int add_const(PyVar) noexcept;
int add_const_string(std::string_view) noexcept;
int add_func_decl(FuncDecl_ decl) noexcept;
void emit_store_name(NameScope scope, StrName name, int line) noexcept;
void try_merge_for_iter_store(int) noexcept;
};
struct NameExpr : Expr {
@ -236,15 +237,14 @@ struct DictItemExpr : Expr {
};
struct SequenceExpr : Expr {
Expr_vector items;
array<Expr*> items;
SequenceExpr(Expr_vector&& items) : items(std::move(items)) {}
SequenceExpr(int count) : items(count) {}
virtual Opcode opcode() const = 0;
void emit_(CodeEmitContext* ctx) override {
for(auto& item: items)
item->emit_(ctx);
for(auto& item: items) item->emit_(ctx);
ctx->emit_(opcode(), items.size(), line);
}
@ -308,8 +308,10 @@ struct CompExpr : Expr {
Expr* iter = nullptr; // loop iter
Expr* cond = nullptr; // optional if condition
virtual Opcode op0() = 0;
virtual Opcode op1() = 0;
Opcode op0;
Opcode op1;
CompExpr(Opcode op0, Opcode op1) : op0(op0), op1(op1) {}
void emit_(CodeEmitContext* ctx) override;
@ -321,24 +323,6 @@ struct CompExpr : Expr {
}
};
struct ListCompExpr : CompExpr {
Opcode op0() override { return OP_BUILD_LIST; }
Opcode op1() override { return OP_LIST_APPEND; }
};
struct DictCompExpr : CompExpr {
Opcode op0() override { return OP_BUILD_DICT; }
Opcode op1() override { return OP_DICT_ADD; }
};
struct SetCompExpr : CompExpr {
Opcode op0() override { return OP_BUILD_SET; }
Opcode op1() override { return OP_SET_ADD; }
};
struct LambdaExpr : Expr {
FuncDecl_ decl;
@ -391,7 +375,7 @@ struct CallExpr : Expr {
Expr* callable;
Expr_vector args;
// **a will be interpreted as a special keyword argument: {"**": a}
vector<std::pair<StrName, Expr*>> kwargs;
vector<pair<StrName, Expr*>> kwargs;
void emit_(CodeEmitContext* ctx) override;
~CallExpr() {

View File

@ -92,7 +92,7 @@ enum Precedence {
PREC_HIGHEST,
};
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES };
enum class StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES };
struct Lexer {
VM* vm;
@ -104,38 +104,38 @@ struct Lexer {
small_vector_2<int, 8> indents;
int brackets_level = 0;
char peekchar() const { return *curr_char; }
char peekchar() const noexcept { return *curr_char; }
bool match_n_chars(int n, char c0);
bool match_string(const char* s);
int eat_spaces();
bool match_n_chars(int n, char c0) noexcept;
bool match_string(const char* s) noexcept;
int eat_spaces() noexcept;
bool eat_indentation();
char eatchar();
char eatchar_include_newline();
int eat_name();
void skip_line_comment();
bool matchchar(char c);
void add_token(TokenIndex type, TokenValue value = {});
void add_token_2(char c, TokenIndex one, TokenIndex two);
Str eat_string_until(char quote, bool raw);
void eat_string(char quote, StringType type);
bool eat_indentation() noexcept;
char eatchar() noexcept;
char eatchar_include_newline() noexcept;
void skip_line_comment() noexcept;
bool matchchar(char c) noexcept;
void add_token(TokenIndex type, TokenValue value = {}) noexcept;
void add_token_2(char c, TokenIndex one, TokenIndex two) noexcept;
void eat_number();
bool lex_one_token();
[[nodiscard]] Error* eat_name() noexcept;
[[nodiscard]] Error* eat_string_until(char quote, bool raw, Str* out) noexcept;
[[nodiscard]] Error* eat_string(char quote, StringType type) noexcept;
[[nodiscard]] Error* eat_number() noexcept;
[[nodiscard]] Error* lex_one_token(bool* eof) noexcept;
/***** Error Reporter *****/
[[noreturn]] void throw_err(StrName type, Str msg);
[[noreturn]] void throw_err(StrName type, Str msg, int lineno, const char* cursor);
[[nodiscard]] Error* _error(bool lexer_err, const char* type, const char* msg, va_list args, i64 userdata=0) noexcept;
[[nodiscard]] Error* SyntaxError(const char* fmt, ...) noexcept;
[[nodiscard]] Error* IndentationError(const char* msg) noexcept { return _error(true, "IndentationError", msg, {}); }
[[nodiscard]] Error* NeedMoreLines() noexcept { return _error(true, "NeedMoreLines", "", {}, 0); }
[[noreturn]] void SyntaxError(Str msg) { throw_err("SyntaxError", msg); }
Lexer(VM* vm, std::shared_ptr<SourceData> src) noexcept;
[[nodiscard]] Error* run() noexcept;
[[noreturn]] void SyntaxError() { throw_err("SyntaxError", "invalid syntax"); }
[[noreturn]] void IndentationError(Str msg) { throw_err("IndentationError", msg); }
Lexer(VM* vm, std::shared_ptr<SourceData> src);
vector<Token> run();
void from_precompiled();
[[nodiscard]] Error* precompile(Str* out);
};
enum class IntParsingResult {
@ -144,6 +144,29 @@ enum class IntParsingResult {
Overflow,
};
IntParsingResult parse_uint(std::string_view text, i64* out, int base);
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept;
// Sequential reader over a `const char*` buffer.
// `source` remembers the pointer the reader was created with, while `curr`
// is the read position that the read_*/match_* helpers move forward.
struct TokenDeserializer {
    const char* curr;
    const char* source;

    TokenDeserializer(const char* source) : curr(source), source(source) {}

    // Returns the character under the cursor and steps past it.
    char read_char() {
        char ch = *curr;
        ++curr;
        return ch;
    }

    // Steps past the character under the cursor only when it equals `c`;
    // the return value reports whether the cursor moved.
    bool match_char(char c) {
        if(*curr != c) return false;
        ++curr;
        return true;
    }

    // Declared here, defined elsewhere.
    std::string_view read_string(char c);
    Str read_string_from_hex(char c);
    int read_count();
    i64 read_uint(char c);
    f64 read_float(char c);
};
} // namespace pkpy

View File

@ -36,11 +36,11 @@ struct VoidP {
static void _register(VM* vm, PyObject* mod, PyObject* type);
};
#define POINTER_VAR(Tp, NAME) \
inline PyVar py_var(VM* vm, Tp val) { \
const static std::pair<StrName, StrName> P("c", NAME); \
PyVar type = vm->_modules[P.first]->attr(P.second); \
return vm->new_object<VoidP>(type->as<Type>(), val); \
// POINTER_VAR(Tp, NAME) defines an inline `py_var(VM*, Tp)` overload.
// The generated function looks up attribute NAME on the entry stored under
// "c" in `vm->_modules`, treats it as a `Type`, and returns a new `VoidP`
// object of that type constructed from `val`.
// The ("c", NAME) name pair is a function-local static, so it is built only
// once per generated overload.
#define POINTER_VAR(Tp, NAME) \
inline PyVar py_var(VM* vm, Tp val) { \
const static pair<StrName, StrName> P("c", NAME); \
PyVar type = vm->_modules[P.first]->attr(P.second); \
return vm->new_object<VoidP>(type->as<Type>(), val); \
}
POINTER_VAR(char*, "char_p")

View File

@ -463,7 +463,6 @@ public:
vm->s_data.emplace(p->type, p);
}
#endif
// clang-format on
template <typename T>
Type _find_type_in_cxx_typeid_map() {
@ -500,31 +499,26 @@ public:
[[noreturn]] void __builtin_error(StrName type);
[[noreturn]] void __builtin_error(StrName type, PyVar arg);
[[noreturn]] void __builtin_error(StrName type, const Str& msg);
[[noreturn]] void __compile_error(Error* err);
void __init_builtin_types();
void __post_init_builtin_types();
void __push_varargs() {}
void __push_varargs(PyVar _0) { PUSH(_0); }
void __push_varargs(PyVar _0, PyVar _1) {
PUSH(_0);
PUSH(_1);
}
void __push_varargs(PyVar _0, PyVar _1, PyVar _2) {
PUSH(_0);
PUSH(_1);
PUSH(_2);
}
void __push_varargs(PyVar _0, PyVar _1, PyVar _2, PyVar _3) {
PUSH(_0);
PUSH(_1);
PUSH(_2);
PUSH(_3);
}
PyVar __pack_next_retval(unsigned);
PyVar __minmax_reduce(bool (VM::*op)(PyVar, PyVar), PyVar args, PyVar key);
bool __py_bool_non_trivial(PyVar);
@ -539,95 +533,26 @@ constexpr inline bool is_immutable_v =
std::is_same_v<T, Bytes> || std::is_same_v<T, bool> || std::is_same_v<T, Range> || std::is_same_v<T, Slice> ||
std::is_pointer_v<T> || std::is_enum_v<T>;
template <typename T>
constexpr Type _find_type_in_const_cxx_typeid_map() {
return Type();
}
// Compile-time mapping from a C++ type to its built-in `Type` constant.
// The primary template returns a default-constructed `Type()`; every
// explicit specialization below returns the corresponding `VM::tp_*` value.
template<typename T> constexpr Type _tp_builtin() { return Type(); }
template<> constexpr Type _tp_builtin<Str>() { return VM::tp_str; }
template<> constexpr Type _tp_builtin<List>() { return VM::tp_list; }
template<> constexpr Type _tp_builtin<Tuple>() { return VM::tp_tuple; }
template<> constexpr Type _tp_builtin<Function>() { return VM::tp_function; }
template<> constexpr Type _tp_builtin<NativeFunc>() { return VM::tp_native_func; }
template<> constexpr Type _tp_builtin<BoundMethod>() { return VM::tp_bound_method; }
template<> constexpr Type _tp_builtin<Range>() { return VM::tp_range; }
template<> constexpr Type _tp_builtin<Slice>() { return VM::tp_slice; }
template<> constexpr Type _tp_builtin<Exception>() { return VM::tp_exception; }
template<> constexpr Type _tp_builtin<Bytes>() { return VM::tp_bytes; }
template<> constexpr Type _tp_builtin<MappingProxy>() { return VM::tp_mappingproxy; }
template<> constexpr Type _tp_builtin<Dict>() { return VM::tp_dict; }
template<> constexpr Type _tp_builtin<Property>() { return VM::tp_property; }
template<> constexpr Type _tp_builtin<StarWrapper>() { return VM::tp_star_wrapper; }
template<> constexpr Type _tp_builtin<StaticMethod>() { return VM::tp_staticmethod; }
template<> constexpr Type _tp_builtin<ClassMethod>() { return VM::tp_classmethod; }
template<> constexpr Type _tp_builtin<StackMemory>() { return VM::tp_stack_memory; }
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Str>() {
return VM::tp_str;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<List>() {
return VM::tp_list;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Tuple>() {
return VM::tp_tuple;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Function>() {
return VM::tp_function;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<NativeFunc>() {
return VM::tp_native_func;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<BoundMethod>() {
return VM::tp_bound_method;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Range>() {
return VM::tp_range;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Slice>() {
return VM::tp_slice;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Exception>() {
return VM::tp_exception;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Bytes>() {
return VM::tp_bytes;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<MappingProxy>() {
return VM::tp_mappingproxy;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Dict>() {
return VM::tp_dict;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<Property>() {
return VM::tp_property;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<StarWrapper>() {
return VM::tp_star_wrapper;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<StaticMethod>() {
return VM::tp_staticmethod;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<ClassMethod>() {
return VM::tp_classmethod;
}
template <>
constexpr Type _find_type_in_const_cxx_typeid_map<StackMemory>() {
return VM::tp_stack_memory;
}
// clang-format on
template <typename __T>
PyVar py_var(VM* vm, __T&& value) {
@ -654,7 +579,7 @@ PyVar py_var(VM* vm, __T&& value) {
} else if constexpr(std::is_pointer_v<T>) {
return from_void_p(vm, (void*)value);
} else {
constexpr Type const_type = _find_type_in_const_cxx_typeid_map<T>();
constexpr Type const_type = _tp_builtin<T>();
if constexpr((bool)const_type) {
if constexpr(is_sso_v<T>)
return PyVar(const_type, value);
@ -715,7 +640,7 @@ __T _py_cast__internal(VM* vm, PyVar obj) {
static_assert(!std::is_reference_v<__T>);
return to_void_p<T>(vm, obj);
} else {
constexpr Type const_type = _find_type_in_const_cxx_typeid_map<T>();
constexpr Type const_type = _tp_builtin<T>();
if constexpr((bool)const_type) {
if constexpr(with_check) {
if constexpr(std::is_same_v<T, Exception>) {

View File

@ -33,14 +33,14 @@ struct Exception {
PyObject* _self; // weak reference
struct Frame {
std::shared_ptr<SourceData> src;
SourceData* src; // weak ref
int lineno;
const char* cursor;
std::string name;
Str snapshot() const { return src->snapshot(lineno, cursor, name); }
Frame(std::shared_ptr<SourceData> src, int lineno, const char* cursor, std::string_view name) :
Frame(SourceData* src, int lineno, const char* cursor, std::string_view name) :
src(src), lineno(lineno), cursor(cursor), name(name) {}
};
@ -77,4 +77,13 @@ struct TopLevelException : std::exception {
}
};
// Plain error record; the `Error*`-returning lexer/compiler functions hand
// one of these back by pointer.
struct Error{
// Error type name as a C string (the `_error` helpers are called with names
// such as "IndentationError" and "NeedMoreLines").
const char* type;
// Source the error refers to. Raw pointer — NOTE(review): ownership and
// lifetime are not visible here; confirm against the code that fills it in.
SourceData* src;
// Presumably the line number of the error location — TODO confirm.
int lineno;
// Presumably a pointer into the source text at the error position — TODO confirm.
const char* cursor;
// Message text stored inline in a fixed 100-byte buffer.
char msg[100];
// Extra integer payload; `_error(...)` accepts it as `i64 userdata=0`.
i64 userdata;
};
} // namespace pkpy

View File

@ -10,9 +10,9 @@ namespace pkpy {
struct NameDict {
PK_ALWAYS_PASS_BY_POINTER(NameDict)
using Item = std::pair<StrName, PyVar>;
using Item = pair<StrName, PyVar>;
constexpr static uint16_t kInitialCapacity = 16;
static_assert(is_pod_v<PyVar>);
float _load_factor;
uint16_t _size;

View File

@ -21,7 +21,7 @@ struct SourceData {
SourceData(std::string_view source, const Str& filename, CompileMode mode);
SourceData(const Str& filename, CompileMode mode);
std::pair<const char*, const char*> _get_line(int lineno) const;
pair<const char*, const char*> _get_line(int lineno) const;
std::string_view get_line(int lineno) const;
Str snapshot(int lineno, const char* cursor, std::string_view name) const;
};

View File

@ -52,7 +52,7 @@ Str::Str(int size, bool is_ascii) :
Str::Str(const char* s, int len) :
size(len), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
Str::Str(std::pair<char*, int> detached) : size(detached.second), is_ascii(true) {
Str::Str(pair<char*, int> detached) : size(detached.second), is_ascii(true) {
this->data = detached.first;
for(int i = 0; i < size; i++) {
if(!isascii(data[i])) {

File diff suppressed because it is too large Load Diff

View File

@ -13,7 +13,7 @@ inline bool is_identifier(std::string_view s) {
// Reports whether `value` lies in the signed 16-bit range
// [INT16_MIN, INT16_MAX].
inline bool is_small_int(i64 value) {
    if(value < INT16_MIN) return false;
    return value <= INT16_MAX;
}
int CodeEmitContext::get_loop() const {
int CodeEmitContext::get_loop() const noexcept{
int index = curr_iblock;
while(index >= 0) {
if(co->blocks[index].type == CodeBlockType::FOR_LOOP) break;
@ -23,13 +23,13 @@ int CodeEmitContext::get_loop() const {
return index;
}
CodeBlock* CodeEmitContext::enter_block(CodeBlockType type) {
// Appends a new `CodeBlock` of the given type to `co->blocks`, built from the
// current block index and the current code length, then makes it the current
// block and returns a pointer to it.
CodeBlock* CodeEmitContext::enter_block(CodeBlockType type) noexcept{
    const int code_len = (int)co->codes.size();
    co->blocks.push_back(CodeBlock(type, curr_iblock, code_len));
    curr_iblock = co->blocks.size() - 1;
    CodeBlock* entered = &co->blocks[curr_iblock];
    return entered;
}
void CodeEmitContext::exit_block() {
void CodeEmitContext::exit_block() noexcept{
auto curr_type = co->blocks[curr_iblock].type;
co->blocks[curr_iblock].end = co->codes.size();
curr_iblock = co->blocks[curr_iblock].parent;
@ -41,14 +41,27 @@ void CodeEmitContext::exit_block() {
}
// clear the expression stack and generate bytecode
void CodeEmitContext::emit_expr(bool emit) {
assert(s_expr.size() == 1);
void CodeEmitContext::emit_expr(bool emit) noexcept{
// assert(s_expr.size() == 1);
Expr* e = s_expr.popx_back();
if(emit) e->emit_(this);
delete_expr(e);
}
int CodeEmitContext::emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual) {
void CodeEmitContext::emit_decorators(int count) noexcept{
// [obj]
for(int i=0; i<count; i++) {
Expr* deco = s_expr.popx_back();
deco->emit_(this); // [obj, f]
emit_(OP_ROT_TWO, BC_NOARG, deco->line); // [f, obj]
emit_(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); // [f, obj, NULL]
emit_(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); // [obj, NULL, f]
emit_(OP_CALL, 1, deco->line); // [obj]
delete_expr(deco);
}
}
int CodeEmitContext::emit_(Opcode opcode, uint16_t arg, int line, bool is_virtual) noexcept{
co->codes.push_back(Bytecode{(uint8_t)opcode, arg});
co->lines.push_back(CodeObject::LineInfo{line, is_virtual, curr_iblock});
int i = co->codes.size() - 1;
@ -61,12 +74,12 @@ int CodeEmitContext::emit_(Opcode opcode, uint16_t arg, int line, bool is_virtua
return i;
}
void CodeEmitContext::revert_last_emit_() {
// Drops the most recently emitted instruction together with its line record.
void CodeEmitContext::revert_last_emit_() noexcept{
    co->lines.pop_back();
    co->codes.pop_back();
}
void CodeEmitContext::try_merge_for_iter_store(int i) {
void CodeEmitContext::try_merge_for_iter_store(int i) noexcept{
// [FOR_ITER, STORE_?, ]
if(co->codes[i].op != OP_FOR_ITER) return;
if(co->codes.size() - i != 2) return;
@ -85,7 +98,7 @@ void CodeEmitContext::try_merge_for_iter_store(int i) {
}
}
int CodeEmitContext::emit_int(i64 value, int line) {
int CodeEmitContext::emit_int(i64 value, int line) noexcept{
if(is_small_int(value)) {
return emit_(OP_LOAD_SMALL_INT, (uint16_t)value, line);
} else {
@ -93,18 +106,18 @@ int CodeEmitContext::emit_int(i64 value, int line) {
}
}
void CodeEmitContext::patch_jump(int index) {
// Sets the signed argument of the instruction at `index` to the distance from
// `index` to the current end of the emitted code.
void CodeEmitContext::patch_jump(int index) noexcept{
    const int end_pos = co->codes.size();
    const int offset = end_pos - index;
    co->codes[index].set_signed_arg(offset);
}
bool CodeEmitContext::add_label(StrName name) {
// Registers `name` in `co->labels`, mapped to the current code length.
// Returns false, leaving the table untouched, when the name already exists.
bool CodeEmitContext::add_label(StrName name) noexcept{
    const bool is_new = !co->labels.contains(name);
    if(is_new) {
        co->labels.insert(name, co->codes.size());
    }
    return is_new;
}
int CodeEmitContext::add_varname(StrName name) {
int CodeEmitContext::add_varname(StrName name) noexcept{
// PK_MAX_CO_VARNAMES will be checked when pop_context(), not here
int index = co->varnames_inv.get(name, -1);
if(index >= 0) return index;
@ -115,7 +128,7 @@ int CodeEmitContext::add_varname(StrName name) {
return index;
}
int CodeEmitContext::add_const_string(std::string_view key) {
int CodeEmitContext::add_const_string(std::string_view key) noexcept{
int* val = _co_consts_string_dedup_map.try_get(key);
if(val) {
return *val;
@ -128,7 +141,7 @@ int CodeEmitContext::add_const_string(std::string_view key) {
}
}
int CodeEmitContext::add_const(PyVar v) {
int CodeEmitContext::add_const(PyVar v) noexcept{
assert(!is_type(v, VM::tp_str));
// non-string deduplication
int* val = _co_consts_nonstring_dedup_map.try_get(v);
@ -142,12 +155,12 @@ int CodeEmitContext::add_const(PyVar v) {
}
}
int CodeEmitContext::add_func_decl(FuncDecl_ decl) {
// Appends `decl` to `co->func_decls` and returns the index it was stored at.
int CodeEmitContext::add_func_decl(FuncDecl_ decl) noexcept{
    auto& decls = co->func_decls;
    decls.push_back(decl);
    return decls.size() - 1;
}
void CodeEmitContext::emit_store_name(NameScope scope, StrName name, int line) {
void CodeEmitContext::emit_store_name(NameScope scope, StrName name, int line) noexcept{
switch(scope) {
case NAME_LOCAL: emit_(OP_STORE_FAST, add_varname(name), line); break;
case NAME_GLOBAL: emit_(OP_STORE_GLOBAL, StrName(name).index, line); break;
@ -321,8 +334,8 @@ void DictItemExpr::emit_(CodeEmitContext* ctx) {
assert(key == nullptr);
value->emit_(ctx);
} else {
key->emit_(ctx);
value->emit_(ctx);
key->emit_(ctx); // reverse order
ctx->emit_(OP_BUILD_TUPLE, 2, line);
}
}
@ -378,7 +391,7 @@ bool TupleExpr::emit_del(CodeEmitContext* ctx) {
}
void CompExpr::emit_(CodeEmitContext* ctx) {
ctx->emit_(op0(), 0, line);
ctx->emit_(op0, 0, line);
iter->emit_(ctx);
ctx->emit_(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
ctx->enter_block(CodeBlockType::FOR_LOOP);
@ -392,11 +405,11 @@ void CompExpr::emit_(CodeEmitContext* ctx) {
cond->emit_(ctx);
int patch = ctx->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
expr->emit_(ctx);
ctx->emit_(op1(), BC_NOARG, BC_KEEPLINE);
ctx->emit_(op1, BC_NOARG, BC_KEEPLINE);
ctx->patch_jump(patch);
} else {
expr->emit_(ctx);
ctx->emit_(op1(), BC_NOARG, BC_KEEPLINE);
ctx->emit_(op1, BC_NOARG, BC_KEEPLINE);
}
ctx->emit_(OP_LOOP_CONTINUE, curr_iblock, BC_KEEPLINE);
ctx->exit_block();

View File

@ -1,4 +1,6 @@
#include "pocketpy/compiler/lexer.hpp"
#include "pocketpy/common/gil.hpp"
#include "pocketpy/common/version.h"
namespace pkpy {
@ -7,7 +9,7 @@ static const uint32_t kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,164
static const uint32_t kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,703
01,70312,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101};
// clang-format on
static bool is_possible_number_char(char c) {
static bool is_possible_number_char(char c) noexcept{
switch(c) {
// clang-format off
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
@ -20,7 +22,7 @@ static bool is_possible_number_char(char c) {
}
}
static bool is_unicode_Lo_char(uint32_t c) {
static bool is_unicode_Lo_char(uint32_t c) noexcept{
// open a hole for carrot
if(c == U'🥕') return true;
auto index = std::lower_bound(kLoRangeA, kLoRangeA + 476, c) - kLoRangeA;
@ -30,7 +32,7 @@ static bool is_unicode_Lo_char(uint32_t c) {
return c >= kLoRangeA[index] && c <= kLoRangeB[index];
}
bool Lexer::match_n_chars(int n, char c0) {
bool Lexer::match_n_chars(int n, char c0) noexcept{
const char* c = curr_char;
for(int i = 0; i < n; i++) {
if(*c == '\0') return false;
@ -42,7 +44,7 @@ bool Lexer::match_n_chars(int n, char c0) {
return true;
}
bool Lexer::match_string(const char* s) {
bool Lexer::match_string(const char* s) noexcept{
int s_len = strlen(s);
bool ok = strncmp(curr_char, s, s_len) == 0;
if(ok)
@ -51,7 +53,7 @@ bool Lexer::match_string(const char* s) {
return ok;
}
int Lexer::eat_spaces() {
int Lexer::eat_spaces() noexcept{
int count = 0;
while(true) {
switch(peekchar()) {
@ -63,7 +65,7 @@ int Lexer::eat_spaces() {
}
}
bool Lexer::eat_indentation() {
bool Lexer::eat_indentation() noexcept{
if(brackets_level > 0) return true;
int spaces = eat_spaces();
if(peekchar() == '#') skip_line_comment();
@ -82,14 +84,14 @@ bool Lexer::eat_indentation() {
return true;
}
char Lexer::eatchar() {
// Returns the character currently under the cursor and advances the cursor by one.
// A newline must never be consumed through this function (checked by the assert).
char Lexer::eatchar() noexcept{
    const char current = peekchar();
    assert(current != '\n');  // newlines are not allowed to pass through eatchar()
    ++curr_char;
    return current;
}
char Lexer::eatchar_include_newline() {
char Lexer::eatchar_include_newline() noexcept{
char c = peekchar();
curr_char++;
if(c == '\n') {
@ -99,12 +101,12 @@ char Lexer::eatchar_include_newline() {
return c;
}
int Lexer::eat_name() {
Error* Lexer::eat_name() noexcept{
curr_char--;
while(true) {
unsigned char c = peekchar();
int u8bytes = utf8len(c, true);
if(u8bytes == 0) return 1;
if(u8bytes == 0) return SyntaxError("invalid char: %c", c);
if(u8bytes == 1) {
if(isalpha(c) || c == '_' || isdigit(c)) {
curr_char++;
@ -115,7 +117,7 @@ int Lexer::eat_name() {
}
// handle multibyte char
Str u8str(curr_char, u8bytes);
if(u8str.size != u8bytes) return 2;
if(u8str.size != u8bytes) return SyntaxError("invalid utf8 sequence: %s", u8str.c_str());
uint32_t value = 0;
for(int k = 0; k < u8bytes; k++) {
uint8_t b = u8str[k];
@ -137,7 +139,7 @@ int Lexer::eat_name() {
}
int length = (int)(curr_char - token_start);
if(length == 0) return 3;
if(length == 0) return SyntaxError("@id contains invalid char");
std::string_view name(token_start, length);
if(src->mode == JSON_MODE) {
@ -148,9 +150,9 @@ int Lexer::eat_name() {
} else if(name == "null") {
add_token(TK("None"));
} else {
return 4;
return SyntaxError("invalid JSON token");
}
return 0;
return NULL;
}
const auto KW_BEGIN = kTokens + TK("False");
@ -162,10 +164,10 @@ int Lexer::eat_name() {
} else {
add_token(TK("@id"));
}
return 0;
return NULL;
}
void Lexer::skip_line_comment() {
void Lexer::skip_line_comment() noexcept{
char c;
while((c = peekchar()) != '\0') {
if(c == '\n') return;
@ -173,13 +175,13 @@ void Lexer::skip_line_comment() {
}
}
bool Lexer::matchchar(char c) {
// If the next character equals `c`, consumes it (newline-aware) and returns true;
// otherwise leaves the cursor where it is and returns false.
bool Lexer::matchchar(char c) noexcept{
    if(peekchar() == c) {
        eatchar_include_newline();
        return true;
    }
    return false;
}
void Lexer::add_token(TokenIndex type, TokenValue value) {
void Lexer::add_token(TokenIndex type, TokenValue value) noexcept{
switch(type) {
case TK("{"):
case TK("["):
@ -213,14 +215,14 @@ void Lexer::add_token(TokenIndex type, TokenValue value) {
}
}
void Lexer::add_token_2(char c, TokenIndex one, TokenIndex two) {
// Emits one of two tokens depending on the next character:
// `two` when the next character is `c` (which is then consumed), `one` otherwise.
void Lexer::add_token_2(char c, TokenIndex one, TokenIndex two) noexcept{
    add_token(matchchar(c) ? two : one);
}
Str Lexer::eat_string_until(char quote, bool raw) {
Error* Lexer::eat_string_until(char quote, bool raw, Str* out) noexcept{
bool quote3 = match_n_chars(2, quote);
small_vector_2<char, 32> buff;
while(true) {
@ -233,12 +235,12 @@ Str Lexer::eat_string_until(char quote, bool raw) {
break;
}
if(c == '\0') {
if(quote3 && src->mode == REPL_MODE) { throw NeedMoreLines(false); }
SyntaxError("EOL while scanning string literal");
if(quote3 && src->mode == REPL_MODE) return NeedMoreLines();
return SyntaxError("EOL while scanning string literal");
}
if(c == '\n') {
if(!quote3)
SyntaxError("EOL while scanning string literal");
return SyntaxError("EOL while scanning string literal");
else {
buff.push_back(c);
continue;
@ -259,33 +261,37 @@ Str Lexer::eat_string_until(char quote, bool raw) {
char code;
try {
code = (char)std::stoi(hex, &parsed, 16);
} catch(...) { SyntaxError("invalid hex char"); }
if(parsed != 2) SyntaxError("invalid hex char");
} catch(...) {
return SyntaxError("invalid hex char");
}
if(parsed != 2) return SyntaxError("invalid hex char");
buff.push_back(code);
} break;
default: SyntaxError("invalid escape char");
default: return SyntaxError("invalid escape char");
}
} else {
buff.push_back(c);
}
}
return Str(buff.data(), buff.size());
*out = Str(buff.data(), buff.size());
return nullptr;
}
void Lexer::eat_string(char quote, StringType type) {
Str s = eat_string_until(quote, type == RAW_STRING);
if(type == F_STRING) {
Error* Lexer::eat_string(char quote, StringType type) noexcept{
Str s;
Error* err = eat_string_until(quote, type == StringType::RAW_STRING, &s);
if(err) return err;
if(type == StringType::F_STRING) {
add_token(TK("@fstr"), s);
return;
}
if(type == NORMAL_BYTES) {
}else if(type == StringType::NORMAL_BYTES) {
add_token(TK("@bytes"), s);
return;
}else{
add_token(TK("@str"), s);
}
add_token(TK("@str"), s);
return NULL;
}
void Lexer::eat_number() {
Error* Lexer::eat_number() noexcept{
const char* i = token_start;
while(is_possible_number_char(*i))
i++;
@ -305,13 +311,13 @@ void Lexer::eat_number() {
// try long
if(i[-1] == 'L') {
add_token(TK("@long"));
return;
return NULL;
}
// try integer
i64 int_out;
switch(parse_uint(text, &int_out, -1)) {
case IntParsingResult::Success: add_token(TK("@num"), int_out); return;
case IntParsingResult::Overflow: SyntaxError("int literal is too large"); return;
case IntParsingResult::Success: add_token(TK("@num"), int_out); return NULL;
case IntParsingResult::Overflow: return SyntaxError("int literal is too large");
case IntParsingResult::Failure: break; // do nothing
}
}
@ -321,54 +327,61 @@ void Lexer::eat_number() {
char* p_end;
try {
float_out = std::strtod(text.data(), &p_end);
} catch(...) { SyntaxError("invalid number literal"); }
} catch(...) {
return SyntaxError("invalid number literal");
}
if(p_end == text.data() + text.size()) {
add_token(TK("@num"), (f64)float_out);
return;
return NULL;
}
if(i[-1] == 'j' && p_end == text.data() + text.size() - 1) {
add_token(TK("@imag"), (f64)float_out);
return;
return NULL;
}
SyntaxError("invalid number literal");
return SyntaxError("invalid number literal");
}
bool Lexer::lex_one_token() {
Error* Lexer::lex_one_token(bool* eof) noexcept{
*eof = false;
while(peekchar() != '\0') {
token_start = curr_char;
char c = eatchar_include_newline();
switch(c) {
case '\'':
case '"': eat_string(c, NORMAL_STRING); return true;
case '"': {
Error* err = eat_string(c, StringType::NORMAL_STRING);
if(err) return err;
return NULL;
}
case '#': skip_line_comment(); break;
case '~': add_token(TK("~")); return true;
case '{': add_token(TK("{")); return true;
case '}': add_token(TK("}")); return true;
case ',': add_token(TK(",")); return true;
case ':': add_token(TK(":")); return true;
case ';': add_token(TK(";")); return true;
case '(': add_token(TK("(")); return true;
case ')': add_token(TK(")")); return true;
case '[': add_token(TK("[")); return true;
case ']': add_token(TK("]")); return true;
case '@': add_token(TK("@")); return true;
case '~': add_token(TK("~")); return NULL;
case '{': add_token(TK("{")); return NULL;
case '}': add_token(TK("}")); return NULL;
case ',': add_token(TK(",")); return NULL;
case ':': add_token(TK(":")); return NULL;
case ';': add_token(TK(";")); return NULL;
case '(': add_token(TK("(")); return NULL;
case ')': add_token(TK(")")); return NULL;
case '[': add_token(TK("[")); return NULL;
case ']': add_token(TK("]")); return NULL;
case '@': add_token(TK("@")); return NULL;
case '\\': {
// line continuation character
char c = eatchar_include_newline();
if(c != '\n') {
if(src->mode == REPL_MODE && c == '\0') throw NeedMoreLines(false);
SyntaxError("expected newline after line continuation character");
if(src->mode == REPL_MODE && c == '\0') return NeedMoreLines();
return SyntaxError("expected newline after line continuation character");
}
eat_spaces();
return true;
return NULL;
}
case '%': add_token_2('=', TK("%"), TK("%=")); return true;
case '&': add_token_2('=', TK("&"), TK("&=")); return true;
case '|': add_token_2('=', TK("|"), TK("|=")); return true;
case '^': add_token_2('=', TK("^"), TK("^=")); return true;
case '%': add_token_2('=', TK("%"), TK("%=")); return NULL;
case '&': add_token_2('=', TK("&"), TK("&=")); return NULL;
case '|': add_token_2('=', TK("|"), TK("|=")); return NULL;
case '^': add_token_2('=', TK("^"), TK("^=")); return NULL;
case '.': {
if(matchchar('.')) {
if(matchchar('.')) {
@ -379,21 +392,22 @@ bool Lexer::lex_one_token() {
} else {
char next_char = peekchar();
if(next_char >= '0' && next_char <= '9') {
eat_number();
Error* err = eat_number();
if(err) return err;
} else {
add_token(TK("."));
}
}
return true;
return NULL;
}
case '=': add_token_2('=', TK("="), TK("==")); return true;
case '=': add_token_2('=', TK("="), TK("==")); return NULL;
case '+':
if(matchchar('+')) {
add_token(TK("++"));
} else {
add_token_2('=', TK("+"), TK("+="));
}
return true;
return NULL;
case '>': {
if(matchchar('='))
add_token(TK(">="));
@ -401,7 +415,7 @@ bool Lexer::lex_one_token() {
add_token_2('=', TK(">>"), TK(">>="));
else
add_token(TK(">"));
return true;
return NULL;
}
case '<': {
if(matchchar('='))
@ -410,7 +424,7 @@ bool Lexer::lex_one_token() {
add_token_2('=', TK("<<"), TK("<<="));
else
add_token(TK("<"));
return true;
return NULL;
}
case '-': {
if(matchchar('-')) {
@ -423,13 +437,15 @@ bool Lexer::lex_one_token() {
else
add_token(TK("-"));
}
return true;
return NULL;
}
case '!':
if(matchchar('='))
if(matchchar('=')){
add_token(TK("!="));
else
SyntaxError("expected '=' after '!'");
}else{
Error* err = SyntaxError("expected '=' after '!'");
if(err) return err;
}
break;
case '*':
if(matchchar('*')) {
@ -437,63 +453,36 @@ bool Lexer::lex_one_token() {
} else {
add_token_2('=', TK("*"), TK("*="));
}
return true;
return NULL;
case '/':
if(matchchar('/')) {
add_token_2('=', TK("//"), TK("//="));
} else {
add_token_2('=', TK("/"), TK("/="));
}
return true;
return NULL;
case ' ':
case '\t': eat_spaces(); break;
case '\n': {
add_token(TK("@eol"));
if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level");
return true;
if(!eat_indentation()){
return IndentationError("unindent does not match any outer indentation level");
}
return NULL;
}
default: {
if(c == 'f') {
if(matchchar('\'')) {
eat_string('\'', F_STRING);
return true;
}
if(matchchar('"')) {
eat_string('"', F_STRING);
return true;
}
if(matchchar('\'')) return eat_string('\'', StringType::F_STRING);
if(matchchar('"')) return eat_string('"', StringType::F_STRING);
} else if(c == 'r') {
if(matchchar('\'')) {
eat_string('\'', RAW_STRING);
return true;
}
if(matchchar('"')) {
eat_string('"', RAW_STRING);
return true;
}
if(matchchar('\'')) return eat_string('\'', StringType::RAW_STRING);
if(matchchar('"')) return eat_string('"', StringType::RAW_STRING);
} else if(c == 'b') {
if(matchchar('\'')) {
eat_string('\'', NORMAL_BYTES);
return true;
}
if(matchchar('"')) {
eat_string('"', NORMAL_BYTES);
return true;
}
if(matchchar('\'')) return eat_string('\'', StringType::NORMAL_BYTES);
if(matchchar('"')) return eat_string('"', StringType::NORMAL_BYTES);
}
if(c >= '0' && c <= '9') {
eat_number();
return true;
}
switch(eat_name()) {
case 0: break;
case 1: SyntaxError("invalid char: " + std::string(1, c)); break;
case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c)); break;
case 3: SyntaxError("@id contains invalid char"); break;
case 4: SyntaxError("invalid JSON token"); break;
default: assert(false);
}
return true;
if(c >= '0' && c <= '9') return eat_number();
return eat_name();
}
}
}
@ -502,38 +491,227 @@ bool Lexer::lex_one_token() {
while(indents.size() > 1) {
indents.pop_back();
add_token(TK("@dedent"));
return true;
return NULL;
}
add_token(TK("@eof"));
return false;
*eof = true;
return NULL;
}
void Lexer::throw_err(StrName type, Str msg) {
int lineno = current_line;
const char* cursor = curr_char;
if(peekchar() == '\n') {
lineno--;
cursor--;
Error* Lexer::_error(bool lexer_err, const char* type, const char* msg, va_list args, i64 userdata) noexcept{
PK_THREAD_LOCAL Error err;
err.type = type;
err.src = src.get();
if(lexer_err){
err.lineno = current_line;
err.cursor = curr_char;
if(*curr_char == '\n') {
err.lineno--;
err.cursor--;
}
}else{
err.lineno = -1;
err.cursor = NULL;
}
throw_err(type, msg, lineno, cursor);
vsnprintf(err.msg, sizeof(err.msg), msg, args);
err.userdata = userdata;
return &err;
}
Lexer::Lexer(VM* vm, std::shared_ptr<SourceData> src) : vm(vm), src(src) {
// Builds a "SyntaxError" for the current lexer position.
// `fmt` and the variadic arguments are printf-style; they are forwarded to _error()
// with lexer_err = true. Returns whatever _error() returns.
Error* Lexer::SyntaxError(const char* fmt, ...) noexcept{
    va_list ap;
    va_start(ap, fmt);
    Error* const result = _error(true, "SyntaxError", fmt, ap);
    va_end(ap);
    return result;
}
// Sets up a lexer over `src`: both cursors start at the beginning of the source text,
// the token list is seeded with a "@sof" token, and the indentation stack with level 0.
Lexer::Lexer(VM* vm, std::shared_ptr<SourceData> src) noexcept : vm(vm), src(src){
    const char* const source_begin = src->source.c_str();
    token_start = source_begin;
    curr_char = source_begin;
    nexts.push_back(Token{TK("@sof"), token_start, 0, current_line, brackets_level, {}});
    indents.push_back(0);
}
vector<Token> Lexer::run() {
Error* Lexer::run() noexcept{
if(src->is_precompiled) {
from_precompiled();
return NULL;
}
assert(curr_char == src->source.c_str());
while(lex_one_token())
;
return std::move(nexts);
bool eof = false;
while(!eof) {
Error* err = lex_one_token(&eof);
if(err) return err;
}
return NULL;
}
void Lexer::from_precompiled() {
TokenDeserializer deserializer(src->source.c_str());
deserializer.curr += 5; // skip "pkpy:"
std::string_view version = deserializer.read_string('\n');
IntParsingResult parse_uint(std::string_view text, i64* out, int base) {
assert(version == PK_VERSION);
assert(deserializer.read_uint('\n') == (i64)src->mode);
int count = deserializer.read_count();
vector<Str>& precompiled_tokens = src->_precompiled_tokens;
for(int i = 0; i < count; i++) {
precompiled_tokens.push_back(deserializer.read_string('\n'));
}
count = deserializer.read_count();
for(int i = 0; i < count; i++) {
Token t;
t.type = (unsigned char)deserializer.read_uint(',');
if(is_raw_string_used(t.type)) {
i64 index = deserializer.read_uint(',');
t.start = precompiled_tokens[index].c_str();
t.length = precompiled_tokens[index].size;
} else {
t.start = nullptr;
t.length = 0;
}
if(deserializer.match_char(',')) {
t.line = nexts.back().line;
} else {
t.line = (int)deserializer.read_uint(',');
}
if(deserializer.match_char(',')) {
t.brackets_level = nexts.back().brackets_level;
} else {
t.brackets_level = (int)deserializer.read_uint(',');
}
char type = deserializer.read_char();
switch(type) {
case 'I': t.value = deserializer.read_uint('\n'); break;
case 'F': t.value = deserializer.read_float('\n'); break;
case 'S': t.value = deserializer.read_string_from_hex('\n'); break;
default: t.value = {}; break;
}
nexts.push_back(t);
}
}
// Lexes the source (via run()) and serializes the resulting token stream into *out.
// Returns the Error* reported by run() on failure (*out is then left untouched),
// or NULL on success.
//
// Emitted text layout:
//   line 1:  "pkpy:" PK_VERSION
//   line 2:  compile mode as an integer
//   then:    "=" <raw string count>, followed by one raw string per line
//   then:    "=" <token count>, followed by one record per token:
//              <type> ',' [<raw string index> ','] <line> ',' <brackets_level> ',' <value>
//            An empty <line> / <brackets_level> field means "same as the previous token".
//            <value> is 'I'<int>, 'F'<float>, 'S'<hex-encoded bytes>, or empty.
Error* Lexer::precompile(Str* out) {
    assert(!src->is_precompiled);
    Error* err = run();
    if(err) return err;
    SStream ss;
    ss << "pkpy:" PK_VERSION << '\n';  // line 1: version string
    ss << (int)src->mode << '\n';      // line 2: mode

    // Collect the distinct raw strings referenced by tokens.
    small_map<std::string_view, int> token_indices;
    // Iterate by const reference: copying a Token would also copy its value payload.
    for(const auto& token: nexts) {
        if(is_raw_string_used(token.type)) {
            if(!token_indices.contains(token.sv())) {
                token_indices.insert(token.sv(), 0);
                // raw strings are written one per line, so they must not contain '\n'
                for(char c: token.sv())
                    assert(c != '\n');
            }
        }
    }
    ss << "=" << (int)token_indices.size() << '\n';  // raw string count
    int index = 0;
    for(auto& kv: token_indices) {
        ss << kv.first << '\n';  // raw strings, in map iteration order
        kv.second = index++;     // remember the position each string was written at
    }

    ss << "=" << (int)nexts.size() << '\n';  // token count
    for(int i = 0; i < nexts.size(); i++) {
        const Token& token = nexts[i];
        ss << (int)token.type << ',';
        if(is_raw_string_used(token.type)) { ss << token_indices[token.sv()] << ','; }

        // line / brackets_level are omitted when unchanged from the previous token
        if(i > 0 && nexts[i - 1].line == token.line)
            ss << ',';
        else
            ss << token.line << ',';

        if(i > 0 && nexts[i - 1].brackets_level == token.brackets_level)
            ss << ',';
        else
            ss << token.brackets_level << ',';

        // visit token value
        std::visit(
            [&ss](auto&& arg) {
                using T = std::decay_t<decltype(arg)>;
                if constexpr(std::is_same_v<T, i64>) {
                    ss << 'I' << arg;
                } else if constexpr(std::is_same_v<T, f64>) {
                    ss << 'F' << arg;
                } else if constexpr(std::is_same_v<T, Str>) {
                    ss << 'S';
                    for(char c: arg)
                        ss.write_hex((unsigned char)c);
                }
                ss << '\n';
            },
            token.value);
    }
    *out = ss.str();
    return NULL;
}
// Reads characters from the cursor up to (not including) the next occurrence of the
// delimiter `c`, returns them as a view into the underlying buffer, and leaves the
// cursor just past the delimiter.
// The delimiter must be present: running into the terminating NUL first means the
// input is malformed, which is caught by the assert in debug builds.
std::string_view TokenDeserializer::read_string(char c) {
    const char* start = curr;
    while(*curr != c) {
        assert(*curr != '\0');  // delimiter missing: do not scan past the end of the input
        curr++;
    }
    std::string_view retval(start, curr - start);
    curr++;  // skip the delimiter
    return retval;
}
// Reads a lowercase-hex encoded byte string terminated by delimiter `c` and decodes it.
// Every two hex digits produce one byte. The decoded bytes are placed in a malloc'ed,
// NUL-terminated buffer that is handed over to the returned Str (the pair constructor
// of Str takes ownership).
// Malformed input (odd digit count, or a character outside [0-9a-f]) trips an assert.
Str TokenDeserializer::read_string_from_hex(char c) {
    std::string_view s = read_string(c);
    assert(s.size() % 2 == 0);  // hex digits come in pairs
    const int n_bytes = (int)(s.size() / 2);
    char* buffer = (char*)std::malloc(n_bytes + 1);

    // Decodes one hex digit; an invalid digit contributes 0 when asserts are disabled.
    auto nibble = [](char h) -> unsigned {
        if(h >= '0' && h <= '9') return (unsigned)(h - '0');
        if(h >= 'a' && h <= 'f') return (unsigned)(h - 'a' + 10);
        assert(false);
        return 0;
    };

    for(int i = 0; i < n_bytes; i++) {
        const unsigned hi = nibble(s[2 * i]);
        const unsigned lo = nibble(s[2 * i + 1]);
        buffer[i] = (char)((hi << 4) | lo);
    }
    buffer[n_bytes] = 0;
    return pair<char*, int>(buffer, n_bytes);
}
int TokenDeserializer::read_count() {
assert(*curr == '=');
curr++;
return read_uint('\n');
}
// Parses a base-10 number that runs from the cursor up to the delimiter `c`,
// then moves the cursor past the delimiter. Characters are not validated.
i64 TokenDeserializer::read_uint(char c) {
    i64 value = 0;
    for(; *curr != c; ++curr) {
        value = value * 10 + (*curr - '0');
    }
    ++curr;  // step over the delimiter
    return value;
}
// Reads the text up to delimiter `c` and converts it to a floating-point value
// with std::stod (which needs an owning std::string, hence the copy).
f64 TokenDeserializer::read_float(char c) {
    const std::string text(read_string(c));
    return std::stod(text);
}
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept{
*out = 0;
if(base == -1) {

View File

@ -1455,7 +1455,7 @@ void VM::__raise_exc(bool re_raise) {
int current_line = frame->co->lines[actual_ip].lineno; // current line
auto current_f_name = frame->co->name.sv(); // current function name
if(frame->_callable == nullptr) current_f_name = ""; // not in a function
e.st_push(frame->co->src, current_line, nullptr, current_f_name);
e.st_push(frame->co->src.get(), current_line, nullptr, current_f_name);
if(next_ip >= 0) {
throw InternalException(InternalExceptionType::Handled, next_ip);

View File

@ -25,7 +25,7 @@ SourceData::SourceData(const Str& filename, CompileMode mode) : filename(filenam
line_starts.push_back(this->source.c_str());
}
std::pair<const char*, const char*> SourceData::_get_line(int lineno) const {
pair<const char*, const char*> SourceData::_get_line(int lineno) const {
if(is_precompiled || lineno == -1) return {nullptr, nullptr};
lineno -= 1;
if(lineno < 0) lineno = 0;
@ -49,7 +49,7 @@ Str SourceData::snapshot(int lineno, const char* cursor, std::string_view name)
if(!name.empty()) ss << ", in " << name;
if(!is_precompiled) {
ss << '\n';
std::pair<const char*, const char*> pair = _get_line(lineno);
pair<const char*, const char*> pair = _get_line(lineno);
Str line = "<?>";
int removed_spaces = 0;
if(pair.first && pair.second) {

View File

@ -1723,22 +1723,29 @@ void VM::__post_init_builtin_types() {
CodeObject_ VM::compile(std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope) {
Compiler compiler(this, source, filename, mode, unknown_global_scope);
try {
return compiler.compile();
} catch(TopLevelException e) {
_error(e.ptr->self());
return nullptr;
}
CodeObject_ code;
Error* err = compiler.compile(&code);
if(err) __compile_error(err);
return code;
}
// Converts a compile-time Error record into a VM exception object and reports it.
// `err` must be non-null. Its `type` names the builtin that is called to create the
// exception, and its `msg` is passed as the single argument.
void VM::__compile_error(Error* err){
assert(err != nullptr);
// Look up the builtin named by err->type and call it with the message;
// the resulting object is stored as the VM's last exception.
__last_exception = vm->call(
vm->builtins->attr(err->type),
VAR((const char*)err->msg)
).get();
Exception& e = __last_exception->as<Exception>();
// Attach the error location (source, line, cursor) with an empty function name.
// NOTE(review): st_push presumably appends a stack-trace entry — confirm.
e.st_push(err->src, err->lineno, err->cursor, "");
// NOTE(review): _error presumably raises the exception inside the VM — confirm.
_error(__last_exception);
}
Str VM::precompile(std::string_view source, const Str& filename, CompileMode mode) {
Compiler compiler(this, source, filename, mode, false);
try {
return compiler.precompile();
} catch(TopLevelException e) {
_error(e.ptr->self());
return nullptr;
}
Str out;
Error* err = compiler.lexer.precompile(&out);
if(err) __compile_error(err);
return out;
}
} // namespace pkpy