diff --git a/include/pocketpy/common/memorypool.h b/include/pocketpy/common/memorypool.h index 10b4a20d..a8cc88f8 100644 --- a/include/pocketpy/common/memorypool.h +++ b/include/pocketpy/common/memorypool.h @@ -11,8 +11,8 @@ extern "C" { #define kPoolObjectArenaSize (256*1024) #define kPoolObjectMaxBlocks (kPoolObjectArenaSize / kPoolObjectBlockSize) -void Pools_initialize(); -void Pools_finalize(); +void pk_MemoryPools__initialize(); +void pk_MemoryPools__finalize(); void* PoolExpr_alloc(); void PoolExpr_dealloc(void*); diff --git a/include/pocketpy/compiler/compiler.h b/include/pocketpy/compiler/compiler.h index 8622a422..12df28d6 100644 --- a/include/pocketpy/compiler/compiler.h +++ b/include/pocketpy/compiler/compiler.h @@ -2,13 +2,16 @@ #include "pocketpy/common/vector.h" #include "pocketpy/compiler/lexer.h" +#include "pocketpy/objects/sourcedata.h" +#include "pocketpy/objects/codeobject.h" #ifdef __cplusplus extern "C" { #endif -Error* pk_compile(pk_SourceData_ src); - +Error* pk_compile(pk_SourceData_ src, CodeObject* out); +void pk_Compiler__initialize(); +#define pk_Compiler__finalize() // do nothing #ifdef __cplusplus } diff --git a/include/pocketpy/compiler/expr.h b/include/pocketpy/compiler/expr.h index e20cfae6..1ec5065c 100644 --- a/include/pocketpy/compiler/expr.h +++ b/include/pocketpy/compiler/expr.h @@ -1,52 +1,63 @@ -// #pragma once +#pragma once -// #include -// #include "pocketpy/common/memorypool.h" -// #include "pocketpy/compiler/lexer.h" +#include +#include "pocketpy/common/memorypool.h" +#include "pocketpy/compiler/lexer.h" +#include "pocketpy/objects/codeobject.h" -// #ifdef __cplusplus -// extern "C" { -// #endif +#ifdef __cplusplus +extern "C" { +#endif -// struct pk_Expr; -// struct pk_CodeEmitContext; +typedef struct pk_Expr pk_Expr; +typedef struct pk_CodeEmitContext pk_CodeEmitContext; -// struct pk_ExprVt{ -// void (*dtor)(pk_Expr*); -// /* reflections */ -// bool (*is_literal)(const pk_Expr*); -// bool (*is_json_object)(const 
pk_Expr*); -// bool (*is_attrib)(const pk_Expr*); -// bool (*is_subscr)(const pk_Expr*); -// bool (*is_compare)(const pk_Expr*); -// int (*star_level)(const pk_Expr*); -// bool (*is_tuple)(const pk_Expr*); -// bool (*is_name)(const pk_Expr*); -// /* emit */ -// void (*emit_)(pk_Expr*, pk_CodeEmitContext*); -// bool (*emit_del)(pk_Expr*, pk_CodeEmitContext*); -// bool (*emit_store)(pk_Expr*, pk_CodeEmitContext*); -// void (*emit_inplace)(pk_Expr*, pk_CodeEmitContext*); -// bool (*emit_store_inplace)(pk_Expr*, pk_CodeEmitContext*); -// }; +typedef struct pk_ExprVt{ + void (*dtor)(pk_Expr*); + /* reflections */ + bool (*is_literal)(const pk_Expr*); + bool (*is_json_object)(const pk_Expr*); + bool (*is_attrib)(const pk_Expr*); + bool (*is_subscr)(const pk_Expr*); + bool (*is_compare)(const pk_Expr*); + int (*star_level)(const pk_Expr*); + bool (*is_tuple)(const pk_Expr*); + bool (*is_name)(const pk_Expr*); + /* emit */ + void (*emit_)(pk_Expr*, pk_CodeEmitContext*); + bool (*emit_del)(pk_Expr*, pk_CodeEmitContext*); + bool (*emit_store)(pk_Expr*, pk_CodeEmitContext*); + void (*emit_inplace)(pk_Expr*, pk_CodeEmitContext*); + bool (*emit_store_inplace)(pk_Expr*, pk_CodeEmitContext*); +} pk_ExprVt; -// typedef struct pk_Expr{ -// pk_ExprVt* vt; -// int line; -// } pk_Expr; +typedef struct pk_Expr{ + pk_ExprVt* vt; + int line; +} pk_Expr; -// void pk_ExprVt__ctor(pk_ExprVt* vt); -// void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx); -// bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx); -// bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx); -// void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx); -// bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx); -// void pk_Expr__delete(pk_Expr* self); +void pk_ExprVt__ctor(pk_ExprVt* vt); +void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx); +bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx); +bool pk_Expr__emit_store(pk_Expr* self, 
pk_CodeEmitContext* ctx); +void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx); +bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx); +void pk_Expr__delete(pk_Expr* self); -// typedef struct pk_CodeEmitContext{ +typedef struct pk_CodeEmitContext{ + CodeObject* co; // 1 CodeEmitContext <=> 1 CodeObject* + FuncDecl* func; // optional, weakref + int level; + int curr_iblock; + bool is_compiling_class; + c11_vector/*T=Expr* */ s_expr; + c11_vector/*T=StrName*/ global_names; + c11_smallmap_s2n co_consts_string_dedup_map; +} pk_CodeEmitContext; -// } pk_CodeEmitContext; +void pk_CodeEmitContext__ctor(pk_CodeEmitContext* self, CodeObject* co, FuncDecl* func, int level); +void pk_CodeEmitContext__dtor(pk_CodeEmitContext* self); -// #ifdef __cplusplus -// } -// #endif +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/compiler/expr.hpp b/include/pocketpy/compiler/expr.hpp index b5fe06a9..3ce215b9 100644 --- a/include/pocketpy/compiler/expr.hpp +++ b/include/pocketpy/compiler/expr.hpp @@ -53,16 +53,16 @@ struct CodeEmitContext{ int level; vector global_names; - CodeEmitContext(VM* vm, CodeObject* co, int level) : vm(vm), co(co), level(level) { - func = NULL; - c11_smallmap_s2n__ctor(&_co_consts_string_dedup_map); - } - int curr_iblock = 0; bool is_compiling_class = false; c11_smallmap_s2n _co_consts_string_dedup_map; + CodeEmitContext(VM* vm, CodeObject* co, int level) : vm(vm), co(co), level(level) { + func = NULL; + c11_smallmap_s2n__ctor(&_co_consts_string_dedup_map); + } + int get_loop() const noexcept; CodeBlock* enter_block(CodeBlockType type) noexcept; void exit_block() noexcept; diff --git a/include/pocketpy/compiler/lexer.h b/include/pocketpy/compiler/lexer.h index ceeca13d..416e1e00 100644 --- a/include/pocketpy/compiler/lexer.h +++ b/include/pocketpy/compiler/lexer.h @@ -1,6 +1,7 @@ #pragma once #include "pocketpy/common/str.h" +#include "pocketpy/common/vector.h" #include "pocketpy/objects/sourcedata.h" #include @@ 
-96,8 +97,11 @@ IntParsingResult parse_uint(c11_string text, int64_t* out, int base); typedef struct Error Error; -Error* pk_Lexer__process(pk_SourceData_ src, c11_array* out_tokens); +typedef c11_array pk_TokenArray; + +Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens); Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out_string); +void pk_TokenArray__dtor(pk_TokenArray* self); #ifdef __cplusplus } diff --git a/include/pocketpy/compiler/lexer.hpp b/include/pocketpy/compiler/lexer.hpp deleted file mode 100644 index 68381eb7..00000000 --- a/include/pocketpy/compiler/lexer.hpp +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include "pocketpy/objects/error.hpp" -#include "pocketpy/objects/sourcedata.h" -#include "pocketpy/compiler/lexer.h" - -#include - -namespace pkpy { - - -struct Lexer { - PK_ALWAYS_PASS_BY_POINTER(Lexer) - - VM* vm; - pkpy_SourceData_ src; - const char* token_start; - const char* curr_char; - int current_line = 1; - vector nexts; - small_vector_2 indents; - int brackets_level = 0; - bool used = false; - - char peekchar() const noexcept { return *curr_char; } - - bool match_n_chars(int n, char c0) noexcept; - bool match_string(const char* s) noexcept; - int eat_spaces() noexcept; - - bool eat_indentation() noexcept; - char eatchar() noexcept; - char eatchar_include_newline() noexcept; - void skip_line_comment() noexcept; - bool matchchar(char c) noexcept; - void add_token(TokenIndex type, TokenValue value = {}) noexcept; - void add_token_2(char c, TokenIndex one, TokenIndex two) noexcept; - - [[nodiscard]] Error* eat_name() noexcept; - [[nodiscard]] Error* eat_string_until(char quote, bool raw, Str* out) noexcept; - [[nodiscard]] Error* eat_string(char quote, StringType type) noexcept; - [[nodiscard]] Error* eat_number() noexcept; - [[nodiscard]] Error* lex_one_token(bool* eof) noexcept; - - /***** Error Reporter *****/ - [[nodiscard]] Error* _error(bool lexer_err, const char* type, const char* msg, va_list* 
args, i64 userdata=0) noexcept; - [[nodiscard]] Error* SyntaxError(const char* fmt, ...) noexcept; - [[nodiscard]] Error* IndentationError(const char* msg) noexcept { return _error(true, "IndentationError", msg, NULL); } - [[nodiscard]] Error* NeedMoreLines() noexcept { return _error(true, "NeedMoreLines", "", NULL, 0); } - - [[nodiscard]] Error* run() noexcept; - [[nodiscard]] Error* from_precompiled() noexcept; - [[nodiscard]] Error* precompile(Str* out) noexcept; - - Lexer(VM* vm, std::string_view source, const Str& filename, CompileMode mode) noexcept{ - src = pkpy_SourceData__rcnew({source.data(), (int)source.size()}, &filename, mode); - this->token_start = py_Str__data(&src->source); - this->curr_char = py_Str__data(&src->source); - } - - ~Lexer(){ - PK_DECREF(src); - } -}; - -enum class IntParsingResult { - Success, - Failure, - Overflow, -}; - -IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept; - -} // namespace pkpy diff --git a/include/pocketpy/objects/codeobject.h b/include/pocketpy/objects/codeobject.h index cba87014..0264e6d6 100644 --- a/include/pocketpy/objects/codeobject.h +++ b/include/pocketpy/objects/codeobject.h @@ -28,7 +28,7 @@ typedef enum FuncType { typedef enum NameScope { NAME_LOCAL, NAME_GLOBAL, - NAME_GLOBAL_UNKNOWN + NAME_GLOBAL_UNKNOWN, } NameScope; typedef enum CodeBlockType { @@ -88,8 +88,8 @@ typedef struct CodeObject { int end_line; } CodeObject; -CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name); -void CodeObject__delete(CodeObject* self); +void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_string name); +void CodeObject__dtor(CodeObject* self); void CodeObject__gc_mark(const CodeObject* self); typedef struct FuncDeclKwArg{ @@ -100,7 +100,7 @@ typedef struct FuncDeclKwArg{ typedef struct FuncDecl { RefCounted rc; - CodeObject* code; // strong ref + CodeObject code; // strong ref c11_vector/*T=int*/ args; // indices in co->varnames c11_vector/*T=KwArg*/ kwargs; // indices in 
co->varnames diff --git a/include/pocketpy/objects/sourcedata.h b/include/pocketpy/objects/sourcedata.h index 92da49f8..554b0539 100644 --- a/include/pocketpy/objects/sourcedata.h +++ b/include/pocketpy/objects/sourcedata.h @@ -15,6 +15,7 @@ struct pk_SourceData { RefCounted rc; enum CompileMode mode; bool is_precompiled; + bool is_dynamic; // for exec() and eval() py_Str filename; py_Str source; @@ -25,7 +26,7 @@ struct pk_SourceData { typedef struct pk_SourceData* pk_SourceData_; -pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode); +pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode, bool is_dynamic); bool pk_SourceData__get_line(const struct pk_SourceData* self, int lineno, const char** st, const char** ed); py_Str pk_SourceData__snapshot(const struct pk_SourceData *self, int lineno, const char *cursor, const char *name); diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c index 1cde6133..bd2719c3 100644 --- a/src/common/sourcedata.c +++ b/src/common/sourcedata.c @@ -4,10 +4,11 @@ #include #include -void pk_SourceData__ctor(struct pk_SourceData* self, +static void pk_SourceData__ctor(struct pk_SourceData* self, const char* source, const char* filename, - enum CompileMode mode) { + enum CompileMode mode, + bool is_dynamic) { py_Str__ctor(&self->filename, filename); self->mode = mode; c11_vector__ctor(&self->line_starts, sizeof(const char*)); @@ -30,7 +31,7 @@ void pk_SourceData__ctor(struct pk_SourceData* self, c11_vector__push(const char*, &self->line_starts, source); } -void pk_SourceData__dtor(struct pk_SourceData* self) { +static void pk_SourceData__dtor(struct pk_SourceData* self) { py_Str__dtor(&self->filename); py_Str__dtor(&self->source); c11_vector__dtor(&self->line_starts); @@ -41,9 +42,9 @@ void pk_SourceData__dtor(struct pk_SourceData* self) { c11_vector__dtor(&self->_precompiled_tokens); } -pk_SourceData_ pk_SourceData__rcnew(const char* 
source, const char* filename, enum CompileMode mode) { +pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode, bool is_dynamic) { pk_SourceData_ self = malloc(sizeof(struct pk_SourceData)); - pk_SourceData__ctor(self, source, filename, mode); + pk_SourceData__ctor(self, source, filename, mode, is_dynamic); self->rc.count = 1; self->rc.dtor = (void(*)(void*))pk_SourceData__dtor; return self; diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c index 9236623c..5d7d7e9c 100644 --- a/src/compiler/compiler.c +++ b/src/compiler/compiler.c @@ -1,20 +1,254 @@ #include "pocketpy/compiler/compiler.h" +#include "pocketpy/compiler/expr.h" +#include "pocketpy/compiler/lexer.h" -Error* pk_compile(pk_SourceData_ src){ - c11_array/*T=Token*/ tokens; +typedef struct pk_Compiler pk_Compiler; +typedef Error* (*PrattCallback)(pk_Compiler* self); + +typedef struct PrattRule { + PrattCallback prefix; + PrattCallback infix; + enum Precedence precedence; +} PrattRule; + +static PrattRule rules[TK__COUNT__]; + +typedef struct pk_Compiler { + pk_SourceData_ src; // weakref + pk_TokenArray tokens; + int i; + c11_vector/*T=CodeEmitContext*/ contexts; +} pk_Compiler; + +static void pk_Compiler__ctor(pk_Compiler *self, pk_SourceData_ src, pk_TokenArray tokens){ + self->src = src; + self->tokens = tokens; + self->i = 0; + c11_vector__ctor(&self->contexts, sizeof(pk_CodeEmitContext)); +} + +static void pk_Compiler__dtor(pk_Compiler *self){ + pk_TokenArray__dtor(&self->tokens); + c11_vector__dtor(&self->contexts); +} + +/**************************************/ +#define tk(i) c11__getitem(Token, &self->tokens, i) +#define prev() tk(self->i - 1) +#define curr() tk(self->i) +#define next() tk(self->i + 1) +#define err() (self->i == self->tokens.count ? 
prev() : curr()) + +#define advance() self->i++ +#define mode() self->src->mode +#define ctx() c11_vector__back(pk_CodeEmitContext, &self->contexts) + +#define match_newlines() match_newlines_repl(self, NULL) + +#define consume(expected) if(!match(expected)) return SyntaxError("expected '%s', got '%s'", pk_TokenSymbols[expected], pk_TokenSymbols[curr().type]); +#define consume_end_stmt() if(!match_end_stmt()) return SyntaxError("expected statement end") +#define check_newlines_repl() { bool __nml; match_newlines_repl(self, &__nml); if(__nml) return NeedMoreLines(); } +#define check(B) if((err = B)) return err + +#define match(expected) (curr().type == expected ? (++self->i) : 0) + +NameScope name_scope(pk_Compiler* self) { + NameScope s = self->contexts.count > 1 ? NAME_LOCAL : NAME_GLOBAL; + if(self->src->is_dynamic && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN; + return s; +} + +static Error* SyntaxError(const char* fmt, ...){ + return NULL; +} + +static Error* NeedMoreLines(){ + return NULL; +} + +bool match_newlines_repl(pk_Compiler* self, bool* need_more_lines){ + bool consumed = false; + if(curr().type == TK_EOL) { + while(curr().type == TK_EOL) advance(); + consumed = true; + } + if(need_more_lines) { + *need_more_lines = (mode() == REPL_MODE && curr().type == TK_EOF); + } + return consumed; +} + +bool is_expression(pk_Compiler* self, bool allow_slice){ + PrattCallback prefix = rules[curr().type].prefix; + return prefix && (allow_slice || curr().type != TK_COLON); +} + +Error* parse_expression(pk_Compiler* self, int precedence, bool allow_slice){ + PrattCallback prefix = rules[curr().type].prefix; + if(!prefix || (curr().type == TK_COLON && !allow_slice)) { + return SyntaxError("expected an expression, got %s", pk_TokenSymbols[curr().type]); + } + advance(); + Error* err; + check(prefix(self)); + while(rules[curr().type].precedence >= precedence && (allow_slice || curr().type != TK_COLON)) { + TokenIndex op = curr().type; + advance(); + PrattCallback infix = 
rules[op].infix; + assert(infix != NULL); + check(infix(self)); + } + return NULL; +} + + // [[nodiscard]] Error* EXPR() noexcept{ return parse_expression(PREC_LOWEST + 1); } + // [[nodiscard]] Error* EXPR_TUPLE(bool allow_slice = false) noexcept; + // [[nodiscard]] Error* EXPR_VARS() noexcept; // special case for `for loop` and `comp` + +Error* EXPR_TUPLE(pk_Compiler* self, bool allow_slice){ + Error* err; + check(parse_expression(self, PREC_LOWEST + 1, allow_slice)); + if(!match(TK_COMMA)) return NULL; + // tuple expression + int count = 1; + do { + if(curr().brackets_level) check_newlines_repl() + if(!is_expression(self, allow_slice)) break; + check(parse_expression(self, PREC_LOWEST + 1, allow_slice)); + count += 1; + if(curr().brackets_level) check_newlines_repl(); + } while(match(TK_COMMA)); + // TupleExpr* e = make_expr(count); + // for(int i=count-1; i>=0; i--) + // e->items[i] = ctx()->s_popx(); + // ctx()->s_push(e); + return NULL; +} + +static void setup_global_context(pk_Compiler* self, CodeObject* co){ + co->start_line = self->i == 0 ? 
1 : prev().line; + pk_CodeEmitContext* ctx = c11_vector__emplace(&self->contexts); + pk_CodeEmitContext__ctor(ctx, co, NULL, self->contexts.count); +} + +Error* pk_Compiler__compile(pk_Compiler* self, CodeObject* out){ + // make sure it is the first time to compile + assert(self->i == 0); + // make sure the first token is @sof + assert(tk(0).type == TK_SOF); + + setup_global_context(self, out); + + advance(); // skip @sof, so prev() is always valid + match_newlines(); // skip possible leading '\n' + + Error* err; + // if(mode() == EVAL_MODE) { + // check(EXPR_TUPLE()); + // ctx()->s_emit_top(); + // consume(TK_EOF); + // ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + // check(pop_context()); + // return NULL; + // } else if(mode() == JSON_MODE) { + // check(EXPR()); + // Expr* e = ctx()->s_popx(); + // if(!e->is_json_object()){ + // return SyntaxError("expect a JSON object, literal or array"); + // } + // consume(TK_EOF); + // e->emit_(ctx()); + // ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE); + // check(pop_context()); + // return NULL; + // } + + // while(!match(TK_EOF)) { + // check(compile_stmt()); + // match_newlines(); + // } + // check(pop_context()); + return NULL; +} + + + +Error* pk_compile(pk_SourceData_ src, CodeObject* out){ + pk_TokenArray tokens; Error* err = pk_Lexer__process(src, &tokens); if(err) return err; - Token* data = (Token*)tokens.data; - printf("%s\n", py_Str__data(&src->filename)); - for(int i = 0; i < tokens.count; i++) { - Token* t = data + i; - py_Str tmp; - py_Str__ctor2(&tmp, t->start, t->length); - printf("[%d] %s: %s\n", t->line, pk_TokenSymbols[t->type], py_Str__data(&tmp)); - py_Str__dtor(&tmp); - } + // Token* data = (Token*)tokens.data; + // printf("%s\n", py_Str__data(&src->filename)); + // for(int i = 0; i < tokens.count; i++) { + // Token* t = data + i; + // py_Str tmp; + // py_Str__ctor2(&tmp, t->start, t->length); + // printf("[%d] %s: %s\n", t->line, pk_TokenSymbols[t->type], py_Str__data(&tmp)); + // 
py_Str__dtor(&tmp); + // } - c11_array__dtor(&tokens); - return NULL; + pk_Compiler compiler; + pk_Compiler__ctor(&compiler, src, tokens); + CodeObject__ctor(out, src, py_Str__sv(&src->filename)); + err = pk_Compiler__compile(&compiler, out); + CodeObject__dtor(out); + pk_Compiler__dtor(&compiler); + return err; +} + +void pk_Compiler__initialize(){ + // clang-format off +// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ +#define PK_NO_INFIX NULL, PREC_LOWEST + for(int i = 0; i < TK__COUNT__; i++) rules[i] = { NULL, PK_NO_INFIX }; + rules[TK_DOT] = { NULL, exprAttrib, PREC_PRIMARY }; + rules[TK_LPAREN] = { exprGroup, exprCall, PREC_PRIMARY }; + rules[TK_LBRACKET] = { exprList, exprSubscr, PREC_PRIMARY }; + rules[TK_LBRACE] = { exprMap, PK_NO_INFIX }; + rules[TK_MOD] = { NULL, exprBinaryOp, PREC_FACTOR }; + rules[TK_ADD] = { NULL, exprBinaryOp, PREC_TERM }; + rules[TK_SUB] = { exprUnaryOp, exprBinaryOp, PREC_TERM }; + rules[TK_MUL] = { exprUnaryOp, exprBinaryOp, PREC_FACTOR }; + rules[TK_INVERT] = { exprUnaryOp, NULL, PREC_UNARY }; + rules[TK_DIV] = { NULL, exprBinaryOp, PREC_FACTOR }; + rules[TK_FLOORDIV] = { NULL, exprBinaryOp, PREC_FACTOR }; + rules[TK_POW] = { exprUnaryOp, exprBinaryOp, PREC_EXPONENT }; + rules[TK_GT] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_LT] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_EQ] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_NE] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_GE] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_LE] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_IN] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_IS] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_LSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT }; + rules[TK_RSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT }; + rules[TK_AND] = { NULL, exprBinaryOp, PREC_BITWISE_AND }; + rules[TK_OR] = { NULL, exprBinaryOp, PREC_BITWISE_OR }; + 
rules[TK_XOR] = { NULL, exprBinaryOp, PREC_BITWISE_XOR }; + rules[TK_DECORATOR] = { NULL, exprBinaryOp, PREC_FACTOR }; + rules[TK_IF] = { NULL, exprTernary, PREC_TERNARY }; + rules[TK_NOT_IN] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_IS_NOT] = { NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_AND_KW ] = { NULL, exprAnd, PREC_LOGICAL_AND }; + rules[TK_OR_KW] = { NULL, exprOr, PREC_LOGICAL_OR }; + rules[TK_NOT_KW] = { exprNot, NULL, PREC_LOGICAL_NOT }; + rules[TK_TRUE] = { exprLiteral0, PK_NO_INFIX }; + rules[TK_FALSE] = { exprLiteral0, PK_NO_INFIX }; + rules[TK_NONE] = { exprLiteral0, PK_NO_INFIX }; + rules[TK_DOTDOTDOT] = { exprLiteral0, PK_NO_INFIX }; + rules[TK_LAMBDA] = { exprLambda, PK_NO_INFIX }; + rules[TK_ID] = { exprName, PK_NO_INFIX }; + rules[TK_NUM] = { exprLiteral, PK_NO_INFIX }; + rules[TK_STR] = { exprLiteral, PK_NO_INFIX }; + rules[TK_FSTR] = { exprFString, PK_NO_INFIX }; + rules[TK_LONG] = { exprLong, PK_NO_INFIX }; + rules[TK_IMAG] = { exprImag, PK_NO_INFIX }; + rules[TK_BYTES] = { exprBytes, PK_NO_INFIX }; + rules[TK_COLON] = { exprSlice0, exprSlice1, PREC_PRIMARY }; + +#undef PK_METHOD +#undef PK_NO_INFIX + // clang-format on } \ No newline at end of file diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 82e5b549..507fb886 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -17,7 +17,7 @@ PrattRule Compiler::rules[TK__COUNT__]; NameScope Compiler::name_scope() const noexcept{ auto s = contexts.size() > 1 ? 
NAME_LOCAL : NAME_GLOBAL; - if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN; + if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_UNKNOWN; return s; } @@ -120,61 +120,6 @@ void Compiler::init_pratt_rules() noexcept{ static bool initialized = false; if(initialized) return; initialized = true; - - // clang-format off -// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ -#define PK_METHOD(name) &Compiler::name -#define PK_NO_INFIX nullptr, PREC_LOWEST - for(int i = 0; i < TK__COUNT__; i++) rules[i] = { nullptr, PK_NO_INFIX }; - rules[TK_DOT] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY }; - rules[TK_LPAREN] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY }; - rules[TK_LBRACKET] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY }; - rules[TK_LBRACE] = { PK_METHOD(exprMap), PK_NO_INFIX }; - rules[TK_MOD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK_ADD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM }; - rules[TK_SUB] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM }; - rules[TK_MUL] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK_INVERT] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY }; - rules[TK_DIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK_FLOORDIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK_POW] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT }; - rules[TK_GT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_LT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_EQ] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_NE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_GE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_LE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - 
rules[TK_IS] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_LSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; - rules[TK_RSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT }; - rules[TK_AND] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND }; - rules[TK_OR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR }; - rules[TK_XOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR }; - rules[TK_DECORATOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR }; - rules[TK_IF] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY }; - rules[TK_NOT_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_IS_NOT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION }; - rules[TK_AND_KW ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND }; - rules[TK_OR_KW] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR }; - rules[TK_NOT_KW] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT }; - rules[TK_TRUE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; - rules[TK_FALSE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; - rules[TK_NONE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; - rules[TK_DOTDOTDOT] = { PK_METHOD(exprLiteral0), PK_NO_INFIX }; - rules[TK_LAMBDA] = { PK_METHOD(exprLambda), PK_NO_INFIX }; - rules[TK_ID] = { PK_METHOD(exprName), PK_NO_INFIX }; - rules[TK_NUM] = { PK_METHOD(exprLiteral), PK_NO_INFIX }; - rules[TK_STR] = { PK_METHOD(exprLiteral), PK_NO_INFIX }; - rules[TK_FSTR] = { PK_METHOD(exprFString), PK_NO_INFIX }; - rules[TK_LONG] = { PK_METHOD(exprLong), PK_NO_INFIX }; - rules[TK_IMAG] = { PK_METHOD(exprImag), PK_NO_INFIX }; - rules[TK_BYTES] = { PK_METHOD(exprBytes), PK_NO_INFIX }; - rules[TK_COLON] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY }; - -#undef PK_METHOD -#undef PK_NO_INFIX - // clang-format on } bool Compiler::match(TokenIndex expected) noexcept{ diff --git a/src/compiler/expr.c b/src/compiler/expr.c index 5674d119..dc2c8169 100644 --- a/src/compiler/expr.c +++ 
b/src/compiler/expr.c @@ -1,59 +1,79 @@ -// #include "pocketpy/compiler/expr.h" -// #include "pocketpy/common/memorypool.h" +#include "pocketpy/compiler/expr.h" +#include "pocketpy/common/memorypool.h" +#include "pocketpy/common/strname.h" -// static bool default_false(const pk_Expr*) { return false; } -// static int default_zero(const pk_Expr*) { return 0; } -// static void default_dtor(pk_Expr*) {} +static bool default_false(const pk_Expr* e) { return false; } +static int default_zero(const pk_Expr* e) { return 0; } +static void default_dtor(pk_Expr* e) {} -// void pk_ExprVt__ctor(pk_ExprVt* vt){ -// vt->dtor = default_dtor; -// vt->is_literal = default_false; -// vt->is_json_object = default_false; -// vt->is_attrib = default_false; -// vt->is_subscr = default_false; -// vt->is_compare = default_false; -// vt->star_level = default_zero; -// vt->is_tuple = default_false; -// vt->is_name = default_false; -// vt->emit_ = NULL; // must be set -// vt->emit_del = NULL; -// vt->emit_store = NULL; -// vt->emit_inplace = NULL; -// vt->emit_store_inplace = NULL; -// } +void pk_ExprVt__ctor(pk_ExprVt* vt){ + vt->dtor = default_dtor; + vt->is_literal = default_false; + vt->is_json_object = default_false; + vt->is_attrib = default_false; + vt->is_subscr = default_false; + vt->is_compare = default_false; + vt->star_level = default_zero; + vt->is_tuple = default_false; + vt->is_name = default_false; + vt->emit_ = NULL; // must be set + vt->emit_del = NULL; + vt->emit_store = NULL; + vt->emit_inplace = NULL; + vt->emit_store_inplace = NULL; +} -// void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx){ -// assert(self->vt->emit_); -// self->vt->emit_(self, ctx); -// } +void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx){ + assert(self->vt->emit_); + self->vt->emit_(self, ctx); +} -// bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx){ -// if(!self->vt->emit_del) return false; -// return self->vt->emit_del(self, ctx); -// } +bool 
pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx){ + if(!self->vt->emit_del) return false; + return self->vt->emit_del(self, ctx); +} -// bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx){ -// if(!self->vt->emit_store) return false; -// return self->vt->emit_store(self, ctx); -// } +bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx){ + if(!self->vt->emit_store) return false; + return self->vt->emit_store(self, ctx); +} -// void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){ -// if(!self->vt->emit_inplace){ -// pk_Expr__emit_(self, ctx); -// return; -// } -// self->vt->emit_inplace(self, ctx); -// } +void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){ + if(!self->vt->emit_inplace){ + pk_Expr__emit_(self, ctx); + return; + } + self->vt->emit_inplace(self, ctx); +} -// bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){ -// if(!self->vt->emit_store_inplace){ -// return pk_Expr__emit_store(self, ctx); -// } -// return self->vt->emit_store_inplace(self, ctx); -// } +bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){ + if(!self->vt->emit_store_inplace){ + return pk_Expr__emit_store(self, ctx); + } + return self->vt->emit_store_inplace(self, ctx); +} -// void pk_Expr__delete(pk_Expr* self){ -// if(!self) return; -// self->vt->dtor(self); -// PoolExpr_dealloc(self); -// } \ No newline at end of file +void pk_Expr__delete(pk_Expr* self){ + if(!self) return; + self->vt->dtor(self); + PoolExpr_dealloc(self); +} + +/* CodeEmitContext */ + +void pk_CodeEmitContext__ctor(pk_CodeEmitContext* self, CodeObject* co, FuncDecl* func, int level){ + self->co = co; + self->func = func; + self->level = level; + self->curr_iblock = 0; + self->is_compiling_class = false; + c11_vector__ctor(&self->s_expr, sizeof(pk_Expr*)); + c11_vector__ctor(&self->global_names, sizeof(StrName)); + c11_smallmap_s2n__ctor(&self->co_consts_string_dedup_map); +} + +void 
pk_CodeEmitContext__dtor(pk_CodeEmitContext* self){ + c11_vector__dtor(&self->s_expr); + c11_vector__dtor(&self->global_names); + c11_smallmap_s2n__dtor(&self->co_consts_string_dedup_map); +} \ No newline at end of file diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index a8df1047..5944e6e2 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -715,7 +715,7 @@ IntParsingResult parse_uint(c11_string text, int64_t* out, int base) { return IntParsing_FAILURE; } -Error* pk_Lexer__process(pk_SourceData_ src, c11_array* out_tokens){ +Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens){ pk_Lexer lexer; pk_Lexer__ctor(&lexer, src); @@ -747,7 +747,7 @@ Error* pk_Lexer__process(pk_SourceData_ src, c11_array* out_tokens){ Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) { assert(!src->is_precompiled); - c11_array/*T=Token*/ nexts; // output tokens + pk_TokenArray nexts; // output tokens Error* err = pk_Lexer__process(src, &nexts); if(err) return err; @@ -841,6 +841,15 @@ Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) { return NULL; } +void pk_TokenArray__dtor(pk_TokenArray *self){ + Token* data = self->data; + for(int i=0; i<self->count; i++){ + if(data[i].value.index == TokenValue_STR){ + py_Str__dtor(&data[i].value._str); + } + } + c11_array__dtor(self); +} const char* pk_TokenSymbols[] = { "@eof", "@eol", "@sof", diff --git a/src/objects/codeobject.c b/src/objects/codeobject.c index 2053908d..242d2cb3 100644 --- a/src/objects/codeobject.c +++ b/src/objects/codeobject.c @@ -17,7 +17,7 @@ FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_string name){ FuncDecl* self = malloc(sizeof(FuncDecl)); self->rc.count = 1; self->rc.dtor = (void (*)(void*))FuncDecl__dtor; - self->code = CodeObject__new(src, name); + CodeObject__ctor(&self->code, src, name); c11_vector__ctor(&self->args, sizeof(int)); c11_vector__ctor(&self->kwargs, sizeof(FuncDeclKwArg)); @@ -34,7 +34,7 @@ FuncDecl_ FuncDecl__rcnew(pk_SourceData_ 
src, c11_string name){ } void FuncDecl__dtor(FuncDecl* self){ - CodeObject__delete(self->code); + CodeObject__dtor(&self->code); c11_vector__dtor(&self->args); c11_vector__dtor(&self->kwargs); c11_smallmap_n2i__dtor(&self->kw_to_index); @@ -46,8 +46,7 @@ void FuncDecl__add_kwarg(FuncDecl* self, int index, uint16_t key, const PyVar* v c11_vector__push(FuncDeclKwArg, &self->kwargs, item); } -CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name){ - CodeObject* self = malloc(sizeof(CodeObject)); +void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_string name){ self->src = src; PK_INCREF(src); py_Str__ctor2(&self->name, name.data, name.size); @@ -69,10 +68,9 @@ CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name){ CodeBlock root_block = {CodeBlockType_NO_BLOCK, -1, 0, -1, -1}; c11_vector__push(CodeBlock, &self->blocks, root_block); - return self; } -void CodeObject__delete(CodeObject* self){ +void CodeObject__dtor(CodeObject* self){ PK_DECREF(self->src); py_Str__dtor(&self->name); @@ -92,6 +90,4 @@ void CodeObject__delete(CodeObject* self){ PK_DECREF(decl); } c11_vector__dtor(&self->func_decls); - - free(self); } \ No newline at end of file diff --git a/src/pocketpy.c b/src/pocketpy.c deleted file mode 100644 index dc1347c1..00000000 --- a/src/pocketpy.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "pocketpy/pocketpy.h" -#include "pocketpy/common/utils.h" -#include "pocketpy/objects/object.h" -#include "pocketpy/interpreter/vm.h" -#include -#include - - - - - - - - - - - diff --git a/src/public/vm.c b/src/public/vm.c index 4cf9d4ab..0459780b 100644 --- a/src/public/vm.c +++ b/src/public/vm.c @@ -10,8 +10,9 @@ pk_VM* pk_current_vm; static pk_VM pk_default_vm; void py_initialize() { - Pools_initialize(); + pk_MemoryPools__initialize(); pk_StrName__initialize(); + pk_Compiler__initialize(); pk_current_vm = &pk_default_vm; pk_VM__ctor(&pk_default_vm); } @@ -19,19 +20,20 @@ void py_initialize() { void py_finalize() { 
pk_VM__dtor(&pk_default_vm); pk_current_vm = NULL; + pk_Compiler__finalize(); pk_StrName__finalize(); - Pools_finalize(); + pk_MemoryPools__finalize(); } int py_exec(const char* source) { - pk_SourceData_ src = pk_SourceData__rcnew(source, "main.py", EXEC_MODE); - Error* err = pk_compile(src); + pk_SourceData_ src = pk_SourceData__rcnew(source, "main.py", EXEC_MODE, false); + CodeObject co; + Error* err = pk_compile(src, &co); PK_DECREF(src); if(err) abort(); - CodeObject* co = NULL; pk_VM* vm = pk_current_vm; - Frame* frame = Frame__new(co, &vm->main, NULL, vm->stack.sp, vm->stack.sp, co); + Frame* frame = Frame__new(&co, &vm->main, NULL, vm->stack.sp, vm->stack.sp, &co); pk_VM__push_frame(vm, frame); pk_FrameResult res = pk_VM__run_top_frame(vm); if(res == RES_ERROR) return vm->last_error->type; diff --git a/src2/main.cpp b/src2/main.cpp deleted file mode 100644 index 86b32b3b..00000000 --- a/src2/main.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include -#include - -#if __has_include("pocketpy_c.h") -#include "pocketpy_c.h" -#else -// for amalgamated build -#include "pocketpy.h" -#endif - -#ifdef _WIN32 - -#include - -std::string pkpy_platform_getline(bool* eof) { - HANDLE hStdin = GetStdHandle(STD_INPUT_HANDLE); - std::wstringstream wss; - WCHAR buf; - DWORD read; - while(ReadConsoleW(hStdin, &buf, 1, &read, NULL) && buf != L'\n') { - if(eof && buf == L'\x1A') *eof = true; // Ctrl+Z - wss << buf; - } - std::wstring wideInput = wss.str(); - int length = WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), NULL, 0, NULL, NULL); - std::string output; - output.resize(length); - WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), &output[0], length, NULL, NULL); - if(!output.empty() && output.back() == '\r') output.pop_back(); - return output; -} - -#else - -std::string pkpy_platform_getline(bool* eof) { - std::string output; - if(!std::getline(std::cin, output)) { - if(eof) *eof = true; - } - return 
output; -} - -#endif - -using namespace pkpy; - -static int f_input(pkpy_vm* vm) { - if(!pkpy_is_none(vm, -1)) { - pkpy_CString prompt; - bool ok = pkpy_to_string(vm, -1, &prompt); - if(!ok) return 0; - std::cout << prompt << std::flush; - } - bool eof; - std::string output = pkpy_platform_getline(&eof); - pkpy_push_string(vm, pkpy_string(output.c_str())); - return 1; -} - -int main(int argc, char** argv) { -#if _WIN32 - SetConsoleCP(CP_UTF8); - SetConsoleOutputCP(CP_UTF8); -#endif - pkpy_vm* vm = pkpy_new_vm(true); - - pkpy_push_function(vm, "input(prompt=None) -> str", f_input); - pkpy_py_import(vm, "builtins"); - pkpy_setattr(vm, pkpy_name("input")); - - if(argc == 1) { - void* repl = pkpy_new_repl(vm); - bool need_more_lines = false; - while(true) { - std::cout << (need_more_lines ? "... " : ">>> "); - bool eof = false; - std::string line = pkpy_platform_getline(&eof); - if(eof) break; - need_more_lines = pkpy_repl_input(repl, line.c_str()); - } - pkpy_delete_vm(vm); - return 0; - } - - if(argc == 2) { - std::string argv_1 = argv[1]; - if(argv_1 == "-h" || argv_1 == "--help") goto __HELP; - - std::filesystem::path filepath(argv[1]); - filepath = std::filesystem::absolute(filepath); - if(!std::filesystem::exists(filepath)) { - std::cerr << "File not found: " << argv_1 << std::endl; - return 2; - } - std::ifstream file(filepath); - if(!file.is_open()) { - std::cerr << "Failed to open file: " << argv_1 << std::endl; - return 3; - } - std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); - file.close(); - - pkpy_set_main_argv(vm, argc, argv); - - bool ok = pkpy_exec_2(vm, src.c_str(), filepath.filename().string().c_str(), 0, NULL); - if(!ok) pkpy_clear_error(vm, NULL); - pkpy_delete_vm(vm); - return ok ? 0 : 1; - } - -__HELP: - std::cout << "Usage: pocketpy [filename]" << std::endl; - return 0; -}