This commit is contained in:
blueloveTH 2024-06-28 19:06:27 +08:00
parent 881e94e8b0
commit 7748d2bf03
17 changed files with 425 additions and 406 deletions

View File

@ -11,8 +11,8 @@ extern "C" {
#define kPoolObjectArenaSize (256*1024) #define kPoolObjectArenaSize (256*1024)
#define kPoolObjectMaxBlocks (kPoolObjectArenaSize / kPoolObjectBlockSize) #define kPoolObjectMaxBlocks (kPoolObjectArenaSize / kPoolObjectBlockSize)
void Pools_initialize(); void pk_MemoryPools__initialize();
void Pools_finalize(); void pk_MemoryPools__finalize();
void* PoolExpr_alloc(); void* PoolExpr_alloc();
void PoolExpr_dealloc(void*); void PoolExpr_dealloc(void*);

View File

@ -2,13 +2,16 @@
#include "pocketpy/common/vector.h" #include "pocketpy/common/vector.h"
#include "pocketpy/compiler/lexer.h" #include "pocketpy/compiler/lexer.h"
#include "pocketpy/objects/sourcedata.h"
#include "pocketpy/objects/codeobject.h"
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
Error* pk_compile(pk_SourceData_ src); Error* pk_compile(pk_SourceData_ src, CodeObject* out);
void pk_Compiler__initialize();
#define pk_Compiler__finalize() // do nothing
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -1,52 +1,63 @@
// #pragma once #pragma once
// #include <stdbool.h> #include <stdbool.h>
// #include "pocketpy/common/memorypool.h" #include "pocketpy/common/memorypool.h"
// #include "pocketpy/compiler/lexer.h" #include "pocketpy/compiler/lexer.h"
#include "pocketpy/objects/codeobject.h"
// #ifdef __cplusplus #ifdef __cplusplus
// extern "C" { extern "C" {
// #endif #endif
// struct pk_Expr; typedef struct pk_Expr pk_Expr;
// struct pk_CodeEmitContext; typedef struct pk_CodeEmitContext pk_CodeEmitContext;
// struct pk_ExprVt{ typedef struct pk_ExprVt{
// void (*dtor)(pk_Expr*); void (*dtor)(pk_Expr*);
// /* reflections */ /* reflections */
// bool (*is_literal)(const pk_Expr*); bool (*is_literal)(const pk_Expr*);
// bool (*is_json_object)(const pk_Expr*); bool (*is_json_object)(const pk_Expr*);
// bool (*is_attrib)(const pk_Expr*); bool (*is_attrib)(const pk_Expr*);
// bool (*is_subscr)(const pk_Expr*); bool (*is_subscr)(const pk_Expr*);
// bool (*is_compare)(const pk_Expr*); bool (*is_compare)(const pk_Expr*);
// int (*star_level)(const pk_Expr*); int (*star_level)(const pk_Expr*);
// bool (*is_tuple)(const pk_Expr*); bool (*is_tuple)(const pk_Expr*);
// bool (*is_name)(const pk_Expr*); bool (*is_name)(const pk_Expr*);
// /* emit */ /* emit */
// void (*emit_)(pk_Expr*, pk_CodeEmitContext*); void (*emit_)(pk_Expr*, pk_CodeEmitContext*);
// bool (*emit_del)(pk_Expr*, pk_CodeEmitContext*); bool (*emit_del)(pk_Expr*, pk_CodeEmitContext*);
// bool (*emit_store)(pk_Expr*, pk_CodeEmitContext*); bool (*emit_store)(pk_Expr*, pk_CodeEmitContext*);
// void (*emit_inplace)(pk_Expr*, pk_CodeEmitContext*); void (*emit_inplace)(pk_Expr*, pk_CodeEmitContext*);
// bool (*emit_store_inplace)(pk_Expr*, pk_CodeEmitContext*); bool (*emit_store_inplace)(pk_Expr*, pk_CodeEmitContext*);
// }; } pk_ExprVt;
// typedef struct pk_Expr{ typedef struct pk_Expr{
// pk_ExprVt* vt; pk_ExprVt* vt;
// int line; int line;
// } pk_Expr; } pk_Expr;
// void pk_ExprVt__ctor(pk_ExprVt* vt); void pk_ExprVt__ctor(pk_ExprVt* vt);
// void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx); void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx);
// bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx); bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx);
// bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx); bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx);
// void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx); void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx);
// bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx); bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx);
// void pk_Expr__delete(pk_Expr* self); void pk_Expr__delete(pk_Expr* self);
// typedef struct pk_CodeEmitContext{ typedef struct pk_CodeEmitContext{
CodeObject* co; // 1 CodeEmitContext <=> 1 CodeObject*
FuncDecl* func; // optional, weakref
int level;
int curr_iblock;
bool is_compiling_class;
c11_vector/*T=Expr* */ s_expr;
c11_vector/*T=StrName*/ global_names;
c11_smallmap_s2n co_consts_string_dedup_map;
} pk_CodeEmitContext;
// } pk_CodeEmitContext; void pk_CodeEmitContext__ctor(pk_CodeEmitContext* self, CodeObject* co, FuncDecl* func, int level);
void pk_CodeEmitContext__dtor(pk_CodeEmitContext* self);
// #ifdef __cplusplus #ifdef __cplusplus
// } }
// #endif #endif

View File

@ -53,16 +53,16 @@ struct CodeEmitContext{
int level; int level;
vector<StrName> global_names; vector<StrName> global_names;
CodeEmitContext(VM* vm, CodeObject* co, int level) : vm(vm), co(co), level(level) {
func = NULL;
c11_smallmap_s2n__ctor(&_co_consts_string_dedup_map);
}
int curr_iblock = 0; int curr_iblock = 0;
bool is_compiling_class = false; bool is_compiling_class = false;
c11_smallmap_s2n _co_consts_string_dedup_map; c11_smallmap_s2n _co_consts_string_dedup_map;
CodeEmitContext(VM* vm, CodeObject* co, int level) : vm(vm), co(co), level(level) {
func = NULL;
c11_smallmap_s2n__ctor(&_co_consts_string_dedup_map);
}
int get_loop() const noexcept; int get_loop() const noexcept;
CodeBlock* enter_block(CodeBlockType type) noexcept; CodeBlock* enter_block(CodeBlockType type) noexcept;
void exit_block() noexcept; void exit_block() noexcept;

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include "pocketpy/common/str.h" #include "pocketpy/common/str.h"
#include "pocketpy/common/vector.h"
#include "pocketpy/objects/sourcedata.h" #include "pocketpy/objects/sourcedata.h"
#include <stdint.h> #include <stdint.h>
@ -96,8 +97,11 @@ IntParsingResult parse_uint(c11_string text, int64_t* out, int base);
typedef struct Error Error; typedef struct Error Error;
Error* pk_Lexer__process(pk_SourceData_ src, c11_array* out_tokens); typedef c11_array pk_TokenArray;
Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens);
Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out_string); Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out_string);
void pk_TokenArray__dtor(pk_TokenArray* self);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -1,74 +0,0 @@
#pragma once
#include "pocketpy/objects/error.hpp"
#include "pocketpy/objects/sourcedata.h"
#include "pocketpy/compiler/lexer.h"
#include <variant>
namespace pkpy {
struct Lexer {
PK_ALWAYS_PASS_BY_POINTER(Lexer)
VM* vm;
pkpy_SourceData_ src;
const char* token_start;
const char* curr_char;
int current_line = 1;
vector<Token> nexts;
small_vector_2<int, 8> indents;
int brackets_level = 0;
bool used = false;
char peekchar() const noexcept { return *curr_char; }
bool match_n_chars(int n, char c0) noexcept;
bool match_string(const char* s) noexcept;
int eat_spaces() noexcept;
bool eat_indentation() noexcept;
char eatchar() noexcept;
char eatchar_include_newline() noexcept;
void skip_line_comment() noexcept;
bool matchchar(char c) noexcept;
void add_token(TokenIndex type, TokenValue value = {}) noexcept;
void add_token_2(char c, TokenIndex one, TokenIndex two) noexcept;
[[nodiscard]] Error* eat_name() noexcept;
[[nodiscard]] Error* eat_string_until(char quote, bool raw, Str* out) noexcept;
[[nodiscard]] Error* eat_string(char quote, StringType type) noexcept;
[[nodiscard]] Error* eat_number() noexcept;
[[nodiscard]] Error* lex_one_token(bool* eof) noexcept;
/***** Error Reporter *****/
[[nodiscard]] Error* _error(bool lexer_err, const char* type, const char* msg, va_list* args, i64 userdata=0) noexcept;
[[nodiscard]] Error* SyntaxError(const char* fmt, ...) noexcept;
[[nodiscard]] Error* IndentationError(const char* msg) noexcept { return _error(true, "IndentationError", msg, NULL); }
[[nodiscard]] Error* NeedMoreLines() noexcept { return _error(true, "NeedMoreLines", "", NULL, 0); }
[[nodiscard]] Error* run() noexcept;
[[nodiscard]] Error* from_precompiled() noexcept;
[[nodiscard]] Error* precompile(Str* out) noexcept;
Lexer(VM* vm, std::string_view source, const Str& filename, CompileMode mode) noexcept{
src = pkpy_SourceData__rcnew({source.data(), (int)source.size()}, &filename, mode);
this->token_start = py_Str__data(&src->source);
this->curr_char = py_Str__data(&src->source);
}
~Lexer(){
PK_DECREF(src);
}
};
enum class IntParsingResult {
Success,
Failure,
Overflow,
};
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept;
} // namespace pkpy

View File

@ -28,7 +28,7 @@ typedef enum FuncType {
typedef enum NameScope { typedef enum NameScope {
NAME_LOCAL, NAME_LOCAL,
NAME_GLOBAL, NAME_GLOBAL,
NAME_GLOBAL_UNKNOWN NAME_GLOBAL_UNKNOWN,
} NameScope; } NameScope;
typedef enum CodeBlockType { typedef enum CodeBlockType {
@ -88,8 +88,8 @@ typedef struct CodeObject {
int end_line; int end_line;
} CodeObject; } CodeObject;
CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name); void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_string name);
void CodeObject__delete(CodeObject* self); void CodeObject__dtor(CodeObject* self);
void CodeObject__gc_mark(const CodeObject* self); void CodeObject__gc_mark(const CodeObject* self);
typedef struct FuncDeclKwArg{ typedef struct FuncDeclKwArg{
@ -100,7 +100,7 @@ typedef struct FuncDeclKwArg{
typedef struct FuncDecl { typedef struct FuncDecl {
RefCounted rc; RefCounted rc;
CodeObject* code; // strong ref CodeObject code; // strong ref
c11_vector/*T=int*/ args; // indices in co->varnames c11_vector/*T=int*/ args; // indices in co->varnames
c11_vector/*T=KwArg*/ kwargs; // indices in co->varnames c11_vector/*T=KwArg*/ kwargs; // indices in co->varnames

View File

@ -15,6 +15,7 @@ struct pk_SourceData {
RefCounted rc; RefCounted rc;
enum CompileMode mode; enum CompileMode mode;
bool is_precompiled; bool is_precompiled;
bool is_dynamic; // for exec() and eval()
py_Str filename; py_Str filename;
py_Str source; py_Str source;
@ -25,7 +26,7 @@ struct pk_SourceData {
typedef struct pk_SourceData* pk_SourceData_; typedef struct pk_SourceData* pk_SourceData_;
pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode); pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode, bool is_dynamic);
bool pk_SourceData__get_line(const struct pk_SourceData* self, int lineno, const char** st, const char** ed); bool pk_SourceData__get_line(const struct pk_SourceData* self, int lineno, const char** st, const char** ed);
py_Str pk_SourceData__snapshot(const struct pk_SourceData *self, int lineno, const char *cursor, const char *name); py_Str pk_SourceData__snapshot(const struct pk_SourceData *self, int lineno, const char *cursor, const char *name);

View File

@ -4,10 +4,11 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
void pk_SourceData__ctor(struct pk_SourceData* self, static void pk_SourceData__ctor(struct pk_SourceData* self,
const char* source, const char* source,
const char* filename, const char* filename,
enum CompileMode mode) { enum CompileMode mode,
bool is_dynamic) {
py_Str__ctor(&self->filename, filename); py_Str__ctor(&self->filename, filename);
self->mode = mode; self->mode = mode;
c11_vector__ctor(&self->line_starts, sizeof(const char*)); c11_vector__ctor(&self->line_starts, sizeof(const char*));
@ -30,7 +31,7 @@ void pk_SourceData__ctor(struct pk_SourceData* self,
c11_vector__push(const char*, &self->line_starts, source); c11_vector__push(const char*, &self->line_starts, source);
} }
void pk_SourceData__dtor(struct pk_SourceData* self) { static void pk_SourceData__dtor(struct pk_SourceData* self) {
py_Str__dtor(&self->filename); py_Str__dtor(&self->filename);
py_Str__dtor(&self->source); py_Str__dtor(&self->source);
c11_vector__dtor(&self->line_starts); c11_vector__dtor(&self->line_starts);
@ -41,9 +42,9 @@ void pk_SourceData__dtor(struct pk_SourceData* self) {
c11_vector__dtor(&self->_precompiled_tokens); c11_vector__dtor(&self->_precompiled_tokens);
} }
pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode) { pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode, bool is_dynamic) {
pk_SourceData_ self = malloc(sizeof(struct pk_SourceData)); pk_SourceData_ self = malloc(sizeof(struct pk_SourceData));
pk_SourceData__ctor(self, source, filename, mode); pk_SourceData__ctor(self, source, filename, mode, is_dynamic);
self->rc.count = 1; self->rc.count = 1;
self->rc.dtor = (void(*)(void*))pk_SourceData__dtor; self->rc.dtor = (void(*)(void*))pk_SourceData__dtor;
return self; return self;

View File

@ -1,20 +1,254 @@
#include "pocketpy/compiler/compiler.h" #include "pocketpy/compiler/compiler.h"
#include "pocketpy/compiler/expr.h"
#include "pocketpy/compiler/lexer.h"
Error* pk_compile(pk_SourceData_ src){ typedef struct pk_Compiler pk_Compiler;
c11_array/*T=Token*/ tokens; typedef Error* (*PrattCallback)(pk_Compiler* self);
/* One row of the Pratt parser table `rules`, which is indexed by token type. */
typedef struct PrattRule {
// called by parse_expression() when the token starts an expression; NULL means it cannot
PrattCallback prefix;
// called by parse_expression() after the token has been consumed as an operator
PrattCallback infix;
// compared against the minimum precedence passed to parse_expression()
enum Precedence precedence;
} PrattRule;
static PrattRule rules[TK__COUNT__];
/* Parser state used while compiling one token stream. */
typedef struct pk_Compiler {
pk_SourceData_ src; // weakref
// token stream being parsed; released by pk_Compiler__dtor
pk_TokenArray tokens;
// index of the current token in `tokens` (see the prev()/curr()/next() macros)
int i;
// stack of emit contexts; the ctx() macro refers to the last element
c11_vector/*T=CodeEmitContext*/ contexts;
} pk_Compiler;
/*
 * Prepares `self` for parsing `tokens`.
 *
 * `src` is stored as-is (no reference is taken here). `tokens` is copied by
 * value into the compiler and is later released by pk_Compiler__dtor. The
 * cursor starts at token 0 and the context stack starts empty.
 */
static void pk_Compiler__ctor(pk_Compiler *self, pk_SourceData_ src, pk_TokenArray tokens){
    c11_vector__ctor(&self->contexts, sizeof(pk_CodeEmitContext));
    self->i = 0;
    self->tokens = tokens;
    self->src = src;
}
/*
 * Releases what the compiler holds: the context stack storage and the token
 * array. `src` is not touched.
 *
 * NOTE(review): elements still stored in `contexts` are not passed to
 * pk_CodeEmitContext__dtor here; presumably they are expected to have been
 * popped already -- confirm.
 */
static void pk_Compiler__dtor(pk_Compiler *self){
    c11_vector__dtor(&self->contexts);
    pk_TokenArray__dtor(&self->tokens);
}
/**************************************/
/*
 * Token-cursor helpers. Every macro below expects a `pk_Compiler* self` in
 * scope; consume(), consume_end_stmt(), check_newlines_repl() and check() also
 * `return` from the enclosing function, which must therefore return Error*.
 * check() additionally needs a local `Error* err`.
 *
 * Macro arguments and expression-like expansions are parenthesized so that
 * compound arguments (e.g. `a ? b : c`) cannot change the parse.
 */
#define tk(i) c11__getitem(Token, &self->tokens, (i))
#define prev() tk(self->i - 1)
#define curr() tk(self->i)
#define next() tk(self->i + 1)
/* token to report an error at: the previous one when the cursor is past the end */
#define err() (self->i == self->tokens.count ? prev() : curr())
#define advance() (self->i++)
#define mode() (self->src->mode)
#define ctx() c11_vector__back(pk_CodeEmitContext, &self->contexts)
#define match_newlines() match_newlines_repl(self, NULL)
/* NOTE: expands to a complete `if` statement including its own trailing `;` */
#define consume(expected) if(!match(expected)) return SyntaxError("expected '%s', got '%s'", pk_TokenSymbols[(expected)], pk_TokenSymbols[curr().type]);
#define consume_end_stmt() if(!match_end_stmt()) return SyntaxError("expected statement end")
/* NOTE: expands to a braced block, so it is usable with or without a trailing `;` */
#define check_newlines_repl() { bool __nml; match_newlines_repl(self, &__nml); if(__nml) return NeedMoreLines(); }
#define check(B) if((err = (B))) return err
/* advances past the current token and yields non-zero when its type equals `expected`; otherwise yields 0 */
#define match(expected) (curr().type == (expected) ? (++self->i) : 0)
/*
 * Returns the scope in which a name is resolved at the current nesting depth:
 * NAME_LOCAL when more than one context is on the stack, otherwise
 * NAME_GLOBAL -- or NAME_GLOBAL_UNKNOWN when the source is marked
 * `is_dynamic`.
 */
NameScope name_scope(pk_Compiler* self) {
    if(self->contexts.count > 1) return NAME_LOCAL;
    return self->src->is_dynamic ? NAME_GLOBAL_UNKNOWN : NAME_GLOBAL;
}
/*
 * Placeholder for building a syntax error from a printf-style format.
 * Currently ignores `fmt` and the variadic arguments and returns NULL.
 *
 * NOTE(review): callers `return` this value as their Error*, and the check()
 * macro treats a NULL Error* as "no error", so a syntax error raised through
 * this stub presumably looks like success to the caller -- TODO build a real Error.
 */
static Error* SyntaxError(const char* fmt, ...){
return NULL;
}
/*
 * Placeholder for the error returned by check_newlines_repl() when the input
 * ends in REPL mode. Currently always returns NULL.
 *
 * NOTE(review): a NULL Error* is treated as "no error" by check(), so this
 * condition is presumably not reported yet -- TODO build a real Error.
 */
static Error* NeedMoreLines(){
return NULL;
}
/*
 * Skips every consecutive TK_EOL token at the cursor.
 *
 * need_more_lines: optional out-parameter (may be NULL). When given, it is set
 *                  to true iff the compile mode is REPL_MODE and the token at
 *                  the cursor after skipping is TK_EOF.
 * Returns true iff at least one TK_EOL token was skipped.
 */
bool match_newlines_repl(pk_Compiler* self, bool* need_more_lines){
    const int start = self->i;
    while(curr().type == TK_EOL) advance();
    if(need_more_lines != NULL) {
        *need_more_lines = (mode() == REPL_MODE && curr().type == TK_EOF);
    }
    return self->i != start;
}
/*
 * Reports whether the current token can start an expression, i.e. whether it
 * has a prefix handler in `rules`. A TK_COLON only counts when `allow_slice`
 * is true.
 */
bool is_expression(pk_Compiler* self, bool allow_slice){
    if(!rules[curr().type].prefix) return false;
    if(allow_slice) return true;
    return curr().type != TK_COLON;
}
/*
 * Pratt-parses one expression starting at the current token.
 *
 * precedence:  minimum precedence an operator must have to be folded into
 *              this expression.
 * allow_slice: when false, a TK_COLON neither starts nor continues the
 *              expression.
 * Returns NULL on success, or the first Error* produced by a handler or by
 * SyntaxError().
 */
Error* parse_expression(pk_Compiler* self, int precedence, bool allow_slice){
    TokenIndex first = curr().type;
    PrattCallback prefix = rules[first].prefix;
    if(!prefix || (first == TK_COLON && !allow_slice)) {
        return SyntaxError("expected an expression, got %s", pk_TokenSymbols[first]);
    }
    advance();

    Error* err = prefix(self);
    if(err) return err;

    for(;;) {
        TokenIndex op = curr().type;
        // stop at an operator that binds less tightly than requested
        if(rules[op].precedence < precedence) break;
        // stop at ':' unless slices are allowed
        if(!allow_slice && op == TK_COLON) break;
        advance();
        PrattCallback infix = rules[op].infix;
        assert(infix != NULL);
        err = infix(self);
        if(err) return err;
    }
    return NULL;
}
// [[nodiscard]] Error* EXPR() noexcept{ return parse_expression(PREC_LOWEST + 1); }
// [[nodiscard]] Error* EXPR_TUPLE(bool allow_slice = false) noexcept;
// [[nodiscard]] Error* EXPR_VARS() noexcept; // special case for `for loop` and `comp`
/*
 * Parses one expression and, if a comma follows, keeps parsing
 * comma-separated expressions while counting them.
 * Returns NULL on success or the first Error* produced while parsing.
 * The step that turns the counted items into a tuple expression is still
 * commented out below.
 */
Error* EXPR_TUPLE(pk_Compiler* self, bool allow_slice){
Error* err;
check(parse_expression(self, PREC_LOWEST + 1, allow_slice));
// no comma after the first expression: nothing more to parse
if(!match(TK_COMMA)) return NULL;
// tuple expression
int count = 1;
do {
// check_newlines_repl() expands to a braced block, so no `;` is required here;
// it may `return NeedMoreLines()` from this function
if(curr().brackets_level) check_newlines_repl()
// a comma not followed by an expression ends the loop
if(!is_expression(self, allow_slice)) break;
check(parse_expression(self, PREC_LOWEST + 1, allow_slice));
count += 1;
if(curr().brackets_level) check_newlines_repl();
} while(match(TK_COMMA));
// TupleExpr* e = make_expr<TupleExpr>(count);
// for(int i=count-1; i>=0; i--)
// e->items[i] = ctx()->s_popx();
// ctx()->s_push(e);
return NULL;
}
/*
 * Records the starting line of `co` and pushes a new emit context for it
 * (with no FuncDecl) onto the context stack.
 *
 * The start line is 1 when the cursor is still at token 0, otherwise the line
 * of the previous token.
 *
 * NOTE(review): the `level` argument is `contexts.count` as read after the
 * emplace call; whether that already includes the new element depends on how
 * c11_vector__emplace is defined -- confirm the intended level.
 */
static void setup_global_context(pk_Compiler* self, CodeObject* co){
    if(self->i == 0) {
        co->start_line = 1;
    } else {
        co->start_line = prev().line;
    }
    pk_CodeEmitContext* global_ctx = c11_vector__emplace(&self->contexts);
    pk_CodeEmitContext__ctor(global_ctx, co, NULL, self->contexts.count);
}
/*
 * Entry point of the parser: sets up the global emit context for `out`, then
 * skips the start-of-file token and any leading newlines.
 * The per-mode compilation logic is still commented out below, so this
 * currently always returns NULL.
 */
Error* pk_Compiler__compile(pk_Compiler* self, CodeObject* out){
// make sure it is the first time to compile
assert(self->i == 0);
// make sure the first token is @sof
assert(tk(0).type == TK_SOF);
setup_global_context(self, out);
advance(); // skip @sof, so prev() is always valid
match_newlines(); // skip possible leading '\n'
// only used by the check() calls in the commented-out code below
Error* err;
// if(mode() == EVAL_MODE) {
// check(EXPR_TUPLE());
// ctx()->s_emit_top();
// consume(TK_EOF);
// ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
// check(pop_context());
// return NULL;
// } else if(mode() == JSON_MODE) {
// check(EXPR());
// Expr* e = ctx()->s_popx();
// if(!e->is_json_object()){
// return SyntaxError("expect a JSON object, literal or array");
// }
// consume(TK_EOF);
// e->emit_(ctx());
// ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
// check(pop_context());
// return NULL;
// }
// while(!match(TK_EOF)) {
// check(compile_stmt());
// match_newlines();
// }
// check(pop_context());
return NULL;
}
/*
 * Tokenizes and compiles `src` into `out`.
 *
 * Returns NULL on success; `out` is then a constructed CodeObject that the
 * caller must release with CodeObject__dtor.
 * Returns a non-NULL Error* on failure; `out` is then NOT a live object:
 * either it was never constructed (lexer failure) or it has already been
 * destroyed here (compile failure), so the caller must not destroy it again.
 */
Error* pk_compile(pk_SourceData_ src, CodeObject* out){
    pk_TokenArray tokens;
    Error* err = pk_Lexer__process(src, &tokens);
    if(err) return err;

    // Token* data = (Token*)tokens.data;
    // printf("%s\n", py_Str__data(&src->filename));
    // for(int i = 0; i < tokens.count; i++) {
    //     Token* t = data + i;
    //     py_Str tmp;
    //     py_Str__ctor2(&tmp, t->start, t->length);
    //     printf("[%d] %s: %s\n", t->line, pk_TokenSymbols[t->type], py_Str__data(&tmp));
    //     py_Str__dtor(&tmp);
    // }

    pk_Compiler compiler;
    // the compiler stores `tokens` and releases them in pk_Compiler__dtor
    pk_Compiler__ctor(&compiler, src, tokens);
    CodeObject__ctor(out, src, py_Str__sv(&src->filename));
    err = pk_Compiler__compile(&compiler, out);
    // Dispose of the code object only when compilation failed; destroying it
    // unconditionally would hand the caller an already-destroyed output.
    if(err) CodeObject__dtor(out);
    pk_Compiler__dtor(&compiler);
    return err;
}
c11_array__dtor(&tokens); void pk_Compiler__initialize(){
return NULL; // clang-format off
// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
#define PK_NO_INFIX NULL, PREC_LOWEST
for(int i = 0; i < TK__COUNT__; i++) rules[i] = { NULL, PK_NO_INFIX };
rules[TK_DOT] = { NULL, exprAttrib, PREC_PRIMARY };
rules[TK_LPAREN] = { exprGroup, exprCall, PREC_PRIMARY };
rules[TK_LBRACKET] = { exprList, exprSubscr, PREC_PRIMARY };
rules[TK_LBRACE] = { exprMap, PK_NO_INFIX };
rules[TK_MOD] = { NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_ADD] = { NULL, exprBinaryOp, PREC_TERM };
rules[TK_SUB] = { exprUnaryOp, exprBinaryOp, PREC_TERM };
rules[TK_MUL] = { exprUnaryOp, exprBinaryOp, PREC_FACTOR };
rules[TK_INVERT] = { exprUnaryOp, NULL, PREC_UNARY };
rules[TK_DIV] = { NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_FLOORDIV] = { NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_POW] = { exprUnaryOp, exprBinaryOp, PREC_EXPONENT };
rules[TK_GT] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_LT] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_EQ] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_NE] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_GE] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_LE] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_IN] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_IS] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_LSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT };
rules[TK_RSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT };
rules[TK_AND] = { NULL, exprBinaryOp, PREC_BITWISE_AND };
rules[TK_OR] = { NULL, exprBinaryOp, PREC_BITWISE_OR };
rules[TK_XOR] = { NULL, exprBinaryOp, PREC_BITWISE_XOR };
rules[TK_DECORATOR] = { NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_IF] = { NULL, exprTernary, PREC_TERNARY };
rules[TK_NOT_IN] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_IS_NOT] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_AND_KW ] = { NULL, exprAnd, PREC_LOGICAL_AND };
rules[TK_OR_KW] = { NULL, exprOr, PREC_LOGICAL_OR };
rules[TK_NOT_KW] = { exprNot, NULL, PREC_LOGICAL_NOT };
rules[TK_TRUE] = { exprLiteral0, PK_NO_INFIX };
rules[TK_FALSE] = { exprLiteral0, PK_NO_INFIX };
rules[TK_NONE] = { exprLiteral0, PK_NO_INFIX };
rules[TK_DOTDOTDOT] = { exprLiteral0, PK_NO_INFIX };
rules[TK_LAMBDA] = { exprLambda, PK_NO_INFIX };
rules[TK_ID] = { exprName, PK_NO_INFIX };
rules[TK_NUM] = { exprLiteral, PK_NO_INFIX };
rules[TK_STR] = { exprLiteral, PK_NO_INFIX };
rules[TK_FSTR] = { exprFString, PK_NO_INFIX };
rules[TK_LONG] = { exprLong, PK_NO_INFIX };
rules[TK_IMAG] = { exprImag, PK_NO_INFIX };
rules[TK_BYTES] = { exprBytes, PK_NO_INFIX };
rules[TK_COLON] = { exprSlice0, exprSlice1, PREC_PRIMARY };
#undef PK_METHOD
#undef PK_NO_INFIX
// clang-format on
} }

View File

@ -17,7 +17,7 @@ PrattRule Compiler::rules[TK__COUNT__];
// Returns the scope in which a name is resolved at the current nesting depth:
// NAME_LOCAL when more than one context is on the stack, otherwise NAME_GLOBAL,
// or NAME_GLOBAL_UNKNOWN when `unknown_global_scope` is set.
// Uses NAME_GLOBAL_UNKNOWN, the enumerator the NameScope enum actually declares.
NameScope Compiler::name_scope() const noexcept{
    auto s = contexts.size() > 1 ? NAME_LOCAL : NAME_GLOBAL;
    if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
    return s;
}
@ -120,61 +120,6 @@ void Compiler::init_pratt_rules() noexcept{
static bool initialized = false; static bool initialized = false;
if(initialized) return; if(initialized) return;
initialized = true; initialized = true;
// clang-format off
// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
#define PK_METHOD(name) &Compiler::name
#define PK_NO_INFIX nullptr, PREC_LOWEST
for(int i = 0; i < TK__COUNT__; i++) rules[i] = { nullptr, PK_NO_INFIX };
rules[TK_DOT] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY };
rules[TK_LPAREN] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY };
rules[TK_LBRACKET] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY };
rules[TK_LBRACE] = { PK_METHOD(exprMap), PK_NO_INFIX };
rules[TK_MOD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_ADD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM };
rules[TK_SUB] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM };
rules[TK_MUL] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_INVERT] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY };
rules[TK_DIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_FLOORDIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_POW] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT };
rules[TK_GT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_EQ] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_NE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_GE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IS] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK_RSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK_AND] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND };
rules[TK_OR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR };
rules[TK_XOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR };
rules[TK_DECORATOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_IF] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY };
rules[TK_NOT_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IS_NOT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_AND_KW ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND };
rules[TK_OR_KW] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR };
rules[TK_NOT_KW] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
rules[TK_TRUE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_FALSE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_NONE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_DOTDOTDOT] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_LAMBDA] = { PK_METHOD(exprLambda), PK_NO_INFIX };
rules[TK_ID] = { PK_METHOD(exprName), PK_NO_INFIX };
rules[TK_NUM] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
rules[TK_STR] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
rules[TK_FSTR] = { PK_METHOD(exprFString), PK_NO_INFIX };
rules[TK_LONG] = { PK_METHOD(exprLong), PK_NO_INFIX };
rules[TK_IMAG] = { PK_METHOD(exprImag), PK_NO_INFIX };
rules[TK_BYTES] = { PK_METHOD(exprBytes), PK_NO_INFIX };
rules[TK_COLON] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY };
#undef PK_METHOD
#undef PK_NO_INFIX
// clang-format on
} }
bool Compiler::match(TokenIndex expected) noexcept{ bool Compiler::match(TokenIndex expected) noexcept{

View File

@ -1,59 +1,79 @@
// #include "pocketpy/compiler/expr.h" #include "pocketpy/compiler/expr.h"
// #include "pocketpy/common/memorypool.h" #include "pocketpy/common/memorypool.h"
#include "pocketpy/common/strname.h"
// static bool default_false(const pk_Expr*) { return false; } static bool default_false(const pk_Expr* e) { return false; }
// static int default_zero(const pk_Expr*) { return 0; } static int default_zero(const pk_Expr* e) { return 0; }
// static void default_dtor(pk_Expr*) {} static void default_dtor(pk_Expr* e) {}
// void pk_ExprVt__ctor(pk_ExprVt* vt){ void pk_ExprVt__ctor(pk_ExprVt* vt){
// vt->dtor = default_dtor; vt->dtor = default_dtor;
// vt->is_literal = default_false; vt->is_literal = default_false;
// vt->is_json_object = default_false; vt->is_json_object = default_false;
// vt->is_attrib = default_false; vt->is_attrib = default_false;
// vt->is_subscr = default_false; vt->is_subscr = default_false;
// vt->is_compare = default_false; vt->is_compare = default_false;
// vt->star_level = default_zero; vt->star_level = default_zero;
// vt->is_tuple = default_false; vt->is_tuple = default_false;
// vt->is_name = default_false; vt->is_name = default_false;
// vt->emit_ = NULL; // must be set vt->emit_ = NULL; // must be set
// vt->emit_del = NULL; vt->emit_del = NULL;
// vt->emit_store = NULL; vt->emit_store = NULL;
// vt->emit_inplace = NULL; vt->emit_inplace = NULL;
// vt->emit_store_inplace = NULL; vt->emit_store_inplace = NULL;
// } }
// void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx){ void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx){
// assert(self->vt->emit_); assert(self->vt->emit_);
// self->vt->emit_(self, ctx); self->vt->emit_(self, ctx);
// } }
// bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx){ bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx){
// if(!self->vt->emit_del) return false; if(!self->vt->emit_del) return false;
// return self->vt->emit_del(self, ctx); return self->vt->emit_del(self, ctx);
// } }
// bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx){ bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx){
// if(!self->vt->emit_store) return false; if(!self->vt->emit_store) return false;
// return self->vt->emit_store(self, ctx); return self->vt->emit_store(self, ctx);
// } }
// void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){ void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){
// if(!self->vt->emit_inplace){ if(!self->vt->emit_inplace){
// pk_Expr__emit_(self, ctx); pk_Expr__emit_(self, ctx);
// return; return;
// } }
// self->vt->emit_inplace(self, ctx); self->vt->emit_inplace(self, ctx);
// } }
// bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){ bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){
// if(!self->vt->emit_store_inplace){ if(!self->vt->emit_store_inplace){
// return pk_Expr__emit_store(self, ctx); return pk_Expr__emit_store(self, ctx);
// } }
// return self->vt->emit_store_inplace(self, ctx); return self->vt->emit_store_inplace(self, ctx);
// } }
// void pk_Expr__delete(pk_Expr* self){ void pk_Expr__delete(pk_Expr* self){
// if(!self) return; if(!self) return;
// self->vt->dtor(self); self->vt->dtor(self);
// PoolExpr_dealloc(self); PoolExpr_dealloc(self);
// } }
/* CodeEmitContext */
/*
 * Initializes an emit context for code object `co`.
 *
 * func:  stored as-is; may be NULL (setup_global_context passes NULL).
 * level: stored as-is in `self->level`.
 * The expression stack, the global-name list and the string dedup map start
 * empty; `curr_iblock` starts at 0 and `is_compiling_class` at false.
 */
void pk_CodeEmitContext__ctor(pk_CodeEmitContext* self, CodeObject* co, FuncDecl* func, int level){
    // containers
    c11_smallmap_s2n__ctor(&self->co_consts_string_dedup_map);
    c11_vector__ctor(&self->global_names, sizeof(StrName));
    c11_vector__ctor(&self->s_expr, sizeof(pk_Expr*));
    // plain fields
    self->is_compiling_class = false;
    self->curr_iblock = 0;
    self->level = level;
    self->func = func;
    self->co = co;
}
/*
 * Releases the three containers owned by the context. `co` and `func` are not
 * touched.
 *
 * NOTE(review): the pk_Expr* elements stored in `s_expr` are not deleted here;
 * presumably the stack is expected to be empty at this point -- confirm.
 */
void pk_CodeEmitContext__dtor(pk_CodeEmitContext* self){
    c11_smallmap_s2n__dtor(&self->co_consts_string_dedup_map);
    c11_vector__dtor(&self->global_names);
    c11_vector__dtor(&self->s_expr);
}

View File

@ -715,7 +715,7 @@ IntParsingResult parse_uint(c11_string text, int64_t* out, int base) {
return IntParsing_FAILURE; return IntParsing_FAILURE;
} }
Error* pk_Lexer__process(pk_SourceData_ src, c11_array* out_tokens){ Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens){
pk_Lexer lexer; pk_Lexer lexer;
pk_Lexer__ctor(&lexer, src); pk_Lexer__ctor(&lexer, src);
@ -747,7 +747,7 @@ Error* pk_Lexer__process(pk_SourceData_ src, c11_array* out_tokens){
Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) { Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) {
assert(!src->is_precompiled); assert(!src->is_precompiled);
c11_array/*T=Token*/ nexts; // output tokens pk_TokenArray nexts; // output tokens
Error* err = pk_Lexer__process(src, &nexts); Error* err = pk_Lexer__process(src, &nexts);
if(err) return err; if(err) return err;
@ -841,6 +841,15 @@ Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) {
return NULL; return NULL;
} }
/*
 * Destroys a token array: releases the py_Str held by every token whose value
 * is tagged TokenValue_STR, then frees the array itself.
 */
void pk_TokenArray__dtor(pk_TokenArray *self){
    Token* tokens = self->data;
    int idx = 0;
    while(idx < self->count){
        Token* t = &tokens[idx++];
        if(t->value.index != TokenValue_STR) continue;
        py_Str__dtor(&t->value._str);
    }
    c11_array__dtor(self);
}
const char* pk_TokenSymbols[] = { const char* pk_TokenSymbols[] = {
"@eof", "@eol", "@sof", "@eof", "@eol", "@sof",

View File

@ -17,7 +17,7 @@ FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_string name){
FuncDecl* self = malloc(sizeof(FuncDecl)); FuncDecl* self = malloc(sizeof(FuncDecl));
self->rc.count = 1; self->rc.count = 1;
self->rc.dtor = (void (*)(void*))FuncDecl__dtor; self->rc.dtor = (void (*)(void*))FuncDecl__dtor;
self->code = CodeObject__new(src, name); CodeObject__ctor(&self->code, src, name);
c11_vector__ctor(&self->args, sizeof(int)); c11_vector__ctor(&self->args, sizeof(int));
c11_vector__ctor(&self->kwargs, sizeof(FuncDeclKwArg)); c11_vector__ctor(&self->kwargs, sizeof(FuncDeclKwArg));
@ -34,7 +34,7 @@ FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_string name){
} }
void FuncDecl__dtor(FuncDecl* self){ void FuncDecl__dtor(FuncDecl* self){
CodeObject__delete(self->code); CodeObject__dtor(&self->code);
c11_vector__dtor(&self->args); c11_vector__dtor(&self->args);
c11_vector__dtor(&self->kwargs); c11_vector__dtor(&self->kwargs);
c11_smallmap_n2i__dtor(&self->kw_to_index); c11_smallmap_n2i__dtor(&self->kw_to_index);
@ -46,8 +46,7 @@ void FuncDecl__add_kwarg(FuncDecl* self, int index, uint16_t key, const PyVar* v
c11_vector__push(FuncDeclKwArg, &self->kwargs, item); c11_vector__push(FuncDeclKwArg, &self->kwargs, item);
} }
CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name){ void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_string name){
CodeObject* self = malloc(sizeof(CodeObject));
self->src = src; PK_INCREF(src); self->src = src; PK_INCREF(src);
py_Str__ctor2(&self->name, name.data, name.size); py_Str__ctor2(&self->name, name.data, name.size);
@ -69,10 +68,9 @@ CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name){
CodeBlock root_block = {CodeBlockType_NO_BLOCK, -1, 0, -1, -1}; CodeBlock root_block = {CodeBlockType_NO_BLOCK, -1, 0, -1, -1};
c11_vector__push(CodeBlock, &self->blocks, root_block); c11_vector__push(CodeBlock, &self->blocks, root_block);
return self;
} }
void CodeObject__delete(CodeObject* self){ void CodeObject__dtor(CodeObject* self){
PK_DECREF(self->src); PK_DECREF(self->src);
py_Str__dtor(&self->name); py_Str__dtor(&self->name);
@ -92,6 +90,4 @@ void CodeObject__delete(CodeObject* self){
PK_DECREF(decl); PK_DECREF(decl);
} }
c11_vector__dtor(&self->func_decls); c11_vector__dtor(&self->func_decls);
free(self);
} }

View File

@ -1,17 +0,0 @@
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.h"
#include "pocketpy/interpreter/vm.h"
#include <assert.h>
#include <stdlib.h>

View File

@ -10,8 +10,9 @@ pk_VM* pk_current_vm;
static pk_VM pk_default_vm; static pk_VM pk_default_vm;
void py_initialize() { void py_initialize() {
Pools_initialize(); pk_MemoryPools__initialize();
pk_StrName__initialize(); pk_StrName__initialize();
pk_Compiler__initialize();
pk_current_vm = &pk_default_vm; pk_current_vm = &pk_default_vm;
pk_VM__ctor(&pk_default_vm); pk_VM__ctor(&pk_default_vm);
} }
@ -19,19 +20,20 @@ void py_initialize() {
void py_finalize() { void py_finalize() {
pk_VM__dtor(&pk_default_vm); pk_VM__dtor(&pk_default_vm);
pk_current_vm = NULL; pk_current_vm = NULL;
pk_Compiler__finalize();
pk_StrName__finalize(); pk_StrName__finalize();
Pools_finalize(); pk_MemoryPools__finalize();
} }
int py_exec(const char* source) { int py_exec(const char* source) {
pk_SourceData_ src = pk_SourceData__rcnew(source, "main.py", EXEC_MODE); pk_SourceData_ src = pk_SourceData__rcnew(source, "main.py", EXEC_MODE, false);
Error* err = pk_compile(src); CodeObject co;
Error* err = pk_compile(src, &co);
PK_DECREF(src); PK_DECREF(src);
if(err) abort(); if(err) abort();
CodeObject* co = NULL;
pk_VM* vm = pk_current_vm; pk_VM* vm = pk_current_vm;
Frame* frame = Frame__new(co, &vm->main, NULL, vm->stack.sp, vm->stack.sp, co); Frame* frame = Frame__new(&co, &vm->main, NULL, vm->stack.sp, vm->stack.sp, &co);
pk_VM__push_frame(vm, frame); pk_VM__push_frame(vm, frame);
pk_FrameResult res = pk_VM__run_top_frame(vm); pk_FrameResult res = pk_VM__run_top_frame(vm);
if(res == RES_ERROR) return vm->last_error->type; if(res == RES_ERROR) return vm->last_error->type;

View File

@ -1,116 +0,0 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#if __has_include("pocketpy_c.h")
#include "pocketpy_c.h"
#else
// for amalgamated build
#include "pocketpy.h"
#endif
#ifdef _WIN32
#include <windows.h>
/// Reads one line from the Windows console and returns it encoded as UTF-8.
///
/// Input is pulled one UTF-16 code unit at a time with ReadConsoleW until a
/// line feed is seen. The collected text is converted with
/// WideCharToMultiByte(CP_UTF8) and a trailing carriage return is removed.
///
/// @param eof  Optional out-flag (may be null). Set to true when Ctrl+Z
///             (0x1A) is read or when the console read itself fails. It is
///             never reset to false here, so callers initialise it.
/// @return The line without its terminating "\n" / "\r\n".
std::string pkpy_platform_getline(bool* eof) {
    HANDLE hStdin = GetStdHandle(STD_INPUT_HANDLE);
    std::wstring wideInput;
    while(true) {
        WCHAR buf = 0;
        DWORD read = 0;
        if(!ReadConsoleW(hStdin, &buf, 1, &read, NULL)) {
            // A failed read means no further input can arrive. Report it as
            // end of input so a caller looping until eof does not spin on
            // empty lines forever.
            if(eof) *eof = true;
            break;
        }
        if(read == 0) continue;  // nothing was stored in buf; do not inspect it
        if(buf == L'\n') break;
        if(eof && buf == L'\x1A') *eof = true;  // Ctrl+Z
        wideInput.push_back(buf);
    }

    std::string output;
    // WideCharToMultiByte rejects a zero-length source, so an empty line is
    // returned directly (the result is the same empty string either way).
    if(wideInput.empty()) return output;

    const int wideLength = static_cast<int>(wideInput.length());
    // First call only measures the required UTF-8 byte count.
    const int length = WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), wideLength, NULL, 0, NULL, NULL);
    output.resize(length);
    WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), wideLength, &output[0], length, NULL, NULL);
    if(!output.empty() && output.back() == '\r') output.pop_back();
    return output;
}
#else
/// Reads one line from standard input.
///
/// @param eof  Optional out-flag (may be null). Set to true when the read
///             leaves std::cin in a failed state; never reset to false here.
/// @return The extracted line (empty when nothing could be read).
std::string pkpy_platform_getline(bool* eof) {
    std::string line;
    std::getline(std::cin, line);
    const bool read_failed = std::cin.fail();
    if(read_failed && eof != nullptr) {
        *eof = true;
    }
    return line;
}
#endif
using namespace pkpy;
/// Native callback registered in main() under the signature
/// "input(prompt=None) -> str".
///
/// If the value at stack index -1 is not None it is converted with
/// pkpy_to_string and written to std::cout (flushed) as the prompt. One line
/// is then read via pkpy_platform_getline and pushed onto the VM stack.
///
/// @return 0 when the prompt conversion fails, otherwise 1 after pushing the
///         line. NOTE(review): presumably the count of pushed results /
///         failure indicator expected by the VM — confirm against the C API.
static int f_input(pkpy_vm* vm) {
    const bool has_prompt = !pkpy_is_none(vm, -1);
    if(has_prompt) {
        pkpy_CString prompt_text;
        if(!pkpy_to_string(vm, -1, &prompt_text)) return 0;
        std::cout << prompt_text << std::flush;
    }

    // The end-of-input flag is required by the helper's signature but is not
    // consulted here.
    bool reached_eof;
    const std::string line = pkpy_platform_getline(&reached_eof);
    pkpy_push_string(vm, pkpy_string(line.c_str()));
    return 1;
}
int main(int argc, char** argv) {
#if _WIN32
SetConsoleCP(CP_UTF8);
SetConsoleOutputCP(CP_UTF8);
#endif
pkpy_vm* vm = pkpy_new_vm(true);
pkpy_push_function(vm, "input(prompt=None) -> str", f_input);
pkpy_py_import(vm, "builtins");
pkpy_setattr(vm, pkpy_name("input"));
if(argc == 1) {
void* repl = pkpy_new_repl(vm);
bool need_more_lines = false;
while(true) {
std::cout << (need_more_lines ? "... " : ">>> ");
bool eof = false;
std::string line = pkpy_platform_getline(&eof);
if(eof) break;
need_more_lines = pkpy_repl_input(repl, line.c_str());
}
pkpy_delete_vm(vm);
return 0;
}
if(argc == 2) {
std::string argv_1 = argv[1];
if(argv_1 == "-h" || argv_1 == "--help") goto __HELP;
std::filesystem::path filepath(argv[1]);
filepath = std::filesystem::absolute(filepath);
if(!std::filesystem::exists(filepath)) {
std::cerr << "File not found: " << argv_1 << std::endl;
return 2;
}
std::ifstream file(filepath);
if(!file.is_open()) {
std::cerr << "Failed to open file: " << argv_1 << std::endl;
return 3;
}
std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
file.close();
pkpy_set_main_argv(vm, argc, argv);
bool ok = pkpy_exec_2(vm, src.c_str(), filepath.filename().string().c_str(), 0, NULL);
if(!ok) pkpy_clear_error(vm, NULL);
pkpy_delete_vm(vm);
return ok ? 0 : 1;
}
__HELP:
std::cout << "Usage: pocketpy [filename]" << std::endl;
return 0;
}