diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index 4e880da2..21a2b7b1 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -18,6 +18,7 @@ typedef struct c11_string{ int c11_string__cmp(c11_string self, c11_string other); int c11_string__cmp2(c11_string self, const char* other, int size); int c11_string__cmp3(c11_string self, const char* other); +int c11_string__index(c11_string self, char c); typedef struct py_Str{ int size; diff --git a/include/pocketpy/compiler/expr.h b/include/pocketpy/compiler/expr.h index 1d71f870..2301e404 100644 --- a/include/pocketpy/compiler/expr.h +++ b/include/pocketpy/compiler/expr.h @@ -97,6 +97,27 @@ typedef struct pk_SequenceExpr{ Opcode opcode; } pk_SequenceExpr; +typedef struct pk_CompExpr{ + COMMON_HEADER + pk_Expr* expr; // loop expr + pk_Expr* vars; // loop vars + pk_Expr* iter; // loop iter + pk_Expr* cond; // optional if condition + + Opcode op0; + Opcode op1; +} pk_CompExpr; + +typedef struct pk_LambdaExpr{ + COMMON_HEADER + int index; +} pk_LambdaExpr; + +typedef struct pk_FStringExpr{ + COMMON_HEADER + c11_string src; +} pk_FStringExpr; + #ifdef __cplusplus } #endif diff --git a/include/pocketpy/compiler/lexer.h b/include/pocketpy/compiler/lexer.h index 416e1e00..d4d6c9d8 100644 --- a/include/pocketpy/compiler/lexer.h +++ b/include/pocketpy/compiler/lexer.h @@ -63,7 +63,7 @@ typedef struct Token { // https://docs.python.org/3/reference/expressions.html#operator-precedence enum Precedence { - PREC_LOWEST, + PREC_LOWEST = 0, PREC_LAMBDA, // lambda PREC_TERNARY, // ?: PREC_LOGICAL_OR, // or diff --git a/src/common/str.c b/src/common/str.c index 95cccd65..bdc144ea 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -252,6 +252,13 @@ int c11_string__cmp3(c11_string self, const char *other){ return c11_string__cmp2(self, other, strlen(other)); } +int c11_string__index(c11_string self, char c){ + for(int i=0; isize); } diff --git a/src/compiler/compiler.c b/src/compiler/compiler.c index 362d9d2e..1879e274 100644 --- a/src/compiler/compiler.c +++ b/src/compiler/compiler.c @@ -1,6 +1,7 @@ #include "pocketpy/compiler/compiler.h" #include "pocketpy/compiler/expr.h" #include "pocketpy/compiler/lexer.h" +#include "pocketpy/compiler/context.h" typedef struct pk_Compiler pk_Compiler; typedef Error* (*PrattCallback)(pk_Compiler* self); @@ -163,18 +164,18 @@ static Error* EXPR_TUPLE(pk_Compiler* self, bool allow_slice){ // special case for `for loop` and `comp` static Error* EXPR_VARS(pk_Compiler* self){ - int count = 0; - do { - consume(TK_ID); - ctx()->s_push(make_expr(prev().str(), name_scope())); - count += 1; - } while(match(TK_COMMA)); - if(count > 1){ - TupleExpr* e = make_expr(count); - for(int i=count-1; i>=0; i--) - e->items[i] = ctx()->s_popx(); - ctx()->s_push(e); - } + // int count = 0; + // do { + // consume(TK_ID); + // ctx()->s_push(make_expr(prev().str(), name_scope())); + // count += 1; + // } while(match(TK_COMMA)); + // if(count > 1){ + // TupleExpr* e = make_expr(count); + // for(int i=count-1; i>=0; i--) + // e->items[i] = ctx()->s_popx(); + // ctx()->s_push(e); + // } return NULL; } @@ -253,53 +254,51 @@ Error* pk_compile(pk_SourceData_ src, CodeObject* out){ void pk_Compiler__initialize(){ // clang-format off // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ -#define PK_NO_INFIX NULL, PREC_LOWEST - for(int i = 0; i < TK__COUNT__; i++) rules[i] = { NULL, PK_NO_INFIX }; - rules[TK_DOT] = { NULL, exprAttrib, PREC_PRIMARY }; - rules[TK_LPAREN] = { exprGroup, exprCall, PREC_PRIMARY }; - rules[TK_LBRACKET] = { exprList, exprSubscr, PREC_PRIMARY }; - rules[TK_LBRACE] = { exprMap, PK_NO_INFIX }; - rules[TK_MOD] = { NULL, exprBinaryOp, PREC_FACTOR }; - rules[TK_ADD] = { NULL, exprBinaryOp, PREC_TERM }; - rules[TK_SUB] = { exprUnaryOp, exprBinaryOp, PREC_TERM }; - rules[TK_MUL] = { exprUnaryOp, exprBinaryOp, PREC_FACTOR }; - rules[TK_INVERT] = { exprUnaryOp, NULL, PREC_UNARY }; - rules[TK_DIV] = { NULL, exprBinaryOp, PREC_FACTOR }; - rules[TK_FLOORDIV] = { NULL, exprBinaryOp, PREC_FACTOR }; - rules[TK_POW] = { exprUnaryOp, exprBinaryOp, PREC_EXPONENT }; - rules[TK_GT] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_LT] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_EQ] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_NE] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_GE] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_LE] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_IN] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_IS] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_LSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT }; - rules[TK_RSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT }; - rules[TK_AND] = { NULL, exprBinaryOp, PREC_BITWISE_AND }; - rules[TK_OR] = { NULL, exprBinaryOp, PREC_BITWISE_OR }; - rules[TK_XOR] = { NULL, exprBinaryOp, PREC_BITWISE_XOR }; - rules[TK_DECORATOR] = { NULL, exprBinaryOp, PREC_FACTOR }; - rules[TK_IF] = { NULL, exprTernary, PREC_TERNARY }; - rules[TK_NOT_IN] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_IS_NOT] = { NULL, exprBinaryOp, PREC_COMPARISION }; - rules[TK_AND_KW ] = { NULL, exprAnd, PREC_LOGICAL_AND }; - rules[TK_OR_KW] = { NULL, exprOr, PREC_LOGICAL_OR }; - rules[TK_NOT_KW] = { exprNot, NULL, PREC_LOGICAL_NOT }; - rules[TK_TRUE] = { exprLiteral0, PK_NO_INFIX }; - rules[TK_FALSE] = { exprLiteral0, PK_NO_INFIX }; - rules[TK_NONE] = { exprLiteral0, PK_NO_INFIX }; - rules[TK_DOTDOTDOT] = { exprLiteral0, PK_NO_INFIX }; - rules[TK_LAMBDA] = { exprLambda, PK_NO_INFIX }; - rules[TK_ID] = { exprName, PK_NO_INFIX }; - rules[TK_NUM] = { exprLiteral, PK_NO_INFIX }; - rules[TK_STR] = { exprLiteral, PK_NO_INFIX }; - rules[TK_FSTR] = { exprFString, PK_NO_INFIX }; - rules[TK_LONG] = { exprLong, PK_NO_INFIX }; - rules[TK_IMAG] = { exprImag, PK_NO_INFIX }; - rules[TK_BYTES] = { exprBytes, PK_NO_INFIX }; - rules[TK_COLON] = { exprSlice0, exprSlice1, PREC_PRIMARY }; + rules[TK_DOT] = (PrattRule){ NULL, exprAttrib, PREC_PRIMARY }; + rules[TK_LPAREN] = (PrattRule){ exprGroup, exprCall, PREC_PRIMARY }; + rules[TK_LBRACKET] = (PrattRule){ exprList, exprSubscr, PREC_PRIMARY }; + rules[TK_MOD] = (PrattRule){ NULL, exprBinaryOp, PREC_FACTOR }; + rules[TK_ADD] = (PrattRule){ NULL, exprBinaryOp, PREC_TERM }; + rules[TK_SUB] = (PrattRule){ exprUnaryOp, exprBinaryOp, PREC_TERM }; + rules[TK_MUL] = (PrattRule){ exprUnaryOp, exprBinaryOp, PREC_FACTOR }; + rules[TK_INVERT] = (PrattRule){ exprUnaryOp, NULL, PREC_UNARY }; + rules[TK_DIV] = (PrattRule){ NULL, exprBinaryOp, PREC_FACTOR }; + rules[TK_FLOORDIV] = (PrattRule){ NULL, exprBinaryOp, PREC_FACTOR }; + rules[TK_POW] = (PrattRule){ exprUnaryOp, exprBinaryOp, PREC_EXPONENT }; + rules[TK_GT] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_LT] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_EQ] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_NE] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_GE] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_LE] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_IN] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_IS] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_LSHIFT] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_SHIFT }; + rules[TK_RSHIFT] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_SHIFT }; + rules[TK_AND] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_AND }; + rules[TK_OR] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_OR }; + rules[TK_XOR] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_XOR }; + rules[TK_DECORATOR] = (PrattRule){ NULL, exprBinaryOp, PREC_FACTOR }; + rules[TK_IF] = (PrattRule){ NULL, exprTernary, PREC_TERNARY }; + rules[TK_NOT_IN] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_IS_NOT] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION }; + rules[TK_AND_KW ] = (PrattRule){ NULL, exprAnd, PREC_LOGICAL_AND }; + rules[TK_OR_KW] = (PrattRule){ NULL, exprOr, PREC_LOGICAL_OR }; + rules[TK_NOT_KW] = (PrattRule){ exprNot, NULL, PREC_LOGICAL_NOT }; + rules[TK_TRUE] = (PrattRule){ exprLiteral0 }; + rules[TK_FALSE] = (PrattRule){ exprLiteral0 }; + rules[TK_NONE] = (PrattRule){ exprLiteral0 }; + rules[TK_DOTDOTDOT] = (PrattRule){ exprLiteral0 }; + rules[TK_LAMBDA] = (PrattRule){ exprLambda, }; + rules[TK_ID] = (PrattRule){ exprName, }; + rules[TK_NUM] = (PrattRule){ exprLiteral, }; + rules[TK_STR] = (PrattRule){ exprLiteral, }; + rules[TK_FSTR] = (PrattRule){ exprFString, }; + rules[TK_LONG] = (PrattRule){ exprLong, }; + rules[TK_IMAG] = (PrattRule){ exprImag, }; + rules[TK_BYTES] = (PrattRule){ exprBytes, }; + rules[TK_LBRACE] = (PrattRule){ exprMap }; + rules[TK_COLON] = (PrattRule){ exprSlice0, exprSlice1, PREC_PRIMARY }; #undef PK_METHOD #undef PK_NO_INFIX diff --git a/src/compiler/expr.c b/src/compiler/expr.c index 310b8312..06ede9c9 100644 --- a/src/compiler/expr.c +++ b/src/compiler/expr.c @@ -2,6 +2,7 @@ #include "pocketpy/compiler/context.h" #include "pocketpy/common/memorypool.h" #include "pocketpy/common/strname.h" +#include static bool default_false(const pk_Expr* e) { return false; } static int default_zero(const pk_Expr* e) { return 0; } @@ -337,6 +338,262 @@ bool pk_TupleExpr__emit_del(pk_Expr* self_, pk_CodeEmitContext* ctx) { return true; } +static pk_ExprVt CompExprVt; + +void pk_CompExpr__dtor(pk_Expr* self_){ + pk_CompExpr* self = (pk_CompExpr*)self_; + pk_Expr__delete(self->expr); + pk_Expr__delete(self->vars); + pk_Expr__delete(self->iter); + pk_Expr__delete(self->cond); +} + +void pk_CompExpr__emit_(pk_Expr* self_, pk_CodeEmitContext* ctx) { + pk_CompExpr* self = (pk_CompExpr*)self_; + pk_CodeEmitContext__emit_(ctx, self->op0, 0, self->line); + self->iter->vt->emit_(self->iter, ctx); + pk_CodeEmitContext__emit_(ctx, OP_GET_ITER, BC_NOARG, BC_KEEPLINE); + pk_CodeEmitContext__enter_block(ctx, CodeBlockType_FOR_LOOP); + int curr_iblock = ctx->curr_iblock; + int for_codei = pk_CodeEmitContext__emit_(ctx, OP_FOR_ITER, curr_iblock, BC_KEEPLINE); + bool ok = self->vars->vt->emit_store(self->vars, ctx); + // this error occurs in `vars` instead of this line, but...nevermind + assert(ok); // this should raise a SyntaxError, but we just assert it + pk_CodeEmitContext__try_merge_for_iter_store(ctx, for_codei); + if(self->cond) { + self->cond->vt->emit_(self->cond, ctx); + int patch = pk_CodeEmitContext__emit_(ctx, OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE); + self->expr->vt->emit_(self->expr, ctx); + pk_CodeEmitContext__emit_(ctx, self->op1, BC_NOARG, BC_KEEPLINE); + pk_CodeEmitContext__patch_jump(ctx, patch); + } else { + self->expr->vt->emit_(self->expr, ctx); + pk_CodeEmitContext__emit_(ctx, self->op1, BC_NOARG, BC_KEEPLINE); + } + pk_CodeEmitContext__emit_(ctx, OP_LOOP_CONTINUE, curr_iblock, BC_KEEPLINE); + pk_CodeEmitContext__exit_block(ctx); +} + +pk_CompExpr* pk_CompExpr__new(Opcode op0, Opcode op1){ + static_assert_expr_size(pk_CompExpr); + pk_CompExpr* self = PoolExpr_alloc(); + self->vt = &CompExprVt; + self->line = -1; + self->op0 = op0; + self->op1 = op1; + self->expr = NULL; + self->vars = NULL; + self->iter = NULL; + self->cond = NULL; + return self; +} + +static pk_ExprVt LambdaExprVt; + +pk_LambdaExpr* pk_LambdaExpr__new(int index){ + static_assert_expr_size(pk_LambdaExpr); + pk_LambdaExpr* self = PoolExpr_alloc(); + self->vt = &LambdaExprVt; + self->line = -1; + self->index = index; + return self; +} + +static void pk_LambdaExpr__emit_(pk_Expr* self_, pk_CodeEmitContext* ctx) { + pk_LambdaExpr* self = (pk_LambdaExpr*)self_; + pk_CodeEmitContext__emit_(ctx, OP_LOAD_FUNCTION, self->index, self->line); +} + +static pk_ExprVt FStringExprVt; + +static bool is_fmt_valid_char(char c) { + switch(c) { + // clang-format off + case '-': case '=': case '*': case '#': case '@': case '!': case '~': + case '<': case '>': case '^': + case '.': case 'f': case 'd': case 's': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + return true; + default: return false; + // clang-format on + } +} + +static bool is_identifier(c11_string s) { + if(s.size == 0) return false; + if(!isalpha(s.data[0]) && s.data[0] != '_') return false; + for(int i=0; i= 2 && expr_end[-2] == '!') { + switch(expr_end[-1]) { + case 'r': + repr = true; + expr.size -= 2; // expr[:-2] + break; + case 's': + repr = false; + expr.size -= 2; // expr[:-2] + break; + default: break; // nothing happens + } + } + // name or name.name + bool is_fastpath = false; + if(is_identifier(expr)) { + // ctx->emit_(OP_LOAD_NAME, StrName(expr.sv()).index, line); + pk_CodeEmitContext__emit_( + ctx, + OP_LOAD_NAME, + pk_StrName__map2(expr), + line + ); + is_fastpath = true; + } else { + int dot = c11_string__index(expr, '.'); + if(dot > 0) { + // std::string_view a = expr.sv().substr(0, dot); + // std::string_view b = expr.sv().substr(dot + 1); + c11_string a = {expr.data, dot}; // expr[:dot] + c11_string b = {expr.data+(dot+1), expr.size-(dot+1)}; // expr[dot+1:] + if(is_identifier(a) && is_identifier(b)) { + pk_CodeEmitContext__emit_(ctx, OP_LOAD_NAME, pk_StrName__map2(a), line); + pk_CodeEmitContext__emit_(ctx, OP_LOAD_ATTR, pk_StrName__map2(b), line); + is_fastpath = true; + } + } + } + + if(!is_fastpath) { + int index = pk_CodeEmitContext__add_const_string(ctx, expr); + pk_CodeEmitContext__emit_(ctx, OP_FSTRING_EVAL, index, line); + } + + if(repr) { + pk_CodeEmitContext__emit_(ctx, OP_REPR, BC_NOARG, line); + } +} + +static void pk_FStringExpr__emit_(pk_Expr* self_, pk_CodeEmitContext* ctx) { + pk_FStringExpr* self = (pk_FStringExpr*)self_; + int i = 0; // left index + int j = 0; // right index + int count = 0; // how many string parts + bool flag = false; // true if we are in a expression + + const char* src = self->src.data; + while(j < self->src.size) { + if(flag) { + if(src[j] == '}') { + // add expression + c11_string expr = {src+i, j-i}; // src[i:j] + // BUG: ':' is not a format specifier in f"{stack[2:]}" + int conon = c11_string__index(expr, ':'); + if(conon >= 0) { + c11_string spec = {expr.data+(conon+1), expr.size-(conon+1)}; // expr[conon+1:] + // filter some invalid spec + bool ok = true; + for(int k = 0; k < spec.size; k++) { + char c = spec.data[k]; + if(!is_fmt_valid_char(c)) { + ok = false; + break; + } + } + if(ok) { + expr.size = conon; // expr[:conon] + _load_simple_expr(ctx, expr, self->line); + // ctx->emit_(OP_FORMAT_STRING, ctx->add_const_string(spec.sv()), line); + pk_CodeEmitContext__emit_(ctx, OP_FORMAT_STRING, pk_CodeEmitContext__add_const_string(ctx, spec), self->line); + } else { + // ':' is not a spec indicator + _load_simple_expr(ctx, expr, self->line); + } + } else { + _load_simple_expr(ctx, expr, self->line); + } + flag = false; + count++; + } + } else { + if(src[j] == '{') { + // look at next char + if(j + 1 < self->src.size && src[j + 1] == '{') { + // {{ -> { + j++; + pk_CodeEmitContext__emit_( + ctx, + OP_LOAD_CONST, + pk_CodeEmitContext__add_const_string(ctx, (c11_string){"{", 1}), + self->line + ); + count++; + } else { + // { -> } + flag = true; + i = j + 1; + } + } else if(src[j] == '}') { + // look at next char + if(j + 1 < self->src.size && src[j + 1] == '}') { + // }} -> } + j++; + pk_CodeEmitContext__emit_( + ctx, + OP_LOAD_CONST, + pk_CodeEmitContext__add_const_string(ctx, (c11_string){"}", 1}), + self->line + ); + count++; + } else { + // } -> error + // throw std::runtime_error("f-string: unexpected }"); + // just ignore + } + } else { + // literal + i = j; + while(j < self->src.size && src[j] != '{' && src[j] != '}') + j++; + c11_string literal = {src+i, j-i}; // src[i:j] + pk_CodeEmitContext__emit_( + ctx, + OP_LOAD_CONST, + pk_CodeEmitContext__add_const_string(ctx, literal), + self->line + ); + count++; + continue; // skip j++ + } + } + j++; + } + + if(flag) { + // literal + c11_string literal = {src+i, self->src.size-i}; // src[i:] + pk_CodeEmitContext__emit_(ctx, OP_LOAD_CONST, pk_CodeEmitContext__add_const_string(ctx, literal), self->line); + count++; + } + pk_CodeEmitContext__emit_(ctx, OP_BUILD_STRING, count, self->line); +} + +pk_FStringExpr* pk_FStringExpr__new(c11_string src){ + static_assert_expr_size(pk_FStringExpr); + pk_FStringExpr* self = PoolExpr_alloc(); + self->vt = &FStringExprVt; + self->line = -1; + self->src = src; + return self; +} + ///////////////////////////////////////////// void pk_Expr__initialize(){ pk_ExprVt__ctor(&NameExprVt); @@ -390,4 +647,17 @@ void pk_Expr__initialize(){ TupleExprVt.is_tuple = true; TupleExprVt.emit_store = pk_TupleExpr__emit_store; TupleExprVt.emit_del = pk_TupleExpr__emit_del; + + pk_ExprVt__ctor(&CompExprVt); + vt = &CompExprVt; + vt->dtor = pk_CompExpr__dtor; + vt->emit_ = pk_CompExpr__emit_; + + pk_ExprVt__ctor(&LambdaExprVt); + vt = &LambdaExprVt; + vt->emit_ = pk_LambdaExpr__emit_; + + pk_ExprVt__ctor(&FStringExprVt); + vt = &FStringExprVt; + vt->emit_ = pk_FStringExpr__emit_; }