This commit is contained in:
blueloveTH 2024-06-29 00:10:40 +08:00
parent c4b52ef684
commit c7597dfcdf
6 changed files with 358 additions and 60 deletions

View File

@ -18,6 +18,7 @@ typedef struct c11_string{
int c11_string__cmp(c11_string self, c11_string other); int c11_string__cmp(c11_string self, c11_string other);
int c11_string__cmp2(c11_string self, const char* other, int size); int c11_string__cmp2(c11_string self, const char* other, int size);
int c11_string__cmp3(c11_string self, const char* other); int c11_string__cmp3(c11_string self, const char* other);
// Returns the index of the first occurrence of `c` in `self`, or -1 if `c` does not occur.
int c11_string__index(c11_string self, char c);
typedef struct py_Str{ typedef struct py_Str{
int size; int size;

View File

@ -97,6 +97,27 @@ typedef struct pk_SequenceExpr{
Opcode opcode; Opcode opcode;
} pk_SequenceExpr; } pk_SequenceExpr;
// Expression node for a `comp` loop; each part is described on its field.
// NOTE(review): presumably models Python comprehensions — confirm against the parser.
typedef struct pk_CompExpr{
    COMMON_HEADER
    pk_Expr* expr;       // loop expr
    pk_Expr* vars;       // loop vars (stored to via `emit_store`)
    pk_Expr* iter;       // loop iter
    pk_Expr* cond;       // optional if condition; NULL when absent
    Opcode op0;          // emitted once with argument 0, before `iter` is evaluated
    Opcode op1;          // emitted inside the loop body, right after `expr`
} pk_CompExpr;
// Expression node for a lambda; emitting it produces a single OP_LOAD_FUNCTION.
typedef struct pk_LambdaExpr{
    COMMON_HEADER
    int index;  // operand passed to OP_LOAD_FUNCTION
} pk_LambdaExpr;
// Expression node for an f-string; the text is split into parts when the node is emitted.
typedef struct pk_FStringExpr{
    COMMON_HEADER
    c11_string src;  // f-string text that gets scanned for `{...}` fields and literal runs
} pk_FStringExpr;
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -63,7 +63,7 @@ typedef struct Token {
// https://docs.python.org/3/reference/expressions.html#operator-precedence // https://docs.python.org/3/reference/expressions.html#operator-precedence
enum Precedence { enum Precedence {
PREC_LOWEST, PREC_LOWEST = 0,
PREC_LAMBDA, // lambda PREC_LAMBDA, // lambda
PREC_TERNARY, // ?: PREC_TERNARY, // ?:
PREC_LOGICAL_OR, // or PREC_LOGICAL_OR, // or

View File

@ -252,6 +252,13 @@ int c11_string__cmp3(c11_string self, const char *other){
return c11_string__cmp2(self, other, strlen(other)); return c11_string__cmp2(self, other, strlen(other));
} }
// Linear search for `c` in `self`.
// Returns the position of the first match, or -1 when `c` is not present.
int c11_string__index(c11_string self, char c){
    int pos = 0;
    while(pos < self.size){
        if(self.data[pos] == c) return pos;
        pos++;
    }
    return -1;
}
int py_Str__cmp(const py_Str *self, const py_Str *other){ int py_Str__cmp(const py_Str *self, const py_Str *other){
return py_Str__cmp2(self, py_Str__data(other), other->size); return py_Str__cmp2(self, py_Str__data(other), other->size);
} }

View File

@ -1,6 +1,7 @@
#include "pocketpy/compiler/compiler.h" #include "pocketpy/compiler/compiler.h"
#include "pocketpy/compiler/expr.h" #include "pocketpy/compiler/expr.h"
#include "pocketpy/compiler/lexer.h" #include "pocketpy/compiler/lexer.h"
#include "pocketpy/compiler/context.h"
typedef struct pk_Compiler pk_Compiler; typedef struct pk_Compiler pk_Compiler;
typedef Error* (*PrattCallback)(pk_Compiler* self); typedef Error* (*PrattCallback)(pk_Compiler* self);
@ -163,18 +164,18 @@ static Error* EXPR_TUPLE(pk_Compiler* self, bool allow_slice){
// special case for `for loop` and `comp` // special case for `for loop` and `comp`
static Error* EXPR_VARS(pk_Compiler* self){ static Error* EXPR_VARS(pk_Compiler* self){
int count = 0; // int count = 0;
do { // do {
consume(TK_ID); // consume(TK_ID);
ctx()->s_push(make_expr<NameExpr>(prev().str(), name_scope())); // ctx()->s_push(make_expr<NameExpr>(prev().str(), name_scope()));
count += 1; // count += 1;
} while(match(TK_COMMA)); // } while(match(TK_COMMA));
if(count > 1){ // if(count > 1){
TupleExpr* e = make_expr<TupleExpr>(count); // TupleExpr* e = make_expr<TupleExpr>(count);
for(int i=count-1; i>=0; i--) // for(int i=count-1; i>=0; i--)
e->items[i] = ctx()->s_popx(); // e->items[i] = ctx()->s_popx();
ctx()->s_push(e); // ctx()->s_push(e);
} // }
return NULL; return NULL;
} }
@ -253,53 +254,51 @@ Error* pk_compile(pk_SourceData_ src, CodeObject* out){
void pk_Compiler__initialize(){ void pk_Compiler__initialize(){
// clang-format off // clang-format off
// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
#define PK_NO_INFIX NULL, PREC_LOWEST rules[TK_DOT] = (PrattRule){ NULL, exprAttrib, PREC_PRIMARY };
for(int i = 0; i < TK__COUNT__; i++) rules[i] = { NULL, PK_NO_INFIX }; rules[TK_LPAREN] = (PrattRule){ exprGroup, exprCall, PREC_PRIMARY };
rules[TK_DOT] = { NULL, exprAttrib, PREC_PRIMARY }; rules[TK_LBRACKET] = (PrattRule){ exprList, exprSubscr, PREC_PRIMARY };
rules[TK_LPAREN] = { exprGroup, exprCall, PREC_PRIMARY }; rules[TK_MOD] = (PrattRule){ NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_LBRACKET] = { exprList, exprSubscr, PREC_PRIMARY }; rules[TK_ADD] = (PrattRule){ NULL, exprBinaryOp, PREC_TERM };
rules[TK_LBRACE] = { exprMap, PK_NO_INFIX }; rules[TK_SUB] = (PrattRule){ exprUnaryOp, exprBinaryOp, PREC_TERM };
rules[TK_MOD] = { NULL, exprBinaryOp, PREC_FACTOR }; rules[TK_MUL] = (PrattRule){ exprUnaryOp, exprBinaryOp, PREC_FACTOR };
rules[TK_ADD] = { NULL, exprBinaryOp, PREC_TERM }; rules[TK_INVERT] = (PrattRule){ exprUnaryOp, NULL, PREC_UNARY };
rules[TK_SUB] = { exprUnaryOp, exprBinaryOp, PREC_TERM }; rules[TK_DIV] = (PrattRule){ NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_MUL] = { exprUnaryOp, exprBinaryOp, PREC_FACTOR }; rules[TK_FLOORDIV] = (PrattRule){ NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_INVERT] = { exprUnaryOp, NULL, PREC_UNARY }; rules[TK_POW] = (PrattRule){ exprUnaryOp, exprBinaryOp, PREC_EXPONENT };
rules[TK_DIV] = { NULL, exprBinaryOp, PREC_FACTOR }; rules[TK_GT] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_FLOORDIV] = { NULL, exprBinaryOp, PREC_FACTOR }; rules[TK_LT] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_POW] = { exprUnaryOp, exprBinaryOp, PREC_EXPONENT }; rules[TK_EQ] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_GT] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_NE] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_LT] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_GE] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_EQ] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_LE] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_NE] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_IN] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_GE] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_IS] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_LE] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_LSHIFT] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_SHIFT };
rules[TK_IN] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_RSHIFT] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_SHIFT };
rules[TK_IS] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_AND] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_AND };
rules[TK_LSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT }; rules[TK_OR] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_OR };
rules[TK_RSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT }; rules[TK_XOR] = (PrattRule){ NULL, exprBinaryOp, PREC_BITWISE_XOR };
rules[TK_AND] = { NULL, exprBinaryOp, PREC_BITWISE_AND }; rules[TK_DECORATOR] = (PrattRule){ NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_OR] = { NULL, exprBinaryOp, PREC_BITWISE_OR }; rules[TK_IF] = (PrattRule){ NULL, exprTernary, PREC_TERNARY };
rules[TK_XOR] = { NULL, exprBinaryOp, PREC_BITWISE_XOR }; rules[TK_NOT_IN] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_DECORATOR] = { NULL, exprBinaryOp, PREC_FACTOR }; rules[TK_IS_NOT] = (PrattRule){ NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_IF] = { NULL, exprTernary, PREC_TERNARY }; rules[TK_AND_KW ] = (PrattRule){ NULL, exprAnd, PREC_LOGICAL_AND };
rules[TK_NOT_IN] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_OR_KW] = (PrattRule){ NULL, exprOr, PREC_LOGICAL_OR };
rules[TK_IS_NOT] = { NULL, exprBinaryOp, PREC_COMPARISION }; rules[TK_NOT_KW] = (PrattRule){ exprNot, NULL, PREC_LOGICAL_NOT };
rules[TK_AND_KW ] = { NULL, exprAnd, PREC_LOGICAL_AND }; rules[TK_TRUE] = (PrattRule){ exprLiteral0 };
rules[TK_OR_KW] = { NULL, exprOr, PREC_LOGICAL_OR }; rules[TK_FALSE] = (PrattRule){ exprLiteral0 };
rules[TK_NOT_KW] = { exprNot, NULL, PREC_LOGICAL_NOT }; rules[TK_NONE] = (PrattRule){ exprLiteral0 };
rules[TK_TRUE] = { exprLiteral0, PK_NO_INFIX }; rules[TK_DOTDOTDOT] = (PrattRule){ exprLiteral0 };
rules[TK_FALSE] = { exprLiteral0, PK_NO_INFIX }; rules[TK_LAMBDA] = (PrattRule){ exprLambda, };
rules[TK_NONE] = { exprLiteral0, PK_NO_INFIX }; rules[TK_ID] = (PrattRule){ exprName, };
rules[TK_DOTDOTDOT] = { exprLiteral0, PK_NO_INFIX }; rules[TK_NUM] = (PrattRule){ exprLiteral, };
rules[TK_LAMBDA] = { exprLambda, PK_NO_INFIX }; rules[TK_STR] = (PrattRule){ exprLiteral, };
rules[TK_ID] = { exprName, PK_NO_INFIX }; rules[TK_FSTR] = (PrattRule){ exprFString, };
rules[TK_NUM] = { exprLiteral, PK_NO_INFIX }; rules[TK_LONG] = (PrattRule){ exprLong, };
rules[TK_STR] = { exprLiteral, PK_NO_INFIX }; rules[TK_IMAG] = (PrattRule){ exprImag, };
rules[TK_FSTR] = { exprFString, PK_NO_INFIX }; rules[TK_BYTES] = (PrattRule){ exprBytes, };
rules[TK_LONG] = { exprLong, PK_NO_INFIX }; rules[TK_LBRACE] = (PrattRule){ exprMap };
rules[TK_IMAG] = { exprImag, PK_NO_INFIX }; rules[TK_COLON] = (PrattRule){ exprSlice0, exprSlice1, PREC_PRIMARY };
rules[TK_BYTES] = { exprBytes, PK_NO_INFIX };
rules[TK_COLON] = { exprSlice0, exprSlice1, PREC_PRIMARY };
#undef PK_METHOD #undef PK_METHOD
#undef PK_NO_INFIX #undef PK_NO_INFIX

View File

@ -2,6 +2,7 @@
#include "pocketpy/compiler/context.h" #include "pocketpy/compiler/context.h"
#include "pocketpy/common/memorypool.h" #include "pocketpy/common/memorypool.h"
#include "pocketpy/common/strname.h" #include "pocketpy/common/strname.h"
#include <ctype.h>
static bool default_false(const pk_Expr* e) { return false; } static bool default_false(const pk_Expr* e) { return false; }
static int default_zero(const pk_Expr* e) { return 0; } static int default_zero(const pk_Expr* e) { return 0; }
@ -337,6 +338,262 @@ bool pk_TupleExpr__emit_del(pk_Expr* self_, pk_CodeEmitContext* ctx) {
return true; return true;
} }
static pk_ExprVt CompExprVt;
// Destructor for pk_CompExpr: releases the four child expressions
// in the order expr, vars, iter, cond.
void pk_CompExpr__dtor(pk_Expr* self_){
    pk_CompExpr* comp = (pk_CompExpr*)self_;
    pk_Expr* children[] = {comp->expr, comp->vars, comp->iter, comp->cond};
    const int n_children = (int)(sizeof(children) / sizeof(children[0]));
    for(int i = 0; i < n_children; i++){
        pk_Expr__delete(children[i]);
    }
}
// Emits the bytecode for a pk_CompExpr:
//   op0, <iter>, GET_ITER, then a FOR_LOOP block containing
//   FOR_ITER, store into <vars>, [<cond>, POP_JUMP_IF_FALSE], <expr>, op1, LOOP_CONTINUE.
// When `cond` is present, the jump skips `<expr>, op1` and lands on LOOP_CONTINUE.
void pk_CompExpr__emit_(pk_Expr* self_, pk_CodeEmitContext* ctx) {
    pk_CompExpr* comp = (pk_CompExpr*)self_;

    // prologue: op0 first, then evaluate the iterable and turn it into an iterator
    pk_CodeEmitContext__emit_(ctx, comp->op0, 0, comp->line);
    comp->iter->vt->emit_(comp->iter, ctx);
    pk_CodeEmitContext__emit_(ctx, OP_GET_ITER, BC_NOARG, BC_KEEPLINE);

    // loop header
    pk_CodeEmitContext__enter_block(ctx, CodeBlockType_FOR_LOOP);
    const int loop_block = ctx->curr_iblock;
    const int for_iter_at = pk_CodeEmitContext__emit_(ctx, OP_FOR_ITER, loop_block, BC_KEEPLINE);

    // A failed store really belongs to `vars` and should be a SyntaxError;
    // for now it is only asserted.
    bool ok = comp->vars->vt->emit_store(comp->vars, ctx);
    assert(ok);
    pk_CodeEmitContext__try_merge_for_iter_store(ctx, for_iter_at);

    // optional filter: jump over the body when the condition is false
    const bool has_filter = comp->cond != NULL;
    int skip_body = -1;
    if(has_filter) {
        comp->cond->vt->emit_(comp->cond, ctx);
        skip_body = pk_CodeEmitContext__emit_(ctx, OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
    }

    // loop body: evaluate the element expression, then op1
    comp->expr->vt->emit_(comp->expr, ctx);
    pk_CodeEmitContext__emit_(ctx, comp->op1, BC_NOARG, BC_KEEPLINE);

    if(has_filter) {
        pk_CodeEmitContext__patch_jump(ctx, skip_body);
    }

    // loop footer
    pk_CodeEmitContext__emit_(ctx, OP_LOOP_CONTINUE, loop_block, BC_KEEPLINE);
    pk_CodeEmitContext__exit_block(ctx);
}
// Allocates a pk_CompExpr from the expression pool.
// The node starts with line = -1 and all four child pointers NULL;
// `op0` / `op1` are stored as given.
pk_CompExpr* pk_CompExpr__new(Opcode op0, Opcode op1){
    static_assert_expr_size(pk_CompExpr);
    pk_CompExpr* node = PoolExpr_alloc();

    // common header
    node->vt = &CompExprVt;
    node->line = -1;

    // children start out empty
    node->expr = NULL;
    node->vars = NULL;
    node->iter = NULL;
    node->cond = NULL;

    // opcodes used by the emitter
    node->op0 = op0;
    node->op1 = op1;
    return node;
}
static pk_ExprVt LambdaExprVt;
pk_LambdaExpr* pk_LambdaExpr__new(int index){
static_assert_expr_size(pk_LambdaExpr);
pk_LambdaExpr* self = PoolExpr_alloc();
self->vt = &LambdaExprVt;
self->line = -1;
self->index = index;
return self;
}
// Emits a single OP_LOAD_FUNCTION whose operand is the node's `index`.
static void pk_LambdaExpr__emit_(pk_Expr* self_, pk_CodeEmitContext* ctx) {
    pk_LambdaExpr* lambda = (pk_LambdaExpr*)self_;
    const int func_index = lambda->index;
    const int at_line = lambda->line;
    pk_CodeEmitContext__emit_(ctx, OP_LOAD_FUNCTION, func_index, at_line);
}
static pk_ExprVt FStringExprVt;
// Returns true if `c` is accepted inside an f-string format spec.
// Accepted: decimal digits, and any of  - = * # @ ! ~ < > ^ . f d s
static bool is_fmt_valid_char(char c) {
    // '0'..'9' are contiguous in every C execution character set
    if(c >= '0' && c <= '9') return true;

    switch(c) {
        case '<':
        case '>':
        case '^':
        case '-':
        case '=':
        case '*':
        case '#':
        case '@':
        case '!':
        case '~':
        case '.':
        case 'f':
        case 'd':
        case 's':
            return true;
        default:
            return false;
    }
}
// Returns true if `s` is non-empty, starts with a letter or '_', and every
// following byte is a letter, digit or '_' (as classified by <ctype.h>).
//
// Bytes are converted to `unsigned char` before being handed to isalpha/isalnum:
// passing a negative `char` (e.g. a UTF-8 byte where `char` is signed) to those
// functions is undefined behaviour.
static bool is_identifier(c11_string s) {
    if(s.size == 0) return false;

    const unsigned char first = (unsigned char)s.data[0];
    if(!isalpha(first) && first != '_') return false;

    // the first byte is already validated, so continue from index 1
    for(int i = 1; i < s.size; i++) {
        const unsigned char c = (unsigned char)s.data[i];
        if(!isalnum(c) && c != '_') return false;
    }
    return true;
}
// Emits the bytecode that loads the value of one f-string replacement expression.
//
//  - A trailing `!r` or `!s` is removed from `expr`; `!r` additionally emits OP_REPR
//    after the value is loaded. Any other `!x` suffix is left in place.
//  - `name` is emitted as OP_LOAD_NAME.
//  - `name.name` is emitted as OP_LOAD_NAME followed by OP_LOAD_ATTR.
//  - Anything else is stored as a string constant and emitted as OP_FSTRING_EVAL.
static void _load_simple_expr(pk_CodeEmitContext* ctx, c11_string expr, int line) {
    // strip the conversion suffix, if any
    bool want_repr = false;
    if(expr.size >= 2 && expr.data[expr.size - 2] == '!') {
        const char conv = expr.data[expr.size - 1];
        if(conv == 'r' || conv == 's') {
            want_repr = (conv == 'r');
            expr.size -= 2;  // expr[:-2]
        }
    }

    // fast paths: `name` or `name.name`
    bool loaded = false;
    if(is_identifier(expr)) {
        pk_CodeEmitContext__emit_(ctx, OP_LOAD_NAME, pk_StrName__map2(expr), line);
        loaded = true;
    } else {
        const int dot = c11_string__index(expr, '.');
        if(dot > 0) {
            c11_string owner = {expr.data, dot};                              // expr[:dot]
            c11_string attr = {expr.data + (dot + 1), expr.size - (dot + 1)}; // expr[dot+1:]
            if(is_identifier(owner) && is_identifier(attr)) {
                pk_CodeEmitContext__emit_(ctx, OP_LOAD_NAME, pk_StrName__map2(owner), line);
                pk_CodeEmitContext__emit_(ctx, OP_LOAD_ATTR, pk_StrName__map2(attr), line);
                loaded = true;
            }
        }
    }

    // slow path: hand the expression text to OP_FSTRING_EVAL
    if(!loaded) {
        int const_index = pk_CodeEmitContext__add_const_string(ctx, expr);
        pk_CodeEmitContext__emit_(ctx, OP_FSTRING_EVAL, const_index, line);
    }

    if(want_repr) {
        pk_CodeEmitContext__emit_(ctx, OP_REPR, BC_NOARG, line);
    }
}
static void pk_FStringExpr__emit_(pk_Expr* self_, pk_CodeEmitContext* ctx) {
pk_FStringExpr* self = (pk_FStringExpr*)self_;
int i = 0; // left index
int j = 0; // right index
int count = 0; // how many string parts
bool flag = false; // true if we are in a expression
const char* src = self->src.data;
while(j < self->src.size) {
if(flag) {
if(src[j] == '}') {
// add expression
c11_string expr = {src+i, j-i}; // src[i:j]
// BUG: ':' is not a format specifier in f"{stack[2:]}"
int conon = c11_string__index(expr, ':');
if(conon >= 0) {
c11_string spec = {expr.data+(conon+1), expr.size-(conon+1)}; // expr[conon+1:]
// filter some invalid spec
bool ok = true;
for(int k = 0; k < spec.size; k++) {
char c = spec.data[k];
if(!is_fmt_valid_char(c)) {
ok = false;
break;
}
}
if(ok) {
expr.size = conon; // expr[:conon]
_load_simple_expr(ctx, expr, self->line);
// ctx->emit_(OP_FORMAT_STRING, ctx->add_const_string(spec.sv()), line);
pk_CodeEmitContext__emit_(ctx, OP_FORMAT_STRING, pk_CodeEmitContext__add_const_string(ctx, spec), self->line);
} else {
// ':' is not a spec indicator
_load_simple_expr(ctx, expr, self->line);
}
} else {
_load_simple_expr(ctx, expr, self->line);
}
flag = false;
count++;
}
} else {
if(src[j] == '{') {
// look at next char
if(j + 1 < self->src.size && src[j + 1] == '{') {
// {{ -> {
j++;
pk_CodeEmitContext__emit_(
ctx,
OP_LOAD_CONST,
pk_CodeEmitContext__add_const_string(ctx, (c11_string){"{", 1}),
self->line
);
count++;
} else {
// { -> }
flag = true;
i = j + 1;
}
} else if(src[j] == '}') {
// look at next char
if(j + 1 < self->src.size && src[j + 1] == '}') {
// }} -> }
j++;
pk_CodeEmitContext__emit_(
ctx,
OP_LOAD_CONST,
pk_CodeEmitContext__add_const_string(ctx, (c11_string){"}", 1}),
self->line
);
count++;
} else {
// } -> error
// throw std::runtime_error("f-string: unexpected }");
// just ignore
}
} else {
// literal
i = j;
while(j < self->src.size && src[j] != '{' && src[j] != '}')
j++;
c11_string literal = {src+i, j-i}; // src[i:j]
pk_CodeEmitContext__emit_(
ctx,
OP_LOAD_CONST,
pk_CodeEmitContext__add_const_string(ctx, literal),
self->line
);
count++;
continue; // skip j++
}
}
j++;
}
if(flag) {
// literal
c11_string literal = {src+i, self->src.size-i}; // src[i:]
pk_CodeEmitContext__emit_(ctx, OP_LOAD_CONST, pk_CodeEmitContext__add_const_string(ctx, literal), self->line);
count++;
}
pk_CodeEmitContext__emit_(ctx, OP_BUILD_STRING, count, self->line);
}
// Allocates a pk_FStringExpr from the expression pool.
// `src` is stored by value (the c11_string struct is copied, not the bytes it refers to);
// the node starts with line = -1.
pk_FStringExpr* pk_FStringExpr__new(c11_string src){
    static_assert_expr_size(pk_FStringExpr);
    pk_FStringExpr* node = PoolExpr_alloc();
    node->src = src;
    node->line = -1;
    node->vt = &FStringExprVt;
    return node;
}
///////////////////////////////////////////// /////////////////////////////////////////////
void pk_Expr__initialize(){ void pk_Expr__initialize(){
pk_ExprVt__ctor(&NameExprVt); pk_ExprVt__ctor(&NameExprVt);
@ -390,4 +647,17 @@ void pk_Expr__initialize(){
TupleExprVt.is_tuple = true; TupleExprVt.is_tuple = true;
TupleExprVt.emit_store = pk_TupleExpr__emit_store; TupleExprVt.emit_store = pk_TupleExpr__emit_store;
TupleExprVt.emit_del = pk_TupleExpr__emit_del; TupleExprVt.emit_del = pk_TupleExpr__emit_del;
pk_ExprVt__ctor(&CompExprVt);
vt = &CompExprVt;
vt->dtor = pk_CompExpr__dtor;
vt->emit_ = pk_CompExpr__emit_;
pk_ExprVt__ctor(&LambdaExprVt);
vt = &LambdaExprVt;
vt->emit_ = pk_LambdaExpr__emit_;
pk_ExprVt__ctor(&FStringExprVt);
vt = &FStringExprVt;
vt->emit_ = pk_FStringExpr__emit_;
} }