This commit is contained in:
blueloveTH 2023-03-31 17:19:32 +08:00
parent c607d11bd6
commit da022c5e25
5 changed files with 475 additions and 466 deletions

View File

@ -39,6 +39,9 @@ enum CodeBlockType {
TRY_EXCEPT,
};
#define BC_NOARG -1
#define BC_KEEPLINE -1
struct CodeBlock {
CodeBlockType type;
int parent; // parent index in blocks
@ -68,27 +71,6 @@ struct CodeObject {
void optimize(VM* vm);
bool add_label(StrName label){
if(labels.count(label)) return false;
labels[label] = codes.size();
return true;
}
int add_name(StrName name, NameScope scope){
if(scope == NAME_LOCAL && global_names.count(name)) scope = NAME_GLOBAL;
auto p = std::make_pair(name, scope);
for(int i=0; i<names.size(); i++){
if(names[i] == p) return i;
}
names.push_back(p);
return names.size() - 1;
}
int add_const(PyObject* v){
consts.push_back(v);
return consts.size() - 1;
}
void _mark() const {
for(PyObject* v : consts) OBJ_MARK(v);
}

View File

@ -117,7 +117,7 @@ public:
const std::vector<T>& data() const { return vec; }
};
struct Expression;
typedef std::unique_ptr<Expression> Expression_;
struct Expr;
typedef std::unique_ptr<Expr> Expr_;
} // namespace pkpy

View File

@ -20,41 +20,6 @@ struct PrattRule{
Precedence precedence;
};
struct CodeEmitContext{
CodeObject_ co;
stack<Expression_> s_expr;
CodeEmitContext(CodeObject_ co): co(co) {}
int curr_block_i = 0;
bool is_compiling_class = false;
bool is_curr_block_loop() const {
return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP;
}
void enter_block(CodeBlockType type){
co->blocks.push_back(CodeBlock{
type, curr_block_i, (int)co->codes.size()
});
curr_block_i = co->blocks.size()-1;
}
void exit_block(){
co->blocks[curr_block_i].end = co->codes.size();
curr_block_i = co->blocks[curr_block_i].parent;
if(curr_block_i < 0) UNREACHABLE();
}
// clear the expression stack and generate bytecode
void emit_expr(){
if(s_expr.size() != 1) UNREACHABLE();
Expression_ expr = s_expr.popx();
// emit
// ...
}
};
class Compiler {
std::unique_ptr<Lexer> lexer;
stack<CodeEmitContext> contexts;
@ -70,7 +35,7 @@ class Compiler {
// Builds a CodeObject from `args`, opens a fresh emit context for it on the
// context stack and returns the new code object.
template<typename... Args>
CodeObject_ push_context(Args&&... args){
    CodeObject_ co = make_sp<CodeObject>(std::forward<Args>(args)...);
    // CodeEmitContext takes the VM as well as the code object; push exactly one context.
    contexts.push(CodeEmitContext(vm, co));
    return co;
}
@ -117,30 +82,29 @@ public:
rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
rules[TK("not")] = { METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
rules[TK("True")] = { METHOD(exprLiteral0), NO_INFIX };
rules[TK("False")] = { METHOD(exprLiteral0), NO_INFIX };
rules[TK("None")] = { METHOD(exprLiteral0), NO_INFIX };
rules[TK("...")] = { METHOD(exprLiteral0), NO_INFIX };
rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK(">>=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK("<<=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
rules[TK(":")] = { nullptr, METHOD(exprSlice), PREC_SLICE };
rules[TK("+=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("-=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("*=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("/=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("//=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("%=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("&=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("|=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("^=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK(">>=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK("<<=")] = { nullptr, METHOD(exprInplaceAssign), PREC_ASSIGNMENT };
rules[TK(",")] = { nullptr, METHOD(exprTuple), PREC_TUPLE };
rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
@ -150,7 +114,7 @@ public:
#undef NO_INFIX
#define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression
#define EXPR_TUPLE() parse_expression(PREC_COMMA) // no '=', but ',' is allowed
#define EXPR_TUPLE() parse_expression(PREC_TUPLE) // no '=', but ',' is allowed
#define EXPR_ANY() parse_expression(PREC_ASSIGNMENT)
}
@ -201,96 +165,55 @@ private:
if (!match_end_stmt()) SyntaxError("expected statement end");
}
PyObject* get_value(const Token& token) {
switch (token.type) {
case TK("@num"):
if(std::holds_alternative<i64>(token.value)) return VAR(std::get<i64>(token.value));
if(std::holds_alternative<f64>(token.value)) return VAR(std::get<f64>(token.value));
UNREACHABLE();
case TK("@str"): case TK("@fstr"):
return VAR(std::get<Str>(token.value));
default: throw std::runtime_error(Str("invalid token type: ") + TK_STR(token.type));
}
}
void exprLiteral(){
ctx()->s_expr.push(
std::make_unique<LiteralExpr>(prev().value)
expr_prev_line<LiteralExpr>(prev().value)
);
// PyObject* value = get_value(prev());
// int index = co()->add_const(value);
// emit(OP_LOAD_CONST, index);
}
void exprFString(){
ctx()->s_expr.push(
std::make_unique<FStringExpr>(std::get<Str>(prev().value))
expr_prev_line<FStringExpr>(std::get<Str>(prev().value))
);
// static const std::regex pattern(R"(\{(.*?)\})");
// PyObject* value = get_value(prev());
// Str s = CAST(Str, value);
// std::sregex_iterator begin(s.begin(), s.end(), pattern);
// std::sregex_iterator end;
// int size = 0;
// int i = 0;
// for(auto it = begin; it != end; it++) {
// std::smatch m = *it;
// if (i < m.position()) {
// std::string literal = s.substr(i, m.position() - i);
// emit(OP_LOAD_CONST, co()->add_const(VAR(literal)));
// size++;
// }
// emit(OP_LOAD_EVAL_FN);
// emit(OP_LOAD_CONST, co()->add_const(VAR(m[1].str())));
// emit(OP_CALL, 1);
// size++;
// i = (int)(m.position() + m.length());
// }
// if (i < s.size()) {
// std::string literal = s.substr(i, s.size() - i);
// emit(OP_LOAD_CONST, co()->add_const(VAR(literal)));
// size++;
// }
// emit(OP_BUILD_STRING, size);
}
void emit_expr(){}
// Constructs an expression node of type T from `args` and stamps it with the
// line of the most recently consumed token.
template <typename T, typename... Args>
std::unique_ptr<T> expr_prev_line(Args&&... args) {
    auto node = std::make_unique<T>(std::forward<Args>(args)...);
    node->line = prev().line;
    return node;
}
// Prefix rule for `lambda`: parses the optional parameter list and the body
// expression, compiles the body into its own code object and pushes a
// LambdaExpr onto the enclosing context's expression stack.
void exprLambda(){
    auto e = expr_prev_line<LambdaExpr>();
    e->func.name = "<lambda>";
    // Captured before push_context() so it describes the enclosing scope.
    e->scope = name_scope();
    if(!match(TK(":"))){
        _compile_f_args(e->func, false);
        consume(TK(":"));
    }
    e->func.code = push_context(lexer->src, "<lambda>");
    EXPR();
    // The body sits on the lambda context's expression stack; flush it into
    // bytecode before returning, otherwise OP_RETURN_VALUE has nothing to return.
    ctx()->emit_expr();
    ctx()->emit(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
    pop_context();
    ctx()->s_expr.push(std::move(e));
}
// Infix rule for the augmented assignment tokens (+=, -=, ...): the left
// operand is taken from the stack, the right operand is parsed with EXPR_TUPLE().
void exprInplaceAssign(){
    auto node = expr_prev_line<InplaceAssignExpr>();
    node->op = prev().type;
    node->lhs = ctx()->s_expr.popx();
    EXPR_TUPLE();
    node->rhs = ctx()->s_expr.popx();
    ctx()->s_expr.push(std::move(node));
}
void exprAssign(){
Expression_ lhs = ctx()->s_expr.popx();
TokenIndex op = prev().type;
auto e = expr_prev_line<AssignExpr>();
e->lhs = ctx()->s_expr.popx();
EXPR_TUPLE();
if(op == TK("=")){
ctx()->s_expr.push(
std::make_unique<AssignExpr>(std::move(lhs), ctx()->s_expr.popx())
);
}else{
// += -= ...
ctx()->s_expr.push(
std::make_unique<InplaceAssignExpr>(op, std::move(lhs), ctx()->s_expr.popx())
);
}
e->rhs = ctx()->s_expr.popx();
ctx()->s_expr.push(std::move(e));
// if(co()->codes.empty()) UNREACHABLE();
// bool is_load_name_ref = co()->codes.back().op == OP_LOAD_NAME_REF;
@ -341,123 +264,73 @@ private:
// co()->_rvalue -= 1;
}
void exprSlice(){
}
void exprComma(){
int size = 1; // an expr is in the stack now
// Infix rule for ',': collects comma-separated expressions into a TupleExpr.
void exprTuple(){
    auto e = expr_prev_line<TupleExpr>();
    // The operand to the left of the first ',' is already on the stack;
    // it is the tuple's first element.
    e->items.push_back(ctx()->s_expr.popx());
    do {
        EXPR();         // NOTE: "1," will fail, "1,2" will be ok
        e->items.push_back(ctx()->s_expr.popx());
    } while(match(TK(",")));
    ctx()->s_expr.push(std::move(e));
}
// Infix rule for `or`: left operand comes from the stack, right operand is
// parsed one precedence level above PREC_LOGICAL_OR.
void exprOr(){
    auto e = expr_prev_line<OrExpr>();
    e->lhs = ctx()->s_expr.popx();
    parse_expression(PREC_LOGICAL_OR + 1);
    e->rhs = ctx()->s_expr.popx();
    ctx()->s_expr.push(std::move(e));
}
// Infix rule for `and`: left operand comes from the stack, right operand is
// parsed one precedence level above PREC_LOGICAL_AND.
void exprAnd(){
    // Must be AndExpr (emits OP_JUMP_IF_FALSE_OR_POP); an OrExpr here would
    // compile `a and b` as `a or b`.
    auto e = expr_prev_line<AndExpr>();
    e->lhs = ctx()->s_expr.popx();
    parse_expression(PREC_LOGICAL_AND + 1);
    e->rhs = ctx()->s_expr.popx();
    ctx()->s_expr.push(std::move(e));
}
// Infix rule for '?': `cond ? true_expr : false_expr`. The condition is
// already on the stack; both branches are parsed with EXPR().
void exprTernary(){
    auto e = expr_prev_line<TernaryExpr>();
    e->cond = ctx()->s_expr.popx();
    EXPR();         // if true
    e->true_expr = ctx()->s_expr.popx();
    consume(TK(":"));
    EXPR();         // if false
    e->false_expr = ctx()->s_expr.popx();
    ctx()->s_expr.push(std::move(e));
}
// Infix rule shared by the binary operators: records the operator token,
// takes the left operand from the stack and parses the right operand one
// level above the operator's own precedence. Opcode selection happens in
// BinaryExpr::emit.
void exprBinaryOp(){
    auto e = expr_prev_line<BinaryExpr>();
    e->op = prev().type;
    e->lhs = ctx()->s_expr.popx();
    parse_expression(rules[e->op].precedence + 1);
    e->rhs = ctx()->s_expr.popx();
    ctx()->s_expr.push(std::move(e));
}
void exprNot() {
parse_expression((Precedence)(PREC_LOGICAL_NOT + 1));
parse_expression(PREC_LOGICAL_NOT + 1);
ctx()->s_expr.push(
std::make_unique<NotExpr>(ctx()->s_expr.popx())
expr_prev_line<NotExpr>(ctx()->s_expr.popx())
);
// emit(OP_UNARY_NOT);
}
// Prefix rule for unary '-' and '*': parses the operand one level above
// PREC_UNARY and wraps it in NegatedExpr or StarredExpr.
void exprUnaryOp(){
    TokenIndex type = prev().type;
    parse_expression(PREC_UNARY + 1);
    Expr_ e;
    switch(type){
        case TK("-"):
            e = expr_prev_line<NegatedExpr>(ctx()->s_expr.popx());
            break;  // without this the '-' case pops the stack again and hits UNREACHABLE()
        case TK("*"):
            e = expr_prev_line<StarredExpr>(ctx()->s_expr.popx());
            break;
        default: UNREACHABLE();
    }
    ctx()->s_expr.push(std::move(e));
}
// () is just for change precedence, so we don't need to push it into stack
// () is just for change precedence
void exprGroup(){
match_newlines(mode()==REPL_MODE);
EXPR_TUPLE();
@ -507,52 +380,37 @@ private:
// }
// Builds a comprehension node of type T (which must derive from CompExpr)
// around the already-parsed element expression and pushes it.
// Only `expr` is filled in here; vars/iter/cond are still to be parsed.
template<typename T>
void _consume_comp(Expr_ expr){
    static_assert(std::is_base_of<CompExpr, T>::value);
    std::unique_ptr<CompExpr> ce = std::make_unique<T>();
    ce->expr = std::move(expr);
    // ...
    ctx()->s_expr.push(std::move(ce));
}
// Prefix rule for '[': parses either a list display `[a, b, ...]` or, when
// the first element is followed by `for`, a list comprehension.
void exprList() {
    auto e = expr_prev_line<ListExpr>();
    do {
        match_newlines(mode()==REPL_MODE);
        if (curr().type == TK("]")) break;
        EXPR();
        e->items.push_back(ctx()->s_expr.popx());
        match_newlines(mode()==REPL_MODE);
        // `for` right after the first element turns the display into a comprehension.
        if(e->items.size()==1 && match(TK("for"))){
            _consume_comp<ListCompExpr>(std::move(e->items[0]));
            consume(TK("]"));
            return;
        }
    } while (match(TK(",")));
    match_newlines(mode()==REPL_MODE);
    consume(TK("]"));
    ctx()->s_expr.push(std::move(e));
}
// {...} may be dict or set
void exprMap() {
bool parsing_dict = false;
int ARGC = 0;
std::vector<Expr_> items;
do {
match_newlines(mode()==REPL_MODE);
if (curr().type == TK("}")) break;
@ -561,62 +419,33 @@ private:
if(parsing_dict){
consume(TK(":"));
EXPR();
Expression_ value = ctx()->s_expr.popx();
ctx()->s_expr.push(
std::make_unique<DictItemExpr>(ctx()->s_expr.popx(), std::move(value))
);
auto dict_item = expr_prev_line<DictItemExpr>();
dict_item->key = ctx()->s_expr.popx();
dict_item->value = ctx()->s_expr.popx();
items.push_back(std::move(dict_item));
}else{
items.push_back(ctx()->s_expr.popx());
}
ARGC++;
match_newlines(mode()==REPL_MODE);
if(ARGC == 1 && match(TK("for"))){
if(parsing_dict) _consume_comp<DictCompExpr>();
else _consume_comp<SetCompExpr>();
if(items.size()==1 && match(TK("for"))){
if(parsing_dict) _consume_comp<DictCompExpr>(std::move(items[0]));
else _consume_comp<SetCompExpr>(std::move(items[0]));
consume(TK("}"));
return;
}
} while (match(TK(",")));
consume(TK("}"));
if(ARGC == 0 || parsing_dict){
auto e = std::make_unique<DictExpr>();
e->items.resize(ARGC);
for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx();
if(items.size()==0 || parsing_dict){
auto e = expr_prev_line<DictExpr>(std::move(items));
ctx()->s_expr.push(std::move(e));
}else{
auto e = std::make_unique<SetExpr>();
e->items.resize(ARGC);
for(int i=ARGC-1; i>=0; i--) e->items[i] = ctx()->s_expr.popx();
auto e = expr_prev_line<SetExpr>(std::move(items));
ctx()->s_expr.push(std::move(e));
}
// int _patch = emit(OP_NO_OP);
// int _body_start = co()->codes.size();
// bool parsing_dict = false;
// int ARGC = 0;
// do {
// match_newlines(mode()==REPL_MODE);
// if (curr().type == TK("}")) break;
// EXPR();
// if(curr().type == TK(":")) parsing_dict = true;
// if(parsing_dict){
// consume(TK(":"));
// EXPR();
// }
// ARGC++;
// match_newlines(mode()==REPL_MODE);
// if(ARGC == 1 && match(TK("for"))){
// if(parsing_dict) _consume_comp(OP_BUILD_MAP, OP_MAP_ADD, _patch, _body_start);
// else _consume_comp(OP_BUILD_SET, OP_SET_ADD, _patch, _body_start);
// consume(TK("}"));
// return;
// }
// } while (match(TK(",")));
// consume(TK("}"));
// if(ARGC == 0 || parsing_dict) emit(OP_BUILD_MAP, ARGC);
// else emit(OP_BUILD_SET, ARGC);
}
void exprCall() {
auto e = std::make_unique<CallExpr>();
auto e = _expr<CallExpr>();
do {
match_newlines(mode()==REPL_MODE);
if (curr().type==TK(")")) break;
@ -648,69 +477,49 @@ private:
void exprName(){
ctx()->s_expr.push(
std::make_unique<NameExpr>(prev().str(), name_scope())
expr_prev_line<NameExpr>(prev().str(), name_scope())
);
}
void exprAttrib() {
consume(TK("@id"));
ctx()->s_expr.push(
std::make_unique<AttribExpr>(ctx()->s_expr.popx(), prev().str())
expr_prev_line<AttribExpr>(ctx()->s_expr.popx(), prev().str())
);
}
// [:], [:b]
// [a], [a:], [a:b]
// Infix rule for '[': builds a SubscrExpr `a[b]`, where `b` is either a
// single expression or a SliceExpr made from 2 or 3 ':'-separated parts.
// NOTE(review): every part is parsed with EXPR_TUPLE(), so forms with an
// omitted bound ([:], [:b], [a:]) are not accepted by this loop.
void exprSubscr() {
    auto e = expr_prev_line<SubscrExpr>();
    // The object being subscripted is on top of the stack when this rule fires.
    e->a = ctx()->s_expr.popx();
    std::vector<Expr_> items;
    do {
        EXPR_TUPLE();
        items.push_back(ctx()->s_expr.popx());
    } while(match(TK(":")));
    consume(TK("]"));
    switch(items.size()){
        case 1:
            e->b = std::move(items[0]);
            break;
        case 2: case 3: {
            auto slice = expr_prev_line<SliceExpr>();
            slice->start = std::move(items[0]);
            slice->stop = std::move(items[1]);
            if(items.size()==3){
                slice->step = std::move(items[2]);
            }
            e->b = std::move(slice);
        } break;
        default: SyntaxError(); break;
    }
    ctx()->s_expr.push(std::move(e));
}
void exprValue() {
void exprLiteral0() {
ctx()->s_expr.push(
std::make_unique<SpecialLiteralExpr>(prev().type)
expr_prev_line<Literal0Expr>(prev().type)
);
}
int emit(Opcode opcode, int arg=-1, bool keepline=false) {
int line = prev().line;
co()->codes.push_back(
Bytecode{(uint8_t)opcode, (uint16_t)ctx()->curr_block_i, arg, line}
);
int i = co()->codes.size() - 1;
if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line;
return i;
}
inline void patch_jump(int addr_index) {
int target = co()->codes.size();
co()->codes[addr_index].arg = target;
}
void compile_block_body() {
consume(TK(":"));
if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
@ -778,10 +587,11 @@ private:
consume_end_stmt();
}
// a = 1 + 2
// ['a', '1', '2', '+', '=']
//
void parse_expression(Precedence precedence, bool allowslice=false) {
void parse_expression(int precedence){
parse_expression((Precedence)precedence);
}
void parse_expression(Precedence precedence) {
advance();
PrattCallback prefix = rules[prev().type].prefix;
if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));

View File

@ -8,115 +8,262 @@
namespace pkpy{
struct Expression{
virtual Str to_string() const = 0;
struct CodeEmitContext;
// Base class of every expression node produced by the parser.
struct Expr{
    int line = 0;   // source line; set by the parser when the node is created

    // Nodes are owned and destroyed through std::unique_ptr<Expr>, so the
    // destructor must be virtual for derived members to be released.
    virtual ~Expr() = default;

    // Short textual form of the node.
    virtual Str str() const = 0;
    // Appends the bytecode for this node to ctx.
    virtual void emit(CodeEmitContext* ctx) = 0;
};
struct NameExpr: Expression{
struct CodeEmitContext{
CodeObject_ co;
VM* vm;
stack<Expr_> s_expr;
CodeEmitContext(VM* vm, CodeObject_ co): co(co) {}
CodeEmitContext(const CodeEmitContext&) = delete;
CodeEmitContext& operator=(const CodeEmitContext&) = delete;
CodeEmitContext(CodeEmitContext&&) = delete;
CodeEmitContext& operator=(CodeEmitContext&&) = delete;
int curr_block_i = 0;
bool is_compiling_class = false;
bool is_curr_block_loop() const {
return co->blocks[curr_block_i].type == FOR_LOOP || co->blocks[curr_block_i].type == WHILE_LOOP;
}
void enter_block(CodeBlockType type){
co->blocks.push_back(CodeBlock{
type, curr_block_i, (int)co->codes.size()
});
curr_block_i = co->blocks.size()-1;
}
void exit_block(){
co->blocks[curr_block_i].end = co->codes.size();
curr_block_i = co->blocks[curr_block_i].parent;
if(curr_block_i < 0) UNREACHABLE();
}
// clear the expression stack and generate bytecode
void emit_expr(){
if(s_expr.size() != 1) UNREACHABLE();
Expr_ expr = s_expr.popx();
// emit
// ...
}
int emit(Opcode opcode, int arg, int line) {
co->codes.push_back(
Bytecode{(uint8_t)opcode, (uint16_t)curr_block_i, arg, line}
);
int i = co->codes.size() - 1;
if(line==BC_KEEPLINE && i>=1) co->codes[i].line = co->codes[i-1].line;
return i;
}
void patch_jump(int index) {
int target = co->codes.size();
co->codes[index].arg = target;
}
bool add_label(StrName label){
if(co->labels.count(label)) return false;
co->labels[label] = co->codes.size();
return true;
}
int add_name(StrName name, NameScope scope){
if(scope == NAME_LOCAL && co->global_names.count(name)) scope = NAME_GLOBAL;
auto p = std::make_pair(name, scope);
for(int i=0; i<co->names.size(); i++){
if(co->names[i] == p) return i;
}
co->names.push_back(p);
return co->names.size() - 1;
}
int add_const(PyObject* v){
co->consts.push_back(v);
return co->consts.size() - 1;
}
};
// Identifier reference.
struct NameExpr: Expr{
    Str name;
    NameScope scope;
    NameExpr(const Str& name, NameScope scope): name(name), scope(scope) {}
    NameExpr(Str&& name, NameScope scope): name(std::move(name)), scope(scope) {}

    Str str() const override { return "$" + name; }

    // Registers the name in the code object and loads it with OP_LOAD_NAME.
    void emit(CodeEmitContext* ctx) override {
        int index = ctx->add_name(name, scope);
        ctx->emit(OP_LOAD_NAME, index, line);
    }
};
struct UnaryExpr: Expression{
TokenIndex op;
Expression_ child;
UnaryExpr(TokenIndex op, Expression_&& child): op(op), child(std::move(child)) {}
Str to_string() const override { return TK_STR(op); }
// Unary `*operand`.
struct StarredExpr: Expr{
    Expr_ child;
    StarredExpr(Expr_&& child): child(std::move(child)) {}

    Str str() const override { return "*"; }

    // Emits the operand, then OP_UNARY_STAR with argument 0.
    void emit(CodeEmitContext* ctx) override {
        child->emit(ctx);
        const int star_arg = static_cast<int>(false);
        ctx->emit(OP_UNARY_STAR, star_arg, line);
    }
};
struct NotExpr: Expression{
Expression_ child;
NotExpr(Expression_&& child): child(std::move(child)) {}
Str to_string() const override { return "not"; }
struct NegatedExpr: Expr{
Expr_ child;
NegatedExpr(Expr_&& child): child(std::move(child)) {}
Str str() const override { return "-"; }
void emit(CodeEmitContext* ctx) override {
child->emit(ctx);
ctx->emit(OP_UNARY_NEGATIVE, BC_NOARG, line);
}
};
struct AndExpr: Expression{
Expression_ lhs;
Expression_ rhs;
AndExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {}
Str to_string() const override { return "and"; }
struct NotExpr: Expr{
Expr_ child;
NotExpr(Expr_&& child): child(std::move(child)) {}
Str str() const override { return "not"; }
void emit(CodeEmitContext* ctx) override {
child->emit(ctx);
ctx->emit(OP_UNARY_NOT, BC_NOARG, line);
}
};
struct OrExpr: Expression{
Expression_ lhs;
Expression_ rhs;
OrExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {}
Str to_string() const override { return "or"; }
// `lhs and rhs`.
struct AndExpr: Expr{
    Expr_ lhs;
    Expr_ rhs;

    Str str() const override { return "and"; }

    // lhs, OP_JUMP_IF_FALSE_OR_POP, rhs; the jump is patched to land just after rhs.
    void emit(CodeEmitContext* ctx) override {
        lhs->emit(ctx);
        const int jump_at = ctx->emit(OP_JUMP_IF_FALSE_OR_POP, BC_NOARG, line);
        rhs->emit(ctx);
        ctx->patch_jump(jump_at);
    }
};
// `lhs or rhs`.
struct OrExpr: Expr{
    Expr_ lhs;
    Expr_ rhs;

    Str str() const override { return "or"; }

    // lhs, OP_JUMP_IF_TRUE_OR_POP, rhs; the jump is patched to land just after rhs.
    void emit(CodeEmitContext* ctx) override {
        lhs->emit(ctx);
        const int jump_at = ctx->emit(OP_JUMP_IF_TRUE_OR_POP, BC_NOARG, line);
        rhs->emit(ctx);
        ctx->patch_jump(jump_at);
    }
};
// [None, True, False, ...]
struct SpecialLiteralExpr: Expression{
struct Literal0Expr: Expr{
TokenIndex token;
SpecialLiteralExpr(TokenIndex token): token(token) {}
Str to_string() const override { return TK_STR(token); }
Literal0Expr(TokenIndex token): token(token) {}
Str str() const override { return TK_STR(token); }
void gen(){
// switch (token) {
// case TK("None"): emit(OP_LOAD_NONE); break;
// case TK("True"): emit(OP_LOAD_TRUE); break;
// case TK("False"): emit(OP_LOAD_FALSE); break;
// case TK("..."): emit(OP_LOAD_ELLIPSIS); break;
// default: UNREACHABLE();
// }
void emit(CodeEmitContext* ctx) override {
switch (token) {
case TK("None"): ctx->emit(OP_LOAD_NONE, BC_NOARG, line); break;
case TK("True"): ctx->emit(OP_LOAD_TRUE, BC_NOARG, line); break;
case TK("False"): ctx->emit(OP_LOAD_FALSE, BC_NOARG, line); break;
case TK("..."): ctx->emit(OP_LOAD_ELLIPSIS, BC_NOARG, line); break;
default: UNREACHABLE();
}
}
};
// @num, @str which needs to invoke OP_LOAD_CONST
struct LiteralExpr: Expression{
// Numeric or string literal; the value is loaded through the constant table.
struct LiteralExpr: Expr{
    TokenValue value;
    LiteralExpr(TokenValue value): value(value) {}

    // Decimal text for i64/f64 values, escaped text for Str values.
    Str str() const override {
        if(std::holds_alternative<i64>(value)){
            return std::to_string(std::get<i64>(value));
        }
        if(std::holds_alternative<f64>(value)){
            return std::to_string(std::get<f64>(value));
        }
        if(std::holds_alternative<Str>(value)){
            return std::get<Str>(value).escape(true);
        }
        UNREACHABLE();
    }

    // Boxes the value, appends it to the constants and emits OP_LOAD_CONST.
    void emit(CodeEmitContext* ctx) override {
        VM* vm = ctx->vm;   // read by the VAR macro
        PyObject* obj = nullptr;
        if(std::holds_alternative<i64>(value)){
            obj = VAR(std::get<i64>(value));
        }else if(std::holds_alternative<f64>(value)){
            obj = VAR(std::get<f64>(value));
        }else if(std::holds_alternative<Str>(value)){
            obj = VAR(std::get<Str>(value));
        }
        if(!obj) UNREACHABLE();
        int index = ctx->add_const(obj);
        ctx->emit(OP_LOAD_CONST, index, line);
    }
};
struct SliceExpr: Expression{
Expression_ start;
Expression_ stop;
Expression_ step;
SliceExpr(Expression_&& start, Expression_&& stop, Expression_&& step):
start(std::move(start)), stop(std::move(stop)), step(std::move(step)) {}
Str to_string() const override { return "slice"; }
// Slice node. The parser's exprSubscr fills start and stop, and step when
// three ':'-separated parts were parsed.
// NOTE(review): no emit() override is declared in this struct.
struct SliceExpr: Expr{
Expr_ start;
Expr_ stop;
Expr_ step;
Str str() const override { return "slice()"; }
};
struct ListExpr: Expression{
std::vector<Expression_> items;
Str to_string() const override { return "[]"; }
// List display; `items` holds the element expressions in source order.
// NOTE(review): no emit() override is declared in this struct.
struct ListExpr: Expr{
std::vector<Expr_> items;
Str str() const override { return "[]"; }
};
struct DictExpr: Expression{
std::vector<Expression_> items; // each item is a DictItemExpr
Str to_string() const override { return "{}"; }
// Dict display built from a vector of item expressions.
// NOTE(review): no emit() override is declared in this struct.
struct DictExpr: Expr{
std::vector<Expr_> items; // each item is a DictItemExpr
// Takes ownership of the parsed items.
DictExpr(std::vector<Expr_>&& items): items(std::move(items)) {}
Str str() const override { return "{}"; }
};
struct SetExpr: Expression{
std::vector<Expression_> items;
Str to_string() const override { return "{}"; }
// Set display built from a vector of element expressions.
// NOTE(review): no emit() override is declared in this struct.
struct SetExpr: Expr{
    std::vector<Expr_> items;
    // Takes ownership of the parsed items. The constructor must carry the
    // struct's own name; `Set(...)` is not a valid declaration here.
    SetExpr(std::vector<Expr_>&& items): items(std::move(items)) {}
    Str str() const override { return "{}"; }
};
struct TupleExpr: Expression{
std::vector<Expression_> items;
TupleExpr(std::vector<Expression_>&& items): items(std::move(items)) {}
Str to_string() const override { return "(a, b, c)"; }
// Tuple expression; `items` is filled by the parser's exprTuple.
// NOTE(review): no emit() override is declared in this struct.
struct TupleExpr: Expr{
std::vector<Expr_> items;
Str str() const override { return "tuple()"; }
};
struct CompExpr: Expression{
Expression_ expr; // loop expr
Expression_ vars; // loop vars
Expression_ iter; // loop iter
Expression_ cond; // optional if condition
// Common base of the list/dict/set comprehension nodes.
struct CompExpr: Expr{
Expr_ expr; // loop expr
Expr_ vars; // loop vars
Expr_ iter; // loop iter
Expr_ cond; // optional if condition
// Implemented by each concrete comprehension type.
virtual void emit_expr() = 0;
};
// a:b
struct DictItemExpr: Expression{
Expression_ key;
Expression_ value;
DictItemExpr(Expression_&& key, Expression_&& value)
: key(std::move(key)), value(std::move(value)) {}
Str to_string() const override { return "dict item"; }
// One `key: value` entry of a dict display.
// NOTE(review): no emit() override is declared in this struct.
struct DictItemExpr: Expr{
Expr_ key;
Expr_ value;
Str str() const override { return "k:v"; }
};
struct ListCompExpr: CompExpr{
@ -128,73 +275,144 @@ struct DictCompExpr: CompExpr{
struct SetCompExpr: CompExpr{
};
struct LambdaExpr: Expression{
// Lambda expression: the compiled function plus the scope it was written in.
struct LambdaExpr: Expr{
    Function func;
    NameScope scope;

    // The parser creates the node first and fills func/scope afterwards,
    // so the node has to be default-constructible.
    LambdaExpr() = default;
    LambdaExpr(Function&& func, NameScope scope): func(std::move(func)), scope(scope) {}

    Str str() const override { return "<lambda>"; }

    // Loads the function as a constant; in a local scope OP_SETUP_CLOSURE follows.
    void emit(CodeEmitContext* ctx) override {
        VM* vm = ctx->vm;   // read by the VAR macro
        ctx->emit(OP_LOAD_FUNCTION, ctx->add_const(VAR(func)), line);
        if(scope == NAME_LOCAL) ctx->emit(OP_SETUP_CLOSURE, BC_NOARG, line);
    }
};
struct FStringExpr: Expression{
// f-string literal; `src` is the raw template text.
struct FStringExpr: Expr{
    Str src;
    FStringExpr(const Str& src): src(src) {}

    Str str() const override {
        return "f" + src.escape(true);
    }

    // Splits `src` at each `{...}` group. Literal pieces are loaded as
    // constants; each group's text is loaded as a constant and passed to the
    // function pushed by OP_LOAD_EVAL_FN. OP_BUILD_STRING then receives the
    // number of pieces.
    void emit(CodeEmitContext* ctx) override {
        VM* vm = ctx->vm;   // read by the VAR macro
        static const std::regex pattern(R"(\{(.*?)\})");
        std::sregex_iterator begin(src.begin(), src.end(), pattern);
        std::sregex_iterator end;
        int size = 0;   // pieces emitted so far
        int i = 0;      // offset in src just past the previous match
        for(auto it = begin; it != end; it++) {
            std::smatch m = *it;
            if (i < m.position()) {
                std::string literal = src.substr(i, m.position() - i);
                ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line);
                size++;
            }
            ctx->emit(OP_LOAD_EVAL_FN, BC_NOARG, line);
            ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(m[1].str())), line);
            ctx->emit(OP_CALL, 1, line);
            size++;
            i = (int)(m.position() + m.length());
        }
        // Trailing literal text after the last group.
        if (i < (int)src.size()) {
            std::string literal = src.substr(i, src.size() - i);
            ctx->emit(OP_LOAD_CONST, ctx->add_const(VAR(literal)), line);
            size++;
        }
        ctx->emit(OP_BUILD_STRING, size, line);
    }
};
};
struct SubscrExpr: Expression{
Expression_ a;
Expression_ b;
SubscrExpr(Expression_&& a, Expression_&& b): a(std::move(a)), b(std::move(b)) {}
Str to_string() const override { return "a[b]"; }
// Subscript expression `a[b]`; the parser stores either a plain expression
// or a SliceExpr in `b`.
// NOTE(review): no emit() override is declared in this struct.
struct SubscrExpr: Expr{
Expr_ a;
Expr_ b;
Str str() const override { return "a[b]"; }
};
struct AttribExpr: Expression{
Expression_ a;
// Attribute access `a.b`, where `b` is the attribute name.
// NOTE(review): no emit() override is declared in this struct.
struct AttribExpr: Expr{
    Expr_ a;
    Str b;
    AttribExpr(Expr_ a, const Str& b): a(std::move(a)), b(b) {}
    AttribExpr(Expr_ a, Str&& b): a(std::move(a)), b(std::move(b)) {}
    Str str() const override { return "a.b"; }
};
struct AssignExpr: Expression{
Expression_ lhs;
Expression_ rhs;
AssignExpr(Expression_&& lhs, Expression_&& rhs): lhs(std::move(lhs)), rhs(std::move(rhs)) {}
Str to_string() const override { return "="; }
// Plain assignment `lhs = rhs`; both sides are filled by the parser's exprAssign.
// NOTE(review): no emit() override is declared in this struct.
struct AssignExpr: Expr{
Expr_ lhs;
Expr_ rhs;
Str str() const override { return "="; }
};
struct InplaceAssignExpr: Expression{
// Augmented assignment such as `lhs += rhs`; `op` is the operator token.
// Members are filled by the parser after default construction.
// NOTE(review): no emit() override is declared in this struct.
struct InplaceAssignExpr: Expr{
    TokenIndex op;
    Expr_ lhs;
    Expr_ rhs;
    Str str() const override { return TK_STR(op); }
};
struct CallExpr: Expression{
std::vector<Expression_> args;
std::vector<std::pair<Str, Expression_>> kwargs;
Str to_string() const override { return "()"; }
// Call expression holding positional arguments and (name, value) keyword arguments.
// NOTE(review): no emit() override is declared in this struct.
struct CallExpr: Expr{
std::vector<Expr_> args;
std::vector<std::pair<Str, Expr_>> kwargs;
Str str() const override { return "()"; }
};
struct BinaryExpr: Expression{
// Binary operator expression; `op` is the operator token. Members are filled
// by the parser after default construction.
struct BinaryExpr: Expr{
    TokenIndex op;
    Expr_ lhs;
    Expr_ rhs;

    Str str() const override { return TK_STR(op); }

    // Emits lhs, then rhs, then the opcode/argument pair selected by `op`.
    void emit(CodeEmitContext* ctx) override {
        lhs->emit(ctx);
        rhs->emit(ctx);
        switch (op) {
            case TK("+"):   ctx->emit(OP_BINARY_OP, 0, line);  break;
            case TK("-"):   ctx->emit(OP_BINARY_OP, 1, line);  break;
            case TK("*"):   ctx->emit(OP_BINARY_OP, 2, line);  break;
            case TK("/"):   ctx->emit(OP_BINARY_OP, 3, line);  break;
            case TK("//"):  ctx->emit(OP_BINARY_OP, 4, line);  break;
            case TK("%"):   ctx->emit(OP_BINARY_OP, 5, line);  break;
            case TK("**"):  ctx->emit(OP_BINARY_OP, 6, line);  break;

            case TK("<"):   ctx->emit(OP_COMPARE_OP, 0, line);    break;
            case TK("<="):  ctx->emit(OP_COMPARE_OP, 1, line);    break;
            case TK("=="):  ctx->emit(OP_COMPARE_OP, 2, line);    break;
            case TK("!="):  ctx->emit(OP_COMPARE_OP, 3, line);    break;
            case TK(">"):   ctx->emit(OP_COMPARE_OP, 4, line);    break;
            case TK(">="):  ctx->emit(OP_COMPARE_OP, 5, line);    break;

            case TK("in"):      ctx->emit(OP_CONTAINS_OP, 0, line);   break;
            case TK("not in"):  ctx->emit(OP_CONTAINS_OP, 1, line);   break;
            case TK("is"):      ctx->emit(OP_IS_OP, 0, line);         break;
            case TK("is not"):  ctx->emit(OP_IS_OP, 1, line);         break;

            case TK("<<"):  ctx->emit(OP_BITWISE_OP, 0, line);    break;
            case TK(">>"):  ctx->emit(OP_BITWISE_OP, 1, line);    break;
            case TK("&"):   ctx->emit(OP_BITWISE_OP, 2, line);    break;
            case TK("|"):   ctx->emit(OP_BITWISE_OP, 3, line);    break;
            case TK("^"):   ctx->emit(OP_BITWISE_OP, 4, line);    break;
            default: UNREACHABLE();
        }
    }
};
struct TernaryExpr: Expression{
Expression_ cond;
Expression_ true_expr;
Expression_ false_expr;
TernaryExpr(Expression_&& cond, Expression_&& true_expr, Expression_&& false_expr)
: cond(std::move(cond)), true_expr(std::move(true_expr)), false_expr(std::move(false_expr)) {}
Str to_string() const override { return "?"; }
// Conditional expression `cond ? true_expr : false_expr`.
struct TernaryExpr: Expr{
    Expr_ cond;
    Expr_ true_expr;
    Expr_ false_expr;

    Str str() const override {
        return "cond ? true_expr : false_expr";
    }

    // cond; POP_JUMP_IF_FALSE -> false branch; true branch; JUMP_ABSOLUTE -> end;
    // false branch. Both jumps are patched once their targets are known.
    void emit(CodeEmitContext* ctx) override {
        cond->emit(ctx);
        const int to_false_branch = ctx->emit(OP_POP_JUMP_IF_FALSE, BC_NOARG, cond->line);

        true_expr->emit(ctx);
        const int to_end = ctx->emit(OP_JUMP_ABSOLUTE, BC_NOARG, true_expr->line);

        ctx->patch_jump(to_false_branch);
        false_expr->emit(ctx);
        ctx->patch_jump(to_end);
    }
};

View File

@ -72,8 +72,7 @@ struct Token{
enum Precedence {
PREC_NONE,
PREC_ASSIGNMENT, // =
PREC_COMMA, // ,
PREC_SLICE, // : (only available inside a subscript expression)
PREC_TUPLE, // ,
PREC_TERNARY, // ?:
PREC_LOGICAL_OR, // or
PREC_LOGICAL_AND, // and