remove constexpr TK

This commit is contained in:
blueloveTH 2024-06-20 00:24:35 +08:00
parent eb1806deaa
commit 1431cf8cde
9 changed files with 391 additions and 377 deletions

View File

@ -17,7 +17,7 @@ struct PrattRule {
struct Compiler {
PK_ALWAYS_PASS_BY_POINTER(Compiler)
static PrattRule rules[kTokenCount];
static PrattRule rules[TK__COUNT__];
Lexer lexer;
vector<CodeEmitContext> contexts;
@ -43,7 +43,7 @@ struct Compiler {
printf("%s:%d %s %s\n",
lexer.src.filename().c_str(),
curr().line,
TK_STR(curr().type),
pk_TokenSymbols(curr().type),
curr().str().escape().c_str()
);
}

View File

@ -7,11 +7,38 @@
extern "C" {
#endif
extern const char* pk_TokenSymbols[];
/* State shared by the pkpy_TokenDeserializer__* functions declared in this
 * header; set up by pkpy_TokenDeserializer__ctor(self, source). */
typedef struct pkpy_TokenDeserializer {
/* NOTE(review): presumably the current read position inside `source` - confirm against the implementation. */
const char* curr;
/* NOTE(review): presumably the string handed to the ctor; ownership is not visible here - confirm. */
const char* source;
} pkpy_TokenDeserializer;
enum TokenIndex{
TK_EOF, TK_EOL, TK_SOF,
TK_ID, TK_NUM, TK_STR, TK_FSTR, TK_LONG, TK_BYTES, TK_IMAG,
TK_INDENT, TK_DEDENT,
/***************/
TK_IS_NOT, TK_NOT_IN, TK_YIELD_FROM,
/***************/
TK_ADD, TK_IADD, TK_SUB, TK_ISUB,
TK_MUL, TK_IMUL, TK_DIV, TK_IDIV, TK_FLOORDIV, TK_IFLOORDIV, TK_MOD, TK_IMOD,
TK_AND, TK_IAND, TK_OR, TK_IOR, TK_XOR, TK_IXOR,
TK_LSHIFT, TK_ILSHIFT, TK_RSHIFT, TK_IRSHIFT,
/***************/
TK_LPAREN, TK_RPAREN, TK_LBRACKET, TK_RBRACKET, TK_LBRACE, TK_RBRACE,
TK_DOT, TK_DOTDOT, TK_DOTDOTDOT, TK_COMMA, TK_COLON, TK_SEMICOLON,
TK_POW, TK_ARROW, TK_HASH, TK_DECORATOR,
TK_GT, TK_LT, TK_ASSIGN, TK_EQ, TK_NE, TK_GE, TK_LE, TK_INVERT,
/***************/
TK_FALSE, TK_NONE, TK_TRUE, TK_AND_KW, TK_AS, TK_ASSERT, TK_BREAK, TK_CLASS, TK_CONTINUE,
TK_DEF, TK_DEL, TK_ELIF, TK_ELSE, TK_EXCEPT, TK_FINALLY, TK_FOR, TK_FROM, TK_GLOBAL,
TK_IF, TK_IMPORT, TK_IN, TK_IS, TK_LAMBDA, TK_NOT_KW, TK_OR_KW, TK_PASS, TK_RAISE, TK_RETURN,
TK_TRY, TK_WHILE, TK_WITH, TK_YIELD,
/***************/
TK__COUNT__
};
void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source);
bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c);
c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c);

View File

@ -2,58 +2,15 @@
#include "pocketpy/objects/error.hpp"
#include "pocketpy/objects/sourcedata.h"
#include "pocketpy/compiler/lexer.h"
#include <variant>
namespace pkpy {
typedef uint8_t TokenIndex;
// clang-format off
constexpr const char* kTokens[] = {
"@eof", "@eol", "@sof",
"@id", "@num", "@str", "@fstr", "@long", "@bytes", "@imag",
"@indent", "@dedent",
// These 3 are compound keywords which are generated on the fly
"is not", "not in", "yield from",
/*****************************************/
"+", "+=", "-", "-=", // (INPLACE_OP - 1) can get '=' removed
"*", "*=", "/", "/=", "//", "//=", "%", "%=",
"&", "&=", "|", "|=", "^", "^=",
"<<", "<<=", ">>", ">>=",
/*****************************************/
"(", ")", "[", "]", "{", "}",
".", "..", "...", ",", ":", ";",
"**", "->", "#", "@",
">", "<", "=", "==", "!=", ">=", "<=", "~",
/** KW_BEGIN **/
// NOTE: These keywords should be sorted in ascending order!!
"False", "None", "True", "and", "as", "assert", "break", "class", "continue",
"def", "del", "elif", "else", "except", "finally", "for", "from", "global",
"if", "import", "in", "is", "lambda", "not", "or", "pass", "raise", "return",
"try", "while", "with", "yield",
};
// clang-format on
using TokenValue = std::variant<std::monostate, i64, f64, Str>;
const int kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);
constexpr TokenIndex TK(const char token[]) {
for(int k = 0; k < kTokenCount; k++) {
const char* i = kTokens[k];
const char* j = token;
while(*i && *j && *i == *j) {
i++;
j++;
}
if(*i == *j) return k;
}
return 255;
}
constexpr inline bool is_raw_string_used(TokenIndex t) noexcept{ return t == TK("@id") || t == TK("@long"); }
#define TK_STR(t) kTokens[t]
constexpr inline bool is_raw_string_used(TokenIndex t) noexcept{ return t == TK_ID || t == TK_LONG; }
struct Token {
TokenIndex type;

View File

@ -406,6 +406,7 @@ bool c11__is_unicode_Lo_char(int c){
if(c == 0x1f955) return true;
int index;
c11__lower_bound(const int, kLoRangeA, 476, c, c11__less, &index);
if(index == 476) return false;
if(c == kLoRangeA[index]) return true;
index -= 1;
if(index < 0) return false;

View File

@ -6,12 +6,12 @@
namespace pkpy {
#define consume(expected) if(!match(expected)) return SyntaxError("expected '%s', got '%s'", TK_STR(expected), TK_STR(curr().type));
#define consume(expected) if(!match(expected)) return SyntaxError("expected '%s', got '%s'", pk_TokenSymbols[expected], pk_TokenSymbols[curr().type]);
#define consume_end_stmt() if(!match_end_stmt()) return SyntaxError("expected statement end")
#define check_newlines_repl() { bool __nml; match_newlines(&__nml); if(__nml) return NeedMoreLines(); }
#define check(B) if((err = B)) return err
PrattRule Compiler::rules[kTokenCount];
PrattRule Compiler::rules[TK__COUNT__];
NameScope Compiler::name_scope() const noexcept{
auto s = contexts.size() > 1 ? NAME_LOCAL : NAME_GLOBAL;
@ -44,7 +44,7 @@ Error* Compiler::pop_context() noexcept{
ctx()->emit_(OP_RETURN_VALUE, 1, BC_KEEPLINE, true);
// find the last valid token
int j = __i - 1;
while(tk(j).type == TK("@eol") || tk(j).type == TK("@dedent") || tk(j).type == TK("@eof"))
while(tk(j).type == TK_EOL || tk(j).type == TK_DEDENT || tk(j).type == TK_EOF)
j--;
ctx()->co->end_line = tk(j).line;
@ -115,52 +115,52 @@ void Compiler::init_pratt_rules() noexcept{
// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
#define PK_METHOD(name) &Compiler::name
#define PK_NO_INFIX nullptr, PREC_LOWEST
for(TokenIndex i = 0; i < kTokenCount; i++) rules[i] = { nullptr, PK_NO_INFIX };
rules[TK(".")] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY };
rules[TK("(")] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY };
rules[TK("[")] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY };
rules[TK("{")] = { PK_METHOD(exprMap), PK_NO_INFIX };
rules[TK("%")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK("+")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM };
rules[TK("-")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM };
rules[TK("*")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK("~")] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY };
rules[TK("/")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK("//")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK("**")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT };
rules[TK(">")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("==")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("!=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK(">=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("<=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("is")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("<<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK(">>")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK("&")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND };
rules[TK("|")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR };
rules[TK("^")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR };
rules[TK("@")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK("if")] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY };
rules[TK("not in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("is not")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK("and") ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND };
rules[TK("or")] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR };
rules[TK("not")] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
rules[TK("True")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK("False")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK("None")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK("...")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK("lambda")] = { PK_METHOD(exprLambda), PK_NO_INFIX };
rules[TK("@id")] = { PK_METHOD(exprName), PK_NO_INFIX };
rules[TK("@num")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
rules[TK("@str")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
rules[TK("@fstr")] = { PK_METHOD(exprFString), PK_NO_INFIX };
rules[TK("@long")] = { PK_METHOD(exprLong), PK_NO_INFIX };
rules[TK("@imag")] = { PK_METHOD(exprImag), PK_NO_INFIX };
rules[TK("@bytes")] = { PK_METHOD(exprBytes), PK_NO_INFIX };
rules[TK(":")] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY };
for(int i = 0; i < TK__COUNT__; i++) rules[i] = { nullptr, PK_NO_INFIX };
rules[TK_DOT] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY };
rules[TK_LPAREN] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY };
rules[TK_LBRACKET] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY };
rules[TK_LBRACE] = { PK_METHOD(exprMap), PK_NO_INFIX };
rules[TK_MOD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_ADD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM };
rules[TK_SUB] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM };
rules[TK_MUL] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_INVERT] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY };
rules[TK_DIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_FLOORDIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_POW] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT };
rules[TK_GT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_EQ] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_NE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_GE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IS] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK_RSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK_AND] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND };
rules[TK_OR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR };
rules[TK_XOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR };
rules[TK_DECORATOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_IF] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY };
rules[TK_NOT_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IS_NOT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_AND_KW ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND };
rules[TK_OR_KW] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR };
rules[TK_NOT_KW] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
rules[TK_TRUE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_FALSE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_NONE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_DOTDOTDOT] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_LAMBDA] = { PK_METHOD(exprLambda), PK_NO_INFIX };
rules[TK_ID] = { PK_METHOD(exprName), PK_NO_INFIX };
rules[TK_NUM] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
rules[TK_STR] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
rules[TK_FSTR] = { PK_METHOD(exprFString), PK_NO_INFIX };
rules[TK_LONG] = { PK_METHOD(exprLong), PK_NO_INFIX };
rules[TK_IMAG] = { PK_METHOD(exprImag), PK_NO_INFIX };
rules[TK_BYTES] = { PK_METHOD(exprBytes), PK_NO_INFIX };
rules[TK_COLON] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY };
#undef PK_METHOD
#undef PK_NO_INFIX
@ -175,30 +175,30 @@ bool Compiler::match(TokenIndex expected) noexcept{
// Skips a run of consecutive end-of-line tokens.
//
// need_more_lines: optional out-parameter (other functions in this file call
//   match_newlines() without it). When non-null it is set to true if the
//   compiler is in REPL_MODE and the token reached after skipping is
//   end-of-file, and to false otherwise.
// Returns true if the current token was an end-of-line token, i.e. at least
//   one was skipped.
bool Compiler::match_newlines(bool* need_more_lines) noexcept{
bool consumed = false;
if(curr().type == TK("@eol")) {
while(curr().type == TK("@eol")) advance();
if(curr().type == TK_EOL) {
while(curr().type == TK_EOL) advance();
consumed = true;
}
if(need_more_lines) {
*need_more_lines = (mode() == REPL_MODE && curr().type == TK("@eof"));
*need_more_lines = (mode() == REPL_MODE && curr().type == TK_EOF);
}
return consumed;
}
// Reports whether the parser is at the end of a statement.
//
// Accepted terminators, checked in this order:
//   - ';'  (matched, then any newlines after it are skipped as well)
//   - one or more newlines (skipped by match_newlines)
//   - end-of-file or a dedent token (only inspected via curr())
// Returns false when none of these is found.
// NOTE(review): match() is assumed to advance past the token when it
// succeeds; its body is not visible here.
bool Compiler::match_end_stmt() noexcept{
if(match(TK(";"))) {
if(match(TK_SEMICOLON)) {
match_newlines();
return true;
}
if(match_newlines() || curr().type == TK("@eof")) return true;
if(curr().type == TK("@dedent")) return true;
if(match_newlines() || curr().type == TK_EOF) return true;
if(curr().type == TK_DEDENT) return true;
return false;
}
Error* Compiler::EXPR_TUPLE(bool allow_slice) noexcept{
Error* err;
check(parse_expression(PREC_LOWEST + 1, allow_slice));
if(!match(TK(","))) return NULL;
if(!match(TK_COMMA)) return NULL;
// tuple expression
int count = 1;
do {
@ -207,7 +207,7 @@ Error* Compiler::EXPR_TUPLE(bool allow_slice) noexcept{
check(parse_expression(PREC_LOWEST + 1, allow_slice));
count += 1;
if(curr().brackets_level) check_newlines_repl();
} while(match(TK(",")));
} while(match(TK_COMMA));
TupleExpr* e = make_expr<TupleExpr>(count);
for(int i=count-1; i>=0; i--)
e->items[i] = ctx()->s_popx();
@ -218,10 +218,10 @@ Error* Compiler::EXPR_TUPLE(bool allow_slice) noexcept{
Error* Compiler::EXPR_VARS() noexcept{
int count = 0;
do {
consume(TK("@id"));
consume(TK_ID);
ctx()->s_push(make_expr<NameExpr>(prev().str(), name_scope()));
count += 1;
} while(match(TK(",")));
} while(match(TK_COMMA));
if(count > 1){
TupleExpr* e = make_expr<TupleExpr>(count);
for(int i=count-1; i>=0; i--)
@ -260,9 +260,9 @@ Error* Compiler::exprLambda() noexcept{
Error* err;
FuncDecl_ decl = push_f_context("<lambda>");
int line = prev().line; // backup line
if(!match(TK(":"))) {
if(!match(TK_COLON)) {
check(_compile_f_args(decl, false));
consume(TK(":"));
consume(TK_COLON);
}
// https://github.com/pocketpy/pocketpy/issues/37
check(parse_expression(PREC_LAMBDA + 1));
@ -304,7 +304,7 @@ Error* Compiler::exprTernary() noexcept{
Error* err;
int line = prev().line;
check(parse_expression(PREC_TERNARY + 1)); // [true_expr, cond]
consume(TK("else"));
consume(TK_ELSE);
check(parse_expression(PREC_TERNARY + 1)); // [true_expr, cond, false_expr]
auto e = make_expr<TernaryExpr>();
e->line = line;
@ -341,10 +341,10 @@ Error* Compiler::exprUnaryOp() noexcept{
TokenIndex op = prev().type;
check(parse_expression(PREC_UNARY + 1));
switch(op) {
case TK("-"): ctx()->s_push(make_expr<NegatedExpr>(ctx()->s_popx())); break;
case TK("~"): ctx()->s_push(make_expr<InvertExpr>(ctx()->s_popx())); break;
case TK("*"): ctx()->s_push(make_expr<StarredExpr>(ctx()->s_popx(), 1)); break;
case TK("**"): ctx()->s_push(make_expr<StarredExpr>(ctx()->s_popx(), 2)); break;
case TK_SUB: ctx()->s_push(make_expr<NegatedExpr>(ctx()->s_popx())); break;
case TK_INVERT: ctx()->s_push(make_expr<InvertExpr>(ctx()->s_popx())); break;
case TK_MUL: ctx()->s_push(make_expr<StarredExpr>(ctx()->s_popx(), 1)); break;
case TK_POW: ctx()->s_push(make_expr<StarredExpr>(ctx()->s_popx(), 2)); break;
default: assert(false);
}
return NULL;
@ -355,7 +355,7 @@ Error* Compiler::exprGroup() noexcept{
check_newlines_repl()
check(EXPR_TUPLE()); // () is just for change precedence
check_newlines_repl()
consume(TK(")"));
consume(TK_RPAREN);
if(ctx()->s_top()->is_tuple()) return NULL;
Expr* g = make_expr<GroupedExpr>(ctx()->s_popx());
ctx()->s_push(g);
@ -367,10 +367,10 @@ Error* Compiler::consume_comp(Opcode op0, Opcode op1) noexcept{
Error* err;
bool has_cond = false;
check(EXPR_VARS()); // [expr, vars]
consume(TK("in"));
consume(TK_IN);
check(parse_expression(PREC_TERNARY + 1)); // [expr, vars, iter]
check_newlines_repl()
if(match(TK("if"))) {
if(match(TK_IF)) {
check(parse_expression(PREC_TERNARY + 1)); // [expr, vars, iter, cond]
has_cond = true;
}
@ -390,17 +390,17 @@ Error* Compiler::exprList() noexcept{
int count = 0;
do {
check_newlines_repl()
if(curr().type == TK("]")) break;
if(curr().type == TK_RBRACKET) break;
check(EXPR()); count += 1;
check_newlines_repl()
if(count == 1 && match(TK("for"))) {
if(count == 1 && match(TK_FOR)) {
check(consume_comp(OP_BUILD_LIST, OP_LIST_APPEND));
consume(TK("]"));
consume(TK_RBRACKET);
return NULL;
}
check_newlines_repl()
} while(match(TK(",")));
consume(TK("]"));
} while(match(TK_COMMA));
consume(TK_RBRACKET);
ListExpr* e = make_expr<ListExpr>(count);
e->line = line; // override line
for(int i=count-1; i>=0; i--)
@ -415,10 +415,10 @@ Error* Compiler::exprMap() noexcept{
int count = 0;
do {
check_newlines_repl()
if(curr().type == TK("}")) break;
if(curr().type == TK_RBRACE) break;
check(EXPR()); // [key]
int star_level = ctx()->s_top()->star_level();
if(star_level == 2 || curr().type == TK(":")) { parsing_dict = true; }
if(star_level == 2 || curr().type == TK_COLON) { parsing_dict = true; }
if(parsing_dict) {
if(star_level == 2) {
DictItemExpr* dict_item = make_expr<DictItemExpr>();
@ -426,7 +426,7 @@ Error* Compiler::exprMap() noexcept{
dict_item->value = ctx()->s_popx();
ctx()->s_push(dict_item);
} else {
consume(TK(":"));
consume(TK_COLON);
check(EXPR());
DictItemExpr* dict_item = make_expr<DictItemExpr>();
dict_item->value = ctx()->s_popx();
@ -436,18 +436,18 @@ Error* Compiler::exprMap() noexcept{
}
count += 1;
check_newlines_repl()
if(count == 1 && match(TK("for"))) {
if(count == 1 && match(TK_FOR)) {
if(parsing_dict){
check(consume_comp(OP_BUILD_DICT, OP_DICT_ADD));
}else{
check(consume_comp(OP_BUILD_SET, OP_SET_ADD));
}
consume(TK("}"));
consume(TK_RBRACE);
return NULL;
}
check_newlines_repl()
} while(match(TK(",")));
consume(TK("}"));
} while(match(TK_COMMA));
consume(TK_RBRACE);
SequenceExpr* se;
if(count == 0 || parsing_dict) {
@ -468,11 +468,11 @@ Error* Compiler::exprCall() noexcept{
ctx()->s_push(e); // push onto the stack in advance
do {
check_newlines_repl()
if(curr().type == TK(")")) break;
if(curr().type == TK("@id") && next().type == TK("=")) {
consume(TK("@id"));
if(curr().type == TK_RPAREN) break;
if(curr().type == TK_ID && next().type == TK_ASSIGN) {
consume(TK_ID);
StrName key(prev().sv());
consume(TK("="));
consume(TK_ASSIGN);
check(EXPR());
e->kwargs.push_back({key, ctx()->s_popx()});
} else {
@ -487,8 +487,8 @@ Error* Compiler::exprCall() noexcept{
}
}
check_newlines_repl()
} while(match(TK(",")));
consume(TK(")"));
} while(match(TK_COMMA));
consume(TK_RPAREN);
return NULL;
}
@ -501,7 +501,7 @@ Error* Compiler::exprName() noexcept{
}
// Infix handler registered for '.' in the Pratt rule table (attribute access).
//
// Requires an identifier as the next token; the `consume` macro returns a
// SyntaxError from this function if it is anything else. On success the
// result of ctx()->s_popx() is wrapped in an AttribExpr named after that
// identifier, and the new node is pushed with ctx()->s_push().
// Returns NULL on success.
Error* Compiler::exprAttrib() noexcept{
consume(TK("@id"));
consume(TK_ID);
ctx()->s_push(make_expr<AttribExpr>(ctx()->s_popx(), StrName::get(prev().sv())));
return NULL;
}
@ -514,11 +514,11 @@ Error* Compiler::exprSlice0() noexcept{
check(EXPR());
slice->stop = ctx()->s_popx();
// try optional step
if(match(TK(":"))) { // :<stop>:<step>
if(match(TK_COLON)) { // :<stop>:<step>
check(EXPR());
slice->step = ctx()->s_popx();
}
} else if(match(TK(":"))) {
} else if(match(TK_COLON)) {
if(is_expression()) { // ::<step>
check(EXPR());
slice->step = ctx()->s_popx();
@ -536,11 +536,11 @@ Error* Compiler::exprSlice1() noexcept{
check(EXPR());
slice->stop = ctx()->s_popx();
// try optional step
if(match(TK(":"))) { // <start>:<stop>:<step>
if(match(TK_COLON)) { // <start>:<stop>:<step>
check(EXPR());
slice->step = ctx()->s_popx();
}
} else if(match(TK(":"))) { // <start>::<step>
} else if(match(TK_COLON)) { // <start>::<step>
check(EXPR());
slice->step = ctx()->s_popx();
} // else <start>:
@ -553,7 +553,7 @@ Error* Compiler::exprSubscr() noexcept{
check_newlines_repl()
check(EXPR_TUPLE(true));
check_newlines_repl()
consume(TK("]")); // [lhs, rhs]
consume(TK_RBRACKET); // [lhs, rhs]
SubscrExpr* e = make_expr<SubscrExpr>();
e->line = line;
e->rhs = ctx()->s_popx(); // [lhs]
@ -570,12 +570,12 @@ Error* Compiler::exprLiteral0() noexcept{
Error* Compiler::compile_block_body(PrattCallback callback) noexcept{
Error* err;
if(!callback) callback = &Compiler::compile_stmt;
consume(TK(":"));
if(curr().type != TK("@eol") && curr().type != TK("@eof")) {
consume(TK_COLON);
if(curr().type != TK_EOL && curr().type != TK_EOF) {
while(true) {
check(compile_stmt());
bool possible = curr().type != TK("@eol") && curr().type != TK("@eof");
if(prev().type != TK(";") || !possible) break;
bool possible = curr().type != TK_EOL && curr().type != TK_EOF;
if(prev().type != TK_SEMICOLON || !possible) break;
}
return NULL;
}
@ -585,13 +585,13 @@ Error* Compiler::compile_block_body(PrattCallback callback) noexcept{
if(need_more_lines) return NeedMoreLines();
if(!consumed) return SyntaxError("expected a new line after ':'");
consume(TK("@indent"));
while(curr().type != TK("@dedent")) {
consume(TK_INDENT);
while(curr().type != TK_DEDENT) {
match_newlines();
check((this->*callback)());
match_newlines();
}
consume(TK("@dedent"));
consume(TK_DEDENT);
return NULL;
}
@ -599,15 +599,15 @@ Error* Compiler::compile_block_body(PrattCallback callback) noexcept{
// import a [as b], c [as d]
//
// Compiles the comma-separated list that follows an `import` keyword.
// For each entry:
//   1. an identifier is required and is emitted as the string-constant
//      operand of OP_IMPORT_PATH;
//   2. if `as <identifier>` follows, that second identifier becomes the
//      binding name, otherwise the module name itself is used;
//   3. a store to that name is emitted in the current name scope.
// Only a single identifier is consumed per module here (no '.' handling).
// The statement must then end (consume_end_stmt); any failed expectation
// returns a SyntaxError, otherwise NULL is returned.
Error* Compiler::compile_normal_import() noexcept{
do {
consume(TK("@id"));
consume(TK_ID);
Str name = prev().str();
ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(name.sv()), prev().line);
if(match(TK("as"))) {
consume(TK("@id"));
if(match(TK_AS)) {
consume(TK_ID);
name = prev().str();
}
ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
} while(match(TK(",")));
} while(match(TK_COMMA));
consume_end_stmt();
return NULL;
}
@ -624,9 +624,9 @@ Error* Compiler::compile_from_import() noexcept{
while(true) {
switch(curr().type) {
case TK("."): dots += 1; break;
case TK(".."): dots += 2; break;
case TK("..."): dots += 3; break;
case TK_DOT: dots += 1; break;
case TK_DOTDOT: dots += 2; break;
case TK_DOTDOTDOT: dots += 3; break;
default: goto __EAT_DOTS_END;
}
advance();
@ -638,27 +638,27 @@ __EAT_DOTS_END:
if(dots > 0) {
// @id is optional if dots > 0
if(match(TK("@id"))) {
if(match(TK_ID)) {
ss << prev().sv();
while(match(TK("."))) {
consume(TK("@id"));
while(match(TK_DOT)) {
consume(TK_ID);
ss << "." << prev().sv();
}
}
} else {
// @id is required if dots == 0
consume(TK("@id"));
consume(TK_ID);
ss << prev().sv();
while(match(TK("."))) {
consume(TK("@id"));
while(match(TK_DOT)) {
consume(TK_ID);
ss << "." << prev().sv();
}
}
ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(ss.str().sv()), prev().line);
consume(TK("import"));
consume(TK_IMPORT);
if(match(TK("*"))) {
if(match(TK_MUL)) {
if(name_scope() != NAME_GLOBAL) return SyntaxError("from <module> import * can only be used in global scope");
// pop the module and import __all__
ctx()->emit_(OP_POP_IMPORT_STAR, BC_NOARG, prev().line);
@ -668,15 +668,15 @@ __EAT_DOTS_END:
do {
ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
consume(TK("@id"));
consume(TK_ID);
Str name = prev().str();
ctx()->emit_(OP_LOAD_ATTR, StrName(name).index, prev().line);
if(match(TK("as"))) {
consume(TK("@id"));
if(match(TK_AS)) {
consume(TK_ID);
name = prev().str();
}
ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
} while(match(TK(",")));
} while(match(TK_COMMA));
ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
consume_end_stmt();
return NULL;
@ -684,18 +684,18 @@ __EAT_DOTS_END:
// Returns true when the current token can start an expression, i.e. the
// Pratt rule table has a prefix handler for it.
// ':' has a prefix handler (slice syntax) but is only accepted when
// `allow_slice` is true.
bool Compiler::is_expression(bool allow_slice) noexcept{
PrattCallback prefix = rules[curr().type].prefix;
return prefix != nullptr && (allow_slice || curr().type != TK(":"));
return prefix != nullptr && (allow_slice || curr().type != TK_COLON);
}
Error* Compiler::parse_expression(int precedence, bool allow_slice) noexcept{
PrattCallback prefix = rules[curr().type].prefix;
if(prefix == nullptr || (curr().type == TK(":") && !allow_slice)) {
return SyntaxError("expected an expression, got %s", TK_STR(curr().type));
if(prefix == nullptr || (curr().type == TK_COLON && !allow_slice)) {
return SyntaxError("expected an expression, got %s", pk_TokenSymbols[curr().type]);
}
advance();
Error* err;
check((this->*prefix)());
while(rules[curr().type].precedence >= precedence && (allow_slice || curr().type != TK(":"))) {
while(rules[curr().type].precedence >= precedence && (allow_slice || curr().type != TK_COLON)) {
TokenIndex op = curr().type;
advance();
PrattCallback infix = rules[op].infix;
@ -712,12 +712,12 @@ Error* Compiler::compile_if_stmt() noexcept{
int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
err = compile_block_body();
if(err) return err;
if(match(TK("elif"))) {
if(match(TK_ELIF)) {
int exit_patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, prev().line);
ctx()->patch_jump(patch);
check(compile_if_stmt());
ctx()->patch_jump(exit_patch);
} else if(match(TK("else"))) {
} else if(match(TK_ELSE)) {
int exit_patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, prev().line);
ctx()->patch_jump(patch);
check(compile_block_body());
@ -739,7 +739,7 @@ Error* Compiler::compile_while_loop() noexcept{
ctx()->patch_jump(patch);
ctx()->exit_block();
// optional else clause
if(match(TK("else"))) {
if(match(TK_ELSE)) {
check(compile_block_body());
block->end2 = ctx()->co->codes.size();
}
@ -749,7 +749,7 @@ Error* Compiler::compile_while_loop() noexcept{
Error* Compiler::compile_for_loop() noexcept{
Error* err;
check(EXPR_VARS()); // [vars]
consume(TK("in"));
consume(TK_IN);
check(EXPR_TUPLE()); // [vars, iter]
ctx()->s_emit_top(); // [vars]
ctx()->emit_(OP_GET_ITER_NEW, BC_NOARG, BC_KEEPLINE);
@ -764,7 +764,7 @@ Error* Compiler::compile_for_loop() noexcept{
ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
ctx()->exit_block();
// optional else clause
if(match(TK("else"))) {
if(match(TK_ELSE)) {
check(compile_block_body());
block->end2 = ctx()->co->codes.size();
}
@ -781,16 +781,16 @@ Error* Compiler::compile_try_except() noexcept{
ctx()->exit_block();
int finally_entry = -1;
if(curr().type != TK("finally")) {
if(curr().type != TK_FINALLY) {
do {
StrName as_name;
consume(TK("except"));
consume(TK_EXCEPT);
if(is_expression()) {
check(EXPR()); // push assumed type on to the stack
ctx()->s_emit_top();
ctx()->emit_(OP_EXCEPTION_MATCH, BC_NOARG, prev().line);
if(match(TK("as"))) {
consume(TK("@id"));
if(match(TK_AS)) {
consume(TK_ID);
as_name = StrName(prev().sv());
}
} else {
@ -807,10 +807,10 @@ Error* Compiler::compile_try_except() noexcept{
check(compile_block_body());
patches.push_back(ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE));
ctx()->patch_jump(patch);
} while(curr().type == TK("except"));
} while(curr().type == TK_EXCEPT);
}
if(match(TK("finally"))) {
if(match(TK_FINALLY)) {
int patch = ctx()->emit_(OP_JUMP_FORWARD, BC_NOARG, BC_KEEPLINE);
finally_entry = ctx()->co->codes.size();
check(compile_block_body());
@ -848,12 +848,12 @@ Error* Compiler::compile_decorated() noexcept{
bool consumed = match_newlines(&need_more_lines);
if(need_more_lines) return NeedMoreLines();
if(!consumed) return SyntaxError("expected a newline after '@'");
} while(match(TK("@")));
} while(match(TK_DECORATOR));
if(match(TK("class"))) {
if(match(TK_CLASS)) {
check(compile_class(count));
} else {
consume(TK("def"));
consume(TK_DEF);
check(compile_function(count));
}
return NULL;
@ -862,17 +862,17 @@ Error* Compiler::compile_decorated() noexcept{
Error* Compiler::try_compile_assignment(bool* is_assign) noexcept{
Error* err;
switch(curr().type) {
case TK("+="):
case TK("-="):
case TK("*="):
case TK("/="):
case TK("//="):
case TK("%="):
case TK("<<="):
case TK(">>="):
case TK("&="):
case TK("|="):
case TK("^="): {
case TK_IADD:
case TK_ISUB:
case TK_IMUL:
case TK_IDIV:
case TK_IFLOORDIV:
case TK_IMOD:
case TK_ILSHIFT:
case TK_IRSHIFT:
case TK_IAND:
case TK_IOR:
case TK_IXOR: {
if(ctx()->s_top()->is_starred()) return SyntaxError();
if(ctx()->is_compiling_class){
return SyntaxError("can't use inplace operator in class definition");
@ -882,7 +882,7 @@ Error* Compiler::try_compile_assignment(bool* is_assign) noexcept{
// a.x += 1; a should be evaluated only once
// -1 to remove =; inplace=true
int line = prev().line;
TokenIndex op = prev().type-1;
TokenIndex op = (TokenIndex)(prev().type - 1);
// [lhs]
check(EXPR_TUPLE()); // [lhs, rhs]
if(ctx()->s_top()->is_starred()) return SyntaxError();
@ -897,9 +897,9 @@ Error* Compiler::try_compile_assignment(bool* is_assign) noexcept{
*is_assign = true;
return NULL;
}
case TK("="): {
case TK_ASSIGN: {
int n = 0;
while(match(TK("="))) {
while(match(TK_ASSIGN)) {
check(EXPR_TUPLE());
n += 1;
}
@ -923,7 +923,7 @@ Error* Compiler::try_compile_assignment(bool* is_assign) noexcept{
Error* Compiler::compile_stmt() noexcept{
Error* err;
if(match(TK("class"))) {
if(match(TK_CLASS)) {
check(compile_class());
return NULL;
}
@ -931,24 +931,24 @@ Error* Compiler::compile_stmt() noexcept{
int kw_line = prev().line; // backup line number
int curr_loop_block = ctx()->get_loop();
switch(prev().type) {
case TK("break"):
case TK_BREAK:
if(curr_loop_block < 0) return SyntaxError("'break' outside loop");
ctx()->emit_(OP_LOOP_BREAK, curr_loop_block, kw_line);
consume_end_stmt();
break;
case TK("continue"):
case TK_CONTINUE:
if(curr_loop_block < 0) return SyntaxError("'continue' not properly in loop");
ctx()->emit_(OP_LOOP_CONTINUE, curr_loop_block, kw_line);
consume_end_stmt();
break;
case TK("yield"):
case TK_YIELD:
if(contexts.size() <= 1) return SyntaxError("'yield' outside function");
check(EXPR_TUPLE());
ctx()->s_emit_top();
ctx()->emit_(OP_YIELD_VALUE, BC_NOARG, kw_line);
consume_end_stmt();
break;
case TK("yield from"):
case TK_YIELD_FROM:
if(contexts.size() <= 1) return SyntaxError("'yield from' outside function");
check(EXPR_TUPLE());
ctx()->s_emit_top();
@ -960,7 +960,7 @@ Error* Compiler::compile_stmt() noexcept{
ctx()->exit_block();
consume_end_stmt();
break;
case TK("return"):
case TK_RETURN:
if(contexts.size() <= 1) return SyntaxError("'return' outside function");
if(match_end_stmt()) {
ctx()->emit_(OP_RETURN_VALUE, 1, kw_line);
@ -972,22 +972,22 @@ Error* Compiler::compile_stmt() noexcept{
}
break;
/*************************************************/
case TK("if"): check(compile_if_stmt()); break;
case TK("while"): check(compile_while_loop()); break;
case TK("for"): check(compile_for_loop()); break;
case TK("import"): check(compile_normal_import()); break;
case TK("from"): check(compile_from_import()); break;
case TK("def"): check(compile_function()); break;
case TK("@"): check(compile_decorated()); break;
case TK("try"): check(compile_try_except()); break;
case TK("pass"): consume_end_stmt(); break;
case TK_IF: check(compile_if_stmt()); break;
case TK_WHILE: check(compile_while_loop()); break;
case TK_FOR: check(compile_for_loop()); break;
case TK_IMPORT: check(compile_normal_import()); break;
case TK_FROM: check(compile_from_import()); break;
case TK_DEF: check(compile_function()); break;
case TK_DECORATOR: check(compile_decorated()); break;
case TK_TRY: check(compile_try_except()); break;
case TK_PASS: consume_end_stmt(); break;
/*************************************************/
case TK("assert"): {
case TK_ASSERT: {
check(EXPR()); // condition
ctx()->s_emit_top();
int index = ctx()->emit_(OP_POP_JUMP_IF_TRUE, BC_NOARG, kw_line);
int has_msg = 0;
if(match(TK(","))) {
if(match(TK_COMMA)) {
check(EXPR()); // message
ctx()->s_emit_top();
has_msg = 1;
@ -997,32 +997,32 @@ Error* Compiler::compile_stmt() noexcept{
consume_end_stmt();
break;
}
case TK("global"):
case TK_GLOBAL:
do {
consume(TK("@id"));
consume(TK_ID);
ctx()->global_names.push_back(StrName(prev().sv()));
} while(match(TK(",")));
} while(match(TK_COMMA));
consume_end_stmt();
break;
case TK("raise"): {
case TK_RAISE: {
check(EXPR());
ctx()->s_emit_top();
ctx()->emit_(OP_RAISE, BC_NOARG, kw_line);
consume_end_stmt();
} break;
case TK("del"): {
case TK_DEL: {
check(EXPR_TUPLE());
if(!ctx()->s_top()->emit_del(ctx())) return SyntaxError();
ctx()->s_pop();
consume_end_stmt();
} break;
case TK("with"): {
case TK_WITH: {
check(EXPR()); // [ <expr> ]
ctx()->s_emit_top();
ctx()->enter_block(CodeBlockType::CONTEXT_MANAGER);
Expr* as_name = nullptr;
if(match(TK("as"))) {
consume(TK("@id"));
if(match(TK_AS)) {
consume(TK_ID);
as_name = make_expr<NameExpr>(prev().str(), name_scope());
}
ctx()->emit_(OP_WITH_ENTER, BC_NOARG, prev().line);
@ -1039,18 +1039,18 @@ Error* Compiler::compile_stmt() noexcept{
ctx()->exit_block();
} break;
/*************************************************/
case TK("=="): {
consume(TK("@id"));
case TK_EQ: {
consume(TK_ID);
if(mode() != EXEC_MODE) return SyntaxError("'label' is only available in EXEC_MODE");
if(!ctx()->add_label(prev().str())) {
Str escaped(prev().str().escape());
return SyntaxError("label %s already exists", escaped.c_str());
}
consume(TK("=="));
consume(TK_EQ);
consume_end_stmt();
} break;
case TK("->"):
consume(TK("@id"));
case TK_ARROW:
consume(TK_ID);
if(mode() != EXEC_MODE) return SyntaxError("'goto' is only available in EXEC_MODE");
ctx()->emit_(OP_GOTO, StrName(prev().sv()).index, prev().line);
consume_end_stmt();
@ -1064,7 +1064,7 @@ Error* Compiler::compile_stmt() noexcept{
bool is_typed_name = false; // e.g. x: int
// eat variable's type hint if it is a single name
if(ctx()->s_top()->is_name()) {
if(match(TK(":"))) {
if(match(TK_COLON)) {
check(consume_type_hints());
is_typed_name = true;
@ -1107,15 +1107,15 @@ Error* Compiler::consume_type_hints() noexcept{
Error* Compiler::compile_class(int decorators) noexcept{
Error* err;
consume(TK("@id"));
consume(TK_ID);
int namei = StrName(prev().sv()).index;
bool has_base = false;
if(match(TK("("))) {
if(match(TK_LPAREN)) {
if(is_expression()) {
check(EXPR());
has_base = true; // [base]
}
consume(TK(")"));
consume(TK_RPAREN);
}
if(!has_base) {
ctx()->emit_(OP_LOAD_NONE, BC_NOARG, prev().line);
@ -1148,15 +1148,15 @@ Error* Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcep
if(state > 3) return SyntaxError();
if(state == 3) return SyntaxError("**kwargs should be the last argument");
match_newlines();
if(match(TK("*"))) {
if(match(TK_MUL)) {
if(state < 1)
state = 1;
else
return SyntaxError("*args should be placed before **kwargs");
} else if(match(TK("**"))) {
} else if(match(TK_POW)) {
state = 3;
}
consume(TK("@id"));
consume(TK_ID);
StrName name(prev().sv());
// check duplicate argument name
@ -1174,8 +1174,8 @@ Error* Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcep
}
// eat type hints
if(enable_type_hints && match(TK(":"))) check(consume_type_hints());
if(state == 0 && curr().type == TK("=")) state = 2;
if(enable_type_hints && match(TK_COLON)) check(consume_type_hints());
if(state == 0 && curr().type == TK_ASSIGN) state = 2;
int index = ctx()->add_varname(name);
switch(state) {
case 0: decl->args.push_back(index); break;
@ -1184,7 +1184,7 @@ Error* Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcep
state += 1;
break;
case 2: {
consume(TK("="));
consume(TK_ASSIGN);
PyVar value;
check(read_literal(&value));
if(value == nullptr) return SyntaxError("default argument must be a literal");
@ -1195,21 +1195,21 @@ Error* Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints) noexcep
state += 1;
break;
}
} while(match(TK(",")));
} while(match(TK_COMMA));
return NULL;
}
Error* Compiler::compile_function(int decorators) noexcept{
Error* err;
consume(TK("@id"));
consume(TK_ID);
Str decl_name = prev().str();
FuncDecl_ decl = push_f_context(decl_name);
consume(TK("("));
if(!match(TK(")"))) {
consume(TK_LPAREN);
if(!match(TK_RPAREN)) {
check(_compile_f_args(decl, true));
consume(TK(")"));
consume(TK_RPAREN);
}
if(match(TK("->"))) check(consume_type_hints());
if(match(TK_ARROW)) check(consume_type_hints());
check(compile_block_body());
check(pop_context());
@ -1251,29 +1251,29 @@ Error* Compiler::read_literal(PyVar* out) noexcept{
Error* err;
advance();
switch(prev().type) {
case TK("-"): {
consume(TK("@num"));
case TK_SUB: {
consume(TK_NUM);
PyVar val = to_object(prev().value);
*out = vm->py_negate(val);
return NULL;
}
case TK("@num"): *out = to_object(prev().value); return NULL;
case TK("@str"): *out = to_object(prev().value); return NULL;
case TK("True"): *out = VAR(true); return NULL;
case TK("False"): *out = VAR(false); return NULL;
case TK("None"): *out = vm->None; return NULL;
case TK("..."): *out = vm->Ellipsis; return NULL;
case TK("("): {
case TK_NUM: *out = to_object(prev().value); return NULL;
case TK_STR: *out = to_object(prev().value); return NULL;
case TK_TRUE: *out = VAR(true); return NULL;
case TK_FALSE: *out = VAR(false); return NULL;
case TK_NONE: *out = vm->None; return NULL;
case TK_DOTDOTDOT: *out = vm->Ellipsis; return NULL;
case TK_LPAREN: {
List cpnts;
while(true) {
PyVar elem;
check(read_literal(&elem));
cpnts.push_back(elem);
if(curr().type == TK(")")) break;
consume(TK(","));
if(curr().type == TK(")")) break;
if(curr().type == TK_RPAREN) break;
consume(TK_COMMA);
if(curr().type == TK_RPAREN) break;
}
consume(TK(")"));
consume(TK_RPAREN);
*out = VAR(cpnts.to_tuple());
return NULL;
}
@ -1297,20 +1297,20 @@ Error* Compiler::compile(CodeObject_* out) noexcept{
// if(lexer.src.filename()[0] != '<'){
// printf("%s\n", lexer.src.filename().c_str());
// for(int i=0; i<lexer.nexts.size(); i++){
// printf("%s: %s\n", TK_STR(tk(i).type), tk(i).str().escape().c_str());
// printf("%s: %s\n", pk_TokenSymbols[tk(i).type], tk(i).str().escape().c_str());
// }
// }
CodeObject_ code = push_global_context();
assert(curr().type == TK("@sof"));
assert(curr().type == TK_SOF);
advance(); // skip @sof, so prev() is always valid
match_newlines(); // skip possible leading '\n'
if(mode() == EVAL_MODE) {
check(EXPR_TUPLE());
ctx()->s_emit_top();
consume(TK("@eof"));
consume(TK_EOF);
ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
check(pop_context());
*out = code;
@ -1319,7 +1319,7 @@ Error* Compiler::compile(CodeObject_* out) noexcept{
check(EXPR());
Expr* e = ctx()->s_popx();
if(!e->is_json_object()) return SyntaxError("expect a JSON object, literal or array");
consume(TK("@eof"));
consume(TK_EOF);
e->emit_(ctx());
ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
check(pop_context());
@ -1327,7 +1327,7 @@ Error* Compiler::compile(CodeObject_* out) noexcept{
return NULL;
}
while(!match(TK("@eof"))) {
while(!match(TK_EOF)) {
check(compile_stmt());
match_newlines();
}

View File

@ -231,10 +231,10 @@ void OrExpr::emit_(CodeEmitContext* ctx) {
// Emits the load opcode for a constant-literal token: None, True, False or
// `...` (Ellipsis). Each opcode is emitted with BC_NOARG at this expression's
// `line`. Any other token value reaches the default branch and trips the assert.
// NOTE(review): this text is a rendered diff — each removed TK("...") case label
// is listed above its TK_* replacement; presumably only the TK_* labels exist
// in the committed file. Confirm against the real source.
void Literal0Expr::emit_(CodeEmitContext* ctx) {
switch(token) {
case TK("None"): ctx->emit_(OP_LOAD_NONE, BC_NOARG, line); break;
case TK("True"): ctx->emit_(OP_LOAD_TRUE, BC_NOARG, line); break;
case TK("False"): ctx->emit_(OP_LOAD_FALSE, BC_NOARG, line); break;
case TK("..."): ctx->emit_(OP_LOAD_ELLIPSIS, BC_NOARG, line); break;
case TK_NONE: ctx->emit_(OP_LOAD_NONE, BC_NOARG, line); break;
case TK_TRUE: ctx->emit_(OP_LOAD_TRUE, BC_NOARG, line); break;
case TK_FALSE: ctx->emit_(OP_LOAD_FALSE, BC_NOARG, line); break;
case TK_DOTDOTDOT: ctx->emit_(OP_LOAD_ELLIPSIS, BC_NOARG, line); break;
// Only the four literal tokens above are handled here.
default: assert(false);
}
}
@ -681,12 +681,12 @@ void CallExpr::emit_(CodeEmitContext* ctx) {
// Returns true when `op` is one of the six comparison operators
// (<, <=, ==, !=, >, >=) and false for every other token.
// NOTE(review): this text is a rendered diff — the TK("...") labels are the
// removed spellings and the TK_* labels their replacements; presumably only
// the TK_* set exists in the committed file. Confirm against the real source.
bool BinaryExpr::is_compare() const {
switch(op) {
case TK("<"):
case TK("<="):
case TK("=="):
case TK("!="):
case TK(">"):
case TK(">="): return true;
case TK_LT:
case TK_LE:
case TK_EQ:
case TK_NE:
case TK_GT:
case TK_GE: return true;
default: return false;
}
}
@ -701,12 +701,12 @@ void BinaryExpr::_emit_compare(CodeEmitContext* ctx, small_vector_2<int, 8>& jmp
ctx->emit_(OP_DUP_TOP, BC_NOARG, line); // [a, b, b]
ctx->emit_(OP_ROT_THREE, BC_NOARG, line); // [b, a, b]
switch(op) {
case TK("<"): ctx->emit_(OP_COMPARE_LT, BC_NOARG, line); break;
case TK("<="): ctx->emit_(OP_COMPARE_LE, BC_NOARG, line); break;
case TK("=="): ctx->emit_(OP_COMPARE_EQ, BC_NOARG, line); break;
case TK("!="): ctx->emit_(OP_COMPARE_NE, BC_NOARG, line); break;
case TK(">"): ctx->emit_(OP_COMPARE_GT, BC_NOARG, line); break;
case TK(">="): ctx->emit_(OP_COMPARE_GE, BC_NOARG, line); break;
case TK_LT: ctx->emit_(OP_COMPARE_LT, BC_NOARG, line); break;
case TK_LE: ctx->emit_(OP_COMPARE_LE, BC_NOARG, line); break;
case TK_EQ: ctx->emit_(OP_COMPARE_EQ, BC_NOARG, line); break;
case TK_NE: ctx->emit_(OP_COMPARE_NE, BC_NOARG, line); break;
case TK_GT: ctx->emit_(OP_COMPARE_GT, BC_NOARG, line); break;
case TK_GE: ctx->emit_(OP_COMPARE_GE, BC_NOARG, line); break;
default: PK_UNREACHABLE()
}
// [b, RES]
@ -731,34 +731,34 @@ void BinaryExpr::emit_(CodeEmitContext* ctx) {
rhs->emit_(ctx);
switch(op) {
case TK("+"): ctx->emit_(OP_BINARY_ADD, BC_NOARG, line); break;
case TK("-"): ctx->emit_(OP_BINARY_SUB, BC_NOARG, line); break;
case TK("*"): ctx->emit_(OP_BINARY_MUL, BC_NOARG, line); break;
case TK("/"): ctx->emit_(OP_BINARY_TRUEDIV, BC_NOARG, line); break;
case TK("//"): ctx->emit_(OP_BINARY_FLOORDIV, BC_NOARG, line); break;
case TK("%"): ctx->emit_(OP_BINARY_MOD, BC_NOARG, line); break;
case TK("**"): ctx->emit_(OP_BINARY_POW, BC_NOARG, line); break;
case TK_ADD: ctx->emit_(OP_BINARY_ADD, BC_NOARG, line); break;
case TK_SUB: ctx->emit_(OP_BINARY_SUB, BC_NOARG, line); break;
case TK_MUL: ctx->emit_(OP_BINARY_MUL, BC_NOARG, line); break;
case TK_DIV: ctx->emit_(OP_BINARY_TRUEDIV, BC_NOARG, line); break;
case TK_FLOORDIV: ctx->emit_(OP_BINARY_FLOORDIV, BC_NOARG, line); break;
case TK_MOD: ctx->emit_(OP_BINARY_MOD, BC_NOARG, line); break;
case TK_POW: ctx->emit_(OP_BINARY_POW, BC_NOARG, line); break;
case TK("<"): ctx->emit_(OP_COMPARE_LT, BC_NOARG, line); break;
case TK("<="): ctx->emit_(OP_COMPARE_LE, BC_NOARG, line); break;
case TK("=="): ctx->emit_(OP_COMPARE_EQ, BC_NOARG, line); break;
case TK("!="): ctx->emit_(OP_COMPARE_NE, BC_NOARG, line); break;
case TK(">"): ctx->emit_(OP_COMPARE_GT, BC_NOARG, line); break;
case TK(">="): ctx->emit_(OP_COMPARE_GE, BC_NOARG, line); break;
case TK_LT: ctx->emit_(OP_COMPARE_LT, BC_NOARG, line); break;
case TK_LE: ctx->emit_(OP_COMPARE_LE, BC_NOARG, line); break;
case TK_EQ: ctx->emit_(OP_COMPARE_EQ, BC_NOARG, line); break;
case TK_NE: ctx->emit_(OP_COMPARE_NE, BC_NOARG, line); break;
case TK_GT: ctx->emit_(OP_COMPARE_GT, BC_NOARG, line); break;
case TK_GE: ctx->emit_(OP_COMPARE_GE, BC_NOARG, line); break;
case TK("in"): ctx->emit_(OP_CONTAINS_OP, 0, line); break;
case TK("not in"): ctx->emit_(OP_CONTAINS_OP, 1, line); break;
case TK("is"): ctx->emit_(OP_IS_OP, BC_NOARG, line); break;
case TK("is not"): ctx->emit_(OP_IS_NOT_OP, BC_NOARG, line); break;
case TK_IN: ctx->emit_(OP_CONTAINS_OP, 0, line); break;
case TK_NOT_IN: ctx->emit_(OP_CONTAINS_OP, 1, line); break;
case TK_IS: ctx->emit_(OP_IS_OP, BC_NOARG, line); break;
case TK_IS_NOT: ctx->emit_(OP_IS_NOT_OP, BC_NOARG, line); break;
case TK("<<"): ctx->emit_(OP_BITWISE_LSHIFT, BC_NOARG, line); break;
case TK(">>"): ctx->emit_(OP_BITWISE_RSHIFT, BC_NOARG, line); break;
case TK("&"): ctx->emit_(OP_BITWISE_AND, BC_NOARG, line); break;
case TK("|"): ctx->emit_(OP_BITWISE_OR, BC_NOARG, line); break;
case TK("^"): ctx->emit_(OP_BITWISE_XOR, BC_NOARG, line); break;
case TK_LSHIFT: ctx->emit_(OP_BITWISE_LSHIFT, BC_NOARG, line); break;
case TK_RSHIFT: ctx->emit_(OP_BITWISE_RSHIFT, BC_NOARG, line); break;
case TK_AND: ctx->emit_(OP_BITWISE_AND, BC_NOARG, line); break;
case TK_OR: ctx->emit_(OP_BITWISE_OR, BC_NOARG, line); break;
case TK_XOR: ctx->emit_(OP_BITWISE_XOR, BC_NOARG, line); break;
case TK("@"): ctx->emit_(OP_BINARY_MATMUL, BC_NOARG, line); break;
default: PK_FATAL_ERROR("unknown binary operator: %s\n", TK_STR(op));
case TK_DECORATOR: ctx->emit_(OP_BINARY_MATMUL, BC_NOARG, line); break;
default: PK_FATAL_ERROR("unknown binary operator: %s\n", pk_TokenSymbols[op]);
}
for(int i: jmps)

View File

@ -3,6 +3,30 @@
#include "pocketpy/common/smallmap.h"
#include "pocketpy/compiler/lexer.h"
// Printable spelling of every token, indexed by TokenIndex: entry i is the
// text for the i-th enumerator (TK_EOF, TK_EOL, TK_SOF, ...), so the order of
// this table must mirror the enum exactly and hold TK__COUNT__ entries.
// Names starting with '@' ("@id", "@num", ...) label token kinds that have no
// fixed source spelling.
// The keyword tail (from "False" onward) is searched with c11__lower_bound in
// Lexer::eat_name, starting at pk_TokenSymbols + TK_FALSE; that is why it has
// to stay sorted.
const char* pk_TokenSymbols[] = {
"@eof", "@eol", "@sof",
"@id", "@num", "@str", "@fstr", "@long", "@bytes", "@imag",
"@indent", "@dedent",
// These 3 are compound keywords which are generated on the fly
"is not", "not in", "yield from",
/*****************************************/
"+", "+=", "-", "-=", // (INPLACE_OP - 1) can get '=' removed
"*", "*=", "/", "/=", "//", "//=", "%", "%=",
"&", "&=", "|", "|=", "^", "^=",
"<<", "<<=", ">>", ">>=",
/*****************************************/
"(", ")", "[", "]", "{", "}",
".", "..", "...", ",", ":", ";",
"**", "->", "#", "@",
">", "<", "=", "==", "!=", ">=", "<=", "~",
/** KW_BEGIN **/
// NOTE: These keywords should be sorted in ascending order!!
"False", "None", "True", "and", "as", "assert", "break", "class", "continue",
"def", "del", "elif", "else", "except", "finally", "for", "from", "global",
"if", "import", "in", "is", "lambda", "not", "or", "pass", "raise", "return",
"try", "while", "with", "yield",
};
void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source){
self->curr = source;
self->source = source;

View File

@ -62,11 +62,11 @@ bool Lexer::eat_indentation() noexcept{
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
if(spaces > indents.back()) {
indents.push_back(spaces);
nexts.push_back(Token{TK("@indent"), token_start, 0, current_line, brackets_level, {}});
nexts.push_back(Token{TK_INDENT, token_start, 0, current_line, brackets_level, {}});
} else if(spaces < indents.back()) {
while(spaces < indents.back()) {
indents.pop_back();
nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line, brackets_level, {}});
nexts.push_back(Token{TK_DEDENT, token_start, 0, current_line, brackets_level, {}});
}
if(spaces != indents.back()) { return false; }
}
@ -129,29 +129,32 @@ Error* Lexer::eat_name() noexcept{
int length = (int)(curr_char - token_start);
if(length == 0) return SyntaxError("@id contains invalid char");
std::string_view name(token_start, length);
c11_string name = {token_start, length};
if(src->mode == JSON_MODE) {
if(name == "true") {
add_token(TK("True"));
} else if(name == "false") {
add_token(TK("False"));
} else if(name == "null") {
add_token(TK("None"));
if(c11_string__cmp3(name, "true") == 0) {
add_token(TK_TRUE);
} else if(c11_string__cmp3(name, "false") == 0) {
add_token(TK_FALSE);
} else if(c11_string__cmp3(name, "null") == 0) {
add_token(TK_NONE);
} else {
return SyntaxError("invalid JSON token");
}
return NULL;
}
const auto KW_BEGIN = kTokens + TK("False");
const auto KW_END = kTokens + kTokenCount;
const char** KW_BEGIN = pk_TokenSymbols + TK_FALSE;
int KW_COUNT = TK__COUNT__ - TK_FALSE;
#define less(a, b) (c11_string__cmp3(b, a) > 0)
int out;
c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
#undef less
auto it = lower_bound(KW_BEGIN, KW_END, name);
if(it != KW_END && *it == name) {
add_token(it - kTokens);
if(out != KW_COUNT && c11_string__cmp3(name, KW_BEGIN[out]) == 0) {
add_token((TokenIndex)(out + TK_FALSE));
} else {
add_token(TK("@id"));
add_token(TK_ID);
}
return NULL;
}
@ -172,32 +175,33 @@ bool Lexer::matchchar(char c) noexcept{
void Lexer::add_token(TokenIndex type, TokenValue value) noexcept{
switch(type) {
case TK("{"):
case TK("["):
case TK("("): brackets_level++; break;
case TK(")"):
case TK("]"):
case TK("}"): brackets_level--; break;
case TK_LBRACE:
case TK_LBRACKET:
case TK_LPAREN: brackets_level++; break;
case TK_RPAREN:
case TK_RBRACKET:
case TK_RBRACE: brackets_level--; break;
default: break;
}
auto token = Token{type,
token_start,
(int)(curr_char - token_start),
current_line - ((type == TK("@eol")) ? 1 : 0),
current_line - ((type == TK_EOL) ? 1 : 0),
brackets_level,
value};
// handle "not in", "is not", "yield from"
if(!nexts.empty()) {
auto& back = nexts.back();
if(back.type == TK("not") && type == TK("in")) {
back.type = TK("not in");
if(back.type == TK_NOT_KW && type == TK_IN) {
back.type = TK_NOT_IN;
return;
}
if(back.type == TK("is") && type == TK("not")) {
back.type = TK("is not");
if(back.type == TK_IS && type == TK_NOT_KW) {
back.type = TK_IS_NOT;
return;
}
if(back.type == TK("yield") && type == TK("from")) {
back.type = TK("yield from");
if(back.type == TK_YIELD && type == TK_FROM) {
back.type = TK_YIELD_FROM;
return;
}
nexts.push_back(token);
@ -271,11 +275,11 @@ Error* Lexer::eat_string(char quote, StringType type) noexcept{
Error* err = eat_string_until(quote, type == StringType::RAW_STRING, &s);
if(err) return err;
if(type == StringType::F_STRING) {
add_token(TK("@fstr"), s);
add_token(TK_FSTR, s);
}else if(type == StringType::NORMAL_BYTES) {
add_token(TK("@bytes"), s);
add_token(TK_BYTES, s);
}else{
add_token(TK("@str"), s);
add_token(TK_STR, s);
}
return NULL;
}
@ -299,13 +303,13 @@ Error* Lexer::eat_number() noexcept{
if(text[0] != '.' && !is_scientific_notation) {
// try long
if(i[-1] == 'L') {
add_token(TK("@long"));
add_token(TK_LONG);
return NULL;
}
// try integer
i64 int_out;
switch(parse_uint(text, &int_out, -1)) {
case IntParsingResult::Success: add_token(TK("@num"), int_out); return NULL;
case IntParsingResult::Success: add_token(TK_NUM, int_out); return NULL;
case IntParsingResult::Overflow: return SyntaxError("int literal is too large");
case IntParsingResult::Failure: break; // do nothing
}
@ -321,12 +325,12 @@ Error* Lexer::eat_number() noexcept{
}
if(p_end == text.data() + text.size()) {
add_token(TK("@num"), (f64)float_out);
add_token(TK_NUM, (f64)float_out);
return NULL;
}
if(i[-1] == 'j' && p_end == text.data() + text.size() - 1) {
add_token(TK("@imag"), (f64)float_out);
add_token(TK_IMAG, (f64)float_out);
return NULL;
}
@ -346,17 +350,17 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{
return NULL;
}
case '#': skip_line_comment(); break;
case '~': add_token(TK("~")); return NULL;
case '{': add_token(TK("{")); return NULL;
case '}': add_token(TK("}")); return NULL;
case ',': add_token(TK(",")); return NULL;
case ':': add_token(TK(":")); return NULL;
case ';': add_token(TK(";")); return NULL;
case '(': add_token(TK("(")); return NULL;
case ')': add_token(TK(")")); return NULL;
case '[': add_token(TK("[")); return NULL;
case ']': add_token(TK("]")); return NULL;
case '@': add_token(TK("@")); return NULL;
case '~': add_token(TK_INVERT); return NULL;
case '{': add_token(TK_LBRACE); return NULL;
case '}': add_token(TK_RBRACE); return NULL;
case ',': add_token(TK_COMMA); return NULL;
case ':': add_token(TK_COLON); return NULL;
case ';': add_token(TK_SEMICOLON); return NULL;
case '(': add_token(TK_LPAREN); return NULL;
case ')': add_token(TK_RPAREN); return NULL;
case '[': add_token(TK_LBRACKET); return NULL;
case ']': add_token(TK_RBRACKET); return NULL;
case '@': add_token(TK_DECORATOR); return NULL;
case '\\': {
// line continuation character
char c = eatchar_include_newline();
@ -367,16 +371,16 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{
eat_spaces();
return NULL;
}
case '%': add_token_2('=', TK("%"), TK("%=")); return NULL;
case '&': add_token_2('=', TK("&"), TK("&=")); return NULL;
case '|': add_token_2('=', TK("|"), TK("|=")); return NULL;
case '^': add_token_2('=', TK("^"), TK("^=")); return NULL;
case '%': add_token_2('=', TK_MOD, TK_IMOD); return NULL;
case '&': add_token_2('=', TK_AND, TK_IAND); return NULL;
case '|': add_token_2('=', TK_OR, TK_IOR); return NULL;
case '^': add_token_2('=', TK_XOR, TK_IXOR); return NULL;
case '.': {
if(matchchar('.')) {
if(matchchar('.')) {
add_token(TK("..."));
add_token(TK_DOTDOTDOT);
} else {
add_token(TK(".."));
add_token(TK_DOTDOT);
}
} else {
char next_char = peekchar();
@ -384,43 +388,43 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{
Error* err = eat_number();
if(err) return err;
} else {
add_token(TK("."));
add_token(TK_DOT);
}
}
return NULL;
}
case '=': add_token_2('=', TK("="), TK("==")); return NULL;
case '+': add_token_2('=', TK("+"), TK("+=")); return NULL;
case '=': add_token_2('=', TK_ASSIGN, TK_EQ); return NULL;
case '+': add_token_2('=', TK_ADD, TK_IADD); return NULL;
case '>': {
if(matchchar('='))
add_token(TK(">="));
add_token(TK_GE);
else if(matchchar('>'))
add_token_2('=', TK(">>"), TK(">>="));
add_token_2('=', TK_RSHIFT, TK_IRSHIFT);
else
add_token(TK(">"));
add_token(TK_GT);
return NULL;
}
case '<': {
if(matchchar('='))
add_token(TK("<="));
add_token(TK_LE);
else if(matchchar('<'))
add_token_2('=', TK("<<"), TK("<<="));
add_token_2('=', TK_LSHIFT, TK_ILSHIFT);
else
add_token(TK("<"));
add_token(TK_LT);
return NULL;
}
case '-': {
if(matchchar('='))
add_token(TK("-="));
add_token(TK_ISUB);
else if(matchchar('>'))
add_token(TK("->"));
add_token(TK_ARROW);
else
add_token(TK("-"));
add_token(TK_SUB);
return NULL;
}
case '!':
if(matchchar('=')){
add_token(TK("!="));
add_token(TK_NE);
}else{
Error* err = SyntaxError("expected '=' after '!'");
if(err) return err;
@ -428,22 +432,22 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{
break;
case '*':
if(matchchar('*')) {
add_token(TK("**")); // '**'
add_token(TK_POW); // '**'
} else {
add_token_2('=', TK("*"), TK("*="));
add_token_2('=', TK_MUL, TK_IMUL);
}
return NULL;
case '/':
if(matchchar('/')) {
add_token_2('=', TK("//"), TK("//="));
add_token_2('=', TK_FLOORDIV, TK_IFLOORDIV);
} else {
add_token_2('=', TK("/"), TK("/="));
add_token_2('=', TK_DIV, TK_IDIV);
}
return NULL;
case ' ':
case '\t': eat_spaces(); break;
case '\n': {
add_token(TK("@eol"));
add_token(TK_EOL);
if(!eat_indentation()){
return IndentationError("unindent does not match any outer indentation level");
}
@ -469,10 +473,10 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{
token_start = curr_char;
while(indents.size() > 1) {
indents.pop_back();
add_token(TK("@dedent"));
add_token(TK_DEDENT);
return NULL;
}
add_token(TK("@eof"));
add_token(TK_EOF);
*eof = true;
return NULL;
}
@ -496,7 +500,7 @@ Error* Lexer::_error(bool lexer_err, const char* type, const char* msg, va_list*
if(args){
vsnprintf(err->msg, sizeof(err->msg), msg, *args);
}else{
std::strncpy(err->msg, msg, sizeof(err->msg));
strncpy(err->msg, msg, sizeof(err->msg));
}
err->userdata = userdata;
return err;
@ -517,7 +521,7 @@ Error* Lexer::run() noexcept{
return from_precompiled();
}
// push initial tokens
this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line, brackets_level, {}});
this->nexts.push_back(Token{TK_SOF, token_start, 0, current_line, brackets_level, {}});
this->indents.push_back(0);
bool eof = false;
@ -554,7 +558,7 @@ Error* Lexer::from_precompiled() noexcept{
count = pkpy_TokenDeserializer__read_count(&deserializer);
for(int i = 0; i < count; i++) {
Token t;
t.type = (unsigned char)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
t.type = (TokenIndex)pkpy_TokenDeserializer__read_uint(&deserializer, ',');
if(is_raw_string_used(t.type)) {
i64 index = pkpy_TokenDeserializer__read_uint(&deserializer, ',');
pkpy_Str* p = c11__at(pkpy_Str, precompiled_tokens, index);

View File

@ -201,6 +201,7 @@ struct Random {
f64 key = self.gen.uniform(0.0, cum_weights[size - 1]);
int index;
c11__lower_bound(f64, cum_weights.begin(), cum_weights.size(), key, c11__less, &index);
assert(index != cum_weights.size());
result[i] = data[index];
}
return VAR(std::move(result));