mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-20 03:20:18 +00:00
update lexer
This commit is contained in:
parent
120773891a
commit
e78aa44895
@ -6,8 +6,8 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f:
|
||||
OPCODES_TEXT = f.read()
|
||||
|
||||
pipeline = [
|
||||
["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h"],
|
||||
["obj.h", "parser.h", "codeobject.h", "frame.h"],
|
||||
["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"],
|
||||
["obj.h", "codeobject.h", "frame.h"],
|
||||
["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"],
|
||||
["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"]
|
||||
]
|
||||
|
@ -7,7 +7,7 @@ namespace pkpy{
|
||||
|
||||
inline PyObject* VM::run_frame(Frame* frame){
|
||||
while(frame->has_next_bytecode()){
|
||||
// heap._auto_collect(this);
|
||||
heap._auto_collect(this);
|
||||
|
||||
const Bytecode& byte = frame->next_bytecode();
|
||||
switch (byte.op)
|
||||
@ -325,7 +325,7 @@ inline PyObject* VM::run_frame(Frame* frame){
|
||||
if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE");
|
||||
return frame->pop_value(this);
|
||||
}
|
||||
#if PK_EXTRA_CHECK
|
||||
#if DEBUG_EXTRA_CHECK
|
||||
if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE");
|
||||
#endif
|
||||
return None;
|
||||
|
34
src/common.h
34
src/common.h
@ -10,7 +10,6 @@
|
||||
#include <sstream>
|
||||
#include <regex>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
@ -26,10 +25,13 @@
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
#include <initializer_list>
|
||||
#include <list>
|
||||
#include <variant>
|
||||
|
||||
#define PK_VERSION "0.9.5"
|
||||
#define PK_EXTRA_CHECK 0
|
||||
#define PK_VERSION "0.9.6"
|
||||
|
||||
// debug macros
|
||||
#define DEBUG_NO_BUILTIN_MODULES 0
|
||||
#define DEBUG_EXTRA_CHECK 1
|
||||
|
||||
#if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__)
|
||||
#define PK_ENABLE_FILEIO 0
|
||||
@ -40,13 +42,13 @@
|
||||
#if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__)
|
||||
typedef int32_t i64;
|
||||
typedef float f64;
|
||||
#define S_TO_INT std::stoi
|
||||
#define S_TO_FLOAT std::stof
|
||||
#define S_TO_INT(...) static_cast<i64>(std::stoi(__VA_ARGS__))
|
||||
#define S_TO_FLOAT(...) static_cast<f64>(std::stof(__VA_ARGS__))
|
||||
#else
|
||||
typedef int64_t i64;
|
||||
typedef double f64;
|
||||
#define S_TO_INT std::stoll
|
||||
#define S_TO_FLOAT std::stod
|
||||
#define S_TO_INT(...) static_cast<i64>(std::stoll(__VA_ARGS__))
|
||||
#define S_TO_FLOAT(...) static_cast<f64>(std::stod(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
namespace pkpy{
|
||||
@ -100,22 +102,6 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept {
|
||||
return is_int(a) && is_int(b);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
class queue{
|
||||
std::list<T> list;
|
||||
public:
|
||||
void push(const T& t){ list.push_back(t); }
|
||||
void push(T&& t){ list.push_back(std::move(t)); }
|
||||
void pop(){ list.pop_front(); }
|
||||
void clear(){ list.clear(); }
|
||||
bool empty() const { return list.empty(); }
|
||||
size_t size() const { return list.size(); }
|
||||
T& front(){ return list.front(); }
|
||||
const T& front() const { return list.front(); }
|
||||
const std::list<T>& data() const { return list; }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class stack{
|
||||
std::vector<T> vec;
|
||||
|
378
src/compiler.h
378
src/compiler.h
@ -2,7 +2,7 @@
|
||||
|
||||
#include "codeobject.h"
|
||||
#include "common.h"
|
||||
#include "parser.h"
|
||||
#include "lexer.h"
|
||||
#include "error.h"
|
||||
#include "ceval.h"
|
||||
|
||||
@ -18,24 +18,21 @@ struct GrammarRule{
|
||||
Precedence precedence;
|
||||
};
|
||||
|
||||
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
|
||||
|
||||
class Compiler {
|
||||
std::unique_ptr<Parser> parser;
|
||||
std::unique_ptr<Lexer> lexer;
|
||||
stack<CodeObject_> codes;
|
||||
int lexing_count = 0;
|
||||
bool used = false;
|
||||
VM* vm;
|
||||
std::map<TokenIndex, GrammarRule> rules;
|
||||
|
||||
CodeObject_ co() const{ return codes.top(); }
|
||||
CompileMode mode() const{ return parser->src->mode; }
|
||||
CompileMode mode() const{ return lexer->src->mode; }
|
||||
NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; }
|
||||
|
||||
public:
|
||||
Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
|
||||
this->vm = vm;
|
||||
this->parser = std::make_unique<Parser>(
|
||||
this->lexer = std::make_unique<Lexer>(
|
||||
make_sp<SourceData>(source, filename, mode)
|
||||
);
|
||||
|
||||
@ -104,239 +101,36 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
Str eat_string_until(char quote, bool raw) {
|
||||
bool quote3 = parser->match_n_chars(2, quote);
|
||||
std::vector<char> buff;
|
||||
while (true) {
|
||||
char c = parser->eatchar_include_newline();
|
||||
if (c == quote){
|
||||
if(quote3 && !parser->match_n_chars(2, quote)){
|
||||
buff.push_back(c);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (c == '\0'){
|
||||
if(quote3 && parser->src->mode == REPL_MODE){
|
||||
throw NeedMoreLines(false);
|
||||
}
|
||||
SyntaxError("EOL while scanning string literal");
|
||||
}
|
||||
if (c == '\n'){
|
||||
if(!quote3) SyntaxError("EOL while scanning string literal");
|
||||
else{
|
||||
buff.push_back(c);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!raw && c == '\\') {
|
||||
switch (parser->eatchar_include_newline()) {
|
||||
case '"': buff.push_back('"'); break;
|
||||
case '\'': buff.push_back('\''); break;
|
||||
case '\\': buff.push_back('\\'); break;
|
||||
case 'n': buff.push_back('\n'); break;
|
||||
case 'r': buff.push_back('\r'); break;
|
||||
case 't': buff.push_back('\t'); break;
|
||||
default: SyntaxError("invalid escape char");
|
||||
}
|
||||
} else {
|
||||
buff.push_back(c);
|
||||
}
|
||||
}
|
||||
return Str(buff.data(), buff.size());
|
||||
}
|
||||
int i = 0;
|
||||
std::vector<Token> tokens;
|
||||
|
||||
void eat_string(char quote, StringType type) {
|
||||
Str s = eat_string_until(quote, type == RAW_STRING);
|
||||
if(type == F_STRING){
|
||||
parser->set_next_token(TK("@fstr"), VAR(s));
|
||||
}else{
|
||||
parser->set_next_token(TK("@str"), VAR(s));
|
||||
}
|
||||
}
|
||||
|
||||
void eat_number() {
|
||||
static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
|
||||
std::smatch m;
|
||||
|
||||
const char* i = parser->token_start;
|
||||
while(*i != '\n' && *i != '\0') i++;
|
||||
std::string s = std::string(parser->token_start, i);
|
||||
|
||||
try{
|
||||
if (std::regex_search(s, m, pattern)) {
|
||||
// here is m.length()-1, since the first char was eaten by lex_token()
|
||||
for(int j=0; j<m.length()-1; j++) parser->eatchar();
|
||||
|
||||
int base = 10;
|
||||
size_t size;
|
||||
if (m[1].matched) base = 16;
|
||||
if (m[2].matched) {
|
||||
if(base == 16) SyntaxError("hex literal should not contain a dot");
|
||||
parser->set_next_token(TK("@num"), VAR(S_TO_FLOAT(m[0], &size)));
|
||||
} else {
|
||||
parser->set_next_token(TK("@num"), VAR(S_TO_INT(m[0], &size, base)));
|
||||
}
|
||||
if (size != m.length()) UNREACHABLE();
|
||||
}
|
||||
}catch(std::exception& _){
|
||||
SyntaxError("invalid number literal");
|
||||
}
|
||||
}
|
||||
|
||||
void lex_token(){
|
||||
lexing_count++;
|
||||
_lex_token();
|
||||
lexing_count--;
|
||||
}
|
||||
|
||||
// Lex the next token and set it as the next token.
|
||||
void _lex_token() {
|
||||
parser->prev = parser->curr;
|
||||
parser->curr = parser->next_token();
|
||||
//std::cout << parser->curr.info() << std::endl;
|
||||
|
||||
while (parser->peekchar() != '\0') {
|
||||
parser->token_start = parser->curr_char;
|
||||
char c = parser->eatchar_include_newline();
|
||||
switch (c) {
|
||||
case '\'': case '"': eat_string(c, NORMAL_STRING); return;
|
||||
case '#': parser->skip_line_comment(); break;
|
||||
case '{': parser->set_next_token(TK("{")); return;
|
||||
case '}': parser->set_next_token(TK("}")); return;
|
||||
case ',': parser->set_next_token(TK(",")); return;
|
||||
case ':': parser->set_next_token_2(':', TK(":"), TK("::")); return;
|
||||
case ';': parser->set_next_token(TK(";")); return;
|
||||
case '(': parser->set_next_token(TK("(")); return;
|
||||
case ')': parser->set_next_token(TK(")")); return;
|
||||
case '[': parser->set_next_token(TK("[")); return;
|
||||
case ']': parser->set_next_token(TK("]")); return;
|
||||
case '@': parser->set_next_token(TK("@")); return;
|
||||
case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return;
|
||||
case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return;
|
||||
case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return;
|
||||
case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return;
|
||||
case '?': parser->set_next_token(TK("?")); return;
|
||||
case '.': {
|
||||
if(parser->matchchar('.')) {
|
||||
if(parser->matchchar('.')) {
|
||||
parser->set_next_token(TK("..."));
|
||||
} else {
|
||||
SyntaxError("invalid token '..'");
|
||||
}
|
||||
} else {
|
||||
parser->set_next_token(TK("."));
|
||||
}
|
||||
return;
|
||||
}
|
||||
case '=': parser->set_next_token_2('=', TK("="), TK("==")); return;
|
||||
case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return;
|
||||
case '>': {
|
||||
if(parser->matchchar('=')) parser->set_next_token(TK(">="));
|
||||
else if(parser->matchchar('>')) parser->set_next_token_2('=', TK(">>"), TK(">>="));
|
||||
else parser->set_next_token(TK(">"));
|
||||
return;
|
||||
}
|
||||
case '<': {
|
||||
if(parser->matchchar('=')) parser->set_next_token(TK("<="));
|
||||
else if(parser->matchchar('<')) parser->set_next_token_2('=', TK("<<"), TK("<<="));
|
||||
else parser->set_next_token(TK("<"));
|
||||
return;
|
||||
}
|
||||
case '-': {
|
||||
if(parser->matchchar('=')) parser->set_next_token(TK("-="));
|
||||
else if(parser->matchchar('>')) parser->set_next_token(TK("->"));
|
||||
else parser->set_next_token(TK("-"));
|
||||
return;
|
||||
}
|
||||
case '!':
|
||||
if(parser->matchchar('=')) parser->set_next_token(TK("!="));
|
||||
else SyntaxError("expected '=' after '!'");
|
||||
break;
|
||||
case '*':
|
||||
if (parser->matchchar('*')) {
|
||||
parser->set_next_token(TK("**")); // '**'
|
||||
} else {
|
||||
parser->set_next_token_2('=', TK("*"), TK("*="));
|
||||
}
|
||||
return;
|
||||
case '/':
|
||||
if(parser->matchchar('/')) {
|
||||
parser->set_next_token_2('=', TK("//"), TK("//="));
|
||||
} else {
|
||||
parser->set_next_token_2('=', TK("/"), TK("/="));
|
||||
}
|
||||
return;
|
||||
case '\r': break; // just ignore '\r'
|
||||
case ' ': case '\t': parser->eat_spaces(); break;
|
||||
case '\n': {
|
||||
parser->set_next_token(TK("@eol"));
|
||||
if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level");
|
||||
return;
|
||||
}
|
||||
default: {
|
||||
if(c == 'f'){
|
||||
if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;}
|
||||
if(parser->matchchar('"')) {eat_string('"', F_STRING); return;}
|
||||
}else if(c == 'r'){
|
||||
if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;}
|
||||
if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;}
|
||||
}
|
||||
|
||||
if (c >= '0' && c <= '9') {
|
||||
eat_number();
|
||||
return;
|
||||
}
|
||||
|
||||
switch (parser->eat_name())
|
||||
{
|
||||
case 0: break;
|
||||
case 1: SyntaxError("invalid char: " + std::string(1, c));
|
||||
case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
|
||||
case 3: SyntaxError("@id contains invalid char"); break;
|
||||
case 4: SyntaxError("invalid JSON token"); break;
|
||||
default: UNREACHABLE();
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parser->token_start = parser->curr_char;
|
||||
parser->set_next_token(TK("@eof"));
|
||||
}
|
||||
|
||||
TokenIndex peek() {
|
||||
return parser->curr.type;
|
||||
}
|
||||
|
||||
// not sure this will work
|
||||
TokenIndex peek_next() {
|
||||
if(parser->nexts.empty()) return TK("@eof");
|
||||
return parser->nexts.front().type;
|
||||
}
|
||||
const Token& prev() { return tokens.at(i-1); }
|
||||
const Token& curr() { return tokens.at(i); }
|
||||
const Token& next() { return tokens.at(i+1); }
|
||||
const Token& peek(int offset=0) { return tokens.at(i+offset); }
|
||||
void advance() { i++; }
|
||||
|
||||
bool match(TokenIndex expected) {
|
||||
if (peek() != expected) return false;
|
||||
lex_token();
|
||||
if (curr().type != expected) return false;
|
||||
advance();
|
||||
return true;
|
||||
}
|
||||
|
||||
void consume(TokenIndex expected) {
|
||||
if (!match(expected)){
|
||||
StrStream ss;
|
||||
ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
|
||||
ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'";
|
||||
SyntaxError(ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
bool match_newlines(bool repl_throw=false) {
|
||||
bool consumed = false;
|
||||
if (peek() == TK("@eol")) {
|
||||
while (peek() == TK("@eol")) lex_token();
|
||||
if (curr().type == TK("@eol")) {
|
||||
while (curr().type == TK("@eol")) advance();
|
||||
consumed = true;
|
||||
}
|
||||
if (repl_throw && peek() == TK("@eof")){
|
||||
if (repl_throw && curr().type == TK("@eof")){
|
||||
throw NeedMoreLines(co()->_is_compiling_class);
|
||||
}
|
||||
return consumed;
|
||||
@ -344,8 +138,8 @@ private:
|
||||
|
||||
bool match_end_stmt() {
|
||||
if (match(TK(";"))) { match_newlines(); return true; }
|
||||
if (match_newlines() || peek()==TK("@eof")) return true;
|
||||
if (peek() == TK("@dedent")) return true;
|
||||
if (match_newlines() || curr().type == TK("@eof")) return true;
|
||||
if (curr().type == TK("@dedent")) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -353,15 +147,27 @@ private:
|
||||
if (!match_end_stmt()) SyntaxError("expected statement end");
|
||||
}
|
||||
|
||||
PyObject* get_value(const Token& token) {
|
||||
switch (token.type) {
|
||||
case TK("@num"):
|
||||
if(std::holds_alternative<i64>(token.value)) return VAR(std::get<i64>(token.value));
|
||||
if(std::holds_alternative<f64>(token.value)) return VAR(std::get<f64>(token.value));
|
||||
UNREACHABLE();
|
||||
case TK("@str"): case TK("@fstr"):
|
||||
return VAR(std::get<Str>(token.value));
|
||||
default: throw std::runtime_error(Str("invalid token type: ") + TK_STR(token.type));
|
||||
}
|
||||
}
|
||||
|
||||
void exprLiteral() {
|
||||
PyObject* value = parser->prev.value;
|
||||
PyObject* value = get_value(prev());
|
||||
int index = co()->add_const(value);
|
||||
emit(OP_LOAD_CONST, index);
|
||||
}
|
||||
|
||||
void exprFString() {
|
||||
static const std::regex pattern(R"(\{(.*?)\})");
|
||||
PyObject* value = parser->prev.value;
|
||||
PyObject* value = get_value(prev());
|
||||
Str s = CAST(Str, value);
|
||||
std::sregex_iterator begin(s.begin(), s.end(), pattern);
|
||||
std::sregex_iterator end;
|
||||
@ -395,7 +201,7 @@ private:
|
||||
_compile_f_args(func, false);
|
||||
consume(TK(":"));
|
||||
}
|
||||
func.code = make_sp<CodeObject>(parser->src, func.name.str());
|
||||
func.code = make_sp<CodeObject>(lexer->src, func.name.str());
|
||||
this->codes.push(func.code);
|
||||
co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1;
|
||||
emit(OP_RETURN_VALUE);
|
||||
@ -414,7 +220,7 @@ private:
|
||||
if(is_load_name_ref) co()->codes.pop_back();
|
||||
|
||||
co()->_rvalue += 1;
|
||||
TokenIndex op = parser->prev.type;
|
||||
TokenIndex op = prev().type;
|
||||
if(op == TK("=")) { // a = (expr)
|
||||
EXPR_TUPLE();
|
||||
if(is_load_name_ref){
|
||||
@ -487,7 +293,7 @@ private:
|
||||
}
|
||||
|
||||
void exprBinaryOp() {
|
||||
TokenIndex op = parser->prev.type;
|
||||
TokenIndex op = prev().type;
|
||||
parse_expression((Precedence)(rules[op].precedence + 1));
|
||||
|
||||
switch (op) {
|
||||
@ -525,7 +331,7 @@ private:
|
||||
}
|
||||
|
||||
void exprUnaryOp() {
|
||||
TokenIndex op = parser->prev.type;
|
||||
TokenIndex op = prev().type;
|
||||
parse_expression((Precedence)(PREC_UNARY + 1));
|
||||
switch (op) {
|
||||
case TK("-"): emit(OP_UNARY_NEGATIVE); break;
|
||||
@ -588,7 +394,7 @@ private:
|
||||
int ARGC = 0;
|
||||
do {
|
||||
match_newlines(mode()==REPL_MODE);
|
||||
if (peek() == TK("]")) break;
|
||||
if (curr().type == TK("]")) break;
|
||||
EXPR(); ARGC++;
|
||||
match_newlines(mode()==REPL_MODE);
|
||||
if(ARGC == 1 && match(TK("for"))){
|
||||
@ -609,9 +415,9 @@ private:
|
||||
int ARGC = 0;
|
||||
do {
|
||||
match_newlines(mode()==REPL_MODE);
|
||||
if (peek() == TK("}")) break;
|
||||
if (curr().type == TK("}")) break;
|
||||
EXPR();
|
||||
if(peek() == TK(":")) parsing_dict = true;
|
||||
if(curr().type == TK(":")) parsing_dict = true;
|
||||
if(parsing_dict){
|
||||
consume(TK(":"));
|
||||
EXPR();
|
||||
@ -637,10 +443,10 @@ private:
|
||||
bool need_unpack = false;
|
||||
do {
|
||||
match_newlines(mode()==REPL_MODE);
|
||||
if (peek() == TK(")")) break;
|
||||
if(peek() == TK("@id") && peek_next() == TK("=")) {
|
||||
if (curr().type == TK(")")) break;
|
||||
if(curr().type == TK("@id") && next().type == TK("=")) {
|
||||
consume(TK("@id"));
|
||||
const Str& key = parser->prev.str();
|
||||
const Str& key = prev().str();
|
||||
emit(OP_LOAD_CONST, co()->add_const(VAR(key)));
|
||||
consume(TK("="));
|
||||
co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1;
|
||||
@ -666,7 +472,7 @@ private:
|
||||
void exprName(){ _exprName(false); }
|
||||
|
||||
void _exprName(bool force_lvalue) {
|
||||
Token tkname = parser->prev;
|
||||
const Token& tkname = prev();
|
||||
int index = co()->add_name(tkname.str(), name_scope());
|
||||
bool fast_load = !force_lvalue && co()->_rvalue>0;
|
||||
emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index);
|
||||
@ -674,7 +480,7 @@ private:
|
||||
|
||||
void exprAttrib() {
|
||||
consume(TK("@id"));
|
||||
const Str& name = parser->prev.str();
|
||||
const Str& name = prev().str();
|
||||
int index = co()->add_name(name, NAME_ATTR);
|
||||
emit(co()->_rvalue ? OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index);
|
||||
}
|
||||
@ -710,7 +516,7 @@ private:
|
||||
}
|
||||
|
||||
void exprValue() {
|
||||
TokenIndex op = parser->prev.type;
|
||||
TokenIndex op = prev().type;
|
||||
switch (op) {
|
||||
case TK("None"): emit(OP_LOAD_NONE); break;
|
||||
case TK("True"): emit(OP_LOAD_TRUE); break;
|
||||
@ -721,7 +527,7 @@ private:
|
||||
}
|
||||
|
||||
int emit(Opcode opcode, int arg=-1, bool keepline=false) {
|
||||
int line = parser->prev.line;
|
||||
int line = prev().line;
|
||||
co()->codes.push_back(
|
||||
Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line}
|
||||
);
|
||||
@ -738,7 +544,7 @@ private:
|
||||
void compile_block_body(CompilerAction action=nullptr) {
|
||||
if(action == nullptr) action = &Compiler::compile_stmt;
|
||||
consume(TK(":"));
|
||||
if(peek()!=TK("@eol") && peek()!=TK("@eof")){
|
||||
if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
|
||||
(this->*action)(); // inline block
|
||||
return;
|
||||
}
|
||||
@ -746,7 +552,7 @@ private:
|
||||
SyntaxError("expected a new line after ':'");
|
||||
}
|
||||
consume(TK("@indent"));
|
||||
while (peek() != TK("@dedent")) {
|
||||
while (curr().type != TK("@dedent")) {
|
||||
match_newlines();
|
||||
(this->*action)();
|
||||
match_newlines();
|
||||
@ -756,7 +562,7 @@ private:
|
||||
|
||||
Token _compile_import() {
|
||||
consume(TK("@id"));
|
||||
Token tkmodule = parser->prev;
|
||||
Token tkmodule = prev();
|
||||
int index = co()->add_name(tkmodule.str(), NAME_SPECIAL);
|
||||
emit(OP_IMPORT_NAME, index);
|
||||
return tkmodule;
|
||||
@ -768,7 +574,7 @@ private:
|
||||
Token tkmodule = _compile_import();
|
||||
if (match(TK("as"))) {
|
||||
consume(TK("@id"));
|
||||
tkmodule = parser->prev;
|
||||
tkmodule = prev();
|
||||
}
|
||||
int index = co()->add_name(tkmodule.str(), name_scope());
|
||||
emit(OP_STORE_NAME, index);
|
||||
@ -789,12 +595,12 @@ private:
|
||||
do {
|
||||
emit(OP_DUP_TOP_VALUE);
|
||||
consume(TK("@id"));
|
||||
Token tkname = parser->prev;
|
||||
Token tkname = prev();
|
||||
int index = co()->add_name(tkname.str(), NAME_ATTR);
|
||||
emit(OP_BUILD_ATTR, index);
|
||||
if (match(TK("as"))) {
|
||||
consume(TK("@id"));
|
||||
tkname = parser->prev;
|
||||
tkname = prev();
|
||||
}
|
||||
index = co()->add_name(tkname.str(), name_scope());
|
||||
emit(OP_STORE_NAME, index);
|
||||
@ -807,14 +613,14 @@ private:
|
||||
// ['a', '1', '2', '+', '=']
|
||||
//
|
||||
void parse_expression(Precedence precedence) {
|
||||
lex_token();
|
||||
GrammarFn prefix = rules[parser->prev.type].prefix;
|
||||
if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type));
|
||||
advance();
|
||||
GrammarFn prefix = rules[prev().type].prefix;
|
||||
if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
|
||||
(this->*prefix)();
|
||||
bool meet_assign_token = false;
|
||||
while (rules[peek()].precedence >= precedence) {
|
||||
lex_token();
|
||||
TokenIndex op = parser->prev.type;
|
||||
while (rules[curr().type].precedence >= precedence) {
|
||||
advance();
|
||||
TokenIndex op = prev().type;
|
||||
if (op == TK("=")){
|
||||
if(meet_assign_token) SyntaxError();
|
||||
meet_assign_token = true;
|
||||
@ -891,7 +697,7 @@ private:
|
||||
do {
|
||||
consume(TK("except"));
|
||||
if(match(TK("@id"))){
|
||||
int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL);
|
||||
int name_idx = co()->add_name(prev().str(), NAME_SPECIAL);
|
||||
emit(OP_EXCEPTION_MATCH, name_idx);
|
||||
}else{
|
||||
emit(OP_LOAD_TRUE);
|
||||
@ -901,7 +707,7 @@ private:
|
||||
compile_block_body();
|
||||
patches.push_back(emit(OP_JUMP_ABSOLUTE));
|
||||
patch_jump(patch);
|
||||
}while(peek() == TK("except"));
|
||||
}while(curr().type == TK("except"));
|
||||
emit(OP_RE_RAISE); // no match, re-raise
|
||||
for (int patch : patches) patch_jump(patch);
|
||||
}
|
||||
@ -968,7 +774,7 @@ private:
|
||||
EXPR();
|
||||
consume(TK("as"));
|
||||
consume(TK("@id"));
|
||||
Token tkname = parser->prev;
|
||||
Token tkname = prev();
|
||||
int index = co()->add_name(tkname.str(), name_scope());
|
||||
emit(OP_STORE_NAME, index);
|
||||
emit(OP_LOAD_NAME_REF, index);
|
||||
@ -979,18 +785,18 @@ private:
|
||||
} else if(match(TK("label"))){
|
||||
if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
|
||||
consume(TK(".")); consume(TK("@id"));
|
||||
Str label = parser->prev.str();
|
||||
Str label = prev().str();
|
||||
bool ok = co()->add_label(label);
|
||||
if(!ok) SyntaxError("label '" + label + "' already exists");
|
||||
consume_end_stmt();
|
||||
} else if(match(TK("goto"))){ // https://entrian.com/goto/
|
||||
if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
|
||||
consume(TK(".")); consume(TK("@id"));
|
||||
emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL));
|
||||
emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL));
|
||||
consume_end_stmt();
|
||||
} else if(match(TK("raise"))){
|
||||
consume(TK("@id"));
|
||||
int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL);
|
||||
int dummy_t = co()->add_name(prev().str(), NAME_SPECIAL);
|
||||
if(match(TK("(")) && !match(TK(")"))){
|
||||
EXPR(); consume(TK(")"));
|
||||
}else{
|
||||
@ -1005,7 +811,7 @@ private:
|
||||
} else if(match(TK("global"))){
|
||||
do {
|
||||
consume(TK("@id"));
|
||||
co()->global_names[parser->prev.str()] = 1;
|
||||
co()->global_names[prev().str()] = 1;
|
||||
} while (match(TK(",")));
|
||||
consume_end_stmt();
|
||||
} else if(match(TK("pass"))){
|
||||
@ -1030,10 +836,10 @@ private:
|
||||
|
||||
void compile_class(){
|
||||
consume(TK("@id"));
|
||||
int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
|
||||
int cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL);
|
||||
int super_cls_name_idx = -1;
|
||||
if(match(TK("(")) && match(TK("@id"))){
|
||||
super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
|
||||
super_cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL);
|
||||
consume(TK(")"));
|
||||
}
|
||||
if(super_cls_name_idx == -1) emit(OP_LOAD_NONE);
|
||||
@ -1059,13 +865,13 @@ private:
|
||||
}
|
||||
|
||||
consume(TK("@id"));
|
||||
const Str& name = parser->prev.str();
|
||||
const Str& name = prev().str();
|
||||
if(func.has_name(name)) SyntaxError("duplicate argument name");
|
||||
|
||||
// eat type hints
|
||||
if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
|
||||
|
||||
if(state == 0 && peek() == TK("=")) state = 2;
|
||||
if(state == 0 && curr().type == TK("=")) state = 2;
|
||||
|
||||
switch (state)
|
||||
{
|
||||
@ -1075,7 +881,7 @@ private:
|
||||
consume(TK("="));
|
||||
PyObject* value = read_literal();
|
||||
if(value == nullptr){
|
||||
SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type));
|
||||
SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
|
||||
}
|
||||
func.kwargs.set(name, value);
|
||||
func.kwargs_order.push_back(name);
|
||||
@ -1090,11 +896,11 @@ private:
|
||||
Function func;
|
||||
StrName obj_name;
|
||||
consume(TK("@id"));
|
||||
func.name = parser->prev.str();
|
||||
func.name = prev().str();
|
||||
if(!co()->_is_compiling_class && match(TK("::"))){
|
||||
consume(TK("@id"));
|
||||
obj_name = func.name;
|
||||
func.name = parser->prev.str();
|
||||
func.name = prev().str();
|
||||
}
|
||||
consume(TK("("));
|
||||
if (!match(TK(")"))) {
|
||||
@ -1104,7 +910,7 @@ private:
|
||||
if(match(TK("->"))){
|
||||
if(!match(TK("None"))) consume(TK("@id"));
|
||||
}
|
||||
func.code = make_sp<CodeObject>(parser->src, func.name.str());
|
||||
func.code = make_sp<CodeObject>(lexer->src, func.name.str());
|
||||
this->codes.push(func.code);
|
||||
compile_block_body();
|
||||
func.code->optimize(vm);
|
||||
@ -1132,11 +938,11 @@ private:
|
||||
PyObject* read_literal(){
|
||||
if(match(TK("-"))){
|
||||
consume(TK("@num"));
|
||||
PyObject* val = parser->prev.value;
|
||||
PyObject* val = get_value(prev());
|
||||
return vm->num_negated(val);
|
||||
}
|
||||
if(match(TK("@num"))) return parser->prev.value;
|
||||
if(match(TK("@str"))) return parser->prev.value;
|
||||
if(match(TK("@num"))) return get_value(prev());
|
||||
if(match(TK("@str"))) return get_value(prev());
|
||||
if(match(TK("True"))) return VAR(true);
|
||||
if(match(TK("False"))) return VAR(false);
|
||||
if(match(TK("None"))) return vm->None;
|
||||
@ -1144,23 +950,8 @@ private:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/***** Error Reporter *****/
|
||||
void throw_err(Str type, Str msg){
|
||||
int lineno = parser->curr.line;
|
||||
const char* cursor = parser->curr.start;
|
||||
// if error occurs in lexing, lineno should be `parser->current_line`
|
||||
if(lexing_count > 0){
|
||||
lineno = parser->current_line;
|
||||
cursor = parser->curr_char;
|
||||
}
|
||||
if(parser->peekchar() == '\n') lineno--;
|
||||
auto e = Exception("SyntaxError", msg);
|
||||
e.st_push(parser->src->snapshot(lineno, cursor));
|
||||
throw e;
|
||||
}
|
||||
void SyntaxError(Str msg){ throw_err("SyntaxError", msg); }
|
||||
void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); }
|
||||
void IndentationError(Str msg){ throw_err("IndentationError", msg); }
|
||||
void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); }
|
||||
void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); }
|
||||
|
||||
public:
|
||||
CodeObject_ compile(){
|
||||
@ -1168,11 +959,16 @@ public:
|
||||
if(used) UNREACHABLE();
|
||||
used = true;
|
||||
|
||||
CodeObject_ code = make_sp<CodeObject>(parser->src, Str("<module>"));
|
||||
tokens = lexer->run();
|
||||
// if(lexer->src->filename == "tests/01_int.py"){
|
||||
// for(auto& t: tokens) std::cout << t.info() << std::endl;
|
||||
// }
|
||||
|
||||
CodeObject_ code = make_sp<CodeObject>(lexer->src, lexer->src->filename);
|
||||
codes.push(code);
|
||||
|
||||
lex_token(); lex_token();
|
||||
match_newlines();
|
||||
advance(); // skip @sof, so prev() is always valid
|
||||
match_newlines(); // skip leading '\n'
|
||||
|
||||
if(mode()==EVAL_MODE) {
|
||||
EXPR_TUPLE();
|
||||
|
108
src/expr.h
Normal file
108
src/expr.h
Normal file
@ -0,0 +1,108 @@
|
||||
#pragma once
|
||||
|
||||
#include "codeobject.h"
|
||||
#include "common.h"
|
||||
#include "parser.h"
|
||||
#include "error.h"
|
||||
#include "ceval.h"
|
||||
#include <memory>
|
||||
|
||||
namespace pkpy{
|
||||
|
||||
struct Expression;
|
||||
typedef std::unique_ptr<Expression> Expression_;
|
||||
|
||||
struct Expression{
|
||||
std::vector<Expression_> children;
|
||||
virtual Str to_string() const = 0;
|
||||
};
|
||||
|
||||
struct NameExpr: Expression{
|
||||
Str name;
|
||||
NameScope scope;
|
||||
NameExpr(Str name, NameScope scope): name(name), scope(scope) {}
|
||||
Str to_string() const override { return name; }
|
||||
};
|
||||
|
||||
struct GroupExpr: Expression{
|
||||
Expression_ expr;
|
||||
GroupExpr(Expression_ expr): expr(std::move(expr)) {}
|
||||
Str to_string() const override { return "()"; }
|
||||
};
|
||||
|
||||
struct UnaryExpr: Expression{
|
||||
TokenIndex op;
|
||||
UnaryExpr(TokenIndex op): op(op) {}
|
||||
Str to_string() const override { return TK_STR(op); }
|
||||
};
|
||||
|
||||
struct NotExpr: Expression{
|
||||
Str to_string() const override { return "not"; }
|
||||
};
|
||||
|
||||
struct AndExpr: Expression{
|
||||
Str to_string() const override { return "and"; }
|
||||
};
|
||||
|
||||
struct OrExpr: Expression{
|
||||
Str to_string() const override { return "or"; }
|
||||
};
|
||||
|
||||
// None, True, False, ...
|
||||
struct SpecialValueExpr: Expression{
|
||||
TokenIndex token;
|
||||
SpecialValueExpr(TokenIndex token): token(token) {}
|
||||
Str to_string() const override { return TK_STR(token); }
|
||||
};
|
||||
|
||||
// @num, @str which needs to invoke OP_LOAD_CONST
|
||||
struct LiteralExpr: Expression{
|
||||
PyObject* value;
|
||||
LiteralExpr(PyObject* value): value(value) {}
|
||||
Str to_string() const override { return "literal"; }
|
||||
};
|
||||
|
||||
struct ListExpr: Expression{
|
||||
Str to_string() const override { return "[]"; }
|
||||
};
|
||||
|
||||
struct DictExpr: Expression{
|
||||
Str to_string() const override { return "{}"; }
|
||||
};
|
||||
|
||||
struct LambdaExpr: Expression{
|
||||
Str to_string() const override { return "lambda"; }
|
||||
};
|
||||
|
||||
struct FStringExpr: Expression{
|
||||
Str to_string() const override { return "@fstr"; }
|
||||
};
|
||||
|
||||
struct AttribExpr: Expression{
|
||||
Str to_string() const override { return "."; }
|
||||
};
|
||||
|
||||
struct CallExpr: Expression{
|
||||
Str to_string() const override { return "()"; }
|
||||
};
|
||||
|
||||
struct BinaryExpr: Expression{
|
||||
TokenIndex op;
|
||||
BinaryExpr(TokenIndex op): op(op) {}
|
||||
Str to_string() const override { return TK_STR(op); }
|
||||
};
|
||||
|
||||
struct TernaryExpr: Expression{
|
||||
Str to_string() const override { return "?"; }
|
||||
};
|
||||
|
||||
struct AssignExpr: Expression{
|
||||
Str to_string() const override { return "="; }
|
||||
};
|
||||
|
||||
struct CommaExpr: Expression{
|
||||
Str to_string() const override { return ","; }
|
||||
};
|
||||
|
||||
|
||||
} // namespace pkpy
|
@ -58,7 +58,7 @@ struct Frame {
|
||||
}
|
||||
|
||||
PyObject* pop(){
|
||||
#if PK_EXTRA_CHECK
|
||||
#if DEBUG_EXTRA_CHECK
|
||||
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
|
||||
#endif
|
||||
PyObject* v = _data.back();
|
||||
@ -67,7 +67,7 @@ struct Frame {
|
||||
}
|
||||
|
||||
void _pop(){
|
||||
#if PK_EXTRA_CHECK
|
||||
#if DEBUG_EXTRA_CHECK
|
||||
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
|
||||
#endif
|
||||
_data.pop_back();
|
||||
@ -88,14 +88,14 @@ struct Frame {
|
||||
}
|
||||
|
||||
// Mutable reference to the top of the value stack.
// Fix: the extracted text carried both the stale `#if PK_EXTRA_CHECK` and the
// renamed `#if DEBUG_EXTRA_CHECK` guard with a single `#endif` (unbalanced
// preprocessor residue); keep only the renamed macro.
PyObject*& top(){
#if DEBUG_EXTRA_CHECK
    if(_data.empty()) throw std::runtime_error("_data.empty() is true");
#endif
    return _data.back();
}
|
||||
|
||||
PyObject*& top_1(){
|
||||
#if PK_EXTRA_CHECK
|
||||
#if DEBUG_EXTRA_CHECK
|
||||
if(_data.size() < 2) throw std::runtime_error("_data.size() < 2");
|
||||
#endif
|
||||
return _data[_data.size()-2];
|
||||
|
6
src/gc.h
6
src/gc.h
@ -67,9 +67,9 @@ struct ManagedHeap{
|
||||
|
||||
// Free all objects registered as exempt from GC; they are owned by the heap.
// Fix: the per-type deletion-count dump appeared twice (an active copy and a
// commented-out copy — merge/diff residue); keep the single commented-out
// debug aid so the destructor no longer writes to stdout.
~ManagedHeap(){
    for(PyObject* obj: _no_gc) delete obj;
    // Debug aid: dump per-type deletion counts.
    // for(auto& [type, count]: deleted){
    //     std::cout << "GC: " << type << "=" << count << std::endl;
    // }
}
|
||||
|
||||
int sweep(VM* vm){
|
||||
|
510
src/lexer.h
Normal file
510
src/lexer.h
Normal file
@ -0,0 +1,510 @@
|
||||
#pragma once
|
||||
|
||||
#include "common.h"
|
||||
#include "error.h"
|
||||
#include "str.h"
|
||||
|
||||
namespace pkpy{
|
||||
|
||||
typedef uint8_t TokenIndex;

// Master token table. A token's INDEX in this array is its TokenIndex, so
// the order below is significant — do not reorder entries.
constexpr const char* kTokens[] = {
    "@eof", "@eol", "@sof",
    ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::",
    "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->",
    "<<", ">>", "&", "|", "^", "?", "@",
    "==", "!=", ">=", "<=",
    "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=",
    /** KW_BEGIN **/
    "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
    "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
    "goto", "label", // extended keywords, not available in cpython
    "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
    /** KW_END **/
    "is not", "not in",
    "@id", "@num", "@str", "@fstr",
    "@indent", "@dedent"
};

// Literal payload carried by a token: nothing, an int, a float, or a string.
using TokenValue = std::variant<std::monostate, i64, f64, Str>;
const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);

// Compile-time lookup of a token spelling to its index in kTokens.
// Unknown spellings hit UNREACHABLE().
constexpr TokenIndex TK(const char token[]) {
    for(int k=0; k<kTokenCount; k++){
        const char* i = kTokens[k];
        const char* j = token;
        while(*i && *j && *i == *j) { i++; j++;}
        if(*i == *j) return k;
    }
    UNREACHABLE();
}

#define TK_STR(t) kTokens[t]
// Inclusive keyword range inside kTokens, delimited by KW_BEGIN/KW_END above.
const TokenIndex kTokenKwBegin = TK("class");
const TokenIndex kTokenKwEnd = TK("raise");

// Keyword spelling -> token index, built once at static-initialization time.
const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
    std::map<std::string_view, TokenIndex> map;
    for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k;
    return map;
}();
|
||||
|
||||
|
||||
struct Token{
|
||||
TokenIndex type;
|
||||
const char* start;
|
||||
int length;
|
||||
int line;
|
||||
TokenValue value;
|
||||
|
||||
Str str() const { return Str(start, length);}
|
||||
|
||||
Str info() const {
|
||||
StrStream ss;
|
||||
Str raw = str();
|
||||
if (raw == Str("\n")) raw = "\\n";
|
||||
ss << line << ": " << TK_STR(type) << " '" << raw << "'";
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
// https://docs.python.org/3/reference/expressions.html
// Operator binding powers, weakest to strongest. The relative order of the
// enumerators is what matters, so do not reorder.
enum Precedence {
    PREC_NONE,
    PREC_ASSIGNMENT, // =
    PREC_COMMA, // ,
    PREC_TERNARY, // ?:
    PREC_LOGICAL_OR, // or
    PREC_LOGICAL_AND, // and
    PREC_LOGICAL_NOT, // not
    PREC_EQUALITY, // == !=
    PREC_TEST, // in / is / is not / not in
    PREC_COMPARISION, // < > <= >=
    PREC_BITWISE_OR, // |
    PREC_BITWISE_XOR, // ^
    PREC_BITWISE_AND, // &
    PREC_BITWISE_SHIFT, // << >>
    PREC_TERM, // + -
    PREC_FACTOR, // * / % //
    PREC_UNARY, // - not
    PREC_EXPONENT, // **
    PREC_CALL, // ()
    PREC_SUBSCRIPT, // []
    PREC_ATTRIB, // .index
    PREC_PRIMARY,
};
|
||||
|
||||
// How a string literal's body should be processed by the lexer.
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
|
||||
|
||||
struct Lexer {
|
||||
shared_ptr<SourceData> src;
|
||||
const char* token_start;
|
||||
const char* curr_char;
|
||||
int current_line = 1;
|
||||
std::vector<Token> nexts;
|
||||
stack<int> indents;
|
||||
int brackets_level = 0;
|
||||
bool used = false;
|
||||
|
||||
char peekchar() const{ return *curr_char; }
|
||||
|
||||
bool match_n_chars(int n, char c0){
|
||||
const char* c = curr_char;
|
||||
for(int i=0; i<n; i++){
|
||||
if(*c == '\0') return false;
|
||||
if(*c != c0) return false;
|
||||
c++;
|
||||
}
|
||||
for(int i=0; i<n; i++) eatchar_include_newline();
|
||||
return true;
|
||||
}
|
||||
|
||||
int eat_spaces(){
|
||||
int count = 0;
|
||||
while (true) {
|
||||
switch (peekchar()) {
|
||||
case ' ' : count+=1; break;
|
||||
case '\t': count+=4; break;
|
||||
default: return count;
|
||||
}
|
||||
eatchar();
|
||||
}
|
||||
}
|
||||
|
||||
bool eat_indentation(){
|
||||
if(brackets_level > 0) return true;
|
||||
int spaces = eat_spaces();
|
||||
if(peekchar() == '#') skip_line_comment();
|
||||
if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true;
|
||||
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
|
||||
if(spaces > indents.top()){
|
||||
indents.push(spaces);
|
||||
nexts.push_back(Token{TK("@indent"), token_start, 0, current_line});
|
||||
} else if(spaces < indents.top()){
|
||||
while(spaces < indents.top()){
|
||||
indents.pop();
|
||||
nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line});
|
||||
}
|
||||
if(spaces != indents.top()){
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
char eatchar() {
|
||||
char c = peekchar();
|
||||
if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline");
|
||||
curr_char++;
|
||||
return c;
|
||||
}
|
||||
|
||||
char eatchar_include_newline() {
|
||||
char c = peekchar();
|
||||
curr_char++;
|
||||
if (c == '\n'){
|
||||
current_line++;
|
||||
src->line_starts.push_back(curr_char);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
int eat_name() {
|
||||
curr_char--;
|
||||
while(true){
|
||||
uint8_t c = peekchar();
|
||||
int u8bytes = 0;
|
||||
if((c & 0b10000000) == 0b00000000) u8bytes = 1;
|
||||
else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
|
||||
else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
|
||||
else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
|
||||
else return 1;
|
||||
if(u8bytes == 1){
|
||||
if(isalpha(c) || c=='_' || isdigit(c)) {
|
||||
curr_char++;
|
||||
continue;
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
// handle multibyte char
|
||||
std::string u8str(curr_char, u8bytes);
|
||||
if(u8str.size() != u8bytes) return 2;
|
||||
uint32_t value = 0;
|
||||
for(int k=0; k < u8bytes; k++){
|
||||
uint8_t b = u8str[k];
|
||||
if(k==0){
|
||||
if(u8bytes == 2) value = (b & 0b00011111) << 6;
|
||||
else if(u8bytes == 3) value = (b & 0b00001111) << 12;
|
||||
else if(u8bytes == 4) value = (b & 0b00000111) << 18;
|
||||
}else{
|
||||
value |= (b & 0b00111111) << (6*(u8bytes-k-1));
|
||||
}
|
||||
}
|
||||
if(is_unicode_Lo_char(value)) curr_char += u8bytes;
|
||||
else break;
|
||||
}
|
||||
|
||||
int length = (int)(curr_char - token_start);
|
||||
if(length == 0) return 3;
|
||||
std::string_view name(token_start, length);
|
||||
|
||||
if(src->mode == JSON_MODE){
|
||||
if(name == "true"){
|
||||
add_token(TK("True"));
|
||||
} else if(name == "false"){
|
||||
add_token(TK("False"));
|
||||
} else if(name == "null"){
|
||||
add_token(TK("None"));
|
||||
} else {
|
||||
return 4;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(kTokenKwMap.count(name)){
|
||||
if(name == "not"){
|
||||
if(strncmp(curr_char, " in", 3) == 0){
|
||||
curr_char += 3;
|
||||
add_token(TK("not in"));
|
||||
return 0;
|
||||
}
|
||||
}else if(name == "is"){
|
||||
if(strncmp(curr_char, " not", 4) == 0){
|
||||
curr_char += 4;
|
||||
add_token(TK("is not"));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
add_token(kTokenKwMap.at(name));
|
||||
} else {
|
||||
add_token(TK("@id"));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void skip_line_comment() {
|
||||
char c;
|
||||
while ((c = peekchar()) != '\0') {
|
||||
if (c == '\n') return;
|
||||
eatchar();
|
||||
}
|
||||
}
|
||||
|
||||
bool matchchar(char c) {
|
||||
if (peekchar() != c) return false;
|
||||
eatchar_include_newline();
|
||||
return true;
|
||||
}
|
||||
|
||||
void add_token(TokenIndex type, TokenValue value={}) {
|
||||
switch(type){
|
||||
case TK("{"): case TK("["): case TK("("): brackets_level++; break;
|
||||
case TK(")"): case TK("]"): case TK("}"): brackets_level--; break;
|
||||
}
|
||||
nexts.push_back( Token{
|
||||
type,
|
||||
token_start,
|
||||
(int)(curr_char - token_start),
|
||||
current_line - ((type == TK("@eol")) ? 1 : 0),
|
||||
value
|
||||
});
|
||||
}
|
||||
|
||||
void add_token_2(char c, TokenIndex one, TokenIndex two) {
|
||||
if (matchchar(c)) add_token(two);
|
||||
else add_token(one);
|
||||
}
|
||||
|
||||
Str eat_string_until(char quote, bool raw) {
|
||||
bool quote3 = match_n_chars(2, quote);
|
||||
std::vector<char> buff;
|
||||
while (true) {
|
||||
char c = eatchar_include_newline();
|
||||
if (c == quote){
|
||||
if(quote3 && !match_n_chars(2, quote)){
|
||||
buff.push_back(c);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (c == '\0'){
|
||||
if(quote3 && src->mode == REPL_MODE){
|
||||
throw NeedMoreLines(false);
|
||||
}
|
||||
SyntaxError("EOL while scanning string literal");
|
||||
}
|
||||
if (c == '\n'){
|
||||
if(!quote3) SyntaxError("EOL while scanning string literal");
|
||||
else{
|
||||
buff.push_back(c);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!raw && c == '\\') {
|
||||
switch (eatchar_include_newline()) {
|
||||
case '"': buff.push_back('"'); break;
|
||||
case '\'': buff.push_back('\''); break;
|
||||
case '\\': buff.push_back('\\'); break;
|
||||
case 'n': buff.push_back('\n'); break;
|
||||
case 'r': buff.push_back('\r'); break;
|
||||
case 't': buff.push_back('\t'); break;
|
||||
default: SyntaxError("invalid escape char");
|
||||
}
|
||||
} else {
|
||||
buff.push_back(c);
|
||||
}
|
||||
}
|
||||
return Str(buff.data(), buff.size());
|
||||
}
|
||||
|
||||
void eat_string(char quote, StringType type) {
|
||||
Str s = eat_string_until(quote, type == RAW_STRING);
|
||||
if(type == F_STRING){
|
||||
add_token(TK("@fstr"), s);
|
||||
}else{
|
||||
add_token(TK("@str"), s);
|
||||
}
|
||||
}
|
||||
|
||||
void eat_number() {
|
||||
static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
|
||||
std::smatch m;
|
||||
|
||||
const char* i = token_start;
|
||||
while(*i != '\n' && *i != '\0') i++;
|
||||
std::string s = std::string(token_start, i);
|
||||
|
||||
try{
|
||||
if (std::regex_search(s, m, pattern)) {
|
||||
// here is m.length()-1, since the first char was eaten by lex_token()
|
||||
for(int j=0; j<m.length()-1; j++) eatchar();
|
||||
|
||||
int base = 10;
|
||||
size_t size;
|
||||
if (m[1].matched) base = 16;
|
||||
if (m[2].matched) {
|
||||
if(base == 16) SyntaxError("hex literal should not contain a dot");
|
||||
add_token(TK("@num"), S_TO_FLOAT(m[0], &size));
|
||||
} else {
|
||||
add_token(TK("@num"), S_TO_INT(m[0], &size, base));
|
||||
}
|
||||
if (size != m.length()) UNREACHABLE();
|
||||
}
|
||||
}catch(std::exception& _){
|
||||
SyntaxError("invalid number literal");
|
||||
}
|
||||
}
|
||||
|
||||
bool lex_one_token() {
|
||||
while (peekchar() != '\0') {
|
||||
token_start = curr_char;
|
||||
char c = eatchar_include_newline();
|
||||
switch (c) {
|
||||
case '\'': case '"': eat_string(c, NORMAL_STRING); return true;
|
||||
case '#': skip_line_comment(); break;
|
||||
case '{': add_token(TK("{")); return true;
|
||||
case '}': add_token(TK("}")); return true;
|
||||
case ',': add_token(TK(",")); return true;
|
||||
case ':': add_token_2(':', TK(":"), TK("::")); return true;
|
||||
case ';': add_token(TK(";")); return true;
|
||||
case '(': add_token(TK("(")); return true;
|
||||
case ')': add_token(TK(")")); return true;
|
||||
case '[': add_token(TK("[")); return true;
|
||||
case ']': add_token(TK("]")); return true;
|
||||
case '@': add_token(TK("@")); return true;
|
||||
case '%': add_token_2('=', TK("%"), TK("%=")); return true;
|
||||
case '&': add_token_2('=', TK("&"), TK("&=")); return true;
|
||||
case '|': add_token_2('=', TK("|"), TK("|=")); return true;
|
||||
case '^': add_token_2('=', TK("^"), TK("^=")); return true;
|
||||
case '?': add_token(TK("?")); return true;
|
||||
case '.': {
|
||||
if(matchchar('.')) {
|
||||
if(matchchar('.')) {
|
||||
add_token(TK("..."));
|
||||
} else {
|
||||
SyntaxError("invalid token '..'");
|
||||
}
|
||||
} else {
|
||||
add_token(TK("."));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
case '=': add_token_2('=', TK("="), TK("==")); return true;
|
||||
case '+': add_token_2('=', TK("+"), TK("+=")); return true;
|
||||
case '>': {
|
||||
if(matchchar('=')) add_token(TK(">="));
|
||||
else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>="));
|
||||
else add_token(TK(">"));
|
||||
return true;
|
||||
}
|
||||
case '<': {
|
||||
if(matchchar('=')) add_token(TK("<="));
|
||||
else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<="));
|
||||
else add_token(TK("<"));
|
||||
return true;
|
||||
}
|
||||
case '-': {
|
||||
if(matchchar('=')) add_token(TK("-="));
|
||||
else if(matchchar('>')) add_token(TK("->"));
|
||||
else add_token(TK("-"));
|
||||
return true;
|
||||
}
|
||||
case '!':
|
||||
if(matchchar('=')) add_token(TK("!="));
|
||||
else SyntaxError("expected '=' after '!'");
|
||||
break;
|
||||
case '*':
|
||||
if (matchchar('*')) {
|
||||
add_token(TK("**")); // '**'
|
||||
} else {
|
||||
add_token_2('=', TK("*"), TK("*="));
|
||||
}
|
||||
return true;
|
||||
case '/':
|
||||
if(matchchar('/')) {
|
||||
add_token_2('=', TK("//"), TK("//="));
|
||||
} else {
|
||||
add_token_2('=', TK("/"), TK("/="));
|
||||
}
|
||||
return true;
|
||||
case '\r': break; // just ignore '\r'
|
||||
case ' ': case '\t': eat_spaces(); break;
|
||||
case '\n': {
|
||||
add_token(TK("@eol"));
|
||||
if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level");
|
||||
return true;
|
||||
}
|
||||
default: {
|
||||
if(c == 'f'){
|
||||
if(matchchar('\'')) {eat_string('\'', F_STRING); return true;}
|
||||
if(matchchar('"')) {eat_string('"', F_STRING); return true;}
|
||||
}else if(c == 'r'){
|
||||
if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;}
|
||||
if(matchchar('"')) {eat_string('"', RAW_STRING); return true;}
|
||||
}
|
||||
if (c >= '0' && c <= '9') {
|
||||
eat_number();
|
||||
return true;
|
||||
}
|
||||
switch (eat_name())
|
||||
{
|
||||
case 0: break;
|
||||
case 1: SyntaxError("invalid char: " + std::string(1, c));
|
||||
case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
|
||||
case 3: SyntaxError("@id contains invalid char"); break;
|
||||
case 4: SyntaxError("invalid JSON token"); break;
|
||||
default: UNREACHABLE();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
token_start = curr_char;
|
||||
while(indents.size() > 1){
|
||||
indents.pop();
|
||||
add_token(TK("@dedent"));
|
||||
return true;
|
||||
}
|
||||
add_token(TK("@eof"));
|
||||
return false;
|
||||
}
|
||||
|
||||
/***** Error Reporter *****/
|
||||
void throw_err(Str type, Str msg){
|
||||
int lineno = current_line;
|
||||
const char* cursor = curr_char;
|
||||
if(peekchar() == '\n'){
|
||||
lineno--;
|
||||
cursor--;
|
||||
}
|
||||
throw_err(type, msg, lineno, cursor);
|
||||
}
|
||||
|
||||
void throw_err(Str type, Str msg, int lineno, const char* cursor){
|
||||
auto e = Exception("SyntaxError", msg);
|
||||
e.st_push(src->snapshot(lineno, cursor));
|
||||
throw e;
|
||||
}
|
||||
void SyntaxError(Str msg){ throw_err("SyntaxError", msg); }
|
||||
void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); }
|
||||
void IndentationError(Str msg){ throw_err("IndentationError", msg); }
|
||||
|
||||
Lexer(shared_ptr<SourceData> src) {
|
||||
this->src = src;
|
||||
this->token_start = src->source;
|
||||
this->curr_char = src->source;
|
||||
this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line});
|
||||
this->indents.push(0);
|
||||
}
|
||||
|
||||
std::vector<Token> run() {
|
||||
if(used) UNREACHABLE();
|
||||
used = true;
|
||||
while (lex_one_token());
|
||||
return std::move(nexts);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace pkpy
|
302
src/parser.h
302
src/parser.h
@ -1,302 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "error.h"
|
||||
#include "obj.h"
|
||||
|
||||
namespace pkpy{
|
||||
|
||||
typedef uint8_t TokenIndex;

// Master token table (parser.h variant). A token's index in this array is its
// TokenIndex, so the order is significant. Note the extra "@error" sentinel,
// which Parser::next_token() returns when its queue runs dry.
constexpr const char* kTokens[] = {
    "@error", "@eof", "@eol", "@sof",
    ".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::",
    "+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->",
    "<<", ">>", "&", "|", "^", "?", "@",
    "==", "!=", ">=", "<=",
    "+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=",
    /** KW_BEGIN **/
    "class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
    "None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
    "goto", "label", // extended keywords, not available in cpython
    "while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
    /** KW_END **/
    "is not", "not in",
    "@id", "@num", "@str", "@fstr",
    "@indent", "@dedent"
};

const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);

// Compile-time lookup of a token spelling to its index in kTokens.
// Unknown spellings hit UNREACHABLE().
constexpr TokenIndex TK(const char token[]) {
    for(int k=0; k<kTokenCount; k++){
        const char* i = kTokens[k];
        const char* j = token;
        while(*i && *j && *i == *j) { i++; j++;}
        if(*i == *j) return k;
    }
    UNREACHABLE();
}

#define TK_STR(t) kTokens[t]
// Inclusive keyword range inside kTokens, delimited by KW_BEGIN/KW_END above.
const TokenIndex kTokenKwBegin = TK("class");
const TokenIndex kTokenKwEnd = TK("raise");

// Keyword spelling -> token index, built once at static-initialization time.
const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
    std::map<std::string_view, TokenIndex> map;
    for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k;
    return map;
}();
|
||||
|
||||
|
||||
// A single lexed token (parser.h variant): here the literal payload is a
// PyObject* (nullptr when absent) rather than a variant.
struct Token{
    TokenIndex type;

    const char* start;  // points into the source buffer (not owned)
    int length;
    int line;
    PyObject* value;    // literal payload; nullptr if the token has none

    // Materialize the raw source text of this token.
    Str str() const { return Str(start, length);}

    // Human-readable description for diagnostics and token dumps.
    Str info() const {
        StrStream ss;
        Str raw = str();
        if (raw == Str("\n")) raw = "\\n";
        ss << line << ": " << TK_STR(type) << " '" << raw << "'";
        return ss.str();
    }
};
|
||||
|
||||
// https://docs.python.org/3/reference/expressions.html
// Operator binding powers, weakest to strongest. The relative order of the
// enumerators is what matters, so do not reorder.
enum Precedence {
    PREC_NONE,
    PREC_ASSIGNMENT, // =
    PREC_COMMA, // ,
    PREC_TERNARY, // ?:
    PREC_LOGICAL_OR, // or
    PREC_LOGICAL_AND, // and
    PREC_LOGICAL_NOT, // not
    PREC_EQUALITY, // == !=
    PREC_TEST, // in / is / is not / not in
    PREC_COMPARISION, // < > <= >=
    PREC_BITWISE_OR, // |
    PREC_BITWISE_XOR, // ^
    PREC_BITWISE_AND, // &
    PREC_BITWISE_SHIFT, // << >>
    PREC_TERM, // + -
    PREC_FACTOR, // * / % //
    PREC_UNARY, // - not
    PREC_EXPONENT, // **
    PREC_CALL, // ()
    PREC_SUBSCRIPT, // []
    PREC_ATTRIB, // .index
    PREC_PRIMARY,
};
|
||||
|
||||
// The context of the parsing phase for the compiler.
// Lexing methods append tokens to the `nexts` queue and the compiler pulls
// them off via next_token().
struct Parser {
    shared_ptr<SourceData> src;

    const char* token_start;    // first char of the token being scanned
    const char* curr_char;      // scan cursor: next unconsumed char
    int current_line = 1;
    Token prev, curr;           // lookahead window maintained by the compiler
    queue<Token> nexts;         // tokens lexed but not yet consumed
    stack<int> indents;         // indentation widths of currently open blocks

    int brackets_level = 0;     // >0 inside (), [], {}: indentation is insignificant

    // Pop the next token. At @eof, synthesizes one @dedent per still-open
    // indentation block; returns an @error token if the queue is empty.
    Token next_token(){
        if(nexts.empty()){
            return Token{TK("@error"), token_start, (int)(curr_char - token_start), current_line};
        }
        Token t = nexts.front();
        if(t.type == TK("@eof") && indents.size()>1){
            nexts.pop();
            indents.pop();
            return Token{TK("@dedent"), token_start, 0, current_line};
        }
        nexts.pop();
        return t;
    }

    char peekchar() const{ return *curr_char; }

    // Consume exactly n consecutive copies of c0; on any mismatch consume nothing.
    bool match_n_chars(int n, char c0){
        const char* c = curr_char;
        for(int i=0; i<n; i++){
            if(*c == '\0') return false;
            if(*c != c0) return false;
            c++;
        }
        for(int i=0; i<n; i++) eatchar_include_newline();
        return true;
    }

    // Consume spaces/tabs, returning the indentation width (a tab counts as 4).
    int eat_spaces(){
        int count = 0;
        while (true) {
            switch (peekchar()) {
                case ' ' : count+=1; break;
                case '\t': count+=4; break;
                default: return count;
            }
            eatchar();
        }
    }

    // Process the leading whitespace of a logical line, emitting
    // @indent/@dedent tokens. Returns false on an inconsistent dedent.
    bool eat_indentation(){
        if(brackets_level > 0) return true;
        int spaces = eat_spaces();
        if(peekchar() == '#') skip_line_comment();
        // blank and comment-only lines never change the indentation level
        if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true;
        // https://docs.python.org/3/reference/lexical_analysis.html#indentation
        if(spaces > indents.top()){
            indents.push(spaces);
            nexts.push(Token{TK("@indent"), token_start, 0, current_line});
        } else if(spaces < indents.top()){
            while(spaces < indents.top()){
                indents.pop();
                nexts.push(Token{TK("@dedent"), token_start, 0, current_line});
            }
            // after popping, the width must land exactly on an enclosing level
            if(spaces != indents.top()){
                return false;
            }
        }
        return true;
    }

    // Consume one char; newlines must go through eatchar_include_newline().
    char eatchar() {
        char c = peekchar();
        if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline");
        curr_char++;
        return c;
    }

    // Consume one char, updating the line counter and line-start table on '\n'.
    char eatchar_include_newline() {
        char c = peekchar();
        curr_char++;
        if (c == '\n'){
            current_line++;
            src->line_starts.push_back(curr_char);
        }
        return c;
    }

    // Scan an identifier/keyword starting at token_start (the first char was
    // already consumed by the caller, hence the initial curr_char--).
    // Returns 0 on success; 1 invalid char; 2 invalid utf8 sequence;
    // 3 empty name; 4 invalid JSON token (JSON_MODE only).
    int eat_name() {
        curr_char--;
        while(true){
            uint8_t c = peekchar();
            int u8bytes = 0;
            // decode the UTF-8 sequence length from the leading byte
            if((c & 0b10000000) == 0b00000000) u8bytes = 1;
            else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
            else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
            else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
            else return 1;
            if(u8bytes == 1){
                if(isalpha(c) || c=='_' || isdigit(c)) {
                    curr_char++;
                    continue;
                }else{
                    break;
                }
            }
            // handle multibyte char: decode the code point from the sequence
            std::string u8str(curr_char, u8bytes);
            if(u8str.size() != u8bytes) return 2;
            uint32_t value = 0;
            for(int k=0; k < u8bytes; k++){
                uint8_t b = u8str[k];
                if(k==0){
                    if(u8bytes == 2) value = (b & 0b00011111) << 6;
                    else if(u8bytes == 3) value = (b & 0b00001111) << 12;
                    else if(u8bytes == 4) value = (b & 0b00000111) << 18;
                }else{
                    value |= (b & 0b00111111) << (6*(u8bytes-k-1));
                }
            }
            if(is_unicode_Lo_char(value)) curr_char += u8bytes;
            else break;
        }

        int length = (int)(curr_char - token_start);
        if(length == 0) return 3;
        std::string_view name(token_start, length);

        if(src->mode == JSON_MODE){
            // JSON only has these three keyword literals
            if(name == "true"){
                set_next_token(TK("True"));
            } else if(name == "false"){
                set_next_token(TK("False"));
            } else if(name == "null"){
                set_next_token(TK("None"));
            } else {
                return 4;
            }
            return 0;
        }

        if(kTokenKwMap.count(name)){
            // fuse the two-word operators "not in" / "is not" into one token
            if(name == "not"){
                if(strncmp(curr_char, " in", 3) == 0){
                    curr_char += 3;
                    set_next_token(TK("not in"));
                    return 0;
                }
            }else if(name == "is"){
                if(strncmp(curr_char, " not", 4) == 0){
                    curr_char += 4;
                    set_next_token(TK("is not"));
                    return 0;
                }
            }
            set_next_token(kTokenKwMap.at(name));
        } else {
            set_next_token(TK("@id"));
        }
        return 0;
    }

    // Skip to (but not past) the end of the current line.
    void skip_line_comment() {
        char c;
        while ((c = peekchar()) != '\0') {
            if (c == '\n') return;
            eatchar();
        }
    }

    // Consume c if it is next; returns whether it was consumed.
    bool matchchar(char c) {
        if (peekchar() != c) return false;
        eatchar_include_newline();
        return true;
    }

    // Queue a token spanning [token_start, curr_char), tracking bracket depth.
    void set_next_token(TokenIndex type, PyObject* value=nullptr) {
        switch(type){
            case TK("{"): case TK("["): case TK("("): brackets_level++; break;
            case TK(")"): case TK("]"): case TK("}"): brackets_level--; break;
        }
        nexts.push( Token{
            type,
            token_start,
            (int)(curr_char - token_start),
            // the '\n' producing @eol was already consumed, so the token
            // belongs to the previous line
            current_line - ((type == TK("@eol")) ? 1 : 0),
            value
        });
    }

    // Queue `two` if the next char is c (e.g. "+="), otherwise `one`.
    void set_next_token_2(char c, TokenIndex one, TokenIndex two) {
        if (matchchar(c)) set_next_token(two);
        else set_next_token(one);
    }

    // Start scanning at the beginning of `src`; the stream opens with @sof and
    // the base indentation level 0 is pre-pushed.
    Parser(shared_ptr<SourceData> src) {
        this->src = src;
        this->token_start = src->source;
        this->curr_char = src->source;
        this->nexts.push(Token{TK("@sof"), token_start, 0, current_line});
        this->indents.push(0);
    }
};
|
||||
|
||||
} // namespace pkpy
|
@ -760,6 +760,7 @@ inline void add_module_gc(VM* vm){
|
||||
|
||||
inline void VM::post_init(){
|
||||
init_builtins(this);
|
||||
#if !DEBUG_NO_BUILTIN_MODULES
|
||||
add_module_sys(this);
|
||||
add_module_time(this);
|
||||
add_module_json(this);
|
||||
@ -793,6 +794,7 @@ inline void VM::post_init(){
|
||||
const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])];
|
||||
return VAR(info.name);
|
||||
}));
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace pkpy
|
||||
|
4
src/vm.h
4
src/vm.h
@ -93,7 +93,7 @@ public:
|
||||
}
|
||||
|
||||
Frame* top_frame() const {
|
||||
#if PK_EXTRA_CHECK
|
||||
#if DEBUG_EXTRA_CHECK
|
||||
if(callstack.empty()) UNREACHABLE();
|
||||
#endif
|
||||
return callstack.top().get();
|
||||
@ -166,7 +166,7 @@ public:
|
||||
if(_module == nullptr) _module = _main;
|
||||
try {
|
||||
CodeObject_ code = compile(source, filename, mode);
|
||||
if(_module == _main) std::cout << disassemble(code) << '\n';
|
||||
// if(_module == _main) std::cout << disassemble(code) << '\n';
|
||||
return _exec(code, _module);
|
||||
}catch (const Exception& e){
|
||||
*_stderr << e.summary() << '\n';
|
||||
|
Loading…
x
Reference in New Issue
Block a user