mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-20 03:20:18 +00:00
update lexer
This commit is contained in:
parent
120773891a
commit
e78aa44895
@ -6,8 +6,8 @@ with open("src/opcodes.h", "rt", encoding='utf-8') as f:
|
|||||||
OPCODES_TEXT = f.read()
|
OPCODES_TEXT = f.read()
|
||||||
|
|
||||||
pipeline = [
|
pipeline = [
|
||||||
["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h"],
|
["common.h", "memory.h", "str.h", "tuplelist.h", "namedict.h", "error.h", "lexer.h"],
|
||||||
["obj.h", "parser.h", "codeobject.h", "frame.h"],
|
["obj.h", "codeobject.h", "frame.h"],
|
||||||
["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"],
|
["gc.h", "vm.h", "ref.h", "ceval.h", "compiler.h", "repl.h"],
|
||||||
["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"]
|
["iter.h", "cffi.h", "io.h", "_generated.h", "pocketpy.h"]
|
||||||
]
|
]
|
||||||
|
@ -7,7 +7,7 @@ namespace pkpy{
|
|||||||
|
|
||||||
inline PyObject* VM::run_frame(Frame* frame){
|
inline PyObject* VM::run_frame(Frame* frame){
|
||||||
while(frame->has_next_bytecode()){
|
while(frame->has_next_bytecode()){
|
||||||
// heap._auto_collect(this);
|
heap._auto_collect(this);
|
||||||
|
|
||||||
const Bytecode& byte = frame->next_bytecode();
|
const Bytecode& byte = frame->next_bytecode();
|
||||||
switch (byte.op)
|
switch (byte.op)
|
||||||
@ -325,7 +325,7 @@ inline PyObject* VM::run_frame(Frame* frame){
|
|||||||
if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE");
|
if(frame->_data.size() != 1) throw std::runtime_error("_data.size() != 1 in EVAL/JSON_MODE");
|
||||||
return frame->pop_value(this);
|
return frame->pop_value(this);
|
||||||
}
|
}
|
||||||
#if PK_EXTRA_CHECK
|
#if DEBUG_EXTRA_CHECK
|
||||||
if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE");
|
if(!frame->_data.empty()) throw std::runtime_error("_data.size() != 0 in EXEC_MODE");
|
||||||
#endif
|
#endif
|
||||||
return None;
|
return None;
|
||||||
|
34
src/common.h
34
src/common.h
@ -10,7 +10,6 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdlib>
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -26,10 +25,13 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <random>
|
#include <random>
|
||||||
#include <initializer_list>
|
#include <initializer_list>
|
||||||
#include <list>
|
#include <variant>
|
||||||
|
|
||||||
#define PK_VERSION "0.9.5"
|
#define PK_VERSION "0.9.6"
|
||||||
#define PK_EXTRA_CHECK 0
|
|
||||||
|
// debug macros
|
||||||
|
#define DEBUG_NO_BUILTIN_MODULES 0
|
||||||
|
#define DEBUG_EXTRA_CHECK 1
|
||||||
|
|
||||||
#if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__)
|
#if (defined(__ANDROID__) && __ANDROID_API__ <= 22) || defined(__EMSCRIPTEN__)
|
||||||
#define PK_ENABLE_FILEIO 0
|
#define PK_ENABLE_FILEIO 0
|
||||||
@ -40,13 +42,13 @@
|
|||||||
#if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__)
|
#if defined(__EMSCRIPTEN__) || defined(__arm__) || defined(__i386__)
|
||||||
typedef int32_t i64;
|
typedef int32_t i64;
|
||||||
typedef float f64;
|
typedef float f64;
|
||||||
#define S_TO_INT std::stoi
|
#define S_TO_INT(...) static_cast<i64>(std::stoi(__VA_ARGS__))
|
||||||
#define S_TO_FLOAT std::stof
|
#define S_TO_FLOAT(...) static_cast<f64>(std::stof(__VA_ARGS__))
|
||||||
#else
|
#else
|
||||||
typedef int64_t i64;
|
typedef int64_t i64;
|
||||||
typedef double f64;
|
typedef double f64;
|
||||||
#define S_TO_INT std::stoll
|
#define S_TO_INT(...) static_cast<i64>(std::stoll(__VA_ARGS__))
|
||||||
#define S_TO_FLOAT std::stod
|
#define S_TO_FLOAT(...) static_cast<f64>(std::stod(__VA_ARGS__))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace pkpy{
|
namespace pkpy{
|
||||||
@ -100,22 +102,6 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept {
|
|||||||
return is_int(a) && is_int(b);
|
return is_int(a) && is_int(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
class queue{
|
|
||||||
std::list<T> list;
|
|
||||||
public:
|
|
||||||
void push(const T& t){ list.push_back(t); }
|
|
||||||
void push(T&& t){ list.push_back(std::move(t)); }
|
|
||||||
void pop(){ list.pop_front(); }
|
|
||||||
void clear(){ list.clear(); }
|
|
||||||
bool empty() const { return list.empty(); }
|
|
||||||
size_t size() const { return list.size(); }
|
|
||||||
T& front(){ return list.front(); }
|
|
||||||
const T& front() const { return list.front(); }
|
|
||||||
const std::list<T>& data() const { return list; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class stack{
|
class stack{
|
||||||
std::vector<T> vec;
|
std::vector<T> vec;
|
||||||
|
378
src/compiler.h
378
src/compiler.h
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
#include "codeobject.h"
|
#include "codeobject.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "parser.h"
|
#include "lexer.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "ceval.h"
|
#include "ceval.h"
|
||||||
|
|
||||||
@ -18,24 +18,21 @@ struct GrammarRule{
|
|||||||
Precedence precedence;
|
Precedence precedence;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
|
|
||||||
|
|
||||||
class Compiler {
|
class Compiler {
|
||||||
std::unique_ptr<Parser> parser;
|
std::unique_ptr<Lexer> lexer;
|
||||||
stack<CodeObject_> codes;
|
stack<CodeObject_> codes;
|
||||||
int lexing_count = 0;
|
|
||||||
bool used = false;
|
bool used = false;
|
||||||
VM* vm;
|
VM* vm;
|
||||||
std::map<TokenIndex, GrammarRule> rules;
|
std::map<TokenIndex, GrammarRule> rules;
|
||||||
|
|
||||||
CodeObject_ co() const{ return codes.top(); }
|
CodeObject_ co() const{ return codes.top(); }
|
||||||
CompileMode mode() const{ return parser->src->mode; }
|
CompileMode mode() const{ return lexer->src->mode; }
|
||||||
NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; }
|
NameScope name_scope() const { return codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL; }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
|
Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
|
||||||
this->vm = vm;
|
this->vm = vm;
|
||||||
this->parser = std::make_unique<Parser>(
|
this->lexer = std::make_unique<Lexer>(
|
||||||
make_sp<SourceData>(source, filename, mode)
|
make_sp<SourceData>(source, filename, mode)
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -104,239 +101,36 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Str eat_string_until(char quote, bool raw) {
|
int i = 0;
|
||||||
bool quote3 = parser->match_n_chars(2, quote);
|
std::vector<Token> tokens;
|
||||||
std::vector<char> buff;
|
|
||||||
while (true) {
|
|
||||||
char c = parser->eatchar_include_newline();
|
|
||||||
if (c == quote){
|
|
||||||
if(quote3 && !parser->match_n_chars(2, quote)){
|
|
||||||
buff.push_back(c);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (c == '\0'){
|
|
||||||
if(quote3 && parser->src->mode == REPL_MODE){
|
|
||||||
throw NeedMoreLines(false);
|
|
||||||
}
|
|
||||||
SyntaxError("EOL while scanning string literal");
|
|
||||||
}
|
|
||||||
if (c == '\n'){
|
|
||||||
if(!quote3) SyntaxError("EOL while scanning string literal");
|
|
||||||
else{
|
|
||||||
buff.push_back(c);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!raw && c == '\\') {
|
|
||||||
switch (parser->eatchar_include_newline()) {
|
|
||||||
case '"': buff.push_back('"'); break;
|
|
||||||
case '\'': buff.push_back('\''); break;
|
|
||||||
case '\\': buff.push_back('\\'); break;
|
|
||||||
case 'n': buff.push_back('\n'); break;
|
|
||||||
case 'r': buff.push_back('\r'); break;
|
|
||||||
case 't': buff.push_back('\t'); break;
|
|
||||||
default: SyntaxError("invalid escape char");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
buff.push_back(c);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Str(buff.data(), buff.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
void eat_string(char quote, StringType type) {
|
const Token& prev() { return tokens.at(i-1); }
|
||||||
Str s = eat_string_until(quote, type == RAW_STRING);
|
const Token& curr() { return tokens.at(i); }
|
||||||
if(type == F_STRING){
|
const Token& next() { return tokens.at(i+1); }
|
||||||
parser->set_next_token(TK("@fstr"), VAR(s));
|
const Token& peek(int offset=0) { return tokens.at(i+offset); }
|
||||||
}else{
|
void advance() { i++; }
|
||||||
parser->set_next_token(TK("@str"), VAR(s));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void eat_number() {
|
|
||||||
static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
|
|
||||||
std::smatch m;
|
|
||||||
|
|
||||||
const char* i = parser->token_start;
|
|
||||||
while(*i != '\n' && *i != '\0') i++;
|
|
||||||
std::string s = std::string(parser->token_start, i);
|
|
||||||
|
|
||||||
try{
|
|
||||||
if (std::regex_search(s, m, pattern)) {
|
|
||||||
// here is m.length()-1, since the first char was eaten by lex_token()
|
|
||||||
for(int j=0; j<m.length()-1; j++) parser->eatchar();
|
|
||||||
|
|
||||||
int base = 10;
|
|
||||||
size_t size;
|
|
||||||
if (m[1].matched) base = 16;
|
|
||||||
if (m[2].matched) {
|
|
||||||
if(base == 16) SyntaxError("hex literal should not contain a dot");
|
|
||||||
parser->set_next_token(TK("@num"), VAR(S_TO_FLOAT(m[0], &size)));
|
|
||||||
} else {
|
|
||||||
parser->set_next_token(TK("@num"), VAR(S_TO_INT(m[0], &size, base)));
|
|
||||||
}
|
|
||||||
if (size != m.length()) UNREACHABLE();
|
|
||||||
}
|
|
||||||
}catch(std::exception& _){
|
|
||||||
SyntaxError("invalid number literal");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void lex_token(){
|
|
||||||
lexing_count++;
|
|
||||||
_lex_token();
|
|
||||||
lexing_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lex the next token and set it as the next token.
|
|
||||||
void _lex_token() {
|
|
||||||
parser->prev = parser->curr;
|
|
||||||
parser->curr = parser->next_token();
|
|
||||||
//std::cout << parser->curr.info() << std::endl;
|
|
||||||
|
|
||||||
while (parser->peekchar() != '\0') {
|
|
||||||
parser->token_start = parser->curr_char;
|
|
||||||
char c = parser->eatchar_include_newline();
|
|
||||||
switch (c) {
|
|
||||||
case '\'': case '"': eat_string(c, NORMAL_STRING); return;
|
|
||||||
case '#': parser->skip_line_comment(); break;
|
|
||||||
case '{': parser->set_next_token(TK("{")); return;
|
|
||||||
case '}': parser->set_next_token(TK("}")); return;
|
|
||||||
case ',': parser->set_next_token(TK(",")); return;
|
|
||||||
case ':': parser->set_next_token_2(':', TK(":"), TK("::")); return;
|
|
||||||
case ';': parser->set_next_token(TK(";")); return;
|
|
||||||
case '(': parser->set_next_token(TK("(")); return;
|
|
||||||
case ')': parser->set_next_token(TK(")")); return;
|
|
||||||
case '[': parser->set_next_token(TK("[")); return;
|
|
||||||
case ']': parser->set_next_token(TK("]")); return;
|
|
||||||
case '@': parser->set_next_token(TK("@")); return;
|
|
||||||
case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return;
|
|
||||||
case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return;
|
|
||||||
case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return;
|
|
||||||
case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return;
|
|
||||||
case '?': parser->set_next_token(TK("?")); return;
|
|
||||||
case '.': {
|
|
||||||
if(parser->matchchar('.')) {
|
|
||||||
if(parser->matchchar('.')) {
|
|
||||||
parser->set_next_token(TK("..."));
|
|
||||||
} else {
|
|
||||||
SyntaxError("invalid token '..'");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
parser->set_next_token(TK("."));
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
case '=': parser->set_next_token_2('=', TK("="), TK("==")); return;
|
|
||||||
case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return;
|
|
||||||
case '>': {
|
|
||||||
if(parser->matchchar('=')) parser->set_next_token(TK(">="));
|
|
||||||
else if(parser->matchchar('>')) parser->set_next_token_2('=', TK(">>"), TK(">>="));
|
|
||||||
else parser->set_next_token(TK(">"));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
case '<': {
|
|
||||||
if(parser->matchchar('=')) parser->set_next_token(TK("<="));
|
|
||||||
else if(parser->matchchar('<')) parser->set_next_token_2('=', TK("<<"), TK("<<="));
|
|
||||||
else parser->set_next_token(TK("<"));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
case '-': {
|
|
||||||
if(parser->matchchar('=')) parser->set_next_token(TK("-="));
|
|
||||||
else if(parser->matchchar('>')) parser->set_next_token(TK("->"));
|
|
||||||
else parser->set_next_token(TK("-"));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
case '!':
|
|
||||||
if(parser->matchchar('=')) parser->set_next_token(TK("!="));
|
|
||||||
else SyntaxError("expected '=' after '!'");
|
|
||||||
break;
|
|
||||||
case '*':
|
|
||||||
if (parser->matchchar('*')) {
|
|
||||||
parser->set_next_token(TK("**")); // '**'
|
|
||||||
} else {
|
|
||||||
parser->set_next_token_2('=', TK("*"), TK("*="));
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
case '/':
|
|
||||||
if(parser->matchchar('/')) {
|
|
||||||
parser->set_next_token_2('=', TK("//"), TK("//="));
|
|
||||||
} else {
|
|
||||||
parser->set_next_token_2('=', TK("/"), TK("/="));
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
case '\r': break; // just ignore '\r'
|
|
||||||
case ' ': case '\t': parser->eat_spaces(); break;
|
|
||||||
case '\n': {
|
|
||||||
parser->set_next_token(TK("@eol"));
|
|
||||||
if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
default: {
|
|
||||||
if(c == 'f'){
|
|
||||||
if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;}
|
|
||||||
if(parser->matchchar('"')) {eat_string('"', F_STRING); return;}
|
|
||||||
}else if(c == 'r'){
|
|
||||||
if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;}
|
|
||||||
if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c >= '0' && c <= '9') {
|
|
||||||
eat_number();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (parser->eat_name())
|
|
||||||
{
|
|
||||||
case 0: break;
|
|
||||||
case 1: SyntaxError("invalid char: " + std::string(1, c));
|
|
||||||
case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
|
|
||||||
case 3: SyntaxError("@id contains invalid char"); break;
|
|
||||||
case 4: SyntaxError("invalid JSON token"); break;
|
|
||||||
default: UNREACHABLE();
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
parser->token_start = parser->curr_char;
|
|
||||||
parser->set_next_token(TK("@eof"));
|
|
||||||
}
|
|
||||||
|
|
||||||
TokenIndex peek() {
|
|
||||||
return parser->curr.type;
|
|
||||||
}
|
|
||||||
|
|
||||||
// not sure this will work
|
|
||||||
TokenIndex peek_next() {
|
|
||||||
if(parser->nexts.empty()) return TK("@eof");
|
|
||||||
return parser->nexts.front().type;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool match(TokenIndex expected) {
|
bool match(TokenIndex expected) {
|
||||||
if (peek() != expected) return false;
|
if (curr().type != expected) return false;
|
||||||
lex_token();
|
advance();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void consume(TokenIndex expected) {
|
void consume(TokenIndex expected) {
|
||||||
if (!match(expected)){
|
if (!match(expected)){
|
||||||
StrStream ss;
|
StrStream ss;
|
||||||
ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
|
ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(curr().type) << "'";
|
||||||
SyntaxError(ss.str());
|
SyntaxError(ss.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool match_newlines(bool repl_throw=false) {
|
bool match_newlines(bool repl_throw=false) {
|
||||||
bool consumed = false;
|
bool consumed = false;
|
||||||
if (peek() == TK("@eol")) {
|
if (curr().type == TK("@eol")) {
|
||||||
while (peek() == TK("@eol")) lex_token();
|
while (curr().type == TK("@eol")) advance();
|
||||||
consumed = true;
|
consumed = true;
|
||||||
}
|
}
|
||||||
if (repl_throw && peek() == TK("@eof")){
|
if (repl_throw && curr().type == TK("@eof")){
|
||||||
throw NeedMoreLines(co()->_is_compiling_class);
|
throw NeedMoreLines(co()->_is_compiling_class);
|
||||||
}
|
}
|
||||||
return consumed;
|
return consumed;
|
||||||
@ -344,8 +138,8 @@ private:
|
|||||||
|
|
||||||
bool match_end_stmt() {
|
bool match_end_stmt() {
|
||||||
if (match(TK(";"))) { match_newlines(); return true; }
|
if (match(TK(";"))) { match_newlines(); return true; }
|
||||||
if (match_newlines() || peek()==TK("@eof")) return true;
|
if (match_newlines() || curr().type == TK("@eof")) return true;
|
||||||
if (peek() == TK("@dedent")) return true;
|
if (curr().type == TK("@dedent")) return true;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -353,15 +147,27 @@ private:
|
|||||||
if (!match_end_stmt()) SyntaxError("expected statement end");
|
if (!match_end_stmt()) SyntaxError("expected statement end");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject* get_value(const Token& token) {
|
||||||
|
switch (token.type) {
|
||||||
|
case TK("@num"):
|
||||||
|
if(std::holds_alternative<i64>(token.value)) return VAR(std::get<i64>(token.value));
|
||||||
|
if(std::holds_alternative<f64>(token.value)) return VAR(std::get<f64>(token.value));
|
||||||
|
UNREACHABLE();
|
||||||
|
case TK("@str"): case TK("@fstr"):
|
||||||
|
return VAR(std::get<Str>(token.value));
|
||||||
|
default: throw std::runtime_error(Str("invalid token type: ") + TK_STR(token.type));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void exprLiteral() {
|
void exprLiteral() {
|
||||||
PyObject* value = parser->prev.value;
|
PyObject* value = get_value(prev());
|
||||||
int index = co()->add_const(value);
|
int index = co()->add_const(value);
|
||||||
emit(OP_LOAD_CONST, index);
|
emit(OP_LOAD_CONST, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
void exprFString() {
|
void exprFString() {
|
||||||
static const std::regex pattern(R"(\{(.*?)\})");
|
static const std::regex pattern(R"(\{(.*?)\})");
|
||||||
PyObject* value = parser->prev.value;
|
PyObject* value = get_value(prev());
|
||||||
Str s = CAST(Str, value);
|
Str s = CAST(Str, value);
|
||||||
std::sregex_iterator begin(s.begin(), s.end(), pattern);
|
std::sregex_iterator begin(s.begin(), s.end(), pattern);
|
||||||
std::sregex_iterator end;
|
std::sregex_iterator end;
|
||||||
@ -395,7 +201,7 @@ private:
|
|||||||
_compile_f_args(func, false);
|
_compile_f_args(func, false);
|
||||||
consume(TK(":"));
|
consume(TK(":"));
|
||||||
}
|
}
|
||||||
func.code = make_sp<CodeObject>(parser->src, func.name.str());
|
func.code = make_sp<CodeObject>(lexer->src, func.name.str());
|
||||||
this->codes.push(func.code);
|
this->codes.push(func.code);
|
||||||
co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1;
|
co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1;
|
||||||
emit(OP_RETURN_VALUE);
|
emit(OP_RETURN_VALUE);
|
||||||
@ -414,7 +220,7 @@ private:
|
|||||||
if(is_load_name_ref) co()->codes.pop_back();
|
if(is_load_name_ref) co()->codes.pop_back();
|
||||||
|
|
||||||
co()->_rvalue += 1;
|
co()->_rvalue += 1;
|
||||||
TokenIndex op = parser->prev.type;
|
TokenIndex op = prev().type;
|
||||||
if(op == TK("=")) { // a = (expr)
|
if(op == TK("=")) { // a = (expr)
|
||||||
EXPR_TUPLE();
|
EXPR_TUPLE();
|
||||||
if(is_load_name_ref){
|
if(is_load_name_ref){
|
||||||
@ -487,7 +293,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void exprBinaryOp() {
|
void exprBinaryOp() {
|
||||||
TokenIndex op = parser->prev.type;
|
TokenIndex op = prev().type;
|
||||||
parse_expression((Precedence)(rules[op].precedence + 1));
|
parse_expression((Precedence)(rules[op].precedence + 1));
|
||||||
|
|
||||||
switch (op) {
|
switch (op) {
|
||||||
@ -525,7 +331,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void exprUnaryOp() {
|
void exprUnaryOp() {
|
||||||
TokenIndex op = parser->prev.type;
|
TokenIndex op = prev().type;
|
||||||
parse_expression((Precedence)(PREC_UNARY + 1));
|
parse_expression((Precedence)(PREC_UNARY + 1));
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case TK("-"): emit(OP_UNARY_NEGATIVE); break;
|
case TK("-"): emit(OP_UNARY_NEGATIVE); break;
|
||||||
@ -588,7 +394,7 @@ private:
|
|||||||
int ARGC = 0;
|
int ARGC = 0;
|
||||||
do {
|
do {
|
||||||
match_newlines(mode()==REPL_MODE);
|
match_newlines(mode()==REPL_MODE);
|
||||||
if (peek() == TK("]")) break;
|
if (curr().type == TK("]")) break;
|
||||||
EXPR(); ARGC++;
|
EXPR(); ARGC++;
|
||||||
match_newlines(mode()==REPL_MODE);
|
match_newlines(mode()==REPL_MODE);
|
||||||
if(ARGC == 1 && match(TK("for"))){
|
if(ARGC == 1 && match(TK("for"))){
|
||||||
@ -609,9 +415,9 @@ private:
|
|||||||
int ARGC = 0;
|
int ARGC = 0;
|
||||||
do {
|
do {
|
||||||
match_newlines(mode()==REPL_MODE);
|
match_newlines(mode()==REPL_MODE);
|
||||||
if (peek() == TK("}")) break;
|
if (curr().type == TK("}")) break;
|
||||||
EXPR();
|
EXPR();
|
||||||
if(peek() == TK(":")) parsing_dict = true;
|
if(curr().type == TK(":")) parsing_dict = true;
|
||||||
if(parsing_dict){
|
if(parsing_dict){
|
||||||
consume(TK(":"));
|
consume(TK(":"));
|
||||||
EXPR();
|
EXPR();
|
||||||
@ -637,10 +443,10 @@ private:
|
|||||||
bool need_unpack = false;
|
bool need_unpack = false;
|
||||||
do {
|
do {
|
||||||
match_newlines(mode()==REPL_MODE);
|
match_newlines(mode()==REPL_MODE);
|
||||||
if (peek() == TK(")")) break;
|
if (curr().type == TK(")")) break;
|
||||||
if(peek() == TK("@id") && peek_next() == TK("=")) {
|
if(curr().type == TK("@id") && next().type == TK("=")) {
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
const Str& key = parser->prev.str();
|
const Str& key = prev().str();
|
||||||
emit(OP_LOAD_CONST, co()->add_const(VAR(key)));
|
emit(OP_LOAD_CONST, co()->add_const(VAR(key)));
|
||||||
consume(TK("="));
|
consume(TK("="));
|
||||||
co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1;
|
co()->_rvalue += 1; EXPR(); co()->_rvalue -= 1;
|
||||||
@ -666,7 +472,7 @@ private:
|
|||||||
void exprName(){ _exprName(false); }
|
void exprName(){ _exprName(false); }
|
||||||
|
|
||||||
void _exprName(bool force_lvalue) {
|
void _exprName(bool force_lvalue) {
|
||||||
Token tkname = parser->prev;
|
const Token& tkname = prev();
|
||||||
int index = co()->add_name(tkname.str(), name_scope());
|
int index = co()->add_name(tkname.str(), name_scope());
|
||||||
bool fast_load = !force_lvalue && co()->_rvalue>0;
|
bool fast_load = !force_lvalue && co()->_rvalue>0;
|
||||||
emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index);
|
emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index);
|
||||||
@ -674,7 +480,7 @@ private:
|
|||||||
|
|
||||||
void exprAttrib() {
|
void exprAttrib() {
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
const Str& name = parser->prev.str();
|
const Str& name = prev().str();
|
||||||
int index = co()->add_name(name, NAME_ATTR);
|
int index = co()->add_name(name, NAME_ATTR);
|
||||||
emit(co()->_rvalue ? OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index);
|
emit(co()->_rvalue ? OP_BUILD_ATTR : OP_BUILD_ATTR_REF, index);
|
||||||
}
|
}
|
||||||
@ -710,7 +516,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void exprValue() {
|
void exprValue() {
|
||||||
TokenIndex op = parser->prev.type;
|
TokenIndex op = prev().type;
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case TK("None"): emit(OP_LOAD_NONE); break;
|
case TK("None"): emit(OP_LOAD_NONE); break;
|
||||||
case TK("True"): emit(OP_LOAD_TRUE); break;
|
case TK("True"): emit(OP_LOAD_TRUE); break;
|
||||||
@ -721,7 +527,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
int emit(Opcode opcode, int arg=-1, bool keepline=false) {
|
int emit(Opcode opcode, int arg=-1, bool keepline=false) {
|
||||||
int line = parser->prev.line;
|
int line = prev().line;
|
||||||
co()->codes.push_back(
|
co()->codes.push_back(
|
||||||
Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line}
|
Bytecode{(uint8_t)opcode, (uint16_t)co()->_curr_block_i, arg, line}
|
||||||
);
|
);
|
||||||
@ -738,7 +544,7 @@ private:
|
|||||||
void compile_block_body(CompilerAction action=nullptr) {
|
void compile_block_body(CompilerAction action=nullptr) {
|
||||||
if(action == nullptr) action = &Compiler::compile_stmt;
|
if(action == nullptr) action = &Compiler::compile_stmt;
|
||||||
consume(TK(":"));
|
consume(TK(":"));
|
||||||
if(peek()!=TK("@eol") && peek()!=TK("@eof")){
|
if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
|
||||||
(this->*action)(); // inline block
|
(this->*action)(); // inline block
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -746,7 +552,7 @@ private:
|
|||||||
SyntaxError("expected a new line after ':'");
|
SyntaxError("expected a new line after ':'");
|
||||||
}
|
}
|
||||||
consume(TK("@indent"));
|
consume(TK("@indent"));
|
||||||
while (peek() != TK("@dedent")) {
|
while (curr().type != TK("@dedent")) {
|
||||||
match_newlines();
|
match_newlines();
|
||||||
(this->*action)();
|
(this->*action)();
|
||||||
match_newlines();
|
match_newlines();
|
||||||
@ -756,7 +562,7 @@ private:
|
|||||||
|
|
||||||
Token _compile_import() {
|
Token _compile_import() {
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
Token tkmodule = parser->prev;
|
Token tkmodule = prev();
|
||||||
int index = co()->add_name(tkmodule.str(), NAME_SPECIAL);
|
int index = co()->add_name(tkmodule.str(), NAME_SPECIAL);
|
||||||
emit(OP_IMPORT_NAME, index);
|
emit(OP_IMPORT_NAME, index);
|
||||||
return tkmodule;
|
return tkmodule;
|
||||||
@ -768,7 +574,7 @@ private:
|
|||||||
Token tkmodule = _compile_import();
|
Token tkmodule = _compile_import();
|
||||||
if (match(TK("as"))) {
|
if (match(TK("as"))) {
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
tkmodule = parser->prev;
|
tkmodule = prev();
|
||||||
}
|
}
|
||||||
int index = co()->add_name(tkmodule.str(), name_scope());
|
int index = co()->add_name(tkmodule.str(), name_scope());
|
||||||
emit(OP_STORE_NAME, index);
|
emit(OP_STORE_NAME, index);
|
||||||
@ -789,12 +595,12 @@ private:
|
|||||||
do {
|
do {
|
||||||
emit(OP_DUP_TOP_VALUE);
|
emit(OP_DUP_TOP_VALUE);
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
Token tkname = parser->prev;
|
Token tkname = prev();
|
||||||
int index = co()->add_name(tkname.str(), NAME_ATTR);
|
int index = co()->add_name(tkname.str(), NAME_ATTR);
|
||||||
emit(OP_BUILD_ATTR, index);
|
emit(OP_BUILD_ATTR, index);
|
||||||
if (match(TK("as"))) {
|
if (match(TK("as"))) {
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
tkname = parser->prev;
|
tkname = prev();
|
||||||
}
|
}
|
||||||
index = co()->add_name(tkname.str(), name_scope());
|
index = co()->add_name(tkname.str(), name_scope());
|
||||||
emit(OP_STORE_NAME, index);
|
emit(OP_STORE_NAME, index);
|
||||||
@ -807,14 +613,14 @@ private:
|
|||||||
// ['a', '1', '2', '+', '=']
|
// ['a', '1', '2', '+', '=']
|
||||||
//
|
//
|
||||||
void parse_expression(Precedence precedence) {
|
void parse_expression(Precedence precedence) {
|
||||||
lex_token();
|
advance();
|
||||||
GrammarFn prefix = rules[parser->prev.type].prefix;
|
GrammarFn prefix = rules[prev().type].prefix;
|
||||||
if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type));
|
if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(prev().type));
|
||||||
(this->*prefix)();
|
(this->*prefix)();
|
||||||
bool meet_assign_token = false;
|
bool meet_assign_token = false;
|
||||||
while (rules[peek()].precedence >= precedence) {
|
while (rules[curr().type].precedence >= precedence) {
|
||||||
lex_token();
|
advance();
|
||||||
TokenIndex op = parser->prev.type;
|
TokenIndex op = prev().type;
|
||||||
if (op == TK("=")){
|
if (op == TK("=")){
|
||||||
if(meet_assign_token) SyntaxError();
|
if(meet_assign_token) SyntaxError();
|
||||||
meet_assign_token = true;
|
meet_assign_token = true;
|
||||||
@ -891,7 +697,7 @@ private:
|
|||||||
do {
|
do {
|
||||||
consume(TK("except"));
|
consume(TK("except"));
|
||||||
if(match(TK("@id"))){
|
if(match(TK("@id"))){
|
||||||
int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL);
|
int name_idx = co()->add_name(prev().str(), NAME_SPECIAL);
|
||||||
emit(OP_EXCEPTION_MATCH, name_idx);
|
emit(OP_EXCEPTION_MATCH, name_idx);
|
||||||
}else{
|
}else{
|
||||||
emit(OP_LOAD_TRUE);
|
emit(OP_LOAD_TRUE);
|
||||||
@ -901,7 +707,7 @@ private:
|
|||||||
compile_block_body();
|
compile_block_body();
|
||||||
patches.push_back(emit(OP_JUMP_ABSOLUTE));
|
patches.push_back(emit(OP_JUMP_ABSOLUTE));
|
||||||
patch_jump(patch);
|
patch_jump(patch);
|
||||||
}while(peek() == TK("except"));
|
}while(curr().type == TK("except"));
|
||||||
emit(OP_RE_RAISE); // no match, re-raise
|
emit(OP_RE_RAISE); // no match, re-raise
|
||||||
for (int patch : patches) patch_jump(patch);
|
for (int patch : patches) patch_jump(patch);
|
||||||
}
|
}
|
||||||
@ -968,7 +774,7 @@ private:
|
|||||||
EXPR();
|
EXPR();
|
||||||
consume(TK("as"));
|
consume(TK("as"));
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
Token tkname = parser->prev;
|
Token tkname = prev();
|
||||||
int index = co()->add_name(tkname.str(), name_scope());
|
int index = co()->add_name(tkname.str(), name_scope());
|
||||||
emit(OP_STORE_NAME, index);
|
emit(OP_STORE_NAME, index);
|
||||||
emit(OP_LOAD_NAME_REF, index);
|
emit(OP_LOAD_NAME_REF, index);
|
||||||
@ -979,18 +785,18 @@ private:
|
|||||||
} else if(match(TK("label"))){
|
} else if(match(TK("label"))){
|
||||||
if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
|
if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
|
||||||
consume(TK(".")); consume(TK("@id"));
|
consume(TK(".")); consume(TK("@id"));
|
||||||
Str label = parser->prev.str();
|
Str label = prev().str();
|
||||||
bool ok = co()->add_label(label);
|
bool ok = co()->add_label(label);
|
||||||
if(!ok) SyntaxError("label '" + label + "' already exists");
|
if(!ok) SyntaxError("label '" + label + "' already exists");
|
||||||
consume_end_stmt();
|
consume_end_stmt();
|
||||||
} else if(match(TK("goto"))){ // https://entrian.com/goto/
|
} else if(match(TK("goto"))){ // https://entrian.com/goto/
|
||||||
if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
|
if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
|
||||||
consume(TK(".")); consume(TK("@id"));
|
consume(TK(".")); consume(TK("@id"));
|
||||||
emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL));
|
emit(OP_GOTO, co()->add_name(prev().str(), NAME_SPECIAL));
|
||||||
consume_end_stmt();
|
consume_end_stmt();
|
||||||
} else if(match(TK("raise"))){
|
} else if(match(TK("raise"))){
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL);
|
int dummy_t = co()->add_name(prev().str(), NAME_SPECIAL);
|
||||||
if(match(TK("(")) && !match(TK(")"))){
|
if(match(TK("(")) && !match(TK(")"))){
|
||||||
EXPR(); consume(TK(")"));
|
EXPR(); consume(TK(")"));
|
||||||
}else{
|
}else{
|
||||||
@ -1005,7 +811,7 @@ private:
|
|||||||
} else if(match(TK("global"))){
|
} else if(match(TK("global"))){
|
||||||
do {
|
do {
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
co()->global_names[parser->prev.str()] = 1;
|
co()->global_names[prev().str()] = 1;
|
||||||
} while (match(TK(",")));
|
} while (match(TK(",")));
|
||||||
consume_end_stmt();
|
consume_end_stmt();
|
||||||
} else if(match(TK("pass"))){
|
} else if(match(TK("pass"))){
|
||||||
@ -1030,10 +836,10 @@ private:
|
|||||||
|
|
||||||
void compile_class(){
|
void compile_class(){
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
|
int cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL);
|
||||||
int super_cls_name_idx = -1;
|
int super_cls_name_idx = -1;
|
||||||
if(match(TK("(")) && match(TK("@id"))){
|
if(match(TK("(")) && match(TK("@id"))){
|
||||||
super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
|
super_cls_name_idx = co()->add_name(prev().str(), NAME_GLOBAL);
|
||||||
consume(TK(")"));
|
consume(TK(")"));
|
||||||
}
|
}
|
||||||
if(super_cls_name_idx == -1) emit(OP_LOAD_NONE);
|
if(super_cls_name_idx == -1) emit(OP_LOAD_NONE);
|
||||||
@ -1059,13 +865,13 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
const Str& name = parser->prev.str();
|
const Str& name = prev().str();
|
||||||
if(func.has_name(name)) SyntaxError("duplicate argument name");
|
if(func.has_name(name)) SyntaxError("duplicate argument name");
|
||||||
|
|
||||||
// eat type hints
|
// eat type hints
|
||||||
if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
|
if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
|
||||||
|
|
||||||
if(state == 0 && peek() == TK("=")) state = 2;
|
if(state == 0 && curr().type == TK("=")) state = 2;
|
||||||
|
|
||||||
switch (state)
|
switch (state)
|
||||||
{
|
{
|
||||||
@ -1075,7 +881,7 @@ private:
|
|||||||
consume(TK("="));
|
consume(TK("="));
|
||||||
PyObject* value = read_literal();
|
PyObject* value = read_literal();
|
||||||
if(value == nullptr){
|
if(value == nullptr){
|
||||||
SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type));
|
SyntaxError(Str("expect a literal, not ") + TK_STR(curr().type));
|
||||||
}
|
}
|
||||||
func.kwargs.set(name, value);
|
func.kwargs.set(name, value);
|
||||||
func.kwargs_order.push_back(name);
|
func.kwargs_order.push_back(name);
|
||||||
@ -1090,11 +896,11 @@ private:
|
|||||||
Function func;
|
Function func;
|
||||||
StrName obj_name;
|
StrName obj_name;
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
func.name = parser->prev.str();
|
func.name = prev().str();
|
||||||
if(!co()->_is_compiling_class && match(TK("::"))){
|
if(!co()->_is_compiling_class && match(TK("::"))){
|
||||||
consume(TK("@id"));
|
consume(TK("@id"));
|
||||||
obj_name = func.name;
|
obj_name = func.name;
|
||||||
func.name = parser->prev.str();
|
func.name = prev().str();
|
||||||
}
|
}
|
||||||
consume(TK("("));
|
consume(TK("("));
|
||||||
if (!match(TK(")"))) {
|
if (!match(TK(")"))) {
|
||||||
@ -1104,7 +910,7 @@ private:
|
|||||||
if(match(TK("->"))){
|
if(match(TK("->"))){
|
||||||
if(!match(TK("None"))) consume(TK("@id"));
|
if(!match(TK("None"))) consume(TK("@id"));
|
||||||
}
|
}
|
||||||
func.code = make_sp<CodeObject>(parser->src, func.name.str());
|
func.code = make_sp<CodeObject>(lexer->src, func.name.str());
|
||||||
this->codes.push(func.code);
|
this->codes.push(func.code);
|
||||||
compile_block_body();
|
compile_block_body();
|
||||||
func.code->optimize(vm);
|
func.code->optimize(vm);
|
||||||
@ -1132,11 +938,11 @@ private:
|
|||||||
PyObject* read_literal(){
|
PyObject* read_literal(){
|
||||||
if(match(TK("-"))){
|
if(match(TK("-"))){
|
||||||
consume(TK("@num"));
|
consume(TK("@num"));
|
||||||
PyObject* val = parser->prev.value;
|
PyObject* val = get_value(prev());
|
||||||
return vm->num_negated(val);
|
return vm->num_negated(val);
|
||||||
}
|
}
|
||||||
if(match(TK("@num"))) return parser->prev.value;
|
if(match(TK("@num"))) return get_value(prev());
|
||||||
if(match(TK("@str"))) return parser->prev.value;
|
if(match(TK("@str"))) return get_value(prev());
|
||||||
if(match(TK("True"))) return VAR(true);
|
if(match(TK("True"))) return VAR(true);
|
||||||
if(match(TK("False"))) return VAR(false);
|
if(match(TK("False"))) return VAR(false);
|
||||||
if(match(TK("None"))) return vm->None;
|
if(match(TK("None"))) return vm->None;
|
||||||
@ -1144,23 +950,8 @@ private:
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/***** Error Reporter *****/
|
void SyntaxError(Str msg){ lexer->throw_err("SyntaxError", msg, curr().line, curr().start); }
|
||||||
void throw_err(Str type, Str msg){
|
void SyntaxError(){ lexer->throw_err("SyntaxError", "invalid syntax", curr().line, curr().start); }
|
||||||
int lineno = parser->curr.line;
|
|
||||||
const char* cursor = parser->curr.start;
|
|
||||||
// if error occurs in lexing, lineno should be `parser->current_line`
|
|
||||||
if(lexing_count > 0){
|
|
||||||
lineno = parser->current_line;
|
|
||||||
cursor = parser->curr_char;
|
|
||||||
}
|
|
||||||
if(parser->peekchar() == '\n') lineno--;
|
|
||||||
auto e = Exception("SyntaxError", msg);
|
|
||||||
e.st_push(parser->src->snapshot(lineno, cursor));
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
void SyntaxError(Str msg){ throw_err("SyntaxError", msg); }
|
|
||||||
void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); }
|
|
||||||
void IndentationError(Str msg){ throw_err("IndentationError", msg); }
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CodeObject_ compile(){
|
CodeObject_ compile(){
|
||||||
@ -1168,11 +959,16 @@ public:
|
|||||||
if(used) UNREACHABLE();
|
if(used) UNREACHABLE();
|
||||||
used = true;
|
used = true;
|
||||||
|
|
||||||
CodeObject_ code = make_sp<CodeObject>(parser->src, Str("<module>"));
|
tokens = lexer->run();
|
||||||
|
// if(lexer->src->filename == "tests/01_int.py"){
|
||||||
|
// for(auto& t: tokens) std::cout << t.info() << std::endl;
|
||||||
|
// }
|
||||||
|
|
||||||
|
CodeObject_ code = make_sp<CodeObject>(lexer->src, lexer->src->filename);
|
||||||
codes.push(code);
|
codes.push(code);
|
||||||
|
|
||||||
lex_token(); lex_token();
|
advance(); // skip @sof, so prev() is always valid
|
||||||
match_newlines();
|
match_newlines(); // skip leading '\n'
|
||||||
|
|
||||||
if(mode()==EVAL_MODE) {
|
if(mode()==EVAL_MODE) {
|
||||||
EXPR_TUPLE();
|
EXPR_TUPLE();
|
||||||
|
108
src/expr.h
Normal file
108
src/expr.h
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "codeobject.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "parser.h"
|
||||||
|
#include "error.h"
|
||||||
|
#include "ceval.h"
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace pkpy{
|
||||||
|
|
||||||
|
struct Expression;
|
||||||
|
typedef std::unique_ptr<Expression> Expression_;
|
||||||
|
|
||||||
|
struct Expression{
|
||||||
|
std::vector<Expression_> children;
|
||||||
|
virtual Str to_string() const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct NameExpr: Expression{
|
||||||
|
Str name;
|
||||||
|
NameScope scope;
|
||||||
|
NameExpr(Str name, NameScope scope): name(name), scope(scope) {}
|
||||||
|
Str to_string() const override { return name; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GroupExpr: Expression{
|
||||||
|
Expression_ expr;
|
||||||
|
GroupExpr(Expression_ expr): expr(std::move(expr)) {}
|
||||||
|
Str to_string() const override { return "()"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct UnaryExpr: Expression{
|
||||||
|
TokenIndex op;
|
||||||
|
UnaryExpr(TokenIndex op): op(op) {}
|
||||||
|
Str to_string() const override { return TK_STR(op); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct NotExpr: Expression{
|
||||||
|
Str to_string() const override { return "not"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AndExpr: Expression{
|
||||||
|
Str to_string() const override { return "and"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct OrExpr: Expression{
|
||||||
|
Str to_string() const override { return "or"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
// None, True, False, ...
|
||||||
|
struct SpecialValueExpr: Expression{
|
||||||
|
TokenIndex token;
|
||||||
|
SpecialValueExpr(TokenIndex token): token(token) {}
|
||||||
|
Str to_string() const override { return TK_STR(token); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// @num, @str which needs to invoke OP_LOAD_CONST
|
||||||
|
struct LiteralExpr: Expression{
|
||||||
|
PyObject* value;
|
||||||
|
LiteralExpr(PyObject* value): value(value) {}
|
||||||
|
Str to_string() const override { return "literal"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ListExpr: Expression{
|
||||||
|
Str to_string() const override { return "[]"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DictExpr: Expression{
|
||||||
|
Str to_string() const override { return "{}"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct LambdaExpr: Expression{
|
||||||
|
Str to_string() const override { return "lambda"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FStringExpr: Expression{
|
||||||
|
Str to_string() const override { return "@fstr"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AttribExpr: Expression{
|
||||||
|
Str to_string() const override { return "."; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CallExpr: Expression{
|
||||||
|
Str to_string() const override { return "()"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct BinaryExpr: Expression{
|
||||||
|
TokenIndex op;
|
||||||
|
BinaryExpr(TokenIndex op): op(op) {}
|
||||||
|
Str to_string() const override { return TK_STR(op); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TernaryExpr: Expression{
|
||||||
|
Str to_string() const override { return "?"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct AssignExpr: Expression{
|
||||||
|
Str to_string() const override { return "="; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CommaExpr: Expression{
|
||||||
|
Str to_string() const override { return ","; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace pkpy
|
@ -58,7 +58,7 @@ struct Frame {
|
|||||||
}
|
}
|
||||||
|
|
||||||
PyObject* pop(){
|
PyObject* pop(){
|
||||||
#if PK_EXTRA_CHECK
|
#if DEBUG_EXTRA_CHECK
|
||||||
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
|
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
|
||||||
#endif
|
#endif
|
||||||
PyObject* v = _data.back();
|
PyObject* v = _data.back();
|
||||||
@ -67,7 +67,7 @@ struct Frame {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void _pop(){
|
void _pop(){
|
||||||
#if PK_EXTRA_CHECK
|
#if DEBUG_EXTRA_CHECK
|
||||||
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
|
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
|
||||||
#endif
|
#endif
|
||||||
_data.pop_back();
|
_data.pop_back();
|
||||||
@ -88,14 +88,14 @@ struct Frame {
|
|||||||
}
|
}
|
||||||
|
|
||||||
PyObject*& top(){
|
PyObject*& top(){
|
||||||
#if PK_EXTRA_CHECK
|
#if DEBUG_EXTRA_CHECK
|
||||||
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
|
if(_data.empty()) throw std::runtime_error("_data.empty() is true");
|
||||||
#endif
|
#endif
|
||||||
return _data.back();
|
return _data.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject*& top_1(){
|
PyObject*& top_1(){
|
||||||
#if PK_EXTRA_CHECK
|
#if DEBUG_EXTRA_CHECK
|
||||||
if(_data.size() < 2) throw std::runtime_error("_data.size() < 2");
|
if(_data.size() < 2) throw std::runtime_error("_data.size() < 2");
|
||||||
#endif
|
#endif
|
||||||
return _data[_data.size()-2];
|
return _data[_data.size()-2];
|
||||||
|
6
src/gc.h
6
src/gc.h
@ -67,9 +67,9 @@ struct ManagedHeap{
|
|||||||
|
|
||||||
~ManagedHeap(){
|
~ManagedHeap(){
|
||||||
for(PyObject* obj: _no_gc) delete obj;
|
for(PyObject* obj: _no_gc) delete obj;
|
||||||
for(auto& [type, count]: deleted){
|
// for(auto& [type, count]: deleted){
|
||||||
std::cout << "GC: " << type << "=" << count << std::endl;
|
// std::cout << "GC: " << type << "=" << count << std::endl;
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
int sweep(VM* vm){
|
int sweep(VM* vm){
|
||||||
|
510
src/lexer.h
Normal file
510
src/lexer.h
Normal file
@ -0,0 +1,510 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "error.h"
|
||||||
|
#include "str.h"
|
||||||
|
|
||||||
|
namespace pkpy{
|
||||||
|
|
||||||
|
typedef uint8_t TokenIndex;
|
||||||
|
|
||||||
|
constexpr const char* kTokens[] = {
|
||||||
|
"@eof", "@eol", "@sof",
|
||||||
|
".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::",
|
||||||
|
"+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->",
|
||||||
|
"<<", ">>", "&", "|", "^", "?", "@",
|
||||||
|
"==", "!=", ">=", "<=",
|
||||||
|
"+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=",
|
||||||
|
/** KW_BEGIN **/
|
||||||
|
"class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
|
||||||
|
"None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
|
||||||
|
"goto", "label", // extended keywords, not available in cpython
|
||||||
|
"while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
|
||||||
|
/** KW_END **/
|
||||||
|
"is not", "not in",
|
||||||
|
"@id", "@num", "@str", "@fstr",
|
||||||
|
"@indent", "@dedent"
|
||||||
|
};
|
||||||
|
|
||||||
|
using TokenValue = std::variant<std::monostate, i64, f64, Str>;
|
||||||
|
const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);
|
||||||
|
|
||||||
|
constexpr TokenIndex TK(const char token[]) {
|
||||||
|
for(int k=0; k<kTokenCount; k++){
|
||||||
|
const char* i = kTokens[k];
|
||||||
|
const char* j = token;
|
||||||
|
while(*i && *j && *i == *j) { i++; j++;}
|
||||||
|
if(*i == *j) return k;
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
#define TK_STR(t) kTokens[t]
|
||||||
|
const TokenIndex kTokenKwBegin = TK("class");
|
||||||
|
const TokenIndex kTokenKwEnd = TK("raise");
|
||||||
|
|
||||||
|
const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
|
||||||
|
std::map<std::string_view, TokenIndex> map;
|
||||||
|
for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k;
|
||||||
|
return map;
|
||||||
|
}();
|
||||||
|
|
||||||
|
|
||||||
|
struct Token{
|
||||||
|
TokenIndex type;
|
||||||
|
const char* start;
|
||||||
|
int length;
|
||||||
|
int line;
|
||||||
|
TokenValue value;
|
||||||
|
|
||||||
|
Str str() const { return Str(start, length);}
|
||||||
|
|
||||||
|
Str info() const {
|
||||||
|
StrStream ss;
|
||||||
|
Str raw = str();
|
||||||
|
if (raw == Str("\n")) raw = "\\n";
|
||||||
|
ss << line << ": " << TK_STR(type) << " '" << raw << "'";
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// https://docs.python.org/3/reference/expressions.html
|
||||||
|
enum Precedence {
|
||||||
|
PREC_NONE,
|
||||||
|
PREC_ASSIGNMENT, // =
|
||||||
|
PREC_COMMA, // ,
|
||||||
|
PREC_TERNARY, // ?:
|
||||||
|
PREC_LOGICAL_OR, // or
|
||||||
|
PREC_LOGICAL_AND, // and
|
||||||
|
PREC_LOGICAL_NOT, // not
|
||||||
|
PREC_EQUALITY, // == !=
|
||||||
|
PREC_TEST, // in / is / is not / not in
|
||||||
|
PREC_COMPARISION, // < > <= >=
|
||||||
|
PREC_BITWISE_OR, // |
|
||||||
|
PREC_BITWISE_XOR, // ^
|
||||||
|
PREC_BITWISE_AND, // &
|
||||||
|
PREC_BITWISE_SHIFT, // << >>
|
||||||
|
PREC_TERM, // + -
|
||||||
|
PREC_FACTOR, // * / % //
|
||||||
|
PREC_UNARY, // - not
|
||||||
|
PREC_EXPONENT, // **
|
||||||
|
PREC_CALL, // ()
|
||||||
|
PREC_SUBSCRIPT, // []
|
||||||
|
PREC_ATTRIB, // .index
|
||||||
|
PREC_PRIMARY,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
|
||||||
|
|
||||||
|
struct Lexer {
|
||||||
|
shared_ptr<SourceData> src;
|
||||||
|
const char* token_start;
|
||||||
|
const char* curr_char;
|
||||||
|
int current_line = 1;
|
||||||
|
std::vector<Token> nexts;
|
||||||
|
stack<int> indents;
|
||||||
|
int brackets_level = 0;
|
||||||
|
bool used = false;
|
||||||
|
|
||||||
|
char peekchar() const{ return *curr_char; }
|
||||||
|
|
||||||
|
bool match_n_chars(int n, char c0){
|
||||||
|
const char* c = curr_char;
|
||||||
|
for(int i=0; i<n; i++){
|
||||||
|
if(*c == '\0') return false;
|
||||||
|
if(*c != c0) return false;
|
||||||
|
c++;
|
||||||
|
}
|
||||||
|
for(int i=0; i<n; i++) eatchar_include_newline();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int eat_spaces(){
|
||||||
|
int count = 0;
|
||||||
|
while (true) {
|
||||||
|
switch (peekchar()) {
|
||||||
|
case ' ' : count+=1; break;
|
||||||
|
case '\t': count+=4; break;
|
||||||
|
default: return count;
|
||||||
|
}
|
||||||
|
eatchar();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool eat_indentation(){
|
||||||
|
if(brackets_level > 0) return true;
|
||||||
|
int spaces = eat_spaces();
|
||||||
|
if(peekchar() == '#') skip_line_comment();
|
||||||
|
if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true;
|
||||||
|
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
|
||||||
|
if(spaces > indents.top()){
|
||||||
|
indents.push(spaces);
|
||||||
|
nexts.push_back(Token{TK("@indent"), token_start, 0, current_line});
|
||||||
|
} else if(spaces < indents.top()){
|
||||||
|
while(spaces < indents.top()){
|
||||||
|
indents.pop();
|
||||||
|
nexts.push_back(Token{TK("@dedent"), token_start, 0, current_line});
|
||||||
|
}
|
||||||
|
if(spaces != indents.top()){
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
char eatchar() {
|
||||||
|
char c = peekchar();
|
||||||
|
if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline");
|
||||||
|
curr_char++;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
char eatchar_include_newline() {
|
||||||
|
char c = peekchar();
|
||||||
|
curr_char++;
|
||||||
|
if (c == '\n'){
|
||||||
|
current_line++;
|
||||||
|
src->line_starts.push_back(curr_char);
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
int eat_name() {
|
||||||
|
curr_char--;
|
||||||
|
while(true){
|
||||||
|
uint8_t c = peekchar();
|
||||||
|
int u8bytes = 0;
|
||||||
|
if((c & 0b10000000) == 0b00000000) u8bytes = 1;
|
||||||
|
else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
|
||||||
|
else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
|
||||||
|
else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
|
||||||
|
else return 1;
|
||||||
|
if(u8bytes == 1){
|
||||||
|
if(isalpha(c) || c=='_' || isdigit(c)) {
|
||||||
|
curr_char++;
|
||||||
|
continue;
|
||||||
|
}else{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// handle multibyte char
|
||||||
|
std::string u8str(curr_char, u8bytes);
|
||||||
|
if(u8str.size() != u8bytes) return 2;
|
||||||
|
uint32_t value = 0;
|
||||||
|
for(int k=0; k < u8bytes; k++){
|
||||||
|
uint8_t b = u8str[k];
|
||||||
|
if(k==0){
|
||||||
|
if(u8bytes == 2) value = (b & 0b00011111) << 6;
|
||||||
|
else if(u8bytes == 3) value = (b & 0b00001111) << 12;
|
||||||
|
else if(u8bytes == 4) value = (b & 0b00000111) << 18;
|
||||||
|
}else{
|
||||||
|
value |= (b & 0b00111111) << (6*(u8bytes-k-1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(is_unicode_Lo_char(value)) curr_char += u8bytes;
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
|
||||||
|
int length = (int)(curr_char - token_start);
|
||||||
|
if(length == 0) return 3;
|
||||||
|
std::string_view name(token_start, length);
|
||||||
|
|
||||||
|
if(src->mode == JSON_MODE){
|
||||||
|
if(name == "true"){
|
||||||
|
add_token(TK("True"));
|
||||||
|
} else if(name == "false"){
|
||||||
|
add_token(TK("False"));
|
||||||
|
} else if(name == "null"){
|
||||||
|
add_token(TK("None"));
|
||||||
|
} else {
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(kTokenKwMap.count(name)){
|
||||||
|
if(name == "not"){
|
||||||
|
if(strncmp(curr_char, " in", 3) == 0){
|
||||||
|
curr_char += 3;
|
||||||
|
add_token(TK("not in"));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}else if(name == "is"){
|
||||||
|
if(strncmp(curr_char, " not", 4) == 0){
|
||||||
|
curr_char += 4;
|
||||||
|
add_token(TK("is not"));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
add_token(kTokenKwMap.at(name));
|
||||||
|
} else {
|
||||||
|
add_token(TK("@id"));
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void skip_line_comment() {
|
||||||
|
char c;
|
||||||
|
while ((c = peekchar()) != '\0') {
|
||||||
|
if (c == '\n') return;
|
||||||
|
eatchar();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool matchchar(char c) {
|
||||||
|
if (peekchar() != c) return false;
|
||||||
|
eatchar_include_newline();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_token(TokenIndex type, TokenValue value={}) {
|
||||||
|
switch(type){
|
||||||
|
case TK("{"): case TK("["): case TK("("): brackets_level++; break;
|
||||||
|
case TK(")"): case TK("]"): case TK("}"): brackets_level--; break;
|
||||||
|
}
|
||||||
|
nexts.push_back( Token{
|
||||||
|
type,
|
||||||
|
token_start,
|
||||||
|
(int)(curr_char - token_start),
|
||||||
|
current_line - ((type == TK("@eol")) ? 1 : 0),
|
||||||
|
value
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_token_2(char c, TokenIndex one, TokenIndex two) {
|
||||||
|
if (matchchar(c)) add_token(two);
|
||||||
|
else add_token(one);
|
||||||
|
}
|
||||||
|
|
||||||
|
Str eat_string_until(char quote, bool raw) {
|
||||||
|
bool quote3 = match_n_chars(2, quote);
|
||||||
|
std::vector<char> buff;
|
||||||
|
while (true) {
|
||||||
|
char c = eatchar_include_newline();
|
||||||
|
if (c == quote){
|
||||||
|
if(quote3 && !match_n_chars(2, quote)){
|
||||||
|
buff.push_back(c);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (c == '\0'){
|
||||||
|
if(quote3 && src->mode == REPL_MODE){
|
||||||
|
throw NeedMoreLines(false);
|
||||||
|
}
|
||||||
|
SyntaxError("EOL while scanning string literal");
|
||||||
|
}
|
||||||
|
if (c == '\n'){
|
||||||
|
if(!quote3) SyntaxError("EOL while scanning string literal");
|
||||||
|
else{
|
||||||
|
buff.push_back(c);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!raw && c == '\\') {
|
||||||
|
switch (eatchar_include_newline()) {
|
||||||
|
case '"': buff.push_back('"'); break;
|
||||||
|
case '\'': buff.push_back('\''); break;
|
||||||
|
case '\\': buff.push_back('\\'); break;
|
||||||
|
case 'n': buff.push_back('\n'); break;
|
||||||
|
case 'r': buff.push_back('\r'); break;
|
||||||
|
case 't': buff.push_back('\t'); break;
|
||||||
|
default: SyntaxError("invalid escape char");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
buff.push_back(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Str(buff.data(), buff.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
void eat_string(char quote, StringType type) {
|
||||||
|
Str s = eat_string_until(quote, type == RAW_STRING);
|
||||||
|
if(type == F_STRING){
|
||||||
|
add_token(TK("@fstr"), s);
|
||||||
|
}else{
|
||||||
|
add_token(TK("@str"), s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void eat_number() {
|
||||||
|
static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
|
||||||
|
std::smatch m;
|
||||||
|
|
||||||
|
const char* i = token_start;
|
||||||
|
while(*i != '\n' && *i != '\0') i++;
|
||||||
|
std::string s = std::string(token_start, i);
|
||||||
|
|
||||||
|
try{
|
||||||
|
if (std::regex_search(s, m, pattern)) {
|
||||||
|
// here is m.length()-1, since the first char was eaten by lex_token()
|
||||||
|
for(int j=0; j<m.length()-1; j++) eatchar();
|
||||||
|
|
||||||
|
int base = 10;
|
||||||
|
size_t size;
|
||||||
|
if (m[1].matched) base = 16;
|
||||||
|
if (m[2].matched) {
|
||||||
|
if(base == 16) SyntaxError("hex literal should not contain a dot");
|
||||||
|
add_token(TK("@num"), S_TO_FLOAT(m[0], &size));
|
||||||
|
} else {
|
||||||
|
add_token(TK("@num"), S_TO_INT(m[0], &size, base));
|
||||||
|
}
|
||||||
|
if (size != m.length()) UNREACHABLE();
|
||||||
|
}
|
||||||
|
}catch(std::exception& _){
|
||||||
|
SyntaxError("invalid number literal");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool lex_one_token() {
|
||||||
|
while (peekchar() != '\0') {
|
||||||
|
token_start = curr_char;
|
||||||
|
char c = eatchar_include_newline();
|
||||||
|
switch (c) {
|
||||||
|
case '\'': case '"': eat_string(c, NORMAL_STRING); return true;
|
||||||
|
case '#': skip_line_comment(); break;
|
||||||
|
case '{': add_token(TK("{")); return true;
|
||||||
|
case '}': add_token(TK("}")); return true;
|
||||||
|
case ',': add_token(TK(",")); return true;
|
||||||
|
case ':': add_token_2(':', TK(":"), TK("::")); return true;
|
||||||
|
case ';': add_token(TK(";")); return true;
|
||||||
|
case '(': add_token(TK("(")); return true;
|
||||||
|
case ')': add_token(TK(")")); return true;
|
||||||
|
case '[': add_token(TK("[")); return true;
|
||||||
|
case ']': add_token(TK("]")); return true;
|
||||||
|
case '@': add_token(TK("@")); return true;
|
||||||
|
case '%': add_token_2('=', TK("%"), TK("%=")); return true;
|
||||||
|
case '&': add_token_2('=', TK("&"), TK("&=")); return true;
|
||||||
|
case '|': add_token_2('=', TK("|"), TK("|=")); return true;
|
||||||
|
case '^': add_token_2('=', TK("^"), TK("^=")); return true;
|
||||||
|
case '?': add_token(TK("?")); return true;
|
||||||
|
case '.': {
|
||||||
|
if(matchchar('.')) {
|
||||||
|
if(matchchar('.')) {
|
||||||
|
add_token(TK("..."));
|
||||||
|
} else {
|
||||||
|
SyntaxError("invalid token '..'");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
add_token(TK("."));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case '=': add_token_2('=', TK("="), TK("==")); return true;
|
||||||
|
case '+': add_token_2('=', TK("+"), TK("+=")); return true;
|
||||||
|
case '>': {
|
||||||
|
if(matchchar('=')) add_token(TK(">="));
|
||||||
|
else if(matchchar('>')) add_token_2('=', TK(">>"), TK(">>="));
|
||||||
|
else add_token(TK(">"));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case '<': {
|
||||||
|
if(matchchar('=')) add_token(TK("<="));
|
||||||
|
else if(matchchar('<')) add_token_2('=', TK("<<"), TK("<<="));
|
||||||
|
else add_token(TK("<"));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case '-': {
|
||||||
|
if(matchchar('=')) add_token(TK("-="));
|
||||||
|
else if(matchchar('>')) add_token(TK("->"));
|
||||||
|
else add_token(TK("-"));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case '!':
|
||||||
|
if(matchchar('=')) add_token(TK("!="));
|
||||||
|
else SyntaxError("expected '=' after '!'");
|
||||||
|
break;
|
||||||
|
case '*':
|
||||||
|
if (matchchar('*')) {
|
||||||
|
add_token(TK("**")); // '**'
|
||||||
|
} else {
|
||||||
|
add_token_2('=', TK("*"), TK("*="));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
case '/':
|
||||||
|
if(matchchar('/')) {
|
||||||
|
add_token_2('=', TK("//"), TK("//="));
|
||||||
|
} else {
|
||||||
|
add_token_2('=', TK("/"), TK("/="));
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
case '\r': break; // just ignore '\r'
|
||||||
|
case ' ': case '\t': eat_spaces(); break;
|
||||||
|
case '\n': {
|
||||||
|
add_token(TK("@eol"));
|
||||||
|
if(!eat_indentation()) IndentationError("unindent does not match any outer indentation level");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
if(c == 'f'){
|
||||||
|
if(matchchar('\'')) {eat_string('\'', F_STRING); return true;}
|
||||||
|
if(matchchar('"')) {eat_string('"', F_STRING); return true;}
|
||||||
|
}else if(c == 'r'){
|
||||||
|
if(matchchar('\'')) {eat_string('\'', RAW_STRING); return true;}
|
||||||
|
if(matchchar('"')) {eat_string('"', RAW_STRING); return true;}
|
||||||
|
}
|
||||||
|
if (c >= '0' && c <= '9') {
|
||||||
|
eat_number();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
switch (eat_name())
|
||||||
|
{
|
||||||
|
case 0: break;
|
||||||
|
case 1: SyntaxError("invalid char: " + std::string(1, c));
|
||||||
|
case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
|
||||||
|
case 3: SyntaxError("@id contains invalid char"); break;
|
||||||
|
case 4: SyntaxError("invalid JSON token"); break;
|
||||||
|
default: UNREACHABLE();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
token_start = curr_char;
|
||||||
|
while(indents.size() > 1){
|
||||||
|
indents.pop();
|
||||||
|
add_token(TK("@dedent"));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
add_token(TK("@eof"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/***** Error Reporter *****/
|
||||||
|
void throw_err(Str type, Str msg){
|
||||||
|
int lineno = current_line;
|
||||||
|
const char* cursor = curr_char;
|
||||||
|
if(peekchar() == '\n'){
|
||||||
|
lineno--;
|
||||||
|
cursor--;
|
||||||
|
}
|
||||||
|
throw_err(type, msg, lineno, cursor);
|
||||||
|
}
|
||||||
|
|
||||||
|
void throw_err(Str type, Str msg, int lineno, const char* cursor){
|
||||||
|
auto e = Exception("SyntaxError", msg);
|
||||||
|
e.st_push(src->snapshot(lineno, cursor));
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
void SyntaxError(Str msg){ throw_err("SyntaxError", msg); }
|
||||||
|
void SyntaxError(){ throw_err("SyntaxError", "invalid syntax"); }
|
||||||
|
void IndentationError(Str msg){ throw_err("IndentationError", msg); }
|
||||||
|
|
||||||
|
Lexer(shared_ptr<SourceData> src) {
|
||||||
|
this->src = src;
|
||||||
|
this->token_start = src->source;
|
||||||
|
this->curr_char = src->source;
|
||||||
|
this->nexts.push_back(Token{TK("@sof"), token_start, 0, current_line});
|
||||||
|
this->indents.push(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Token> run() {
|
||||||
|
if(used) UNREACHABLE();
|
||||||
|
used = true;
|
||||||
|
while (lex_one_token());
|
||||||
|
return std::move(nexts);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace pkpy
|
302
src/parser.h
302
src/parser.h
@ -1,302 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include "error.h"
|
|
||||||
#include "obj.h"
|
|
||||||
|
|
||||||
namespace pkpy{
|
|
||||||
|
|
||||||
typedef uint8_t TokenIndex;
|
|
||||||
|
|
||||||
constexpr const char* kTokens[] = {
|
|
||||||
"@error", "@eof", "@eol", "@sof",
|
|
||||||
".", ",", ":", ";", "#", "(", ")", "[", "]", "{", "}", "%", "::",
|
|
||||||
"+", "-", "*", "/", "//", "**", "=", ">", "<", "...", "->",
|
|
||||||
"<<", ">>", "&", "|", "^", "?", "@",
|
|
||||||
"==", "!=", ">=", "<=",
|
|
||||||
"+=", "-=", "*=", "/=", "//=", "%=", "&=", "|=", "^=", ">>=", "<<=",
|
|
||||||
/** KW_BEGIN **/
|
|
||||||
"class", "import", "as", "def", "lambda", "pass", "del", "from", "with", "yield",
|
|
||||||
"None", "in", "is", "and", "or", "not", "True", "False", "global", "try", "except", "finally",
|
|
||||||
"goto", "label", // extended keywords, not available in cpython
|
|
||||||
"while", "for", "if", "elif", "else", "break", "continue", "return", "assert", "raise",
|
|
||||||
/** KW_END **/
|
|
||||||
"is not", "not in",
|
|
||||||
"@id", "@num", "@str", "@fstr",
|
|
||||||
"@indent", "@dedent"
|
|
||||||
};
|
|
||||||
|
|
||||||
const TokenIndex kTokenCount = sizeof(kTokens) / sizeof(kTokens[0]);
|
|
||||||
|
|
||||||
constexpr TokenIndex TK(const char token[]) {
|
|
||||||
for(int k=0; k<kTokenCount; k++){
|
|
||||||
const char* i = kTokens[k];
|
|
||||||
const char* j = token;
|
|
||||||
while(*i && *j && *i == *j) { i++; j++;}
|
|
||||||
if(*i == *j) return k;
|
|
||||||
}
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
|
|
||||||
#define TK_STR(t) kTokens[t]
|
|
||||||
const TokenIndex kTokenKwBegin = TK("class");
|
|
||||||
const TokenIndex kTokenKwEnd = TK("raise");
|
|
||||||
|
|
||||||
const std::map<std::string_view, TokenIndex> kTokenKwMap = [](){
|
|
||||||
std::map<std::string_view, TokenIndex> map;
|
|
||||||
for(int k=kTokenKwBegin; k<=kTokenKwEnd; k++) map[kTokens[k]] = k;
|
|
||||||
return map;
|
|
||||||
}();
|
|
||||||
|
|
||||||
|
|
||||||
struct Token{
|
|
||||||
TokenIndex type;
|
|
||||||
|
|
||||||
const char* start;
|
|
||||||
int length;
|
|
||||||
int line;
|
|
||||||
PyObject* value;
|
|
||||||
|
|
||||||
Str str() const { return Str(start, length);}
|
|
||||||
|
|
||||||
Str info() const {
|
|
||||||
StrStream ss;
|
|
||||||
Str raw = str();
|
|
||||||
if (raw == Str("\n")) raw = "\\n";
|
|
||||||
ss << line << ": " << TK_STR(type) << " '" << raw << "'";
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// https://docs.python.org/3/reference/expressions.html
|
|
||||||
// Operator precedence levels, listed from loosest (PREC_NONE) to tightest
// (PREC_PRIMARY). The relative order of the enumerators is what the
// expression parser compares, so entries must not be reordered.
// https://docs.python.org/3/reference/expressions.html
enum Precedence {
  PREC_NONE,
  PREC_ASSIGNMENT,    // =
  PREC_COMMA,         // ,
  PREC_TERNARY,       // ?:
  PREC_LOGICAL_OR,    // or
  PREC_LOGICAL_AND,   // and
  PREC_LOGICAL_NOT,   // not
  PREC_EQUALITY,      // == !=
  PREC_TEST,          // in / is / is not / not in
  PREC_COMPARISION,   // < > <= >=
  PREC_BITWISE_OR,    // |
  PREC_BITWISE_XOR,   // ^
  PREC_BITWISE_AND,   // &
  PREC_BITWISE_SHIFT, // << >>
  PREC_TERM,          // + -
  PREC_FACTOR,        // * / % //
  PREC_UNARY,         // - not
  PREC_EXPONENT,      // **
  PREC_CALL,          // ()
  PREC_SUBSCRIPT,     // []
  PREC_ATTRIB,        // .index
  PREC_PRIMARY,
};
|
|
||||||
|
|
||||||
// The context of the parsing phase for the compiler.
|
|
||||||
// Tokenizer state for one compilation unit. Produces a stream of Token
// objects (buffered in `nexts`) from the raw source text, tracking line
// numbers, indentation levels and bracket nesting. The scanning loop that
// fills `nexts` lives in the compiler, which drives the eat_*/match_*
// helpers below.
struct Parser {
    shared_ptr<SourceData> src;   // source text + per-line index, shared with error reporting

    const char* token_start;      // first byte of the token currently being scanned
    const char* curr_char;        // scan cursor into src->source ('\0' at end of input)
    int current_line = 1;         // 1-based line number of curr_char
    Token prev, curr;             // the two most recent tokens handed to the compiler
    queue<Token> nexts;           // tokens scanned but not yet consumed
    stack<int> indents;           // indentation widths of enclosing blocks; bottom entry is always 0

    int brackets_level = 0;       // nesting depth of (), [], {}; indentation is ignored while > 0

    // Pop and return the next buffered token. At @eof with indentation
    // levels still open, a synthetic @dedent is returned instead; an empty
    // buffer yields an @error token.
    // NOTE(review): the @eof branch pops the @eof token from `nexts` while
    // emitting only one @dedent — presumably the caller re-lexes at end of
    // input so deeper indent stacks still unwind; confirm against the
    // compiler's token loop.
    Token next_token(){
        if(nexts.empty()){
            return Token{TK("@error"), token_start, (int)(curr_char - token_start), current_line};
        }
        Token t = nexts.front();
        if(t.type == TK("@eof") && indents.size()>1){
            nexts.pop();
            indents.pop();
            return Token{TK("@dedent"), token_start, 0, current_line};
        }
        nexts.pop();
        return t;
    }

    // Look at the current character without consuming it.
    char peekchar() const{ return *curr_char; }

    // Consume exactly n consecutive occurrences of c0 (e.g. the quotes of
    // a triple-quoted string). Consumes nothing and returns false on any
    // mismatch or premature end of input.
    bool match_n_chars(int n, char c0){
        const char* c = curr_char;
        for(int i=0; i<n; i++){
            if(*c == '\0') return false;
            if(*c != c0) return false;
            c++;
        }
        for(int i=0; i<n; i++) eatchar_include_newline();
        return true;
    }

    // Consume spaces/tabs and return the indentation width (a tab counts as 4).
    int eat_spaces(){
        int count = 0;
        while (true) {
            switch (peekchar()) {
                case ' ' : count+=1; break;
                case '\t': count+=4; break;
                default: return count;
            }
            eatchar();
        }
    }

    // Handle the leading whitespace of a fresh logical line, pushing
    // @indent/@dedent tokens as the indentation level changes. Returns
    // false on an inconsistent dedent that matches no enclosing level.
    bool eat_indentation(){
        if(brackets_level > 0) return true;   // implicit line joining inside brackets
        int spaces = eat_spaces();
        if(peekchar() == '#') skip_line_comment();
        // blank or comment-only lines do not affect indentation
        if(peekchar() == '\0' || peekchar() == '\n' || peekchar() == '\r') return true;
        // https://docs.python.org/3/reference/lexical_analysis.html#indentation
        if(spaces > indents.top()){
            indents.push(spaces);
            nexts.push(Token{TK("@indent"), token_start, 0, current_line});
        } else if(spaces < indents.top()){
            while(spaces < indents.top()){
                indents.pop();
                nexts.push(Token{TK("@dedent"), token_start, 0, current_line});
            }
            if(spaces != indents.top()){
                return false;   // dedent does not line up with any enclosing level
            }
        }
        return true;
    }

    // Consume one character. Newlines are forbidden here so that line
    // bookkeeping cannot be bypassed — use eatchar_include_newline instead.
    char eatchar() {
        char c = peekchar();
        if(c == '\n') throw std::runtime_error("eatchar() cannot consume a newline");
        curr_char++;
        return c;
    }

    // Consume one character, updating the line counter and the per-line
    // start index when it is a newline.
    char eatchar_include_newline() {
        char c = peekchar();
        curr_char++;
        if (c == '\n'){
            current_line++;
            src->line_starts.push_back(curr_char);
        }
        return c;
    }

    // Scan an identifier or keyword starting at token_start. The caller has
    // already consumed the first byte, hence the initial curr_char--.
    // Multi-byte UTF-8 sequences are accepted when they decode to a
    // character for which is_unicode_Lo_char returns true.
    // Returns 0 on success, or a nonzero error code:
    //   1 = invalid UTF-8 lead byte, 2 = truncated UTF-8 sequence,
    //   3 = empty name, 4 = bare identifier in JSON mode.
    int eat_name() {
        curr_char--;
        while(true){
            uint8_t c = peekchar();
            int u8bytes = 0;
            // classify the UTF-8 lead byte to find the sequence length
            if((c & 0b10000000) == 0b00000000) u8bytes = 1;
            else if((c & 0b11100000) == 0b11000000) u8bytes = 2;
            else if((c & 0b11110000) == 0b11100000) u8bytes = 3;
            else if((c & 0b11111000) == 0b11110000) u8bytes = 4;
            else return 1;
            if(u8bytes == 1){
                if(isalpha(c) || c=='_' || isdigit(c)) {
                    curr_char++;
                    continue;
                }else{
                    break;  // ASCII non-identifier char ends the name
                }
            }
            // handle multibyte char
            std::string u8str(curr_char, u8bytes);
            // defensive check; std::string(ptr, n) always has size n, so this
            // should never trigger
            if(u8str.size() != u8bytes) return 2;
            // decode the code point from the continuation bytes
            uint32_t value = 0;
            for(int k=0; k < u8bytes; k++){
                uint8_t b = u8str[k];
                if(k==0){
                    if(u8bytes == 2) value = (b & 0b00011111) << 6;
                    else if(u8bytes == 3) value = (b & 0b00001111) << 12;
                    else if(u8bytes == 4) value = (b & 0b00000111) << 18;
                }else{
                    value |= (b & 0b00111111) << (6*(u8bytes-k-1));
                }
            }
            if(is_unicode_Lo_char(value)) curr_char += u8bytes;
            else break;
        }

        int length = (int)(curr_char - token_start);
        if(length == 0) return 3;
        std::string_view name(token_start, length);

        // JSON mode only admits the three literal names below
        if(src->mode == JSON_MODE){
            if(name == "true"){
                set_next_token(TK("True"));
            } else if(name == "false"){
                set_next_token(TK("False"));
            } else if(name == "null"){
                set_next_token(TK("None"));
            } else {
                return 4;
            }
            return 0;
        }

        if(kTokenKwMap.count(name)){
            // fuse the two-word operators "not in" / "is not" into single tokens
            if(name == "not"){
                if(strncmp(curr_char, " in", 3) == 0){
                    curr_char += 3;
                    set_next_token(TK("not in"));
                    return 0;
                }
            }else if(name == "is"){
                if(strncmp(curr_char, " not", 4) == 0){
                    curr_char += 4;
                    set_next_token(TK("is not"));
                    return 0;
                }
            }
            set_next_token(kTokenKwMap.at(name));
        } else {
            set_next_token(TK("@id"));
        }
        return 0;
    }

    // Skip a '#' comment up to (not including) the trailing newline or EOF.
    void skip_line_comment() {
        char c;
        while ((c = peekchar()) != '\0') {
            if (c == '\n') return;
            eatchar();
        }
    }

    // Consume the current character iff it equals c; report whether it matched.
    bool matchchar(char c) {
        if (peekchar() != c) return false;
        eatchar_include_newline();
        return true;
    }

    // Append a token of the given type spanning [token_start, curr_char) to
    // the buffer, maintaining brackets_level for the bracket tokens.
    void set_next_token(TokenIndex type, PyObject* value=nullptr) {
        switch(type){
            case TK("{"): case TK("["): case TK("("): brackets_level++; break;
            case TK(")"): case TK("]"): case TK("}"): brackets_level--; break;
        }
        nexts.push( Token{
            type,
            token_start,
            (int)(curr_char - token_start),
            // the newline has already been consumed, so @eol belongs to the
            // previous line
            current_line - ((type == TK("@eol")) ? 1 : 0),
            value
        });
    }

    // Two-character operator helper: emit `two` if the next char is c,
    // otherwise emit `one`.
    void set_next_token_2(char c, TokenIndex one, TokenIndex two) {
        if (matchchar(c)) set_next_token(two);
        else set_next_token(one);
    }

    // Seed the token buffer with @sof and the indentation stack with the
    // base level 0.
    Parser(shared_ptr<SourceData> src) {
        this->src = src;
        this->token_start = src->source;
        this->curr_char = src->source;
        this->nexts.push(Token{TK("@sof"), token_start, 0, current_line});
        this->indents.push(0);
    }
};
|
|
||||||
|
|
||||||
} // namespace pkpy
|
|
@ -760,6 +760,7 @@ inline void add_module_gc(VM* vm){
|
|||||||
|
|
||||||
inline void VM::post_init(){
|
inline void VM::post_init(){
|
||||||
init_builtins(this);
|
init_builtins(this);
|
||||||
|
#if !DEBUG_NO_BUILTIN_MODULES
|
||||||
add_module_sys(this);
|
add_module_sys(this);
|
||||||
add_module_time(this);
|
add_module_time(this);
|
||||||
add_module_json(this);
|
add_module_json(this);
|
||||||
@ -793,6 +794,7 @@ inline void VM::post_init(){
|
|||||||
const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])];
|
const PyTypeInfo& info = vm->_all_types[OBJ_GET(Type, args[0])];
|
||||||
return VAR(info.name);
|
return VAR(info.name);
|
||||||
}));
|
}));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace pkpy
|
} // namespace pkpy
|
||||||
|
4
src/vm.h
4
src/vm.h
@ -93,7 +93,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Frame* top_frame() const {
|
Frame* top_frame() const {
|
||||||
#if PK_EXTRA_CHECK
|
#if DEBUG_EXTRA_CHECK
|
||||||
if(callstack.empty()) UNREACHABLE();
|
if(callstack.empty()) UNREACHABLE();
|
||||||
#endif
|
#endif
|
||||||
return callstack.top().get();
|
return callstack.top().get();
|
||||||
@ -166,7 +166,7 @@ public:
|
|||||||
if(_module == nullptr) _module = _main;
|
if(_module == nullptr) _module = _main;
|
||||||
try {
|
try {
|
||||||
CodeObject_ code = compile(source, filename, mode);
|
CodeObject_ code = compile(source, filename, mode);
|
||||||
if(_module == _main) std::cout << disassemble(code) << '\n';
|
// if(_module == _main) std::cout << disassemble(code) << '\n';
|
||||||
return _exec(code, _module);
|
return _exec(code, _module);
|
||||||
}catch (const Exception& e){
|
}catch (const Exception& e){
|
||||||
*_stderr << e.summary() << '\n';
|
*_stderr << e.summary() << '\n';
|
||||||
|
Loading…
x
Reference in New Issue
Block a user