Compare commits

...

8 Commits

Author SHA1 Message Date
blueloveTH
6f4617b83d Update compiler.c 2024-06-28 19:32:48 +08:00
blueloveTH
4c332b7d16 Update compiler.c 2024-06-28 19:14:36 +08:00
blueloveTH
7748d2bf03 some fix 2024-06-28 19:06:27 +08:00
blueloveTH
881e94e8b0 some fix 2024-06-28 16:17:29 +08:00
blueloveTH
72723d24f5 some fix 2024-06-28 15:58:05 +08:00
blueloveTH
1ac08cfc2b some fix 2024-06-28 15:47:11 +08:00
blueloveTH
79b9df3392 some fix 2024-06-28 15:33:23 +08:00
blueloveTH
104c266bc0 move lexer 2024-06-28 15:02:55 +08:00
22 changed files with 1377 additions and 1351 deletions

View File

@ -11,8 +11,8 @@ extern "C" {
#define kPoolObjectArenaSize (256*1024)
#define kPoolObjectMaxBlocks (kPoolObjectArenaSize / kPoolObjectBlockSize)
void Pools_initialize();
void Pools_finalize();
void pk_MemoryPools__initialize();
void pk_MemoryPools__finalize();
void* PoolExpr_alloc();
void PoolExpr_dealloc(void*);

View File

@ -34,6 +34,7 @@ c11_vector c11_vector__copy(const c11_vector* self);
void c11_vector__reserve(c11_vector* self, int capacity);
void c11_vector__clear(c11_vector* self);
void* c11_vector__emplace(c11_vector* self);
c11_array c11_vector__submit(c11_vector* self);
#define c11__getitem(T, self, index) (((T*)(self)->data)[index])
#define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value;

View File

@ -0,0 +1,18 @@
#pragma once
#include "pocketpy/common/vector.h"
#include "pocketpy/compiler/lexer.h"
#include "pocketpy/objects/sourcedata.h"
#include "pocketpy/objects/codeobject.h"
#ifdef __cplusplus
extern "C" {
#endif
Error* pk_compile(pk_SourceData_ src, CodeObject* out);
void pk_Compiler__initialize();
#define pk_Compiler__finalize() // do nothing
#ifdef __cplusplus
}
#endif

View File

@ -1,52 +1,63 @@
// #pragma once
#pragma once
// #include <stdbool.h>
// #include "pocketpy/common/memorypool.h"
// #include "pocketpy/compiler/lexer.h"
#include <stdbool.h>
#include "pocketpy/common/memorypool.h"
#include "pocketpy/compiler/lexer.h"
#include "pocketpy/objects/codeobject.h"
// #ifdef __cplusplus
// extern "C" {
// #endif
#ifdef __cplusplus
extern "C" {
#endif
// struct pk_Expr;
// struct pk_CodeEmitContext;
typedef struct pk_Expr pk_Expr;
typedef struct pk_CodeEmitContext pk_CodeEmitContext;
// struct pk_ExprVt{
// void (*dtor)(pk_Expr*);
// /* reflections */
// bool (*is_literal)(const pk_Expr*);
// bool (*is_json_object)(const pk_Expr*);
// bool (*is_attrib)(const pk_Expr*);
// bool (*is_subscr)(const pk_Expr*);
// bool (*is_compare)(const pk_Expr*);
// int (*star_level)(const pk_Expr*);
// bool (*is_tuple)(const pk_Expr*);
// bool (*is_name)(const pk_Expr*);
// /* emit */
// void (*emit_)(pk_Expr*, pk_CodeEmitContext*);
// bool (*emit_del)(pk_Expr*, pk_CodeEmitContext*);
// bool (*emit_store)(pk_Expr*, pk_CodeEmitContext*);
// void (*emit_inplace)(pk_Expr*, pk_CodeEmitContext*);
// bool (*emit_store_inplace)(pk_Expr*, pk_CodeEmitContext*);
// };
typedef struct pk_ExprVt{
void (*dtor)(pk_Expr*);
/* reflections */
bool (*is_literal)(const pk_Expr*);
bool (*is_json_object)(const pk_Expr*);
bool (*is_attrib)(const pk_Expr*);
bool (*is_subscr)(const pk_Expr*);
bool (*is_compare)(const pk_Expr*);
int (*star_level)(const pk_Expr*);
bool (*is_tuple)(const pk_Expr*);
bool (*is_name)(const pk_Expr*);
/* emit */
void (*emit_)(pk_Expr*, pk_CodeEmitContext*);
bool (*emit_del)(pk_Expr*, pk_CodeEmitContext*);
bool (*emit_store)(pk_Expr*, pk_CodeEmitContext*);
void (*emit_inplace)(pk_Expr*, pk_CodeEmitContext*);
bool (*emit_store_inplace)(pk_Expr*, pk_CodeEmitContext*);
} pk_ExprVt;
// typedef struct pk_Expr{
// pk_ExprVt* vt;
// int line;
// } pk_Expr;
typedef struct pk_Expr{
pk_ExprVt* vt;
int line;
} pk_Expr;
// void pk_ExprVt__ctor(pk_ExprVt* vt);
// void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx);
// bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx);
// bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx);
// void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx);
// bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx);
// void pk_Expr__delete(pk_Expr* self);
void pk_ExprVt__ctor(pk_ExprVt* vt);
void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx);
bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx);
bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx);
void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx);
bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx);
void pk_Expr__delete(pk_Expr* self);
// typedef struct pk_CodeEmitContext{
typedef struct pk_CodeEmitContext{
CodeObject* co; // 1 CodeEmitContext <=> 1 CodeObject*
FuncDecl* func; // optional, weakref
int level;
int curr_iblock;
bool is_compiling_class;
c11_vector/*T=Expr* */ s_expr;
c11_vector/*T=StrName*/ global_names;
c11_smallmap_s2n co_consts_string_dedup_map;
} pk_CodeEmitContext;
// } pk_CodeEmitContext;
void pk_CodeEmitContext__ctor(pk_CodeEmitContext* self, CodeObject* co, FuncDecl* func, int level);
void pk_CodeEmitContext__dtor(pk_CodeEmitContext* self);
// #ifdef __cplusplus
// }
// #endif
#ifdef __cplusplus
}
#endif

View File

@ -53,16 +53,16 @@ struct CodeEmitContext{
int level;
vector<StrName> global_names;
CodeEmitContext(VM* vm, CodeObject* co, int level) : vm(vm), co(co), level(level) {
func = NULL;
c11_smallmap_s2n__ctor(&_co_consts_string_dedup_map);
}
int curr_iblock = 0;
bool is_compiling_class = false;
c11_smallmap_s2n _co_consts_string_dedup_map;
CodeEmitContext(VM* vm, CodeObject* co, int level) : vm(vm), co(co), level(level) {
func = NULL;
c11_smallmap_s2n__ctor(&_co_consts_string_dedup_map);
}
int get_loop() const noexcept;
CodeBlock* enter_block(CodeBlockType type) noexcept;
void exit_block() noexcept;

View File

@ -1,6 +1,8 @@
#pragma once
#include "pocketpy/common/str.h"
#include "pocketpy/common/vector.h"
#include "pocketpy/objects/sourcedata.h"
#include <stdint.h>
#ifdef __cplusplus
@ -34,12 +36,19 @@ typedef enum TokenIndex{
TK__COUNT__
} TokenIndex;
enum TokenValueIndex{
TokenValue_EMPTY = 0,
TokenValue_I64 = 1,
TokenValue_F64 = 2,
TokenValue_STR = 3,
};
typedef struct TokenValue {
int index;
enum TokenValueIndex index; // 0: empty
union {
int64_t _i64; // 0
double _f64; // 1
py_Str _str; // 2
int64_t _i64; // 1
double _f64; // 2
py_Str _str; // 3
};
} TokenValue;
@ -78,28 +87,21 @@ enum Precedence {
PREC_HIGHEST,
};
enum StringType {
NORMAL_STRING,
RAW_STRING,
F_STRING,
NORMAL_BYTES
};
typedef enum IntParsingResult{
IntParsing_SUCCESS,
IntParsing_FAILURE,
IntParsing_OVERFLOW,
} IntParsingResult;
#define is_raw_string_used(t) ((t) == TK_ID || (t) == TK_LONG)
IntParsingResult parse_uint(c11_string text, int64_t* out, int base);
typedef struct pk_TokenDeserializer {
const char* curr;
const char* source;
} pk_TokenDeserializer;
typedef struct Error Error;
void pk_TokenDeserializer__ctor(pk_TokenDeserializer* self, const char* source);
bool pk_TokenDeserializer__match_char(pk_TokenDeserializer* self, char c);
c11_string pk_TokenDeserializer__read_string(pk_TokenDeserializer* self, char c);
py_Str pk_TokenDeserializer__read_string_from_hex(pk_TokenDeserializer* self, char c);
int pk_TokenDeserializer__read_count(pk_TokenDeserializer* self);
int64_t pk_TokenDeserializer__read_uint(pk_TokenDeserializer* self, char c);
double pk_TokenDeserializer__read_float(pk_TokenDeserializer* self, char c);
typedef c11_array pk_TokenArray;
Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens);
Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out_string);
void pk_TokenArray__dtor(pk_TokenArray* self);
#ifdef __cplusplus
}

View File

@ -1,74 +0,0 @@
#pragma once
#include "pocketpy/objects/error.hpp"
#include "pocketpy/objects/sourcedata.h"
#include "pocketpy/compiler/lexer.h"
#include <variant>
namespace pkpy {
struct Lexer {
PK_ALWAYS_PASS_BY_POINTER(Lexer)
VM* vm;
pkpy_SourceData_ src;
const char* token_start;
const char* curr_char;
int current_line = 1;
vector<Token> nexts;
small_vector_2<int, 8> indents;
int brackets_level = 0;
bool used = false;
char peekchar() const noexcept { return *curr_char; }
bool match_n_chars(int n, char c0) noexcept;
bool match_string(const char* s) noexcept;
int eat_spaces() noexcept;
bool eat_indentation() noexcept;
char eatchar() noexcept;
char eatchar_include_newline() noexcept;
void skip_line_comment() noexcept;
bool matchchar(char c) noexcept;
void add_token(TokenIndex type, TokenValue value = {}) noexcept;
void add_token_2(char c, TokenIndex one, TokenIndex two) noexcept;
[[nodiscard]] Error* eat_name() noexcept;
[[nodiscard]] Error* eat_string_until(char quote, bool raw, Str* out) noexcept;
[[nodiscard]] Error* eat_string(char quote, StringType type) noexcept;
[[nodiscard]] Error* eat_number() noexcept;
[[nodiscard]] Error* lex_one_token(bool* eof) noexcept;
/***** Error Reporter *****/
[[nodiscard]] Error* _error(bool lexer_err, const char* type, const char* msg, va_list* args, i64 userdata=0) noexcept;
[[nodiscard]] Error* SyntaxError(const char* fmt, ...) noexcept;
[[nodiscard]] Error* IndentationError(const char* msg) noexcept { return _error(true, "IndentationError", msg, NULL); }
[[nodiscard]] Error* NeedMoreLines() noexcept { return _error(true, "NeedMoreLines", "", NULL, 0); }
[[nodiscard]] Error* run() noexcept;
[[nodiscard]] Error* from_precompiled() noexcept;
[[nodiscard]] Error* precompile(Str* out) noexcept;
Lexer(VM* vm, std::string_view source, const Str& filename, CompileMode mode) noexcept{
src = pkpy_SourceData__rcnew({source.data(), (int)source.size()}, &filename, mode);
this->token_start = py_Str__data(&src->source);
this->curr_char = py_Str__data(&src->source);
}
~Lexer(){
PK_DECREF(src);
}
};
enum class IntParsingResult {
Success,
Failure,
Overflow,
};
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept;
} // namespace pkpy

View File

@ -28,7 +28,7 @@ typedef enum FuncType {
typedef enum NameScope {
NAME_LOCAL,
NAME_GLOBAL,
NAME_GLOBAL_UNKNOWN
NAME_GLOBAL_UNKNOWN,
} NameScope;
typedef enum CodeBlockType {
@ -88,8 +88,8 @@ typedef struct CodeObject {
int end_line;
} CodeObject;
CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name);
void CodeObject__delete(CodeObject* self);
void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_string name);
void CodeObject__dtor(CodeObject* self);
void CodeObject__gc_mark(const CodeObject* self);
typedef struct FuncDeclKwArg{
@ -100,7 +100,7 @@ typedef struct FuncDeclKwArg{
typedef struct FuncDecl {
RefCounted rc;
CodeObject* code; // strong ref
CodeObject code; // strong ref
c11_vector/*T=int*/ args; // indices in co->varnames
c11_vector/*T=KwArg*/ kwargs; // indices in co->varnames

View File

@ -15,6 +15,7 @@ struct pk_SourceData {
RefCounted rc;
enum CompileMode mode;
bool is_precompiled;
bool is_dynamic; // for exec() and eval()
py_Str filename;
py_Str source;
@ -25,7 +26,7 @@ struct pk_SourceData {
typedef struct pk_SourceData* pk_SourceData_;
pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode);
pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode, bool is_dynamic);
bool pk_SourceData__get_line(const struct pk_SourceData* self, int lineno, const char** st, const char** ed);
py_Str pk_SourceData__snapshot(const struct pk_SourceData *self, int lineno, const char *cursor, const char *name);

View File

@ -4,10 +4,11 @@
#include <stdlib.h>
#include <string.h>
void pk_SourceData__ctor(struct pk_SourceData* self,
static void pk_SourceData__ctor(struct pk_SourceData* self,
const char* source,
const char* filename,
enum CompileMode mode) {
enum CompileMode mode,
bool is_dynamic) {
py_Str__ctor(&self->filename, filename);
self->mode = mode;
c11_vector__ctor(&self->line_starts, sizeof(const char*));
@ -30,7 +31,7 @@ void pk_SourceData__ctor(struct pk_SourceData* self,
c11_vector__push(const char*, &self->line_starts, source);
}
void pk_SourceData__dtor(struct pk_SourceData* self) {
static void pk_SourceData__dtor(struct pk_SourceData* self) {
py_Str__dtor(&self->filename);
py_Str__dtor(&self->source);
c11_vector__dtor(&self->line_starts);
@ -41,9 +42,9 @@ void pk_SourceData__dtor(struct pk_SourceData* self) {
c11_vector__dtor(&self->_precompiled_tokens);
}
pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode) {
pk_SourceData_ pk_SourceData__rcnew(const char* source, const char* filename, enum CompileMode mode, bool is_dynamic) {
pk_SourceData_ self = malloc(sizeof(struct pk_SourceData));
pk_SourceData__ctor(self, source, filename, mode);
pk_SourceData__ctor(self, source, filename, mode, is_dynamic);
self->rc.count = 1;
self->rc.dtor = (void(*)(void*))pk_SourceData__dtor;
return self;

View File

@ -152,12 +152,13 @@ void pk_SStream__write_any(pk_SStream* self, const char* fmt, const pk_AnyStr* a
py_Str pk_SStream__submit(pk_SStream* self) {
c11_vector__push(char, &self->data, '\0');
c11_array a = c11_vector__submit(&self->data);
// TODO: optimize c11__isascii
py_Str retval = {
.size = self->data.count - 1,
.is_ascii = c11__isascii((char*)self->data.data, self->data.count),
.size = a.count - 1,
.is_ascii = c11__isascii((char*)a.data, a.count),
.is_sso = false,
._ptr = (char*)self->data.data
._ptr = (char*)a.data
};
return retval;
}

View File

@ -62,3 +62,15 @@ void* c11_vector__emplace(c11_vector* self){
self->count++;
return p;
}
// Transfer ownership of the vector's buffer to the caller as a c11_array.
// The vector itself is reset to the empty state (no data, no capacity),
// so the returned array is the sole owner of the allocation.
c11_array c11_vector__submit(c11_vector* self){
    c11_array out;
    out.data = self->data;
    out.count = self->count;
    out.elem_size = self->elem_size;
    // Detach the buffer from the vector.
    self->data = NULL;
    self->count = 0;
    self->capacity = 0;
    return out;
}

307
src/compiler/compiler.c Normal file
View File

@ -0,0 +1,307 @@
#include "pocketpy/compiler/compiler.h"
#include "pocketpy/compiler/expr.h"
#include "pocketpy/compiler/lexer.h"
typedef struct pk_Compiler pk_Compiler;
typedef Error* (*PrattCallback)(pk_Compiler* self);
typedef struct PrattRule {
PrattCallback prefix;
PrattCallback infix;
enum Precedence precedence;
} PrattRule;
static PrattRule rules[TK__COUNT__];
typedef struct pk_Compiler {
pk_SourceData_ src; // weakref
pk_TokenArray tokens;
int i;
c11_vector/*T=CodeEmitContext*/ contexts;
} pk_Compiler;
// Initialize a compiler over a pre-lexed token stream.
// `src` is a weak reference (not retained); `tokens` ownership is taken
// and released in pk_Compiler__dtor.
static void pk_Compiler__ctor(pk_Compiler *self, pk_SourceData_ src, pk_TokenArray tokens){
self->src = src;
self->tokens = tokens;
self->i = 0;  // cursor into `tokens`
c11_vector__ctor(&self->contexts, sizeof(pk_CodeEmitContext));
}
// Release the token array and the context-stack storage.
// NOTE(review): this frees only the vector's buffer; any pk_CodeEmitContext
// still on the stack is presumably dtor'd when popped — confirm.
static void pk_Compiler__dtor(pk_Compiler *self){
pk_TokenArray__dtor(&self->tokens);
c11_vector__dtor(&self->contexts);
}
/**************************************/
#define tk(i) c11__getitem(Token, &self->tokens, i)
#define prev() tk(self->i - 1)
#define curr() tk(self->i)
#define next() tk(self->i + 1)
#define err() (self->i == self->tokens.count ? prev() : curr())
#define advance() self->i++
#define mode() self->src->mode
#define ctx() c11_vector__back(pk_CodeEmitContext, &self->contexts)
#define match_newlines() match_newlines_repl(self, NULL)
#define consume(expected) if(!match(expected)) return SyntaxError("expected '%s', got '%s'", pk_TokenSymbols[expected], pk_TokenSymbols[curr().type]);
#define consume_end_stmt() if(!match_end_stmt()) return SyntaxError("expected statement end")
#define check_newlines_repl() { bool __nml; match_newlines_repl(self, &__nml); if(__nml) return NeedMoreLines(); }
#define check(B) if((err = B)) return err
// Scope of a name at the current compile position: inside any nested
// context it is local; at top level it is global, except for dynamic
// sources (exec/eval) where globals cannot be resolved statically.
static NameScope name_scope(pk_Compiler* self) {
    if(self->contexts.count > 1) return NAME_LOCAL;
    return self->src->is_dynamic ? NAME_GLOBAL_UNKNOWN : NAME_GLOBAL;
}
// Stub error reporter: the format string and varargs are currently ignored
// and NULL (i.e. "no error") is returned.
// NOTE(review): placeholder during the C port — real Error construction TBD.
static Error* SyntaxError(const char* fmt, ...){
return NULL;
}
// Stub "need more lines" signal for REPL mode; currently returns NULL.
// NOTE(review): placeholder during the C port — real Error construction TBD.
static Error* NeedMoreLines(){
return NULL;
}
/* Matchers */
// True if the current token can begin an expression, i.e. it has a Pratt
// prefix rule. A ':' only counts when slices are allowed (subscript context).
static bool is_expression(pk_Compiler* self, bool allow_slice){
PrattCallback prefix = rules[curr().type].prefix;
return prefix && (allow_slice || curr().type != TK_COLON);
}
#define match(expected) (curr().type == expected ? (++self->i) : 0)
// Consume a run of EOL tokens, reporting whether any were consumed.
// If `need_more_lines` is non-NULL it is set to true when, in REPL mode,
// the cursor now sits on EOF (the statement is incomplete).
static bool match_newlines_repl(pk_Compiler* self, bool* need_more_lines){
    bool saw_eol = false;
    while(curr().type == TK_EOL) {
        advance();
        saw_eol = true;
    }
    if(need_more_lines) {
        *need_more_lines = (mode() == REPL_MODE && curr().type == TK_EOF);
    }
    return saw_eol;
}
// A statement ends at ';' (followed by optional newlines), at one or more
// newlines, at EOF, or at a dedent. Returns false otherwise.
static bool match_end_stmt(pk_Compiler* self) {
    if(match(TK_SEMICOLON)) {
        match_newlines();
        return true;
    }
    if(match_newlines()) return true;
    return curr().type == TK_EOF || curr().type == TK_DEDENT;
}
/* Expression Callbacks */
static Error* exprLiteral(pk_Compiler* self);
static Error* exprLong(pk_Compiler* self);
static Error* exprImag(pk_Compiler* self);
static Error* exprBytes(pk_Compiler* self);
static Error* exprFString(pk_Compiler* self);
static Error* exprLambda(pk_Compiler* self);
static Error* exprOr(pk_Compiler* self);
static Error* exprAnd(pk_Compiler* self);
static Error* exprTernary(pk_Compiler* self);
static Error* exprBinaryOp(pk_Compiler* self);
static Error* exprNot(pk_Compiler* self);
static Error* exprUnaryOp(pk_Compiler* self);
static Error* exprGroup(pk_Compiler* self);
static Error* exprList(pk_Compiler* self);
static Error* exprMap(pk_Compiler* self);
static Error* exprCall(pk_Compiler* self);
static Error* exprName(pk_Compiler* self);
static Error* exprAttrib(pk_Compiler* self);
static Error* exprSlice0(pk_Compiler* self);
static Error* exprSlice1(pk_Compiler* self);
static Error* exprSubscr(pk_Compiler* self);
static Error* exprLiteral0(pk_Compiler* self);
/* Expression */
// Pratt parse: parse one expression whose operators bind at least as
// tightly as `precedence`. Rule callbacks push parsed sub-expressions
// onto the current context's expression stack.
static Error* parse_expression(pk_Compiler* self, int precedence, bool allow_slice){
PrattCallback prefix = rules[curr().type].prefix;
if(!prefix || (curr().type == TK_COLON && !allow_slice)) {
return SyntaxError("expected an expression, got %s", pk_TokenSymbols[curr().type]);
}
advance();
Error* err;
check(prefix(self));
// Precedence climb: any token whose rule has precedence set must also
// have an infix callback (asserted below).
while(rules[curr().type].precedence >= precedence && (allow_slice || curr().type != TK_COLON)) {
TokenIndex op = curr().type;
advance();
PrattCallback infix = rules[op].infix;
assert(infix != NULL);
check(infix(self));
}
return NULL;
}
// Parse a single (non-tuple) expression; ':' is not treated as a slice here.
static Error* EXPR(pk_Compiler* self) {
return parse_expression(self, PREC_LOWEST + 1, false);
}
// Parse an expression, and if a ',' follows, keep parsing a comma-separated
// tuple (a trailing comma is permitted: the loop breaks when no expression
// can start). Inside brackets, newlines are skipped between elements.
static Error* EXPR_TUPLE(pk_Compiler* self, bool allow_slice){
Error* err;
check(parse_expression(self, PREC_LOWEST + 1, allow_slice));
if(!match(TK_COMMA)) return NULL;
// tuple expression
int count = 1;
do {
if(curr().brackets_level) check_newlines_repl()
if(!is_expression(self, allow_slice)) break;
check(parse_expression(self, PREC_LOWEST + 1, allow_slice));
count += 1;
if(curr().brackets_level) check_newlines_repl();
} while(match(TK_COMMA));
// NOTE(review): tuple construction from the `count` stacked expressions is
// still the commented-out C++ code below — not yet ported to C.
// TupleExpr* e = make_expr<TupleExpr>(count);
// for(int i=count-1; i>=0; i--)
// e->items[i] = ctx()->s_popx();
// ctx()->s_push(e);
return NULL;
}
// special case for `for loop` and `comp`
// Parse one or more comma-separated identifiers (targets of a `for` loop or
// comprehension); multiple names are folded into a tuple expression.
// NOTE(review): this body is still unported C++ — `make_expr<NameExpr>`,
// `ctx()->s_push`, `prev().str()` are template/member syntax that will not
// compile in this .c file. It must be rewritten against the C pk_Expr API.
static Error* EXPR_VARS(pk_Compiler* self){
int count = 0;
do {
consume(TK_ID);
ctx()->s_push(make_expr<NameExpr>(prev().str(), name_scope()));
count += 1;
} while(match(TK_COMMA));
if(count > 1){
TupleExpr* e = make_expr<TupleExpr>(count);
for(int i=count-1; i>=0; i--)
e->items[i] = ctx()->s_popx();
ctx()->s_push(e);
}
return NULL;
}
// Push the top-level emit context for `co` onto the context stack.
// start_line is 1 for a fresh compile; on re-entry it is the line of the
// last consumed token.
static void setup_global_context(pk_Compiler* self, CodeObject* co){
    if(self->i == 0) {
        co->start_line = 1;
    } else {
        co->start_line = prev().line;
    }
    pk_CodeEmitContext* emit_ctx = c11_vector__emplace(&self->contexts);
    // contexts.count has already been bumped by emplace; use it as the level.
    pk_CodeEmitContext__ctor(emit_ctx, co, NULL, self->contexts.count);
}
// Drive compilation of the whole token stream into `out`.
// Must be called exactly once per compiler instance (cursor at 0), and the
// stream must begin with the @sof sentinel token.
// NOTE(review): the mode-specific bodies (EVAL/JSON/statement loop) are still
// commented out — this currently only sets up the global context.
Error* pk_Compiler__compile(pk_Compiler* self, CodeObject* out){
// make sure it is the first time to compile
assert(self->i == 0);
// make sure the first token is @sof
assert(tk(0).type == TK_SOF);
setup_global_context(self, out);
advance(); // skip @sof, so prev() is always valid
match_newlines(); // skip possible leading '\n'
Error* err;
// if(mode() == EVAL_MODE) {
// check(EXPR_TUPLE());
// ctx()->s_emit_top();
// consume(TK_EOF);
// ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
// check(pop_context());
// return NULL;
// } else if(mode() == JSON_MODE) {
// check(EXPR());
// Expr* e = ctx()->s_popx();
// if(!e->is_json_object()){
// return SyntaxError("expect a JSON object, literal or array");
// }
// consume(TK_EOF);
// e->emit_(ctx());
// ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
// check(pop_context());
// return NULL;
// }
// while(!match(TK_EOF)) {
// check(compile_stmt());
// match_newlines();
// }
// check(pop_context());
return NULL;
}
// Compile `src` into `out`. On success returns NULL and `out` holds the
// compiled CodeObject (caller owns it); on failure returns the Error and
// `out` is torn down.
Error* pk_compile(pk_SourceData_ src, CodeObject* out){
    pk_TokenArray tokens;
    Error* err = pk_Lexer__process(src, &tokens);
    if(err) return err;  // lexing failed; `out` was never constructed
    pk_Compiler compiler;
    pk_Compiler__ctor(&compiler, src, tokens);  // takes ownership of `tokens`
    CodeObject__ctor(out, src, py_Str__sv(&src->filename));
    err = pk_Compiler__compile(&compiler, out);
    // BUGFIX: previously `out` was dtor'd unconditionally, destroying the
    // result even on a successful compile. Only tear it down on failure.
    if(err) CodeObject__dtor(out);
    pk_Compiler__dtor(&compiler);
    return err;
}
void pk_Compiler__initialize(){
// clang-format off
// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
#define PK_NO_INFIX NULL, PREC_LOWEST
for(int i = 0; i < TK__COUNT__; i++) rules[i] = { NULL, PK_NO_INFIX };
rules[TK_DOT] = { NULL, exprAttrib, PREC_PRIMARY };
rules[TK_LPAREN] = { exprGroup, exprCall, PREC_PRIMARY };
rules[TK_LBRACKET] = { exprList, exprSubscr, PREC_PRIMARY };
rules[TK_LBRACE] = { exprMap, PK_NO_INFIX };
rules[TK_MOD] = { NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_ADD] = { NULL, exprBinaryOp, PREC_TERM };
rules[TK_SUB] = { exprUnaryOp, exprBinaryOp, PREC_TERM };
rules[TK_MUL] = { exprUnaryOp, exprBinaryOp, PREC_FACTOR };
rules[TK_INVERT] = { exprUnaryOp, NULL, PREC_UNARY };
rules[TK_DIV] = { NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_FLOORDIV] = { NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_POW] = { exprUnaryOp, exprBinaryOp, PREC_EXPONENT };
rules[TK_GT] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_LT] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_EQ] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_NE] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_GE] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_LE] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_IN] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_IS] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_LSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT };
rules[TK_RSHIFT] = { NULL, exprBinaryOp, PREC_BITWISE_SHIFT };
rules[TK_AND] = { NULL, exprBinaryOp, PREC_BITWISE_AND };
rules[TK_OR] = { NULL, exprBinaryOp, PREC_BITWISE_OR };
rules[TK_XOR] = { NULL, exprBinaryOp, PREC_BITWISE_XOR };
rules[TK_DECORATOR] = { NULL, exprBinaryOp, PREC_FACTOR };
rules[TK_IF] = { NULL, exprTernary, PREC_TERNARY };
rules[TK_NOT_IN] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_IS_NOT] = { NULL, exprBinaryOp, PREC_COMPARISION };
rules[TK_AND_KW ] = { NULL, exprAnd, PREC_LOGICAL_AND };
rules[TK_OR_KW] = { NULL, exprOr, PREC_LOGICAL_OR };
rules[TK_NOT_KW] = { exprNot, NULL, PREC_LOGICAL_NOT };
rules[TK_TRUE] = { exprLiteral0, PK_NO_INFIX };
rules[TK_FALSE] = { exprLiteral0, PK_NO_INFIX };
rules[TK_NONE] = { exprLiteral0, PK_NO_INFIX };
rules[TK_DOTDOTDOT] = { exprLiteral0, PK_NO_INFIX };
rules[TK_LAMBDA] = { exprLambda, PK_NO_INFIX };
rules[TK_ID] = { exprName, PK_NO_INFIX };
rules[TK_NUM] = { exprLiteral, PK_NO_INFIX };
rules[TK_STR] = { exprLiteral, PK_NO_INFIX };
rules[TK_FSTR] = { exprFString, PK_NO_INFIX };
rules[TK_LONG] = { exprLong, PK_NO_INFIX };
rules[TK_IMAG] = { exprImag, PK_NO_INFIX };
rules[TK_BYTES] = { exprBytes, PK_NO_INFIX };
rules[TK_COLON] = { exprSlice0, exprSlice1, PREC_PRIMARY };
#undef PK_METHOD
#undef PK_NO_INFIX
// clang-format on
}

View File

@ -17,7 +17,7 @@ PrattRule Compiler::rules[TK__COUNT__];
NameScope Compiler::name_scope() const noexcept{
auto s = contexts.size() > 1 ? NAME_LOCAL : NAME_GLOBAL;
if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_UNKNOWN;
return s;
}
@ -120,61 +120,6 @@ void Compiler::init_pratt_rules() noexcept{
static bool initialized = false;
if(initialized) return;
initialized = true;
// clang-format off
// http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
#define PK_METHOD(name) &Compiler::name
#define PK_NO_INFIX nullptr, PREC_LOWEST
for(int i = 0; i < TK__COUNT__; i++) rules[i] = { nullptr, PK_NO_INFIX };
rules[TK_DOT] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY };
rules[TK_LPAREN] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY };
rules[TK_LBRACKET] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY };
rules[TK_LBRACE] = { PK_METHOD(exprMap), PK_NO_INFIX };
rules[TK_MOD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_ADD] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM };
rules[TK_SUB] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM };
rules[TK_MUL] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_INVERT] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY };
rules[TK_DIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_FLOORDIV] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_POW] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT };
rules[TK_GT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_EQ] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_NE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_GE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LE] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IS] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_LSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK_RSHIFT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
rules[TK_AND] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND };
rules[TK_OR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR };
rules[TK_XOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR };
rules[TK_DECORATOR] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
rules[TK_IF] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY };
rules[TK_NOT_IN] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_IS_NOT] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
rules[TK_AND_KW ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND };
rules[TK_OR_KW] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR };
rules[TK_NOT_KW] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
rules[TK_TRUE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_FALSE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_NONE] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_DOTDOTDOT] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
rules[TK_LAMBDA] = { PK_METHOD(exprLambda), PK_NO_INFIX };
rules[TK_ID] = { PK_METHOD(exprName), PK_NO_INFIX };
rules[TK_NUM] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
rules[TK_STR] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
rules[TK_FSTR] = { PK_METHOD(exprFString), PK_NO_INFIX };
rules[TK_LONG] = { PK_METHOD(exprLong), PK_NO_INFIX };
rules[TK_IMAG] = { PK_METHOD(exprImag), PK_NO_INFIX };
rules[TK_BYTES] = { PK_METHOD(exprBytes), PK_NO_INFIX };
rules[TK_COLON] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY };
#undef PK_METHOD
#undef PK_NO_INFIX
// clang-format on
}
bool Compiler::match(TokenIndex expected) noexcept{

View File

@ -1,59 +1,79 @@
// #include "pocketpy/compiler/expr.h"
// #include "pocketpy/common/memorypool.h"
#include "pocketpy/compiler/expr.h"
#include "pocketpy/common/memorypool.h"
#include "pocketpy/common/strname.h"
// static bool default_false(const pk_Expr*) { return false; }
// static int default_zero(const pk_Expr*) { return 0; }
// static void default_dtor(pk_Expr*) {}
// Shared no-op defaults used to pre-fill pk_ExprVt slots (see pk_ExprVt__ctor).
static bool default_false(const pk_Expr* e) { return false; }
static int default_zero(const pk_Expr* e) { return 0; }
static void default_dtor(pk_Expr* e) {}
// void pk_ExprVt__ctor(pk_ExprVt* vt){
// vt->dtor = default_dtor;
// vt->is_literal = default_false;
// vt->is_json_object = default_false;
// vt->is_attrib = default_false;
// vt->is_subscr = default_false;
// vt->is_compare = default_false;
// vt->star_level = default_zero;
// vt->is_tuple = default_false;
// vt->is_name = default_false;
// vt->emit_ = NULL; // must be set
// vt->emit_del = NULL;
// vt->emit_store = NULL;
// vt->emit_inplace = NULL;
// vt->emit_store_inplace = NULL;
// }
// Fill a vtable with safe defaults. Every reflection slot answers
// false/zero and the dtor is a no-op; emit slots default to NULL so the
// pk_Expr__emit_* wrappers can detect "not provided". `emit_` is the one
// slot every concrete expression must override.
void pk_ExprVt__ctor(pk_ExprVt* vt){
vt->dtor = default_dtor;
vt->is_literal = default_false;
vt->is_json_object = default_false;
vt->is_attrib = default_false;
vt->is_subscr = default_false;
vt->is_compare = default_false;
vt->star_level = default_zero;
vt->is_tuple = default_false;
vt->is_name = default_false;
vt->emit_ = NULL; // must be set
vt->emit_del = NULL;
vt->emit_store = NULL;
vt->emit_inplace = NULL;
vt->emit_store_inplace = NULL;
}
// void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx){
// assert(self->vt->emit_);
// self->vt->emit_(self, ctx);
// }
// Emit bytecode for this expression. emit_ is mandatory (asserted).
void pk_Expr__emit_(pk_Expr* self, pk_CodeEmitContext* ctx){
assert(self->vt->emit_);
self->vt->emit_(self, ctx);
}
// bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx){
// if(!self->vt->emit_del) return false;
// return self->vt->emit_del(self, ctx);
// }
// Emit a delete of this expression; returns false if the expression kind
// does not support deletion (no emit_del slot).
bool pk_Expr__emit_del(pk_Expr* self, pk_CodeEmitContext* ctx){
if(!self->vt->emit_del) return false;
return self->vt->emit_del(self, ctx);
}
// bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx){
// if(!self->vt->emit_store) return false;
// return self->vt->emit_store(self, ctx);
// }
// Emit a store into this expression; returns false if it is not a valid
// assignment target (no emit_store slot).
bool pk_Expr__emit_store(pk_Expr* self, pk_CodeEmitContext* ctx){
if(!self->vt->emit_store) return false;
return self->vt->emit_store(self, ctx);
}
// void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){
// if(!self->vt->emit_inplace){
// pk_Expr__emit_(self, ctx);
// return;
// }
// self->vt->emit_inplace(self, ctx);
// }
// Emit the in-place (augmented assignment) form, falling back to the plain
// emit when the expression provides no specialized emit_inplace.
void pk_Expr__emit_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){
if(!self->vt->emit_inplace){
pk_Expr__emit_(self, ctx);
return;
}
self->vt->emit_inplace(self, ctx);
}
// bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){
// if(!self->vt->emit_store_inplace){
// return pk_Expr__emit_store(self, ctx);
// }
// return self->vt->emit_store_inplace(self, ctx);
// }
// Emit the in-place store form, falling back to the plain store when no
// specialized emit_store_inplace is provided.
bool pk_Expr__emit_store_inplace(pk_Expr* self, pk_CodeEmitContext* ctx){
if(!self->vt->emit_store_inplace){
return pk_Expr__emit_store(self, ctx);
}
return self->vt->emit_store_inplace(self, ctx);
}
// void pk_Expr__delete(pk_Expr* self){
// if(!self) return;
// self->vt->dtor(self);
// PoolExpr_dealloc(self);
// }
// Destroy an expression: run its virtual dtor, then return the node to the
// expression pool. NULL is tolerated (no-op).
void pk_Expr__delete(pk_Expr* self){
if(!self) return;
self->vt->dtor(self);
PoolExpr_dealloc(self);
}
/* CodeEmitContext */
// Initialize an emit context for one CodeObject. `func` is an optional
// weak reference; `level` is the nesting depth on the compiler's stack.
void pk_CodeEmitContext__ctor(pk_CodeEmitContext* self, CodeObject* co, FuncDecl* func, int level){
    self->co = co;
    self->func = func;
    self->level = level;
    self->curr_iblock = 0;
    self->is_compiling_class = false;
    // Owned containers, released in pk_CodeEmitContext__dtor.
    c11_smallmap_s2n__ctor(&self->co_consts_string_dedup_map);
    c11_vector__ctor(&self->s_expr, sizeof(pk_Expr*));
    c11_vector__ctor(&self->global_names, sizeof(StrName));
}
// Release the containers owned by the context.
// NOTE(review): any pk_Expr* still on s_expr is NOT deleted here — confirm
// callers drain the expression stack before dtor.
void pk_CodeEmitContext__dtor(pk_CodeEmitContext* self){
c11_vector__dtor(&self->s_expr);
c11_vector__dtor(&self->global_names);
c11_smallmap_s2n__dtor(&self->co_consts_string_dedup_map);
}

View File

@ -1,8 +1,855 @@
#include "pocketpy/common/config.h"
#include "pocketpy/common/str.h"
#include "pocketpy/common/smallmap.h"
#include "pocketpy/common/config.h"
#include "pocketpy/common/sstream.h"
#include "pocketpy/common/vector.h"
#include "pocketpy/compiler/lexer.h"
#include "pocketpy/objects/sourcedata.h"
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
/* True for token types whose payload is kept as a raw slice of the source
 * text (identifiers and big-int literals); these need a string table when
 * tokens are serialized/deserialized. */
#define is_raw_string_used(t) ((t) == TK_ID || (t) == TK_LONG)
/* Incremental tokenizer state for one source file. */
typedef struct pk_Lexer{
    pk_SourceData_ src;             // ref-counted source; line_starts is appended to while lexing
    const char* token_start;        // first char of the token currently being scanned
    const char* curr_char;          // scan cursor (next unread char)
    int current_line;               // 1-based line number at curr_char
    int brackets_level;             // nesting depth of (), [], {}; >0 disables indentation handling
    c11_vector/*T=Token*/ nexts;    // tokens produced so far
    c11_vector/*T=int*/ indents;    // indentation width stack; bottom element is 0
} pk_Lexer;
/* Read cursor over a precompiled token dump (the format written by
 * pk_Lexer__process_and_dump). */
typedef struct pk_TokenDeserializer {
    const char* curr;       // current read position
    const char* source;     // start of the serialized buffer
} pk_TokenDeserializer;
void pk_TokenDeserializer__ctor(pk_TokenDeserializer* self, const char* source);
bool pk_TokenDeserializer__match_char(pk_TokenDeserializer* self, char c);
c11_string pk_TokenDeserializer__read_string(pk_TokenDeserializer* self, char c);
py_Str pk_TokenDeserializer__read_string_from_hex(pk_TokenDeserializer* self, char c);
int pk_TokenDeserializer__read_count(pk_TokenDeserializer* self);
int64_t pk_TokenDeserializer__read_uint(pk_TokenDeserializer* self, char c);
double pk_TokenDeserializer__read_float(pk_TokenDeserializer* self, char c);
/* Zero-initialized token payload (index == TokenValue_EMPTY). */
const static TokenValue EmptyTokenValue;
/* Initialize the lexer over `src`. Takes a new reference on the source;
 * released by pk_Lexer__dtor. */
static void pk_Lexer__ctor(pk_Lexer* self, pk_SourceData_ src){
    PK_INCREF(src);
    self->src = src;
    self->curr_char = self->token_start = py_Str__data(&src->source);
    self->current_line = 1;
    self->brackets_level = 0;
    c11_vector__ctor(&self->nexts, sizeof(Token));
    c11_vector__ctor(&self->indents, sizeof(int));
}
/* Drop the source reference and free the token/indent buffers.
 * NOTE: if `nexts` was handed off via c11_vector__submit, this dtor
 * releases only the (now empty) vector shell. */
static void pk_Lexer__dtor(pk_Lexer* self){
    PK_DECREF(self->src);
    c11_vector__dtor(&self->nexts);
    c11_vector__dtor(&self->indents);
}
/* Consume and return the next character. Must never be used to consume a
 * newline — use eatchar_include_newline() for that, since it maintains
 * line bookkeeping. */
static char eatchar(pk_Lexer* self){
    char consumed = *self->curr_char++;
    assert(consumed != '\n'); // eatchar() cannot consume a newline
    return consumed;
}
/* Consume and return the next character; when it is '\n', also advance the
 * line counter and record the new line's start in src->line_starts. */
static char eatchar_include_newline(pk_Lexer* self){
    char c = *self->curr_char;
    self->curr_char++;
    if(c == '\n') {
        self->current_line++;
        c11_vector__push(const char*, &self->src->line_starts, self->curr_char);
    }
    return c;
}
/* Skip a run of spaces/tabs and return the indentation width they cover
 * (a tab counts as 4 columns). */
static int eat_spaces(pk_Lexer* self){
    int width = 0;
    for(;;){
        char c = *self->curr_char;
        if(c == ' '){
            width += 1;
        }else if(c == '\t'){
            width += 4;
        }else{
            return width;
        }
        eatchar(self);
    }
}
/* Consume the next character iff it equals `c`; returns whether it matched. */
static bool matchchar(pk_Lexer* self, char c){
    if(*self->curr_char == c){
        eatchar_include_newline(self);
        return true;
    }
    return false;
}
/* If the next `n` characters are all `c0`, consume them and return true;
 * otherwise consume nothing and return false. */
static bool match_n_chars(pk_Lexer* self, int n, char c0){
    const char* probe = self->curr_char;
    for(int i = 0; i < n; i++, probe++){
        if(*probe == '\0' || *probe != c0) return false;
    }
    while(n-- > 0){
        eatchar_include_newline(self);
    }
    return true;
}
/* Advance the cursor to the next '\n' (or end of input) without consuming
 * the newline itself. */
static void skip_line_comment(pk_Lexer* self){
    for(;;){
        char c = *self->curr_char;
        if(c == '\0' || c == '\n') return;
        eatchar(self);
    }
}
/* Append a token of `type` spanning [token_start, curr_char) with payload
 * `value`. Maintains brackets_level and fuses the two-word operators
 * "not in", "is not" and "yield from" into single tokens. */
static void add_token_with_value(pk_Lexer* self, TokenIndex type, TokenValue value){
    switch(type) {
        case TK_LBRACE:
        case TK_LBRACKET:
        case TK_LPAREN: self->brackets_level++; break;
        case TK_RPAREN:
        case TK_RBRACKET:
        case TK_RBRACE: self->brackets_level--; break;
        default: break;
    }
    Token token = {type,
                   self->token_start,
                   (int)(self->curr_char - self->token_start),
                   // a TK_EOL belongs to the line it terminates
                   self->current_line - ((type == TK_EOL) ? 1 : 0),
                   self->brackets_level,
                   value};
    // handle "not in", "is not", "yield from"
    if(self->nexts.count > 0) {
        Token* back = &c11_vector__back(Token, &self->nexts);
        if(back->type == TK_NOT_KW && type == TK_IN) {
            back->type = TK_NOT_IN;
            return;
        }
        if(back->type == TK_IS && type == TK_NOT_KW) {
            back->type = TK_IS_NOT;
            return;
        }
        if(back->type == TK_YIELD && type == TK_FROM) {
            back->type = TK_YIELD_FROM;
            return;
        }
    }
    // BUG FIX: this push was inside the `if` above, so a token emitted onto
    // an empty stream was silently dropped. It must happen unconditionally.
    c11_vector__push(Token, &self->nexts, token);
}
/* Append a token of `type` with no payload. */
static void add_token(pk_Lexer* self, TokenIndex type){
    add_token_with_value(self, type, EmptyTokenValue);
}
/* Emit `two` when the next char is `c` (consuming it), otherwise `one`.
 * Used for operator pairs such as '+' vs '+='. */
static void add_token_2(pk_Lexer* self, char c, TokenIndex one, TokenIndex two){
    add_token(self, matchchar(self, c) ? two : one);
}
/* After a newline, measure leading whitespace and emit TK_INDENT/TK_DEDENT
 * per https://docs.python.org/3/reference/lexical_analysis.html#indentation.
 * A no-op inside brackets and on blank/comment-only lines.
 * Returns false when a dedent does not match any enclosing level. */
static bool eat_indentation(pk_Lexer* self){
    if(self->brackets_level > 0) return true;
    int spaces = eat_spaces(self);
    if(*self->curr_char == '#') skip_line_comment(self);
    // blank or comment-only line: indentation is ignored
    if(*self->curr_char == '\0' || *self->curr_char == '\n'){
        return true;
    }
    // https://docs.python.org/3/reference/lexical_analysis.html#indentation
    int indents_back = c11_vector__back(int, &self->indents);
    if(spaces > indents_back) {
        c11_vector__push(int, &self->indents, spaces);
        Token t = {TK_INDENT, self->token_start, 0, self->current_line, self->brackets_level, EmptyTokenValue};
        c11_vector__push(Token, &self->nexts, t);
    } else if(spaces < indents_back) {
        // pop one level (emitting one DEDENT) until we are back at or below `spaces`
        do {
            c11_vector__pop(int, &self->indents);
            Token t = {TK_DEDENT, self->token_start, 0, self->current_line, self->brackets_level, EmptyTokenValue};
            c11_vector__push(Token, &self->nexts, t);
            indents_back = c11_vector__back(int, &self->indents);
        } while(spaces < indents_back);
        // the new width must exactly match an enclosing indentation level
        if(spaces != indents_back) { return false; }
    }
    return true;
}
/* Membership test for characters that may appear inside a numeric literal:
 * decimal digits, hex digits (covers 'e' exponents and 0x bodies), '.',
 * and the 'L'/'x'/'o'/'j' radix/suffix letters. */
static bool is_possible_number_char(char c){
    if(c >= '0' && c <= '9') return true;
    if(c >= 'a' && c <= 'f') return true;
    if(c >= 'A' && c <= 'F') return true;
    switch(c){
        case '.': case 'L': case 'x': case 'o': case 'j': return true;
        default: return false;
    }
}
/******************************/
/* Build a SyntaxError for the current position.
 * NOTE(review): this is a stub — the real formatting path is commented out
 * and every call site currently receives NULL, i.e. syntax errors are
 * silently swallowed until `_error` is wired back in. The `fmt` varargs are
 * kept so call sites do not change when it is restored. */
static Error* SyntaxError(const char* fmt, ...){
    // va_list args;
    // va_start(args, fmt);
    // Error* err = _error(true, "SyntaxError", fmt, &args);
    // va_end(args);
    // return err;
    return NULL;
}
/* Signals REPL mode that the statement is incomplete and more input lines
 * are required. NOTE(review): currently stubbed to NULL, like SyntaxError. */
static Error* NeedMoreLines(){
    return NULL;
}
/* Scan an identifier or keyword starting at token_start. Accepts ASCII
 * alphanumerics, '_' and unicode Lo characters. In JSON mode only
 * true/false/null are legal names; otherwise keywords are resolved by
 * binary search over the sorted keyword range of pk_TokenSymbols. */
static Error* eat_name(pk_Lexer* self){
    self->curr_char--;
    while(true) {
        unsigned char c = *self->curr_char;
        int u8bytes = c11__u8_header(c, true);
        if(u8bytes == 0) return SyntaxError("invalid char: %c", c);
        if(u8bytes == 1) {
            if(isalnum(c) || c == '_') {
                self->curr_char++;
                continue;
            } else {
                break;
            }
        }
        // handle multibyte char
        py_Str u8str;
        py_Str__ctor2(&u8str, self->curr_char, u8bytes);
        if(u8str.size != u8bytes){
            // BUG FIX: build the error message BEFORE destroying the string
            // it points into (previously dtor ran first — use-after-free).
            Error* err = SyntaxError("invalid utf8 sequence: %s", py_Str__data(&u8str));
            py_Str__dtor(&u8str);
            return err;
        }
        // decode the UTF-8 sequence into a code point
        uint32_t value = 0;
        for(int k = 0; k < u8bytes; k++) {
            uint8_t b = py_Str__data(&u8str)[k];
            if(k == 0) {
                if(u8bytes == 2)
                    value = (b & 0b00011111) << 6;
                else if(u8bytes == 3)
                    value = (b & 0b00001111) << 12;
                else if(u8bytes == 4)
                    value = (b & 0b00000111) << 18;
            } else {
                value |= (b & 0b00111111) << (6 * (u8bytes - k - 1));
            }
        }
        // BUG FIX: u8str was leaked on every multibyte character
        py_Str__dtor(&u8str);
        if(c11__is_unicode_Lo_char(value))
            self->curr_char += u8bytes;
        else
            break;
    }
    int length = (int)(self->curr_char - self->token_start);
    if(length == 0) return SyntaxError("@id contains invalid char");
    c11_string name = {self->token_start, length};
    if(self->src->mode == JSON_MODE) {
        if(c11_string__cmp3(name, "true") == 0) {
            add_token(self, TK_TRUE);
        } else if(c11_string__cmp3(name, "false") == 0) {
            add_token(self, TK_FALSE);
        } else if(c11_string__cmp3(name, "null") == 0) {
            add_token(self, TK_NONE);
        } else {
            return SyntaxError("invalid JSON token");
        }
        return NULL;
    }
    // binary search inside the keyword range [TK_FALSE, TK__COUNT__)
    const char** KW_BEGIN = pk_TokenSymbols + TK_FALSE;
    int KW_COUNT = TK__COUNT__ - TK_FALSE;
#define less(a, b) (c11_string__cmp3(b, a) > 0)
    int out;
    c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
#undef less
    if(out != KW_COUNT && c11_string__cmp3(name, KW_BEGIN[out]) == 0) {
        add_token(self, (TokenIndex)(out + TK_FALSE));
    } else {
        add_token(self, TK_ID);
    }
    return NULL;
}
/* Scan the body of a (possibly triple-quoted) string literal into *out.
 * The opening quote has already been consumed by the caller. `raw` disables
 * escape processing. On error *out is untouched and the stream buffer is
 * released (previously it leaked on every early error return). */
static Error* eat_string_until(pk_Lexer* self, char quote, bool raw, py_Str* out) {
    // previous char is quote
    bool quote3 = match_n_chars(self, 2, quote);
    pk_SStream buff;
    pk_SStream__ctor(&buff);
    // FIX: free `buff` on every early return (see pocketpy sstream API)
#define RETURN_ERROR(e) do { Error* err_ = (e); pk_SStream__dtor(&buff); return err_; } while(0)
    while(true) {
        char c = eatchar_include_newline(self);
        if(c == quote) {
            // inside a triple-quoted string a single quote char is literal
            if(quote3 && !match_n_chars(self, 2, quote)) {
                pk_SStream__write_char(&buff, c);
                continue;
            }
            break;
        }
        if(c == '\0') {
            if(quote3 && self->src->mode == REPL_MODE){
                RETURN_ERROR(NeedMoreLines());
            }
            RETURN_ERROR(SyntaxError("EOL while scanning string literal"));
        }
        if(c == '\n') {
            if(!quote3){
                RETURN_ERROR(SyntaxError("EOL while scanning string literal"));
            }else{
                pk_SStream__write_char(&buff, c);
                continue;
            }
        }
        if(!raw && c == '\\') {
            switch(eatchar_include_newline(self)) {
                case '"': pk_SStream__write_char(&buff, '"'); break;
                case '\'': pk_SStream__write_char(&buff, '\''); break;
                case '\\': pk_SStream__write_char(&buff, '\\'); break;
                case 'n': pk_SStream__write_char(&buff, '\n'); break;
                case 'r': pk_SStream__write_char(&buff, '\r'); break;
                case 't': pk_SStream__write_char(&buff, '\t'); break;
                case 'b': pk_SStream__write_char(&buff, '\b'); break;
                case 'x': {
                    char hex[3] = {eatchar(self), eatchar(self), '\0'};
                    int code;
                    if(sscanf(hex, "%x", &code) != 1) {
                        RETURN_ERROR(SyntaxError("invalid hex char"));
                    }
                    pk_SStream__write_char(&buff, (char)code);
                } break;
                default: RETURN_ERROR(SyntaxError("invalid escape char"));
            }
        } else {
            pk_SStream__write_char(&buff, c);
        }
    }
#undef RETURN_ERROR
    *out = pk_SStream__submit(&buff);
    return NULL;
}
/* Flavor of string literal being scanned. */
enum StringType {
    NORMAL_STRING,      // "..." or '...'
    RAW_STRING,         // r"..." — escapes are kept verbatim
    F_STRING,           // f"..." — formatted string
    NORMAL_BYTES        // b"..." — bytes literal
};
/* Scan a string literal body and emit the token matching its flavor
 * (TK_FSTR / TK_BYTES / TK_STR). */
static Error* eat_string(pk_Lexer* self, char quote, enum StringType type){
    py_Str s;
    Error* err = eat_string_until(self, quote, type == RAW_STRING, &s);
    if(err) return err;
    TokenIndex token_type;
    switch(type){
        case F_STRING: token_type = TK_FSTR; break;
        case NORMAL_BYTES: token_type = TK_BYTES; break;
        default: token_type = TK_STR; break;
    }
    TokenValue value = {TokenValue_STR, ._str = s};
    add_token_with_value(self, token_type, value);
    return NULL;
}
/* Scan a numeric literal (int / long / float / imaginary) starting at
 * token_start and emit the corresponding token. */
static Error* eat_number(pk_Lexer* self){
    const char* i = self->token_start;
    while(is_possible_number_char(*i)) i++;
    // allow a signed exponent, e.g. 1e-3
    bool is_scientific_notation = false;
    if(*(i - 1) == 'e' && (*i == '+' || *i == '-')) {
        i++;
        while(isdigit(*i) || *i == 'j') i++;
        is_scientific_notation = true;
    }
    c11_string text = {self->token_start, i - self->token_start};
    self->curr_char = i;
    if(text.data[0] != '.' && !is_scientific_notation) {
        // try long
        if(i[-1] == 'L') {
            add_token(self, TK_LONG);
            return NULL;
        }
        // try integer
        // BUG FIX: the payload tag must be TokenValue_I64 (was
        // TokenValue_EMPTY, which made the serializer drop the value).
        TokenValue value = {.index = TokenValue_I64};
        switch(parse_uint(text, &value._i64, -1)) {
            case IntParsing_SUCCESS:
                add_token_with_value(self, TK_NUM, value);
                return NULL;
            case IntParsing_OVERFLOW:
                return SyntaxError("int literal is too large");
            case IntParsing_FAILURE:
                break; // fall through and retry as float
        }
    }
    // try float
    double float_out;
    char* p_end;
    float_out = strtod(text.data, &p_end);
    if(p_end == text.data + text.size){
        TokenValue value = {.index = TokenValue_F64, ._f64 = float_out};
        add_token_with_value(self, TK_NUM, value);
        return NULL;
    }
    // imaginary literal: everything but the trailing 'j' parses as a float
    if(i[-1] == 'j' && p_end == text.data + text.size - 1) {
        TokenValue value = {.index = TokenValue_F64, ._f64 = float_out};
        add_token_with_value(self, TK_IMAG, value);
        return NULL;
    }
    return SyntaxError("invalid number literal");
}
/* Scan and emit one token at the cursor. The driver loop in
 * pk_Lexer__process calls this repeatedly until *eof is set; whitespace
 * and comments are skipped by looping internally without emitting. */
static Error* lex_one_token(pk_Lexer* self, bool* eof){
    *eof = false;
    while(*self->curr_char) {
        self->token_start = self->curr_char;
        char c = eatchar_include_newline(self);
        switch(c) {
            case '\'':
            case '"': {
                Error* err = eat_string(self, c, NORMAL_STRING);
                if(err) return err;
                return NULL;
            }
            case '#': skip_line_comment(self); break;
            case '~': add_token(self, TK_INVERT); return NULL;
            case '{': add_token(self, TK_LBRACE); return NULL;
            case '}': add_token(self, TK_RBRACE); return NULL;
            case ',': add_token(self, TK_COMMA); return NULL;
            case ':': add_token(self, TK_COLON); return NULL;
            case ';': add_token(self, TK_SEMICOLON); return NULL;
            case '(': add_token(self, TK_LPAREN); return NULL;
            case ')': add_token(self, TK_RPAREN); return NULL;
            case '[': add_token(self, TK_LBRACKET); return NULL;
            case ']': add_token(self, TK_RBRACKET); return NULL;
            case '@': add_token(self, TK_DECORATOR); return NULL;
            case '\\': {
                // line continuation character
                char c = eatchar_include_newline(self);
                if(c != '\n') {
                    if(self->src->mode == REPL_MODE && c == '\0') return NeedMoreLines();
                    return SyntaxError("expected newline after line continuation character");
                }
                eat_spaces(self);
                return NULL;
            }
            case '%': add_token_2(self, '=', TK_MOD, TK_IMOD); return NULL;
            case '&': add_token_2(self, '=', TK_AND, TK_IAND); return NULL;
            case '|': add_token_2(self, '=', TK_OR, TK_IOR); return NULL;
            case '^': add_token_2(self, '=', TK_XOR, TK_IXOR); return NULL;
            case '.': {
                // '.', '..', '...' — or a float like '.5'
                if(matchchar(self, '.')) {
                    if(matchchar(self, '.')) {
                        add_token(self, TK_DOTDOTDOT);
                    } else {
                        add_token(self, TK_DOTDOT);
                    }
                } else {
                    char next_char = *self->curr_char;
                    if(next_char >= '0' && next_char <= '9') {
                        Error* err = eat_number(self);
                        if(err) return err;
                    } else {
                        add_token(self, TK_DOT);
                    }
                }
                return NULL;
            }
            case '=': add_token_2(self, '=', TK_ASSIGN, TK_EQ); return NULL;
            case '+': add_token_2(self, '=', TK_ADD, TK_IADD); return NULL;
            case '>': {
                if(matchchar(self, '='))
                    add_token(self, TK_GE);
                else if(matchchar(self, '>'))
                    add_token_2(self, '=', TK_RSHIFT, TK_IRSHIFT);
                else
                    add_token(self, TK_GT);
                return NULL;
            }
            case '<': {
                if(matchchar(self, '='))
                    add_token(self, TK_LE);
                else if(matchchar(self, '<'))
                    add_token_2(self, '=', TK_LSHIFT, TK_ILSHIFT);
                else
                    add_token(self, TK_LT);
                return NULL;
            }
            case '-': {
                if(matchchar(self, '='))
                    add_token(self, TK_ISUB);
                else if(matchchar(self, '>'))
                    add_token(self, TK_ARROW);
                else
                    add_token(self, TK_SUB);
                return NULL;
            }
            case '!':
                if(matchchar(self, '=')){
                    add_token(self, TK_NE);
                }else{
                    // NOTE(review): SyntaxError is stubbed to NULL, so this
                    // currently falls through and keeps lexing — revisit
                    // once error reporting is restored.
                    Error* err = SyntaxError("expected '=' after '!'");
                    if(err) return err;
                }
                break;
            case '*':
                if(matchchar(self, '*')) {
                    add_token(self, TK_POW); // '**'
                } else {
                    add_token_2(self, '=', TK_MUL, TK_IMUL);
                }
                return NULL;
            case '/':
                if(matchchar(self, '/')) {
                    add_token_2(self, '=', TK_FLOORDIV, TK_IFLOORDIV);
                } else {
                    add_token_2(self, '=', TK_DIV, TK_IDIV);
                }
                return NULL;
            case ' ':
            case '\t': eat_spaces(self); break;
            case '\n': {
                add_token(self, TK_EOL);
                if(!eat_indentation(self)){
                    return SyntaxError("unindent does not match any outer indentation level");
                }
                return NULL;
            }
            default: {
                // string prefixes: f'', r'', b''
                if(c == 'f') {
                    if(matchchar(self, '\'')) return eat_string(self, '\'', F_STRING);
                    if(matchchar(self, '"')) return eat_string(self, '"', F_STRING);
                } else if(c == 'r') {
                    if(matchchar(self, '\'')) return eat_string(self, '\'', RAW_STRING);
                    if(matchchar(self, '"')) return eat_string(self, '"', RAW_STRING);
                } else if(c == 'b') {
                    if(matchchar(self, '\'')) return eat_string(self, '\'', NORMAL_BYTES);
                    if(matchchar(self, '"')) return eat_string(self, '"', NORMAL_BYTES);
                }
                if(c >= '0' && c <= '9') return eat_number(self);
                return eat_name(self);
            }
        }
    }
    self->token_start = self->curr_char;
    // at EOF, drain the indent stack one DEDENT per call; *eof stays false
    // so the driver keeps calling until only the base level remains
    while(self->indents.count > 1) {
        c11_vector__pop(int, &self->indents);
        add_token(self, TK_DEDENT);
        return NULL;
    }
    add_token(self, TK_EOF);
    *eof = true;
    return NULL;
}
/* Rebuild the token stream from a precompiled dump produced by
 * pk_Lexer__process_and_dump(): version line, mode line, raw-string table,
 * then one serialized token per line. Tokens are appended to self->nexts;
 * raw-string storage is owned by src->_precompiled_tokens. */
static Error* from_precompiled(pk_Lexer* self) {
    pk_TokenDeserializer deserializer;
    pk_TokenDeserializer__ctor(&deserializer, py_Str__data(&self->src->source));
    deserializer.curr += 5; // skip "pkpy:"
    c11_string version = pk_TokenDeserializer__read_string(&deserializer, '\n');
    if(c11_string__cmp3(version, PK_VERSION) != 0) {
        return SyntaxError("precompiled version mismatch");
    }
    if(pk_TokenDeserializer__read_uint(&deserializer, '\n') != (int64_t)self->src->mode){
        return SyntaxError("precompiled mode mismatch");
    }
    // raw-string table: the text of TK_ID / TK_LONG tokens
    int count = pk_TokenDeserializer__read_count(&deserializer);
    c11_vector* precompiled_tokens = &self->src->_precompiled_tokens;
    for(int i = 0; i < count; i++) {
        c11_string item = pk_TokenDeserializer__read_string(&deserializer, '\n');
        py_Str copied_item;
        py_Str__ctor2(&copied_item, item.data, item.size);
        c11_vector__push(py_Str, precompiled_tokens, copied_item);
    }
    count = pk_TokenDeserializer__read_count(&deserializer);
    for(int i = 0; i < count; i++) {
        Token t;
        t.type = (TokenIndex)pk_TokenDeserializer__read_uint(&deserializer, ',');
        if(is_raw_string_used(t.type)) {
            // token text is stored as an index into the raw-string table
            int64_t index = pk_TokenDeserializer__read_uint(&deserializer, ',');
            py_Str* p = c11__at(py_Str, precompiled_tokens, index);
            t.start = py_Str__data(p);
            t.length = c11__getitem(py_Str, precompiled_tokens, index).size;
        } else {
            t.start = NULL;
            t.length = 0;
        }
        // a bare ',' means "same as previous token" (delta encoding)
        if(pk_TokenDeserializer__match_char(&deserializer, ',')) {
            t.line = c11_vector__back(Token, &self->nexts).line;
        } else {
            t.line = (int)pk_TokenDeserializer__read_uint(&deserializer, ',');
        }
        if(pk_TokenDeserializer__match_char(&deserializer, ',')) {
            t.brackets_level = c11_vector__back(Token, &self->nexts).brackets_level;
        } else {
            t.brackets_level = (int)pk_TokenDeserializer__read_uint(&deserializer, ',');
        }
        // payload tag: 'I' int64, 'F' double, 'S' hex-encoded string, else empty
        char type = (*deserializer.curr++); // read_char
        switch(type) {
            case 'I': {
                int64_t res = pk_TokenDeserializer__read_uint(&deserializer, '\n');
                t.value = (TokenValue){TokenValue_I64, ._i64 = res};
            } break;
            case 'F': {
                double res = pk_TokenDeserializer__read_float(&deserializer, '\n');
                t.value = (TokenValue){TokenValue_F64, ._f64 = res};
            } break;
            case 'S': {
                py_Str res = pk_TokenDeserializer__read_string_from_hex(&deserializer, '\n');
                t.value = (TokenValue){TokenValue_STR, ._str = res};
            } break;
            default:
                t.value = EmptyTokenValue;
                break;
        }
        c11_vector__push(Token, &self->nexts, t);
    }
    return NULL;
}
/* Value of digit `c` in `base` (2/8/10/16), or -1 if not a valid digit. */
static int parse_uint_digit(char c, int base) {
    int v;
    if(c >= '0' && c <= '9') v = c - '0';
    else if(c >= 'a' && c <= 'f') v = c - 'a' + 10;
    else if(c >= 'A' && c <= 'F') v = c - 'A' + 10;
    else return -1;
    return v < base ? v : -1;
}

/* Parse an unsigned integer literal into *out.
 * `base` is 2, 8, 10 or 16, or -1 to auto-detect from a "0b"/"0o"/"0x"
 * prefix (defaulting to 10). Overflow is detected conservatively by digit
 * count, exactly as before ("9223372036854775807" has 19 digits, etc.).
 * FIX: the length check now runs BEFORE accumulation, and accumulation uses
 * unsigned arithmetic, so over-long or boundary literals (e.g.
 * "0xffffffffffffffff") no longer trigger signed-overflow UB. */
IntParsingResult parse_uint(c11_string text, int64_t* out, int base) {
    *out = 0;
    c11_string prefix = {.data = text.data, .size = PK_MIN(2, text.size)};
    if(base == -1) {
        if(c11_string__cmp3(prefix, "0b") == 0)
            base = 2;
        else if(c11_string__cmp3(prefix, "0o") == 0)
            base = 8;
        else if(c11_string__cmp3(prefix, "0x") == 0)
            base = 16;
        else
            base = 10;
    }
    int max_len;                    // longest digit string that cannot overflow int64
    const char* radix_prefix;       // optional prefix to strip for this base
    switch(base) {
        case 10: max_len = 19; radix_prefix = NULL; break;  // len("9223372036854775807")
        case 2:  max_len = 63; radix_prefix = "0b"; break;  // 63 one-bits
        case 8:  max_len = 21; radix_prefix = "0o"; break;  // len("777777777777777777777")
        case 16: max_len = 16; radix_prefix = "0x"; break;  // len("7fffffffffffffff")
        default: return IntParsing_FAILURE;
    }
    if(radix_prefix != NULL && c11_string__cmp3(prefix, radix_prefix) == 0) {
        text = (c11_string){text.data + 2, text.size - 2};
    }
    if(text.size == 0) return IntParsing_FAILURE;
    // validate all digits first, so a malformed literal still reports
    // FAILURE even when it is also over-long (same check order as before)
    for(int i = 0; i < text.size; i++) {
        if(parse_uint_digit(text.data[i], base) < 0) return IntParsing_FAILURE;
    }
    if(text.size > max_len) return IntParsing_OVERFLOW;
    // accumulate in unsigned arithmetic; wrap on the 64-bit hex boundary is
    // well-defined and matches the previous bit pattern
    uint64_t acc = 0;
    for(int i = 0; i < text.size; i++) {
        acc = acc * (uint64_t)base + (uint64_t)parse_uint_digit(text.data[i], base);
    }
    *out = (int64_t)acc;
    return IntParsing_SUCCESS;
}
/* Tokenize `src` into *out_tokens (ownership transferred to the caller;
 * free with pk_TokenArray__dtor). Handles both plain source text and
 * precompiled token dumps. Returns NULL on success. */
Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens){
    pk_Lexer lexer;
    pk_Lexer__ctor(&lexer, src);

    if(src->is_precompiled) {
        Error* err = from_precompiled(&lexer);
        // FIX: hand the deserialized tokens to the caller; previously
        // *out_tokens was left uninitialized on this path ("TODO: set out
        // tokens") and the tokens were destroyed with the lexer.
        if(!err) *out_tokens = c11_vector__submit(&lexer.nexts);
        pk_Lexer__dtor(&lexer);
        return err;
    }

    // push initial tokens
    Token sof = {TK_SOF, lexer.token_start, 0, lexer.current_line, lexer.brackets_level, EmptyTokenValue};
    c11_vector__push(Token, &lexer.nexts, sof);
    c11_vector__push(int, &lexer.indents, 0);

    bool eof = false;
    while(!eof) {
        Error* err = lex_one_token(&lexer, &eof);  // FIX: was declared `void*`
        if(err){
            pk_Lexer__dtor(&lexer);
            return err;
        }
    }
    // transfer ownership of the token buffer to the caller
    *out_tokens = c11_vector__submit(&lexer.nexts);
    pk_Lexer__dtor(&lexer);
    return NULL;
}
/* Tokenize `src` and serialize the token stream into *out as a precompiled
 * dump readable by from_precompiled(): version line, mode line, raw-string
 * table, then one token per line with delta-encoded line/bracket fields. */
Error* pk_Lexer__process_and_dump(pk_SourceData_ src, py_Str* out) {
    assert(!src->is_precompiled);
    pk_TokenArray nexts; // output tokens
    Error* err = pk_Lexer__process(src, &nexts);
    if(err) return err;

    pk_SStream ss;
    pk_SStream__ctor(&ss);
    // L1: version string
    pk_SStream__write_cstr(&ss, "pkpy:" PK_VERSION "\n");
    // L2: mode
    pk_SStream__write_int(&ss, (int)src->mode);
    pk_SStream__write_char(&ss, '\n');

    // collect the distinct raw-string payloads (TK_ID / TK_LONG texts)
    c11_smallmap_s2n token_indices;
    c11_smallmap_s2n__ctor(&token_indices);
    c11_vector__foreach(Token, &nexts, token) {
        if(is_raw_string_used(token->type)) {
            c11_string token_sv = {token->start, token->length};
            if(!c11_smallmap_s2n__contains(&token_indices, token_sv)) {
                c11_smallmap_s2n__set(&token_indices, token_sv, 0);
            }
        }
    }
    // L3: raw string count
    pk_SStream__write_char(&ss, '=');
    pk_SStream__write_int(&ss, token_indices.count);
    pk_SStream__write_char(&ss, '\n');
    uint16_t index = 0;
    for(int i=0; i<token_indices.count; i++){
        c11_smallmap_s2n_KV* kv = c11__at(c11_smallmap_s2n_KV, &token_indices, i);
        // L4: raw strings, one per line
        pk_SStream__write_cstrn(&ss, kv->key.data, kv->key.size);
        // BUG FIX: from_precompiled() reads each raw string with '\n' as the
        // delimiter, so every entry must be newline-terminated (they were
        // previously written back-to-back, breaking the round trip).
        pk_SStream__write_char(&ss, '\n');
        kv->value = index++;
    }
    // L5: token count
    pk_SStream__write_char(&ss, '=');
    pk_SStream__write_int(&ss, nexts.count);
    pk_SStream__write_char(&ss, '\n');
    for(int i = 0; i < nexts.count; i++) {
        const Token* token = c11__at(Token, &nexts, i);
        pk_SStream__write_int(&ss, (int)token->type);
        pk_SStream__write_char(&ss, ',');
        if(is_raw_string_used(token->type)) {
            uint16_t *p = c11_smallmap_s2n__try_get(
                &token_indices, (c11_string){token->start, token->length});
            assert(p != NULL);
            pk_SStream__write_int(&ss, (int)*p);
            pk_SStream__write_char(&ss, ',');
        }
        // line / brackets_level: a bare ',' means "same as previous token"
        if(i > 0 && c11__getitem(Token, &nexts, i-1).line == token->line){
            pk_SStream__write_char(&ss, ',');
        }else{
            pk_SStream__write_int(&ss, token->line);
            pk_SStream__write_char(&ss, ',');
        }
        if(i > 0 && c11__getitem(Token, &nexts, i-1).brackets_level == token->brackets_level){
            pk_SStream__write_char(&ss, ',');
        }else{
            pk_SStream__write_int(&ss, token->brackets_level);
            pk_SStream__write_char(&ss, ',');
        }
        // visit token value
        switch(token->value.index){
            case TokenValue_EMPTY: break;
            case TokenValue_I64:
                pk_SStream__write_char(&ss, 'I');
                // NOTE(review): if pk_SStream__write_int takes a plain int,
                // 64-bit literals are truncated here — confirm its signature.
                pk_SStream__write_int(&ss, token->value._i64);
                break;
            case TokenValue_F64:
                pk_SStream__write_char(&ss, 'F');
                pk_SStream__write_float(&ss, token->value._f64, -1);
                break;
            case TokenValue_STR: {
                pk_SStream__write_char(&ss, 'S');
                c11_string sv = py_Str__sv(&token->value._str);
                for(int i=0; i<sv.size; i++){
                    pk_SStream__write_hex(&ss, sv.data[i], false);
                }
                break;
            }
        }
        pk_SStream__write_char(&ss, '\n');
    }
    *out = pk_SStream__submit(&ss);
    c11_smallmap_s2n__dtor(&token_indices);
    pk_TokenArray__dtor(&nexts);  // FIX: token array (and STR payloads) leaked
    return NULL;
}
/* Free the STR payloads owned by the tokens, then release the array storage. */
void pk_TokenArray__dtor(pk_TokenArray *self){
    Token* tokens = self->data;
    for(Token* t = tokens; t != tokens + self->count; t++){
        if(t->value.index == TokenValue_STR){
            py_Str__dtor(&t->value._str);
        }
    }
    c11_array__dtor(self);
}
const char* pk_TokenSymbols[] = {
"@eof", "@eol", "@sof",

View File

@ -1,751 +0,0 @@
#include "pocketpy/compiler/lexer.hpp"
#include "pocketpy/common/config.h"
#include "pocketpy/common/str.h"
#include "pocketpy/common/smallmap.h"
#include "pocketpy/compiler/lexer.h"
#include <cstdarg>
namespace pkpy {
static bool is_possible_number_char(char c) noexcept{
switch(c) {
// clang-format off
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case '.': case 'L': case 'x': case 'o': case 'j':
return true;
default: return false;
// clang-format on
}
}
bool Lexer::match_n_chars(int n, char c0) noexcept{
const char* c = curr_char;
for(int i = 0; i < n; i++) {
if(*c == '\0') return false;
if(*c != c0) return false;
c++;
}
for(int i = 0; i < n; i++)
eatchar_include_newline();
return true;
}
bool Lexer::match_string(const char* s) noexcept{
int s_len = strlen(s);
bool ok = strncmp(curr_char, s, s_len) == 0;
if(ok)
for(int i = 0; i < s_len; i++)
eatchar_include_newline();
return ok;
}
int Lexer::eat_spaces() noexcept{
int count = 0;
while(true) {
switch(peekchar()) {
case ' ': count += 1; break;
case '\t': count += 4; break;
default: return count;
}
eatchar();
}
}
bool Lexer::eat_indentation() noexcept{
if(brackets_level > 0) return true;
int spaces = eat_spaces();
if(peekchar() == '#') skip_line_comment();
if(peekchar() == '\0' || peekchar() == '\n') return true;
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
if(spaces > indents.back()) {
indents.push_back(spaces);
nexts.push_back(Token{TK_INDENT, token_start, 0, current_line, brackets_level, {}});
} else if(spaces < indents.back()) {
while(spaces < indents.back()) {
indents.pop_back();
nexts.push_back(Token{TK_DEDENT, token_start, 0, current_line, brackets_level, {}});
}
if(spaces != indents.back()) { return false; }
}
return true;
}
char Lexer::eatchar() noexcept{
char c = peekchar();
assert(c != '\n'); // eatchar() cannot consume a newline
curr_char++;
return c;
}
char Lexer::eatchar_include_newline() noexcept{
char c = peekchar();
curr_char++;
if(c == '\n') {
current_line++;
c11_vector__push(const char*, &src->line_starts, curr_char);
}
return c;
}
Error* Lexer::eat_name() noexcept{
curr_char--;
while(true) {
unsigned char c = peekchar();
int u8bytes = c11__u8_header(c, true);
if(u8bytes == 0) return SyntaxError("invalid char: %c", c);
if(u8bytes == 1) {
if(isalpha(c) || c == '_' || isdigit(c)) {
curr_char++;
continue;
} else {
break;
}
}
// handle multibyte char
Str u8str(curr_char, u8bytes);
if(u8str.size != u8bytes) return SyntaxError("invalid utf8 sequence: %s", u8str.c_str());
uint32_t value = 0;
for(int k = 0; k < u8bytes; k++) {
uint8_t b = u8str[k];
if(k == 0) {
if(u8bytes == 2)
value = (b & 0b00011111) << 6;
else if(u8bytes == 3)
value = (b & 0b00001111) << 12;
else if(u8bytes == 4)
value = (b & 0b00000111) << 18;
} else {
value |= (b & 0b00111111) << (6 * (u8bytes - k - 1));
}
}
if(c11__is_unicode_Lo_char(value))
curr_char += u8bytes;
else
break;
}
int length = (int)(curr_char - token_start);
if(length == 0) return SyntaxError("@id contains invalid char");
c11_string name = {token_start, length};
if(src->mode == JSON_MODE) {
if(c11_string__cmp3(name, "true") == 0) {
add_token(TK_TRUE);
} else if(c11_string__cmp3(name, "false") == 0) {
add_token(TK_FALSE);
} else if(c11_string__cmp3(name, "null") == 0) {
add_token(TK_NONE);
} else {
return SyntaxError("invalid JSON token");
}
return NULL;
}
const char** KW_BEGIN = pk_TokenSymbols + TK_FALSE;
int KW_COUNT = TK__COUNT__ - TK_FALSE;
#define less(a, b) (c11_string__cmp3(b, a) > 0)
int out;
c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
#undef less
if(out != KW_COUNT && c11_string__cmp3(name, KW_BEGIN[out]) == 0) {
add_token((TokenIndex)(out + TK_FALSE));
} else {
add_token(TK_ID);
}
return NULL;
}
void Lexer::skip_line_comment() noexcept{
char c;
while((c = peekchar()) != '\0') {
if(c == '\n') return;
eatchar();
}
}
bool Lexer::matchchar(char c) noexcept{
if(peekchar() != c) return false;
eatchar_include_newline();
return true;
}
void Lexer::add_token(TokenIndex type, TokenValue value) noexcept{
switch(type) {
case TK_LBRACE:
case TK_LBRACKET:
case TK_LPAREN: brackets_level++; break;
case TK_RPAREN:
case TK_RBRACKET:
case TK_RBRACE: brackets_level--; break;
default: break;
}
auto token = Token{type,
token_start,
(int)(curr_char - token_start),
current_line - ((type == TK_EOL) ? 1 : 0),
brackets_level,
value};
// handle "not in", "is not", "yield from"
if(!nexts.empty()) {
auto& back = nexts.back();
if(back.type == TK_NOT_KW && type == TK_IN) {
back.type = TK_NOT_IN;
return;
}
if(back.type == TK_IS && type == TK_NOT_KW) {
back.type = TK_IS_NOT;
return;
}
if(back.type == TK_YIELD && type == TK_FROM) {
back.type = TK_YIELD_FROM;
return;
}
nexts.push_back(token);
}
}
void Lexer::add_token_2(char c, TokenIndex one, TokenIndex two) noexcept{
if(matchchar(c))
add_token(two);
else
add_token(one);
}
Error* Lexer::eat_string_until(char quote, bool raw, Str* out) noexcept{
bool quote3 = match_n_chars(2, quote);
small_vector_2<char, 32> buff;
while(true) {
char c = eatchar_include_newline();
if(c == quote) {
if(quote3 && !match_n_chars(2, quote)) {
buff.push_back(c);
continue;
}
break;
}
if(c == '\0') {
if(quote3 && src->mode == REPL_MODE) return NeedMoreLines();
return SyntaxError("EOL while scanning string literal");
}
if(c == '\n') {
if(!quote3)
return SyntaxError("EOL while scanning string literal");
else {
buff.push_back(c);
continue;
}
}
if(!raw && c == '\\') {
switch(eatchar_include_newline()) {
case '"': buff.push_back('"'); break;
case '\'': buff.push_back('\''); break;
case '\\': buff.push_back('\\'); break;
case 'n': buff.push_back('\n'); break;
case 'r': buff.push_back('\r'); break;
case 't': buff.push_back('\t'); break;
case 'b': buff.push_back('\b'); break;
case 'x': {
char hex[3] = {eatchar(), eatchar(), '\0'};
size_t parsed;
char code;
try {
code = (char)std::stoi(hex, &parsed, 16);
} catch(...) {
return SyntaxError("invalid hex char");
}
if(parsed != 2) return SyntaxError("invalid hex char");
buff.push_back(code);
} break;
default: return SyntaxError("invalid escape char");
}
} else {
buff.push_back(c);
}
}
*out = Str(buff.data(), buff.size());
return nullptr;
}
Error* Lexer::eat_string(char quote, StringType type) noexcept{
Str s;
Error* err = eat_string_until(quote, type == StringType::RAW_STRING, &s);
if(err) return err;
if(type == StringType::F_STRING) {
add_token(TK_FSTR, s);
}else if(type == StringType::NORMAL_BYTES) {
add_token(TK_BYTES, s);
}else{
add_token(TK_STR, s);
}
return NULL;
}
Error* Lexer::eat_number() noexcept{
const char* i = token_start;
while(is_possible_number_char(*i))
i++;
bool is_scientific_notation = false;
if(*(i - 1) == 'e' && (*i == '+' || *i == '-')) {
i++;
while(isdigit(*i) || *i == 'j')
i++;
is_scientific_notation = true;
}
std::string_view text(token_start, i - token_start);
this->curr_char = i;
if(text[0] != '.' && !is_scientific_notation) {
// try long
if(i[-1] == 'L') {
add_token(TK_LONG);
return NULL;
}
// try integer
i64 int_out;
switch(parse_uint(text, &int_out, -1)) {
case IntParsingResult::Success: add_token(TK_NUM, int_out); return NULL;
case IntParsingResult::Overflow: return SyntaxError("int literal is too large");
case IntParsingResult::Failure: break; // do nothing
}
}
// try float
double float_out;
char* p_end;
try {
float_out = std::strtod(text.data(), &p_end);
} catch(...) {
return SyntaxError("invalid number literal");
}
if(p_end == text.data() + text.size()) {
add_token(TK_NUM, (f64)float_out);
return NULL;
}
if(i[-1] == 'j' && p_end == text.data() + text.size() - 1) {
add_token(TK_IMAG, (f64)float_out);
return NULL;
}
return SyntaxError("invalid number literal");
}
// Scan the source from the current cursor and append the next token(s)
// to `nexts`. Sets *eof once the final TK_EOF has been emitted.
// Returns NULL on success, or an owned Error* describing the failure.
// NOTE: a single call may emit zero tokens for skippable input (comments,
// spaces) and keeps looping until it emits at least one token or hits EOF.
Error* Lexer::lex_one_token(bool* eof) noexcept{
    *eof = false;
    while(peekchar() != '\0') {
        token_start = curr_char;
        char c = eatchar_include_newline();
        switch(c) {
            case '\'':
            case '"': {
                // quote character starts a plain string literal
                Error* err = eat_string(c, StringType::NORMAL_STRING);
                if(err) return err;
                return NULL;
            }
            case '#': skip_line_comment(); break;  // comment: no token, keep scanning
            case '~': add_token(TK_INVERT); return NULL;
            case '{': add_token(TK_LBRACE); return NULL;
            case '}': add_token(TK_RBRACE); return NULL;
            case ',': add_token(TK_COMMA); return NULL;
            case ':': add_token(TK_COLON); return NULL;
            case ';': add_token(TK_SEMICOLON); return NULL;
            case '(': add_token(TK_LPAREN); return NULL;
            case ')': add_token(TK_RPAREN); return NULL;
            case '[': add_token(TK_LBRACKET); return NULL;
            case ']': add_token(TK_RBRACKET); return NULL;
            case '@': add_token(TK_DECORATOR); return NULL;
            case '\\': {
                // line continuation character
                char c = eatchar_include_newline();
                if(c != '\n') {
                    // in REPL mode a trailing '\' means the user isn't done typing
                    if(src->mode == REPL_MODE && c == '\0') return NeedMoreLines();
                    return SyntaxError("expected newline after line continuation character");
                }
                eat_spaces();
                return NULL;
            }
            case '%': add_token_2('=', TK_MOD, TK_IMOD); return NULL;
            case '&': add_token_2('=', TK_AND, TK_IAND); return NULL;
            case '|': add_token_2('=', TK_OR, TK_IOR); return NULL;
            case '^': add_token_2('=', TK_XOR, TK_IXOR); return NULL;
            case '.': {
                // '.', '..', '...' — or the start of a float like ".5"
                if(matchchar('.')) {
                    if(matchchar('.')) {
                        add_token(TK_DOTDOTDOT);
                    } else {
                        add_token(TK_DOTDOT);
                    }
                } else {
                    char next_char = peekchar();
                    if(next_char >= '0' && next_char <= '9') {
                        Error* err = eat_number();
                        if(err) return err;
                    } else {
                        add_token(TK_DOT);
                    }
                }
                return NULL;
            }
            case '=': add_token_2('=', TK_ASSIGN, TK_EQ); return NULL;
            case '+': add_token_2('=', TK_ADD, TK_IADD); return NULL;
            case '>': {
                // '>=', '>>', '>>=', '>'
                if(matchchar('='))
                    add_token(TK_GE);
                else if(matchchar('>'))
                    add_token_2('=', TK_RSHIFT, TK_IRSHIFT);
                else
                    add_token(TK_GT);
                return NULL;
            }
            case '<': {
                // '<=', '<<', '<<=', '<'
                if(matchchar('='))
                    add_token(TK_LE);
                else if(matchchar('<'))
                    add_token_2('=', TK_LSHIFT, TK_ILSHIFT);
                else
                    add_token(TK_LT);
                return NULL;
            }
            case '-': {
                // '-=', '->', '-'
                if(matchchar('='))
                    add_token(TK_ISUB);
                else if(matchchar('>'))
                    add_token(TK_ARROW);
                else
                    add_token(TK_SUB);
                return NULL;
            }
            case '!':
                // '!' is only valid as part of '!='
                if(matchchar('=')){
                    add_token(TK_NE);
                }else{
                    Error* err = SyntaxError("expected '=' after '!'");
                    if(err) return err;
                }
                break;
            case '*':
                if(matchchar('*')) {
                    add_token(TK_POW); // '**'
                } else {
                    add_token_2('=', TK_MUL, TK_IMUL);
                }
                return NULL;
            case '/':
                // '//', '//=', '/', '/='
                if(matchchar('/')) {
                    add_token_2('=', TK_FLOORDIV, TK_IFLOORDIV);
                } else {
                    add_token_2('=', TK_DIV, TK_IDIV);
                }
                return NULL;
            case ' ':
            case '\t': eat_spaces(); break;  // interior whitespace: no token
            case '\n': {
                add_token(TK_EOL);
                // a newline ends the logical line; re-evaluate indentation
                if(!eat_indentation()){
                    return IndentationError("unindent does not match any outer indentation level");
                }
                return NULL;
            }
            default: {
                // string prefixes: f'', r'', b'' (checked before names/numbers)
                if(c == 'f') {
                    if(matchchar('\'')) return eat_string('\'', StringType::F_STRING);
                    if(matchchar('"')) return eat_string('"', StringType::F_STRING);
                } else if(c == 'r') {
                    if(matchchar('\'')) return eat_string('\'', StringType::RAW_STRING);
                    if(matchchar('"')) return eat_string('"', StringType::RAW_STRING);
                } else if(c == 'b') {
                    if(matchchar('\'')) return eat_string('\'', StringType::NORMAL_BYTES);
                    if(matchchar('"')) return eat_string('"', StringType::NORMAL_BYTES);
                }
                if(c >= '0' && c <= '9') return eat_number();
                return eat_name();
            }
        }
    }
    // end of input: flush pending DEDENTs one per call (note the return
    // inside the loop), then emit TK_EOF and signal eof.
    token_start = curr_char;
    while(indents.size() > 1) {
        indents.pop_back();
        add_token(TK_DEDENT);
        return NULL;
    }
    add_token(TK_EOF);
    *eof = true;
    return NULL;
}
// Allocate and populate an Error record.
// `lexer_err`  — attach the current line/cursor position when true.
// `type`       — error class name, e.g. "SyntaxError" (not owned).
// `msg`/`args` — printf-style format; `args` may be NULL for a plain copy.
// `userdata`   — caller-defined payload stored verbatim.
// The returned Error* owns a reference to `src`; caller must release it.
Error* Lexer::_error(bool lexer_err, const char* type, const char* msg, va_list* args, i64 userdata) noexcept{
    Error* err = (Error*)malloc(sizeof(Error));
    err->type = type;
    err->src = src;
    PK_INCREF(src);
    if(lexer_err){
        err->lineno = current_line;
        err->cursor = curr_char;
        // if we stopped on a newline, report the line the error occurred on
        if(*curr_char == '\n') {
            err->lineno--;
            err->cursor--;
        }
    }else{
        err->lineno = -1;
        err->cursor = NULL;
    }
    if(args){
        // vsnprintf always null-terminates (truncating if needed)
        vsnprintf(err->msg, sizeof(err->msg), msg, *args);
    }else{
        // fix: strncpy does not null-terminate when msg fills the buffer;
        // copy at most size-1 bytes and terminate explicitly
        strncpy(err->msg, msg, sizeof(err->msg) - 1);
        err->msg[sizeof(err->msg) - 1] = '\0';
    }
    err->userdata = userdata;
    return err;
}
// Build a "SyntaxError" at the current lexer position.
// `fmt`/... are printf-style; the formatted text becomes the error message.
Error* Lexer::SyntaxError(const char* fmt, ...) noexcept{
    va_list args;
    va_start(args, fmt);
    // lexer_err=true attaches current_line/curr_char to the error
    Error* err = _error(true, "SyntaxError", fmt, &args);
    va_end(args);
    return err;
}
// Tokenize the whole source buffer into `nexts`.
// A Lexer instance is single-use; calling run() twice is a programming error.
// Returns NULL on success or an owned Error* on failure.
Error* Lexer::run() noexcept{
    assert(!this->used);
    this->used = true;
    // precompiled sources carry a serialized token stream instead of text
    if(src->is_precompiled) return from_precompiled();
    // seed the stream: start-of-file marker and base indentation level
    this->nexts.push_back(Token{TK_SOF, token_start, 0, current_line, brackets_level, {}});
    this->indents.push_back(0);
    for(bool eof = false; !eof;) {
        Error* err = lex_one_token(&eof);
        if(err) return err;
    }
    return NULL;
}
// Rebuild the token stream from a precompiled source blob.
// Layout (line-oriented, written by precompile()):
//   L1: "pkpy:" + version, L2: compile mode, L3+: raw-string table,
//   then one serialized token per line. Field order must mirror precompile().
Error* Lexer::from_precompiled() noexcept{
    pk_TokenDeserializer deserializer;
    pk_TokenDeserializer__ctor(&deserializer, py_Str__data(&src->source));
    deserializer.curr += 5; // skip "pkpy:"
    c11_string version = pk_TokenDeserializer__read_string(&deserializer, '\n');
    // refuse blobs produced by a different interpreter version
    if(c11_string__cmp3(version, PK_VERSION) != 0) {
        return SyntaxError("precompiled version mismatch");
    }
    if(pk_TokenDeserializer__read_uint(&deserializer, '\n') != (i64)src->mode){
        return SyntaxError("precompiled mode mismatch");
    }
    // read the raw-string table; tokens reference entries by index
    int count = pk_TokenDeserializer__read_count(&deserializer);
    c11_vector* precompiled_tokens = &src->_precompiled_tokens;
    for(int i = 0; i < count; i++) {
        c11_string item = pk_TokenDeserializer__read_string(&deserializer, '\n');
        py_Str copied_item;
        // copy into src so token start pointers stay valid for src's lifetime
        py_Str__ctor2(&copied_item, item.data, item.size);
        c11_vector__push(py_Str, precompiled_tokens, copied_item);
    }
    count = pk_TokenDeserializer__read_count(&deserializer);
    for(int i = 0; i < count; i++) {
        Token t;
        t.type = (TokenIndex)pk_TokenDeserializer__read_uint(&deserializer, ',');
        if(is_raw_string_used(t.type)) {
            // token text is an index into the raw-string table
            i64 index = pk_TokenDeserializer__read_uint(&deserializer, ',');
            py_Str* p = c11__at(py_Str, precompiled_tokens, index);
            t.start = py_Str__data(p);
            t.length = c11__getitem(py_Str, precompiled_tokens, index).size;
        } else {
            t.start = NULL;
            t.length = 0;
        }
        // an empty field (bare ',') means "same as previous token"
        if(pk_TokenDeserializer__match_char(&deserializer, ',')) {
            t.line = nexts.back().line;
        } else {
            t.line = (int)pk_TokenDeserializer__read_uint(&deserializer, ',');
        }
        if(pk_TokenDeserializer__match_char(&deserializer, ',')) {
            t.brackets_level = nexts.back().brackets_level;
        } else {
            t.brackets_level = (int)pk_TokenDeserializer__read_uint(&deserializer, ',');
        }
        // value tag: 'I' int, 'F' float, 'S' hex-encoded string, else empty
        char type = (*deserializer.curr++); // read_char
        switch(type) {
            case 'I':
                t.value = pk_TokenDeserializer__read_uint(&deserializer, '\n');
                break;
            case 'F':
                t.value = pk_TokenDeserializer__read_float(&deserializer, '\n');
                break;
            case 'S': {
                py_Str res = pk_TokenDeserializer__read_string_from_hex(&deserializer, '\n');
                t.value = Str(std::move(res));
            } break;
            default:
                t.value = {};
                break;
        }
        nexts.push_back(t);
    }
    return NULL;
}
// Lex the source and serialize the resulting token stream into *out.
// The format is the exact mirror of from_precompiled(): version line,
// mode line, deduplicated raw-string table, then one token per line.
Error* Lexer::precompile(Str* out) noexcept{
    assert(!src->is_precompiled);
    Error* err = run();
    if(err) return err;
    SStream ss;
    ss << "pkpy:" PK_VERSION << '\n';    // L1: version string
    ss << (int)src->mode << '\n';        // L2: mode
    // first pass: collect the set of distinct raw token strings
    c11_smallmap_s2n token_indices;
    c11_smallmap_s2n__ctor(&token_indices);
    for(auto token: nexts) {
        if(is_raw_string_used(token.type)) {
            c11_string token_sv = {token.start, token.length};
            if(!c11_smallmap_s2n__contains(&token_indices, token_sv)) {
                c11_smallmap_s2n__set(&token_indices, token_sv, 0);
                // assert no '\n' in token.sv()
                for(char c: token.sv())
                    assert(c != '\n');
            }
        }
    }
    ss << "=" << (int)token_indices.count << '\n';  // L3: raw string count
    // emit the table and assign each string its index for the token records
    uint16_t index = 0;
    for(int i=0; i<token_indices.count; i++){
        auto kv = c11__at(c11_smallmap_s2n_KV, &token_indices, i);
        ss << kv->key << '\n';  // L4: raw strings
        kv->value = index++;
    }
    ss << "=" << (int)nexts.size() << '\n';  // L5: token count
    for(int i = 0; i < nexts.size(); i++) {
        const Token& token = nexts[i];
        ss << (int)token.type << ',';
        if(is_raw_string_used(token.type)) {
            uint16_t *p = c11_smallmap_s2n__try_get(&token_indices, {token.start, token.length});
            assert(p != NULL);
            ss << (int)*p << ',';
        }
        // delta-encode line/brackets_level: an empty field means
        // "same as the previous token" (see from_precompiled)
        if(i > 0 && nexts[i - 1].line == token.line)
            ss << ',';
        else
            ss << token.line << ',';
        if(i > 0 && nexts[i - 1].brackets_level == token.brackets_level)
            ss << ',';
        else
            ss << token.brackets_level << ',';
        // visit token value: tag 'I'/'F'/'S' followed by the payload
        std::visit(
            [&ss](auto&& arg) {
                using T = std::decay_t<decltype(arg)>;
                if constexpr(std::is_same_v<T, i64>) {
                    ss << 'I' << arg;
                } else if constexpr(std::is_same_v<T, f64>) {
                    ss << 'F' << arg;
                } else if constexpr(std::is_same_v<T, Str>) {
                    // strings are hex-encoded so they can't contain '\n'
                    ss << 'S';
                    for(char c: arg)
                        ss.write_hex((unsigned char)c);
                }
                ss << '\n';
            },
            token.value);
    }
    *out = ss.str();
    c11_smallmap_s2n__dtor(&token_indices);
    return NULL;
}
// Parse an unsigned integer literal into *out.
// `base` is 2, 8, 10, 16, or -1 to auto-detect from a "0b"/"0o"/"0x" prefix.
// Returns Success, Failure (malformed text), or Overflow (> INT64_MAX).
// Fix: the previous implementation only compared digit-string lengths, so
// e.g. "9999999999999999999" (19 digits, > INT64_MAX) silently wrapped.
// Overflow is now detected exactly, before each accumulation step.
IntParsingResult parse_uint(std::string_view text, i64* out, int base) noexcept{
    *out = 0;
    const i64 kInt64Max = 0x7fffffffffffffffLL;
    if(base == -1) {
        // auto-detect base from the literal prefix
        if(text.substr(0, 2) == "0b")
            base = 2;
        else if(text.substr(0, 2) == "0o")
            base = 8;
        else if(text.substr(0, 2) == "0x")
            base = 16;
        else
            base = 10;
    }
    if(base == 10) {
        // 10-base 12334
        if(text.length() == 0) return IntParsingResult::Failure;
        for(char c: text) {
            if(c < '0' || c > '9') return IntParsingResult::Failure;
            i64 d = c - '0';
            // would (*out * 10 + d) exceed INT64_MAX?
            if(*out > (kInt64Max - d) / 10) return IntParsingResult::Overflow;
            *out = *out * 10 + d;
        }
        return IntParsingResult::Success;
    }
    // power-of-two bases: strip the prefix and accumulate by shifting
    int shift;
    if(base == 2) {
        if(text.substr(0, 2) == "0b") text.remove_prefix(2);
        shift = 1;
    } else if(base == 8) {
        if(text.substr(0, 2) == "0o") text.remove_prefix(2);
        shift = 3;
    } else if(base == 16) {
        if(text.substr(0, 2) == "0x") text.remove_prefix(2);
        shift = 4;
    } else {
        return IntParsingResult::Failure;
    }
    if(text.length() == 0) return IntParsingResult::Failure;
    for(char c: text) {
        i64 d;
        if(c >= '0' && c <= '9')
            d = c - '0';
        else if(c >= 'a' && c <= 'f')
            d = c - 'a' + 10;
        else if(c >= 'A' && c <= 'F')
            d = c - 'A' + 10;
        else
            return IntParsingResult::Failure;
        // reject digits not valid in this base (e.g. '2' in binary)
        if(d >= ((i64)1 << shift)) return IntParsingResult::Failure;
        // would the shift push set bits past bit 62?
        if(*out > (kInt64Max >> shift)) return IntParsingResult::Overflow;
        *out = (*out << shift) | d;
    }
    return IntParsingResult::Success;
}
} // namespace pkpy

View File

@ -1,187 +0,0 @@
#include "pocketpy/compiler/lexer.h"
#include "pocketpy/objects/sourcedata.h"
// Internal state for tokenizing one pocketpy source buffer.
typedef struct pk_Lexer{
    pk_SourceData_ src;              // ref-counted source being lexed
    const char* token_start;         // start of the token currently being scanned
    const char* curr_char;           // read cursor into the source text
    int current_line;                // 1-based line number of the cursor
    int brackets_level;              // (), [], {} nesting depth; >0 disables indentation handling
    c11_vector/*T=Token*/ nexts;     // tokens emitted so far
    c11_vector/*T=int*/ indents;     // stack of indentation widths (spaces; tab == 4)
} pk_Lexer;
// zero-initialized sentinel for tokens that carry no literal value
const static TokenValue EmptyTokenValue;
// Initialize a lexer over `src`, taking a strong reference to it.
void pk_Lexer__ctor(pk_Lexer* self, pk_SourceData_ src){
    PK_INCREF(src);
    self->src = src;
    const char* begin = py_Str__data(&src->source);
    self->token_start = begin;
    self->curr_char = begin;
    self->current_line = 1;
    self->brackets_level = 0;
    c11_vector__ctor(&self->nexts, sizeof(Token));
    c11_vector__ctor(&self->indents, sizeof(int));
}
// Free token/indent storage and drop the reference to the source.
void pk_Lexer__dtor(pk_Lexer* self){
    c11_vector__dtor(&self->indents);
    c11_vector__dtor(&self->nexts);
    PK_DECREF(self->src);
}
// Tokenize `src` in full. Returns NULL on success or an owned error pointer.
// `out_tokens` will receive the token stream (TODO confirm: output wiring
// looks unfinished in this port — the parameter is currently unused).
void* pk_Lexer__run(pk_SourceData_ src, void** out_tokens){
    pk_Lexer lexer;
    pk_Lexer__ctor(&lexer, src);
    if(src->is_precompiled) {
        // fix: pass the lexer to from_precompiled and only destroy it
        // afterwards — it was previously destroyed before the call
        void* err = from_precompiled(&lexer);
        pk_Lexer__dtor(&lexer);
        return err;
    }
    // push initial tokens: start-of-file marker and base indentation level
    Token sof = {TK_SOF, lexer.token_start, 0, lexer.current_line, lexer.brackets_level, EmptyTokenValue};
    c11_vector__push(Token, &lexer.nexts, sof);
    c11_vector__push(int, &lexer.indents, 0);
    bool eof = false;
    while(!eof) {
        // fix: lex_one_token needs the lexer it operates on
        void* err = lex_one_token(&lexer, &eof);
        if(err){
            pk_Lexer__dtor(&lexer);
            return err;
        }
    }
    pk_Lexer__dtor(&lexer);
    return NULL;
}
// Consume and return one character. Newlines must go through
// eatchar_include_newline() so line accounting stays correct.
char eatchar(pk_Lexer* self){
    char ch = *self->curr_char++;
    assert(ch != '\n'); // eatchar() cannot consume a newline
    return ch;
}
// Consume one character; when it is a newline, bump the line counter and
// record where the new line begins (used for error reporting).
char eatchar_include_newline(pk_Lexer* self){
    char ch = *self->curr_char++;
    if(ch != '\n') return ch;
    self->current_line++;
    c11_vector__push(const char*, &self->src->line_starts, self->curr_char);
    return ch;
}
// Consume a run of spaces/tabs; returns the indentation width
// (one per space, four per tab).
int eat_spaces(pk_Lexer* self){
    int width = 0;
    for(;;) {
        char ch = *self->curr_char;
        if(ch == ' ') width += 1;
        else if(ch == '\t') width += 4;
        else return width;
        eatchar(self);
    }
}
// Consume the next character iff it equals `c`; reports whether it matched.
bool matchchar(pk_Lexer* self, char c){
    if(*self->curr_char == c){
        eatchar_include_newline(self);
        return true;
    }
    return false;
}
// Consume exactly `n` consecutive occurrences of `c0`, or nothing at all.
bool match_n_chars(pk_Lexer* self, int n, char c0){
    // verify without consuming first, so a partial match eats nothing
    const char* p = self->curr_char;
    for(int i = 0; i < n; i++, p++) {
        if(*p == '\0') return false;
        if(*p != c0) return false;
    }
    for(int i = 0; i < n; i++)
        eatchar_include_newline(self);
    return true;
}
// If the upcoming input begins with `s`, consume it and return true;
// otherwise consume nothing and return false.
// Fix: `ok` was returned without ever being defined; it is now derived
// from the strncmp comparison.
bool match_string(pk_Lexer* self, const char* s){
    int s_len = strlen(s);
    bool ok = strncmp(self->curr_char, s, s_len) == 0;
    if(ok){
        for(int i = 0; i < s_len; i++)
            eatchar_include_newline(self);
    }
    return ok;
}
// Advance to (but not past) the terminating newline or end of input.
void skip_line_comment(pk_Lexer* self){
    for(char ch = *self->curr_char; ch != '\0' && ch != '\n'; ch = *self->curr_char){
        eatchar(self);
    }
}
// Append a token of `type` spanning [token_start, curr_char) with `value`.
// Maintains bracket nesting depth and fuses keyword pairs
// ("not in", "is not", "yield from") into single tokens.
// Fix: the push was inside the `nexts.count > 0` guard, so a token added
// to an empty stream was silently dropped; it is now unconditional.
void add_token(pk_Lexer* self, TokenIndex type, TokenValue value){
    switch(type) {
        case TK_LBRACE:
        case TK_LBRACKET:
        case TK_LPAREN: self->brackets_level++; break;
        case TK_RPAREN:
        case TK_RBRACKET:
        case TK_RBRACE: self->brackets_level--; break;
        default: break;
    }
    Token token = {type,
                   self->token_start,
                   (int)(self->curr_char - self->token_start),
                   // TK_EOL is reported on the line it terminates
                   self->current_line - ((type == TK_EOL) ? 1 : 0),
                   self->brackets_level,
                   value};
    // handle "not in", "is not", "yield from": merge into the previous token
    if(self->nexts.count > 0) {
        Token* back = &c11_vector__back(Token, &self->nexts);
        if(back->type == TK_NOT_KW && type == TK_IN) {
            back->type = TK_NOT_IN;
            return;
        }
        if(back->type == TK_IS && type == TK_NOT_KW) {
            back->type = TK_IS_NOT;
            return;
        }
        if(back->type == TK_YIELD && type == TK_FROM) {
            back->type = TK_YIELD_FROM;
            return;
        }
    }
    c11_vector__push(Token, &self->nexts, token);
}
// Emit `two` if the next character is `c` (consuming it), else emit `one`.
// Used for operator pairs like '+' / '+='.
void add_token_2(pk_Lexer* self, char c, TokenIndex one, TokenIndex two){
    TokenIndex chosen = matchchar(self, c) ? two : one;
    add_token(self, chosen, EmptyTokenValue);
}
// Process leading whitespace at the start of a logical line, emitting
// INDENT/DEDENT tokens as the indentation stack changes.
// Returns false on an indentation error (unindent to an unseen level).
// Fix: skip_line_comment was called without its `self` argument.
bool eat_indentation(pk_Lexer* self){
    if(self->brackets_level > 0) return true;  // indentation is ignored inside brackets
    int spaces = eat_spaces(self);
    if(*self->curr_char == '#') skip_line_comment(self);
    // blank or comment-only lines never change indentation
    if(*self->curr_char == '\0' || *self->curr_char == '\n'){
        return true;
    }
    // https://docs.python.org/3/reference/lexical_analysis.html#indentation
    int indents_back = c11_vector__back(int, &self->indents);
    if(spaces > indents_back) {
        c11_vector__push(int, &self->indents, spaces);
        Token t = {TK_INDENT, self->token_start, 0, self->current_line, self->brackets_level, EmptyTokenValue};
        c11_vector__push(Token, &self->nexts, t);
    } else if(spaces < indents_back) {
        // pop until we land on a matching outer level, emitting DEDENTs
        do {
            c11_vector__pop(int, &self->indents);
            Token t = {TK_DEDENT, self->token_start, 0, self->current_line, self->brackets_level, EmptyTokenValue};
            c11_vector__push(Token, &self->nexts, t);
            indents_back = c11_vector__back(int, &self->indents);
        } while(spaces < indents_back);
        if(spaces != indents_back) { return false; }
    }
    return true;
}

View File

@ -17,7 +17,7 @@ FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_string name){
FuncDecl* self = malloc(sizeof(FuncDecl));
self->rc.count = 1;
self->rc.dtor = (void (*)(void*))FuncDecl__dtor;
self->code = CodeObject__new(src, name);
CodeObject__ctor(&self->code, src, name);
c11_vector__ctor(&self->args, sizeof(int));
c11_vector__ctor(&self->kwargs, sizeof(FuncDeclKwArg));
@ -34,7 +34,7 @@ FuncDecl_ FuncDecl__rcnew(pk_SourceData_ src, c11_string name){
}
void FuncDecl__dtor(FuncDecl* self){
CodeObject__delete(self->code);
CodeObject__dtor(&self->code);
c11_vector__dtor(&self->args);
c11_vector__dtor(&self->kwargs);
c11_smallmap_n2i__dtor(&self->kw_to_index);
@ -46,8 +46,7 @@ void FuncDecl__add_kwarg(FuncDecl* self, int index, uint16_t key, const PyVar* v
c11_vector__push(FuncDeclKwArg, &self->kwargs, item);
}
CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name){
CodeObject* self = malloc(sizeof(CodeObject));
void CodeObject__ctor(CodeObject* self, pk_SourceData_ src, c11_string name){
self->src = src; PK_INCREF(src);
py_Str__ctor2(&self->name, name.data, name.size);
@ -69,10 +68,9 @@ CodeObject* CodeObject__new(pk_SourceData_ src, c11_string name){
CodeBlock root_block = {CodeBlockType_NO_BLOCK, -1, 0, -1, -1};
c11_vector__push(CodeBlock, &self->blocks, root_block);
return self;
}
void CodeObject__delete(CodeObject* self){
void CodeObject__dtor(CodeObject* self){
PK_DECREF(self->src);
py_Str__dtor(&self->name);
@ -92,6 +90,4 @@ void CodeObject__delete(CodeObject* self){
PK_DECREF(decl);
}
c11_vector__dtor(&self->func_decls);
free(self);
}

View File

@ -1,17 +0,0 @@
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.h"
#include "pocketpy/interpreter/vm.h"
#include <assert.h>
#include <stdlib.h>

View File

@ -1,15 +1,18 @@
#include "pocketpy/objects/sourcedata.h"
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.h"
#include "pocketpy/interpreter/vm.h"
#include "pocketpy/compiler/compiler.h"
pk_VM* pk_current_vm;
static pk_VM pk_default_vm;
void py_initialize() {
Pools_initialize();
pk_MemoryPools__initialize();
pk_StrName__initialize();
pk_Compiler__initialize();
pk_current_vm = &pk_default_vm;
pk_VM__ctor(&pk_default_vm);
}
@ -17,14 +20,20 @@ void py_initialize() {
void py_finalize() {
pk_VM__dtor(&pk_default_vm);
pk_current_vm = NULL;
pk_Compiler__finalize();
pk_StrName__finalize();
Pools_finalize();
pk_MemoryPools__finalize();
}
int py_exec(const char* source) {
CodeObject* co = NULL;
pk_SourceData_ src = pk_SourceData__rcnew(source, "main.py", EXEC_MODE, false);
CodeObject co;
Error* err = pk_compile(src, &co);
PK_DECREF(src);
if(err) abort();
pk_VM* vm = pk_current_vm;
Frame* frame = Frame__new(co, &vm->main, NULL, vm->stack.sp, vm->stack.sp, co);
Frame* frame = Frame__new(&co, &vm->main, NULL, vm->stack.sp, vm->stack.sp, &co);
pk_VM__push_frame(vm, frame);
pk_FrameResult res = pk_VM__run_top_frame(vm);
if(res == RES_ERROR) return vm->last_error->type;

View File

@ -1,116 +0,0 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#if __has_include("pocketpy_c.h")
#include "pocketpy_c.h"
#else
// for amalgamated build
#include "pocketpy.h"
#endif
#ifdef _WIN32
#include <windows.h>
// Read one line of UTF-16 console input and return it as UTF-8.
// Sets *eof (if non-NULL) when Ctrl+Z is typed; strips a trailing '\r'
// so CRLF line endings behave like '\n'.
std::string pkpy_platform_getline(bool* eof) {
    HANDLE hStdin = GetStdHandle(STD_INPUT_HANDLE);
    std::wstringstream wss;
    WCHAR buf;
    DWORD read;
    // read wide chars one at a time until newline or console read failure
    while(ReadConsoleW(hStdin, &buf, 1, &read, NULL) && buf != L'\n') {
        if(eof && buf == L'\x1A') *eof = true; // Ctrl+Z
        wss << buf;
    }
    std::wstring wideInput = wss.str();
    // first WideCharToMultiByte call computes the required UTF-8 byte count
    int length = WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), NULL, 0, NULL, NULL);
    std::string output;
    output.resize(length);
    // second call performs the actual conversion into the sized buffer
    WideCharToMultiByte(CP_UTF8, 0, wideInput.c_str(), (int)wideInput.length(), &output[0], length, NULL, NULL);
    if(!output.empty() && output.back() == '\r') output.pop_back();
    return output;
}
#else
// Read one line from stdin; sets *eof (if non-NULL) when the stream ends.
// Returns whatever was read before the failure (possibly empty).
std::string pkpy_platform_getline(bool* eof) {
    std::string line;
    if(std::getline(std::cin, line)) return line;
    if(eof) *eof = true;
    return line;
}
using namespace pkpy;
// builtins.input(prompt=None) -> str
// Prints the prompt (when given) and returns one line read from stdin.
// Returns the number of values pushed onto the VM stack.
static int f_input(pkpy_vm* vm) {
    if(!pkpy_is_none(vm, -1)) {
        pkpy_CString prompt;
        if(!pkpy_to_string(vm, -1, &prompt)) return 0;
        // flush so the prompt is visible before blocking on input
        std::cout << prompt << std::flush;
    }
    bool eof;
    std::string line = pkpy_platform_getline(&eof);
    pkpy_push_string(vm, pkpy_string(line.c_str()));
    return 1;
}
// Entry point: with no arguments runs an interactive REPL; with one
// argument executes that file; otherwise prints usage.
// Fix: the VM is now released on the usage/help path and on the
// file-not-found / open-failure early returns (it previously leaked).
int main(int argc, char** argv) {
#if _WIN32
    // ensure the console speaks UTF-8 on Windows
    SetConsoleCP(CP_UTF8);
    SetConsoleOutputCP(CP_UTF8);
#endif
    pkpy_vm* vm = pkpy_new_vm(true);
    // install builtins.input backed by the platform console reader
    pkpy_push_function(vm, "input(prompt=None) -> str", f_input);
    pkpy_py_import(vm, "builtins");
    pkpy_setattr(vm, pkpy_name("input"));
    if(argc == 1) {
        // interactive REPL: keep reading lines until EOF (Ctrl+D / Ctrl+Z)
        void* repl = pkpy_new_repl(vm);
        bool need_more_lines = false;
        while(true) {
            std::cout << (need_more_lines ? "... " : ">>> ");
            bool eof = false;
            std::string line = pkpy_platform_getline(&eof);
            if(eof) break;
            need_more_lines = pkpy_repl_input(repl, line.c_str());
        }
        pkpy_delete_vm(vm);
        return 0;
    }
    if(argc == 2) {
        std::string argv_1 = argv[1];
        if(argv_1 == "-h" || argv_1 == "--help") goto __HELP;
        std::filesystem::path filepath(argv[1]);
        filepath = std::filesystem::absolute(filepath);
        if(!std::filesystem::exists(filepath)) {
            std::cerr << "File not found: " << argv_1 << std::endl;
            pkpy_delete_vm(vm);
            return 2;
        }
        std::ifstream file(filepath);
        if(!file.is_open()) {
            std::cerr << "Failed to open file: " << argv_1 << std::endl;
            pkpy_delete_vm(vm);
            return 3;
        }
        // slurp the whole file into a string and execute it
        std::string src((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        file.close();
        pkpy_set_main_argv(vm, argc, argv);
        bool ok = pkpy_exec_2(vm, src.c_str(), filepath.filename().string().c_str(), 0, NULL);
        if(!ok) pkpy_clear_error(vm, NULL);
        pkpy_delete_vm(vm);
        return ok ? 0 : 1;
    }
__HELP:
    pkpy_delete_vm(vm);
    std::cout << "Usage: pocketpy [filename]" << std::endl;
    return 0;
}