From 8e106f5227aab4cd1019feb8e18c20ce350a5a89 Mon Sep 17 00:00:00 2001 From: szdytom Date: Tue, 11 Jun 2024 14:58:00 +0800 Subject: [PATCH] Make SourceData c11 --- docs/cheatsheet.md | 2 +- docs/features/precompile.md | 6 +- docs/quick-start/exec.md | 16 ++--- include/pocketpy/common/config.h | 1 + include/pocketpy/common/rcptr.h | 24 +++++++ include/pocketpy/common/sstream.h | 27 +++++++ include/pocketpy/common/str.h | 1 + include/pocketpy/common/vector.h | 8 +++ include/pocketpy/compiler/compiler.hpp | 4 +- include/pocketpy/compiler/lexer.hpp | 4 +- include/pocketpy/interpreter/vm.hpp | 6 +- include/pocketpy/objects/codeobject.hpp | 4 +- include/pocketpy/objects/error.hpp | 8 +-- include/pocketpy/objects/sourcedata.h | 35 +++++++++ include/pocketpy/objects/sourcedata.hpp | 80 +++++++++++++++++---- src/common/rcptr.c | 45 ++++++++++++ src/common/sourcedata.c | 94 +++++++++++++++++++++++++ src/common/sstream.c | 49 +++++++++++++ src/common/str.c | 34 +++++++-- src/compiler/compiler.cpp | 16 ++--- src/compiler/lexer.cpp | 24 +++---- src/interpreter/ceval.cpp | 2 +- src/interpreter/vm.cpp | 18 ++--- src/modules/modules.cpp | 6 +- src/objects/codeobject.cpp | 2 +- src/objects/sourcedata.cpp | 68 ------------------ src/pocketpy.cpp | 16 ++--- src/pocketpy_c.cpp | 6 +- src/tools/repl.cpp | 4 +- 29 files changed, 449 insertions(+), 161 deletions(-) create mode 100644 include/pocketpy/common/rcptr.h create mode 100644 include/pocketpy/common/sstream.h create mode 100644 include/pocketpy/objects/sourcedata.h create mode 100644 src/common/rcptr.c create mode 100644 src/common/sourcedata.c create mode 100644 src/common/sstream.c delete mode 100644 src/objects/sourcedata.cpp diff --git a/docs/cheatsheet.md b/docs/cheatsheet.md index d9896949..5a141b23 100644 --- a/docs/cheatsheet.md +++ b/docs/cheatsheet.md @@ -41,7 +41,7 @@ std::cout << py_cast(vm, obj); // 123 Compile a source string into a code object ```cpp -CodeObject_ co = vm->compile("print('Hello!')", "main.py", 
EXEC_MODE); +CodeObject_ co = vm->compile("print('Hello!')", "main.py", PK_EXEC_MODE); ``` Execute a compiled code object diff --git a/docs/features/precompile.md b/docs/features/precompile.md index 93c56ecc..4e69a48c 100644 --- a/docs/features/precompile.md +++ b/docs/features/precompile.md @@ -11,7 +11,7 @@ You can use `vm->compile` to compile your source code into a `CodeObject_` objec This object can be executed later by `vm->_exec`. ```cpp -CodeObject_ code = vm->compile("print('Hello, world!')", "", EXEC_MODE); +CodeObject_ code = vm->compile("print('Hello, world!')", "", PK_EXEC_MODE); vm->_exec(code); // Hello, world! ``` @@ -27,9 +27,9 @@ It does some basic preprocessing and outputs the result as a human-readable stri ```cpp // precompile the source code into a string -Str source = vm->precompile("print('Hello, world!')", "", EXEC_MODE); +Str source = vm->precompile("print('Hello, world!')", "", PK_EXEC_MODE); -CodeObject code = vm->compile(source, "", EXEC_MODE); +CodeObject code = vm->compile(source, "", PK_EXEC_MODE); vm->_exec(code); // Hello, world! ``` diff --git a/docs/quick-start/exec.md b/docs/quick-start/exec.md index cbc88b20..2ded9192 100644 --- a/docs/quick-start/exec.md +++ b/docs/quick-start/exec.md @@ -8,7 +8,7 @@ order: 93 Once you have a `VM` instance, you can execute python code by calling `exec` method. -#### `PyVar exec(Str source, Str filename, CompileMode mode, PyVar _module=nullptr)` +#### `PyVar exec(Str source, Str filename, pkpy_CompileMode mode, PyVar _module=nullptr)` + `source`, the python source code to be executed + `filename`, the filename of the source code. This is used for error reporting @@ -26,11 +26,11 @@ There are also overloaded versions of `exec` and `eval`, which is useful for sim ### Compile mode The `mode` parameter controls how the source code is compiled. There are 5 possible values: -+ `EXEC_MODE`, this is the default mode. Just do normal execution. 
-+ `EVAL_MODE`, this mode is used for evaluating a single expression. The `source` should be a single expression. It cannot contain any statements. -+ `REPL_MODE`, this mode is used for REPL. It is similar to `EXEC_MODE`, but generates `PRINT_EXPR` opcode when necessary. -+ `CELL_MODE`, this mode is designed for Jupyter like execution. It is similar to `EXEC_MODE`, but generates `PRINT_EXPR` opcode when necessary. -+ `JSON_MODE`, this mode is used for JSON parsing. It is similar to `EVAL_MODE`, but uses a lexing rule designed for JSON. ++ `PK_EXEC_MODE`, this is the default mode. Just do normal execution. ++ `PK_EVAL_MODE`, this mode is used for evaluating a single expression. The `source` should be a single expression. It cannot contain any statements. ++ `PK_REPL_MODE`, this mode is used for REPL. It is similar to `PK_EXEC_MODE`, but generates `PRINT_EXPR` opcode when necessary. ++ `PK_CELL_MODE`, this mode is designed for Jupyter like execution. It is similar to `PK_EXEC_MODE`, but generates `PRINT_EXPR` opcode when necessary. ++ `PK_JSON_MODE`, this mode is used for JSON parsing. It is similar to `PK_EVAL_MODE`, but uses a lexing rule designed for JSON. ### Fine-grained execution @@ -38,7 +38,7 @@ The `mode` parameter controls how the source code is compiled. There are 5 possi In some cases, you may want to execute python code in a more fine-grained way. These two methods are provided for this purpose: -+ `CodeObject_ compile(Str source, Str filename, CompileMode mode, bool unknown_global_scope)` ++ `CodeObject_ compile(Str source, Str filename, pkpy_CompileMode mode, bool unknown_global_scope)` + `PyVar _exec(CodeObject_ co, PyVar _module)` 1. `compile` compiles the source code into a `CodeObject_` instance. Leave `unknown_global_scope` to `false` if you don't know what it means. 
@@ -50,7 +50,7 @@ These two methods are provided for this purpose: ```cpp try{ - CodeObject_ code = vm->compile("a[0]", "main.py", EXEC_MODE, false); + CodeObject_ code = vm->compile("a[0]", "main.py", PK_EXEC_MODE, false); vm->_exec(code, vm->_main); }catch(TopLevelException e){ // use e.summary() to get a summary of the exception diff --git a/include/pocketpy/common/config.h b/include/pocketpy/common/config.h index 547c4a50..242c5cd1 100644 --- a/include/pocketpy/common/config.h +++ b/include/pocketpy/common/config.h @@ -38,6 +38,7 @@ #define PK_DEBUG_NO_AUTO_GC 0 #define PK_DEBUG_GC_STATS 0 #define PK_DEBUG_COMPILER 0 +#define PK_DEBUG_DATASTRUCTURE 0 #ifndef PK_DEBUG_PRECOMPILED_EXEC #define PK_DEBUG_PRECOMPILED_EXEC 0 diff --git a/include/pocketpy/common/rcptr.h b/include/pocketpy/common/rcptr.h new file mode 100644 index 00000000..17a7245b --- /dev/null +++ b/include/pocketpy/common/rcptr.h @@ -0,0 +1,24 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "pocketpy/common/config.h" + +typedef struct { +#if PK_DEBUG_DATASTRUCTURE + unsigned int magic; +#endif + unsigned int ref_c; + void (*dtor)(void *self); +} pkpy_Rcptr_header; + +void pkpy_Rcptr__ctor(void *self); +void pkpy_Rcptr__ctor_withd(void *self, void (*dtor)(void *)); +void pkpy_Rcptr__ref(void *self); +void pkpy_Rcptr__unref(void *self); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h new file mode 100644 index 00000000..28e9ce67 --- /dev/null +++ b/include/pocketpy/common/sstream.h @@ -0,0 +1,27 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "pocketpy/common/vector.h" +#include "pocketpy/common/str.h" +#include + +typedef struct pkpy_SStream { + c11_vector data; +} pkpy_SStream; + +void pkpy_SStream__ctor(pkpy_SStream* self); +void pkpy_SStream__dtor(pkpy_SStream* self); +void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str); +void 
pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n); +void pkpy_SStream__append_Str(pkpy_SStream* self, pkpy_Str* str); +void pkpy_SStream__append_char(pkpy_SStream* self, char c); +void pkpy_SStream__append_int(pkpy_SStream* self, int i); +void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i); +pkpy_Str pkpy_SStream__to_Str(pkpy_SStream* self); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/common/str.h b/include/pocketpy/common/str.h index 7fb2ca6b..f6842c91 100644 --- a/include/pocketpy/common/str.h +++ b/include/pocketpy/common/str.h @@ -34,6 +34,7 @@ inline int pkpy_Str__size(const pkpy_Str* self){ int pkpy_utils__u8_header(unsigned char c, bool suppress); void pkpy_Str__ctor(pkpy_Str* self, const char* data); void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size); +void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size); void pkpy_Str__dtor(pkpy_Str* self); pkpy_Str pkpy_Str__copy(const pkpy_Str* self); pkpy_Str pkpy_Str__concat(const pkpy_Str* self, const pkpy_Str* other); diff --git a/include/pocketpy/common/vector.h b/include/pocketpy/common/vector.h index 0c5d0bb7..2a356357 100644 --- a/include/pocketpy/common/vector.h +++ b/include/pocketpy/common/vector.h @@ -28,6 +28,14 @@ c11_vector c11_vector__copy(const c11_vector* self); void* c11_vector__at(c11_vector* self, int index); void c11_vector__reserve(c11_vector* self, int capacity); +inline int c11_vector__size(c11_vector* self) { + return self->count; +} + +inline void* c11_vector__data(c11_vector* self) { + return self->data; +} + #define c11__getitem(T, self, index) ((T*)(self)->data)[index] #define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value; diff --git a/include/pocketpy/compiler/compiler.hpp b/include/pocketpy/compiler/compiler.hpp index 704523d2..b01a7649 100644 --- a/include/pocketpy/compiler/compiler.hpp +++ b/include/pocketpy/compiler/compiler.hpp @@ -51,7 +51,7 @@ struct Compiler { } CodeEmitContext* ctx() 
noexcept{ return &contexts.back(); } - CompileMode mode() const noexcept{ return lexer.src->mode; } + pkpy_CompileMode mode() const noexcept{ return lexer.src->mode; } NameScope name_scope() const noexcept; CodeObject_ push_global_context() noexcept; @@ -133,7 +133,7 @@ struct Compiler { } public: - Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope = false) noexcept; + Compiler(VM* vm, std::string_view source, const Str& filename, pkpy_CompileMode mode, bool unknown_global_scope = false) noexcept; [[nodiscard]] Error* compile(CodeObject_* out) noexcept; ~Compiler(); }; diff --git a/include/pocketpy/compiler/lexer.hpp b/include/pocketpy/compiler/lexer.hpp index b8e9ba4a..6b168ca5 100644 --- a/include/pocketpy/compiler/lexer.hpp +++ b/include/pocketpy/compiler/lexer.hpp @@ -96,7 +96,7 @@ enum class StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES }; struct Lexer { VM* vm; - std::shared_ptr src; + SourceData src; const char* token_start; const char* curr_char; int current_line = 1; @@ -131,7 +131,7 @@ struct Lexer { [[nodiscard]] Error* IndentationError(const char* msg) noexcept { return _error(true, "IndentationError", msg, NULL); } [[nodiscard]] Error* NeedMoreLines() noexcept { return _error(true, "NeedMoreLines", "", NULL, 0); } - Lexer(VM* vm, std::shared_ptr src) noexcept; + Lexer(VM* vm, SourceData src) noexcept; [[nodiscard]] Error* run() noexcept; [[nodiscard]] Error* from_precompiled() noexcept; diff --git a/include/pocketpy/interpreter/vm.hpp b/include/pocketpy/interpreter/vm.hpp index 1c2ec125..7b35661e 100644 --- a/include/pocketpy/interpreter/vm.hpp +++ b/include/pocketpy/interpreter/vm.hpp @@ -281,9 +281,9 @@ public: #endif #if PK_REGION("Source Execution Methods") - CodeObject_ compile(std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope=false); - Str precompile(std::string_view source, const Str& filename, CompileMode mode); - PyVar 
exec(std::string_view source, Str filename, CompileMode mode, PyObject* _module=nullptr); + CodeObject_ compile(std::string_view source, const Str& filename, pkpy_CompileMode mode, bool unknown_global_scope=false); + Str precompile(std::string_view source, const Str& filename, pkpy_CompileMode mode); + PyVar exec(std::string_view source, Str filename, pkpy_CompileMode mode, PyObject* _module=nullptr); PyVar exec(std::string_view source); PyVar eval(std::string_view source); diff --git a/include/pocketpy/objects/codeobject.hpp b/include/pocketpy/objects/codeobject.hpp index baefd679..f8685092 100644 --- a/include/pocketpy/objects/codeobject.hpp +++ b/include/pocketpy/objects/codeobject.hpp @@ -79,7 +79,7 @@ struct CodeObject { int iblock; // block index }; - std::shared_ptr src; + SourceData src; Str name; vector codes; @@ -99,7 +99,7 @@ struct CodeObject { const CodeBlock& _get_block_codei(int codei) const { return blocks[lines[codei].iblock]; } - CodeObject(std::shared_ptr src, const Str& name); + CodeObject(SourceData src, const Str& name); void _gc_mark(VM*) const; }; diff --git a/include/pocketpy/objects/error.hpp b/include/pocketpy/objects/error.hpp index a125329d..099507bf 100644 --- a/include/pocketpy/objects/error.hpp +++ b/include/pocketpy/objects/error.hpp @@ -33,14 +33,14 @@ struct Exception { PyObject* _self; // weak reference struct Frame { - std::shared_ptr src; // weak ref + SourceData src; // weak ref int lineno; const char* cursor; std::string name; - Str snapshot() const { return src->snapshot(lineno, cursor, name); } + Str snapshot() const { return src.snapshot(lineno, cursor, name); } - Frame(std::shared_ptr src, int lineno, const char* cursor, std::string_view name) : + Frame(SourceData src, int lineno, const char* cursor, std::string_view name) : src(src), lineno(lineno), cursor(cursor), name(name) {} }; @@ -79,7 +79,7 @@ struct TopLevelException : std::exception { struct Error{ const char* type; - std::shared_ptr src; + SourceData src; int 
lineno; const char* cursor; char msg[100]; diff --git a/include/pocketpy/objects/sourcedata.h b/include/pocketpy/objects/sourcedata.h new file mode 100644 index 00000000..82d34481 --- /dev/null +++ b/include/pocketpy/objects/sourcedata.h @@ -0,0 +1,35 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> +#include "pocketpy/common/rcptr.h" +#include "pocketpy/common/str.h" +#include "pocketpy/common/vector.h" + +/* Compilation mode stored in pkpy_SourceData::mode. */ +enum pkpy_CompileMode { PK_EXEC_MODE, PK_EVAL_MODE, PK_REPL_MODE, PK_JSON_MODE, PK_CELL_MODE }; + +/* Reference-counted source text. `_rc` must stay the first member: pkpy_Rcptr__ref / pkpy_Rcptr__unref are handed a pointer to this struct and read it as a pkpy_Rcptr_header. */ +struct pkpy_SourceData { + pkpy_Rcptr_header _rc; + + enum pkpy_CompileMode mode; + bool is_precompiled; + + pkpy_Str filename; + pkpy_Str source; + + c11_vector line_starts; // contains "const char *" + c11_vector _precompiled_tokens; // contains "pkpy_Str" +}; + +/* Initializes `self` from `source_size` bytes of `source`; `filename` is copied. */ +void pkpy_SourceData__ctor(struct pkpy_SourceData *self, const char *source, int source_size, const pkpy_Str *filename, enum pkpy_CompileMode mode); +/* Releases the strings and vectors owned by `self`; does not free `self` itself. */ +void pkpy_SourceData__dtor(struct pkpy_SourceData* self); + +/* Stores the bounds of line `lineno` in *st / *ed; returns false for precompiled sources or lineno == -1. */ +bool pkpy_SourceData__get_line(const struct pkpy_SourceData *self, int lineno, const char **st, const char **ed); +/* Builds the `File "...", line N` traceback entry for `lineno`, followed by the source line and a caret at `cursor` when they are available. */ +pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData *self, int lineno, const char *cursor, const char *name); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/objects/sourcedata.hpp b/include/pocketpy/objects/sourcedata.hpp index 07ff384e..96b716f9 100644 --- a/include/pocketpy/objects/sourcedata.hpp +++ b/include/pocketpy/objects/sourcedata.hpp @@ -2,28 +2,80 @@ #include "pocketpy/common/utils.h" #include "pocketpy/common/str.hpp" +#include "pocketpy/objects/sourcedata.h" namespace pkpy { -enum CompileMode { EXEC_MODE, EVAL_MODE, REPL_MODE, JSON_MODE, CELL_MODE }; - struct SourceData { - PK_ALWAYS_PASS_BY_POINTER(SourceData) + pkpy_SourceData *self; - Str filename; - CompileMode mode; + SourceData(): self(nullptr) { + } - Str source; - vector<const char*> line_starts; + SourceData(std::string_view source, const Str& filename, pkpy_CompileMode
mode) { + /* NOTE(review): the result of std::malloc is not checked before it is handed to the C constructor. */ + self = static_cast<pkpy_SourceData*>(std::malloc(sizeof(pkpy_SourceData))); + pkpy_SourceData__ctor(self, source.data(), source.size(), &filename, mode); + } - bool is_precompiled; - vector<Str> _precompiled_tokens; + /* Copies share the same pkpy_SourceData and add one reference to it. */ + SourceData(const SourceData& other) { + self = other.self; + pkpy_Rcptr__ref(self); + } - SourceData(std::string_view source, const Str& filename, CompileMode mode); - SourceData(const Str& filename, CompileMode mode); - pair<const char*, const char*> _get_line(int lineno) const; - std::string_view get_line(int lineno) const; - Str snapshot(int lineno, const char* cursor, std::string_view name) const; + SourceData& operator=(const SourceData& other) { + if (this != &other) { + /* Acquire the new reference before releasing the old one. */ + pkpy_Rcptr__ref(other.self); + pkpy_Rcptr__unref(self); + self = other.self; + } + return *this; + } + + /* Moves steal the pointer and leave `other` empty; noexcept so containers can move instead of copy. */ + SourceData(SourceData &&other) noexcept { + self = other.self; + other.self = nullptr; + } + + SourceData& operator=(SourceData &&other) noexcept { + if (this != &other) { + pkpy_Rcptr__unref(self); + self = other.self; + other.self = nullptr; + } + return *this; + } + + pkpy_SourceData* get() const { + return self; + } + + pkpy_SourceData* operator->() const { + return self; + } + + /* Returns the text of line `lineno`, or an empty view when the C lookup reports failure. */ + std::string_view get_line(int lineno) const { + const char *st, *ed; + if (pkpy_SourceData__get_line(self, lineno, &st, &ed)) { + return std::string_view(st, ed - st); + } + return ""; + } + + Str& filename() const { + return static_cast<Str&>(self->filename); + } + + Str& source() const { + return static_cast<Str&>(self->source); + } + + Str snapshot(int lineno, const char* cursor, std::string_view name) const { + /* A null view keeps meaning "no name" for the C side. */ + if (name.data() == nullptr) return pkpy_SourceData__snapshot(self, lineno, cursor, nullptr); + /* The C API reads `name` up to a NUL, which a string_view does not guarantee, so pass a terminated copy. */ + Str name_z(name); + return pkpy_SourceData__snapshot(self, lineno, cursor, name_z.c_str()); + } + + ~SourceData() { + pkpy_Rcptr__unref(self); + } }; } // namespace pkpy diff --git a/src/common/rcptr.c b/src/common/rcptr.c new file mode 100644 index 00000000..db230d75 --- /dev/null +++ b/src/common/rcptr.c @@ -0,0 +1,45 @@ +#include "pocketpy/common/rcptr.h" +#include <assert.h> +#include <stdlib.h> + +#define RCPTR_MAGIC 0x3c3d3e3f + +#if PK_DEBUG_DATASTRUCTURE +#define CHECK_MAGIC() assert(self->magic
== RCPTR_MAGIC) +#else +#define CHECK_MAGIC() ((void)0) +#endif + +/* Initializes the header at `self` with one reference and no destructor. */ +void pkpy_Rcptr__ctor(void *self) { + pkpy_Rcptr__ctor_withd(self, NULL); +} + +/* Initializes the header at `self_` with one reference; `dtor` (may be NULL) runs when the last reference is dropped. */ +void pkpy_Rcptr__ctor_withd(void *self_, void (*dtor)(void *)) { + pkpy_Rcptr_header *self = self_; +#if PK_DEBUG_DATASTRUCTURE + self->magic = RCPTR_MAGIC; +#endif + self->ref_c = 1; + self->dtor = dtor; +} + +/* Adds one reference; a NULL pointer is ignored. */ +void pkpy_Rcptr__ref(void *self_) { + if (self_ == NULL) + return; + pkpy_Rcptr_header *self = self_; + CHECK_MAGIC(); + self->ref_c += 1; +} + +/* Drops one reference; on reaching zero runs `dtor` (if set) and then free()s the object. A NULL pointer is ignored. */ +void pkpy_Rcptr__unref(void *self_) { + if (self_ == NULL) + return; + pkpy_Rcptr_header *self = self_; + CHECK_MAGIC(); + self->ref_c -= 1; + if (self->ref_c == 0) { + if (self->dtor) + self->dtor(self_); + free(self_); + } +} diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c new file mode 100644 index 00000000..c9ce0b5e --- /dev/null +++ b/src/common/sourcedata.c @@ -0,0 +1,94 @@ +#include "pocketpy/objects/sourcedata.h" +#include "pocketpy/common/sstream.h" +#include <ctype.h> +#include <stdlib.h> +#include <string.h> + +/* Adapter with the exact void (*)(void *) signature that pkpy_Rcptr__ctor_withd expects. */ +static void pkpy_SourceData__dtor_erased(void* self) { + pkpy_SourceData__dtor((struct pkpy_SourceData*)self); +} + +void pkpy_SourceData__ctor(struct pkpy_SourceData* self, + const char* source, + int source_size, + const pkpy_Str* filename, + enum pkpy_CompileMode mode) { + pkpy_Rcptr__ctor_withd(self, &pkpy_SourceData__dtor_erased); + + self->filename = pkpy_Str__copy(filename); // OPTIMIZEME? + self->mode = mode; + + c11_vector__ctor(&self->line_starts, sizeof(const char*)); + c11_vector__ctor(&self->_precompiled_tokens, sizeof(pkpy_Str)); + + /* Skip a UTF-8 BOM. The length guard keeps the probe inside the buffer, whose size is given explicitly rather than by a terminator. */ + int index = (source_size >= 3 && strncmp(source, "\xEF\xBB\xBF", 3) == 0) ?
3 : 0; + int len = source_size - index; + for(int i = 0; i < source_size; ++i) + len -= (source[i] == '\r'); + + /* Copy the text without '\r' into a fresh NUL-terminated buffer. NOTE(review): the malloc result is used unchecked. */ + char *buf = malloc(len + 1), *p = buf; + buf[len] = '\0'; + for(; index < source_size; ++index) { + if(source[index] != '\r') *(p++) = source[index]; + } + pkpy_Str__take_buf(&self->source, buf, len); + + self->is_precompiled = (strncmp(pkpy_Str__data(&self->source), "pkpy:", 5) == 0); + c11_vector__push_back(const char*, &self->line_starts, pkpy_Str__data(&self->source)); +} + +/* Releases the members owned by `self`. NOTE(review): the pkpy_Str elements stored in `_precompiled_tokens` are not destroyed individually here; confirm who owns their buffers. */ +void pkpy_SourceData__dtor(struct pkpy_SourceData* self) { + pkpy_Str__dtor(&self->filename); + pkpy_Str__dtor(&self->source); + c11_vector__dtor(&self->line_starts); + c11_vector__dtor(&self->_precompiled_tokens); +} + +/* Looks up source line `lineno` (1-based; values below 1 map to the first line) and stores its bounds in *st / *ed, capped at 300 characters. Returns false, leaving *st / *ed untouched, for precompiled sources, lineno == -1, or a line with no entry in line_starts. */ +bool pkpy_SourceData__get_line(const struct pkpy_SourceData* self, int lineno, const char** st, const char** ed) { + if(self->is_precompiled || lineno == -1) { return false; } + lineno -= 1; + if(lineno < 0) lineno = 0; + if(lineno >= self->line_starts.count) { return false; } + const char* _start = c11__getitem(const char*, &self->line_starts, lineno); + const char* i = _start; + // max 300 chars + while(*i != '\n' && *i != '\0' && i - _start < 300) + i++; + *st = _start; + *ed = i; + return true; +} + +/* Formats one traceback entry: the file/line header, optional ", in <name>", then the source line with a caret under `cursor` when the line is available. */ +pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData* self, int lineno, const char* cursor, const char* name) { + pkpy_SStream ss; + pkpy_SStream__ctor(&ss); + pkpy_SStream__append_cstr(&ss, "  File \""); + pkpy_SStream__append_Str(&ss, &self->filename); + pkpy_SStream__append_cstr(&ss, "\", line "); + pkpy_SStream__append_int(&ss, lineno); + + if(name) { + pkpy_SStream__append_cstr(&ss, ", in "); + /* `name` is a plain C string, not a pkpy_Str. */ + pkpy_SStream__append_cstr(&ss, name); + } + + if(!self->is_precompiled) { + pkpy_SStream__append_char(&ss, '\n'); + const char *st = NULL, *ed; + if(pkpy_SourceData__get_line(self, lineno, &st, &ed)) { + /* Strip leading blanks; isblank takes the character as an unsigned char value. */ + while(st < ed && isblank((unsigned char)*st)) + ++st; + if(st < ed) { + pkpy_SStream__append_cstr(&ss, "    "); + pkpy_SStream__append_cstrn(&ss, st, ed - st); + if(cursor && st <= cursor && cursor <= ed) { + pkpy_SStream__append_cstr(&ss, "\n    "); +
for(int i = 0; i < (cursor - st); ++i) + pkpy_SStream__append_char(&ss, ' '); + pkpy_SStream__append_cstr(&ss, "^"); + } + } else { + st = NULL; + } + } + + if(!st) { pkpy_SStream__append_cstr(&ss, "    <?>"); } + } + pkpy_Str res = pkpy_SStream__to_Str(&ss); + pkpy_SStream__dtor(&ss); + return res; +} diff --git a/src/common/sstream.c b/src/common/sstream.c new file mode 100644 index 00000000..7ebea145 --- /dev/null +++ b/src/common/sstream.c @@ -0,0 +1,49 @@ +#include "pocketpy/common/sstream.h" +#include <stdio.h> +#include "sstream.h" + +/* Initializes an empty byte buffer. */ +void pkpy_SStream__ctor(pkpy_SStream* self) { + c11_vector__ctor(&self->data, sizeof(char)); +} + +void pkpy_SStream__dtor(pkpy_SStream* self) { + c11_vector__dtor(&self->data); +} + +/* Appends the bytes of `str` up to, but not including, its NUL terminator. */ +void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str) { + for (size_t i = 0; str[i] != '\0'; i++) { + c11_vector__push_back(char, &self->data, str[i]); + } +} + +/* Appends exactly `n` bytes of `str`; a non-positive `n` appends nothing. */ +void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n) { + for (int i = 0; i < n; i++) { + c11_vector__push_back(char, &self->data, str[i]); + } +} + +void pkpy_SStream__append_Str(pkpy_SStream* self, pkpy_Str* str) { + pkpy_SStream__append_cstr(self, pkpy_Str__data(str)); +} + +void pkpy_SStream__append_char(pkpy_SStream* self, char c) { + c11_vector__push_back(char, &self->data, c); +} + +/* Appends the decimal text of `i`. */ +void pkpy_SStream__append_int(pkpy_SStream* self, int i) { + char str[12]; // sign, 10 digits and the null terminator + snprintf(str, sizeof(str), "%d", i); + pkpy_SStream__append_cstr(self, str); +} + +/* Appends the decimal text of `i`. */ +void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i) { + char str[21]; // sign, 19 digits and the null terminator + snprintf(str, sizeof(str), "%lld", (long long)i); + pkpy_SStream__append_cstr(self, str); +} + +/* Returns a new pkpy_Str built from the accumulated bytes; the stream itself is left for the caller to destroy. */ +pkpy_Str pkpy_SStream__to_Str(pkpy_SStream* self) { + pkpy_Str res; + pkpy_Str__ctor2(&res, c11_vector__data(&self->data), c11_vector__size(&self->data)); + return res; +} diff --git a/src/common/str.c b/src/common/str.c index bf20a4cd..50af2b3e 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -22,6 +22,32 @@ void
pkpy_Str__ctor(pkpy_Str *self, const char *data){ pkpy_Str__ctor2(self, data, strlen(data)); } +/* Clears self->is_ascii if any of the first self->size bytes of `p` is non-ASCII; never sets it. */ static void pkpy_Str__check_ascii(pkpy_Str *self, char *p) { + for(int i = 0; i < self->size; i++){ + if(!isascii(p[i])){ + self->is_ascii = false; + break; + } + } +} + +/* Initializes `self` from the buffer `data` of `size` bytes and takes ownership of it: a short string is copied into the inline storage and `data` is freed, otherwise `data` itself becomes the string storage. The stored pointer is not re-terminated here, so `data` presumably must already end in NUL; confirm with callers. */ void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size) { + self->size = size; + self->is_ascii = true; + self->is_sso = size < sizeof(self->_inlined); + char* p; + if(self->is_sso){ + /* inline storage: copy, terminate, and release the caller's allocation */ p = self->_inlined; + memcpy(p, data, size); + p[size] = '\0'; + free(data); + }else{ + /* adopt the caller's allocation without copying */ self->_ptr = data; + p = self->_ptr; + } + pkpy_Str__check_ascii(self, p); +} + void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ self->size = size; self->is_ascii = true; @@ -35,13 +61,7 @@ void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ } memcpy(p, data, size); p[size] = '\0'; - // check is_ascii - for(int i = 0; i < size; i++){ - if(!isascii(p[i])){ - self->is_ascii = false; - break; - } - } + pkpy_Str__check_ascii(self, p); } void pkpy_Str__dtor(pkpy_Str *self){ diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 1db9e898..8ad846b6 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -179,7 +179,7 @@ bool Compiler::match_newlines(bool* need_more_lines) noexcept{ consumed = true; } if(need_more_lines) { - *need_more_lines = (mode() == REPL_MODE && curr().type == TK("@eof")); + *need_more_lines = (mode() == PK_REPL_MODE && curr().type == TK("@eof")); } return consumed; } @@ -1040,7 +1040,7 @@ Error* Compiler::compile_stmt() noexcept{ /*************************************************/ case TK("=="): { consume(TK("@id")); - if(mode() != EXEC_MODE) return SyntaxError("'label' is only available in EXEC_MODE"); + if(mode() != PK_EXEC_MODE) return SyntaxError("'label' is only available in PK_EXEC_MODE"); if(!ctx()->add_label(prev().str())) { Str escaped(prev().str().escape()); return SyntaxError("label %s already exists", escaped.c_str()); @@ -1050,7 +1050,7
@@ Error* Compiler::compile_stmt() noexcept{ } break; case TK("->"): consume(TK("@id")); - if(mode() != EXEC_MODE) return SyntaxError("'goto' is only available in EXEC_MODE"); + if(mode() != PK_EXEC_MODE) return SyntaxError("'goto' is only available in PK_EXEC_MODE"); ctx()->emit_(OP_GOTO, StrName(prev().sv()).index, prev().line); consume_end_stmt(); break; @@ -1081,7 +1081,7 @@ Error* Compiler::compile_stmt() noexcept{ } if(!is_typed_name) { ctx()->s_emit_top(); - if((mode() == CELL_MODE || mode() == REPL_MODE) && name_scope() == NAME_GLOBAL) { + if((mode() == PK_CELL_MODE || mode() == PK_REPL_MODE) && name_scope() == NAME_GLOBAL) { ctx()->emit_(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE); } else { ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE); @@ -1280,8 +1280,8 @@ Error* Compiler::read_literal(PyVar* out) noexcept{ } } -Compiler::Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope) noexcept: - lexer(vm, std::make_shared(source, filename, mode)){ +Compiler::Compiler(VM* vm, std::string_view source, const Str& filename, pkpy_CompileMode mode, bool unknown_global_scope) noexcept: + lexer(vm, {source, filename, mode}){ this->vm = vm; this->unknown_global_scope = unknown_global_scope; init_pratt_rules(); @@ -1306,7 +1306,7 @@ Error* Compiler::compile(CodeObject_* out) noexcept{ advance(); // skip @sof, so prev() is always valid match_newlines(); // skip possible leading '\n' - if(mode() == EVAL_MODE) { + if(mode() == PK_EVAL_MODE) { check(EXPR_TUPLE()); ctx()->s_emit_top(); consume(TK("@eof")); @@ -1314,7 +1314,7 @@ Error* Compiler::compile(CodeObject_* out) noexcept{ check(pop_context()); *out = code; return NULL; - } else if(mode() == JSON_MODE) { + } else if(mode() == PK_JSON_MODE) { check(EXPR()); Expr* e = ctx()->s_popx(); if(!e->is_json_object()) return SyntaxError("expect a JSON object, literal or array"); diff --git a/src/compiler/lexer.cpp b/src/compiler/lexer.cpp index 77130ad4..29a388f3 100644 --- 
a/src/compiler/lexer.cpp +++ b/src/compiler/lexer.cpp @@ -99,7 +99,7 @@ char Lexer::eatchar_include_newline() noexcept{ curr_char++; if(c == '\n') { current_line++; - src->line_starts.push_back(curr_char); + c11_vector__push_back(const char*, &src->line_starts, curr_char); } return c; } @@ -145,7 +145,7 @@ Error* Lexer::eat_name() noexcept{ if(length == 0) return SyntaxError("@id contains invalid char"); std::string_view name(token_start, length); - if(src->mode == JSON_MODE) { + if(src->mode == PK_JSON_MODE) { if(name == "true") { add_token(TK("True")); } else if(name == "false") { @@ -238,7 +238,7 @@ Error* Lexer::eat_string_until(char quote, bool raw, Str* out) noexcept{ break; } if(c == '\0') { - if(quote3 && src->mode == REPL_MODE) return NeedMoreLines(); + if(quote3 && src->mode == PK_REPL_MODE) return NeedMoreLines(); return SyntaxError("EOL while scanning string literal"); } if(c == '\n') { @@ -375,7 +375,7 @@ Error* Lexer::lex_one_token(bool* eof) noexcept{ // line continuation character char c = eatchar_include_newline(); if(c != '\n') { - if(src->mode == REPL_MODE && c == '\0') return NeedMoreLines(); + if(src->mode == PK_REPL_MODE && c == '\0') return NeedMoreLines(); return SyntaxError("expected newline after line continuation character"); } eat_spaces(); @@ -533,9 +533,9 @@ Error* Lexer::SyntaxError(const char* fmt, ...) 
noexcept{ return err; } -Lexer::Lexer(VM* vm, std::shared_ptr src) noexcept : vm(vm), src(src){ - this->token_start = src->source.c_str(); - this->curr_char = src->source.c_str(); +Lexer::Lexer(VM* vm, SourceData src) noexcept : vm(vm), src(src){ + this->token_start = src.source().c_str(); + this->curr_char = src.source().c_str(); } Error* Lexer::run() noexcept{ @@ -557,7 +557,7 @@ Error* Lexer::run() noexcept{ } Error* Lexer::from_precompiled() noexcept{ - TokenDeserializer deserializer(src->source.c_str()); + TokenDeserializer deserializer(src.source().c_str()); deserializer.curr += 5; // skip "pkpy:" std::string_view version = deserializer.read_string('\n'); @@ -569,9 +569,9 @@ Error* Lexer::from_precompiled() noexcept{ } int count = deserializer.read_count(); - vector& precompiled_tokens = src->_precompiled_tokens; + auto precompiled_tokens = &src->_precompiled_tokens; for(int i = 0; i < count; i++) { - precompiled_tokens.push_back(deserializer.read_string('\n')); + c11_vector__push_back(Str, precompiled_tokens, Str(deserializer.read_string('\n'))); } count = deserializer.read_count(); @@ -580,8 +580,8 @@ Error* Lexer::from_precompiled() noexcept{ t.type = (unsigned char)deserializer.read_uint(','); if(is_raw_string_used(t.type)) { i64 index = deserializer.read_uint(','); - t.start = precompiled_tokens[index].c_str(); - t.length = precompiled_tokens[index].size; + t.start = c11__getitem(Str, precompiled_tokens, index).c_str(); + t.length = c11__getitem(Str, precompiled_tokens, index).size; } else { t.start = nullptr; t.length = 0; diff --git a/src/interpreter/ceval.cpp b/src/interpreter/ceval.cpp index 978d296e..3f0aa968 100644 --- a/src/interpreter/ceval.cpp +++ b/src/interpreter/ceval.cpp @@ -792,7 +792,7 @@ PyVar VM::__run_top_frame() { auto it = __cached_codes.try_get(string); CodeObject_ code; if(it == nullptr) { - code = vm->compile(string, "", EVAL_MODE, true); + code = vm->compile(string, "", PK_EVAL_MODE, true); __cached_codes.insert(string, code); } 
else { code = *it; diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index ce4d1114..3b19742d 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -173,7 +173,7 @@ bool VM::issubclass(Type cls, Type base) { return false; } -PyVar VM::exec(std::string_view source, Str filename, CompileMode mode, PyObject* _module) { +PyVar VM::exec(std::string_view source, Str filename, pkpy_CompileMode mode, PyObject* _module) { if(_module == nullptr) _module = _main; try { #if PK_DEBUG_PRECOMPILED_EXEC == 1 @@ -197,9 +197,9 @@ PyVar VM::exec(std::string_view source, Str filename, CompileMode mode, PyObject return nullptr; } -PyVar VM::exec(std::string_view source) { return exec(source, "main.py", EXEC_MODE); } +PyVar VM::exec(std::string_view source) { return exec(source, "main.py", PK_EXEC_MODE); } -PyVar VM::eval(std::string_view source) { return exec(source, "", EVAL_MODE); } +PyVar VM::eval(std::string_view source) { return exec(source, "", PK_EVAL_MODE); } PyObject* VM::new_type_object(PyObject* mod, StrName name, Type base, bool subclass_enabled, PyTypeInfo::Vt vt) { PyObject* obj = heap._new(tp_type, Type(_all_types.size())); @@ -391,7 +391,7 @@ PyObject* VM::py_import(Str path, bool throw_err) { // _lazy_modules.erase(it); // no need to erase } auto _ = __import_context.scope(path, is_init); - CodeObject_ code = compile(source, filename, EXEC_MODE); + CodeObject_ code = compile(source, filename, PK_EXEC_MODE); Str name_cpnt = path_cpnts.back(); path_cpnts.pop_back(); @@ -606,12 +606,12 @@ PyVar VM::__py_exec_internal(const CodeObject_& code, PyVar globals, PyVar local } void VM::py_exec(std::string_view source, PyVar globals, PyVar locals) { - CodeObject_ code = vm->compile(source, "", EXEC_MODE, true); + CodeObject_ code = vm->compile(source, "", PK_EXEC_MODE, true); __py_exec_internal(code, globals, locals); } PyVar VM::py_eval(std::string_view source, PyVar globals, PyVar locals) { - CodeObject_ code = vm->compile(source, "", EVAL_MODE, true); 
+ CodeObject_ code = vm->compile(source, "", PK_EVAL_MODE, true); return __py_exec_internal(code, globals, locals); } @@ -1358,7 +1358,7 @@ PyObject* VM::bind(PyObject* obj, const char* sig, const char* docstring, Native int length = snprintf(buffer, sizeof(buffer), "def %s : pass", sig); std::string_view source(buffer, length); // fn(a, b, *c, d=1) -> None - CodeObject_ co = compile(source, "", EXEC_MODE); + CodeObject_ co = compile(source, "", PK_EXEC_MODE); assert(co->func_decls.size() == 1); FuncDecl_ decl = co->func_decls[0]; @@ -1805,12 +1805,12 @@ void VM::__breakpoint() { std::string arg = line.substr(space + 1); if(arg.empty()) continue; // ignore empty command if(cmd == "p" || cmd == "print") { - CodeObject_ code = compile(arg, "", EVAL_MODE, true); + CodeObject_ code = compile(arg, "", PK_EVAL_MODE, true); PyVar retval = vm->_exec(code.get(), frame_0->_module, frame_0->_callable, frame_0->_locals); stdout_write(vm->py_repr(retval)); stdout_write("\n"); } else if(cmd == "!") { - CodeObject_ code = compile(arg, "", EXEC_MODE, true); + CodeObject_ code = compile(arg, "", PK_EXEC_MODE, true); vm->_exec(code.get(), frame_0->_module, frame_0->_callable, frame_0->_locals); } continue; diff --git a/src/modules/modules.cpp b/src/modules/modules.cpp index c3cdca41..59cc599b 100644 --- a/src/modules/modules.cpp +++ b/src/modules/modules.cpp @@ -107,7 +107,7 @@ void add_module_json(VM* vm) { } else { sv = CAST(Str&, args[0]).sv(); } - CodeObject_ code = vm->compile(sv, "", JSON_MODE); + CodeObject_ code = vm->compile(sv, "", PK_JSON_MODE); return vm->_exec(code, vm->callstack.top()._module); }); @@ -229,7 +229,7 @@ void add_module_dis(VM* vm) { PyVar obj = args[0]; if(is_type(obj, vm->tp_str)) { const Str& source = CAST(Str, obj); - code = vm->compile(source, "", EXEC_MODE); + code = vm->compile(source, "", PK_EXEC_MODE); } PyVar f = obj; if(is_type(f, vm->tp_bound_method)) f = CAST(BoundMethod, obj).func; @@ -246,7 +246,7 @@ void add_module_gc(VM* vm) { void 
add_module_enum(VM* vm) { PyObject* mod = vm->new_module("enum"); - CodeObject_ code = vm->compile(kPythonLibs__enum, "enum.py", EXEC_MODE); + CodeObject_ code = vm->compile(kPythonLibs__enum, "enum.py", PK_EXEC_MODE); vm->_exec(code, mod); PyVar Enum = mod->attr("Enum"); vm->_all_types[PK_OBJ_GET(Type, Enum)].on_end_subclass = [](VM* vm, PyTypeInfo* new_ti) { diff --git a/src/objects/codeobject.cpp b/src/objects/codeobject.cpp index 28087330..949810d6 100644 --- a/src/objects/codeobject.cpp +++ b/src/objects/codeobject.cpp @@ -2,7 +2,7 @@ namespace pkpy { -CodeObject::CodeObject(std::shared_ptr src, const Str& name) : +CodeObject::CodeObject(SourceData src, const Str& name) : src(src), name(name), nlocals(0), start_line(-1), end_line(-1) { blocks.push_back(CodeBlock(CodeBlockType::NO_BLOCK, -1, 0)); } diff --git a/src/objects/sourcedata.cpp b/src/objects/sourcedata.cpp deleted file mode 100644 index f6d96ee8..00000000 --- a/src/objects/sourcedata.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "pocketpy/objects/sourcedata.hpp" - -namespace pkpy { -SourceData::SourceData(std::string_view source, const Str& filename, CompileMode mode) : - filename(filename), mode(mode) { - int index = 0; - // Skip utf8 BOM if there is any. 
- if(strncmp(source.data(), "\xEF\xBB\xBF", 3) == 0) index += 3; - // Drop all '\r' - SStream ss(source.size() + 1); - while(index < source.size()) { - if(source[index] != '\r') ss << source[index]; - index++; - } - this->source = ss.str(); - if(this->source.size > 5 && this->source.sv().substr(0, 5) == "pkpy:") { - this->is_precompiled = true; - } else { - this->is_precompiled = false; - } - line_starts.push_back(this->source.c_str()); -} - -SourceData::SourceData(const Str& filename, CompileMode mode) : filename(filename), mode(mode) { - line_starts.push_back(this->source.c_str()); -} - -pair SourceData::_get_line(int lineno) const { - if(is_precompiled || lineno == -1) return {nullptr, nullptr}; - lineno -= 1; - if(lineno < 0) lineno = 0; - const char* _start = line_starts[lineno]; - const char* i = _start; - // max 300 chars - while(*i != '\n' && *i != '\0' && i - _start < 300) - i++; - return {_start, i}; -} - -std::string_view SourceData::get_line(int lineno) const { - auto [_0, _1] = _get_line(lineno); - if(_0 && _1) return std::string_view(_0, _1 - _0); - return ""; -} - -Str SourceData::snapshot(int lineno, const char* cursor, std::string_view name) const { - SStream ss; - ss << " " << "File \"" << filename << "\", line " << lineno; - if(!name.empty()) ss << ", in " << name; - if(!is_precompiled) { - ss << '\n'; - pair pair = _get_line(lineno); - Str line = ""; - int removed_spaces = 0; - if(pair.first && pair.second) { - line = Str(pair.first, pair.second - pair.first).lstrip(); - removed_spaces = pair.second - pair.first - line.length(); - if(line.empty()) line = ""; - } - ss << " " << line; - if(cursor && line != "" && cursor >= pair.first && cursor <= pair.second) { - auto column = cursor - pair.first - removed_spaces; - if(column >= 0) ss << "\n " << std::string(column, ' ') << "^"; - } - } - return ss.str(); -} -} // namespace pkpy diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 4b523275..a61e6388 100644 --- a/src/pocketpy.cpp +++ 
b/src/pocketpy.cpp @@ -226,11 +226,11 @@ void __init_builtins(VM* _vm) { const Str& filename = CAST(Str&, args[1]); const Str& mode = CAST(Str&, args[2]); if(mode == "exec") { - return VAR(vm->precompile(source, filename, EXEC_MODE)); + return VAR(vm->precompile(source, filename, PK_EXEC_MODE)); } else if(mode == "eval") { - return VAR(vm->precompile(source, filename, EVAL_MODE)); + return VAR(vm->precompile(source, filename, PK_EVAL_MODE)); } else if(mode == "single") { - return VAR(vm->precompile(source, filename, CELL_MODE)); + return VAR(vm->precompile(source, filename, PK_CELL_MODE)); } else { vm->ValueError("compile() mode must be 'exec', 'eval' or 'single'"); return vm->None; @@ -1672,12 +1672,12 @@ void VM::__post_init_builtin_types() { try { // initialize dummy func_decl for exec/eval - CodeObject_ dynamic_co = compile("def _(): pass", "", EXEC_MODE); + CodeObject_ dynamic_co = compile("def _(): pass", "", PK_EXEC_MODE); __dynamic_func_decl = dynamic_co->func_decls[0]; // initialize builtins - CodeObject_ code = compile(kPythonLibs_builtins, "", EXEC_MODE); + CodeObject_ code = compile(kPythonLibs_builtins, "", PK_EXEC_MODE); this->_exec(code, this->builtins); - code = compile(kPythonLibs__set, "", EXEC_MODE); + code = compile(kPythonLibs__set, "", PK_EXEC_MODE); this->_exec(code, this->builtins); } catch(TopLevelException e) { std::cerr << e.summary() << std::endl; @@ -1704,7 +1704,7 @@ void VM::__post_init_builtin_types() { #endif } -CodeObject_ VM::compile(std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope) { +CodeObject_ VM::compile(std::string_view source, const Str& filename, pkpy_CompileMode mode, bool unknown_global_scope) { Compiler compiler(this, source, filename, mode, unknown_global_scope); CodeObject_ code; Error* err = compiler.compile(&code); @@ -1726,7 +1726,7 @@ void VM::__compile_error(Error* err){ _error(__last_exception); } -Str VM::precompile(std::string_view source, const Str& filename, 
CompileMode mode) { +Str VM::precompile(std::string_view source, const Str& filename, pkpy_CompileMode mode) { Compiler compiler(this, source, filename, mode, false); Str out; Error* err = compiler.lexer.precompile(&out); diff --git a/src/pocketpy_c.cpp b/src/pocketpy_c.cpp index 60870d91..e8417db4 100644 --- a/src/pocketpy_c.cpp +++ b/src/pocketpy_c.cpp @@ -60,7 +60,7 @@ bool pkpy_exec(pkpy_vm* vm_handle, const char* source) { PK_ASSERT_NO_ERROR() PyVar res; PK_PROTECTED( - CodeObject_ code = vm->compile(source, "main.py", EXEC_MODE); + CodeObject_ code = vm->compile(source, "main.py", PK_EXEC_MODE); res = vm->_exec(code, vm->_main); ) return res != nullptr; @@ -76,7 +76,7 @@ bool pkpy_exec_2(pkpy_vm* vm_handle, const char* source, const char* filename, i }else{ mod = vm->_modules[module].get(); // may raise } - CodeObject_ code = vm->compile(source, filename, (CompileMode)mode); + CodeObject_ code = vm->compile(source, filename, (pkpy_CompileMode)mode); res = vm->_exec(code, mod); ) return res != nullptr; @@ -417,7 +417,7 @@ bool pkpy_eval(pkpy_vm* vm_handle, const char* source) { VM* vm = (VM*)vm_handle; PK_ASSERT_NO_ERROR() PK_PROTECTED( - CodeObject_ co = vm->compile(source, "", EVAL_MODE); + CodeObject_ co = vm->compile(source, "", PK_EVAL_MODE); PyVar ret = vm->_exec(co, vm->_main); vm->s_data.push(ret); ) diff --git a/src/tools/repl.cpp b/src/tools/repl.cpp index 9758653a..30714669 100644 --- a/src/tools/repl.cpp +++ b/src/tools/repl.cpp @@ -12,7 +12,7 @@ REPL::REPL(VM* vm) : vm(vm) { } bool REPL::input(std::string line) { - CompileMode mode = REPL_MODE; + pkpy_CompileMode mode = PK_REPL_MODE; if(need_more_lines) { buffer += line; buffer += '\n'; @@ -25,7 +25,7 @@ bool REPL::input(std::string line) { need_more_lines = 0; line = buffer; buffer.clear(); - mode = CELL_MODE; + mode = PK_CELL_MODE; } else { return true; }