From 031f189a4aba030e10e1c6f02a92e2eccaa6c2b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=96=B9=E8=80=8C=E9=9D=99?= Date: Tue, 11 Jun 2024 20:28:51 +0800 Subject: [PATCH] Make SourceData and SStream c11 (#266) * Make SourceData c11 * make it compile * add a marco * follow up rename of c11_vetor__push * remove isascii marco * fix empty function name handling * change enum back * Remove trival accessor * make pkpy_Str__take_buf hidden * make it compile * remove rcptr and use shared_ptr instead * change enum name in C++ code back * fix type problem * remove strdup polyfill * remove xmake * ... --- .gitignore | 5 +- include/pocketpy/common/sstream.h | 27 +++++++ include/pocketpy/compiler/compiler.hpp | 2 +- include/pocketpy/objects/sourcedata.h | 32 +++++++++ include/pocketpy/objects/sourcedata.hpp | 32 ++++----- src/common/sourcedata.c | 95 +++++++++++++++++++++++++ src/common/sstream.c | 48 +++++++++++++ src/common/str.c | 34 +++++++-- src/compiler/compiler.cpp | 6 +- src/compiler/lexer.cpp | 16 ++--- src/interpreter/profiler.cpp | 4 +- src/interpreter/vm.cpp | 2 +- src/objects/sourcedata.cpp | 68 ------------------ 13 files changed, 263 insertions(+), 108 deletions(-) create mode 100644 include/pocketpy/common/sstream.h create mode 100644 include/pocketpy/objects/sourcedata.h create mode 100644 src/common/sourcedata.c create mode 100644 src/common/sstream.c delete mode 100644 src/objects/sourcedata.cpp diff --git a/.gitignore b/.gitignore index e4db4f86..ce195c89 100644 --- a/.gitignore +++ b/.gitignore @@ -30,4 +30,7 @@ pocketpy.dSYM libpocketpy.dylib.dSYM/ main.dSYM/ -docs/references.md \ No newline at end of file +docs/references.md + +.xmake +.vs diff --git a/include/pocketpy/common/sstream.h b/include/pocketpy/common/sstream.h new file mode 100644 index 00000000..d0b7cfc5 --- /dev/null +++ b/include/pocketpy/common/sstream.h @@ -0,0 +1,27 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include "pocketpy/common/vector.h" +#include "pocketpy/common/str.h" +#include + +typedef struct pkpy_SStream { + c11_vector data; +} pkpy_SStream; + +void pkpy_SStream__ctor(pkpy_SStream* self); +void pkpy_SStream__dtor(pkpy_SStream* self); +void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str); +void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n); +void pkpy_SStream__append_Str(pkpy_SStream* self, const pkpy_Str* str); +void pkpy_SStream__append_char(pkpy_SStream* self, char c); +void pkpy_SStream__append_int(pkpy_SStream* self, int i); +void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i); +pkpy_Str pkpy_SStream__to_Str(const pkpy_SStream* self); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/compiler/compiler.hpp b/include/pocketpy/compiler/compiler.hpp index 704523d2..39132cc2 100644 --- a/include/pocketpy/compiler/compiler.hpp +++ b/include/pocketpy/compiler/compiler.hpp @@ -41,7 +41,7 @@ struct Compiler { #if PK_DEBUG_COMPILER if(__i>=0 && __ifilename.c_str(), + lexer.src.filename().c_str(), curr().line, TK_STR(curr().type), curr().str().escape().c_str() diff --git a/include/pocketpy/objects/sourcedata.h b/include/pocketpy/objects/sourcedata.h new file mode 100644 index 00000000..7f8d7cc0 --- /dev/null +++ b/include/pocketpy/objects/sourcedata.h @@ -0,0 +1,32 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "pocketpy/common/str.h" +#include "pocketpy/common/vector.h" + +enum CompileMode { EXEC_MODE, EVAL_MODE, REPL_MODE, JSON_MODE, CELL_MODE }; + +struct pkpy_SourceData { + enum CompileMode mode; + bool is_precompiled; + + pkpy_Str filename; + pkpy_Str source; + + c11_vector line_starts; // contains "const char *" + c11_vector _precompiled_tokens; // contains "pkpy_Str" +}; + +void pkpy_SourceData__ctor(struct pkpy_SourceData *self, const char *source, int source_size, const pkpy_Str *filename, enum CompileMode mode); +void pkpy_SourceData__dtor(struct pkpy_SourceData* self); + +bool pkpy_SourceData__get_line(const struct pkpy_SourceData *self, int lineno, const char **st, const char **ed); +pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData *self, int lineno, const char *cursor, const char *name); + +#ifdef __cplusplus +} +#endif diff --git a/include/pocketpy/objects/sourcedata.hpp b/include/pocketpy/objects/sourcedata.hpp index 07ff384e..2295e294 100644 --- a/include/pocketpy/objects/sourcedata.hpp +++ b/include/pocketpy/objects/sourcedata.hpp @@ -2,28 +2,26 @@ #include "pocketpy/common/utils.h" #include "pocketpy/common/str.hpp" +#include "pocketpy/objects/sourcedata.h" namespace pkpy { -enum CompileMode { EXEC_MODE, EVAL_MODE, REPL_MODE, JSON_MODE, CELL_MODE }; +struct SourceData : public pkpy_SourceData { + SourceData(std::string_view source, const Str& filename, CompileMode mode) { + pkpy_SourceData__ctor(this, source.data(), source.size(), &filename, mode); + } -struct SourceData { - PK_ALWAYS_PASS_BY_POINTER(SourceData) + std::string_view get_line(int lineno) const { + const char *st, *ed; + if (pkpy_SourceData__get_line(this, lineno, &st, &ed)) { + return std::string_view(st, ed - st); + } + return ""; + } - Str filename; - CompileMode mode; - - Str source; - vector line_starts; - - bool is_precompiled; - vector _precompiled_tokens; - - SourceData(std::string_view source, const Str& filename, CompileMode mode); - SourceData(const Str& filename, CompileMode mode); - pair _get_line(int lineno) const; - std::string_view get_line(int lineno) const; - Str snapshot(int lineno, const char* cursor, std::string_view name) const; + Str snapshot(int lineno, const char* cursor, std::string_view name) const { + return pkpy_SourceData__snapshot(this, lineno, cursor, name.empty() ? nullptr : name.data()); + } }; } // namespace pkpy diff --git a/src/common/sourcedata.c b/src/common/sourcedata.c new file mode 100644 index 00000000..40929655 --- /dev/null +++ b/src/common/sourcedata.c @@ -0,0 +1,95 @@ +#include "pocketpy/objects/sourcedata.h" +#include "pocketpy/common/sstream.h" +#include +#include +#include + +void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size); + +void pkpy_SourceData__ctor(struct pkpy_SourceData* self, + const char* source, + int source_size, + const pkpy_Str* filename, + enum CompileMode mode) { + self->filename = pkpy_Str__copy(filename); // OPTIMIZEME? + self->mode = mode; + + c11_vector__ctor(&self->line_starts, sizeof(const char*)); + c11_vector__ctor(&self->_precompiled_tokens, sizeof(pkpy_Str)); + + int index = (strncmp(source, "\xEF\xBB\xBF", 3) == 0) ? 3 : 0; + int len = source_size - index; + for(int i = 0; i < source_size; ++i) + len -= (source[i] == '\r'); + + char *buf = malloc(len + 1), *p = buf; + buf[len] = '\0'; + for(; index < source_size; ++index) { + if(source[index] != '\r') *(p++) = source[index]; + } + pkpy_Str__take_buf(&self->source, buf, len); + + self->is_precompiled = (strncmp(pkpy_Str__data(&self->source), "pkpy:", 5) == 0); + c11_vector__push(const char*, &self->line_starts, pkpy_Str__data(&self->source)); +} + +void pkpy_SourceData__dtor(struct pkpy_SourceData* self) { + pkpy_Str__dtor(&self->filename); + pkpy_Str__dtor(&self->source); + c11_vector__dtor(&self->line_starts); + c11_vector__dtor(&self->_precompiled_tokens); +} + +bool pkpy_SourceData__get_line(const struct pkpy_SourceData* self, int lineno, const char** st, const char** ed) { + if(self->is_precompiled || lineno == -1) { return false; } + lineno -= 1; + if(lineno < 0) lineno = 0; + const char* _start = c11__getitem(const char*, &self->line_starts, lineno); + const char* i = _start; + // max 300 chars + while(*i != '\n' && *i != '\0' && i - _start < 300) + i++; + *st = _start; + *ed = i; + return true; +} + +pkpy_Str pkpy_SourceData__snapshot(const struct pkpy_SourceData* self, int lineno, const char* cursor, const char* name) { + pkpy_SStream ss; + pkpy_SStream__ctor(&ss); + pkpy_SStream__append_cstr(&ss, " File \""); + pkpy_SStream__append_Str(&ss, &self->filename); + pkpy_SStream__append_cstr(&ss, "\", line "); + pkpy_SStream__append_int(&ss, lineno); + + if(name) { + pkpy_SStream__append_cstr(&ss, ", in "); + pkpy_SStream__append_cstr(&ss, name); + } + + if(!self->is_precompiled) { + pkpy_SStream__append_char(&ss, '\n'); + const char *st = NULL, *ed; + if(pkpy_SourceData__get_line(self, lineno, &st, &ed)) { + while(st < ed && isblank(*st)) + ++st; + if(st < ed) { + pkpy_SStream__append_cstr(&ss, " "); + pkpy_SStream__append_cstrn(&ss, st, ed - st); + if(cursor && st <= cursor && cursor <= ed) { + pkpy_SStream__append_cstr(&ss, "\n "); + for(int i = 0; i < (cursor - st); ++i) + pkpy_SStream__append_char(&ss, ' '); + pkpy_SStream__append_cstr(&ss, "^"); + } + } else { + st = NULL; + } + } + + if(!st) { pkpy_SStream__append_cstr(&ss, " "); } + } + pkpy_Str res = pkpy_SStream__to_Str(&ss); + pkpy_SStream__dtor(&ss); + return res; +} diff --git a/src/common/sstream.c b/src/common/sstream.c new file mode 100644 index 00000000..08091c3d --- /dev/null +++ b/src/common/sstream.c @@ -0,0 +1,48 @@ +#include "pocketpy/common/sstream.h" +#include + +void pkpy_SStream__ctor(pkpy_SStream* self) { + c11_vector__ctor(&self->data, sizeof(char)); +} + +void pkpy_SStream__dtor(pkpy_SStream* self) { + c11_vector__dtor(&self->data); +} + +void pkpy_SStream__append_cstr(pkpy_SStream* self, const char* str) { + for (int i = 0; str[i] != '\0'; i++) { + c11_vector__push(char, &self->data, str[i]); + } +} + +void pkpy_SStream__append_cstrn(pkpy_SStream* self, const char* str, int n) { + for (int i = 0; i < n; i++) { + c11_vector__push(char, &self->data, str[i]); + } +} + +void pkpy_SStream__append_Str(pkpy_SStream* self, const pkpy_Str* str) { + pkpy_SStream__append_cstr(self, pkpy_Str__data(str)); +} + +void pkpy_SStream__append_char(pkpy_SStream* self, char c) { + c11_vector__push(char, &self->data, c); +} + +void pkpy_SStream__append_int(pkpy_SStream* self, int i) { + char str[12]; // sign + 10 digits + null terminator + sprintf(str, "%d", i); + pkpy_SStream__append_cstr(self, str); +} + +void pkpy_SStream__append_int64(pkpy_SStream* self, int64_t i) { + char str[23]; // sign + 21 digits + null terminator + sprintf(str, "%lld", i); + pkpy_SStream__append_cstr(self, str); +} + +pkpy_Str pkpy_SStream__to_Str(const pkpy_SStream* self) { + pkpy_Str res; + pkpy_Str__ctor2(&res, self->data.data, self->data.count); + return res; +} diff --git a/src/common/str.c b/src/common/str.c index bf20a4cd..50af2b3e 100644 --- a/src/common/str.c +++ b/src/common/str.c @@ -22,6 +22,32 @@ void pkpy_Str__ctor(pkpy_Str *self, const char *data){ pkpy_Str__ctor2(self, data, strlen(data)); } +static void pkpy_Str__check_ascii(pkpy_Str *self, char *p) { + for(int i = 0; i < self->size; i++){ + if(!isascii(p[i])){ + self->is_ascii = false; + break; + } + } +} + +void pkpy_Str__take_buf(pkpy_Str *self, char *data, int size) { + self->size = size; + self->is_ascii = true; + self->is_sso = size < sizeof(self->_inlined); + char* p; + if(self->is_sso){ + p = self->_inlined; + memcpy(p, data, size); + p[size] = '\0'; + free(data); + }else{ + self->_ptr = data; + p = self->_ptr; + } + pkpy_Str__check_ascii(self, p); +} + void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ self->size = size; self->is_ascii = true; @@ -35,13 +61,7 @@ void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){ } memcpy(p, data, size); p[size] = '\0'; - // check is_ascii - for(int i = 0; i < size; i++){ - if(!isascii(p[i])){ - self->is_ascii = false; - break; - } - } + pkpy_Str__check_ascii(self, p); } void pkpy_Str__dtor(pkpy_Str *self){ diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 1db9e898..78a231e7 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -20,7 +20,7 @@ NameScope Compiler::name_scope() const noexcept{ } CodeObject_ Compiler::push_global_context() noexcept{ - CodeObject_ co = std::make_shared(lexer.src, lexer.src->filename); + CodeObject_ co = std::make_shared(lexer.src, static_cast(lexer.src->filename)); co->start_line = __i == 0 ? 1 : prev().line; contexts.push_back(CodeEmitContext(vm, co, contexts.size())); return co; @@ -1293,8 +1293,8 @@ Error* Compiler::compile(CodeObject_* out) noexcept{ Error* err; check(lexer.run()); - // if(lexer.src->filename[0] != '<'){ - // printf("%s\n", lexer.src->filename.c_str()); + // if(lexer.src.filename()[0] != '<'){ + // printf("%s\n", lexer.src.filename().c_str()); // for(int i=0; iline_starts.push_back(curr_char); + c11_vector__push(const char*, &src->line_starts, curr_char); } return c; } @@ -534,8 +534,8 @@ Error* Lexer::SyntaxError(const char* fmt, ...) noexcept{ } Lexer::Lexer(VM* vm, std::shared_ptr src) noexcept : vm(vm), src(src){ - this->token_start = src->source.c_str(); - this->curr_char = src->source.c_str(); + this->token_start = pkpy_Str__data(&src->source); + this->curr_char = pkpy_Str__data(&src->source); } Error* Lexer::run() noexcept{ @@ -557,7 +557,7 @@ Error* Lexer::run() noexcept{ } Error* Lexer::from_precompiled() noexcept{ - TokenDeserializer deserializer(src->source.c_str()); + TokenDeserializer deserializer(pkpy_Str__data(&src->source)); deserializer.curr += 5; // skip "pkpy:" std::string_view version = deserializer.read_string('\n'); @@ -569,9 +569,9 @@ Error* Lexer::from_precompiled() noexcept{ } int count = deserializer.read_count(); - vector& precompiled_tokens = src->_precompiled_tokens; + auto precompiled_tokens = &src->_precompiled_tokens; for(int i = 0; i < count; i++) { - precompiled_tokens.push_back(deserializer.read_string('\n')); + c11_vector__push(Str, precompiled_tokens, Str(deserializer.read_string('\n'))); } count = deserializer.read_count(); @@ -580,8 +580,8 @@ Error* Lexer::from_precompiled() noexcept{ t.type = (unsigned char)deserializer.read_uint(','); if(is_raw_string_used(t.type)) { i64 index = deserializer.read_uint(','); - t.start = precompiled_tokens[index].c_str(); - t.length = precompiled_tokens[index].size; + t.start = c11__getitem(Str, precompiled_tokens, index).c_str(); + t.length = c11__getitem(Str, precompiled_tokens, index).size; } else { t.start = nullptr; t.length = 0; diff --git a/src/interpreter/profiler.cpp b/src/interpreter/profiler.cpp index 04e851f5..2482f4d2 100644 --- a/src/interpreter/profiler.cpp +++ b/src/interpreter/profiler.cpp @@ -20,7 +20,7 @@ void LineProfiler::begin() { frames.clear(); } void LineProfiler::_step(int callstack_size, Frame* frame) { auto line_info = frame->co->lines[frame->ip()]; if(line_info.is_virtual) return; - std::string_view filename = frame->co->src->filename.sv(); + std::string_view filename = frame->co->src.filename().sv(); int line = line_info.lineno; if(frames.empty()) { @@ -87,7 +87,7 @@ Str LineProfiler::stats() { int start_line = decl->code->start_line; int end_line = decl->code->end_line; if(start_line == -1 || end_line == -1) continue; - std::string_view filename = decl->code->src->filename.sv(); + std::string_view filename = decl->code->src.filename().sv(); const _LineRecord* file_records = records[filename]; clock_t total_time = 0; for(int line = start_line; line <= end_line; line++) { diff --git a/src/interpreter/vm.cpp b/src/interpreter/vm.cpp index ce4d1114..898b2850 100644 --- a/src/interpreter/vm.cpp +++ b/src/interpreter/vm.cpp @@ -1706,7 +1706,7 @@ void VM::__breakpoint() { SStream ss; Frame* frame = &frames[i]->frame; int lineno = frame->curr_lineno(); - ss << "File \"" << frame->co->src->filename << "\", line " << lineno; + ss << "File \"" << frame->co->src.filename() << "\", line " << lineno; if(frame->_callable) { ss << ", in "; ss << frame->_callable->as().decl->code->name; diff --git a/src/objects/sourcedata.cpp b/src/objects/sourcedata.cpp deleted file mode 100644 index f6d96ee8..00000000 --- a/src/objects/sourcedata.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "pocketpy/objects/sourcedata.hpp" - -namespace pkpy { -SourceData::SourceData(std::string_view source, const Str& filename, CompileMode mode) : - filename(filename), mode(mode) { - int index = 0; - // Skip utf8 BOM if there is any. - if(strncmp(source.data(), "\xEF\xBB\xBF", 3) == 0) index += 3; - // Drop all '\r' - SStream ss(source.size() + 1); - while(index < source.size()) { - if(source[index] != '\r') ss << source[index]; - index++; - } - this->source = ss.str(); - if(this->source.size > 5 && this->source.sv().substr(0, 5) == "pkpy:") { - this->is_precompiled = true; - } else { - this->is_precompiled = false; - } - line_starts.push_back(this->source.c_str()); -} - -SourceData::SourceData(const Str& filename, CompileMode mode) : filename(filename), mode(mode) { - line_starts.push_back(this->source.c_str()); -} - -pair SourceData::_get_line(int lineno) const { - if(is_precompiled || lineno == -1) return {nullptr, nullptr}; - lineno -= 1; - if(lineno < 0) lineno = 0; - const char* _start = line_starts[lineno]; - const char* i = _start; - // max 300 chars - while(*i != '\n' && *i != '\0' && i - _start < 300) - i++; - return {_start, i}; -} - -std::string_view SourceData::get_line(int lineno) const { - auto [_0, _1] = _get_line(lineno); - if(_0 && _1) return std::string_view(_0, _1 - _0); - return ""; -} - -Str SourceData::snapshot(int lineno, const char* cursor, std::string_view name) const { - SStream ss; - ss << " " << "File \"" << filename << "\", line " << lineno; - if(!name.empty()) ss << ", in " << name; - if(!is_precompiled) { - ss << '\n'; - pair pair = _get_line(lineno); - Str line = ""; - int removed_spaces = 0; - if(pair.first && pair.second) { - line = Str(pair.first, pair.second - pair.first).lstrip(); - removed_spaces = pair.second - pair.first - line.length(); - if(line.empty()) line = ""; - } - ss << " " << line; - if(cursor && line != "" && cursor >= pair.first && cursor <= pair.second) { - auto column = cursor - pair.first - removed_spaces; - if(column >= 0) ss << "\n " << std::string(column, ' ') << "^"; - } - } - return ss.str(); -} -} // namespace pkpy