From 724f693761804fca08c74fd23a6e8368f6104fe9 Mon Sep 17 00:00:00 2001 From: blueloveTH Date: Mon, 2 Oct 2023 03:05:21 +0800 Subject: [PATCH] reimpl `bytes` --- include/pocketpy/obj.h | 73 ++++++++++++++++++++++++++++++------------ src/base64.cpp | 14 ++++---- src/ceval.cpp | 6 ++-- src/io.cpp | 20 ++++++------ src/pocketpy.cpp | 12 +++---- 5 files changed, 77 insertions(+), 48 deletions(-) diff --git a/include/pocketpy/obj.h b/include/pocketpy/obj.h index dab44d1a..265908a0 100644 --- a/include/pocketpy/obj.h +++ b/include/pocketpy/obj.h @@ -48,43 +48,74 @@ struct StarWrapper{ }; struct Bytes{ - std::vector _v; - bool valid; + unsigned char* _data; + int _size; - int size() const noexcept { return (int)_v.size(); } - int operator[](int i) const noexcept { return (int)(uint8_t)_v[i]; } - const char* data() const noexcept { return _v.data(); } + int size() const noexcept { return _size; } + int operator[](int i) const noexcept { return (int)_data[i]; } + const unsigned char* data() const noexcept { return _data; } - bool operator==(const Bytes& rhs) const{ return _v == rhs._v && valid == rhs.valid; } - bool operator!=(const Bytes& rhs) const{ return _v != rhs._v || valid != rhs.valid; } + bool operator==(const Bytes& rhs) const{ + if(_size != rhs._size) return false; + for(int i=0; i<_size; i++) if(_data[i] != rhs._data[i]) return false; + return true; + } + bool operator!=(const Bytes& rhs) const{ return !(*this == rhs); } - Str str() const noexcept { return Str(_v.data(), _v.size()); } - std::string_view sv() const noexcept { return std::string_view(_v.data(), _v.size()); } + Str str() const noexcept { return Str((char*)_data, _size); } + std::string_view sv() const noexcept { return std::string_view((char*)_data, _size); } - Bytes() : valid(false) {} - Bytes(std::vector&& v): _v(std::move(v)), valid(true) {} - Bytes(std::string_view sv): valid(true) { - _v.resize(sv.size()); - for(int i=0; i& v){ + _data = new unsigned char[v.size()]; + _size = v.size(); + for(int i=0; i<_size; i++) _data[i] = v[i]; + } + Bytes(std::string_view sv){ + _data = new unsigned char[sv.size()]; + _size = sv.size(); + for(int i=0; i<_size; i++) _data[i] = sv[i]; } Bytes(const Str& str): Bytes(str.sv()) {} - operator bool() const noexcept { return valid; } + operator bool() const noexcept { return _data != nullptr; } // copy constructor - Bytes(const Bytes& rhs) : _v(rhs._v), valid(rhs.valid) {} + Bytes(const Bytes& rhs){ + _data = new unsigned char[rhs._size]; + _size = rhs._size; + for(int i=0; i<_size; i++) _data[i] = rhs._data[i]; + } // move constructor - Bytes(Bytes&& rhs) noexcept : _v(std::move(rhs._v)), valid(rhs.valid) { - rhs.valid = false; + Bytes(Bytes&& rhs) noexcept { + _data = rhs._data; + _size = rhs._size; + rhs._data = nullptr; + rhs._size = 0; } Bytes& operator=(Bytes&& rhs) noexcept { - _v = std::move(rhs._v); - valid = rhs.valid; - rhs.valid = false; + delete[] _data; + _data = rhs._data; + _size = rhs._size; + rhs._data = nullptr; + rhs._size = 0; return *this; } + std::pair detach() noexcept { + unsigned char* p = _data; + int size = _size; + _data = nullptr; + _size = 0; + return {p, size}; + } + + ~Bytes(){ + delete[] _data; + } + // delete copy assignment Bytes& operator=(const Bytes& rhs) = delete; }; diff --git a/src/base64.cpp b/src/base64.cpp index 8b9a6005..5dc83ca2 100644 --- a/src/base64.cpp +++ b/src/base64.cpp @@ -171,19 +171,17 @@ void add_module_base64(VM* vm){ // b64encode vm->bind_func<1>(mod, "b64encode", [](VM* vm, ArgsView args){ Bytes& b = CAST(Bytes&, args[0]); - std::vector out(b.size() * 2); - int size = base64_encode((const unsigned char*)b.data(), b.size(), out.data()); - out.resize(size); - return VAR(Bytes(std::move(out))); + unsigned char* p = new unsigned char[b.size() * 2]; + int size = base64_encode((const unsigned char*)b.data(), b.size(), (char*)p); + return VAR(Bytes(p, size)); }); // b64decode vm->bind_func<1>(mod, "b64decode", [](VM* vm, ArgsView args){ Bytes& b = CAST(Bytes&, args[0]); - std::vector out(b.size()); - int size = base64_decode(b.data(), b.size(), (unsigned char*)out.data()); - out.resize(size); - return VAR(Bytes(std::move(out))); + unsigned char* p = new unsigned char[b.size()]; + int size = base64_decode((const char*)b.data(), b.size(), p); + return VAR(Bytes(p, size)); }); } diff --git a/src/ceval.cpp b/src/ceval.cpp index 089d9201..e867f2cd 100644 --- a/src/ceval.cpp +++ b/src/ceval.cpp @@ -248,9 +248,9 @@ __NEXT_STEP:; } DISPATCH(); TARGET(BUILD_BYTES) { const Str& s = CAST(Str&, TOP()); - std::vector buffer(s.size); - memcpy(buffer.data(), s.data, s.size); - TOP() = VAR(Bytes(std::move(buffer))); + unsigned char* p = new unsigned char[s.size]; + memcpy(p, s.data, s.size); + TOP() = VAR(Bytes(p, s.size)); } DISPATCH(); TARGET(BUILD_TUPLE) _0 = VAR(STACK_VIEW(byte.arg).to_tuple()); diff --git a/src/io.cpp b/src/io.cpp index 500e4638..f0630f6e 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -32,12 +32,13 @@ Bytes _default_import_handler(const Str& name){ FILE* fp = io_fopen(cname.c_str(), "rb"); if(!fp) return Bytes(); fseek(fp, 0, SEEK_END); - std::vector buffer(ftell(fp)); + int buffer_size = ftell(fp); + unsigned char* buffer = new unsigned char[buffer_size]; fseek(fp, 0, SEEK_SET); - size_t sz = io_fread(buffer.data(), 1, buffer.size(), fp); + size_t sz = io_fread(buffer, 1, buffer_size, fp); PK_UNUSED(sz); fclose(fp); - return Bytes(std::move(buffer)); + return Bytes(buffer, buffer_size); #else return Bytes(); #endif @@ -55,14 +56,13 @@ Bytes _default_import_handler(const Str& name){ vm->bind_method<0>(type, "read", [](VM* vm, ArgsView args){ FileIO& io = CAST(FileIO&, args[0]); fseek(io.fp, 0, SEEK_END); - std::vector buffer(ftell(io.fp)); + int buffer_size = ftell(io.fp); + unsigned char* buffer = new unsigned char[buffer_size]; fseek(io.fp, 0, SEEK_SET); - size_t sz = io_fread(buffer.data(), 1, buffer.size(), io.fp); - PK_ASSERT(sz <= buffer.size()); - // in text mode, CR may be dropped, which may cause `sz < buffer.size()` - if(sz < buffer.size()) buffer.resize(sz); - PK_UNUSED(sz); - Bytes b(std::move(buffer)); + size_t actual_size = io_fread(buffer, 1, buffer_size, io.fp); + PK_ASSERT(actual_size <= buffer_size); + // in text mode, CR may be dropped, which may cause `actual_size < buffer_size` + Bytes b(buffer, actual_size); if(io.is_text()) return VAR(b.str()); return VAR(std::move(b)); }); diff --git a/src/pocketpy.cpp b/src/pocketpy.cpp index 5c943198..df240db3 100644 --- a/src/pocketpy.cpp +++ b/src/pocketpy.cpp @@ -653,9 +653,9 @@ void init_builtins(VM* _vm) { _vm->bind_method<0>("str", "encode", [](VM* vm, ArgsView args) { const Str& self = _CAST(Str&, args[0]); - std::vector buffer(self.length()); - memcpy(buffer.data(), self.data, self.length()); - return VAR(Bytes(std::move(buffer))); + unsigned char* buffer = new unsigned char[self.length()]; + memcpy(buffer, self.data, self.length()); + return VAR(Bytes(buffer, self.length())); }); _vm->bind_method<1>("str", "join", [](VM* vm, ArgsView args) { @@ -1027,13 +1027,13 @@ void init_builtins(VM* _vm) { /************ bytes ************/ _vm->bind_constructor<2>("bytes", [](VM* vm, ArgsView args){ List& list = CAST(List&, args[1]); - std::vector buffer(list.size()); + std::vector buffer(list.size()); for(int i=0; i255) vm->ValueError("byte must be in range[0, 256)"); buffer[i] = (char)b; } - return VAR(Bytes(std::move(buffer))); + return VAR(Bytes(buffer)); }); _vm->bind__getitem__(_vm->tp_bytes, [](VM* vm, PyObject* obj, PyObject* index) { @@ -1045,7 +1045,7 @@ void init_builtins(VM* _vm) { _vm->bind__hash__(_vm->tp_bytes, [](VM* vm, PyObject* obj) { const Bytes& self = _CAST(Bytes&, obj); - std::string_view view(self.data(), self.size()); + std::string_view view((char*)self.data(), self.size()); return (i64)std::hash()(view); });