This commit is contained in:
blueloveTH 2024-06-10 22:38:49 +08:00
parent 907a1b7713
commit 3787a1da1d
25 changed files with 681 additions and 361 deletions

View File

@ -2,23 +2,30 @@ cmake_minimum_required(VERSION 3.10)
project(pocketpy)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /utf-8 /Ox /jumptablerdata /GS-")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /Ox /jumptablerdata /GS-")
add_compile_options(/wd4267 /wd4244)
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti -O2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
# disable -Wshorten-64-to-32 for apple
if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-shorten-64-to-32")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-shorten-64-to-32")
endif()
endif()
include_directories(${CMAKE_CURRENT_LIST_DIR}/include)
file(GLOB_RECURSE POCKETPY_SRC ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
file(GLOB_RECURSE POCKETPY_SRC_CPP ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
file(GLOB_RECURSE POCKETPY_SRC_C ${CMAKE_CURRENT_LIST_DIR}/src/*.c)
set(POCKETPY_SRC ${POCKETPY_SRC_CPP} ${POCKETPY_SRC_C})
option(PK_USE_CJSON "" OFF)
if(PK_USE_CJSON)

View File

@ -18,7 +18,9 @@ if [ $? -ne 0 ]; then
exit 1
fi
SRC=$(find src/ -name "*.cpp")
SRC_C=$(find src/ -name "*.c")
SRC_CPP=$(find src/ -name "*.cpp")
SRC="$SRC_C $SRC_CPP"
echo "> Compiling and linking source files... "

View File

@ -1,7 +1,10 @@
python prebuild.py
SRC=$(find src/ -name "*.cpp")
SRC_C=$(find src/ -name "*.c")
SRC_CPP=$(find src/ -name "*.cpp")
SRC="$SRC_C $SRC_CPP"
FLAGS="-std=c++17 -O0 -stdlib=libc++ -Iinclude -frtti -Wfatal-errors -g -DDEBUG"
FLAGS="-std=c++17 -O0 -stdlib=libc++ -Iinclude -frtti -Wfatal-errors -g -DDEBUG -DPK_ENABLE_OS=1"
clang++ $FLAGS -o main src2/main.cpp $SRC

View File

@ -3,5 +3,8 @@ python prebuild.py
rm -rf web/lib
mkdir web/lib
SRC=$(find src/ -name "*.cpp")
SRC_C=$(find src/ -name "*.c")
SRC_CPP=$(find src/ -name "*.cpp")
SRC="$SRC_C $SRC_CPP"
em++ $SRC -Iinclude/ -fexceptions -frtti -s -Os -sEXPORTED_FUNCTIONS=_pkpy_new_repl,_pkpy_repl_input,_pkpy_new_vm -sEXPORTED_RUNTIME_METHODS=ccall -o web/lib/pocketpy.js

View File

@ -0,0 +1,52 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
// Byte string with a small-string optimization. The C++ `Str` wrapper derives
// from this struct, so the layout is shared by both languages.
typedef struct pkpy_Str{
    int size;        // length in bytes, not counting the trailing '\0'
    bool is_ascii;   // true when no byte has its high bit set
    bool is_sso;     // true: bytes live in `_inlined`; false: bytes live in `_ptr`
    union{
        char* _ptr;         // heap buffer, valid only when is_sso is false
        char _inlined[16];  // in-place buffer, valid only when is_sso is true
    };
} pkpy_Str;

// Returns a pointer to the first byte of the string, wherever it is stored.
// `static inline` (not plain `inline`) so every C translation unit gets its own
// definition; plain `inline` in C provides no external definition and fails to
// link whenever the compiler decides not to inline the call.
static inline const char* pkpy_Str__data(const pkpy_Str* self){
    return self->is_sso ? self->_inlined : self->_ptr;
}

// Returns the length of the string in bytes.
static inline int pkpy_Str__size(const pkpy_Str* self){
    return self->size;
}
// Returns the byte length (1-6) of the UTF-8 sequence introduced by lead byte `c`.
// For an invalid lead byte: returns 0 when `suppress` is true, otherwise
// reports a fatal error.
int pkpy_utils__u8len(unsigned char c, bool suppress);
// Initializes `self` with a copy of the NUL-terminated string `data`.
void pkpy_Str__ctor(pkpy_Str* self, const char* data);
// Initializes `self` with a copy of exactly `size` bytes starting at `data`.
void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size);
// Frees the heap buffer if the string owns one.
void pkpy_Str__dtor(pkpy_Str* self);
// The functions below return a new string by value. The result may own heap
// memory and is released with pkpy_Str__dtor.
pkpy_Str pkpy_Str__copy(const pkpy_Str* self);
pkpy_Str pkpy_Str__concat(const pkpy_Str* self, const pkpy_Str* other);
pkpy_Str pkpy_Str__concat2(const pkpy_Str* self, const char* other, int size);
// `start` and `size` are byte offsets/lengths; substr takes everything from `start` on.
pkpy_Str pkpy_Str__substr(const pkpy_Str* self, int start);
pkpy_Str pkpy_Str__substr2(const pkpy_Str* self, int start, int size);
// Case conversion touches only the ASCII letters A-Z / a-z.
pkpy_Str pkpy_Str__lower(const pkpy_Str* self);
pkpy_Str pkpy_Str__upper(const pkpy_Str* self);
// Replaces every occurrence of a byte (replace) or of a substring (replace2).
pkpy_Str pkpy_Str__replace(const pkpy_Str* self, char old, char new_);
pkpy_Str pkpy_Str__replace2(const pkpy_Str* self, const pkpy_Str* old, const pkpy_Str* new_);
// Indices for the u8_* functions count UTF-8 code points, not bytes.
pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str* self, int i);
pkpy_Str pkpy_Str__u8_slice(const pkpy_Str* self, int start, int stop, int step);
int pkpy_Str__u8_length(const pkpy_Str* self);
// Three-way comparison: negative, zero or positive.
int pkpy_Str__cmp(const pkpy_Str* self, const pkpy_Str* other);
int pkpy_Str__cmp2(const pkpy_Str* self, const char* other, int size);
// Conversions between a code-point index and a byte offset.
int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i);
int pkpy_Str__byte_index_to_unicode(const pkpy_Str* self, int n);
// Byte offset of the first occurrence of `sub` at or after `start`, or -1 if absent.
int pkpy_Str__index(const pkpy_Str* self, const pkpy_Str* sub, int start);
// Number of non-overlapping occurrences of `sub`; an empty `sub` yields size + 1.
int pkpy_Str__count(const pkpy_Str* self, const pkpy_Str* sub);
#ifdef __cplusplus
}
#endif

View File

@ -1,80 +1,162 @@
#pragma once
#include "pocketpy/common/utils.hpp"
#include "pocketpy/common/utils.h"
#include "pocketpy/common/memorypool.hpp"
#include "pocketpy/common/vector.hpp"
#include "pocketpy/common/str.h"
#include <string_view>
#include <ostream>
namespace pkpy {
int utf8len(unsigned char c, bool suppress = false);
struct SStream;
struct Str {
int size;
bool is_ascii;
char* data;
char _inlined[16];
struct Str: pkpy_Str {
bool is_inlined() const { return is_sso; }
bool is_inlined() const { return data == _inlined; }
Str(){
pkpy_Str__ctor2(this, "", 0);
}
Str(pkpy_Str&& s){
std::memcpy(this, &s, sizeof(pkpy_Str));
}
Str(const std::string& s){
pkpy_Str__ctor2(this, s.data(), s.size());
}
Str(std::string_view s){
pkpy_Str__ctor2(this, s.data(), s.size());
}
Str(const char* s){
pkpy_Str__ctor2(this, s, strlen(s));
}
Str(const char* s, int len){
pkpy_Str__ctor2(this, s, len);
}
Str();
Str(int size, bool is_ascii);
Str(const std::string& s);
Str(std::string_view s);
Str(const char* s);
Str(const char* s, int len);
Str(pair<char*, int>); // take ownership
Str(const Str& other);
Str(Str&& other);
Str(const Str& other){
pkpy_Str__ctor2(this, pkpy_Str__data(&other), other.size);
}
// Move constructor: takes over `other`'s bytes (including a heap buffer, if
// any) and leaves `other` as a valid, NUL-terminated empty string.
Str(Str&& other) noexcept {
    std::memcpy(this, &other, sizeof(pkpy_Str));
    other.size = 0;
    other.is_ascii = true;
    other.is_sso = true;
    // `_inlined` aliases the pointer we just took over; terminate it so that
    // c_str() on the moved-from object is a proper empty C string.
    other._inlined[0] = '\0';
}
operator std::string_view () const { return sv(); }
const char* begin() const { return data; }
const char* end() const { return data + size; }
char operator[] (int idx) const { return data[idx]; }
const char* begin() const { return pkpy_Str__data(this); }
const char* end() const { return pkpy_Str__data(this) + size; }
int length() const { return size; }
char operator[] (int idx) const { return pkpy_Str__data(this)[idx]; }
bool empty() const { return size == 0; }
size_t hash() const { return std::hash<std::string_view>()(sv()); }
Str& operator= (const Str&);
Str operator+ (const Str&) const;
Str operator+ (const char*) const;
friend Str operator+ (const char*, const Str&);
// Copy assignment: releases the current contents and copies `other`'s bytes.
Str& operator= (const Str& other){
    // Self-assignment must be a no-op: destroying first would free the very
    // buffer we are about to read (heap case) or memcpy a buffer onto itself
    // (inline case).
    if(this == &other) return *this;
    pkpy_Str__dtor(this);
    pkpy_Str__ctor2(this, pkpy_Str__data(&other), other.size);
    return *this;
}
bool operator== (const std::string_view other) const;
bool operator!= (const std::string_view other) const;
bool operator< (const std::string_view other) const;
friend bool operator< (const std::string_view other, const Str& str);
Str operator+ (const Str& other) const{
return pkpy_Str__concat(this, &other);
}
bool operator== (const char* p) const;
bool operator!= (const char* p) const;
Str operator+ (const char* other) const{
return pkpy_Str__concat2(this, other, strlen(other));
}
bool operator== (const Str& other) const;
bool operator!= (const Str& other) const;
bool operator< (const Str& other) const;
bool operator> (const Str& other) const;
bool operator<= (const Str& other) const;
bool operator>= (const Str& other) const;
friend Str operator+ (const char* self, const Str& other){
pkpy_Str tmp;
pkpy_Str__ctor2(&tmp, self, strlen(self));
pkpy_Str retval = pkpy_Str__concat(&tmp, &other);
pkpy_Str__dtor(&tmp);
return retval;
}
~Str();
bool operator== (const std::string_view other) const{
int res = pkpy_Str__cmp2(this, other.data(), other.size());
return res == 0;
}
friend std::ostream& operator<< (std::ostream& os, const Str& str);
bool operator!= (const std::string_view other) const{
int res = pkpy_Str__cmp2(this, other.data(), other.size());
return res != 0;
}
const char* c_str() const { return data; }
bool operator< (const std::string_view other) const{
int res = pkpy_Str__cmp2(this, other.data(), other.size());
return res < 0;
}
std::string_view sv() const { return std::string_view(data, size); }
friend bool operator< (const std::string_view other, const Str& str){
int res = pkpy_Str__cmp2(&str, other.data(), other.size());
return res > 0;
}
std::string str() const { return std::string(data, size); }
bool operator== (const char* p) const{
int res = pkpy_Str__cmp2(this, p, strlen(p));
return res == 0;
}
bool operator!= (const char* p) const{
int res = pkpy_Str__cmp2(this, p, strlen(p));
return res != 0;
}
bool operator== (const Str& other) const{
return pkpy_Str__cmp(this, &other) == 0;
}
bool operator!= (const Str& other) const{
return pkpy_Str__cmp(this, &other) != 0;
}
bool operator< (const Str& other) const{
return pkpy_Str__cmp(this, &other) < 0;
}
bool operator> (const Str& other) const{
return pkpy_Str__cmp(this, &other) > 0;
}
bool operator<= (const Str& other) const{
return pkpy_Str__cmp(this, &other) <= 0;
}
bool operator>= (const Str& other) const{
return pkpy_Str__cmp(this, &other) >= 0;
}
~Str(){
pkpy_Str__dtor(this);
}
friend std::ostream& operator<< (std::ostream& os, const Str& self){
os.write(pkpy_Str__data(&self), self.size);
return os;
}
const char* c_str() const { return pkpy_Str__data(this); }
std::string_view sv() const {
return std::string_view(pkpy_Str__data(this), size);
}
std::string str() const {
return std::string(pkpy_Str__data(this), size);
}
Str substr(int start, int size) const{
return pkpy_Str__substr2(this, start, size);
}
Str substr(int start) const{
return pkpy_Str__substr(this, start);
}
Str substr(int start, int len) const;
Str substr(int start) const;
Str strip(bool left, bool right, const Str& chars) const;
Str strip(bool left = true, bool right = true) const;
@ -82,23 +164,52 @@ struct Str {
Str rstrip() const { return strip(false, true); }
Str lower() const;
Str upper() const;
Str lower() const{
return pkpy_Str__lower(this);
}
Str upper() const{
return pkpy_Str__upper(this);
}
Str replace(char old, char new_) const{
return pkpy_Str__replace(this, old, new_);
}
Str replace(const Str& old, const Str& new_) const{
return pkpy_Str__replace2(this, &old, &new_);
}
Str escape(bool single_quote = true) const;
void escape_(SStream& ss, bool single_quote = true) const;
int index(const Str& sub, int start = 0) const;
Str replace(char old, char new_) const;
Str replace(const Str& old, const Str& new_, int count = -1) const;
vector<std::string_view> split(const Str& sep) const;
vector<std::string_view> split(char sep) const;
int count(const Str& sub) const;
int index(const Str& sub, int start = 0) const{
return pkpy_Str__index(this, &sub, start);
}
int count(const Str& sub) const{
return pkpy_Str__count(this, &sub);
}
/*************unicode*************/
int _unicode_index_to_byte(int i) const;
int _byte_index_to_unicode(int n) const;
Str u8_getitem(int i) const;
Str u8_slice(int start, int stop, int step) const;
int u8_length() const;
int _unicode_index_to_byte(int i) const{
return pkpy_Str__unicode_index_to_byte(this, i);
}
int _byte_index_to_unicode(int n) const{
return pkpy_Str__byte_index_to_unicode(this, n);
}
Str u8_getitem(int i) const{
return pkpy_Str__u8_getitem(this, i);
}
Str u8_slice(int start, int stop, int step) const{
return pkpy_Str__u8_slice(this, start, stop, step);
}
int u8_length() const{
return pkpy_Str__u8_length(this);
}
};
struct StrName {

View File

@ -37,4 +37,11 @@ struct has_gc_marker<T, std::void_t<decltype(&T::_gc_mark)>> : std::true_type {}
template <typename T>
constexpr inline int py_sizeof = 16 + sizeof(T);
#define PK_ALWAYS_PASS_BY_POINTER(T) \
T(const T&) = delete; \
T& operator= (const T&) = delete; \
T(T&&) = delete; \
T& operator= (T&&) = delete;
} // namespace pkpy

View File

@ -0,0 +1,29 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
// Always expands to 1; used as `#if PK_REGION("...")` to give a section of code a label.
#define PK_REGION(name) 1
// Loop header iterating `i` from `start` towards `stop` in increments of `step`.
// The end test is `i < stop` for a positive step and `i > stop` otherwise.
// Arguments are not parenthesized and `step` is evaluated on every iteration.
#define PK_SLICE_LOOP(i, start, stop, step) for(int i = start; step > 0 ? i < stop : i > stop; i += step)
// global constants
// Lowercase hexadecimal digits, indexable by nibble value.
#define PK_HEX_TABLE "0123456789abcdef"
// Table of platform names; defined in a source file.
extern const char* kPlatformStrings[];
// Tells the compiler that control never reaches this point.
// NOTE: both expansions already end with ';'.
#ifdef _MSC_VER
#define PK_UNREACHABLE() __assume(0);
#else
#define PK_UNREACHABLE() __builtin_unreachable();
#endif
// Prints a printf-style message to stderr and aborts the process.
// Expands to a braced block; the including file must provide fprintf and abort.
#define PK_FATAL_ERROR(...) { fprintf(stderr, __VA_ARGS__); abort(); }
// Minimum / maximum of two values; each argument may be evaluated twice.
#define PK_MIN(a, b) ((a) < (b) ? (a) : (b))
#define PK_MAX(a, b) ((a) > (b) ? (a) : (b))
#ifdef __cplusplus
}
#endif

View File

@ -1,36 +0,0 @@
#pragma once
#define PK_REGION(name) 1
#define PK_ALWAYS_PASS_BY_POINTER(T) \
T(const T&) = delete; \
T& operator= (const T&) = delete; \
T(T&&) = delete; \
T& operator= (T&&) = delete;
#define PK_SLICE_LOOP(i, start, stop, step) for(int i = start; step > 0 ? i < stop : i > stop; i += step)
namespace pkpy {
// global constants
const inline char* PK_HEX_TABLE = "0123456789abcdef";
const inline char* kPlatformStrings[] = {
"win32", // 0
"emscripten", // 1
"ios", // 2
"darwin", // 3
"android", // 4
"linux", // 5
"unknown" // 6
};
#ifdef _MSC_VER
#define PK_UNREACHABLE() __assume(0);
#else
#define PK_UNREACHABLE() __builtin_unreachable();
#endif
#define PK_FATAL_ERROR(...) { fprintf(stderr, __VA_ARGS__); std::abort(); }
} // namespace pkpy

View File

@ -0,0 +1,55 @@
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
// Fixed-length, type-erased array: `count` elements of `elem_size` bytes each.
typedef struct c11_array{
void* data; // element storage
int count; // number of elements
int elem_size; // size of one element in bytes
} c11_array;
// Allocates storage for `count` elements of `elem_size` bytes.
void c11_array__ctor(c11_array* self, int elem_size, int count);
// Frees the storage and resets `data` and `count`.
void c11_array__dtor(c11_array* self);
// Returns a new array holding a byte-wise copy of the elements.
c11_array c11_array__copy(const c11_array* self);
// Returns the address of element `index`; no bounds check is performed.
void* c11_array__at(c11_array* self, int index);
// Growable, type-erased array of elements of `elem_size` bytes each.
typedef struct c11_vector{
void* data; // element storage, NULL until the first reservation
int count; // number of elements in use
int capacity; // number of elements the storage can hold
int elem_size; // size of one element in bytes
} c11_vector;
// Initializes an empty vector; no memory is allocated.
void c11_vector__ctor(c11_vector* self, int elem_size);
// Frees the storage and resets the vector to empty.
void c11_vector__dtor(c11_vector* self);
// Returns a new vector with the same capacity and a byte-wise copy of the elements.
c11_vector c11_vector__copy(const c11_vector* self);
// Returns the address of element `index`; no bounds check is performed.
void* c11_vector__at(c11_vector* self, int index);
// Ensures room for at least `capacity` elements (minimum 4); never shrinks.
void c11_vector__reserve(c11_vector* self, int capacity);
// Typed element access: treats `self->data` as a `T*`. Works for any struct
// with a `data` member; no bounds check is performed.
#define c11__getitem(T, self, index) ((T*)(self)->data)[index]
// Typed element store. NOTE: the expansion already ends with ';' and `value`
// is not parenthesized.
#define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value;
// Appends `elem`, doubling the capacity first when the vector is full.
#define c11_vector__push_back(T, self, elem) \
do{ \
if((self)->count == (self)->capacity) c11_vector__reserve((self), (self)->capacity*2); \
((T*)(self)->data)[(self)->count] = (elem); \
(self)->count++; \
}while(0)
// Drops the last element by decrementing `count`; `T` is unused and there is
// no check for an empty vector.
#define c11_vector__pop_back(T, self) \
do{ \
(self)->count--; \
}while(0)
// Appends `size` elements of type T starting at `p`. `size` is evaluated more
// than once. Uses memcpy, so the including file must provide <string.h>.
#define c11_vector__extend(T, self, p, size) \
do{ \
c11_vector__reserve((self), (self)->count + (size)); \
memcpy((T*)(self)->data + (self)->count, (p), (size) * sizeof(T)); \
(self)->count += (size); \
}while(0)
#ifdef __cplusplus
}
#endif

View File

@ -2,7 +2,7 @@
#include "pocketpy/common/config.h"
#include "pocketpy/common/vector.hpp"
#include "pocketpy/common/utils.hpp"
#include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.hpp"
#include "pocketpy/objects/namedict.hpp"

View File

@ -319,8 +319,8 @@ public:
#endif
#if PK_REGION("Logging Methods")
virtual void stdout_write(const Str& s){ _stdout(s.data, s.size); }
virtual void stderr_write(const Str& s){ _stderr(s.data, s.size); }
virtual void stdout_write(const Str& s){ _stdout(s.c_str(), s.size); }
virtual void stderr_write(const Str& s){ _stderr(s.c_str(), s.size); }
#endif
#if PK_REGION("Magic Bindings")

View File

@ -2,7 +2,7 @@
#include "pocketpy/common/config.h"
#include "pocketpy/common/str.hpp"
#include "pocketpy/common/utils.hpp"
#include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.hpp"
namespace pkpy {

View File

@ -1,6 +1,6 @@
#pragma once
#include "pocketpy/common/utils.hpp"
#include "pocketpy/common/utils.h"
#include "pocketpy/common/str.hpp"
namespace pkpy {

View File

@ -1,5 +1,5 @@
#include "pocketpy/common/any.hpp"
#include "pocketpy/common/utils.hpp"
#include "pocketpy/common/utils.h"
#include <cstdio>

250
src/common/str.c Normal file
View File

@ -0,0 +1,250 @@
#include "pocketpy/common/str.h"
#include "pocketpy/common/vector.h"
#include "pocketpy/common/utils.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdio.h>
// Returns the byte length (1-6) of the UTF-8 sequence whose lead byte is `c`.
// If `c` is not a valid lead byte, returns 0 when `suppress` is true and
// raises a fatal error otherwise.
// The masks are written in hex because `0b...` literals are not part of C11.
int pkpy_utils__u8len(unsigned char c, bool suppress) {
    if((c & 0x80) == 0x00) return 1;  // 0xxxxxxx
    if((c & 0xE0) == 0xC0) return 2;  // 110xxxxx
    if((c & 0xF0) == 0xE0) return 3;  // 1110xxxx
    if((c & 0xF8) == 0xF0) return 4;  // 11110xxx
    if((c & 0xFC) == 0xF8) return 5;  // 111110xx
    if((c & 0xFE) == 0xFC) return 6;  // 1111110x
    if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n")
    return 0;
}
// Initializes `self` from a NUL-terminated C string: measures it, then
// delegates to the length-taking constructor.
void pkpy_Str__ctor(pkpy_Str *self, const char *data){
    int length = strlen(data);
    pkpy_Str__ctor2(self, data, length);
}
// Initializes `self` with a copy of `size` bytes starting at `data`.
// Strings shorter than the inline buffer are stored in place; longer ones get
// a heap buffer of size + 1 bytes. The stored bytes are always NUL-terminated
// and `is_ascii` records whether any byte has its high bit set.
void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){
    assert(size >= 0);
    self->size = size;
    self->is_ascii = true;
    // Compare as int: a plain `size < sizeof(...)` converts `size` to unsigned.
    self->is_sso = size < (int)sizeof(self->_inlined);
    char* p;
    if(self->is_sso){
        p = self->_inlined;
    }else{
        self->_ptr = (char*)malloc((size_t)size + 1);
        p = self->_ptr;
    }
    // `data` may be NULL for an empty source (e.g. a default string_view);
    // memcpy must not be handed a NULL pointer even for zero bytes.
    if(size > 0) memcpy(p, data, (size_t)size);
    p[size] = '\0';
    // check is_ascii (done on unsigned bytes; `isascii` is not standard C)
    for(int i = 0; i < size; i++){
        if((unsigned char)p[i] > 0x7F){
            self->is_ascii = false;
            break;
        }
    }
}
// Releases the heap buffer of a non-inline string and marks the object as an
// empty inline string. Inline strings own nothing and are left untouched.
void pkpy_Str__dtor(pkpy_Str *self){
    if(self->is_sso) return;
    free(self->_ptr);
    self->is_sso = true;
    self->size = 0;
}
// Returns an independent copy of `self`. Inline strings are copied by value;
// heap strings get a freshly allocated buffer.
pkpy_Str pkpy_Str__copy(const pkpy_Str *self){
    pkpy_Str dup = *self;
    if(self->is_sso) return dup;
    // size + 1 so the terminator is carried over with the payload
    const int nbytes = self->size + 1;
    dup._ptr = (char*)malloc(nbytes);
    memcpy(dup._ptr, self->_ptr, nbytes);
    dup._ptr[dup.size] = '\0';
    return dup;
}
// Returns a new string holding the bytes of `self` followed by those of `other`.
// The result is ASCII only when both operands are.
pkpy_Str pkpy_Str__concat(const pkpy_Str *self, const pkpy_Str *other){
    const int total = self->size + other->size;
    pkpy_Str retval = {
        .size = total,
        .is_ascii = self->is_ascii && other->is_ascii,
        .is_sso = total < sizeof(retval._inlined),
    };
    // Only results too long for the inline buffer need a heap allocation.
    if(!retval.is_sso) retval._ptr = (char*)malloc(total + 1);
    char* dst = retval.is_sso ? retval._inlined : retval._ptr;
    memcpy(dst, pkpy_Str__data(self), self->size);
    memcpy(dst + self->size, pkpy_Str__data(other), other->size);
    dst[total] = '\0';
    return retval;
}
// Returns `self` followed by `size` bytes from `other`. The raw bytes are
// wrapped in a temporary string so the string/string concat can be reused.
pkpy_Str pkpy_Str__concat2(const pkpy_Str *self, const char *other, int size){
    pkpy_Str rhs;
    pkpy_Str__ctor2(&rhs, other, size);
    pkpy_Str joined = pkpy_Str__concat(self, &rhs);
    pkpy_Str__dtor(&rhs);
    return joined;
}
// Returns the tail of `self` beginning at byte offset `start`.
pkpy_Str pkpy_Str__substr(const pkpy_Str *self, int start){
    int remaining = self->size - start;
    return pkpy_Str__substr2(self, start, remaining);
}
// Returns a new string made of `size` bytes of `self` starting at byte offset
// `start`. No bounds check is performed.
pkpy_Str pkpy_Str__substr2(const pkpy_Str *self, int start, int size){
    const char* from = pkpy_Str__data(self) + start;
    pkpy_Str piece;
    pkpy_Str__ctor2(&piece, from, size);
    return piece;
}
// Returns a copy of `self` with the ASCII letters A-Z converted to lowercase.
// All other bytes are left as they are.
pkpy_Str pkpy_Str__lower(const pkpy_Str *self){
    pkpy_Str result = pkpy_Str__copy(self);
    char* cur = (char*)pkpy_Str__data(&result);
    char* const end = cur + result.size;
    for(; cur != end; ++cur){
        if(*cur >= 'A' && *cur <= 'Z') *cur += 32;
    }
    return result;
}
// Returns a copy of `self` with the ASCII letters a-z converted to uppercase.
// All other bytes are left as they are.
pkpy_Str pkpy_Str__upper(const pkpy_Str *self){
    pkpy_Str result = pkpy_Str__copy(self);
    char* cur = (char*)pkpy_Str__data(&result);
    char* const end = cur + result.size;
    for(; cur != end; ++cur){
        if(*cur >= 'a' && *cur <= 'z') *cur -= 32;
    }
    return result;
}
// Returns a copy of `self` in which every byte equal to `old` is replaced by `new_`.
pkpy_Str pkpy_Str__replace(const pkpy_Str *self, char old, char new_){
    pkpy_Str result = pkpy_Str__copy(self);
    char* cur = (char*)pkpy_Str__data(&result);
    char* const end = cur + result.size;
    while(cur != end){
        if(*cur == old) *cur = new_;
        ++cur;
    }
    return result;
}
// Returns a new string in which every non-overlapping occurrence of `old` in
// `self` is replaced by `new_`, scanning left to right.
// An empty `old` returns a plain copy of `self`, because the search below
// would otherwise match forever without advancing.
// The result always lives in a heap buffer and is NUL-terminated.
pkpy_Str pkpy_Str__replace2(const pkpy_Str *self, const pkpy_Str *old, const pkpy_Str *new_){
    if(old->size == 0) return pkpy_Str__copy(self);
    const char* src = pkpy_Str__data(self);
    c11_vector buffer;
    c11_vector__ctor(&buffer, sizeof(char));
    int start = 0;
    while(true) {
        int i = pkpy_Str__index(self, old, start);
        if(i == -1) break;
        // bytes between the previous match and this one, copied straight from the source
        c11_vector__extend(char, &buffer, src + start, i - start);
        c11_vector__extend(char, &buffer, pkpy_Str__data(new_), new_->size);
        start = i + old->size;
    }
    // whatever follows the last match
    c11_vector__extend(char, &buffer, src + start, self->size - start);
    // terminate the buffer; the terminator is not counted in `size`
    c11_vector__push_back(char, &buffer, '\0');
    pkpy_Str retval = {
        .size = buffer.count - 1,
        .is_ascii = self->is_ascii && old->is_ascii && new_->is_ascii,
        .is_sso = false,
        ._ptr = (char*)buffer.data,
    };
    return retval;
}
// Three-way comparison of two strings; see pkpy_Str__cmp2 for the ordering.
int pkpy_Str__cmp(const pkpy_Str *self, const pkpy_Str *other){
    const char* rhs = pkpy_Str__data(other);
    return pkpy_Str__cmp2(self, rhs, other->size);
}
// Three-way comparison of `self` against the `size` bytes at `other`.
// Returns a negative, zero or positive value. The common prefix is compared
// byte-wise as unsigned chars; when it is equal, the shorter string orders first.
// memcmp is used instead of strncmp so that strings containing embedded '\0'
// bytes are compared over their full length.
int pkpy_Str__cmp2(const pkpy_Str *self, const char *other, int size){
    int common = PK_MIN(self->size, size);
    // Skip the call for an empty prefix: `other` may be NULL in that case.
    if(common > 0){
        int res = memcmp(pkpy_Str__data(self), other, (size_t)common);
        if(res != 0) return res;
    }
    return self->size - size;
}
// Returns the `i`-th UTF-8 code point of `self` as a new string.
pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str *self, int i){
    int offset = pkpy_Str__unicode_index_to_byte(self, i);
    // the lead byte tells how many bytes the code point occupies
    unsigned char lead = pkpy_Str__data(self)[offset];
    int nbytes = pkpy_utils__u8len(lead, false);
    return pkpy_Str__substr2(self, offset, nbytes);
}
pkpy_Str pkpy_Str__u8_slice(const pkpy_Str *self, int start, int stop, int step){
c11_vector buffer;
c11_vector__ctor(&buffer, sizeof(char));
assert(step != 0);
if(self->is_ascii){
const char* p = pkpy_Str__data(self);
for (int i=start; step>0 ? i<stop : i>stop; i+=step) {
c11_vector__push_back(char, &buffer, p[i]);
}
}else{
for (int i=start; step>0 ? i<stop : i>stop; i+=step) {
pkpy_Str unicode = pkpy_Str__u8_getitem(self, i);
const char* p = pkpy_Str__data(&unicode);
for(int j = 0; j < unicode.size; j++){
c11_vector__push_back(char, &buffer, p[j]);
}
pkpy_Str__dtor(&unicode);
}
}
pkpy_Str retval = {
.size = buffer.count,
.is_ascii = self->is_ascii,
.is_sso = false,
._ptr = (char*)buffer.data,
};
return retval;
}
// Returns the number of UTF-8 code points in `self`.
int pkpy_Str__u8_length(const pkpy_Str *self){
    const int nbytes = self->size;
    return pkpy_Str__byte_index_to_unicode(self, nbytes);
}
// Converts a code-point index into the byte offset where that code point starts.
// For ASCII strings the two are equal; otherwise the string is walked from the
// beginning, one code point at a time.
int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i) {
    if(self->is_ascii) return i;
    const char* bytes = pkpy_Str__data(self);
    int offset = 0;
    for(int remaining = i; remaining > 0; remaining--){
        offset += pkpy_utils__u8len(bytes[offset], false);
    }
    return offset;
}
// Converts a byte offset into a code-point index by counting the code points
// that start within the first `n` bytes.
int pkpy_Str__byte_index_to_unicode(const pkpy_Str* self, int n) {
    if(self->is_ascii) return n;
    const char* bytes = pkpy_Str__data(self);
    int chars = 0;
    int pos = 0;
    while(pos < n){
        // bytes of the form 10xxxxxx continue a sequence; all others start one
        if((bytes[pos] & 0xC0) != 0x80) chars++;
        pos++;
    }
    return chars;
}
// Returns the byte offset of the first occurrence of `sub` in `self` at or
// after byte offset `start`, or -1 when there is none. An empty `sub` matches
// immediately at `start`.
int pkpy_Str__index(const pkpy_Str *self, const pkpy_Str *sub, int start){
    const int needle_len = sub->size;
    if(needle_len == 0) return start;
    const char* haystack = pkpy_Str__data(self);
    const char* needle = pkpy_Str__data(sub);
    // last offset at which `sub` still fits inside `self`
    const int last = self->size - needle_len;
    int pos = start;
    while(pos <= last){
        if(memcmp(haystack + pos, needle, needle_len) == 0) return pos;
        pos++;
    }
    return -1;
}
int pkpy_Str__count(const pkpy_Str *self, const pkpy_Str *sub){
if(sub->size == 0) return self->size + 1;
int cnt = 0;
int start = 0;
while(true) {
int i = pkpy_Str__index(self, sub, start);
if(i == -1) break;
cnt++;
start = i + sub->size;
}
return cnt;
}

View File

@ -9,159 +9,20 @@
namespace pkpy {
int utf8len(unsigned char c, bool suppress) {
if((c & 0b10000000) == 0) return 1;
if((c & 0b11100000) == 0b11000000) return 2;
if((c & 0b11110000) == 0b11100000) return 3;
if((c & 0b11111000) == 0b11110000) return 4;
if((c & 0b11111100) == 0b11111000) return 5;
if((c & 0b11111110) == 0b11111100) return 6;
if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n")
return 0;
}
#define PK_STR_ALLOCATE() \
if(this->size < (int)sizeof(this->_inlined)) { \
this->data = this->_inlined; \
} else { \
this->data = (char*)std::malloc(this->size + 1); \
}
#define PK_STR_COPY_INIT(__s) \
for(int i = 0; i < this->size; i++) { \
this->data[i] = __s[i]; \
if(!isascii(__s[i])) is_ascii = false; \
} \
this->data[this->size] = '\0';
Str::Str() : size(0), is_ascii(true), data(_inlined) { _inlined[0] = '\0'; }
Str::Str(int size, bool is_ascii) :
size(size), is_ascii(is_ascii){PK_STR_ALLOCATE()}
Str::Str(const std::string& s) :
size(s.size()), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
Str::Str(std::string_view s) :
size(s.size()), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
Str::Str(const char* s) :
size(strlen(s)), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
Str::Str(const char* s, int len) :
size(len), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
Str::Str(pair<char*, int> detached) : size(detached.second), is_ascii(true) {
this->data = detached.first;
Str::Str(pair<char*, int> detached) {
this->size = detached.second;
this->is_ascii = true;
this->is_sso = false;
this->_ptr = detached.first;
for(int i = 0; i < size; i++) {
if(!isascii(data[i])) {
if(!isascii(_ptr[i])) {
is_ascii = false;
break;
}
}
assert(data[size] == '\0');
assert(_ptr[size] == '\0');
}
Str::Str(const Str& other) : size(other.size), is_ascii(other.is_ascii) {
PK_STR_ALLOCATE()
std::memcpy(data, other.data, size);
data[size] = '\0';
}
Str::Str(Str&& other) : size(other.size), is_ascii(other.is_ascii) {
if(other.is_inlined()) {
data = _inlined;
for(int i = 0; i < size; i++)
_inlined[i] = other._inlined[i];
data[size] = '\0';
} else {
data = other.data;
// zero out `other`
other.data = other._inlined;
other.data[0] = '\0';
other.size = 0;
}
}
Str operator+ (const char* p, const Str& str) {
Str other(p);
return other + str;
}
std::ostream& operator<< (std::ostream& os, const Str& str) { return os << str.sv(); }
bool operator< (const std::string_view other, const Str& str) { return other < str.sv(); }
Str& Str::operator= (const Str& other) {
if(!is_inlined()) std::free(data);
size = other.size;
is_ascii = other.is_ascii;
PK_STR_ALLOCATE()
std::memcpy(data, other.data, size);
data[size] = '\0';
return *this;
}
Str Str::operator+ (const Str& other) const {
Str ret(size + other.size, is_ascii && other.is_ascii);
std::memcpy(ret.data, data, size);
std::memcpy(ret.data + size, other.data, other.size);
ret.data[ret.size] = '\0';
return ret;
}
Str Str::operator+ (const char* p) const {
Str other(p);
return *this + other;
}
bool Str::operator== (const Str& other) const {
if(size != other.size) return false;
return memcmp(data, other.data, size) == 0;
}
bool Str::operator!= (const Str& other) const {
if(size != other.size) return true;
return memcmp(data, other.data, size) != 0;
}
bool Str::operator== (const std::string_view other) const {
if(size != (int)other.size()) return false;
return memcmp(data, other.data(), size) == 0;
}
bool Str::operator!= (const std::string_view other) const {
if(size != (int)other.size()) return true;
return memcmp(data, other.data(), size) != 0;
}
bool Str::operator== (const char* p) const { return *this == std::string_view(p); }
bool Str::operator!= (const char* p) const { return *this != std::string_view(p); }
bool Str::operator< (const Str& other) const { return this->sv() < other.sv(); }
bool Str::operator< (const std::string_view other) const { return this->sv() < other; }
bool Str::operator> (const Str& other) const { return this->sv() > other.sv(); }
bool Str::operator<= (const Str& other) const { return this->sv() <= other.sv(); }
bool Str::operator>= (const Str& other) const { return this->sv() >= other.sv(); }
Str::~Str() {
if(!is_inlined()) std::free(data);
}
Str Str::substr(int start, int len) const {
Str ret(len, is_ascii);
std::memcpy(ret.data, data + start, len);
ret.data[len] = '\0';
return ret;
}
Str Str::substr(int start) const { return substr(start, size - start); }
Str Str::strip(bool left, bool right, const Str& chars) const {
int L = 0;
int R = u8_length();
@ -177,6 +38,7 @@ Str Str::strip(bool left, bool right, const Str& chars) const {
}
Str Str::strip(bool left, bool right) const {
const char* data = pkpy_Str__data(this);
if(is_ascii) {
int L = 0;
int R = size;
@ -194,24 +56,6 @@ Str Str::strip(bool left, bool right) const {
}
}
Str Str::lower() const {
std::string copy(data, size);
std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) {
if('A' <= c && c <= 'Z') return c + ('a' - 'A');
return (int)c;
});
return Str(copy);
}
Str Str::upper() const {
std::string copy(data, size);
std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) {
if('a' <= c && c <= 'z') return c - ('a' - 'A');
return (int)c;
});
return Str(copy);
}
Str Str::escape(bool single_quote) const {
SStream ss;
escape_(ss, single_quote);
@ -220,7 +64,7 @@ Str Str::escape(bool single_quote) const {
void Str::escape_(SStream& ss, bool single_quote) const {
ss << (single_quote ? '\'' : '"');
for(int i = 0; i < length(); i++) {
for(int i = 0; i < size; i++) {
char c = this->operator[] (i);
switch(c) {
case '"':
@ -249,71 +93,6 @@ void Str::escape_(SStream& ss, bool single_quote) const {
ss << (single_quote ? '\'' : '"');
}
int Str::index(const Str& sub, int start) const {
auto p = std::search(data + start, data + size, sub.data, sub.data + sub.size);
if(p == data + size) return -1;
return p - data;
}
Str Str::replace(char old, char new_) const {
Str copied = *this;
for(int i = 0; i < copied.size; i++) {
if(copied.data[i] == old) copied.data[i] = new_;
}
return copied;
}
Str Str::replace(const Str& old, const Str& new_, int count) const {
SStream ss;
int start = 0;
while(true) {
int i = index(old, start);
if(i == -1) break;
ss << substr(start, i - start);
ss << new_;
start = i + old.size;
if(count != -1 && --count == 0) break;
}
ss << substr(start, size - start);
return ss.str();
}
int Str::_unicode_index_to_byte(int i) const {
if(is_ascii) return i;
int j = 0;
while(i > 0) {
j += utf8len(data[j]);
i--;
}
return j;
}
int Str::_byte_index_to_unicode(int n) const {
if(is_ascii) return n;
int cnt = 0;
for(int i = 0; i < n; i++) {
if((data[i] & 0xC0) != 0x80) cnt++;
}
return cnt;
}
Str Str::u8_getitem(int i) const {
i = _unicode_index_to_byte(i);
return substr(i, utf8len(data[i]));
}
Str Str::u8_slice(int start, int stop, int step) const {
SStream ss;
if(is_ascii) {
PK_SLICE_LOOP(i, start, stop, step) ss << data[i];
} else {
PK_SLICE_LOOP(i, start, stop, step) ss << u8_getitem(i);
}
return ss.str();
}
int Str::u8_length() const { return _byte_index_to_unicode(size); }
vector<std::string_view> Str::split(const Str& sep) const {
vector<std::string_view> result;
std::string_view tmp;
@ -332,6 +111,7 @@ vector<std::string_view> Str::split(const Str& sep) const {
vector<std::string_view> Str::split(char sep) const {
vector<std::string_view> result;
const char* data = pkpy_Str__data(this);
int i = 0;
for(int j = 0; j < size; j++) {
if(data[j] == sep) {
@ -344,19 +124,6 @@ vector<std::string_view> Str::split(char sep) const {
return result;
}
int Str::count(const Str& sub) const {
if(sub.empty()) return size + 1;
int cnt = 0;
int start = 0;
while(true) {
int i = index(sub, start);
if(i == -1) break;
cnt++;
start = i + sub.size;
}
return cnt;
}
static std::map<std::string_view, uint16_t>& _interned() {
static std::map<std::string_view, uint16_t> interned;
return interned;

9
src/common/utils.c Normal file
View File

@ -0,0 +1,9 @@
// Platform name table, declared `extern` in the common utils header.
// The trailing numbers give each entry's array index; presumably a platform id
// defined elsewhere is used to index it, so the order must not change
// (TODO confirm against the users of this table).
const char* kPlatformStrings[] = {
"win32", // 0
"emscripten", // 1
"ios", // 2
"darwin", // 3
"android", // 4
"linux", // 5
"unknown" // 6
};

61
src/common/vector.c Normal file
View File

@ -0,0 +1,61 @@
#include "pocketpy/common/vector.h"
#include <stdlib.h>
#include <string.h>
// Initializes `self` with uninitialized storage for `count` elements of
// `elem_size` bytes each. The byte count is computed in size_t so that large
// arrays do not overflow an int multiplication.
void c11_array__ctor(c11_array* self, int elem_size, int count){
    self->data = malloc((size_t)elem_size * (size_t)count);
    self->count = count;
    self->elem_size = elem_size;
}
// Frees the element storage and leaves the array empty with a NULL data pointer.
void c11_array__dtor(c11_array* self){
    void* block = self->data;
    self->data = NULL;
    self->count = 0;
    free(block);
}
// Returns a new array with the same element size and count, holding a
// byte-wise copy of the elements.
c11_array c11_array__copy(const c11_array* self){
    c11_array retval;
    c11_array__ctor(&retval, self->elem_size, self->count);
    // An empty array may have a NULL buffer (malloc(0)); memcpy must not be
    // called with NULL pointers even for zero bytes.
    if(self->count > 0){
        memcpy(retval.data, self->data, (size_t)self->elem_size * (size_t)self->count);
    }
    return retval;
}
// Returns the address of element `index`. No bounds check is performed.
void* c11_array__at(c11_array* self, int index){
    char* base = (char*)self->data;
    int offset = self->elem_size * index;
    return base + offset;
}
// Initializes `self` as an empty vector of `elem_size`-byte elements.
// No storage is allocated until the first reservation.
void c11_vector__ctor(c11_vector* self, int elem_size){
    self->elem_size = elem_size;
    self->capacity = 0;
    self->count = 0;
    self->data = NULL;
}
// Frees the element storage and resets the vector to the empty state.
void c11_vector__dtor(c11_vector* self){
    free(self->data);  // free(NULL) is a no-op, so no guard is needed
    self->data = NULL;
    self->capacity = 0;
    self->count = 0;
}
// Returns a new vector with the same element size, at least the same capacity,
// and a byte-wise copy of the elements.
c11_vector c11_vector__copy(const c11_vector* self){
    c11_vector retval;
    c11_vector__ctor(&retval, self->elem_size);
    c11_vector__reserve(&retval, self->capacity);
    // A never-filled source has data == NULL; memcpy must not be called with a
    // NULL source even for zero bytes.
    if(self->count > 0){
        memcpy(retval.data, self->data, (size_t)self->elem_size * (size_t)self->count);
    }
    retval.count = self->count;
    return retval;
}
// Returns the address of element `index`. No bounds check is performed.
void* c11_vector__at(c11_vector* self, int index){
    char* base = (char*)self->data;
    int offset = self->elem_size * index;
    return base + offset;
}
// Ensures the vector can hold at least `capacity` elements. Requests below 4
// are rounded up to 4; the vector never shrinks. Existing elements are kept.
// Aborts the process if the allocation fails.
void c11_vector__reserve(c11_vector* self, int capacity){
    if(capacity < 4) capacity = 4;
    if(capacity <= self->capacity) return;
    // Keep the old pointer until realloc succeeds: assigning the result
    // directly would leak the buffer and leave `data` NULL on failure.
    void* grown = realloc(self->data, (size_t)self->elem_size * (size_t)capacity);
    if(grown == NULL) abort();
    self->data = grown;
    self->capacity = capacity;
}

View File

@ -1,6 +1,7 @@
#include "pocketpy/compiler/lexer.hpp"
#include "pocketpy/common/gil.hpp"
#include "pocketpy/common/version.h"
#include "pocketpy/common/str.h"
#include <cstdarg>
@ -107,7 +108,7 @@ Error* Lexer::eat_name() noexcept{
curr_char--;
while(true) {
unsigned char c = peekchar();
int u8bytes = utf8len(c, true);
int u8bytes = pkpy_utils__u8len(c, true);
if(u8bytes == 0) return SyntaxError("invalid char: %c", c);
if(u8bytes == 1) {
if(isalpha(c) || c == '_' || isdigit(c)) {

View File

@ -446,7 +446,7 @@ PyVar VM::__run_top_frame() {
case OP_BUILD_BYTES: {
const Str& s = CAST(Str&, TOP());
unsigned char* p = (unsigned char*)std::malloc(s.size);
std::memcpy(p, s.data, s.size);
std::memcpy(p, s.c_str(), s.size);
TOP() = VAR(Bytes(p, s.size));
}
DISPATCH()

View File

@ -49,7 +49,7 @@ void StringIter::_register(VM* vm, PyObject* mod, PyObject* type) {
Str& s = PK_OBJ_GET(Str, self.ref);
if(self.i == s.size) return 0;
int start = self.i;
int len = utf8len(s.data[self.i]);
int len = pkpy_utils__u8len(s[self.i], false);
self.i += len;
vm->s_data.push(VAR(s.substr(start, len)));
return 1;

View File

@ -85,7 +85,7 @@ void FileIO::_register(VM* vm, PyObject* mod, PyObject* type) {
FileIO& io = PK_OBJ_GET(FileIO, args[0]);
if(io.is_text) {
Str& s = CAST(Str&, args[1]);
fwrite(s.data, 1, s.length(), io.fp);
fwrite(s.c_str(), 1, s.length(), io.fp);
} else {
Bytes& buffer = CAST(Bytes&, args[1]);
fwrite(buffer.data(), 1, buffer.size(), io.fp);

View File

@ -539,7 +539,7 @@ void __init_builtins(VM* _vm) {
double float_out;
char* p_end;
try {
float_out = std::strtod(s.data, &p_end);
float_out = std::strtod(s.c_str(), &p_end);
if(p_end != s.end()) throw 1;
} catch(...) { vm->ValueError("invalid literal for float(): " + s.escape()); }
return VAR(float_out);
@ -636,13 +636,12 @@ void __init_builtins(VM* _vm) {
return VAR(self.u8_getitem(i));
});
_vm->bind(_vm->_t(VM::tp_str), "replace(self, old, new, count=-1)", [](VM* vm, ArgsView args) {
_vm->bind(_vm->_t(VM::tp_str), "replace(self, old, new)", [](VM* vm, ArgsView args) {
const Str& self = _CAST(Str&, args[0]);
const Str& old = CAST(Str&, args[1]);
if(old.empty()) vm->ValueError("empty substring");
const Str& new_ = CAST(Str&, args[2]);
int count = CAST(int, args[3]);
return VAR(self.replace(old, new_, count));
return VAR(self.replace(old, new_));
});
_vm->bind(_vm->_t(VM::tp_str), "split(self, sep=' ')", [](VM* vm, ArgsView args) {
@ -705,14 +704,14 @@ void __init_builtins(VM* _vm) {
const Str& suffix = CAST(Str&, args[1]);
int offset = self.length() - suffix.length();
if(offset < 0) return vm->False;
bool ok = memcmp(self.data + offset, suffix.data, suffix.length()) == 0;
bool ok = memcmp(self.c_str() + offset, suffix.c_str(), suffix.length()) == 0;
return VAR(ok);
});
_vm->bind_func(VM::tp_str, "encode", 1, [](VM* vm, ArgsView args) {
const Str& self = _CAST(Str&, args[0]);
Bytes retval(self.length());
std::memcpy(retval.data(), self.data, self.length());
std::memcpy(retval.data(), self.c_str(), self.length());
return VAR(std::move(retval));
});

View File

@ -39,8 +39,8 @@ assert t[-5:] == 'ow!!!'
assert t[3:-3] == 's is string example....wow'
assert s > q;assert s < r
assert s.replace("o","") == "ftball"
assert s.replace("o","O",1) == "fOotball"
assert s.replace("foo","ball",1) == "balltball"
assert s.replace("o","O") == "fOOtball"
assert s.replace("foo","ball") == "balltball"
assert s.startswith('f') == True;assert s.endswith('o') == False
assert t.startswith('this') == True;