Compare commits

...

5 Commits

Author SHA1 Message Date
blueloveTH
db1ae5bfa2 Update py_list.c 2024-07-02 15:06:28 +08:00
blueloveTH
8319cb2ad4 some fix 2024-07-02 15:01:04 +08:00
blueloveTH
ab0f07bbd7 some fix 2024-07-02 11:47:56 +08:00
blueloveTH
a59a68b6f5 Update str.h 2024-07-02 11:09:40 +08:00
blueloveTH
78aa295876 some fix 2024-07-02 11:06:53 +08:00
20 changed files with 491 additions and 291 deletions

View File

@ -15,22 +15,35 @@ typedef struct c11_sv{
int size;
} c11_sv;
int c11_sv__cmp(c11_sv self, c11_sv other);
int c11_sv__cmp2(c11_sv self, const char* other, int size);
int c11_sv__cmp3(c11_sv self, const char* other);
/* string */
typedef struct c11_string{
// int size | char[] | '\0'
typedef const char c11_string;
int size;
const char data[]; // flexible array member
} c11_string;
/* bytes */
typedef struct c11_bytes{
int size;
unsigned char data[]; // flexible array member
} c11_bytes;
int c11_sv__cmp(c11_sv self, c11_sv other);
int c11_sv__cmp2(c11_sv self, const char* other);
bool c11__streq(const char* a, const char* b);
bool c11__sveq(c11_sv a, const char* b);
c11_string* c11_string__new(const char* data);
c11_string* c11_string__new2(const char* data, int size);
void c11_string__ctor(c11_string* self, const char* data);
void c11_string__ctor2(c11_string* self, const char* data, int size);
c11_string* c11_string__copy(c11_string* self);
void c11_string__delete(c11_string* self);
int c11_string__len(c11_string* self);
c11_sv c11_string__sv(c11_string* self);
c11_string* c11_string__replace(c11_string* self, char old, char new_);
int c11_string__u8_length(const c11_string* self);
int c11_string__u8_length(c11_string* self);
c11_sv c11_string__u8_getitem(c11_string* self, int i);
c11_string* c11_string__u8_slice(c11_string* self, int start, int stop, int step);
@ -55,6 +68,14 @@ int c11__byte_index_to_unicode(const char* data, int n);
bool c11__is_unicode_Lo_char(int c);
int c11__u8_header(unsigned char c, bool suppress);
// Result codes returned by the unsigned-integer parser declared below.
typedef enum IntParsingResult{
// every character was a valid digit and the literal was within the length limit
IntParsing_SUCCESS,
// empty input, a character that is not a digit of the base, or an unsupported base
IntParsing_FAILURE,
// only valid digits, but too many of them to fit a signed 64-bit value
IntParsing_OVERFLOW,
} IntParsingResult;
IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base);
#ifdef __cplusplus
}
#endif

View File

@ -87,14 +87,6 @@ enum Precedence {
PREC_HIGHEST,
};
typedef enum IntParsingResult{
IntParsing_SUCCESS,
IntParsing_FAILURE,
IntParsing_OVERFLOW,
} IntParsingResult;
IntParsingResult parse_uint(c11_sv text, int64_t* out, int base);
typedef struct Error Error;
typedef c11_array pk_TokenArray;

View File

@ -89,6 +89,8 @@ py_Type pk_VM__new_type(pk_VM* self,
bool subclass_enabled);
// type registration
py_Type pk_str__register();
py_Type pk_bytes__register();
py_Type pk_list__register();
#ifdef __cplusplus

View File

@ -12,35 +12,24 @@ typedef struct PyObject{
bool gc_is_large;
bool gc_marked;
int slots; // number of slots in the object
char flex[];
} PyObject;
// slots >= 0, allocate N slots
// slots == -1, allocate a dict
// | 8 bytes HEADER | <N slots> | <value>
// | 8 bytes HEADER | <dict> | <value>
static_assert(sizeof(PyObject) <= 8, "!(sizeof(PyObject) <= 8)");
// | HEADER | <N slots> | <userdata>
// | HEADER | <dict> | <userdata>
py_TValue* PyObject__slots(PyObject* self);
pk_NameDict* PyObject__dict(PyObject* self);
void* PyObject__value(PyObject* self);
void* PyObject__userdata(PyObject* self);
#define PK_OBJ_HEADER_SIZE(slots) ((slots)>=0 ? 8+sizeof(py_TValue)*(slots) : 8+sizeof(pk_NameDict))
#define PK_OBJ_SLOTS_SIZE(slots) ((slots) >= 0 ? sizeof(py_TValue) * (slots) : sizeof(pk_NameDict))
PyObject* PyObject__new(py_Type type, int slots, int size);
void PyObject__delete(PyObject* self);
PK_INLINE py_TValue PyVar__fromobj(PyObject* obj){
if(!obj) return PY_NULL;
py_TValue retval = {
.type = obj->type,
.is_ptr = true,
._obj = obj
};
return retval;
}
#ifdef __cplusplus
}
#endif

View File

@ -55,7 +55,7 @@ void py_newbool(py_Ref, bool);
void py_newstr(py_Ref, const char*);
void py_newstrn(py_Ref, const char*, int);
// void py_newfstr(py_Ref, const char*, ...);
void py_newbytes(py_Ref, const unsigned char*, int);
unsigned char* py_newbytes(py_Ref, int);
void py_newnone(py_Ref);
void py_newnotimplemented(py_Ref out);
void py_newellipsis(py_Ref out);
@ -104,7 +104,7 @@ bool py_tobool(const py_Ref);
py_Type py_totype(const py_Ref);
const char* py_tostr(const py_Ref);
const char* py_tostrn(const py_Ref, int* size);
const unsigned char* py_tobytes(const py_Ref, int* size);
unsigned char* py_tobytes(const py_Ref, int* size);
void* py_touserdata(const py_Ref);
@ -311,6 +311,10 @@ py_GlobalRef py_tpobject(py_Type type);
/// Get the type name.
const char* py_tpname(py_Type type);
/// Check if the object is an instance of the given type.
/// Returns false when the object's type differs from `type`.
bool py_checktype(const py_Ref self, py_Type type);
/// Python favored string formatting.
/// %d: int
/// %i: py_i64 (int64_t)
@ -321,7 +325,6 @@ const char* py_tpname(py_Type type);
/// %p: void*
/// %t: py_Type
/// %n: py_Name
const char* py_fmt(const char* fmt, ...);
#define MAGIC_METHOD(x) extern uint16_t x;
#include "pocketpy/xmacros/magics.h"

View File

@ -27,7 +27,7 @@ static void pk_SourceData__ctor(struct pk_SourceData* self,
}
self->source = pk_SStream__submit(&ss);
self->is_precompiled = (strncmp(source, "pkpy:", 5) == 0);
c11_vector__push(const char*, &self->line_starts, self->source);
c11_vector__push(const char*, &self->line_starts, self->source->data);
}
static void pk_SourceData__dtor(struct pk_SourceData* self) {
@ -77,7 +77,7 @@ c11_string* pk_SourceData__snapshot(const struct pk_SourceData* self,
pk_SStream ss;
pk_SStream__ctor(&ss);
pk_sprintf(&ss, " File \"%S\", line %d", &self->filename, lineno);
pk_sprintf(&ss, " File \"%s\", line %d", self->filename->data, lineno);
if(name && *name) {
pk_SStream__write_cstr(&ss, ", in ");

View File

@ -1,5 +1,6 @@
#include "pocketpy/common/sstream.h"
#include "pocketpy/common/config.h"
#include "pocketpy/common/str.h"
#include "pocketpy/common/utils.h"
#include "pocketpy/pocketpy.h"
@ -9,7 +10,7 @@
#include <ctype.h>
#include <math.h>
const static int C11_STRING_HEADER_SIZE = sizeof(int);
const static int C11_STRING_HEADER_SIZE = sizeof(c11_string);
void pk_SStream__ctor(pk_SStream* self) {
c11_vector__ctor(&self->data, sizeof(char));
@ -24,14 +25,14 @@ void pk_SStream__write_char(pk_SStream* self, char c) { c11_vector__push(char, &
// Append the decimal representation of `i` to the stream.
void pk_SStream__write_int(pk_SStream* self, int i) {
    // len('-2147483648') == 11, plus one byte for the NUL that snprintf writes
    c11_vector__reserve(&self->data, self->data.count + 11 + 1);
    // Format at the current end of the buffer. Formatting at `data.data`
    // itself would overwrite whatever was written to the stream earlier
    // while `count` still advanced, corrupting the output.
    char* dst = (char*)self->data.data + self->data.count;
    int n = snprintf(dst, 11 + 1, "%d", i);
    // the trailing NUL is not counted, so the next write overwrites it
    self->data.count += n;
}
// Append the decimal representation of the 64-bit integer `val` to the stream.
void pk_SStream__write_i64(pk_SStream* self, int64_t val) {
    // len('-9223372036854775808') == 20, plus one byte for the NUL that snprintf writes
    c11_vector__reserve(&self->data, self->data.count + 20 + 1);
    // Format at the current end of the buffer. Formatting at `data.data`
    // itself would overwrite whatever was written to the stream earlier
    // while `count` still advanced, corrupting the output.
    char* dst = (char*)self->data.data + self->data.count;
    int n = snprintf(dst, 20 + 1, "%lld", (long long)val);
    // the trailing NUL is not counted, so the next write overwrites it
    self->data.count += n;
}
@ -106,9 +107,9 @@ void pk_SStream__write_ptr(pk_SStream* self, void* p) {
c11_string* pk_SStream__submit(pk_SStream* self) {
c11_vector__push(char, &self->data, '\0');
c11_array arr = c11_vector__submit(&self->data);
int* p = arr.data;
*p = arr.count - C11_STRING_HEADER_SIZE - 1;
return (c11_string*)(p + 1);
c11_string* retval = (c11_string*)arr.data;
retval->size = arr.count - C11_STRING_HEADER_SIZE - 1;
return retval;
}
void pk_vsprintf(pk_SStream* ss, const char* fmt, va_list args) {
@ -186,18 +187,3 @@ void pk_sprintf(pk_SStream* ss, const char* fmt, ...) {
pk_vsprintf(ss, fmt, args);
va_end(args);
}
const char* py_fmt(const char* fmt, ...) {
PK_THREAD_LOCAL pk_SStream ss;
if(ss.data.elem_size == 0) {
pk_SStream__ctor(&ss);
} else {
c11_vector__clear(&ss.data);
}
va_list args;
va_start(args, fmt);
pk_vsprintf(&ss, fmt, args);
va_end(args);
pk_SStream__write_char(&ss, '\0');
return (const char*)ss.data.data;
}

View File

@ -11,54 +11,49 @@
c11_string* c11_string__new(const char* data) { return c11_string__new2(data, strlen(data)); }
c11_string* c11_string__new2(const char* data, int size) {
int* p = malloc(sizeof(int) + size + 1);
*p++ = size;
c11_string* retval = malloc(sizeof(c11_string) + size + 1);
c11_string__ctor2(retval, data, size);
return retval;
}
void c11_string__ctor(c11_string* self, const char* data) {
c11_string__ctor2(self, data, strlen(data));
}
void c11_string__ctor2(c11_string* self, const char* data, int size) {
self->size = size;
char* p = (char*)self->data;
memcpy(p, data, size);
((char*)p)[size] = '\0';
return (c11_string*)p;
p[size] = '\0';
}
c11_string* c11_string__copy(c11_string* self) {
int* p = (int*)self - 1;
int total_size = sizeof(int) + *p + 1;
int* q = malloc(total_size);
memcpy(q, p, total_size);
return (c11_string*)(q + 1);
int total_size = sizeof(c11_string) + self->size + 1;
c11_string* retval = malloc(total_size);
memcpy(retval, self, total_size);
return retval;
}
void c11_string__delete(c11_string* self) {
int* p = (int*)self - 1;
free(p);
}
void c11_string__delete(c11_string* self) { free(self); }
int c11_string__len(c11_string* self) {
int* p = (int*)self - 1;
return *p;
}
c11_sv c11_string__sv(c11_string* self) {
int* p = (int*)self - 1;
return (c11_sv){self, *p};
}
c11_sv c11_string__sv(c11_string* self) { return (c11_sv){self->data, self->size}; }
c11_string* c11_string__replace(c11_string* self, char old, char new_) {
c11_string* retval = c11_string__copy(self);
char* p = (char*)retval;
int size = c11_string__len(retval);
for(int i = 0; i < size; i++) {
char* p = (char*)retval->data;
for(int i = 0; i < retval->size; i++) {
if(p[i] == old) p[i] = new_;
}
return retval;
}
int c11_string__u8_length(c11_string* self) {
int size = c11_string__len(self);
return c11__byte_index_to_unicode(self, size);
return c11__byte_index_to_unicode(self->data, self->size);
}
c11_sv c11_string__u8_getitem(c11_string* self, int i) {
i = c11__unicode_index_to_byte(self, i);
int size = c11__u8_header(self[i], false);
i = c11__unicode_index_to_byte(self->data, i);
int size = c11__u8_header(self->data[i], false);
return c11_sv__slice2(c11_string__sv(self), i, i + size);
}
@ -119,9 +114,7 @@ void c11_sv__upper(c11_sv sv, c11_vector* buf) {
}
}
c11_sv c11_sv__slice(c11_sv sv, int start) {
return c11_sv__slice2(sv, start, sv.size);
}
c11_sv c11_sv__slice(c11_sv sv, int start) { return c11_sv__slice2(sv, start, sv.size); }
c11_sv c11_sv__slice2(c11_sv sv, int start, int stop) {
if(stop < start) stop = start;
@ -233,19 +226,25 @@ int c11__byte_index_to_unicode(const char* data, int n) {
//////////////
int c11_sv__cmp(c11_sv self, c11_sv other) {
return c11_sv__cmp2(self, other.data, other.size);
int res = strncmp(self.data, other.data, PK_MIN(self.size, other.size));
if(res != 0) return res;
return self.size - other.size;
}
int c11_sv__cmp2(c11_sv self, const char* other, int size) {
int c11_sv__cmp2(c11_sv self, const char* other) {
int size = strlen(other);
int res = strncmp(self.data, other, PK_MIN(self.size, size));
if(res != 0) return res;
return self.size - size;
}
int c11_sv__cmp3(c11_sv self, const char* other) {
return c11_sv__cmp2(self, other, strlen(other));
}
// Return true when the two NUL-terminated strings have identical contents.
bool c11__streq(const char* a, const char* b) {
    int diff = strcmp(a, b);
    return diff == 0;
}
// Return true when the string view `a` holds exactly the bytes of the
// NUL-terminated string `b` (same length, same content).
bool c11__sveq(c11_sv a, const char* b) {
    int b_len = strlen(b);
    return a.size == b_len && memcmp(a.data, b, b_len) == 0;
}
// clang-format off
static const int kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70
303,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560};
@ -274,3 +273,94 @@ int c11__u8_header(unsigned char c, bool suppress) {
if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n")
return 0;
}
// Parse an unsigned integer literal from `text` into `*out`.
//
// `base` must be 2, 8, 10 or 16, or -1 to pick the base from a leading
// "0b" / "0o" / "0x" prefix (decimal when there is none). For bases 2, 8 and
// 16 a matching prefix is optional and is skipped when present.
//
// Returns:
//   IntParsing_SUCCESS  - `*out` holds the parsed value (0 .. INT64_MAX)
//   IntParsing_FAILURE  - empty digits, a character that is not a digit of
//                         `base`, or an unsupported base
//   IntParsing_OVERFLOW - all digits are valid but the value exceeds INT64_MAX
// `*out` is 0 unless IntParsing_SUCCESS is returned.
IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
    *out = 0;

    c11_sv prefix = {.data = text.data, .size = PK_MIN(2, text.size)};
    if(base == -1) {
        if(c11__sveq(prefix, "0b"))
            base = 2;
        else if(c11__sveq(prefix, "0o"))
            base = 8;
        else if(c11__sveq(prefix, "0x"))
            base = 16;
        else
            base = 10;
    }

    // optional prefix accepted for the selected base (decimal has none)
    const char* base_prefix;
    switch(base) {
        case 2: base_prefix = "0b"; break;
        case 8: base_prefix = "0o"; break;
        case 10: base_prefix = NULL; break;
        case 16: base_prefix = "0x"; break;
        default: return IntParsing_FAILURE;
    }
    if(base_prefix != NULL && c11__sveq(prefix, base_prefix)) {
        // text.remove_prefix(2);
        text = (c11_sv){text.data + 2, text.size - 2};
    }
    if(text.size == 0) return IntParsing_FAILURE;

    int64_t acc = 0;
    bool overflow = false;
    for(int i = 0; i < text.size; i++) {
        char c = text.data[i];
        int digit;
        if(c >= '0' && c <= '9') {
            digit = c - '0';
        } else if(c >= 'a' && c <= 'f') {
            digit = c - 'a' + 10;
        } else if(c >= 'A' && c <= 'F') {
            digit = c - 'A' + 10;
        } else {
            return IntParsing_FAILURE;
        }
        if(digit >= base) return IntParsing_FAILURE;

        // Keep validating the remaining characters after an overflow so that
        // an invalid character still takes precedence (FAILURE, not OVERFLOW).
        if(overflow) continue;

        // Check before multiplying: signed overflow is undefined behavior, and
        // counting digits alone is wrong in both directions (a 19-digit decimal
        // can exceed INT64_MAX, while leading zeros do not enlarge the value).
        if(acc > (INT64_MAX - digit) / base) {
            overflow = true;
        } else {
            acc = acc * base + digit;
        }
    }
    if(overflow) return IntParsing_OVERFLOW;

    *out = acc;
    return IntParsing_SUCCESS;
}

View File

@ -1354,7 +1354,7 @@ int Ctx__add_const_string(Ctx* self, c11_sv key) {
c11_vector__push(py_TValue, &self->co->consts, tmp);
int index = self->co->consts.count - 1;
c11_smallmap_s2n__set(&self->co_consts_string_dedup_map,
c11_string__sv(PyObject__value(tmp._obj)),
c11_string__sv(PyObject__userdata(tmp._obj)),
index);
return index;
}
@ -2054,11 +2054,11 @@ Error* pk_compile(pk_SourceData_ src, CodeObject* out) {
if(err) return err;
Token* data = (Token*)tokens.data;
printf("%s\n", src->filename);
printf("%s\n", src->filename->data);
for(int i = 0; i < tokens.count; i++) {
Token* t = data + i;
c11_string* tmp = c11_string__new2(t->start, t->length);
printf("[%d] %s: %s\n", t->line, pk_TokenSymbols[t->type], tmp);
printf("[%d] %s: %s\n", t->line, pk_TokenSymbols[t->type], tmp->data);
c11_string__delete(tmp);
}

View File

@ -40,7 +40,7 @@ const static TokenValue EmptyTokenValue;
static void pk_Lexer__ctor(pk_Lexer* self, pk_SourceData_ src){
PK_INCREF(src);
self->src = src;
self->curr_char = self->token_start = src->source;
self->curr_char = self->token_start = src->source->data;
self->current_line = 1;
self->brackets_level = 0;
c11_vector__ctor(&self->nexts, sizeof(Token));
@ -246,11 +246,11 @@ static Error* eat_name(pk_Lexer* self){
c11_sv name = {self->token_start, length};
if(self->src->mode == JSON_MODE) {
if(c11_sv__cmp3(name, "true") == 0) {
if(c11__sveq(name, "true")) {
add_token(self, TK_TRUE);
} else if(c11_sv__cmp3(name, "false") == 0) {
} else if(c11__sveq(name, "false")) {
add_token(self, TK_FALSE);
} else if(c11_sv__cmp3(name, "null") == 0) {
} else if(c11__sveq(name, "null")) {
add_token(self, TK_NONE);
} else {
return SyntaxError("invalid JSON token");
@ -260,12 +260,12 @@ static Error* eat_name(pk_Lexer* self){
const char** KW_BEGIN = pk_TokenSymbols + TK_FALSE;
int KW_COUNT = TK__COUNT__ - TK_FALSE;
#define less(a, b) (c11_sv__cmp3(b, a) > 0)
#define less(a, b) (c11_sv__cmp2(b, a) > 0)
int out;
c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
#undef less
if(out != KW_COUNT && c11_sv__cmp3(name, KW_BEGIN[out]) == 0) {
if(out != KW_COUNT && c11__sveq(name, KW_BEGIN[out])) {
add_token(self, (TokenIndex)(out + TK_FALSE));
} else {
add_token(self, TK_ID);
@ -372,7 +372,7 @@ static Error* eat_number(pk_Lexer* self){
}
// try integer
TokenValue value = {.index = TokenValue_I64};
switch(parse_uint(text, &value._i64, -1)) {
switch(c11__parse_uint(text, &value._i64, -1)) {
case IntParsing_SUCCESS:
add_token_with_value(self, TK_NUM, value);
return NULL;
@ -549,12 +549,12 @@ static Error* lex_one_token(pk_Lexer* self, bool* eof){
static Error* from_precompiled(pk_Lexer* self) {
TokenDeserializer deserializer;
TokenDeserializer__ctor(&deserializer, self->src->source);
TokenDeserializer__ctor(&deserializer, self->src->source->data);
deserializer.curr += 5; // skip "pkpy:"
c11_sv version = TokenDeserializer__read_string(&deserializer, '\n');
if(c11_sv__cmp3(version, PK_VERSION) != 0) {
if(c11_sv__cmp2(version, PK_VERSION) != 0) {
return SyntaxError("precompiled version mismatch");
}
if(TokenDeserializer__read_uint(&deserializer, '\n') != (int64_t)self->src->mode){
@ -576,8 +576,8 @@ static Error* from_precompiled(pk_Lexer* self) {
if(is_raw_string_used(t.type)) {
int64_t index = TokenDeserializer__read_uint(&deserializer, ',');
c11_string* p = c11__getitem(c11_string*, precompiled_tokens, index);
t.start = p;
t.length = c11_string__len(p);
t.start = p->data;
t.length = p->size;
} else {
t.start = NULL;
t.length = 0;
@ -618,97 +618,6 @@ static Error* from_precompiled(pk_Lexer* self) {
return NULL;
}
// Parse an unsigned integer literal from `text` into `*out`.
//
// `base` must be 2, 8, 10 or 16, or -1 to pick the base from a leading
// "0b" / "0o" / "0x" prefix (decimal when there is none). For bases 2, 8 and
// 16 a matching prefix is optional and is skipped when present.
//
// Returns:
//   IntParsing_SUCCESS  - `*out` holds the parsed value (0 .. INT64_MAX)
//   IntParsing_FAILURE  - empty digits, a character that is not a digit of
//                         `base`, or an unsupported base
//   IntParsing_OVERFLOW - all digits are valid but the value exceeds INT64_MAX
// `*out` is 0 unless IntParsing_SUCCESS is returned.
IntParsingResult parse_uint(c11_sv text, int64_t* out, int base) {
    *out = 0;

    c11_sv prefix = {.data = text.data, .size = PK_MIN(2, text.size)};
    if(base == -1) {
        if(c11_sv__cmp3(prefix, "0b") == 0)
            base = 2;
        else if(c11_sv__cmp3(prefix, "0o") == 0)
            base = 8;
        else if(c11_sv__cmp3(prefix, "0x") == 0)
            base = 16;
        else
            base = 10;
    }

    // optional prefix accepted for the selected base (decimal has none)
    const char* base_prefix;
    switch(base) {
        case 2: base_prefix = "0b"; break;
        case 8: base_prefix = "0o"; break;
        case 10: base_prefix = NULL; break;
        case 16: base_prefix = "0x"; break;
        default: return IntParsing_FAILURE;
    }
    if(base_prefix != NULL && c11_sv__cmp3(prefix, base_prefix) == 0) {
        // text.remove_prefix(2);
        text = (c11_sv){text.data + 2, text.size - 2};
    }
    if(text.size == 0) return IntParsing_FAILURE;

    int64_t acc = 0;
    bool overflow = false;
    for(int i = 0; i < text.size; i++) {
        char c = text.data[i];
        int digit;
        if(c >= '0' && c <= '9') {
            digit = c - '0';
        } else if(c >= 'a' && c <= 'f') {
            digit = c - 'a' + 10;
        } else if(c >= 'A' && c <= 'F') {
            digit = c - 'A' + 10;
        } else {
            return IntParsing_FAILURE;
        }
        if(digit >= base) return IntParsing_FAILURE;

        // Keep validating the remaining characters after an overflow so that
        // an invalid character still takes precedence (FAILURE, not OVERFLOW).
        if(overflow) continue;

        // Check before multiplying: signed overflow is undefined behavior, and
        // counting digits alone is wrong in both directions (a 19-digit decimal
        // can exceed INT64_MAX, while leading zeros do not enlarge the value).
        if(acc > (INT64_MAX - digit) / base) {
            overflow = true;
        } else {
            acc = acc * base + digit;
        }
    }
    if(overflow) return IntParsing_OVERFLOW;

    *out = acc;
    return IntParsing_SUCCESS;
}
Error* pk_Lexer__process(pk_SourceData_ src, pk_TokenArray* out_tokens){
pk_Lexer lexer;
pk_Lexer__ctor(&lexer, src);
@ -937,7 +846,7 @@ double TokenDeserializer__read_float(TokenDeserializer* self, char c){
// TODO: optimize this
c11_string* nullterm = c11_string__new2(sv.data, sv.size);
char* end;
double retval = strtod(nullterm, &end);
double retval = strtod(nullterm->data, &end);
c11_string__delete(nullterm);
assert(*end == 0);
return retval;

View File

@ -426,9 +426,8 @@ pk_FrameResult pk_VM__run_top_frame(pk_VM* self) {
case OP_BUILD_BYTES: {
int size;
const char* data = py_tostrn(TOP(), &size);
unsigned char* p = (unsigned char*)malloc(size);
unsigned char* p = py_newbytes(TOP(), size);
memcpy(p, data, size);
py_newbytes(TOP(), p, size);
DISPATCH();
}
case OP_BUILD_TUPLE: {
@ -495,7 +494,7 @@ pk_FrameResult pk_VM__run_top_frame(pk_VM* self) {
}
SP() = begin;
c11_string* res = pk_SStream__submit(&ss);
py_newstrn(SP()++, res, c11_string__len(res));
py_newstrn(SP()++, res->data, res->size);
c11_string__delete(res);
DISPATCH();
}

View File

@ -1,5 +1,6 @@
#include "pocketpy/interpreter/gc.h"
#include "pocketpy/common/memorypool.h"
#include "pocketpy/objects/base.h"
void pk_ManagedHeap__ctor(pk_ManagedHeap *self, pk_VM *vm){
c11_vector__ctor(&self->no_gc, sizeof(PyObject*));
@ -104,7 +105,8 @@ PyObject* pk_ManagedHeap__gcnew(pk_ManagedHeap *self, py_Type type, int slots, i
PyObject* PyObject__new(py_Type type, int slots, int size){
assert(slots >= 0 || slots == -1);
PyObject* self;
size += PK_OBJ_HEADER_SIZE(slots);
// header + slots + udsize
size = sizeof(PyObject) + PK_OBJ_SLOTS_SIZE(slots) + size;
if(size <= kPoolObjectBlockSize){
self = PoolObject_alloc();
self->gc_is_large = false;

View File

@ -1,4 +1,5 @@
#include "pocketpy/interpreter/vm.h"
#include "pocketpy/pocketpy.h"
#include <math.h>
@ -43,6 +44,8 @@ DEF_NUM_BINARY_OP(__ge__, >=, py_newbool, py_newbool)
#undef DEF_NUM_BINARY_OP
// Placeholder error helper: accepts a printf-style message but currently
// ignores it and only reports failure to the caller by returning false.
static bool ValueError(const char* fmt, ...) {
    return false;
}
static bool _py_int__neg__(int argc, py_Ref argv) {
py_checkargc(1);
int64_t val = py_toint(&argv[0]);
@ -182,6 +185,161 @@ DEF_INT_BITWISE_OP(__rshift__, >>)
#undef DEF_INT_BITWISE_OP
// int.__repr__: store the decimal text of the integer argument in the
// return-value slot as a str.
static bool _py_int__repr__(int argc, py_Ref argv) {
    py_checkargc(1);
    // "-9223372036854775808" is 20 characters, so 32 bytes always suffice
    char text[32];
    long long value = (long long)py_toint(&argv[0]);
    int length = snprintf(text, sizeof(text), "%lld", value);
    py_newstrn(py_retval(), text, length);
    return true;
}
// float.__repr__: store the "%f" text of the float argument in the
// return-value slot as a str.
static bool _py_float__repr__(int argc, py_Ref argv) {
    py_checkargc(1);
    double val = py_tofloat(&argv[0]);
    // "%f" prints every integral digit, so the largest finite double needs
    // sign + 309 digits + '.' + 6 decimals = 317 characters. A 32-byte buffer
    // would be truncated for large values.
    char buf[320];
    int size = snprintf(buf, sizeof(buf), "%f", val);
    // snprintf returns the length the full output would have had (or a
    // negative value on error), which may exceed what is actually stored in
    // `buf`; never pass more than the buffer holds.
    if(size < 0) size = 0;
    if(size >= (int)sizeof(buf)) size = (int)sizeof(buf) - 1;
    py_newstrn(py_retval(), buf, size);
    return true;
}
// Overlay used to view an int or float value as raw 32-bit words for hashing.
union c11_8bytes {
    py_i64 _i64;
    py_f64 _f64;
    // Must be a struct: as a union, `upper` and `lower` would alias the same
    // 4 bytes and the other 4 bytes of the value would never be touched.
    // Which word holds the numerically high half depends on byte order.
    struct {
        uint32_t upper;
        uint32_t lower;
    } bits;
};
// Scramble `u` by multiplying its `bits.upper` and `bits.lower` members by a
// fixed 32-bit constant (wrapping modulo 2^32), then return the result
// viewed through the `_i64` member.
static py_i64 c11_8bytes__hash(union c11_8bytes u) {
    // https://stackoverflow.com/questions/664014/what-integer-hash-function-are-good-that-accepts-an-integer-hash-key
    const uint32_t kMultiplier = 2654435761;
    u.bits.upper = u.bits.upper * kMultiplier;
    u.bits.lower = u.bits.lower * kMultiplier;
    return u._i64;
}
// int.__hash__: hash the integer argument via c11_8bytes__hash and store
// the result in the return-value slot as an int.
static bool _py_int__hash__(int argc, py_Ref argv) {
    py_checkargc(1);
    int64_t value = py_toint(&argv[0]);
    union c11_8bytes raw = {._i64 = value};
    py_i64 hashed = c11_8bytes__hash(raw);
    py_newint(py_retval(), hashed);
    return true;
}
// float.__hash__: hash the raw representation of the float argument via
// c11_8bytes__hash and store the result in the return-value slot as an int.
static bool _py_float__hash__(int argc, py_Ref argv) {
    py_checkargc(1);
    double value = py_tofloat(&argv[0]);
    union c11_8bytes raw = {._f64 = value};
    py_i64 hashed = c11_8bytes__hash(raw);
    py_newint(py_retval(), hashed);
    return true;
}
// int.__new__: implements int(), int(x) and int(text, base).
// argv[0] is the first (class) slot, so `argc` is 1 + the number of
// user-supplied arguments. Returns true with the result in the return-value
// slot, or the (false) result of the error helper on bad input.
static bool _py_int__new__(int argc, py_Ref argv) {
    if(argc == 1 + 0) {
        // int() == 0
        py_newint(py_retval(), 0);
        return true;
    }
    // 1 arg
    if(argc == 1 + 1) {
        switch(argv[1].type) {
            case tp_float: {
                // int(1.1) == 1
                // NOTE(review): the cast is undefined for NaN, infinities and
                // values outside the int64 range; nothing rejects them here.
                py_newint(py_retval(), (int64_t)py_tofloat(&argv[1]));
                return true;
            }
            case tp_int: {
                // int(1) == 1
                *py_retval() = argv[1];
                return true;
            }
            case tp_bool: {
                // int(True) == 1
                py_newint(py_retval(), (int64_t)py_tobool(&argv[1]));
                return true;
            }
            case tp_str: break;  // leave to the next block
            default: return TypeError("invalid arguments for int()");
        }
    }
    // more than 2 args -> error
    if(argc > 1 + 2) return TypeError("int() takes at most 2 arguments");
    // 1 or 2 args with str
    int base = 10;
    if(argc == 1 + 2) {
        if(!py_checktype(py_arg(2), tp_int)) return false;
        base = py_toint(py_arg(2));
    }
    if(!py_checktype(py_arg(1), tp_str)) return false;
    int size;
    const char* data = py_tostrn(py_arg(1), &size);
    // Remember the text as the caller wrote it: the error message must show
    // the whole literal, including a sign that is stripped below.
    const char* literal = data;
    bool negative = false;
    if(size && (data[0] == '+' || data[0] == '-')) {
        negative = data[0] == '-';
        data++;
        size--;
    }
    py_i64 val;
    if(c11__parse_uint((c11_sv){data, size}, &val, base) != IntParsing_SUCCESS) {
        return ValueError("invalid literal for int() with base %d: %q", base, literal);
    }
    py_newint(py_retval(), negative ? -val : val);
    return true;
}
// float.__new__: implements float() and float(x) for int, float, bool and
// str arguments. argv[0] is the first (class) slot, so `argc` is 1 + the
// number of user-supplied arguments. Returns true with the result in the
// return-value slot, or the (false) result of the error helper on bad input.
static bool _py_float__new__(int argc, py_Ref argv) {
    if(argc == 1 + 0) {
        // float() == 0.0
        py_newfloat(py_retval(), 0.0);
        return true;
    }
    if(argc > 1 + 1) return TypeError("float() takes at most 1 argument");
    // 1 arg
    switch(argv[1].type) {
        case tp_int: {
            // float(1) == 1.0
            py_newfloat(py_retval(), py_toint(&argv[1]));
            return true;
        }
        case tp_float: {
            // float(1.1) == 1.1
            *py_retval() = argv[1];
            return true;
        }
        case tp_bool: {
            // float(True) == 1.0
            py_newfloat(py_retval(), py_tobool(&argv[1]));
            return true;
        }
        case tp_str: break;  // leave to the next block
        default: return TypeError("invalid arguments for float()");
    }
    // str to float
    int size;
    const char* data = py_tostrn(py_arg(1), &size);
    if(c11__streq(data, "inf")) {
        py_newfloat(py_retval(), INFINITY);
        return true;
    }
    if(c11__streq(data, "-inf")) {
        py_newfloat(py_retval(), -INFINITY);
        return true;
    }
    char* p_end;
    py_f64 float_out = strtod(data, &p_end);
    // `p_end == data` means strtod converted nothing. Without this check an
    // empty string would pass the "consumed everything" test below and
    // silently become 0.0.
    if(p_end == data || p_end != data + size) {
        return ValueError("invalid literal for float(): %q", data);
    }
    py_newfloat(py_retval(), float_out);
    return true;
}
void pk_VM__init_builtins(pk_VM* self) {
/****** tp_int & tp_float ******/
py_bindmagic(tp_int, __add__, _py_int__add__);
@ -208,7 +366,17 @@ void pk_VM__init_builtins(pk_VM* self) {
py_bindmagic(tp_int, __neg__, _py_int__neg__);
py_bindmagic(tp_float, __neg__, _py_float__neg__);
// TODO: __repr__, __new__, __hash__
// __repr__
py_bindmagic(tp_int, __repr__, _py_int__repr__);
py_bindmagic(tp_float, __repr__, _py_float__repr__);
// __hash__
py_bindmagic(tp_int, __hash__, _py_int__hash__);
py_bindmagic(tp_float, __hash__, _py_float__hash__);
// __new__
py_bindmagic(tp_int, __new__, _py_int__new__);
py_bindmagic(tp_float, __new__, _py_float__new__);
// __truediv__
py_bindmagic(tp_int, __truediv__, _py_int__truediv__);

View File

@ -38,7 +38,11 @@ void pk_TypeInfo__ctor(pk_TypeInfo* self,
// create type object with __dict__
pk_ManagedHeap* heap = &pk_current_vm->heap;
PyObject* typeobj = pk_ManagedHeap__new(heap, tp_type, -1, sizeof(py_Type));
self->self = PyVar__fromobj(typeobj);
self->self = (py_TValue){
.type = typeobj->type,
.is_ptr = true,
._obj = typeobj,
};
self->module = module ? *module : PY_NULL;
self->subclass_enabled = subclass_enabled;
@ -84,7 +88,7 @@ void pk_VM__ctor(pk_VM* self) {
validate(tp_int, pk_VM__new_type(self, "int", tp_object, NULL, false));
validate(tp_float, pk_VM__new_type(self, "float", tp_object, NULL, false));
validate(tp_bool, pk_VM__new_type(self, "bool", tp_object, NULL, false));
validate(tp_str, pk_VM__new_type(self, "str", tp_object, NULL, false));
validate(tp_str, pk_str__register());
validate(tp_list, pk_list__register());
validate(tp_tuple, pk_VM__new_type(self, "tuple", tp_object, NULL, false));
@ -99,7 +103,7 @@ void pk_VM__ctor(pk_VM* self) {
validate(tp_super, pk_VM__new_type(self, "super", tp_object, NULL, false));
validate(tp_exception, pk_VM__new_type(self, "Exception", tp_object, NULL, true));
validate(tp_bytes, pk_VM__new_type(self, "bytes", tp_object, NULL, false));
validate(tp_bytes, pk_bytes__register());
validate(tp_mappingproxy, pk_VM__new_type(self, "mappingproxy", tp_object, NULL, false));
validate(tp_dict, pk_VM__new_type(self, "dict", tp_object, NULL, true));
@ -191,7 +195,7 @@ py_Type pk_VM__new_type(pk_VM* self,
/****************************************/
void PyObject__delete(PyObject* self) {
pk_TypeInfo* ti = c11__at(pk_TypeInfo, &pk_current_vm->types, self->type);
if(ti->dtor) ti->dtor(PyObject__value(self));
if(ti->dtor) ti->dtor(PyObject__userdata(self));
if(self->slots == -1) pk_NameDict__dtor(PyObject__dict(self));
if(self->gc_is_large) {
free(self);

View File

@ -2,16 +2,14 @@
#include "pocketpy/pocketpy.h"
#include <assert.h>
void* PyObject__value(PyObject* self){
return (char*)self + PK_OBJ_HEADER_SIZE(self->slots);
}
void* PyObject__userdata(PyObject* self) { return self->flex + PK_OBJ_SLOTS_SIZE(self->slots); }
pk_NameDict* PyObject__dict(PyObject* self) {
assert(self->slots == -1);
return (pk_NameDict*)((char*)self + 8);
return (pk_NameDict*)(self->flex);
}
py_TValue* PyObject__slots(PyObject* self) {
assert(self->slots >= 0);
return (py_TValue*)((char*)self + 8);
return (py_TValue*)(self->flex);
}

View File

@ -1,3 +1,4 @@
#include "pocketpy/common/str.h"
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
@ -16,14 +17,9 @@ double py_tofloat(const py_Ref self){
bool py_castfloat(const py_Ref self, double* out) {
switch(self->type) {
case tp_int:
*out = (double)self->_i64;
return true;
case tp_float:
*out = self->_f64;
return true;
default:
return false;
case tp_int: *out = (double)self->_i64; return true;
case tp_float: *out = self->_f64; return true;
default: return false;
}
}
@ -38,31 +34,17 @@ py_Type py_totype(const py_Ref self){
return *ud;
}
const char* py_tostr(const py_Ref self){
assert(self->type == tp_str);
int* p = PyObject__value(self->_obj);
return (const char*)(p+1);
}
const char* py_tostrn(const py_Ref self, int* size){
assert(self->type == tp_str);
int* p = PyObject__value(self->_obj);
*size = *p;
return (const char*)(p+1);
}
/* Returns the byte data of a `tp_bytes` value and stores the leading `int`
 * of its payload into `*size`. */
const unsigned char* py_tobytes(const py_Ref self, int* size) {
    assert(self->type == tp_bytes);
    int* header = PyObject__value(self->_obj);
    *size = header[0];
    return (unsigned char*)&header[1];
}
// Returns the payload pointer of the object referenced by `self`.
// Asserts that `self` is non-NULL and that `self->is_ptr` is set.
void* py_touserdata(const py_Ref self) {
assert(self && self->is_ptr);
// NOTE(review): two return statements follow, so the second one is unreachable
// as written. They look like the before/after forms of a rename
// (PyObject__value -> PyObject__userdata) -- confirm which one should remain.
return PyObject__value(self->_obj);
return PyObject__userdata(self->_obj);
}
bool py_istype(const py_Ref self, py_Type type){
return self->type == type;
bool py_istype(const py_Ref self, py_Type type) { return self->type == type; }
/* Reports whether `self` has exactly the type `type`.
 * No error is raised on mismatch yet; the intended call is kept below. */
bool py_checktype(const py_Ref self, py_Type type) {
    if(self->type == type) return true;
    // py_raise(PyExc_TypeError, "expected %s, got %s", py_typename(type), py_typename(self->type));
    return false;
}

View File

@ -16,7 +16,11 @@ py_Ref py_newmodule(const char *name, const char *package){
py_Ref r0 = py_pushtmp();
py_Ref r1 = py_pushtmp();
*r0 = PyVar__fromobj(obj);
*r0 = (py_TValue){
.type = obj->type,
.is_ptr = true,
._obj = obj,
};
py_newstr(r1, name);
py_setdict(r0, __name__, r1);

View File

@ -17,7 +17,7 @@ py_Type pk_list__register() {
void py_newlist(py_Ref out) {
pk_VM* vm = pk_current_vm;
PyObject* obj = pk_ManagedHeap__gcnew(&vm->heap, tp_list, 0, sizeof(List));
List* userdata = PyObject__value(obj);
List* userdata = PyObject__userdata(obj);
c11_vector__ctor(userdata, sizeof(py_TValue));
out->type = tp_list;
out->is_ptr = true;
@ -65,3 +65,11 @@ void py_list__insert(py_Ref self, int i, const py_Ref val) {
List* userdata = py_touserdata(self);
c11_vector__insert(py_TValue, userdata, i, *val);
}
////////////////////////////////
/* Native `__len__` for list: stores the length of argument 0 into the return
 * value and reports success.
 * NOTE(review): py_checkargc(1) presumably validates `argc` -- confirm. */
bool _py_list__len__(int argc, py_Ref argv) {
    py_checkargc(1);
    const py_i64 length = py_list__len(py_arg(0));
    py_newint(py_retval(), length);
    return true;
}

68
src/public/py_str.c Normal file
View File

@ -0,0 +1,68 @@
#include "pocketpy/common/str.h"
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
#include "pocketpy/objects/object.h"
#include "pocketpy/interpreter/vm.h"
/* Registers the "str" type (base: tp_object) with the current VM and returns
 * its type id. No destructor is needed because the string memory is
 * controlled by the object itself. */
py_Type pk_str__register() {
    return pk_VM__new_type(pk_current_vm, "str", tp_object, NULL, false);
}
/* Registers the "bytes" type (base: tp_object) with the current VM and
 * returns its type id. No destructor is needed because the byte buffer is
 * controlled by the object itself. */
py_Type pk_bytes__register() {
    return pk_VM__new_type(pk_current_vm, "bytes", tp_object, NULL, false);
}
/**
 * Creates a new str object from a NUL-terminated C string.
 *
 * @param out  reference that receives the new str value
 * @param data NUL-terminated source string; must not be NULL (passed to strlen)
 */
void py_newstr(py_Ref out, const char* data) {
    // A void function must not `return` an expression (C11 6.8.6.4), so call
    // the sized variant as a plain statement. The cast makes the
    // size_t -> int narrowing explicit.
    py_newstrn(out, data, (int)strlen(data));
}
/* Allocates a `tp_str` object large enough for a c11_string header, `size`
 * characters and one extra byte, initialises it with c11_string__ctor2(),
 * and makes `out` refer to it. */
void py_newstrn(py_Ref out, const char* data, int size) {
    // header + characters + 1 (presumably the trailing '\0' -- confirm in c11_string__ctor2)
    int nbytes = sizeof(c11_string) + size + 1;
    PyObject* obj = pk_ManagedHeap__gcnew(&pk_current_vm->heap, tp_str, 0, nbytes);
    c11_string__ctor2(PyObject__userdata(obj), data, size);
    out->_obj = obj;
    out->is_ptr = true;
    out->type = tp_str;
}
/* Allocates a `tp_bytes` object with room for `size` payload bytes, records
 * the size, makes `out` refer to it, and returns a pointer to the payload.
 * This function does not write to the payload itself; the caller fills it
 * through the returned pointer. */
unsigned char* py_newbytes(py_Ref out, int size) {
    // c11_bytes header followed by `size` data bytes
    PyObject* obj =
        pk_ManagedHeap__gcnew(&pk_current_vm->heap, tp_bytes, 0, sizeof(c11_bytes) + size);
    c11_bytes* bytes = PyObject__userdata(obj);
    bytes->size = size;
    out->_obj = obj;
    out->is_ptr = true;
    out->type = tp_bytes;
    return bytes->data;
}
const char* py_tostr(const py_Ref self) {
assert(self->type == tp_str);
c11_string* ud = PyObject__userdata(self->_obj);
return ud->data;
}
/* Returns the character data stored in a `tp_str` value and writes the
 * string's `size` field to `*size`. */
const char* py_tostrn(const py_Ref self, int* size) {
    assert(self->type == tp_str);
    c11_string* str = PyObject__userdata(self->_obj);
    const char* chars = str->data;
    *size = str->size;
    return chars;
}
/* Returns the byte buffer stored in a `tp_bytes` value and writes its
 * `size` field to `*size`. */
unsigned char* py_tobytes(const py_Ref self, int* size) {
    assert(self->type == tp_bytes);
    c11_bytes* bytes = PyObject__userdata(self->_obj);
    unsigned char* payload = bytes->data;
    *size = bytes->size;
    return payload;
}

View File

@ -1,3 +1,5 @@
#include "pocketpy/common/str.h"
#include "pocketpy/common/vector.h"
#include "pocketpy/pocketpy.h"
#include "pocketpy/common/utils.h"
@ -37,35 +39,8 @@ void py_newellipsis(py_Ref out) {
out->is_ptr = false;
}
/* Marks `out` as a null value by setting its type id to 0. */
void py_newnull(py_Ref out) {
    out->type = 0;
}
/**
 * Creates a new str object from a NUL-terminated C string.
 *
 * @param out  reference that receives the new str value
 * @param data NUL-terminated source string; must not be NULL (passed to strlen)
 */
void py_newstr(py_Ref out, const char* data) {
    // A void function must not `return` an expression (C11 6.8.6.4), so call
    // the sized variant as a plain statement. The cast makes the
    // size_t -> int narrowing explicit.
    py_newstrn(out, data, (int)strlen(data));
}
/**
 * Creates a new str object holding a copy of `size` bytes from `data`.
 *
 * Payload layout: an `int` length, then `size` characters, then a '\0'.
 *
 * @param out  reference that receives the new str value
 * @param data source characters; need not be NUL-terminated
 * @param size number of bytes to copy from `data`
 */
void py_newstrn(py_Ref out, const char* data, int size) {
    pk_ManagedHeap* heap = &pk_current_vm->heap;
    int total_size = sizeof(int) + size + 1;
    PyObject* obj = pk_ManagedHeap__gcnew(heap, tp_str, 0, total_size);
    int* p = PyObject__value(obj);
    *p = size;
    // Copy the characters and terminate them: previously `data` was never
    // written into the object, so the string content was whatever the
    // allocator returned, even though readers treat it as a C string.
    char* chars = (char*)(p + 1);
    memcpy(chars, data, size);
    chars[size] = '\0';
    out->type = tp_str;
    out->is_ptr = true;
    out->_obj = obj;
}
/* Allocates a `tp_bytes` object whose payload is an `int` length followed by
 * `size` bytes copied from `data`, and makes `out` refer to it. */
void py_newbytes(py_Ref out, const unsigned char* data, int size) {
    // int length header + data
    PyObject* obj = pk_ManagedHeap__gcnew(&pk_current_vm->heap, tp_bytes, 0, sizeof(int) + size);
    int* header = (int*)PyObject__value(obj);
    header[0] = size;
    memcpy(&header[1], data, size);
    out->_obj = obj;
    out->is_ptr = true;
    out->type = tp_bytes;
}
void py_newfunction(py_Ref out, py_CFunction f, const char* sig) {
py_newfunction2(out, f, sig, BindType_FUNCTION, NULL, NULL);