improve chr

This commit is contained in:
blueloveTH 2025-03-12 19:43:56 +08:00
parent b320e8d9a3
commit e25cc48463
5 changed files with 52 additions and 5 deletions

View File

@ -66,6 +66,7 @@ int c11__byte_index_to_unicode(const char* data, int n);
bool c11__is_unicode_Lo_char(int c);
int c11__u8_header(unsigned char c, bool suppress);
int c11__u8_value(int u8bytes, const char* data);
// Encodes the code point `utf32_char` as UTF-8 into `utf8_output`.
// No NUL terminator is written. Returns the number of bytes stored (1-4),
// or -1 if the value is rejected as invalid (at minimum, anything above 0x10FFFF).
int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]);
typedef enum IntParsingResult {
IntParsing_SUCCESS,

View File

@ -316,6 +316,38 @@ int c11__u8_value(int u8bytes, const char* data) {
return (int)value;
}
/**
 * Encode a single Unicode code point as UTF-8.
 *
 * @param utf32_char   Code point to encode.
 * @param utf8_output  Destination buffer of at least 4 bytes. Only the first
 *                     `return value` bytes are written; no NUL terminator is added.
 * @return Number of bytes written (1-4), or -1 if `utf32_char` is not a valid
 *         Unicode scalar value (a surrogate, or greater than 0x10FFFF). On -1
 *         the buffer is left untouched.
 */
int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]) {
    // U+D800..U+DFFF are reserved for UTF-16 surrogate pairs; RFC 3629
    // prohibits encoding them in UTF-8, so they are rejected up front.
    if(utf32_char >= 0xD800 && utf32_char <= 0xDFFF) return -1;

    if(utf32_char <= 0x7F) {
        // 1 byte: 0xxxxxxx
        utf8_output[0] = (char)utf32_char;
        return 1;
    }
    if(utf32_char <= 0x7FF) {
        // 2 bytes: 110xxxxx 10xxxxxx
        utf8_output[0] = (char)(0xC0 | ((utf32_char >> 6) & 0x1F));
        utf8_output[1] = (char)(0x80 | (utf32_char & 0x3F));
        return 2;
    }
    if(utf32_char <= 0xFFFF) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        utf8_output[0] = (char)(0xE0 | ((utf32_char >> 12) & 0x0F));
        utf8_output[1] = (char)(0x80 | ((utf32_char >> 6) & 0x3F));
        utf8_output[2] = (char)(0x80 | (utf32_char & 0x3F));
        return 3;
    }
    if(utf32_char <= 0x10FFFF) {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8_output[0] = (char)(0xF0 | ((utf32_char >> 18) & 0x07));
        utf8_output[1] = (char)(0x80 | ((utf32_char >> 12) & 0x3F));
        utf8_output[2] = (char)(0x80 | ((utf32_char >> 6) & 0x3F));
        utf8_output[3] = (char)(0x80 | (utf32_char & 0x3F));
        return 4;
    }
    // Beyond the last Unicode code point.
    return -1;
}
IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
*out = 0;

View File

@ -74,7 +74,7 @@ void VM__ctor(VM* self) {
self->recursion_depth = 0;
self->max_recursion_depth = 1000;
self->is_curr_exc_handled = false;
self->ctx = NULL;
@ -92,7 +92,7 @@ void VM__ctor(VM* self) {
char* p = py_newstrn(&self->ascii_literals[i], 1);
*p = i;
}
py_newstrn(&self->ascii_literals[128], 0);
py_newstrn(&self->ascii_literals[128], 0); // empty string
// 0: unused
void* placeholder = TypeList__emplace(&self->types);

View File

@ -449,9 +449,16 @@ static bool builtins_delattr(int argc, py_Ref argv) {
// chr(i): returns the one-character string for the integer code point `i`.
// Raises ValueError when `i` is outside 0..0x10FFFF or cannot be encoded.
static bool builtins_chr(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_int);
    // Keep the full 64-bit value so that negative or oversized arguments are
    // rejected instead of silently wrapping into a valid code point.
    py_i64 val = py_toint(py_arg(0));
    if(val < 0 || val > 0x10FFFF) {
        return ValueError("chr() arg not in range(0x110000)");
    }
    if(val < 128) {
        // ASCII: reuse the VM's preallocated single-character string.
        py_assign(py_retval(), &pk_current_vm->ascii_literals[val]);
        return true;
    }
    // Non-ASCII: encode as UTF-8 and build a new string from the bytes.
    char utf8[4];
    int len = c11__u32_to_u8((uint32_t)val, utf8);
    // `val` is known to fit in `int` here thanks to the range check above.
    if(len == -1) return ValueError("invalid unicode code point: %d", (int)val);
    py_newstrv(py_retval(), (c11_sv){utf8, len});
    return true;
}

View File

@ -191,6 +191,13 @@ assert (1 == '1') is False
# int and float compare equal by value
assert 1 == 1.0
# chr() of a code below 128 returns the VM's cached one-character string;
# presumably the literal 'a' resolves to that same object, hence `is` -- TODO confirm
assert chr(97) is 'a'
assert ord('a') == 97
# code point above 0xFFFF: exercises the 4-byte UTF-8 path
assert ord('🥕') == 0x1f955
assert chr(0x1f955) == '🥕'
# U+6D4B (27979) is a 3-byte UTF-8 character
assert ord('测') == 27979
assert chr(27979) == '测'
exit()