mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-20 19:40:18 +00:00
improve chr
This commit is contained in:
parent
b320e8d9a3
commit
e25cc48463
@ -66,6 +66,7 @@ int c11__byte_index_to_unicode(const char* data, int n);
|
||||
bool c11__is_unicode_Lo_char(int c);
|
||||
int c11__u8_header(unsigned char c, bool suppress);
|
||||
int c11__u8_value(int u8bytes, const char* data);
|
||||
// Encode the code point `utf32_char` as UTF-8 into `utf8_output`.
// Returns the number of bytes written (1 to 4), or -1 if the value cannot be
// encoded. The output is not NUL-terminated.
int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]);
|
||||
|
||||
typedef enum IntParsingResult {
|
||||
IntParsing_SUCCESS,
|
||||
|
@ -316,6 +316,38 @@ int c11__u8_value(int u8bytes, const char* data) {
|
||||
return (int)value;
|
||||
}
|
||||
|
||||
/**
 * Encode a single Unicode scalar value as UTF-8.
 *
 * @param utf32_char   Code point to encode.
 * @param utf8_output  Receives the encoded bytes (1 to 4). The buffer is NOT
 *                     NUL-terminated and is left untouched on failure.
 * @return Number of bytes written (1..4), or -1 when `utf32_char` is not a
 *         Unicode scalar value: a UTF-16 surrogate (U+D800..U+DFFF) or a
 *         value above U+10FFFF.
 */
int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]) {
    if(utf32_char <= 0x7F) {
        // 1 byte: 0xxxxxxx
        utf8_output[0] = (char)utf32_char;
        return 1;
    }
    if(utf32_char <= 0x7FF) {
        // 2 bytes: 110xxxxx 10xxxxxx
        utf8_output[0] = (char)(0xC0 | ((utf32_char >> 6) & 0x1F));
        utf8_output[1] = (char)(0x80 | (utf32_char & 0x3F));
        return 2;
    }
    if(utf32_char <= 0xFFFF) {
        // Surrogates are reserved for UTF-16 and have no valid UTF-8 form;
        // emitting them would produce an ill-formed byte sequence.
        if(utf32_char >= 0xD800 && utf32_char <= 0xDFFF) return -1;
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        utf8_output[0] = (char)(0xE0 | ((utf32_char >> 12) & 0x0F));
        utf8_output[1] = (char)(0x80 | ((utf32_char >> 6) & 0x3F));
        utf8_output[2] = (char)(0x80 | (utf32_char & 0x3F));
        return 3;
    }
    if(utf32_char <= 0x10FFFF) {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8_output[0] = (char)(0xF0 | ((utf32_char >> 18) & 0x07));
        utf8_output[1] = (char)(0x80 | ((utf32_char >> 12) & 0x3F));
        utf8_output[2] = (char)(0x80 | ((utf32_char >> 6) & 0x3F));
        utf8_output[3] = (char)(0x80 | (utf32_char & 0x3F));
        return 4;
    }
    // Beyond the last Unicode code point.
    return -1;
}
|
||||
|
||||
IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
|
||||
*out = 0;
|
||||
|
||||
|
@ -74,7 +74,7 @@ void VM__ctor(VM* self) {
|
||||
|
||||
self->recursion_depth = 0;
|
||||
self->max_recursion_depth = 1000;
|
||||
|
||||
|
||||
self->is_curr_exc_handled = false;
|
||||
|
||||
self->ctx = NULL;
|
||||
@ -92,7 +92,7 @@ void VM__ctor(VM* self) {
|
||||
char* p = py_newstrn(&self->ascii_literals[i], 1);
|
||||
*p = i;
|
||||
}
|
||||
py_newstrn(&self->ascii_literals[128], 0);
|
||||
py_newstrn(&self->ascii_literals[128], 0); // empty string
|
||||
|
||||
// 0: unused
|
||||
void* placeholder = TypeList__emplace(&self->types);
|
||||
|
@ -449,9 +449,16 @@ static bool builtins_delattr(int argc, py_Ref argv) {
|
||||
// chr(i): set the return value to the string for Unicode code point `i`.
// Raises ValueError when `i` is outside [0, 0x10FFFF] or cannot be encoded
// as UTF-8.
static bool builtins_chr(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_int);
    // Validate on the full 64-bit value. Narrowing to uint32_t first would
    // let an out-of-range argument such as 0x100000061 alias to a valid
    // code point instead of raising.
    py_i64 val = py_toint(py_arg(0));
    if(val < 0 || val > 0x10FFFF) { return ValueError("chr() arg not in range(0x110000)"); }
    if(val < 128) {
        // ASCII: hand back the VM's cached one-character string.
        // (Index 128 of the table is the empty string, so it must be excluded.)
        py_assign(py_retval(), &pk_current_vm->ascii_literals[val]);
        return true;
    }
    // Non-ASCII: encode to UTF-8 and build a fresh string from the bytes.
    char utf8[4];
    int len = c11__u32_to_u8((uint32_t)val, utf8);
    // val is at most 0x10FFFF here, so the cast to int for %d is lossless.
    if(len == -1) return ValueError("invalid unicode code point: %d", (int)val);
    py_newstrv(py_retval(), (c11_sv){utf8, len});
    return true;
}
|
||||
|
||||
|
@ -191,6 +191,13 @@ assert (1 == '1') is False
|
||||
assert 1 == 1.0
|
||||
|
||||
assert chr(97) is 'a'
|
||||
assert ord('a') == 97
|
||||
|
||||
assert ord('🥕') == 0x1f955
|
||||
assert chr(0x1f955) == '🥕'
|
||||
|
||||
assert ord('测') == 27979
|
||||
assert chr(27979) == '测'
|
||||
|
||||
exit()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user