mirror of
https://github.com/pocketpy/pocketpy
synced 2025-10-20 11:30:18 +00:00
improve chr
This commit is contained in:
parent
b320e8d9a3
commit
e25cc48463
@ -66,6 +66,7 @@ int c11__byte_index_to_unicode(const char* data, int n);
|
|||||||
bool c11__is_unicode_Lo_char(int c);
|
bool c11__is_unicode_Lo_char(int c);
|
||||||
int c11__u8_header(unsigned char c, bool suppress);
|
int c11__u8_header(unsigned char c, bool suppress);
|
||||||
int c11__u8_value(int u8bytes, const char* data);
|
int c11__u8_value(int u8bytes, const char* data);
|
||||||
|
int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]);
|
||||||
|
|
||||||
typedef enum IntParsingResult {
|
typedef enum IntParsingResult {
|
||||||
IntParsing_SUCCESS,
|
IntParsing_SUCCESS,
|
||||||
|
@ -316,6 +316,38 @@ int c11__u8_value(int u8bytes, const char* data) {
|
|||||||
return (int)value;
|
return (int)value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Encode a single Unicode code point as UTF-8.
 *
 * @param utf32_char  Code point to encode.
 * @param utf8_output Destination buffer of at least 4 bytes. Only the first
 *                    `return value` bytes are written; no NUL terminator is
 *                    appended.
 * @return Number of bytes written (1..4), or -1 if `utf32_char` is not an
 *         encodable code point: a UTF-16 surrogate (U+D800..U+DFFF) or a
 *         value above U+10FFFF. The buffer is left untouched on failure.
 */
int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]) {
    // Surrogates are reserved for UTF-16 and must never appear in UTF-8
    // (RFC 3629, section 3); encoding them would produce an invalid string.
    if(utf32_char >= 0xD800 && utf32_char <= 0xDFFF) return -1;

    if(utf32_char <= 0x7F) {
        // 1 byte: 0xxxxxxx
        utf8_output[0] = (char)utf32_char;
        return 1;
    }
    if(utf32_char <= 0x7FF) {
        // 2 bytes: 110xxxxx 10xxxxxx
        utf8_output[0] = (char)(0xC0 | ((utf32_char >> 6) & 0x1F));
        utf8_output[1] = (char)(0x80 | (utf32_char & 0x3F));
        return 2;
    }
    if(utf32_char <= 0xFFFF) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        utf8_output[0] = (char)(0xE0 | ((utf32_char >> 12) & 0x0F));
        utf8_output[1] = (char)(0x80 | ((utf32_char >> 6) & 0x3F));
        utf8_output[2] = (char)(0x80 | (utf32_char & 0x3F));
        return 3;
    }
    if(utf32_char <= 0x10FFFF) {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8_output[0] = (char)(0xF0 | ((utf32_char >> 18) & 0x07));
        utf8_output[1] = (char)(0x80 | ((utf32_char >> 12) & 0x3F));
        utf8_output[2] = (char)(0x80 | ((utf32_char >> 6) & 0x3F));
        utf8_output[3] = (char)(0x80 | (utf32_char & 0x3F));
        return 4;
    }

    // Beyond the last Unicode code point.
    return -1;
}
|
||||||
|
|
||||||
IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
|
IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
|
||||||
*out = 0;
|
*out = 0;
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ void VM__ctor(VM* self) {
|
|||||||
char* p = py_newstrn(&self->ascii_literals[i], 1);
|
char* p = py_newstrn(&self->ascii_literals[i], 1);
|
||||||
*p = i;
|
*p = i;
|
||||||
}
|
}
|
||||||
py_newstrn(&self->ascii_literals[128], 0);
|
py_newstrn(&self->ascii_literals[128], 0); // empty string
|
||||||
|
|
||||||
// 0: unused
|
// 0: unused
|
||||||
void* placeholder = TypeList__emplace(&self->types);
|
void* placeholder = TypeList__emplace(&self->types);
|
||||||
|
@ -449,9 +449,16 @@ static bool builtins_delattr(int argc, py_Ref argv) {
|
|||||||
// chr(i): return the one-character string for Unicode code point `i`.
//
// Expects exactly one `int` argument (enforced by the PY_CHECK_* macros).
// Raises ValueError when `i` is outside 0..0x10FFFF or when the encoder
// reports that the code point cannot be represented in UTF-8.
static bool builtins_chr(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_int);

    // Keep the full 64-bit value until it has been range-checked. Narrowing
    // first would let values such as 0x100000061 wrap around to a valid
    // code point and would make negative inputs indistinguishable.
    py_i64 val = py_toint(py_arg(0));
    if(val < 0 || val > 0x10FFFF) {
        return ValueError("chr() arg not in range(0x110000)");
    }

    if(val < 128) {
        // ASCII: reuse the VM's preallocated one-character string.
        // Index 128 is the empty string, so it must not be reachable here.
        py_assign(py_retval(), &pk_current_vm->ascii_literals[val]);
    } else {
        // Non-ASCII: encode as UTF-8 into a stack buffer and copy it out.
        char utf8[4];
        int len = c11__u32_to_u8((uint32_t)val, utf8);
        if(len == -1) return ValueError("invalid unicode code point: %d", (int)val);
        py_newstrv(py_retval(), (c11_sv){utf8, len});
    }
    return true;
}
|
||||||
|
|
||||||
|
@ -191,6 +191,13 @@ assert (1 == '1') is False
|
|||||||
assert 1 == 1.0
|
assert 1 == 1.0
|
||||||
|
|
||||||
assert chr(97) is 'a'
|
assert chr(97) is 'a'
|
||||||
|
assert ord('a') == 97
|
||||||
|
|
||||||
|
assert ord('🥕') == 0x1f955
|
||||||
|
assert chr(0x1f955) == '🥕'
|
||||||
|
|
||||||
|
assert ord('测') == 27979
|
||||||
|
assert chr(27979) == '测'
|
||||||
|
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user