add unicodedata module

This commit is contained in:
blueloveTH 2025-05-06 19:23:24 +08:00
parent 0cb3684fa6
commit 3879903d73
9 changed files with 1670 additions and 20 deletions

View File

@ -0,0 +1,15 @@
---
icon: package
label: unicodedata
---
### `unicodedata.east_asian_width(char: str) -> str`
Returns the East Asian width of a Unicode character. The width is one of the following values:
- `F`: Fullwidth
- `H`: Halfwidth
- `N`: Neutral
- `Na`: Narrow
- `W`: Wide
- `A`: Ambiguous

View File

@ -21,6 +21,12 @@ typedef struct c11_bytes {
unsigned char data[]; // flexible array member unsigned char data[]; // flexible array member
} c11_bytes; } c11_bytes;
// One entry of a code point range table searched by `c11__search_u32_ranges`.
typedef struct {
// First code point covered by this entry (the search treats it as inclusive).
int start;
// Last code point covered by this entry (the search treats it as inclusive).
int end;
// Payload handed back by the search when a code point falls in [start, end].
char data[4];
} c11_u32_range;
bool c11_bytes__eq(c11_bytes* self, c11_bytes* other); bool c11_bytes__eq(c11_bytes* self, c11_bytes* other);
int c11_sv__cmp(c11_sv self, c11_sv other); int c11_sv__cmp(c11_sv self, c11_sv other);
@ -66,6 +72,7 @@ int c11__unicode_index_to_byte(const char* data, int i);
int c11__byte_index_to_unicode(const char* data, int n); int c11__byte_index_to_unicode(const char* data, int n);
bool c11__is_unicode_Lo_char(int c); bool c11__is_unicode_Lo_char(int c);
// Binary-searches the `n_ranges` entries at `p` for the one whose
// [start, end] interval contains `c`. Returns that entry's `data`, or NULL
// when no entry contains `c`. The entries must be sorted in ascending order
// and must not overlap for the search to find the right entry.
const char* c11__search_u32_ranges(int c, const c11_u32_range* p, int n_ranges);
int c11__u8_header(unsigned char c, bool suppress); int c11__u8_header(unsigned char c, bool suppress);
int c11__u8_value(int u8bytes, const char* data); int c11__u8_value(int u8bytes, const char* data);
int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]); int c11__u32_to_u8(uint32_t utf32_char, char utf8_output[4]);

View File

@ -17,6 +17,7 @@ void pk__add_module_inspect();
void pk__add_module_pickle(); void pk__add_module_pickle();
void pk__add_module_base64(); void pk__add_module_base64();
void pk__add_module_importlib(); void pk__add_module_importlib();
void pk__add_module_unicodedata();
void pk__add_module_vmath(); void pk__add_module_vmath();
void pk__add_module_array2d(); void pk__add_module_array2d();

View File

@ -0,0 +1,46 @@
import unicodedata
from tqdm import trange
from typing import Literal
# One (code point, general category, East Asian width) record per code point
# in 0..0x10FFFF; a record's list position equals its code point.
info = [
    (code_point, unicodedata.category(ch), unicodedata.east_asian_width(ch))
    for code_point, ch in enumerate(map(chr, trange(0x110000)))
]
def merge(index: Literal[1, 2], filter, records=None):
    """Collapse consecutive records that share a property into ranges.

    Args:
        index: Column of each record to group on: 1 for the general
            category, 2 for the East Asian width.
        filter: Predicate called with a run's property value; only runs
            for which it returns a true value are kept.
        records: Sequence of ``(code_point, category, east_asian_width)``
            tuples. A record's position in the sequence is used as its
            code point. Defaults to the module-level ``info`` table.

    Returns:
        A list of ``(start, end, value)`` tuples with inclusive bounds, in
        ascending order of ``start``.
    """
    if records is None:
        records = info

    runs: list[tuple[int, int, str]] = []
    run_value = None
    run_start = None
    for position, record in enumerate(records):
        value = record[index]
        if value != run_value:
            # The property changed: close the run that ended just before
            # this position and open a new one here.
            if run_value is not None:
                runs.append((run_start, position - 1, run_value))
            run_value = value
            run_start = position
    # The final run is never closed inside the loop.
    if run_value is not None:
        runs.append((run_start, len(records) - 1, run_value))

    return [run for run in runs if filter(run[2])]
# Ranges of code points whose general category is 'Lo'.
df_category = merge(1, lambda x: x == 'Lo')
# Ranges of code points whose East Asian width is anything other than 'N';
# 'N' runs are left out of the table.
df_east_asian_width = merge(2, lambda x: x != 'N')
def to_c11(ranges, name, with_value=True):
    """Write ``ranges`` to ``{name}.c`` as a C array named ``name``.

    Args:
        ranges: Iterable of ``(start, end, value)`` tuples.
        name: Identifier of the generated ``c11_u32_range`` array; also the
            stem of the output file, which is opened relative to the
            current working directory.
        with_value: When true, each entry carries ``value`` as a string
            literal after the bounds; when false, only the bounds are
            written.
    """
    with open(f'{name}.c', 'wt', encoding='utf-8', newline='\n') as out:
        out.write(f'const static c11_u32_range {name}[] = {{\n')
        for start, end, value in ranges:
            entry = (
                f' {{ {start}, {end}, "{value}\\0" }},\n'
                if with_value
                else f' {{ {start}, {end} }},\n'
            )
            out.write(entry)
        out.write(f'}};\n')
# Writes kLoRanges.c; entries hold only the start/end bounds.
to_c11(df_category, 'kLoRanges', with_value=False)
# Writes kEastAsianWidthRanges.c; each entry also carries its width value.
to_c11(df_east_asian_width, 'kEastAsianWidthRanges', with_value=True)

View File

@ -268,23 +268,6 @@ bool c11__sveq2(c11_sv a, const char* b) {
return memcmp(a.data, b, size) == 0; return memcmp(a.data, b, size) == 0;
} }
// clang-format off
static const int kLoRangeA[] = {170,186,443,448,660,1488,1519,1568,1601,1646,1649,1749,1774,1786,1791,1808,1810,1869,1969,1994,2048,2112,2144,2208,2230,2308,2365,2384,2392,2418,2437,2447,2451,2474,2482,2486,2493,2510,2524,2527,2544,2556,2565,2575,2579,2602,2610,2613,2616,2649,2654,2674,2693,2703,2707,2730,2738,2741,2749,2768,2784,2809,2821,2831,2835,2858,2866,2869,2877,2908,2911,2929,2947,2949,2958,2962,2969,2972,2974,2979,2984,2990,3024,3077,3086,3090,3114,3133,3160,3168,3200,3205,3214,3218,3242,3253,3261,3294,3296,3313,3333,3342,3346,3389,3406,3412,3423,3450,3461,3482,3507,3517,3520,3585,3634,3648,3713,3716,3718,3724,3749,3751,3762,3773,3776,3804,3840,3904,3913,3976,4096,4159,4176,4186,4193,4197,4206,4213,4238,4352,4682,4688,4696,4698,4704,4746,4752,4786,4792,4800,4802,4808,4824,4882,4888,4992,5121,5743,5761,5792,5873,5888,5902,5920,5952,5984,5998,6016,6108,6176,6212,6272,6279,6314,6320,6400,6480,6512,6528,6576,6656,6688,6917,6981,7043,7086,7098,7168,7245,7258,7401,7406,7413,7418,8501,11568,11648,11680,11688,11696,11704,11712,11720,11728,11736,12294,12348,12353,12447,12449,12543,12549,12593,12704,12784,13312,19968,40960,40982,42192,42240,42512,42538,42606,42656,42895,42999,43003,43011,43015,43020,43072,43138,43250,43259,43261,43274,43312,43360,43396,43488,43495,43514,43520,43584,43588,43616,43633,43642,43646,43697,43701,43705,43712,43714,43739,43744,43762,43777,43785,43793,43808,43816,43968,44032,55216,55243,63744,64112,64285,64287,64298,64312,64318,64320,64323,64326,64467,64848,64914,65008,65136,65142,65382,65393,65440,65474,65482,65490,65498,65536,65549,65576,65596,65599,65616,65664,66176,66208,66304,66349,66370,66384,66432,66464,66504,66640,66816,66864,67072,67392,67424,67584,67592,67594,67639,67644,67647,67680,67712,67808,67828,67840,67872,67968,68030,68096,68112,68117,68121,68192,68224,68288,68297,68352,68416,68448,68480,68608,68864,69376,69415,69424,69600,69635,69763,69840,69891,69956,69968,70006,70019,70081,70106,70108,70144,70163,70272,70280,70282,70287,70
303,70320,70405,70415,70419,70442,70450,70453,70461,70480,70493,70656,70727,70751,70784,70852,70855,71040,71128,71168,71236,71296,71352,71424,71680,71935,72096,72106,72161,72163,72192,72203,72250,72272,72284,72349,72384,72704,72714,72768,72818,72960,72968,72971,73030,73056,73063,73066,73112,73440,73728,74880,77824,82944,92160,92736,92880,92928,93027,93053,93952,94032,94208,100352,110592,110928,110948,110960,113664,113776,113792,113808,123136,123214,123584,124928,126464,126469,126497,126500,126503,126505,126516,126521,126523,126530,126535,126537,126539,126541,126545,126548,126551,126553,126555,126557,126559,126561,126564,126567,126572,126580,126585,126590,126592,126603,126625,126629,126635,131072,173824,177984,178208,183984,194560};
static const int kLoRangeB[] = {170,186,443,451,660,1514,1522,1599,1610,1647,1747,1749,1775,1788,1791,1808,1839,1957,1969,2026,2069,2136,2154,2228,2237,2361,2365,2384,2401,2432,2444,2448,2472,2480,2482,2489,2493,2510,2525,2529,2545,2556,2570,2576,2600,2608,2611,2614,2617,2652,2654,2676,2701,2705,2728,2736,2739,2745,2749,2768,2785,2809,2828,2832,2856,2864,2867,2873,2877,2909,2913,2929,2947,2954,2960,2965,2970,2972,2975,2980,2986,3001,3024,3084,3088,3112,3129,3133,3162,3169,3200,3212,3216,3240,3251,3257,3261,3294,3297,3314,3340,3344,3386,3389,3406,3414,3425,3455,3478,3505,3515,3517,3526,3632,3635,3653,3714,3716,3722,3747,3749,3760,3763,3773,3780,3807,3840,3911,3948,3980,4138,4159,4181,4189,4193,4198,4208,4225,4238,4680,4685,4694,4696,4701,4744,4749,4784,4789,4798,4800,4805,4822,4880,4885,4954,5007,5740,5759,5786,5866,5880,5900,5905,5937,5969,5996,6000,6067,6108,6210,6264,6276,6312,6314,6389,6430,6509,6516,6571,6601,6678,6740,6963,6987,7072,7087,7141,7203,7247,7287,7404,7411,7414,7418,8504,11623,11670,11686,11694,11702,11710,11718,11726,11734,11742,12294,12348,12438,12447,12538,12543,12591,12686,12730,12799,19893,40943,40980,42124,42231,42507,42527,42539,42606,42725,42895,42999,43009,43013,43018,43042,43123,43187,43255,43259,43262,43301,43334,43388,43442,43492,43503,43518,43560,43586,43595,43631,43638,43642,43695,43697,43702,43709,43712,43714,43740,43754,43762,43782,43790,43798,43814,43822,44002,55203,55238,55291,64109,64217,64285,64296,64310,64316,64318,64321,64324,64433,64829,64911,64967,65019,65140,65276,65391,65437,65470,65479,65487,65495,65500,65547,65574,65594,65597,65613,65629,65786,66204,66256,66335,66368,66377,66421,66461,66499,66511,66717,66855,66915,67382,67413,67431,67589,67592,67637,67640,67644,67669,67702,67742,67826,67829,67861,67897,68023,68031,68096,68115,68119,68149,68220,68252,68295,68324,68405,68437,68466,68497,68680,68899,69404,69415,69445,69622,69687,69807,69864,69926,69956,70002,70006,70066,70084,70106,70108,70161,70187,70278,70280,70285,70301,70
312,70366,70412,70416,70440,70448,70451,70457,70461,70480,70497,70708,70730,70751,70831,70853,70855,71086,71131,71215,71236,71338,71352,71450,71723,71935,72103,72144,72161,72163,72192,72242,72250,72272,72329,72349,72440,72712,72750,72768,72847,72966,72969,73008,73030,73061,73064,73097,73112,73458,74649,75075,78894,83526,92728,92766,92909,92975,93047,93071,94026,94032,100343,101106,110878,110930,110951,111355,113770,113788,113800,113817,123180,123214,123627,125124,126467,126495,126498,126500,126503,126514,126519,126521,126523,126530,126535,126537,126539,126543,126546,126548,126551,126553,126555,126557,126559,126562,126564,126570,126578,126583,126588,126590,126601,126619,126627,126633,126651,173782,177972,178205,183969,191456,195101};
// clang-format on
bool c11__is_unicode_Lo_char(int c) {
if(c == 0x1f955) return true;
int index;
c11__lower_bound(const int, kLoRangeA, 476, c, c11__less, &index);
if(index == 476) return false;
if(c == kLoRangeA[index]) return true;
index -= 1;
if(index < 0) return false;
return c >= kLoRangeA[index] && c <= kLoRangeB[index];
}
int c11__u8_header(unsigned char c, bool suppress) { int c11__u8_header(unsigned char c, bool suppress) {
if((c & 0b10000000) == 0) return 1; if((c & 0b10000000) == 0) return 1;
if((c & 0b11100000) == 0b11000000) return 2; if((c & 0b11100000) == 0b11000000) return 2;
@ -452,3 +435,532 @@ IntParsingResult c11__parse_uint(c11_sv text, int64_t* out, int base) {
} }
return IntParsing_FAILURE; return IntParsing_FAILURE;
} }
// Finds the entry of `p[0 .. n_ranges-1]` whose inclusive [start, end]
// interval contains code point `c`.
//
// `p` must be sorted in ascending order with non-overlapping intervals,
// otherwise the binary search may miss a matching entry.
//
// Returns the matching entry's `data`, or NULL when `c` lies in no entry
// (including when the table is NULL or empty).
const char* c11__search_u32_ranges(int c, const c11_u32_range* p, int n_ranges) {
    // Without this guard an empty table would read p[0] and p[-1] below.
    if(p == NULL || n_ranges <= 0) return NULL;

    int lbound = 0;
    int ubound = n_ranges - 1;
    // Fast rejection for values outside the overall span of the table.
    if(c < p[0].start || c > p[ubound].end) return NULL;

    while(lbound <= ubound) {
        // Computed from the width so the sum of two large indices cannot overflow.
        int mid = lbound + (ubound - lbound) / 2;
        if(c > p[mid].end) {
            lbound = mid + 1;
        } else if(c < p[mid].start) {
            ubound = mid - 1;
        } else {
            return p[mid].data;
        }
    }
    // `c` falls in a gap between two adjacent entries.
    return NULL;
}
const static c11_u32_range kLoRanges[] = {
{170, 170 },
{186, 186 },
{443, 443 },
{448, 451 },
{660, 660 },
{1488, 1514 },
{1519, 1522 },
{1568, 1599 },
{1601, 1610 },
{1646, 1647 },
{1649, 1747 },
{1749, 1749 },
{1774, 1775 },
{1786, 1788 },
{1791, 1791 },
{1808, 1808 },
{1810, 1839 },
{1869, 1957 },
{1969, 1969 },
{1994, 2026 },
{2048, 2069 },
{2112, 2136 },
{2144, 2154 },
{2160, 2183 },
{2185, 2190 },
{2208, 2248 },
{2308, 2361 },
{2365, 2365 },
{2384, 2384 },
{2392, 2401 },
{2418, 2432 },
{2437, 2444 },
{2447, 2448 },
{2451, 2472 },
{2474, 2480 },
{2482, 2482 },
{2486, 2489 },
{2493, 2493 },
{2510, 2510 },
{2524, 2525 },
{2527, 2529 },
{2544, 2545 },
{2556, 2556 },
{2565, 2570 },
{2575, 2576 },
{2579, 2600 },
{2602, 2608 },
{2610, 2611 },
{2613, 2614 },
{2616, 2617 },
{2649, 2652 },
{2654, 2654 },
{2674, 2676 },
{2693, 2701 },
{2703, 2705 },
{2707, 2728 },
{2730, 2736 },
{2738, 2739 },
{2741, 2745 },
{2749, 2749 },
{2768, 2768 },
{2784, 2785 },
{2809, 2809 },
{2821, 2828 },
{2831, 2832 },
{2835, 2856 },
{2858, 2864 },
{2866, 2867 },
{2869, 2873 },
{2877, 2877 },
{2908, 2909 },
{2911, 2913 },
{2929, 2929 },
{2947, 2947 },
{2949, 2954 },
{2958, 2960 },
{2962, 2965 },
{2969, 2970 },
{2972, 2972 },
{2974, 2975 },
{2979, 2980 },
{2984, 2986 },
{2990, 3001 },
{3024, 3024 },
{3077, 3084 },
{3086, 3088 },
{3090, 3112 },
{3114, 3129 },
{3133, 3133 },
{3160, 3162 },
{3165, 3165 },
{3168, 3169 },
{3200, 3200 },
{3205, 3212 },
{3214, 3216 },
{3218, 3240 },
{3242, 3251 },
{3253, 3257 },
{3261, 3261 },
{3293, 3294 },
{3296, 3297 },
{3313, 3314 },
{3332, 3340 },
{3342, 3344 },
{3346, 3386 },
{3389, 3389 },
{3406, 3406 },
{3412, 3414 },
{3423, 3425 },
{3450, 3455 },
{3461, 3478 },
{3482, 3505 },
{3507, 3515 },
{3517, 3517 },
{3520, 3526 },
{3585, 3632 },
{3634, 3635 },
{3648, 3653 },
{3713, 3714 },
{3716, 3716 },
{3718, 3722 },
{3724, 3747 },
{3749, 3749 },
{3751, 3760 },
{3762, 3763 },
{3773, 3773 },
{3776, 3780 },
{3804, 3807 },
{3840, 3840 },
{3904, 3911 },
{3913, 3948 },
{3976, 3980 },
{4096, 4138 },
{4159, 4159 },
{4176, 4181 },
{4186, 4189 },
{4193, 4193 },
{4197, 4198 },
{4206, 4208 },
{4213, 4225 },
{4238, 4238 },
{4352, 4680 },
{4682, 4685 },
{4688, 4694 },
{4696, 4696 },
{4698, 4701 },
{4704, 4744 },
{4746, 4749 },
{4752, 4784 },
{4786, 4789 },
{4792, 4798 },
{4800, 4800 },
{4802, 4805 },
{4808, 4822 },
{4824, 4880 },
{4882, 4885 },
{4888, 4954 },
{4992, 5007 },
{5121, 5740 },
{5743, 5759 },
{5761, 5786 },
{5792, 5866 },
{5873, 5880 },
{5888, 5905 },
{5919, 5937 },
{5952, 5969 },
{5984, 5996 },
{5998, 6000 },
{6016, 6067 },
{6108, 6108 },
{6176, 6210 },
{6212, 6264 },
{6272, 6276 },
{6279, 6312 },
{6314, 6314 },
{6320, 6389 },
{6400, 6430 },
{6480, 6509 },
{6512, 6516 },
{6528, 6571 },
{6576, 6601 },
{6656, 6678 },
{6688, 6740 },
{6917, 6963 },
{6981, 6988 },
{7043, 7072 },
{7086, 7087 },
{7098, 7141 },
{7168, 7203 },
{7245, 7247 },
{7258, 7287 },
{7401, 7404 },
{7406, 7411 },
{7413, 7414 },
{7418, 7418 },
{8501, 8504 },
{11568, 11623 },
{11648, 11670 },
{11680, 11686 },
{11688, 11694 },
{11696, 11702 },
{11704, 11710 },
{11712, 11718 },
{11720, 11726 },
{11728, 11734 },
{11736, 11742 },
{12294, 12294 },
{12348, 12348 },
{12353, 12438 },
{12447, 12447 },
{12449, 12538 },
{12543, 12543 },
{12549, 12591 },
{12593, 12686 },
{12704, 12735 },
{12784, 12799 },
{13312, 19903 },
{19968, 40980 },
{40982, 42124 },
{42192, 42231 },
{42240, 42507 },
{42512, 42527 },
{42538, 42539 },
{42606, 42606 },
{42656, 42725 },
{42895, 42895 },
{42999, 42999 },
{43003, 43009 },
{43011, 43013 },
{43015, 43018 },
{43020, 43042 },
{43072, 43123 },
{43138, 43187 },
{43250, 43255 },
{43259, 43259 },
{43261, 43262 },
{43274, 43301 },
{43312, 43334 },
{43360, 43388 },
{43396, 43442 },
{43488, 43492 },
{43495, 43503 },
{43514, 43518 },
{43520, 43560 },
{43584, 43586 },
{43588, 43595 },
{43616, 43631 },
{43633, 43638 },
{43642, 43642 },
{43646, 43695 },
{43697, 43697 },
{43701, 43702 },
{43705, 43709 },
{43712, 43712 },
{43714, 43714 },
{43739, 43740 },
{43744, 43754 },
{43762, 43762 },
{43777, 43782 },
{43785, 43790 },
{43793, 43798 },
{43808, 43814 },
{43816, 43822 },
{43968, 44002 },
{44032, 55203 },
{55216, 55238 },
{55243, 55291 },
{63744, 64109 },
{64112, 64217 },
{64285, 64285 },
{64287, 64296 },
{64298, 64310 },
{64312, 64316 },
{64318, 64318 },
{64320, 64321 },
{64323, 64324 },
{64326, 64433 },
{64467, 64829 },
{64848, 64911 },
{64914, 64967 },
{65008, 65019 },
{65136, 65140 },
{65142, 65276 },
{65382, 65391 },
{65393, 65437 },
{65440, 65470 },
{65474, 65479 },
{65482, 65487 },
{65490, 65495 },
{65498, 65500 },
{65536, 65547 },
{65549, 65574 },
{65576, 65594 },
{65596, 65597 },
{65599, 65613 },
{65616, 65629 },
{65664, 65786 },
{66176, 66204 },
{66208, 66256 },
{66304, 66335 },
{66349, 66368 },
{66370, 66377 },
{66384, 66421 },
{66432, 66461 },
{66464, 66499 },
{66504, 66511 },
{66640, 66717 },
{66816, 66855 },
{66864, 66915 },
{67072, 67382 },
{67392, 67413 },
{67424, 67431 },
{67584, 67589 },
{67592, 67592 },
{67594, 67637 },
{67639, 67640 },
{67644, 67644 },
{67647, 67669 },
{67680, 67702 },
{67712, 67742 },
{67808, 67826 },
{67828, 67829 },
{67840, 67861 },
{67872, 67897 },
{67968, 68023 },
{68030, 68031 },
{68096, 68096 },
{68112, 68115 },
{68117, 68119 },
{68121, 68149 },
{68192, 68220 },
{68224, 68252 },
{68288, 68295 },
{68297, 68324 },
{68352, 68405 },
{68416, 68437 },
{68448, 68466 },
{68480, 68497 },
{68608, 68680 },
{68864, 68899 },
{69248, 69289 },
{69296, 69297 },
{69376, 69404 },
{69415, 69415 },
{69424, 69445 },
{69488, 69505 },
{69552, 69572 },
{69600, 69622 },
{69635, 69687 },
{69745, 69746 },
{69749, 69749 },
{69763, 69807 },
{69840, 69864 },
{69891, 69926 },
{69956, 69956 },
{69959, 69959 },
{69968, 70002 },
{70006, 70006 },
{70019, 70066 },
{70081, 70084 },
{70106, 70106 },
{70108, 70108 },
{70144, 70161 },
{70163, 70187 },
{70272, 70278 },
{70280, 70280 },
{70282, 70285 },
{70287, 70301 },
{70303, 70312 },
{70320, 70366 },
{70405, 70412 },
{70415, 70416 },
{70419, 70440 },
{70442, 70448 },
{70450, 70451 },
{70453, 70457 },
{70461, 70461 },
{70480, 70480 },
{70493, 70497 },
{70656, 70708 },
{70727, 70730 },
{70751, 70753 },
{70784, 70831 },
{70852, 70853 },
{70855, 70855 },
{71040, 71086 },
{71128, 71131 },
{71168, 71215 },
{71236, 71236 },
{71296, 71338 },
{71352, 71352 },
{71424, 71450 },
{71488, 71494 },
{71680, 71723 },
{71935, 71942 },
{71945, 71945 },
{71948, 71955 },
{71957, 71958 },
{71960, 71983 },
{71999, 71999 },
{72001, 72001 },
{72096, 72103 },
{72106, 72144 },
{72161, 72161 },
{72163, 72163 },
{72192, 72192 },
{72203, 72242 },
{72250, 72250 },
{72272, 72272 },
{72284, 72329 },
{72349, 72349 },
{72368, 72440 },
{72704, 72712 },
{72714, 72750 },
{72768, 72768 },
{72818, 72847 },
{72960, 72966 },
{72968, 72969 },
{72971, 73008 },
{73030, 73030 },
{73056, 73061 },
{73063, 73064 },
{73066, 73097 },
{73112, 73112 },
{73440, 73458 },
{73648, 73648 },
{73728, 74649 },
{74880, 75075 },
{77712, 77808 },
{77824, 78894 },
{82944, 83526 },
{92160, 92728 },
{92736, 92766 },
{92784, 92862 },
{92880, 92909 },
{92928, 92975 },
{93027, 93047 },
{93053, 93071 },
{93952, 94026 },
{94032, 94032 },
{94208, 100343},
{100352, 101589},
{101632, 101640},
{110592, 110882},
{110928, 110930},
{110948, 110951},
{110960, 111355},
{113664, 113770},
{113776, 113788},
{113792, 113800},
{113808, 113817},
{122634, 122634},
{123136, 123180},
{123214, 123214},
{123536, 123565},
{123584, 123627},
{124896, 124902},
{124904, 124907},
{124909, 124910},
{124912, 124926},
{124928, 125124},
{126464, 126467},
{126469, 126495},
{126497, 126498},
{126500, 126500},
{126503, 126503},
{126505, 126514},
{126516, 126519},
{126521, 126521},
{126523, 126523},
{126530, 126530},
{126535, 126535},
{126537, 126537},
{126539, 126539},
{126541, 126543},
{126545, 126546},
{126548, 126548},
{126551, 126551},
{126553, 126553},
{126555, 126555},
{126557, 126557},
{126559, 126559},
{126561, 126562},
{126564, 126564},
{126567, 126570},
{126572, 126578},
{126580, 126583},
{126585, 126588},
{126590, 126590},
{126592, 126601},
{126603, 126619},
{126625, 126627},
{126629, 126633},
{126635, 126651},
{131072, 173791},
{173824, 177976},
{177984, 178205},
{178208, 183969},
{183984, 191456},
{194560, 195101},
{196608, 201546},
};
// Reports whether `c` is covered by the `kLoRanges` table.
// U+1F955 is additionally accepted without consulting the table.
bool c11__is_unicode_Lo_char(int c) {
    if(c == 0x1f955) return true;
    const int n_ranges = sizeof(kLoRanges) / sizeof(c11_u32_range);
    // The entries carry no meaningful payload here; only hit-or-miss matters.
    return c11__search_u32_ranges(c, kLoRanges, n_ranges) != NULL;
}

View File

@ -231,6 +231,7 @@ void VM__ctor(VM* self) {
pk__add_module_pickle(); pk__add_module_pickle();
pk__add_module_base64(); pk__add_module_base64();
pk__add_module_importlib(); pk__add_module_importlib();
pk__add_module_unicodedata();
pk__add_module_conio(); pk__add_module_conio();
pk__add_module_lz4(); // optional pk__add_module_lz4(); // optional

1051
src/modules/unicodedata.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -472,7 +472,7 @@ static bool builtins_ord(int argc, py_Ref argv) {
c11_sv__u8_length(sv)); c11_sv__u8_length(sv));
} }
int u8bytes = c11__u8_header(sv.data[0], true); int u8bytes = c11__u8_header(sv.data[0], true);
if(u8bytes == 0) { return ValueError("invalid char: %c", sv.data[0]); } if(u8bytes == 0) return ValueError("invalid utf-8 char: %c", sv.data[0]);
int value = c11__u8_value(u8bytes, sv.data); int value = c11__u8_value(u8bytes, sv.data);
py_newint(py_retval(), value); py_newint(py_retval(), value);
return true; return true;

17
tests/83_unicodedata.py Normal file
View File

@ -0,0 +1,17 @@
from unicodedata import east_asian_width
# Each class is checked with one representative single character; the code
# point is noted next to it so the intent survives any re-encoding.

# full width (U+FF21 FULLWIDTH LATIN CAPITAL LETTER A)
assert east_asian_width("Ａ") == "F"
# half width (U+FF71 HALFWIDTH KATAKANA LETTER A)
assert east_asian_width("ｱ") == "H"
# narrow
assert east_asian_width("a") == "Na"
# wide (U+4E2D, U+1F955, U+3002)
assert east_asian_width("中") == "W"
assert east_asian_width("🥕") == "W"
assert east_asian_width("。") == "W"
# ambiguous
assert east_asian_width("°") == "A"
# neutral
assert east_asian_width("\n") == "N"