diff --git a/include/typings/picoterm.pyi b/include/typings/picoterm.pyi
index 24b4a831..a29e4299 100644
--- a/include/typings/picoterm.pyi
+++ b/include/typings/picoterm.pyi
@@ -3,3 +3,9 @@ def enable_full_buffering_mode() -> None:
 
 def split_ansi_escaped_string(s: str) -> list[str]:
     """Perform split on ANSI escaped string."""
+
+def wcwidth(c: int) -> int:
+    """Return the number of terminal columns (0, 1 or 2) used by code point `c`."""
+
+def wcswidth(s: str) -> int:
+    """Return the summed column width of `s`; raises ValueError on malformed input."""
diff --git a/src/modules/picoterm.c b/src/modules/picoterm.c
index 54cfc4f5..2024058c 100644
--- a/src/modules/picoterm.c
+++ b/src/modules/picoterm.c
@@ -1,8 +1,23 @@
+#include "pocketpy/common/str.h"
 #include "pocketpy/pocketpy.h"
 #include "pocketpy/objects/base.h"
-#include <stdio.h>
 #include "pocketpy/common/vector.h"
 
+#include <stdio.h>  // NOTE(review): header name was lost in extraction; <stdio.h> assumed -- confirm
+
+const char* c11__u32_east_asian_width(int c);
+
+// Terminal column width of code point `c`:
+//   0 for control characters (C0, DEL, C1) and negative values,
+//   2 for East Asian Fullwidth ("F") or Wide ("W") code points, 1 otherwise.
+static int c11__wcwidth(int c) {
+    if(c < 32 || (c >= 0x7f && c < 0xa0)) return 0;  // control characters draw nothing
+    if(c < 0x7f) return 1;                           // printable ASCII, no table lookup needed
+    const char* w = c11__u32_east_asian_width(c);
+    bool is_wide = (w[0] == 'F' || w[0] == 'W') && w[1] == '\0';
+    return is_wide ? 2 : 1;
+}
+
 static bool picoterm_enable_full_buffering_mode(int argc, py_Ref argv) {
     PY_CHECK_ARGC(0);
     static char buf[1024 * 32];  // 32KB
@@ -39,11 +54,59 @@ static bool picoterm_split_ansi_escaped_string(int argc, py_Ref argv) {
     return true;
 }
 
+// picoterm.wcwidth(c: int) -> int
+static bool picoterm_wcwidth(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    PY_CHECK_ARG_TYPE(0, tp_int);
+    int c = py_toint(py_arg(0));
+    py_newint(py_retval(), c11__wcwidth(c));
+    return true;
+}
+
+// picoterm.wcswidth(s: str) -> int
+// Sums c11__wcwidth() over every code point of `s`. Tokens with a non-zero
+// `suffix` are skipped. Raises ValueError if splitting or UTF-8 decoding fails.
+static bool picoterm_wcswidth(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    PY_CHECK_ARG_TYPE(0, tp_str);
+    c11_sv sv = py_tosv(py_arg(0));
+    c11_vector /*T=AnsiEscapedToken*/ tokens;
+    c11_vector__ctor(&tokens, sizeof(AnsiEscapedToken));
+    if(!split_ansi_escaped_string(sv, &tokens)) {
+        c11_vector__dtor(&tokens);
+        return ValueError("invalid ANSI escape sequences");
+    }
+    int total_width = 0;
+    for(int i = 0; i < tokens.length; i++) {
+        AnsiEscapedToken* p_token = c11__at(AnsiEscapedToken, &tokens, i);
+        if(p_token->suffix != '\0') continue;
+        const char* curr_char = p_token->text.data;
+        const char* end_char = p_token->text.data + p_token->text.size;
+        while(curr_char < end_char) {
+            unsigned char c = *curr_char;
+            int u8bytes = c11__u8_header(c, true);
+            if(u8bytes == 0) {
+                // release the token vector on this error path as well
+                c11_vector__dtor(&tokens);
+                return ValueError("invalid utf-8 header: %d", (int)c);
+            }
+            int value = c11__u8_value(u8bytes, curr_char);
+            total_width += c11__wcwidth(value);
+            curr_char += u8bytes;
+        }
+    }
+    c11_vector__dtor(&tokens);
+    py_newint(py_retval(), total_width);
+    return true;
+}
+
 void pk__add_module_picoterm() {
     py_Ref mod = py_newmodule("picoterm");
 
     py_bindfunc(mod, "enable_full_buffering_mode", picoterm_enable_full_buffering_mode);
     py_bindfunc(mod, "split_ansi_escaped_string", picoterm_split_ansi_escaped_string);
+    py_bindfunc(mod, "wcwidth", picoterm_wcwidth);
+    py_bindfunc(mod, "wcswidth", picoterm_wcswidth);
 }
 
 static bool split_ansi_escaped_string(c11_sv sv, c11_vector* out_tokens) {
diff --git a/src/modules/unicodedata.c b/src/modules/unicodedata.c
index 7cf405f0..9392e17c 100644
--- a/src/modules/unicodedata.c
+++ b/src/modules/unicodedata.c
@@ -1022,7 +1022,7 @@ const static c11_u32_range kEastAsianWidthRanges[] = {
 };
 // clang-format on
 
-const static char* c11__u32_east_asian_width(int c) {
+const char* c11__u32_east_asian_width(int c) {
     const char* data =
         c11__search_u32_ranges(c,
                                kEastAsianWidthRanges,
diff --git a/tests/92_picoterm.py b/tests/92_picoterm.py
index 1e648d3f..0eaaef91 100644
--- a/tests/92_picoterm.py
+++ b/tests/92_picoterm.py
@@ -20,4 +20,12 @@ cpnts = picoterm.split_ansi_escaped_string(text)
 assert cpnts == ['\x1b[3m', '\x1b[38;2;200;200;0m', '\x1b[48;2;78;118;164m', 'hello, ', '\n', 'world', '\x1b[0m', '\x1b[0m', '\x1b[0m', '123']
 
 cpnts_join = ''.join(cpnts)
-assert cpnts_join == text
\ No newline at end of file
+assert cpnts_join == text
+
+assert picoterm.wcwidth(ord('\n')) == 0
+assert picoterm.wcwidth(ord('a')) == 1
+assert picoterm.wcwidth(ord('测')) == 2
+assert picoterm.wcwidth(ord('👀')) == 2
+assert picoterm.wcwidth(0x7f) == 0
+
+assert picoterm.wcswidth("hello, 测试a测试👀测\n") == 7 + 1 + 12