add wcwidth and wcswidth

This commit is contained in:
blueloveTH 2025-11-27 11:31:14 +08:00
parent 1f782b799c
commit 6f9b7943d4
4 changed files with 65 additions and 3 deletions

View File

@ -3,3 +3,6 @@ def enable_full_buffering_mode() -> None:
def split_ansi_escaped_string(s: str) -> list[str]: def split_ansi_escaped_string(s: str) -> list[str]:
"""Perform split on ANSI escaped string.""" """Perform split on ANSI escaped string."""
# Return the display width, in terminal columns, of the code point `c`:
# 0 for code points below 32, 1 for printable ASCII, and 2 for East Asian
# Fullwidth/Wide characters.
def wcwidth(c: int) -> int: ...
# Return the total display width of `s`, summing the per-character widths of its
# text; tokens split out as ANSI escape sequences are not counted.
# Raises ValueError when `s` contains invalid ANSI escape sequences.
def wcswidth(s: str) -> int: ...

View File

@ -1,8 +1,20 @@
#include "pocketpy/common/str.h"
#include "pocketpy/pocketpy.h" #include "pocketpy/pocketpy.h"
#include "pocketpy/objects/base.h" #include "pocketpy/objects/base.h"
#include <stdio.h>
#include "pocketpy/common/vector.h" #include "pocketpy/common/vector.h"
#include <stdio.h>
// Defined in another translation unit. Returns a NUL-terminated string naming
// the East Asian Width class of `c`; only "F" and "W" are distinguished here.
const char* c11__u32_east_asian_width(int c);

// Return the number of terminal columns occupied by the code point `c`:
//   0 for control characters (C0, DEL, C1) and for negative values,
//   2 for East Asian Fullwidth ("F") or Wide ("W") characters,
//   1 for everything else.
static int c11__wcwidth(int c) {
    // Fast path: printable ASCII never needs a table lookup.
    if(c >= 32 && c < 0x7f) return 1;
    // C0 controls (and any negative input) occupy no columns.
    if(c < 32) return 0;
    // DEL (0x7f) and the C1 controls (0x80-0x9f) are control characters as well;
    // give them the same zero width as the C0 range rather than letting them
    // reach the East Asian Width lookup.
    if(c < 0xa0) return 0;
    const char* w = c11__u32_east_asian_width(c);
    // Compare against the exact one-letter class names so that two-letter
    // classes starting with another letter are never mistaken for wide.
    bool fullwidth = (w[0] == 'F' && w[1] == '\0') || (w[0] == 'W' && w[1] == '\0');
    return fullwidth ? 2 : 1;
}
static bool picoterm_enable_full_buffering_mode(int argc, py_Ref argv) { static bool picoterm_enable_full_buffering_mode(int argc, py_Ref argv) {
PY_CHECK_ARGC(0); PY_CHECK_ARGC(0);
static char buf[1024 * 32]; // 32KB static char buf[1024 * 32]; // 32KB
@ -39,11 +51,51 @@ static bool picoterm_split_ansi_escaped_string(int argc, py_Ref argv) {
return true; return true;
} }
// Binding for `picoterm.wcwidth(c)`: takes exactly one int argument (a code
// point) and stores its display width, as computed by c11__wcwidth, in the
// return-value slot.
static bool picoterm_wcwidth(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_int);
    const int codepoint = py_toint(py_arg(0));
    const int width = c11__wcwidth(codepoint);
    py_newint(py_retval(), width);
    return true;
}
// Binding for `picoterm.wcswidth(s)`: takes exactly one str argument, splits it
// with split_ansi_escaped_string, and stores the summed display width of the
// remaining text in the return-value slot.
//
// Raises ValueError when the string cannot be split into ANSI tokens or when a
// text token contains a malformed UTF-8 sequence. The token vector is released
// on every exit path.
static bool picoterm_wcswidth(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_str);
    c11_sv sv = py_tosv(py_arg(0));
    c11_vector /*T=AnsiEscapedToken*/ tokens;
    c11_vector__ctor(&tokens, sizeof(AnsiEscapedToken));
    if(!split_ansi_escaped_string(sv, &tokens)) {
        c11_vector__dtor(&tokens);
        return ValueError("invalid ANSI escape sequences");
    }
    int total_width = 0;
    for(int i = 0; i < tokens.length; i++) {
        AnsiEscapedToken* p_token = c11__at(AnsiEscapedToken, &tokens, i);
        // Tokens with a non-NUL suffix are skipped; presumably these are the
        // escape sequences themselves, which take no columns on screen.
        if(p_token->suffix != '\0') continue;
        const char* curr_char = p_token->text.data;
        const char* end_char = p_token->text.data + p_token->text.size;
        while(curr_char < end_char) {
            unsigned char c = *curr_char;
            int u8bytes = c11__u8_header(c, true);
            if(u8bytes == 0) {
                // Release the token vector before reporting the error.
                c11_vector__dtor(&tokens);
                return ValueError("invalid utf-8 header: %d", (int)c);
            }
            if(u8bytes > end_char - curr_char) {
                // The header announces more bytes than the token holds;
                // decoding would read past the end of the text.
                c11_vector__dtor(&tokens);
                return ValueError("truncated utf-8 sequence");
            }
            int value = c11__u8_value(u8bytes, curr_char);
            total_width += c11__wcwidth(value);
            curr_char += u8bytes;
        }
    }
    c11_vector__dtor(&tokens);
    py_newint(py_retval(), total_width);
    return true;
}
// Create the "picoterm" module and bind its native functions to it.
void pk__add_module_picoterm() { void pk__add_module_picoterm() {
py_Ref mod = py_newmodule("picoterm"); py_Ref mod = py_newmodule("picoterm");
py_bindfunc(mod, "enable_full_buffering_mode", picoterm_enable_full_buffering_mode); py_bindfunc(mod, "enable_full_buffering_mode", picoterm_enable_full_buffering_mode);
py_bindfunc(mod, "split_ansi_escaped_string", picoterm_split_ansi_escaped_string); py_bindfunc(mod, "split_ansi_escaped_string", picoterm_split_ansi_escaped_string);
py_bindfunc(mod, "wcwidth", picoterm_wcwidth);
py_bindfunc(mod, "wcswidth", picoterm_wcswidth);
} }
static bool split_ansi_escaped_string(c11_sv sv, c11_vector* out_tokens) { static bool split_ansi_escaped_string(c11_sv sv, c11_vector* out_tokens) {

View File

@ -1022,7 +1022,7 @@ const static c11_u32_range kEastAsianWidthRanges[] = {
}; };
// clang-format on // clang-format on
const static char* c11__u32_east_asian_width(int c) { const char* c11__u32_east_asian_width(int c) {
const char* data = const char* data =
c11__search_u32_ranges(c, c11__search_u32_ranges(c,
kEastAsianWidthRanges, kEastAsianWidthRanges,

View File

@ -21,3 +21,10 @@ assert cpnts == ['\x1b[3m', '\x1b[38;2;200;200;0m', '\x1b[48;2;78;118;164m', 'he
cpnts_join = ''.join(cpnts) cpnts_join = ''.join(cpnts)
assert cpnts_join == text assert cpnts_join == text
# wcwidth: a control character takes no columns, printable ASCII takes one,
# and wide CJK characters and emoji take two.
assert picoterm.wcwidth(ord('\n')) == 0
assert picoterm.wcwidth(ord('a')) == 1
assert picoterm.wcwidth(ord('测')) == 2
assert picoterm.wcwidth(ord('👀')) == 2
# wcswidth: "hello, " (7) + "a" (1) + five CJK characters and one emoji at two
# columns each (12); the trailing '\n' adds nothing.
assert picoterm.wcswidth("hello, 测试a测试👀测\n") == 7 + 1 + 12