mirror of
https://github.com/pocketpy/pocketpy
synced 2025-12-06 10:10:17 +00:00
add wcwidth and wcswidth
This commit is contained in:
parent
1f782b799c
commit
6f9b7943d4
@ -3,3 +3,6 @@ def enable_full_buffering_mode() -> None:
|
||||
|
||||
def split_ansi_escaped_string(s: str) -> list[str]:
    """Perform split on ANSI escaped string.

    Returns the pieces of ``s`` as a list of strings. In the project's test
    for this function, each escape sequence (e.g. ``'\\x1b[0m'``), each run of
    plain text and each ``'\\n'`` comes back as its own element, and
    ``''.join(result)`` equals the original input.
    """
|
||||
|
||||
def wcwidth(c: int) -> int:
    """Return the display width, in terminal columns, of code point ``c``.

    The native implementation returns 0 for code points below 32, 1 for
    printable ASCII (32..0x7e), 2 when the East Asian Width class of ``c``
    is ``F`` or ``W``, and 1 for everything else.
    """
    ...
|
||||
def wcswidth(s: str) -> int:
    """Return the total display width, in terminal columns, of ``s``.

    The string is first split the same way as ``split_ansi_escaped_string``;
    tokens that the splitter marks with a suffix character (presumably the
    ANSI escape sequences) are skipped, and the ``wcwidth`` of every code
    point in the remaining tokens is summed.

    Raises ``ValueError`` if the ANSI escape sequences are invalid or an
    invalid UTF-8 header byte is encountered.
    """
    ...
|
||||
|
||||
@ -1,8 +1,20 @@
|
||||
#include "pocketpy/common/str.h"
|
||||
#include "pocketpy/pocketpy.h"
|
||||
#include "pocketpy/objects/base.h"
|
||||
#include <stdio.h>
|
||||
#include "pocketpy/common/vector.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
const char* c11__u32_east_asian_width(int c);
|
||||
|
||||
/* Return the number of terminal columns occupied by code point `c`.
 *
 *   c < 32 (this also covers negative values)   -> 0
 *   32 <= c < 0x7f (printable ASCII)            -> 1
 *   East Asian Width class is "F" or "W"        -> 2
 *   anything else                               -> 1
 */
static int c11__wcwidth(int c) {
    if(c < 32) return 0;
    if(c < 0x7f) return 1;

    // Outside ASCII: decide from the East Asian Width class string.
    const char* eaw = c11__u32_east_asian_width(c);
    bool is_full = eaw[0] == 'F' && eaw[1] == '\0';
    bool is_wide = eaw[0] == 'W' && eaw[1] == '\0';
    if(is_full || is_wide) return 2;
    return 1;
}
|
||||
|
||||
static bool picoterm_enable_full_buffering_mode(int argc, py_Ref argv) {
|
||||
PY_CHECK_ARGC(0);
|
||||
static char buf[1024 * 32]; // 32KB
|
||||
@ -39,11 +51,51 @@ static bool picoterm_split_ansi_escaped_string(int argc, py_Ref argv) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Binding for `picoterm.wcwidth(c: int) -> int`.
 *
 * Expects exactly one `int` argument (a code point) and stores the
 * result of c11__wcwidth() for it in the return-value slot.
 */
static bool picoterm_wcwidth(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_int);

    const int codepoint = py_toint(py_arg(0));
    const int columns = c11__wcwidth(codepoint);

    py_newint(py_retval(), columns);
    return true;
}
|
||||
|
||||
/* Binding for `picoterm.wcswidth(s: str) -> int`.
 *
 * Splits the argument with split_ansi_escaped_string(), skips every token
 * whose `suffix` is non-zero (presumably the ANSI escape sequences -- confirm
 * against the splitter), and sums c11__wcwidth() over each UTF-8 encoded code
 * point of the remaining tokens.
 *
 * Raises ValueError when the splitter rejects the input or when a byte is not
 * a valid UTF-8 header. The temporary token vector is released on every exit
 * path, including the error paths.
 */
static bool picoterm_wcswidth(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_str);
    c11_sv sv = py_tosv(py_arg(0));

    c11_vector /*T=AnsiEscapedToken*/ tokens;
    c11_vector__ctor(&tokens, sizeof(AnsiEscapedToken));
    if(!split_ansi_escaped_string(sv, &tokens)) {
        c11_vector__dtor(&tokens);
        return ValueError("invalid ANSI escape sequences");
    }

    int total_width = 0;
    for(int i = 0; i < tokens.length; i++) {
        AnsiEscapedToken* p_token = c11__at(AnsiEscapedToken, &tokens, i);
        // Tokens carrying a suffix character contribute no width.
        if(p_token->suffix != '\0') continue;

        const char* curr_char = p_token->text.data;
        const char* end_char = p_token->text.data + p_token->text.size;
        while(curr_char < end_char) {
            unsigned char c = *curr_char;
            int u8bytes = c11__u8_header(c, true);
            if(u8bytes == 0) {
                // Release the token vector before bailing out; the original
                // returned here directly and leaked it.
                c11_vector__dtor(&tokens);
                return ValueError("invalid utf-8 header: %d", (int)c);
            }
            int value = c11__u8_value(u8bytes, curr_char);
            total_width += c11__wcwidth(value);
            curr_char += u8bytes;
        }
    }

    c11_vector__dtor(&tokens);
    py_newint(py_retval(), total_width);
    return true;
}
|
||||
|
||||
void pk__add_module_picoterm() {
|
||||
py_Ref mod = py_newmodule("picoterm");
|
||||
|
||||
py_bindfunc(mod, "enable_full_buffering_mode", picoterm_enable_full_buffering_mode);
|
||||
py_bindfunc(mod, "split_ansi_escaped_string", picoterm_split_ansi_escaped_string);
|
||||
py_bindfunc(mod, "wcwidth", picoterm_wcwidth);
|
||||
py_bindfunc(mod, "wcswidth", picoterm_wcswidth);
|
||||
}
|
||||
|
||||
static bool split_ansi_escaped_string(c11_sv sv, c11_vector* out_tokens) {
|
||||
|
||||
@ -1022,7 +1022,7 @@ const static c11_u32_range kEastAsianWidthRanges[] = {
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
const static char* c11__u32_east_asian_width(int c) {
|
||||
const char* c11__u32_east_asian_width(int c) {
|
||||
const char* data =
|
||||
c11__search_u32_ranges(c,
|
||||
kEastAsianWidthRanges,
|
||||
|
||||
@ -20,4 +20,11 @@ cpnts = picoterm.split_ansi_escaped_string(text)
|
||||
assert cpnts == ['\x1b[3m', '\x1b[38;2;200;200;0m', '\x1b[48;2;78;118;164m', 'hello, ', '\n', 'world', '\x1b[0m', '\x1b[0m', '\x1b[0m', '123']
|
||||
|
||||
cpnts_join = ''.join(cpnts)
|
||||
assert cpnts_join == text
|
||||
assert cpnts_join == text
|
||||
|
||||
assert picoterm.wcwidth(ord('\n')) == 0
|
||||
assert picoterm.wcwidth(ord('a')) == 1
|
||||
assert picoterm.wcwidth(ord('测')) == 2
|
||||
assert picoterm.wcwidth(ord('👀')) == 2
|
||||
|
||||
assert picoterm.wcswidth("hello, 测试a测试👀测\n") == 7 + 1 + 12
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user