mirror of
https://github.com/pocketpy/pocketpy
synced 2025-12-06 18:20:17 +00:00
add wcwidth and wcswidth
This commit is contained in:
parent
1f782b799c
commit
6f9b7943d4
@ -3,3 +3,6 @@ def enable_full_buffering_mode() -> None:
|
|||||||
|
|
||||||
def split_ansi_escaped_string(s: str) -> list[str]:
|
def split_ansi_escaped_string(s: str) -> list[str]:
|
||||||
"""Perform split on ANSI escaped string."""
|
"""Perform split on ANSI escaped string."""
|
||||||
|
|
||||||
|
def wcwidth(c: int) -> int: ...
|
||||||
|
def wcswidth(s: str) -> int: ...
|
||||||
|
|||||||
@ -1,8 +1,20 @@
|
|||||||
|
#include "pocketpy/common/str.h"
|
||||||
#include "pocketpy/pocketpy.h"
|
#include "pocketpy/pocketpy.h"
|
||||||
#include "pocketpy/objects/base.h"
|
#include "pocketpy/objects/base.h"
|
||||||
#include <stdio.h>
|
|
||||||
#include "pocketpy/common/vector.h"
|
#include "pocketpy/common/vector.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
const char* c11__u32_east_asian_width(int c);
|
||||||
|
|
||||||
|
/* Number of terminal columns occupied by the code point `c`.
 *
 * Returns:
 *   0  for control characters: anything below U+0020 (negative inputs
 *      included), plus DEL and the C1 controls U+007F..U+009F;
 *   2  when c11__u32_east_asian_width() reports exactly "F" or "W";
 *   1  for everything else.
 */
static int c11__wcwidth(int c) {
    // C0 controls (and any negative value) take no column.
    if(c < 0x20) return 0;
    // Printable ASCII: skip the table lookup entirely.
    if(c < 0x7f) return 1;
    // DEL and the C1 controls are non-printing just like the C0 range, so
    // they must not be counted as one column.
    if(c < 0xa0) return 0;

    const char* w = c11__u32_east_asian_width(c);
    // Defensive: the lookup is defined elsewhere; if it ever yields no class
    // for a code point, fall back to the narrow width instead of
    // dereferencing a null pointer.
    if(w == NULL) return 1;

    // Only the one-letter classes "F" and "W" are double-width; two-letter
    // classes that merely start with another letter are excluded by the
    // terminator check.
    bool fullwidth = (w[0] == 'F' || w[0] == 'W') && w[1] == '\0';
    return fullwidth ? 2 : 1;
}
|
||||||
|
|
||||||
static bool picoterm_enable_full_buffering_mode(int argc, py_Ref argv) {
|
static bool picoterm_enable_full_buffering_mode(int argc, py_Ref argv) {
|
||||||
PY_CHECK_ARGC(0);
|
PY_CHECK_ARGC(0);
|
||||||
static char buf[1024 * 32]; // 32KB
|
static char buf[1024 * 32]; // 32KB
|
||||||
@ -39,11 +51,51 @@ static bool picoterm_split_ansi_escaped_string(int argc, py_Ref argv) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* picoterm.wcwidth(c: int) -> int
 *
 * Native binding: takes exactly one `int` argument (a code point) and stores
 * the result of c11__wcwidth() for it in the return-value slot.
 */
static bool picoterm_wcwidth(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_int);
    // Width is computed first, then boxed as a Python int.
    const int width = c11__wcwidth(py_toint(py_arg(0)));
    py_newint(py_retval(), width);
    return true;
}
|
||||||
|
|
||||||
|
/* picoterm.wcswidth(s: str) -> int
 *
 * Native binding: splits `s` with split_ansi_escaped_string(), then sums
 * c11__wcwidth() over every UTF-8 code point of the tokens whose `suffix`
 * is NUL. Tokens with a non-NUL suffix contribute nothing (presumably those
 * are the escape sequences themselves -- confirm against
 * split_ansi_escaped_string).
 *
 * Raises ValueError when the split fails or when a byte is not a valid
 * UTF-8 lead byte. The token vector is released on every exit path.
 */
static bool picoterm_wcswidth(int argc, py_Ref argv) {
    PY_CHECK_ARGC(1);
    PY_CHECK_ARG_TYPE(0, tp_str);
    c11_sv sv = py_tosv(py_arg(0));
    c11_vector /*T=AnsiEscapedToken*/ tokens;
    c11_vector__ctor(&tokens, sizeof(AnsiEscapedToken));
    if(!split_ansi_escaped_string(sv, &tokens)) {
        c11_vector__dtor(&tokens);
        return ValueError("invalid ANSI escape sequences");
    }
    int total_width = 0;
    for(int i = 0; i < tokens.length; i++) {
        AnsiEscapedToken* p_token = c11__at(AnsiEscapedToken, &tokens, i);
        // Only tokens with a NUL suffix are measured.
        if(p_token->suffix != '\0') continue;
        const char* curr_char = p_token->text.data;
        const char* end_char = p_token->text.data + p_token->text.size;
        while(curr_char < end_char) {
            unsigned char c = *curr_char;
            int u8bytes = c11__u8_header(c, true);
            if(u8bytes == 0) {
                // Release the token vector before bailing out; returning
                // directly from here used to leak it.
                c11_vector__dtor(&tokens);
                return ValueError("invalid utf-8 header: %d", (int)c);
            }
            int value = c11__u8_value(u8bytes, curr_char);
            total_width += c11__wcwidth(value);
            curr_char += u8bytes;
        }
    }
    c11_vector__dtor(&tokens);
    py_newint(py_retval(), total_width);
    return true;
}
|
||||||
|
|
||||||
void pk__add_module_picoterm() {
|
void pk__add_module_picoterm() {
|
||||||
py_Ref mod = py_newmodule("picoterm");
|
py_Ref mod = py_newmodule("picoterm");
|
||||||
|
|
||||||
py_bindfunc(mod, "enable_full_buffering_mode", picoterm_enable_full_buffering_mode);
|
py_bindfunc(mod, "enable_full_buffering_mode", picoterm_enable_full_buffering_mode);
|
||||||
py_bindfunc(mod, "split_ansi_escaped_string", picoterm_split_ansi_escaped_string);
|
py_bindfunc(mod, "split_ansi_escaped_string", picoterm_split_ansi_escaped_string);
|
||||||
|
py_bindfunc(mod, "wcwidth", picoterm_wcwidth);
|
||||||
|
py_bindfunc(mod, "wcswidth", picoterm_wcswidth);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool split_ansi_escaped_string(c11_sv sv, c11_vector* out_tokens) {
|
static bool split_ansi_escaped_string(c11_sv sv, c11_vector* out_tokens) {
|
||||||
|
|||||||
@ -1022,7 +1022,7 @@ const static c11_u32_range kEastAsianWidthRanges[] = {
|
|||||||
};
|
};
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
const static char* c11__u32_east_asian_width(int c) {
|
const char* c11__u32_east_asian_width(int c) {
|
||||||
const char* data =
|
const char* data =
|
||||||
c11__search_u32_ranges(c,
|
c11__search_u32_ranges(c,
|
||||||
kEastAsianWidthRanges,
|
kEastAsianWidthRanges,
|
||||||
|
|||||||
@ -20,4 +20,11 @@ cpnts = picoterm.split_ansi_escaped_string(text)
|
|||||||
assert cpnts == ['\x1b[3m', '\x1b[38;2;200;200;0m', '\x1b[48;2;78;118;164m', 'hello, ', '\n', 'world', '\x1b[0m', '\x1b[0m', '\x1b[0m', '123']
|
assert cpnts == ['\x1b[3m', '\x1b[38;2;200;200;0m', '\x1b[48;2;78;118;164m', 'hello, ', '\n', 'world', '\x1b[0m', '\x1b[0m', '\x1b[0m', '123']
|
||||||
|
|
||||||
cpnts_join = ''.join(cpnts)
|
cpnts_join = ''.join(cpnts)
|
||||||
assert cpnts_join == text
|
assert cpnts_join == text
|
||||||
|
|
||||||
|
assert picoterm.wcwidth(ord('\n')) == 0
|
||||||
|
assert picoterm.wcwidth(ord('a')) == 1
|
||||||
|
assert picoterm.wcwidth(ord('测')) == 2
|
||||||
|
assert picoterm.wcwidth(ord('👀')) == 2
|
||||||
|
|
||||||
|
assert picoterm.wcswidth("hello, 测试a测试👀测\n") == 7 + 1 + 12
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user