summaryrefslogtreecommitdiff
path: root/libbb/unicode.c
diff options
context:
space:
mode:
authorDenys Vlasenko2010-01-29 09:11:47 +0100
committerDenys Vlasenko2010-01-29 09:11:47 +0100
commit2edba21f4c59d071f2241c2f47021c7034ec7cb8 (patch)
tree6cf3de29bfbdafa26fddbc1cd3dc467a2d8263f6 /libbb/unicode.c
parent083e172641b64c564b7ec5478197dccbde43b421 (diff)
downloadbusybox-2edba21f4c59d071f2241c2f47021c7034ec7cb8.zip
busybox-2edba21f4c59d071f2241c2f47021c7034ec7cb8.tar.gz
more fine-grained Unicode support
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb/unicode.c')
-rw-r--r--libbb/unicode.c86
1 files changed, 70 insertions, 16 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 39b173e..878af84 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -216,8 +216,6 @@ size_t FAST_FUNC mbstowcs(wchar_t *dest, const char *src, size_t n)
return org_n - n;
}
-#include "unicode_wcwidth.c"
-
int FAST_FUNC iswspace(wint_t wc)
{
return (unsigned)wc <= 0x7f && isspace(wc);
@@ -233,6 +231,8 @@ int FAST_FUNC iswpunct(wint_t wc)
return (unsigned)wc <= 0x7f && ispunct(wc);
}
+#include "unicode_wcwidth.c"
+
#endif /* Homegrown Unicode support */
@@ -251,8 +251,22 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
char *dst;
unsigned dst_len;
- if (unicode_status != UNICODE_ON)
- return xasprintf("%-*.*s", width, width, src);
+ if (unicode_status != UNICODE_ON) {
+ char *d = dst = xmalloc(width + 1);
+ while ((int)--width >= 0) {
+ unsigned char c = *src;
+ if (c == '\0') {
+ do
+ *d++ = ' ';
+ while ((int)--width >= 0);
+ break;
+ }
+ *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
+ src++;
+ }
+ *d = '\0';
+ return dst;
+ }
dst = NULL;
dst_len = 0;
@@ -260,31 +274,64 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
int w;
wchar_t wc;
- dst = xrealloc(dst, dst_len + 2 * MB_CUR_MAX);
#if ENABLE_LOCALE_SUPPORT
{
mbstate_t mbst = { 0 };
ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
- if (rc <= 0) /* error, or end-of-string */
+ /* If invalid sequence is seen: -1 is returned,
+ * src points to the invalid sequence, errno = EILSEQ.
+ * Else number of wchars (excluding terminating L'\0')
+ * written to dest is returned.
+ * If len (here: 1) non-L'\0' wchars stored at dest,
+ * src points to the next char to be converted.
+ * If string is completely converted: src = NULL.
+ */
+ if (rc == 0) /* end-of-string */
break;
+ if (rc < 0) { /* error */
+ src++;
+ goto subst;
+ }
+ if (!iswprint(wc))
+ goto subst;
}
#else
- src = mbstowc_internal(&wc, src);
- if (!src || wc == 0) /* error, or end-of-string */
- break;
+ {
+ const char *src1 = mbstowc_internal(&wc, src);
+ /* src = NULL: invalid sequence is seen,
+ * else: wc is set, src is advanced to next mb char
+ */
+ if (src1) {/* no error */
+ if (wc == 0) /* end-of-string */
+ break;
+ src = src1;
+ } else { /* error */
+ src++;
+ goto subst;
+ }
+ }
#endif
+ if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
+ goto subst;
w = wcwidth(wc);
- if (w < 0) /* non-printable wchar */
- break;
+ if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
+ || (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0)
+ || (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1)
+ ) {
+ subst:
+ wc = CONFIG_SUBST_WCHAR;
+ w = 1;
+ }
width -= w;
- if ((int)width < 0) { /* string is longer than width */
+ /* Note: if width == 0, we still may add more chars,
+ * they may be zero-width or combining ones */
+ if ((int)width < 0) {
+ /* can't add this wc, string would become longer than width */
width += w;
- while (width) {
- dst[dst_len++] = ' ';
- width--;
- }
break;
}
+
+ dst = xrealloc(dst, dst_len + MB_CUR_MAX);
#if ENABLE_LOCALE_SUPPORT
{
mbstate_t mbst = { 0 };
@@ -294,7 +341,14 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
dst_len += wcrtomb_internal(&dst[dst_len], wc);
#endif
}
+
+ /* Pad to remaining width */
+ dst = xrealloc(dst, dst_len + width + 1);
+ while ((int)--width >= 0) {
+ dst[dst_len++] = ' ';
+ }
dst[dst_len] = '\0';
+
return dst;
}