summary | refs | log | tree | commit | diff
path: root/libbb/unicode.c
diff options
context:
space:
mode:
author Denys Vlasenko 2010-01-30 23:16:21 +0100
committer Denys Vlasenko 2010-01-30 23:16:21 +0100
commit e17764c8fb566f85020217dd8fd05fb6bc227e98 (patch)
tree cf0a42cc23cd4aae92e69924087610a941c712a4 /libbb/unicode.c
parent ecd90fd488cd0c519070656f5cfa0b0959979be9 (diff)
download busybox-e17764c8fb566f85020217dd8fd05fb6bc227e98.zip
busybox-e17764c8fb566f85020217dd8fd05fb6bc227e98.tar.gz
further work on unicodization
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb/unicode.c')
-rw-r--r-- libbb/unicode.c 77
1 files changed, 59 insertions, 18 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 878af84..4e7e3a9 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -246,29 +246,45 @@ size_t FAST_FUNC unicode_strlen(const char *string)
return width;
}
-char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
+static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
{
char *dst;
unsigned dst_len;
+ unsigned uni_count;
+ unsigned uni_width;
if (unicode_status != UNICODE_ON) {
- char *d = dst = xmalloc(width + 1);
- while ((int)--width >= 0) {
- unsigned char c = *src;
- if (c == '\0') {
- do
- *d++ = ' ';
- while ((int)--width >= 0);
- break;
+ char *d;
+ if (flags & UNI_FLAG_PAD) {
+ d = dst = xmalloc(width + 1);
+ while ((int)--width >= 0) {
+ unsigned char c = *src;
+ if (c == '\0') {
+ do
+ *d++ = ' ';
+ while ((int)--width >= 0);
+ break;
+ }
+ *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
+ src++;
+ }
+ *d = '\0';
+ } else {
+ d = dst = xstrndup(src, width);
+ while (*d) {
+ unsigned char c = *d;
+ if (c < ' ' || c >= 0x7f)
+ *d = '?';
+ d++;
}
- *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
- src++;
}
- *d = '\0';
+ if (stats)
+ stats->byte_count = stats->unicode_count = (d - dst);
return dst;
}
dst = NULL;
+ uni_count = uni_width = 0;
dst_len = 0;
while (1) {
int w;
@@ -301,7 +317,7 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
/* src = NULL: invalid sequence is seen,
* else: wc is set, src is advanced to next mb char
*/
- if (src1) {/* no error */
+ if (src1) { /* no error */
if (wc == 0) /* end-of-string */
break;
src = src1;
@@ -315,8 +331,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
goto subst;
w = wcwidth(wc);
if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
- || (!ENABLE_UNICODE_COMBINING_WCHARS && wc <= 0)
- || (!ENABLE_UNICODE_WIDE_WCHARS && wc > 1)
+ || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
+ || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
) {
subst:
wc = CONFIG_SUBST_WCHAR;
@@ -331,6 +347,8 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
break;
}
+ uni_count++;
+ uni_width += w;
dst = xrealloc(dst, dst_len + MB_CUR_MAX);
#if ENABLE_LOCALE_SUPPORT
{
@@ -343,15 +361,37 @@ char* FAST_FUNC unicode_cut_nchars(unsigned width, const char *src)
}
/* Pad to remaining width */
- dst = xrealloc(dst, dst_len + width + 1);
- while ((int)--width >= 0) {
- dst[dst_len++] = ' ';
+ if (flags & UNI_FLAG_PAD) {
+ dst = xrealloc(dst, dst_len + width + 1);
+ uni_count += width;
+ uni_width += width;
+ while ((int)--width >= 0) {
+ dst[dst_len++] = ' ';
+ }
}
dst[dst_len] = '\0';
+ if (stats) {
+ stats->byte_count = dst_len;
+ stats->unicode_count = uni_count;
+ stats->unicode_width = uni_width;
+ }
return dst;
}
+char* FAST_FUNC unicode_conv_to_printable(uni_stat_t *stats, const char *src)
+{
+ return unicode_conv_to_printable2(stats, src, INT_MAX, 0);
+}
+char* FAST_FUNC unicode_conv_to_printable_maxwidth(uni_stat_t *stats, const char *src, unsigned maxwidth)
+{
+ return unicode_conv_to_printable2(stats, src, maxwidth, 0);
+}
+char* FAST_FUNC unicode_conv_to_printable_fixedwidth(uni_stat_t *stats, const char *src, unsigned width)
+{
+ return unicode_conv_to_printable2(stats, src, width, UNI_FLAG_PAD);
+}
+#ifdef UNUSED
unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
{
if (unicode_status != UNICODE_ON) {
@@ -382,3 +422,4 @@ unsigned FAST_FUNC unicode_padding_to_width(unsigned width, const char *src)
return 0;
}
}
+#endif