From: Ken Hornstein Date: Thu, 24 Jan 2013 21:05:36 +0000 (-0500) Subject: Fix the format engine so it properly keeps track of multibyte characters X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/8090b548231e5f9e0fa78166b5d7b4228b2ef608?ds=sidebyside;hp=-c Fix the format engine so it properly keeps track of multibyte characters and column positions when processing components and other functions. When working on this code I realized we have nothing that checks to make sure right justification of format function output works properly, so a test for right justification is also included. --- 8090b548231e5f9e0fa78166b5d7b4228b2ef608 diff --git a/Makefile.am b/Makefile.am index c63b3325..953a9b77 100644 --- a/Makefile.am +++ b/Makefile.am @@ -52,6 +52,7 @@ TESTS = test/ali/test-ali test/anno/test-anno \ test/format/test-dp test/format/test-fmtdump \ test/format/test-localmbox test/format/test-myname \ test/format/test-myhost test/format/test-mymbox \ + test/format/test-rightjustify \ test/forw/test-forw-digest test/forw/test-forw-format \ test/inc/test-deb359167 test/inc/test-eom-align \ test/inc/test-inc-scanout test/inc/test-msgchk \ diff --git a/docs/pending-release-notes b/docs/pending-release-notes index 9b280927..d42e35a8 100644 --- a/docs/pending-release-notes +++ b/docs/pending-release-notes @@ -34,6 +34,8 @@ NEW FEATURES - pick(1) now decodes MIME-encoded header fields before searching. - The VISUAL and EDITOR environment variables are now supported as fallbacks if the user does not configure an editor entry in their profile. +- The format engine (mh_format(5)) now properly accounts for multibyte + characters when accounting for column widths. ---------------------------- diff --git a/sbr/fmt_scan.c b/sbr/fmt_scan.c index 879f7716..b1370fb7 100644 --- a/sbr/fmt_scan.c +++ b/sbr/fmt_scan.c @@ -114,8 +114,8 @@ cpnumber(char **dest, int num, unsigned int wid, char fill, size_t n) { * no more than n bytes are copied */ static void -cptrimmed(char **dest, char *str, unsigned int wid, char fill, size_t n, - size_t max) { +cptrimmed(char **dest, char **ep, char *str, unsigned int wid, char fill, + char *epmax) { int remaining; /* remaining output width available */ int c, ljust; int end; /* number of input bytes remaining in str */ @@ -126,8 +126,6 @@ cptrimmed(char **dest, char *str, unsigned int wid, char fill, size_t n, #endif char *sp; /* current position in source string */ char *cp = *dest; /* current position in destination string */ - char *ep = cp + n; /* end of destination buffer based on desired width */ - char *epmax = cp + max; /* true end of destination buffer */ int prevCtrl = 1; /* get alignment */ @@ -137,19 +135,29 @@ cptrimmed(char **dest, char *str, unsigned int wid, char fill, size_t n, ljust++; } if ((sp = (str))) { +#ifdef MULTIBYTE_SUPPORT mbtowc(NULL, NULL, 0); /* reset shift state */ +#endif end = strlen(str); while (*sp && remaining > 0 && end > 0) { #ifdef MULTIBYTE_SUPPORT char_len = mbtowc(&wide_char, sp, end); - /* Account for multibyte characters taking only one character's - width of output. */ - if (char_len > 1 && epmax - ep >= char_len - 1) { - ep += char_len - 1; + if (char_len <= 0) + break; + + w = wcwidth(wide_char); + + /* + * Multibyte characters can have a variable number of column + * widths, so use the column width to bump the end pointer when + * appropriate. + */ + if (char_len > 1 && epmax - *ep >= char_len - w) { + *ep += char_len - w; } - if (char_len <= 0 || (cp + char_len > ep)) + if (cp + w > *ep) break; end -= char_len; @@ -176,7 +184,6 @@ cptrimmed(char **dest, char *str, unsigned int wid, char fill, size_t n, prevCtrl = 0; #ifdef MULTIBYTE_SUPPORT - w = wcwidth(wide_char); if (w >= 0 && remaining >= w) { strncpy(cp, sp, char_len); cp += char_len; @@ -191,9 +198,10 @@ cptrimmed(char **dest, char *str, unsigned int wid, char fill, size_t n, } if (ljust) { - if (cp + remaining > ep) - remaining = ep - cp; - ep = cp + remaining; + char *endfield; + if (cp + remaining > *ep) + remaining = *ep - cp; + endfield = cp + remaining; if (remaining > 0) { /* copy string to the right */ while (--cp >= *dest) @@ -203,22 +211,22 @@ cptrimmed(char **dest, char *str, unsigned int wid, char fill, size_t n, for (c=remaining; c>0; c--) *cp-- = fill; } - *dest = ep; + *dest = endfield; } else { /* pad remaining space */ - while (remaining-- > 0 && cp < ep) + while (remaining-- > 0 && cp < *ep) *cp++ = fill; *dest = cp; } } static void -cpstripped (char **dest, char *end, char *max, char *str) +cpstripped (char **dest, char **end, char *max, char *str) { int prevCtrl = 1; /* This is 1 so we strip out leading spaces */ int len; #ifdef MULTIBYTE_SUPPORT - int char_len; + int char_len, w; wchar_t wide_char; #endif /* MULTIBYTE_SUPPORT */ @@ -236,17 +244,21 @@ cpstripped (char **dest, char *end, char *max, char *str) * then deal with that here. */ - while (*str != '\0' && len > 0 && *dest < end) { + while (*str != '\0' && len > 0 && *dest < *end) { #ifdef MULTIBYTE_SUPPORT char_len = mbtowc(&wide_char, str, len); - - /* Account for multibyte characters taking only one character's - width of output. */ - if (char_len > 1 && max - end >= char_len - 1) { - end += char_len - 1; + w = wcwidth(wide_char); + + /* + * Account for multibyte characters, and increment the end pointer + * by the number of "extra" bytes in this character. That's the + * character length (char_len) minus the column width (w). + */ + if (char_len > 1 && max - *end >= char_len - w) { + *end += char_len - w; } - if (char_len <= 0 || *dest + char_len > end) + if (char_len <= 0 || *dest + char_len > *end) break; len -= char_len; @@ -384,11 +396,11 @@ fmt_scan (struct format *format, char *scanl, size_t max, int width, int *dat) switch (fmt->f_type) { case FT_COMP: - cpstripped (&cp, ep, scanl + max - 1, fmt->f_comp->c_text); + cpstripped (&cp, &ep, scanl + max - 1, fmt->f_comp->c_text); break; case FT_COMPF: - cptrimmed (&cp, fmt->f_comp->c_text, fmt->f_width, fmt->f_fill, - ep - cp, scanl - cp + max - 1); + cptrimmed (&cp, &ep, fmt->f_comp->c_text, fmt->f_width, fmt->f_fill, + scanl + max - 1); break; case FT_LIT: @@ -411,11 +423,11 @@ fmt_scan (struct format *format, char *scanl, size_t max, int width, int *dat) break; case FT_STR: - cpstripped (&cp, ep, scanl + max - 1, str); + cpstripped (&cp, &ep, scanl + max - 1, str); break; case FT_STRF: - cptrimmed (&cp, str, fmt->f_width, fmt->f_fill, ep - cp, - scanl - cp + max - 1); + cptrimmed (&cp, &ep, str, fmt->f_width, fmt->f_fill, + scanl + max - 1); break; case FT_STRLIT: sp = str; @@ -939,7 +951,7 @@ fmt_scan (struct format *format, char *scanl, size_t max, int width, int *dat) *cp++ = ' '; } } - cpstripped (&cp, ep, scanl + max - 1, lp); + cpstripped (&cp, &ep, scanl + max - 1, lp); } break; diff --git a/test/format/test-rightjustify b/test/format/test-rightjustify new file mode 100755 index 00000000..2685efd0 --- /dev/null +++ b/test/format/test-rightjustify @@ -0,0 +1,27 @@ +#!/bin/sh +# +# Test that the right justification feature works correctly. +# + +if test -z "${MH_OBJ_DIR}"; then + srcdir=`dirname "$0"`/../.. + MH_OBJ_DIR=`cd "$srcdir" && pwd`; export MH_OBJ_DIR +fi + +. "$MH_OBJ_DIR/test/common.sh" + +setup_test + +actual=`${MH_LIB_DIR}/ap -format "%-30(friendly{text})<"` +expected=" No Such User<&2 + echo " $expected" 1>&2 + echo "but instead got:" 1>&2 + echo " $actual" 1>&2 + failed=`expr ${failed:-0} + 1` +fi + +exit $failed diff --git a/test/scan/test-scan-multibyte b/test/scan/test-scan-multibyte index e877cf2d..39f22240 100755 --- a/test/scan/test-scan-multibyte +++ b/test/scan/test-scan-multibyte @@ -45,7 +45,7 @@ expected="$MH_TEST_DIR/$$.expected" actual="$MH_TEST_DIR/$$.actual" cat > "$expected" < $actual || exit 1