From: Ken Hornstein Date: Sat, 26 Jan 2013 03:22:46 +0000 (-0500) Subject: Handle the case in the test suite where wcwidth() can return different X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/de0cc2acd75970d5908922ea9d44deef595786a5?hp=-c Handle the case in the test suite where wcwidth() can return different values for combining Unicode codepoints on different operating systems. --- de0cc2acd75970d5908922ea9d44deef595786a5 diff --git a/.gitignore b/.gitignore index 0be1cc28..af04fac2 100644 --- a/.gitignore +++ b/.gitignore @@ -103,6 +103,7 @@ a.out.dSYM/ /test/fakepop /test/getfullname /test/getfqdn +/test/getcwidth # Removed by mostlyclean: *.o diff --git a/Makefile.am b/Makefile.am index 953a9b77..39ec8467 100644 --- a/Makefile.am +++ b/Makefile.am @@ -84,7 +84,8 @@ TESTS = test/ali/test-ali test/anno/test-anno \ test/cleanup ## The "cleanup" test should always be last. check_SCRIPTS = test/common.sh -check_PROGRAMS = test/getfullname test/getfqdn test/fakepop test/fakesmtp +check_PROGRAMS = test/getfullname test/getfqdn test/fakepop test/fakesmtp \ + test/getcwidth DISTCHECK_CONFIGURE_FLAGS = DISABLE_SETGID_MAIL=1 ## diff --git a/test/getcwidth.c b/test/getcwidth.c new file mode 100644 index 00000000..2df307c9 --- /dev/null +++ b/test/getcwidth.c @@ -0,0 +1,80 @@ +/* + * getcwidth - Get the OS's idea of the width of a combining character + * + * This code is Copyright (c) 2012, by the authors of nmh. See the + * COPYRIGHT file in the root directory of the nmh distribution for + * complete copyright information. 
+ */
+
+/* NOTE(review): header names below restored from usage -- please confirm. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <locale.h>
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef MULTIBYTE_SUPPORT
+#include <wctype.h>
+#include <wchar.h>
+#endif
+
+/* Print wcwidth() of U+0308 (decoded after an "n" base character) on stdout.
+ * Takes no arguments; exits 0 on success and 1 on any failure. */
+int
+main(int argc, char *argv[])
+{
+    wchar_t c;
+    int charlen;
+    char *p;
+
+    /* This is the UTF-8 for "n" + U+0308 (Combining Diaeresis) */
+
+    char string[] = "n\xcc\x88";  /* plain char: what strlen()/mbtowc() take */
+
+    setlocale(LC_ALL, "");
+
+    if (argc != 1) {
+        fprintf(stderr, "Usage: %s\n", argv[0]);
+        fprintf(stderr, "Returns the column width of a UTF-8 "
+                "multibyte character\n");
+        exit(1);
+    }
+
+#ifndef MULTIBYTE_SUPPORT
+    fprintf(stderr, "Nmh was not configured with multibyte support\n");
+    exit(1);
+#else
+    /*
+     * It's not clear to me that we can just call mbtowc() with a combining
+     * character; just to be safe, feed it in a base character first.
+     */
+
+    mbtowc(NULL, NULL, 0);  /* reset the conversion state */
+
+    charlen = mbtowc(&c, string, strlen(string));
+
+    if (charlen != 1) {
+        fprintf(stderr, "We expected a beginning character length "
+                "of 1, got %d instead\n", charlen);
+        exit(1);
+    }
+
+    p = string + charlen;  /* step past the base character to U+0308 */
+
+    charlen = mbtowc(&c, p, strlen(p));
+
+    if (charlen != 2) {
+        fprintf(stderr, "We expected a multibyte character length "
+                "of 2, got %d instead\n", charlen);
+        fprintf(stderr, "Are you using a UTF-8 locale?\n");
+        exit(1);
+    }
+
+    printf("%d\n", wcwidth(c));
+
+    exit(0);
+#endif /* MULTIBYTE_SUPPORT */
+}
diff --git a/test/scan/test-scan-multibyte b/test/scan/test-scan-multibyte
index 39f22240..1e9349a0 100755
--- a/test/scan/test-scan-multibyte
+++ b/test/scan/test-scan-multibyte
@@ -31,22 +31,43 @@ LC_ALL=en_US.UTF-8; export LC_ALL
 #
 # Create a test message with RFC 2047 headers we can scan
 #
+# The Subject header of this message has an "n" with a Combining Diaeresis
+# (U+0308).  Operating systems disagree about this character with respect
+# to wcwidth() (which is supposed to return the column width of a character).
+# We use a test program to determine what the output width of U+0308 is +# and adjust our test output appropriately. +# cat > "${MH_TEST_DIR}/Mail/inbox/11" < To: Sir Denis =?utf-8?q?Eton=E2=80=93Hogg? Date: Friday, 2 Mar 1984 00:00:00 -Subject: =?utf-8?q?Spin=CC=88al_Tap_=E2=86=92_Tap_into_America!?= +Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?= Things are looking great! EOF +width=`${MH_OBJ_DIR}/test/getcwidth` +if test $? -ne 0; then + echo "getcwidth failed to run" + exit 1 +fi + expected="$MH_TEST_DIR/$$.expected" actual="$MH_TEST_DIR/$$.actual" +if test "$width" -eq 1; then cat > "$expected" < "$expected" < $actual || exit 1 check "$expected" "$actual"