X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/6456295d747373150577e02588770a1a4ba1efaf..8d2e57fdcca371daba8a1a07429ef84729e1f01f:/test/scan/test-scan-multibyte diff --git a/test/scan/test-scan-multibyte b/test/scan/test-scan-multibyte index bd0da47d..98dd9a26 100755 --- a/test/scan/test-scan-multibyte +++ b/test/scan/test-scan-multibyte @@ -26,30 +26,106 @@ if test "${MULTIBYTE_ENABLED}" -ne 1; then test_skip "configure did not detect multibyte support" fi -export LC_ALL=en_US.UTF-8 +LC_ALL=en_US.UTF-8; export LC_ALL # # Create a test message with RFC 2047 headers we can scan # +# In this Subject header in this message is a "n" with a Combining Diaeresis +# (U+0308). There is different interpretation of this character with respect +# to wcwidth() (which is supposed to return the column width of a character). +# We use a test program to determine what the output width of U+0308 is +# and adjust our test output appropriately. +# cat > "${MH_TEST_DIR}/Mail/inbox/11" < To: Sir Denis =?utf-8?q?Eton=E2=80=93Hogg? Date: Friday, 2 Mar 1984 00:00:00 -Subject: =?utf-8?q?Spin=CC=88al_Tap_=E2=86=92_Tap_into_America!?= +Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?= Things are looking great! EOF -expected=$MH_TEST_DIR/$$.expected -actual=$MH_TEST_DIR/$$.actual +width=`${MH_OBJ_DIR}/test/getcwidth "→n̈"` +if test $? -ne 0; then + echo "getcwidth failed to run" + exit 1 +fi + +expected="$MH_TEST_DIR/$$.expected" +actual="$MH_TEST_DIR/$$.actual" -cat > $expected < "$expected" < "$expected" < $actual || exit 1 +check "$expected" "$actual" + +# +# Check decoding with an invalid multibyte sequence. We skip this test +# if we don't have iconv support, since it requires converting from one +# character set to another. Be sure we created the test file, though, because +# it's required for the test right after it. +# + +cat >`mhpath new` < +To: Some User +Date: Mon, 31 Dec 2012 00:00:00 +Message-Id: 12@test.nmh +Subject: =?UTF-8?B?MjAxMyBOZXcgWWVhcuKAmXMgRGVhbHMhIFN0YXJ0IHRoZSB5ZWFy?= + =?UTF-8?B?IHJpZ2h0IHdpdGggYmlnIHNhdmluZ3M=?= + +This message has an encoded Subject with an invalid character for the +ISO-8859-1 character set, but it (U+2019) is valid UTF-8. +EOF + +cat >"$expected" <"$actual" + check "$expected" "$actual" +fi +# +# Find out the width of our Unicode apostrophe (U+2019). Some implementations +# say it has a width of 2, but that seems totally bizarre to me. +# + +width=`${MH_OBJ_DIR}/test/getcwidth U+2019` +if test $? -ne 0; then + echo "getcwidth failed to run" + exit 1 +fi + +# check scan width with a valid multibyte sequence +if test "$width" -eq 1; then + cat >"$expected" <"$expected" <"$actual" check "$expected" "$actual" + exit $failed