X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/71eaed755250fdd6ac66d1139e59a2cc7ad3980e..5977791e005446f4cd8b2f04bfb14c8cd265df8a:/test/scan/test-scan-multibyte diff --git a/test/scan/test-scan-multibyte b/test/scan/test-scan-multibyte index e877cf2d..dc2a284c 100755 --- a/test/scan/test-scan-multibyte +++ b/test/scan/test-scan-multibyte @@ -26,32 +26,70 @@ if test "${MULTIBYTE_ENABLED}" -ne 1; then test_skip "configure did not detect multibyte support" fi -LC_ALL=en_US.UTF-8; export LC_ALL +require_locale en_US.UTF-8 en_US.UTF8 en_US.utf-8 en_US.utf8 # # Create a test message with RFC 2047 headers we can scan # +# In this Subject header in this message is a "n" with a Combining Diaeresis +# (U+0308). There is different interpretation of this character with respect +# to wcwidth() (which is supposed to return the column width of a character). +# We use a test program to determine what the output width of U+0308 is +# and adjust our test output appropriately. +# +# True Spın̈al Tap fans will note that David st Hubbins was born in Squatney, +# London, England, and thus having his name language-tagged with "cy" is almost +# certainly incorrect. But in his own words: "Here lies David st Hubbins, +# and why not?". +# +# The second "* in the To line is just to exercise the parser a bit. +# cat > "${MH_TEST_DIR}/Mail/inbox/11" < -To: Sir Denis =?utf-8?q?Eton=E2=80=93Hogg? +From: David =?utf-8*cy?q?=EF=AC=86?= Hubbins +To: Sir Denis =?utf-8*?q?Eton=E2=80=93Hogg? Date: Friday, 2 Mar 1984 00:00:00 -Subject: =?utf-8?q?Spin=CC=88al_Tap_=E2=86=92_Tap_into_America!?= +Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?= Things are looking great! EOF +width=`${MH_OBJ_DIR}/test/getcwidth "→n̈"` +if test $? -ne 0; then + echo "getcwidth failed to run" + exit 1 +fi + expected="$MH_TEST_DIR/$$.expected" actual="$MH_TEST_DIR/$$.actual" + +start_test 'RFC 2047 headers' +if test "$width" -eq 3; then cat > "$expected" < "$expected" < $actual || exit 1 +run_prog scan -width 80 +inbox 11 > $actual || exit 1 check "$expected" "$actual" -# check decoding with an invalid multibyte sequence + +# +# Check decoding with an invalid multibyte sequence. We skip this test +# if we don't have iconv support, since it requires converting from one +# character set to another. Be sure we created the test file, though, because +# it's required for the test right after it. +# + +start_test 'invalid multibyte sequence' cat >`mhpath new` < To: Some User @@ -60,24 +98,90 @@ Message-Id: 12@test.nmh Subject: =?UTF-8?B?MjAxMyBOZXcgWWVhcuKAmXMgRGVhbHMhIFN0YXJ0IHRoZSB5ZWFy?= =?UTF-8?B?IHJpZ2h0IHdpdGggYmlnIHNhdmluZ3M=?= -This message has an encoded Subject with an invalid character for the -ISO-8859-1 character set, but it (U+2019) is valid UTF-8. +This message has an encoded Subject with an invalid character for +single-byte character sets, but it (U+2019) is valid UTF-8. EOF -cat >"$expected" <"$expected" <"$actual" -check "$expected" "$actual" + # Don't use run_prog here because it loses the environment setting. + LC_ALL=C scan -width 74 last >"$actual" + check "$expected" "$actual" +fi + +# +# Find out the width of our Unicode apostrophe (U+2019). Some implementations +# say it has a width of 2, but that seems totally bizarre to me. +# + +width=`${MH_OBJ_DIR}/test/getcwidth U+2019` +if test $? -ne 0; then + echo "getcwidth failed to run" + exit 1 +fi # check scan width with a valid multibyte sequence -cat >"$expected" <"$expected" <"$expected" <"$actual" +check "$expected" "$actual" + + +if test "$ICONV_ENABLED" -eq 1; then + start_test 'encoded single quote' + cat >"$expected" <"${MH_TEST_DIR}/Mail/inbox/13" < +Subject: =?iso-8859-1?B?kgo=?= +Date: Mon, 13 Jan 2014 14:18:33 -0600 + +The Subject: is an encoded single quote, 0x92. cpstripped() didn't +properly count it when decoding, which could be seen with: + + scan -format '%(decode{subject})%{body}' + +The scan listing was two characters too long. +EOF + + run_prog scan -width 80 last >"$actual" + check "$expected" "$actual" +fi + + +start_test 'insufficient room for multicolumn character' +#### This multibyte character requires 2 columns for display, but +#### only 1 is availble. cpstripped() used to get this wrong. + +cat >"$expected" <`mhpath new` <"$actual" +run_prog scan -format '%{body}' -width 1 last >"$actual" check "$expected" "$actual" +finish_test exit $failed