X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/d5e78944632398a4e31bca0c43cb4df3b30b1640..fa462ef71fbaf819397c59b2c07adf45a04af9f8:/test/scan/test-scan-multibyte?ds=inline diff --git a/test/scan/test-scan-multibyte b/test/scan/test-scan-multibyte index 819c34c0..db1d3ba7 100755 --- a/test/scan/test-scan-multibyte +++ b/test/scan/test-scan-multibyte @@ -37,17 +37,24 @@ LC_ALL=en_US.UTF-8; export LC_ALL # We use a test program to determine what the output width of U+0308 is # and adjust our test output appropriately. # +# True Spın̈al Tap fans will note that David st Hubbins was born in Squatney, +# London, England, and thus having his name language-tagged with "cy" is almost +# certainly incorrect. But in his own words: "Here lies David st Hubbins, +# and why not?". +# +# The second "*" in the To line is just to exercise the parser a bit. +# cat > "${MH_TEST_DIR}/Mail/inbox/11" < -To: Sir Denis =?utf-8?q?Eton=E2=80=93Hogg? +From: David =?utf-8*cy?q?=EF=AC=86?= Hubbins +To: Sir Denis =?utf-8*?q?Eton=E2=80=93Hogg? Date: Friday, 2 Mar 1984 00:00:00 Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?= Things are looking great! EOF -width=`${MH_OBJ_DIR}/test/getcwidth` +width=`${MH_OBJ_DIR}/test/getcwidth "→n̈"` if test $? -ne 0; then echo "getcwidth failed to run" exit 1 @@ -56,30 +63,30 @@ fi expected="$MH_TEST_DIR/$$.expected" actual="$MH_TEST_DIR/$$.actual" -if test "$width" -eq 1; then +if test "$width" -eq 3; then cat > "$expected" < "$expected" < $actual || exit 1 +run_prog scan -width 80 +inbox 11 > $actual || exit 1 check "$expected" "$actual" # # Check decoding with an invalid multibyte sequence. We skip this test # if we don't have iconv support, since it requires converting from one -# character set to another +# character set to another. Be sure we created the test file, though, because +# it's required for the test right after it. 
# -if test "$ICONV_ENABLED" -eq 1; then - cat >`mhpath new` <`mhpath new` < To: Some User Date: Mon, 31 Dec 2012 00:00:00 @@ -91,20 +98,41 @@ This message has an encoded Subject with an invalid character for the ISO-8859-1 character set, but it (U+2019) is valid UTF-8. EOF - cat >"$expected" <"$expected" <"$actual" check "$expected" "$actual" fi +# +# Find out the width of our Unicode apostrophe (U+2019). Some implementations +# say it has a width of 2, but that seems totally bizarre to me. +# + +width=`${MH_OBJ_DIR}/test/getcwidth U+2019` +if test $? -ne 0; then + echo "getcwidth failed to run" + exit 1 +fi + # check scan width with a valid multibyte sequence -cat >"$expected" <"$expected" <"$expected" <"$actual" +LC_CTYPE=en_US.UTF-8 MM_CHARSET=UTF-8 run_prog scan -width 75 last >"$actual" check "$expected" "$actual"