# We use a test program to determine what the output width of U+0308 is
# and adjust our test output appropriately.
#
+# True Spın̈al Tap fans will note that David st Hubbins was born in Squatney,
+# London, England, and thus having his name language-tagged with "cy" is almost
+# certainly incorrect. But in his own words: "Here lies David st Hubbins,
+# and why not?".
+#
+# The second "*", in the To: line, has no language tag after it; it is there
+# just to exercise the parser a bit.
+#
cat > "${MH_TEST_DIR}/Mail/inbox/11" <<EOF
-From: David =?utf-8?q?=EF=AC=86?= Hubbins <hubbins@example.com>
-To: Sir Denis =?utf-8?q?Eton=E2=80=93Hogg? <sirdenis@example.com>
+From: David =?utf-8*cy?q?=EF=AC=86?= Hubbins <hubbins@example.com>
+To: Sir Denis =?utf-8*?q?Eton=E2=80=93Hogg? <sirdenis@example.com>
Date: Friday, 2 Mar 1984 00:00:00
Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?=
Things are looking great!
EOF
-width=`${MH_OBJ_DIR}/test/getcwidth`
+width=`${MH_OBJ_DIR}/test/getcwidth "→n̈"`
if test $? -ne 0; then
echo "getcwidth failed to run"
exit 1
expected="$MH_TEST_DIR/$$.expected"
actual="$MH_TEST_DIR/$$.actual"
-if test "$width" -eq 1; then
+if test "$width" -eq 3; then
cat > "$expected" <<EOF
11 03/02 David st Hubbins Spın̈al Tap → Tap into America!<<Things are look
EOF
-elif test "$width" -eq 0; then
+elif test "$width" -eq 2; then
cat > "$expected" <<EOF
11 03/02 David st Hubbins Spın̈al Tap → Tap into America!<<Things are looki
EOF
else
- echo "Unsupported width for combining diaeresis: $width"
+ echo "Unsupported width for UTF-8 test string: $width"
exit 1
fi
-scan -width 80 +inbox 11 > $actual || exit 1
-set -x
-oldfailed="${failed:-0}"
+# Quote the redirect target: $actual lives under $MH_TEST_DIR, which may
+# contain whitespace.
+run_prog scan -width 80 +inbox 11 > "$actual" || exit 1
check "$expected" "$actual"
-if test "$oldfailed" -ne "${failed:-0}"; then
- echo "Complete UTF-8 width table for BMP"
- ${MH_OBJ_DIR}/test/getcwidth --dump
-fi
-set +x
#
# Check decoding with an invalid multibyte sequence. We skip this test
ISO-8859-1 character set, but it (U+2019) is valid UTF-8.
EOF
-cat >"$expected" <<EOF
+if test "$ICONV_ENABLED" -eq 1; then
+ cat >"$expected" <<EOF
12 12/31 Test12 2013 New Year?s Deals! Start the year right
EOF
-if test "$ICONV_ENABLED" -eq 1; then
- LC_CTYPE=ISO-8859-1 MM_CHARSET=ISO-8859-1 scan -width 75 last >"$actual"
+ # Don't use run_prog here because it loses those environment settings.
+ LC_ALL=en_US.ISO8859-1 scan -width 75 last >"$actual"
check "$expected" "$actual"
fi
+#
+# Find out the width of our Unicode apostrophe (U+2019). Some implementations
+# say it has a width of 2, but that seems totally bizarre to me.
+#
+
+width=`${MH_OBJ_DIR}/test/getcwidth U+2019`
+if test $? -ne 0; then
+ echo "getcwidth failed to run"
+ exit 1
+fi
+
# check scan width with a valid multibyte sequence
-cat >"$expected" <<EOF
+# Choose the expected scan line according to the column width reported for
+# U+2019; with a width of 2 the listing is truncated one character earlier.
+if test "$width" -eq 1; then
+ cat >"$expected" <<EOF
12 12/31 Test12 2013 New Year’s Deals! Start the year right
EOF
+elif test "$width" -eq 2; then
+ cat >"$expected" <<EOF
+ 12 12/31 Test12 2013 New Year’s Deals! Start the year righ
+EOF
+else
+ echo "Unsupported width for U+2019: $width"
+ # Stop here rather than comparing against a stale "$expected" file.
+ exit 1
+fi
+
+LC_ALL=en_US.UTF-8 scan -width 75 last >"$actual"
+check "$expected" "$actual"
+
+
+cat >"$expected" <<EOF
+ 13 01/13 sender@example.co <<The Subject: is an encoded single quote, 0x92.
+EOF
+
+cat >"${MH_TEST_DIR}/Mail/inbox/13" <<EOF
+From: <sender@example.com>
+Subject: =?iso-8859-1?B?kgo=?=
+Date: Mon, 13 Jan 2014 14:18:33 -0600
+
+The Subject: is an encoded single quote, 0x92. cpstripped() didn't
+properly count it when decoding, which could be seen with:
+
+ scan -format '%(decode{subject})%{body}'
+
+The scan listing was two characters too long.
+EOF
-LC_CTYPE=en_US.UTF-8 MM_CHARSET=UTF-8 scan -width 75 last >"$actual"
+LC_ALL=en_US.ISO8859-1 scan -width 80 last >"$actual"
check "$expected" "$actual"