# Source: diplodocus.org Git - nmh/blob - test/scan/test-scan-multibyte
############################################################
#
# Test scan to see if multibyte support (UTF-8 locale) works
#
# Other tests will get the normal ASCII case, so all we care
# about here is UTF-8 encoded headers (RFC 2047).
#
# Note that this file should be edited via a UTF-8 aware
# editor, since UTF-8 characters are in it.
#
############################################################

# When the harness has not exported MH_OBJ_DIR, derive it from this
# script's location: the directory two levels above it.
if test -z "${MH_OBJ_DIR}"; then
    srcdir=`dirname "$0"`/../..
    MH_OBJ_DIR=`cd "$srcdir" && pwd`; export MH_OBJ_DIR
fi

# Shared test helpers; presumably where test_skip, require_locale,
# run_prog and check come from -- confirm against common.sh.
. "$MH_OBJ_DIR/test/common.sh"

# An unset MULTIBYTE_ENABLED is treated as "not enabled" so that the
# numeric comparison cannot fail with a non-integer operand.
if test "${MULTIBYTE_ENABLED:-0}" -ne 1; then
    test_skip "configure did not detect multibyte support"
fi

# Everything below needs a UTF-8 locale.
require_locale en_US.utf-8 en_US.utf8
LC_ALL=en_US.UTF-8; export LC_ALL
#
# Create a test message with RFC 2047 headers we can scan
#
# The Subject header in this message contains an "n" followed by a
# Combining Diaeresis (U+0308).  There are different interpretations of this
# character with respect to wcwidth() (which is supposed to return the column
# width of a character).  We use a test program to determine what the output
# width of U+0308 is and adjust our test output appropriately.
#
# True Spın̈al Tap fans will note that David st Hubbins was born in Squatney,
# London, England, and thus having his name language-tagged with "cy" is almost
# certainly incorrect.  But in his own words: "Here lies David st Hubbins,
# and why not?"
#
# The second "*" (in the To line) is just to exercise the parser a bit.
#

cat > "${MH_TEST_DIR}/Mail/inbox/11" <<EOF
From: David =?utf-8*cy?q?=EF=AC=86?= Hubbins <hubbins@example.com>
To: Sir Denis =?utf-8*?q?Eton=E2=80=93Hogg? <sirdenis@example.com>
Date: Friday, 2 Mar 1984 00:00:00
Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?=

Things are looking great!
EOF

# Ask the helper how many columns this platform assigns to the test string.
# The status is checked on the assignment itself so a failure is reported
# whether or not the shell is running with errexit.
width=`"${MH_OBJ_DIR}/test/getcwidth" "→n̈"` || {
    echo "getcwidth failed to run"
    exit 1
}

expected="$MH_TEST_DIR/$$.expected"
actual="$MH_TEST_DIR/$$.actual"

# The body is truncated one column earlier when the string is 3 columns wide.
# NOTE(review): the From header encodes U+FB06 (the "st" ligature) while the
# expected lines below spell plain "st", and scan's column padding looks
# collapsed -- confirm these fixture lines against real scan output.
if test "$width" -eq 3; then
    cat > "$expected" <<EOF
11 03/02 David st Hubbins Spın̈al Tap → Tap into America!<<Things are looki
EOF
elif test "$width" -eq 2; then
    cat > "$expected" <<EOF
11 03/02 David st Hubbins Spın̈al Tap → Tap into America!<<Things are lookin
EOF
else
    echo "Unsupported width for UTF-8 test string: $width"
    exit 1
fi

run_prog scan -width 80 +inbox 11 > "$actual" || exit 1
check "$expected" "$actual"
#
# Check decoding with an invalid multibyte sequence.  We skip this test
# if we don't have iconv support, since it requires converting from one
# character set to another.  Be sure we created the test file, though, because
# it's required for the test right after it.
#

# The redirect target is quoted so a path containing whitespace cannot be
# word-split.  The second Subject line is a folded continuation and therefore
# must start with whitespace.
cat >"`mhpath new`" <<EOF
From: Test12 <test12@example.com>
To: Some User <user@example.com>
Date: Mon, 31 Dec 2012 00:00:00
Message-Id: 12@test.nmh
Subject: =?UTF-8?B?MjAxMyBOZXcgWWVhcuKAmXMgRGVhbHMhIFN0YXJ0IHRoZSB5ZWFy?=
 =?UTF-8?B?IHJpZ2h0IHdpdGggYmlnIHNhdmluZ3M=?=

This message has an encoded Subject with an invalid character for
single-byte character sets, but it (U+2019) is valid UTF-8.
EOF

# An unset ICONV_ENABLED is treated as 0, i.e. the check is skipped.
# NOTE(review): column padding in the expected line may have been lost --
# confirm against real scan output.
if test "${ICONV_ENABLED:-0}" -eq 1; then
    cat >"$expected" <<EOF
12 12/31 Test12 2013 New Year?s Deals! Start the year right
EOF

    # Don't use run_prog here because it loses the environment setting.
    LC_ALL=C scan -width 74 last >"$actual"
    check "$expected" "$actual"
fi
#
# Find out the width of our Unicode apostrophe (U+2019).  Some implementations
# say it has a width of 2, but that seems totally bizarre to me.
#

# The status is checked on the assignment itself so a failure is reported
# whether or not the shell is running with errexit.
width=`"${MH_OBJ_DIR}/test/getcwidth" U+2019` || {
    echo "getcwidth failed to run"
    exit 1
}

# check scan width with a valid multibyte sequence
# A 2-column apostrophe costs one column of the subject text.
# NOTE(review): column padding in the expected lines may have been lost --
# confirm against real scan output.
if test "$width" -eq 1; then
    cat >"$expected" <<EOF
12 12/31 Test12 2013 New Year’s Deals! Start the year right
EOF
elif test "$width" -eq 2; then
    cat >"$expected" <<EOF
12 12/31 Test12 2013 New Year’s Deals! Start the year righ
EOF
else
    # Stop here: continuing would compare against a stale "$expected".
    echo "Unsupported width for U+2019: $width"
    exit 1
fi

run_prog scan -width 74 last >"$actual"
check "$expected" "$actual"
# Regression check for an ISO-8859-1 encoded Subject holding byte 0x92; the
# message body below describes the original miscount.  Needs iconv, so an
# unset ICONV_ENABLED is treated as 0 and the check is skipped.
# NOTE(review): column padding in the expected line may have been lost --
# confirm against real scan output.
if test "${ICONV_ENABLED:-0}" -eq 1; then
    cat >"$expected" <<EOF
13 01/13 sender@example.co <<The Subject: is an encoded single quote, 0x92.
EOF

    cat >"${MH_TEST_DIR}/Mail/inbox/13" <<EOF
From: <sender@example.com>
Subject: =?iso-8859-1?B?kgo=?=
Date: Mon, 13 Jan 2014 14:18:33 -0600

The Subject: is an encoded single quote, 0x92. cpstripped() didn't
properly count it when decoding, which could be seen with:

scan -format '%(decode{subject})%{body}'

The scan listing was two characters too long.
EOF

    run_prog scan -width 80 last >"$actual"
    check "$expected" "$actual"
fi