# diplodocus.org Git - nmh/blob - test/scan/test-scan-multibyte
############################################################
#
# Test scan to see if multibyte support (UTF-8 locale) works
#
# Other tests will get the normal ASCII case, so all we care
# about here is UTF-8 encoded headers (RFC 2047).
#
# Note that this file should be edited via a UTF-8 aware
# editor, since UTF-8 characters are in it.
#
############################################################
# When MH_OBJ_DIR is not supplied by the caller, derive it as the absolute
# path two directory levels above this script and export it.
if test -z "${MH_OBJ_DIR}"; then
    srcdir=`dirname "$0"`/../..
    MH_OBJ_DIR=`cd "$srcdir" && pwd`; export MH_OBJ_DIR
fi
# Pull in the shared test helpers from the build tree.
. "$MH_OBJ_DIR/test/common.sh"
# Nothing below is meaningful without multibyte support, so skip the
# whole test unless MULTIBYTE_ENABLED is 1.
if test "${MULTIBYTE_ENABLED}" -ne 1; then
    test_skip "configure did not detect multibyte support"
fi
# Run everything that follows in a UTF-8 locale.
LC_ALL=en_US.UTF-8; export LC_ALL
#
# Create a test message with RFC 2047 headers we can scan
#
# The Subject header in this message contains an "n" followed by a
# Combining Diaeresis (U+0308).  Implementations disagree on what wcwidth()
# (which is supposed to return the column width of a character) reports
# for this character.  We use a test program to determine what the output
# width of U+0308 is and adjust our test output appropriately.
#
# The blank line below separates the headers from the message body.
# Each encoded-word must end in "?=" to be valid RFC 2047.
#

cat > "${MH_TEST_DIR}/Mail/inbox/11" <<EOF
From: David =?utf-8?q?=EF=AC=86?= Hubbins <hubbins@example.com>
To: Sir Denis =?utf-8?q?Eton=E2=80=93Hogg?= <sirdenis@example.com>
Date: Friday, 2 Mar 1984 00:00:00
Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?=

Things are looking great!
EOF
# Ask the getcwidth helper for a column width; the checks that follow
# treat the result as the width of the combining diaeresis.  The failure
# test is attached directly to the assignment so it does not depend on a
# separate $? check.
# NOTE(review): the "exit 1" is reconstructed from a gap in the source
# listing -- confirm against upstream.
width=`"${MH_OBJ_DIR}/test/getcwidth"` || {
    echo "getcwidth failed to run"
    exit 1
}
# Per-process scratch files holding the expected and the actual scan output.
expected="$MH_TEST_DIR/$$.expected"
actual="$MH_TEST_DIR/$$.actual"
# Write the expected scan line for message 11.  When the combining
# diaeresis occupies a column of its own the Subject is one column wider,
# so one fewer body character fits on the line.
# NOTE(review): column spacing and the U+FB06 ligature were restored from
# the From: header and a 79-column line width; the "else"/"exit 1" lines
# are reconstructed -- confirm against upstream.
if test "$width" -eq 1; then
    cat > "$expected" <<EOF
  11  03/02 David ﬆ Hubbins    Spın̈al Tap → Tap into America!<<Things are look
EOF
elif test "$width" -eq 0; then
    cat > "$expected" <<EOF
  11  03/02 David ﬆ Hubbins    Spın̈al Tap → Tap into America!<<Things are looki
EOF
else
    echo "Unsupported width for combining diaeresis: $width"
    exit 1
fi
# Scan message 11 at 80 columns; abort the test if scan itself fails.
scan -width 80 +inbox 11 > "$actual" || exit 1
# Compare the scan output with the expectation.  If the value of $failed
# changed across the check (presumably check bumps it on a mismatch --
# verify in common.sh), dump the width table to help diagnose the failure.
oldfailed="${failed:-0}"
check "$expected" "$actual"
if test "$oldfailed" -ne "${failed:-0}"; then
    echo "Complete UTF-8 width table for BMP"
    "${MH_OBJ_DIR}/test/getcwidth" --dump
fi
#
# Check decoding with an invalid multibyte sequence.  We skip this test
# if we don't have iconv support, since it requires converting from one
# character set to another.  Be sure we created the test file, though, because
# it's required for the test right after it.
#
# The Subject is split across two encoded-words; the second sits on a
# continuation line and therefore must begin with whitespace.
#

cat >"`mhpath new`" <<EOF
From: Test12 <test12@example.com>
To: Some User <user@example.com>
Date: Mon, 31 Dec 2012 00:00:00
Message-Id: 12@test.nmh
Subject: =?UTF-8?B?MjAxMyBOZXcgWWVhcuKAmXMgRGVhbHMhIFN0YXJ0IHRoZSB5ZWFy?=
 =?UTF-8?B?IHJpZ2h0IHdpdGggYmlnIHNhdmluZ3M=?=

This message has an encoded Subject with an invalid character for the
ISO-8859-1 character set, but it (U+2019) is valid UTF-8.
EOF
# With an ISO-8859-1 character set the U+2019 in the Subject cannot be
# represented, so a "?" is expected in its place.  Only run the comparison
# when iconv support is available.
# NOTE(review): column spacing restored from a 74-column line width --
# confirm against upstream.
# NOTE(review): LC_ALL is exported as en_US.UTF-8 earlier in this script and
# takes precedence over LC_CTYPE -- confirm the override here is effective.
cat >"$expected" <<EOF
  12  12/31 Test12             2013 New Year?s Deals! Start the year right
EOF

if test "$ICONV_ENABLED" -eq 1; then
    LC_CTYPE=ISO-8859-1 MM_CHARSET=ISO-8859-1 scan -width 75 last >"$actual"
    check "$expected" "$actual"
fi
# check scan width with a valid multibyte sequence
# In UTF-8 the U+2019 apostrophe is shown as-is, and the line is cut at
# the same column as in the ISO-8859-1 expectation.
# NOTE(review): column spacing restored from a 74-column line width --
# confirm against upstream.
cat >"$expected" <<EOF
  12  12/31 Test12             2013 New Year’s Deals! Start the year right
EOF

LC_CTYPE=en_US.UTF-8 MM_CHARSET=UTF-8 scan -width 75 last >"$actual"
check "$expected" "$actual"