decode_rfc2047 (char *str, char *dst, size_t dstlen)
{
char *p, *q, *pp;
- char *startofmime, *endofmime;
+ char *startofmime, *endofmime, *endofcharset;
int c, quoted_printable;
int encoding_found = 0; /* did we decode anything? */
int between_encodings = 0; /* are we between two encodings? */
if (!*pp)
continue;
+ /*
+ * RFC 2231 specifies that language information can appear
+ * in a charset specification like so:
+ *
+ * =?us-ascii*en?Q?Foo?=
+ *
+ * Right now we don't use language information, so ignore it.
+ */
+
+ for (endofcharset = startofmime;
+ *endofcharset != '*' && endofcharset < pp;
+ endofcharset++)
+ ;
+
/* Check if character set can be handled natively */
- if (!check_charset(startofmime, pp - startofmime)) {
+ if (!check_charset(startofmime, endofcharset - startofmime)) {
#ifdef HAVE_ICONV
/* .. it can't. We'll use iconv then. */
- *pp = '\0';
+ *endofcharset = '\0';
cd = iconv_open(get_charset(), startofmime);
fromutf8 = !mh_strcasecmp(startofmime, "UTF-8");
*pp = '?';
# We use a test program to determine what the output width of U+0308 is
# and adjust our test output appropriately.
#
+# True Spın̈al Tap fans will note that David st Hubbins was born in Squatney,
+# London, England, and thus having his name language-tagged with "cy" is almost
+# certainly incorrect. But in his own words: "Here lies David st Hubbins,
+# and why not?".
+#
+# The second "*" in the To line is just to exercise the parser a bit.
+#
cat > "${MH_TEST_DIR}/Mail/inbox/11" <<EOF
-From: David =?utf-8?q?=EF=AC=86?= Hubbins <hubbins@example.com>
-To: Sir Denis =?utf-8?q?Eton=E2=80=93Hogg? <sirdenis@example.com>
+From: David =?utf-8*cy?q?=EF=AC=86?= Hubbins <hubbins@example.com>
+To: Sir Denis =?utf-8*?q?Eton=E2=80=93Hogg? <sirdenis@example.com>
Date: Friday, 2 Mar 1984 00:00:00
Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?=