From: Ken Hornstein Date: Tue, 26 Feb 2013 18:19:06 +0000 (-0500) Subject: Support RFC 2231 language tags in RFC 2047 header strings. X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/25b5d3aa852eb145beea1d3abcbe57e75f2e59a7?ds=inline;hp=1c570cbefde061bf4aee1d0ab99bebb9fab40846 Support RFC 2231 language tags in RFC 2047 header strings. --- diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c index 25c98c3d..7f73efd2 100644 --- a/sbr/fmt_rfc2047.c +++ b/sbr/fmt_rfc2047.c @@ -61,7 +61,7 @@ int decode_rfc2047 (char *str, char *dst, size_t dstlen) { char *p, *q, *pp; - char *startofmime, *endofmime; + char *startofmime, *endofmime, *endofcharset; int c, quoted_printable; int encoding_found = 0; /* did we decode anything? */ int between_encodings = 0; /* are we between two encodings? */ @@ -127,11 +127,25 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) if (!*pp) continue; + /* + * RFC 2231 specifies that language information can appear + * in a charset specification like so: + * + * =?us-ascii*en?Q?Foo?= + * + * Right now we don't use language information, so ignore it. + */ + + for (endofcharset = startofmime; + *endofcharset != '*' && endofcharset < pp; + endofcharset++) + ; + /* Check if character set can be handled natively */ - if (!check_charset(startofmime, pp - startofmime)) { + if (!check_charset(startofmime, endofcharset - startofmime)) { #ifdef HAVE_ICONV /* .. it can't. We'll use iconv then. */ - *pp = '\0'; + *endofcharset = '\0'; cd = iconv_open(get_charset(), startofmime); fromutf8 = !mh_strcasecmp(startofmime, "UTF-8"); *pp = '?'; diff --git a/test/scan/test-scan-multibyte b/test/scan/test-scan-multibyte index 98dd9a26..255adace 100755 --- a/test/scan/test-scan-multibyte +++ b/test/scan/test-scan-multibyte @@ -37,10 +37,17 @@ LC_ALL=en_US.UTF-8; export LC_ALL # We use a test program to determine what the output width of U+0308 is # and adjust our test output appropriately. 
# +# True Spın̈al Tap fans will note that David st Hubbins was born in Squatney, +# London, England, and thus having his name language-tagged with "cy" is almost +# certainly incorrect. But in his own words: "Here lies David st Hubbins, +# and why not?". +# +# The second "*" in the To line is just to exercise the parser a bit. +# cat > "${MH_TEST_DIR}/Mail/inbox/11" < -To: Sir Denis =?utf-8?q?Eton=E2=80=93Hogg? +From: David =?utf-8*cy?q?=EF=AC=86?= Hubbins +To: Sir Denis =?utf-8*?q?Eton=E2=80=93Hogg? Date: Friday, 2 Mar 1984 00:00:00 Subject: =?utf-8?q?Sp=C4=B1n=CC=88al_Tap_=E2=86=92_Tap_into_America!?=