From: David Levine Date: Sat, 1 Oct 2016 18:37:27 +0000 (-0400) Subject: mhfixmsg now replaces RFC 2047 encoding with RFC 2231 encoding of X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/d3ecbe4bae7a3f1972b553d4b1efab3079ef0bae?hp=64ef3b3008a7b990c2c856c24354ed2cabcc76dc mhfixmsg now replaces RFC 2047 encoding with RFC 2231 encoding of name and filename parameters in Content-Type and Content-Disposition headers, respectively. --- diff --git a/docs/pending-release-notes b/docs/pending-release-notes index f44eb066..cb3f09d8 100644 --- a/docs/pending-release-notes +++ b/docs/pending-release-notes @@ -52,6 +52,10 @@ NEW FEATURES - Support for SMTPUTF8 (RFC 6531) has been added. mhshow(1) already supported RFC 6532, assuming all 8-bit message header field bodies are UTF-8 and use of a UTF-8 locale. +- mhfixmsg now replaces RFC 2047 encoding with RFC 2231 encoding of name and + filename parameters in Content-Type and Content-Disposition headers, + respectively. + ----------------- OBSOLETE FEATURES diff --git a/h/mhparse.h b/h/mhparse.h index 08dd77c1..c1564c99 100644 --- a/h/mhparse.h +++ b/h/mhparse.h @@ -427,6 +427,21 @@ int list_switch(CT ct, int toplevel, int realsize, int verbose, int debug, char *output_params(size_t initialwidth, PM params, int *offsetout, int external); +/* + * Encode a parameter value using RFC 2231 encoding. + * + * Arguments are: + * + * pm - PM containing the parameter value and related info. + * output - Output buffer. + * len - Size, in octets, of output buffer. + * valuelen - Number of characters in the value. + * valueoff - Offset into value field (pm->pm_value). + * index - If 0, output character set and language tag. + */ +size_t encode_param(PM pm, char *output, size_t len, size_t valuelen, + size_t valueoff, int index); + /* * Add a parameter to the parameter linked list. 
* diff --git a/man/mhfixmsg.man b/man/mhfixmsg.man index 38c322f9..88c08a79 100644 --- a/man/mhfixmsg.man +++ b/man/mhfixmsg.man @@ -1,4 +1,4 @@ -.TH MHFIXMSG %manext1% "September 22, 2016" "%nmhversion%" +.TH MHFIXMSG %manext1% "October 1, 2016" "%nmhversion%" .\" .\" %nmhwarning% .\" @@ -208,8 +208,12 @@ more descriptive MIME type. It may not be used for multipart and message types. .PP .B mhfixmsg -applies one transformation unconditionally: it removes an extraneous -trailing semicolon from the parameter lists of MIME header fields. +applies two transformations unconditionally. +The first removes an extraneous trailing semicolon from the parameter +lists of MIME header field values. +The second replaces RFC 2047 encoding with RFC 2231 encoding of name +and filename parameters in Content-Type and Content-Disposition header +field values, respectively. .PP The .B \-verbose diff --git a/test/mhfixmsg/test-mhfixmsg b/test/mhfixmsg/test-mhfixmsg index 152b11c7..e5df1430 100755 --- a/test/mhfixmsg/test-mhfixmsg +++ b/test/mhfixmsg/test-mhfixmsg @@ -586,7 +586,7 @@ MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" ------- =_aaaaaaaaaa0 -Content-Type: text/plain; charset=UTF-8; name="nul+square.txt" +Content-Type: text/plain; charset="UTF-8"; name="nul+square.txt" Content-Transfer-Encoding: base64 vbI9vAAK @@ -727,7 +727,7 @@ MIME-Version: 1.0 Content-Type: multipart/mixed; boundary=\"----- =_aaaaaaaaaa0\" ------- =_aaaaaaaaaa0 -Content-Type: text/plain; charset=UTF-8; name=\"nul+square.txt\" +Content-Type: text/plain; charset=\"UTF-8\"; name=\"nul+square.txt\" Content-Transfer-Encoding: binary " " @@ -1715,6 +1715,64 @@ set -e check `mhpath last` "$actual" +start_test "fix RFC 2047 encoded header parameters" +cat >"$expected" <`mhpath new` <"$actual" 2>/dev/null +check "$expected" "$actual" + + # make sure there are no tmp files left over find "$MH_TEST_DIR/Mail" \( -name 'mhfix*' -o -name ',mhfix*' \) -print \ >"$actual" diff --git 
a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 0fbff400..2bc65a9d 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -119,6 +119,8 @@ static int content_encoding (CT, const char **); static int strip_crs (CT, int *); static int convert_charsets (CT, char *, int *); static int fix_always (CT, int *); +static int fix_filename_param (char *, char *, PM *, PM *); +static int fix_filename_encoding (CT); static int write_content (CT, const char *, char *, int, int); static void set_text_ctparams(CT, char *, int); static int remove_file (const char *); @@ -2375,6 +2377,10 @@ fix_always (CT ct, int *message_mods) { default: { HF hf; + if (ct->c_first_hf) { + fix_filename_encoding (ct); + } + for (hf = ct->c_first_hf; hf; hf = hf->next) { size_t len = strlen (hf->value); @@ -2424,6 +2430,100 @@ fix_always (CT ct, int *message_mods) { } +/* + * Factor out common code for loops in fix_filename_encoding(). If value begins with "=?" and ends with "?=", it is passed to decode_rfc2047() and, on success, the decoded text is handed to replace_param() under the same name on the list delimited by first_pm/last_pm. Returns 1 if replace_param() was called, 0 otherwise; a decode failure is reported with advise() and also returns 0. + */ +static int +fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) { + size_t value_len; + int fixed = 0; + + if (((value_len = strlen (value)) > 0) && + strncmp (value, "=?", 2) == 0 && + strncmp (&value[value_len - 2], "?=", 2) == 0) { /* index is safe: the "=?" prefix match above implies value_len >= 2 */ + /* Looks like an RFC 2047 encoded parameter. */ + char decoded[PATH_MAX + 1]; + + if (decode_rfc2047 (value, decoded, sizeof decoded)) { + /* Encode using RFC 2231. */ + replace_param (first_pm, last_pm, name, decoded, 0); + fixed = 1; + } else { + advise (NULL, "failed to decode %s parameter %s", name, value); + } + } + + return fixed; +} + + +/* + * Replace RFC 2047 encoding with RFC 2231 encoding of name and + * filename parameters in Content-Type and Content-Disposition + * headers, respectively. 
+ */ +static int +fix_filename_encoding (CT ct) { + PM pm; + HF hf; + int fixed = 0; /* OR-accumulated below: set once any name/filename parameter has been re-encoded, and never cleared by a later parameter that needed no fix */ + + for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) { + if (pm->pm_name && pm->pm_value && + strcasecmp (pm->pm_name, "name") == 0) { + fixed |= fix_filename_param (pm->pm_name, pm->pm_value, + &ct->c_ctinfo.ci_first_pm, + &ct->c_ctinfo.ci_last_pm); + } + } + + for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) { + if (pm->pm_name && pm->pm_value && + strcasecmp (pm->pm_name, "filename") == 0) { + fixed |= fix_filename_param (pm->pm_name, pm->pm_value, + &ct->c_dispo_first, + &ct->c_dispo_last); + } + } + + /* Fix hf values to correspond. */ + for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) { + enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER; + + if (strcasecmp (hf->name, TYPE_FIELD) == 0) { + field = TYPE_HEADER; + } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) { + field = DISPO_HEADER; + } + + if (field != OTHER) { + const char *const semicolon_loc = strchr (hf->value, ';'); + + if (semicolon_loc) { + const size_t len = + strlen (hf->name) + 1 + semicolon_loc - hf->value; /* width passed to output_params(); NOTE(review): presumably the field name, its ':' and the value text before the ';' -- confirm */ + const char *const params = + output_params (len, + field == TYPE_HEADER + ? ct->c_ctinfo.ci_first_pm + : ct->c_dispo_first, + NULL, 0); + const char *const new_params = concat (params, "\n", NULL); + + replace_substring (&hf->value, semicolon_loc, new_params); + free ((char *) new_params); + free ((char *) params); + } else { + advise (NULL, "did not find semicolon in %s:%s", + hf->name, hf->value); + } + } + } + + return OK; +} + + /* * Output content in input file to output file. */ @@ -2502,8 +2602,8 @@ write_content (CT ct, const char *input_filename, char *outfile, int modify_inpl /* - * parse_mime() does not set lf_line_endings in struct text, so use this function to do it. - * It touches the parts the decodetypes identifies. + * parse_mime() does not set lf_line_endings in struct text, so use this + * function to do it. It touches the parts the decodetypes identifies. 
*/ static void set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) { diff --git a/uip/mhparse.c b/uip/mhparse.c index 7ec030ba..40fe61d5 100644 --- a/uip/mhparse.c +++ b/uip/mhparse.c @@ -151,7 +151,6 @@ static int openURL (CT, char **); static int parse_header_attrs (const char *, const char *, char **, PM *, PM *, char **); static size_t param_len(PM, int, size_t, int *, int *, size_t *); -static size_t encode_param(PM, char *, size_t, size_t, size_t, int); static size_t normal_param(PM, char *, size_t, size_t, size_t); static int get_dispo (char *, CT, int); @@ -3981,7 +3980,7 @@ param_len(PM pm, int index, size_t valueoff, int *encode, int *cont, * Output an encoded parameter string. */ -static size_t +size_t encode_param(PM pm, char *output, size_t len, size_t valuelen, size_t valueoff, int index) {