- Support for SMTPUTF8 (RFC 6531) has been added. mhshow(1) already supported
RFC 6532, assuming all 8-bit message header field bodies are UTF-8 and use
of a UTF-8 locale.
+- mhfixmsg now replaces RFC 2047 encoding with RFC 2231 encoding of name and
+ filename parameters in Content-Type and Content-Disposition headers,
+ respectively.
+
-----------------
OBSOLETE FEATURES
char *output_params(size_t initialwidth, PM params, int *offsetout,
int external);
+/*
+ * Encode a parameter value using RFC 2231 encoding.
+ *
+ * Arguments are:
+ *
+ * pm - PM containing the parameter value and related info.
+ * output - Output buffer.
+ * len - Size, in octets, of output buffer.
+ * valuelen - Number of characters in the value
+ * valueoff - Offset into value field (pm->pm_value).
+ * index - If 0, output character set and language tag.
+ */
+size_t encode_param(PM pm, char *output, size_t len, size_t valuelen,
+ size_t valueoff, int index);
+
/*
* Add a parameter to the parameter linked list.
*
-.TH MHFIXMSG %manext1% "September 22, 2016" "%nmhversion%"
+.TH MHFIXMSG %manext1% "October 1, 2016" "%nmhversion%"
.\"
.\" %nmhwarning%
.\"
message types.
.PP
.B mhfixmsg
-applies one transformation unconditionally: it removes an extraneous
-trailing semicolon from the parameter lists of MIME header fields.
+applies two transformations unconditionally.
+The first removes an extraneous trailing semicolon from the parameter
+lists of MIME header field values.
+The second replaces RFC 2047 encoding with RFC 2231 encoding of name
+and filename parameters in Content-Type and Content-Disposition header
+field values, respectively.
.PP
The
.B \-verbose
Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
------- =_aaaaaaaaaa0
-Content-Type: text/plain; charset=UTF-8; name="nul+square.txt"
+Content-Type: text/plain; charset="UTF-8"; name="nul+square.txt"
Content-Transfer-Encoding: base64
vbI9vAAK
Content-Type: multipart/mixed; boundary=\"----- =_aaaaaaaaaa0\"
------- =_aaaaaaaaaa0
-Content-Type: text/plain; charset=UTF-8; name=\"nul+square.txt\"
+Content-Type: text/plain; charset=\"UTF-8\"; name=\"nul+square.txt\"
Content-Transfer-Encoding: binary
" "
check `mhpath last` "$actual"
+start_test "fix RFC 2047 encoded header parameters"
+cat >"$expected" <<EOF
+To: recipient@example.com
+From: sender@example.com
+Date: Wed, 28 Sep 2016 11:24:28 -0400
+Subject: invalid header parameter encoding
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary=001a114dd3e8fe9c56053d92f414
+
+--001a114dd3e8fe9c56053d92f414
+
+Content-Type: text/plain; charset=UTF-8
+
+This is a test.
+
+--001a114dd3e8fe9c4a053d92f412--
+
+--001a114dd3e8fe9c56053d92f414
+Content-Type: text/plain; charset="UTF-8"; name*=UTF-8''test%C3%B8.txt
+Content-Disposition: attachment; filename*=UTF-8''test%C3%B8.txt
+Content-Transfer-Encoding: 8bit
+
+This is the first text/plain part, in a subpart. The file name
+is testø.txt.
+
+--001a114dd3e8fe9c56053d92f414--
+EOF
+
+cat >`mhpath new` <<EOF
+To: recipient@example.com
+From: sender@example.com
+Date: Wed, 28 Sep 2016 11:24:28 -0400
+Subject: invalid header parameter encoding
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary=001a114dd3e8fe9c56053d92f414
+
+--001a114dd3e8fe9c56053d92f414
+
+Content-Type: text/plain; charset=UTF-8
+
+This is a test.
+
+--001a114dd3e8fe9c4a053d92f412--
+
+--001a114dd3e8fe9c56053d92f414
+Content-Type: text/plain; charset=UTF-8; name="=?UTF-8?B?dGVzdMO4LnR4dA==?="
+Content-Disposition: attachment; filename="=?UTF-8?B?dGVzdMO4LnR4dA==?="
+Content-Transfer-Encoding: 8bit
+
+This is the first text/plain part, in a subpart. The file name
+is testø.txt.
+
+--001a114dd3e8fe9c56053d92f414--
+EOF
+run_prog mhfixmsg -file - -outfile - <`mhpath last` >"$actual" 2>/dev/null
+check "$expected" "$actual"
+
+
# make sure there are no tmp files left over
find "$MH_TEST_DIR/Mail" \( -name 'mhfix*' -o -name ',mhfix*' \) -print \
>"$actual"
static int strip_crs (CT, int *);
static int convert_charsets (CT, char *, int *);
static int fix_always (CT, int *);
+static int fix_filename_param (char *, char *, PM *, PM *);
+static int fix_filename_encoding (CT);
static int write_content (CT, const char *, char *, int, int);
static void set_text_ctparams(CT, char *, int);
static int remove_file (const char *);
default: {
HF hf;
+ if (ct->c_first_hf) {
+ fix_filename_encoding (ct);
+ }
+
for (hf = ct->c_first_hf; hf; hf = hf->next) {
size_t len = strlen (hf->value);
}
+/*
+ * Common code for the parameter loops in fix_filename_encoding().
+ *
+ * If value starts with "=?" and ends with "?=", it is treated as an
+ * RFC 2047 encoded-word:  it is decoded, and the parameter called name
+ * in the list delimited by *first_pm and *last_pm is replaced with the
+ * decoded text.
+ *
+ * Returns 1 if the parameter was replaced, 0 if value did not look
+ * encoded or could not be decoded (the latter is reported via advise()).
+ */
+static int
+fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
+    char plain[PATH_MAX + 1];
+    const size_t n = strlen (value);
+
+    /* The prefix is tested before the suffix, so that the suffix test is
+       never reached with a value shorter than two characters. */
+    if (n == 0  ||
+        strncmp (value, "=?", 2) != 0  ||
+        strncmp (&value[n - 2], "?=", 2) != 0) {
+        /* Doesn't look like an RFC 2047 encoded parameter. */
+        return 0;
+    }
+
+    if (! decode_rfc2047 (value, plain, sizeof plain)) {
+        advise (NULL, "failed to decode %s parameter %s", name, value);
+        return 0;
+    }
+
+    /* Store the decoded value, to be encoded using RFC 2231. */
+    replace_param (first_pm, last_pm, name, plain, 0);
+
+    return 1;
+}
+
+
+/*
+ * Replace RFC 2047 encoding with RFC 2231 encoding of name and
+ * filename parameters in Content-Type and Content-Disposition
+ * headers, respectively.
+ *
+ * The parameter lists of ct (c_ctinfo.ci_first_pm and c_dispo_first)
+ * are fixed first.  If at least one parameter in either list was
+ * replaced, the parameter portion (from the first semicolon on) of
+ * each Content-Type and Content-Disposition header field value of ct
+ * is regenerated from those lists, so that the header text corresponds
+ * to the parameters.
+ *
+ * Always returns OK.
+ */
+static int
+fix_filename_encoding (CT ct) {
+    PM pm;
+    HF hf;
+    int fixed = 0;
+
+    /*
+     * Accumulate the result of each fix_filename_param() call instead
+     * of assigning it:  a parameter that needs no fix must not clear
+     * the record of one that was fixed earlier, or the header field
+     * values below would not be updated to match.
+     */
+    for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
+        if (pm->pm_name && pm->pm_value &&
+            strcasecmp (pm->pm_name, "name") == 0) {
+            if (fix_filename_param (pm->pm_name, pm->pm_value,
+                                    &ct->c_ctinfo.ci_first_pm,
+                                    &ct->c_ctinfo.ci_last_pm)) {
+                fixed = 1;
+            }
+        }
+    }
+
+    for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
+        if (pm->pm_name && pm->pm_value &&
+            strcasecmp (pm->pm_name, "filename") == 0) {
+            if (fix_filename_param (pm->pm_name, pm->pm_value,
+                                    &ct->c_dispo_first,
+                                    &ct->c_dispo_last)) {
+                fixed = 1;
+            }
+        }
+    }
+
+    /* Fix hf values to correspond. */
+    for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
+        enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
+
+        if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
+            field = TYPE_HEADER;
+        } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
+            field = DISPO_HEADER;
+        }
+
+        if (field != OTHER) {
+            const char *const semicolon_loc = strchr (hf->value, ';');
+
+            if (semicolon_loc) {
+                /* Initial width for output_params():  the field name,
+                   one more character (presumably the colon), and the
+                   part of the value that precedes the semicolon. */
+                const size_t len =
+                    strlen (hf->name) + 1 + semicolon_loc - hf->value;
+                const char *const params =
+                    output_params (len,
+                                   field == TYPE_HEADER
+                                   ? ct->c_ctinfo.ci_first_pm
+                                   : ct->c_dispo_first,
+                                   NULL, 0);
+                const char *const new_params = concat (params, "\n", NULL);
+
+                /* Replace everything from the semicolon on with the
+                   newly formatted parameters. */
+                replace_substring (&hf->value, semicolon_loc, new_params);
+                free ((char *) new_params);
+                free ((char *) params);
+            } else {
+                advise (NULL, "did not find semicolon in %s:%s\n",
+                        hf->name, hf->value);
+            }
+        }
+    }
+
+    return OK;
+}
+
+
/*
* Output content in input file to output file.
*/
/*
- * parse_mime() does not set lf_line_endings in struct text, so use this function to do it.
- * It touches the parts the decodetypes identifies.
+ * parse_mime() does not set lf_line_endings in struct text, so use this
+ * function to do it. It touches the parts the decodetypes identifies.
*/
static void
set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
static int parse_header_attrs (const char *, const char *, char **, PM *,
PM *, char **);
static size_t param_len(PM, int, size_t, int *, int *, size_t *);
-static size_t encode_param(PM, char *, size_t, size_t, size_t, int);
static size_t normal_param(PM, char *, size_t, size_t, size_t);
static int get_dispo (char *, CT, int);
* Output an encoded parameter string.
*/
-static size_t
+size_t
encode_param(PM pm, char *output, size_t len, size_t valuelen,
size_t valueoff, int index)
{