mhfixmsg now replaces RFC 2047 encoding with RFC 2231 encoding of

author David Levine <levinedl@acm.org>

Sat, 1 Oct 2016 18:37:27 +0000 (14:37 -0400)

committer David Levine <levinedl@acm.org>

Sat, 1 Oct 2016 18:37:27 +0000 (14:37 -0400)
author David Levine <levinedl@acm.org>
Sat, 1 Oct 2016 18:37:27 +0000 (14:37 -0400)
committer David Levine <levinedl@acm.org>
Sat, 1 Oct 2016 18:37:27 +0000 (14:37 -0400)
diff --git a/docs/pending-release-notes b/docs/pending-release-notes

index f44eb066dbcd17dfbb1d9757a61301388b53082c..cb3f09d8482ad98e4508bcf2e1ef3476348c42a6 100644 (file)
--- a/docs/pending-release-notes
+++ b/docs/pending-release-notes
@@ -52,6 +52,10 @@ NEW FEATURES
  - Support for SMTPUTF8 (RFC 6531) has been added.  mhshow(1) already supported
    RFC 6532, assuming all 8-bit message header field bodies are UTF-8 and use
    of a UTF-8 locale.
+- mhfixmsg now replaces RFC 2047 encoding with RFC 2231 encoding of name and
+  filename parameters in Content-Type and Content-Disposition headers,
+  respectively.
+
  
  -----------------
  OBSOLETE FEATURES
diff --git a/h/mhparse.h b/h/mhparse.h

index 08dd77c11180d8cad08f6248647ffd289502fecb..c1564c9971982f95343e6ab7687eb2c8468dec95 100644 (file)
--- a/h/mhparse.h
+++ b/h/mhparse.h
@@ -427,6 +427,21 @@ int list_switch(CT ct, int toplevel, int realsize, int verbose, int debug,
  char *output_params(size_t initialwidth, PM params, int *offsetout,
                     int external);
  
+/*
+ * Encode a parameter value using RFC 2231 encoding.
+ *
+ * Arguments are:
+ *
+ * pm           - PM containing the parameter value and related info.
+ * output       - Output buffer.
+ * len          - Size, in octets, of output buffer.
+ * valuelen     - Number of characters in the value.
+ * valueoff     - Offset into value field (pm->pm_value).
+ * index        - If 0, output character set and language tag.
+ *
+ * NOTE(review): the meaning of the size_t return value is not stated
+ * here -- confirm against the definition in uip/mhparse.c.
+ */
+size_t encode_param(PM pm, char *output, size_t len, size_t valuelen,
+                    size_t valueoff, int index);
+
  /*
   * Add a parameter to the parameter linked list.
   *
diff --git a/man/mhfixmsg.man b/man/mhfixmsg.man

index 38c322f9318ada0b1fecba20ef66f55a6e271f50..88c08a79ae08293d3a012d15e960cd2ea29337c4 100644 (file)
--- a/man/mhfixmsg.man
+++ b/man/mhfixmsg.man
@@ -1,4 +1,4 @@
-.TH MHFIXMSG %manext1% "September 22, 2016" "%nmhversion%"
+.TH MHFIXMSG %manext1% "October 1, 2016" "%nmhversion%"
  .\"
  .\" %nmhwarning%
  .\"
@@ -208,8 +208,12 @@ more descriptive MIME type.  It may not be used for multipart and
  message types.
  .PP
  .B mhfixmsg
-applies one transformation unconditionally:  it removes an extraneous
-trailing semicolon from the parameter lists of MIME header fields.
+applies two transformations unconditionally.
+The first removes an extraneous trailing semicolon from the parameter
+lists of MIME header field values.
+The second replaces RFC 2047 encoding with RFC 2231 encoding of name
+and filename parameters in Content-Type and Content-Disposition header
+field values, respectively.
  .PP
  The
  .B \-verbose
diff --git a/test/mhfixmsg/test-mhfixmsg b/test/mhfixmsg/test-mhfixmsg

index 152b11c7913c1ca4f58175d02c00ee664fe047a2..e5df143067875c1a5fcc832af00ac15c8fc74367 100755 (executable)
--- a/test/mhfixmsg/test-mhfixmsg
+++ b/test/mhfixmsg/test-mhfixmsg
@@ -586,7 +586,7 @@ MIME-Version: 1.0
  Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
  
  ------- =_aaaaaaaaaa0
-Content-Type: text/plain; charset=UTF-8; name="nul+square.txt"
+Content-Type: text/plain; charset="UTF-8"; name="nul+square.txt"
  Content-Transfer-Encoding: base64
  
  vbI9vAAK
@@ -727,7 +727,7 @@ MIME-Version: 1.0
  Content-Type: multipart/mixed; boundary=\"----- =_aaaaaaaaaa0\"
  
  ------- =_aaaaaaaaaa0
-Content-Type: text/plain; charset=UTF-8; name=\"nul+square.txt\"
+Content-Type: text/plain; charset=\"UTF-8\"; name=\"nul+square.txt\"
  Content-Transfer-Encoding: binary
  
  " "
@@ -1715,6 +1715,64 @@ set -e
  check `mhpath last` "$actual"
  
  
+start_test "fix RFC 2047 encoded header parameters"
+cat >"$expected" <<EOF
+To: recipient@example.com
+From: sender@example.com
+Date: Wed, 28 Sep 2016 11:24:28 -0400
+Subject: invalid header parameter encoding
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary=001a114dd3e8fe9c56053d92f414
+
+--001a114dd3e8fe9c56053d92f414
+
+Content-Type: text/plain; charset=UTF-8
+
+This is a test.
+
+--001a114dd3e8fe9c4a053d92f412--
+
+--001a114dd3e8fe9c56053d92f414
+Content-Type: text/plain; charset="UTF-8"; name*=UTF-8''test%C3%B8.txt
+Content-Disposition: attachment; filename*=UTF-8''test%C3%B8.txt
+Content-Transfer-Encoding: 8bit
+
+This is the first text/plain part, in a subpart.  The file name
+is testø.txt.
+
+--001a114dd3e8fe9c56053d92f414--
+EOF
+
+cat >`mhpath new` <<EOF
+To: recipient@example.com
+From: sender@example.com
+Date: Wed, 28 Sep 2016 11:24:28 -0400
+Subject: invalid header parameter encoding
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary=001a114dd3e8fe9c56053d92f414
+
+--001a114dd3e8fe9c56053d92f414
+
+Content-Type: text/plain; charset=UTF-8
+
+This is a test.
+
+--001a114dd3e8fe9c4a053d92f412--
+
+--001a114dd3e8fe9c56053d92f414
+Content-Type: text/plain; charset=UTF-8; name="=?UTF-8?B?dGVzdMO4LnR4dA==?="
+Content-Disposition: attachment; filename="=?UTF-8?B?dGVzdMO4LnR4dA==?="
+Content-Transfer-Encoding: 8bit
+
+This is the first text/plain part, in a subpart.  The file name
+is testø.txt.
+
+--001a114dd3e8fe9c56053d92f414--
+EOF
+run_prog mhfixmsg -file - -outfile - <`mhpath last` >"$actual" 2>/dev/null
+check "$expected" "$actual"
+
+
  # make sure there are no tmp files left over
  find "$MH_TEST_DIR/Mail" \( -name 'mhfix*' -o -name ',mhfix*' \) -print \
    >"$actual"
diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c

index 0fbff400794fe36463cb76c2047b8b4b96ed77a8..2bc65a9d2d1ffb79457563f57f367f0bb2959879 100644 (file)
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -119,6 +119,8 @@ static int content_encoding (CT, const char **);
  static int strip_crs (CT, int *);
  static int convert_charsets (CT, char *, int *);
  static int fix_always (CT, int *);
+static int fix_filename_param (char *, char *, PM *, PM *);
+static int fix_filename_encoding (CT);
  static int write_content (CT, const char *, char *, int, int);
  static void set_text_ctparams(CT, char *, int);
  static int remove_file (const char *);
@@ -2375,6 +2377,10 @@ fix_always (CT ct, int *message_mods) {
      default: {
          HF hf;
  
+        if (ct->c_first_hf) {
+            fix_filename_encoding (ct);
+        }
+
          for (hf = ct->c_first_hf; hf; hf = hf->next) {
              size_t len = strlen (hf->value);
  
@@ -2424,6 +2430,100 @@ fix_always (CT ct, int *message_mods) {
  }
  
  
+/*
+ * Helper shared by the parameter loops in fix_filename_encoding().
+ *
+ * If value has the outward shape of an RFC 2047 encoded word (it
+ * begins with "=?" and ends with "?="), decode it and hand the
+ * decoded text to replace_param() for the parameter called name in
+ * the list delimited by *first_pm and *last_pm.
+ *
+ * Returns 1 if the parameter was replaced, 0 if it was left alone,
+ * either because it does not look encoded or because decoding failed
+ * (in which case a message is issued).
+ */
+static int
+fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
+    const size_t nchars = strlen (value);
+    char plain[PATH_MAX + 1];
+
+    /* A one-character value cannot pass the "=?" test, so the tail
+       comparison below never looks in front of the string. */
+    if (nchars == 0  ||  strncmp (value, "=?", 2) != 0) {
+        return 0;
+    }
+    if (strncmp (value + nchars - 2, "?=", 2) != 0) {
+        return 0;
+    }
+
+    if (! decode_rfc2047 (value, plain, sizeof plain)) {
+        advise (NULL, "failed to decode %s parameter %s", name, value);
+        return 0;
+    }
+
+    /* Encode using RFC 2231. */
+    replace_param (first_pm, last_pm, name, plain, 0);
+    return 1;
+}
+
+
+/*
+ * Replace RFC 2047 encoding with RFC 2231 encoding of name and
+ * filename parameters in Content-Type and Content-Disposition
+ * headers, respectively.
+ *
+ * The parameter lists of ct are fixed first.  If any parameter was
+ * changed, the text of the Content-Type and Content-Disposition
+ * header fields of ct is then regenerated from those lists.
+ *
+ * Always returns OK.
+ */
+static int
+fix_filename_encoding (CT ct) {
+    PM pm;
+    HF hf;
+    int fixed = 0;
+
+    /*
+     * Accumulate into fixed instead of assigning to it:  a parameter
+     * that needs no change must not hide an earlier one that was
+     * re-encoded, or the header field text below would be left stale.
+     */
+    for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
+        if (pm->pm_name  &&  pm->pm_value  &&
+            strcasecmp (pm->pm_name, "name") == 0) {
+            fixed |= fix_filename_param (pm->pm_name, pm->pm_value,
+                                         &ct->c_ctinfo.ci_first_pm,
+                                         &ct->c_ctinfo.ci_last_pm);
+        }
+    }
+
+    for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
+        if (pm->pm_name  &&  pm->pm_value  &&
+            strcasecmp (pm->pm_name, "filename") == 0) {
+            fixed |= fix_filename_param (pm->pm_name, pm->pm_value,
+                                         &ct->c_dispo_first,
+                                         &ct->c_dispo_last);
+        }
+    }
+
+    /* Fix hf values to correspond. */
+    for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
+        enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
+
+        if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
+            field = TYPE_HEADER;
+        } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
+            field = DISPO_HEADER;
+        }
+
+        if (field != OTHER) {
+            const char *const semicolon_loc = strchr (hf->value, ';');
+
+            if (semicolon_loc) {
+                /* Width already used on the line:  field name, colon,
+                   and the text in front of the first semicolon. */
+                const size_t len =
+                    strlen (hf->name) + 1 + semicolon_loc - hf->value;
+                const char *const params =
+                    output_params (len,
+                                   field == TYPE_HEADER
+                                   ? ct->c_ctinfo.ci_first_pm
+                                   : ct->c_dispo_first,
+                                   NULL, 0);
+                /* NOTE(review): presumably restores the newline that
+                   ends hf->value -- confirm. */
+                const char *const new_params = concat (params, "\n", NULL);
+
+                /* NOTE(review): presumably replaces everything from the
+                   first semicolon onward -- confirm. */
+                replace_substring (&hf->value, semicolon_loc, new_params);
+                free ((char *) new_params);
+                free ((char *) params);
+            } else {
+                /* No trailing newline in the format, matching the
+                   advise() call in fix_filename_param(). */
+                advise (NULL, "did not find semicolon in %s:%s",
+                        hf->name, hf->value);
+            }
+        }
+    }
+
+    return OK;
+}
+
+
  /*
   * Output content in input file to output file.
   */
@@ -2502,8 +2602,8 @@ write_content (CT ct, const char *input_filename, char *outfile, int modify_inpl
  
  
  /*
- * parse_mime() does not set lf_line_endings in struct text, so use this function to do it.
- * It touches the parts the decodetypes identifies.
+ * parse_mime() does not set lf_line_endings in struct text, so use this
+ * function to do it.  It touches the parts that decodetypes identifies.
   */
  static void
  set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
diff --git a/uip/mhparse.c b/uip/mhparse.c

index 7ec030badd20f0e5c745e300a47662e9f3d84838..40fe61d5f8fe69c95438cd145abc481a6b576044 100644 (file)
--- a/uip/mhparse.c
+++ b/uip/mhparse.c
@@ -151,7 +151,6 @@ static int openURL (CT, char **);
  static int parse_header_attrs (const char *, const char *, char **, PM *,
                                PM *, char **);
  static size_t param_len(PM, int, size_t, int *, int *, size_t *);
-static size_t encode_param(PM, char *, size_t, size_t, size_t, int);
  static size_t normal_param(PM, char *, size_t, size_t, size_t);
  static int get_dispo (char *, CT, int);
  
@@ -3981,7 +3980,7 @@ param_len(PM pm, int index, size_t valueoff, int *encode, int *cont,
   * Output an encoded parameter string.
   */
  
-static size_t
+size_t
  encode_param(PM pm, char *output, size_t len, size_t valuelen,
               size_t valueoff, int index)
  {
author	David Levine <levinedl@acm.org>
	Sat, 1 Oct 2016 18:37:27 +0000 (14:37 -0400)
committer	David Levine <levinedl@acm.org>
	Sat, 1 Oct 2016 18:37:27 +0000 (14:37 -0400)
docs/pending-release-notes		patch \| blob \| history
h/mhparse.h		patch \| blob \| history
man/mhfixmsg.man		patch \| blob \| history
test/mhfixmsg/test-mhfixmsg		patch \| blob \| history
uip/mhfixmsg.c		patch \| blob \| history
uip/mhparse.c		patch \| blob \| history