A better fix than e87f37c27828723317a71291e31b34f39ec09098, because

[nmh] / uip / mhfixmsg.c
diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c

index 55db2c0b0f55de587bd93d60460d8874c963f186..6195b316501327999bc74f083ab6c82aeb22d76c 100644 (file)
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -108,7 +108,6 @@ static CT divide_part (CT);
  static void copy_ctinfo (CI, CI);
  static int decode_part (CT);
  static int reformat_part (CT, char *, char *, char *, int);
-static int charset_encoding (CT);
  static CT build_multipart_alt (CT, CT, int, int);
  static int boundary_in_content (FILE **, char *, const char *);
  static void transfer_noncontent_headers (CT, CT);
@@ -117,6 +116,9 @@ static int decode_text_parts (CT, int, const char *, int *);
  static int should_decode(const char *, const char *, const char *);
  static int content_encoding (CT, const char **);
  static int strip_crs (CT, int *);
+static void update_cte (CT);
+static int least_restrictive_encoding (CT);
+static int less_restrictive (int, int);
  static int convert_charsets (CT, char *, int *);
  static int fix_always (CT, int *);
  static int fix_filename_param (char *, char *, PM *, PM *);
@@ -150,7 +152,7 @@ main (int argc, char **argv) {
      fx.lf_line_endings = 0;
      fx.textcharset = NULL;
  
-    if (nmh_init(argv[0], 1)) { return 1; }
+    if (nmh_init(argv[0], 2)) { return 1; }
  
      done = freects_done;
  
@@ -526,7 +528,9 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
              ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
      }
      if (status == OK  &&  fx->decodetext) {
-        status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods);
+        status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
+                                    &message_mods);
+        update_cte (*ctp);
      }
      if (status == OK  &&  fx->textcharset != NULL) {
          status = convert_charsets (*ctp, fx->textcharset, &message_mods);
@@ -1156,6 +1160,10 @@ set_ce (CT ct, int encoding) {
             ct->c_cefile.ce_file to the name of the file containing
             the contents. */
  
+        if (ct->c_ceclosefnx) {
+            (*ct->c_ceclosefnx) (ct);
+        }
+
          /* Restore the cefile. */
          ct->c_cefile = decoded_content_info;
  
@@ -1581,6 +1589,7 @@ decode_part (CT ct) {
  static int
  reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
      int output_subtype, output_encoding;
+    const char *reason = NULL;
      char *cp, *cf;
      int status;
  
@@ -1626,8 +1635,8 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
          /* Set subtype to 0, which is always an UNKNOWN subtype. */
          output_subtype = 0;
      }
-    output_encoding = charset_encoding (ct);
  
+    output_encoding = content_encoding (ct, &reason);
      if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
          ct->c_cefile.ce_file = file;
          ct->c_cefile.ce_unlink = 1;
@@ -1640,20 +1649,6 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
  }
  
  
-/*
- * Identifies 7bit or 8bit content based on charset.
- */
-static int
-charset_encoding (CT ct) {
-    char *ct_charset = content_charset (ct);
-    int encoding = strcasecmp (ct_charset, "US-ASCII")  ?  CE_8BIT  :  CE_7BIT;
-
-    free (ct_charset);
-
-    return encoding;
-}
-
-
  /*
   * Fill in a multipart/alternative part.
   */
@@ -1925,7 +1920,8 @@ set_ct_type (CT ct, int type, int subtype, int encoding) {
   * that character set again after decoding."
   */
  static int
-decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
+decode_text_parts (CT ct, int encoding, const char *decodetypes,
+                   int *message_mods) {
      int status = OK;
      int lf_line_endings = 0;
  
@@ -1937,7 +1933,8 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mo
          /* Should check to see if the body for this part is encoded?
             For now, it gets passed along as-is by InitMultiPart(). */
          for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
-            status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods);
+            status = decode_text_parts (part->mp_part, encoding, decodetypes,
+                                        message_mods);
          }
          break;
      }
@@ -1946,7 +1943,8 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mo
          if (ct->c_subtype == MESSAGE_EXTERNAL) {
              struct exbody *e = (struct exbody *) ct->c_ctparams;
  
-            status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods);
+            status = decode_text_parts (e->eb_content, encoding, decodetypes,
+                                        message_mods);
          }
          break;
  
@@ -1997,12 +1995,13 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mo
                      ct->c_cefile.ce_file = NULL;
                  } else {
                      int enc;
+
                      if (ct_encoding == CE_BINARY) {
                          enc = CE_BINARY;
                      } else if (ct_encoding == CE_8BIT  &&  encoding == CE_7BIT) {
                          enc = CE_QUOTED;
                      } else {
-                        enc = charset_encoding (ct);
+                        enc = ct_encoding;
                      }
                      if (set_ce (ct, enc) == OK) {
                          ++*message_mods;
@@ -2287,6 +2286,103 @@ strip_crs (CT ct, int *message_mods) {
  }
  
  
+/*
+ * Add/update, if necessary, the message C-T-E, based on the least restrictive
+ * of the part C-T-E's.  A 7bit or unknown result leaves the header untouched.
+ */
+static void
+update_cte (CT ct) {
+    const int least_restrictive_enc = least_restrictive_encoding (ct);
+
+    if (least_restrictive_enc != CE_UNKNOWN  &&
+        least_restrictive_enc != CE_7BIT) {
+        char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
+        HF hf;
+        int found_cte = 0;
+
+        /* Update/add the field; a repeated field gets its own copy of cte. */
+        for (hf = ct->c_first_hf; hf; hf = hf->next) {
+            if (! strcasecmp (ENCODING_FIELD, hf->name)) {
+                free (hf->value);
+                hf->value = found_cte  ?  add (cte, NULL)  :  cte;
+                found_cte = 1;
+            }
+        }
+        if (! found_cte) {
+            add_header (ct, add (ENCODING_FIELD, NULL), cte);
+        }
+    }
+}
+
+
+/*
+ * Walk a content tree and report the least restrictive encoding (7bit,
+ * 8bit, binary) among the parts it reaches.  The answer is CE_UNKNOWN
+ * when no part has one of those three encodings.
+ */
+static int
+least_restrictive_encoding (CT ct) {
+    int result = CE_UNKNOWN;
+    int candidate = CE_UNKNOWN;
+
+    if (ct->c_type == CT_MULTIPART) {
+        /* Fold the result of every subpart into the running answer. */
+        struct multipart *mp = (struct multipart *) ct->c_ctparams;
+        struct part *p = mp->mp_parts;
+
+        while (p) {
+            candidate = least_restrictive_encoding (p->mp_part);
+
+            if (less_restrictive (result, candidate)) {
+                result = candidate;
+            }
+            p = p->mp_next;
+        }
+    } else if (ct->c_type == CT_MESSAGE) {
+        /* Only descend into the MESSAGE_EXTERNAL subtype. */
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *eb = (struct exbody *) ct->c_ctparams;
+
+            candidate = least_restrictive_encoding (eb->eb_content);
+
+            if (less_restrictive (result, candidate)) {
+                result = candidate;
+            }
+        }
+    } else {
+        /* Any other type: its own encoding is the candidate. */
+        candidate = ct->c_encoding;
+        if (less_restrictive (result, candidate)) {
+            result = candidate;
+        }
+    }
+
+    return result;
+}
+
+
+/*
+ * Report whether second_encoding is less restrictive than encoding, with
+ * "less restrictive" meant as in RFC 2045 Secs. 6.1 and 6.4:
+ *   CE_BINARY is less restrictive than CE_8BIT, and
+ *   CE_8BIT is less restrictive than CE_7BIT.
+ * A first encoding that is none of those three ranks below all of them; a
+ * second encoding that is none of those three always yields 0.
+ */
+static int
+less_restrictive (int encoding, int second_encoding) {
+    const int below_binary = encoding != CE_BINARY;
+    const int below_8bit = below_binary  &&  encoding != CE_8BIT;
+    const int below_7bit = below_8bit  &&  encoding != CE_7BIT;
+
+    if (second_encoding == CE_BINARY) { return below_binary; }
+    if (second_encoding == CE_8BIT) { return below_8bit; }
+    if (second_encoding == CE_7BIT) { return below_7bit; }
+
+    return 0;
+}
+
+
  /*
   * Convert character set of each part.
   */