X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/e2eebc77cc570f77d906bc0cc6890092068468cb..f1920d78123667716f2321d37ce37628603b2700:/uip/mhfixmsg.c

diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c
index af6cd889..65abd513 100644
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -16,6 +16,9 @@
 #define MHFIXMSG_SWITCHES \
     X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
     X("nodecodetext", 0, NDECODETEXTSW) \
+    X("decodetypes", 0, DECODETYPESW) \
+    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
+    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
     X("textcharset", 0, TEXTCHARSETSW) \
     X("notextcharset", 0, NTEXTCHARSETSW) \
     X("reformat", 0, REFORMATSW) \
@@ -80,6 +83,9 @@ typedef struct fix_transformations {
     int reformat;
     int replacetextplain;
     int decodetext;
+    char *decodetypes;
+    /* Nonzero to use LF instead of CRLF (RFC 2046 Sec. 4.1.1, par.1) linebreaks. */
+    int lf_line_endings;
     char *textcharset;
 } fix_transformations;
 
@@ -89,12 +95,14 @@ static int get_multipart_boundary (CT, char **);
 static int replace_boundary (CT, char *, char *);
 static int fix_types (CT, svector_t, int *);
 static char *replace_substring (char **, const char *, const char *);
+static char *remove_parameter (char *, const char *);
 static int fix_multipart_cte (CT, int *);
 static int set_ce (CT, int);
 static int ensure_text_plain (CT *, CT, int *, int);
 static int find_textplain_sibling (CT, int, int *);
 static int insert_new_text_plain_part (CT, int, CT);
 static CT build_text_plain_part (CT);
+static int insert_into_new_mp_alt (CT *, int *);
 static CT divide_part (CT);
 static void copy_ctinfo (CI, CI);
 static int decode_part (CT);
@@ -104,12 +112,14 @@ static CT build_multipart_alt (CT, CT, int, int);
 static int boundary_in_content (FILE **, char *, const char *);
 static void transfer_noncontent_headers (CT, CT);
 static int set_ct_type (CT, int type, int subtype, int encoding);
-static int decode_text_parts (CT, int, int *);
+static int decode_text_parts (CT, int, const char *, int *);
+static int should_decode(const char *, const char *, const char *);
 static int content_encoding (CT, const char **);
 static int strip_crs (CT, int *);
 static int convert_charsets (CT, char *, int *);
 static int fix_always (CT, int *);
 static int write_content (CT, char *, char *, int, int);
+static void set_text_ctparams(CT, char *, int);
 static int remove_file (char *);
 static void report (char *, char *, char *, char *, ...);
 static void pipeser (int);
@@ -133,6 +143,8 @@ main (int argc, char **argv) {
     fx.fixtypes = NULL;
     fx.replacetextplain = 0;
     fx.decodetext = CE_8BIT;
+    fx.decodetypes = "text,application/ics";  /* Default, per man page. */
+    fx.lf_line_endings = 0;
     fx.textcharset = NULL;
 
     if (nmh_init(argv[0], 1)) { return 1; }
@@ -177,6 +189,17 @@ main (int argc, char **argv) {
             case NDECODETEXTSW:
                 fx.decodetext = 0;
                 continue;
+            case DECODETYPESW:
+                if (! (cp = *argp++)  ||  *cp == '-')
+                    adios (NULL, "missing argument to %s", argp[-2]);
+                fx.decodetypes = cp;
+                continue;
+            case CRLFLINEBREAKSSW:
+                fx.lf_line_endings = 0;
+                continue;
+            case NCRLFLINEBREAKSSW:
+                fx.lf_line_endings = 1;
+                continue;
             case TEXTCHARSETSW:
                 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
                     adios (NULL, "missing argument to %s", argp[-2]);
@@ -323,7 +346,10 @@ main (int argc, char **argv) {
         }
         ctp = cts;
 
-        if ((ct = parse_mime (file))) { *ctp++ = ct; }
+        if ((ct = parse_mime (file))) {
+            set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
+            *ctp++ = ct;
+        }
     } else {
         /*
          * message(s) are coming from a folder
@@ -364,7 +390,10 @@ main (int argc, char **argv) {
                 char *msgnam;
 
                 msgnam = m_name (msgnum);
-                if ((ct = parse_mime (msgnam))) { *ctp++ = ct; }
+                if ((ct = parse_mime (msgnam))) {
+                    set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
+                    *ctp++ = ct;
+                }
             }
         }
 
@@ -443,7 +472,7 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
             ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
     }
     if (status == OK  &&  fx->decodetext) {
-        status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
+        status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods);
     }
     if (status == OK  &&  fx->textcharset != NULL) {
         status = convert_charsets (*ctp, fx->textcharset, &message_mods);
@@ -554,7 +583,16 @@ fix_boundary (CT *ct, int *message_mods) {
                 }
 
                 free (part_boundary);
+            } else {
+                /* Couldn't fix the boundary.  Report failure so that mhfixmsg
+                   doesn't modify the message. */
+                status = NOTOK;
             }
+        } else {
+            /* No multipart struct, even though the content type is
+               CT_MULTIPART.  Report failure so that mhfixmsg doesn't modify
+               the message. */
+            status = NOTOK;
         }
     }
 
@@ -885,6 +923,51 @@ replace_substring (char **str, const char *old, const char *new) {
     }
 }
 
+/*
+ * Remove a name=value parameter, given just its name, from a header value.
+ * The edit is done in place; str is returned whether or not it changed.
+ */
+static char *
+remove_parameter (char *str, const char *name) {
+    /* It looks to me, based on the BNF in RFC 2045, that there can't
+       be whitespace between the parameter name and the "=", or
+       between the "=" and the parameter value. */
+    char *param_name = concat (name, "=", NULL);
+    char *cp;
+
+    if ((cp = strstr (str, param_name))) {
+        char *start, *end;
+
+        /* Remove any leading spaces, before the parameter name. */
+        for (start = cp;
+             start > str && isspace ((unsigned char) *(start-1));
+             --start) {
+            continue;
+        }
+        /* Remove a leading semicolon. */
+        if (start > str  &&  *(start-1) == ';') { --start; }
+
+        end = cp + strlen (name) + 1;
+        if (*end == '"') {
+            /* Skip past the quoted value, then the final quote if present. */
+            for (++end ; *end  &&  *end != '"'; ++end) { continue; }
+            if (*end == '"') { ++end; }
+        } else {
+            /* Skip past the value, which may be empty.  A ';' ends it so
+               that the separator of any following parameter survives. */
+            while (*end  &&  *end != ';'  &&  ! isspace ((unsigned char) *end)) {
+                ++end;
+            }
+        }
+
+        /* Close the gap, moving the trailing null as well. */
+        (void) memmove (start, end, strlen (end) + 1);
+    }
+
+    free (param_name);
+
+    return str;
+}
 
 static int
 fix_multipart_cte (CT ct, int *message_mods) {
@@ -1063,51 +1146,72 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
             free (type_subtype);
 
             if (! has_text_plain) {
-                /* Parent is a multipart/alternative.  Insert a new
-                   text/plain subpart. */
-                const int inserted =
-                    insert_new_text_plain_part (*ct, new_subpart_number,
-                                                parent);
-                if (inserted) {
-                    ++*message_mods;
-                    if (verbosw) {
-                        report (NULL, parent->c_partno, parent->c_file,
-                                "insert text/plain part");
+                struct multipart *mp = (struct multipart *) parent->c_ctparams;
+                struct part *part;
+                int siblings = 0;
+
+                for (part = mp->mp_parts; part; part = part->mp_next) {
+                    if (*ct != part->mp_part) {
+                        ++siblings;
                     }
-                } else {
-                    status = NOTOK;
                 }
-            }
-        } else {
-            /* Slip new text/plain part into a new multipart/alternative. */
-            CT tp_part = build_text_plain_part (*ct);
 
-            if (tp_part) {
-                CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
-                                                 MULTI_ALTERNATE);
-                if (mp_alt) {
-                    struct multipart *mp =
-                        (struct multipart *) mp_alt->c_ctparams;
+                if (siblings) {
+                    /* Parent is a multipart/related.  Insert a new
+                       text/plain subpart in a new multipart/alternative. */
+                    if (insert_into_new_mp_alt (ct, message_mods)) {
+                        /* Not an error if text/plain couldn't be added. */
+                    }
+                } else {
+                    /* There are no siblings, so insert a new text/plain
+                       subpart, and change the parent type from
+                       multipart/related to multipart/alternative. */
+                    const int inserted =
+                        insert_new_text_plain_part (*ct, new_subpart_number,
+                                                    parent);
+
+                    if (inserted) {
+                        HF hf;
 
-                    if (mp  &&  mp->mp_parts) {
-                        mp->mp_parts->mp_part = tp_part;
-                        /* Make the new multipart/alternative the parent. */
-                        *ct = mp_alt;
+                        parent->c_subtype = MULTI_ALTERNATE;
+                        parent->c_ctinfo.ci_subtype = getcpy ("alternative");
+                        if (! replace_substring (&parent->c_ctline, "/related",
+                                                 "/alternative")) {
+                            advise (NULL,
+                                    "did not find multipart/related in %s",
+                                    parent->c_ctline);
+                        }
 
-                        ++*message_mods;
-                        if (verbosw) {
-                            report (NULL, (*ct)->c_partno, (*ct)->c_file,
-                                    "insert text/plain part");
+                        /* Update Content-Type header field. */
+                        for (hf = parent->c_first_hf; hf; hf = hf->next) {
+                            if (! strcasecmp (TYPE_FIELD, hf->name)) {
+                                if (replace_substring (&hf->value, "/related",
+                                                       "/alternative")) {
+                                    ++*message_mods;
+                                    if (verbosw) {
+                                        report (NULL, parent->c_partno,
+                                                parent->c_file,
+                                                "insert text/plain part");
+                                    }
+
+                                    /* Remove, e.g., type="text/html" from
+                                       multipart/alternative. */
+                                    remove_parameter (hf->value, "type");
+                                    break;
+                                } else {
+                                    advise (NULL, "did not find multipart/"
+                                                  "related in header %s",
+                                            hf->value);
+                                }
+                            }
                         }
                     } else {
-                        free_content (tp_part);
-                        free_content (mp_alt);
-                        status = NOTOK;
+                        /* Not an error if text/plain couldn't be inserted. */
                     }
-                } else {
-                    status = NOTOK;
                 }
-            } else {
+            }
+        } else {
+            if (insert_into_new_mp_alt (ct, message_mods)) {
                 status = NOTOK;
             }
         }
@@ -1240,6 +1344,43 @@ build_text_plain_part (CT encoded_part) {
 }
 
 
+/* Slip a new text/plain part into a new multipart/alternative, which then
+   replaces *ct.  Returns NOTOK only if the multipart couldn't be set up. */
+static int
+insert_into_new_mp_alt (CT *ct, int *message_mods) {
+    CT text_plain = build_text_plain_part (*ct);
+    CT alt;
+    struct multipart *m;
+
+    if (! text_plain) {
+        /* Not an error if text/plain couldn't be built. */
+        return OK;
+    }
+
+    alt = build_multipart_alt (*ct, text_plain, CT_MULTIPART, MULTI_ALTERNATE);
+    if (! alt) {
+        return NOTOK;
+    }
+
+    m = (struct multipart *) alt->c_ctparams;
+    if (! m  ||  ! m->mp_parts) {
+        free_content (text_plain);
+        free_content (alt);
+        return NOTOK;
+    }
+
+    m->mp_parts->mp_part = text_plain;
+    /* Make the new multipart/alternative the parent. */
+    *ct = alt;
+    ++*message_mods;
+    if (verbosw) {
+        report (NULL, (*ct)->c_partno, (*ct)->c_file,
+                "insert text/plain part");
+    }
+
+    return OK;
+}
+
 static CT
 divide_part (CT ct) {
     CT new_part;
@@ -1642,11 +1783,39 @@ set_ct_type (CT ct, int type, int subtype, int encoding) {
 
 
 static int
-decode_text_parts (CT ct, int encoding, int *message_mods) {
+decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
     int status = OK;
+    int lf_line_endings = 0;
 
     switch (ct->c_type) {
-    case CT_TEXT:
+    case CT_MULTIPART: {
+        struct multipart *m = (struct multipart *) ct->c_ctparams;
+        struct part *part;
+
+        /* Should check to see if the body for this part is encoded?
+           For now, it gets passed along as-is by InitMultiPart(). */
+        for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
+            status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods);
+        }
+        break;
+    }
+
+    case CT_MESSAGE:
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *e = (struct exbody *) ct->c_ctparams;
+
+            status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods);
+        }
+        break;
+
+    default:
+        if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
+            break;
+        }
+
+        lf_line_endings =
+            ct->c_ctparams  &&  ((struct text *) ct->c_ctparams)->lf_line_endings;
+
         switch (ct->c_encoding) {
         case CE_BASE64:
         case CE_QUOTED: {
@@ -1698,7 +1867,9 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
                             report (NULL, ct->c_partno, ct->c_file, "decode%s",
                                     ct->c_ctline ? ct->c_ctline : "");
                         }
-                        strip_crs (ct, message_mods);
+                        if (lf_line_endings) {
+                            strip_crs (ct, message_mods);
+                        }
                     } else {
                         status = NOTOK;
                     }
@@ -1710,39 +1881,49 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
         }
         case CE_8BIT:
         case CE_7BIT:
-            strip_crs (ct, message_mods);
+            if (lf_line_endings) {
+                strip_crs (ct, message_mods);
+            }
             break;
         default:
             break;
         }
 
         break;
-
-    case CT_MULTIPART: {
-        struct multipart *m = (struct multipart *) ct->c_ctparams;
-        struct part *part;
-
-        /* Should check to see if the body for this part is encoded?
-           For now, it gets passed along as-is by InitMultiPart(). */
-        for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
-            status = decode_text_parts (part->mp_part, encoding, message_mods);
-        }
-        break;
     }
 
-    case CT_MESSAGE:
-        if (ct->c_subtype == MESSAGE_EXTERNAL) {
-            struct exbody *e = (struct exbody *) ct->c_ctparams;
+    return status;
+}
 
-            status = decode_text_parts (e->eb_content, encoding, message_mods);
-        }
-        break;
 
-    default:
-        break;
+/* Return 1 if the part with type[/subtype] should be decoded, according to
+   decodetypes (the -decodetypes switch value, or its default), else 0. */
+static int
+should_decode(const char *decodetypes, const char *type, const char *subtype) {
+    /* Quick search for matching type[/subtype] in decodetypes:  bracket
+       decodetypes with commas, then search for ,type, and, if that is
+       not found and there is a subtype, for ,type/subtype, in it. */
+
+    int found_match = 0;
+    char *delimited_decodetypes = concat(",", decodetypes, ",", NULL);
+    char *delimited_type = concat(",", type, ",", NULL);
+
+    if (nmh_strcasestr(delimited_decodetypes, delimited_type)) {
+        found_match = 1;
+    } else if (subtype != NULL) {
+        char *delimited_type_subtype =
+            concat(",", type, "/", subtype, ",", NULL);
+
+        if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) {
+            found_match = 1;
+        }
+        free(delimited_type_subtype);
     }

-    return status;
+    free(delimited_type);
+    free(delimited_decodetypes);
+
+    return found_match;
 }
 
 
@@ -2167,6 +2348,44 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace,
 }
 
 
+/*
+ * parse_mime() does not set lf_line_endings in struct text, so this function
+ * does it, recursively, for the parts that decodetypes identifies.
+ */
+static void
+set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
+    if (ct->c_type == CT_MULTIPART) {
+        struct multipart *mp = (struct multipart *) ct->c_ctparams;
+        struct part *p;
+
+        for (p = mp->mp_parts; p; p = p->mp_next) {
+            set_text_ctparams(p->mp_part, decodetypes, lf_line_endings);
+        }
+        return;
+    }
+
+    if (ct->c_type == CT_MESSAGE) {
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *eb = (struct exbody *) ct->c_ctparams;
+
+            set_text_ctparams(eb->eb_content, decodetypes, lf_line_endings);
+        }
+        return;
+    }
+
+    if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
+        return;
+    }
+    /* Give the part a struct text if it has no parameters yet. */
+    if (ct->c_ctparams == NULL) {
+        if ((ct->c_ctparams = (struct text *) mh_xcalloc (1, sizeof (struct text))) == NULL) {
+            adios (NULL, "out of memory");
+        }
+    }
+    ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
+}
+
+
 /*
  * If "rmmproc" is defined, call that to remove the file.  Otherwise,
  * use the standard MH backup file.