]> diplodocus.org Git - nmh/blobdiff - uip/mhfixmsg.c
pending-release-notes: add mhshow's "-prefer", and mh-format's %(kibi/kilo)
[nmh] / uip / mhfixmsg.c
index 31b3efe2d5c8254b192d7a586c7b6c2651b0c70d..af6cd8894cb1ea6b12a2b4b45ff7d568e830965f 100644 (file)
@@ -26,6 +26,7 @@
     X("nofixboundary", 0, NFIXBOUNDARYSW) \
     X("fixcte", 0, FIXCTESW) \
     X("nofixcte", 0, NFIXCTESW) \
+    X("fixtype mimetype", 0, FIXTYPESW) \
     X("file file", 0, FILESW) \
     X("outfile file", 0, OUTFILESW) \
     X("rmmproc program", 0, RPROCSW) \
@@ -55,6 +56,9 @@ int debugsw; /* Needed by mhparse.c. */
 extern int skip_mp_cte_check;                 /* flag to InitMultiPart */
 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
 extern int bogus_mp_content;                  /* flag from InitMultiPart */
+/* flags to/from parse_header_attrs */
+extern int suppress_extraneous_trailing_semicolon_warning;
+extern int extraneous_trailing_semicolon;
 
 /* mhoutsbr.c */
 int output_message (CT, char *);
@@ -72,6 +76,7 @@ void freects_done (int) NORETURN;
 typedef struct fix_transformations {
     int fixboundary;
     int fixcte;
+    svector_t fixtypes;
     int reformat;
     int replacetextplain;
     int decodetext;
@@ -82,9 +87,13 @@ int mhfixmsgsbr (CT *, const fix_transformations *, char *);
 static int fix_boundary (CT *, int *);
 static int get_multipart_boundary (CT, char **);
 static int replace_boundary (CT, char *, char *);
+static int fix_types (CT, svector_t, int *);
+static char *replace_substring (char **, const char *, const char *);
 static int fix_multipart_cte (CT, int *);
 static int set_ce (CT, int);
 static int ensure_text_plain (CT *, CT, int *, int);
+static int find_textplain_sibling (CT, int, int *);
+static int insert_new_text_plain_part (CT, int, CT);
 static CT build_text_plain_part (CT);
 static CT divide_part (CT);
 static void copy_ctinfo (CI, CI);
@@ -99,6 +108,7 @@ static int decode_text_parts (CT, int, int *);
 static int content_encoding (CT, const char **);
 static int strip_crs (CT, int *);
 static int convert_charsets (CT, char *, int *);
+static int fix_always (CT, int *);
 static int write_content (CT, char *, char *, int, int);
 static int remove_file (char *);
 static void report (char *, char *, char *, char *, ...);
@@ -120,6 +130,7 @@ main (int argc, char **argv) {
     int status = OK;
     fix_transformations fx;
     fx.reformat = fx.fixcte = fx.fixboundary = 1;
+    fx.fixtypes = NULL;
     fx.replacetextplain = 0;
     fx.decodetext = CE_8BIT;
     fx.textcharset = NULL;
@@ -186,6 +197,18 @@ main (int argc, char **argv) {
             case NFIXCTESW:
                 fx.fixcte = 0;
                 continue;
+            case FIXTYPESW:
+                if (! (cp = *argp++) || (*cp == '-' && cp[1]))
+                    adios (NULL, "missing argument to %s", argp[-2]);
+                if (! strncasecmp (cp, "multipart/", 10)  ||
+                    ! strncasecmp (cp, "message/", 8)) {
+                    adios (NULL, "-fixtype %s not allowed", cp);
+                } else if (! strchr (cp, '/')) {
+                    adios (NULL, "-fixtype requires type/subtype");
+                }
+                if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
+                svector_push_back (fx.fixtypes, cp);
+                continue;
             case REFORMATSW:
                 fx.reformat = 1;
                 continue;
@@ -215,12 +238,12 @@ main (int argc, char **argv) {
             case NRPRCSW:
                 rmmproc = NULL;
                 continue;
-           case CHGSW:
-               chgflag = 1;
-               continue;
-           case NCHGSW:
-               chgflag = 0;
-               continue;
+            case CHGSW:
+                chgflag = 1;
+                continue;
+            case NCHGSW:
+                chgflag = 0;
+                continue;
             case VERBSW:
                 verbosw = 1;
                 continue;
@@ -256,6 +279,7 @@ main (int argc, char **argv) {
     }
 
     suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
+    suppress_extraneous_trailing_semicolon_warning = 1;
 
     if (! context_find ("path"))
         free (path ("./", TFOLDER));
@@ -369,6 +393,7 @@ main (int argc, char **argv) {
         status = 1;
     }
 
+    if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
     free (outfile);
     free (file);
 
@@ -403,9 +428,13 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
     }
 
     reverse_alternative_parts (*ctp);
+    status = fix_always (*ctp, &message_mods);
     if (status == OK  &&  fx->fixboundary) {
         status = fix_boundary (ctp, &message_mods);
     }
+    if (status == OK  && fx->fixtypes != NULL) {
+        status = fix_types (*ctp, fx->fixtypes, &message_mods);
+    }
     if (status == OK  &&  fx->fixcte) {
         status = fix_multipart_cte (*ctp, &message_mods);
     }
@@ -719,6 +748,144 @@ replace_boundary (CT ct, char *file, char *boundary) {
 }
 
 
+/*
+ * Walk the content tree rooted at ct.  For each leaf part whose
+ * declared type/subtype matches (case-insensitively) one of the
+ * strings in fixtypes, decode the part and ask mime_type () what
+ * the content really is.  If that differs from the declared type,
+ * update c_type, c_subtype, c_ctinfo.ci_type, c_ctinfo.ci_subtype,
+ * c_ctline, and the Content-Type header field to match.
+ *
+ * *message_mods is incremented each time a Content-Type header
+ * field is rewritten.  Returns OK, the first non-OK status from a
+ * subpart, or NOTOK if mime_type () reports a type with no '/'.
+ */
+static int
+fix_types (CT ct, svector_t fixtypes, int *message_mods) {
+    int status = OK;
+
+    switch (ct->c_type) {
+    case CT_MULTIPART: {
+        struct multipart *m = (struct multipart *) ct->c_ctparams;
+        struct part *part;
+
+        for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
+            status = fix_types (part->mp_part, fixtypes, message_mods);
+        }
+        break;
+    }
+
+    case CT_MESSAGE:
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *e = (struct exbody *) ct->c_ctparams;
+
+            status = fix_types (e->eb_content, fixtypes, message_mods);
+        }
+        break;
+
+    default: {
+        char **typep, *type;
+
+        if (! ct->c_ctinfo.ci_type  ||  ! ct->c_ctinfo.ci_subtype) {
+            /* Nothing to compare against. */
+            break;
+        }
+
+        for (typep = svector_strs (fixtypes);
+             typep && (type = *typep);
+             ++typep) {
+            /* Rebuilt on every pass because a previous pass may have
+               changed ci_type/ci_subtype. */
+            char *type_subtype =
+                concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
+                        NULL);
+            char *ct_type_subtype = NULL;
+            char *cp;
+
+            if (! strcasecmp (type, type_subtype)  &&
+                decode_part (ct) == OK  &&
+                ct->c_cefile.ce_file != NULL) {
+                ct_type_subtype = mime_type (ct->c_cefile.ce_file);
+            }
+            /* Only needed for the comparison above; release it now so
+               that no later exit path can leak it. */
+            free (type_subtype);
+
+            if (ct_type_subtype == NULL) {
+                /* Either this -fixtype argument does not apply to the
+                   part, or mime_type () produced no answer (guarded
+                   here in case it can return NULL). */
+                continue;
+            }
+
+            if ((cp = strchr (ct_type_subtype, ';'))) {
+                /* Truncate to remove any parameter list from
+                   mime_type () result. */
+                *cp = '\0';
+            }
+
+            if (strcasecmp (type, ct_type_subtype)) {
+                char *ct_type, *ct_subtype;
+                HF hf;
+
+                /* The Content-Type header does not match the
+                   content, so update these struct Content
+                   fields to match:
+                   * c_type, c_subtype
+                   * c_ctinfo.ci_type, c_ctinfo.ci_subtype
+                   * c_ctline
+                   */
+                /* Extract type and subtype from type/subtype. */
+                ct_type = getcpy (ct_type_subtype);
+                if ((cp = strchr (ct_type, '/'))) {
+                    *cp = '\0';
+                    ct_subtype = getcpy (++cp);
+                } else {
+                    advise (NULL, "missing / in MIME type of %s %s",
+                            ct->c_file, ct->c_partno);
+                    free (ct_type);
+                    free (ct_type_subtype);
+                    return NOTOK;
+                }
+
+                ct->c_type = ct_str_type (ct_type);
+                ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
+
+                /* Ownership of ct_type and ct_subtype passes to ct. */
+                free (ct->c_ctinfo.ci_type);
+                ct->c_ctinfo.ci_type = ct_type;
+                free (ct->c_ctinfo.ci_subtype);
+                ct->c_ctinfo.ci_subtype = ct_subtype;
+                if (! replace_substring (&ct->c_ctline, type,
+                                         ct_type_subtype)) {
+                    advise (NULL, "did not find %s in %s",
+                            type, ct->c_ctline);
+                }
+
+                /* Update Content-Type header field. */
+                for (hf = ct->c_first_hf; hf; hf = hf->next) {
+                    if (strcasecmp (TYPE_FIELD, hf->name)) {
+                        continue;
+                    }
+
+                    if (replace_substring (&hf->value, type,
+                                           ct_type_subtype)) {
+                        ++*message_mods;
+                        if (verbosw) {
+                            report (NULL, ct->c_partno, ct->c_file,
+                                    "change Content-Type in header "
+                                    "from %s to %s",
+                                    type, ct_type_subtype);
+                        }
+                        break;
+                    } else {
+                        advise (NULL, "did not find %s in %s",
+                                type, hf->value);
+                    }
+                }
+            }
+            free (ct_type_subtype);
+        }
+    }}
+
+    return status;
+}
+
+/*
+ * Replace the first occurrence of old in *str with new.
+ *
+ * On success the previous *str is freed, *str is set to the string
+ * built by concat (), and that same pointer is returned.  If old
+ * does not occur in *str, or *str is NULL, *str is left untouched
+ * and NULL is returned.  The search uses strstr () and is therefore
+ * case-sensitive.
+ */
+static char *
+replace_substring (char **str, const char *old, const char *new) {
+    char *cp, *remainder, *new_str;
+
+    if (*str == NULL  ||  (cp = strstr (*str, old)) == NULL) {
+        return NULL;
+    }
+
+    /* Text following the matched substring. */
+    remainder = cp + strlen (old);
+
+    if (cp == *str) {
+        /* Match is at the very beginning:  there is no prefix. */
+        new_str = concat (new, remainder, NULL);
+    } else {
+        /* Copy the original and cut it off where the match starts. */
+        char *prefix = getcpy (*str);
+
+        prefix[cp - *str] = '\0';
+        new_str = concat (prefix, new, remainder, NULL);
+        free (prefix);
+    }
+
+    free (*str);
+
+    return *str = new_str;
+}
+
+
 static int
 fix_multipart_cte (CT ct, int *message_mods) {
     int status = OK;
@@ -842,69 +1009,72 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
 
     switch ((*ct)->c_type) {
     case CT_TEXT: {
-        int has_text_plain = 0;
-
         /* Nothing to do for text/plain. */
         if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
 
         if (parent  &&  parent->c_type == CT_MULTIPART  &&
             parent->c_subtype == MULTI_ALTERNATE) {
-            struct multipart *mp = (struct multipart *) parent->c_ctparams;
-            struct part *part, *prev;
             int new_subpart_number = 1;
+            int has_text_plain =
+                find_textplain_sibling (parent, replacetextplain,
+                                        &new_subpart_number);
 
-            /* See if there is a sibling text/plain. */
-            for (prev = part = mp->mp_parts; part; part = part->mp_next) {
-                ++new_subpart_number;
-                if (part->mp_part->c_type == CT_TEXT  &&
-                    part->mp_part->c_subtype == TEXT_PLAIN) {
-                    if (replacetextplain) {
-                        struct part *old_part;
-                        if (part == mp->mp_parts) {
-                            old_part = mp->mp_parts;
-                            mp->mp_parts = part->mp_next;
-                        } else {
-                            old_part = prev->mp_next;
-                            prev->mp_next = part->mp_next;
-                        }
-                        if (verbosw) {
-                            report (NULL, parent->c_partno, parent->c_file,
-                                    "remove text/plain part %s",
-                                    old_part->mp_part->c_partno);
-                        }
-                        free_content (old_part->mp_part);
-                        free (old_part);
-                    } else {
-                        has_text_plain = 1;
+            if (! has_text_plain) {
+                /* Parent is a multipart/alternative.  Insert a new
+                   text/plain subpart. */
+                const int inserted =
+                    insert_new_text_plain_part (*ct, new_subpart_number,
+                                                parent);
+                if (inserted) {
+                    ++*message_mods;
+                    if (verbosw) {
+                        report (NULL, parent->c_partno, parent->c_file,
+                                "insert text/plain part");
                     }
-                    break;
+                } else {
+                    status = NOTOK;
                 }
-                prev = part;
+            }
+        } else if (parent  &&  parent->c_type == CT_MULTIPART  &&
+            parent->c_subtype == MULTI_RELATED) {
+            char *type_subtype =
+                concat ((*ct)->c_ctinfo.ci_type, "/",
+                        (*ct)->c_ctinfo.ci_subtype, NULL);
+            const char *parent_type =
+                get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
+            int new_subpart_number = 1;
+            int has_text_plain = 0;
+
+            /* Have to do string comparison on the subtype because we
+               don't enumerate all of them in c_subtype values.
+               parent_type will be NULL if the multipart/related part
+               doesn't have a type parameter.  The type parameter must
+               be specified according to RFC 2387 Sec. 3.1 but not all
+               messages comply. */
+            if (parent_type  &&  strcasecmp (type_subtype, parent_type) == 0) {
+                /* The type of this part matches the root type of the
+                   parent multipart/related.  Look to see if there's
+                   text/plain sibling. */
+                has_text_plain =
+                    find_textplain_sibling (parent, replacetextplain,
+                                            &new_subpart_number);
             }
 
+            free (type_subtype);
+
             if (! has_text_plain) {
                 /* Parent is a multipart/related.  Insert a new
                    text/plain subpart. */
-                struct part *new_part = mh_xmalloc (sizeof *new_part);
-
-                if ((new_part->mp_part = build_text_plain_part (*ct))) {
-                    char buffer[16];
-                    snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
-
-                    new_part->mp_next = mp->mp_parts;
-                    mp->mp_parts = new_part;
-                    new_part->mp_part->c_partno =
-                        concat (parent->c_partno ? parent->c_partno : "1", ".",
-                                buffer, NULL);
-
+                const int inserted =
+                    insert_new_text_plain_part (*ct, new_subpart_number,
+                                                parent);
+                if (inserted) {
                     ++*message_mods;
                     if (verbosw) {
                         report (NULL, parent->c_partno, parent->c_file,
                                 "insert text/plain part");
                     }
                 } else {
-                    free_content (new_part->mp_part);
-                    free (new_part);
                     status = NOTOK;
                 }
             }
@@ -971,6 +1141,71 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
 }
 
 
+/*
+ * Scan parent's subparts, in order, for a text/plain part.
+ *
+ * *new_subpart_number is incremented once for every subpart
+ * examined, including the text/plain one if found; the scan stops
+ * at the first text/plain part.
+ *
+ * If one is found and replacetextplain is zero, returns 1.  If one
+ * is found and replacetextplain is non-zero, that part is unlinked
+ * from the list and freed, and 0 is returned.  Returns 0 if there
+ * is no text/plain subpart.
+ */
+static int
+find_textplain_sibling (CT parent, int replacetextplain,
+                        int *new_subpart_number) {
+    struct multipart *mp = (struct multipart *) parent->c_ctparams;
+    struct part **link;
+
+    /* link always points at the pointer that refers to the current
+       part, so unlinking is the same for the head and the rest. */
+    for (link = &mp->mp_parts; *link; link = &(*link)->mp_next) {
+        struct part *candidate = *link;
+
+        ++*new_subpart_number;
+
+        if (candidate->mp_part->c_type != CT_TEXT  ||
+            candidate->mp_part->c_subtype != TEXT_PLAIN) {
+            continue;
+        }
+
+        if (! replacetextplain) {
+            return 1;
+        }
+
+        *link = candidate->mp_next;
+        if (verbosw) {
+            report (NULL, parent->c_partno, parent->c_file,
+                    "remove text/plain part %s",
+                    candidate->mp_part->c_partno);
+        }
+        free_content (candidate->mp_part);
+        free (candidate);
+
+        return 0;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Build a text/plain part from ct with build_text_plain_part () and
+ * push it onto the front of parent's subpart list.  The new part's
+ * c_partno is parent's part number (or "1" if parent has none)
+ * followed by "." and new_subpart_number.
+ *
+ * Returns 1 if the part was inserted, 0 if it could not be built.
+ */
+static int
+insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
+    struct multipart *mp = (struct multipart *) parent->c_ctparams;
+    CT plain_part = build_text_plain_part (ct);
+    struct part *new_part;
+    char buffer[16];
+
+    if (plain_part == NULL) {
+        /* Nothing was built, so there is nothing to release. */
+        return 0;
+    }
+
+    /* Allocate the list node only once there is a part to hold. */
+    new_part = mh_xmalloc (sizeof *new_part);
+    new_part->mp_part = plain_part;
+    new_part->mp_next = mp->mp_parts;
+    mp->mp_parts = new_part;
+
+    snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
+    plain_part->c_partno =
+        concat (parent->c_partno ? parent->c_partno : "1", ".",
+                buffer, NULL);
+
+    return 1;
+}
+
+
 static CT
 build_text_plain_part (CT encoded_part) {
     CT tp_part = divide_part (encoded_part);
@@ -986,13 +1221,14 @@ build_text_plain_part (CT encoded_part) {
         if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
             advise (NULL, "unable to create temporary file in %s",
                     get_temp_dir());
-        }
-        tmp_plain_file = add (tempfile, NULL);
-        if (reformat_part (tp_part, tmp_plain_file,
-                           tp_part->c_ctinfo.ci_type,
-                           tp_part->c_ctinfo.ci_subtype,
-                           tp_part->c_type) == OK) {
-            return tp_part;
+        } else {
+            tmp_plain_file = add (tempfile, NULL);
+            if (reformat_part (tp_part, tmp_plain_file,
+                               tp_part->c_ctinfo.ci_type,
+                               tp_part->c_ctinfo.ci_subtype,
+                               tp_part->c_type) == OK) {
+                return tp_part;
+            }
         }
     }
 
@@ -1263,7 +1499,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
     }
 
     add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
-             "boundary", boundary, 0);
+              "boundary", boundary, 0);
 
     p = (struct part *) mh_xmalloc (sizeof *p);
     p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
@@ -1776,6 +2012,87 @@ convert_charsets (CT ct, char *dest_charset, int *message_mods) {
 }
 
 
+/*
+ * Fix various problems that aren't handled elsewhere.  These
+ * are fixed unconditionally:  there are no switches to disable
+ * them.  (Currently, "problems" is just one:  an extraneous
+ * semicolon at the end of a header parameter list.)
+ *
+ * Recurses into multipart subparts and into the content of
+ * message/external-body parts.  *message_mods is incremented once
+ * for each header field that is changed.  Returns OK, or the first
+ * non-OK status returned for a subpart.
+ */
+static int
+fix_always (CT ct, int *message_mods) {
+    int status = OK;
+
+    switch (ct->c_type) {
+    case CT_MULTIPART: {
+        struct multipart *m = (struct multipart *) ct->c_ctparams;
+        struct part *part;
+
+        for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
+            status = fix_always (part->mp_part, message_mods);
+        }
+        break;
+    }
+
+    case CT_MESSAGE:
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *e = (struct exbody *) ct->c_ctparams;
+
+            status = fix_always (e->eb_content, message_mods);
+        }
+        break;
+
+    default: {
+        HF hf;
+
+        for (hf = ct->c_first_hf; hf; hf = hf->next) {
+            size_t len, end;
+
+            if (strcasecmp (hf->name, TYPE_FIELD) != 0  &&
+                strcasecmp (hf->name, DISPO_FIELD) != 0) {
+                /* Only do this for Content-Type and
+                   Content-Disposition fields because those are the
+                   only headers that parse_mime() warns about. */
+                continue;
+            }
+
+            /* Find the end of the significant text:  whitespace
+               following a trailing ';' will be nuked as well.  The
+               index is checked before every access so that short or
+               all-whitespace values cannot be read out of bounds. */
+            len = end = strlen (hf->value);
+            while (end > 0  &&
+                   isspace ((unsigned char) hf->value[end - 1])) {
+                --end;
+            }
+
+            if (end == 0  ||  hf->value[end - 1] != ';') {
+                continue;
+            }
+
+            /* Remove trailing ';' from parameter value.  end > 0
+               here, so len > 0 too.  Keep the terminating newline
+               if the value had one. */
+            if (hf->value[len - 1] == '\n') {
+                hf->value[end - 1] = '\n';
+                hf->value[end] = '\0';
+            } else {
+                hf->value[end - 1] = '\0';
+            }
+
+            /* Also, if Content-Type parameter, remove trailing ';'
+               from ct->c_ctline.  This probably isn't necessary
+               but can't hurt. */
+            if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
+                size_t l = strlen(ct->c_ctline);
+
+                while (l > 0  &&
+                       (isspace((unsigned char)(ct->c_ctline[l - 1])) ||
+                        ct->c_ctline[l - 1] == ';')) {
+                    ct->c_ctline[--l] = '\0';
+                }
+            }
+
+            ++*message_mods;
+            if (verbosw) {
+                report (NULL, ct->c_partno, ct->c_file,
+                        "remove trailing ; from %s parameter value",
+                        hf->name);
+            }
+        }
+    }}
+
+    return status;
+}
+
+
 static int
 write_content (CT ct, char *input_filename, char *outfile, int modify_inplace,
                int message_mods) {