X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/9b3c43815b3704fee25693fe117d88b0ca595ca5..ddf3a8574f657dcb8c53fc5908e7bebbde1994b5:/uip/mhfixmsg.c

diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c
index 969ad638..f410ecb4 100644
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -64,6 +64,8 @@
     X("nofixboundary", 0, NFIXBOUNDARYSW) \
     X("fixcte", 0, FIXCOMPOSITECTESW) \
     X("nofixcte", 0, NFIXCOMPOSITECTESW) \
+    X("checkbase64", 0, CHECKBASE64SW) \
+    X("nocheckbase64", 0, NCHECKBASE64SW) \
     X("fixtype mimetype", 0, FIXTYPESW) \
     X("file file", 0, FILESW) \
     X("outfile file", 0, OUTFILESW) \
@@ -105,6 +107,7 @@ typedef struct {
     /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
     int lf_line_endings;
     char *textcharset;
+    bool checkbase64;
 } fix_transformations;
 
 static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
@@ -123,9 +126,13 @@ static int find_textplain_sibling (CT, int, int *);
 static int insert_new_text_plain_part (CT, int, CT);
 static CT build_text_plain_part (CT);
 static int insert_into_new_mp_alt (CT *, int *);
+static int insert_into_new_mp_mixed (CT *, const char *, int *);
 static CT divide_part (CT);
 static void copy_ctinfo (CI, CI);
 static int decode_part (CT);
+static size_t get_valid_base64 (CT, char **);
+static size_t find_invalid_base64_pos (const char *);
+static int check_base64_encoding (CT *);
 static int reformat_part (CT, char *, char *, char *, int);
 static CT build_multipart (CT, CT, int, int);
 static int boundary_in_content (FILE **, char *, const char *);
@@ -139,7 +146,7 @@ static void update_cte (CT);
 static int least_restrictive_encoding (CT) PURE;
 static int less_restrictive (int, int);
 static int convert_charsets (CT, char *, int *);
-static int fix_always (CT, int *);
+static int fix_always (CT *, const fix_transformations *, int *);
 static int decode_header_field_bodies (CT, int *);
 static int fix_filename_param (char *, char *, PM *, PM *);
 static int fix_filename_encoding (CT);
@@ -174,6 +181,7 @@ main (int argc, char **argv)
     fx.decodeheaderfieldbodies = NULL;
     fx.lf_line_endings = 0;
     fx.textcharset = NULL;
+    fx.checkbase64 = true;
 
     if (nmh_init(argv[0], true, false)) { return 1; }
 
@@ -254,6 +262,12 @@ main (int argc, char **argv)
             case NTEXTCHARSETSW:
                 fx.textcharset = 0;
                 continue;
+            case CHECKBASE64SW:
+                fx.checkbase64 = true;
+                continue;
+            case NCHECKBASE64SW:
+                fx.checkbase64 = false;
+                continue;
             case FIXBOUNDARYSW:
                 fx.fixboundary = 1;
                 continue;
@@ -599,7 +613,7 @@ mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
     } /* else *outfp was defined by caller */
 
     reverse_alternative_parts (*ctp);
-    status = fix_always (*ctp, &message_mods);
+    status = fix_always (ctp, fx, &message_mods);
     if (status == OK  &&  fx->fixboundary) {
         status = fix_boundary (ctp, &message_mods);
     }
@@ -1603,6 +1617,79 @@ insert_into_new_mp_alt (CT *ct, int *message_mods)
 }
 
 
+/*
+ * Slip a new part that holds content into a new multipart/mixed, alongside
+ * the part that divide_part makes from *ct.  On success, replace *ct with
+ * the multipart, increment *message_mods, and return OK.  Else return NOTOK.
+ */
+static int
+insert_into_new_mp_mixed (CT *ct, const char *content, int *message_mods)
+{
+    CT main_part = divide_part (*ct);
+    const char *reason = NULL;
+    struct multipart *mp;
+    char *tmpname;
+    CT mp_mixed;
+    CE cefile;
+    int encoding;
+
+    if (main_part == NULL) {
+        /* Should never happen.  Test before main_part is dereferenced. */
+        inform("failed to insert new text part into multipart/mixed");
+        return NOTOK;
+    }
+    encoding = content_encoding (main_part, &reason);
+
+    if (set_ct_type(main_part, (*ct)->c_type, (*ct)->c_subtype,
+                    main_part->c_encoding) != OK) {
+        inform("failed to set Content-Type of main part");
+        return NOTOK;
+    }
+    if (set_ct_type(*ct, (*ct)->c_type, (*ct)->c_subtype, encoding) != OK) {
+        inform("failed to set Content-Type of new part");
+        return NOTOK;
+    }
+
+    /* Load remainder into the new part.  Test the file name before it is
+       copied, so that a NULL is never handed to mh_xstrdup. */
+    cefile = &(*ct)->c_cefile;
+    tmpname = m_mktemp2 (NULL, invo_name, NULL, &cefile->ce_fp);
+    if (tmpname == NULL) {
+        die("unable to create temporary file in %s", get_temp_dir());
+    }
+    cefile->ce_file = mh_xstrdup(tmpname);
+    cefile->ce_unlink = 1;
+    fprintf (cefile->ce_fp, "%s", content);
+
+    /* Put both parts into a new multipart. */
+    mp_mixed = build_multipart (*ct, main_part, CT_MULTIPART, MULTI_MIXED);
+    if (mp_mixed == NULL) {
+        inform("failed to build multipart/mixed");
+        return NOTOK;
+    }
+
+    /* So fix_composite_cte doesn't try to overwrite the encoding.  If the
+       content needs to be decoded, c_encoding will be properly set. */
+    mp_mixed->c_encoding = encoding;
+    mp = (struct multipart *) mp_mixed->c_ctparams;
+    if (mp == NULL  ||  mp->mp_parts == NULL) {
+        free_content (main_part);
+        free_content (mp_mixed);
+        return NOTOK;
+    }
+    mp->mp_parts->mp_part = main_part;
+    /* Make the new multipart/mixed the parent. */
+    *ct = mp_mixed;
+    ++*message_mods;
+    if (verbosw) {
+        report (NULL, (*ct)->c_partno, (*ct)->c_file,
+                "insert text/plain part");
+    }
+
+    return OK;
+}
+
+
 /*
  * Clone a MIME part.
  */
@@ -1689,6 +1776,134 @@ decode_part (CT ct)
 }
 
 
+/*
+ * If base64-encoded content has a text trailer, return the location, relative
+ * to ct->c_begin, where the valid base64 ends, and store a copy of the trailer
+ * in *remainderp.  The caller is responsible for deallocating that.  If no
+ * text trailer, return ct->c_end - ct->c_begin and leave *remainderp
+ * unchanged.  On failure, return (size_t) NOTOK and leave it unchanged.
+ */
+static size_t
+get_valid_base64 (CT ct, char **remainderp) {
+    const size_t len = ct->c_end - ct->c_begin;
+    size_t nread, pos;
+    char *buf;
+
+    if (! ct->c_fp  &&  ((ct->c_fp = fopen (ct->c_file, "r")) == NULL)) {
+        advise (ct->c_file, "unable to open for reading");
+        return (size_t) NOTOK;
+    }
+    /* Seek with stdio, so that the stream's buffer can't be out of step. */
+    if (fseek (ct->c_fp, ct->c_begin, SEEK_SET) != 0) {
+        advise (ct->c_file, "unable to seek in");
+        return (size_t) NOTOK;
+    }
+    buf = mh_xmalloc(len + 1);
+    /* Unlike a "%<len>c" scanf, fread copes with len == 0 and short reads. */
+    nread = fread (buf, 1, len, ct->c_fp);
+    if (nread < len  &&  ferror (ct->c_fp)) {
+        advise (ct->c_file, "unable to read");
+        free(buf);
+        return (size_t) NOTOK;
+    }
+    buf[nread] = '\0';
+
+    pos = find_invalid_base64_pos(buf);
+    if (pos < nread) {
+        *remainderp = mh_xstrdup(&buf[pos]);
+    } else {
+        pos = len;
+    }
+    free(buf);
+
+    return pos;
+}
+
+
+/*
+ * Scan a null-terminated byte string for the end of its valid base64 data,
+ * and return that position.  Isolated invalid bytes are passed over, because
+ * RFC 2045 Sec 6.8 says that they should be ignored.  What this looks for is
+ * a text footer that was mistakenly appended to base64 content, so the scan
+ * stops at the first of:
+ * 1. A pad ('=') byte.  The position returned is after it, after a second
+ *    pad byte if one is next, and after any whitespace that then follows.
+ * 2. Two invalid base64 bytes in a row.  The position returned is that of
+ *    the first of the two.
+ *
+ * If neither is found, the position of the null terminator is returned.
+ *
+ * encoded      - the base64-encoded string
+ */
+static size_t
+find_invalid_base64_pos (const char *encoded) {
+    size_t i = 0;
+    bool saw_pad = false;
+    unsigned int bad_run = 0;   /* length of current run of invalid bytes */
+
+    while (encoded[i] != '\0'  &&  ! saw_pad  &&  bad_run < 2) {
+        const unsigned char c = (unsigned char) encoded[i];
+
+        if (c == '=') {
+            /* "evidence that the end of the data has been reached"
+               according to RFC 2045 */
+            saw_pad = true;
+            bad_run = 0;
+        } else if (isspace (c) || isalnum (c) || c == '+' || c == '/') {
+            /* Valid base64 byte, or whitespace.  Either one ends a run, so
+               that a lone invalid byte is left for decodeBase64(). */
+            bad_run = 0;
+        } else {
+            ++bad_run;
+        }
+        ++i;
+    }
+
+    if (saw_pad) {
+        /* The scan stopped just past the pad.  Take a second one, too. */
+        if (encoded[i] == '=') {
+            ++i;
+        }
+    } else if (bad_run == 2) {
+        /* The scan stopped just past the pair.  Back up to its first byte. */
+        i -= 2;
+    }
+
+    /* Skip over any trailing whitespace. */
+    while (encoded[i] != '\0'  &&  isspace ((unsigned char) encoded[i])) {
+        ++i;
+    }
+
+    return i;
+}
+
+
+/*
+ * Check for valid base64 encoding.  If a text footer follows it, end the
+ * content before the footer and move it into a new multipart/mixed.  Returns
+ * OK, or NOTOK on failure.
+ */
+static int
+check_base64_encoding (CT *ctp)
+{
+    char *remainder = NULL;
+    int ignore_message_mods = 0;
+    int status = OK;
+    const size_t valid_len = get_valid_base64(*ctp, &remainder);
+
+    if (valid_len == (size_t) NOTOK) {
+        return NOTOK;  /* Don't fold the error value into c_end. */
+    }
+    (*ctp)->c_end = (*ctp)->c_begin + valid_len;
+    if (remainder != NULL) {
+        status = insert_into_new_mp_mixed(ctp, remainder, &ignore_message_mods);
+        free(remainder);
+    }
+
+    return status;
+}
+
+
 /*
  * Reformat content as plain text.
  * Some of the arguments aren't really needed now, but maybe will
@@ -2581,37 +2796,37 @@ convert_charsets (CT ct, char *dest_charset, int *message_mods)
  *    headers, respectively.
  */
 static int
-fix_always (CT ct, int *message_mods)
+fix_always (CT *ctp, const fix_transformations *fx, int *message_mods)
 {
     int status = OK;
 
-    switch (ct->c_type) {
+    switch ((*ctp)->c_type) {
     case CT_MULTIPART: {
-        struct multipart *m = (struct multipart *) ct->c_ctparams;
+        struct multipart *m = (struct multipart *) (*ctp)->c_ctparams;
         struct part *part;
 
         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
-            status = fix_always (part->mp_part, message_mods);
+            status = fix_always (&part->mp_part, fx, message_mods);
         }
         break;
     }
 
     case CT_MESSAGE:
-        if (ct->c_subtype == MESSAGE_EXTERNAL) {
-            struct exbody *e = (struct exbody *) ct->c_ctparams;
+        if ((*ctp)->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *e = (struct exbody *) (*ctp)->c_ctparams;
 
-            status = fix_always (e->eb_content, message_mods);
+            status = fix_always (&e->eb_content, fx, message_mods);
         }
         break;
 
     default: {
         HF hf;
 
-        if (ct->c_first_hf) {
-            fix_filename_encoding (ct);
+        if ((*ctp)->c_first_hf) {
+            fix_filename_encoding (*ctp);
         }
 
-        for (hf = ct->c_first_hf; hf; hf = hf->next) {
+        for (hf = (*ctp)->c_first_hf; hf; hf = hf->next) {
             size_t len = strlen (hf->value);
 
             if (strcasecmp (hf->name, TYPE_FIELD) != 0  &&
@@ -2635,25 +2850,29 @@ fix_always (CT ct, int *message_mods)
                 hf->value[len - 1] = '\0';
 
                 /* Also, if Content-Type parameter, remove trailing ';'
-                   from ct->c_ctline.  This probably isn't necessary
+                   from (*ctp)->c_ctline.  This probably isn't necessary
                    but can't hurt. */
-                if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
-                    size_t l = strlen(ct->c_ctline) - 1;
-                    while (isspace((unsigned char)(ct->c_ctline[l])) ||
-                           ct->c_ctline[l] == ';') {
-                        ct->c_ctline[l--] = '\0';
+                if (strcasecmp(hf->name, TYPE_FIELD) == 0 && (*ctp)->c_ctline) {
+                    size_t l = strlen((*ctp)->c_ctline) - 1;
+                    while (isspace((unsigned char)((*ctp)->c_ctline[l])) ||
+                           (*ctp)->c_ctline[l] == ';') {
+                        (*ctp)->c_ctline[l--] = '\0';
                         if (l == 0) { break; }
                     }
                 }
 
                 ++*message_mods;
                 if (verbosw) {
-                    report (NULL, ct->c_partno, ct->c_file,
+                    report (NULL, (*ctp)->c_partno, (*ctp)->c_file,
                             "remove trailing ; from %s parameter value",
                             hf->name);
                 }
             }
         }
+
+        if (fx->checkbase64  &&  (*ctp)->c_encoding == CE_BASE64) {
+            status = check_base64_encoding (ctp);
+        }
     }}
 
     return status;