X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/61e09274bb3338efbd9c50f797751853c99dba3b..9151385b9fed8c75663fb9299075cee080219990:/uip/mhparse.c

diff --git a/uip/mhparse.c b/uip/mhparse.c
index c09cb1ac..87714fdb 100644
--- a/uip/mhparse.c
+++ b/uip/mhparse.c
@@ -36,6 +36,16 @@ int checksw = 0;	/* check Content-MD5 field */
  */
 char *tmp;
 
+/*
+ * For mhfixmsg.  A nonzero skip_mp_cte_check disables the multipart
+ * Content-Transfer-Encoding (7bit/8bit/binary) check.  A nonzero
+ * suppress_bogus_mp_content_warning silences the "bogus multipart
+ * content" advisory; bogus_mp_content is set to 1 either way.
+ */
+int skip_mp_cte_check;
+int suppress_bogus_mp_content_warning;
+int bogus_mp_content;
+
 /*
  * Structures for TEXT messages
  */
@@ -88,7 +98,6 @@ int type_ok (CT, int);
 void content_error (char *, CT, char *, ...);
 
 /* mhfree.c */
-void free_content (CT);
 void free_encoding (CT, int);
 
 /*
@@ -118,6 +127,7 @@ static int openFTP (CT, char **);
 static int InitMail (CT);
 static int openMail (CT, char **);
 static int readDigest (CT, char *);
+static int get_leftover_mp_content (CT, int);
 
 struct str2init str2cts[] = {
     { "application", CT_APPLICATION, InitApplication },
@@ -1111,8 +1121,8 @@ InitMultiPart (CT ct)
      * The encoding for multipart messages must be either
      * 7bit, 8bit, or binary (per RFC2045).
      */
-    if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
-	&& ct->c_encoding != CE_BINARY) {
+    if (! skip_mp_cte_check  &&  ct->c_encoding != CE_7BIT  &&
+        ct->c_encoding != CE_8BIT  &&  ct->c_encoding != CE_BINARY) {
 	/* Copy the Content-Transfer-Encoding header field body so we can
 	   remove any trailing whitespace and leading blanks from it. */
 	char *cte = add (ct->c_celine ? ct->c_celine : "(null)", NULL);
@@ -1237,7 +1247,11 @@ end_part:
 	}
     }
 
-    advise (NULL, "bogus multipart content in message %s", ct->c_file);
+    if (! suppress_bogus_mp_content_warning) {
+        advise (NULL, "bogus multipart content in message %s", ct->c_file);
+    }
+    bogus_mp_content = 1;
+
     if (!inout && part) {
 	p = part->mp_part;
 	p->c_end = ct->c_end;
@@ -1289,6 +1303,9 @@ last_part:
 	}
     }
 
+    get_leftover_mp_content (ct, 1);
+    get_leftover_mp_content (ct, 0);
+
     fclose (ct->c_fp);
     ct->c_fp = NULL;
     return OK;
@@ -2881,3 +2898,238 @@ invalid_digest:
 
     return OK;
 }
+
+
+/* Save multipart content that lies outside every subpart: the text
+   before the first subpart (BEFORE nonzero) or after the last one
+   (BEFORE zero).  Found text is stored, as a concat() string, in the
+   multipart's mp_content_before or mp_content_after.  Returns NOTOK
+   if CT has no multipart parameters or no subpart, otherwise OK. */
+static int
+get_leftover_mp_content (CT ct, int before /* or after */) {
+    struct multipart *m = (struct multipart *) ct->c_ctparams;
+    char *boundary;
+    int found_boundary = 0;
+    char buffer[BUFSIZ];
+    int max;             /* size of the region that may be scanned */
+    int nread = 0;       /* bytes consumed from the file so far */
+    char *content = NULL;
+
+    if (! m) return NOTOK;
+
+    if (before) {
+        if (! m->mp_parts  ||  ! m->mp_parts->mp_part) return NOTOK;
+
+        /* Isolate the beginning of this part to the beginning of the
+           first subpart and save any content between them. */
+        fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
+        max = m->mp_parts->mp_part->c_begin - ct->c_begin;
+        boundary = concat ("--", m->mp_start, NULL);
+    } else {
+        struct part *last_subpart = NULL;
+        struct part *subpart;
+
+        /* Go to the last subpart to get its end position. */
+        for (subpart = m->mp_parts; subpart; subpart = subpart->mp_next) {
+            last_subpart = subpart;
+        }
+
+        if (! last_subpart  ||  ! last_subpart->mp_part) return NOTOK;
+
+        /* Isolate the end of the last subpart to the end of this part
+           and save any content between them. */
+        fseeko (ct->c_fp, last_subpart->mp_part->c_end, SEEK_SET);
+        max = ct->c_end - last_subpart->mp_part->c_end;
+        boundary = concat ("--", m->mp_stop, NULL);
+    }
+
+    while (fgets (buffer, sizeof(buffer) - 1, ct->c_fp)) {
+        int len = (int) strlen (buffer);
+        int room = max > nread  ?  max - nread  :  0;
+
+        /* Don't look beyond beginning of first subpart (before) or
+           next part (after):  cut the line off where the region ends. */
+        if (len > room) buffer[room] = '\0';
+        nread += len;
+
+        if (before) {
+            if (! strcmp (buffer, boundary)) {
+                found_boundary = 1;
+            }
+        } else {
+            if (! found_boundary  &&  ! strcmp (buffer, boundary)) {
+                found_boundary = 1;
+                continue;
+            }
+        }
+
+        /* Keep lines ahead of the boundary (before) or past it (after). */
+        if ((before && ! found_boundary)  ||  (! before && found_boundary)) {
+            if (content) {
+                char *old_content = content;
+                content = concat (content, buffer, NULL);
+                free (old_content);
+            } else {
+                content = before
+                    ?  concat ("\n", buffer, NULL)
+                    :  concat (buffer, NULL);
+            }
+        }
+
+        if (nread > max  ||  (before && found_boundary)) break;
+    }
+
+    if (content) {
+        char *cp;
+
+        /* Remove trailing newline, except at EOF. */
+        if ((before || ! feof (ct->c_fp)) &&
+            (cp = content + strlen (content)) > content  &&
+            *--cp == '\n') {
+            *cp = '\0';
+        }
+
+        /* Content of at most one character is discarded, not saved. */
+        if (strlen (content) > 1) {
+            if (before) {
+                m->mp_content_before = content;
+            } else {
+                m->mp_content_after = content;
+            }
+        } else {
+            free (content);
+        }
+    }
+
+    free (boundary);
+    return OK;
+}
+
+
+/* Return a short name for the CT_* content type code TYPE, or
+   "unknown_type" when TYPE is not one of the codes listed below.
+   The result points at a string literal. */
+char *
+ct_type_str (int type) {
+    static const struct { int code; char *name; } names[] = {
+        { CT_APPLICATION, "application" },
+        { CT_AUDIO,       "audio" },
+        { CT_IMAGE,       "image" },
+        { CT_MESSAGE,     "message" },
+        { CT_MULTIPART,   "multipart" },
+        { CT_TEXT,        "text" },
+        { CT_VIDEO,       "video" },
+        { CT_EXTENSION,   "extension" },
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof names / sizeof names[0]; ++i) {
+        if (names[i].code == type) return names[i].name;
+    }
+
+    return "unknown_type";
+}
+
+
+/* Return a short name for SUBTYPE of content type TYPE.  Subtypes
+   are only known for the application, message, multipart, and text
+   types; an unlisted subtype of one of those yields that type's own
+   "unknown_..._subtype" string, and any other TYPE yields
+   "unknown_type".  The result points at a string literal. */
+char *
+ct_subtype_str (int type, int subtype) {
+    static const struct { int type; int sub; char *name; } known[] = {
+        { CT_APPLICATION, APPLICATION_OCTETS,     "octets" },
+        { CT_APPLICATION, APPLICATION_POSTSCRIPT, "postscript" },
+
+        { CT_MESSAGE,     MESSAGE_RFC822,         "rfc822" },
+        { CT_MESSAGE,     MESSAGE_PARTIAL,        "partial" },
+        { CT_MESSAGE,     MESSAGE_EXTERNAL,       "external" },
+
+        { CT_MULTIPART,   MULTI_MIXED,            "mixed" },
+        { CT_MULTIPART,   MULTI_ALTERNATE,        "alternative" },
+        { CT_MULTIPART,   MULTI_DIGEST,           "digest" },
+        { CT_MULTIPART,   MULTI_PARALLEL,         "parallel" },
+
+        { CT_TEXT,        TEXT_PLAIN,             "plain" },
+        { CT_TEXT,        TEXT_RICHTEXT,          "richtext" },
+        { CT_TEXT,        TEXT_ENRICHED,          "enriched" },
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof known / sizeof known[0]; ++i) {
+        if (known[i].type == type  &&  known[i].sub == subtype) {
+            return known[i].name;
+        }
+    }
+
+    /* No listed (type, subtype) pair matched: fall back by type. */
+    if (type == CT_APPLICATION) {
+        return "unknown_app_subtype";
+    }
+
+    if (type == CT_MESSAGE) {
+        return "unknown_msg_subtype";
+    }
+
+    if (type == CT_MULTIPART) {
+        return "unknown_multipart_subtype";
+    }
+
+    if (type == CT_TEXT) {
+        return "unknown_text_subtype";
+    }
+
+    return "unknown_type";
+}
+
+
+/* Look up TYPE in the str2cts table.  Returns the first entry whose
+   si_val equals TYPE, or NULL if the scan reaches the terminating
+   entry (the one with a null si_key) without a match. */
+const struct str2init *
+get_ct_init (int type) {
+    const struct str2init *entry = str2cts;
+
+    while (entry->si_key  &&  entry->si_val != type) {
+        ++entry;
+    }
+
+    return entry->si_key ? entry : NULL;
+}
+
+/* Return a short name for the CE_* encoding code ENCODING, or
+   "unknown" for a code not listed.  NOTE(review): CE_QUOTED maps to
+   "quoted", not "quoted-printable" -- confirm that is intended. */
+const char *
+ce_str (int encoding) {
+    static const struct { int code; const char *name; } names[] = {
+        { CE_BASE64,    "base64" },
+        { CE_QUOTED,    "quoted" },
+        { CE_8BIT,      "8bit" },
+        { CE_7BIT,      "7bit" },
+        { CE_BINARY,    "binary" },
+        { CE_EXTENSION, "extension" },
+        { CE_EXTERNAL,  "external" },
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof names / sizeof names[0]; ++i) {
+        if (names[i].code == encoding) return names[i].name;
+    }
+    return "unknown";
+}
+
+/* Look up METHOD, compared case-insensitively against si_key, in the
+   str2ces table.  Returns the matching entry, or NULL if METHOD is
+   NULL or no entry before the null-si_key terminator matches. */
+const struct str2init *
+get_ce_method (const char *method) {
+    const struct str2init *sp;
+
+    if (! method) return NULL;
+    for (sp = str2ces; sp->si_key; ++sp) {
+        if (! strcasecmp (method, sp->si_key)) return sp;
+    }
+    return NULL;
+}