X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/8a14191c0a0ad15bb8f35b49748c75c1e0a8c2f4..9ffabd06f0dc8e7478b7484fceee4906990d44e2:/uip/mhparse.c?ds=inline

diff --git a/uip/mhparse.c b/uip/mhparse.c
index 2be97603..ca01545f 100644
--- a/uip/mhparse.c
+++ b/uip/mhparse.c
@@ -1,6 +1,4 @@
-
-/*
- * mhparse.c -- routines to parse the contents of MIME messages
+/* mhparse.c -- routines to parse the contents of MIME messages
  *
  * This code is Copyright (c) 2002, by the authors of nmh.  See the
  * COPYRIGHT file in the root directory of the nmh distribution for
@@ -15,6 +13,9 @@
 #include <h/mime.h>
 #include <h/mhparse.h>
 #include <h/utils.h>
+#include <h/mhcachesbr.h>
+#include "../sbr/m_mktemp.h"
+#include "mhfree.h"
 #ifdef HAVE_ICONV
 # include <iconv.h>
 #endif /* HAVE_ICONV */
@@ -22,10 +23,6 @@
 
 extern int debugsw;
 
-/* cache policies */
-extern int rcachesw;	/* mhcachesbr.c */
-extern int wcachesw;	/* mhcachesbr.c */
-
 int checksw = 0;	/* check Content-MD5 field */
 
 /*
@@ -57,8 +54,8 @@ int npreferred;
  */
 struct k2v SubText[] = {
     { "plain",    TEXT_PLAIN },
-    { "richtext", TEXT_RICHTEXT },  /* defined in RFC-1341    */
-    { "enriched", TEXT_ENRICHED },  /* defined in RFC-1896    */
+    { "richtext", TEXT_RICHTEXT },  /* defined in RFC 1341    */
+    { "enriched", TEXT_ENRICHED },  /* defined in RFC 1896    */
     { NULL,       TEXT_UNKNOWN }    /* this one must be last! */
 };
 
@@ -109,17 +106,11 @@ static struct k2v EncodingType[] = {
 };
 
 
-/* mhcachesbr.c */
-int find_cache (CT, int, int *, char *, char *, int);
-
 /* mhmisc.c */
 int part_ok (CT);
 int type_ok (CT, int);
 void content_error (char *, CT, char *, ...);
 
-/* mhfree.c */
-void free_encoding (CT, int);
-
 /*
  * static prototypes
  */
@@ -182,7 +173,7 @@ struct str2init str2ces[] = {
 /*
  * NOTE WELL: si_key MUST NOT have value of NOTOK
  *
- * si_key is 1 if access method is anonymous.
+ * si_val is 1 if access method is anonymous.
  */
 struct str2init str2methods[] = {
     { "afs",         1,	InitFile },
@@ -245,8 +236,8 @@ parse_mime (char *file)
 	    return NULL;
 	}
 	fseek (fp, 0L, SEEK_SET);
-    } else if (lstat (file, &statbuf) == NOTOK) {
-	advise (file, "unable to lstat");
+    } else if (stat (file, &statbuf) == NOTOK) {
+	advise (file, "unable to stat");
 	return NULL;
     } else if (S_ISDIR(statbuf.st_mode)) {
 	/* Don't try to parse a directory. */
@@ -300,11 +291,11 @@ static CT
 get_content (FILE *in, char *file, int toplevel)
 {
     int compnum, state;
-    char buf[BUFSIZ], name[NAMESZ];
+    char buf[NMH_BUFSIZ], name[NAMESZ];
     char *np, *vp;
     CT ct;
     HF hp;
-    m_getfld_state_t gstate = 0;
+    m_getfld_state_t gstate;
 
     /* allocate the content structure */
     NEW0(ct);
@@ -316,10 +307,11 @@ get_content (FILE *in, char *file, int toplevel)
      * Parse the header fields for this
      * content into a linked list.
      */
-    m_getfld_track_filepos (&gstate, in);
+    gstate = m_getfld_state_init(in);
+    m_getfld_track_filepos2(&gstate);
     for (compnum = 1;;) {
 	int bufsz = sizeof buf;
-	switch (state = m_getfld (&gstate, name, buf, &bufsz, in)) {
+	switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
 	case FLD:
 	case FLDPLUS:
 	    compnum++;
@@ -331,7 +323,7 @@ get_content (FILE *in, char *file, int toplevel)
 	    /* if necessary, get rest of field */
 	    while (state == FLDPLUS) {
 		bufsz = sizeof buf;
-		state = m_getfld (&gstate, name, buf, &bufsz, in);
+		state = m_getfld2(&gstate, name, buf, &bufsz);
 		vp = add (buf, vp);	/* add to previous value */
 	    }
 
@@ -343,12 +335,25 @@ get_content (FILE *in, char *file, int toplevel)
 	    continue;
 
 	case BODY:
+            /* There are two cases.  The unusual one is when there is no
+             * blank line between the headers and the body.  This is
+             * indicated by the name of the header starting with `:'.
+             *
+             * In the two diagrams below (normal case first), `1' marks
+             * the desired c_begin file position, i.e. the start of the
+             * body, and `2' marks the file position when buf is returned.
+             *
+             *     f o o :   b a r \n \n b o d y \n    bufsz = 6
+             *                          1          2   move -5
+             *     f o o :   b a r \n b o d y \n       bufsz = 4
+             *                       1       2         move -4
+             *
+             * In the normal case, bufsz counts the header-terminating
+             * `\n' even though that `\n' is not in buf.  In the unusual
+             * case there is no such `\n', so bufsz has nothing extra to
+             * count. */
 	    if (name[0] == ':') {
-		/* Special case:  no blank line between header and body.  The
-		   file position indicator is on the newline at the end of the
-		   line, but it needs to be one prior to the beginning of the
-		   line.  So subtract the length of the line, bufsz, plus 1. */
-		ct->c_begin = ftell (in) - (bufsz + 1);
+		ct->c_begin = ftell(in) - bufsz;
 	    } else {
 		ct->c_begin = ftell (in) - (bufsz - 1);
 	    }
@@ -413,7 +418,7 @@ get_content (FILE *in, char *file, int toplevel)
 	    ucmp = !strcasecmp (cp, VRSN_VALUE);
 	    *dp = c;
 	    if (!ucmp) {
-		admonish (NULL, "message %s has unknown value for %s: field (%s)",
+		inform("message %s has unknown value for %s: field (%s), continuing...",
 		ct->c_file, VRSN_FIELD, cp);
 	    }
 	    if (!ct->c_vrsn) {
@@ -661,9 +666,11 @@ get_ctinfo (char *cp, CT ct, int magic)
 
     for (dp = cp; istoken (*dp); dp++)
 	continue;
-    c = *dp, *dp = '\0';
+    c = *dp;
+    *dp = '\0';
     ci->ci_type = mh_xstrdup(cp);	/* store content type */
-    *dp = c, cp = dp;
+    *dp = c;
+    cp = dp;
 
     if (!*ci->ci_type) {
 	inform("invalid %s: field in message %s (empty type)",
@@ -695,9 +702,11 @@ get_ctinfo (char *cp, CT ct, int magic)
 
     for (dp = cp; istoken (*dp); dp++)
 	continue;
-    c = *dp, *dp = '\0';
+    c = *dp;
+    *dp = '\0';
     ci->ci_subtype = mh_xstrdup(cp);	/* store the content subtype */
-    *dp = c, cp = dp;
+    *dp = c;
+    cp = dp;
 
     if (!*ci->ci_subtype) {
 	inform("invalid %s: field in message %s (empty subtype for \"%s\")",
@@ -856,8 +865,8 @@ magic_skip:
 	    }
         }
 	else
-	    inform("extraneous information in message %s's %s: field\n%*s(%s)",
-                ct->c_file, TYPE_FIELD, strlen(invo_name) + 2, "", cp);
+            inform("extraneous information in message %s's %s: field\n"
+                "    (%s)", ct->c_file, TYPE_FIELD, cp);
     }
 
     return OK;
@@ -908,9 +917,11 @@ get_dispo (char *cp, CT ct, int buildflag)
 
     for (dp = cp; istoken (*dp); dp++)
 	continue;
-    c = *dp, *dp = '\0';
+    c = *dp;
+    *dp = '\0';
     ct->c_dispo_type = mh_xstrdup(cp);	/* store disposition type */
-    *dp = c, cp = dp;
+    *dp = c;
+    cp = dp;
 
     if (*cp == '(' && get_comment (ct->c_file, DISPO_FIELD, &cp, NULL) == NOTOK)
 	return NOTOK;
@@ -923,8 +934,8 @@ get_dispo (char *cp, CT ct, int buildflag)
 	    return NOTOK;
 	}
     } else if (*cp) {
-	inform("extraneous information in message %s's %s: field\n%*s(%s)",
-            ct->c_file, DISPO_FIELD, strlen(invo_name) + 2, "", cp);
+        inform("extraneous information in message %s's %s: field\n    (%s)",
+            ct->c_file, DISPO_FIELD, cp);
     }
 
     if (buildflag)
@@ -1091,7 +1102,7 @@ InitMultiPart (CT ct)
 
     /*
      * The encoding for multipart messages must be either
-     * 7bit, 8bit, or binary (per RFC2045).
+     * 7bit, 8bit, or binary (per RFC 2045).
      */
     if (! skip_mp_cte_check  &&  ct->c_encoding != CE_7BIT  &&
         ct->c_encoding != CE_8BIT  &&  ct->c_encoding != CE_BINARY) {
@@ -1103,13 +1114,12 @@ InitMultiPart (CT ct)
 	while (bp >= cte && isspace ((unsigned char) *bp)) *bp-- = '\0';
 	for (bp = cte; *bp && isblank ((unsigned char) *bp); ++bp) continue;
 
-	admonish (NULL,
-		  "\"%s/%s\" type in message %s must be encoded in\n"
-		  "7bit, 8bit, or binary, per RFC 2045 (6.4).  "
-                  "mhfixmsg -fixcte can fix it, or\n"
-                  "manually edit the file and change the \"%s\"\n"
-		  "Content-Transfer-Encoding to one of those.  For now",
-		  ci->ci_type, ci->ci_subtype, ct->c_file, bp);
+	inform("\"%s/%s\" type in message %s must be encoded in\n"
+	    "7bit, 8bit, or binary, per RFC 2045 (6.4).  "
+	    "mhfixmsg -fixcte can fix it, or\n"
+	    "manually edit the file and change the \"%s\"\n"
+	    "Content-Transfer-Encoding to one of those.  For now, continuing...",
+	    ci->ci_type, ci->ci_subtype, ct->c_file, bp);
 	free (cte);
 
 	return NOTOK;
@@ -1289,7 +1299,7 @@ last_part:
  * ease of choosing/displaying it later on.  from a mail message on
  * nmh-workers, from kenh:
  *  "Stock" MH 6.8.5 did not have a reverse_parts() function, but I
- *  see code in mhn that did the same thing...  Acccording to the RCS
+ *  see code in mhn that did the same thing...  According to the RCS
  *  logs, that code was around from the initial checkin of mhn.c by
  *  John Romine in 1992, which is as far back as we have."
  */
@@ -1398,9 +1408,9 @@ InitMessage (CT ct)
     CI ci = &ct->c_ctinfo;
 
     if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
-	admonish (NULL,
-		  "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit",
-		  ci->ci_type, ci->ci_subtype, ct->c_file);
+	inform("\"%s/%s\" type in message %s should be encoded in "
+	    "7bit or 8bit, continuing...", ci->ci_type, ci->ci_subtype,
+	    ct->c_file);
 	return NOTOK;
     }
 
@@ -1831,7 +1841,10 @@ openBase64 (CT ct, char **file)
     *cp = '\0';
 
     if (decodeBase64 (buffer, &decoded, &decoded_len, ct->c_type == CT_TEXT,
-                      ct->c_digested ? digest : NULL) == OK) {
+                      ct->c_digested ? digest : NULL) != OK)
+        goto clean_up;
+
+    {
         size_t i;
         unsigned char *decoded_p = decoded;
         for (i = 0; i < decoded_len; ++i) {
@@ -1845,7 +1858,7 @@ openBase64 (CT ct, char **file)
 
         if (ct->c_digested) {
             if (memcmp(digest, ct->c_digest,
-                       sizeof(digest) / sizeof(digest[0]))) {
+                       sizeof digest)) {
                 content_error (NULL, ct,
                                "content integrity suspect (digest mismatch) -- continuing");
             } else {
@@ -1854,8 +1867,6 @@ openBase64 (CT ct, char **file)
                 }
             }
         }
-    } else {
-        goto clean_up;
     }
 
     fseek (ct->c_fp, 0L, SEEK_SET);
@@ -2011,7 +2022,8 @@ openQuoted (CT ct, char **file)
 	for (ep = (cp = bufp) + cc - 1; cp <= ep; ep--)
 	    if (!isspace ((unsigned char) *ep))
 		break;
-	*++ep = '\n', ep++;
+        *++ep = '\n';
+        ep++;
 
 	for (; cp < ep; cp++) {
 	    if (quoted > 0) {
@@ -2057,11 +2069,10 @@ openQuoted (CT ct, char **file)
 		     * sequence; let's decode it (above). */
 		    quoted = 1;
 		    continue;
-		} else {
-		    /* One or both of the next 2 is out of range, making this
-		     * an invalid escape sequence; just show the raw bytes
-		     * (below). */
 		}
+                /* One or both of the next 2 is out of range, making this
+                 * an invalid escape sequence; just show the raw bytes
+                 * (below). */
 	    }
 
 	    /* Just show the raw byte. */
@@ -2097,12 +2108,11 @@ openQuoted (CT ct, char **file)
 
 	MD5Final (digest, &mdContext);
 	if (memcmp((char *) digest, (char *) ct->c_digest,
-		   sizeof(digest) / sizeof(digest[0])))
+		   sizeof digest))
 	    content_error (NULL, ct,
 			   "content integrity suspect (digest mismatch) -- continuing");
-	else
-	    if (debugsw)
-		fprintf (stderr, "content integrity confirmed\n");
+	else if (debugsw)
+            fprintf (stderr, "content integrity confirmed\n");
     }
 
     fseek (ce->ce_fp, 0L, SEEK_SET);
@@ -2330,7 +2340,7 @@ openExternal (CT ct, CT cb, CE ce, char **file, int *fd)
         admonish (cachefile, "unable to fopen for reading");
     }
 
-    *fd = fileno (ce->ce_fp);
+    *fd = ce->ce_fp ? fileno (ce->ce_fp) : -1;
     return OK;
 
 ready_already:
@@ -2406,12 +2416,10 @@ openFile (CT ct, char **file)
 	    if (ferror (gp)) {
 		admonish (ce->ce_file, "error reading");
 		(void) m_unlink (cachefile);
-	    }
-	    else
-		if (ferror (fp)) {
-		    admonish (cachefile, "error writing");
-		    (void) m_unlink (cachefile);
-		}
+	    } else if (ferror (fp)) {
+                admonish (cachefile, "error writing");
+                (void) m_unlink (cachefile);
+            }
 	    fclose (fp);
 	}
 	umask (mask);
@@ -2617,12 +2625,10 @@ openFTP (CT ct, char **file)
 		if (ferror (gp)) {
 		    admonish (ce->ce_file, "error reading");
 		    (void) m_unlink (cachefile);
-		}
-		else
-		    if (ferror (fp)) {
-			admonish (cachefile, "error writing");
-			(void) m_unlink (cachefile);
-		    }
+		} else if (ferror (fp)) {
+                    admonish (cachefile, "error writing");
+                    (void) m_unlink (cachefile);
+                }
 		fclose (fp);
 	    }
 	    umask (mask);
@@ -2887,7 +2893,7 @@ openURL (CT ct, char **file)
 
     fseeko(ce->ce_fp, 0, SEEK_SET);
     *file = ce->ce_file;
-    return fd;
+    return fileno(ce->ce_fp);
 }
 
 
@@ -2902,7 +2908,7 @@ readDigest (CT ct, char *cp)
 
     size_t len;
     if (decodeBase64 (cp, &digest, &len, 0, NULL) == OK) {
-        const size_t maxlen = sizeof ct->c_digest / sizeof ct->c_digest[0];
+        const size_t maxlen = sizeof ct->c_digest;
 
         if (strlen ((char *) digest) <= maxlen) {
             memcpy (ct->c_digest, digest, maxlen);
@@ -3295,9 +3301,9 @@ parse_header_attrs (const char *filename, const char *fieldname,
 	for (up = dp; isspace ((unsigned char) *dp);)
 	    dp++;
 	if (dp == cp || *dp != '=') {
-	    inform("invalid parameter in message %s's %s: "
-                "field\n%*sparameter %s (error detected at offset %d)",
-                filename, fieldname, strlen(invo_name) + 2, "",cp, dp - cp);
+            inform("invalid parameter in message %s's %s: field\n"
+                "    parameter %s (error detected at offset %ld)",
+                filename, fieldname, cp, (long)(dp - cp));
 	    return NOTOK;
 	}
 
@@ -3321,15 +3327,15 @@ parse_header_attrs (const char *filename, const char *fieldname,
 	    if (*vp == '*' && vp < up - 1) {
 		partial = 1;
 		continue;
-	    } else if (*vp == '*' && vp == up - 1) {
+	    }
+            if (*vp == '*' && vp == up - 1) {
 	    	encoded = 1;
 	    } else if (partial) {
 		if (isdigit((unsigned char) *vp))
 		    index = *vp - '0' + index * 10;
 		else {
-		    inform("invalid parameter index in message %s's "
-			    "%s: field\n%*s(parameter %s)", filename,
-			    fieldname, strlen(invo_name) + 2, "", cp);
+                    inform("invalid parameter index in message %s's %s: field"
+                        "\n    (parameter %s)", filename, fieldname, cp);
 		    return NOTOK;
 		}
 	    } else {
@@ -3371,9 +3377,8 @@ parse_header_attrs (const char *filename, const char *fieldname,
 		    }
 		    vp++;
 		} else {
-		    inform("missing charset in message %s's %s: "
-			   "field\n%*s(parameter %s)", filename, fieldname,
-			   strlen(invo_name) + 2, "", nameptr);
+                    inform("missing charset in message %s's %s: field\n"
+                        "    (parameter %s)", filename, fieldname, nameptr);
 		    free(nameptr);
 		    return NOTOK;
 		}
@@ -3394,9 +3399,8 @@ parse_header_attrs (const char *filename, const char *fieldname,
 		    }
 		    vp++;
 		} else {
-		    inform("missing language tag in message %s's %s: "
-			   "field\n%*s(parameter %s)", filename, fieldname,
-			   strlen(invo_name) + 2, "", nameptr);
+                    inform("missing language tag in message %s's %s: field\n"
+                        "    (parameter %s)", filename, fieldname, nameptr);
 		    free(nameptr);
                     mh_xfree(charset);
 		    return NOTOK;
@@ -3418,10 +3422,8 @@ parse_header_attrs (const char *filename, const char *fieldname,
 				!isxdigit((unsigned char) *(vp + 1)) ||
 				*(vp + 2) == '\0' ||
 				!isxdigit((unsigned char) *(vp + 2))) {
-			inform("invalid encoded sequence in message "
-			       "%s's %s: field\n%*s(parameter %s)",
-			       filename, fieldname, strlen(invo_name) + 2,
-			       "", nameptr);
+                        inform("invalid encoded sequence in message %s's %s: field\n"
+                            "    (parameter %s)", filename, fieldname, nameptr);
 			free(nameptr);
                         mh_xfree(charset);
                         mh_xfree(lang);
@@ -3461,9 +3463,8 @@ parse_header_attrs (const char *filename, const char *fieldname,
 		    switch (*cp++) {
 		    case '\0':
 bad_quote:
-		        inform("invalid quoted-string in message %s's %s: "
-                            "field\n%*s(parameter %s)", filename,
-                            fieldname, strlen(invo_name) + 2, "", nameptr);
+                        inform("invalid quoted-string in message %s's %s: field\n"
+                            "    (parameter %s)", filename, fieldname, nameptr);
 			free(nameptr);
                         mh_xfree(charset);
                         mh_xfree(lang);
@@ -3547,10 +3548,9 @@ bad_quote:
 	    } else {
 		for (sp2 = pp->sechead; sp2 != NULL; sp2 = sp2->next) {
 		    if (sp2->index == sp->index) {
-			inform("duplicate index (%d) in message "
-				"%s's %s: field\n%*s(parameter %s)", sp->index,
-				filename, fieldname, strlen(invo_name) + 2, "",
-				nameptr);
+                        inform("duplicate index (%d) in message %s's %s: field"
+                            "\n    (parameter %s)", sp->index, filename,
+                            fieldname, nameptr);
 			return NOTOK;
 		    }
 		    if (sp2->index < sp->index &&
@@ -3562,10 +3562,9 @@ bad_quote:
 		}
 
 		if (sp2 == NULL) {
-		    inform("Internal error: cannot insert partial "
-		    	   "param in message %s's %s: field\n%*s(parameter %s)",
-			   filename, fieldname, strlen(invo_name) + 2, "",
-			   nameptr);
+                    inform("Internal error: cannot insert partial param "
+                        "in message %s's %s: field\n    (parameter %s)",
+                        filename, fieldname, nameptr);
 		    return NOTOK;
 		}
 	    }
@@ -3605,10 +3604,9 @@ bad_quote:
 	int pindex = 0;
 	for (sp = pp->sechead; sp != NULL; sp = sp->next) {
 	    if (sp->index != pindex++) {
-		inform("missing section %d for parameter in "
-		       "message %s's %s: field\n%*s(parameter %s)", pindex - 1,
-		       filename, fieldname, strlen(invo_name) + 2, "",
-		       pp->name);
+                inform("missing section %d for parameter in message "
+                    "%s's %s: field\n    (parameter %s)", pindex - 1,
+                    filename, fieldname, pp->name);
 		return NOTOK;
 	    }
 	    tlen += sp->len;
@@ -3757,7 +3755,7 @@ output_params(size_t initialwidth, PM params, int *offsetout, int external)
 	}
 
 	/*
-	 * At this point, we're either finishing a contined parameter, or
+	 * At this point, we're either finishing a continued parameter, or
 	 * we're working on a new one.
 	 */