Document argsplit changes in mh-profile man page.

[nmh] / uip / mhparse.c
diff --git a/uip/mhparse.c b/uip/mhparse.c

index ea5400e07ed2194fb50e84b37075e7ed9011de48..a80cc7460ca59a0401efd46f0c897dbcafe7e2db 100644 (file)
--- a/uip/mhparse.c
+++ b/uip/mhparse.c
@@ -36,6 +36,12 @@ int checksw = 0;     /* check Content-MD5 field */
   */
  char *tmp;
  
+/*
+ * When nonzero, InitMultiPart() skips its check that a multipart's
+ * Content-Transfer-Encoding is 7bit, 8bit, or binary.
+ */
+int skip_mp_cte_check;
+
  /*
   * Structures for TEXT messages
   */
@@ -88,14 +94,13 @@ int type_ok (CT, int);
  void content_error (char *, CT, char *, ...);
  
  /* mhfree.c */
-void free_content (CT);
  void free_encoding (CT, int);
  
  /*
   * static prototypes
   */
  static CT get_content (FILE *, char *, int);
-static int get_comment (CT, unsigned char **, int);
+static int get_comment (CT, char **, int);
  
  static int InitGeneric (CT);
  static int InitText (CT);
@@ -118,6 +123,7 @@ static int openFTP (CT, char **);
  static int InitMail (CT);
  static int openMail (CT, char **);
  static int readDigest (CT, char *);
+static int get_leftover_mp_content (CT, int);
  
  struct str2init str2cts[] = {
      { "application", CT_APPLICATION, InitApplication },
@@ -262,6 +268,7 @@ get_content (FILE *in, char *file, int toplevel)
      char *np, *vp;
      CT ct;
      HF hp;
+    m_getfld_state_t gstate = 0;
  
      /* allocate the content structure */
      if (!(ct = (CT) calloc (1, sizeof(*ct))))
@@ -275,12 +282,12 @@ get_content (FILE *in, char *file, int toplevel)
       * Parse the header fields for this
       * content into a linked list.
       */
-    for (compnum = 1, state = FLD;;) {
+    m_getfld_track_filepos (&gstate, in);
+    for (compnum = 1;;) {
         int bufsz = sizeof buf;
-       switch (state = m_getfld (state, name, buf, &bufsz, in)) {
+       switch (state = m_getfld (&gstate, name, buf, &bufsz, in)) {
         case FLD:
         case FLDPLUS:
-       case FLDEOF:
             compnum++;
  
             /* get copies of the buffers */
@@ -290,22 +297,18 @@ get_content (FILE *in, char *file, int toplevel)
             /* if necessary, get rest of field */
             while (state == FLDPLUS) {
                 bufsz = sizeof buf;
-               state = m_getfld (state, name, buf, &bufsz, in);
+               state = m_getfld (&gstate, name, buf, &bufsz, in);
                 vp = add (buf, vp);     /* add to previous value */
             }
  
             /* Now add the header data to the list */
             add_header (ct, np, vp);
  
-           /* continue, if this isn't the last header field */
-           if (state != FLDEOF) {
-               ct->c_begin = ftell (in) + 1;
-               continue;
-           }
-           /* else fall... */
+           /* continue, to see if this isn't the last header field */
+           ct->c_begin = ftell (in) + 1;
+           continue;
  
         case BODY:
-       case BODYEOF:
             ct->c_begin = ftell (in) - strlen (buf);
             break;
  
@@ -324,6 +327,7 @@ get_content (FILE *in, char *file, int toplevel)
         /* break out of the loop */
         break;
      }
+    m_getfld_state_destroy (&gstate);
  
      /*
       * Read the content headers.  We will parse the
@@ -337,8 +341,7 @@ get_content (FILE *in, char *file, int toplevel)
         /* Get MIME-Version field */
         if (!mh_strcasecmp (hp->name, VRSN_FIELD)) {
             int ucmp;
-           char c;
-           unsigned char *cp, *dp;
+           char c, *cp, *dp;
  
             if (ct->c_vrsn) {
                 advise (NULL, "message %s has multiple %s: fields",
@@ -350,12 +353,12 @@ get_content (FILE *in, char *file, int toplevel)
             /* Now, cleanup this field */
             cp = ct->c_vrsn;
  
-           while (isspace (*cp))
+           while (isspace ((unsigned char) *cp))
                 cp++;
             for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
                 *dp++ = ' ';
             for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
-               if (!isspace (*dp))
+               if (!isspace ((unsigned char) *dp))
                     break;
             *++dp = '\0';
             if (debugsw)
@@ -405,8 +408,7 @@ get_content (FILE *in, char *file, int toplevel)
         }
         else if (!mh_strcasecmp (hp->name, ENCODING_FIELD)) {
         /* Get Content-Transfer-Encoding field */
-           char c;
-           unsigned char *cp, *dp;
+           char c, *cp, *dp;
             struct str2init *s2i;
  
             /*
@@ -422,7 +424,7 @@ get_content (FILE *in, char *file, int toplevel)
             /* get copy of this field */
             ct->c_celine = cp = add (hp->value, NULL);
  
-           while (isspace (*cp))
+           while (isspace ((unsigned char) *cp))
                 cp++;
             for (dp = cp; istoken (*dp); dp++)
                 continue;
@@ -447,8 +449,7 @@ get_content (FILE *in, char *file, int toplevel)
         }
         else if (!mh_strcasecmp (hp->name, MD5_FIELD)) {
         /* Get Content-MD5 field */
-           unsigned char *cp, *dp;
-           char *ep;
+           char *cp, *dp, *ep;
  
             if (!checksw)
                 goto next_header;
@@ -461,12 +462,12 @@ get_content (FILE *in, char *file, int toplevel)
  
             ep = cp = add (hp->value, NULL);    /* get a copy */
  
-           while (isspace (*cp))
+           while (isspace ((unsigned char) *cp))
                 cp++;
             for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
                 *dp++ = ' ';
             for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
-               if (!isspace (*dp))
+               if (!isspace ((unsigned char) *dp))
                     break;
             *++dp = '\0';
             if (debugsw)
@@ -477,7 +478,7 @@ get_content (FILE *in, char *file, int toplevel)
                 goto out;
             }
  
-           for (dp = cp; *dp && !isspace (*dp); dp++)
+           for (dp = cp; *dp && !isspace ((unsigned char) *dp); dp++)
                 continue;
             *dp = '\0';
  
@@ -580,7 +581,7 @@ add_header (CT ct, char *name, char *value)
     filename="foo".  If it doesn't and value does, use value from
     that. */
  static char *
-incl_name_value (unsigned char *buf, char *name, char *value) {
+incl_name_value (char *buf, char *name, char *value) {
      char *newbuf = buf;
  
      /* Assume that name is non-null. */
@@ -589,12 +590,11 @@ incl_name_value (unsigned char *buf, char *name, char *value) {
  
         if (! strstr (buf, name_plus_equal)) {
             char *insertion;
-           unsigned char *cp;
-           char *prefix, *suffix;
+           char *cp, *prefix, *suffix;
  
             /* Trim trailing space, esp. newline. */
             for (cp = &buf[strlen (buf) - 1];
-                cp >= buf && isspace (*cp);
+                cp >= buf && isspace ((unsigned char) *cp);
                  --cp) {
                 *cp = '\0';
             }
@@ -660,11 +660,10 @@ extract_name_value (char *name_suffix, char *value) {
   * directives.  Fills in the information of the CTinfo structure.
   */
  int
-get_ctinfo (unsigned char *cp, CT ct, int magic)
+get_ctinfo (char *cp, CT ct, int magic)
  {
      int        i;
-    unsigned char *dp;
-    char **ap, **ep;
+    char *dp, **ap, **ep;
      char c;
      CI ci;
  
@@ -674,7 +673,7 @@ get_ctinfo (unsigned char *cp, CT ct, int magic)
      /* store copy of Content-Type line */
      cp = ct->c_ctline = add (cp, NULL);
  
-    while (isspace (*cp))      /* trim leading spaces */
+    while (isspace ((unsigned char) *cp))      /* trim leading spaces */
         cp++;
  
      /* change newlines to spaces */
@@ -683,7 +682,7 @@ get_ctinfo (unsigned char *cp, CT ct, int magic)
  
      /* trim trailing spaces */
      for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
-       if (!isspace (*dp))
+       if (!isspace ((unsigned char) *dp))
             break;
      *++dp = '\0';
  
@@ -707,10 +706,10 @@ get_ctinfo (unsigned char *cp, CT ct, int magic)
  
      /* down case the content type string */
      for (dp = ci->ci_type; *dp; dp++)
-       if (isalpha(*dp) && isupper (*dp))
-           *dp = tolower (*dp);
+       if (isalpha((unsigned char) *dp) && isupper ((unsigned char) *dp))
+           *dp = tolower ((unsigned char) *dp);
  
-    while (isspace (*cp))
+    while (isspace ((unsigned char) *cp))
         cp++;
  
      if (*cp == '(' && get_comment (ct, &cp, 1) == NOTOK)
@@ -723,7 +722,7 @@ get_ctinfo (unsigned char *cp, CT ct, int magic)
      }
  
      cp++;
-    while (isspace (*cp))
+    while (isspace ((unsigned char) *cp))
         cp++;
  
      if (*cp == '(' && get_comment (ct, &cp, 1) == NOTOK)
@@ -744,11 +743,11 @@ get_ctinfo (unsigned char *cp, CT ct, int magic)
  
      /* down case the content subtype string */
      for (dp = ci->ci_subtype; *dp; dp++)
-       if (isalpha(*dp) && isupper (*dp))
-           *dp = tolower (*dp);
+       if (isalpha((unsigned char) *dp) && isupper ((unsigned char) *dp))
+           *dp = tolower ((unsigned char) *dp);
  
  magic_skip:
-    while (isspace (*cp))
+    while (isspace ((unsigned char) *cp))
         cp++;
  
      if (*cp == '(' && get_comment (ct, &cp, 1) == NOTOK)
@@ -759,8 +758,7 @@ magic_skip:
       */
      ep = (ap = ci->ci_attrs) + NPARMS;
      while (*cp == ';') {
-       char *vp;
-       unsigned char *up;
+       char *vp, *up;
  
         if (ap >= ep) {
             advise (NULL,
@@ -770,7 +768,7 @@ magic_skip:
         }
  
         cp++;
-       while (isspace (*cp))
+       while (isspace ((unsigned char) *cp))
             cp++;
  
         if (*cp == '(' && get_comment (ct, &cp, 1) == NOTOK)
@@ -784,11 +782,11 @@ magic_skip:
         }
  
         /* down case the attribute name */
-       for (dp = cp; istoken (*dp); dp++)
-           if (isalpha(*dp) && isupper (*dp))
-               *dp = tolower (*dp);
+       for (dp = cp; istoken ((unsigned char) *dp); dp++)
+           if (isalpha((unsigned char) *dp) && isupper ((unsigned char) *dp))
+               *dp = tolower ((unsigned char) *dp);
  
-       for (up = dp; isspace (*dp);)
+       for (up = dp; isspace ((unsigned char) *dp);)
             dp++;
         if (dp == cp || *dp != '=') {
             advise (NULL,
@@ -799,7 +797,7 @@ magic_skip:
  
         vp = (*ap = add (cp, NULL)) + (up - cp);
         *vp = '\0';
-       for (dp++; isspace (*dp);)
+       for (dp++; isspace ((unsigned char) *dp);)
             dp++;
  
         /* now add the attribute value */
@@ -844,7 +842,7 @@ bad_quote:
         }
         ap++;
  
-       while (isspace (*cp))
+       while (isspace ((unsigned char) *cp))
             cp++;
  
         if (*cp == '(' && get_comment (ct, &cp, 1) == NOTOK)
@@ -872,7 +870,7 @@ bad_quote:
         *dp++ = c;
         cp = dp;
  
-       while (isspace (*cp))
+       while (isspace ((unsigned char) *cp))
             cp++;
      }
  
@@ -899,7 +897,7 @@ bad_quote:
         *dp++ = c;
         cp = dp;
  
-       while (isspace (*cp))
+       while (isspace ((unsigned char) *cp))
             cp++;
      }
  
@@ -926,7 +924,7 @@ bad_quote:
         *dp++ = c;
         cp = dp;
  
-       while (isspace (*cp))
+       while (isspace ((unsigned char) *cp))
             cp++;
      }
  
@@ -961,11 +959,10 @@ bad_quote:
  
  
  static int
-get_comment (CT ct, unsigned char **ap, int istype)
+get_comment (CT ct, char **ap, int istype)
  {
      int i;
-    char *bp;
-    unsigned char *cp;
+    char *bp, *cp;
      char c, buffer[BUFSIZ], *dp;
      CI ci;
  
@@ -1015,7 +1012,7 @@ invalid:
         }
      }
  
-    while (isspace (*cp))
+    while (isspace ((unsigned char) *cp))
         cp++;
  
      *ap = cp;
@@ -1107,8 +1104,7 @@ InitMultiPart (CT ct)
  {
      int        inout;
      long last, pos;
-    unsigned char *cp, *dp;
-    char **ap, **ep;
+    char *cp, *dp, **ap, **ep;
      char *bp, buffer[BUFSIZ];
      struct multipart *m;
      struct k2v *kv;
@@ -1121,11 +1117,24 @@ InitMultiPart (CT ct)
       * The encoding for multipart messages must be either
       * 7bit, 8bit, or binary (per RFC2045).
       */
-    if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT
-       && ct->c_encoding != CE_BINARY) {
+    if (! skip_mp_cte_check  &&  ct->c_encoding != CE_7BIT  &&
+        ct->c_encoding != CE_8BIT  &&  ct->c_encoding != CE_BINARY) {
+       /* Copy the Content-Transfer-Encoding header field body so we can
+          remove any trailing whitespace and leading blanks from it. */
+       char *cte = add (ct->c_celine ? ct->c_celine : "(null)", NULL);
+
+       bp = cte + strlen (cte) - 1;
+       while (bp >= cte && isspace ((unsigned char) *bp)) *bp-- = '\0';
+       for (bp = cte; *bp && isblank ((unsigned char) *bp); ++bp) continue;
+
         admonish (NULL,
-                 "\"%s/%s\" type in message %s must be encoded in 7bit, 8bit, or binary",
-                 ci->ci_type, ci->ci_subtype, ct->c_file);
+                 "\"%s/%s\" type in message %s must be encoded in\n"
+                 "7bit, 8bit, or binary, per RFC 2045 (6.4).  One workaround "
+                 "is to\nmanually edit the file and change the \"%s\"\n"
+                 "Content-Transfer-Encoding to one of those.  For now",
+                 ci->ci_type, ci->ci_subtype, ct->c_file, bp);
+       free (cte);
+
         return NOTOK;
      }
  
@@ -1161,7 +1170,7 @@ InitMultiPart (CT ct)
      ct->c_ctparams = (void *) m;
  
      /* check if boundary parameter contains only whitespace characters */
-    for (cp = bp; isspace (*cp); cp++)
+    for (cp = bp; isspace ((unsigned char) *cp); cp++)
         continue;
      if (!*cp) {
         advise (NULL, "invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field",
@@ -1171,7 +1180,7 @@ InitMultiPart (CT ct)
  
      /* remove trailing whitespace from boundary parameter */
      for (cp = bp, dp = cp + strlen (cp) - 1; dp > cp; dp--)
-       if (!isspace (*dp))
+       if (!isspace ((unsigned char) *dp))
             break;
      *++dp = '\0';
  
@@ -1286,6 +1295,9 @@ last_part:
         }
      }
  
+    get_leftover_mp_content (ct, 1);
+    get_leftover_mp_content (ct, 0);
+
      fclose (ct->c_fp);
      ct->c_fp = NULL;
      return OK;
@@ -1720,8 +1732,7 @@ openBase64 (CT ct, char **file)
      int fd, len, skip, own_ct_fp = 0;
      uint32_t bits;
      unsigned char value, b;
-    unsigned char *cp, *ep;
-    char buffer[BUFSIZ];
+    char *cp, *ep, buffer[BUFSIZ];
      /* sbeck -- handle suffixes */
      CI ci;
      CE ce;
@@ -1817,13 +1828,13 @@ openBase64 (CT ct, char **file)
             for (ep = (cp = buffer) + cc; cp < ep; cp++) {
                 switch (*cp) {
                 default:
-                   if (isspace (*cp))
+                   if (isspace ((unsigned char) *cp))
                         break;
-                   if (skip || (*cp & 0x80)
-                       || (value = b642nib[*cp & 0x7f]) > 0x3f) {
+                   if (skip || (((unsigned char) *cp) & 0x80)
+                       || (value = b642nib[((unsigned char) *cp) & 0x7f]) > 0x3f) {
                         if (debugsw) {
                             fprintf (stderr, "*cp=0x%x pos=%ld skip=%d\n",
-                               *cp,
+                               (unsigned char) *cp,
                                 (long) (lseek (fd, (off_t) 0, SEEK_CUR) - (ep - cp)),
                                 skip);
                         }
@@ -1954,7 +1965,7 @@ static int
  openQuoted (CT ct, char **file)
  {
      int        cc, digested, len, quoted, own_ct_fp = 0;
-    unsigned char *cp, *ep;
+    char *cp, *ep;
      char buffer[BUFSIZ];
      unsigned char mask;
      CE ce;
@@ -2046,7 +2057,7 @@ openQuoted (CT ct, char **file)
         len -= cc;
  
         for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--)
-           if (!isspace (*ep))
+           if (!isspace ((unsigned char) *ep))
                 break;
         *++ep = '\n', ep++;
  
@@ -2055,13 +2066,13 @@ openQuoted (CT ct, char **file)
                 /* in an escape sequence */
                 if (quoted == 1) {
                     /* at byte 1 of an escape sequence */
-                   mask = hex2nib[*cp & 0x7f];
+                   mask = hex2nib[((unsigned char) *cp) & 0x7f];
                     /* next is byte 2 */
                     quoted = 2;
                 } else {
                     /* at byte 2 of an escape sequence */
                     mask <<= 4;
-                   mask |= hex2nib[*cp & 0x7f];
+                   mask |= hex2nib[((unsigned char) *cp) & 0x7f];
                     putc (mask, ce->ce_fp);
                     if (digested)
                         MD5Update (&mdContext, &mask, 1);
@@ -2088,7 +2099,8 @@ openQuoted (CT ct, char **file)
                 if (cp + 1 >= ep || cp + 2 >= ep) {
                     /* We don't have 2 bytes left, so this is an invalid
                      * escape sequence; just show the raw bytes (below). */
-               } else if (isxdigit (cp[1]) && isxdigit (cp[2])) {
+               } else if (isxdigit ((unsigned char) cp[1]) &&
+                                       isxdigit ((unsigned char) cp[2])) {
                     /* Next 2 bytes are hex digits, making this a valid escape
                      * sequence; let's decode it (above). */
                     quoted = 1;
@@ -2617,7 +2629,7 @@ openFTP (CT ct, char **file)
  
         fflush (stdout);
  
-       for (i = 0; (child_id = vfork()) == NOTOK && i < 5; i++)
+       for (i = 0; (child_id = fork()) == NOTOK && i < 5; i++)
             sleep (5);
         switch (child_id) {
             case NOTOK:
@@ -2759,7 +2771,7 @@ openMail (CT ct, char **file)
      vec[vecp++] = e->eb_body;
      vec[vecp] = NULL;
  
-    for (i = 0; (child_id = vfork()) == NOTOK && i < 5; i++)
+    for (i = 0; (child_id = fork()) == NOTOK && i < 5; i++)
         sleep (5);
      switch (child_id) {
         case NOTOK:
@@ -2878,3 +2890,109 @@ invalid_digest:
  
      return OK;
  }
+
+
+/* Save any text of multipart CT that lies outside its subparts: what
+   precedes the first boundary line if BEFORE is nonzero, else what
+   follows the closing boundary line.  Text longer than one character
+   is stored in mp_content_before or mp_content_after of the struct
+   multipart.  Returns NOTOK if there is no multipart data or subpart
+   to measure against, or if the seek fails; OK otherwise. */
+static int
+get_leftover_mp_content (CT ct, int before /* or after */) {
+    struct multipart *m = (struct multipart *) ct->c_ctparams;
+    char *boundary, *content = NULL;
+    int found_boundary = 0;
+    char buffer[BUFSIZ];
+    long max, nread = 0;
+
+    if (! m) return NOTOK;
+
+    if (before) {
+        if (! m->mp_parts  ||  ! m->mp_parts->mp_part) return NOTOK;
+
+        /* Isolate the beginning of this part to the beginning of the
+           first subpart and save any content between them. */
+        if (fseeko (ct->c_fp, ct->c_begin, SEEK_SET) != 0) return NOTOK;
+        max = m->mp_parts->mp_part->c_begin - ct->c_begin;
+        boundary = concat ("--", m->mp_start, NULL);
+    } else {
+        struct part *last_subpart = NULL;
+        struct part *subpart;
+
+        /* Go to the last subpart to get its end position. */
+        for (subpart = m->mp_parts; subpart; subpart = subpart->mp_next) {
+            last_subpart = subpart;
+        }
+
+        if (last_subpart == NULL) return NOTOK;
+
+        /* Isolate the end of the last subpart to the end of this part
+           and save any content between them. */
+        if (fseeko (ct->c_fp, last_subpart->mp_part->c_end, SEEK_SET) != 0)
+            return NOTOK;
+        max = ct->c_end - last_subpart->mp_part->c_end;
+        boundary = concat ("--", m->mp_stop, NULL);
+    }
+
+    while (fgets (buffer, sizeof(buffer) - 1, ct->c_fp)) {
+        long len = (long) strlen (buffer);
+        nread += len;
+        /* Don't look beyond beginning of first subpart (before) or next
+           part (after):  drop this line's bytes past the max-byte limit. */
+        if (nread > max) {
+            long keep = len - (nread - max);
+            buffer[keep > 0 ? keep : 0] = '\0';
+        }
+
+        if (before) {
+            if (! strcmp (buffer, boundary)) {
+                found_boundary = 1;
+            }
+        } else {
+            if (! found_boundary  &&  ! strcmp (buffer, boundary)) {
+                found_boundary = 1;
+                continue;
+            }
+        }
+
+        if ((before && ! found_boundary)  ||  (! before && found_boundary)) {
+            if (content) {
+                char *old_content = content;
+                content = concat (content, buffer, NULL);
+                free (old_content);
+            } else {
+                content = before
+                    ?  concat ("\n", buffer, NULL)
+                    :  concat (buffer, NULL);
+            }
+        }
+
+        if ((before && found_boundary)  ||  nread > max) break;
+    }
+
+    if (content) {
+        char *cp;
+
+        /* Remove trailing newline, except at EOF. */
+        if ((before || ! feof (ct->c_fp)) &&
+            (cp = content + strlen (content)) > content  &&
+            *--cp == '\n') {
+            *cp = '\0';
+        }
+
+        if (strlen (content) > 1) {
+            if (before) {
+                m->mp_content_before = content;
+            } else {
+                m->mp_content_after = content;
+            }
+        } else {
+            free (content);
+        }
+    }
+
+    free (boundary);
+
+    return OK;
+}