Add a new (but incomplete) test for mhbuild's new functionality.

[nmh] / uip / mhbuildsbr.c
diff --git a/uip/mhbuildsbr.c b/uip/mhbuildsbr.c

index 2b78a364d75742c81949ee4022f8587da8618565..b2949c3905c01bc5c54cd723388f86fe93cae4ee 100644 (file)
--- a/uip/mhbuildsbr.c
+++ b/uip/mhbuildsbr.c
@@ -74,10 +74,10 @@ void free_encoding (CT, int);
  static int init_decoded_content (CT);
  static void setup_attach_content(CT, char *);
  static char *fgetstr (char *, int, FILE *);
-static int user_content (FILE *, char *, char *, CT *);
+static int user_content (FILE *, char *, CT *);
  static void set_id (CT, int);
  static int compose_content (CT);
-static int scan_content (CT);
+static int scan_content (CT, size_t);
  static int build_headers (CT);
  static char *calculate_digest (CT, int);
  
@@ -123,7 +123,8 @@ static void directive_pop(void)
   */
  
  CT
-build_mime (char *infile, int autobuild, int directives, int header_encoding)
+build_mime (char *infile, int autobuild, int dist, int directives,
+           int header_encoding, size_t maxunencoded)
  {
      int        compnum, state;
      char buf[BUFSIZ], name[NAMESZ];
@@ -230,7 +231,9 @@ build_mime (char *infile, int autobuild, int directives, int header_encoding)
  
                 entry = mh_xmalloc(sizeof(*entry));
                 entry->filename = getcpy(s);
+               entry->next = NULL;
                 free(vp);
+
                 if (attach_tail) {
                     attach_tail->next = entry;
                     attach_tail = entry;
@@ -277,9 +280,12 @@ finish_field:
       * Now add the MIME-Version header field
       * to the list of header fields.
       */
-    np = add (VRSN_FIELD, NULL);
-    vp = concat (" ", VRSN_VALUE, "\n", NULL);
-    add_header (ct, np, vp);
+
+    if (! dist) {
+       np = add (VRSN_FIELD, NULL);
+       vp = concat (" ", VRSN_VALUE, "\n", NULL);
+       add_header (ct, np, vp);
+    }
  
      /*
       * We initially assume we will find multiple contents in the
@@ -305,7 +311,7 @@ finish_field:
         struct part *part;
         CT p;
  
-       if (user_content (in, infile, buf, &p) == DONE) {
+       if (user_content (in, buf, &p) == DONE) {
             admonish (NULL, "ignoring spurious #end");
             continue;
         }
@@ -319,12 +325,6 @@ finish_field:
         part->mp_part = p;
      }
  
-    /*
-     * close the composition draft since
-     * it's not needed any longer.
-     */
-    fclose (in);
-
      /*
       * Add any Attach headers to the list of MIME parts at the end of the
       * message.
@@ -363,9 +363,55 @@ finish_field:
         free(at_prev);
      }
  
-    /* check if any contents were found */
-    if (!m->mp_parts)
-       adios (NULL, "no content directives found");
+    /*
+     * To allow for empty message bodies, if we've found NO content at all
+     * yet, cook up an empty text/plain part.
+     */
+
+    if (!m->mp_parts) {
+       CT p;
+       struct part *part;
+       struct text *t;
+
+       if ((p = (CT) calloc (1, sizeof(*p))) == NULL)
+           adios(NULL, "out of memory");
+
+       init_decoded_content(p);
+
+       if (get_ctinfo ("text/plain", p, 0) == NOTOK)
+           done (1);
+
+       p->c_type = CT_TEXT;
+       p->c_subtype = TEXT_PLAIN;
+       p->c_encoding = CE_7BIT;
+       p->c_file = getcpy(infile);
+       /*
+        * Sigh.  ce_file contains the "decoded" contents of this part.
+        * So this seems like the best option available since we're going
+        * to call scan_content() on this.
+        */
+       p->c_cefile.ce_file = getcpy("/dev/null");
+       p->c_begin = ftell(in);
+       p->c_end = ftell(in);
+
+       if ((t = (struct text *) calloc (1, sizeof (*t))) == NULL)
+           adios (NULL, "out of memory");
+
+       t->tx_charset = CHARSET_SPECIFIED;
+       p->c_ctparams = t;
+
+       if ((part = (struct part *) calloc (1, sizeof(*part))) == NULL)
+           adios (NULL, "out of memory");
+       *pp = part;
+       pp = &part->mp_next;
+       part->mp_part = p;
+    }
+
+    /*
+     * close the composition draft since
+     * it's not needed any longer.
+     */
+    fclose (in);
  
      /*
       * If only one content was found, then remove and
@@ -403,7 +449,7 @@ finish_field:
       * check if prefix for multipart boundary clashes with
       * any of the contents.
       */
-    while (scan_content (ct) == NOTOK) {
+    while (scan_content (ct, maxunencoded) == NOTOK) {
         if (*cp < 'z') {
             (*cp)++;
          } else {
@@ -415,7 +461,8 @@ finish_field:
      }
  
      /* Build the rest of the header field structures */
-    build_headers (ct);
+    if (! dist)
+       build_headers (ct);
  
      return ct;
  }
@@ -481,7 +528,7 @@ fgetstr (char *s, int n, FILE *stream)
   */
  
  static int
-user_content (FILE *in, char *file, char *buf, CT *ctp)
+user_content (FILE *in, char *buf, CT *ctp)
  {
      int        extrnal, vrsn;
      char *cp, **ap;
@@ -950,7 +997,7 @@ use_forw:
             struct part *part;
             CT p;
  
-           if (user_content (in, file, buffer, &p) == DONE) {
+           if (user_content (in, buffer, &p) == DONE) {
                 if (!m->mp_parts)
                     adios (NULL, "empty \"#begin ... #end\" sequence");
                 return OK;
@@ -1231,11 +1278,12 @@ raw:
   */
  
  static int
-scan_content (CT ct)
+scan_content (CT ct, size_t maxunencoded)
  {
      int len;
-    int check8bit = 0, contains8bit = 0;  /* check if contains 8bit data                */
-    int checklinelen = 0, linelen = 0;   /* check for long lines                       */
+    int check8bit = 0, contains8bit = 0;  /* check if contains 8bit data */
+    int checklinelen = 0, linelen = 0;  /* check for long lines */ 
+    int checkllinelen = 0; /* check for extra-long lines */
      int checkboundary = 0, boundaryclash = 0; /* check if clashes with multipart boundary   */
      int checklinespace = 0, linespace = 0;  /* check if any line ends with space          */
      char *cp = NULL, buffer[BUFSIZ];
@@ -1257,7 +1305,7 @@ scan_content (CT ct)
         for (part = m->mp_parts; part; part = part->mp_next) {
             CT p = part->mp_part;
  
-           if (scan_content (p) == NOTOK)      /* choose encoding for subpart */
+           if (scan_content (p, maxunencoded) == NOTOK)        /* choose encoding for subpart */
                 return NOTOK;
  
             /* if necessary, enlarge encoding for enclosing multipart */
@@ -1271,58 +1319,81 @@ scan_content (CT ct)
      }
  
      /*
-     * Decide what to check while scanning this content.
+     * Decide what to check while scanning this content.  Note that
+     * for text content we always check for 8bit characters if the
+     * charset is unspecified, because the result determines whether the
+     * character set is us-ascii or is retrieved from the locale.
       */
-    switch (ct->c_type) {
-    case CT_TEXT:
-       check8bit = 1;
+
+    if (ct->c_type == CT_TEXT) {
+       t = (struct text *) ct->c_ctparams;
+       if (t->tx_charset == CHARSET_UNSPECIFIED)
+           check8bit = 1;
+    }
+
+    switch (ct->c_reqencoding) {
+    case CE_8BIT:
+       checkllinelen = 1;
         checkboundary = 1;
-       if (ct->c_subtype == TEXT_PLAIN) {
-           checklinelen = 0;
-           checklinespace = 0;
-       } else {
+       break;
+    case CE_QUOTED:
+       checkboundary = 1;
+       break;
+    case CE_BASE64:
+       break;
+    case CE_UNKNOWN:
+       /* Use the default rules based on content-type */
+       switch (ct->c_type) {
+       case CT_TEXT:
+           checkboundary = 1;
             checklinelen = 1;
-           checklinespace = 1;
-       }
+           if (ct->c_subtype == TEXT_PLAIN) {
+               checklinespace = 0;
+           } else {
+               checklinespace = 1;
+           }
         break;
  
-    case CT_APPLICATION:
-       check8bit = 1;
-       checklinelen = 1;
-       checklinespace = 1;
-       checkboundary = 1;
+       case CT_APPLICATION:
+           check8bit = 1;
+           checklinelen = 1;
+           checklinespace = 1;
+           checkboundary = 1;
         break;
  
-    case CT_MESSAGE:
-       check8bit = 0;
-       checklinelen = 0;
-       checklinespace = 0;
+       case CT_MESSAGE:
+           check8bit = 0;
+           checklinelen = 0;
+           checklinespace = 0;
  
-       /* don't check anything for message/external */
-       if (ct->c_subtype == MESSAGE_EXTERNAL)
-           checkboundary = 0;
-       else
-           checkboundary = 1;
-       break;
+           /* don't check anything for message/external */
+           if (ct->c_subtype == MESSAGE_EXTERNAL)
+               checkboundary = 0;
+           else
+               checkboundary = 1;
+           break;
  
-    case CT_AUDIO:
-    case CT_IMAGE:
-    case CT_VIDEO:
-       /*
-        * Don't check anything for these types,
-        * since we are forcing use of base64.
-        */
-       check8bit = 0;
-       checklinelen = 0;
-       checklinespace = 0;
-       checkboundary = 0;
-       break;
+       case CT_AUDIO:
+       case CT_IMAGE:
+       case CT_VIDEO:
+           /*
+            * Don't check anything for these types,
+            * since we are forcing use of base64, unless
+            * an encoding was explicitly requested by an mhbuild directive.
+            */
+           check8bit = 0;
+           checklinelen = 0;
+           checklinespace = 0;
+           checkboundary = 0;
+           break;
+       }
      }
  
      /*
       * Scan the unencoded content
       */
-    if (check8bit || checklinelen || checklinespace || checkboundary) {
+    if (check8bit || checklinelen || checklinespace || checkboundary ||
+       checkllinelen) {
         if ((in = fopen (ce->ce_file, "r")) == NULL)
             adios (ce->ce_file, "unable to open for reading");
         len = strlen (prefix);
@@ -1343,11 +1414,23 @@ scan_content (CT ct)
             /*
              * Check line length.
              */
-           if (checklinelen && (strlen (buffer) > CPERLIN + 1)) {
+           if (checklinelen && (strlen (buffer) > maxunencoded + 1)) {
                 linelen = 1;
                 checklinelen = 0;       /* no need to keep checking */
             }
  
+           /*
+            * RFC 5322 specifies that a message cannot contain a line
+            * longer than 998 characters (excluding the CRLF).  If we
+            * get one of those lines and linelen is NOT set, then abort.
+            */
+
+           if (checkllinelen && !linelen &&
+                                       (strlen(buffer) > MAXLONGLINE + 1)) {
+               adios(NULL, "Line in content exceeds maximum line limit (%d)",
+                     MAXLONGLINE);
+           }
+
             /*
              * Check if line ends with a space.
              */
@@ -1375,14 +1458,11 @@ scan_content (CT ct)
      }
  
      /*
-     * Decide which transfer encoding to use.
+     * If the content is text and didn't specify a character set,
+     * we need to figure out which one was used.
       */
-    switch (ct->c_type) {
-    case CT_TEXT:
-       /*
-        * If the text content didn't specify a character
-        * set, we need to figure out which one was used.
-        */
+
+    if (ct->c_type == CT_TEXT) {
         t = (struct text *) ct->c_ctparams;
         if (t->tx_charset == CHARSET_UNSPECIFIED) {
             CI ci = &ct->c_ctinfo;
@@ -1403,33 +1483,45 @@ scan_content (CT ct)
             *cp++ = '\0';
             *ep = cp;
         }
+    }
  
-       if (contains8bit || linelen || linespace || checksw)
-           ct->c_encoding = CE_QUOTED;
-       else
-           ct->c_encoding = CE_7BIT;
-       break;
+    /*
+     * Decide which transfer encoding to use.
+     */
  
-    case CT_APPLICATION:
-       /* For application type, use base64, except when postscript */
-       if (contains8bit || linelen || linespace || checksw)
-           ct->c_encoding = (ct->c_subtype == APPLICATION_POSTSCRIPT)
-               ? CE_QUOTED : CE_BASE64;
-       else
-           ct->c_encoding = CE_7BIT;
-       break;
+    if (ct->c_reqencoding != CE_UNKNOWN)
+       ct->c_encoding = ct->c_reqencoding;
+    else
+       switch (ct->c_type) {
+       case CT_TEXT:
+           if (contains8bit && !linelen && !linespace && !checksw)
+               ct->c_encoding = CE_8BIT;
+           else if (contains8bit || linelen || linespace || checksw)
+               ct->c_encoding = CE_QUOTED;
+           else
+               ct->c_encoding = CE_7BIT;
+           break;
  
-    case CT_MESSAGE:
-       ct->c_encoding = CE_7BIT;
-       break;
+       case CT_APPLICATION:
+           /* For application type, use base64, except when postscript */
+           if (contains8bit || linelen || linespace || checksw)
+               ct->c_encoding = (ct->c_subtype == APPLICATION_POSTSCRIPT)
+                   ? CE_QUOTED : CE_BASE64;
+           else
+               ct->c_encoding = CE_7BIT;
+           break;
  
-    case CT_AUDIO:
-    case CT_IMAGE:
-    case CT_VIDEO:
-       /* For audio, image, and video contents, just use base64 */
-       ct->c_encoding = CE_BASE64;
-       break;
-    }
+       case CT_MESSAGE:
+           ct->c_encoding = CE_7BIT;
+           break;
+
+       case CT_AUDIO:
+       case CT_IMAGE:
+       case CT_VIDEO:
+           /* For audio, image, and video contents, just use base64 */
+           ct->c_encoding = CE_BASE64;
+           break;
+        }
  
      return (boundaryclash ? NOTOK : OK);
  }