Support for selectable Content-Transfer-Encoding.

author Ken Hornstein <kenh@pobox.com>

Tue, 4 Feb 2014 01:14:55 +0000 (20:14 -0500)

committer Ken Hornstein <kenh@pobox.com>

Tue, 4 Feb 2014 01:14:55 +0000 (20:14 -0500)
author Ken Hornstein <kenh@pobox.com>
Tue, 4 Feb 2014 01:14:55 +0000 (20:14 -0500)
committer Ken Hornstein <kenh@pobox.com>
Tue, 4 Feb 2014 01:14:55 +0000 (20:14 -0500)
diff --git a/h/mhparse.h b/h/mhparse.h

index 85b1bc16ea70ad19678f95666831c543a99b0982..bb55153feb5ca39ea66418fd11ed19261aaf1f6f 100644 (file)
--- a/h/mhparse.h
+++ b/h/mhparse.h
@@ -304,14 +304,16 @@ CT parse_mime (char *);
   *               processed by default.
   * encoding    - The default encoding to use when doing RFC 2047 header
   *               encoding.  Must be one of CE_UNKNOWN, CE_BASE64, or
   *               processed by default.
   * encoding    - The default encoding to use when doing RFC 2047 header
   *               encoding.  Must be one of CE_UNKNOWN, CE_BASE64, or
- *               CE_QUOTED;
+ *               CE_QUOTED.
+ * maxunencoded        - The longest line a text part may contain before its
+ *               default encoding becomes quoted-printable.
   *
   * Returns a CT structure describing the resulting MIME message.  If the
   * -auto flag is set and a MIME-Version header is encountered, the return
   * value is NULL.
   */
  CT build_mime (char *infile, int autobuild, int dist, int directives,
   *
   * Returns a CT structure describing the resulting MIME message.  If the
   * -auto flag is set and a MIME-Version header is encountered, the return
   * value is NULL.
   */
  CT build_mime (char *infile, int autobuild, int dist, int directives,
-              int encoding);
+              int encoding, size_t maxunencoded);
  
  int add_header (CT, char *, char *);
  int get_ctinfo (char *, CT, int);
  
  int add_header (CT, char *, char *);
  int get_ctinfo (char *, CT, int);
diff --git a/h/mime.h b/h/mime.h

index 2ed5378c9f3b055105a83e7c71d86775f7ca3083..d152094a8a113fd8e16d128df43df81626d777fc 100644 (file)
--- a/h/mime.h
+++ b/h/mime.h
@@ -35,6 +35,8 @@
                      && (c) != '/' && (c) != '['  && (c) != ']' \
                      && (c) != '?' && (c) != '=')
  
                      && (c) != '/' && (c) != '['  && (c) != ']' \
                      && (c) != '?' && (c) != '=')
  
+#define        MAXTEXTPERLN 78
+#define        MAXLONGLINE 998
  #define        CPERLIN 76
  #define        BPERLIN (CPERLIN / 4)
  #define        LPERMSG 632
  #define        CPERLIN 76
  #define        BPERLIN (CPERLIN / 4)
  #define        LPERMSG 632
diff --git a/test/mhbuild/test-attach b/test/mhbuild/test-attach

index 9f6526981a13ab7d25d74a15dccd63f1d7dbb56f..12d3da39a3adeab8cc508d0d72a8be6ae9c5fe94 100755 (executable)
--- a/test/mhbuild/test-attach
+++ b/test/mhbuild/test-attach
@@ -233,23 +233,25 @@ Fcc: +outbox
  Subject: A more complete multipart test
  MIME-Version: 1.0
  Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
  Subject: A more complete multipart test
  MIME-Version: 1.0
  Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+Content-Transfer-Encoding: 8bit
  
  ------- =_aaaaaaaaaa0
  Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1"
  
  ------- =_aaaaaaaaaa0
  Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1"
+Content-Transfer-Encoding: 8bit
  
  ------- =_aaaaaaaaaa1
  Content-Type: text/plain; charset="UTF-8"
  
  ------- =_aaaaaaaaaa1
  Content-Type: text/plain; charset="UTF-8"
-Content-Transfer-Encoding: quoted-printable
+Content-Transfer-Encoding: 8bit
  
  
-This is some t=C3=ABxt.
+This is some tëxt.
  
  ------- =_aaaaaaaaaa1
  Content-Type: text/html; charset="UTF-8"
  
  ------- =_aaaaaaaaaa1
  Content-Type: text/html; charset="UTF-8"
-Content-Transfer-Encoding: quoted-printable
+Content-Transfer-Encoding: 8bit
  
  <html>
  <body>
  
  <html>
  <body>
-This is some HTML t=C3=ABxt.
+This is some HTML tëxt.
  </body>
  </html>
  
  </body>
  </html>
  
diff --git a/test/mhbuild/test-utf8-body b/test/mhbuild/test-utf8-body

index 033a2ef84e9487ec3b26d1e6c8d5b13edf3c5144..9798ac26b1ae17d121c9508f030c3f30b1dc04e9 100755 (executable)
--- a/test/mhbuild/test-utf8-body
+++ b/test/mhbuild/test-utf8-body
@@ -102,13 +102,14 @@ To: Somebody <somebody@example.com>
  Subject: Test
  MIME-Version: 1.0
  Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
  Subject: Test
  MIME-Version: 1.0
  Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+Content-Transfer-Encoding: 8bit
  Date:
  
  ------- =_aaaaaaaaaa0
  Content-Type: text/plain
  Date:
  
  ------- =_aaaaaaaaaa0
  Content-Type: text/plain
-Content-Transfer-Encoding: quoted-printable
+Content-Transfer-Encoding: 8bit
  
  
-=C2=A1Ay, caramba!
+¡Ay, caramba!
  
  ------- =_aaaaaaaaaa0
  Content-Type: text/plain; name="attachment.txt"
  
  ------- =_aaaaaaaaaa0
  Content-Type: text/plain; name="attachment.txt"
diff --git a/uip/mhbuild.c b/uip/mhbuild.c

index de849a39549fee0e89385daaaedc4d1ce6aec24b..85bfef473b0c08e21da1d23b53ece7e434b1db78 100644 (file)
--- a/uip/mhbuild.c
+++ b/uip/mhbuild.c
@@ -41,6 +41,7 @@
      X("nocontentid", 0, NCONTENTIDSW) \
      X("headerencoding encoding-algorithm", 0, HEADERENCSW) \
      X("autoheaderencoding", 0, AUTOHEADERENCSW) \
      X("nocontentid", 0, NCONTENTIDSW) \
      X("headerencoding encoding-algorithm", 0, HEADERENCSW) \
      X("autoheaderencoding", 0, AUTOHEADERENCSW) \
+    X("maxunencoded", 0, MAXUNENCSW) \
      X("version", 0, VERSIONSW) \
      X("help", 0, HELPSW) \
      X("debug", -5, DEBUGSW) \
      X("version", 0, VERSIONSW) \
      X("help", 0, HELPSW) \
      X("debug", -5, DEBUGSW) \
@@ -102,6 +103,7 @@ int
  main (int argc, char **argv)
  {
      int sizesw = 1, headsw = 1, directives = 1, autobuild = 0, dist = 0;
  main (int argc, char **argv)
  {
      int sizesw = 1, headsw = 1, directives = 1, autobuild = 0, dist = 0;
+    size_t maxunencoded = MAXTEXTPERLN;
      int *icachesw;
      char *cp, buf[BUFSIZ];
      char buffer[BUFSIZ], *compfile = NULL;
      int *icachesw;
      char *cp, buf[BUFSIZ];
      char buffer[BUFSIZ], *compfile = NULL;
@@ -253,6 +255,15 @@ main (int argc, char **argv)
                 header_encoding = CE_UNKNOWN;
                 continue;
  
                 header_encoding = CE_UNKNOWN;
                 continue;
  
+           case MAXUNENCSW:
+               if (!(cp = *argp++) || *cp == '-')
+                   adios (NULL, "missing argument to %s", argp[-2]);
+               if ((maxunencoded = atoi(cp)) < 1)
+                   adios (NULL, "Invalid argument for %s: %s", argp[-2], cp);
+               if (maxunencoded > 998)
+                   adios (NULL, "limit of -maxunencoded is 998");
+               continue;
+
             case VERBSW: 
                 verbosw++;
                 continue;
             case VERBSW: 
                 verbosw++;
                 continue;
@@ -326,7 +337,8 @@ main (int argc, char **argv)
         unlink_infile = 1;
  
         /* build the content structures for MIME message */
         unlink_infile = 1;
  
         /* build the content structures for MIME message */
-       ct = build_mime (infile, autobuild, dist, directives, header_encoding);
+       ct = build_mime (infile, autobuild, dist, directives, header_encoding,
+                        maxunencoded);
  
         /*
          * If ct == NULL, that means that -auto was set and a MIME version
  
         /*
          * If ct == NULL, that means that -auto was set and a MIME version
@@ -356,7 +368,8 @@ main (int argc, char **argv)
       */
  
      /* build the content structures for MIME message */
       */
  
      /* build the content structures for MIME message */
-    ct = build_mime (compfile, autobuild, dist, directives, header_encoding);
+    ct = build_mime (compfile, autobuild, dist, directives, header_encoding,
+                    maxunencoded);
  
      /*
       * If ct == NULL, that means -auto was set and we found a MIME version
  
      /*
       * If ct == NULL, that means -auto was set and we found a MIME version
diff --git a/uip/mhbuildsbr.c b/uip/mhbuildsbr.c

index 70747c85ee93a35fca37d17d698e61367ee9bd38..e44f01416bafb78d84db0fd5809296125f32102b 100644 (file)
--- a/uip/mhbuildsbr.c
+++ b/uip/mhbuildsbr.c
@@ -77,7 +77,7 @@ static char *fgetstr (char *, int, FILE *);
  static int user_content (FILE *, char *, CT *);
  static void set_id (CT, int);
  static int compose_content (CT);
  static int user_content (FILE *, char *, CT *);
  static void set_id (CT, int);
  static int compose_content (CT);
-static int scan_content (CT);
+static int scan_content (CT, size_t);
  static int build_headers (CT);
  static char *calculate_digest (CT, int);
  
  static int build_headers (CT);
  static char *calculate_digest (CT, int);
  
@@ -124,7 +124,7 @@ static void directive_pop(void)
  
  CT
  build_mime (char *infile, int autobuild, int dist, int directives,
  
  CT
  build_mime (char *infile, int autobuild, int dist, int directives,
-           int header_encoding)
+           int header_encoding, size_t maxunencoded)
  {
      int        compnum, state;
      char buf[BUFSIZ], name[NAMESZ];
  {
      int        compnum, state;
      char buf[BUFSIZ], name[NAMESZ];
@@ -449,7 +449,7 @@ finish_field:
       * check if prefix for multipart boundary clashes with
       * any of the contents.
       */
       * check if prefix for multipart boundary clashes with
       * any of the contents.
       */
-    while (scan_content (ct) == NOTOK) {
+    while (scan_content (ct, maxunencoded) == NOTOK) {
         if (*cp < 'z') {
             (*cp)++;
          } else {
         if (*cp < 'z') {
             (*cp)++;
          } else {
@@ -1278,11 +1278,12 @@ raw:
   */
  
  static int
   */
  
  static int
-scan_content (CT ct)
+scan_content (CT ct, size_t maxunencoded)
  {
      int len;
  {
      int len;
-    int check8bit = 0, contains8bit = 0;  /* check if contains 8bit data                */
-    int checklinelen = 0, linelen = 0;   /* check for long lines                       */
+    int check8bit = 0, contains8bit = 0;  /* check if contains 8bit data */
+    int checklinelen = 0, linelen = 0;  /* check for long lines */ 
+    int checkllinelen = 0; /* check for extra-long lines */
      int checkboundary = 0, boundaryclash = 0; /* check if clashes with multipart boundary   */
      int checklinespace = 0, linespace = 0;  /* check if any line ends with space          */
      char *cp = NULL, buffer[BUFSIZ];
      int checkboundary = 0, boundaryclash = 0; /* check if clashes with multipart boundary   */
      int checklinespace = 0, linespace = 0;  /* check if any line ends with space          */
      char *cp = NULL, buffer[BUFSIZ];
@@ -1304,7 +1305,7 @@ scan_content (CT ct)
         for (part = m->mp_parts; part; part = part->mp_next) {
             CT p = part->mp_part;
  
         for (part = m->mp_parts; part; part = part->mp_next) {
             CT p = part->mp_part;
  
-           if (scan_content (p) == NOTOK)      /* choose encoding for subpart */
+           if (scan_content (p, maxunencoded) == NOTOK)        /* choose encoding for subpart */
                 return NOTOK;
  
             /* if necessary, enlarge encoding for enclosing multipart */
                 return NOTOK;
  
             /* if necessary, enlarge encoding for enclosing multipart */
@@ -1320,56 +1321,72 @@ scan_content (CT ct)
      /*
       * Decide what to check while scanning this content.
       */
      /*
       * Decide what to check while scanning this content.
       */
-    switch (ct->c_type) {
-    case CT_TEXT:
-       check8bit = 1;
+
+    switch (ct->c_reqencoding) {
+    case CE_8BIT:
+       checkllinelen = 1;
         checkboundary = 1;
         checkboundary = 1;
-       if (ct->c_subtype == TEXT_PLAIN) {
-           checklinelen = 0;
-           checklinespace = 0;
-       } else {
+       break;
+    case CE_QUOTED:
+       checkboundary = 1;
+       break;
+    case CE_BASE64:
+       /* We check nothing here */
+       break;
+    case CE_UNKNOWN:
+       /* Use the default rules based on content-type */
+       switch (ct->c_type) {
+       case CT_TEXT:
+           checkboundary = 1;
+           check8bit = 1;
             checklinelen = 1;
             checklinelen = 1;
-           checklinespace = 1;
-       }
+           if (ct->c_subtype == TEXT_PLAIN) {
+               checklinespace = 0;
+           } else {
+               checklinespace = 1;
+           }
         break;
  
         break;
  
-    case CT_APPLICATION:
-       check8bit = 1;
-       checklinelen = 1;
-       checklinespace = 1;
-       checkboundary = 1;
+       case CT_APPLICATION:
+           check8bit = 1;
+           checklinelen = 1;
+           checklinespace = 1;
+           checkboundary = 1;
         break;
  
         break;
  
-    case CT_MESSAGE:
-       check8bit = 0;
-       checklinelen = 0;
-       checklinespace = 0;
+       case CT_MESSAGE:
+           check8bit = 0;
+           checklinelen = 0;
+           checklinespace = 0;
  
  
-       /* don't check anything for message/external */
-       if (ct->c_subtype == MESSAGE_EXTERNAL)
-           checkboundary = 0;
-       else
-           checkboundary = 1;
-       break;
+           /* don't check anything for message/external */
+           if (ct->c_subtype == MESSAGE_EXTERNAL)
+               checkboundary = 0;
+           else
+               checkboundary = 1;
+           break;
  
  
-    case CT_AUDIO:
-    case CT_IMAGE:
-    case CT_VIDEO:
-       /*
-        * Don't check anything for these types,
-        * since we are forcing use of base64.
-        */
-       check8bit = 0;
-       checklinelen = 0;
-       checklinespace = 0;
-       checkboundary = 0;
-       break;
+       case CT_AUDIO:
+       case CT_IMAGE:
+       case CT_VIDEO:
+           /*
+            * Don't check anything for these types, since we
+            * force base64 for them unless a transfer encoding
+            * was explicitly requested (handled by the outer switch).
+            */
+           check8bit = 0;
+           checklinelen = 0;
+           checklinespace = 0;
+           checkboundary = 0;
+           break;
+       }
      }
  
      /*
       * Scan the unencoded content
       */
      }
  
      /*
       * Scan the unencoded content
       */
-    if (check8bit || checklinelen || checklinespace || checkboundary) {
+    if (check8bit || checklinelen || checklinespace || checkboundary ||
+       checkllinelen) {
         if ((in = fopen (ce->ce_file, "r")) == NULL)
             adios (ce->ce_file, "unable to open for reading");
         len = strlen (prefix);
         if ((in = fopen (ce->ce_file, "r")) == NULL)
             adios (ce->ce_file, "unable to open for reading");
         len = strlen (prefix);
@@ -1390,11 +1407,23 @@ scan_content (CT ct)
             /*
              * Check line length.
              */
             /*
              * Check line length.
              */
-           if (checklinelen && (strlen (buffer) > CPERLIN + 1)) {
+           if (checklinelen && (strlen (buffer) > maxunencoded + 1)) {
                 linelen = 1;
                 checklinelen = 0;       /* no need to keep checking */
             }
  
                 linelen = 1;
                 checklinelen = 0;       /* no need to keep checking */
             }
  
+           /*
+            * RFC 5322 specifies that a message cannot contain a line
+            * greater than 998 characters (excluding the CRLF).  If we
+            * get one of those lines and linelen is NOT set, then abort.
+            */
+
+           if (checkllinelen && !linelen &&
+                                       (strlen(buffer) > MAXLONGLINE + 1)) {
+               adios(NULL, "Line in content exceeds maximum line limit (%d)",
+                     MAXLONGLINE);
+           }
+
             /*
              * Check if line ends with a space.
              */
             /*
              * Check if line ends with a space.
              */
@@ -1424,59 +1453,65 @@ scan_content (CT ct)
      /*
       * Decide which transfer encoding to use.
       */
      /*
       * Decide which transfer encoding to use.
       */
-    switch (ct->c_type) {
-    case CT_TEXT:
-       /*
-        * If the text content didn't specify a character
-        * set, we need to figure out which one was used.
-        */
-       t = (struct text *) ct->c_ctparams;
-       if (t->tx_charset == CHARSET_UNSPECIFIED) {
-           CI ci = &ct->c_ctinfo;
-           char **ap, **ep;
  
  
-           for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
-               continue;
+    if (ct->c_reqencoding != CE_UNKNOWN)
+       ct->c_encoding = ct->c_reqencoding;
+    else
+       switch (ct->c_type) {
+       case CT_TEXT:
+           /*
+            * If the text content didn't specify a character
+            * set, we need to figure out which one was used.
+            */
+           t = (struct text *) ct->c_ctparams;
+           if (t->tx_charset == CHARSET_UNSPECIFIED) {
+               CI ci = &ct->c_ctinfo;
+               char **ap, **ep;
  
  
-           if (contains8bit) {
-               *ap = concat ("charset=", write_charset_8bit(), NULL);
-           } else {
-               *ap = add ("charset=us-ascii", NULL);
+               for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++)
+                   continue;
+
+               if (contains8bit) {
+                   *ap = concat ("charset=", write_charset_8bit(), NULL);
+               } else {
+                   *ap = add ("charset=us-ascii", NULL);
+               }
+               t->tx_charset = CHARSET_SPECIFIED;
+
+               cp = strchr(*ap++, '=');
+               *ap = NULL;
+               *cp++ = '\0';
+               *ep = cp;
             }
             }
-           t->tx_charset = CHARSET_SPECIFIED;
  
  
-           cp = strchr(*ap++, '=');
-           *ap = NULL;
-           *cp++ = '\0';
-           *ep = cp;
-       }
+           if (contains8bit && !linelen && !linespace && !checksw)
+               ct->c_encoding = CE_8BIT;
+           else if (contains8bit || linelen || linespace || checksw)
+               ct->c_encoding = CE_QUOTED;
+           else
+               ct->c_encoding = CE_7BIT;
+           break;
  
  
-       if (contains8bit || linelen || linespace || checksw)
-           ct->c_encoding = CE_QUOTED;
-       else
-           ct->c_encoding = CE_7BIT;
-       break;
+       case CT_APPLICATION:
+           /* For application type, use base64, except when postscript */
+           if (contains8bit || linelen || linespace || checksw)
+               ct->c_encoding = (ct->c_subtype == APPLICATION_POSTSCRIPT)
+                   ? CE_QUOTED : CE_BASE64;
+           else
+               ct->c_encoding = CE_7BIT;
+           break;
  
  
-    case CT_APPLICATION:
-       /* For application type, use base64, except when postscript */
-       if (contains8bit || linelen || linespace || checksw)
-           ct->c_encoding = (ct->c_subtype == APPLICATION_POSTSCRIPT)
-               ? CE_QUOTED : CE_BASE64;
-       else
+       case CT_MESSAGE:
             ct->c_encoding = CE_7BIT;
             ct->c_encoding = CE_7BIT;
-       break;
-
-    case CT_MESSAGE:
-       ct->c_encoding = CE_7BIT;
-       break;
+           break;
  
  
-    case CT_AUDIO:
-    case CT_IMAGE:
-    case CT_VIDEO:
-       /* For audio, image, and video contents, just use base64 */
-       ct->c_encoding = CE_BASE64;
-       break;
-    }
+       case CT_AUDIO:
+       case CT_IMAGE:
+       case CT_VIDEO:
+           /* For audio, image, and video contents, just use base64 */
+           ct->c_encoding = CE_BASE64;
+           break;
+        }
  
      return (boundaryclash ? NOTOK : OK);
  }
  
      return (boundaryclash ? NOTOK : OK);
  }
author	Ken Hornstein <kenh@pobox.com>
	Tue, 4 Feb 2014 01:14:55 +0000 (20:14 -0500)
committer	Ken Hornstein <kenh@pobox.com>
	Tue, 4 Feb 2014 01:14:55 +0000 (20:14 -0500)
h/mhparse.h		patch | blob | history
h/mime.h		patch | blob | history
test/mhbuild/test-attach		patch | blob | history
test/mhbuild/test-utf8-body		patch | blob | history
uip/mhbuild.c		patch | blob | history
uip/mhbuildsbr.c		patch | blob | history