From f93ce652c5d1361b00a28db7bbb9e638197a6676 Mon Sep 17 00:00:00 2001 From: Ken Hornstein Date: Mon, 3 Feb 2014 20:14:55 -0500 Subject: [PATCH 1/1] Support for selectable Content-Transfer-Encoding. Now default to 8bit for CTE for text types. --- h/mhparse.h | 6 +- h/mime.h | 2 + test/mhbuild/test-attach | 10 +- test/mhbuild/test-utf8-body | 5 +- uip/mhbuild.c | 17 ++- uip/mhbuildsbr.c | 215 +++++++++++++++++++++--------------- 6 files changed, 155 insertions(+), 100 deletions(-) diff --git a/h/mhparse.h b/h/mhparse.h index 85b1bc16..bb55153f 100644 --- a/h/mhparse.h +++ b/h/mhparse.h @@ -304,14 +304,16 @@ CT parse_mime (char *); * processed by default. * encoding - The default encoding to use when doing RFC 2047 header * encoding. Must be one of CE_UNKNOWN, CE_BASE64, or - * CE_QUOTED; + * CE_QUOTED. + * maxunencoded - The maximum line length before the default encoding for + * text parts is quoted-printable. * * Returns a CT structure describing the resulting MIME message. If the * -auto flag is set and a MIME-Version header is encountered, the return * value is NULL. */ CT build_mime (char *infile, int autobuild, int dist, int directives, - int encoding); + int encoding, size_t maxunencoded); int add_header (CT, char *, char *); int get_ctinfo (char *, CT, int); diff --git a/h/mime.h b/h/mime.h index 2ed5378c..d152094a 100644 --- a/h/mime.h +++ b/h/mime.h @@ -35,6 +35,8 @@ && (c) != '/' && (c) != '[' && (c) != ']' \ && (c) != '?' 
&& (c) != '=') +#define MAXTEXTPERLN 78 +#define MAXLONGLINE 998 #define CPERLIN 76 #define BPERLIN (CPERLIN / 4) #define LPERMSG 632 diff --git a/test/mhbuild/test-attach b/test/mhbuild/test-attach index 9f652698..12d3da39 100755 --- a/test/mhbuild/test-attach +++ b/test/mhbuild/test-attach @@ -233,23 +233,25 @@ Fcc: +outbox Subject: A more complete multipart test MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" +Content-Transfer-Encoding: 8bit ------- =_aaaaaaaaaa0 Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1" +Content-Transfer-Encoding: 8bit ------- =_aaaaaaaaaa1 Content-Type: text/plain; charset="UTF-8" -Content-Transfer-Encoding: quoted-printable +Content-Transfer-Encoding: 8bit -This is some t=C3=ABxt. +This is some tëxt. ------- =_aaaaaaaaaa1 Content-Type: text/html; charset="UTF-8" -Content-Transfer-Encoding: quoted-printable +Content-Transfer-Encoding: 8bit -This is some HTML t=C3=ABxt. +This is some HTML tëxt. diff --git a/test/mhbuild/test-utf8-body b/test/mhbuild/test-utf8-body index 033a2ef8..9798ac26 100755 --- a/test/mhbuild/test-utf8-body +++ b/test/mhbuild/test-utf8-body @@ -102,13 +102,14 @@ To: Somebody Subject: Test MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" +Content-Transfer-Encoding: 8bit Date: ------- =_aaaaaaaaaa0 Content-Type: text/plain -Content-Transfer-Encoding: quoted-printable +Content-Transfer-Encoding: 8bit -=C2=A1Ay, caramba! +¡Ay, caramba! 
------- =_aaaaaaaaaa0 Content-Type: text/plain; name="attachment.txt" diff --git a/uip/mhbuild.c b/uip/mhbuild.c index de849a39..85bfef47 100644 --- a/uip/mhbuild.c +++ b/uip/mhbuild.c @@ -41,6 +41,7 @@ X("nocontentid", 0, NCONTENTIDSW) \ X("headerencoding encoding-algorithm", 0, HEADERENCSW) \ X("autoheaderencoding", 0, AUTOHEADERENCSW) \ + X("maxunencoded", 0, MAXUNENCSW) \ X("version", 0, VERSIONSW) \ X("help", 0, HELPSW) \ X("debug", -5, DEBUGSW) \ @@ -102,6 +103,7 @@ int main (int argc, char **argv) { int sizesw = 1, headsw = 1, directives = 1, autobuild = 0, dist = 0; + size_t maxunencoded = MAXTEXTPERLN; int *icachesw; char *cp, buf[BUFSIZ]; char buffer[BUFSIZ], *compfile = NULL; @@ -253,6 +255,15 @@ main (int argc, char **argv) header_encoding = CE_UNKNOWN; continue; + case MAXUNENCSW: + if (!(cp = *argp++) || *cp == '-') + adios (NULL, "missing argument to %s", argp[-2]); + if ((maxunencoded = atoi(cp)) < 1) + adios (NULL, "Invalid argument for %s: %s", argp[-2], cp); + if (maxunencoded > 998) + adios (NULL, "limit of -maxunencoded is 998"); + continue; + case VERBSW: verbosw++; continue; @@ -326,7 +337,8 @@ main (int argc, char **argv) unlink_infile = 1; /* build the content structures for MIME message */ - ct = build_mime (infile, autobuild, dist, directives, header_encoding); + ct = build_mime (infile, autobuild, dist, directives, header_encoding, + maxunencoded); /* * If ct == NULL, that means that -auto was set and a MIME version @@ -356,7 +368,8 @@ main (int argc, char **argv) */ /* build the content structures for MIME message */ - ct = build_mime (compfile, autobuild, dist, directives, header_encoding); + ct = build_mime (compfile, autobuild, dist, directives, header_encoding, + maxunencoded); /* * If ct == NULL, that means -auto was set and we found a MIME version diff --git a/uip/mhbuildsbr.c b/uip/mhbuildsbr.c index 70747c85..e44f0141 100644 --- a/uip/mhbuildsbr.c +++ b/uip/mhbuildsbr.c @@ -77,7 +77,7 @@ static char *fgetstr (char *, int, FILE 
*); static int user_content (FILE *, char *, CT *); static void set_id (CT, int); static int compose_content (CT); -static int scan_content (CT); +static int scan_content (CT, size_t); static int build_headers (CT); static char *calculate_digest (CT, int); @@ -124,7 +124,7 @@ static void directive_pop(void) CT build_mime (char *infile, int autobuild, int dist, int directives, - int header_encoding) + int header_encoding, size_t maxunencoded) { int compnum, state; char buf[BUFSIZ], name[NAMESZ]; @@ -449,7 +449,7 @@ finish_field: * check if prefix for multipart boundary clashes with * any of the contents. */ - while (scan_content (ct) == NOTOK) { + while (scan_content (ct, maxunencoded) == NOTOK) { if (*cp < 'z') { (*cp)++; } else { @@ -1278,11 +1278,12 @@ raw: */ static int -scan_content (CT ct) +scan_content (CT ct, size_t maxunencoded) { int len; - int check8bit = 0, contains8bit = 0; /* check if contains 8bit data */ - int checklinelen = 0, linelen = 0; /* check for long lines */ + int check8bit = 0, contains8bit = 0; /* check if contains 8bit data */ + int checklinelen = 0, linelen = 0; /* check for long lines */ + int checkllinelen = 0; /* check for extra-long lines */ int checkboundary = 0, boundaryclash = 0; /* check if clashes with multipart boundary */ int checklinespace = 0, linespace = 0; /* check if any line ends with space */ char *cp = NULL, buffer[BUFSIZ]; @@ -1304,7 +1305,7 @@ scan_content (CT ct) for (part = m->mp_parts; part; part = part->mp_next) { CT p = part->mp_part; - if (scan_content (p) == NOTOK) /* choose encoding for subpart */ + if (scan_content (p, maxunencoded) == NOTOK) /* choose encoding for subpart */ return NOTOK; /* if necessary, enlarge encoding for enclosing multipart */ @@ -1320,56 +1321,72 @@ scan_content (CT ct) /* * Decide what to check while scanning this content. 
*/ - switch (ct->c_type) { - case CT_TEXT: - check8bit = 1; + + switch (ct->c_reqencoding) { + case CE_8BIT: + checkllinelen = 1; checkboundary = 1; - if (ct->c_subtype == TEXT_PLAIN) { - checklinelen = 0; - checklinespace = 0; - } else { + break; + case CE_QUOTED: + checkboundary = 1; + break; + case CE_BASE64: + /* We check nothing here */ + break; + case CE_UNKNOWN: + /* Use the default rules based on content-type */ + switch (ct->c_type) { + case CT_TEXT: + checkboundary = 1; + check8bit = 1; checklinelen = 1; - checklinespace = 1; - } + if (ct->c_subtype == TEXT_PLAIN) { + checklinespace = 0; + } else { + checklinespace = 1; + } break; - case CT_APPLICATION: - check8bit = 1; - checklinelen = 1; - checklinespace = 1; - checkboundary = 1; + case CT_APPLICATION: + check8bit = 1; + checklinelen = 1; + checklinespace = 1; + checkboundary = 1; break; - case CT_MESSAGE: - check8bit = 0; - checklinelen = 0; - checklinespace = 0; + case CT_MESSAGE: + check8bit = 0; + checklinelen = 0; + checklinespace = 0; - /* don't check anything for message/external */ - if (ct->c_subtype == MESSAGE_EXTERNAL) - checkboundary = 0; - else - checkboundary = 1; - break; + /* don't check anything for message/external */ + if (ct->c_subtype == MESSAGE_EXTERNAL) + checkboundary = 0; + else + checkboundary = 1; + break; - case CT_AUDIO: - case CT_IMAGE: - case CT_VIDEO: - /* - * Don't check anything for these types, - * since we are forcing use of base64. - */ - check8bit = 0; - checklinelen = 0; - checklinespace = 0; - checkboundary = 0; - break; + case CT_AUDIO: + case CT_IMAGE: + case CT_VIDEO: + /* + * Don't check anything for these types, + * since we are forcing use of base64, unless + * the content-transfer-encoding was specified by a mhbuild directive.
+ */ + check8bit = 0; + checklinelen = 0; + checklinespace = 0; + checkboundary = 0; + break; + } } /* * Scan the unencoded content */ - if (check8bit || checklinelen || checklinespace || checkboundary) { + if (check8bit || checklinelen || checklinespace || checkboundary || + checkllinelen) { if ((in = fopen (ce->ce_file, "r")) == NULL) adios (ce->ce_file, "unable to open for reading"); len = strlen (prefix); @@ -1390,11 +1407,23 @@ scan_content (CT ct) /* * Check line length. */ - if (checklinelen && (strlen (buffer) > CPERLIN + 1)) { + if (checklinelen && (strlen (buffer) > maxunencoded + 1)) { linelen = 1; checklinelen = 0; /* no need to keep checking */ } + /* + * RFC 5322 specifies that a message cannot contain a line + * greater than 998 characters (excluding the CRLF). If we + * get one of those lines and linelen is NOT set, then abort. + */ + + if (checkllinelen && !linelen && + (strlen(buffer) > MAXLONGLINE + 1)) { + adios(NULL, "Line in content exceeds maximum line limit (%d)", + MAXLONGLINE); + } + /* * Check if line ends with a space. */ @@ -1424,59 +1453,65 @@ scan_content (CT ct) /* * Decide which transfer encoding to use. */ - switch (ct->c_type) { - case CT_TEXT: - /* - * If the text content didn't specify a character - * set, we need to figure out which one was used. - */ - t = (struct text *) ct->c_ctparams; - if (t->tx_charset == CHARSET_UNSPECIFIED) { - CI ci = &ct->c_ctinfo; - char **ap, **ep; - for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) - continue; + if (ct->c_reqencoding != CE_UNKNOWN) + ct->c_encoding = ct->c_reqencoding; + else + switch (ct->c_type) { + case CT_TEXT: + /* + * If the text content didn't specify a character + * set, we need to figure out which one was used. 
+ */ + t = (struct text *) ct->c_ctparams; + if (t->tx_charset == CHARSET_UNSPECIFIED) { + CI ci = &ct->c_ctinfo; + char **ap, **ep; - if (contains8bit) { - *ap = concat ("charset=", write_charset_8bit(), NULL); - } else { - *ap = add ("charset=us-ascii", NULL); + for (ap = ci->ci_attrs, ep = ci->ci_values; *ap; ap++, ep++) + continue; + + if (contains8bit) { + *ap = concat ("charset=", write_charset_8bit(), NULL); + } else { + *ap = add ("charset=us-ascii", NULL); + } + t->tx_charset = CHARSET_SPECIFIED; + + cp = strchr(*ap++, '='); + *ap = NULL; + *cp++ = '\0'; + *ep = cp; } - t->tx_charset = CHARSET_SPECIFIED; - cp = strchr(*ap++, '='); - *ap = NULL; - *cp++ = '\0'; - *ep = cp; - } + if (contains8bit && !linelen && !linespace && !checksw) + ct->c_encoding = CE_8BIT; + else if (contains8bit || linelen || linespace || checksw) + ct->c_encoding = CE_QUOTED; + else + ct->c_encoding = CE_7BIT; + break; - if (contains8bit || linelen || linespace || checksw) - ct->c_encoding = CE_QUOTED; - else - ct->c_encoding = CE_7BIT; - break; + case CT_APPLICATION: + /* For application type, use base64, except when postscript */ + if (contains8bit || linelen || linespace || checksw) + ct->c_encoding = (ct->c_subtype == APPLICATION_POSTSCRIPT) + ? CE_QUOTED : CE_BASE64; + else + ct->c_encoding = CE_7BIT; + break; - case CT_APPLICATION: - /* For application type, use base64, except when postscript */ - if (contains8bit || linelen || linespace || checksw) - ct->c_encoding = (ct->c_subtype == APPLICATION_POSTSCRIPT) - ? 
CE_QUOTED : CE_BASE64; - else + case CT_MESSAGE: ct->c_encoding = CE_7BIT; - break; - - case CT_MESSAGE: - ct->c_encoding = CE_7BIT; - break; + break; - case CT_AUDIO: - case CT_IMAGE: - case CT_VIDEO: - /* For audio, image, and video contents, just use base64 */ - ct->c_encoding = CE_BASE64; - break; - } + case CT_AUDIO: + case CT_IMAGE: + case CT_VIDEO: + /* For audio, image, and video contents, just use base64 */ + ct->c_encoding = CE_BASE64; + break; + } return (boundaryclash ? NOTOK : OK); } -- 2.48.1