From: Ken Hornstein Date: Wed, 4 Dec 2013 02:52:30 +0000 (-0500) Subject: Merge branch 'encode-rfc2047' X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/6ba5b855ebc737cc46cabfbe799b0c7706f437f4?hp=4c1236a0c2f9fd289746f1e371d61f086e643cfa Merge branch 'encode-rfc2047' --- diff --git a/Makefile.am b/Makefile.am index 0749c07d..a056b24b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -55,6 +55,7 @@ TESTS = test/ali/test-ali test/anno/test-anno \ test/format/test-localmbox test/format/test-myname \ test/format/test-myhost test/format/test-mymbox \ test/format/test-rightjustify \ + test/format/test-unquote \ test/forw/test-forw-digest test/forw/test-forw-format \ test/inc/test-deb359167 test/inc/test-eom-align \ test/inc/test-inc-scanout test/inc/test-msgchk \ @@ -62,7 +63,8 @@ TESTS = test/ali/test-ali test/anno/test-anno \ test/install-mh/test-install-mh \ test/locking/test-datalocking test/locking/test-spoollocking \ test/manpages/test-manpages \ - test/mhbuild/test-forw test/mhbuild/test-utf8-body \ + test/mhbuild/test-forw test/mhbuild/test-header-encode \ + test/mhbuild/test-utf8-body \ test/mhfixmsg/test-mhfixmsg \ test/mhlist/test-mhlist test/mhmail/test-mhmail \ test/mhparam/test-mhparam test/mhpath/test-mhpath \ @@ -521,6 +523,7 @@ sbr_libmh_a_SOURCES = sbr/addrsbr.c sbr/ambigsw.c sbr/atooi.c sbr/arglist.c \ sbr/copy.c sbr/copyip.c sbr/cpydata.c \ sbr/cpydgst.c sbr/crawl_folders.c sbr/credentials.c \ sbr/discard.c sbr/done.c sbr/dtimep.l sbr/dtime.c \ + sbr/encode_rfc2047.c \ sbr/escape_addresses.c \ sbr/error.c sbr/ext_hook.c sbr/fdcompare.c \ sbr/folder_addmsg.c sbr/folder_delmsgs.c \ @@ -546,7 +549,8 @@ sbr_libmh_a_SOURCES = sbr/addrsbr.c sbr/ambigsw.c sbr/atooi.c sbr/arglist.c \ sbr/seq_setcur.c sbr/seq_setprev.c sbr/seq_setunseen.c \ sbr/showfile.c sbr/signals.c sbr/smatch.c \ sbr/snprintb.c sbr/ssequal.c \ - sbr/strindex.c sbr/trimcpy.c sbr/uprf.c sbr/vfgets.c \ + sbr/strindex.c sbr/trimcpy.c sbr/unquote.c \ + sbr/uprf.c sbr/vfgets.c \ 
sbr/fmt_def.c sbr/mf.c sbr/utils.c sbr/ctype-checked.c \ sbr/m_mktemp.c sbr/getansreadline.c sbr/vector.c \ config/config.c config/version.c diff --git a/h/addrsbr.h b/h/addrsbr.h index 5ca801e6..fbb4cd05 100644 --- a/h/addrsbr.h +++ b/h/addrsbr.h @@ -12,21 +12,30 @@ #define NETHOST 1 #define BADHOST 2 +/* + * The email structure used by nmh to define an email address + */ + struct mailname { - struct mailname *m_next; - char *m_text; - char *m_pers; - char *m_mbox; - char *m_host; - char *m_path; - int m_type; - char m_nohost; - char m_bcc; - int m_ingrp; - char *m_gname; - char *m_note; + struct mailname *m_next; /* Linked list linkage; available for */ + /* application use */ + char *m_text; /* Full unparsed text of email address */ + char *m_pers; /* display-name in RFC 5322 parlance */ + char *m_mbox; /* local-part in RFC 5322 parlance */ + char *m_host; /* domain in RFC 5322 parlance */ + char *m_path; /* Host routing; should not be used */ + int m_type; /* UUCPHOST, LOCALHOST, NETHOST, or BADHOST */ + char m_nohost; /* True if no host part available */ + char m_bcc; /* Used by post to keep track of bcc's */ + int m_ingrp; /* True if email address is in a group */ + char *m_gname; /* display-name of group */ + char *m_note; /* Note (post-address comment) */ }; +/* + * See notes for auxformat() below. + */ + #define adrformat(m) auxformat ((m), 1) /* @@ -34,7 +43,71 @@ struct mailname { */ void mnfree(struct mailname *); int ismymbox(struct mailname *); -char *getname(const char *); -char *getlocaladdr(void); -char *auxformat(struct mailname *, int); -struct mailname *getm(char *, char *, int, int, char *); + +/* + * Parse an address header, and return a sequence of email addresses. + * This function is the main entry point into the nmh address parser. + * It is used in conjunction with getm() to parse an email header. + * + * Arguments include: + * + * header - Pointer to the start of an email header. 
+ * + * On the first call, header is copied and saved internally. Each email + * address in the header is returned on the first and subsequent calls + * to getname(). When there are no more email addresses available in + * the header, NULL is returned and the parser's internal state is + * reset. + */ + +char *getname(const char *header); + +/* + * Format an email address given a struct mailname. + * + * This function takes a pointer to a struct mailname and returns a pointer + * to a static buffer holding the resulting email address. + * + * It is worth noting that group names are NOT handled, so if you want to + * do something with groups you need to handle it externally to this function. + * + * Arguments include: + * + * mp - Pointer to mailname structure + * extras - If true, include the personal name and/or note in the + * address. Otherwise, omit it. + */ + +char *auxformat(struct mailname *mp, int extras); + +/* + * Parse an email address into its components. + * + * Used in conjunction with getname() to parse a complete email header. + * + * Arguments include: + * + * str - Email address being parsed. + * dfhost - A default host to append to the email address if + * one is not included. If NULL, use nmh's idea of + * localhost(). + * dftype - If dfhost is given, use dftype as the email address type + * if no host is in the email address. + * wanthost - One of AD_HOST or AD_NHST. If AD_HOST, look up the + * "official name" of the host. Well, that's what the + * documentation says, at least ... support for that + * functionality was removed when hostable support was + * removed and the address parser was converted by default + * to always being in DUMB mode. So nowadays this only + * affects where error messages are put if there is no + * host part (set it to AD_HOST if you want error messages + * to appear on standard error). + * eresult - Any error string returned by the address parser. String + * must contain sufficient room for the error message. 
+ * (BUFSIZ is used in general by the code). Can be NULL. + * + * A pointer to an allocated struct mailname corresponding to the email + * address is returned. + */ +struct mailname *getm(char *str, char *dfhost, int dftype, + int wanthost, char *eresult); diff --git a/h/mhparse.h b/h/mhparse.h index b5a73627..1dfd7ed8 100644 --- a/h/mhparse.h +++ b/h/mhparse.h @@ -285,6 +285,21 @@ extern struct str2init str2methods[]; */ int pidcheck (int); CT parse_mime (char *); + +/* + * Translate a composition file into a MIME data structure. Arguments are: + * + * infile - Name of input filename + * directives - A flag to control whether or not build directives are + * processed by default. + * encoding - The default encoding to use when doing RFC 2047 header + * encoding. Must be one of CE_UNKNOWN, CE_BASE64, or + * CE_QUOTED; + * + * Returns a CT structure describing the resulting MIME message. + */ +CT build_mime (char *infile, int directives, int encoding); + int add_header (CT, char *, char *); int get_ctinfo (char *, CT, int); int params_external (CT, int); diff --git a/h/prototypes.h b/h/prototypes.h index b1a53cde..3797fecc 100644 --- a/h/prototypes.h +++ b/h/prototypes.h @@ -59,6 +59,27 @@ char *cpytrim (const char *); int decode_rfc2047 (char *, char *, size_t); void discard (FILE *); int default_done (int); + +/* + * Encode a message header using RFC 2047 encoding. If the message contains + * no non-ASCII characters, then leave the header as-is. + * + * Arguments include: + * + * name - Message header name + * value - Message header content; must point to allocated memory + * (may be changed if encoding is necessary) + * encoding - Encoding type. May be one of CE_UNKNOWN (function chooses + * the encoding), CE_BASE64 or CE_QUOTED + * charset - Charset used for encoding. If NULL, obtain from system + * locale. + * + * Returns 0 on success, any other value on failure. 
+ */ + +int encode_rfc2047(const char *name, char **value, int encoding, + const char *charset); + void escape_display_name (char *, size_t); void escape_local_part (char *, size_t); int ext_hook(char *, char *, char *); @@ -224,6 +245,23 @@ int ssequal (char *, char *); int stringdex (char *, char *); char *trimcpy (char *); int unputenv (char *); + +/* + * Remove quotes and quoted-pair sequences from RFC-5322 atoms. + * + * Currently the actual algorithm is simpler than it technically should + * be: any quotes are simply eaten, unless they're preceded by the escape + * character (\). This seems to be sufficient for our needs for now. + * + * Arguments: + * + * input - The input string + * output - The output string; is assumed to have at least as much + * room as the input string. At worst the output string will + * be the same size as the input string; it might be smaller. + * + */ +void unquote_string(const char *input, char *output); int uprf (char *, char *); int vfgets (FILE *, char **); char *write_charset_8bit (void); @@ -262,6 +300,7 @@ int what_now (char *, int, int, char *, char *, int WhatNow(int, char **); int writeBase64aux(FILE *, FILE *); int writeBase64 (unsigned char *, size_t, unsigned char *); +int writeBase64raw (unsigned char *, size_t, unsigned char *); /* * credentials management diff --git a/man/fmttest.man b/man/fmttest.man index 59626da3..639f578a 100644 --- a/man/fmttest.man +++ b/man/fmttest.man @@ -15,6 +15,7 @@ language .RB [ \-format .IR formatstring ] .RB [ \-address " | " \-raw " | " \-date " | " \-message ] +.RB [ \-file " | " \-nofile ] .RB [ \-\|\-component .IR component-text ] .RB [ \-dupaddrs " | " \-nodupaddrs ] @@ -140,7 +141,15 @@ for the .RI %( unseen ), and .RI %( size ) -function escapes will be made available for each message. +function escapes will be made available for each message. 
If the +.B \-file +switch is given, the arguments are interpreted as filenames instead of +message numbers, but otherwise the behavior is the same (except that the +.RI %( msg ), +.RI %( cur ), +and +.RI %( unseen ) +function escapes will not provide any useful information). .PP The default format used in address mode is the default format used by .BR scan . @@ -394,6 +403,7 @@ dat[4] %(\fIunseen\fR) .SH DEFAULTS .nf .RB ` \-message ' +.RB ` \-nofile ' .RB ` \-dupaddrs ' .fi .SH BUGS diff --git a/man/mhbuild.man b/man/mhbuild.man index ab757d34..6f50006b 100644 --- a/man/mhbuild.man +++ b/man/mhbuild.man @@ -17,6 +17,9 @@ mhbuild \- translate MIME composition draft .RB [ \-contentid " | " \-nocontentid ] .RB [ \-verbose " | " \-noverbose ] .RB [ \-check " | " \-nocheck ] +.RB [ \-headerencoding +.IR encoding\-algorithm +.RB " | " \-autoheaderencoding ] .RB [ \-version ] .RB [ \-help ] .ad @@ -28,11 +31,8 @@ a valid MIME message. .PP .B mhbuild creates multi-media messages as specified in RFC 2045 -to RFC 2049. Currently -.B mhbuild -only supports encodings in -message bodies, and does not support the encoding of message headers as -specified in RFC 2047. +to RFC 2049. This includes the encoding of message headers as specified +by RFC 2047. .PP If you specify the name of the composition file as \*(lq-\*(rq, then @@ -77,6 +77,20 @@ switch is present, then the listing will show any \*(lqextra\*(rq information that is present in the message, such as comments in the \*(lqContent-Type\*(rq header. +.PP +The +.B \-headerencoding +switch will indicate which algorithm to use when encoding any message headers +that contain 8\-bit characters. The valid arguments are +.I base64 +for base64 encoding and +.I quoted +for quoted\-printable encoding. The +.B \-autoheaderencoding +switch will instruct +.B mhbuild +to automatically pick the encoding algorithm based on the frequency of +8\-bit characters. 
.SS "Translating the Composition File" .B mhbuild is essentially a filter to aid in the composition of MIME @@ -714,4 +728,5 @@ is checked. .RB ` \-contentid ' .RB ` \-nocheck ' .RB ` \-noverbose ' +.RB ` \-autoheaderencoding ' .fi diff --git a/sbr/addrsbr.c b/sbr/addrsbr.c index ce14394e..e5b20520 100644 --- a/sbr/addrsbr.c +++ b/sbr/addrsbr.c @@ -274,24 +274,6 @@ auxformat (struct mailname *mp, int extras) } -/* - * This used to be adrsprintf() (where it would format an address for you - * given a username and a domain). But somewhere we got to the point where - * the only caller was post, and it only called it with both arguments NULL. - * So the function was renamed with a more sensible name. - */ - -char * -getlocaladdr(void) -{ - char *username; - - username = getusername(); - - return username; -} - - #define W_NIL 0x0000 #define W_MBEG 0x0001 #define W_MEND 0x0002 diff --git a/sbr/base64.c b/sbr/base64.c index c3045ab7..5ff9f53c 100644 --- a/sbr/base64.c +++ b/sbr/base64.c @@ -114,3 +114,49 @@ writeBase64 (unsigned char *in, size_t length, unsigned char *out) return OK; } + +/* + * Essentially a duplicate of writeBase64, but without line wrapping or + * newline termination (note: string IS NUL terminated) + */ + +int +writeBase64raw (unsigned char *in, size_t length, unsigned char *out) +{ + while (1) { + unsigned long bits; + unsigned char *bp; + unsigned int cc; + for (cc = 0, bp = in; length > 0 && cc < 3; ++cc, ++bp, --length) + /* empty */ ; + + if (cc == 0) { + break; + } else { + bits = (in[0] & 0xff) << 16; + if (cc > 1) { + bits |= (in[1] & 0xff) << 8; + if (cc > 2) { + bits |= in[2] & 0xff; + } + } + } + + for (bp = out + 4; bp > out; bits >>= 6) + *--bp = nib2b64[bits & 0x3f]; + if (cc < 3) { + out[3] = '='; + if (cc < 2) + out[2] = '='; + out += 4; + break; + } + + in += 3; + out += 4; + } + + *out = '\0'; + + return OK; +} diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c new file mode 100644 index 00000000..ac2b6dc3 --- /dev/null +++ 
b/sbr/encode_rfc2047.c @@ -0,0 +1,856 @@ +/* + * Routines to encode message headers using RFC 2047-encoding. + * + * This code is Copyright (c) 2002, by the authors of nmh. See the + * COPYRIGHT file in the root directory of the nmh distribution for + * complete copyright information. + */ + +#include +#include +#include +#include + +/* + * List of headers that contain addresses and as a result require special + * handling + */ + +static char *address_headers[] = { + "To", + "From", + "cc", + "Bcc", + "Reply-To", + "Sender", + "Resent-To", + "Resent-From", + "Resent-cc", + "Resent-Bcc", + "Resent-Reply-To", + "Resent-Sender", + NULL, +}; + +/* + * Macros we use for parsing headers + */ + +#define is_fws(c) (c == '\t' || c == ' ' || c == '\n') + +#define qphrasevalid(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || \ + (c >= 'a' && c <= 'z') || \ + c == '!' || c == '*' || c == '+' || c == '-' || \ + c == '/' || c == '=' || c == '_') +#define qpspecial(c) (c < ' ' || c == '=' || c == '?' || c == '_') + +#define base64len(n) ((((n) + 2) / 3 ) * 4) /* String len to base64 len */ +#define strbase64(n) ((n) / 4 * 3) /* Chars that fit in base64 */ + +#define ENCODELINELIMIT 76 + +static void unfold_header(char **, int); +static int field_encode_address(const char *, char **, int, const char *); +static int field_encode_quoted(const char *, char **, const char *, int, + int, int); +static int field_encode_base64(const char *, char **, const char *); +static int scanstring(const char *, int *, int *, int *); +static int utf8len(const char *); +static int pref_encoding(int, int, int); + +/* + * Encode a message header using RFC 2047 encoding. We make the assumption + * that all characters < 128 are ASCII and as a consequence don't need any + * encoding. 
+ */ + +int +encode_rfc2047(const char *name, char **value, int encoding, + const char *charset) +{ + int i, asciicount = 0, eightbitcount = 0, qpspecialcount = 0; + char *p; + + /* + * First, check to see if we even need to encode the header + */ + + for (p = *value; *p != '\0'; p++) { + if (isascii((unsigned char) *p)) { + asciicount++; + if (qpspecial((unsigned char) *p)) + qpspecialcount++; + } else + eightbitcount++; + } + + if (eightbitcount == 0) + return 0; + + /* + * Some rules from RFC 2047: + * + * - Encoded words cannot be more than 75 characters long + * - Multiple "long" encoded words must be on new lines. + * + * Also, we're not permitted to encode email addresses, so + * we need to actually _parse_ email addresses and only encode + * the right bits. + */ + + /* + * If charset was NULL, then get the value from the locale. But + * we reject it if it returns US-ASCII + */ + + if (charset == NULL) + charset = write_charset_8bit(); + + if (strcasecmp(charset, "US-ASCII") == 0) { + advise(NULL, "Cannot use US-ASCII with 8 bit characters in header"); + return 1; + } + + /* + * If we have an address header, then we need to parse the addresses + * and only encode the names or comments. Otherwise, handle it normally. + */ + + for (i = 0; address_headers[i]; i++) { + if (strcasecmp(name, address_headers[i]) == 0) + return field_encode_address(name, value, encoding, charset); + } + + /* + * On the encoding we choose, and the specifics of encoding: + * + * - If a specified encoding is passed in, we use that. + * - Otherwise, pick which encoding is shorter. + * + * We don't quite handle continuation right here, but it should be + * pretty close. 
+ */ + + if (encoding == CE_UNKNOWN) + encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount); + + unfold_header(value, asciicount + eightbitcount); + + switch (encoding) { + + case CE_BASE64: + return field_encode_base64(name, value, charset); + + case CE_QUOTED: + return field_encode_quoted(name, value, charset, asciicount, + eightbitcount + qpspecialcount, 0); + + default: + advise(NULL, "Internal error: unknown RFC-2047 encoding type"); + return 1; + } +} + +/* + * Encode our specified header (or field) using quoted-printable + */ + +static int +field_encode_quoted(const char *name, char **value, const char *charset, + int ascii, int encoded, int phraserules) +{ + int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1; + int charsetlen = strlen(charset), utf8; + char *output = NULL, *p, *q; + + /* + * Right now we just encode the whole thing. Maybe later on we'll + * only encode things on a per-atom basis. + */ + + p = *value; + + column = prefixlen + 2; /* Header name plus ": " */ + + utf8 = strcasecmp(charset, "UTF-8") == 0; + + while (*p != '\0') { + /* + * Start a new line, if it's time + */ + if (newline) { + /* + * If it's the start of the header, we don't need to pad it + * + * The length of the output string is ... + * =?charset?Q?...?= so that's 7+strlen(charset) + 2 for \n NUL + * + * plus 1 for every ASCII character and 3 for every eight bit + * or special character (eight bit characters are written as =XX). + * + */ + + int tokenlen; + + outlen += 9 + charsetlen + ascii + 3 * encoded; + + /* + * If output is set, then we're continuing the header. Otherwise + * do the initial allocation. 
+ */ + + if (output) { + int curlen = q - output, i; + outlen += prefixlen + 1; /* Header plus \n ": " */ + output = mh_xrealloc(output, outlen); + q = output + curlen; + *q++ = '?'; + *q++ = '='; + *q++ = '\n'; + for (i = 0; i < prefixlen; i++) + *q++ = ' '; + } else { + /* + * A bit of a hack here; the header can contain multiple + * spaces (probably at least one) until we get to the + * actual text. Copy until we get to a non-space. + */ + output = mh_xmalloc(outlen); + q = output; + while (is_fws(*p)) + *q++ = *p++; + } + + tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset); + q += tokenlen; + column = prefixlen + tokenlen; + newline = 0; + } + + /* + * Process each character, encoding if necessary + * + * Note that we have a different set of rules if we're processing + * RFC 5322 'phrase' (something you'd see in an address header). + */ + + column++; + + if (*p == ' ') { + *q++ = '_'; + ascii--; + } else if (isascii((unsigned char) *p) && + (phraserules ? qphrasevalid((unsigned char) *p) : + !qpspecial((unsigned char) *p))) { + *q++ = *p; + ascii--; + } else { + snprintf(q, outlen - (q - output), "=%02X", (unsigned char) *p); + q += 3; + column += 2; /* column already incremented by 1 above */ + encoded--; + } + + p++; + + /* + * We're not allowed more than ENCODELINELIMIT characters per line, + * so reserve some room for the final ?=. + * + * If prefixlen == 0, we haven't been passed in a header name, so + * don't ever wrap the field (we're likely doing an address). + */ + + if (prefixlen == 0) + continue; + + if (column >= ENCODELINELIMIT - 2) { + newline = 1; + } else if (utf8) { + /* + * Okay, this is a bit weird, but to explain a bit more ... + * + * RFC 2047 prohibits the splitting of multibyte characters + * across encoded words. Right now we only handle the case + * of UTF-8, the most common multibyte encoding. + * + * p is now pointing at the next input character. 
If we're + * using UTF-8 _and_ we'd go over ENCODELINELIMIT given the + * length of the complete character, then trigger a newline + * now. Note that we check the length * 3 since we have to + * allow for the encoded output. + */ + if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) { + newline = 1; + } + } + } + + *q++ = '?'; + *q++ = '='; + + if (prefixlen) + *q++ = '\n'; + + *q = '\0'; + + free(*value); + + *value = output; + + return 0; +} + +/* + * Encode our specified header (or field) using base64. + * + * This is a little easier since every character gets encoded, we can + * calculate the line wrap up front. + */ + +static int +field_encode_base64(const char *name, char **value, const char *charset) +{ + int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset); + int outlen = 0, numencode, curlen; + char *output = NULL, *p = *value, *q = NULL, *linestart; + + /* + * Skip over any leading white space. + */ + + while (*p == ' ' || *p == '\t') + p++; + + /* + * If we had a zero-length prefix, then just encode the whole field + * as-is, without line wrapping. Note that in addition to the encoding + * + * The added length we need is =? + charset + ?B? ... ?= + * + * That's 7 + strlen(charset) + 2 (for \n NUL). + */ + + while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen + + prefixlen) > ENCODELINELIMIT)) { + + /* + * Our very first time, don't pad the line in the front + * + * Note ENCODELINELIMIT is + 2 because of \n \0 + */ + + + if (! output) { + outlen += ENCODELINELIMIT + 2; + output = q = mh_xmalloc(outlen); + linestart = q - prefixlen; /* Yes, this is intentional */ + } else { + int curstart = linestart - output; + curlen = q - output; + + outlen += ENCODELINELIMIT + 2; + output = mh_xrealloc(output, outlen); + q = output + curlen; + linestart = output + curstart; + } + + /* + * We should have enough space now, so prepend the encoding markers + * and character set information. The leading space is intentional. 
+ */ + + q += snprintf(q, outlen - (q - output), " =?%s?B?", charset); + + /* + * Find out how much room we have left on the line and see how + * many characters we can stuff in. The start of our line + * is marked by "linestart", so use that to figure out how + * many characters are left out of ENCODELINELIMIT. Reserve + * 2 characters for the end markers and calculate how many + * characters we can fit into that space given the base64 + * encoding expansion. + */ + + numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2); + + if (numencode <= 0) { + advise(NULL, "Internal error: tried to encode %d characters " + "in base64", numencode); + return 1; + } + + /* + * RFC 2047 prohibits spanning multibyte characters across tokens. + * Right now we only check for UTF-8. + * + * So note the key here ... we want to make sure the character BEYOND + * our last character is not a continuation byte. If it's the start + * of a new multibyte character or a single-byte character, that's ok. + */ + + if (strcasecmp(charset, "UTF-8") == 0) { + /* + * p points to the start of our current buffer, so p + numencode + * is one past the last character to encode + */ + + while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80)) + numencode--; + + if (numencode == 0) { + advise(NULL, "Internal error: could not find start of " + "UTF-8 character when base64 encoding header"); + return 1; + } + } + + if (writeBase64raw((unsigned char *) p, numencode, + (unsigned char *) q) != OK) { + advise(NULL, "Internal error: base64 encoding of header failed"); + return 1; + } + + p += numencode; + q += base64len(numencode); + + /* + * This will point us at the beginning of the new line (trust me). + */ + + linestart = q + 3; + + /* + * What's going on here? Well, we know we're continuing to the next + * line, so we want to add continuation padding. We also add the + * trailing marker for the RFC 2047 token at this time as well. 
+ * This uses a trick of snprintf(); we tell it to print a zero-length + * string, but pad it out to prefixlen - 1 characters; that ends + * up always printing out the requested number of spaces. We use + * prefixlen - 1 because we always add a space on the starting + * token marker; this makes things work out correctly for the first + * line, which should have a space between the ':' and the start + * of the token. + * + * It's okay if you don't follow all of that. + */ + + q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, ""); + } + + /* + * We're here if there is either no prefix, or we can fit it in less + * than ENCODELINELIMIT characters. Encode the whole thing. + */ + + outlen += prefixlen + 9 + charsetlen + base64len(strlen(p)); + curlen = q - output; + + output = mh_xrealloc(output, outlen); + q = output + curlen; + + q += snprintf(q, outlen - (q - output), "%s=?%s?B?", + prefixlen ? " " : "", charset); + + if (writeBase64raw((unsigned char *) p, strlen(p), + (unsigned char *) q) != OK) { + advise(NULL, "Internal error: base64 encoding of header failed"); + return 1; + } + + strcat(q, "?="); + + if (prefixlen) + strcat(q, "\n"); + + free(*value); + + *value = output; + + return 0; +} + +/* + * Calculate the length of a UTF-8 character. + * + * If it's not a UTF-8 character (or we're in the middle of a multibyte + * character) then simply return 0. + */ + +static int +utf8len(const char *p) +{ + int len = 1; + + if (*p == '\0') + return 0; + + if (isascii((unsigned char) *p) || (((unsigned char) *p) & 0xc0) == 0x80) + return 0; + + p++; + while ((((unsigned char) *p++) & 0xc0) == 0x80) + len++; + + return len; +} + +/* + * "Unfold" a header, making it a single line (without continuation) + * + * We cheat a bit here; we never make the string longer, so using the + * original length here is fine. 
+ */ + +static void +unfold_header(char **value, int len) +{ + char *str = mh_xmalloc(len + 1); + char *p = str, *q = *value; + + while (*q != '\0') { + if (*q == '\n') { + /* + * When we get a newline, skip to the next non-whitespace + * character and add a space to replace all of the whitespace + * + * This has the side effect of stripping off the final newline + * for the header; we put it back in the encoding routine. + */ + while (is_fws(*q)) + q++; + if (*q == '\0') + break; + + *p++ = ' '; + } else { + *p++ = *q++; + } + } + + *p = '\0'; + + free(*value); + *value = str; +} + +/* + * Encode a header containing addresses. This means we have to parse + * each address and only encode the display-name or comment field. + */ + +static int +field_encode_address(const char *name, char **value, int encoding, + const char *charset) +{ + int prefixlen = strlen(name) + 2, column = prefixlen, groupflag; + int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0; + int retval; + size_t len; + char *mp, *cp = NULL, *output = NULL; + char *tmpbuf = NULL; + size_t tmpbufsize = 0; + struct mailname *mn; + + /* + * Because these are addresses, we need to handle them individually. + * + * Break them down and process them one by one. This means we have to + * rewrite the whole header, but that's unavoidable. + */ + + /* + * The output headers always have to start with a space first; this + * is just the way the API works right now. + */ + + output = add(" ", output); + + for (groupflag = 0; (mp = getname(*value)); ) { + if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) { + errflag++; + continue; + } + + reformat = 0; + + /* + * We only care if the phrase (m_pers) or any trailing comment + * (m_note) have 8-bit characters. If doing q-p, we also need + * to encode anything marked as qspecial(). Unquote it first + * so the specialchars count is right. + */ + + if (! 
mn->m_pers) + goto check_note; + + if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) { + tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1); + } + + unquote_string(mn->m_pers, tmpbuf); + + if (scanstring(tmpbuf, &asciichars, &eightbitchars, + &specialchars)) { + /* + * If we have 8-bit characters, encode it. + */ + + if (encoding == CE_UNKNOWN) + encoding = pref_encoding(asciichars, specialchars, + eightbitchars); + + /* + * This is okay, because the output of unquote_string will be either + * equal or shorter than the original. + */ + + strcpy(mn->m_pers, tmpbuf); + + switch (encoding) { + + case CE_BASE64: + retval = field_encode_base64(NULL, &mn->m_pers, charset); + break; + + case CE_QUOTED: + retval = field_encode_quoted(NULL, &mn->m_pers, charset, + asciichars, + eightbitchars + specialchars, 1); + break; + + default: + advise(NULL, "Internal error: unknown RFC-2047 encoding type"); + errflag++; + goto out; + } + + reformat++; + } + + check_note: + + /* + * The "note" field is generally a comment at the end of the address, + * at least as how it's implemented here. Notes are always surrounded + * by parentheses (since they're comments). Strip them out and + * then put them back when we format the final field, but they do + * not get encoded. + */ + + if (! mn->m_note) + goto do_reformat; + + len = strlen(mn->m_note); + + if (len + 1 > tmpbufsize) { + tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1); + } + + if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') { + advise(NULL, "Internal error: Invalid note field \"%s\"", + mn->m_note); + errflag++; + goto out; + } + + strncpy(tmpbuf, mn->m_note + 1, len - 1); + tmpbuf[len - 2] = '\0'; + + if (scanstring(tmpbuf, &asciichars, &eightbitchars, + &specialchars)) { + /* + * If we have 8-bit characters, encode it. 
+ */ + + if (encoding == CE_UNKNOWN) + encoding = pref_encoding(asciichars, specialchars, + eightbitchars); + + switch (encoding) { + + case CE_BASE64: + retval = field_encode_base64(NULL, &tmpbuf, charset); + break; + + case CE_QUOTED: + retval = field_encode_quoted(NULL, &tmpbuf, charset, + asciichars, + eightbitchars + specialchars, 1); + break; + + default: + advise(NULL, "Internal error: unknown RFC-2047 encoding type"); + errflag++; + goto out; + } + + reformat++; + + /* + * Make sure the size of tmpbuf is correct (it always gets + * reallocated in the above functions). + */ + + tmpbufsize = strlen(tmpbuf) + 1; + + /* + * Put the note field back surrounded by parentheses. + */ + + mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2); + + snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf); + } + +do_reformat: + + /* + * So, some explanation is in order. + * + * We know we need to rewrite at least one address in the header, + * otherwise we wouldn't be here. If we had to reformat this + * particular address, then run it through adrformat(). Otherwise + * we can use m_text directly. + */ + + /* + * If we were in a group but are no longer, make sure we add a + * semicolon (which needs to be FIRST, as it needs to be at the end + * of the last address). + */ + + if (groupflag && ! mn->m_ingrp) { + output = add(";", output); + column += 1; + } + + groupflag = mn->m_ingrp; + + if (mn->m_gname) { + cp = add(mn->m_gname, NULL); + } + + if (reformat) { + cp = add(adrformat(mn), cp); + } else { + cp = add(mn->m_text, cp); + } + + len = strlen(cp); + + /* + * If we're not at the beginning of the line, add a comma and + * either a space or a newline. 
+ */ + + if (column != prefixlen) { + if (len + column + 2 > OUTPUTLINELEN) { + + if ((size_t) (prefixlen + 3) > tmpbufsize) + tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3); + + snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, ""); + output = add(tmpbuf, output); + } else { + output = add(", ", output); + column += 2; + } + } + + /* + * Finally add the address + */ + + output = add(cp, output); + column += len; + free(cp); + cp = NULL; + } + + /* + * Just in case we're at the end of a list + */ + + if (groupflag) { + output = add(";", output); + } + + output = add("\n", output); + + free(*value); + *value = output; + output = NULL; + +out: + + if (tmpbuf) + free(tmpbuf); + if (output) + free(output); + + return errflag > 0; +} + +/* + * Scan a string, check for characters that need to be encoded + */ + +static int +scanstring(const char *string, int *asciilen, int *eightbitchars, + int *specialchars) +{ + *asciilen = 0; + *eightbitchars = 0; + *specialchars = 0; + + for (; *string != '\0'; string++) { + if ((isascii((unsigned char) *string))) { + (*asciilen)++; + /* + * So, a space is not a valid phrase character, but we're counting + * an exception here, because in q-p a space can be directly + * encoded as an underscore. + */ + if (!qphrasevalid((unsigned char) *string) && *string != ' ') + (*specialchars)++; + } else { + (*eightbitchars)++; + } + } + + return *eightbitchars > 0; +} + +/* + * This function is to be used to decide which encoding algorithm we should + * use if one is not given. Basically, we pick whichever one is the shorter + * of the two. + * + * Arguments are: + * + * ascii - Number of ASCII characters in to-be-encoded string. + * specials - Number of ASCII characters in to-be-encoded string that + * still require encoding under quoted-printable. Note that + * these are included in the "ascii" total. + * eightbits - Eight-bit characters in the to-be-encoded string. + * + * Returns one of CE_BASE64 or CE_QUOTED. 
+ */ + +static int +pref_encoding(int ascii, int specials, int eightbits) +{ + /* + * The length of the q-p encoding is: + * + * ascii - specials + (specials + eightbits) * 3. + * + * The length of the base64 encoding is: + * + * base64len(ascii + eightbits) (See macro for details) + */ + + return base64len(ascii + eightbits) < (ascii - specials + + (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED; +} diff --git a/sbr/fmt_scan.c b/sbr/fmt_scan.c index d0bb16df..408b1093 100644 --- a/sbr/fmt_scan.c +++ b/sbr/fmt_scan.c @@ -868,32 +868,10 @@ fmt_scan (struct format *format, char *scanl, size_t max, int width, int *dat, /* UNQUOTEs RFC-2822 quoted-string and quoted-pair */ case FT_LS_UNQUOTE: if (str) { - int m; strncpy(buffer, str, sizeof(buffer)); /* strncpy doesn't NUL-terminate if it fills the buffer */ buffer[sizeof(buffer)-1] = '\0'; - str = buffer; - - /* we will parse from buffer to buffer2 */ - n = 0; /* n is the input position in str */ - m = 0; /* m is the ouput position in buffer2 */ - - while ( str[n] != '\0') { - switch ( str[n] ) { - case '\\': - n++; - if ( str[n] != '\0') - buffer2[m++] = str[n++]; - break; - case '"': - n++; - break; - default: - buffer2[m++] = str[n++]; - break; - } - } - buffer2[m] = '\0'; + unquote_string(buffer, buffer2); str = buffer2; } break; diff --git a/sbr/unquote.c b/sbr/unquote.c new file mode 100644 index 00000000..f9fa9112 --- /dev/null +++ b/sbr/unquote.c @@ -0,0 +1,45 @@ +/* + * unquote.c: Handle quote removal and quoted-pair strings on + * RFC 2822-5322 atoms. + * + * This code is Copyright (c) 2013, by the authors of nmh. See the + * COPYRIGHT file in the root directory of the nmh distribution for + * complete copyright information. + */ + +#include + +/* + * Remove quotes (and handle escape strings) from RFC 5322 quoted-strings. + * + * Since we never add characters to the string, the output buffer is assumed + * to have at least as many characters as the input string. 
+ * + */ + +void +unquote_string(const char *input, char *output) +{ + int n = 0; /* n is the position in the input buffer */ + int m = 0; /* m is the position in the output buffer */ + + while ( input[n] != '\0') { + switch ( input[n] ) { + case '\\': + n++; + if ( input[n] != '\0') + output[m++] = input[n++]; + break; + case '"': + n++; + break; + default: + output[m++] = input[n++]; + break; + } + } + + output[m] = '\0'; + + return; +} diff --git a/test/format/test-unquote b/test/format/test-unquote new file mode 100755 index 00000000..ea465b6d --- /dev/null +++ b/test/format/test-unquote @@ -0,0 +1,36 @@ +#!/bin/sh +# +# Test that the unquote function works properly. +# + +if test -z "${MH_OBJ_DIR}"; then + srcdir=`dirname "$0"`/../.. + MH_OBJ_DIR=`cd "$srcdir" && pwd`; export MH_OBJ_DIR +fi + +. "$MH_OBJ_DIR/test/common.sh" + +setup_test + +# +# Test various unquoting scenarios +# + +run_test 'eval fmttest -raw -format "%(unquote{text})" "Mr. Foo Bar"' \ + 'Mr. Foo Bar' + +run_test 'eval fmttest -raw -format "%(unquote{text})" "Mr. \"Foo\" Bar"' \ + 'Mr. Foo Bar' + +# +# Note here: the string we wanted passed on the command line is: +# +# "Mr. \"Foo\" Bar" +# +# The extra \ are necessary to get it past the shell +# + +run_test 'eval fmttest -raw -format "%(unquote{text})" "Mr. \\\"Foo\\\" Bar"' \ + 'Mr. "Foo" Bar' + +exit ${failed:-0} diff --git a/test/mhbuild/test-header-encode b/test/mhbuild/test-header-encode new file mode 100755 index 00000000..a8fb8187 --- /dev/null +++ b/test/mhbuild/test-header-encode @@ -0,0 +1,187 @@ +#!/bin/sh +###################################################### +# +# Test encoding headers according to RFC 2047 +# +###################################################### + +set -e + +if test -z "${MH_OBJ_DIR}"; then + srcdir=`dirname "$0"`/../.. + MH_OBJ_DIR=`cd "$srcdir" && pwd`; export MH_OBJ_DIR +fi + +. "${MH_OBJ_DIR}/test/common.sh" + +setup_test +testname="${MH_TEST_DIR}/$$" + +# +# We're going to hardcode UTF-8 for this test. 
+# + +LC_ALL=en_US.UTF-8; export LC_ALL + +# +# Basic test of encoding a short subject +# +cat > "${testname}.basic.actual" < +To: Somebody +Subject: This is ä test + +This is a test +EOF + +cat > "${testname}.basic.expected" < +To: Somebody +Subject: =?UTF-8?Q?This_is_=C3=A4_test?= +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" + +This is a test +EOF + +mhbuild "${testname}.basic.actual" + +check "${testname}.basic.actual" "${testname}.basic.expected" 'keep first' + +# +# Make sure we can undo the encoding +# + +run_test 'eval fmttest -outsize max -format "%(decode{subject})" -message -file "${testname}.basic.actual"' 'This is ä test' + +rm -f "${testname}.basic.actual" + +# +# Basic test of encoding a short subject, but with base64 +# +cat > "${testname}.basic.actual" < +To: Somebody +Subject: This is ä test + +This is a test +EOF + +cat > "${testname}.basic.expected" < +To: Somebody +Subject: =?UTF-8?B?VGhpcyBpcyDDpCB0ZXN0?= +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" + +This is a test +EOF + +mhbuild -headerencoding base64 "${testname}.basic.actual" + +check "${testname}.basic.actual" "${testname}.basic.expected" 'keep first' + +run_test 'eval fmttest -outsize max -format "%(decode{subject})" -message -file "${testname}.basic.actual"' 'This is ä test' + +rm -f "${testname}.basic.actual" + +# +# Have a subject that will pick base64 as the shorter encoding +# + +cat > "${testname}.autopick.actual" < +To: Somebody +Subject: This is ä tëst© + +This is a test +EOF + +cat > "${testname}.autopick.expected" < +To: Somebody +Subject: =?UTF-8?B?VGhpcyBpcyDDpCB0w6tzdMKp?= +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" + +This is a test +EOF + +mhbuild "${testname}.autopick.actual" + +check "${testname}.autopick.actual" "${testname}.autopick.expected" 'keep first' + +run_test 'eval fmttest -outsize max -format "%(decode{subject})" -message -file "${testname}.autopick.actual"' 'This is ä tëst©' + +rm -f 
"${testname}.basic.autopick" + +# +# Tests using longer subject lines. +# + +cat > "${testname}.longsubject1.actual" < +To: Somebody +Subject: This is ä tëst of a rather long sübject line; will it overflow + the line length? I sure hope thät ='s and "'s are encoded properly. Will + they be? + +This is a test of a very long subject line. +EOF + +cat > "${testname}.longsubject1.expected" < +To: Somebody +Subject: =?UTF-8?Q?This_is_=C3=A4_t=C3=ABst_of_a_rather_long_s=C3=BCbject_?= + =?UTF-8?Q?line;_will_it_overflow_the_line_length=3F__I_sure_hope_?= + =?UTF-8?Q?th=C3=A4t_=3D's_and_"'s_are_encoded_properly.__Will_the?= + =?UTF-8?Q?y_be=3F?= +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" + +This is a test of a very long subject line. +EOF + +mhbuild "${testname}.longsubject1.actual" + +check "${testname}.longsubject1.actual" "${testname}.longsubject1.expected" 'keep first' + +run_test 'eval fmttest -outsize max -format "%(putlit(decode(trim{subject})))" -message -file "${testname}.longsubject1.actual"' "This is ä tëst of a rather long sübject line; will it overflow the line length? I sure hope thät ='s and \"'s are encoded properly. Will they be?" + +rm -f "${testname}.longsubject1.actual" + +# +# Test a longer line with base64 encoding +# + +cat > "${testname}.longsubject2.actual" < +To: Somebody +Subject: This is ä tëst øf ä räthër løng sübjëct line; will it øvërfløw + the line length? I sure hope thät ='s and "'s are encoded properly. Will + they be? + +This is a test of a very long subject line using base64. 
+EOF + +cat > "${testname}.longsubject2.expected" < +To: Somebody +Subject: =?UTF-8?B?VGhpcyBpcyDDpCB0w6tzdCDDuGYgw6QgcsOkdGjDq3IgbMO4bmcg?= + =?UTF-8?B?c8O8YmrDq2N0IGxpbmU7IHdpbGwgaXQgw7h2w6tyZmzDuHcgdGhl?= + =?UTF-8?B?IGxpbmUgbGVuZ3RoPyAgSSBzdXJlIGhvcGUgdGjDpHQgPSdzIGFu?= + =?UTF-8?B?ZCAiJ3MgYXJlIGVuY29kZWQgcHJvcGVybHkuICBXaWxsIHRoZXkg?= + =?UTF-8?B?YmU/?= +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" + +This is a test of a very long subject line using base64. +EOF + +mhbuild "${testname}.longsubject2.actual" + +check "${testname}.longsubject2.actual" "${testname}.longsubject2.expected" 'keep first' + +run_test 'eval fmttest -outsize max -format "%(putlit(decode(trim{subject})))" -message -file "${testname}.longsubject2.actual"' "This is ä tëst øf ä räthër løng sübjëct line; will it øvërfløw the line length? I sure hope thät ='s and \"'s are encoded properly. Will they be?" + +exit ${failed:-0} diff --git a/uip/fmttest.c b/uip/fmttest.c index b3deda81..4794c7bf 100644 --- a/uip/fmttest.c +++ b/uip/fmttest.c @@ -21,6 +21,8 @@ X("raw", 0, RAWSW) \ X("date", 0, DATESW) \ X("message", 0, MESSAGESW) \ + X("file", 0, FILESW) \ + X("nofile", 0, NFILESW) \ X("-component-name component-text", 0, OTHERSW) \ X("dupaddrs", 0, DUPADDRSW) \ X("nodupaddrs", 0, NDUPADDRSW) \ @@ -91,7 +93,10 @@ static void process_raw(struct format *, struct msgs_array *, char *, int, int, int *, struct fmt_callbacks *); static void process_messages(struct format *, struct msgs_array *, struct msgs_array *, char *, char *, int, - int, int *, struct fmt_callbacks *); + int, int, int *, struct fmt_callbacks *); +static void process_single_file(FILE *, struct msgs_array *, int *, int, + struct format *, char *, int, int, + struct fmt_callbacks *); static void test_trace(void *, struct format *, int, char *, char *); static char *test_formataddr(char *, char *); static char *test_concataddr(char *, char *); @@ -112,7 +117,7 @@ main (int argc, char **argv) struct comp *cptr; struct 
msgs_array msgs = { 0, 0, NULL }, compargs = { 0, 0, NULL}; int dump = 0, i; - int outputsize = 0, bufsize = 0, dupaddrs = 1, trace = 0; + int outputsize = 0, bufsize = 0, dupaddrs = 1, trace = 0, files = 0; int colwidth = -1, msgnum = -1, msgcur = -1, msgsize = -1, msgunseen = -1; int normalize = AD_HOST; enum mode_t mode = MESSAGE; @@ -222,6 +227,13 @@ main (int argc, char **argv) defformat = DEFDATEFORMAT; continue; + case FILESW: + files++; + continue; + case NFILESW: + files = 0; + continue; + case DUPADDRSW: dupaddrs++; continue; @@ -276,7 +288,7 @@ main (int argc, char **argv) * Only interpret as a folder if we're in message mode */ - if (mode == MESSAGE && (*cp == '+' || *cp == '@')) { + if (mode == MESSAGE && !files && (*cp == '+' || *cp == '@')) { if (folder) adios (NULL, "only one folder at a time!"); else @@ -380,7 +392,7 @@ main (int argc, char **argv) if (mode == MESSAGE) { process_messages(fmt, &compargs, &msgs, buffer, folder, bufsize, - outputsize, dat, cbp); + outputsize, files, dat, cbp); } else { if (compargs.size) { for (i = 0; i < compargs.size; i += 2) { @@ -488,17 +500,33 @@ process_addresses(struct format *fmt, struct msgs_array *addrs, char *buffer, static void process_messages(struct format *fmt, struct msgs_array *comps, struct msgs_array *msgs, char *buffer, char *folder, - int bufsize, int outwidth, int *dat, struct fmt_callbacks *cb) + int bufsize, int outwidth, int files, int *dat, + struct fmt_callbacks *cb) { - int i, state, msgnum, msgsize = dat[2], num = dat[0], cur = dat[1]; + int i, msgnum, msgsize = dat[2], num = dat[0], cur = dat[1]; int num_unseen_seq = 0; ivector_t seqnum = ivector_create (0); - char *maildir, *cp, name[NAMESZ], rbuf[BUFSIZ]; + char *maildir, *cp; struct msgs *mp; - struct comp *c; FILE *in; - m_getfld_state_t gstate = 0; - int bufsz; + + /* + * If 'files' is set, short-circuit everything else and just process + * everything now. 
+ */ + + if (files) { + for (i = 0; i < msgs->size; i++) { + if ((in = fopen(cp = msgs->msgs[i], "r")) == NULL) { + admonish(cp, "unable to open file"); + continue; + } + process_single_file(in, comps, dat, msgsize, fmt, buffer, + bufsize, outwidth, cb); + } + + return; + } if (! folder) folder = getfolder(1); @@ -558,19 +586,6 @@ process_messages(struct format *fmt, struct msgs_array *comps, if (cur == -1) dat[1] = msgnum == mp->curmsg; - /* - * Get our size if we didn't include one - */ - - if (msgsize == -1) { - struct stat st; - - if (fstat(fileno(in), &st) < 0) - dat[2] = 0; - else - dat[2] = st.st_size; - } - /* * Check to see if this is in the unseen sequence */ @@ -587,69 +602,116 @@ process_messages(struct format *fmt, struct msgs_array *comps, * Read in the message and process the components */ - for (state = FLD;;) { - bufsz = sizeof(rbuf); - state = m_getfld(&gstate, name, rbuf, &bufsz, in); - switch (state) { - case FLD: - case FLDPLUS: - i = fmt_addcomptext(name, rbuf); - if (i != -1) { - while (state == FLDPLUS) { - bufsz = sizeof(rbuf); - state = m_getfld(&gstate, name, rbuf, &bufsz, in); - fmt_appendcomp(i, name, rbuf); - } - } - - while (state == FLDPLUS) { - bufsz = sizeof(rbuf); - state = m_getfld(&gstate, name, rbuf, &bufsz, in); - } - break; + process_single_file(in, comps, dat, msgsize, fmt, buffer, + bufsize, outwidth, cb); + } + } - case BODY: - if (fmt_findcomp("body")) { - if ((i = strlen(rbuf)) < outwidth) { - bufsz = outwidth - 1; - state = m_getfld(&gstate, name, rbuf + i, - &bufsz, in); - } + ivector_free (seqnum); + folder_free(mp); + return; +} - fmt_addcomptext("body", rbuf); - } - /* fall through */ +/* + * Process a single file in message mode + */ + +static void +process_single_file(FILE *in, struct msgs_array *comps, int *dat, int msgsize, + struct format *fmt, char *buffer, int bufsize, + int outwidth, struct fmt_callbacks *cb) +{ + int i, state; + char name[NAMESZ], rbuf[BUFSIZ]; + m_getfld_state_t gstate = 0; + struct comp 
*c; + int bufsz; + + /* + * Get our size if we didn't include one + */ - default: - goto finished; + if (msgsize == -1) { + struct stat st; + + if (fstat(fileno(in), &st) < 0) + dat[2] = 0; + else + dat[2] = st.st_size; + } + + /* + * Initialize everything else + */ + + if (dat[0] == -1) + dat[0] = 0; + if (dat[1] == -1) + dat[1] = 0; + if (dat[4] == -1) + dat[4] = 0; + + /* + * Read in the message and process the components + */ + + for (state = FLD;;) { + bufsz = sizeof(rbuf); + state = m_getfld(&gstate, name, rbuf, &bufsz, in); + switch (state) { + case FLD: + case FLDPLUS: + i = fmt_addcomptext(name, rbuf); + if (i != -1) { + while (state == FLDPLUS) { + bufsz = sizeof(rbuf); + state = m_getfld(&gstate, name, rbuf, &bufsz, in); + fmt_appendcomp(i, name, rbuf); } } -finished: - fclose(in); - m_getfld_state_destroy(&gstate); - /* - * Do this now to override any components in the original message - */ - if (comps->size) { - for (i = 0; i < comps->size; i += 2) { - c = fmt_findcomp(comps->msgs[i]); - if (c) { - if (c->c_text) - free(c->c_text); - c->c_text = getcpy(comps->msgs[i + 1]); - } + while (state == FLDPLUS) { + bufsz = sizeof(rbuf); + state = m_getfld(&gstate, name, rbuf, &bufsz, in); + } + break; + + case BODY: + if (fmt_findcomp("body")) { + if ((i = strlen(rbuf)) < outwidth) { + bufsz = outwidth - 1; + state = m_getfld(&gstate, name, rbuf + i, + &bufsz, in); } + + fmt_addcomptext("body", rbuf); } - fmt_scan(fmt, buffer, bufsize, outwidth, dat, cb); - fputs(buffer, stdout); - mlistfree(); + /* fall through */ + + default: + goto finished; } } +finished: + fclose(in); + m_getfld_state_destroy(&gstate); - ivector_free (seqnum); - folder_free(mp); - return; + /* + * Do this now to override any components in the original message + */ + if (comps->size) { + for (i = 0; i < comps->size; i += 2) { + c = fmt_findcomp(comps->msgs[i]); + if (c) { + if (c->c_text) + free(c->c_text); + c->c_text = getcpy(comps->msgs[i + 1]); + } + } + } + fmt_scan(fmt, buffer,
bufsize, outwidth, dat, cb); + fputs(buffer, stdout); + mlistfree(); } /* diff --git a/uip/mhbuild.c b/uip/mhbuild.c index 91132ce3..ef459858 100644 --- a/uip/mhbuild.c +++ b/uip/mhbuild.c @@ -37,6 +37,8 @@ X("wcache policy", 0, WCACHESW) \ X("contentid", 0, CONTENTIDSW) \ X("nocontentid", 0, NCONTENTIDSW) \ + X("headerencoding encoding-algorithm", 0, HEADERENCSW) \ + X("autoheaderencoding", 0, AUTOHEADERENCSW) \ X("version", 0, VERSIONSW) \ X("help", 0, HELPSW) \ X("debug", -5, DEBUGSW) \ @@ -49,6 +51,17 @@ DEFINE_SWITCH_ENUM(MHBUILD); DEFINE_SWITCH_ARRAY(MHBUILD, switches); #undef X +#define MIMEENCODING_SWITCHES \ + X("base64", 0, BASE64SW) \ + X("quoted-printable", 0, QUOTEDPRINTSW) \ + +#define X(sw, minchars, id) id, +DEFINE_SWITCH_ENUM(MIMEENCODING); +#undef X + +#define X(sw, minchars, id) { sw, minchars, id }, +DEFINE_SWITCH_ARRAY(MIMEENCODING, encodingswitches); +#undef X /* mhbuildsbr.c */ extern char *tmp; /* directory to place temp files */ @@ -78,7 +91,6 @@ static int unlink_outfile = 0; static void unlink_done (int) NORETURN; /* mhbuildsbr.c */ -CT build_mime (char *, int); int output_message (CT, char *); int output_message_fp (CT, FILE *, char*); @@ -97,6 +109,7 @@ main (int argc, char **argv) CT ct, cts[2]; FILE *fp = NULL; FILE *fp_out = NULL; + int header_encoding = CE_UNKNOWN; done=unlink_done; @@ -205,6 +218,33 @@ main (int argc, char **argv) contentidsw = 0; continue; + case HEADERENCSW: { + int encoding; + + if (!(cp = *argp++) || *cp == '-') + adios (NULL, "missing argument to %s", argp[-2]); + switch (encoding = smatch (cp, encodingswitches)) { + case AMBIGSW: + ambigsw (cp, encodingswitches); + done (1); + case UNKWNSW: + adios (NULL, "%s unknown encoding algorithm", cp); + case BASE64SW: + header_encoding = CE_BASE64; + break; + case QUOTEDPRINTSW: + header_encoding = CE_QUOTED; + break; + default: + adios (NULL, "Internal error: algorithm %s", cp); + } + continue; + } + + case AUTOHEADERENCSW: + header_encoding = CE_UNKNOWN; + continue; 
+ case VERBSW: verbosw++; continue; @@ -280,7 +320,7 @@ main (int argc, char **argv) unlink_infile = 1; /* build the content structures for MIME message */ - ct = build_mime (infile, directives); + ct = build_mime (infile, directives, header_encoding); cts[0] = ct; cts[1] = NULL; @@ -314,7 +354,7 @@ main (int argc, char **argv) */ /* build the content structures for MIME message */ - ct = build_mime (compfile, directives); + ct = build_mime (compfile, directives, header_encoding); cts[0] = ct; cts[1] = NULL; diff --git a/uip/mhbuildsbr.c b/uip/mhbuildsbr.c index 8d3a76df..e4499170 100644 --- a/uip/mhbuildsbr.c +++ b/uip/mhbuildsbr.c @@ -69,11 +69,6 @@ int find_cache (CT, int, int *, char *, char *, int); void free_ctinfo (CT); void free_encoding (CT, int); -/* - * prototypes - */ -CT build_mime (char *, int); - /* * static prototypes */ @@ -128,7 +123,7 @@ static void directive_pop(void) */ CT -build_mime (char *infile, int directives) +build_mime (char *infile, int directives, int header_encoding) { int compnum, state; char buf[BUFSIZ], name[NAMESZ]; @@ -137,6 +132,7 @@ build_mime (char *infile, int directives) struct part **pp; CT ct; FILE *in; + HF hp; m_getfld_state_t gstate = 0; directive_init(directives); @@ -227,6 +223,17 @@ finish_field: } m_getfld_state_destroy (&gstate); + /* + * Iterate through the list of headers and call the function to MIME-ify + * them if required. + */ + + for (hp = ct->c_first_hf; hp != NULL; hp = hp->next) { + if (encode_rfc2047(hp->name, &hp->value, header_encoding, NULL)) { + adios(NULL, "Unable to encode header \"%s\"", hp->name); + } + } + /* * Now add the MIME-Version header field * to the list of header fields. 
diff --git a/uip/post.c b/uip/post.c index 9bbaa5f5..679ae6df 100644 --- a/uip/post.c +++ b/uip/post.c @@ -223,7 +223,6 @@ static char from[BUFSIZ]; /* my network address */ static char sender[BUFSIZ]; /* my Sender: header */ static char efrom[BUFSIZ]; /* my Envelope-From: header */ static char fullfrom[BUFSIZ]; /* full contents of From header */ -static char signature[BUFSIZ]; /* my signature */ static char *filter = NULL; /* the filter for BCC'ing */ static char *subject = NULL; /* the subject field for BCC'ing */ static char *fccfold[FCCS]; /* foldernames for FCC'ing */ @@ -934,9 +933,6 @@ putfmt (char *name, char *str, FILE *out) static void start_headers (void) { - char *cp, sigbuf[BUFSIZ]; - struct mailname *mp; - time (&tclock); /* @@ -947,21 +943,6 @@ start_headers (void) efrom[0] = '\0'; sender[0] = '\0'; fullfrom[0] = '\0'; - - if ((cp = getfullname ()) && *cp) { - strncpy (sigbuf, cp, sizeof(sigbuf)); - snprintf (signature, sizeof(signature), "%s <%s>", - sigbuf, getlocaladdr()); - if ((cp = getname (signature)) == NULL) - adios (NULL, "getname () failed -- you lose extraordinarily big"); - if ((mp = getm (cp, NULL, 0, AD_HOST, NULL)) == NULL) - adios (NULL, "bad signature '%s'", sigbuf); - mnfree (mp); - while (getname ("")) - continue; - } else { - strncpy (signature, getlocaladdr(), sizeof(signature)); - } }