From: Ken Hornstein <kenh@pobox.com>
Date: Wed, 4 Dec 2013 02:52:30 +0000 (-0500)
Subject: Merge branch 'encode-rfc2047'
X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/6ba5b855ebc737cc46cabfbe799b0c7706f437f4?hp=4c1236a0c2f9fd289746f1e371d61f086e643cfa

Merge branch 'encode-rfc2047'
---

diff --git a/Makefile.am b/Makefile.am
index 0749c07d..a056b24b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -55,6 +55,7 @@ TESTS = test/ali/test-ali test/anno/test-anno \
 	test/format/test-localmbox test/format/test-myname \
 	test/format/test-myhost test/format/test-mymbox \
 	test/format/test-rightjustify \
+	test/format/test-unquote \
 	test/forw/test-forw-digest test/forw/test-forw-format \
 	test/inc/test-deb359167 test/inc/test-eom-align \
 	test/inc/test-inc-scanout test/inc/test-msgchk \
@@ -62,7 +63,8 @@ TESTS = test/ali/test-ali test/anno/test-anno \
 	test/install-mh/test-install-mh \
 	test/locking/test-datalocking test/locking/test-spoollocking \
 	test/manpages/test-manpages \
-	test/mhbuild/test-forw test/mhbuild/test-utf8-body \
+	test/mhbuild/test-forw test/mhbuild/test-header-encode \
+	test/mhbuild/test-utf8-body \
 	test/mhfixmsg/test-mhfixmsg \
 	test/mhlist/test-mhlist test/mhmail/test-mhmail \
 	test/mhparam/test-mhparam test/mhpath/test-mhpath \
@@ -521,6 +523,7 @@ sbr_libmh_a_SOURCES = sbr/addrsbr.c sbr/ambigsw.c sbr/atooi.c sbr/arglist.c \
 		      sbr/copy.c sbr/copyip.c sbr/cpydata.c \
 		      sbr/cpydgst.c sbr/crawl_folders.c sbr/credentials.c \
 		      sbr/discard.c sbr/done.c sbr/dtimep.l sbr/dtime.c \
+		      sbr/encode_rfc2047.c \
 		      sbr/escape_addresses.c \
 		      sbr/error.c sbr/ext_hook.c sbr/fdcompare.c \
 		      sbr/folder_addmsg.c sbr/folder_delmsgs.c \
@@ -546,7 +549,8 @@ sbr_libmh_a_SOURCES = sbr/addrsbr.c sbr/ambigsw.c sbr/atooi.c sbr/arglist.c \
 		      sbr/seq_setcur.c sbr/seq_setprev.c sbr/seq_setunseen.c \
 		      sbr/showfile.c sbr/signals.c sbr/smatch.c \
 		      sbr/snprintb.c sbr/ssequal.c \
-		      sbr/strindex.c sbr/trimcpy.c sbr/uprf.c sbr/vfgets.c \
+		      sbr/strindex.c sbr/trimcpy.c sbr/unquote.c \
+		      sbr/uprf.c sbr/vfgets.c \
 		      sbr/fmt_def.c sbr/mf.c sbr/utils.c sbr/ctype-checked.c \
 		      sbr/m_mktemp.c sbr/getansreadline.c sbr/vector.c \
 		      config/config.c config/version.c
diff --git a/h/addrsbr.h b/h/addrsbr.h
index 5ca801e6..fbb4cd05 100644
--- a/h/addrsbr.h
+++ b/h/addrsbr.h
@@ -12,21 +12,30 @@
 #define	NETHOST		1
 #define	BADHOST		2
 
+/*
+ * The email structure used by nmh to define an email address
+ */
+
 struct mailname {
-    struct mailname *m_next;
-    char *m_text;
-    char *m_pers;
-    char *m_mbox;
-    char *m_host;
-    char *m_path;
-    int m_type;
-    char m_nohost;
-    char m_bcc;
-    int m_ingrp;
-    char *m_gname;
-    char *m_note;
+    struct mailname *m_next;	/* Linked list linkage; available for */
+				/* application use */
+    char *m_text;		/* Full unparsed text of email address */
+    char *m_pers;		/* display-name in RFC 5322 parlance */
+    char *m_mbox;		/* local-part in RFC 5322 parlance */
+    char *m_host;		/* domain in RFC 5322 parlance */
+    char *m_path;		/* Host routing; should not be used */
+    int m_type;			/* UUCPHOST, LOCALHOST, NETHOST, or BADHOST */
+    char m_nohost;		/* True if no host part available */
+    char m_bcc;			/* Used by post to keep track of bcc's */
+    int m_ingrp;		/* True if email address is in a group */
+    char *m_gname;		/* display-name of group */
+    char *m_note;		/* Note (post-address comment) */
 };
 
+/*
+ * See notes for auxformat() below.
+ */
+
 #define	adrformat(m) auxformat ((m), 1)
 
 /*
@@ -34,7 +43,71 @@ struct mailname {
  */
 void mnfree(struct mailname *);
 int ismymbox(struct mailname *);
-char *getname(const char *);
-char *getlocaladdr(void);
-char *auxformat(struct mailname *, int);
-struct mailname *getm(char *, char *, int, int, char *);
+
+/*
+ * Parse an address header, and return a sequence of email addresses.
+ * This function is the main entry point into the nmh address parser.
+ * It is used in conjunction with getm() to parse an email header.
+ *
+ * Arguments include:
+ *
+ * header	- Pointer to the start of an email header.
+ *
+ * On the first call, header is copied and saved internally.  Each email
+ * address in the header is returned on the first and subsequent calls
+ * to getname().  When there are no more email addresses available in
+ * the header, NULL is returned and the parser's internal state is
+ * reset.
+ */
+
+char *getname(const char *header);
+
+/*
+ * Format an email address given a struct mailname.
+ *
+ * This function takes a pointer to a struct mailname and returns a pointer
+ * to a static buffer holding the resulting email address.
+ *
+ * It is worth noting that group names are NOT handled, so if you want to
+ * do something with groups you need to handle it externally to this function.
+ *
+ * Arguments include:
+ *
+ * mp		- Pointer to mailname structure
+ * extras	- If true, include the personal name and/or note in the
+ *		  address.  Otherwise, omit it.
+ */
+
+char *auxformat(struct mailname *mp, int extras);
+
+/*
+ * Parse an email address into its components.
+ *
+ * Used in conjunction with getname() to parse a complete email header.
+ *
+ * Arguments include:
+ *
+ * str		- Email address being parsed.
+ * dfhost	- A default host to append to the email address if
+ *		  one is not included.  If NULL, use nmh's idea of
+ *		  localhost().
+ * dftype	- If dfhost is given, use dftype as the email address type
+ *		  if no host is in the email address.
+ * wanthost	- One of AD_HOST or AD_NHST.  If AD_HOST, look up the
+ *		  "official name" of the host.  Well, that's what the
+ *		  documentation says, at least ... support for that
+ *		  functionality was removed when hostable support was
+ *		  removed and the address parser was converted by default
+ *		  to always being in DUMB mode.  So nowadays this only
+ *		  affects where error messages are put if there is no
+ *		  host part (set it to AD_HOST if you want error messages
+ *		  to appear on standard error).
+ * eresult	- Any error string returned by the address parser.  String
+ *		  must contain sufficient room for the error message.
+ *		  (BUFSIZ is used in general by the code).  Can be NULL.
+ *
+ * A pointer to an allocated struct mailname corresponding to the email
+ * address is returned.
+ */
+struct mailname *getm(char *str, char *dfhost, int dftype,
+		      int wanthost, char *eresult);
diff --git a/h/mhparse.h b/h/mhparse.h
index b5a73627..1dfd7ed8 100644
--- a/h/mhparse.h
+++ b/h/mhparse.h
@@ -285,6 +285,21 @@ extern struct str2init str2methods[];
  */
 int pidcheck (int);
 CT parse_mime (char *);
+
+/*
+ * Translate a composition file into a MIME data structure.  Arguments are:
+ *
+ * infile	- Name of input filename
+ * directives	- A flag to control whether or not build directives are
+ *		  processed by default.
+ * encoding	- The default encoding to use when doing RFC 2047 header
+ *		  encoding.  Must be one of CE_UNKNOWN, CE_BASE64, or
+ *		  CE_QUOTED.
+ *
+ * Returns a CT structure describing the resulting MIME message.
+ */
+CT build_mime (char *infile, int directives, int encoding);
+
 int add_header (CT, char *, char *);
 int get_ctinfo (char *, CT, int);
 int params_external (CT, int);
diff --git a/h/prototypes.h b/h/prototypes.h
index b1a53cde..3797fecc 100644
--- a/h/prototypes.h
+++ b/h/prototypes.h
@@ -59,6 +59,27 @@ char *cpytrim (const char *);
 int decode_rfc2047 (char *, char *, size_t);
 void discard (FILE *);
 int default_done (int);
+
+/*
+ * Encode a message header using RFC 2047 encoding.  If the message contains
+ * no non-ASCII characters, then leave the header as-is.
+ *
+ * Arguments include:
+ *
+ * name		- Message header name
+ * value	- Message header content; must point to allocated memory
+ *		  (may be changed if encoding is necessary)
+ * encoding	- Encoding type.  May be one of CE_UNKNOWN (function chooses
+ *		  the encoding), CE_BASE64 or CE_QUOTED
+ * charset	- Charset used for encoding.  If NULL, obtain from system
+ *		  locale.
+ *
+ * Returns 0 on success, any other value on failure.
+ */
+
+int encode_rfc2047(const char *name, char **value, int encoding,
+		   const char *charset);
+
 void escape_display_name (char *, size_t);
 void escape_local_part (char *, size_t);
 int ext_hook(char *, char *, char *);
@@ -224,6 +245,23 @@ int ssequal (char *, char *);
 int stringdex (char *, char *);
 char *trimcpy (char *);
 int unputenv (char *);
+
+/*
+ * Remove quotes and quoted-pair sequences from RFC-5322 atoms.
+ *
+ * Currently the actual algorithm is simpler than it technically should
+ * be: any quotes are simply eaten, unless they're preceded by the escape
+ * character (\).  This seems to be sufficient for our needs for now.
+ *
+ * Arguments:
+ *
+ * input	- The input string
+ * output	- The output string; is assumed to have at least as much
+ *		  room as the input string.  At worst the output string will
+ *		  be the same size as the input string; it might be smaller.
+ *
+ */
+void unquote_string(const char *input, char *output);
 int uprf (char *, char *);
 int vfgets (FILE *, char **);
 char *write_charset_8bit (void);
@@ -262,6 +300,7 @@ int what_now (char *, int, int, char *, char *,
 int WhatNow(int, char **);
 int writeBase64aux(FILE *, FILE *);
 int writeBase64 (unsigned char *, size_t, unsigned char *);
+int writeBase64raw (unsigned char *, size_t, unsigned char *);
 
 /*
  * credentials management
diff --git a/man/fmttest.man b/man/fmttest.man
index 59626da3..639f578a 100644
--- a/man/fmttest.man
+++ b/man/fmttest.man
@@ -15,6 +15,7 @@ language
 .RB [ \-format
 .IR formatstring ]
 .RB [ \-address " | " \-raw " | " \-date " | " \-message ]
+.RB [ \-file " | " \-nofile ]
 .RB [ \-\|\-component
 .IR component-text ]
 .RB [ \-dupaddrs " | " \-nodupaddrs ]
@@ -140,7 +141,15 @@ for the
 .RI %( unseen ),
 and
 .RI %( size )
-function escapes will be made available for each message.
+function escapes will be made available for each message.  If the
+.B \-file
+switch is given, the arguments are interpreted as filenames instead of
+message numbers, but otherwise the behavior is the same (except that the
+.RI %( msg ),
+.RI %( cur ),
+and
+.RI %( unseen )
+function escapes will not provide any useful information).
 .PP
 The default format used in address mode is the default format used by
 .BR scan .
@@ -394,6 +403,7 @@ dat[4]	%(\fIunseen\fR)
 .SH DEFAULTS
 .nf
 .RB ` \-message '
+.RB ` \-nofile '
 .RB ` \-dupaddrs '
 .fi
 .SH BUGS
diff --git a/man/mhbuild.man b/man/mhbuild.man
index ab757d34..6f50006b 100644
--- a/man/mhbuild.man
+++ b/man/mhbuild.man
@@ -17,6 +17,9 @@ mhbuild \- translate MIME composition draft
 .RB [ \-contentid " | " \-nocontentid ]
 .RB [ \-verbose " | " \-noverbose ]
 .RB [ \-check " | " \-nocheck ]
+.RB [ \-headerencoding 
+.IR encoding\-algorithm
+.RB " | " \-autoheaderencoding ]
 .RB [ \-version ]
 .RB [ \-help ]
 .ad
@@ -28,11 +31,8 @@ a valid MIME message.
 .PP
 .B mhbuild
 creates multi-media messages as specified in RFC 2045
-to RFC 2049.  Currently
-.B mhbuild
-only supports encodings in
-message bodies, and does not support the encoding of message headers as
-specified in RFC 2047.
+to RFC 2049.  This includes the encoding of message headers as specified
+by RFC 2047.
 .PP
 If you specify the name of the composition file as \*(lq-\*(rq,
 then
@@ -77,6 +77,20 @@ switch
 is present, then the listing will show any \*(lqextra\*(rq information
 that is present in the message, such as comments in the
 \*(lqContent-Type\*(rq header.
+.PP
+The
+.B \-headerencoding
+switch will indicate which algorithm to use when encoding any message headers
+that contain 8\-bit characters.  The valid arguments are
+.I base64
+for base64 encoding and
+.I quoted
+for quoted\-printable encoding.  The
+.B \-autoheaderencoding
+switch will instruct
+.B mhbuild
+to automatically pick the encoding algorithm based on the frequency of
+8\-bit characters.
 .SS "Translating the Composition File"
 .B mhbuild
 is essentially a filter to aid in the composition of MIME
@@ -714,4 +728,5 @@ is checked.
 .RB ` \-contentid '
 .RB ` \-nocheck '
 .RB ` \-noverbose '
+.RB ` \-autoheaderencoding '
 .fi
diff --git a/sbr/addrsbr.c b/sbr/addrsbr.c
index ce14394e..e5b20520 100644
--- a/sbr/addrsbr.c
+++ b/sbr/addrsbr.c
@@ -274,24 +274,6 @@ auxformat (struct mailname *mp, int extras)
 }
 
 
-/*
- * This used to be adrsprintf() (where it would format an address for you
- * given a username and a domain).  But somewhere we got to the point where
- * the only caller was post, and it only called it with both arguments NULL.
- * So the function was renamed with a more sensible name.
- */
-
-char *
-getlocaladdr(void)
-{
-    char	 *username;
-
-    username = getusername();
-
-    return username;
-}
-
-
 #define	W_NIL	0x0000
 #define	W_MBEG	0x0001
 #define	W_MEND	0x0002
diff --git a/sbr/base64.c b/sbr/base64.c
index c3045ab7..5ff9f53c 100644
--- a/sbr/base64.c
+++ b/sbr/base64.c
@@ -114,3 +114,49 @@ writeBase64 (unsigned char *in, size_t length, unsigned char *out)
 
     return OK;
 }
+
+/*
+ * Base64-encode "length" bytes from "in" into "out" as one unbroken string.
+ *
+ * Essentially a duplicate of writeBase64, but without line wrapping or
+ * newline termination (note: string IS NUL terminated).  A final group of
+ * fewer than three bytes is padded with '='.  No bounds checking is done
+ * on "out".  Always returns OK.
+ */
+
+int
+writeBase64raw (unsigned char *in, size_t length, unsigned char *out)
+{
+    while (length > 0) {
+	/* Take up to three input bytes for this group of four characters */
+	unsigned int take = length < 3 ? (unsigned int) length : 3;
+	unsigned long group = (unsigned long) (in[0] & 0xff) << 16;
+	int slot;
+
+	if (take >= 2) {
+	    group |= (in[1] & 0xff) << 8;
+	}
+	if (take == 3) {
+	    group |= in[2] & 0xff;
+	}
+
+	/* Emit the four 6-bit digits, most significant first */
+	for (slot = 3; slot >= 0; slot--, group >>= 6)
+	    out[slot] = nib2b64[group & 0x3f];
+
+	/* A short final group gets one or two '=' pad characters */
+	if (take < 3) {
+	    out[3] = '=';
+	    if (take < 2)
+		out[2] = '=';
+	}
+
+	in += take;
+	length -= take;
+	out += 4;
+    }
+
+    *out = '\0';
+
+    return OK;
+}
diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c
new file mode 100644
index 00000000..ac2b6dc3
--- /dev/null
+++ b/sbr/encode_rfc2047.c
@@ -0,0 +1,856 @@
+/*
+ * Routines to encode message headers using RFC 2047-encoding.
+ *
+ * This code is Copyright (c) 2002, by the authors of nmh.  See the
+ * COPYRIGHT file in the root directory of the nmh distribution for
+ * complete copyright information.
+ */
+
+#include <h/mh.h>
+#include <h/mhparse.h>
+#include <h/addrsbr.h>
+#include <h/utils.h>
+
+/*
+ * NULL-terminated list of headers that contain addresses and as a result
+ * require special handling; names are compared case-insensitively.
+ */
+
+static char *address_headers[] = {
+    "To",
+    "From",
+    "cc",
+    "Bcc",
+    "Reply-To",
+    "Sender",
+    "Resent-To",
+    "Resent-From",
+    "Resent-cc",
+    "Resent-Bcc",
+    "Resent-Reply-To",
+    "Resent-Sender",
+    NULL,
+};
+
+/*
+ * Header-parsing macros; arguments may be evaluated more than once.
+ */
+
+#define is_fws(c) ((c) == '\t' || (c) == ' ' || (c) == '\n')
+
+#define qphrasevalid(c) \
+    (((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'Z') || \
+     ((c) >= 'a' && (c) <= 'z') || (c) == '!' || (c) == '*' || \
+     (c) == '+' || (c) == '-' || (c) == '/' || (c) == '=' || (c) == '_')
+#define qpspecial(c) ((c) < ' ' || (c) == '=' || (c) == '?' || (c) == '_')
+
+#define base64len(n) ((((n) + 2) / 3 ) * 4)	/* String len to base64 len */
+#define strbase64(n) ((n) / 4 * 3)		/* Chars that fit in base64 */
+
+#define ENCODELINELIMIT	76
+
+static void unfold_header(char **, int);
+static int field_encode_address(const char *, char **, int, const char *);
+static int field_encode_quoted(const char *, char **, const char *, int,
+			       int, int);
+static int field_encode_base64(const char *, char **, const char *);
+static int scanstring(const char *, int *, int *, int *);
+static int utf8len(const char *);
+static int pref_encoding(int, int, int);
+
+/*
+ * Encode a message header using RFC 2047 encoding, assuming every character
+ * < 128 is ASCII and needs no encoding.  Returns 0 with *value untouched if
+ * there are no 8-bit characters, 1 on error, else the field encoder's result.
+ */
+
+int
+encode_rfc2047(const char *name, char **value, int encoding,
+	       const char *charset)
+{
+    int i, asciicount = 0, eightbitcount = 0, qpspecialcount = 0;
+    char *p;
+
+    /*
+     * First, check to see if we even need to encode the header
+     */
+
+    for (p = *value; *p != '\0'; p++) {
+	if (isascii((unsigned char) *p)) {
+	    asciicount++;
+	    if (qpspecial((unsigned char) *p))
+	    	qpspecialcount++;
+	} else
+	    eightbitcount++;
+    }
+
+    if (eightbitcount == 0)
+    	return 0;
+
+    /*
+     * Some rules from RFC 2047:
+     *
+     * - Encoded words cannot be more than 75 characters long
+     * - Multiple "long" encoded words must be on new lines.
+     *
+     * Also, we're not permitted to encode email addresses, so
+     * we need to actually _parse_ email addresses and only encode
+     * the right bits.
+     */
+
+    /*
+     * If charset was NULL, then get the value from the locale.  US-ASCII
+     * is rejected no matter where it came from.
+     */
+
+    if (charset == NULL)
+    	charset = write_charset_8bit();
+
+    if (strcasecmp(charset, "US-ASCII") == 0) {
+    	advise(NULL, "Cannot use US-ASCII with 8 bit characters in header");
+	return 1;
+    }
+
+    /*
+     * If we have an address header, then we need to parse the addresses
+     * and only encode the names or comments.  Otherwise, handle it normally.
+     */
+
+    for (i = 0; address_headers[i]; i++) {
+    	if (strcasecmp(name, address_headers[i]) == 0)
+	    return field_encode_address(name, value, encoding, charset);
+    }
+
+    /*
+     * On the encoding we choose, and the specifics of encoding:
+     *
+     * - If a specified encoding is passed in, we use that.
+     * - Otherwise, pick which encoding is shorter.
+     *
+     * We don't quite handle continuation right here, but it should be
+     * pretty close.
+     */
+
+    if (encoding == CE_UNKNOWN)
+        encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
+
+    unfold_header(value, asciicount + eightbitcount);	/* == strlen(*value) */
+
+    switch (encoding) {
+
+    case CE_BASE64:
+    	return field_encode_base64(name, value, charset);
+
+    case CE_QUOTED:
+	return field_encode_quoted(name, value, charset, asciicount,
+				   eightbitcount + qpspecialcount, 0);
+
+    default:
+    	advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+	return 1;
+    }
+}
+
+/*
+ * Encode a header (or field) using quoted-printable, replacing *value.
+ */
+
+static int
+field_encode_quoted(const char *name, char **value, const char *charset,
+		    int ascii, int encoded, int phraserules)
+{
+    int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1;
+    int charsetlen = strlen(charset), utf8;
+    char *output = NULL, *p, *q;
+
+    /*
+     * Right now we just encode the whole thing.  Maybe later on we'll
+     * only encode things on a per-atom basis.
+     */
+
+    p = *value;
+
+    column = prefixlen + 2;	/* Provisional; reset below */
+
+    utf8 = strcasecmp(charset, "UTF-8") == 0;
+
+    while (*p != '\0') {
+    	/*
+	 * Start a new line, if it's time
+	 */
+    	if (newline) {
+	    /*
+	     * If it's the start of the header, we don't need to pad it
+	     *
+	     * The length of the output string is ...
+	     * =?charset?Q?...?=  so that's 7+strlen(charset) + 2 for \n NUL
+	     *
+	     * plus 1 for every ASCII character and 3 for every eight bit
+	     * or special character (eight bit characters are written as =XX).
+	     *
+	     */
+
+	    int tokenlen;
+
+	    outlen += 9 + charsetlen + ascii + 3 * encoded;
+
+	    /*
+	     * If output is set, then we're continuing the header.  Otherwise
+	     * do the initial allocation.
+	     */
+
+	    if (output) {
+	        int curlen = q - output, i;
+		outlen += prefixlen + 1;	/* Header plus \n ": " */
+		output = mh_xrealloc(output, outlen);
+		q = output + curlen;
+		*q++ = '?';
+		*q++ = '=';
+		*q++ = '\n';
+		for (i = 0; i < prefixlen; i++)
+		    *q++ = ' ';
+	    } else {
+	    	/*
+		 * A bit of a hack here; the header can contain multiple
+		 * spaces (probably at least one) until we get to the
+		 * actual text.  Copy until we get to a non-space.
+		 */
+	    	output = mh_xmalloc(outlen);
+		q = output;
+		while (is_fws(*p))
+		    *q++ = *p++;
+	    }
+
+	    tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset);
+	    q += tokenlen;
+	    column = prefixlen + tokenlen;
+	    newline = 0;
+	}
+
+	/*
+	 * Process each character, encoding if necessary
+	 *
+	 * Note that we have a different set of rules if we're processing
+	 * RFC 5322 'phrase' (something you'd see in an address header).
+	 */
+
+	column++;
+
+	if (*p == ' ') {
+	    *q++ = '_';
+	    ascii--;
+	} else if (isascii((unsigned char) *p) &&
+		   (phraserules ? qphrasevalid((unsigned char) *p) :
+		   			!qpspecial((unsigned char) *p))) {
+	    *q++ = *p;
+	    ascii--;
+	} else {
+	    snprintf(q, outlen - (q - output), "=%02X", (unsigned char) *p);
+	    q += 3;
+	    column += 2;	/* column already incremented by 1 above */
+	    encoded--;
+	}
+
+	p++;
+
+	/*
+	 * We're not allowed more than ENCODELINELIMIT characters per line,
+	 * so reserve some room for the final ?=.
+	 *
+	 * If prefixlen == 0, we haven't been passed in a header name, so
+	 * don't ever wrap the field (we're likely doing an address).
+	 */
+
+	if (prefixlen == 0)
+	    continue;
+
+	if (column >= ENCODELINELIMIT - 2) {
+	    newline = 1;
+	} else if (utf8) {
+	    /*
+	     * Okay, this is a bit weird, but to explain a bit more ...
+	     *
+	     * RFC 2047 prohibits the splitting of multibyte characters
+	     * across encoded words.  Right now we only handle the case
+	     * of UTF-8, the most common multibyte encoding.
+	     *
+	     * p is now pointing at the next input character.  If we're
+	     * using UTF-8 _and_ we'd go over ENCODELINELIMIT given the
+	     * length of the complete character, then trigger a newline
+	     * now.  Note that we check the length * 3 since we have to
+	     * allow for the encoded output.
+	     */
+	    if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
+	    	newline = 1;
+	    }
+	}
+    }
+
+    *q++ = '?';
+    *q++ = '=';
+
+    if (prefixlen)
+    	*q++ = '\n';
+
+    *q = '\0';
+
+    free(*value);
+
+    *value = output;
+
+    return 0;
+}
+
+/*
+ * Encode our specified header (or field) using base64.  Returns 0 after
+ * replacing *value, or 1 (leaving *value alone) on failure.
+ *
+ * This is a little easier since every character gets encoded, we can
+ * calculate the line wrap up front.
+ */
+
+static int
+field_encode_base64(const char *name, char **value, const char *charset)
+{
+    int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
+    int outlen = 0, numencode, curlen;
+    char *output = NULL, *p = *value, *q = NULL, *linestart;
+
+    /*
+     * Skip over any leading white space.
+     */
+
+    while (*p == ' ' || *p == '\t')
+    	p++;
+
+    /*
+     * Wrap only when we were given a header name (nonzero prefix); each
+     * pass of this loop emits one full line.  A line carries =? + charset
+     * + ?B? ... ?= around the data, i.e. 7 + strlen(charset) characters,
+     * plus 2 more bytes of storage (for \n NUL).
+     */
+
+    while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen +
+    			  prefixlen) > ENCODELINELIMIT)) {
+
+	/*
+	 * Our very first time, don't pad the line in the front.
+	 * Note ENCODELINELIMIT is + 2 because of \n \0
+	 */
+
+	if (! output) {
+	    outlen += ENCODELINELIMIT + 2;
+	    output = q = mh_xmalloc(outlen);
+	    linestart = q - prefixlen;	/* Yes, this is intentional */
+	} else {
+	    int curstart = linestart - output;
+	    curlen = q - output;
+
+	    outlen += ENCODELINELIMIT + 2;
+	    output = mh_xrealloc(output, outlen);
+	    q = output + curlen;
+	    linestart = output + curstart;
+	}
+
+	/*
+	 * We should have enough space now, so prepend the encoding markers
+	 * and character set information.  The leading space is intentional.
+	 */
+
+	q += snprintf(q, outlen - (q - output), " =?%s?B?", charset);
+
+	/*
+         * Find out how much room we have left on the line and see how
+         * many characters we can stuff in.  The start of our line
+         * is marked by "linestart", so use that to figure out how
+         * many characters are left out of ENCODELINELIMIT.  Reserve
+         * 2 characters for the end markers and calculate how many
+         * characters we can fit into that space given the base64
+         * encoding expansion.
+	 */
+
+	numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
+
+	if (numencode <= 0) {
+	    advise(NULL, "Internal error: tried to encode %d characters "
+	    	   "in base64", numencode);
+	    goto fail;
+	}
+
+	/*
+	 * RFC 2047 prohibits spanning multibyte characters across tokens.
+	 * Right now we only check for UTF-8.
+	 *
+	 * So note the key here ... we want to make sure the character BEYOND
+	 * our last character is not a continuation byte.  If it's the start
+	 * of a new multibyte character or a single-byte character, that's ok.
+	 */
+
+	if (strcasecmp(charset, "UTF-8") == 0) {
+	    /*
+	     * p points to the start of our current buffer, so p + numencode
+	     * is one past the last character to encode
+	     */
+
+	    while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80))
+	    	numencode--;
+
+	    if (numencode == 0) {
+	    	advise(NULL, "Internal error: could not find start of "
+		       "UTF-8 character when base64 encoding header");
+		goto fail;
+	    }
+	}
+
+	if (writeBase64raw((unsigned char *) p, numencode,
+			   (unsigned char *) q) != OK) {
+	    advise(NULL, "Internal error: base64 encoding of header failed");
+	    goto fail;
+	}
+
+	p += numencode;
+	q += base64len(numencode);
+
+	/* Start of the next line: just past the "?=\n" written below */
+
+	linestart = q + 3;
+
+	/*
+	 * What's going on here?  Well, we know we're continuing to the next
+	 * line, so we want to add continuation padding.  We also add the
+	 * trailing marker for the RFC 2047 token at this time as well.
+	 * This uses a trick of snprintf(); we tell it to print a zero-length
+	 * string, but pad it out to prefixlen - 1 characters; that ends
+	 * up always printing out the requested number of spaces.  We use
+	 * prefixlen - 1 because we always add a space on the starting
+	 * token marker; this makes things work out correctly for the first
+	 * line, which should have a space between the ':' and the start
+	 * of the token.
+	 *
+	 * It's okay if you don't follow all of that.
+	 */
+
+	q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, "");
+    }
+
+    /*
+     * We're here if there is either no prefix, or we can fit it in less
+     * than ENCODELINELIMIT characters.  Encode the whole thing.
+     */
+
+    outlen += prefixlen + 9 + charsetlen + base64len(strlen(p));
+    curlen = q - output;
+
+    output = mh_xrealloc(output, outlen);
+    q = output + curlen;
+
+    q += snprintf(q, outlen - (q - output), "%s=?%s?B?",
+    		  prefixlen ? " " : "", charset);
+
+    if (writeBase64raw((unsigned char *) p, strlen(p),
+    		       (unsigned char *) q) != OK) {
+	advise(NULL, "Internal error: base64 encoding of header failed");
+	goto fail;
+    }
+
+    strcat(q, "?=");
+
+    if (prefixlen)
+    	strcat(q, "\n");
+
+    free(*value);
+
+    *value = output;
+
+    return 0;
+
+fail:
+    /* Drop the partly built buffer; *value still belongs to the caller */
+    free(output);
+    return 1;
+}
+
+/*
+ * Return the byte length of the UTF-8 sequence starting at p.
+ *
+ * Yields 0 if p is at the end of the string, at an ASCII character, or
+ * in the middle of a multibyte character (on a continuation byte).
+ */
+
+static int
+utf8len(const char *p)
+{
+    const unsigned char *s = (const unsigned char *) p;
+    int nbytes = 1;
+
+    /* Only a lead byte (11xxxxxx) begins a multibyte sequence */
+    if (*s == '\0' || isascii(*s) ||
+	(*s & 0xc0) == 0x80)
+    	return 0;
+
+    /* Count the continuation bytes (10xxxxxx) that follow the lead byte */
+    for (s++; (*s & 0xc0) == 0x80; s++)
+    	nbytes++;
+
+    return nbytes;
+}
+
+/*
+ * "Unfold" a header into a single line, with no continuation lines.
+ *
+ * The result is never longer than the input, so a buffer sized from the
+ * original length ("len", excluding the NUL) always suffices.
+ */
+
+static void
+unfold_header(char **value, int len)
+{
+    char *unfolded = mh_xmalloc(len + 1);
+    const char *src = *value;
+    char *dst = unfolded;
+
+    for (;;) {
+	/* Copy verbatim up to the next line break (or the end) */
+	while (*src != '\0' && *src != '\n')
+	    *dst++ = *src++;
+
+	/*
+	 * Swallow the newline and every bit of folding whitespace after
+	 * it.  If that takes us to the end of the string it was only the
+	 * header's final newline, which is dropped (the encoding routine
+	 * puts it back); otherwise the whole run collapses to one space.
+	 */
+	while (is_fws(*src))
+	    src++;
+	if (*src == '\0')
+	    break;
+
+	*dst++ = ' ';
+    }
+
+    *dst = '\0';
+
+    free(*value);
+    *value = unfolded;
+}
+
+/*
+ * Encode a header containing addresses.  This means we have to parse
+ * each address and only encode the display-name or comment field.
+ */
+
+static int
+field_encode_address(const char *name, char **value, int encoding,
+		     const char *charset)
+{
+    int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
+    int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0;
+    int retval;
+    size_t len;
+    char *mp, *cp = NULL, *output = NULL;
+    char *tmpbuf = NULL;
+    size_t tmpbufsize = 0;
+    struct mailname *mn;
+
+    /*
+     * Because these are addresses, we need to handle them individually.
+     *
+     * Break them down and process them one by one.  This means we have to
+     * rewrite the whole header, but that's unavoidable.
+     */
+
+    /*
+     * The output headers always have to start with a space first; this
+     * is just the way the API works right now.
+     */
+
+    output = add(" ", output);
+
+    for (groupflag = 0; (mp = getname(*value)); ) {
+    	if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) {
+	    errflag++;
+	    continue;
+	}
+
+	reformat = 0;
+
+	/*
+	 * We only care if the phrase (m_pers) or any trailing comment
+	 * (m_note) have 8-bit characters.  If doing q-p, we also need
+	 * to encode anything marked as qspecial().  Unquote it first
+	 * so the specialchars count is right.
+	 */
+
+	if (! mn->m_pers)
+	    goto check_note;
+
+	if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
+	    tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
+	}
+
+	unquote_string(mn->m_pers, tmpbuf);
+
+	if (scanstring(tmpbuf, &asciichars, &eightbitchars,
+		       &specialchars)) {
+	    /*
+	     * If we have 8-bit characters, encode it.
+	     */
+
+	    if (encoding == CE_UNKNOWN)
+	    	encoding = pref_encoding(asciichars, specialchars,
+					 eightbitchars);
+
+	    /*
+	     * This is okay, because the output of unquote_string will be either
+	     * equal or shorter than the original.
+	     */
+
+	    strcpy(mn->m_pers, tmpbuf);
+
+	    switch (encoding) {
+
+	    case CE_BASE64:
+	    	retval = field_encode_base64(NULL, &mn->m_pers, charset);
+		break;
+
+	    case CE_QUOTED:
+	    	retval = field_encode_quoted(NULL, &mn->m_pers, charset,
+					     asciichars,
+					     eightbitchars + specialchars, 1);
+		break;
+
+	    default:
+		advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+		errflag++;
+		goto out;
+	    }
+
+	    reformat++;
+	}
+
+	check_note:
+
+	/*
+	 * The "note" field is generally a comment at the end of the address,
+	 * at least as how it's implemented here.  Notes are always surrounded
+	 * by parenthesis (since they're comments).  Strip them out and
+	 * then put them back when we format the final field, but they do
+	 * not get encoded.
+	 */
+
+	if (! mn->m_note)
+	    goto do_reformat;
+
+	len = strlen(mn->m_note);
+
+	if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
+	    tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
+	}
+
+	if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
+	    advise(NULL, "Internal error: Invalid note field \"%s\"",
+	    	   mn->m_note);
+	    errflag++;
+	    goto out;
+	}
+
+	strncpy(tmpbuf, mn->m_note + 1, len - 1);
+	tmpbuf[len - 2] = '\0';
+
+	if (scanstring(tmpbuf, &asciichars, &eightbitchars,
+		       &specialchars)) {
+	    /*
+	     * If we have 8-bit characters, encode it.
+	     */
+
+	    if (encoding == CE_UNKNOWN)
+	    	encoding = pref_encoding(asciichars, specialchars,
+					 eightbitchars);
+
+	    switch (encoding) {
+
+	    case CE_BASE64:
+	    	retval = field_encode_base64(NULL, &tmpbuf, charset);
+		break;
+
+	    case CE_QUOTED:
+	    	retval = field_encode_quoted(NULL, &tmpbuf, charset,
+					     asciichars,
+					     eightbitchars + specialchars, 1);
+		break;
+
+	    default:
+		advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+		errflag++;
+		goto out;
+	    }
+
+	    reformat++;
+
+	    /*
+	     * Make sure the size of tmpbuf is correct (it always gets
+	     * reallocated in the above functions).
+	     */
+
+	    tmpbufsize = strlen(tmpbuf) + 1;
+
+	    /*
+	     * Put the note field back surrounded by parenthesis.
+	     */
+
+	    mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
+
+	    snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
+	}
+
+do_reformat:
+
+	/*
+	 * So, some explanation is in order.
+	 *
+	 * We know we need to rewrite at least one address in the header,
+	 * otherwise we wouldn't be here.  If we had to reformat this
+	 * particular address, then run it through adrformat().  Otherwise
+	 * we can use m_text directly.
+	 */
+
+	/*
+	 * If we were in a group but are no longer, make sure we add a
+	 * semicolon (which needs to be FIRST, as it needs to be at the end
+	 * of the last address).
+	 */
+
+	if (groupflag && ! mn->m_ingrp) {
+	    output = add(";", output);
+	    column += 1;
+	}
+
+	groupflag = mn->m_ingrp;
+
+	if (mn->m_gname) {
+	    cp = add(mn->m_gname, NULL);
+	}
+
+	if (reformat) {
+	    cp = add(adrformat(mn), cp);
+	} else {
+	    cp = add(mn->m_text, cp);
+	}
+
+	len = strlen(cp);
+
+	/*
+	 * If we're not at the beginning of the line, add a comma and
+	 * either a space or a newline.
+	 */
+
+	if (column != prefixlen) {
+	    if (len + column + 2 > OUTPUTLINELEN) {
+
+	    	if ((size_t) (prefixlen + 3) < tmpbufsize)
+		    tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
+
+		snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
+		output = add(tmpbuf, output);
+	    } else {
+	    	output = add(", ", output);
+		column += 2;
+	    }
+	}
+
+	/*
+	 * Finally add the address
+	 */
+
+	output = add(cp, output);
+	column += len;
+	free(cp);
+	cp = NULL;
+    }
+
+    /*
+     * Just in case we're at the end of a list
+     */
+
+    if (groupflag) {
+	output = add(";", output);
+    }
+
+    output = add("\n", output);
+
+    free(*value);
+    *value = output;
+    output = NULL;
+
+out:
+
+    if (tmpbuf)
+    	free(tmpbuf);
+    if (output)
+    	free(output);
+
+    return errflag > 0;
+}
+
+/*
+ * Tally the ASCII, non-phrase-valid ASCII and 8-bit characters in a
+ * string; returns nonzero if there is at least one 8-bit character.
+ */
+
+static int
+scanstring(const char *string, int *asciilen, int *eightbitchars,
+	   int *specialchars)
+{
+    int ascii = 0, special = 0, eightbit = 0;
+
+    for (; *string != '\0'; string++) {
+	unsigned char ch = (unsigned char) *string;
+
+	if (! isascii(ch)) {
+	    eightbit++;
+	    continue;
+	}
+	ascii++;
+	/* qphrasevalid() rejects a space, but q-p can emit it as '_' */
+	if (ch != ' ' && !qphrasevalid(ch))
+	    special++;
+    }
+
+    *asciilen = ascii;
+    *eightbitchars = eightbit;
+    *specialchars = special;
+    return eightbit > 0;
+}
+
+/*
+ * Pick the RFC 2047 encoding for a string when the caller did not ask
+ * for a particular one: whichever of base64 and quoted-printable gives
+ * the shorter encoded text, with quoted-printable winning a tie.
+ *
+ * Arguments are:
+ *
+ * ascii	- Number of ASCII characters in the string to encode.
+ * specials	- How many of those ASCII characters must still be escaped
+ *		  in quoted-printable (already counted in "ascii").
+ * eightbits	- Number of 8-bit characters in the string to encode.
+ *
+ * Returns one of CE_BASE64 or CE_QUOTED.
+ */
+
+static int
+pref_encoding(int ascii, int specials, int eightbits)
+{
+    /*
+     * Quoted-printable length: characters that need no escaping take
+     * one byte each, while every special or 8-bit character becomes a
+     * three-byte "=XX" sequence.
+     */
+    int escaped = specials + eightbits;
+    int qplen = (ascii - specials) + escaped * 3;
+
+    /* base64 cost depends only on the total count; see base64len() */
+    if (base64len(ascii + eightbits) < qplen)
+	return CE_BASE64;
+
+    return CE_QUOTED;
+}
diff --git a/sbr/fmt_scan.c b/sbr/fmt_scan.c
index d0bb16df..408b1093 100644
--- a/sbr/fmt_scan.c
+++ b/sbr/fmt_scan.c
@@ -868,32 +868,10 @@ fmt_scan (struct format *format, char *scanl, size_t max, int width, int *dat,
 		/* UNQUOTEs RFC-2822 quoted-string and quoted-pair */
 	case FT_LS_UNQUOTE:
 	    if (str) {	  	
-		int m;
 		strncpy(buffer, str, sizeof(buffer));
 		/* strncpy doesn't NUL-terminate if it fills the buffer */
 		buffer[sizeof(buffer)-1] = '\0';
-		str = buffer;
-	
-		/* we will parse from buffer to buffer2 */
-		n = 0; /* n is the input position in str */
-		m = 0; /* m is the ouput position in buffer2 */
-
-		while ( str[n] != '\0') {
-		    switch ( str[n] ) {
-			case '\\':
-			    n++;
-			    if ( str[n] != '\0')
-				buffer2[m++] = str[n++];
-			    break;
-			case '"':
-			    n++;
-			    break;
-			default:
-			    buffer2[m++] = str[n++];
-			    break;
-			}
-		}
-		buffer2[m] = '\0';
+		unquote_string(buffer, buffer2);
 		str = buffer2;
             }
 	    break;
diff --git a/sbr/unquote.c b/sbr/unquote.c
new file mode 100644
index 00000000..f9fa9112
--- /dev/null
+++ b/sbr/unquote.c
@@ -0,0 +1,45 @@
+/*
+ * unquote.c: Handle quote removal and quoted-pair strings on
+ * RFC 2822-5322 atoms.
+ *
+ * This code is Copyright (c) 2013, by the authors of nmh.  See the
+ * COPYRIGHT file in the root directory of the nmh distribution for
+ * complete copyright information.
+ */
+
+#include <h/mh.h>
+
+/*
+ * Strip the quoting from an RFC 5322 quoted-string: unescaped double
+ * quotes are dropped and each backslash quoted-pair is replaced by the
+ * character that follows the backslash.  A backslash that is the last
+ * character of the input is dropped.
+ *
+ * The result is never longer than the input, so "output" needs room
+ * for at most strlen(input) + 1 bytes.
+ */
+
+void
+unquote_string(const char *input, char *output)
+{
+    const char *src = input;	/* next character to examine */
+    char *dst = output;		/* next position to fill in */
+
+    for (; *src != '\0'; src++) {
+	if (*src == '"') {
+	    /* A bare double quote produces no output */
+	    continue;
+	}
+
+	if (*src == '\\') {
+	    /* Step over the backslash; give up if nothing follows it */
+	    if (*++src == '\0')
+		break;
+	}
+
+	/* Plain or escaped character: copy it through unchanged */
+	*dst++ = *src;
+    }
+
+    *dst = '\0';
+}
diff --git a/test/format/test-unquote b/test/format/test-unquote
new file mode 100755
index 00000000..ea465b6d
--- /dev/null
+++ b/test/format/test-unquote
@@ -0,0 +1,36 @@
+#!/bin/sh
+#
+# Test that the %(unquote) format function works properly.
+#
+
+if test -z "${MH_OBJ_DIR}"; then
+    srcdir=`dirname "$0"`/../..
+    MH_OBJ_DIR=`cd "$srcdir" && pwd`; export MH_OBJ_DIR
+fi
+
+. "$MH_OBJ_DIR/test/common.sh"
+
+setup_test
+
+#
+# Text without quoting is left alone; bare double quotes are removed
+#
+
+run_test 'eval fmttest -raw -format "%(unquote{text})" "Mr. Foo Bar"' \
+		'Mr. Foo Bar'
+
+run_test 'eval fmttest -raw -format "%(unquote{text})" "Mr. \"Foo\" Bar"' \
+		'Mr. Foo Bar'
+
+#
+# Escaped quotes lose their backslash but are kept.  The string we
+# wanted passed on the command line is:
+# "Mr. \"Foo\" Bar"
+#
+# The extra \ are necessary to get it past the shell
+#
+
+run_test 'eval fmttest -raw -format "%(unquote{text})" "Mr. \\\"Foo\\\" Bar"' \
+		'Mr. "Foo" Bar'
+
+exit ${failed:-0}
diff --git a/test/mhbuild/test-header-encode b/test/mhbuild/test-header-encode
new file mode 100755
index 00000000..a8fb8187
--- /dev/null
+++ b/test/mhbuild/test-header-encode
@@ -0,0 +1,187 @@
+#!/bin/sh
+######################################################
+#
+# Test that mhbuild encodes 8-bit headers according to RFC 2047
+#
+######################################################
+
+set -e
+
+if test -z "${MH_OBJ_DIR}"; then
+    srcdir=`dirname "$0"`/../..
+    MH_OBJ_DIR=`cd "$srcdir" && pwd`; export MH_OBJ_DIR
+fi
+
+. "${MH_OBJ_DIR}/test/common.sh"
+
+setup_test
+testname="${MH_TEST_DIR}/$$"
+
+#
+# Hardcode a UTF-8 locale; the expected headers below are tagged UTF-8.
+#
+
+LC_ALL=en_US.UTF-8; export LC_ALL
+
+#
+# Basic test: a short subject with one 8-bit character (expects q-p)
+#
+cat > "${testname}.basic.actual" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: This is ä test
+
+This is a test
+EOF
+
+cat > "${testname}.basic.expected" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: =?UTF-8?Q?This_is_=C3=A4_test?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+
+This is a test
+EOF
+
+mhbuild "${testname}.basic.actual"
+
+check "${testname}.basic.actual" "${testname}.basic.expected" 'keep first'
+
+#
+# Make sure we can undo the encoding
+#
+
+run_test 'eval fmttest -outsize max -format "%(decode{subject})" -message -file "${testname}.basic.actual"' 'This is ä test'
+
+rm -f "${testname}.basic.actual"
+
+#
+# Basic test of encoding a short subject, but with base64 forced
+#
+cat > "${testname}.basic.actual" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: This is ä test
+
+This is a test
+EOF
+
+cat > "${testname}.basic.expected" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: =?UTF-8?B?VGhpcyBpcyDDpCB0ZXN0?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+
+This is a test
+EOF
+
+mhbuild -headerencoding base64 "${testname}.basic.actual"
+
+check "${testname}.basic.actual" "${testname}.basic.expected" 'keep first'
+
+run_test 'eval fmttest -outsize max -format "%(decode{subject})" -message -file "${testname}.basic.actual"' 'This is ä test'
+
+rm -f "${testname}.basic.actual"
+
+#
+# Have a subject that will pick base64 as the shorter encoding
+#
+
+cat > "${testname}.autopick.actual" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: This is ä tëst©
+
+This is a test
+EOF
+
+cat > "${testname}.autopick.expected" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: =?UTF-8?B?VGhpcyBpcyDDpCB0w6tzdMKp?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+
+This is a test
+EOF
+
+mhbuild "${testname}.autopick.actual"
+
+check "${testname}.autopick.actual" "${testname}.autopick.expected" 'keep first'
+
+run_test 'eval fmttest -outsize max -format "%(decode{subject})" -message -file "${testname}.autopick.actual"' 'This is ä tëst©'
+
+rm -f "${testname}.autopick.actual"
+
+#
+# Test a long subject line that q-p must split into several encoded-words.
+#
+
+cat > "${testname}.longsubject1.actual" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: This is ä tëst of a rather long sübject line; will it overflow
+  the line length?  I sure hope thät ='s and "'s are encoded properly.  Will
+  they be?
+
+This is a test of a very long subject line.
+EOF
+
+cat > "${testname}.longsubject1.expected" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: =?UTF-8?Q?This_is_=C3=A4_t=C3=ABst_of_a_rather_long_s=C3=BCbject_?=
+         =?UTF-8?Q?line;_will_it_overflow_the_line_length=3F__I_sure_hope_?=
+         =?UTF-8?Q?th=C3=A4t_=3D's_and_"'s_are_encoded_properly.__Will_the?=
+         =?UTF-8?Q?y_be=3F?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+
+This is a test of a very long subject line.
+EOF
+
+mhbuild "${testname}.longsubject1.actual"
+
+check "${testname}.longsubject1.actual" "${testname}.longsubject1.expected" 'keep first'
+
+run_test 'eval fmttest -outsize max -format "%(putlit(decode(trim{subject})))" -message -file "${testname}.longsubject1.actual"' "This is ä tëst of a rather long sübject line; will it overflow the line length?  I sure hope thät ='s and \"'s are encoded properly.  Will they be?"
+
+rm -f "${testname}.longsubject1.actual"
+
+#
+# Test a longer line where base64 is expected (no -headerencoding given)
+#
+
+cat > "${testname}.longsubject2.actual" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: This is ä tëst øf ä räthër løng sübjëct line; will it øvërfløw
+  the line length?  I sure hope thät ='s and "'s are encoded properly.  Will
+  they be?
+
+This is a test of a very long subject line using base64.
+EOF
+
+cat > "${testname}.longsubject2.expected" <<EOF
+From: Mr Foo Bar <foobar@example.com>
+To: Somebody <somebody@example.com>
+Subject: =?UTF-8?B?VGhpcyBpcyDDpCB0w6tzdCDDuGYgw6QgcsOkdGjDq3IgbMO4bmcg?=
+         =?UTF-8?B?c8O8YmrDq2N0IGxpbmU7IHdpbGwgaXQgw7h2w6tyZmzDuHcgdGhl?=
+         =?UTF-8?B?IGxpbmUgbGVuZ3RoPyAgSSBzdXJlIGhvcGUgdGjDpHQgPSdzIGFu?=
+         =?UTF-8?B?ZCAiJ3MgYXJlIGVuY29kZWQgcHJvcGVybHkuICBXaWxsIHRoZXkg?=
+         =?UTF-8?B?YmU/?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+
+This is a test of a very long subject line using base64.
+EOF
+
+mhbuild "${testname}.longsubject2.actual"
+
+check "${testname}.longsubject2.actual" "${testname}.longsubject2.expected" 'keep first'
+
+run_test 'eval fmttest -outsize max -format "%(putlit(decode(trim{subject})))" -message -file "${testname}.longsubject2.actual"' "This is ä tëst øf ä räthër løng sübjëct line; will it øvërfløw the line length?  I sure hope thät ='s and \"'s are encoded properly.  Will they be?"
+
+exit ${failed:-0}
diff --git a/uip/fmttest.c b/uip/fmttest.c
index b3deda81..4794c7bf 100644
--- a/uip/fmttest.c
+++ b/uip/fmttest.c
@@ -21,6 +21,8 @@
     X("raw", 0, RAWSW) \
     X("date", 0, DATESW) \
     X("message", 0, MESSAGESW) \
+    X("file", 0, FILESW) \
+    X("nofile", 0, NFILESW) \
     X("-component-name component-text", 0, OTHERSW) \
     X("dupaddrs", 0, DUPADDRSW) \
     X("nodupaddrs", 0, NDUPADDRSW) \
@@ -91,7 +93,10 @@ static void process_raw(struct format *, struct msgs_array *, char *,
 			int, int, int *, struct fmt_callbacks *);
 static void process_messages(struct format *, struct msgs_array *,
 			     struct msgs_array *, char *, char *, int,
-			     int, int *, struct fmt_callbacks *);
+			     int, int, int *, struct fmt_callbacks *);
+static void process_single_file(FILE *, struct msgs_array *, int *, int,
+				struct format *, char *, int, int,
+				struct fmt_callbacks *);
 static void test_trace(void *, struct format *, int, char *, char *);
 static char *test_formataddr(char *, char *);
 static char *test_concataddr(char *, char *);
@@ -112,7 +117,7 @@ main (int argc, char **argv)
     struct comp *cptr;
     struct msgs_array msgs = { 0, 0, NULL }, compargs = { 0, 0, NULL};
     int dump = 0, i;
-    int outputsize = 0, bufsize = 0, dupaddrs = 1, trace = 0;
+    int outputsize = 0, bufsize = 0, dupaddrs = 1, trace = 0, files = 0;
     int colwidth = -1, msgnum = -1, msgcur = -1, msgsize = -1, msgunseen = -1;
     int normalize = AD_HOST;
     enum mode_t mode = MESSAGE;
@@ -222,6 +227,13 @@ main (int argc, char **argv)
 		    defformat = DEFDATEFORMAT;
 		    continue;
 
+		case FILESW:
+		    files++;
+		    continue;
+		case NFILESW:
+		    files = 0;
+		    continue;
+
 		case DUPADDRSW:
 		    dupaddrs++;
 		    continue;
@@ -276,7 +288,7 @@ main (int argc, char **argv)
 	 * Only interpret as a folder if we're in message mode
 	 */
 
-	if (mode == MESSAGE && (*cp == '+' || *cp == '@')) {
+	if (mode == MESSAGE && !files && (*cp == '+' || *cp == '@')) {
 	    if (folder)
 	    	adios (NULL, "only one folder at a time!");
 	    else
@@ -380,7 +392,7 @@ main (int argc, char **argv)
 
     if (mode == MESSAGE) {
     	process_messages(fmt, &compargs, &msgs, buffer, folder, bufsize,
-			 outputsize, dat, cbp);
+			 outputsize, files, dat, cbp);
     } else {
 	if (compargs.size) {
 	    for (i = 0; i < compargs.size; i += 2) {
@@ -488,17 +500,33 @@ process_addresses(struct format *fmt, struct msgs_array *addrs, char *buffer,
 static void
 process_messages(struct format *fmt, struct msgs_array *comps,
 		 struct msgs_array *msgs, char *buffer, char *folder,
-		 int bufsize, int outwidth, int *dat, struct fmt_callbacks *cb)
+		 int bufsize, int outwidth, int files, int *dat,
+		 struct fmt_callbacks *cb)
 {
-    int i, state, msgnum, msgsize = dat[2], num = dat[0], cur = dat[1];
+    int i, msgnum, msgsize = dat[2], num = dat[0], cur = dat[1];
     int num_unseen_seq = 0;
     ivector_t seqnum = ivector_create (0);
-    char *maildir, *cp, name[NAMESZ], rbuf[BUFSIZ];
+    char *maildir, *cp;
     struct msgs *mp;
-    struct comp *c;
     FILE *in;
-    m_getfld_state_t gstate = 0;
-    int bufsz;
+
+    /*
+     * If 'files' is set, short-circuit everything else and just process
+     * everything now.
+     */
+
+    if (files) {
+	for (i = 0; i < msgs->size; i++) {
+	    if ((in = fopen(cp = msgs->msgs[i], "r")) == NULL) {
+	    	admonish(cp, "unable to open file");
+		continue;
+	    }
+	    process_single_file(in, comps, dat, msgsize, fmt, buffer,
+	    			bufsize, outwidth, cb);
+	}
+
+	return;
+    }
 
     if (! folder)
     	folder = getfolder(1);
@@ -558,19 +586,6 @@ process_messages(struct format *fmt, struct msgs_array *comps,
 	    if (cur == -1)
 	    	dat[1] = msgnum == mp->curmsg;
 
-	    /*
-	     * Get our size if we didn't include one
-	     */
-
-	    if (msgsize == -1) {
-	    	struct stat st;
-
-		if (fstat(fileno(in), &st) < 0)
-		    dat[2] = 0;
-		else
-		    dat[2] = st.st_size;
-	    }
-
 	    /*
 	     * Check to see if this is in the unseen sequence
 	     */
@@ -587,69 +602,116 @@ process_messages(struct format *fmt, struct msgs_array *comps,
 	     * Read in the message and process the components
 	     */
 
-	    for (state = FLD;;) {
-	    	bufsz = sizeof(rbuf);
-	    	state = m_getfld(&gstate, name, rbuf, &bufsz, in);
-		switch (state) {
-		case FLD:
-		case FLDPLUS:
-		    i = fmt_addcomptext(name, rbuf);
-		    if (i != -1) {
-		    	while (state == FLDPLUS) {
-			    bufsz = sizeof(rbuf);
-			    state = m_getfld(&gstate, name, rbuf, &bufsz, in);
-			    fmt_appendcomp(i, name, rbuf);
-			}
-		    }
-
-		    while (state == FLDPLUS) {
-		    	bufsz = sizeof(rbuf);
-		    	state = m_getfld(&gstate, name, rbuf, &bufsz, in);
-		    }
-		    break;
+	    process_single_file(in, comps, dat, msgsize, fmt, buffer,
+	    			bufsize, outwidth, cb);
+	}
+    }
 
-		case BODY:
-		    if (fmt_findcomp("body")) {
-			if ((i = strlen(rbuf)) < outwidth) {
-			    bufsz = outwidth - 1;
-			    state = m_getfld(&gstate, name, rbuf + i,
-			    		     &bufsz, in);
-			}
+    ivector_free (seqnum);
+    folder_free(mp);
+    return;
+}
 
-			fmt_addcomptext("body", rbuf);
-		    }
-		    /* fall through */
+/*
+ * Process a single file in message mode
+ */
+
+static void
+process_single_file(FILE *in, struct msgs_array *comps, int *dat, int msgsize,
+		    struct format *fmt, char *buffer, int bufsize,
+		    int outwidth, struct fmt_callbacks *cb)
+{
+    int i, state;
+    char name[NAMESZ], rbuf[BUFSIZ];
+    m_getfld_state_t gstate = 0;
+    struct comp *c;
+    int bufsz;
+
+    /*
+     * Get our size if we didn't include one
+     */
 
-		default:
-		    goto finished;
+    if (msgsize == -1) {
+	struct stat st;
+
+	if (fstat(fileno(in), &st) < 0)
+	    dat[2] = 0;
+	else
+	    dat[2] = st.st_size;
+    }
+
+    /*
+     * Initialize everything else
+     */
+
+    if (dat[0] == -1)
+    	dat[0] = 0;
+    if (dat[1] == -1)
+    	dat[1] = 0;
+    if (dat[4] == -1)
+    	dat[4] = 0;
+
+    /*
+     * Read in the message and process the components
+     */
+
+    for (state = FLD;;) {
+	bufsz = sizeof(rbuf);
+	state = m_getfld(&gstate, name, rbuf, &bufsz, in);
+	switch (state) {
+	case FLD:
+	case FLDPLUS:
+	    i = fmt_addcomptext(name, rbuf);
+	    if (i != -1) {
+		while (state == FLDPLUS) {
+		    bufsz = sizeof(rbuf);
+		    state = m_getfld(&gstate, name, rbuf, &bufsz, in);
+		    fmt_appendcomp(i, name, rbuf);
 		}
 	    }
-finished:
-	    fclose(in);
-	    m_getfld_state_destroy(&gstate);
 
-	    /*
-	     * Do this now to override any components in the original message
-	     */
-	    if (comps->size) {
-		for (i = 0; i < comps->size; i += 2) {
-		    c = fmt_findcomp(comps->msgs[i]);
-		    if (c) {
-		    	if (c->c_text)
-			    free(c->c_text);
-			c->c_text = getcpy(comps->msgs[i + 1]);
-		    }
+	    while (state == FLDPLUS) {
+		bufsz = sizeof(rbuf);
+		state = m_getfld(&gstate, name, rbuf, &bufsz, in);
+	    }
+	    break;
+
+	case BODY:
+	    if (fmt_findcomp("body")) {
+		if ((i = strlen(rbuf)) < outwidth) {
+		    bufsz = outwidth - 1;
+		    state = m_getfld(&gstate, name, rbuf + i,
+				     &bufsz, in);
 		}
+
+		fmt_addcomptext("body", rbuf);
 	    }
-	    fmt_scan(fmt, buffer, bufsize, outwidth, dat, cb);
-	    fputs(buffer, stdout);
-	    mlistfree();
+	    /* fall through */
+
+	default:
+	    goto finished;
 	}
     }
+finished:
+    fclose(in);
+    m_getfld_state_destroy(&gstate);
 
-    ivector_free (seqnum);
-    folder_free(mp);
-    return;
+    /*
+     * Do this now to override any components in the original message
+     */
+    if (comps->size) {
+	for (i = 0; i < comps->size; i += 2) {
+	    c = fmt_findcomp(comps->msgs[i]);
+	    if (c) {
+		if (c->c_text)
+		    free(c->c_text);
+		c->c_text = getcpy(comps->msgs[i + 1]);
+	    }
+	}
+    }
+    fmt_scan(fmt, buffer, bufsize, outwidth, dat, cb);
+    fputs(buffer, stdout);
+    mlistfree();
 }
 
 /*
diff --git a/uip/mhbuild.c b/uip/mhbuild.c
index 91132ce3..ef459858 100644
--- a/uip/mhbuild.c
+++ b/uip/mhbuild.c
@@ -37,6 +37,8 @@
     X("wcache policy", 0, WCACHESW) \
     X("contentid", 0, CONTENTIDSW) \
     X("nocontentid", 0, NCONTENTIDSW) \
+    X("headerencoding encoding-algorithm", 0, HEADERENCSW) \
+    X("autoheaderencoding", 0, AUTOHEADERENCSW) \
     X("version", 0, VERSIONSW) \
     X("help", 0, HELPSW) \
     X("debug", -5, DEBUGSW) \
@@ -49,6 +51,17 @@ DEFINE_SWITCH_ENUM(MHBUILD);
 DEFINE_SWITCH_ARRAY(MHBUILD, switches);
 #undef X
 
+#define MIMEENCODING_SWITCHES \
+    X("base64", 0, BASE64SW) \
+    X("quoted-printable", 0, QUOTEDPRINTSW) \
+
+#define X(sw, minchars, id) id,
+DEFINE_SWITCH_ENUM(MIMEENCODING);
+#undef X
+
+#define X(sw, minchars, id) { sw, minchars, id },
+DEFINE_SWITCH_ARRAY(MIMEENCODING, encodingswitches);
+#undef X
 
 /* mhbuildsbr.c */
 extern char *tmp;	/* directory to place temp files */
@@ -78,7 +91,6 @@ static int unlink_outfile = 0;
 static void unlink_done (int) NORETURN;
 
 /* mhbuildsbr.c */
-CT build_mime (char *, int);
 int output_message (CT, char *);
 int output_message_fp (CT, FILE *, char*);
 
@@ -97,6 +109,7 @@ main (int argc, char **argv)
     CT ct, cts[2];
     FILE *fp = NULL;
     FILE *fp_out = NULL;
+    int header_encoding = CE_UNKNOWN;
 
     done=unlink_done;
 
@@ -205,6 +218,33 @@ main (int argc, char **argv)
 		contentidsw = 0;
 		continue;
 
+	    case HEADERENCSW: {
+	    	int encoding;
+
+		if (!(cp = *argp++) || *cp == '-')
+		    adios (NULL, "missing argument to %s", argp[-2]);
+		switch (encoding = smatch (cp, encodingswitches)) {
+		case AMBIGSW:
+		    ambigsw (cp, encodingswitches);
+		    done (1);
+		case UNKWNSW:
+		    adios (NULL, "%s unknown encoding algorithm", cp);
+		case BASE64SW:
+		    header_encoding = CE_BASE64;
+		    break;
+		case QUOTEDPRINTSW:
+		    header_encoding = CE_QUOTED;
+		    break;
+		default:
+		    adios (NULL, "Internal error: algorithm %s", cp);
+		}
+		continue;
+	    }
+
+	    case AUTOHEADERENCSW:
+	    	header_encoding = CE_UNKNOWN;
+		continue;
+
 	    case VERBSW: 
 		verbosw++;
 		continue;
@@ -280,7 +320,7 @@ main (int argc, char **argv)
 	unlink_infile = 1;
 
 	/* build the content structures for MIME message */
-	ct = build_mime (infile, directives);
+	ct = build_mime (infile, directives, header_encoding);
 	cts[0] = ct;
 	cts[1] = NULL;
 
@@ -314,7 +354,7 @@ main (int argc, char **argv)
      */
 
     /* build the content structures for MIME message */
-    ct = build_mime (compfile, directives);
+    ct = build_mime (compfile, directives, header_encoding);
     cts[0] = ct;
     cts[1] = NULL;
 
diff --git a/uip/mhbuildsbr.c b/uip/mhbuildsbr.c
index 8d3a76df..e4499170 100644
--- a/uip/mhbuildsbr.c
+++ b/uip/mhbuildsbr.c
@@ -69,11 +69,6 @@ int find_cache (CT, int, int *, char *, char *, int);
 void free_ctinfo (CT);
 void free_encoding (CT, int);
 
-/*
- * prototypes
- */
-CT build_mime (char *, int);
-
 /*
  * static prototypes
  */
@@ -128,7 +123,7 @@ static void directive_pop(void)
  */
 
 CT
-build_mime (char *infile, int directives)
+build_mime (char *infile, int directives, int header_encoding)
 {
     int	compnum, state;
     char buf[BUFSIZ], name[NAMESZ];
@@ -137,6 +132,7 @@ build_mime (char *infile, int directives)
     struct part **pp;
     CT ct;
     FILE *in;
+    HF hp;
     m_getfld_state_t gstate = 0;
 
     directive_init(directives);
@@ -227,6 +223,17 @@ finish_field:
     }
     m_getfld_state_destroy (&gstate);
 
+    /*
+     * Iterate through the list of headers and call the function to MIME-ify
+     * them if required.
+     */
+
+    for (hp = ct->c_first_hf; hp != NULL; hp = hp->next) {
+    	if (encode_rfc2047(hp->name, &hp->value, header_encoding, NULL)) {
+	    adios(NULL, "Unable to encode header \"%s\"", hp->name);
+	}
+    }
+
     /*
      * Now add the MIME-Version header field
      * to the list of header fields.
diff --git a/uip/post.c b/uip/post.c
index 9bbaa5f5..679ae6df 100644
--- a/uip/post.c
+++ b/uip/post.c
@@ -223,7 +223,6 @@ static char from[BUFSIZ];	/* my network address            */
 static char sender[BUFSIZ];	/* my Sender: header		 */
 static char efrom[BUFSIZ];	/* my Envelope-From: header	 */
 static char fullfrom[BUFSIZ];	/* full contents of From header  */
-static char signature[BUFSIZ];	/* my signature                  */
 static char *filter = NULL;	/* the filter for BCC'ing        */
 static char *subject = NULL;	/* the subject field for BCC'ing */
 static char *fccfold[FCCS];	/* foldernames for FCC'ing       */
@@ -934,9 +933,6 @@ putfmt (char *name, char *str, FILE *out)
 static void
 start_headers (void)
 {
-    char  *cp, sigbuf[BUFSIZ];
-    struct mailname *mp;
-
     time (&tclock);
 
     /*
@@ -947,21 +943,6 @@ start_headers (void)
     efrom[0] = '\0';
     sender[0] = '\0';
     fullfrom[0] = '\0';
-
-    if ((cp = getfullname ()) && *cp) {
-	strncpy (sigbuf, cp, sizeof(sigbuf));
-	snprintf (signature, sizeof(signature), "%s <%s>",
-		sigbuf, getlocaladdr());
-	if ((cp = getname (signature)) == NULL)
-	    adios (NULL, "getname () failed -- you lose extraordinarily big");
-	if ((mp = getm (cp, NULL, 0, AD_HOST, NULL)) == NULL)
-	    adios (NULL, "bad signature '%s'", sigbuf);
-	mnfree (mp);
-	while (getname (""))
-	    continue;
-    } else {
-	strncpy (signature, getlocaladdr(), sizeof(signature));
-    }
 }