From: Ken Hornstein <kenh@pobox.com>
Date: Wed, 23 Oct 2013 18:40:02 +0000 (-0400)
Subject: A little tiny bit closer to working.
X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/f63b238c3fa2af9db08be8ec72e0e62e6a3842ad?ds=sidebyside;hp=a6d1640cf0599d8dfb69a58f1acc597bd6155db5

A little tiny bit closer to working.
---

diff --git a/Makefile.am b/Makefile.am
index 78cce559..28d49047 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -520,6 +520,7 @@ sbr_libmh_a_SOURCES = sbr/addrsbr.c sbr/ambigsw.c sbr/atooi.c sbr/arglist.c \
 		      sbr/copy.c sbr/copyip.c sbr/cpydata.c \
 		      sbr/cpydgst.c sbr/crawl_folders.c sbr/credentials.c \
 		      sbr/discard.c sbr/done.c sbr/dtimep.l sbr/dtime.c \
+		      sbr/encode_rfc2047.c \
 		      sbr/escape_addresses.c \
 		      sbr/error.c sbr/ext_hook.c sbr/fdcompare.c \
 		      sbr/folder_addmsg.c sbr/folder_delmsgs.c \
diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c
index 0160aa91..0f040bc9 100644
--- a/sbr/encode_rfc2047.c
+++ b/sbr/encode_rfc2047.c
@@ -7,12 +7,39 @@
  */
 
 #include <h/mh.h>
+#include <h/mhparse.h>
+#include <h/utils.h>
 
 /*
  * List of headers that contain addresses and as a result require special
  * handling
  */
 
+static char *address_headers[] = {	/* compared case-insensitively */
+    "To",
+    "From",
+    "cc",
+    "Bcc",
+    "Reply-To",
+    "Sender",
+    "Resent-To",
+    "Resent-From",
+    "Resent-cc",
+    "Resent-Bcc",
+    "Resent-Reply-To",
+    "Resent-Sender",
+    NULL,			/* sentinel ending the scan in encode_rfc2047() */
+};
+
+/*
+ * Macros we use for parsing headers
+ */
+
+#define is_fws(c) ((c) == '\t' || (c) == ' ')	/* NB: evaluates c twice */
+
+static void unfold_header(char **, int);
+static int field_encode_address(const char *, char **, int, const char *);
+
 /*
  * Encode a message header using RFC 2047 encoding.  We make the assumption
  * that all characters < 128 are ASCII and as a consequence don't need any
@@ -23,6 +50,7 @@ int
 encode_rfc2047(const char *name, char **value, int encoding,
 	       const char *charset)
 {
+    int i, count = 0, len;
     char *p;
 
     /*
@@ -30,13 +58,13 @@ encode_rfc2047(const char *name, char **value, int encoding,
      */
 
     for (p = *value; *p != '\0'; p++) {
-	if (! isascii((int) *p)
-	    goto encode;
+	if (! isascii((int) *p))
+	count++;
     }
 
-    return 0;
+    if (count == 0)
+    	return 0;
 
-encode:
     /*
      * Some rules from RFC 2047:
      *
@@ -47,3 +75,102 @@ encode:
      * we need to actually _parse_ email addresses and only encode
      * the right bits.  
      */
+
+    /*
+     * If charset was NULL, then get the value from the locale.  But
+     * we reject it if it returns US-ASCII
+     */
+
+    if (charset == NULL)
+    	charset = write_charset_8bit();
+
+    if (strcasecmp(charset, "US-ASCII") == 0) {
+    	advise(NULL, "Cannot use US-ASCII with 8 bit characters in header");
+	return 1;
+    }
+
+    /*
+     * If we have an address header, then we need to parse the addresses
+     * and only encode the names or comments.  Otherwise, handle it normally.
+     */
+
+    for (i = 0; address_headers[i]; i++) {
+    	if (strcasecmp(name, address_headers[i]) == 0)
+	    return field_encode_address(name, value, encoding, charset);
+    }
+
+    /*
+     * On the encoding we choose, and the specifics of encoding:
+     *
+     * - If a specified encoding is passed in, we use that.
+     * - If more than 50% of the characters are high-bit, we use base64
+     *   and encode the whole field as one atom (possibly split).
+     *   Otherwise, we use quoted-printable.
+     * - If more than 10% of the characters are high-bit, then we encode
+     *   the entire header as one (possibly split) atom.  Otherwise,
+     *   take each atom as it comes and encode it on a per-atom basis.
+     */
+
+    len = strlen(*value);
+
+    if (encoding == CE_UNKNOWN)
+    	encoding = (count * 10 / len > 5) ? CE_BASE64 : CE_QUOTED;
+
+    switch (encoding) {
+
+    case CE_BASE64:
+    	return field_encode_base64(value, charset, len, NULL);
+
+    case CE_QUOTED:
+    	if (count * 100 / len > 10) {
+	    return field_encode_quoted(value, charset, len, NULL);
+	} else {
+	    /*
+	     * Break it down by atoms.
+	     */
+
+	    unfold_header(value, len);
+	}
+    default:
+    	advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+	return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * "Unfold" a header, making it a single line (without continuation).
+ * *value is freed and replaced with a newly allocated unfolded copy.
+ *
+ * We cheat a bit here; we never make the string longer, so using the
+ * original length (len, excluding the NUL) for the new buffer is fine.
+ */
+static void
+unfold_header(char **value, int len)
+{
+    char *str = mh_xmalloc(len + 1);
+    char *p = str, *q = *value;
+
+    while (*q != '\0') {
+    	if (*q == '\n') {
+	    /*
+	     * Consume the newline itself (otherwise the loop never
+	     * advances), skip any following whitespace, and emit one
+	     * space in place of the whole run.
+	     */
+	    q++;
+	    while (is_fws(*q))
+	    	q++;
+	    if (*q == '\0')
+	    	break;
+	    *p++ = ' ';
+	} else {
+	    *p++ = *q++;
+	}
+    }
+
+    *p = '\0';
+    free(*value);
+    *value = str;
+}