X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/dd4503c862644d4dbc45cf97db64c2c1ac001f8d..23816efd3e8e294e8f1c1ce811ac3a1cd15d334e:/sbr/encode_rfc2047.c

diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c
index 3c4f45e5..bb45b5c1 100644
--- a/sbr/encode_rfc2047.c
+++ b/sbr/encode_rfc2047.c
@@ -1,15 +1,19 @@
-/*
- * Routines to encode message headers using RFC 2047-encoding.
+/* encode_rfc2047.c -- encode message headers using RFC 2047 encoding.
  *
  * This code is Copyright (c) 2002, by the authors of nmh.  See the
  * COPYRIGHT file in the root directory of the nmh distribution for
  * complete copyright information.
  */
 
-#include <h/mh.h>
-#include <h/mhparse.h>
-#include <h/addrsbr.h>
-#include <h/utils.h>
+#include "h/mh.h"
+#include "encode_rfc2047.h"
+#include "check_charset.h"
+#include "error.h"
+#include "h/mhparse.h"
+#include "h/addrsbr.h"
+#include "h/utils.h"
+#include "base64.h"
+#include "unquote.h"
 
 /*
  * List of headers that contain addresses and as a result require special
@@ -79,13 +83,13 @@ encode_rfc2047(const char *name, char **value, int encoding,
 	if (isascii((unsigned char) *p)) {
 	    asciicount++;
 	    if (qpspecial((unsigned char) *p))
-	    	qpspecialcount++;
+		qpspecialcount++;
 	} else
 	    eightbitcount++;
     }
 
     if (eightbitcount == 0)
-    	return 0;
+	return 0;
 
     /*
      * Some rules from RFC 2047:
@@ -104,10 +108,10 @@ encode_rfc2047(const char *name, char **value, int encoding,
      */
 
     if (charset == NULL)
-    	charset = write_charset_8bit();
+	charset = write_charset_8bit();
 
     if (strcasecmp(charset, "US-ASCII") == 0) {
-    	advise(NULL, "Cannot use US-ASCII with 8 bit characters in header");
+	inform("Cannot use US-ASCII with 8 bit characters in header");
 	return 1;
     }
 
@@ -117,7 +121,7 @@ encode_rfc2047(const char *name, char **value, int encoding,
      */
 
     for (i = 0; address_headers[i]; i++) {
-    	if (strcasecmp(name, address_headers[i]) == 0)
+	if (strcasecmp(name, address_headers[i]) == 0)
 	    return field_encode_address(name, value, encoding, charset);
     }
 
@@ -125,28 +129,28 @@ encode_rfc2047(const char *name, char **value, int encoding,
      * On the encoding we choose, and the specifics of encoding:
      *
      * - If a specified encoding is passed in, we use that.
-     * - If more than 50% of the characters are high-bit, we use base64
-     *   and encode the whole field as one atom (possibly split).
-     * - Otherwise, we use quoted-printable.
+     * - Otherwise, pick which encoding is shorter.
+     *
+     * We don't quite handle continuation right here, but it should be
+     * pretty close.
      */
 
     if (encoding == CE_UNKNOWN)
-    	encoding = (eightbitcount * 10 / (asciicount + eightbitcount) > 5) ?
-						CE_BASE64 : CE_QUOTED;
+        encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
 
     unfold_header(value, asciicount + eightbitcount);
 
     switch (encoding) {
 
     case CE_BASE64:
-    	return field_encode_base64(name, value, charset);
+	return field_encode_base64(name, value, charset);
 
     case CE_QUOTED:
 	return field_encode_quoted(name, value, charset, asciicount,
 				   eightbitcount + qpspecialcount, 0);
 
     default:
-    	advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+	inform("Internal error: unknown RFC-2047 encoding type");
 	return 1;
     }
 }
@@ -159,9 +163,9 @@ static int
 field_encode_quoted(const char *name, char **value, const char *charset,
 		    int ascii, int encoded, int phraserules)
 {
-    int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1;
+    int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column;
     int charsetlen = strlen(charset), utf8;
-    char *output = NULL, *p, *q;
+    char *output = NULL, *p, *q = NULL;
 
     /*
      * Right now we just encode the whole thing.  Maybe later on we'll
@@ -174,11 +178,12 @@ field_encode_quoted(const char *name, char **value, const char *charset,
 
     utf8 = strcasecmp(charset, "UTF-8") == 0;
 
+    bool newline = true;
     while (*p != '\0') {
-    	/*
+	/*
 	 * Start a new line, if it's time
 	 */
-    	if (newline) {
+	if (newline) {
 	    /*
 	     * If it's the start of the header, we don't need to pad it
 	     *
@@ -210,12 +215,12 @@ field_encode_quoted(const char *name, char **value, const char *charset,
 		for (i = 0; i < prefixlen; i++)
 		    *q++ = ' ';
 	    } else {
-	    	/*
+		/*
 		 * A bit of a hack here; the header can contain multiple
 		 * spaces (probably at least one) until we get to the
 		 * actual text.  Copy until we get to a non-space.
 		 */
-	    	output = mh_xmalloc(outlen);
+		output = mh_xmalloc(outlen);
 		q = output;
 		while (is_fws(*p))
 		    *q++ = *p++;
@@ -224,7 +229,7 @@ field_encode_quoted(const char *name, char **value, const char *charset,
 	    tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset);
 	    q += tokenlen;
 	    column = prefixlen + tokenlen;
-	    newline = 0;
+	    newline = false;
 	}
 
 	/*
@@ -241,7 +246,7 @@ field_encode_quoted(const char *name, char **value, const char *charset,
 	    ascii--;
 	} else if (isascii((unsigned char) *p) &&
 		   (phraserules ? qphrasevalid((unsigned char) *p) :
-		   			!qpspecial((unsigned char) *p))) {
+					!qpspecial((unsigned char) *p))) {
 	    *q++ = *p;
 	    ascii--;
 	} else {
@@ -265,7 +270,7 @@ field_encode_quoted(const char *name, char **value, const char *charset,
 	    continue;
 
 	if (column >= ENCODELINELIMIT - 2) {
-	    newline = 1;
+	    newline = true;
 	} else if (utf8) {
 	    /*
 	     * Okay, this is a bit weird, but to explain a bit more ...
@@ -281,15 +286,24 @@ field_encode_quoted(const char *name, char **value, const char *charset,
 	     * allow for the encoded output.
 	     */
 	    if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
-	    	newline = 1;
+		newline = true;
 	    }
 	}
     }
 
-    strcat(q, "?=");
+    if (q == NULL) {
+	/* This should never happen, but just in case.  Found by
+	   clang static analyzer. */
+	inform("null output encoding for %s, continuing...", *value);
+	return 1;
+    }
+    *q++ = '?';
+    *q++ = '=';
 
     if (prefixlen)
-    	strcat(q, "\n");
+	*q++ = '\n';
+
+    *q = '\0';
 
     free(*value);
 
@@ -310,14 +324,14 @@ field_encode_base64(const char *name, char **value, const char *charset)
 {
     int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
     int outlen = 0, numencode, curlen;
-    char *output = NULL, *p = *value, *q = NULL, *linestart;
+    char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
 
     /*
      * Skip over any leading white space.
      */
 
     while (*p == ' ' || *p == '\t')
-    	p++;
+	p++;
 
     /*
      * If we had a zero-length prefix, then just encode the whole field
@@ -329,7 +343,7 @@ field_encode_base64(const char *name, char **value, const char *charset)
      */
 
     while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen +
-    			  prefixlen) > ENCODELINELIMIT)) {
+			  prefixlen) > ENCODELINELIMIT)) {
 
 	/*
 	 * Our very first time, don't pad the line in the front
@@ -372,8 +386,8 @@ field_encode_base64(const char *name, char **value, const char *charset)
 	numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
 
 	if (numencode <= 0) {
-	    advise(NULL, "Internal error: tried to encode %d characters "
-	    	   "in base64", numencode);
+	    inform("Internal error: tried to encode %d characters "
+		   "in base64", numencode);
 	    return 1;
 	}
 
@@ -393,10 +407,10 @@ field_encode_base64(const char *name, char **value, const char *charset)
 	     */
 
 	    while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80))
-	    	numencode--;
+		numencode--;
 
 	    if (numencode == 0) {
-	    	advise(NULL, "Internal error: could not find start of "
+		inform("Internal error: could not find start of "
 		       "UTF-8 character when base64 encoding header");
 		return 1;
 	    }
@@ -404,7 +418,7 @@ field_encode_base64(const char *name, char **value, const char *charset)
 
 	if (writeBase64raw((unsigned char *) p, numencode,
 			   (unsigned char *) q) != OK) {
-	    advise(NULL, "Internal error: base64 encoding of header failed");
+	    inform("Internal error: base64 encoding of header failed");
 	    return 1;
 	}
 
@@ -447,18 +461,18 @@ field_encode_base64(const char *name, char **value, const char *charset)
     q = output + curlen;
 
     q += snprintf(q, outlen - (q - output), "%s=?%s?B?",
-    		  prefixlen ? " " : "", charset);
+		  prefixlen ? " " : "", charset);
 
     if (writeBase64raw((unsigned char *) p, strlen(p),
-    		       (unsigned char *) q) != OK) {
-	advise(NULL, "Internal error: base64 encoding of header failed");
+		       (unsigned char *) q) != OK) {
+	inform("Internal error: base64 encoding of header failed");
 	return 1;
     }
 
     strcat(q, "?=");
 
     if (prefixlen)
-    	strcat(q, "\n");
+	strcat(q, "\n");
 
     free(*value);
 
@@ -480,14 +494,14 @@ utf8len(const char *p)
     int len = 1;
 
     if (*p == '\0')
-    	return 0;
+	return 0;
 
     if (isascii((unsigned char) *p) || (((unsigned char) *p) & 0xc0) == 0x80)
-    	return 0;
+	return 0;
 
     p++;
     while ((((unsigned char) *p++) & 0xc0) == 0x80)
-    	len++;
+	len++;
 
     return len;
 }
@@ -506,7 +520,7 @@ unfold_header(char **value, int len)
     char *p = str, *q = *value;
 
     while (*q != '\0') {
-    	if (*q == '\n') {
+	if (*q == '\n') {
 	    /*
 	     * When we get a newline, skip to the next non-whitespace
 	     * character and add a space to replace all of the whitespace
@@ -514,10 +528,10 @@ unfold_header(char **value, int len)
 	     * This has the side effect of stripping off the final newline
 	     * for the header; we put it back in the encoding routine.
 	     */
-	    while (is_fws(*q++))
-	    	;
+	    while (is_fws(*q))
+		q++;
 	    if (*q == '\0')
-	    	break;
+		break;
 
 	    *p++ = ' ';
 	} else {
@@ -541,13 +555,17 @@ field_encode_address(const char *name, char **value, int encoding,
 		     const char *charset)
 {
     int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
-    int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0;
-    int retval;
+    int asciichars;
+    int specialchars;
+    int eightbitchars;
+    bool reformat = false;
+    bool errflag = false;
     size_t len;
-    char *mp, *output = NULL;
+    char *mp, *cp = NULL, *output = NULL;
     char *tmpbuf = NULL;
     size_t tmpbufsize = 0;
     struct mailname *mn;
+    char errbuf[BUFSIZ];
 
     /*
      * Because these are addresses, we need to handle them individually.
@@ -564,11 +582,14 @@ field_encode_address(const char *name, char **value, int encoding,
     output = add(" ", output);
 
     for (groupflag = 0; (mp = getname(*value)); ) {
-    	if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) {
-	    errflag++;
+	if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
+	    inform("%s: %s", errbuf, mp);
+	    errflag = true;
 	    continue;
 	}
 
+	reformat = false;
+
 	/*
 	 * We only care if the phrase (m_pers) or any trailing comment
 	 * (m_note) have 8-bit characters.  If doing q-p, we also need
@@ -592,7 +613,7 @@ field_encode_address(const char *name, char **value, int encoding,
 	     */
 
 	    if (encoding == CE_UNKNOWN)
-	    	encoding = pref_encoding(asciichars, specialchars,
+		encoding = pref_encoding(asciichars, specialchars,
 					 eightbitchars);
 
 	    /*
@@ -605,22 +626,27 @@ field_encode_address(const char *name, char **value, int encoding,
 	    switch (encoding) {
 
 	    case CE_BASE64:
-	    	retval = field_encode_base64(NULL, &mn->m_pers, charset);
+		if (field_encode_base64(NULL, &mn->m_pers, charset)) {
+		    errflag = true;
+		    goto out;
+		}
 		break;
 
 	    case CE_QUOTED:
-	    	retval = field_encode_quoted(NULL, &mn->m_pers, charset,
-					     asciichars,
-					     eightbitchars + specialchars, 1);
+		if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
+					eightbitchars + specialchars, 1)) {
+		    errflag = true;
+		    goto out;
+		}
 		break;
 
 	    default:
-		advise(NULL, "Internal error: unknown RFC-2047 encoding type");
-		errflag++;
+		inform("Internal error: unknown RFC-2047 encoding type");
+		errflag = true;
 		goto out;
 	    }
 
-	    reformat++;
+	    reformat = true;
 	}
 
 	check_note:
@@ -636,16 +662,14 @@ field_encode_address(const char *name, char **value, int encoding,
 	if (! mn->m_note)
 	    goto do_reformat;
 
-	len = strlen(mn->m_note);
-
-	if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
+	if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
 	    tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 	}
 
 	if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
-	    advise(NULL, "Internal error: Invalid note field \"%s\"",
-	    	   mn->m_note);
-	    errflag++;
+	    inform("Internal error: Invalid note field \"%s\"",
+		   mn->m_note);
+	    errflag = true;
 	    goto out;
 	}
 
@@ -659,39 +683,134 @@ field_encode_address(const char *name, char **value, int encoding,
 	     */
 
 	    if (encoding == CE_UNKNOWN)
-	    	encoding = pref_encoding(asciichars, specialchars,
+		encoding = pref_encoding(asciichars, specialchars,
 					 eightbitchars);
 
 	    switch (encoding) {
 
 	    case CE_BASE64:
-	    	retval = field_encode_base64(NULL, &tmpbuf, charset);
+		if (field_encode_base64(NULL, &tmpbuf, charset)) {
+		    errflag = true;
+		    goto out;
+		}
 		break;
 
 	    case CE_QUOTED:
-	    	retval = field_encode_quoted(NULL, &tmpbuf, charset,
-					     asciichars,
-					     eightbitchars + specialchars, 1);
+		if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
+					eightbitchars + specialchars, 1)) {
+		    errflag = true;
+		    goto out;
+		}
 		break;
 
 	    default:
-		advise(NULL, "Internal error: unknown RFC-2047 encoding type");
-		errflag++;
+		inform("Internal error: unknown RFC-2047 encoding type");
+		errflag = true;
 		goto out;
 	    }
 
-	    reformat++;
+	    reformat = true;
+
+	    /*
+	     * Make sure the size of tmpbuf is correct (it always gets
+	     * reallocated in the above functions).
+	     */
+
+	    tmpbufsize = strlen(tmpbuf) + 1;
+
+	    /*
+	     * Put the note field back surrounded by parentheses.
+	     */
+
+	    mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
+
+	    snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
+	}
+
+do_reformat:
+
+	/*
+	 * So, some explanation is in order.
+	 *
+	 * We know we need to rewrite at least one address in the header,
+	 * otherwise we wouldn't be here.  If we had to reformat this
+	 * particular address, then run it through adrformat().  Otherwise
+	 * we can use m_text directly.
+	 */
+
+	/*
+	 * If we were in a group but are no longer, make sure we add a
+	 * semicolon (which needs to be FIRST, as it needs to be at the end
+	 * of the last address).
+	 */
+
+	if (groupflag && ! mn->m_ingrp) {
+	    output = add(";", output);
+	    column++;
 	}
 
+	groupflag = mn->m_ingrp;
 
+	if (mn->m_gname) {
+	    cp = mh_xstrdup(mn->m_gname);
+	}
+
+	if (reformat) {
+	    cp = add(adrformat(mn), cp);
+	} else {
+	    cp = add(mn->m_text, cp);
+	}
+
+	len = strlen(cp);
+
+	/*
+	 * If we're not at the beginning of the line, add a comma and
+	 * either a space or a newline.
+	 */
+
+	if (column != prefixlen) {
+	    if (len + column + 2 > OUTPUTLINELEN) {
+
+		if ((size_t) (prefixlen + 3) < tmpbufsize)
+		    tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
+
+		snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
+		output = add(tmpbuf, output);
+	    } else {
+		output = add(", ", output);
+		column += 2;
+	    }
+	}
+
+	/*
+	 * Finally add the address
+	 */
+
+	output = add(cp, output);
+	column += len;
+	free(cp);
+	cp = NULL;
     }
 
-out:
+    /*
+     * Just in case we're at the end of a list
+     */
+
+    if (groupflag) {
+	output = add(";", output);
+    }
 
-    if (tmpbuf)
-    	free(tmpbuf);
+    output = add("\n", output);
 
-    return errflag > 0;
+    free(*value);
+    *value = output;
+    output = NULL;
+
+out:
+    free(tmpbuf);
+    free(output);
+
+    return errflag;
 }
 
 /*
@@ -707,10 +826,15 @@ scanstring(const char *string, int *asciilen, int *eightbitchars,
     *specialchars = 0;
 
     for (; *string != '\0'; string++) {
-    	if ((isascii((unsigned char) *string))) {
+	if ((isascii((unsigned char) *string))) {
 	    (*asciilen)++;
-	    if (!qphrasevalid((unsigned char) *string))
-	    	(*specialchars)++;
+	    /*
+	     * So, a space is not a valid phrase character, but we're counting
+	     * an exception here, because in q-p a space can be directly
+	     * encoded as an underscore.
+	     */
+	    if (!qphrasevalid((unsigned char) *string) && *string != ' ')
+		(*specialchars)++;
 	} else {
 	    (*eightbitchars)++;
 	}
@@ -749,5 +873,5 @@ pref_encoding(int ascii, int specials, int eightbits)
      */
 
     return base64len(ascii + eightbits) < (ascii - specials +
-    			(specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
+			(specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
 }