-/*
- * Routines to encode message headers using RFC 2047-encoding.
+/* encode_rfc2047.c -- encode message headers using RFC 2047 encoding.
*
* This code is Copyright (c) 2002, by the authors of nmh. See the
* COPYRIGHT file in the root directory of the nmh distribution for
static int field_encode_base64(const char *, char **, const char *);
static int scanstring(const char *, int *, int *, int *);
static int utf8len(const char *);
+static int pref_encoding(int, int, int);
/*
* Encode a message header using RFC 2047 encoding. We make the assumption
charset = write_charset_8bit();
if (strcasecmp(charset, "US-ASCII") == 0) {
- advise(NULL, "Cannot use US-ASCII with 8 bit characters in header");
+ inform("Cannot use US-ASCII with 8 bit characters in header");
return 1;
}
* On the encoding we choose, and the specifics of encoding:
*
* - If a specified encoding is passed in, we use that.
- * - If more than 50% of the characters are high-bit, we use base64
- * and encode the whole field as one atom (possibly split).
- * - Otherwise, we use quoted-printable.
+ * - Otherwise, pick which encoding is shorter.
+ *
+ * We don't quite handle continuation right here, but it should be
+ * pretty close.
*/
if (encoding == CE_UNKNOWN)
- encoding = (eightbitcount * 10 / (asciicount + eightbitcount) > 5) ?
- CE_BASE64 : CE_QUOTED;
+ encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
unfold_header(value, asciicount + eightbitcount);
eightbitcount + qpspecialcount, 0);
default:
- advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+ inform("Internal error: unknown RFC-2047 encoding type");
return 1;
}
}
{
int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1;
int charsetlen = strlen(charset), utf8;
- char *output = NULL, *p, *q;
+ char *output = NULL, *p, *q = NULL;
/*
* Right now we just encode the whole thing. Maybe later on we'll
* allow for the encoded output.
*/
if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
- newline = 1;
+ newline = 1;
}
}
}
- strcat(q, "?=");
+ if (q == NULL) {
+ /* This should never happen, but just in case. Found by
+ clang static analyzer. */
+ inform("null output encoding for %s, continuing...", *value);
+ return 1;
+ }
+ *q++ = '?';
+ *q++ = '=';
if (prefixlen)
- strcat(q, "\n");
+ *q++ = '\n';
+
+ *q = '\0';
free(*value);
{
int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
int outlen = 0, numencode, curlen;
- char *output = NULL, *p = *value, *q = NULL, *linestart;
+ char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
/*
* Skip over any leading white space.
numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
if (numencode <= 0) {
- advise(NULL, "Internal error: tried to encode %d characters "
+ inform("Internal error: tried to encode %d characters "
"in base64", numencode);
return 1;
}
numencode--;
if (numencode == 0) {
- advise(NULL, "Internal error: could not find start of "
+ inform("Internal error: could not find start of "
"UTF-8 character when base64 encoding header");
return 1;
}
if (writeBase64raw((unsigned char *) p, numencode,
(unsigned char *) q) != OK) {
- advise(NULL, "Internal error: base64 encoding of header failed");
+ inform("Internal error: base64 encoding of header failed");
return 1;
}
if (writeBase64raw((unsigned char *) p, strlen(p),
(unsigned char *) q) != OK) {
- advise(NULL, "Internal error: base64 encoding of header failed");
+ inform("Internal error: base64 encoding of header failed");
return 1;
}
* This has the side effect of stripping off the final newline
* for the header; we put it back in the encoding routine.
*/
- while (is_fws(*q++))
- ;
+ while (is_fws(*q))
+ q++;
if (*q == '\0')
break;
field_encode_address(const char *name, char **value, int encoding,
const char *charset)
{
- int prefixlen = strlen(name) + 2, column = prefixlen, groupflag, errflag;
- int asciichars, specialchars, eightbitchars, reformat, len;
- char *mp, *output = NULL;
+ int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
+ int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0;
+ size_t len;
+ char *mp, *cp = NULL, *output = NULL;
char *tmpbuf = NULL;
size_t tmpbufsize = 0;
struct mailname *mn;
+ char errbuf[BUFSIZ];
/*
* Because these are addresses, we need to handle them individually.
output = add(" ", output);
- for (groupflag = 0; mp = getname(*value); ) {
- if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) {
+ for (groupflag = 0; (mp = getname(*value)); ) {
+ if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
+ inform("%s: %s", errbuf, mp);
errflag++;
continue;
}
+ reformat = 0;
+
/*
* We only care if the phrase (m_pers) or any trailing comment
* (m_note) have 8-bit characters. If doing q-p, we also need
* so the specialchars count is right.
*/
+ if (! mn->m_pers)
+ goto check_note;
+
if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
}
if (scanstring(tmpbuf, &asciichars, &eightbitchars,
&specialchars)) {
- /*
- * If we have 8-bit characters, encode it.
- */
+ /*
+ * If we have 8-bit characters, encode it.
+ */
+
+ if (encoding == CE_UNKNOWN)
+ encoding = pref_encoding(asciichars, specialchars,
+ eightbitchars);
+
+ /*
+ * This is okay, because the output of unquote_string will be either
+ * the same length as or shorter than the original.
+ */
+
+ strcpy(mn->m_pers, tmpbuf);
+
+ switch (encoding) {
+
+ case CE_BASE64:
+ if (field_encode_base64(NULL, &mn->m_pers, charset)) {
+ errflag++;
+ goto out;
+ }
+ break;
+
+ case CE_QUOTED:
+ if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
+ eightbitchars + specialchars, 1)) {
+ errflag++;
+ goto out;
+ }
+ break;
+
+ default:
+ inform("Internal error: unknown RFC-2047 encoding type");
+ errflag++;
+ goto out;
+ }
+
+ reformat++;
+ }
+
+ check_note:
+
+ /*
+ * The "note" field is generally a comment at the end of the address,
+ * at least as how it's implemented here. Notes are always surrounded
+ * by parentheses (since they're comments). Strip them out and
+ * then put them back when we format the final field, but they do
+ * not get encoded.
+ */
+
+ if (! mn->m_note)
+ goto do_reformat;
+
+ if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
+ tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
+ }
+
+ if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
+ inform("Internal error: Invalid note field \"%s\"",
+ mn->m_note);
+ errflag++;
+ goto out;
+ }
+
+ strncpy(tmpbuf, mn->m_note + 1, len - 1);
+ tmpbuf[len - 2] = '\0';
+
+ if (scanstring(tmpbuf, &asciichars, &eightbitchars,
+ &specialchars)) {
+ /*
+ * If we have 8-bit characters, encode it.
+ */
+
+ if (encoding == CE_UNKNOWN)
+ encoding = pref_encoding(asciichars, specialchars,
+ eightbitchars);
+
+ switch (encoding) {
+
+ case CE_BASE64:
+ if (field_encode_base64(NULL, &tmpbuf, charset)) {
+ errflag++;
+ goto out;
+ }
+ break;
+
+ case CE_QUOTED:
+ if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
+ eightbitchars + specialchars, 1)) {
+ errflag++;
+ goto out;
+ }
+ break;
+
+ default:
+ inform("Internal error: unknown RFC-2047 encoding type");
+ errflag++;
+ goto out;
+ }
+
+ reformat++;
+
+ /*
+ * Make sure the size of tmpbuf is correct (it always gets
+ * reallocated in the above functions).
+ */
+
+ tmpbufsize = strlen(tmpbuf) + 1;
+
+ /*
+ * Put the note field back surrounded by parentheses.
+ */
+
+ mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
+
+ snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
+ }
+
+do_reformat:
+
+ /*
+ * So, some explanation is in order.
+ *
+ * We know we need to rewrite at least one address in the header,
+ * otherwise we wouldn't be here. If we had to reformat this
+ * particular address, then run it through adrformat(). Otherwise
+ * we can use m_text directly.
+ */
+
+ /*
+ * If we were in a group but are no longer, make sure we add a
+ * semicolon (which needs to be FIRST, as it needs to be at the end
+ * of the last address).
+ */
+
+ if (groupflag && ! mn->m_ingrp) {
+ output = add(";", output);
+ column++;
+ }
+
+ groupflag = mn->m_ingrp;
+
+ if (mn->m_gname) {
+ cp = mh_xstrdup(mn->m_gname);
+ }
+
+ if (reformat) {
+ cp = add(adrformat(mn), cp);
+ } else {
+ cp = add(mn->m_text, cp);
+ }
+
+ len = strlen(cp);
+
+ /*
+ * If we're not at the beginning of the line, add a comma and
+ * either a space or a newline.
+ */
+
+ if (column != prefixlen) {
+ if (len + column + 2 > OUTPUTLINELEN) {
+
+ if ((size_t) (prefixlen + 3) < tmpbufsize)
+ tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
+
+ snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
+ output = add(tmpbuf, output);
+ } else {
+ output = add(", ", output);
+ column += 2;
+ }
+ }
+
+ /*
+ * Finally add the address
+ */
+
+ output = add(cp, output);
+ column += len;
+ free(cp);
+ cp = NULL;
+ }
+ /*
+ * Just in case we're at the end of a list
+ */
+
+ if (groupflag) {
+ output = add(";", output);
}
+
+ output = add("\n", output);
+
+ free(*value);
+ *value = output;
+ output = NULL;
+
+out:
+ mh_xfree(tmpbuf);
+ mh_xfree(output);
+
+ return errflag > 0;
}
/*
for (; *string != '\0'; string++) {
if ((isascii((unsigned char) *string))) {
- (*asciilen++);
- if (!qphrasevalid((unsigned char) *string))
+ (*asciilen)++;
+ /*
+ * So, a space is not a valid phrase character, but we're counting
+ * an exception here, because in q-p a space can be directly
+ * encoded as an underscore.
+ */
+ if (!qphrasevalid((unsigned char) *string) && *string != ' ')
(*specialchars)++;
} else {
(*eightbitchars)++;
}
}
- return eightbitchars > 0;
+ return *eightbitchars > 0;
+}
+
+/*
+ * This function is to be used to decide which encoding algorithm we should
+ * use if one is not given. Basically, we pick whichever one is the shorter
+ * of the two.
+ *
+ * Arguments are:
+ *
+ * ascii - Number of ASCII characters in to-be-encoded string.
+ * specials - Number of ASCII characters in to-be-encoded string that
+ * still require encoding under quoted-printable. Note that
+ * these are included in the "ascii" total.
+ * eightbits - Number of eight-bit characters in the to-be-encoded string.
+ *
+ * Returns one of CE_BASE64 or CE_QUOTED.
+ */
+
+static int
+pref_encoding(int ascii, int specials, int eightbits)
+{
+ /*
+ * The length of the q-p encoding is:
+ *
+ * ascii - specials + (specials + eightbits) * 3.
+ *
+ * The length of the base64 encoding is:
+ *
+ * base64len(ascii + eightbits) (See macro for details)
+ */
+
+ return base64len(ascii + eightbits) < (ascii - specials +
+ (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
}