* On the encoding we choose, and the specifics of encoding:
*
* - If a specified encoding is passed in, we use that.
- * - If more than 50% of the characters are high-bit, we use base64
- * and encode the whole field as one atom (possibly split).
- * - Otherwise, we use quoted-printable.
+ * - Otherwise, pick which encoding is shorter.
+ *
+ * We don't quite handle continuation right here, but it should be
+ * pretty close.
*/
if (encoding == CE_UNKNOWN)
- encoding = (eightbitcount * 10 / (asciicount + eightbitcount) > 5) ?
- CE_BASE64 : CE_QUOTED;
+ encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
unfold_header(value, asciicount + eightbitcount);
{
int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1;
int charsetlen = strlen(charset), utf8;
- char *output = NULL, *p, *q;
+ char *output = NULL, *p, *q = NULL;
/*
* Right now we just encode the whole thing. Maybe later on we'll
}
}
- strcat(q, "?=");
+ *q++ = '?';
+ *q++ = '=';
if (prefixlen)
- strcat(q, "\n");
+ *q++ = '\n';
+
+ *q = '\0';
free(*value);
{
int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
int outlen = 0, numencode, curlen;
- char *output = NULL, *p = *value, *q = NULL, *linestart;
+ char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
/*
* Skip over any leading white space.
* This has the side effect of stripping off the final newline
* for the header; we put it back in the encoding routine.
*/
- while (is_fws(*q++))
- ;
+ while (is_fws(*q))
+ q++;
if (*q == '\0')
break;
field_encode_address(const char *name, char **value, int encoding,
const char *charset)
{
- int prefixlen = strlen(name) + 2, column = prefixlen, groupflag, errflag;
- int asciichars, specialchars, eightbitchars, reformat, len, retval;
- char *mp, *output = NULL;
+ int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
+ int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0;
+ size_t len;
+ char *mp, *cp = NULL, *output = NULL;
char *tmpbuf = NULL;
size_t tmpbufsize = 0;
struct mailname *mn;
+ char errbuf[BUFSIZ];
/*
* Because these are addresses, we need to handle them individually.
output = add(" ", output);
for (groupflag = 0; (mp = getname(*value)); ) {
- if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) {
+ if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
+ advise(NULL, "%s: %s", errbuf, mp);
errflag++;
continue;
}
+ reformat = 0;
+
/*
* We only care if the phrase (m_pers) or any trailing comment
* (m_note) have 8-bit characters. If doing q-p, we also need
* so the specialchars count is right.
*/
+ if (! mn->m_pers)
+ goto check_note;
+
if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
}
if (scanstring(tmpbuf, &asciichars, &eightbitchars,
&specialchars)) {
- /*
- * If we have 8-bit characters, encode it.
- */
+ /*
+ * If we have 8-bit characters, encode it.
+ */
if (encoding == CE_UNKNOWN)
- encoding = prefencoding(asciichars, specialchars,
- eightbitchars);
+ encoding = pref_encoding(asciichars, specialchars,
+ eightbitchars);
+
+ /*
+ * This is okay, because the output of unquote_string is never longer
+ * than the original, so it always fits back into mn->m_pers.
+ */
strcpy(mn->m_pers, tmpbuf);
switch (encoding) {
case CE_BASE64:
- retval = field_encode_base64(NULL, &mn->m_pers, charset);
+ if (field_encode_base64(NULL, &mn->m_pers, charset)) {
+ errflag++;
+ goto out;
+ }
break;
case CE_QUOTED:
- retval = field_encode_quoted(NULL, &mn->m_pers, charset,
- asciichars,
- eightbitchars + specialchars, 1);
+ if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
+ eightbitchars + specialchars, 1)) {
+ errflag++;
+ goto out;
+ }
break;
default:
advise(NULL, "Internal error: unknown RFC-2047 encoding type");
- return 1;
+ errflag++;
+ goto out;
+ }
+
+ reformat++;
+ }
+
+ check_note:
+
+ /*
+ * The "note" field is generally a comment at the end of the address,
+ * at least as it's implemented here. Notes are always surrounded
+ * by parentheses (since they're comments). Strip them out and
+ * then put them back when we format the final field, but they do
+ * not get encoded.
+ */
+
+ if (! mn->m_note)
+ goto do_reformat;
+
+ if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
+ tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
+ }
+
+ if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
+ advise(NULL, "Internal error: Invalid note field \"%s\"",
+ mn->m_note);
+ errflag++;
+ goto out;
+ }
+
+ strncpy(tmpbuf, mn->m_note + 1, len - 1);
+ tmpbuf[len - 2] = '\0';
+
+ if (scanstring(tmpbuf, &asciichars, &eightbitchars,
+ &specialchars)) {
+ /*
+ * If we have 8-bit characters, encode it.
+ */
+
+ if (encoding == CE_UNKNOWN)
+ encoding = pref_encoding(asciichars, specialchars,
+ eightbitchars);
+
+ switch (encoding) {
+
+ case CE_BASE64:
+ if (field_encode_base64(NULL, &tmpbuf, charset)) {
+ errflag++;
+ goto out;
+ }
+ break;
+
+ case CE_QUOTED:
+ if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
+ eightbitchars + specialchars, 1)) {
+ errflag++;
+ goto out;
+ }
+ break;
+
+ default:
+ advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+ errflag++;
+ goto out;
+ }
+
+ reformat++;
+
+ /*
+ * Make sure the size of tmpbuf is correct (it always gets
+ * reallocated in the above functions).
+ */
+
+ tmpbufsize = strlen(tmpbuf) + 1;
+
+ /*
+ * Put the note field back surrounded by parentheses.
+ */
+
+ mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
+
+ snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
+ }
+
+do_reformat:
+
+ /*
+ * So, some explanation is in order.
+ *
+ * We know we need to rewrite at least one address in the header,
+ * otherwise we wouldn't be here. If we had to reformat this
+ * particular address, then run it through adrformat(). Otherwise
+ * we can use m_text directly.
+ */
+
+ /*
+ * If we were in a group but are no longer, make sure we add a
+ * semicolon (which needs to be FIRST, as it needs to be at the end
+ * of the last address).
+ */
+
+ if (groupflag && ! mn->m_ingrp) {
+ output = add(";", output);
+ column += 1;
+ }
+
+ groupflag = mn->m_ingrp;
+
+ if (mn->m_gname) {
+ cp = add(mn->m_gname, NULL);
+ }
+
+ if (reformat) {
+ cp = add(adrformat(mn), cp);
+ } else {
+ cp = add(mn->m_text, cp);
+ }
+
+ len = strlen(cp);
+
+ /*
+ * If we're not at the beginning of the line, add a comma and
+ * either a space or a newline.
+ */
+
+ if (column != prefixlen) {
+ if (len + column + 2 > OUTPUTLINELEN) {
+
+ if ((size_t) (prefixlen + 3) < tmpbufsize)
+ tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
+
+ snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
+ output = add(tmpbuf, output);
+ } else {
+ output = add(", ", output);
+ column += 2;
}
}
+
+ /*
+ * Finally add the address
+ */
+
+ output = add(cp, output);
+ column += len;
+ free(cp);
+ cp = NULL;
}
+
+ /*
+ * Just in case we're at the end of a list
+ */
+
+ if (groupflag) {
+ output = add(";", output);
+ }
+
+ output = add("\n", output);
+
+ free(*value);
+ *value = output;
+ output = NULL;
+
+out:
+
+ if (tmpbuf)
+ free(tmpbuf);
+ if (output)
+ free(output);
+
+ return errflag > 0;
}
/*
for (; *string != '\0'; string++) {
if ((isascii((unsigned char) *string))) {
- (*asciilen++);
- if (!qphrasevalid((unsigned char) *string))
+ (*asciilen)++;
+ /*
+ * A space is not a valid phrase character, but we make an
+ * exception for it here, because in q-p a space can be directly
+ * encoded as an underscore.
+ */
+ if (!qphrasevalid((unsigned char) *string) && *string != ' ')
(*specialchars)++;
} else {
(*eightbitchars)++;
}
}
- return eightbitchars > 0;
+ return *eightbitchars > 0;
}
/*
*/
static int
-prefencoding(int ascii, int specials, int eightbits)
+pref_encoding(int ascii, int specials, int eightbits)
{
/*
* The length of the q-p encoding is: