X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/dd4503c862644d4dbc45cf97db64c2c1ac001f8d..23816efd3e8e294e8f1c1ce811ac3a1cd15d334e:/sbr/encode_rfc2047.c diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c index 3c4f45e5..bb45b5c1 100644 --- a/sbr/encode_rfc2047.c +++ b/sbr/encode_rfc2047.c @@ -1,15 +1,19 @@ -/* - * Routines to encode message headers using RFC 2047-encoding. +/* encode_rfc2047.c -- encode message headers using RFC 2047 encoding. * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for * complete copyright information. */ -#include -#include -#include -#include +#include "h/mh.h" +#include "encode_rfc2047.h" +#include "check_charset.h" +#include "error.h" +#include "h/mhparse.h" +#include "h/addrsbr.h" +#include "h/utils.h" +#include "base64.h" +#include "unquote.h" /* * List of headers that contain addresses and as a result require special @@ -79,13 +83,13 @@ encode_rfc2047(const char *name, char **value, int encoding, if (isascii((unsigned char) *p)) { asciicount++; if (qpspecial((unsigned char) *p)) - qpspecialcount++; + qpspecialcount++; } else eightbitcount++; } if (eightbitcount == 0) - return 0; + return 0; /* * Some rules from RFC 2047: @@ -104,10 +108,10 @@ encode_rfc2047(const char *name, char **value, int encoding, */ if (charset == NULL) - charset = write_charset_8bit(); + charset = write_charset_8bit(); if (strcasecmp(charset, "US-ASCII") == 0) { - advise(NULL, "Cannot use US-ASCII with 8 bit characters in header"); + inform("Cannot use US-ASCII with 8 bit characters in header"); return 1; } @@ -117,7 +121,7 @@ encode_rfc2047(const char *name, char **value, int encoding, */ for (i = 0; address_headers[i]; i++) { - if (strcasecmp(name, address_headers[i]) == 0) + if (strcasecmp(name, address_headers[i]) == 0) return field_encode_address(name, value, encoding, charset); } @@ -125,28 +129,28 @@ encode_rfc2047(const char *name, char **value, int encoding, * On the encoding we choose, and the specifics of encoding: * * - If a specified encoding is passed in, we use that. - * - If more than 50% of the characters are high-bit, we use base64 - * and encode the whole field as one atom (possibly split). - * - Otherwise, we use quoted-printable. + * - Otherwise, pick which encoding is shorter. + * + * We don't quite handle continuation right here, but it should be + * pretty close. */ if (encoding == CE_UNKNOWN) - encoding = (eightbitcount * 10 / (asciicount + eightbitcount) > 5) ? - CE_BASE64 : CE_QUOTED; + encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount); unfold_header(value, asciicount + eightbitcount); switch (encoding) { case CE_BASE64: - return field_encode_base64(name, value, charset); + return field_encode_base64(name, value, charset); case CE_QUOTED: return field_encode_quoted(name, value, charset, asciicount, eightbitcount + qpspecialcount, 0); default: - advise(NULL, "Internal error: unknown RFC-2047 encoding type"); + inform("Internal error: unknown RFC-2047 encoding type"); return 1; } } @@ -159,9 +163,9 @@ static int field_encode_quoted(const char *name, char **value, const char *charset, int ascii, int encoded, int phraserules) { - int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1; + int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column; int charsetlen = strlen(charset), utf8; - char *output = NULL, *p, *q; + char *output = NULL, *p, *q = NULL; /* * Right now we just encode the whole thing. Maybe later on we'll @@ -174,11 +178,12 @@ field_encode_quoted(const char *name, char **value, const char *charset, utf8 = strcasecmp(charset, "UTF-8") == 0; + bool newline = true; while (*p != '\0') { - /* + /* * Start a new line, if it's time */ - if (newline) { + if (newline) { /* * If it's the start of the header, we don't need to pad it * @@ -210,12 +215,12 @@ field_encode_quoted(const char *name, char **value, const char *charset, for (i = 0; i < prefixlen; i++) *q++ = ' '; } else { - /* + /* * A bit of a hack here; the header can contain multiple * spaces (probably at least one) until we get to the * actual text. Copy until we get to a non-space. */ - output = mh_xmalloc(outlen); + output = mh_xmalloc(outlen); q = output; while (is_fws(*p)) *q++ = *p++; @@ -224,7 +229,7 @@ field_encode_quoted(const char *name, char **value, const char *charset, tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset); q += tokenlen; column = prefixlen + tokenlen; - newline = 0; + newline = false; } /* @@ -241,7 +246,7 @@ field_encode_quoted(const char *name, char **value, const char *charset, ascii--; } else if (isascii((unsigned char) *p) && (phraserules ? qphrasevalid((unsigned char) *p) : - !qpspecial((unsigned char) *p))) { + !qpspecial((unsigned char) *p))) { *q++ = *p; ascii--; } else { @@ -265,7 +270,7 @@ field_encode_quoted(const char *name, char **value, const char *charset, continue; if (column >= ENCODELINELIMIT - 2) { - newline = 1; + newline = true; } else if (utf8) { /* * Okay, this is a bit weird, but to explain a bit more ... @@ -281,15 +286,24 @@ field_encode_quoted(const char *name, char **value, const char *charset, * allow for the encoded output. */ if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) { - newline = 1; + newline = true; } } } - strcat(q, "?="); + if (q == NULL) { + /* This should never happen, but just in case. Found by + clang static analyzer. */ + inform("null output encoding for %s, continuing...", *value); + return 1; + } + *q++ = '?'; + *q++ = '='; if (prefixlen) - strcat(q, "\n"); + *q++ = '\n'; + + *q = '\0'; free(*value); @@ -310,14 +324,14 @@ field_encode_base64(const char *name, char **value, const char *charset) { int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset); int outlen = 0, numencode, curlen; - char *output = NULL, *p = *value, *q = NULL, *linestart; + char *output = NULL, *p = *value, *q = NULL, *linestart = NULL; /* * Skip over any leading white space. */ while (*p == ' ' || *p == '\t') - p++; + p++; /* * If we had a zero-length prefix, then just encode the whole field @@ -329,7 +343,7 @@ field_encode_base64(const char *name, char **value, const char *charset) */ while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen + - prefixlen) > ENCODELINELIMIT)) { + prefixlen) > ENCODELINELIMIT)) { /* * Our very first time, don't pad the line in the front @@ -372,8 +386,8 @@ field_encode_base64(const char *name, char **value, const char *charset) numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2); if (numencode <= 0) { - advise(NULL, "Internal error: tried to encode %d characters " - "in base64", numencode); + inform("Internal error: tried to encode %d characters " + "in base64", numencode); return 1; } @@ -393,10 +407,10 @@ field_encode_base64(const char *name, char **value, const char *charset) */ while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80)) - numencode--; + numencode--; if (numencode == 0) { - advise(NULL, "Internal error: could not find start of " + inform("Internal error: could not find start of " "UTF-8 character when base64 encoding header"); return 1; } @@ -404,7 +418,7 @@ field_encode_base64(const char *name, char **value, const char *charset) if (writeBase64raw((unsigned char *) p, numencode, (unsigned char *) q) != OK) { - advise(NULL, "Internal error: base64 encoding of header failed"); + inform("Internal error: base64 encoding of header failed"); return 1; } @@ -447,18 +461,18 @@ field_encode_base64(const char *name, char **value, const char *charset) q = output + curlen; q += snprintf(q, outlen - (q - output), "%s=?%s?B?", - prefixlen ? " " : "", charset); + prefixlen ? " " : "", charset); if (writeBase64raw((unsigned char *) p, strlen(p), - (unsigned char *) q) != OK) { - advise(NULL, "Internal error: base64 encoding of header failed"); + (unsigned char *) q) != OK) { + inform("Internal error: base64 encoding of header failed"); return 1; } strcat(q, "?="); if (prefixlen) - strcat(q, "\n"); + strcat(q, "\n"); free(*value); @@ -480,14 +494,14 @@ utf8len(const char *p) int len = 1; if (*p == '\0') - return 0; + return 0; if (isascii((unsigned char) *p) || (((unsigned char) *p) & 0xc0) == 0x80) - return 0; + return 0; p++; while ((((unsigned char) *p++) & 0xc0) == 0x80) - len++; + len++; return len; } @@ -506,7 +520,7 @@ unfold_header(char **value, int len) char *p = str, *q = *value; while (*q != '\0') { - if (*q == '\n') { + if (*q == '\n') { /* * When we get a newline, skip to the next non-whitespace * character and add a space to replace all of the whitespace @@ -514,10 +528,10 @@ unfold_header(char **value, int len) * This has the side effect of stripping off the final newline * for the header; we put it back in the encoding routine. */ - while (is_fws(*q++)) - ; + while (is_fws(*q)) + q++; if (*q == '\0') - break; + break; *p++ = ' '; } else { @@ -541,13 +555,17 @@ field_encode_address(const char *name, char **value, int encoding, const char *charset) { int prefixlen = strlen(name) + 2, column = prefixlen, groupflag; - int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0; - int retval; + int asciichars; + int specialchars; + int eightbitchars; + bool reformat = false; + bool errflag = false; size_t len; - char *mp, *output = NULL; + char *mp, *cp = NULL, *output = NULL; char *tmpbuf = NULL; size_t tmpbufsize = 0; struct mailname *mn; + char errbuf[BUFSIZ]; /* * Because these are addresses, we need to handle them individually. @@ -564,11 +582,14 @@ field_encode_address(const char *name, char **value, int encoding, output = add(" ", output); for (groupflag = 0; (mp = getname(*value)); ) { - if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) { - errflag++; + if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) { + inform("%s: %s", errbuf, mp); + errflag = true; continue; } + reformat = false; + /* * We only care if the phrase (m_pers) or any trailing comment * (m_note) have 8-bit characters. If doing q-p, we also need @@ -592,7 +613,7 @@ field_encode_address(const char *name, char **value, int encoding, */ if (encoding == CE_UNKNOWN) - encoding = pref_encoding(asciichars, specialchars, + encoding = pref_encoding(asciichars, specialchars, eightbitchars); /* @@ -605,22 +626,27 @@ field_encode_address(const char *name, char **value, int encoding, switch (encoding) { case CE_BASE64: - retval = field_encode_base64(NULL, &mn->m_pers, charset); + if (field_encode_base64(NULL, &mn->m_pers, charset)) { + errflag = true; + goto out; + } break; case CE_QUOTED: - retval = field_encode_quoted(NULL, &mn->m_pers, charset, - asciichars, - eightbitchars + specialchars, 1); + if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars, + eightbitchars + specialchars, 1)) { + errflag = true; + goto out; + } break; default: - advise(NULL, "Internal error: unknown RFC-2047 encoding type"); - errflag++; + inform("Internal error: unknown RFC-2047 encoding type"); + errflag = true; goto out; } - reformat++; + reformat = true; } check_note: @@ -636,16 +662,14 @@ field_encode_address(const char *name, char **value, int encoding, if (! mn->m_note) goto do_reformat; - len = strlen(mn->m_note); - - if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) { + if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) { tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1); } if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') { - advise(NULL, "Internal error: Invalid note field \"%s\"", - mn->m_note); - errflag++; + inform("Internal error: Invalid note field \"%s\"", + mn->m_note); + errflag = true; goto out; } @@ -659,39 +683,134 @@ field_encode_address(const char *name, char **value, int encoding, */ if (encoding == CE_UNKNOWN) - encoding = pref_encoding(asciichars, specialchars, + encoding = pref_encoding(asciichars, specialchars, eightbitchars); switch (encoding) { case CE_BASE64: - retval = field_encode_base64(NULL, &tmpbuf, charset); + if (field_encode_base64(NULL, &tmpbuf, charset)) { + errflag = true; + goto out; + } break; case CE_QUOTED: - retval = field_encode_quoted(NULL, &tmpbuf, charset, - asciichars, - eightbitchars + specialchars, 1); + if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars, + eightbitchars + specialchars, 1)) { + errflag = true; + goto out; + } break; default: - advise(NULL, "Internal error: unknown RFC-2047 encoding type"); - errflag++; + inform("Internal error: unknown RFC-2047 encoding type"); + errflag = true; goto out; } - reformat++; + reformat = true; + + /* + * Make sure the size of tmpbuf is correct (it always gets + * reallocated in the above functions). + */ + + tmpbufsize = strlen(tmpbuf) + 1; + + /* + * Put the note field back surrounded by parenthesis. + */ + + mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2); + + snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf); + } + +do_reformat: + + /* + * So, some explanation is in order. + * + * We know we need to rewrite at least one address in the header, + * otherwise we wouldn't be here. If we had to reformat this + * particular address, then run it through adrformat(). Otherwise + * we can use m_text directly. + */ + + /* + * If we were in a group but are no longer, make sure we add a + * semicolon (which needs to be FIRST, as it needs to be at the end + * of the last address). + */ + + if (groupflag && ! mn->m_ingrp) { + output = add(";", output); + column++; } + groupflag = mn->m_ingrp; + if (mn->m_gname) { + cp = mh_xstrdup(mn->m_gname); + } + + if (reformat) { + cp = add(adrformat(mn), cp); + } else { + cp = add(mn->m_text, cp); + } + + len = strlen(cp); + + /* + * If we're not at the beginning of the line, add a command and + * either a space or a newline. + */ + + if (column != prefixlen) { + if (len + column + 2 > OUTPUTLINELEN) { + + if ((size_t) (prefixlen + 3) < tmpbufsize) + tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3); + + snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, ""); + output = add(tmpbuf, output); + } else { + output = add(", ", output); + column += 2; + } + } + + /* + * Finally add the address + */ + + output = add(cp, output); + column += len; + free(cp); + cp = NULL; } -out: + /* + * Just in case we're at the end of a list + */ + + if (groupflag) { + output = add(";", output); + } - if (tmpbuf) - free(tmpbuf); + output = add("\n", output); - return errflag > 0; + free(*value); + *value = output; + output = NULL; + +out: + free(tmpbuf); + free(output); + + return errflag; } /* @@ -707,10 +826,15 @@ scanstring(const char *string, int *asciilen, int *eightbitchars, *specialchars = 0; for (; *string != '\0'; string++) { - if ((isascii((unsigned char) *string))) { + if ((isascii((unsigned char) *string))) { (*asciilen)++; - if (!qphrasevalid((unsigned char) *string)) - (*specialchars)++; + /* + * So, a space is not a valid phrase character, but we're counting + * an exception here, because in q-p a space can be directly + * encoded as an underscore. + */ + if (!qphrasevalid((unsigned char) *string) && *string != ' ') + (*specialchars)++; } else { (*eightbitchars)++; } @@ -749,5 +873,5 @@ pref_encoding(int ascii, int specials, int eightbits) */ return base64len(ascii + eightbits) < (ascii - specials + - (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED; + (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED; }