X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/fc6ab5f54b16af90d04ca2dc70af82b9e97ebb9c..94187a80bd60baab4b9c4b949ad820d730578123:/sbr/encode_rfc2047.c?ds=sidebyside diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c index 0df5f706..0ded2e93 100644 --- a/sbr/encode_rfc2047.c +++ b/sbr/encode_rfc2047.c @@ -1,5 +1,4 @@ -/* - * Routines to encode message headers using RFC 2047-encoding. +/* encode_rfc2047.c -- encode message headers using RFC 2047 encoding. * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for @@ -10,6 +9,8 @@ #include #include #include +#include "base64.h" +#include "unquote.h" /* * List of headers that contain addresses and as a result require special @@ -107,7 +108,7 @@ encode_rfc2047(const char *name, char **value, int encoding, charset = write_charset_8bit(); if (strcasecmp(charset, "US-ASCII") == 0) { - advise(NULL, "Cannot use US-ASCII with 8 bit characters in header"); + inform("Cannot use US-ASCII with 8 bit characters in header"); return 1; } @@ -125,14 +126,14 @@ encode_rfc2047(const char *name, char **value, int encoding, * On the encoding we choose, and the specifics of encoding: * * - If a specified encoding is passed in, we use that. - * - If more than 50% of the characters are high-bit, we use base64 - * and encode the whole field as one atom (possibly split). - * - Otherwise, we use quoted-printable. + * - Otherwise, pick which encoding is shorter. + * + * We don't quite handle continuation right here, but it should be + * pretty close. */ if (encoding == CE_UNKNOWN) - encoding = (eightbitcount * 10 / (asciicount + eightbitcount) > 5) ? - CE_BASE64 : CE_QUOTED; + encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount); unfold_header(value, asciicount + eightbitcount); @@ -146,7 +147,7 @@ encode_rfc2047(const char *name, char **value, int encoding, eightbitcount + qpspecialcount, 0); default: - advise(NULL, "Internal error: unknown RFC-2047 encoding type"); + inform("Internal error: unknown RFC-2047 encoding type"); return 1; } } @@ -161,7 +162,7 @@ field_encode_quoted(const char *name, char **value, const char *charset, { int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1; int charsetlen = strlen(charset), utf8; - char *output = NULL, *p, *q; + char *output = NULL, *p, *q = NULL; /* * Right now we just encode the whole thing. Maybe later on we'll @@ -281,15 +282,24 @@ field_encode_quoted(const char *name, char **value, const char *charset, * allow for the encoded output. */ if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) { - newline = 1; + newline = 1; } } } - strcat(q, "?="); + if (q == NULL) { + /* This should never happen, but just in case. Found by + clang static analyzer. */ + inform("null output encoding for %s, continuing...", *value); + return 1; + } + *q++ = '?'; + *q++ = '='; if (prefixlen) - strcat(q, "\n"); + *q++ = '\n'; + + *q = '\0'; free(*value); @@ -310,7 +320,7 @@ field_encode_base64(const char *name, char **value, const char *charset) { int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset); int outlen = 0, numencode, curlen; - char *output = NULL, *p = *value, *q = NULL, *linestart; + char *output = NULL, *p = *value, *q = NULL, *linestart = NULL; /* * Skip over any leading white space. @@ -372,7 +382,7 @@ field_encode_base64(const char *name, char **value, const char *charset) numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2); if (numencode <= 0) { - advise(NULL, "Internal error: tried to encode %d characters " + inform("Internal error: tried to encode %d characters " "in base64", numencode); return 1; } @@ -396,7 +406,7 @@ field_encode_base64(const char *name, char **value, const char *charset) numencode--; if (numencode == 0) { - advise(NULL, "Internal error: could not find start of " + inform("Internal error: could not find start of " "UTF-8 character when base64 encoding header"); return 1; } @@ -404,7 +414,7 @@ field_encode_base64(const char *name, char **value, const char *charset) if (writeBase64raw((unsigned char *) p, numencode, (unsigned char *) q) != OK) { - advise(NULL, "Internal error: base64 encoding of header failed"); + inform("Internal error: base64 encoding of header failed"); return 1; } @@ -451,7 +461,7 @@ field_encode_base64(const char *name, char **value, const char *charset) if (writeBase64raw((unsigned char *) p, strlen(p), (unsigned char *) q) != OK) { - advise(NULL, "Internal error: base64 encoding of header failed"); + inform("Internal error: base64 encoding of header failed"); return 1; } @@ -514,8 +524,8 @@ unfold_header(char **value, int len) * This has the side effect of stripping off the final newline * for the header; we put it back in the encoding routine. */ - while (is_fws(*q++)) - ; + while (is_fws(*q)) + q++; if (*q == '\0') break; @@ -542,12 +552,12 @@ field_encode_address(const char *name, char **value, int encoding, { int prefixlen = strlen(name) + 2, column = prefixlen, groupflag; int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0; - int retval; size_t len; char *mp, *cp = NULL, *output = NULL; char *tmpbuf = NULL; size_t tmpbufsize = 0; struct mailname *mn; + char errbuf[BUFSIZ]; /* * Because these are addresses, we need to handle them individually. @@ -564,7 +574,8 @@ field_encode_address(const char *name, char **value, int encoding, output = add(" ", output); for (groupflag = 0; (mp = getname(*value)); ) { - if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) { + if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) { + inform("%s: %s", errbuf, mp); errflag++; continue; } @@ -607,17 +618,22 @@ field_encode_address(const char *name, char **value, int encoding, switch (encoding) { case CE_BASE64: - retval = field_encode_base64(NULL, &mn->m_pers, charset); + if (field_encode_base64(NULL, &mn->m_pers, charset)) { + errflag++; + goto out; + } break; case CE_QUOTED: - retval = field_encode_quoted(NULL, &mn->m_pers, charset, - asciichars, - eightbitchars + specialchars, 1); + if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars, + eightbitchars + specialchars, 1)) { + errflag++; + goto out; + } break; default: - advise(NULL, "Internal error: unknown RFC-2047 encoding type"); + inform("Internal error: unknown RFC-2047 encoding type"); errflag++; goto out; } @@ -638,14 +654,12 @@ field_encode_address(const char *name, char **value, int encoding, if (! mn->m_note) goto do_reformat; - len = strlen(mn->m_note); - - if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) { + if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) { tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1); } if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') { - advise(NULL, "Internal error: Invalid note field \"%s\"", + inform("Internal error: Invalid note field \"%s\"", mn->m_note); errflag++; goto out; @@ -667,17 +681,22 @@ field_encode_address(const char *name, char **value, int encoding, switch (encoding) { case CE_BASE64: - retval = field_encode_base64(NULL, &tmpbuf, charset); + if (field_encode_base64(NULL, &tmpbuf, charset)) { + errflag++; + goto out; + } break; case CE_QUOTED: - retval = field_encode_quoted(NULL, &tmpbuf, charset, - asciichars, - eightbitchars + specialchars, 1); + if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars, + eightbitchars + specialchars, 1)) { + errflag++; + goto out; + } break; default: - advise(NULL, "Internal error: unknown RFC-2047 encoding type"); + inform("Internal error: unknown RFC-2047 encoding type"); errflag++; goto out; } @@ -711,13 +730,27 @@ do_reformat: * we can use m_text directly. */ + /* + * If we were in a group but are no longer, make sure we add a + * semicolon (which needs to be FIRST, as it needs to be at the end + * of the last address). + */ + + if (groupflag && ! mn->m_ingrp) { + output = add(";", output); + column++; + } + + groupflag = mn->m_ingrp; + + if (mn->m_gname) { + cp = mh_xstrdup(mn->m_gname); + } + if (reformat) { - if (mn->m_gname) { - cp = add(mn->m_gname, NULL); - } cp = add(adrformat(mn), cp); } else { - cp = add(mn->m_text, NULL); + cp = add(mn->m_text, cp); } len = strlen(cp); @@ -749,28 +782,25 @@ do_reformat: column += len; free(cp); cp = NULL; + } - /* - * If we were in a group but are no longer, make sure we add a - * trailing semicolon. - */ - - if (groupflag && ! mn->m_ingrp) { - output = add(";", output); - } + /* + * Just in case we're at the end of a list + */ - groupflag = mn->m_ingrp; + if (groupflag) { + output = add(";", output); } - *value = output; - output = NULL; + output = add("\n", output); -out: + free(*value); + *value = output; + output = NULL; - if (tmpbuf) - free(tmpbuf); - if (output) - free(output); +out: + free(tmpbuf); + free(output); return errflag > 0; } @@ -790,7 +820,12 @@ scanstring(const char *string, int *asciilen, int *eightbitchars, for (; *string != '\0'; string++) { if ((isascii((unsigned char) *string))) { (*asciilen)++; - if (!qphrasevalid((unsigned char) *string)) + /* + * So, a space is not a valid phrase character, but we're counting + * an exception here, because in q-p a space can be directly + * encoded as an underscore. + */ + if (!qphrasevalid((unsigned char) *string) && *string != ' ') (*specialchars)++; } else { (*eightbitchars)++;