X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/ee9fb87fde55c25f9ab1df2cbcc6e9d21d53b235..17378602e0eed3c39e5f5ba9f3b28274b3dd26b2:/sbr/encode_rfc2047.c?ds=sidebyside diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c index fcb1c104..04a74f1a 100644 --- a/sbr/encode_rfc2047.c +++ b/sbr/encode_rfc2047.c @@ -8,6 +8,7 @@ #include #include +#include #include /* @@ -43,15 +44,17 @@ static char *address_headers[] = { c == '/' || c == '=' || c == '_') #define qpspecial(c) (c < ' ' || c == '=' || c == '?' || c == '_') -#define base64len(n) (((n + 2) / 3 ) * 4) /* String len to base64 len */ -#define strbase64(n) (n * 3 / 4) /* Chars that fit in base64 */ +#define base64len(n) ((((n) + 2) / 3 ) * 4) /* String len to base64 len */ +#define strbase64(n) ((n) / 4 * 3) /* Chars that fit in base64 */ #define ENCODELINELIMIT 76 static void unfold_header(char **, int); static int field_encode_address(const char *, char **, int, const char *); -static int field_encode_quoted(const char *, char **, const char *, int, int); +static int field_encode_quoted(const char *, char **, const char *, int, + int, int); static int field_encode_base64(const char *, char **, const char *); +static int scanstring(const char *, int *, int *, int *); static int utf8len(const char *); /* @@ -139,7 +142,7 @@ encode_rfc2047(const char *name, char **value, int encoding, case CE_QUOTED: return field_encode_quoted(name, value, charset, asciicount, - eightbitcount + qpspecialcount); + eightbitcount + qpspecialcount, 0); default: advise(NULL, "Internal error: unknown RFC-2047 encoding type"); @@ -153,7 +156,7 @@ encode_rfc2047(const char *name, char **value, int encoding, static int field_encode_quoted(const char *name, char **value, const char *charset, - int ascii, int encoded) + int ascii, int encoded, int phraserules) { int prefixlen = name ? 
strlen(name) + 2: 0, outlen = 0, column, newline = 1; int charsetlen = strlen(charset), utf8; @@ -225,6 +228,9 @@ field_encode_quoted(const char *name, char **value, const char *charset, /* * Process each character, encoding if necessary + * + * Note that we have a different set of rules if we're processing + * RFC 5322 'phrase' (something you'd see in an address header). */ column++; @@ -232,7 +238,9 @@ field_encode_quoted(const char *name, char **value, const char *charset, if (*p == ' ') { *q++ = '_'; ascii--; - } else if (isascii((int) *p) && !qpspecial((int) *p)) { + } else if (isascii((unsigned char) *p) && + (phraserules ? qphrasevalid((unsigned char) *p) : + !qpspecial((unsigned char) *p))) { *q++ = *p; ascii--; } else { @@ -277,7 +285,10 @@ field_encode_quoted(const char *name, char **value, const char *charset, } } - strcat(q, "?=\n"); + strcat(q, "?="); + + if (prefixlen) + strcat(q, "\n"); free(*value); @@ -297,8 +308,15 @@ static int field_encode_base64(const char *name, char **value, const char *charset) { int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset); - int outlen = 0, numencode; - char *output = NULL, *p = *value, *q, *linestart; + int outlen = 0, numencode, curlen; + char *output = NULL, *p = *value, *q = NULL, *linestart; + + /* + * Skip over any leading white space. + */ + + while (*p == ' ' || *p == '\t') + p++; /* * If we had a zero-length prefix, then just encode the whole field @@ -314,36 +332,40 @@ field_encode_base64(const char *name, char **value, const char *charset) /* * Our very first time, don't pad the line in the front + * + * Note ENCODELINELIMIT is + 2 because of \n \0 */ if (! 
output) { - outlen += ENCODELINELIMIT - prefixlen + 1; + outlen += ENCODELINELIMIT + 2; output = q = mh_xmalloc(outlen); linestart = q - prefixlen; /* Yes, this is intentional */ } else { - int curlen = q - output; + int curstart = linestart - output; + curlen = q - output; - outlen += ENCODELINELIMIT + 1; + outlen += ENCODELINELIMIT + 2; output = mh_xrealloc(output, outlen); - linestart = q = output + curlen; - q += snprintf(q, outlen - (q - output), "%*s", prefixlen, ""); + q = output + curlen; + linestart = output + curstart; } /* * We should have enough space now, so prepend the encoding markers - * and character set information + * and character set information. The leading space is intentional. */ - q += snprintf(q, outlen - (q - output), "=?%s?B?", charset); + q += snprintf(q, outlen - (q - output), " =?%s?B?", charset); /* - * Find out how much room we have left on the line and see how many - * characters we can stuff in. The start of our line is marked - * by "linestart", so use that to figure out how many characters - * are left out of ENCODELINELIMIT. Reserve 2 characters for the - * end markers, and calculate how many characters we can fit into - * that space given the base64 encoding expansion. + * Find out how much room we have left on the line and see how + * many characters we can stuff in. The start of our line + * is marked by "linestart", so use that to figure out how + * many characters are left out of ENCODELINELIMIT. Reserve + * 2 characters for the end markers and calculate how many + * characters we can fit into that space given the base64 + * encoding expansion. */ numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2); @@ -357,9 +379,90 @@ field_encode_base64(const char *name, char **value, const char *charset) /* * RFC 2047 prohibits spanning multibyte characters across tokens. * Right now we only check for UTF-8. + * + * So note the key here ... 
we want to make sure the character BEYOND + * our last character is not a continuation byte. If it's the start + * of a new multibyte character or a single-byte character, that's ok. */ + + if (strcasecmp(charset, "UTF-8") == 0) { + /* + * p points to the start of our current buffer, so p + numencode + * is one past the last character to encode + */ + + while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80)) + numencode--; + + if (numencode == 0) { + advise(NULL, "Internal error: could not find start of " + "UTF-8 character when base64 encoding header"); + return 1; + } + } + + if (writeBase64raw((unsigned char *) p, numencode, + (unsigned char *) q) != OK) { + advise(NULL, "Internal error: base64 encoding of header failed"); + return 1; + } + + p += numencode; + q += base64len(numencode); + + /* + * This will point us at the beginning of the new line (trust me). + */ + + linestart = q + 3; + + /* + * What's going on here? Well, we know we're continuing to the next + * line, so we want to add continuation padding. We also add the + * trailing marker for the RFC 2047 token at this time as well. + * This uses a trick of snprintf(); we tell it to print a zero-length + * string, but pad it out to prefixlen - 1 characters; that ends + * up always printing out the requested number of spaces. We use + * prefixlen - 1 because we always add a space on the starting + * token marker; this makes things work out correctly for the first + * line, which should have a space between the ':' and the start + * of the token. + * + * It's okay if you don't follow all of that. + */ + + q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, ""); + } + + /* + * We're here if there is either no prefix, or we can fit it in less + * than ENCODELINELIMIT characters. Encode the whole thing. 
+ */ + + outlen += prefixlen + 9 + charsetlen + base64len(strlen(p)); + curlen = q - output; + + output = mh_xrealloc(output, outlen); + q = output + curlen; + + q += snprintf(q, outlen - (q - output), "%s=?%s?B?", + prefixlen ? " " : "", charset); + + if (writeBase64raw((unsigned char *) p, strlen(p), + (unsigned char *) q) != OK) { + advise(NULL, "Internal error: base64 encoding of header failed"); + return 1; } + strcat(q, "?="); + + if (prefixlen) + strcat(q, "\n"); + + free(*value); + + *value = output; + return 0; } @@ -427,9 +530,68 @@ unfold_header(char **value, int len) *value = str; } +/* + * Decode a header containing addresses. This means we have to parse + * each address and only encode the display-name or comment field. + */ + static int field_encode_address(const char *name, char **value, int encoding, const char *charset) { - return 0; + int prefixlen = strlen(name) + 2, column = prefixlen, groupflag, errflag; + int eightbitchars; + char *mp, *output = NULL; + struct mailname *mn; + + /* + * Because these are addresses, we need to handle them individually. + * + * Break them down and process them one by one. This means we have to + * rewrite the whole header, but that's unavoidable. + */ + + /* + * The output headers always have to start with a space first. + */ + + output = add(" ", output); + + for (groupflag = 0; mp = getname(*value); ) { + if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) { + errflag++; + continue; + } + + /* + * We only care if the phrase (m_pers) or any trailing comment + * (m_note) have 8-bit characters. If doing q-p, we also need + * to encode anything marked as qspecial(). 
+         */
+    }
+}
+
+/*
+ * Tally ASCII/special/8-bit chars in string; returns nonzero if any are 8-bit
+ */
+
+static int
+scanstring(const char *string, int *asciilen, int *eightbitchars,
+           int *specialchars)
+{
+    *asciilen = 0;
+    *eightbitchars = 0;
+    *specialchars = 0;
+
+    for (; *string != '\0'; string++) {
+        if ((isascii((unsigned char) *string))) {
+            (*asciilen)++;
+            if (!qphrasevalid((unsigned char) *string))
+                (*specialchars)++;
+        } else {
+            (*eightbitchars)++;
+        }
+    }
+
+    return *eightbitchars > 0;
 }