X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/4c24408bdff496a631709326b0d07a4e12fa9277..8c6e995a43e71e012ae133ff8ebea5719d9117fe:/sbr/fmt_rfc2047.c diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c index a87fc0e8..d98b5008 100644 --- a/sbr/fmt_rfc2047.c +++ b/sbr/fmt_rfc2047.c @@ -1,8 +1,4 @@ - -/* - * fmt_rfc2047.c -- decode RFC-2047 header format - * - * $Id$ +/* fmt_rfc2047.c -- decode RFC-2047 header format * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for @@ -10,12 +6,12 @@ */ #include +#include #ifdef HAVE_ICONV # include -# include #endif -static signed char hexindex[] = { +static const signed char hexindex[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, @@ -23,10 +19,18 @@ static signed char hexindex[] = { -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; -static signed char index_64[128] = { +static const signed char index_64[128] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, @@ -39,12 +43,16 @@ static signed char index_64[128] = { #define char64(c) (((unsigned char) (c) > 127) ? -1 : index_64[(unsigned char) (c)]) -static int -unqp (unsigned char byte1, unsigned char byte2) +/* + * Decode two quoted-pair characters + */ + +int +decode_qp (unsigned char byte1, unsigned char byte2) { if (hexindex[byte1] == -1 || hexindex[byte2] == -1) return -1; - return (hexindex[byte1] << 4 | hexindex[byte2]); + return hexindex[byte1] << 4 | hexindex[byte2]; } /* Check if character is linear whitespace */ @@ -55,21 +63,22 @@ unqp (unsigned char byte1, unsigned char byte2) * Decode the string as a RFC-2047 header field */ +/* Add character to the destination buffer, and bomb out if it fills up */ +#define ADDCHR(C) do { *q++ = (C); dstlen--; if (!dstlen) goto buffull; } while (0) + int -decode_rfc2047 (char *str, char *dst) +decode_rfc2047 (char *str, char *dst, size_t dstlen) { char *p, *q, *pp; - char *startofmime, *endofmime; + char *startofmime, *endofmime, *endofcharset; int c, quoted_printable; int encoding_found = 0; /* did we decode anything? */ - int between_encodings = 0; /* are we between two encodings? */ - int equals_pending = 0; /* is there a '=' pending? */ int whitespace = 0; /* how much whitespace between encodings? */ #ifdef HAVE_ICONV - int use_iconv = 0; /* are we converting encoding with iconv? */ - iconv_t cd; - int fromutf8; - char *saveq, *convbuf; + iconv_t cd = NULL; + int fromutf8 = 0; + char *saveq, *convbuf = NULL; + size_t savedstlen; #endif if (!str) @@ -82,13 +91,16 @@ decode_rfc2047 (char *str, char *dst) if (!strchr (str, '=')) return 0; + bool use_iconv = false; /* are we converting encoding with iconv? */ + bool between_encodings = false; + bool equals_pending = false; for (p = str, q = dst; *p; p++) { /* reset iconv */ #ifdef HAVE_ICONV if (use_iconv) { iconv_close(cd); - use_iconv = 0; + use_iconv = false; } #endif /* @@ -96,9 +108,9 @@ decode_rfc2047 (char *str, char *dst) * last iteration, then add it first. */ if (equals_pending) { - *q++ = '='; - equals_pending = 0; - between_encodings = 0; /* we have added non-whitespace text */ + ADDCHR('='); + equals_pending = false; + between_encodings = false; /* we have added non-whitespace text */ } if (*p != '=') { @@ -106,15 +118,15 @@ decode_rfc2047 (char *str, char *dst) if (between_encodings && is_lws(*p)) whitespace++; else - between_encodings = 0; /* we have added non-whitespace text */ - *q++ = *p; + between_encodings = false; /* we have added non-whitespace text */ + ADDCHR(*p); continue; } - equals_pending = 1; /* we have a '=' pending */ + equals_pending = true; /* Check for initial =? */ - if (*p == '=' && p[1] && p[1] == '?' && p[2]) { + if (*p == '=' && p[1] == '?' && p[2]) { startofmime = p + 2; /* Scan ahead for the next '?' character */ @@ -124,16 +136,30 @@ decode_rfc2047 (char *str, char *dst) if (!*pp) continue; + /* + * RFC 2231 specifies that language information can appear + * in a charset specification like so: + * + * =?us-ascii*en?Q?Foo?= + * + * Right now we don't use language information, so ignore it. + */ + + for (endofcharset = startofmime; + *endofcharset != '*' && endofcharset < pp; + endofcharset++) + ; + /* Check if character set can be handled natively */ - if (!check_charset(startofmime, pp - startofmime)) { + if (!check_charset(startofmime, endofcharset - startofmime)) { #ifdef HAVE_ICONV /* .. it can't. We'll use iconv then. */ - *pp = '\0'; + *endofcharset = '\0'; cd = iconv_open(get_charset(), startofmime); fromutf8 = !strcasecmp(startofmime, "UTF-8"); *pp = '?'; if (cd == (iconv_t)-1) continue; - use_iconv = 1; + use_iconv = true; #else continue; #endif @@ -163,9 +189,9 @@ decode_rfc2047 (char *str, char *dst) */ endofmime = NULL; for (pp = startofmime; *pp && *(pp+1); pp++) { - if (is_lws(*pp)) { + if (is_lws(*pp)) break; - } else if (*pp == '?' && pp[1] == '=') { + if (*pp == '?' && pp[1] == '=') { endofmime = pp; break; } @@ -177,7 +203,7 @@ decode_rfc2047 (char *str, char *dst) * We've found an encoded word, so we can drop * the '=' that was pending */ - equals_pending = 0; + equals_pending = false; /* * If we are between two encoded words separated only by @@ -185,36 +211,55 @@ decode_rfc2047 (char *str, char *dst) * We will roll back the buffer the number of whitespace * characters we've seen since last encoded word. */ - if (between_encodings) + if (between_encodings) { q -= whitespace; + dstlen += whitespace; + } #ifdef HAVE_ICONV + /* + * empty encoded text. This ensures that we don't + * malloc 0 bytes but skip on to the end + */ + if (endofmime == startofmime && use_iconv) { + use_iconv = false; + iconv_close(cd); + } + if (use_iconv) { - saveq = q; - if (!(q = convbuf = (char *)malloc(endofmime - startofmime))) - continue; + saveq = q; + savedstlen = dstlen; + q = convbuf = mh_xmalloc(endofmime - startofmime); } +/* ADDCHR2 is for adding characters when q is or might be convbuf: + * in this case on buffer-full we want to run iconv before returning. + * I apologise for the dreadful name. + */ +#define ADDCHR2(C) do { *q++ = (C); dstlen--; if (!dstlen) goto iconvbuffull; } while (0) +#else +#define ADDCHR2(C) ADDCHR(C) #endif /* Now decode the text */ if (quoted_printable) { for (pp = startofmime; pp < endofmime; pp++) { if (*pp == '=') { - c = unqp (pp[1], pp[2]); + c = decode_qp (pp[1], pp[2]); if (c == -1) continue; if (c != 0) *q++ = c; pp += 2; } else if (*pp == '_') { - *q++ = ' '; + ADDCHR2(' '); } else { - *q++ = *pp; + ADDCHR2(*pp); } } } else { /* base64 */ int c1, c2, c3, c4; + c1 = c2 = c3 = c4 = -1; pp = startofmime; while (pp < endofmime) { @@ -231,7 +276,7 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c1 != -1 && c2 != -1) { - *q++ = (c1 << 2) | (c2 >> 4); + ADDCHR2((c1 << 2) | (c2 >> 4)); pp++; } /* 4 + 4 bits */ @@ -240,7 +285,7 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c2 != -1 && c3 != -1) { - *q++ = ((c2 & 0xF) << 4) | (c3 >> 2); + ADDCHR2(((c2 & 0xF) << 4) | (c3 >> 2)); pp++; } /* 2 + 6 bits */ @@ -249,37 +294,56 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c3 != -1 && c4 != -1) { - *q++ = ((c3 & 0x3) << 6) | (c4); + ADDCHR2(((c3 & 0x3) << 6) | (c4)); pp++; } } } #ifdef HAVE_ICONV + iconvbuffull: + /* NB that the string at convbuf is not necessarily NUL terminated here: + * q points to the first byte after the valid part. + */ /* Convert to native character set */ if (use_iconv) { size_t inbytes = q - convbuf; - size_t outbytes = BUFSIZ; ICONV_CONST char *start = convbuf; while (inbytes) { - if (iconv(cd, &start, &inbytes, &saveq, &outbytes) == + if (iconv(cd, &start, &inbytes, &saveq, &savedstlen) == (size_t)-1) { if (errno != EILSEQ) break; /* character couldn't be converted. we output a `?' * and try to carry on which won't work if * either encoding was stateful */ - iconv (cd, 0, 0, &saveq, &outbytes); + iconv (cd, 0, 0, &saveq, &savedstlen); + if (!savedstlen) + break; *saveq++ = '?'; - /* skip to next input character */ + savedstlen--; + if (!savedstlen) + break; + /* skip to next input character */ if (fromutf8) { - for (start++;(*start & 192) == 128;start++) - inbytes--; + for (++start, --inbytes; + start < q && (*start & 192) == 128; + ++start, --inbytes) + continue; } else start++, inbytes--; + if (start >= q) + break; } } q = saveq; + /* Stop now if (1) we hit the end of the buffer trying to do + * MIME decoding and have just iconv-converted a partial string + * or (2) our iconv-conversion hit the end of the buffer. + */ + if (!dstlen || !savedstlen) + goto buffull; + dstlen = savedstlen; free(convbuf); } #endif @@ -291,7 +355,7 @@ decode_rfc2047 (char *str, char *dst) p = endofmime + 1; encoding_found = 1; /* we found (at least 1) encoded word */ - between_encodings = 1; /* we have just decoded something */ + between_encodings = true; /* we have just decoded something */ whitespace = 0; /* re-initialize amount of whitespace */ } } @@ -301,8 +365,14 @@ decode_rfc2047 (char *str, char *dst) /* If an equals was pending at end of string, add it now. */ if (equals_pending) - *q++ = '='; + ADDCHR('='); *q = '\0'; return encoding_found; + + buffull: + /* q is currently just off the end of the buffer, so rewind to NUL terminate */ + q--; + *q = '\0'; + return encoding_found; }