X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/4c24408bdff496a631709326b0d07a4e12fa9277..39ecf70bb:/sbr/fmt_rfc2047.c?ds=inline diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c index a87fc0e8..bd703b43 100644 --- a/sbr/fmt_rfc2047.c +++ b/sbr/fmt_rfc2047.c @@ -1,8 +1,4 @@ - -/* - * fmt_rfc2047.c -- decode RFC-2047 header format - * - * $Id$ +/* fmt_rfc2047.c -- decode RFC-2047 header format * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for @@ -10,9 +6,9 @@ */ #include +#include #ifdef HAVE_ICONV # include -# include #endif static signed char hexindex[] = { @@ -23,6 +19,14 @@ static signed char hexindex[] = { -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; @@ -39,8 +43,12 @@ static signed char index_64[128] = { #define char64(c) (((unsigned char) (c) > 127) ? -1 : index_64[(unsigned char) (c)]) -static int -unqp (unsigned char byte1, unsigned char byte2) +/* + * Decode two quoted-pair characters + */ + +int +decode_qp (unsigned char byte1, unsigned char byte2) { if (hexindex[byte1] == -1 || hexindex[byte2] == -1) return -1; @@ -55,11 +63,14 @@ unqp (unsigned char byte1, unsigned char byte2) * Decode the string as a RFC-2047 header field */ +/* Add character to the destination buffer, and bomb out if it fills up */ +#define ADDCHR(C) do { *q++ = (C); dstlen--; if (!dstlen) goto buffull; } while (0) + int -decode_rfc2047 (char *str, char *dst) +decode_rfc2047 (char *str, char *dst, size_t dstlen) { char *p, *q, *pp; - char *startofmime, *endofmime; + char *startofmime, *endofmime, *endofcharset; int c, quoted_printable; int encoding_found = 0; /* did we decode anything? */ int between_encodings = 0; /* are we between two encodings? */ @@ -67,9 +78,10 @@ decode_rfc2047 (char *str, char *dst) int whitespace = 0; /* how much whitespace between encodings? */ #ifdef HAVE_ICONV int use_iconv = 0; /* are we converting encoding with iconv? */ - iconv_t cd; - int fromutf8; - char *saveq, *convbuf; + iconv_t cd = NULL; + int fromutf8 = 0; + char *saveq, *convbuf = NULL; + size_t savedstlen; #endif if (!str) @@ -96,7 +108,7 @@ decode_rfc2047 (char *str, char *dst) * last iteration, then add it first. */ if (equals_pending) { - *q++ = '='; + ADDCHR('='); equals_pending = 0; between_encodings = 0; /* we have added non-whitespace text */ } @@ -107,14 +119,14 @@ decode_rfc2047 (char *str, char *dst) whitespace++; else between_encodings = 0; /* we have added non-whitespace text */ - *q++ = *p; + ADDCHR(*p); continue; } equals_pending = 1; /* we have a '=' pending */ /* Check for initial =? */ - if (*p == '=' && p[1] && p[1] == '?' && p[2]) { + if (*p == '=' && p[1] == '?' && p[2]) { startofmime = p + 2; /* Scan ahead for the next '?' character */ @@ -124,11 +136,25 @@ decode_rfc2047 (char *str, char *dst) if (!*pp) continue; + /* + * RFC 2231 specifies that language information can appear + * in a charset specification like so: + * + * =?us-ascii*en?Q?Foo?= + * + * Right now we don't use language information, so ignore it. + */ + + for (endofcharset = startofmime; + *endofcharset != '*' && endofcharset < pp; + endofcharset++) + ; + /* Check if character set can be handled natively */ - if (!check_charset(startofmime, pp - startofmime)) { + if (!check_charset(startofmime, endofcharset - startofmime)) { #ifdef HAVE_ICONV /* .. it can't. We'll use iconv then. */ - *pp = '\0'; + *endofcharset = '\0'; cd = iconv_open(get_charset(), startofmime); fromutf8 = !strcasecmp(startofmime, "UTF-8"); *pp = '?'; @@ -163,9 +189,9 @@ decode_rfc2047 (char *str, char *dst) */ endofmime = NULL; for (pp = startofmime; *pp && *(pp+1); pp++) { - if (is_lws(*pp)) { + if (is_lws(*pp)) break; - } else if (*pp == '?' && pp[1] == '=') { + if (*pp == '?' && pp[1] == '=') { endofmime = pp; break; } @@ -185,36 +211,55 @@ decode_rfc2047 (char *str, char *dst) * We will roll back the buffer the number of whitespace * characters we've seen since last encoded word. */ - if (between_encodings) + if (between_encodings) { q -= whitespace; + dstlen += whitespace; + } #ifdef HAVE_ICONV + /* + * empty encoded text. This ensures that we don't + * malloc 0 bytes but skip on to the end + */ + if (endofmime == startofmime && use_iconv) { + use_iconv = 0; + iconv_close(cd); + } + if (use_iconv) { - saveq = q; - if (!(q = convbuf = (char *)malloc(endofmime - startofmime))) - continue; + saveq = q; + savedstlen = dstlen; + q = convbuf = (char *) mh_xmalloc(endofmime - startofmime); } +/* ADDCHR2 is for adding characters when q is or might be convbuf: + * in this case on buffer-full we want to run iconv before returning. + * I apologise for the dreadful name. + */ +#define ADDCHR2(C) do { *q++ = (C); dstlen--; if (!dstlen) goto iconvbuffull; } while (0) +#else +#define ADDCHR2(C) ADDCHR(C) #endif /* Now decode the text */ if (quoted_printable) { for (pp = startofmime; pp < endofmime; pp++) { if (*pp == '=') { - c = unqp (pp[1], pp[2]); + c = decode_qp (pp[1], pp[2]); if (c == -1) continue; if (c != 0) *q++ = c; pp += 2; } else if (*pp == '_') { - *q++ = ' '; + ADDCHR2(' '); } else { - *q++ = *pp; + ADDCHR2(*pp); } } } else { /* base64 */ int c1, c2, c3, c4; + c1 = c2 = c3 = c4 = -1; pp = startofmime; while (pp < endofmime) { @@ -231,7 +276,7 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c1 != -1 && c2 != -1) { - *q++ = (c1 << 2) | (c2 >> 4); + ADDCHR2((c1 << 2) | (c2 >> 4)); pp++; } /* 4 + 4 bits */ @@ -240,7 +285,7 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c2 != -1 && c3 != -1) { - *q++ = ((c2 & 0xF) << 4) | (c3 >> 2); + ADDCHR2(((c2 & 0xF) << 4) | (c3 >> 2)); pp++; } /* 2 + 6 bits */ @@ -249,37 +294,56 @@ decode_rfc2047 (char *str, char *dst) pp++; } if (pp < endofmime && c3 != -1 && c4 != -1) { - *q++ = ((c3 & 0x3) << 6) | (c4); + ADDCHR2(((c3 & 0x3) << 6) | (c4)); pp++; } } } #ifdef HAVE_ICONV + iconvbuffull: + /* NB that the string at convbuf is not necessarily NUL terminated here: + * q points to the first byte after the valid part. + */ /* Convert to native character set */ if (use_iconv) { size_t inbytes = q - convbuf; - size_t outbytes = BUFSIZ; ICONV_CONST char *start = convbuf; while (inbytes) { - if (iconv(cd, &start, &inbytes, &saveq, &outbytes) == + if (iconv(cd, &start, &inbytes, &saveq, &savedstlen) == (size_t)-1) { if (errno != EILSEQ) break; /* character couldn't be converted. we output a `?' * and try to carry on which won't work if * either encoding was stateful */ - iconv (cd, 0, 0, &saveq, &outbytes); + iconv (cd, 0, 0, &saveq, &savedstlen); + if (!savedstlen) + break; *saveq++ = '?'; - /* skip to next input character */ + savedstlen--; + if (!savedstlen) + break; + /* skip to next input character */ if (fromutf8) { - for (start++;(*start & 192) == 128;start++) - inbytes--; + for (++start, --inbytes; + start < q && (*start & 192) == 128; + ++start, --inbytes) + continue; } else start++, inbytes--; + if (start >= q) + break; } } q = saveq; + /* Stop now if (1) we hit the end of the buffer trying to do + * MIME decoding and have just iconv-converted a partial string + * or (2) our iconv-conversion hit the end of the buffer. + */ + if (!dstlen || !savedstlen) + goto buffull; + dstlen = savedstlen; free(convbuf); } #endif @@ -301,8 +365,14 @@ decode_rfc2047 (char *str, char *dst) /* If an equals was pending at end of string, add it now. */ if (equals_pending) - *q++ = '='; + ADDCHR('='); *q = '\0'; return encoding_found; + + buffull: + /* q is currently just off the end of the buffer, so rewind to NUL terminate */ + q--; + *q = '\0'; + return encoding_found; }