X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/1388183fc96a86e5fbbb9d43cebdedd9f44ab64f..ec173fd2c:/sbr/fmt_rfc2047.c?ds=inline diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c index 9d5b6d3c..8022c2f6 100644 --- a/sbr/fmt_rfc2047.c +++ b/sbr/fmt_rfc2047.c @@ -1,22 +1,19 @@ - -/* - * fmt_rfc2047.c -- decode RFC-2047 header format - * - * $Id$ +/* fmt_rfc2047.c -- decode RFC-2047 header format * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for * complete copyright information. */ -#include -#include +#include "h/mh.h" +#include "fmt_rfc2047.h" +#include "check_charset.h" +#include "h/utils.h" #ifdef HAVE_ICONV # include -# include #endif -static signed char hexindex[] = { +static const signed char hexindex[] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, @@ -24,10 +21,18 @@ static signed char hexindex[] = { -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; -static signed char index_64[128] = { +static const signed char index_64[128] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, @@ -40,12 +45,16 @@ static signed char index_64[128] = { #define char64(c) (((unsigned char) (c) > 127) ? -1 : index_64[(unsigned char) (c)]) -static int -unqp (unsigned char byte1, unsigned char byte2) +/* + * Decode two quoted-pair characters + */ + +int +decode_qp (unsigned char byte1, unsigned char byte2) { if (hexindex[byte1] == -1 || hexindex[byte2] == -1) return -1; - return (hexindex[byte1] << 4 | hexindex[byte2]); + return hexindex[byte1] << 4 | hexindex[byte2]; } /* Check if character is linear whitespace */ @@ -63,15 +72,12 @@ int decode_rfc2047 (char *str, char *dst, size_t dstlen) { char *p, *q, *pp; - char *startofmime, *endofmime; + char *startofmime, *endofmime, *endofcharset; int c, quoted_printable; int encoding_found = 0; /* did we decode anything? */ - int between_encodings = 0; /* are we between two encodings? */ - int equals_pending = 0; /* is there a '=' pending? */ int whitespace = 0; /* how much whitespace between encodings? */ #ifdef HAVE_ICONV - int use_iconv = 0; /* are we converting encoding with iconv? */ - iconv_t cd; + iconv_t cd = NULL; int fromutf8 = 0; char *saveq, *convbuf = NULL; size_t savedstlen; @@ -87,13 +93,18 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) if (!strchr (str, '=')) return 0; +#ifdef HAVE_ICONV + bool use_iconv = false; /* are we converting encoding with iconv? */ +#endif + bool between_encodings = false; + bool equals_pending = false; for (p = str, q = dst; *p; p++) { /* reset iconv */ #ifdef HAVE_ICONV if (use_iconv) { iconv_close(cd); - use_iconv = 0; + use_iconv = false; } #endif /* @@ -102,8 +113,8 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) */ if (equals_pending) { ADDCHR('='); - equals_pending = 0; - between_encodings = 0; /* we have added non-whitespace text */ + equals_pending = false; + between_encodings = false; /* we have added non-whitespace text */ } if (*p != '=') { @@ -111,15 +122,15 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) if (between_encodings && is_lws(*p)) whitespace++; else - between_encodings = 0; /* we have added non-whitespace text */ + between_encodings = false; /* we have added non-whitespace text */ ADDCHR(*p); continue; } - equals_pending = 1; /* we have a '=' pending */ + equals_pending = true; /* Check for initial =? */ - if (*p == '=' && p[1] && p[1] == '?' && p[2]) { + if (*p == '=' && p[1] == '?' && p[2]) { startofmime = p + 2; /* Scan ahead for the next '?' character */ @@ -129,16 +140,30 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) if (!*pp) continue; + /* + * RFC 2231 specifies that language information can appear + * in a charset specification like so: + * + * =?us-ascii*en?Q?Foo?= + * + * Right now we don't use language information, so ignore it. + */ + + for (endofcharset = startofmime; + *endofcharset != '*' && endofcharset < pp; + endofcharset++) + ; + /* Check if character set can be handled natively */ - if (!check_charset(startofmime, pp - startofmime)) { + if (!check_charset(startofmime, endofcharset - startofmime)) { #ifdef HAVE_ICONV /* .. it can't. We'll use iconv then. */ - *pp = '\0'; + *endofcharset = '\0'; cd = iconv_open(get_charset(), startofmime); - fromutf8 = !mh_strcasecmp(startofmime, "UTF-8"); + fromutf8 = !strcasecmp(startofmime, "UTF-8"); *pp = '?'; if (cd == (iconv_t)-1) continue; - use_iconv = 1; + use_iconv = true; #else continue; #endif @@ -168,9 +193,9 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) */ endofmime = NULL; for (pp = startofmime; *pp && *(pp+1); pp++) { - if (is_lws(*pp)) { + if (is_lws(*pp)) break; - } else if (*pp == '?' && pp[1] == '=') { + if (*pp == '?' && pp[1] == '=') { endofmime = pp; break; } @@ -182,7 +207,7 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) * We've found an encoded word, so we can drop * the '=' that was pending */ - equals_pending = 0; + equals_pending = false; /* * If we are between two encoded words separated only by @@ -201,14 +226,14 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) * malloc 0 bytes but skip on to the end */ if (endofmime == startofmime && use_iconv) { - use_iconv = 0; + use_iconv = false; iconv_close(cd); } if (use_iconv) { saveq = q; savedstlen = dstlen; - q = convbuf = (char *) mh_xmalloc(endofmime - startofmime); + q = convbuf = mh_xmalloc(endofmime - startofmime); } /* ADDCHR2 is for adding characters when q is or might be convbuf: * in this case on buffer-full we want to run iconv before returning. @@ -223,7 +248,7 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) if (quoted_printable) { for (pp = startofmime; pp < endofmime; pp++) { if (*pp == '=') { - c = unqp (pp[1], pp[2]); + c = decode_qp (pp[1], pp[2]); if (c == -1) continue; if (c != 0) @@ -238,6 +263,7 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) } else { /* base64 */ int c1, c2, c3, c4; + c1 = c2 = c3 = c4 = -1; pp = startofmime; while (pp < endofmime) { @@ -304,8 +330,10 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) break; /* skip to next input character */ if (fromutf8) { - for (start++;(start < q) && ((*start & 192) == 128);start++) - inbytes--; + for (++start, --inbytes; + start < q && (*start & 192) == 128; + ++start, --inbytes) + continue; } else start++, inbytes--; if (start >= q) @@ -331,7 +359,7 @@ decode_rfc2047 (char *str, char *dst, size_t dstlen) p = endofmime + 1; encoding_found = 1; /* we found (at least 1) encoded word */ - between_encodings = 1; /* we have just decoded something */ + between_encodings = true; /* we have just decoded something */ whitespace = 0; /* re-initialize amount of whitespace */ } }