X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/4c24408bdff496a631709326b0d07a4e12fa9277..8c6e995a43e71e012ae133ff8ebea5719d9117fe:/sbr/fmt_rfc2047.c

diff --git a/sbr/fmt_rfc2047.c b/sbr/fmt_rfc2047.c
index a87fc0e8..d98b5008 100644
--- a/sbr/fmt_rfc2047.c
+++ b/sbr/fmt_rfc2047.c
@@ -1,8 +1,4 @@
-
-/*
- * fmt_rfc2047.c -- decode RFC-2047 header format 
- *
- * $Id$
+/* fmt_rfc2047.c -- decode RFC-2047 header format 
  *
  * This code is Copyright (c) 2002, by the authors of nmh.  See the
  * COPYRIGHT file in the root directory of the nmh distribution for
@@ -10,12 +6,12 @@
  */
 
 #include <h/mh.h>
+#include <h/utils.h>
 #ifdef HAVE_ICONV
 #  include <iconv.h>
-#  include <errno.h>
 #endif
 
-static signed char hexindex[] = {
+static const signed char hexindex[] = {
     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
@@ -23,10 +19,18 @@ static signed char hexindex[] = {
     -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
     -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
     -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
 };
 
-static signed char index_64[128] = {
+static const signed char index_64[128] = {
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
     -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
@@ -39,12 +43,16 @@ static signed char index_64[128] = {
 
 #define char64(c) (((unsigned char) (c) > 127) ? -1 : index_64[(unsigned char) (c)])
 
-static int
-unqp (unsigned char byte1, unsigned char byte2)
+/*
+ * Decode the two hex digits of a quoted-printable "=XX" escape
+ */
+
+int
+decode_qp (unsigned char byte1, unsigned char byte2)
 {
     if (hexindex[byte1] == -1 || hexindex[byte2] == -1)
 	return -1;
-    return (hexindex[byte1] << 4 | hexindex[byte2]);
+    return hexindex[byte1] << 4 | hexindex[byte2];
 }
 
 /* Check if character is linear whitespace */
@@ -55,21 +63,22 @@ unqp (unsigned char byte1, unsigned char byte2)
  * Decode the string as a RFC-2047 header field
  */
 
+/* Add character to the destination buffer, and jump to buffull if it fills up */
+#define ADDCHR(C) do { *q++ = (C); dstlen--; if (!dstlen) goto buffull; } while (0)
+
 int
-decode_rfc2047 (char *str, char *dst) 
+decode_rfc2047 (char *str, char *dst, size_t dstlen)
 {
     char *p, *q, *pp;
-    char *startofmime, *endofmime;
+    char *startofmime, *endofmime, *endofcharset;
     int c, quoted_printable;
     int encoding_found = 0;	/* did we decode anything?                */
-    int between_encodings = 0;	/* are we between two encodings?          */
-    int equals_pending = 0;	/* is there a '=' pending?                */
     int whitespace = 0;		/* how much whitespace between encodings? */
 #ifdef HAVE_ICONV
-    int use_iconv = 0;          /* are we converting encoding with iconv? */
-    iconv_t cd;
-    int fromutf8;
-    char *saveq, *convbuf;
+    iconv_t cd = NULL;
+    int fromutf8 = 0;
+    char *saveq, *convbuf = NULL;
+    size_t savedstlen;
 #endif
 
     if (!str)
@@ -82,13 +91,16 @@ decode_rfc2047 (char *str, char *dst)
     if (!strchr (str, '='))
 	return 0;
 
+    bool use_iconv = false; /* are we converting encoding with iconv? */
+    bool between_encodings = false;
+    bool equals_pending = false;
     for (p = str, q = dst; *p; p++) {
 
         /* reset iconv */
 #ifdef HAVE_ICONV
         if (use_iconv) {
 	    iconv_close(cd);
-	    use_iconv = 0;
+	    use_iconv = false;
         }
 #endif
 	/*
@@ -96,9 +108,9 @@ decode_rfc2047 (char *str, char *dst)
 	 * last iteration, then add it first.
 	 */
 	if (equals_pending) {
-	    *q++ = '=';
-	    equals_pending = 0;
-	    between_encodings = 0;	/* we have added non-whitespace text */
+	    ADDCHR('=');
+	    equals_pending = false;
+	    between_encodings = false;	/* we have added non-whitespace text */
 	}
 
 	if (*p != '=') {
@@ -106,15 +118,15 @@ decode_rfc2047 (char *str, char *dst)
 	    if (between_encodings && is_lws(*p))
 		whitespace++;
 	    else
-		between_encodings = 0;	/* we have added non-whitespace text */
-	    *q++ = *p;
+		between_encodings = false;	/* we have added non-whitespace text */
+	    ADDCHR(*p);
 	    continue;
 	}
 
-	equals_pending = 1;	/* we have a '=' pending */
+	equals_pending = true;
 
 	/* Check for initial =? */
-	if (*p == '=' && p[1] && p[1] == '?' && p[2]) {
+	if (*p == '=' && p[1] == '?' && p[2]) {
 	    startofmime = p + 2;
 
 	    /* Scan ahead for the next '?' character */
@@ -124,16 +136,30 @@ decode_rfc2047 (char *str, char *dst)
 	    if (!*pp)
 		continue;
 
+	    /*
+	     * RFC 2231 specifies that language information can appear
+	     * in a charset specification like so:
+	     *
+	     * =?us-ascii*en?Q?Foo?=
+	     *
+	     * Right now we don't use language information, so ignore it.
+	     */
+
+	    for (endofcharset = startofmime;
+	    		*endofcharset != '*' && endofcharset < pp;
+							endofcharset++)
+		;
+
 	    /* Check if character set can be handled natively */
-	    if (!check_charset(startofmime, pp - startofmime)) {
+	    if (!check_charset(startofmime, endofcharset - startofmime)) {
 #ifdef HAVE_ICONV
 	        /* .. it can't. We'll use iconv then. */
-		*pp = '\0';
+		*endofcharset = '\0';
 	        cd = iconv_open(get_charset(), startofmime);
 		fromutf8 = !strcasecmp(startofmime, "UTF-8");
 		*pp = '?';
                 if (cd == (iconv_t)-1) continue;
-		use_iconv = 1;
+		use_iconv = true;
 #else
 		continue;
 #endif
@@ -163,9 +189,9 @@ decode_rfc2047 (char *str, char *dst)
 	     */
 	    endofmime = NULL;
 	    for (pp = startofmime; *pp && *(pp+1); pp++) {
-		if (is_lws(*pp)) {
+		if (is_lws(*pp))
 		    break;
-		} else if (*pp == '?' && pp[1] == '=') {
+		if (*pp == '?' && pp[1] == '=') {
 		    endofmime = pp;
 		    break;
 		}
@@ -177,7 +203,7 @@ decode_rfc2047 (char *str, char *dst)
 	     * We've found an encoded word, so we can drop
 	     * the '=' that was pending
 	     */
-	    equals_pending = 0;
+	    equals_pending = false;
 
 	    /*
 	     * If we are between two encoded words separated only by
@@ -185,36 +211,55 @@ decode_rfc2047 (char *str, char *dst)
 	     * We will roll back the buffer the number of whitespace
 	     * characters we've seen since last encoded word.
 	     */
-	    if (between_encodings)
+	    if (between_encodings) {
 		q -= whitespace;
+		dstlen += whitespace;
+	    }
 
 #ifdef HAVE_ICONV
+	    /*
+	     * Empty encoded text: switch iconv off here so that we never
+	     * malloc 0 bytes and simply skip on to the end.
+	     */
+	    if (endofmime == startofmime && use_iconv) {
+		use_iconv = false;
+		iconv_close(cd);
+            }
+
 	    if (use_iconv) {
-	        saveq = q;
-		if (!(q = convbuf = (char *)malloc(endofmime - startofmime)))
-		    continue;
+		saveq = q;
+		savedstlen = dstlen;
+                q = convbuf = mh_xmalloc(endofmime - startofmime);
             }
+/* ADDCHR2 adds a character when q is, or might be, pointing into convbuf:
+ * on buffer-full it jumps to iconvbuffull so iconv runs before returning.
+ * Sorry about the name.  NOTE(review): "*q++ = c" below bypasses this check.
+ */
+#define ADDCHR2(C) do { *q++ = (C); dstlen--; if (!dstlen) goto iconvbuffull; } while (0)
+#else
+#define ADDCHR2(C) ADDCHR(C)
 #endif
 
 	    /* Now decode the text */
 	    if (quoted_printable) {
 		for (pp = startofmime; pp < endofmime; pp++) {
 		    if (*pp == '=') {
-			c = unqp (pp[1], pp[2]);
+			c = decode_qp (pp[1], pp[2]);
 			if (c == -1)
 			    continue;
 			if (c != 0)
 			    *q++ = c;
 			pp += 2;
 		    } else if (*pp == '_') {
-			*q++ = ' ';
+			ADDCHR2(' ');
 		    } else {
-			*q++ = *pp;
+			ADDCHR2(*pp);
 		    }
 		}
 	    } else {
 		/* base64 */
 		int c1, c2, c3, c4;
+		c1 = c2 = c3 = c4 = -1;
 
 		pp = startofmime;
 		while (pp < endofmime) {
@@ -231,7 +276,7 @@ decode_rfc2047 (char *str, char *dst)
 			pp++;
 		    }
 		    if (pp < endofmime && c1 != -1 && c2 != -1) {
-			*q++ = (c1 << 2) | (c2 >> 4);
+			ADDCHR2((c1 << 2) | (c2 >> 4));
 			pp++;
 		    }
 		    /* 4 + 4 bits */
@@ -240,7 +285,7 @@ decode_rfc2047 (char *str, char *dst)
 			pp++;
 		    }
 		    if (pp < endofmime && c2 != -1 && c3 != -1) {
-			*q++ = ((c2 & 0xF) << 4) | (c3 >> 2);
+			ADDCHR2(((c2 & 0xF) << 4) | (c3 >> 2));
 			pp++;
 		    }
 		    /* 2 + 6 bits */
@@ -249,37 +294,56 @@ decode_rfc2047 (char *str, char *dst)
 			pp++;
 		    }
 		    if (pp < endofmime && c3 != -1 && c4 != -1) {
-			*q++ = ((c3 & 0x3) << 6) | (c4);
+			ADDCHR2(((c3 & 0x3) << 6) | (c4));
 			pp++;
 		    }
 		}
 	    }
 
 #ifdef HAVE_ICONV
+	iconvbuffull:
+	    /* Note that the string at convbuf is not necessarily NUL-terminated here:
+	     * q points to the first byte after the valid part.
+	     */
             /* Convert to native character set */
 	    if (use_iconv) {
 		size_t inbytes = q - convbuf;
-		size_t outbytes = BUFSIZ;
 		ICONV_CONST char *start = convbuf;
 		
 		while (inbytes) {
-		    if (iconv(cd, &start, &inbytes, &saveq, &outbytes) ==
+		    if (iconv(cd, &start, &inbytes, &saveq, &savedstlen) ==
 		            (size_t)-1) {
 			if (errno != EILSEQ) break;
 			/* character couldn't be converted. we output a `?'
 			 * and try to carry on which won't work if
 			 * either encoding was stateful */
-			iconv (cd, 0, 0, &saveq, &outbytes);
+			iconv (cd, 0, 0, &saveq, &savedstlen);
+			if (!savedstlen)
+			    break;
 			*saveq++ = '?';
-                        /* skip to next input character */
+			savedstlen--;
+			if (!savedstlen)
+			    break;
+			/* skip to next input character */
 			if (fromutf8) {
-			    for (start++;(*start & 192) == 128;start++)
-			        inbytes--;
+			    for (++start, --inbytes;
+				 start < q  &&  (*start & 192) == 128;
+				 ++start, --inbytes)
+				continue;
 			} else
 			    start++, inbytes--;
+			if (start >= q)
+			    break;
 		    }
 		}
 		q = saveq;
+		/* Stop now if (1) MIME decoding hit the end of the buffer and we
+		 * have just iconv-converted a partial string, or (2) the iconv
+		 * conversion itself did.  NOTE(review): convbuf is not freed here.
+		 */
+		if (!dstlen || !savedstlen)
+		    goto buffull;
+		dstlen = savedstlen;
 		free(convbuf);
 	    }
 #endif
@@ -291,7 +355,7 @@ decode_rfc2047 (char *str, char *dst)
 	    p = endofmime + 1;
 
 	    encoding_found = 1;		/* we found (at least 1) encoded word */
-	    between_encodings = 1;	/* we have just decoded something     */
+	    between_encodings = true;	/* we have just decoded something     */
 	    whitespace = 0;		/* re-initialize amount of whitespace */
 	}
     }
@@ -301,8 +365,14 @@ decode_rfc2047 (char *str, char *dst)
 
     /* If an equals was pending at end of string, add it now. */
     if (equals_pending)
-	*q++ = '=';
+	ADDCHR('=');
     *q = '\0';
 
     return encoding_found;
+
+  buffull:
+    /* q is currently just off the end of the buffer, so rewind to NUL terminate */
+    q--;
+    *q = '\0';
+    return encoding_found;
 }