Fix invalid pointer arithmetic.

[nmh] / sbr / encode_rfc2047.c
diff --git a/sbr/encode_rfc2047.c b/sbr/encode_rfc2047.c

index 0df5f7064cb4cf82a79ec2c7d3a79e42b973c029..0ded2e932cfa88fae443df7c9a96c1aa80307a0e 100644 (file)
--- a/sbr/encode_rfc2047.c
+++ b/sbr/encode_rfc2047.c
@@ -1,5 +1,4 @@
-/*
- * Routines to encode message headers using RFC 2047-encoding.
+/* encode_rfc2047.c -- encode message headers using RFC 2047 encoding.
   *
   * This code is Copyright (c) 2002, by the authors of nmh.  See the
   * COPYRIGHT file in the root directory of the nmh distribution for
@@ -10,6 +9,8 @@
  #include <h/mhparse.h>
  #include <h/addrsbr.h>
  #include <h/utils.h>
+#include "base64.h"
+#include "unquote.h"
  
  /*
   * List of headers that contain addresses and as a result require special
@@ -107,7 +108,7 @@ encode_rfc2047(const char *name, char **value, int encoding,
         charset = write_charset_8bit();
  
      if (strcasecmp(charset, "US-ASCII") == 0) {
-       advise(NULL, "Cannot use US-ASCII with 8 bit characters in header");
+       inform("Cannot use US-ASCII with 8 bit characters in header");
         return 1;
      }
  
@@ -125,14 +126,14 @@ encode_rfc2047(const char *name, char **value, int encoding,
       * On the encoding we choose, and the specifics of encoding:
       *
       * - If a specified encoding is passed in, we use that.
-     * - If more than 50% of the characters are high-bit, we use base64
-     *   and encode the whole field as one atom (possibly split).
-     * - Otherwise, we use quoted-printable.
+     * - Otherwise, pick which encoding is shorter.
+     *
+     * We don't quite handle continuation right here, but it should be
+     * pretty close.
       */
  
      if (encoding == CE_UNKNOWN)
-       encoding = (eightbitcount * 10 / (asciicount + eightbitcount) > 5) ?
-                                               CE_BASE64 : CE_QUOTED;
+        encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
  
      unfold_header(value, asciicount + eightbitcount);
  
@@ -146,7 +147,7 @@ encode_rfc2047(const char *name, char **value, int encoding,
                                    eightbitcount + qpspecialcount, 0);
  
      default:
-       advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+       inform("Internal error: unknown RFC-2047 encoding type");
         return 1;
      }
  }
@@ -161,7 +162,7 @@ field_encode_quoted(const char *name, char **value, const char *charset,
  {
      int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1;
      int charsetlen = strlen(charset), utf8;
-    char *output = NULL, *p, *q;
+    char *output = NULL, *p, *q = NULL;
  
      /*
       * Right now we just encode the whole thing.  Maybe later on we'll
@@ -281,15 +282,24 @@ field_encode_quoted(const char *name, char **value, const char *charset,
              * allow for the encoded output.
              */
             if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
-               newline = 1;
+               newline = 1;
             }
         }
      }
  
-    strcat(q, "?=");
+    if (q == NULL) {
+       /* This should never happen, but just in case.  Found by
+          clang static analyzer. */
+       inform("null output encoding for %s, continuing...", *value);
+       return 1;
+    }
+    *q++ = '?';
+    *q++ = '=';
  
      if (prefixlen)
-       strcat(q, "\n");
+       *q++ = '\n';
+
+    *q = '\0';
  
      free(*value);
  
@@ -310,7 +320,7 @@ field_encode_base64(const char *name, char **value, const char *charset)
  {
      int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
      int outlen = 0, numencode, curlen;
-    char *output = NULL, *p = *value, *q = NULL, *linestart;
+    char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
  
      /*
       * Skip over any leading white space.
@@ -372,7 +382,7 @@ field_encode_base64(const char *name, char **value, const char *charset)
         numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
  
         if (numencode <= 0) {
-           advise(NULL, "Internal error: tried to encode %d characters "
+           inform("Internal error: tried to encode %d characters "
                    "in base64", numencode);
             return 1;
         }
@@ -396,7 +406,7 @@ field_encode_base64(const char *name, char **value, const char *charset)
                 numencode--;
  
             if (numencode == 0) {
-               advise(NULL, "Internal error: could not find start of "
+               inform("Internal error: could not find start of "
                        "UTF-8 character when base64 encoding header");
                 return 1;
             }
@@ -404,7 +414,7 @@ field_encode_base64(const char *name, char **value, const char *charset)
  
         if (writeBase64raw((unsigned char *) p, numencode,
                            (unsigned char *) q) != OK) {
-           advise(NULL, "Internal error: base64 encoding of header failed");
+           inform("Internal error: base64 encoding of header failed");
             return 1;
         }
  
@@ -451,7 +461,7 @@ field_encode_base64(const char *name, char **value, const char *charset)
  
      if (writeBase64raw((unsigned char *) p, strlen(p),
                        (unsigned char *) q) != OK) {
-       advise(NULL, "Internal error: base64 encoding of header failed");
+       inform("Internal error: base64 encoding of header failed");
         return 1;
      }
  
@@ -514,8 +524,8 @@ unfold_header(char **value, int len)
              * This has the side effect of stripping off the final newline
              * for the header; we put it back in the encoding routine.
              */
-           while (is_fws(*q++))
-               ;
+           while (is_fws(*q))
+               q++;
             if (*q == '\0')
                 break;
  
@@ -542,12 +552,12 @@ field_encode_address(const char *name, char **value, int encoding,
  {
      int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
      int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0;
-    int retval;
      size_t len;
      char *mp, *cp = NULL, *output = NULL;
      char *tmpbuf = NULL;
      size_t tmpbufsize = 0;
      struct mailname *mn;
+    char errbuf[BUFSIZ];
  
      /*
       * Because these are addresses, we need to handle them individually.
@@ -564,7 +574,8 @@ field_encode_address(const char *name, char **value, int encoding,
      output = add(" ", output);
  
      for (groupflag = 0; (mp = getname(*value)); ) {
-       if ((mn = getm(mp, NULL, 0, AD_HOST, NULL)) == NULL) {
+       if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
+           inform("%s: %s", errbuf, mp);
             errflag++;
             continue;
         }
@@ -607,17 +618,22 @@ field_encode_address(const char *name, char **value, int encoding,
             switch (encoding) {
  
             case CE_BASE64:
-               retval = field_encode_base64(NULL, &mn->m_pers, charset);
+               if (field_encode_base64(NULL, &mn->m_pers, charset)) {
+                   errflag++;
+                   goto out;
+               }
                 break;
  
             case CE_QUOTED:
-               retval = field_encode_quoted(NULL, &mn->m_pers, charset,
-                                            asciichars,
-                                            eightbitchars + specialchars, 1);
+               if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
+                                       eightbitchars + specialchars, 1)) {
+                   errflag++;
+                   goto out;
+               }
                 break;
  
             default:
-               advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+               inform("Internal error: unknown RFC-2047 encoding type");
                 errflag++;
                 goto out;
             }
@@ -638,14 +654,12 @@ field_encode_address(const char *name, char **value, int encoding,
         if (! mn->m_note)
             goto do_reformat;
  
-       len = strlen(mn->m_note);
-
-       if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
+       if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
         }
  
         if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
-           advise(NULL, "Internal error: Invalid note field \"%s\"",
+           inform("Internal error: Invalid note field \"%s\"",
                    mn->m_note);
             errflag++;
             goto out;
@@ -667,17 +681,22 @@ field_encode_address(const char *name, char **value, int encoding,
             switch (encoding) {
  
             case CE_BASE64:
-               retval = field_encode_base64(NULL, &tmpbuf, charset);
+               if (field_encode_base64(NULL, &tmpbuf, charset)) {
+                   errflag++;
+                   goto out;
+               }
                 break;
  
             case CE_QUOTED:
-               retval = field_encode_quoted(NULL, &tmpbuf, charset,
-                                            asciichars,
-                                            eightbitchars + specialchars, 1);
+               if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
+                                       eightbitchars + specialchars, 1)) {
+                   errflag++;
+                   goto out;
+               }
                 break;
  
             default:
-               advise(NULL, "Internal error: unknown RFC-2047 encoding type");
+               inform("Internal error: unknown RFC-2047 encoding type");
                 errflag++;
                 goto out;
             }
@@ -711,13 +730,27 @@ do_reformat:
          * we can use m_text directly.
          */
  
+       /*
+        * If we were in a group but are no longer, make sure we add a
+        * semicolon (which needs to be FIRST, as it needs to be at the end
+        * of the last address).
+        */
+
+       if (groupflag && ! mn->m_ingrp) {
+           output = add(";", output);
+           column++;
+       }
+
+       groupflag = mn->m_ingrp;
+
+       if (mn->m_gname) {
+           cp = mh_xstrdup(mn->m_gname);
+       }
+
         if (reformat) {
-           if (mn->m_gname) {
-               cp = add(mn->m_gname, NULL);
-           }
             cp = add(adrformat(mn), cp);
         } else {
-           cp = add(mn->m_text, NULL);
+           cp = add(mn->m_text, cp);
         }
  
         len = strlen(cp);
@@ -749,28 +782,25 @@ do_reformat:
         column += len;
         free(cp);
         cp = NULL;
+    }
  
-       /*
-        * If we were in a group but are no longer, make sure we add a
-        * trailing semicolon.
-        */
-
-       if (groupflag && ! mn->m_ingrp) {
-           output = add(";", output);
-       }
+    /*
+     * If the last address was still inside a group, close the group.
+     */
  
-       groupflag = mn->m_ingrp;
+    if (groupflag) {
+       output = add(";", output);
      }
  
-   *value = output;
-   output = NULL;
+    output = add("\n", output);
  
-out:
+    free(*value);
+    *value = output;
+    output = NULL;
  
-    if (tmpbuf)
-       free(tmpbuf);
-    if (output)
-       free(output);
+out:
+    free(tmpbuf);
+    free(output);
  
      return errflag > 0;
  }
@@ -790,7 +820,12 @@ scanstring(const char *string, int *asciilen, int *eightbitchars,
      for (; *string != '\0'; string++) {
         if ((isascii((unsigned char) *string))) {
             (*asciilen)++;
-           if (!qphrasevalid((unsigned char) *string))
+           /*
+            * A space is not a valid phrase character, but we make an
+            * exception and do not count it as special, because in q-p a
+            * space can be directly encoded as an underscore.
+            */
+           if (!qphrasevalid((unsigned char) *string) && *string != ' ')
                 (*specialchars)++;
         } else {
             (*eightbitchars)++;