Compare character with EOF using signed comparison because…

[nmh] / uip / mhfixmsg.c
diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c

index 4c0a3ef53f6e1135427cc3989f246c17718f571d..f00fc97d06a71f5329f4270991abefe14515cb02 100644 (file)
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -16,8 +16,8 @@
  #define MHFIXMSG_SWITCHES \
      X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
      X("nodecodetext", 0, NDECODETEXTSW) \
-    X("textcodeset", 0, TEXTCODESETSW) \
-    X("notextcodeset", 0, NTEXTCODESETSW) \
+    X("textcharset", 0, TEXTCHARSETSW) \
+    X("notextcharset", 0, NTEXTCHARSETSW) \
      X("reformat", 0, REFORMATSW) \
      X("noreformat", 0, NREFORMATSW) \
      X("replacetextplain", 0, REPLACETEXTPLAINSW) \
@@ -78,14 +78,14 @@ typedef struct fix_transformations {
      int reformat;
      int replacetextplain;
      int decodetext;
-    char *textcodeset;
+    char *textcharset;
  } fix_transformations;
  
  int mhfixmsgsbr (CT *, const fix_transformations *, char *);
  static void reverse_alternative_parts (CT);
  static int fix_boundary (CT *, int *);
  static int get_multipart_boundary (CT, char **);
-static int replace_boundary (CT, char *, const char *);
+static int replace_boundary (CT, char *, char *);
  static int fix_multipart_cte (CT, int *);
  static int set_ce (CT, int);
  static int ensure_text_plain (CT *, CT, int *, int);
@@ -100,9 +100,9 @@ static int boundary_in_content (FILE **, char *, const char *);
  static void transfer_noncontent_headers (CT, CT);
  static int set_ct_type (CT, int type, int subtype, int encoding);
  static int decode_text_parts (CT, int, int *);
-static int content_encoding (CT);
+static int content_encoding (CT, const char **);
  static int strip_crs (CT, int *);
-static int convert_codesets (CT, char *, int *);
+static int convert_charsets (CT, char *, int *);
  static int write_content (CT, char *, char *, int, int);
  static int remove_file (char *);
  static void report (char *, char *, char *, ...);
@@ -125,7 +125,7 @@ main (int argc, char **argv) {
      fx.reformat = fx.fixcte = fx.fixboundary = 1;
      fx.replacetextplain = 0;
      fx.decodetext = CE_8BIT;
-    fx.textcodeset = NULL;
+    fx.textcharset = NULL;
  
      if (nmh_init(argv[0], 1)) { return 1; }
  
@@ -169,13 +169,13 @@ main (int argc, char **argv) {
              case NDECODETEXTSW:
                  fx.decodetext = 0;
                  continue;
-            case TEXTCODESETSW:
+            case TEXTCHARSETSW:
                  if (! (cp = *argp++) || (*cp == '-' && cp[1]))
                      adios (NULL, "missing argument to %s", argp[-2]);
-                fx.textcodeset = cp;
+                fx.textcharset = cp;
                  continue;
-            case NTEXTCODESETSW:
-                fx.textcodeset = 0;
+            case NTEXTCHARSETSW:
+                fx.textcharset = 0;
                  continue;
              case FIXBOUNDARYSW:
                  fx.fixboundary = 1;
@@ -408,8 +408,8 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
      if (status == OK  &&  fx->decodetext) {
          status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
      }
-    if (status == OK  &&  fx->textcodeset != NULL) {
-        status = convert_codesets (*ctp, fx->textcodeset, &message_mods);
+    if (status == OK  &&  fx->textcharset != NULL) {
+        status = convert_charsets (*ctp, fx->textcharset, &message_mods);
      }
  
      if (! (*ctp)->c_umask) {
@@ -622,7 +622,7 @@ get_multipart_boundary (CT ct, char **part_boundary) {
  
  /* Open and copy ct->c_file to file, replacing the multipart boundary. */
  static int
-replace_boundary (CT ct, char *file, const char *boundary) {
+replace_boundary (CT ct, char *file, char *boundary) {
      FILE *fpin, *fpout;
      int compnum, state;
      char buf[BUFSIZ], name[NAMESZ];
@@ -668,10 +668,22 @@ replace_boundary (CT ct, char *file, const char *boundary) {
              if (strcasecmp (TYPE_FIELD, np)) {
                  fprintf (fpout, "%s:%s", np, vp);
              } else {
-                char *new_boundary = update_attr (vp, "boundary=", boundary);
-
-                fprintf (fpout, "%s:%s\n", np, new_boundary);
-                free (new_boundary);
+               char *new_ctline, *new_params;
+
+               replace_param(&ct->c_ctinfo.ci_first_pm,
+                             &ct->c_ctinfo.ci_last_pm, "boundary",
+                             boundary, 0);
+
+               new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
+                                   ct->c_ctinfo.ci_subtype, NULL);
+               new_params = output_params(strlen(TYPE_FIELD) +
+                                          strlen(new_ctline) + 1,
+                                          ct->c_ctinfo.ci_first_pm, NULL, 0);
+                fprintf (fpout, "%s:%s%s\n", np, new_ctline,
+                        new_params ? new_params : "");
+               free(new_ctline);
+               if (new_params)
+                   free(new_params);
              }
  
              free (vp);
@@ -1028,19 +1040,19 @@ divide_part (CT ct) {
  
  static void
  copy_ctinfo (CI dest, CI src) {
-    char **s_ap, **d_ap, **s_vp, **d_vp;
+    PM s_pm, d_pm;
  
      dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
      dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
  
-    for (s_ap = src->ci_attrs, d_ap = dest->ci_attrs,
-             s_vp = src->ci_values, d_vp = dest->ci_values;
-         *s_ap;
-         ++s_ap, ++d_ap, ++s_vp, ++d_vp) {
-        *d_ap = add (*s_ap, NULL);
-        *d_vp = *s_vp;
+    for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
+       d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
+                        s_pm->pm_value, 0);
+       if (s_pm->pm_charset)
+           d_pm->pm_charset = getcpy(s_pm->pm_charset);
+       if (s_pm->pm_lang)
+           d_pm->pm_lang = getcpy(s_pm->pm_lang);
      }
-    *d_ap = NULL;
  
      dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
      dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
@@ -1151,11 +1163,11 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
  static int
  charset_encoding (CT ct) {
      /* norm_charmap() is case sensitive. */
-    char *codeset = upcase (content_charset (ct));
+    char *charset = upcase (content_charset (ct));
      int encoding =
-        strcmp (norm_charmap (codeset), "US-ASCII")  ?  CE_8BIT  :  CE_7BIT;
+        strcmp (norm_charmap (charset), "US-ASCII")  ?  CE_8BIT  :  CE_7BIT;
  
-    free (codeset);
+    free (charset);
      return encoding;
  }
  
@@ -1169,7 +1181,6 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
      CT ct;
      struct part *p;
      struct multipart *m;
-    char *cp;
      const struct str2init *ctinit;
  
      if ((ct = (CT) calloc (1, sizeof *ct)) == NULL)
@@ -1269,14 +1280,8 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
          ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
      }
  
-    name = concat (" ", typename, "/", subtypename, boundary_indicator,
-                   boundary, NULL);
-    if ((cp = strstr (name, boundary_indicator))) {
-        ct->c_ctinfo.ci_attrs[0] = name;
-        ct->c_ctinfo.ci_attrs[1] = NULL;
-        /* ci_values don't get free'd, so point into ci_attrs. */
-        ct->c_ctinfo.ci_values[0] = cp + strlen (boundary_indicator);
-    }
+    add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
+             "boundary", boundary, 0);
  
      p = (struct part *) mh_xmalloc (sizeof *p);
      p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
@@ -1430,16 +1435,19 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
              int ct_encoding;
  
              if (decode_part (ct) == OK  &&  ct->c_cefile.ce_file) {
-                if ((ct_encoding = content_encoding (ct)) == CE_BINARY  &&
-                    encoding != CE_BINARY) {
+                const char *reason = NULL;
+
+                if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
+                    &&  encoding != CE_BINARY) {
                      /* The decoding isn't acceptable so discard it.
                         Leave status as OK to allow other transformations. */
                      if (verbosw) {
                          report (ct->c_partno, ct->c_file,
-                                "will not decode%s because it is binary",
+                                "will not decode%s because it is binary (%s)",
                                  ct->c_partno  ?  ""
                                                :  ct->c_ctline  ?  ct->c_ctline
-                                                               :  "");
+                                                               :  "",
+                                reason);
                      }
                      (void) m_unlink (ct->c_cefile.ce_file);
                      free (ct->c_cefile.ce_file);
@@ -1523,9 +1531,10 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
  
  /* See if the decoded content is 7bit, 8bit, or binary.  It's binary
     if it has any NUL characters, a CR not followed by a LF, or lines
-   greater than 998 characters in length. */
+   greater than 998 characters in length.  If binary, reason is set
+   to a string explaining why. */
  static int
-content_encoding (CT ct) {
+content_encoding (CT ct, const char **reason) {
      CE ce = &ct->c_cefile;
      int encoding = CE_7BIT;
  
@@ -1550,6 +1559,16 @@ content_encoding (CT ct) {
                  if (*cp == '\0'  ||  ++line_len > 998  ||
                      (*cp != '\n'  &&  last_char_was_cr)) {
                      encoding = CE_BINARY;
+                    if (*cp == '\0') {
+                        *reason = "null character";
+                    } else if (line_len > 998) {
+                        *reason = "line length > 998";
+                    } else if (*cp != '\n'  &&  last_char_was_cr) {
+                        *reason = "CR not followed by LF";
+                    } else {
+                        /* Should not reach this. */
+                        *reason = "";
+                    }
                      break;
                  } else if (*cp == '\n') {
                      line_len = 0;
@@ -1572,18 +1591,18 @@ content_encoding (CT ct) {
  static int
  strip_crs (CT ct, int *message_mods) {
      /* norm_charmap() is case sensitive. */
-    char *codeset = upcase (content_charset (ct));
+    char *charset = upcase (content_charset (ct));
      int status = OK;
  
      /* Only strip carriage returns if content is ASCII or another
-       codeset that has the same readily recognizable CR followed by a
+       charset that has the same readily recognizable CR followed by a
         LF.  We can include UTF-8 here because if the high-order bit of
         a UTF-8 byte is 0, then it must be a single-byte ASCII
         character. */
-    if (! strcmp (norm_charmap (codeset), "US-ASCII")  ||
-        ! strncmp (norm_charmap (codeset), "ISO-8859-", 9)  ||
-        ! strncmp (norm_charmap (codeset), "UTF-8", 5)  ||
-        ! strncmp (norm_charmap (codeset), "WINDOWS-12", 10)) {
+    if (! strcmp (norm_charmap (charset), "US-ASCII")  ||
+        ! strncmp (norm_charmap (charset), "ISO-8859-", 9)  ||
+        ! strncmp (norm_charmap (charset), "UTF-8", 5)  ||
+        ! strncmp (norm_charmap (charset), "WINDOWS-12", 10)) {
          char **file = NULL;
          FILE **fp = NULL;
          size_t begin;
@@ -1645,7 +1664,7 @@ strip_crs (CT ct, int *message_mods) {
              if (has_crs) {
                  int fd;
                  char *stripped_content_file;
-                char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL); 
+                char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
  
                  if (tempfile == NULL) {
                      adios (NULL, "unable to create temporary file in %s",
@@ -1707,22 +1726,22 @@ strip_crs (CT ct, int *message_mods) {
          }
      }
  
-    free (codeset);
+    free (charset);
      return status;
  }
  
  
  static int
-convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
+convert_charsets (CT ct, char *dest_charset, int *message_mods) {
      int status = OK;
  
      switch (ct->c_type) {
      case CT_TEXT:
          if (ct->c_subtype == TEXT_PLAIN) {
-            status = convert_charset (ct, dest_codeset, message_mods);
+            status = convert_charset (ct, dest_charset, message_mods);
              if (verbosw  &&  status == OK) {
                 report (ct->c_partno, ct->c_file, "convert %s to %s",
-                       content_charset(ct), dest_codeset);
+                       content_charset(ct), dest_charset);
              }
          }
          break;
@@ -1735,7 +1754,7 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
             For now, it gets passed along as-is by InitMultiPart(). */
          for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
              status =
-                convert_codesets (part->mp_part, dest_codeset, message_mods);
+                convert_charsets (part->mp_part, dest_charset, message_mods);
          }
          break;
      }
@@ -1746,7 +1765,7 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
  
              e = (struct exbody *) ct->c_ctparams;
              status =
-                convert_codesets (e->eb_content, dest_codeset, message_mods);
+                convert_charsets (e->eb_content, dest_charset, message_mods);
          }
          break;