seq_nameok(): Distinguish three identical error messages.

[nmh] / uip / mhfixmsg.c
diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c

index 25116206e92f6fda792915ce2af05858e28c2db3..df0306552f755a817208a8fcdbcb02b4d9a3d3ac 100644 (file)
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -5,13 +5,38 @@
   * distribution for complete copyright information.
   */
  
-#include <h/mh.h>
-#include <h/fmt_scan.h>
-#include <h/mime.h>
-#include <h/mhparse.h>
+#include "h/mh.h"
+#include "sbr/m_name.h"
+#include "sbr/m_gmprot.h"
+#include "sbr/m_getfld.h"
+#include "sbr/getarguments.h"
+#include "sbr/concat.h"
+#include "sbr/seq_setprev.h"
+#include "sbr/seq_setcur.h"
+#include "sbr/seq_save.h"
+#include "sbr/smatch.h"
+#include "sbr/fmt_rfc2047.h"
+#include "sbr/cpydata.h"
+#include "sbr/trimcpy.h"
+#include "sbr/m_convert.h"
+#include "sbr/m_backup.h"
+#include "sbr/getfolder.h"
+#include "sbr/folder_read.h"
+#include "sbr/context_save.h"
+#include "sbr/context_replace.h"
+#include "sbr/context_find.h"
+#include "sbr/readconfig.h"
+#include "sbr/ambigsw.h"
+#include "sbr/path.h"
+#include "sbr/print_version.h"
+#include "sbr/print_help.h"
+#include "sbr/error.h"
+#include "h/fmt_scan.h"
+#include "h/mime.h"
+#include "h/mhparse.h"
  #include "h/done.h"
-#include <h/utils.h>
-#include <h/signals.h>
+#include "h/utils.h"
+#include "h/signals.h"
  #include "sbr/m_maildir.h"
  #include "sbr/m_mktemp.h"
  #include "sbr/mime_type.h"
@@ -25,6 +50,8 @@
      X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
      X("nodecodetext", 0, NDECODETEXTSW) \
      X("decodetypes", 0, DECODETYPESW) \
+    X("decodeheaderfieldbodies utf-8", 0, DECODEHEADERFIELDBODIESSW) \
+    X("nodecodeheaderfieldbodies", 0, NDECODEHEADERFIELDBODIESSW) \
      X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
      X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
      X("textcharset", 0, TEXTCHARSETSW) \
@@ -74,6 +101,7 @@ typedef struct fix_transformations {
      int replacetextplain;
      int decodetext;
      char *decodetypes;
+    char *decodeheaderfieldbodies; /* NULL, or "utf-8"/"utf8" in any case. */
      /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
      int lf_line_endings;
      char *textcharset;
@@ -112,6 +140,7 @@ static int least_restrictive_encoding (CT) PURE;
  static int less_restrictive (int, int);
  static int convert_charsets (CT, char *, int *);
  static int fix_always (CT, int *);
+static int decode_header_field_bodies (CT, int *);
  static int fix_filename_param (char *, char *, PM *, PM *);
  static int fix_filename_encoding (CT);
  static int write_content (CT, const char *, char *, FILE *, int, int);
@@ -123,7 +152,8 @@ static void pipeser (int);
  
  
  int
-main (int argc, char **argv) {
+main (int argc, char **argv)
+{
      int msgnum;
      char *cp, *file = NULL, *folder = NULL;
      char *maildir = NULL, buf[100], *outfile = NULL;
@@ -141,6 +171,7 @@ main (int argc, char **argv) {
      fx.replacetextplain = 0;
      fx.decodetext = CE_8BIT;
      fx.decodetypes = "text,application/ics";  /* Default, per man page. */
+    fx.decodeheaderfieldbodies = NULL;
      fx.lf_line_endings = 0;
      fx.textcharset = NULL;
  
@@ -193,6 +224,21 @@ main (int argc, char **argv) {
                  }
                  fx.decodetypes = cp;
                  continue;
+            case DECODEHEADERFIELDBODIESSW:
+                if (! (cp = *argp++)  ||  *cp == '-') {
+                    die("missing argument to %s", argp[-2]);
+                }
+                fx.decodeheaderfieldbodies = cp;
+                if (strcasecmp (cp, "utf-8")  && strcasecmp (cp, "utf8")) {
+                    /* Only UTF-8 is accepted because UTF-8 strings can't have
+                       embedded nulls.  Other encodings share that property,
+                       but we won't bother to enumerate them. */
+                    die("-decodeheaderfieldbodies only supports utf-8");
+                }
+                continue;
+            case NDECODEHEADERFIELDBODIESSW:
+                fx.decodeheaderfieldbodies = NULL;
+                continue;
              case CRLFLINEBREAKSSW:
                  fx.lf_line_endings = 0;
                  continue;
@@ -517,7 +563,8 @@ main (int argc, char **argv) {
   */
  static int
  mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
-             FILE **infp, char *outfile, FILE **outfp) {
+             FILE **infp, char *outfile, FILE **outfp)
+{
      /* Store input filename in case one of the transformations, i.e.,
         fix_boundary(), rewrites to a tmp file. */
      char *input_filename = maildir
@@ -571,6 +618,9 @@ mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
                                      &message_mods);
          update_cte (*ctp);
      }
+    if (status == OK  &&  fx->decodeheaderfieldbodies) {
+        status = decode_header_field_bodies(*ctp, &message_mods);
+    }
      if (status == OK  &&  fx->textcharset != NULL) {
          status = convert_charsets (*ctp, fx->textcharset, &message_mods);
      }
@@ -624,7 +674,8 @@ mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
   */
  static int
  copy_input_to_output (const char *input_filename, FILE *infp,
-                      const char *output_filename, FILE *outfp) {
+                      const char *output_filename, FILE *outfp)
+{
      int in = fileno (infp);
      int out = fileno (outfp);
      int status = OK;
@@ -643,7 +694,8 @@ copy_input_to_output (const char *input_filename, FILE *infp,
   * Fix mismatched outer level boundary.
   */
  static int
-fix_boundary (CT *ct, int *message_mods) {
+fix_boundary (CT *ct, int *message_mods)
+{
      struct multipart *mp;
      int status = OK;
  
@@ -719,7 +771,8 @@ fix_boundary (CT *ct, int *message_mods) {
   * Find boundary at end of multipart.
   */
  static int
-get_multipart_boundary (CT ct, char **part_boundary) {
+get_multipart_boundary (CT ct, char **part_boundary)
+{
      char buffer[NMH_BUFSIZ];
      char *end_boundary = NULL;
      off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
@@ -806,7 +859,8 @@ get_multipart_boundary (CT ct, char **part_boundary) {
   * Open and copy ct->c_file to file, replacing the multipart boundary.
   */
  static int
-replace_boundary (CT ct, char *file, char *boundary) {
+replace_boundary (CT ct, char *file, char *boundary)
+{
      FILE *fpin, *fpout;
      int compnum, state;
      char buf[NMH_BUFSIZ], name[NAMESZ];
@@ -913,7 +967,8 @@ replace_boundary (CT ct, char *file, char *boundary) {
   * Fix Content-Type header to reflect the content of its part.
   */
  static int
-fix_types (CT ct, svector_t fixtypes, int *message_mods) {
+fix_types (CT ct, svector_t fixtypes, int *message_mods)
+{
      int status = OK;
  
      switch (ct->c_type) {
@@ -1027,7 +1082,8 @@ fix_types (CT ct, svector_t fixtypes, int *message_mods) {
   * Replace a substring, allocating space to hold the new one.
   */
  char *
-replace_substring (char **str, const char *old, const char *new) {
+replace_substring (char **str, const char *old, const char *new)
+{
      char *cp;
  
      if ((cp = strstr (*str, old))) {
@@ -1056,7 +1112,8 @@ replace_substring (char **str, const char *old, const char *new) {
   * Remove a name=value parameter, given just its name, from a header value.
   */
  char *
-remove_parameter (char *str, const char *name) {
+remove_parameter (char *str, const char *name)
+{
      /* It looks to me, based on the BNF in RFC 2045, than there can't
         be whitespace between the parameter name and the "=", or
         between the "=" and the parameter value. */
@@ -1105,7 +1162,8 @@ remove_parameter (char *str, const char *name) {
   * 8 bit.
   */
  static int
-fix_composite_cte (CT ct, int *message_mods) {
+fix_composite_cte (CT ct, int *message_mods)
+{
      int status = OK;
  
      if (ct->c_type == CT_MESSAGE  ||  ct->c_type == CT_MULTIPART) {
@@ -1177,7 +1235,8 @@ fix_composite_cte (CT ct, int *message_mods) {
   * Set content encoding.
   */
  static int
-set_ce (CT ct, int encoding) {
+set_ce (CT ct, int encoding)
+{
      const char *ce = ce_str (encoding);
      const struct str2init *ctinit = get_ce_method (ce);
  
@@ -1234,7 +1293,8 @@ set_ce (CT ct, int encoding) {
   * Make sure each text part has a corresponding text/plain part.
   */
  static int
-ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
+ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain)
+{
      int status = OK;
  
      switch ((*ct)->c_type) {
@@ -1395,7 +1455,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
   */
  static int
  find_textplain_sibling (CT parent, int replacetextplain,
-                        int *new_subpart_number) {
+                        int *new_subpart_number)
+{
      struct multipart *mp = (struct multipart *) parent->c_ctparams;
      struct part *part, *prev;
      bool has_text_plain = false;
@@ -1436,7 +1497,8 @@ find_textplain_sibling (CT parent, int replacetextplain,
   * Insert a new text/plain part.
   */
  static int
-insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
+insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent)
+{
      struct multipart *mp = (struct multipart *) parent->c_ctparams;
      struct part *new_part;
  
@@ -1465,7 +1527,8 @@ insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
   * Create a text/plain part to go along with non-plain sibling part.
   */
  static CT
-build_text_plain_part (CT encoded_part) {
+build_text_plain_part (CT encoded_part)
+{
      CT tp_part = divide_part (encoded_part);
      char *tmp_plain_file = NULL;
  
@@ -1503,7 +1566,8 @@ build_text_plain_part (CT encoded_part) {
   * Slip new text/plain part into a new multipart/alternative.
   */
  static int
-insert_into_new_mp_alt (CT *ct, int *message_mods) {
+insert_into_new_mp_alt (CT *ct, int *message_mods)
+{
      CT tp_part = build_text_plain_part (*ct);
      int status = OK;
  
@@ -1543,7 +1607,8 @@ insert_into_new_mp_alt (CT *ct, int *message_mods) {
   * Clone a MIME part.
   */
  static CT
-divide_part (CT ct) {
+divide_part (CT ct)
+{
      CT new_part;
  
      NEW0(new_part);
@@ -1573,7 +1638,8 @@ divide_part (CT ct) {
   * Copy the content info from one part to another.
   */
  static void
-copy_ctinfo (CI dest, CI src) {
+copy_ctinfo (CI dest, CI src)
+{
      PM s_pm, d_pm;
  
      dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
@@ -1599,7 +1665,8 @@ copy_ctinfo (CI dest, CI src) {
   * Decode content.
   */
  static int
-decode_part (CT ct) {
+decode_part (CT ct)
+{
      char *tmp_decoded;
      int status;
      FILE *file;
@@ -1629,7 +1696,8 @@ decode_part (CT ct) {
   * be in the future for other than text types.
   */
  static int
-reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
+reformat_part (CT ct, char *file, char *type, char *subtype, int c_type)
+{
      int output_subtype, output_encoding;
      const char *reason = NULL;
      char *cp, *cf;
@@ -1695,7 +1763,8 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
   * Fill in a multipart/alternative part.
   */
  static CT
-build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
+build_multipart_alt (CT first_alt, CT new_part, int type, int subtype)
+{
      char *boundary_prefix = "----=_nmh-multipart";
      char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
      char *boundary_indicator = "; boundary=";
@@ -1831,7 +1900,8 @@ return_null:
   * Check that the boundary does not appear in the content.
   */
  static int
-boundary_in_content (FILE **fp, char *file, const char *boundary) {
+boundary_in_content (FILE **fp, char *file, const char *boundary)
+{
      char buffer[NMH_BUFSIZ];
      size_t bytes_read;
      bool found_boundary = false;
@@ -1858,7 +1928,8 @@ boundary_in_content (FILE **fp, char *file, const char *boundary) {
   * Remove all non-Content headers.
   */
  static void
-transfer_noncontent_headers (CT old, CT new) {
+transfer_noncontent_headers (CT old, CT new)
+{
      HF hp, hp_prev;
  
      hp_prev = hp = old->c_first_hf;
@@ -1902,7 +1973,8 @@ transfer_noncontent_headers (CT old, CT new) {
   * Set content type.
   */
  static int
-set_ct_type (CT ct, int type, int subtype, int encoding) {
+set_ct_type (CT ct, int type, int subtype, int encoding)
+{
      char *typename = ct_type_str (type);
      char *subtypename = ct_subtype_str (type, subtype);
      /* E.g, " text/plain" */
@@ -1963,7 +2035,8 @@ set_ct_type (CT ct, int type, int subtype, int encoding) {
   */
  static int
  decode_text_parts (CT ct, int encoding, const char *decodetypes,
-                   int *message_mods) {
+                   int *message_mods)
+{
      int status = OK;
      int lf_line_endings = 0;
  
@@ -2083,7 +2156,8 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes,
   * decodetypes (which came from the -decodetypes switch).
   */
  static int
-should_decode(const char *decodetypes, const char *type, const char *subtype) {
+should_decode(const char *decodetypes, const char *type, const char *subtype)
+{
      /* Quick search for matching type[/subtype] in decodetypes:  bracket
         decodetypes with commas, then search for ,type, and ,type/subtype, in
         it. */
@@ -2118,7 +2192,8 @@ should_decode(const char *decodetypes, const char *type, const char *subtype) {
   *  to a string explaining why.
   */
  static int
-content_encoding (CT ct, const char **reason) {
+content_encoding (CT ct, const char **reason)
+{
      CE ce = &ct->c_cefile;
      int encoding = CE_7BIT;
  
@@ -2177,7 +2252,8 @@ content_encoding (CT ct, const char **reason) {
   * Strip carriage returns from content.
   */
  static int
-strip_crs (CT ct, int *message_mods) {
+strip_crs (CT ct, int *message_mods)
+{
      char *charset = content_charset (ct);
      int status = OK;
  
@@ -2331,7 +2407,8 @@ strip_crs (CT ct, int *message_mods) {
   * of the part C-T-E's.
   */
  static void
-update_cte (CT ct) {
+update_cte (CT ct)
+{
      const int least_restrictive_enc = least_restrictive_encoding (ct);
  
      if (least_restrictive_enc != CE_UNKNOWN  &&
@@ -2360,7 +2437,8 @@ update_cte (CT ct) {
   * within a message.
   */
  static int
-least_restrictive_encoding (CT ct) {
+least_restrictive_encoding (CT ct)
+{
      int encoding = CE_UNKNOWN;
  
      switch (ct->c_type) {
@@ -2408,7 +2486,8 @@ least_restrictive_encoding (CT ct) {
   *   CE_8BIT is less restrictive than CE_7BIT.
   */
  static int
-less_restrictive (int encoding, int second_encoding) {
+less_restrictive (int encoding, int second_encoding)
+{
      switch (second_encoding) {
      case CE_BINARY:
          return encoding != CE_BINARY;
@@ -2427,7 +2506,8 @@ less_restrictive (int encoding, int second_encoding) {
   * Convert character set of each part.
   */
  static int
-convert_charsets (CT ct, char *dest_charset, int *message_mods) {
+convert_charsets (CT ct, char *dest_charset, int *message_mods)
+{
      int status = OK;
  
      switch (ct->c_type) {
@@ -2492,7 +2572,8 @@ convert_charsets (CT ct, char *dest_charset, int *message_mods) {
   *    headers, respectively.
   */
  static int
-fix_always (CT ct, int *message_mods) {
+fix_always (CT ct, int *message_mods)
+{
      int status = OK;
  
      switch (ct->c_type) {
@@ -2570,11 +2651,72 @@ fix_always (CT ct, int *message_mods) {
  }
  
  
+/* Decodes RFC 2047 encoded UTF-8 header field values of ct and, recursively,
+ * of its subparts.  Increments *message_mods for each value it rewrites.
+ * Returns NOTOK if any decode fails, otherwise OK.  Similar to
+ * fix_filename_param(), but does not modify any MIME parameter values. */
+static int
+decode_header_field_bodies (CT ct, int *message_mods)
+{
+    int status = OK;
+
+    switch (ct->c_type) {
+    case CT_MULTIPART: {  /* Visit each part until one fails. */
+        struct multipart *m = (struct multipart *) ct->c_ctparams;
+        struct part *part;
+
+        for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
+            status = decode_header_field_bodies (part->mp_part, message_mods);
+        }
+        break;
+    }
+
+    case CT_MESSAGE:  /* Only the MESSAGE_EXTERNAL subtype is recursed into. */
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *e = (struct exbody *) ct->c_ctparams;
+
+            status = decode_header_field_bodies (e->eb_content, message_mods);
+        }
+        break;
+    }  /* No default case: other types skip straight to the loop below. */
+
+    HF hf;
+
+    for (hf = ct->c_first_hf; hf; hf = hf->next) {  /* ct's own header fields. */
+        /* Only decode UTF-8 values, matched case-insensitively by charset. */
+        if (hf->value  &&  has_suffix(hf->value, "?=\n")  &&
+            (! strncasecmp (hf->value, " =?utf8?", 8)  ||
+             ! strncasecmp (hf->value, " =?utf-8?", 9))) {
+            /* Looks like an RFC 2047 encoded field body. */
+            char decoded[PATH_MAX + 1];  /* Caps the decoded length. */
+
+            if (decode_rfc2047 (hf->value, decoded, sizeof decoded)) {
+                const size_t len = strlen(decoded);
+
+                /* decode_rfc2047() could truncate if the buffer fills up.
+                   Detect and discard if that happened; also skip no-ops. */
+                if (len < sizeof(decoded) - 1  &&  strcmp(hf->value, decoded)) {
+                    hf->value = mh_xrealloc (hf->value, len + 1);
+                    strncpy (hf->value, decoded, len + 1);  /* Copies the NUL. */
+                    ++*message_mods;
+                }
+            } else {
+                inform("failed to decode %s parameter %s", hf->name, hf->value);
+                status = NOTOK;  /* No break: later fields are still tried. */
+            }
+        }
+    }
+
+    return status;
+}
+
+
  /*
   * Factor out common code for loops in fix_filename_encoding().
   */
  static int
-fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
+fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm)
+{
      bool fixed = false;
  
      if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
@@ -2600,7 +2742,8 @@ fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
   * headers, respectively.
   */
  static int
-fix_filename_encoding (CT ct) {
+fix_filename_encoding (CT ct)
+{
      PM pm;
      HF hf;
      int fixed = 0;
@@ -2666,7 +2809,8 @@ fix_filename_encoding (CT ct) {
   */
  static int
  write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
-               int modify_inplace, int message_mods) {
+               int modify_inplace, int message_mods)
+{
      int status = OK;
  
      if (modify_inplace) {
@@ -2743,7 +2887,8 @@ write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
   * function to do it.  It touches the parts the decodetypes identifies.
   */
  static void
-set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
+set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings)
+{
      switch (ct->c_type) {
      case CT_MULTIPART: {
          struct multipart *m = (struct multipart *) ct->c_ctparams;
@@ -2779,7 +2924,8 @@ set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
   * use the standard MH backup file.
   */
  static int
-remove_file (const char *file) {
+remove_file (const char *file)
+{
      if (rmmproc) {
          char *rmm_command = concat (rmmproc, " ", file, NULL);
          int status = system (rmm_command);
@@ -2798,7 +2944,8 @@ remove_file (const char *file) {
   * Output formatted message to user.
   */
  static void
-report (char *what, char *partno, char *filename, char *message, ...) {
+report (char *what, char *partno, char *filename, char *message, ...)
+{
      va_list args;
      char *fmt;