From: David Levine <levinedl@acm.org>
Date: Sat, 20 Feb 2016 18:41:52 +0000 (-0500)
Subject: Added -decodetypes switch to mhfixmsg(1).
X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/5682db8c0089c419257c612b167771e1685f683e?hp=7f0c3f960c5e3d8817121da1a75139a83130b668

Added -decodetypes switch to mhfixmsg(1).
---

diff --git a/docs/pending-release-notes b/docs/pending-release-notes
index e9f60138..6163ec86 100644
--- a/docs/pending-release-notes
+++ b/docs/pending-release-notes
@@ -25,8 +25,8 @@ NEW FEATURES
 - When building from source, configure will derive ${prefix} from an existing
   nmh installation if it finds one in your $PATH.
 - Added getmymbox and getmyaddr mh-format(5) function escapes.
-- New -changecur, -nochangecur, and -fixtype switches have been added
-  to mhfixmsg(1).
+- New -changecur, -nochangecur, -fixtype, and -decodetypes switches have been
+  added to mhfixmsg(1).
 - mhfixmsg now removes an extraneous trailing semicolon from header
   parameter lists.
 - Added -convertargs switch to repl(1), to pass arguments to programs
diff --git a/man/mhfixmsg.man b/man/mhfixmsg.man
index 84245661..17fca716 100644
--- a/man/mhfixmsg.man
+++ b/man/mhfixmsg.man
@@ -1,4 +1,4 @@
-.TH MHFIXMSG %manext1% "February 8, 2015" "%nmhversion%"
+.TH MHFIXMSG %manext1% "February 20, 2016" "%nmhversion%"
 .\"
 .\" %nmhwarning%
 .\"
@@ -16,6 +16,8 @@ mhfixmsg \- rewrite MIME messages with various transformations
 .RB [ \-decodetext
 8bit/7bit |
 .BR \-nodecodetext ]
+.RB [ \-decodetypes
+.IR "type[/subtype][,...]" ]
 .RB [ \-textcharset
 .I charset
 .RB "| " \-notextcharset ]
@@ -72,6 +74,24 @@ linefeed character is removed from text parts encoded in ASCII,
 ISO-8859-x, UTF-8, or Windows-12xx.
 .PP
 The
+.B \-decodetypes
+switch specifies the message parts, by type and optionally subtype,
+to which
+.B \-decodetext
+applies.  Its argument is a comma-separated list of type/subtype
+elements.  If an element does not contain a subtype, then
+.B \-decodetext
+applies to all subtypes of the type.  The default is
+.B \-decodetypes
+.IR text ;
+it can be overridden, e.g., with
+.B \-decodetypes
+.I text/plain
+to restrict
+.B \-decodetext
+to just text/plain parts.
+.PP
+The
 .B \-textcharset
 switch specifies that all text/plain parts of the message(s)
 should be converted to
@@ -257,6 +277,7 @@ content type and/or encoding as follows:
 .nf
 .ta \w'\-fixboundary 'u
 \-decodetext   base64 and quoted-printable encoded text parts
+\-decodetypes  limits parts to which \-decodetext applies
 \-textcharset  text/plain parts
 \-reformat     text parts that are not text/plain
 \-fixboundary  outermost multipart part
@@ -387,6 +408,7 @@ is checked.
 .RB ` +folder "' defaults to the current folder"
 .RB ` msgs "' defaults to cur"
 .RB ` "\-decodetext 8bit"'
+.RB ` "\-decodetypes text"'
 .RB ` \-notextcharset '
 .RB ` \-reformat '
 .RB ` \-noreplacetextplain '
diff --git a/test/mhfixmsg/test-mhfixmsg b/test/mhfixmsg/test-mhfixmsg
index f7f47b90..522c0b89 100755
--- a/test/mhfixmsg/test-mhfixmsg
+++ b/test/mhfixmsg/test-mhfixmsg
@@ -51,6 +51,7 @@ Usage: mhfixmsg [+folder] [msgs] [switches]
   switches are:
   -decodetext 8bit|7bit
   -nodecodetext
+  -decodetypes
   -[no]textcharset
   -[no]reformat
   -[no]replacetextplain
@@ -418,11 +419,11 @@ else
 fi
 
 
-# check -decode (enabled by default)
+# check -decodetext (enabled by default)
 cat >"$expected" <<EOF
 To: recipient@example.com
 From: sender@example.com
-Subject: mhfixmsg decode test
+Subject: mhfixmsg decode test 1
 MIME-Version: 1.0
 Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
 
@@ -439,7 +440,7 @@ EOF
 cat >`mhpath new` <<EOF
 To: recipient@example.com
 From: sender@example.com
-Subject: mhfixmsg decode test
+Subject: mhfixmsg decode test 1
 MIME-Version: 1.0
 Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
 
@@ -457,7 +458,7 @@ run_prog mhfixmsg last -outfile "$actual"
 check "$expected" "$actual"
 
 
-# check -decode with more complicated content structure
+# check -decodetext with more complicated content structure
 cat >$expected <<EOF
 To: recipient@example.com
 From: sender@example.com
@@ -559,7 +560,7 @@ run_prog mhfixmsg last -outfile "$actual"
 check "$expected" "$actual"
 
 
-# check attempted -decode of binary text
+# check attempted -decodetext of binary text
 #### Generated the encoded text below with:
 ####   $ printf '\x0\xbd\xb2=\xbc\n' | base64
 cat >`mhpath new` <<EOF
@@ -583,7 +584,7 @@ run_prog mhfixmsg last
 check `mhpath last` "$expected" 'keep first'
 
 
-# check -decode of binary text
+# check -decodetext of binary text
 printf "%s\x0d\xbd\xb2=\xbc%s" "To: recipient@example.com
 From: sender@example.com
 Subject: mhfixmsg binary decode test
@@ -1210,6 +1211,133 @@ mhfixmsg -fixtype application/octet-stream last
 check "$expected" `mhpath last`
 
 
+# check default -decodetypes
+cat >$expected <<EOF
+To: recipient@example.com
+From: sender@example.com
+Subject: mhfixmsg decode test 3
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Disposition: attachment; filename="test1.txt"
+Content-MD5: kq+Hnc2SD/eKwAnkFBDuEA==
+Content-Transfer-Encoding: 8bit
+
+This is the text/plain part.
+
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="iso-8859-1"; name="test2.txt"
+Content-Disposition: attachment; filename="test2.txt"
+Content-Transfer-Encoding: 8bit
+
+<html>
+<head>
+    This is the text/html part.
+</head>
+</body>
+</html>
+
+------- =_aaaaaaaaaa0--
+EOF
+
+cat >`mhpath new` <<EOF
+To: recipient@example.com
+From: sender@example.com
+Subject: mhfixmsg decode test 3
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Disposition: attachment; filename="test1.txt"
+Content-MD5: kq+Hnc2SD/eKwAnkFBDuEA==
+Content-Transfer-Encoding: quoted-printable
+
+This is the text/plain part.
+
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="iso-8859-1"; name="test2.txt"
+Content-Disposition: attachment; filename="test2.txt"
+Content-Transfer-Encoding: base64
+
+PGh0bWw+CjxoZWFkPgogICAgVGhpcyBpcyB0aGUgdGV4dC9odG1sIHBhcnQuCjwvaGVhZD4KPC9i
+b2R5Pgo8L2h0bWw+Cg==
+
+------- =_aaaaaaaaaa0--
+EOF
+run_prog mhfixmsg last -outfile "$actual"
+check "$expected" "$actual"
+
+
+# check -decodetypes text/plain
+cat >$expected <<EOF
+To: recipient@example.com
+From: sender@example.com
+Subject: mhfixmsg decode test 4
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Disposition: attachment; filename="test1.txt"
+Content-MD5: kq+Hnc2SD/eKwAnkFBDuEA==
+Content-Transfer-Encoding: 8bit
+
+This is the text/plain part.
+
+------- =_aaaaaaaaaa0
+Content-Type: multipart/alternative; boundary="----=_nmh-multipart2"
+
+------=_nmh-multipart2
+Content-Type: text/plain; charset="iso-8859-1"; name="test2.txt"
+Content-Transfer-Encoding: 8bit
+
+This is the text/html part.
+
+------=_nmh-multipart2
+Content-Type: text/html; charset="iso-8859-1"; name="test2.txt"
+Content-Disposition: attachment; filename="test2.txt"
+Content-Transfer-Encoding: base64
+
+PGh0bWw+DQo8aGVhZD4NCiAgICBUaGlzIGlzIHRoZSB0ZXh0L2h0bWwgcGFydC4NCjwvaGVhZD4N
+CjwvYm9keT4NCjwvaHRtbD4NCg==
+
+------=_nmh-multipart2--
+
+------- =_aaaaaaaaaa0--
+EOF
+
+cat >`mhpath new` <<EOF
+To: recipient@example.com
+From: sender@example.com
+Subject: mhfixmsg decode test 4
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Disposition: attachment; filename="test1.txt"
+Content-MD5: kq+Hnc2SD/eKwAnkFBDuEA==
+Content-Transfer-Encoding: quoted-printable
+
+This is the text/plain part.
+
+------- =_aaaaaaaaaa0
+Content-Type: text/html; charset="iso-8859-1"; name="test2.txt"
+Content-Disposition: attachment; filename="test2.txt"
+Content-Transfer-Encoding: base64
+
+PGh0bWw+CjxoZWFkPgogICAgVGhpcyBpcyB0aGUgdGV4dC9odG1sIHBhcnQuCjwvaGVhZD4KPC9i
+b2R5Pgo8L2h0bWw+Cg==
+
+------- =_aaaaaaaaaa0--
+EOF
+run_prog mhfixmsg last -outfile "$actual" -decodetypes text/plain
+check "$expected" "$actual"
+
+
 # make sure there are no tmp files left over
 find "$MH_TEST_DIR/Mail" \( -name 'mhfix*' -o -name ',mhfix*' \) -print \
   >"$actual"
diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c
index 8d67e827..f9f001b3 100644
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -16,6 +16,7 @@
 #define MHFIXMSG_SWITCHES \
     X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
     X("nodecodetext", 0, NDECODETEXTSW) \
+    X("decodetypes", 0, DECODETYPESW) \
     X("textcharset", 0, TEXTCHARSETSW) \
     X("notextcharset", 0, NTEXTCHARSETSW) \
     X("reformat", 0, REFORMATSW) \
@@ -80,6 +81,7 @@ typedef struct fix_transformations {
     int reformat;
     int replacetextplain;
     int decodetext;
+    char *decodetypes;
     char *textcharset;
 } fix_transformations;
 
@@ -106,7 +108,8 @@ static CT build_multipart_alt (CT, CT, int, int);
 static int boundary_in_content (FILE **, char *, const char *);
 static void transfer_noncontent_headers (CT, CT);
 static int set_ct_type (CT, int type, int subtype, int encoding);
-static int decode_text_parts (CT, int, int *);
+static int decode_text_parts (CT, int, const char *, int *);
+static int should_decode(const char *, const char *, const char *);
 static int content_encoding (CT, const char **);
 static int strip_crs (CT, int *);
 static int convert_charsets (CT, char *, int *);
@@ -135,6 +138,7 @@ main (int argc, char **argv) {
     fx.fixtypes = NULL;
     fx.replacetextplain = 0;
     fx.decodetext = CE_8BIT;
+    fx.decodetypes = "text";  /* Default to all text content. */
     fx.textcharset = NULL;
 
     if (nmh_init(argv[0], 1)) { return 1; }
@@ -179,6 +183,11 @@ main (int argc, char **argv) {
             case NDECODETEXTSW:
                 fx.decodetext = 0;
                 continue;
+            case DECODETYPESW:
+                if (! (cp = *argp++)  ||  *cp == '-')
+                    adios (NULL, "missing argument to %s", argp[-2]);
+                fx.decodetypes = cp;
+                continue;
             case TEXTCHARSETSW:
                 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
                     adios (NULL, "missing argument to %s", argp[-2]);
@@ -445,7 +454,7 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
             ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
     }
     if (status == OK  &&  fx->decodetext) {
-        status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
+        status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods);
     }
     if (status == OK  &&  fx->textcharset != NULL) {
         status = convert_charsets (*ctp, fx->textcharset, &message_mods);
@@ -1748,11 +1757,15 @@ set_ct_type (CT ct, int type, int subtype, int encoding) {
 
 
 static int
-decode_text_parts (CT ct, int encoding, int *message_mods) {
+decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
     int status = OK;
 
     switch (ct->c_type) {
     case CT_TEXT:
+        if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
+            break;
+        }
+
         switch (ct->c_encoding) {
         case CE_BASE64:
         case CE_QUOTED: {
@@ -1831,7 +1844,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
         /* Should check to see if the body for this part is encoded?
            For now, it gets passed along as-is by InitMultiPart(). */
         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
-            status = decode_text_parts (part->mp_part, encoding, message_mods);
+            status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods);
         }
         break;
     }
@@ -1840,7 +1853,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
         if (ct->c_subtype == MESSAGE_EXTERNAL) {
             struct exbody *e = (struct exbody *) ct->c_ctparams;
 
-            status = decode_text_parts (e->eb_content, encoding, message_mods);
+            status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods);
         }
         break;
 
@@ -1852,6 +1865,37 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
 }
 
 
+/* Return 1 if a part of the given type (and subtype, when non-NULL) is named
+   in decodetypes, the comma-separated -decodetypes list; otherwise return 0. */
+static int
+should_decode(const char *decodetypes, const char *type, const char *subtype) {
+    /* Bracket decodetypes with commas so that every element is delimited on
+       both sides, then search it for ",type," and, failing that, for
+       ",type/subtype,".  The temporary strings are freed before returning. */
+
+    int found_match = 0;
+    char *delimited_decodetypes = concat(",", decodetypes, ",", NULL);
+    char *delimited_type = concat(",", type, ",", NULL);
+
+    if (nmh_strcasestr(delimited_decodetypes, delimited_type)) {
+        found_match = 1;
+    } else if (subtype != NULL) {
+        char *delimited_type_subtype =
+            concat(",", type, "/", subtype, ",", NULL);
+
+        if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) {
+            found_match = 1;
+        }
+        free(delimited_type_subtype);
+    }
+
+    free(delimited_type);
+    free(delimited_decodetypes);
+
+    return found_match;
+}
+
+
 /* See if the decoded content is 7bit, 8bit, or binary.  It's binary
    if it has any NUL characters, a CR not followed by a LF, or lines
    greater than 998 characters in length.  If binary, reason is set