From: David Levine <levinedl@acm.org>
Date: Thu, 22 Sep 2016 17:07:32 +0000 (-0400)
Subject: Allow -decodetext binary, though 8bit is still the default because
X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/1903d5af62f05f3b6f69a4950734154a1a698dc8?ds=sidebyside;hp=-c

Allow -decodetext binary, though 8bit is still the default because
binary can produce messages that are not RFC 2045 compliant.
---

1903d5af62f05f3b6f69a4950734154a1a698dc8
diff --git a/docs/pending-release-notes b/docs/pending-release-notes
index fce0f5c4..9428772e 100644
--- a/docs/pending-release-notes
+++ b/docs/pending-release-notes
@@ -47,6 +47,7 @@ NEW FEATURES
   domain name in From: header line in message draft.
 - post(8) -snoop now attempts to decode base64-encoded SMTP traffic.
 - folder(1) -nocreate now prints a warning message for a non-existant folder.
+- mhfixmsg(1) now allows -decodetext binary, though 8bit is still the default.
 
 -----------------
 OBSOLETE FEATURES
diff --git a/man/mhfixmsg.man b/man/mhfixmsg.man
index eb9595de..38c322f9 100644
--- a/man/mhfixmsg.man
+++ b/man/mhfixmsg.man
@@ -1,4 +1,4 @@
-.TH MHFIXMSG %manext1% "March 12, 2016" "%nmhversion%"
+.TH MHFIXMSG %manext1% "September 22, 2016" "%nmhversion%"
 .\"
 .\" %nmhwarning%
 .\"
@@ -14,7 +14,7 @@ mhfixmsg \- rewrite MIME messages with various transformations
 .RB \-file
 .IR file ]
 .RB [ \-decodetext
-8bit/7bit |
+8bit|7bit|binary |
 .BR \-nodecodetext ]
 .RB [ \-decodetypes
 .IR "type/[subtype][,...]" ]
@@ -61,14 +61,18 @@ can safely be run multiple times on a message.
 The
 .B \-decodetext
 switch enables a transformation to decode each base64 and
-quoted-printable text message part to the selected 8bit or 7bit
-encoding.  If 7bit is selected for a base64 part but it will only fit
+quoted-printable text message part to the selected 8bit, 7bit, or
+binary encoding.
+If 7bit is selected for a base64 part but it will only fit
 8bit, as defined by RFC 2045, then it will be decoded to 8bit
-quoted-printable.  Otherwise, if the decoded text would not fit the
-selected encoding, the part is not decoded (and a message will be
+quoted-printable.
+Similarly, with 8bit, if the decoded text would be binary,
+then the part is not decoded (and a message will be
 displayed if
 .B \-verbose
-is enabled).
+is enabled).  Note that
+.B \-decodetext
+binary can produce messages that are not RFC 2045 compliant.
 .PP
 When the
 .B \-decodetext
@@ -218,8 +222,10 @@ The return status of
 is 0 if all of the requested transformations are performed, or
 non-zero otherwise.
 .RB ( mhfixmsg
-will not decode to binary content, but a request to do so is
-not considered a failure, and is noted with
+will not decode to binary content with the default
+.B \-decodetext
+setting, but a request to do so is not considered a failure, and is noted
+with
 .BR \-verbose .)
 If a problem is detected with any one of multiple messages such that
 the return status is non-zero, then none of the messages will be
@@ -531,11 +537,3 @@ opens files internally for decoding and character set conversion, and apparently
 close them expeditiously.  Until that is resolved, it is recommended that
 .B mhfixmsg
 not be run on a large number of messages at once, as noted in the EXAMPLES above.
-.PP
-As noted in the DESCRIPTION above,
-.B mhfixmsg
-will not decode to binary content.  This restriction should be removed at some point.  It's
-not due to any issue in
-.BR mhfixmsg ,
-but rather an observation of incorrect behavior by other nmh tools on messages with binary
-content.
diff --git a/test/mhfixmsg/test-mhfixmsg b/test/mhfixmsg/test-mhfixmsg
index 2194c7a2..efe7de7e 100755
--- a/test/mhfixmsg/test-mhfixmsg
+++ b/test/mhfixmsg/test-mhfixmsg
@@ -49,7 +49,7 @@ fi
 cat >"$expected" <<EOF
 Usage: mhfixmsg [+folder] [msgs] [switches]
   switches are:
-  -decodetext 8bit|7bit
+  -decodetext 8bit|7bit|binary
   -nodecodetext
   -decodetypes
   -[no]crlflinebreaks
@@ -561,21 +561,21 @@ run_prog mhfixmsg last -outfile "$actual"
 check "$expected" "$actual"
 
 
-# check attempted -decodetext of binary text
+# check attempted (default, 8bit) -decodetext of binary text
 #### Generated the encoded text below with:
-####   $ printf '\x0\xbd\xb2=\xbc\n' | base64
+####   $ printf '\xbd\xb2=\xbc\x00\n' | base64
 cat >`mhpath new` <<EOF
 To: recipient@example.com
 From: sender@example.com
-Subject: mhfixmsg attempted binary decode test
+Subject: mhfixmsg binary decode test
 MIME-Version: 1.0
 Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
 
 ------- =_aaaaaaaaaa0
-Content-Type: text/plain; charset="iso-8859-1"; name="nul+square.txt"
+Content-Type: text/plain; charset=UTF-8; name="nul+square.txt"
 Content-Transfer-Encoding: base64
 
-AL2yPbwK
+vbI9vAAK
 
 ------- =_aaaaaaaaaa0--
 EOF
@@ -586,9 +586,7 @@ check `mhpath last` "$expected" 'keep first'
 
 
 # check for successful decode of a different part with attempted -decodetext
-# of binary text
-#### Generated the encoded text below with:
-####   $ printf '\x0\xbd\xb2=\xbc\n' | base64
+# of binary (>998 characters) text
 cat >$expected <<EOF
 To: recipient@example.com
 From: sender@example.com
@@ -672,29 +670,55 @@ ICA8L2JvZHk+PC9odG1sPg==
 EOF
 
 run_prog mhfixmsg -noreformat last
+check `mhpath last` "$expected" 'keep first'
+
+
+# check that -decodetext binary decodes every text part, including the one
+# with binary (>998 characters) text
+cat >$expected <<EOF
+To: recipient@example.com
+From: sender@example.com
+Subject: mhfixmsg successful decode of text/plain with failed binary decode
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Transfer-Encoding: 8bit
+
+This is a text plain part
+
+------- =_aaaaaaaaaa0
+Content-Type: text/html; charset="ascii"
+Content-Transfer-Encoding: binary
+Content-Disposition: inline
+
+<html><head><title>long line</title></head><body>This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  This line is greater than 998 characters in length, so this part should not be decoded.  </body></html>
+
+------- =_aaaaaaaaaa0--
+EOF
+
+run_prog mhfixmsg -noreformat -decodetext binary last
 check `mhpath last` "$expected"
 
 
-# check -decodetext of binary text
-printf "%s\x0d\xbd\xb2=\xbc%s" "To: recipient@example.com
+# check -decodetext of binary (with ASCII NUL) text; octal escapes, as POSIX printf has no \x
+printf "%s\275\262=\274\000%s" "To: recipient@example.com
 From: sender@example.com
 Subject: mhfixmsg binary decode test
 MIME-Version: 1.0
 Content-Type: multipart/mixed; boundary=\"----- =_aaaaaaaaaa0\"
 
 ------- =_aaaaaaaaaa0
-Content-Type: text/plain; charset=\"iso-8859-1\"; name=\"nul+square.txt\"
+Content-Type: text/plain; charset=UTF-8; name=\"nul+square.txt\"
 Content-Transfer-Encoding: binary
 
 " "
 
 ------- =_aaaaaaaaaa0--
 " >"$expected"
-## output_content() in mhoutsbr.c can't handle binary content.
-## mhfixmsg last -decodetext binary -outfile "$actual"
-## check "$expected" "$actual"
-rm -f "$expected"
-rmm last
+run_prog mhfixmsg last -decodetext binary -outfile "$actual"
+check "$expected" "$actual"
 
 
 # check that -reformat succeeds when decode of binary text fails
@@ -844,7 +868,7 @@ EOF
 cp -p `mhpath last` `mhpath new`
 
 run_test 'mhfixmsg last -nofixboundary' ''
-check "$MH_TEST_DIR"/Mail/inbox/17 "$MH_TEST_DIR"/Mail/inbox/18 'keep first'
+check "$MH_TEST_DIR"/Mail/inbox/18 "$MH_TEST_DIR"/Mail/inbox/19 'keep first'
 
 
 # check that message is not output when fed through stdin
@@ -883,7 +907,7 @@ The boundaries of this part don't match the header boundary.
 EOF
 
 run_test 'mhfixmsg last -outfile '"$actual"' -verbose' \
-         "mhfixmsg: 17, fix multipart boundary"
+         "mhfixmsg: 18, fix multipart boundary"
 check "$expected" "$actual"
 
 
@@ -1298,24 +1322,24 @@ mv "$1" "$1.backup"
 EOF
 chmod a+x "${MH_TEST_DIR}/Mail/rmmproc"
 echo "rmmproc: ${MH_TEST_DIR}/Mail/rmmproc" >>"$MH"
-cp "${MH_TEST_DIR}/Mail/inbox/15" "${MH_TEST_DIR}/Mail/inbox/15.original"
+cp "${MH_TEST_DIR}/Mail/inbox/16" "${MH_TEST_DIR}/Mail/inbox/16.original"
 
-run_test 'mhfixmsg 15' ''
-check "${MH_TEST_DIR}/Mail/inbox/15.backup" \
-      "${MH_TEST_DIR}/Mail/inbox/15.original"
+run_test 'mhfixmsg 16' ''
+check "${MH_TEST_DIR}/Mail/inbox/16.backup" \
+      "${MH_TEST_DIR}/Mail/inbox/16.original"
 
 
 # check -normmproc
-cp "${MH_TEST_DIR}/Mail/inbox/20" "${MH_TEST_DIR}/Mail/inbox/21"
+cp "${MH_TEST_DIR}/Mail/inbox/21" "${MH_TEST_DIR}/Mail/inbox/22"
 
-run_test 'mhfixmsg 20 -normmproc'
-check "${MH_TEST_DIR}/Mail/inbox/21" \
-      "${MH_TEST_DIR}/Mail/inbox/,20" 'keep first'
+run_test 'mhfixmsg 21 -normmproc'
+check "${MH_TEST_DIR}/Mail/inbox/22" \
+      "${MH_TEST_DIR}/Mail/inbox/,21" 'keep first'
 
 
 # check -rmmproc
-run_test 'mhfixmsg 21 -rmmproc true'
-if test -f '${MH_TEST_DIR}/Mail/inbox/21.backup'; then
+run_test 'mhfixmsg 22 -rmmproc true'
+if test -f "${MH_TEST_DIR}/Mail/inbox/22.backup"; then
   echo check of mhfixmsg -rmmproc FAILED, should not have created backup file
   failed=`expr ${failed:-0} + 1`
 fi
@@ -1609,9 +1633,9 @@ check "$expected" "$actual"
 # check that input is passed through to output when there's a parse error
 # (the charset string is missing its closing quote) with -outfile
 cat >"$expected.err" <<EOF
-mhfixmsg: invalid quoted-string in message 30's Content-Type: field
+mhfixmsg: invalid quoted-string in message 31's Content-Type: field
           (parameter charset)
-mhfixmsg: unable to parse message 30
+mhfixmsg: unable to parse message 31
 EOF
 
 cat >`mhpath new` <<EOF
diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c
index 5814eaee..4ececa5d 100644
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -14,7 +14,7 @@
 #include <fcntl.h>
 
 #define MHFIXMSG_SWITCHES \
-    X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
+    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
     X("nodecodetext", 0, NDECODETEXTSW) \
     X("decodetypes", 0, DECODETYPESW) \
     X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
@@ -184,6 +184,8 @@ main (int argc, char **argv) {
                     fx.decodetext = CE_8BIT;
                 } else if (! strcasecmp (cp, "7bit")) {
                     fx.decodetext = CE_7BIT;
+                } else if (! strcasecmp (cp, "binary")) {
+                    fx.decodetext = CE_BINARY;
                 } else {
                     adios (NULL, "invalid argument to %s", argp[-2]);
                 }
@@ -1843,6 +1845,12 @@ set_ct_type (CT ct, int type, int subtype, int encoding) {
 }
 
 
+/*
+ * It's not necessary to update the charset parameter of a Content-Type
+ * header for a text part.  According to RFC 2045 Sec. 6.4, the body
+ * (content) was originally in the specified charset, "and will be in
+ * that character set again after decoding."
+ */
 static int
 decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
     int status = OK;
diff --git a/uip/mhoutsbr.c b/uip/mhoutsbr.c
index 0f3e9c1d..c9974456 100644
--- a/uip/mhoutsbr.c
+++ b/uip/mhoutsbr.c
@@ -126,7 +126,7 @@ output_content (CT ct, FILE *out)
 	    if (output_content (p, out) == NOTOK) {
 		if (boundary && *boundary != '\0')
 		    free(boundary);
-		return NOTOK;
+                return NOTOK;
 	    }
 	}
 	fprintf (out, "\n--%s--\n", boundary);
@@ -187,8 +187,14 @@ output_content (CT ct, FILE *out)
 	    break;
 
 	case CE_BINARY:
-	    advise (NULL, "can't handle binary transfer encoding in content");
-	    result = NOTOK;
+	    if (ct->c_type == CT_TEXT) {
+		/* So that mhfixmsg can decode to binary text. */
+		putc ('\n', out);
+		result = write8Bit (ct, out);
+	    } else {
+		advise (NULL, "can't handle binary transfer encoding in content");
+		result = NOTOK;
+	    }
 	    break;
 
 	default: