-.TH MHFIXMSG %manext1% "March 12, 2016" "%nmhversion%"
+.TH MHFIXMSG %manext1% "September 22, 2016" "%nmhversion%"
.\"
.\" %nmhwarning%
.\"
.RB \-file
.IR file ]
.RB [ \-decodetext
-8bit/7bit |
+8bit|7bit|binary |
.BR \-nodecodetext ]
.RB [ \-decodetypes
.IR "type/[subtype][,...]" ]
The
.B \-decodetext
switch enables a transformation to decode each base64 and
-quoted-printable text message part to the selected 8bit or 7bit
-encoding. If 7bit is selected for a base64 part but it will only fit
+quoted-printable text message part to the selected 8bit, 7bit, or
+binary encoding.
+If 7bit is selected for a base64 part but it will only fit
8bit, as defined by RFC 2045, then it will be decoded to 8bit
-quoted-printable. Otherwise, if the decoded text would not fit the
-selected encoding, the part is not decoded (and a message will be
+quoted-printable.
+Similarly, with 8bit, if the decoded text would be binary,
+then the part is not decoded (and a message will be
displayed if
.B \-verbose
-is enabled).
+is enabled). Note that
+.B \-decodetext
+binary can produce messages that are not RFC 2045 compliant.
.PP
When the
.B \-decodetext
is 0 if all of the requested transformations are performed, or
non-zero otherwise.
.RB ( mhfixmsg
-will not decode to binary content, but a request to do so is
-not considered a failure, and is noted with
+will not decode to binary content with the default
+.B \-decodetext
+setting, but a request to do so is not considered a failure, and is noted
+with
.BR \-verbose .)
If a problem is detected with any one of multiple messages such that
the return status is non-zero, then none of the messages will be
close them expeditiously. Until that is resolved, it is recommended that
.B mhfixmsg
not be run on a large number of messages at once, as noted in the EXAMPLES above.
-.PP
-As noted in the DESCRIPTION above,
-.B mhfixmsg
-will not decode to binary content. This restriction should be removed at some point. It's
-not due to any issue in
-.BR mhfixmsg ,
-but rather an observation of incorrect behavior by other nmh tools on messages with binary
-content.
cat >"$expected" <<EOF
Usage: mhfixmsg [+folder] [msgs] [switches]
switches are:
- -decodetext 8bit|7bit
+ -decodetext 8bit|7bit|binary
-nodecodetext
-decodetypes
-[no]crlflinebreaks
check "$expected" "$actual"
-# check attempted -decodetext of binary text
+# check attempted (default, 8bit) -decodetext of binary text
#### Generated the encoded text below with:
-#### $ printf '\x0\xbd\xb2=\xbc\n' | base64
+#### $ printf '\xbd\xb2=\xbc\x00\n' | base64
cat >`mhpath new` <<EOF
To: recipient@example.com
From: sender@example.com
-Subject: mhfixmsg attempted binary decode test
+Subject: mhfixmsg binary decode test
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
------- =_aaaaaaaaaa0
-Content-Type: text/plain; charset="iso-8859-1"; name="nul+square.txt"
+Content-Type: text/plain; charset=UTF-8; name="nul+square.txt"
Content-Transfer-Encoding: base64
-AL2yPbwK
+vbI9vAAK
------- =_aaaaaaaaaa0--
EOF
# check for successful decode of a different part with attempted -decodetext
-# of binary text
-#### Generated the encoded text below with:
-#### $ printf '\x0\xbd\xb2=\xbc\n' | base64
+# of binary (>998 characters) text
cat >$expected <<EOF
To: recipient@example.com
From: sender@example.com
EOF
run_prog mhfixmsg -noreformat last
+check `mhpath last` "$expected" 'keep first'
+
+
+# check for successful decode of a different part with -decodetext of binary
+# (>998 characters) text
+cat >$expected <<EOF
+To: recipient@example.com
+From: sender@example.com
+Subject: mhfixmsg successful decode of text/plain with failed binary decode
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Transfer-Encoding: 8bit
+
+This is a text plain part
+
+------- =_aaaaaaaaaa0
+Content-Type: text/html; charset="ascii"
+Content-Transfer-Encoding: binary
+Content-Disposition: inline
+
+<html><head><title>long line</title></head><body>This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. This line is greater than 998 characters in length, so this part should not be decoded. </body></html>
+
+------- =_aaaaaaaaaa0--
+EOF
+
+run_prog mhfixmsg -noreformat -decodetext binary last
check `mhpath last` "$expected"
-# check -decodetext of binary text
-printf "%s\x0d\xbd\xb2=\xbc%s" "To: recipient@example.com
+# check -decodetext of binary (containing ASCII NUL) text
+printf "%s\xbd\xb2=\xbc\x00%s" "To: recipient@example.com
From: sender@example.com
Subject: mhfixmsg binary decode test
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=\"----- =_aaaaaaaaaa0\"
------- =_aaaaaaaaaa0
-Content-Type: text/plain; charset=\"iso-8859-1\"; name=\"nul+square.txt\"
+Content-Type: text/plain; charset=UTF-8; name=\"nul+square.txt\"
Content-Transfer-Encoding: binary
" "
------- =_aaaaaaaaaa0--
" >"$expected"
-## output_content() in mhoutsbr.c can't handle binary content.
-## mhfixmsg last -decodetext binary -outfile "$actual"
-## check "$expected" "$actual"
-rm -f "$expected"
-rmm last
+run_prog mhfixmsg last -decodetext binary -outfile "$actual"
+check "$expected" "$actual"
# check that -reformat succeeds when decode of binary text fails
cp -p `mhpath last` `mhpath new`
run_test 'mhfixmsg last -nofixboundary' ''
-check "$MH_TEST_DIR"/Mail/inbox/17 "$MH_TEST_DIR"/Mail/inbox/18 'keep first'
+check "$MH_TEST_DIR"/Mail/inbox/18 "$MH_TEST_DIR"/Mail/inbox/19 'keep first'
# check that message is not output when fed through stdin
EOF
run_test 'mhfixmsg last -outfile '"$actual"' -verbose' \
- "mhfixmsg: 17, fix multipart boundary"
+ "mhfixmsg: 18, fix multipart boundary"
check "$expected" "$actual"
EOF
chmod a+x "${MH_TEST_DIR}/Mail/rmmproc"
echo "rmmproc: ${MH_TEST_DIR}/Mail/rmmproc" >>"$MH"
-cp "${MH_TEST_DIR}/Mail/inbox/15" "${MH_TEST_DIR}/Mail/inbox/15.original"
+cp "${MH_TEST_DIR}/Mail/inbox/16" "${MH_TEST_DIR}/Mail/inbox/16.original"
-run_test 'mhfixmsg 15' ''
-check "${MH_TEST_DIR}/Mail/inbox/15.backup" \
- "${MH_TEST_DIR}/Mail/inbox/15.original"
+run_test 'mhfixmsg 16' ''
+check "${MH_TEST_DIR}/Mail/inbox/16.backup" \
+ "${MH_TEST_DIR}/Mail/inbox/16.original"
# check -normmproc
-cp "${MH_TEST_DIR}/Mail/inbox/20" "${MH_TEST_DIR}/Mail/inbox/21"
+cp "${MH_TEST_DIR}/Mail/inbox/21" "${MH_TEST_DIR}/Mail/inbox/22"
-run_test 'mhfixmsg 20 -normmproc'
-check "${MH_TEST_DIR}/Mail/inbox/21" \
- "${MH_TEST_DIR}/Mail/inbox/,20" 'keep first'
+run_test 'mhfixmsg 21 -normmproc'
+check "${MH_TEST_DIR}/Mail/inbox/22" \
+ "${MH_TEST_DIR}/Mail/inbox/,21" 'keep first'
# check -rmmproc
run_test 'mhfixmsg 21 -rmmproc true'
-if test -f '${MH_TEST_DIR}/Mail/inbox/21.backup'; then
+if test -f "${MH_TEST_DIR}/Mail/inbox/22.backup"; then
echo check of mhfixmsg -rmmproc FAILED, should not have created backup file
failed=`expr ${failed:-0} + 1`
fi
# check that input is passed through to output when there's a parse error
# (the charset string is missing its closing quote) with -outfile
cat >"$expected.err" <<EOF
-mhfixmsg: invalid quoted-string in message 30's Content-Type: field
+mhfixmsg: invalid quoted-string in message 31's Content-Type: field
(parameter charset)
-mhfixmsg: unable to parse message 30
+mhfixmsg: unable to parse message 31
EOF
cat >`mhpath new` <<EOF
#include <fcntl.h>
#define MHFIXMSG_SWITCHES \
- X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
+ X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
X("nodecodetext", 0, NDECODETEXTSW) \
X("decodetypes", 0, DECODETYPESW) \
X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
fx.decodetext = CE_8BIT;
} else if (! strcasecmp (cp, "7bit")) {
fx.decodetext = CE_7BIT;
+ } else if (! strcasecmp (cp, "binary")) {
+ fx.decodetext = CE_BINARY;
} else {
adios (NULL, "invalid argument to %s", argp[-2]);
}
}
+/*
+ * It's not necessary to update the charset parameter of a Content-Type
+ * header for a text part. According to RFC 2045 Sec. 6.4, the body
+ * (content) was originally in the specified charset, "and will be in
+ * that character set again after decoding."
+ */
static int
decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
int status = OK;