-.TH MHFIXMSG %manext1% 2016-11-08 "%nmhversion%"
+.TH MHFIXMSG %manext1% 2018-01-14 "%nmhversion%"
.
.\" %nmhwarning%
.
.BR \-nodecodetext ]
.RB [ \-decodetypes
.IR "type/[subtype][,...]" ]
+.RB [ \-decodeheaderfieldbodies
+utf-8 |
+.BR \-nodecodeheaderfieldbodies ]
.RB [ \-crlflinebreaks " | " \-nocrlflinebreaks ]
.RB [ \-textcharset
.I charset
.B \-decodetext
to just text/plain parts.
.PP
+The
+.B \-decodeheaderfieldbodies
+switch enables decoding of UTF-8 header field bodies, when supplied
+with its mandatory
+.I utf-8
+argument. The
+.B \-nodecodeheaderfieldbodies
+switch inhibits this transformation. The transformation can produce a message
+that does not conform with RFC 2047, §1, paragraph 6, because the decoded
+header field body could contain unencoded non-ASCII characters. It is
+therefore not enabled by default.
+.PP
By default, carriage return characters are preserved or inserted at
the end of each line of text content. The
.B \-crlflinebreaks
switch causes carriage return characters to be stripped from, and not
inserted in, text content when it is decoded and encoded. Note that
its use can cause the generation of MIME messages that do not conform
-to RFC 2046, §4.1.1, paragraph 1.
+with RFC 2046, §4.1.1, paragraph 1.
.PP
The
.B \-textcharset
.PP
.RS 5
.nf
-.ta \w'\-crlflinebreaks 'u
-\-decodetext base64 and quoted-printable encoded text parts
-\-decodetypes limits parts to which -decodetext applies
-\-crlflinebreaks text parts
-\-textcharset text/plain parts
-\-reformat text parts that are not text/plain
-\-fixboundary outermost multipart part
-\-fixcte multipart or message part
-\-fixtype all except multipart and message parts
+.ta \w'\-decodeheaderfieldbodies 'u
+\-decodetext base64 and quoted-printable encoded text parts
+\-decodetypes limits parts to which -decodetext applies
+\-decodeheaderfieldbodies all message parts
+\-crlflinebreaks text parts
+\-textcharset text/plain parts
+\-reformat text parts that are not text/plain
+\-fixboundary outermost multipart part
+\-fixcte multipart or message part
+\-fixtype all except multipart and message parts
.fi
.RE
.SS "Backup of Original Message/File"
.RB ` msgs "' defaults to cur"
.RB ` "\-decodetext 8bit"'
.RB ` "\-decodetypes text,application/ics"'
+.RB ` \-nodecodeheaderfieldbodies '
.RB ` \-crlflinebreaks '
.RB ` \-notextcharset '
.RB ` \-reformat '
-decodetext 8bit|7bit|binary
-nodecodetext
-decodetypes
+ -decodeheaderfieldbodies utf-8
+ -nodecodeheaderfieldbodies
-[no]crlflinebreaks
-[no]textcharset
-[no]reformat
fi
-# check -nodecode
-start_test "-nodecode"
+# check -nodecodetext
+start_test "-nodecodetext"
prepare_space >"$expected" <<EOF
MIME-Version: 1.0
From: sender@example.com
printf '%s\n' 'mhfixmsg: 12, insert text/plain part' >"$expected.err"
#### lynx inserts multiple blank lines, so squeeze them.
- run_prog mhfixmsg last -nodecode -outfile - -verbose 2>"$actual.err" | \
+ run_prog mhfixmsg last -nodecodetext -outfile - -verbose 2>"$actual.err" | \
squeeze_lines >"$actual"
check "$expected" "$actual" 'ignore space'
check "$expected.err" "$actual.err"
check "$expected" "$actual"
+start_test "-nodecodeheaderfieldbodies"
+cat >"`mhpath new`" <<EOF
+To: recipient@example.com
+From: sender@example.com
+Date: Wed, 28 Sep 2016 11:24:28 -0400
+Subject: =?utf-8?B?dGhpcyBTdWJqZWN0IHdhcyBVVEYtOCBlbmNvZGVk?=
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary=001a114dd3e8fe9c56053d92f414
+Content-Transfer-Encoding: 8bit
+
+--001a114dd3e8fe9c56053d92f414
+Content-Type: text/plain; charset=UTF-8
+
+This is a test.
+
+--001a114dd3e8fe9c56053d92f414
+Content-Type: text/plain; charset=UTF-8; name="test.txt"
+Content-Disposition: attachment; filename="test.txt"
+Content-Transfer-Encoding: 8bit
+
+This is the first text/plain part, in a subpart. The file name
+is test.txt.
+
+--001a114dd3e8fe9c56053d92f414--
+EOF
+# The later -nodecodeheaderfieldbodies cancels the earlier
+# -decodeheaderfieldbodies, so the output must be identical to the input.
+# Quote the mhpath output so a test directory containing whitespace
+# does not break the redirect or the check arguments.
+run_prog mhfixmsg -file - -outfile - -decodeheaderfieldbodies utf-8 \
+    -nodecodeheaderfieldbodies <"`mhpath last`" >"$actual" 2>/dev/null
+check "`mhpath last`" "$actual" 'keep first'
+
+
+start_test "test decoding of UTF-8 header value"
+cat >"$expected" <<EOF
+To: recipient@example.com
+From: sender@example.com
+Date: Wed, 28 Sep 2016 11:24:28 -0400
+Subject: this Subject was UTF-8 encoded
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary=001a114dd3e8fe9c56053d92f414
+Content-Transfer-Encoding: 8bit
+
+--001a114dd3e8fe9c56053d92f414
+Content-Type: text/plain; charset=UTF-8
+
+This is a test.
+
+--001a114dd3e8fe9c56053d92f414
+Content-Type: text/plain; charset=UTF-8; name="test.txt"
+Content-Disposition: attachment; filename="test.txt"
+Content-Transfer-Encoding: 8bit
+
+This is the first text/plain part, in a subpart. The file name
+is test.txt.
+
+--001a114dd3e8fe9c56053d92f414--
+EOF
+
+# Decode the UTF-8 encoded Subject of the message created by the previous
+# test.  Quote the mhpath output so a test directory containing whitespace
+# does not break the redirect.
+run_prog mhfixmsg -file - -outfile - -decodeheaderfieldbodies utf-8 \
+    <"`mhpath last`" >"$actual" 2>/dev/null
+check "$expected" "$actual"
+
+
# make sure there are no tmp files left over
find "$MH_TEST_DIR/Mail" -name '*mhfix*' -print \
>"$actual"
check "$expected" "$actual"
+
finish_test
exit $failed
X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
X("nodecodetext", 0, NDECODETEXTSW) \
X("decodetypes", 0, DECODETYPESW) \
+ X("decodeheaderfieldbodies utf-8", 0, DECODEHEADERFIELDBODIESSW) \
+ X("nodecodeheaderfieldbodies", 0, NDECODEHEADERFIELDBODIESSW) \
X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
X("textcharset", 0, TEXTCHARSETSW) \
int replacetextplain;
int decodetext;
char *decodetypes;
+ char *decodeheaderfieldbodies; /* Either NULL or "utf-8". */
/* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
int lf_line_endings;
char *textcharset;
static int less_restrictive (int, int);
static int convert_charsets (CT, char *, int *);
static int fix_always (CT, int *);
+static int decode_header_field_bodies (CT, int *);
static int fix_filename_param (char *, char *, PM *, PM *);
static int fix_filename_encoding (CT);
static int write_content (CT, const char *, char *, FILE *, int, int);
fx.replacetextplain = 0;
fx.decodetext = CE_8BIT;
fx.decodetypes = "text,application/ics"; /* Default, per man page. */
+ fx.decodeheaderfieldbodies = NULL;
fx.lf_line_endings = 0;
fx.textcharset = NULL;
}
fx.decodetypes = cp;
continue;
+ case DECODEHEADERFIELDBODIESSW:
+ if (! (cp = *argp++) || *cp == '-') {
+ die("missing argument to %s", argp[-2]);
+ }
+ fx.decodeheaderfieldbodies = cp;
+ if (strcasecmp (cp, "utf-8") && strcasecmp (cp, "utf8")) {
+                    /* Only UTF-8 is accepted because UTF-8 text cannot
+                       contain embedded NUL bytes, so the decoded value can
+                       be handled as an ordinary C string.  Other encodings
+                       share that property, but we won't bother to
+                       enumerate them. */
+ die("-decodeheaderfieldbodies only supports utf-8");
+ }
+ continue;
+ case NDECODEHEADERFIELDBODIESSW:
+ fx.decodeheaderfieldbodies = NULL;
+ continue;
case CRLFLINEBREAKSSW:
fx.lf_line_endings = 0;
continue;
&message_mods);
update_cte (*ctp);
}
+ if (status == OK && fx->decodeheaderfieldbodies) {
+ status = decode_header_field_bodies(*ctp, &message_mods);
+ }
if (status == OK && fx->textcharset != NULL) {
status = convert_charsets (*ctp, fx->textcharset, &message_mods);
}
}
+/*
+ * Decodes RFC 2047 encoded header field bodies whose charset is UTF-8,
+ * replacing each matching field value in place.  Similar to
+ * fix_filename_param(), but does not modify any MIME parameter values.
+ *
+ * The parts of a CT_MULTIPART, and the content of a CT_MESSAGE with subtype
+ * MESSAGE_EXTERNAL, are processed recursively before the header fields of
+ * ct itself.  A field value is a candidate only if it begins with
+ * " =?utf-8?" or " =?utf8?" (compared case-insensitively) and ends with
+ * "?=\n".
+ *
+ * *message_mods is incremented once for every field value that is replaced.
+ * Returns OK, or NOTOK if any candidate value could not be decoded; a non-OK
+ * status from a nested part is passed back to the caller.
+ */
+static int
+decode_header_field_bodies (CT ct, int *message_mods)
+{
+    int status = OK;
+    HF hf;
+
+    switch (ct->c_type) {
+    case CT_MULTIPART: {
+        struct multipart *m = (struct multipart *) ct->c_ctparams;
+        struct part *part;
+
+        /* Stop descending at the first part that reports a failure. */
+        for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
+            status = decode_header_field_bodies (part->mp_part, message_mods);
+        }
+        break;
+    }
+
+    case CT_MESSAGE:
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *e = (struct exbody *) ct->c_ctparams;
+
+            status = decode_header_field_bodies (e->eb_content, message_mods);
+        }
+        break;
+
+    default:
+        /* Other content types have no nested content to visit. */
+        break;
+    }
+
+    for (hf = ct->c_first_hf; hf; hf = hf->next) {
+        char decoded[PATH_MAX + 1];
+        size_t len;
+
+        /* Only decode UTF-8 values that look like an RFC 2047 encoded
+           header field body. */
+        if (! hf->value || ! has_suffix(hf->value, "?=\n") ||
+            (strncasecmp (hf->value, " =?utf8?", 8) &&
+             strncasecmp (hf->value, " =?utf-8?", 9))) {
+            continue;
+        }
+
+        if (! decode_rfc2047 (hf->value, decoded, sizeof decoded)) {
+            /* Keep going so that every undecodable field is reported. */
+            inform("failed to decode %s header field body %s",
+                   hf->name, hf->value);
+            status = NOTOK;
+            continue;
+        }
+
+        len = strlen(decoded);
+
+        /* decode_rfc2047() could truncate if the buffer fills up.
+           Detect and discard if that happened.  Also leave the field
+           alone if decoding did not change it. */
+        if (len < sizeof(decoded) - 1 && strcmp(hf->value, decoded)) {
+            hf->value = mh_xrealloc (hf->value, len + 1);
+            /* len + 1 copies the terminating NUL as well. */
+            memcpy (hf->value, decoded, len + 1);
+            ++*message_mods;
+        }
+    }
+
+    return status;
+}
+
+
+
/*
* Factor out common code for loops in fix_filename_encoding().
*/