From: David Levine Date: Sat, 20 Feb 2016 18:41:52 +0000 (-0500) Subject: Added -decodetypes switch to mhfixmsg(1). X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/5682db8c0089c419257c612b167771e1685f683e?hp=7f0c3f960c5e3d8817121da1a75139a83130b668 Added -decodetypes switch to mhfixmsg(1). --- diff --git a/docs/pending-release-notes b/docs/pending-release-notes index e9f60138..6163ec86 100644 --- a/docs/pending-release-notes +++ b/docs/pending-release-notes @@ -25,8 +25,8 @@ NEW FEATURES - When building from source, configure will derive ${prefix} from an existing nmh installation if it finds one in your $PATH. - Added getmymbox and getmyaddr mh-format(5) function escapes. -- New -changecur, -nochangecur, and -fixtype switches have been added - to mhfixmsg(1). +- New -changecur, -nochangecur, -fixtype, and -decodetypes switches have been + added to mhfixmsg(1). - mhfixmsg now removes an extraneous trailing semicolon from header parameter lists. - Added -convertargs switch to repl(1), to pass arguments to programs diff --git a/man/mhfixmsg.man b/man/mhfixmsg.man index 84245661..17fca716 100644 --- a/man/mhfixmsg.man +++ b/man/mhfixmsg.man @@ -1,4 +1,4 @@ -.TH MHFIXMSG %manext1% "February 8, 2015" "%nmhversion%" +.TH MHFIXMSG %manext1% "February 20, 2016" "%nmhversion%" .\" .\" %nmhwarning% .\" @@ -16,6 +16,8 @@ mhfixmsg \- rewrite MIME messages with various transformations .RB [ \-decodetext 8bit/7bit | .BR \-nodecodetext ] +.RB [ \-decodetypes +.IR "type[/subtype][,...]" ] .RB [ \-textcharset .I charset .RB "| " \-notextcharset ] @@ -72,6 +74,24 @@ linefeed character is removed from text parts encoded in ASCII, ISO-8859-x, UTF-8, or Windows-12xx. .PP The +.B \-decodetypes +switch specifies the message parts, by type and optionally subtype, +to which +.B \-decodetext +applies. Its argument is a comma-separated list of type/subtype +elements. If an element does not contain a subtype, then +.B \-decodetext +applies to all subtypes of the type. 
The default is +.B \-decodetypes +.IR text ; +it can be overridden, e.g., with +.B \-decodetypes +.I text/plain +to restrict +.B \-decodetext +to just text/plain parts. +.PP +The .B \-textcharset switch specifies that all text/plain parts of the message(s) should be converted to @@ -257,6 +277,7 @@ content type and/or encoding as follows: .nf .ta \w'\-fixboundary 'u \-decodetext base64 and quoted-printable encoded text parts +\-decodetypes limits parts to which \-decodetext applies \-textcharset text/plain parts \-reformat text parts that are not text/plain \-fixboundary outermost multipart part @@ -387,6 +408,7 @@ is checked. .RB ` +folder "' defaults to the current folder" .RB ` msgs "' defaults to cur" .RB ` "\-decodetext 8bit"' +.RB ` "\-decodetypes text"' .RB ` \-notextcharset ' .RB ` \-reformat ' .RB ` \-noreplacetextplain ' diff --git a/test/mhfixmsg/test-mhfixmsg b/test/mhfixmsg/test-mhfixmsg index f7f47b90..522c0b89 100755 --- a/test/mhfixmsg/test-mhfixmsg +++ b/test/mhfixmsg/test-mhfixmsg @@ -51,6 +51,7 @@ Usage: mhfixmsg [+folder] [msgs] [switches] switches are: -decodetext 8bit|7bit -nodecodetext + -decodetypes -[no]textcharset -[no]reformat -[no]replacetextplain @@ -418,11 +419,11 @@ else fi -# check -decode (enabled by default) +# check -decodetext (enabled by default) cat >"$expected" <`mhpath new` <$expected <`mhpath new` <$expected < + + This is the text/html part. 
+ + + + +------- =_aaaaaaaaaa0-- +EOF + +cat >`mhpath new` <$expected <`mhpath new` <"$actual" diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 8d67e827..f9f001b3 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -16,6 +16,7 @@ #define MHFIXMSG_SWITCHES \ X("decodetext 8bit|7bit", 0, DECODETEXTSW) \ X("nodecodetext", 0, NDECODETEXTSW) \ + X("decodetypes", 0, DECODETYPESW) \ X("textcharset", 0, TEXTCHARSETSW) \ X("notextcharset", 0, NTEXTCHARSETSW) \ X("reformat", 0, REFORMATSW) \ @@ -80,6 +81,7 @@ typedef struct fix_transformations { int reformat; int replacetextplain; int decodetext; + char *decodetypes; char *textcharset; } fix_transformations; @@ -106,7 +108,8 @@ static CT build_multipart_alt (CT, CT, int, int); static int boundary_in_content (FILE **, char *, const char *); static void transfer_noncontent_headers (CT, CT); static int set_ct_type (CT, int type, int subtype, int encoding); -static int decode_text_parts (CT, int, int *); +static int decode_text_parts (CT, int, const char *, int *); +static int should_decode(const char *, const char *, const char *); static int content_encoding (CT, const char **); static int strip_crs (CT, int *); static int convert_charsets (CT, char *, int *); @@ -135,6 +138,7 @@ main (int argc, char **argv) { fx.fixtypes = NULL; fx.replacetextplain = 0; fx.decodetext = CE_8BIT; + fx.decodetypes = "text"; /* Default to all text content. */ fx.textcharset = NULL; if (nmh_init(argv[0], 1)) { return 1; } @@ -179,6 +183,11 @@ main (int argc, char **argv) { case NDECODETEXTSW: fx.decodetext = 0; continue; + case DECODETYPESW: + if (! (cp = *argp++) || *cp == '-') + adios (NULL, "missing argument to %s", argp[-2]); + fx.decodetypes = cp; + continue; case TEXTCHARSETSW: if (! 
(cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); @@ -445,7 +454,7 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain); } if (status == OK && fx->decodetext) { - status = decode_text_parts (*ctp, fx->decodetext, &message_mods); + status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods); } if (status == OK && fx->textcharset != NULL) { status = convert_charsets (*ctp, fx->textcharset, &message_mods); @@ -1748,11 +1757,15 @@ set_ct_type (CT ct, int type, int subtype, int encoding) { static int -decode_text_parts (CT ct, int encoding, int *message_mods) { +decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) { int status = OK; switch (ct->c_type) { case CT_TEXT: + if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) { + break; + } + switch (ct->c_encoding) { case CE_BASE64: case CE_QUOTED: { @@ -1831,7 +1844,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { /* Should check to see if the body for this part is encoded? For now, it gets passed along as-is by InitMultiPart(). 
*/ for (part = m->mp_parts; status == OK && part; part = part->mp_next) { - status = decode_text_parts (part->mp_part, encoding, message_mods); + status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods); } break; } @@ -1840,7 +1853,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { if (ct->c_subtype == MESSAGE_EXTERNAL) { struct exbody *e = (struct exbody *) ct->c_ctparams; - status = decode_text_parts (e->eb_content, encoding, message_mods); + status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods); } break; @@ -1852,6 +1865,37 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { } +/* Determine if the part with type[/subtype] should be decoded, according to + decodetypes (which came from the -decodetypes switch). */ +static int +should_decode(const char *decodetypes, const char *type, const char *subtype) { + /* Quick search for matching type[/subtype] in decodetypes: bracket + decodetypes with commas, then search for ,type, and ,type/subtype, in + it. */ + + int found_match = 0; + char *delimited_decodetypes = concat(",", decodetypes, ",", NULL); + char *delimited_type = concat(",", type, ",", NULL); + + if (nmh_strcasestr(delimited_decodetypes, delimited_type)) { + found_match = 1; + } else if (subtype != NULL) { + char *delimited_type_subtype = + concat(",", type, "/", subtype, ",", NULL); + + if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) { + found_match = 1; + } + free(delimited_type_subtype); + } + + free(delimited_type); + free(delimited_decodetypes); + + return found_match; +} + + /* See if the decoded content is 7bit, 8bit, or binary. It's binary if it has any NUL characters, a CR not followed by a LF, or lines greater than 998 characters in length. If binary, reason is set