From: David Levine Date: Sun, 9 Feb 2014 15:58:00 +0000 (-0600) Subject: Added -[no]textcharset switches to mhshow(1). These only apply if X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/721b0395fb2fceac4e66fed1009ed2f17fd5351f?ds=inline;hp=86cb743c202774a5541bf926baf7d80dc107baf5 Added -[no]textcharset switches to mhshow(1). These only apply if nmh was configured with iconv(3) support. If -textcharset is not used, mhshow will convert, if necessary, the charset of text/plain content to match the user's locale setting. --- diff --git a/Makefile.am b/Makefile.am index 5bc612c8..73fd9ced 100644 --- a/Makefile.am +++ b/Makefile.am @@ -72,7 +72,7 @@ TESTS = test/ali/test-ali test/anno/test-anno \ test/mhfixmsg/test-mhfixmsg \ test/mhlist/test-mhlist test/mhmail/test-mhmail \ test/mhparam/test-mhparam test/mhpath/test-mhpath \ - test/mhshow/test-charset \ + test/mhshow/test-charset test/mhshow/test-textcharset \ test/mhshow/test-cte-binary test/mhshow/test-qp \ test/mhshow/test-subpart test/mhshow/test-msg-buffer-boundaries \ test/mhstore/test-mhstore test/new/test-basic \ diff --git a/docs/pending-release-notes b/docs/pending-release-notes index 35bc26a3..dc764f1e 100644 --- a/docs/pending-release-notes +++ b/docs/pending-release-notes @@ -74,6 +74,10 @@ NEW FEATURES Nmh-Attachment). - The default Content-Transfer-Encoding for text parts is now 8bit. - mhbuild(1) now supports a selectable Content-Transfer-Encoding +- Added -[no]textcharset switches to mhshow(1). These only apply if + nmh was configured with iconv(3) support. If -textcharset is not + used, mhshow will convert, if necessary, the charset of text/plain + content to match the user's locale setting. 
----------------- OBSOLETE FEATURES diff --git a/man/mhshow.man b/man/mhshow.man index 9cc44c1b..baafe7e5 100644 --- a/man/mhshow.man +++ b/man/mhshow.man @@ -1,4 +1,4 @@ -.TH MHSHOW %manext1% "May 4, 2013" "%nmhversion%" +.TH MHSHOW %manext1% "February 9, 2014" "%nmhversion%" .\" .\" %nmhwarning% .\" @@ -22,6 +22,9 @@ mhshow \- display MIME messages .RB [ \-pause " | " \-nopause ] .RB [ \-form .IR formfile ] +.RB [ \-textcharset +.I charset +.RB "| " \-notextcharset ] .RB [ \-rcache .IR policy ] .RB [ \-wcache @@ -321,6 +324,20 @@ switch can be given to tell .B mhshow to never display parts in parallel. .SS "Showing Alternate Character Sets" +If +.B mhshow +was built with +.IR iconv (3), +then all text/plain parts of the message(s) will be displayed +using the character set of the current locale. This character +set can be overridden with the +.B \-textcharset +switch. To convert text parts other than text/plain, or if +.B mhshow +was not built with +.IR iconv , +an external program can be used, as described next. +.PP Because a content of type text might be in a non-ASCII character set, when .B mhshow @@ -379,29 +396,24 @@ needed beforehand. Note that many pagers strip off the high-order bit or have problems displaying text with the high-order bit set. However, the pager .B less -has support for single-octet character sets. The source -to -.B less -is available on many ftp sites carrying free software. -In order to view messages sent in the ISO-8859-1 character set using +has support for single-octet character sets. 
For example, messages +encoded in the ISO-8859-1 character set can be viewed using .BR less , -.PP -put these lines in your -.I \&.login -file: +with these environment variable settings: .PP .RS 5 .nf -setenv LESSCHARSET latin1 -setenv LESS "-f" +.ta \w'%F 'u +LESSCHARSET latin1 +LESS -f .fi .RE .PP -The first line tells +The first setting tells .B less to use the ISO-8859-1 definition for determining whether a character is \*(lqnormal\*(rq, \*(lqcontrol\*(lq, -or \*(lqbinary\*(rq. The second line tells +or \*(lqbinary\*(rq. The second setting tells .B less not to warn you if it encounters a file that has non-ASCII characters. Then, simply @@ -599,6 +611,7 @@ is checked. ^moreproc:~^Default program to display text/plain content .fi .SH "SEE ALSO" +.IR iconv (3), .IR mhbuild (1), .IR mhl (1), .IR mhlist (1), @@ -611,6 +624,7 @@ is checked. .RB ` \-nocheck ' .RB ` \-form\ mhl.headers ' .RB ` \-pause ' +.RB ` \-notextcharset ' .RB ` \-rcache\ ask ' .RB ` \-noserialonly ' .RB ` \-wcache\ ask ' diff --git a/test/mhshow/test-charset b/test/mhshow/test-charset index 83363da8..d0d8028b 100755 --- a/test/mhshow/test-charset +++ b/test/mhshow/test-charset @@ -16,6 +16,8 @@ fi setup_test +LC_ALL=en_US.UTF-8; export LC_ALL + expected=$MH_TEST_DIR/$$.expected actual=$MH_TEST_DIR/$$.actual diff --git a/test/mhshow/test-textcharset b/test/mhshow/test-textcharset new file mode 100755 index 00000000..b80802f1 --- /dev/null +++ b/test/mhshow/test-textcharset @@ -0,0 +1,75 @@ +#!/bin/sh +########################################################## +# +# Test display of text/plain parts with charset conversion +# +########################################################## + +set -e + +if test -z "${MH_OBJ_DIR}"; then + srcdir=`dirname "$0"`/../.. + MH_OBJ_DIR=`cd "$srcdir" && pwd`; export MH_OBJ_DIR +fi + +. 
"$MH_OBJ_DIR/test/common.sh" + +setup_test + +if test "$ICONV_ENABLED" -eq 0; then + echo "$0: skipping test-textcharset because nmh was built without iconv" + exit 0 +fi + +expected="$MH_TEST_DIR"/$$.expected +actual="$MH_TEST_DIR"/$$.actual + +msgfile=`mhpath new` +cat >"$msgfile" <"$expected" <"$actual" 2>&1 +check "$expected" "$actual" + +# Check -textcharset. +cat >"$expected" <"$actual" 2>&1 +check "$expected" "$actual" 'keep first' + +# Check use of user's locale. +LC_ALL=en_US.UTF-8; export LC_ALL +run_prog mhshow -nopause last >"$actual" 2>&1 +check "$expected" "$actual" + +exit $failed diff --git a/uip/mhshow.c b/uip/mhshow.c index f83236b7..eb6ea16b 100644 --- a/uip/mhshow.c +++ b/uip/mhshow.c @@ -31,6 +31,8 @@ X("form formfile", 0, FORMSW) \ X("part number", 0, PARTSW) \ X("type content", 0, TYPESW) \ + X("textcharset", 0, TEXTCHARSETSW) \ + X("notextcharset", 0, NTEXTCHARSETSW) \ X("rcache policy", 0, RCACHESW) \ X("wcache policy", 0, WCACHESW) \ X("version", 0, VERSIONSW) \ @@ -66,6 +68,7 @@ extern char *cache_private; extern int pausesw; extern int serialsw; extern char *progsw; +extern char *display_charset; extern int nomore; /* flags for moreproc/header display */ extern char *formsw; @@ -200,6 +203,16 @@ do_cache: types[ntype++] = cp; continue; + case TEXTCHARSETSW: + if (!(cp = *argp++) || *cp == '-') + adios (NULL, "missing argument to %s", argp[-2]); + display_charset = cp; + continue; + + case NTEXTCHARSETSW: + display_charset = NULL; + continue; + case FILESW: if (!(cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); diff --git a/uip/mhshowsbr.c b/uip/mhshowsbr.c index 8c595d76..fa449772 100644 --- a/uip/mhshowsbr.c +++ b/uip/mhshowsbr.c @@ -28,6 +28,7 @@ int serialsw = 0; int nolist = 0; char *progsw = NULL; +char *display_charset = NULL; /* flags for moreproc/header display */ int nomore = 0; @@ -69,6 +70,7 @@ static int show_multi_aux (CT, int, int, char *); static int show_message_rfc822 (CT, int, 
int); static int show_partial (CT, int, int); static int show_external (CT, int, int); +static int convert_content_charset (CT, char **); static void intrser (int); @@ -314,6 +316,17 @@ show_content_aux (CT ct, int serial, int alternate, char *cp, char *cracked) if (ct->c_showproc && !strcmp (ct->c_showproc, "true")) return (alternate ? DONE : OK); + if (! strcmp(invo_name, "mhshow") && + ct->c_type == CT_TEXT && ct->c_subtype == TEXT_PLAIN) { + /* This has to be done after calling c_ceopenfnx, so + unfortunately the type checks are necessary without + some code rearrangement. And to make this really ugly, + only do it in mhshow, not mhfixmsg, mhn, or mhstore. */ + if (convert_content_charset (ct, &file) != OK) { + return NOTOK; + } + } + xlist = 0; xpause = 0; xstdin = 0; @@ -1217,6 +1230,39 @@ convert_charset (CT ct, char *dest_charset, int *message_mods) { } +static int +convert_content_charset (CT ct, char **file) { + /* Convert character set if needed and if built with iconv. + On a successful conversion, *file is set to ct->c_cefile.ce_file. + Returns NOTOK if convert_charset fails, OK otherwise; without + HAVE_ICONV nothing is converted and OK is always returned. */ +#ifdef HAVE_ICONV + if (display_charset == NULL) { + /* The user did not specify a display charset, so use + current setting and see if the content will need to be + converted. */ + char *charset = content_charset (ct); + + if (! check_charset (charset, strlen (charset))) { + int unused = 0; + if (convert_charset (ct, get_charset (), &unused) == 0) { + *file = ct->c_cefile.ce_file; + } else { + return NOTOK; + } + } + } else { + /* The user requested display with a specific charset. */ + int unused = 0; + if (convert_charset (ct, display_charset, &unused) == 0) { + *file = ct->c_cefile.ce_file; + } else { + return NOTOK; + } + } +#endif /* HAVE_ICONV */ + + return OK; +} + + static void intrser (int i) {