X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/0cb3c82ac7c780c1588c495a36d725c1ca2a5c76..5da3c5ae104a852069311096fe828c7eb5af1190:/docs/contrib/replyfilter

diff --git a/docs/contrib/replyfilter b/docs/contrib/replyfilter
index 193dd40b..78c5c4ac 100755
--- a/docs/contrib/replyfilter
+++ b/docs/contrib/replyfilter
@@ -10,11 +10,16 @@
 #
 # - Put the path to this program in your .mh_profile under formatproc:
 # 
-#   formatproc: replyfilter
+#      formatproc: replyfilter
 #
-# - Create a mhl reply filter that consists of the following line:
+#   or invoke repl with "-fmtproc replyfilter".
 #
-#   body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
+# - Create an mhl reply filter that consists of the following line:
+#
+#      body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
+#
+#   By default, repl will look for the mhl reply filter by the name
+#   "mhl.reply", but it will look elsewhere if the -filter switch is given.
 #
 #   To decode this a bit:
 #
@@ -23,8 +28,8 @@
 #		  component prefix of ">" as a quote character, but we're
 #		  going to have replyfilter do that).
 #   nowrap	- Don't wrap lines if they exceed the column width
-#   formatarg   - Arguments to fmtproc.  The first argument is the value of
-#		  the Content-type header; the second is the value of the
+#   formatarg   - Arguments to formatproc.  The first argument is the value
+#		  of the Content-type header; the second is the value of the
 #		  Content-Transfer-Encoding header.  The last "formatarg"
 #		  is used as your quoting prefix.  Replace it with whatever
 #		  you want.
@@ -37,11 +42,22 @@ use MIME::Base64;
 use Encode;
 
 #
-# The program we use to format "long" text
+# The program we use to format "long" text.  Should be capable of reading
+# from standard input and sending the formatted text to standard output.
 #
 
 $filterprogram = 'par';
 
+#
+# If the above filter program has problems with some input, use the following
+# hash to map each problematic pattern (a regular expression) to its
+# replacement.  In this example we replace the Unicode non-breaking space
+# (U+00A0) with a plain space because it makes par mangle the output.
+# Uncomment this if this ends up being a problem for you, or add others.
+#
+
+#%filterreplace = ( "\N{U+a0}" => " " );
+
 #
 # Our output character set.  This script assumes a UTF-8 locale, but if you
 # want to run under a different locale the change it here.
@@ -57,11 +73,11 @@ $outcharset = 'utf-8';
 $maxcolwidth = 78;
 
 #
-# Out HTML converter program & arguments
+# Our HTML converter program & arguments.  The input charset will be appended.
 #
 
 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
-	     '-O', $outcharset);
+	     '-O', $outcharset, '-I');
 
 
 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
@@ -147,12 +163,12 @@ exit 0;
 # or base64 to feed it into a formatting filter.
 #
 
-sub process_text (*$$;$)
+sub process_text
 {
 	my ($input, $encoding, $charset, $boundary) = @_;
 	my $text, $filterpid, $prefixpid, $finread, $finwrite;
 	my $foutread, $foutwrite, $decoder, $ret, $filterflag;
-	my @text = ( '' ), $maxline = 0;
+	my $text, $maxline = 0;
 
 	#
 	# In the simple case, just spit out the text prefixed by the
@@ -160,16 +176,29 @@ sub process_text (*$$;$)
 	#
 
 	if ($encoding eq '7bit' || $encoding eq '8bit') {
+		#
+		# Switch the character set to whatever is specified by
+		# the MIME message
+		#
+		binmode($input, ":encoding($charset)");
 		while (<$input>) {
 			$ret = match_boundary($_, $boundary);
 			if (defined $ret) {
+				binmode($input, ':encoding(us-ascii)');
 				return $ret;
 			}
 			print $quoteprefix, $_;
 		}
 		return 'EOF';
 	} else {
-		$decoder = find_decoder($encoding);
+		#
+		# If we've got some other encoding, the input text is almost
+		# certainly US-ASCII
+		#
+
+		binmode($input, ':encoding(us-ascii)');
+
+		$decoder = find_decoder(lc($encoding));
 		if (! defined $decoder) {
 			return 'EOF';
 		}
@@ -177,26 +206,30 @@ sub process_text (*$$;$)
 
 	#
 	# Okay, assume that the encoding will make it so that we MIGHT need
-	# to filter it.  Read it in; if it's too long, filter it.
+	# to filter it.  Read it in; if the lines are too long, filter it.
 	#
 
+	my $chardecode = find_encoding($charset);
+
 	while (<$input>) {
-		my $line, $len;
+		my @lines, $len;
 
 		last if ($ret = match_boundary($_, $boundary));
 
-		$line = decode($charset, &$decoder($_));
+		$text .= $_;
 
-		if (substr($text[$#text], -1, 1) eq "\n") {
-			push @text, $line;
-		} else {
-			$text[$#text] .= $line;
-		}
-		if (($len = length($text[$#text])) > $maxline) {
-			$maxline = $len;
-		}
 	}
 
+	binmode($input, ':encoding(us-ascii)');
+
+	$text = $chardecode->decode(&$decoder($text));
+
+	grep {
+		my $len;
+		if (($len = length) > $maxline) {
+			$maxline = $len;
+		}} split(/^/, $text);
+
 	if (! defined $ret) {
 		$ret = 'EOF';
 	}
@@ -205,7 +238,9 @@ sub process_text (*$$;$)
 		#
 		# These are short enough; just output it now as-is
 		#
-		print STDOUT @text;
+		foreach my $line (split(/^/, $text)) {
+			print STDOUT $quoteprefix, $line;
+		}
 		return $ret;
 	}
 
@@ -291,7 +326,13 @@ sub process_text (*$$;$)
 	# Send our input to the filter program
 	#
 
-	print $finwrite @text;
+	if (%filterreplace) {
+		foreach my $match (keys %filterreplace) {
+			 $text =~ s/$match/$filterreplace{$match}/g;
+		}
+	}
+
+	print $finwrite $text;
 
 	close($finwrite);
 	waitpid $filterpid, 0;
@@ -306,13 +347,13 @@ sub process_text (*$$;$)
 # Filter HTML through a converter program
 #
 
-sub process_html (*$$;$)
+sub process_html
 {
 	my ($input, $encoding, $charset, $boundary) = @_;
 	my $filterpid, $prefixpid, $finread, $finwrite;
 	my $foutread, $foutwrite, $decoder, $ret;
 
-	if (! defined($decoder = find_decoder($encoding))) {
+	if (! defined($decoder = find_decoder(lc($encoding)))) {
 		return 'EOF';
 	}
 
@@ -364,8 +405,9 @@ sub process_html (*$$;$)
 		# Exec our converter
 		#
 
-		exec (@htmlconv) ||
-				die "Unable to exec $filterprogram: $!\n";
+		my @conv = (@htmlconv, $charset);
+		exec (@conv) ||
+				die "Unable to exec $htmlconv[0]: $!\n";
 	} else {
 		die "Fork for $htmlconv[0] failed: $!\n";
 	}
@@ -420,7 +462,7 @@ sub process_html (*$$;$)
 # Decide what to do, based on what kind of content it is.
 #
 
-sub process_part (*$$$$;$)
+sub process_part
 {
 	my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
 	my ($type, $subtype) = (split('/', $content_type, -1), '');
@@ -466,7 +508,7 @@ sub process_part (*$$$$;$)
 # the content of this part
 #
 
-sub process_multipart ($$$)
+sub process_multipart
 {
 	my ($input, $subtype, $boundary) = @_;
 	my $altout;
@@ -499,7 +541,7 @@ sub process_multipart ($$$)
 			$charset = 'us-ascii';
 		}
 
-		$encoding = defined $cte ? $cte->param('_') : '7bit';
+		$encoding = defined $cte ? lc($cte->param('_')) : '7bit';
 		$name = defined $cdispo ? $cdispo->param('filename') : undef;
 
                 #
@@ -565,7 +607,7 @@ sub process_multipart ($$$)
 # "Eat" a MIME part; consume content until we hit the boundary or EOF
 #
 
-sub eat_part ($$)
+sub eat_part
 {
 	my ($input, $boundary) = @_;
 	my $ret;
@@ -596,7 +638,7 @@ sub eat_part ($$)
 # Return the decoder subroutine to use
 #
 
-sub find_decoder ($)
+sub find_decoder
 {
 	my ($encoding) = @_;
 
@@ -612,7 +654,7 @@ sub find_decoder ($)
 	}
 }
 
-sub null_decoder ($)
+sub null_decoder
 {
 	my ($input) = @_;
 
@@ -623,15 +665,17 @@ sub null_decoder ($)
 # Match a line against the boundary string
 #
 
-sub match_boundary($$)
+sub match_boundary
 {
-	my ($_, $boundary) = @_;
+	my ($line, $boundary) = @_;
+
+	return if ! defined $boundary;
 
-	if (substr($_, 0, 2) eq '--') {
-		s/[ \t\r\n]+\Z//;
-		if ($_ eq "--$boundary") {
+	if (substr($line, 0, 2) eq '--') {
+		$line =~ s/[ \t\r\n]+\Z//;
+		if ($line eq "--$boundary") {
 			return 'EOP';
-		} elsif ($_ eq "--$boundary--") {
+		} elsif ($line eq "--$boundary--") {
 			return 'EOM';
 		}
 	}