X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/0cb3c82ac7c780c1588c495a36d725c1ca2a5c76..5da3c5ae104a852069311096fe828c7eb5af1190:/docs/contrib/replyfilter diff --git a/docs/contrib/replyfilter b/docs/contrib/replyfilter index 193dd40b..78c5c4ac 100755 --- a/docs/contrib/replyfilter +++ b/docs/contrib/replyfilter @@ -10,11 +10,16 @@ # # - Put the path to this program in your .mh_profile under formatproc: # -# formatproc: replyfilter +# formatproc: replyfilter # -# - Create a mhl reply filter that consists of the following line: +# or invoke repl with "-fmtproc replyfilter". # -# body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">" +# - Create an mhl reply filter that consists of the following line: +# +# body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">" +# +# By default, repl will look for the mhl reply filter by the name +# "mhl.reply", but it will look elsewhere if the -filter switch is given. # # To decode this a bit: # @@ -23,8 +28,8 @@ # component prefix of ">" as a quote character, but we're # going to have replyfilter do that). # nowrap - Don't wrap lines if they exceed the column width -# formatarg - Arguments to fmtproc. The first argument is the value of -# the Content-type header; the second is the value of the +# formatarg - Arguments to formatproc. The first argument is the value +# of the Content-type header; the second is the value of the # Content-Transfer-Encoding header. The last "formatarg" # is used as your quoting prefix. Replace it with whatever # you want. @@ -37,11 +42,22 @@ use MIME::Base64; use Encode; # -# The program we use to format "long" text +# The program we use to format "long" text. Should be capable of reading +# from standard input and sending the formatted text to standard output. # $filterprogram = 'par'; +# +# If the above filter program has problems with some input, use the following +# regular expression to remove any problematic input. In this example we +# filter out the UTF-8 non-breaking space (U+00A0) because that makes par +# mangle the output. Uncomment this if this ends up being a problem for +# you, or feel free to add others. +# + +#%filterreplace = ( "\N{U+a0}" => " " ); + # # Our output character set. This script assumes a UTF-8 locale, but if you # want to run under a different locale the change it here. @@ -57,11 +73,11 @@ $outcharset = 'utf-8'; $maxcolwidth = 78; # -# Out HTML converter program & arguments +# Out HTML converter program & arguments. charset will be appended # @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html', - '-O', $outcharset); + '-O', $outcharset, '-I'); die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n" @@ -147,12 +163,12 @@ exit 0; # or base64 to feed it into a formatting filter. # -sub process_text (*$$;$) +sub process_text { my ($input, $encoding, $charset, $boundary) = @_; my $text, $filterpid, $prefixpid, $finread, $finwrite; my $foutread, $foutwrite, $decoder, $ret, $filterflag; - my @text = ( '' ), $maxline = 0; + my $text, $maxline = 0; # # In the simple case, just spit out the text prefixed by the @@ -160,16 +176,29 @@ sub process_text (*$$;$) # if ($encoding eq '7bit' || $encoding eq '8bit') { + # + # Switch the character set to whatever is specified by + # the MIME message + # + binmode($input, ":encoding($charset)"); while (<$input>) { $ret = match_boundary($_, $boundary); if (defined $ret) { + binmode($input, ':encoding(us-ascii)'); return $ret; } print $quoteprefix, $_; } return 'EOF'; } else { - $decoder = find_decoder($encoding); + # + # If we've got some other encoding, the input text is almost + # certainly US-ASCII + # + + binmode($input, ':encoding(us-ascii)'); + + $decoder = find_decoder(lc($encoding)); if (! defined $decoder) { return 'EOF'; } @@ -177,26 +206,30 @@ sub process_text (*$$;$) # # Okay, assume that the encoding will make it so that we MIGHT need - # to filter it. Read it in; if it's too long, filter it. + # to filter it. Read it in; if the lines are too long, filter it # + my $chardecode = find_encoding($charset); + while (<$input>) { - my $line, $len; + my @lines, $len; last if ($ret = match_boundary($_, $boundary)); - $line = decode($charset, &$decoder($_)); + $text .= $_; - if (substr($text[$#text], -1, 1) eq "\n") { - push @text, $line; - } else { - $text[$#text] .= $line; - } - if (($len = length($text[$#text])) > $maxline) { - $maxline = $len; - } } + binmode($input, ':encoding(us-ascii)'); + + $text = $chardecode->decode(&$decoder($text)); + + grep { + my $len; + if (($len = length) > $maxline) { + $maxline = $len; + }} split(/^/, $text); + if (! defined $ret) { $ret = 'EOF'; } @@ -205,7 +238,9 @@ sub process_text (*$$;$) # # These are short enough; just output it now as-is # - print STDOUT @text; + foreach my $line (split(/^/, $text)) { + print STDOUT $quoteprefix, $line; + } return $ret; } @@ -291,7 +326,13 @@ sub process_text (*$$;$) # Send our input to the filter program # - print $finwrite @text; + if (%filterreplace) { + foreach my $match (keys %filterreplace) { + $text =~ s/$match/$filterreplace{$match}/g; + } + } + + print $finwrite $text; close($finwrite); waitpid $filterpid, 0; @@ -306,13 +347,13 @@ sub process_text (*$$;$) # Filter HTML through a converter program # -sub process_html (*$$;$) +sub process_html { my ($input, $encoding, $charset, $boundary) = @_; my $filterpid, $prefixpid, $finread, $finwrite; my $foutread, $foutwrite, $decoder, $ret; - if (! defined($decoder = find_decoder($encoding))) { + if (! defined($decoder = find_decoder(lc($encoding)))) { return 'EOF'; } @@ -364,8 +405,9 @@ sub process_html (*$$;$) # Exec our converter # - exec (@htmlconv) || - die "Unable to exec $filterprogram: $!\n"; + my @conv = (@htmlconv, $charset); + exec (@conv) || + die "Unable to exec $htmlconv[0]: $!\n"; } else { die "Fork for $htmlconv[0] failed: $!\n"; } @@ -420,7 +462,7 @@ sub process_html (*$$;$) # Decide what to do, based on what kind of content it is. # -sub process_part (*$$$$;$) +sub process_part { my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_; my ($type, $subtype) = (split('/', $content_type, -1), ''); @@ -466,7 +508,7 @@ sub process_part (*$$$$;$) # the content of this part # -sub process_multipart ($$$) +sub process_multipart { my ($input, $subtype, $boundary) = @_; my $altout; @@ -499,7 +541,7 @@ sub process_multipart ($$$) $charset = 'us-ascii'; } - $encoding = defined $cte ? $cte->param('_') : '7bit'; + $encoding = defined $cte ? lc($cte->param('_')) : '7bit'; $name = defined $cdispo ? $cdispo->param('filename') : undef; # @@ -565,7 +607,7 @@ sub process_multipart ($$$) # "Eat" a MIME part; consume content until we hit the boundary or EOF # -sub eat_part ($$) +sub eat_part { my ($input, $boundary) = @_; my $ret; @@ -596,7 +638,7 @@ sub eat_part ($$) # Return the decoder subroutine to use # -sub find_decoder ($) +sub find_decoder { my ($encoding) = @_; @@ -612,7 +654,7 @@ sub find_decoder ($) } } -sub null_decoder ($) +sub null_decoder { my ($input) = @_; @@ -623,15 +665,17 @@ sub null_decoder ($) # Match a line against the boundary string # -sub match_boundary($$) +sub match_boundary { - my ($_, $boundary) = @_; + my ($line, $boundary) = @_; + + return if ! defined $boundary; - if (substr($_, 0, 2) eq '--') { - s/[ \t\r\n]+\Z//; - if ($_ eq "--$boundary") { + if (substr($line, 0, 2) eq '--') { + $line =~ s/[ \t\r\n]+\Z//; + if ($line eq "--$boundary") { return 'EOP'; - } elsif ($_ eq "--$boundary--") { + } elsif ($line eq "--$boundary--") { return 'EOM'; } }