]>
diplodocus.org Git - nmh/blob - docs/contrib/replyfilter
3 # replyfilter - A reply filter for nmh
5 # The idea behind this program is that it will act as a format filter
6 # for nmh. It will try to extract out all text/plain parts and format
7 # them if necessary using a filter program.
9 # To use this program, configure nmh in the following way (nmh 1.5 or later):
11 # - Put the path to this program in your .mh_profile under formatproc:
13 # formatproc: replyfilter
15 # - Create a mhl reply filter that consists of the following line:
17 # body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
19 # To decode this a bit:
21 # body - Output the "body" component
22 # nocomponent - Don't output a component prefix (normally here we use a
23 # component prefix of ">" as a quote character, but we're
24 # going to have replyfilter do that).
25 # nowrap - Don't wrap lines if they exceed the column width
26 # formatarg - Arguments to fmtproc. The first argument is the value of
27 # the Content-type header; the second is the value of the
28 # Content-Transfer-Encoding header. The last "formatarg"
29 # is used as your quoting prefix. Replace it with whatever
35 use MIME
::QuotedPrint
;
40 # The program we use to format "long" text. Should be capable of reading
41 # from standard input and sending the formatted text to standard output.
44 $filterprogram = 'par';
47 # If the above filter program has problems with some input, use the following
48 # regular expression to remove any problematic input. In this example we
49 # filter out the UTF-8 non-breaking space (U+00A0) because that makes par
50 # mangle the output. Uncomment this if this ends up being a problem for
51 # you, or feel free to add others.
54 #%filterreplace = ( "\N{U+a0}" => " " );
57 # Our output character set. This script assumes a UTF-8 locale, but if you
58 # want to run under a different locale the change it here.
61 $outcharset = 'utf-8';
64 # Maximum column width (used by the HTML converter and to decide if we need
65 # to invoke the filter program
71 # Out HTML converter program & arguments. charset will be appended
74 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
75 '-O', $outcharset, '-I');
78 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
82 my $ctype = Mail
::Field-
>new('Content-Type', $ARGV[0]);
83 $content_type = $ctype->type;
84 $charset = $ctype->charset;
85 $boundary = $ctype->boundary;
87 $content_type = 'text/plain';
88 $charset = 'us-ascii';
91 $encoding = $ARGV[1] eq "" ? '7bit' : lc($ARGV[1]);
92 $quoteprefix = $ARGV[2];
95 # Set up our output to be in our character set
98 binmode(STDOUT
, ":encoding($outcharset)");
101 # The simplest case: if we have a single type of text/plain, send it
102 # to our format subroutine.
105 if ($content_type eq 'text/plain') {
106 process_text
(\
*STDIN
, $encoding, $charset);
111 # Alright, here's what we need to do.
113 # Find any text/plain parts and decode them. Decode them via base64 or
114 # quoted-printable, and feed them to our formatting filter when appropriate.
115 # Put markers in the output for other content types.
118 ($type) = (split('/', $content_type));
120 if ($type eq 'multipart') {
123 # For multipart messages we have to do a little extra.
124 # Eat the MIME prologue (everything up until the first boundary)
127 if (! defined $boundary || $boundary eq '') {
128 print "No boundary in Content-Type header!\n";
134 last if match_boundary
($_, $boundary);
138 print "Unable to find boundary in message\n";
145 process_part
(\
*STDIN
, $content_type, $encoding, $charset, $boundary);
149 # Eat the MIME epilog
157 # Handled encoded text. I think we can assume if the encoding is q-p
158 # or base64 to feed it into a formatting filter.
161 sub process_text
(*$$;$)
163 my ($input, $encoding, $charset, $boundary) = @_;
164 my $text, $filterpid, $prefixpid, $finread, $finwrite;
165 my $foutread, $foutwrite, $decoder, $ret, $filterflag;
166 my $text, $maxline = 0;
169 # In the simple case, just spit out the text prefixed by the
173 if ($encoding eq '7bit' || $encoding eq '8bit') {
175 # Switch the character set to whatever is specified by
178 binmode($input, ":encoding($charset)");
180 $ret = match_boundary
($_, $boundary);
182 binmode($input, ':encoding(us-ascii)');
185 print $quoteprefix, $_;
190 # If we've got some other encoding, the input text is almost
194 binmode($input, ':encoding(us-ascii)');
196 $decoder = find_decoder
(lc($encoding));
197 if (! defined $decoder) {
203 # Okay, assume that the encoding will make it so that we MIGHT need
204 # to filter it. Read it in; if the lines are too long, filter it
207 my $chardecode = find_encoding
($charset);
212 last if ($ret = match_boundary
($_, $boundary));
218 binmode($input, ':encoding(us-ascii)');
220 $text = $chardecode->decode(&$decoder($text));
224 if (($len = length) > $maxline) {
226 }} split(/^/, $text);
228 if (! defined $ret) {
232 if ($maxline <= $maxcolwidth) {
234 # These are short enough; just output it now as-is
236 foreach my $line (split(/^/, $text)) {
237 print STDOUT
$quoteprefix, $line;
243 # We fork a copy of ourselves to read the output from the filter
244 # program and prefix the quote character.
247 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
248 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
250 binmode($finread, ":encoding($outcharset)");
251 binmode($finwrite, ":encoding($outcharset)");
252 binmode($foutread, ":encoding($outcharset)");
253 binmode($foutwrite, ":encoding($outcharset)");
255 if ($filterpid = fork) {
257 # Close the pipes in the parent that we're not using
262 } elsif (defined $filterpid) {
264 # Close our ununsed filehandles
271 # Dup() down the filehandles to standard input and output
274 open(STDIN
, "<&", $finread) ||
275 die "dup(filterin) failed: $!\n";
276 open(STDOUT
, ">&", $foutwrite) ||
277 die "dup(filterout) failed: $!\n";
290 exec $filterprogram ||
291 die "Unable to exec $filterprogram: $!\n";
293 die "Fork for $filterprogram failed: $!\n";
297 # Fork our output handler.
300 if ($prefixpid = fork) {
302 # We don't need these anymore
306 } elsif (defined $prefixpid) {
308 # Read from foutwrite, and output (with prefix) to stdout
313 while (<$foutread>) {
314 print STDOUT
$quoteprefix, $_;
321 # Send our input to the filter program
324 if (%filterreplace) {
325 foreach my $match (keys %filterreplace) {
326 $text =~ s/$match/$filterreplace{$match}/g;
330 print $finwrite $text;
333 waitpid $filterpid, 0;
334 warn "Filter process exited with ", ($? >> 8), "\n" if $?;
335 waitpid $prefixpid, 0;
336 warn "Pipe reader process exited with ", ($? >> 8), "\n" if $?;
342 # Filter HTML through a converter program
345 sub process_html
(*$$;$)
347 my ($input, $encoding, $charset, $boundary) = @_;
348 my $filterpid, $prefixpid, $finread, $finwrite;
349 my $foutread, $foutwrite, $decoder, $ret;
351 if (! defined($decoder = find_decoder
(lc($encoding)))) {
356 # We fork a copy of ourselves to read the output from the filter
357 # program and prefix the quote character.
360 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
361 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
363 binmode($finread, ":encoding($outcharset)");
364 binmode($finread, ":encoding($outcharset)");
365 binmode($foutread, ":encoding($outcharset)");
366 binmode($foutwrite, ":encoding($outcharset)");
368 if ($filterpid = fork) {
370 # Close the pipes in the parent that we're not using
375 } elsif (defined $filterpid) {
377 # Close our ununsed filehandles
384 # Dup() down the filehandles to standard input and output
387 open(STDIN
, "<&", $finread) ||
388 die "dup(filterin) failed: $!\n";
389 open(STDOUT
, ">&", $foutwrite) ||
390 die "dup(filterout) failed: $!\n";
403 my @conv = (@htmlconv, $charset);
405 die "Unable to exec $filterprogram: $!\n";
407 die "Fork for $htmlconv[0] failed: $!\n";
411 # Fork our output handler.
414 if ($prefixpid = fork) {
416 # We don't need these anymore
420 } elsif (defined $prefixpid) {
422 # Read from foutwrite, and output (with prefix) to stdout
427 while (<$foutread>) {
428 print STDOUT
$quoteprefix, $_;
435 # Send our input to the filter program
439 last if ($ret = match_boundary
($_, $boundary));
440 print $finwrite (&$decoder($_));
443 if (! defined $ret) {
448 waitpid $filterpid, 0;
449 warn "HTML converter process exited with ", scalar($? >> 8), "\n" if $?;
450 waitpid $prefixpid, 0;
451 warn "Pipe reader process exited with ", $? >> 8, "\n" if $?;
457 # Decide what to do, based on what kind of content it is.
460 sub process_part
(*$$$$;$)
462 my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
463 my ($type, $subtype) = (split('/', $content_type, -1), '');
465 if ($type eq 'text') {
467 # If this is a text part, right now we only deal with
468 # plain and HTML parts.
470 if ($subtype eq 'plain') {
471 return process_text
($input, $encoding, $charset,
473 } elsif ($subtype eq 'html') {
474 return process_html
($input, $encoding, $charset,
477 print ">>> $content_type content\n";
478 return eat_part
($input, $boundary);
480 } elsif ($type eq 'multipart') {
481 return process_multipart
($input, $subtype, $boundary);
484 # Other types we're not sure what to do with right now
485 # Just put a marker in there
488 print ">>> $content_type attachment";
490 print ", name=$name";
494 return eat_part
($input, $boundary);
499 # Process a multipart message.
501 # When called, we should be right after the beginning of the first
502 # boundary marker. So we should be pointed at header lines which describe
503 # the content of this part
506 sub process_multipart
($$$)
508 my ($input, $subtype, $boundary) = @_;
512 my $encoding, $type, $end, $name, $charset;
515 # Use the Mail::Header package to read in any headers
516 # corresponding to this part
519 my $head = Mail
::Header-
>new($input, (MailFrom
=> 'IGNORE'));
522 # Extract out any Content-Type, Content-Transfer-Encoding, and
523 # Content-Disposition headers
526 my $ctype = Mail
::Field-
>extract('Content-Type', $head);
527 my $cte = Mail
::Field-
>extract('Content-Transfer-Encoding',
529 my $cdispo = Mail
::Field-
>extract('Content-Disposition', $head);
531 if (defined $ctype) {
532 $type = $ctype->type;
533 $charset = $ctype->charset;
535 $type = 'text/plain';
536 $charset = 'us-ascii';
539 $encoding = defined $cte ? lc($cte->param('_')) : '7bit';
540 $name = defined $cdispo ? $cdispo->param('filename') : undef;
543 # Special handling for multipart/alternative; pick
544 # the "first" one we can handle (which is usually
545 # text/plain) and silently eat the rest, but output a
546 # warning if we can't handle anything.
550 $end = eat_part
($input, $boundary);
552 my $subboundary = $boundary;
553 my $maintype = (split('/', $type))[0];
555 if ($maintype eq 'multipart') {
556 $subboundary = $ctype->boundary;
558 # Go until we find our beginning of this
561 my $subend = eat_part
($input, $subboundary);
563 if ($subend ne 'EOP') {
564 print ">>> WARNING: malformed ",
565 "nested multipart\n";
570 $end = process_part
($input, $type, $encoding,
571 $charset, $subboundary, $name);
573 if ($subtype eq 'alternative' && ! defined $altout &&
574 $type eq 'text/plain') {
579 # Since we changed the semantics of $boundary
580 # above for nested multiparts, if we are
581 # handling a nested multipart then find the end
582 # of our current part
585 if ($maintype eq 'multipart') {
586 $end = eat_part
($input, $boundary);
591 if ($end eq 'EOM' || $end eq 'EOF') {
592 if ($subtype eq 'alternative' && !defined $altout) {
593 print ">>>multipart/alternative: no suitable ",
602 # "Eat" a MIME part; consume content until we hit the boundary or EOF
607 my ($input, $boundary) = @_;
611 # If we weren't given a boundary just eat input until EOF
614 if (! defined $boundary) {
620 # Otherwise, consume data until we hit our boundary
624 if ($ret = match_boundary
($_, $boundary)) {
633 # Return the decoder subroutine to use
640 if ($encoding eq '7bit' || $encoding eq '8bit') {
641 return \
&null_decoder
;
642 } elsif ($encoding eq 'base64') {
643 return \
&decode_base64
;
644 } elsif ($encoding eq 'quoted-printable') {
647 warn "Unknown encoding: $encoding\n";
660 # Match a line against the boundary string
663 sub match_boundary
($$)
665 my ($line, $boundary) = @_;
667 return if ! defined $boundary;
669 if (substr($line, 0, 2) eq '--') {
670 $line =~ s/[ \t\r\n]+\Z//;
671 if ($line eq "--$boundary") {
673 } elsif ($line eq "--$boundary--") {