]>
diplodocus.org Git - nmh/blob - docs/contrib/replyfilter
3 # replyfilter - A reply filter for nmh
5 # The idea behind this program is that it will act as a format filter
6 # for nmh. It will try to extract out all text/plain parts and format
7 # them if necessary using a filter program.
9 # To use this program, configure nmh in the following way (nmh 1.5 or later):
11 # - Put the path to this program in your .mh_profile under formatproc:
13 # formatproc: replyfilter
15 # or invoke repl with "-fmtproc replyfilter".
17 # - Create an mhl reply filter that consists of the following line:
19 # body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
21 # By default, repl will look for the mhl reply filter by the name
22 # "mhl.reply", but it will look elsewhere if the -filter switch is given.
24 # To decode this a bit:
26 # body - Output the "body" component
27 # nocomponent - Don't output a component prefix (normally here we use a
28 # component prefix of ">" as a quote character, but we're
29 # going to have replyfilter do that).
30 # nowrap - Don't wrap lines if they exceed the column width
31 # formatarg - Arguments to formatproc. The first argument is the value
32 # of the Content-type header; the second is the value of the
33 # Content-Transfer-Encoding header. The last "formatarg"
34 # is used as your quoting prefix. Replace it with whatever
40 use MIME
::QuotedPrint
;
45 # The program we use to format "long" text. Should be capable of reading
46 # from standard input and sending the formatted text to standard output.
49 $filterprogram = 'par';
52 # If the above filter program has problems with some input, use the following
53 # regular expression to remove any problematic input. In this example we
54 # filter out the UTF-8 non-breaking space (U+00A0) because that makes par
55 # mangle the output. Uncomment this if this ends up being a problem for
56 # you, or feel free to add others.
59 #%filterreplace = ( "\N{U+a0}" => " " );
62 # Our output character set. This script assumes a UTF-8 locale, but if you
63 # want to run under a different locale the change it here.
66 $outcharset = 'utf-8';
69 # Maximum column width (used by the HTML converter and to decide if we need
70 # to invoke the filter program
76 # Out HTML converter program & arguments. charset will be appended
79 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
80 '-O', $outcharset, '-I');
83 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
87 my $ctype = Mail
::Field-
>new('Content-Type', $ARGV[0]);
88 $content_type = $ctype->type;
89 $charset = $ctype->charset;
90 $boundary = $ctype->boundary;
92 $content_type = 'text/plain';
93 $charset = 'us-ascii';
96 $encoding = $ARGV[1] eq "" ? '7bit' : lc($ARGV[1]);
97 $quoteprefix = $ARGV[2];
100 # Set up our output to be in our character set
103 binmode(STDOUT
, ":encoding($outcharset)");
106 # The simplest case: if we have a single type of text/plain, send it
107 # to our format subroutine.
110 if ($content_type eq 'text/plain') {
111 process_text
(\
*STDIN
, $encoding, $charset);
116 # Alright, here's what we need to do.
118 # Find any text/plain parts and decode them. Decode them via base64 or
119 # quoted-printable, and feed them to our formatting filter when appropriate.
120 # Put markers in the output for other content types.
123 ($type) = (split('/', $content_type));
125 if ($type eq 'multipart') {
128 # For multipart messages we have to do a little extra.
129 # Eat the MIME prologue (everything up until the first boundary)
132 if (! defined $boundary || $boundary eq '') {
133 print "No boundary in Content-Type header!\n";
139 last if match_boundary
($_, $boundary);
143 print "Unable to find boundary in message\n";
150 process_part
(\
*STDIN
, $content_type, $encoding, $charset, $boundary);
154 # Eat the MIME epilog
162 # Handled encoded text. I think we can assume if the encoding is q-p
163 # or base64 to feed it into a formatting filter.
168 my ($input, $encoding, $charset, $boundary) = @_;
169 my $text, $filterpid, $prefixpid, $finread, $finwrite;
170 my $foutread, $foutwrite, $decoder, $ret, $filterflag;
171 my $text, $maxline = 0;
174 # In the simple case, just spit out the text prefixed by the
178 if ($encoding eq '7bit' || $encoding eq '8bit') {
180 # Switch the character set to whatever is specified by
183 binmode($input, ":encoding($charset)");
185 $ret = match_boundary
($_, $boundary);
187 binmode($input, ':encoding(us-ascii)');
190 print $quoteprefix, $_;
195 # If we've got some other encoding, the input text is almost
199 binmode($input, ':encoding(us-ascii)');
201 $decoder = find_decoder
(lc($encoding));
202 if (! defined $decoder) {
208 # Okay, assume that the encoding will make it so that we MIGHT need
209 # to filter it. Read it in; if the lines are too long, filter it
212 my $chardecode = find_encoding
($charset);
217 last if ($ret = match_boundary
($_, $boundary));
223 binmode($input, ':encoding(us-ascii)');
225 $text = $chardecode->decode(&$decoder($text));
229 if (($len = length) > $maxline) {
231 }} split(/^/, $text);
233 if (! defined $ret) {
237 if ($maxline <= $maxcolwidth) {
239 # These are short enough; just output it now as-is
241 foreach my $line (split(/^/, $text)) {
242 print STDOUT
$quoteprefix, $line;
248 # We fork a copy of ourselves to read the output from the filter
249 # program and prefix the quote character.
252 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
253 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
255 binmode($finread, ":encoding($outcharset)");
256 binmode($finwrite, ":encoding($outcharset)");
257 binmode($foutread, ":encoding($outcharset)");
258 binmode($foutwrite, ":encoding($outcharset)");
260 if ($filterpid = fork) {
262 # Close the pipes in the parent that we're not using
267 } elsif (defined $filterpid) {
269 # Close our ununsed filehandles
276 # Dup() down the filehandles to standard input and output
279 open(STDIN
, "<&", $finread) ||
280 die "dup(filterin) failed: $!\n";
281 open(STDOUT
, ">&", $foutwrite) ||
282 die "dup(filterout) failed: $!\n";
295 exec $filterprogram ||
296 die "Unable to exec $filterprogram: $!\n";
298 die "Fork for $filterprogram failed: $!\n";
302 # Fork our output handler.
305 if ($prefixpid = fork) {
307 # We don't need these anymore
311 } elsif (defined $prefixpid) {
313 # Read from foutwrite, and output (with prefix) to stdout
318 while (<$foutread>) {
319 print STDOUT
$quoteprefix, $_;
326 # Send our input to the filter program
329 if (%filterreplace) {
330 foreach my $match (keys %filterreplace) {
331 $text =~ s/$match/$filterreplace{$match}/g;
335 print $finwrite $text;
338 waitpid $filterpid, 0;
339 warn "Filter process exited with ", ($? >> 8), "\n" if $?;
340 waitpid $prefixpid, 0;
341 warn "Pipe reader process exited with ", ($? >> 8), "\n" if $?;
347 # Filter HTML through a converter program
352 my ($input, $encoding, $charset, $boundary) = @_;
353 my $filterpid, $prefixpid, $finread, $finwrite;
354 my $foutread, $foutwrite, $decoder, $ret;
356 if (! defined($decoder = find_decoder
(lc($encoding)))) {
361 # We fork a copy of ourselves to read the output from the filter
362 # program and prefix the quote character.
365 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
366 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
368 binmode($finread, ":encoding($outcharset)");
369 binmode($finread, ":encoding($outcharset)");
370 binmode($foutread, ":encoding($outcharset)");
371 binmode($foutwrite, ":encoding($outcharset)");
373 if ($filterpid = fork) {
375 # Close the pipes in the parent that we're not using
380 } elsif (defined $filterpid) {
382 # Close our ununsed filehandles
389 # Dup() down the filehandles to standard input and output
392 open(STDIN
, "<&", $finread) ||
393 die "dup(filterin) failed: $!\n";
394 open(STDOUT
, ">&", $foutwrite) ||
395 die "dup(filterout) failed: $!\n";
408 my @conv = (@htmlconv, $charset);
410 die "Unable to exec $htmlconv[0]: $!\n";
412 die "Fork for $htmlconv[0] failed: $!\n";
416 # Fork our output handler.
419 if ($prefixpid = fork) {
421 # We don't need these anymore
425 } elsif (defined $prefixpid) {
427 # Read from foutwrite, and output (with prefix) to stdout
432 while (<$foutread>) {
433 print STDOUT
$quoteprefix, $_;
440 # Send our input to the filter program
444 last if ($ret = match_boundary
($_, $boundary));
445 print $finwrite (&$decoder($_));
448 if (! defined $ret) {
453 waitpid $filterpid, 0;
454 warn "HTML converter process exited with ", scalar($? >> 8), "\n" if $?;
455 waitpid $prefixpid, 0;
456 warn "Pipe reader process exited with ", $? >> 8, "\n" if $?;
462 # Decide what to do, based on what kind of content it is.
467 my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
468 my ($type, $subtype) = (split('/', $content_type, -1), '');
470 if ($type eq 'text') {
472 # If this is a text part, right now we only deal with
473 # plain and HTML parts.
475 if ($subtype eq 'plain') {
476 return process_text
($input, $encoding, $charset,
478 } elsif ($subtype eq 'html') {
479 return process_html
($input, $encoding, $charset,
482 print ">>> $content_type content\n";
483 return eat_part
($input, $boundary);
485 } elsif ($type eq 'multipart') {
486 return process_multipart
($input, $subtype, $boundary);
489 # Other types we're not sure what to do with right now
490 # Just put a marker in there
493 print ">>> $content_type attachment";
495 print ", name=$name";
499 return eat_part
($input, $boundary);
504 # Process a multipart message.
506 # When called, we should be right after the beginning of the first
507 # boundary marker. So we should be pointed at header lines which describe
508 # the content of this part
511 sub process_multipart
513 my ($input, $subtype, $boundary) = @_;
517 my $encoding, $type, $end, $name, $charset;
520 # Use the Mail::Header package to read in any headers
521 # corresponding to this part
524 my $head = Mail
::Header-
>new($input, (MailFrom
=> 'IGNORE'));
527 # Extract out any Content-Type, Content-Transfer-Encoding, and
528 # Content-Disposition headers
531 my $ctype = Mail
::Field-
>extract('Content-Type', $head);
532 my $cte = Mail
::Field-
>extract('Content-Transfer-Encoding',
534 my $cdispo = Mail
::Field-
>extract('Content-Disposition', $head);
536 if (defined $ctype) {
537 $type = $ctype->type;
538 $charset = $ctype->charset;
540 $type = 'text/plain';
541 $charset = 'us-ascii';
544 $encoding = defined $cte ? lc($cte->param('_')) : '7bit';
545 $name = defined $cdispo ? $cdispo->param('filename') : undef;
548 # Special handling for multipart/alternative; pick
549 # the "first" one we can handle (which is usually
550 # text/plain) and silently eat the rest, but output a
551 # warning if we can't handle anything.
555 $end = eat_part
($input, $boundary);
557 my $subboundary = $boundary;
558 my $maintype = (split('/', $type))[0];
560 if ($maintype eq 'multipart') {
561 $subboundary = $ctype->boundary;
563 # Go until we find our beginning of this
566 my $subend = eat_part
($input, $subboundary);
568 if ($subend ne 'EOP') {
569 print ">>> WARNING: malformed ",
570 "nested multipart\n";
575 $end = process_part
($input, $type, $encoding,
576 $charset, $subboundary, $name);
578 if ($subtype eq 'alternative' && ! defined $altout &&
579 $type eq 'text/plain') {
584 # Since we changed the semantics of $boundary
585 # above for nested multiparts, if we are
586 # handling a nested multipart then find the end
587 # of our current part
590 if ($maintype eq 'multipart') {
591 $end = eat_part
($input, $boundary);
596 if ($end eq 'EOM' || $end eq 'EOF') {
597 if ($subtype eq 'alternative' && !defined $altout) {
598 print ">>>multipart/alternative: no suitable ",
607 # "Eat" a MIME part; consume content until we hit the boundary or EOF
612 my ($input, $boundary) = @_;
616 # If we weren't given a boundary just eat input until EOF
619 if (! defined $boundary) {
625 # Otherwise, consume data until we hit our boundary
629 if ($ret = match_boundary
($_, $boundary)) {
638 # Return the decoder subroutine to use
645 if ($encoding eq '7bit' || $encoding eq '8bit') {
646 return \
&null_decoder
;
647 } elsif ($encoding eq 'base64') {
648 return \
&decode_base64
;
649 } elsif ($encoding eq 'quoted-printable') {
652 warn "Unknown encoding: $encoding\n";
665 # Match a line against the boundary string
670 my ($line, $boundary) = @_;
672 return if ! defined $boundary;
674 if (substr($line, 0, 2) eq '--') {
675 $line =~ s/[ \t\r\n]+\Z//;
676 if ($line eq "--$boundary") {
678 } elsif ($line eq "--$boundary--") {