]> diplodocus.org Git - nmh/blob - docs/contrib/replyfilter
Fixed install-mh -auto test by specifying HOME.
[nmh] / docs / contrib / replyfilter
1 #!/usr/bin/perl
2 #
3 # replyfilter - A reply filter for nmh
4 #
5 # The idea behind this program is that it will act as a format filter
6 # for nmh. It will try to extract out all text/plain parts and format
7 # them if necessary using a filter program.
8 #
9 # To use this program, configure nmh in the following way (nmh 1.5 or later):
10 #
11 # - Put the path to this program in your .mh_profile under formatproc:
12 #
13 # formatproc: replyfilter
14 #
15 # - Create a mhl reply filter that consists of the following line:
16 #
17 # body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
18 #
19 # To decode this a bit:
20 #
21 # body - Output the "body" component
22 # nocomponent - Don't output a component prefix (normally here we use a
23 # component prefix of ">" as a quote character, but we're
24 # going to have replyfilter do that).
25 # nowrap - Don't wrap lines if they exceed the column width
26 # formatarg - Arguments to fmtproc. The first argument is the value of
27 # the Content-type header; the second is the value of the
28 # Content-Transfer-Encoding header. The last "formatarg"
29 # is used as your quoting prefix. Replace it with whatever
30 # you want.
31 #
32
33 use Mail::Field;
34 use MIME::Head;
35 use MIME::QuotedPrint;
36 use MIME::Base64;
37 use Encode;
38
39 #
40 # The program we use to format "long" text
41 #
42
43 $filterprogram = 'par';
44
45 #
46 # Our output character set. This script assumes a UTF-8 locale, but if you
47 # want to run under a different locale the change it here.
48 #
49
50 $outcharset = 'utf-8';
51
52 #
53 # Maximum column width (used by the HTML converter and to decide if we need
54 # to invoke the filter program
55 #
56
57 $maxcolwidth = 78;
58
59 #
60 # Out HTML converter program & arguments
61 #
62
63 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
64 '-O', $outcharset);
65
66
67 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
68 if $#ARGV != 2;
69
70 if ($ARGV[0] ne "") {
71 my $ctype = Mail::Field->new('Content-Type', $ARGV[0]);
72 $content_type = $ctype->type;
73 $charset = $ctype->charset;
74 $boundary = $ctype->boundary;
75 } else {
76 $content_type = 'text/plain';
77 $charset = 'us-ascii';
78 }
79
80 $encoding = $ARGV[1] eq "" ? '7bit' : lc($ARGV[1]);
81 $quoteprefix = $ARGV[2];
82
83 #
84 # Set up our output to be in our character set
85 #
86
87 binmode(STDOUT, ":encoding($outcharset)");
88
89 #
90 # The simplest case: if we have a single type of text/plain, send it
91 # to our format subroutine.
92 #
93
94 if ($content_type eq 'text/plain') {
95 process_text(\*STDIN, $encoding, $charset);
96 exit 0;
97 }
98
99 #
100 # Alright, here's what we need to do.
101 #
102 # Find any text/plain parts and decode them. Decode them via base64 or
103 # quoted-printable, and feed them to our formatting filter when appropriate.
104 # Put markers in the output for other content types.
105 #
106
107 ($type) = (split('/', $content_type));
108
109 if ($type eq 'multipart') {
110
111 #
112 # For multipart messages we have to do a little extra.
113 # Eat the MIME prologue (everything up until the first boundary)
114 #
115
116 if (! defined $boundary || $boundary eq '') {
117 print "No boundary in Content-Type header!\n";
118 eat_part(\*STDIN);
119 exit 1;
120 }
121
122 while (<STDIN>) {
123 last if match_boundary($_, $boundary);
124 }
125
126 if (eof(STDIN)) {
127 print "Unable to find boundary in message\n";
128 exit 1;
129 }
130 } else {
131 undef $boundary;
132 }
133
134 process_part(\*STDIN, $content_type, $encoding, $charset, $boundary);
135
136 if ($boundary) {
137 #
138 # Eat the MIME epilog
139 #
140 eat_part(\*STDIN);
141 }
142
143 exit 0;
144
145 #
146 # Handled encoded text. I think we can assume if the encoding is q-p
147 # or base64 to feed it into a formatting filter.
148 #
149
150 sub process_text (*$$;$)
151 {
152 my ($input, $encoding, $charset, $boundary) = @_;
153 my $text, $filterpid, $prefixpid, $finread, $finwrite;
154 my $foutread, $foutwrite, $decoder, $ret, $filterflag;
155 my @text = ( '' ), $maxline = 0;
156
157 #
158 # In the simple case, just spit out the text prefixed by the
159 # quote character
160 #
161
162 if ($encoding eq '7bit' || $encoding eq '8bit') {
163 #
164 # Switch the character set to whatever is specified by
165 # the MIME message
166 #
167 binmode($input, ":encoding($charset)");
168 while (<$input>) {
169 $ret = match_boundary($_, $boundary);
170 if (defined $ret) {
171 binmode($input, ':encoding(us-ascii)');
172 return $ret;
173 }
174 print $quoteprefix, $_;
175 }
176 return 'EOF';
177 } else {
178 #
179 # If we've got some other encoding, the input text is almost
180 # certainly US-ASCII
181 #
182
183 binmode($input, ':encoding(us-ascii)');
184
185 $decoder = find_decoder($encoding);
186 if (! defined $decoder) {
187 return 'EOF';
188 }
189 }
190
191 #
192 # Okay, assume that the encoding will make it so that we MIGHT need
193 # to filter it. Read it in; if it's too long, filter it.
194 #
195
196 my $chardecode = find_encoding($charset);
197
198 while (<$input>) {
199 my @lines, $len;
200
201 last if ($ret = match_boundary($_, $boundary));
202
203 @lines = split(/^/, $chardecode->decode(&$decoder($_)));
204
205 if (substr($text[$#text], -1, 1) eq "\n") {
206 push @text, shift @lines;
207 } else {
208 $text[$#text] .= shift @lines;
209 }
210 if (($len = length($text[$#text])) > $maxline) {
211 $maxline = $len;
212 }
213
214 if ($#lines > -1) {
215 push @text, @lines;
216 }
217 }
218
219 binmode($input, ':encoding(us-ascii)');
220
221 if (! defined $ret) {
222 $ret = 'EOF';
223 }
224
225 if ($maxline <= $maxcolwidth) {
226 #
227 # These are short enough; just output it now as-is
228 #
229 foreach my $line (@text) {
230 print STDOUT $quoteprefix, $line;
231 }
232 return $ret;
233 }
234
235 #
236 # We fork a copy of ourselves to read the output from the filter
237 # program and prefix the quote character.
238 #
239
240 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
241 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
242
243 binmode($finread, ":encoding($outcharset)");
244 binmode($finwrite, ":encoding($outcharset)");
245 binmode($foutread, ":encoding($outcharset)");
246 binmode($foutwrite, ":encoding($outcharset)");
247
248 if ($filterpid = fork) {
249 #
250 # Close the pipes in the parent that we're not using
251 #
252
253 close($finread);
254 close($foutwrite);
255 } elsif (defined $filterpid) {
256 #
257 # Close our ununsed filehandles
258 #
259
260 close($finwrite);
261 close($foutread);
262
263 #
264 # Dup() down the filehandles to standard input and output
265 #
266
267 open(STDIN, "<&", $finread) ||
268 die "dup(filterin) failed: $!\n";
269 open(STDOUT, ">&", $foutwrite) ||
270 die "dup(filterout) failed: $!\n";
271
272 #
273 # Close our copies.
274 #
275
276 close($finread);
277 close($foutwrite);
278
279 #
280 # Exec our filter
281 #
282
283 exec $filterprogram ||
284 die "Unable to exec $filterprogram: $!\n";
285 } else {
286 die "Fork for $filterprogram failed: $!\n";
287 }
288
289 #
290 # Fork our output handler.
291 #
292
293 if ($prefixpid = fork) {
294 #
295 # We don't need these anymore
296 #
297 close($foutread);
298
299 } elsif (defined $prefixpid) {
300 #
301 # Read from foutwrite, and output (with prefix) to stdout
302 #
303
304 close($finwrite);
305
306 while (<$foutread>) {
307 print STDOUT $quoteprefix, $_;
308 }
309
310 exit 0;
311 }
312
313 #
314 # Send our input to the filter program
315 #
316
317 print $finwrite @text;
318
319 close($finwrite);
320 waitpid $filterpid, 0;
321 warn "Filter process exited with ", ($? >> 8), "\n" if $?;
322 waitpid $prefixpid, 0;
323 warn "Pipe reader process exited with ", ($? >> 8), "\n" if $?;
324
325 return $ret;
326 }
327
328 #
329 # Filter HTML through a converter program
330 #
331
332 sub process_html (*$$;$)
333 {
334 my ($input, $encoding, $charset, $boundary) = @_;
335 my $filterpid, $prefixpid, $finread, $finwrite;
336 my $foutread, $foutwrite, $decoder, $ret;
337
338 if (! defined($decoder = find_decoder($encoding))) {
339 return 'EOF';
340 }
341
342 #
343 # We fork a copy of ourselves to read the output from the filter
344 # program and prefix the quote character.
345 #
346
347 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
348 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
349
350 binmode($finread, ":encoding($outcharset)");
351 binmode($finread, ":encoding($outcharset)");
352 binmode($foutread, ":encoding($outcharset)");
353 binmode($foutwrite, ":encoding($outcharset)");
354
355 if ($filterpid = fork) {
356 #
357 # Close the pipes in the parent that we're not using
358 #
359
360 close($finread);
361 close($foutwrite);
362 } elsif (defined $filterpid) {
363 #
364 # Close our ununsed filehandles
365 #
366
367 close($finwrite);
368 close($foutread);
369
370 #
371 # Dup() down the filehandles to standard input and output
372 #
373
374 open(STDIN, "<&", $finread) ||
375 die "dup(filterin) failed: $!\n";
376 open(STDOUT, ">&", $foutwrite) ||
377 die "dup(filterout) failed: $!\n";
378
379 #
380 # Close our copies.
381 #
382
383 close($finread);
384 close($foutwrite);
385
386 #
387 # Exec our converter
388 #
389
390 exec (@htmlconv) ||
391 die "Unable to exec $filterprogram: $!\n";
392 } else {
393 die "Fork for $htmlconv[0] failed: $!\n";
394 }
395
396 #
397 # Fork our output handler.
398 #
399
400 if ($prefixpid = fork) {
401 #
402 # We don't need these anymore
403 #
404 close($foutread);
405
406 } elsif (defined $prefixpid) {
407 #
408 # Read from foutwrite, and output (with prefix) to stdout
409 #
410
411 close($finwrite);
412
413 while (<$foutread>) {
414 print STDOUT $quoteprefix, $_;
415 }
416
417 exit 0;
418 }
419
420 #
421 # Send our input to the filter program
422 #
423
424 while (<$input>) {
425 last if ($ret = match_boundary($_, $boundary));
426 print $finwrite (&$decoder($_));
427 }
428
429 if (! defined $ret) {
430 $ret = 'EOF';
431 }
432
433 close($finwrite);
434 waitpid $filterpid, 0;
435 warn "HTML converter process exited with ", scalar($? >> 8), "\n" if $?;
436 waitpid $prefixpid, 0;
437 warn "Pipe reader process exited with ", $? >> 8, "\n" if $?;
438
439 return $ret;
440 }
441
442 #
443 # Decide what to do, based on what kind of content it is.
444 #
445
446 sub process_part (*$$$$;$)
447 {
448 my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
449 my ($type, $subtype) = (split('/', $content_type, -1), '');
450
451 if ($type eq 'text') {
452 #
453 # If this is a text part, right now we only deal with
454 # plain and HTML parts.
455 #
456 if ($subtype eq 'plain') {
457 return process_text($input, $encoding, $charset,
458 $boundary);
459 } elsif ($subtype eq 'html') {
460 return process_html($input, $encoding, $charset,
461 $boundary);
462 } else {
463 print ">>> $content_type content\n";
464 return eat_part($input, $boundary);
465 }
466 } elsif ($type eq 'multipart') {
467 return process_multipart($input, $subtype, $boundary);
468 } else {
469 #
470 # Other types we're not sure what to do with right now
471 # Just put a marker in there
472 #
473
474 print ">>> $content_type attachment";
475 if (defined $name) {
476 print ", name=$name";
477 }
478 print "\n";
479
480 return eat_part($input, $boundary);
481 }
482 }
483
484 #
485 # Process a multipart message.
486 #
487 # When called, we should be right after the beginning of the first
488 # boundary marker. So we should be pointed at header lines which describe
489 # the content of this part
490 #
491
492 sub process_multipart ($$$)
493 {
494 my ($input, $subtype, $boundary) = @_;
495 my $altout;
496
497 while (1) {
498 my $encoding, $type, $end, $name, $charset;
499
500 #
501 # Use the Mail::Header package to read in any headers
502 # corresponding to this part
503 #
504
505 my $head = Mail::Header->new($input, (MailFrom => 'IGNORE'));
506
507 #
508 # Extract out any Content-Type, Content-Transfer-Encoding, and
509 # Content-Disposition headers
510 #
511
512 my $ctype = Mail::Field->extract('Content-Type', $head);
513 my $cte = Mail::Field->extract('Content-Transfer-Encoding',
514 $head);
515 my $cdispo = Mail::Field->extract('Content-Disposition', $head);
516
517 if (defined $ctype) {
518 $type = $ctype->type;
519 $charset = $ctype->charset;
520 } else {
521 $type = 'text/plain';
522 $charset = 'us-ascii';
523 }
524
525 $encoding = defined $cte ? $cte->param('_') : '7bit';
526 $name = defined $cdispo ? $cdispo->param('filename') : undef;
527
528 #
529 # Special handling for multipart/alternative; pick
530 # the "first" one we can handle (which is usually
531 # text/plain) and silently eat the rest, but output a
532 # warning if we can't handle anything.
533 #
534
535 if ($altout) {
536 $end = eat_part($input, $boundary);
537 } else {
538 my $subboundary = $boundary;
539 my $maintype = (split('/', $type))[0];
540
541 if ($maintype eq 'multipart') {
542 $subboundary = $ctype->boundary;
543 #
544 # Go until we find our beginning of this
545 # part
546 #
547 my $subend = eat_part($input, $subboundary);
548
549 if ($subend ne 'EOP') {
550 print ">>> WARNING: malformed ",
551 "nested multipart\n";
552 return $subend;
553 }
554 }
555
556 $end = process_part($input, $type, $encoding,
557 $charset, $subboundary, $name);
558
559 if ($subtype eq 'alternative' && ! defined $altout &&
560 $type eq 'text/plain') {
561 $altout = 1;
562 }
563
564 #
565 # Since we changed the semantics of $boundary
566 # above for nested multiparts, if we are
567 # handling a nested multipart then find the end
568 # of our current part
569 #
570
571 if ($maintype eq 'multipart') {
572 $end = eat_part($input, $boundary);
573 }
574
575 }
576
577 if ($end eq 'EOM' || $end eq 'EOF') {
578 if ($subtype eq 'alternative' && !defined $altout) {
579 print ">>>multipart/alternative: no suitable ",
580 "parts\n";
581 }
582 return $end;
583 }
584 }
585 }
586
587 #
588 # "Eat" a MIME part; consume content until we hit the boundary or EOF
589 #
590
591 sub eat_part ($$)
592 {
593 my ($input, $boundary) = @_;
594 my $ret;
595
596 #
597 # If we weren't given a boundary just eat input until EOF
598 #
599
600 if (! defined $boundary) {
601 while (<$input>) { }
602 return 'EOF';
603 }
604
605 #
606 # Otherwise, consume data until we hit our boundary
607 #
608
609 while (<$input>) {
610 if ($ret = match_boundary($_, $boundary)) {
611 return $ret;
612 }
613 }
614
615 return 'EOF';
616 }
617
618 #
619 # Return the decoder subroutine to use
620 #
621
622 sub find_decoder ($)
623 {
624 my ($encoding) = @_;
625
626 if ($encoding eq '7bit' || $encoding eq '8bit') {
627 return \&null_decoder;
628 } elsif ($encoding eq 'base64') {
629 return \&decode_base64;
630 } elsif ($encoding eq 'quoted-printable') {
631 return \&decode_qp;
632 } else {
633 warn "Unknown encoding: $encoding\n";
634 return undef;
635 }
636 }
637
638 sub null_decoder ($)
639 {
640 my ($input) = @_;
641
642 return $input;
643 }
644
645 #
646 # Match a line against the boundary string
647 #
648
649 sub match_boundary($$)
650 {
651 my ($_, $boundary) = @_;
652
653 return if ! defined $boundary;
654
655 if (substr($_, 0, 2) eq '--') {
656 s/[ \t\r\n]+\Z//;
657 if ($_ eq "--$boundary") {
658 return 'EOP';
659 } elsif ($_ eq "--$boundary--") {
660 return 'EOM';
661 }
662 }
663
664 return undef;
665 }