]> diplodocus.org Git - nmh/blob - docs/replyfilter
Whoops, fix a typo in my last commit.
[nmh] / docs / replyfilter
1 #!/usr/bin/perl
2 #
3 # replyfilter - A reply filter for nmh
4 #
5 # The idea behind this program is that it will act as a format filter
6 # for nmh. It will try to extract out all text/plain parts and format
7 # them if necessary using a filter program.
8 #
9 # To use this program, configure nmh in the following way (nmh 1.5 or later):
10 #
11 # - Put the path to this program in your .mh_profile under formatproc:
12 #
13 # formatproc: replyfilter
14 #
15 # - Create a mhl reply filter that consists of the following line:
16 #
17 # body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
18 #
19 # To decode this a bit:
20 #
21 # body - Output the "body" component
22 # nocomponent - Don't output a component prefix (normally here we use a
23 # component prefix of ">" as a quote character, but we're
24 # going to have replyfilter do that).
25 # nowrap - Don't wrap lines if they exceed the column width
26 # formatarg - Arguments to fmtproc. The first argument is the value of
27 # the Content-type header; the second is the value of the
28 # Content-Transfer-Encoding header. The last "formatarg"
29 # is used as your quoting prefix. Replace it with whatever
30 # you want.
31 #
32
33 use Mail::Field;
34 use MIME::Head;
35 use MIME::QuotedPrint;
36 use MIME::Base64;
37 use Encode;
38
39 #
40 # The program we use to format "long" text
41 #
42
43 $filterprogram = 'par';
44
45 #
46 # Our output character set. This script assumes a UTF-8 locale, but if you
47 # want to run under a different locale the change it here.
48 #
49
50 $outcharset = 'utf-8';
51
52 #
53 # Maximum column width (used by the HTML converter and to decide if we need
54 # to invoke the filter program
55 #
56
57 $maxcolwidth = 78;
58
59 #
60 # Out HTML converter program & arguments
61 #
62
63 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
64 '-O', $outcharset);
65
66
67 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
68 if $#ARGV != 2;
69
70 if ($ARGV[0] ne "") {
71 $content_type = Mail::Field->new('Content-Type', $ARGV[0]);
72 }
73
74 $encoding = $ARGV[1] eq "" ? '7bit' : lc($ARGV[1]);
75 $quoteprefix = $ARGV[2];
76
77 #
78 # Set up our output to be in our character set
79 #
80
81 binmode(STDOUT, ":encoding($outcharset)");
82
83 #
84 # The simplest case: if we have a single type of text/plain, send it
85 # to our format subroutine.
86 #
87
88 if ($ARGV[0] eq "" || $content_type->type eq 'text/plain') {
89 process_text(\*STDIN, $encoding, $content_type->charset);
90 exit 0;
91 }
92
93 #
94 # Alright, here's what we need to do.
95 #
96 # Find any text/plain parts and decode them. Decode them via base64 or
97 # quoted-printable, and feed them to our formatting filter when appropriate.
98 # Put markers in the output for other content types.
99 #
100
101 ($type) = (split('/', $content_type->type));
102
103 if ($type eq 'multipart') {
104
105 #
106 # For multipart messages we have to do a little extra.
107 # Eat the MIME prologue (everything up until the first boundary)
108 #
109
110 $boundary = $content_type->boundary;
111
112 if ($boundary eq '') {
113 print "No boundary in Content-Type header!\n";
114 eat_part(\*STDIN);
115 exit 1;
116 }
117
118 while (<STDIN>) {
119 last if match_boundary($_, $boundary);
120 }
121
122 if (eof(STDIN)) {
123 print "Unable to find boundary in message\n";
124 exit 1;
125 }
126 } else {
127 undef $boundary;
128 }
129
130 process_part(\*STDIN, $content_type->type, $encoding, $content_type->charset,
131 $boundary);
132
133 if ($boundary) {
134 #
135 # Eat the MIME eplilog
136 #
137 eat_part(\*STDIN);
138 }
139
140 exit 0;
141
142 #
143 # Handled encoded text. I think we can assume if the encoding is q-p
144 # or base64 to feed it into a formatting filter.
145 #
146
147 sub process_text (*$$;$)
148 {
149 my ($input, $encoding, $charset, $boundary) = @_;
150 my $text, $filterpid, $prefixpid, $finread, $finwrite;
151 my $foutread, $foutwrite, $decoder, $ret, $filterflag;
152 my @text = ( '' ), $maxline = 0;
153
154 #
155 # In the simple case, just spit out the text prefixed by the
156 # quote character
157 #
158
159 if ($encoding eq '7bit' || $encoding ne '8bit') {
160 while (<$input>) {
161 $ret = match_boundary($_, $boundary);
162 if (defined $ret) {
163 return $ret;
164 }
165 print $quoteprefix, $_;
166 }
167 return 'EOF';
168 } else {
169 $decoder = find_decoder($encoding);
170 if (! defined $decoder) {
171 return 'EOF';
172 }
173 }
174
175 #
176 # Okay, assume that the encoding will make it so that we MIGHT need
177 # to filter it. Read it in; if it's too long, filter it.
178 #
179
180 while (<$input>) {
181 my $line, $len;
182
183 last if ($ret = match_boundary($_, $boundary));
184
185 $line = decode($charset, &$decoder($_));
186
187 if (substr($text[$#text], -1, 1) eq "\n") {
188 push @text, $line;
189 } else {
190 $text[$#text] .= $line;
191 }
192 if (($len = length($text[$#text])) > $maxline) {
193 $maxline = $len;
194 }
195 }
196
197 if (! defined $ret) {
198 $ret = 'EOF';
199 }
200
201 if ($maxline <= $maxcolwidth) {
202 #
203 # These are short enough; just output it now as-is
204 #
205 print STDOUT @text;
206 return $ret;
207 }
208
209 #
210 # We fork a copy of ourselves to read the output from the filter
211 # program and prefix the quote character.
212 #
213
214 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
215 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
216
217 binmode($finread, ":encoding($outcharset)");
218 binmode($finread, ":encoding($outcharset)");
219 binmode($foutread, ":encoding($outcharset)");
220 binmode($foutwrite, ":encoding($outcharset)");
221
222 if ($filterpid = fork) {
223 #
224 # Close the pipes in the parent that we're not using
225 #
226
227 close($finread);
228 close($foutwrite);
229 } elsif (defined $filterpid) {
230 #
231 # Close our ununsed filehandles
232 #
233
234 close($finwrite);
235 close($foutread);
236
237 #
238 # Dup() down the filehandles to standard input and output
239 #
240
241 open(STDIN, "<&", $finread) ||
242 die "dup(filterin) failed: $!\n";
243 open(STDOUT, ">&", $foutwrite) ||
244 die "dup(filterout) failed: $!\n";
245
246 #
247 # Close our copies.
248 #
249
250 close($finread);
251 close($foutwrite);
252
253 #
254 # Exec our filter
255 #
256
257 exec $filterprogram ||
258 die "Unable to exec $filterprogram: $!\n";
259 } else {
260 die "Fork for $filterprogram failed: $!\n";
261 }
262
263 #
264 # Fork our output handler.
265 #
266
267 if ($prefixpid = fork) {
268 #
269 # We don't need these anymore
270 #
271 close($foutread);
272
273 } elsif (defined $prefixpid) {
274 #
275 # Read from foutwrite, and output (with prefix) to stdout
276 #
277
278 close($finwrite);
279
280 while (<$foutread>) {
281 print STDOUT $quoteprefix, $_;
282 }
283
284 exit 0;
285 }
286
287 #
288 # Send our input to the filter program
289 #
290
291 print $finwrite @text;
292
293 close($finwrite);
294 waitpid $filterpid, 0;
295 warn "Filter process exited with ", ($? >> 8), "\n" if $?;
296 waitpid $prefixpid, 0;
297 warn "Pipe reader process exited with ", ($? >> 8), "\n" if $?;
298
299 return $ret;
300 }
301
302 #
303 # Filter HTML through a converter program
304 #
305
306 sub process_html (*$$;$)
307 {
308 my ($input, $encoding, $charset, $boundary) = @_;
309 my $filterpid, $prefixpid, $finread, $finwrite;
310 my $foutread, $foutwrite, $decoder, $ret;
311
312 if (! defined($decoder = find_decoder($encoding))) {
313 return 'EOF';
314 }
315
316 #
317 # We fork a copy of ourselves to read the output from the filter
318 # program and prefix the quote character.
319 #
320
321 pipe($finread, $finwrite) || die "pipe() failed: $!\n";
322 pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
323
324 binmode($finread, ":encoding($outcharset)");
325 binmode($finread, ":encoding($outcharset)");
326 binmode($foutread, ":encoding($outcharset)");
327 binmode($foutwrite, ":encoding($outcharset)");
328
329 if ($filterpid = fork) {
330 #
331 # Close the pipes in the parent that we're not using
332 #
333
334 close($finread);
335 close($foutwrite);
336 } elsif (defined $filterpid) {
337 #
338 # Close our ununsed filehandles
339 #
340
341 close($finwrite);
342 close($foutread);
343
344 #
345 # Dup() down the filehandles to standard input and output
346 #
347
348 open(STDIN, "<&", $finread) ||
349 die "dup(filterin) failed: $!\n";
350 open(STDOUT, ">&", $foutwrite) ||
351 die "dup(filterout) failed: $!\n";
352
353 #
354 # Close our copies.
355 #
356
357 close($finread);
358 close($foutwrite);
359
360 #
361 # Exec our converter
362 #
363
364 exec (@htmlconv) ||
365 die "Unable to exec $filterprogram: $!\n";
366 } else {
367 die "Fork for $htmlconv[0] failed: $!\n";
368 }
369
370 #
371 # Fork our output handler.
372 #
373
374 if ($prefixpid = fork) {
375 #
376 # We don't need these anymore
377 #
378 close($foutread);
379
380 } elsif (defined $prefixpid) {
381 #
382 # Read from foutwrite, and output (with prefix) to stdout
383 #
384
385 close($finwrite);
386
387 while (<$foutread>) {
388 print STDOUT $quoteprefix, $_;
389 }
390
391 exit 0;
392 }
393
394 #
395 # Send our input to the filter program
396 #
397
398 while (<$input>) {
399 last if ($ret = match_boundary($_, $boundary));
400 print $finwrite (&$decoder($_));
401 }
402
403 if (! defined $ret) {
404 $ret = 'EOF';
405 }
406
407 close($finwrite);
408 waitpid $filterpid, 0;
409 warn "HTML converter process exited with ", scalar($? >> 8), "\n" if $?;
410 waitpid $prefixpid, 0;
411 warn "Pipe reader process exited with ", $? >> 8, "\n" if $?;
412
413 return $ret;
414 }
415
416 #
417 # Decide what to do, based on what kind of content it is.
418 #
419
420 sub process_part (*$$$$;$)
421 {
422 my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
423 my ($type, $subtype) = (split('/', $content_type, -1), '');
424
425 if ($type eq 'text') {
426 #
427 # If this is a text part, right now we only deal with
428 # plain and HTML parts.
429 #
430 if ($subtype eq 'plain') {
431 return process_text($input, $encoding, $charset,
432 $boundary);
433 } elsif ($subtype eq 'html') {
434 return process_html($input, $encoding, $charset,
435 $boundary);
436 } else {
437 print ">>> $content_type content\n";
438 return eat_part($input, $boundary);
439 }
440 } elsif ($type eq 'multipart') {
441 return process_multipart($input, $subtype, $boundary);
442 } else {
443 #
444 # Other types we're not sure what to do with right now
445 # Just put a marker in there
446 #
447
448 print ">>> $content_type attachment";
449 if (defined $name) {
450 print ", name=$name";
451 }
452 print "\n";
453
454 return eat_part($input, $boundary);
455 }
456 }
457
458 #
459 # Process a multipart message.
460 #
461 # When called, we should be right after the beginning of the first
462 # boundary marker. So we should be pointed at header lines which describe
463 # the content of this part
464 #
465
466 sub process_multipart ($$$)
467 {
468 my ($input, $subtype, $boundary) = @_;
469 my $altout;
470
471 while (1) {
472 my $encoding, $type, $end, $name, $charset;
473
474 #
475 # Use the Mail::Header package to read in any headers
476 # corresponding to this part
477 #
478
479 my $head = Mail::Header->new($input, (MailFrom => 'IGNORE'));
480
481 #
482 # Extract out any Content-Type, Content-Transfer-Encoding, and
483 # Content-Disposition headers
484 #
485
486 my $ctype = Mail::Field->extract('Content-Type', $head);
487 my $cte = Mail::Field->extract('Content-Transfer-Encoding',
488 $head);
489 my $cdispo = Mail::Field->extract('Content-Disposition', $head);
490
491 if (defined $ctype) {
492 $type = $ctype->type;
493 $charset = $ctype->charset;
494 } else {
495 $type = 'text/plain';
496 $charset = 'us-ascii';
497 }
498
499 $encoding = defined $cte ? $cte->param('_') : '7bit';
500 $name = defined $cdispo ? $cdispo->param('filename') : undef;
501
502 #
503 # Special handling for multipart/alternative; pick
504 # the "first" one we can handle (which is usually
505 # text/plain) and silently eat the rest, but output a
506 # warning if we can't handle anything.
507 #
508
509 if ($altout) {
510 $end = eat_part($input, $boundary);
511 } else {
512 my $subboundary = $boundary;
513 my $maintype = (split('/', $type))[0];
514
515 if ($maintype eq 'multipart') {
516 $subboundary = $ctype->boundary;
517 #
518 # Go until we find our beginning of this
519 # part
520 #
521 my $subend = eat_part($input, $subboundary);
522
523 if ($subend ne 'EOP') {
524 print ">>> WARNING: malformed ",
525 "nested multipart\n";
526 return $subend;
527 }
528 }
529
530 $end = process_part($input, $type, $encoding,
531 $charset, $subboundary, $name);
532
533 if ($subtype eq 'alternative' && ! defined $altout &&
534 $type eq 'text/plain') {
535 $altout = 1;
536 }
537
538 #
539 # Since we changed the semantics of $boundary
540 # above for nested multiparts, if we are
541 # handling a nested multipart then find the end
542 # of our current part
543 #
544
545 if ($maintype eq 'multipart') {
546 $end = eat_part($input, $boundary);
547 }
548
549 }
550
551 if ($end eq 'EOM' || $end eq 'EOF') {
552 if ($subtype eq 'alternative' && !defined $altout) {
553 print ">>>multipart/alternative: no suitable ",
554 "parts\n";
555 }
556 return $end;
557 }
558 }
559 }
560
561 #
562 # "Eat" a MIME part; consume content until we hit the boundary or EOF
563 #
564
565 sub eat_part ($$)
566 {
567 my ($input, $boundary) = @_;
568 my $ret;
569
570 #
571 # If we weren't given a boundary just eat input until EOF
572 #
573
574 if (! defined $boundary) {
575 while (<$input>) { }
576 return 'EOF';
577 }
578
579 #
580 # Otherwise, consume data until we hit our boundary
581 #
582
583 while (<$input>) {
584 if ($ret = match_boundary($_, $boundary)) {
585 return $ret;
586 }
587 }
588
589 return 'EOF';
590 }
591
592 #
593 # Return the decoder subroutine to use
594 #
595
596 sub find_decoder ($)
597 {
598 my ($encoding) = @_;
599
600 if ($encoding eq '7bit' || $encoding eq '8bit') {
601 return \&null_decoder;
602 } elsif ($encoding eq 'base64') {
603 return \&decode_base64;
604 } elsif ($encoding eq 'quoted-printable') {
605 return \&decode_qp;
606 } else {
607 warn "Unknown encoding: $encoding\n";
608 return undef;
609 }
610 }
611
612 sub null_decoder ($)
613 {
614 my ($input) = @_;
615
616 return $input;
617 }
618
619 #
620 # Match a line against the boundary string
621 #
622
623 sub match_boundary($$)
624 {
625 my ($_, $boundary) = @_;
626
627 if (substr($_, 0, 2) eq '--') {
628 s/[ \t\r\n]+\Z//;
629 if ($_ eq "--$boundary") {
630 return 'EOP';
631 } elsif ($_ eq "--$boundary--") {
632 return 'EOM';
633 }
634 }
635
636 return undef;
637 }