diplodocus.org Git - nmh/blob - docs/replyfilter

   1 #!/usr/bin/perl
   2 #
   3 # replyfilter - A reply filter for nmh
   4 #
   5 # The idea behind this program is that it will act as a format filter
   6 # for nmh.  It will try to extract out all text/plain parts and format
   7 # them if necessary using a filter program.
   8 #
   9 # To use this program, configure nmh in the following way (nmh 1.5 or later):
  10 #
  11 # - Put the path to this program in your .mh_profile under formatproc:
  12 #
  13 #   formatproc: replyfilter
  14 #
  15 # - Create a mhl reply filter that consists of the following line:
  16 #
  17 #   body:nocomponent,format,nowrap,formatarg="%(trim{content-type})%(putstr)",formatarg="%(trim{content-transfer-encoding})%(putstr)",formatarg=">"
  18 #
  19 #   To decode this a bit:
  20 #
  21 #   body        - Output the "body" component
  22 #   nocomponent - Don't output a component prefix (normally here we use a
  23 #                 component prefix of ">" as a quote character, but we're
  24 #                 going to have replyfilter do that).
  25 #   nowrap      - Don't wrap lines if they exceed the column width
  26 #   formatarg   - Arguments to fmtproc.  The first argument is the value of
  27 #                 the Content-type header; the second is the value of the
  28 #                 Content-Transfer-Encoding header.  The last "formatarg"
  29 #                 is used as your quoting prefix.  Replace it with whatever
  30 #                 you want.
  31 #
  32
  33 use Mail::Field;
  34 use MIME::Head;
  35 use MIME::QuotedPrint;
  36 use MIME::Base64;
  37 use Encode;
  38
  39 #
  40 # The program we use to format "long" text
  41 #
  42
  43 $filterprogram = 'par';
  44
  45 #
  46 # Our output character set.  This script assumes a UTF-8 locale, but if you
  47 # want to run under a different locale the change it here.
  48 #
  49
  50 $outcharset = 'utf-8';
  51
  52 #
  53 # Maximum column width (used by the HTML converter and to decide if we need
  54 # to invoke the filter program
  55 #
  56
  57 $maxcolwidth = 78;
  58
  59 #
  60 # Out HTML converter program & arguments
  61 #
  62
  63 @htmlconv = ('w3m', '-dump', '-cols', $maxcolwidth - 2, '-T', 'text/html',
  64              '-O', $outcharset);
  65
  66
  67 die "Usage: $0 Content-type content-transfer-encoding quote-prefix\n"
  68                                 if $#ARGV != 2;
  69
  70 if ($ARGV[0] ne "") {
  71         $content_type = Mail::Field->new('Content-Type', $ARGV[0]);
  72 }
  73
  74 $encoding = $ARGV[1] eq "" ? '7bit' : lc($ARGV[1]);
  75 $quoteprefix = $ARGV[2];
  76
  77 #
  78 # Set up our output to be in our character set
  79 #
  80
  81 binmode(STDOUT, ":encoding($outcharset)");
  82
  83 #
  84 # The simplest case: if we have a single type of text/plain, send it
  85 # to our format subroutine.
  86 #
  87
  88 if ($ARGV[0] eq "" || $content_type->type eq 'text/plain') {
  89         process_text(\*STDIN, $encoding, $content_type->charset);
  90         exit 0;
  91 }
  92
  93 #
  94 # Alright, here's what we need to do.
  95 #
  96 # Find any text/plain parts and decode them.  Decode them via base64 or
  97 # quoted-printable, and feed them to our formatting filter when appropriate.
  98 # Put markers in the output for other content types.
  99 #
 100
 101 ($type) = (split('/', $content_type->type));
 102
 103 if ($type eq 'multipart') {
 104
 105         #
 106         # For multipart messages we have to do a little extra.
 107         # Eat the MIME prologue (everything up until the first boundary)
 108         #
 109
 110         $boundary = $content_type->boundary;
 111
 112         if ($boundary eq '') {
 113                 print "No boundary in Content-Type header!\n";
 114                 eat_part(\*STDIN);
 115                 exit 1;
 116         }
 117
 118         while (<STDIN>) {
 119                 last if match_boundary($_, $boundary);
 120         }
 121
 122         if (eof(STDIN)) {
 123                 print "Unable to find boundary in message\n";
 124                 exit 1;
 125         }
 126 } else {
 127         undef $boundary;
 128 }
 129
 130 process_part(\*STDIN, $content_type->type, $encoding, $content_type->charset,
 131              $boundary);
 132
 133 if ($boundary) {
 134         #
 135         # Eat the MIME eplilog
 136         #
 137         eat_part(\*STDIN);
 138 }
 139
 140 exit 0;
 141
 142 #
 143 # Handled encoded text.  I think we can assume if the encoding is q-p
 144 # or base64 to feed it into a formatting filter.
 145 #
 146
 147 sub process_text (*$$;$)
 148 {
 149         my ($input, $encoding, $charset, $boundary) = @_;
 150         my $text, $filterpid, $prefixpid, $finread, $finwrite;
 151         my $foutread, $foutwrite, $decoder, $ret, $filterflag;
 152         my @text = ( '' ), $maxline = 0;
 153
 154         #
 155         # In the simple case, just spit out the text prefixed by the
 156         # quote character
 157         #
 158
 159         if ($encoding eq '7bit' || $encoding ne '8bit') {
 160                 while (<$input>) {
 161                         $ret = match_boundary($_, $boundary);
 162                         if (defined $ret) {
 163                                 return $ret;
 164                         }
 165                         print $quoteprefix, $_;
 166                 }
 167                 return 'EOF';
 168         } else {
 169                 $decoder = find_decoder($encoding);
 170                 if (! defined $decoder) {
 171                         return 'EOF';
 172                 }
 173         }
 174
 175         #
 176         # Okay, assume that the encoding will make it so that we MIGHT need
 177         # to filter it.  Read it in; if it's too long, filter it.
 178         #
 179
 180         while (<$input>) {
 181                 my $line, $len;
 182
 183                 last if ($ret = match_boundary($_, $boundary));
 184
 185                 $line = decode($charset, &$decoder($_));
 186
 187                 if (substr($text[$#text], -1, 1) eq "\n") {
 188                         push @text, $line;
 189                 } else {
 190                         $text[$#text] .= $line;
 191                 }
 192                 if (($len = length($text[$#text])) > $maxline) {
 193                         $maxline = $len;
 194                 }
 195         }
 196
 197         if (! defined $ret) {
 198                 $ret = 'EOF';
 199         }
 200
 201         if ($maxline <= $maxcolwidth) {
 202                 #
 203                 # These are short enough; just output it now as-is
 204                 #
 205                 print STDOUT @text;
 206                 return $ret;
 207         }
 208
 209         #
 210         # We fork a copy of ourselves to read the output from the filter
 211         # program and prefix the quote character.
 212         #
 213
 214         pipe($finread, $finwrite) || die "pipe() failed: $!\n";
 215         pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
 216
 217         binmode($finread, ":encoding($outcharset)");
 218         binmode($finread, ":encoding($outcharset)");
 219         binmode($foutread, ":encoding($outcharset)");
 220         binmode($foutwrite, ":encoding($outcharset)");
 221
 222         if ($filterpid = fork) {
 223                 #
 224                 # Close the pipes in the parent that we're not using
 225                 #
 226
 227                 close($finread);
 228                 close($foutwrite);
 229         } elsif (defined $filterpid) {
 230                 #
 231                 # Close our ununsed filehandles
 232                 #
 233
 234                 close($finwrite);
 235                 close($foutread);
 236
 237                 #
 238                 # Dup() down the filehandles to standard input and output
 239                 #
 240
 241                 open(STDIN, "<&", $finread) ||
 242                                         die "dup(filterin) failed: $!\n";
 243                 open(STDOUT, ">&", $foutwrite) ||
 244                                         die "dup(filterout) failed: $!\n";
 245
 246                 #
 247                 # Close our copies.
 248                 #
 249
 250                 close($finread);
 251                 close($foutwrite);
 252
 253                 #
 254                 # Exec our filter
 255                 #
 256
 257                 exec $filterprogram ||
 258                                 die "Unable to exec $filterprogram: $!\n";
 259         } else {
 260                 die "Fork for $filterprogram failed: $!\n";
 261         }
 262
 263         #
 264         # Fork our output handler.
 265         #
 266
 267         if ($prefixpid = fork) {
 268                 #
 269                 # We don't need these anymore
 270                 #
 271                 close($foutread);
 272
 273         } elsif (defined $prefixpid) {
 274                 #
 275                 # Read from foutwrite, and output (with prefix) to stdout
 276                 #
 277
 278                 close($finwrite);
 279
 280                 while (<$foutread>) {
 281                         print STDOUT $quoteprefix, $_;
 282                 }
 283
 284                 exit 0;
 285         }
 286
 287         #
 288         # Send our input to the filter program
 289         #
 290
 291         print $finwrite @text;
 292
 293         close($finwrite);
 294         waitpid $filterpid, 0;
 295         warn "Filter process exited with ", ($? >> 8), "\n" if $?;
 296         waitpid $prefixpid, 0;
 297         warn "Pipe reader process exited with ", ($? >> 8), "\n" if $?;
 298
 299         return $ret;
 300 }
 301
 302 #
 303 # Filter HTML through a converter program
 304 #
 305
 306 sub process_html (*$$;$)
 307 {
 308         my ($input, $encoding, $charset, $boundary) = @_;
 309         my $filterpid, $prefixpid, $finread, $finwrite;
 310         my $foutread, $foutwrite, $decoder, $ret;
 311
 312         if (! defined($decoder = find_decoder($encoding))) {
 313                 return 'EOF';
 314         }
 315
 316         #
 317         # We fork a copy of ourselves to read the output from the filter
 318         # program and prefix the quote character.
 319         #
 320
 321         pipe($finread, $finwrite) || die "pipe() failed: $!\n";
 322         pipe($foutread, $foutwrite) || die "pipe() (second) failed: $!\n";
 323
 324         binmode($finread, ":encoding($outcharset)");
 325         binmode($finread, ":encoding($outcharset)");
 326         binmode($foutread, ":encoding($outcharset)");
 327         binmode($foutwrite, ":encoding($outcharset)");
 328
 329         if ($filterpid = fork) {
 330                 #
 331                 # Close the pipes in the parent that we're not using
 332                 #
 333
 334                 close($finread);
 335                 close($foutwrite);
 336         } elsif (defined $filterpid) {
 337                 #
 338                 # Close our ununsed filehandles
 339                 #
 340
 341                 close($finwrite);
 342                 close($foutread);
 343
 344                 #
 345                 # Dup() down the filehandles to standard input and output
 346                 #
 347
 348                 open(STDIN, "<&", $finread) ||
 349                                         die "dup(filterin) failed: $!\n";
 350                 open(STDOUT, ">&", $foutwrite) ||
 351                                         die "dup(filterout) failed: $!\n";
 352
 353                 #
 354                 # Close our copies.
 355                 #
 356
 357                 close($finread);
 358                 close($foutwrite);
 359
 360                 #
 361                 # Exec our converter
 362                 #
 363
 364                 exec (@htmlconv) ||
 365                                 die "Unable to exec $filterprogram: $!\n";
 366         } else {
 367                 die "Fork for $htmlconv[0] failed: $!\n";
 368         }
 369
 370         #
 371         # Fork our output handler.
 372         #
 373
 374         if ($prefixpid = fork) {
 375                 #
 376                 # We don't need these anymore
 377                 #
 378                 close($foutread);
 379
 380         } elsif (defined $prefixpid) {
 381                 #
 382                 # Read from foutwrite, and output (with prefix) to stdout
 383                 #
 384
 385                 close($finwrite);
 386
 387                 while (<$foutread>) {
 388                         print STDOUT $quoteprefix, $_;
 389                 }
 390
 391                 exit 0;
 392         }
 393
 394         #
 395         # Send our input to the filter program
 396         #
 397
 398         while (<$input>) {
 399                 last if ($ret = match_boundary($_, $boundary));
 400                 print $finwrite (&$decoder($_));
 401         }
 402
 403         if (! defined $ret) {
 404                 $ret = 'EOF';
 405         }
 406
 407         close($finwrite);
 408         waitpid $filterpid, 0;
 409         warn "HTML converter process exited with ", scalar($? >> 8), "\n" if $?;
 410         waitpid $prefixpid, 0;
 411         warn "Pipe reader process exited with ", $? >> 8, "\n" if $?;
 412
 413         return $ret;
 414 }
 415
 416 #
 417 # Decide what to do, based on what kind of content it is.
 418 #
 419
 420 sub process_part (*$$$$;$)
 421 {
 422         my ($input, $content_type, $encoding, $charset, $boundary, $name) = @_;
 423         my ($type, $subtype) = (split('/', $content_type, -1), '');
 424
 425         if ($type eq 'text') {
 426                 #
 427                 # If this is a text part, right now we only deal with
 428                 # plain and HTML parts.
 429                 #
 430                 if ($subtype eq 'plain') {
 431                         return process_text($input, $encoding, $charset,
 432                                             $boundary);
 433                 } elsif ($subtype eq 'html') {
 434                         return process_html($input, $encoding, $charset,
 435                                             $boundary);
 436                 } else {
 437                         print ">>> $content_type content\n";
 438                         return eat_part($input, $boundary);
 439                 }
 440         } elsif ($type eq 'multipart') {
 441                 return process_multipart($input, $subtype, $boundary);
 442         } else {
 443                 #
 444                 # Other types we're not sure what to do with right now
 445                 # Just put a marker in there
 446                 #
 447
 448                 print ">>> $content_type attachment";
 449                 if (defined $name) {
 450                         print ", name=$name";
 451                 }
 452                 print "\n";
 453
 454                 return eat_part($input, $boundary);
 455         }
 456 }
 457
 458 #
 459 # Process a multipart message.
 460 #
 461 # When called, we should be right after the beginning of the first
 462 # boundary marker.  So we should be pointed at header lines which describe
 463 # the content of this part
 464 #
 465
 466 sub process_multipart ($$$)
 467 {
 468         my ($input, $subtype, $boundary) = @_;
 469         my $altout;
 470
 471         while (1) {
 472                 my $encoding, $type, $end, $name, $charset;
 473
 474                 #
 475                 # Use the Mail::Header package to read in any headers
 476                 # corresponding to this part
 477                 #
 478
 479                 my $head = Mail::Header->new($input, (MailFrom => 'IGNORE'));
 480
 481                 #
 482                 # Extract out any Content-Type, Content-Transfer-Encoding, and
 483                 # Content-Disposition headers
 484                 #
 485
 486                 my $ctype = Mail::Field->extract('Content-Type', $head);
 487                 my $cte = Mail::Field->extract('Content-Transfer-Encoding',
 488                                                $head);
 489                 my $cdispo = Mail::Field->extract('Content-Disposition', $head);
 490
 491                 if (defined $ctype) {
 492                         $type = $ctype->type;
 493                         $charset = $ctype->charset;
 494                 } else {
 495                         $type = 'text/plain';
 496                         $charset = 'us-ascii';
 497                 }
 498
 499                 $encoding = defined $cte ? $cte->param('_') : '7bit';
 500                 $name = defined $cdispo ? $cdispo->param('filename') : undef;
 501
 502                 #
 503                 # Special handling for multipart/alternative; pick
 504                 # the "first" one we can handle (which is usually
 505                 # text/plain) and silently eat the rest, but output a
 506                 # warning if we can't handle anything.
 507                 #
 508
 509                 if ($altout) {
 510                         $end = eat_part($input, $boundary);
 511                 } else {
 512                         my $subboundary = $boundary;
 513                         my $maintype = (split('/', $type))[0];
 514
 515                         if ($maintype eq 'multipart') {
 516                                 $subboundary = $ctype->boundary;
 517                                 #
 518                                 # Go until we find our beginning of this
 519                                 # part
 520                                 #
 521                                 my $subend = eat_part($input, $subboundary);
 522
 523                                 if ($subend ne 'EOP') {
 524                                         print ">>> WARNING: malformed ",
 525                                                 "nested multipart\n";
 526                                         return $subend;
 527                                 }
 528                         }
 529
 530                         $end = process_part($input, $type, $encoding,
 531                                             $charset, $subboundary, $name);
 532
 533                         if ($subtype eq 'alternative' && ! defined $altout &&
 534                             $type eq 'text/plain') {
 535                                 $altout = 1;
 536                         }
 537
 538                         #
 539                         # Since we changed the semantics of $boundary
 540                         # above for nested multiparts, if we are
 541                         # handling a nested multipart then find the end
 542                         # of our current part
 543                         #
 544
 545                         if ($maintype eq 'multipart') {
 546                                 $end = eat_part($input, $boundary);
 547                         }
 548
 549                 }
 550
 551                 if ($end eq 'EOM' || $end eq 'EOF') {
 552                         if ($subtype eq 'alternative' && !defined $altout) {
 553                                 print ">>>multipart/alternative: no suitable ",
 554                                         "parts\n";
 555                         }
 556                         return $end;
 557                 }
 558         }
 559 }
 560
 561 #
 562 # "Eat" a MIME part; consume content until we hit the boundary or EOF
 563 #
 564
 565 sub eat_part ($$)
 566 {
 567         my ($input, $boundary) = @_;
 568         my $ret;
 569
 570         #
 571         # If we weren't given a boundary just eat input until EOF
 572         #
 573
 574         if (! defined $boundary) {
 575                 while (<$input>) { }
 576                 return 'EOF';
 577         }
 578
 579         #
 580         # Otherwise, consume data until we hit our boundary
 581         #
 582
 583         while (<$input>) {
 584                 if ($ret = match_boundary($_, $boundary)) {
 585                         return $ret;
 586                 }
 587         }
 588
 589         return 'EOF';
 590 }
 591
 592 #
 593 # Return the decoder subroutine to use
 594 #
 595
 596 sub find_decoder ($)
 597 {
 598         my ($encoding) = @_;
 599
 600         if ($encoding eq '7bit' || $encoding eq '8bit') {
 601                 return \&null_decoder;
 602         } elsif ($encoding eq 'base64') {
 603                 return \&decode_base64;
 604         } elsif ($encoding eq 'quoted-printable') {
 605                 return \&decode_qp;
 606         } else {
 607                 warn "Unknown encoding: $encoding\n";
 608                 return undef;
 609         }
 610 }
 611
 612 sub null_decoder ($)
 613 {
 614         my ($input) = @_;
 615
 616         return $input;
 617 }
 618
 619 #
 620 # Match a line against the boundary string
 621 #
 622
 623 sub match_boundary($$)
 624 {
 625         my ($_, $boundary) = @_;
 626
 627         if (substr($_, 0, 2) eq '--') {
 628                 s/[ \t\r\n]+\Z//;
 629                 if ($_ eq "--$boundary") {
 630                         return 'EOP';
 631                 } elsif ($_ eq "--$boundary--") {
 632                         return 'EOM';
 633                 }
 634         }
 635
 636         return undef;
 637 }