(find_mh_folder): Require $regexp to be a regular expression object

[minc] / minc
diff --git a/minc b/minc

index 17c3d3925459537ead644884e48dd0e39f78c374..cbf2996d453c0e81bb1b3e74bfa69c493a8584e0 100755 (executable)
--- a/minc
+++ b/minc
@@ -1,41 +1,62 @@
-#! /usr/local/bin/perl
+#! /usr/bin/env perl
  
  # $Id$
  
  =head1 NAME
  
-B<minc> - Incorporate mail from a maildir into mh folders.
+B<minc> - incorporate mail from a maildir into mh folders
  
  =head1 SYNOPSIS
  
-B<minc> [-B<dhns>]
+B<minc> [B<-m> I<MAX>] [B<-n>]
  
-=head1 DESCRIPTION
+B<minc> B<-d>
+
+B<minc> B<-r>
  
-B<minc> is a program for incorporating mail from a maildir to a mh
-folder hierarchy.  It takes mail from a maildir folder (not a maildir
-folder hierarchy), optionally checks for spam with a user-defined
-spam-checking function, and optionally filters mail into separate mh
-folders.
+=head1 DESCRIPTION
  
-The filtering is quite sophisticated, as it is done using real Perl
-matching (m//) commands.
+B<minc> incorporates mail from a maildir to a mh folder hierarchy.  It
+takes mail from a maildir folder (not a maildir folder hierarchy),
+running each message through regular expression based filter and hook
+functions to determine in which folder to store it or whether it's
+spam.  Post-processing hooks may be applied to each message.
+
+As it processes each message, B<minc> prints a line for each message
+similar to B<inc(1)> and B<scan(1)>.  This line includes the folder
+and message number in which the message was stored, the last 'From'
+header, and the last 'Subject' header.  These fields are truncated to
+fit in the user's terminal (see COLUMNS in B<ENVIRONMENT> below) in
+the following proportions: folder (0.1), message number (0.0625), from
+header (0.175).  Any of these may be overridden with $SCAN_P_FOLDER,
+$SCAN_P_MESSAGE, or $SCAN_P_FROM.  The subject always fills out the
+rest of the line.
  
  =cut
  
  use strict;
  use warnings;
  
-require 'sysexits.ph';
-
  use Data::Dumper;
  use Errno;
  use Fcntl qw(O_WRONLY O_EXCL O_CREAT);
-use File::Basename;
-use Getopt::Std;
-use Log::Dispatch;
-use Log::Dispatch::File;
+use FileHandle;
+use File::Temp qw(tempfile);
+use File::stat;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case);
  use POSIX qw(strftime WEXITSTATUS WIFEXITED);
+use Pod::Usage;
+
+our $VERSION = 1;
+
+# If a filter set's header is $MAGIC_TO_TOKEN, that set is compared
+# against headers matching this regex (taken from procmail).
+my $MAGIC_TO_REGEX = '^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To)';
+my $MAGIC_TO_TOKEN = ' TO';
+
+# List of SPAM message numbers, scanned at the end so the user can
+# check for false positives.
+my @SPAM;
  
  =head1 OPTIONS
  
@@ -44,67 +65,59 @@ use POSIX qw(strftime WEXITSTATUS WIFEXITED);
  =item B<-d>
  
  Dump (using Data::Dumper) the FILTERS list and exit.  This is useful
-for testing the syntax of .mincfilter.
-
-=item B<-f>
-
-Filter only, then exit.  This is useful after running B<minc -s>.
+for testing the syntax of .minc.
  
  =item B<-h>
  
  Show help.
  
+=item B<-m> I<MAX>
+
+Stop processing after I<MAX> messages.
+
  =item B<-n>
  
  Dry run; do not actually incorporate the mail, but log and report to
  stdout/stderr as normal.
  
-=item B<-s>
+=item B<-r>
  
-Process SPAM only, then exit, leaving all non-spam messages in the
-maildir.
+Rebuild `mhpath +`/.folders from scratch, processing no mail.
  
  =back
  
  =cut
  
-my $dumpfilters = 0;
-my $filteronly = 0;
-our $run = 1;
-my $spamonly = 0;
-
-our $opt_d;
-our $opt_f;
-our $opt_h;
-our $opt_n;
-our $opt_s;                     # ;; # stupid cperl-mode
-
-if (not getopts('dfhns')) {
-    exit(&EX_USAGE);
-}
+my $dumpfilters;
+my $help;
+my $maxmsgs;
+my $norun;
+my $rebuild_dot_folders;
  
-if ($opt_d) {
-    $dumpfilters = 1;
-} elsif ($opt_h) {
-    print("Sorry bub, no help.\n");
-    exit(&EX_OK);
-} elsif ($opt_n) {
-    $run = 0;
-}
+GetOptions(
+           'd' => \$dumpfilters,
+           'h|help' => \$help,
+           'm=i' => \$maxmsgs,
+           'n' => \$norun,
+           'r' => \$rebuild_dot_folders,
+          ) or pod2usage();
+$help and pod2usage(-exitstatus=>0, -verbose=>1);
+@ARGV == 0 or pod2usage();
  
-if ($opt_f) {
-    $filteronly = 1;
-} elsif ($opt_s) {              # ))){ # stupid cperl-mode
-    $spamonly = 1;
-}
+our $run = !$norun;
  
  =head1 ENVIRONMENT
  
  =over 4
  
+=item COLUMNS
+
+How many columns the user's terminal can hold, used to print scan
+lines for each processed message.  Defaults to 80.
+
  =item HOME
  
-Where configuration files (.mincfilter) are found.  Also,
+Where the configuration file (.minc) is found.  Also,
  $HOME/Maildir is used for the maildir if MAILDIR is not set.
  
  =item MAILDIR
@@ -115,7 +128,9 @@ Where mail is delivered.
  
  =cut
  
-use Env qw(HOME MAILDIR);
+use Env qw(COLUMNS HOME MAILDIR);
+
+$COLUMNS ||= 80;
  
  if (not $HOME) {
      die("HOME environment variable must be set.\n");
@@ -128,35 +143,21 @@ if (not $MAILDIR) {
  
  =over 4
  
-=item $HOME/.mincfilter
-
-This file is Perl code (included via the 'require' directive) which is
-expected to define the FILTERS list.
+=item $HOME/.minc
  
-=item $HOME/.mincspam
+This file is Perl code (included via the 'require' directive) which
+may define the FILTERS list, @start_hooks, @filter_hooks,
+@post_store_hooks, and @stop_hooks.
  
-If this file exists, B<minc> will include it with the expectation that it
-will define a B<is_spam> function.  This function takes a message
-filename as an argument and returns 1 if the message is spam, else 0.
-If this file does not exist, B<minc> will define a simple function that
-always returns 0.
-
-One of B<minc>'s global variables is available to the user-defined
-is_spam function: $run.  This boolean should be honored; is_spam
-should only take real action (i.e. removing or creating files, running
-external programs, etc.) if $run is non-zero.
+=item `mhpath +`/.folders
  
-XXX: need more details about the spam-handling process; for now read
-the code.
+B<minc> adds all folders it filters into to this file, which is used
+by lukem's B<new(1)> (XXX need a link).
  
  =item `mhpath +`/logs/minc.log
  
  Where B<minc> logs what it does, unless in -n mode.
  
-=item `mhpath +`/logs/dryrun.log
-
-Where B<minc> logs what it would do; used in -n mode.
-
  =item `mhpath +`/.minc.context
  
  B<minc> uses this file for context (i.e. current folder) instead of
@@ -173,27 +174,19 @@ marked unseen.
  =cut
  
  our @FILTERS;
-require "$HOME/.mincfilter2";
-
-if (-f "$HOME/.mincspam") {
-    require "$HOME/.mincspam";
-} else {
-    sub is_spam {
-        return 0;
-    }
-}
+our (@start_hooks, @stop_hooks, @filter_hooks, @post_store_hooks);
+our $SCAN_P_FOLDER = 0.1;
+our $SCAN_P_MESSAGE = 0.0625;
+our $SCAN_P_FROM = 0.175;
+our @folder_sort_list = (qr/^inbox$/);
+our $folder_sorter = sub { sort_by_list(@_, @folder_sort_list) };
  
  my $mh;
-my $logfile;
  
  $mh = `mhpath +`;
  chomp($mh);
  
-if ($run) {
-    $logfile = $mh . '/logs/minc.log';
-} else {
-    $logfile = $mh . '/logs/dryrun.log';
-}
+my $logfile = $mh . '/logs/minc.log';
  
  $ENV{"MHCONTEXT"} = $mh . '/.minc.context';
  
@@ -201,29 +194,9 @@ $ENV{"MHCONTEXT"} = $mh . '/.minc.context';
  ###############################################################################
  # Logging
  
-# debuglevels for the logger
-use constant LOG_DEBUGLEVEL     => 'info';
-use constant SCREEN_DEBUGLEVEL  => 'debug';
-
-# characters used to wrap around the id field in the log so I can more
-# easily parse the msg flow
-use constant DEBUGCHAR          => '%';
-use constant INFOCHAR           => '=';
-use constant INCOMINGCHAR       => '<';
-use constant SAVECHAR           => '>';
-use constant WARNCHAR           => 'W';
-use constant ERRORCHAR          => 'E';
-
-my $logger = new Log::Dispatch;
-$logger->add(new Log::Dispatch::File (name=>'logfile',
-                                      filename=>$logfile,
-                                      min_level=>'info',
-                                      mode=>'append'));
-
-# log alias, handles getting passed vars that are undef
  sub mylog {
-    my $level = shift;
-    my $act = shift;
+    $run or return;
+
      my $timestamp = strftime('%b %e %H:%M:%S', localtime);
      my $msg;
      foreach my $part (@_) {
@@ -233,86 +206,69 @@ sub mylog {
      }
      # no newlines in the log message, thanks
      $msg =~ s/\n/ /gm;
-    $msg = sprintf("%s %s%s %s\n", $timestamp, $act, $act, $msg);
-    $logger->log(level=>$level, message=>$msg);
  
-    if ($act eq SAVECHAR) {
-        $logger->log(level=>$level, message=>"\n");
-    }
+    open(LOG, ">>$logfile") or die("open(>>$logfile): $!");
+    print(LOG "$timestamp $msg\n") or die("print(>>$logfile): $!");
+    close(LOG) or die("close($logfile): $!");
  }
  
-sub logincoming { mylog('info', INCOMINGCHAR, @_); }
-sub logsave { mylog('notice', SAVECHAR, @_); }
-sub loginfo { mylog('info', INFOCHAR, @_); }
-sub logdebug { mylog('debug', DEBUGCHAR, @_); }
-sub logwarn { mylog('warning', WARNCHAR, @_); }
-sub logerr { mylog('error', ERRORCHAR, @_); }
+sub logheader {
+    my ($text, @contents) = @_;
+    my $last;
+
+    if (@contents) {
+        $last = $contents[-1];
+    } else {
+        $last = '';
+    }
+
+    mylog('<< ', $text, $last);
+}
  
  sub log_headers {
      my %headers = @_;
  
      # For an explanation of the %headers structure, see the
      # get_headers function below.
-    logincoming('From:       ', @{$headers{'return-path'}}[-1]);
-    logincoming('To:         ', @{$headers{'to'}}[-1]);
-    logincoming('Subject:    ', @{$headers{'subject'}}[-1]);
-    logincoming('Message-Id: ', @{$headers{'message-id'}}[-1]);
+    logheader('From:       ', @{$headers{'return-path'}});
+    logheader('To:         ', @{$headers{'to'}});
+    logheader('Subject:    ', @{$headers{'subject'}});
+    logheader('Message-Id: ', @{$headers{'message-id'}});
  }
  
  \f
  ###############################################################################
  # Utility procedures
  
-sub _errprint {
-    printf(STDERR '%s:%s', __FILE__, __LINE__);
+sub sort_by_list {
+    my $a = shift;
+    my $b = shift;
  
-    if (@_) {
-        print(STDERR ': ');
-        foreach (@_) {
-            print(STDERR);
+    for my $i (@_) {
+        my $am = $a =~ $i;
+        my $bm = $b =~ $i;
+        if ($am) {
+            if ($bm) {
+                last;
+            }
+            return -1;
+        } elsif ($bm) {
+            return 1;
          }
      }
-}
-
-sub err {
-    my $ex = shift;
  
-    _errprint(@_);
-    print(STDERR ": $!\n");
-
-    exit($ex);
-}
-
-sub errx {
-    my $ex = shift;
-
-    _errprint(@_);
-    print(STDERR "\n");
-
-    exit($ex);
+    return $a cmp $b;
  }
  
  sub mkfolder {
-    my $mhfolder = shift;
-    my $folder;
+    my $folder = shift;
      my $target;
      my $component;
  
-    $folder = $mh . '/' . $mhfolder;
-    $target = '';
-
-    if (not -d $folder) {
-        foreach $component (split('/', $folder)) {
-            if (defined($component) and length($component) >= 1) {
-                $target = $target . '/' . $component;
-                if (-d $target or mkdir($target)) {
-                    next;
-                } else {
-                    err(&EX_OSERR,
-                        "Failed to create +$mhfolder ($component)");
-                }
-            }
-        }
+    $target = $mh;
+    foreach $component (split('/', $folder)) {
+        $target = join('/', $target, $component);
+        -d $target or mkdir($target) or die("mkdir($target): $!");
      }
  }
  
@@ -321,7 +277,7 @@ sub getfiles {
      my @result;
  
      if (not opendir(DIR, $dir)) {
-        err(&EX_OSERR, "Failed opendir($dir)");
+        die("opendir($dir): $!");
      }
  
      # Initialize $! to 0 (success) because stupid stupid Perl provides
@@ -330,15 +286,18 @@ sub getfiles {
      # to detect an error.  Real Programmers don't handle errors,
      # right?  >sigh<
      $! = 0;
-    @result = grep {
-        ($_ ne '.' and $_ ne '..')
-          and $_ = "$MAILDIR/new/$_"
-      } readdir(DIR);
-
+    @result = readdir(DIR);
      if ($! != 0) {
-        err(&EX_OSERR, "Failed readdir($dir)");
+        die("readdir($dir): $!");
+    }
+
+    if (@result <= 2) {
+        exit(0);
      }
  
+    STDOUT->autoflush(1);
+    print(@result - 2, " messages...");
+
      closedir(DIR);
  
      return @result;
@@ -353,14 +312,15 @@ sub get_highest_msgnum {
  
      $dir = "$mh/$mhfolder";
      if (not opendir(DIR, $dir)) {
-        err(&EX_OSERR, "Failed opendir($dir)");
+        die("opendir($dir): $!");
      }
  
+    # Insert rant from getfiles here.
      $! = 0;
      @list = readdir(DIR);
  
      if ($! != 0) {
-        err(&EX_OSERR, "Failed readdir($dir)");
+        die("readdir($dir): $!");
      }
  
      closedir(DIR);
@@ -378,12 +338,6 @@ sub get_highest_msgnum {
      return $highest;
  }
  
-# We want to print the name of each list that has new mail only once,
-# so use this hash to store the lists that have already been printed.
-# Start the list out with SPAM already in it, since we don't care when
-# new messages are added to it.
-my %FOLDERS = ('SPAM'=>1);
-
  sub store_message {
      my $msg = shift;
      my $mhfolder = shift;
@@ -411,8 +365,19 @@ sub store_message {
                  # So attempt to create a file with this name.  If it
                  # succeeds, in just a bit here we'll knowingly clobber
                  # this file with the rename(2) call.
+
+                # Another way to do this is not to use rename(2), but
+                # use link(2) + unlink(2) instead.  That's how the
+                # standard maildir algorithm does it.  Each method has
+                # a disadvantage: the program may crash between the
+                # link(2) and unlink(2) calls.  With the standard
+                # maildir algorithm, that means the message will end
+                # up duplicated.  The advantage of creating an empty
+                # file followed by rename(2) is that an extra empty
+                # file is left behind as opposed to a duplicate
+                # message.  This is more easily detected by the user.
                  if ($run) {
-                    if (sysopen(MSG, "$mhmsg",
+                    if (sysopen(MSG, $mhmsg,
                                  O_WRONLY | O_EXCL | O_CREAT, 0600)) {
                          close(MSG);
                          last;
@@ -426,38 +391,40 @@ sub store_message {
          # This algorithm is different from the maildir one; let's make
          # 10 tries instead of 3.
          if ($try == 9) {
-            errx(&EX_TEMPFAIL, "Attempted filename $mhmsg exists.");
+            die("Attempted filename $mhmsg exists.");
          }
  
          # This algorithm is different; i don't think we need to sleep.
          #sleep(2);
      }
  
-    logsave("+$mhfolder");
+    mylog('+', $mhfolder);
  
      if ($run) {
          if (not rename($msg, $mhmsg)) {
-            err(&EX_OSERR, "Failed rename($msg, $mhmsg)");
+            die("rename($msg, $mhmsg): $!");
          }
  
          # Mark each message as soon as we store it and bomb if that
          # fails.  While it is slow, it is not safe to store multiple
          # messages and then have a failure before marking some (or
          # all).
-        if ($mhfolder ne 'SPAM') {
-            $status = system("mark +$mhfolder $msgnum -sequence unseen -add");
+        if ($mhfolder eq 'SPAM') {
+            push(@SPAM, $msgnum);
+        } else {
+            $status = system('mark', "+$mhfolder", "$msgnum", '-sequence',
+                             'unseen', '-add');
+            # XXX need to handle signalled and stopped, and print
+            # the exit code or signal number.
              if (not WIFEXITED($status)) {
-                err(&EX_OSERR, "Failed to run mark");
+                die("Failed to run mark");
              } elsif (WEXITSTATUS($status) != 0) {
-                errx(&EX_SOFTWARE, "Failed to mark message unseen.");
+                die("Failed to mark message unseen.");
              }
          }
      }
  
-    if (not $FOLDERS{$mhfolder}) {
-        print("+$mhfolder\n");
-        $FOLDERS{$mhfolder} = 1;
-    }
+    return $msgnum;
  }
  
  # Parse a message file into a structure describing the headers.  The
@@ -474,7 +441,7 @@ sub get_headers {
      my $fieldname;              # unmolested header name
      my $contents;               # contents of header
  
-    open(MSG, $msg);
+    open(MSG, $msg) or die("open(MSG, $msg): $!");
      while (<MSG>) {
          chomp;
          if (length == 0) {
@@ -485,8 +452,8 @@ sub get_headers {
              # folded header continuation
  
              if (not defined($current)) {
-                print(STDERR "Malformed message, cannot parse headers.\n");
-                return ();
+                warn('Malformed message, cannot parse headers.');
+                next;
              }
  
              @{$headers{$current}}[-1] .= $_;
@@ -509,69 +476,60 @@ sub get_headers {
      return %headers;
  }
  
-###############################################################################
-# Spam handling
-
-sub kill_spam {
-    my @msglist = @_;
-    my @result;
-    my $msg;
-    my $i;
-
-    @result = ();
-
-    $i = 0;
-    foreach $msg (@msglist) {
-        printf('%sChecking for spam... %6d/%d',
-               "\r", ++$i, scalar(@msglist));
-        if (is_spam($msg)) {
-            print(" SPAM\n");
-            store_message($msg, 'SPAM');
-        } else {
-            push(@result, $msg);
-        }
-    }
-    print("\nDone: ", scalar(@result), " survivors\n");
-
-    return @result;
-}
-
  \f
  ###############################################################################
  # Filtering
  
  sub find_mh_folder {
      my $msg = shift;
-    my %headers;
+    my %headers = @_;
      my $filterref;
      my @filter;
      my $header;
      my $contents;
      my $pair;
-    my $match;
+    my $regexp;
      my $expression;
      my $result;
  
-    %headers = get_headers($msg);
      if (not %headers) {
          return 'malformed';
      }
  
-    log_headers(%headers);
-
      # Walk the list of filters.  This structure is documented in
      # pod at the end of the program.
      foreach $filterref (@FILTERS) {
          @filter = @$filterref;
          $header = shift(@filter);
  
+        # Handle filters using the magic TO header specially.
+        if ($header eq $MAGIC_TO_TOKEN) {
+            foreach $header (keys(%headers)) {
+                if ($header =~ /$MAGIC_TO_REGEX/i) {
+                    foreach $contents (@{$headers{lc($header)}}) {
+                        foreach $pair (@filter) {
+                            ($regexp, $expression) = @$pair;
+                            if ($contents =~ $regexp) {
+                                if (eval "\$result = \"$expression\"") {
+                                    return $result;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            # Now that it's been processed specially, skip normal handling.
+            next;
+        }
+
          # Walk the list of message headers matching the filter's
          # specified header.
          foreach $contents (@{$headers{lc($header)}}) {
-            # Walk the filter's list of match/expression pairs.
+            # Walk the filter's list of regexp/expression pairs.
              foreach $pair (@filter) {
-                ($match, $expression) = @$pair;
-                if ($contents =~ /$match/) {
+                ($regexp, $expression) = @$pair;
+                if ($contents =~ $regexp) {
                      if (eval "\$result = \"$expression\"") {
                          return $result;
                      }
@@ -584,35 +542,174 @@ sub find_mh_folder {
  }
  
  sub filter_mail {
-    my @msglist = @_;
+    my $msgcount = @_ - 2; # don't count . and ..
+    my $len = length($msgcount);
+    my @baton;
      my $msg;
      my $mhfolder;
+    my $spam = 0;
+    my $saved = 0;
+    my $msgnum;
+    my %folders;
+
+    if (-f "$HOME/.minc") {
+        require "$HOME/.minc";
+    }
+
+    # XXX lame names
+    my $nf = int($COLUMNS * $SCAN_P_FOLDER);
+    my $nm = int($COLUMNS * $SCAN_P_MESSAGE);
+    my $nF = int($COLUMNS * $SCAN_P_FROM);
+    my $ns = $COLUMNS - $nf - $nm - $nF - 3;
+
+    my %batons;
+    for my $hook (@start_hooks) {
+        my ($handle, @baton) = $hook->();
+        if (defined($handle)) {
+            $batons{$handle} = [@baton];
+        }
+    }
+
+    for $msg (@_) {
+        ($msg eq '.' or $msg eq '..') and next;
+
+        my %headers = get_headers($msg);
+        log_headers(%headers);
+
+        undef($mhfolder);
+        for my $hook (@filter_hooks) {
+            my $result = $hook->(\%batons, \%headers, $msg);
+            defined($result) and ($mhfolder = $result);
+        }
+
+        defined($mhfolder) or ($mhfolder = find_mh_folder($msg, %headers));
+
+        $msgnum = store_message($msg, $mhfolder);
+        $folders{$mhfolder}++;
+
+        if ($mhfolder eq 'SPAM') {
+            $spam++;
+        } else {
+            $saved++;
+            my $from = '';
+            my $subject = '';
+            # Sometimes these headers are missing...
+            eval { $from = [@{$headers{'from'}}]->[-1] };
+            eval { $subject = [@{$headers{'subject'}}]->[-1] };
+            for ($from, $subject) {
+                tr/\x00-\x1f\x80-\xff/ /;
+            }
+            print("\r");
+            print(' ' x $COLUMNS);
+            printf("\r\%-${nf}s \%${nm}d \%-${nF}s \%s\n",
+                   substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
+                   substr($from, 0, $nF),
+                   substr($subject, 0, $ns));
+        }
+
+        for my $hook (@post_store_hooks) {
+            $hook->(\%batons, \%headers, $mhfolder, $msgnum);
+        }
+
+        print("\r");
+        printf(" \%${len}d SPAM \%${len}d saved \%${len}d/%1d",
+               $spam, $saved, $spam + $saved, $msgcount);
+
+        defined($maxmsgs) and ($spam + $saved < $maxmsgs or last);
+    }
+    print("\n");
+
+    for my $hook (@stop_hooks) {
+        $hook->(\%batons);
+    }
+
+    return %folders;
+}
+
+sub build_dot_folders {
+    my $folders = shift;
+    my $fh = shift;
+    my $fn;
+
+    if (defined($fh)) {
+        while (<$fh>) {
+            chomp;
+            $folders->{$_}++;
+        }
+    }
+
+    eval { ($fh, $fn) = tempfile("$mh/.folders.XXXXX") };
+    if ($@) {
+        warn("$@");
+        return;
+    }
+
+    for my $folder (sort { $folder_sorter->($a,$b) } keys(%$folders)) {
+        print($fh "$folder\n");
+    }
+
+    if (not close($fh)) {
+        warn("close($fn): $!");
+        unlink($fn) or warn("unlink($fn): $!");
+        return;
+    }
+
+    rename($fn, "$mh/.folders") or warn("rename($fn, $mh/.folders): $!");
+}
+
+sub create_dot_folders {
+    if (-f "$HOME/.minc") {
+        require "$HOME/.minc";
+    }
+
+    my %folders;
+    open(my $fh, '-|', 'folders', '-fast', '-recur')
+      or die("open(folders|): $!");
+    build_dot_folders(\%folders, $fh);
+    return 0;
+}
  
-    foreach $msg (@msglist) {
-        $mhfolder = find_mh_folder($msg);
-        store_message($msg, $mhfolder);
+sub update_dot_folders {
+    my $folders = shift;
+    my $fh;
+
+    if (not open($fh, "$mh/.folders") and not $!{ENOENT}) {
+        # For ENOENT, we go ahead and create it, else we error and
+        # don't clobber it.
+        warn("open($mh/.folders): $!");
+        return;
      }
+
+    build_dot_folders($folders, $fh);
  }
  
  \f
  MAIN: {
-    my @msglist;
+    my $st;
  
      if ($dumpfilters) {
+        require "$HOME/.minc";
          $Data::Dumper::Indent = 1;
          print(Dumper(\@FILTERS));
-        exit(&EX_OK);
+        exit;
      }
  
-    @msglist = getfiles("$MAILDIR/new");
+    $rebuild_dot_folders and exit(create_dot_folders);
  
-    if (not $filteronly) {
-        @msglist = kill_spam(@msglist);
-    }
+    chdir("$MAILDIR/new") or die("chdir($MAILDIR/new): $!");
+    my %folders = filter_mail(map { $_->[1] }
+                sort { $a->[0] <=> $b->[0] }
+                map {
+                    if (not ($st = stat($_))) {
+                        die("stat($_): $!");
+                    }
+                    [$st->mtime, $_]
+                }
+                getfiles('.'));
  
-    if (not $spamonly) {
-        filter_mail(@msglist);
-    }
+    $run and update_dot_folders(\%folders);
+
+    @SPAM and (exec('scan', '+SPAM', @SPAM) or die);
  }
  
  \f
@@ -620,14 +717,13 @@ __END__
  
  =head1 THE FILTERS STRUCTURE
  
-The user's .mincfilter file must define the @FILTERS structure.  This
-structure is an array.  Each element of @FILTERS is a filter.  A
-filter is itself an array.  The first element of a filter is a string,
-the name of the header this filter acts upon.  The header name is not
-case-sensitive.  Each subsequent element of a filter is a pair (i.e. a
-two-element array): first, a regular expression B<minc> uses to
-determine whether this filter matches or not, and second, an
-expression which B<minc> evaluates to get the folder name.
+Each element of @FILTERS is a filter.  A filter is itself an array.
+The first element of a filter is a string, the name of the header this
+filter acts upon.  The header name is not case-sensitive.  Each
+subsequent element of a filter is a pair (i.e. a two-element array):
+first, a regular expression B<minc> uses to determine whether this
+filter matches or not, and second, an expression which B<minc>
+evaluates to get the folder name.
  
  B<minc> decides where to store a message by iterating over the
  @FILTERS array.  It tests each regexp of each filter against all
@@ -645,6 +741,36 @@ are traversed I<in order>.  This means the filters can be arranged so
  that some have priority over others.  XXX: get Doug to write an
  example of having the same header matched more than once.
  
+Lastly, B<minc> supports a magic ' TO' header.  Filters using this
+header are matched against a collection of headers related to (and
+including) the To.  Which headers to use is determined by a regular
+expression borrowed from procmail.
+
+=head1 HOOKS
+
+Filter hooks take a reference to a hash of batons, a reference to a
+hash of headers, and the message filename as arguments.  It returns
+undef to decline filtering of this message (thus falling back to
+subsequent filter hooks, and finally @FILTERS), or the name of the
+folder to store this message into.
+
+One of B<minc>'s global variables is available to the user-defined
+hooks: $run.  This boolean should be honored; hooks should only take
+real action (i.e. removing or creating files, running external
+programs, etc.) if $run is non-zero.
+
+The baton hash is created simply from the start hooks; if the hook
+returns at least one defined value, this value is used as the key and
+all other return values are put into a list reference as the value.
+This hash is then passed by reference to all filter, post-store, and
+stop hooks.
+
+Post store hooks take a reference to a hash of batons, a reference to
+a hash of headers, the folder this message was stored in, and its new
+message number.
+
+XXX: need more details about the hook process; for now read the code.
+
  =head1 EXAMPLES
  
  @FILTERS = (
@@ -680,14 +806,22 @@ apache.org subdomain, with mailing lists hosted there.  So, given a
  list such as dev@httpd.apache.org, this filter will create the folder
  name l/apache/httpd/dev.
  
-For an example is_spam function, see
-L<http:E<047>E<047>pretzelnet.orgE<047>cvsE<047>dotfilesE<047>.mincspam>
+XXX Need hook examples.
  
  =head1 AUTHORS
  
-Written by Eric Gillespie <epg@pretzelnet.org> with logging code
-stolen from Adam Lazur <adam@lazur.org>.
+Written by Eric Gillespie <epg@pretzelnet.org>.  Design by Eric
+Gillespie and Doug Porter <dsp@waterspout.com>.
  
-Design by Eric Gillespie and Doug Porter <dsp@waterspout.com>.
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
  
  =cut
+
+# Local variables:
+# cperl-indent-level: 4
+# perl-indent-level: 4
+# indent-tabs-mode: nil
+# End:
+
+# vi: set tabstop=4 expandtab: