X-Git-Url: https://diplodocus.org/git/minc/blobdiff_plain/569e7c8a550bb469aa95d31c340830d567635269..cdef2c782619aacd262bf37efa5308073dff5c66:/minc diff --git a/minc b/minc index 743accc..22ef143 100755 --- a/minc +++ b/minc @@ -1,6 +1,4 @@ -#! /usr/local/bin/perl - -# $Id$ +#!/usr/bin/perl =head1 NAME @@ -8,32 +6,49 @@ B - incorporate mail from a maildir into mh folders =head1 SYNOPSIS +B [B<-m> I] [B<-n>] + B B<-d> -B [B<-m>] [B<-n>] [B<-p>] +B B<-r> =head1 DESCRIPTION B incorporates mail from a maildir to a mh folder hierarchy. It takes mail from a maildir folder (not a maildir folder hierarchy), -optionally checks for spam with a user-defined spam-checking function, -and optionally filters mail into separate mh folders. - -The filtering is quite sophisticated, as it is done using real Perl -matching (m//) commands. +running each message through regular expression based filter and hook +functions to determine in which folder to store it or whether it's +spam. Post-processing hooks may be applied to each message. + +As it processes each message, B prints a line for each message +similar to B and B. This line includes the folder +and message number in which the message was stored, the last 'From' +header, and the last 'Subject' header. These fields are truncated to +fit in the user's terminal (see COLUMNS in B below) in +the following proportions: folder (0.1), message number (0.0625), from +header (0.175). Any of these may be overridden with $SCAN_P_FOLDER, +$SCAN_P_MESSAGE, or $SCAN_P_FROM. The subject always fills out the +rest of the line. =cut use strict; use warnings; +$SIG{'PIPE'} = 'IGNORE'; + use Data::Dumper; use Errno; -use Fcntl qw(O_WRONLY O_EXCL O_CREAT); +use Fcntl; use FileHandle; +use File::FcntlLock; +use File::Temp qw(tempfile); use File::stat; -use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION = 1; -use POSIX qw(strftime WEXITSTATUS WIFEXITED); +use Getopt::Long qw(:config gnu_getopt no_ignore_case); +use POSIX qw(WIFEXITED WEXITSTATUS WIFSIGNALED WTERMSIG); +use Pod::Usage; + +use constant LOCK_TRIES => 60; our $VERSION = 1; @@ -42,9 +57,11 @@ our $VERSION = 1; my $MAGIC_TO_REGEX = '^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To)'; my $MAGIC_TO_TOKEN = ' TO'; -# List of SPAM message numbers, scanned at the end so the user can -# check for false positives. -my @SPAM; +# Mapping of message numbers to array references. The first element is set by +# filter_mail to a reference to a header hash for the message; the second is +# set by maildier_spam to the name of the message file in the spam maildir. +# scan_spam scans this at the end so the user can check for false positives. +my %SPAM; =head1 OPTIONS @@ -53,9 +70,9 @@ my @SPAM; =item B<-d> Dump (using Data::Dumper) the FILTERS list and exit. This is useful -for testing the syntax of .mincfilter. +for testing the syntax of .minc. -=item B<--help> +=item B<-h> Show help. @@ -68,48 +85,45 @@ Stop processing after I messages. Dry run; do not actually incorporate the mail, but log and report to stdout/stderr as normal. -=item B<-p> +=item B<-r> -Print the filename of each message before checking it for spam. This -can be handy if a particular message is giving the spam checker a -problem. +Rebuild `mhpath +`/.folders from scratch, processing no mail. =back =cut -my $dumpfilters = 0; +my $dumpfilters; +my $help; my $maxmsgs; -our $run = 1; -my $printfilenames = 0; - -my %opts; - -if (not getopts('dm:np', \%opts)) { - exit(2); -} - -if ($opts{'d'}) { - $dumpfilters = 1; -} elsif ($opts{'n'}) { - $run = 0; +my $norun; +my $rebuild_dot_folders; + +if (!caller()) { +GetOptions( + 'd' => \$dumpfilters, + 'h|help' => \$help, + 'm=i' => \$maxmsgs, + 'n' => \$norun, + 'r' => \$rebuild_dot_folders, + ) or pod2usage(); +$help and pod2usage(-exitstatus=>0, -verbose=>1); } -if (defined($opts{'m'})) { - $maxmsgs = $opts{'m'}; -} - -if ($opts{'p'}) { - $printfilenames = 1; -} +our $run = !$norun; =head1 ENVIRONMENT =over 4 +=item COLUMNS + +How many columns the user's terminal can hold, used to print scan +lines for each processed message. Defaults to 80. + =item HOME -Where configuration files (.mincfilter) are found. Also, +Where the configuration file (.minc) is found. Also, $HOME/Maildir is used for the maildir if MAILDIR is not set. =item MAILDIR @@ -120,54 +134,28 @@ Where mail is delivered. =cut -use Env qw(HOME MAILDIR); +use Env qw(COLUMNS HOME MAILDIR); + +$COLUMNS ||= 80; if (not $HOME) { die("HOME environment variable must be set.\n"); } -if (not $MAILDIR) { - $MAILDIR = "$HOME/Maildir"; -} =head1 FILES =over 4 -=item $HOME/.mincfilter - -This file is Perl code (included via the 'require' directive) which is -expected to define the FILTERS list. - -=item $HOME/.mincspam - -If this file exists, B will include it with the expectation that -it will define a B function. This function takes a -message filename as an argument and returns 1 if the message is spam, -else 0. If this file does not exist, B will define a simple -function that always returns 0. - -One of B's global variables is available to the user-defined -B function: $run. This boolean should be honored; -B should only take real action (i.e. removing or creating -files, running external programs, etc.) if $run is non-zero. - -This file may also declare two other functions: B and -B. The former is passed no arguments and is expected -to return a list. This list is a "baton" that will also be passed to -B and B. It can hold anything -B will need to do its job, whether network sockets, pipes, -or whatever. - -XXX: need more details about the spam-handling process; for now read -the code. - -=item `mhpath +`/logs/minc.log +=item $HOME/.minc -Where B logs what it does, unless in -n mode. +This file is Perl code (included via the 'require' directive) which +may define the FILTERS list, @start_hooks, @filter_hooks, +@post_store_hooks, and @stop_hooks. -=item `mhpath +`/logs/dryrun.log +=item `mhpath +`/.folders -Where B logs what it would do; used in -n mode. +B adds all folders it filters into to this file, which is used +by lukem's B (XXX need a link). =item `mhpath +`/.minc.context @@ -185,90 +173,73 @@ marked unseen. =cut our @FILTERS; -require "$HOME/.mincfilter"; +our (@start_hooks, @stop_hooks, @filter_hooks, @post_store_hooks); +our $SCAN_P_FOLDER = 0.1; +our $SCAN_P_MESSAGE = 0.0625; +our $SCAN_P_FROM = 0.175; +our @folder_sort_list = (qr/^inbox$/); +our $folder_sorter = sub { sort_by_list(@_, @folder_sort_list) }; my $mh; -my $logfile; -$mh = `mhpath +`; +$mh = `mhpath +`; $? >= 0 or die; chomp($mh); - -if ($run) { - $logfile = $mh . '/logs/minc.log'; -} else { - $logfile = $mh . '/logs/dryrun.log'; +if (!$mh) { + die('mhpath did not give MH mail path: ' . exit_msg($?)); } $ENV{"MHCONTEXT"} = $mh . '/.minc.context'; +# TODO: Support dot-locking too. This works on FreeBSD and Linux. +my $locking = `mhparam datalocking`; +$? < 0 && die; +$? == 0 || die('mhparam datalocking: ' . exit_msg($?)); +chomp($locking); +if ($locking ne 'fcntl') { + die("mh locking $locking not supported; only fcntl"); +} + ############################################################################### -# Logging - -sub mylog { - my $timestamp = strftime('%b %e %H:%M:%S', localtime); - my $msg; - foreach my $part (@_) { - if (defined($part)) { - $msg .= $part; - } - } - # no newlines in the log message, thanks - $msg =~ s/\n/ /gm; +# Utility procedures - open(LOG, ">>$logfile") or die("open(>>$logfile): $!"); - print(LOG "$timestamp $msg\n") or die("print(>>$logfile): $!"); - close(LOG) or die("close($logfile): $!"); +sub exit_msg { + my $status = shift; + WIFEXITED($status) && return 'exited with status ' . WEXITSTATUS($status); + WIFSIGNALED($status) && return 'killed with signal ' . WTERMSIG($status); + # WTF + "died ($status)"; } -sub logheader { - my ($text, @contents) = @_; - my $last; +sub sort_by_list { + my $a = shift; + my $b = shift; - if (@contents) { - $last = $contents[-1]; - } else { - $last = ''; + for my $i (@_) { + my $am = $a =~ $i; + my $bm = $b =~ $i; + if ($am) { + if ($bm) { + last; + } + return -1; + } elsif ($bm) { + return 1; + } } - mylog('<< ', $text, $last); -} - -sub log_headers { - my %headers = @_; - - # For an explanation of the %headers structure, see the - # get_headers function below. - logheader('From: ', @{$headers{'return-path'}}); - logheader('To: ', @{$headers{'to'}}); - logheader('Subject: ', @{$headers{'subject'}}); - logheader('Message-Id: ', @{$headers{'message-id'}}); + return $a cmp $b; } - -############################################################################### -# Utility procedures - sub mkfolder { - my $mhfolder = shift; - my $folder; + my $folder = shift; my $target; my $component; - $folder = $mh . '/' . $mhfolder; - $target = ''; - - if (not -d $folder) { - foreach $component (split('/', $folder)) { - if (defined($component) and length($component) >= 1) { - $target = $target . '/' . $component; - if (-d $target or mkdir($target)) { - next; - } else { - die("Failed to create +$mhfolder ($component)"); - } - } - } + $target = $mh; + foreach $component (split('/', $folder)) { + $target = join('/', $target, $component); + -d $target or mkdir($target) or die("mkdir($target): $!"); } } @@ -277,7 +248,7 @@ sub getfiles { my @result; if (not opendir(DIR, $dir)) { - die("Failed opendir($dir)"); + die("opendir($dir): $!"); } # Initialize $! to 0 (success) because stupid stupid Perl provides @@ -286,27 +257,28 @@ sub getfiles { # to detect an error. Real Programmers don't handle errors, # right? >sigh< $! = 0; - @result = grep { - ($_ ne '.' and $_ ne '..') - and $_ = "$MAILDIR/new/$_" - } readdir(DIR); - + @result = readdir(DIR); if ($! != 0) { - die("Failed readdir($dir)"); + die("readdir($dir): $!"); } - if (scalar(@result) == 0) { - exit(0); + if (@result <= 2) { + return (); } STDOUT->autoflush(1); - print(scalar(@result), " messages..."); + if (@result == 3) { + print('1 message...'); + } else { + print(@result - 2, ' messages...'); + } closedir(DIR); return @result; } +my %msgnum_cache; sub get_highest_msgnum { my $mhfolder = shift; my $dir; @@ -314,16 +286,21 @@ sub get_highest_msgnum { my $highest; my $msgnum; + if (defined($msgnum_cache{$mhfolder})) { + return $msgnum_cache{$mhfolder}++; + } + $dir = "$mh/$mhfolder"; if (not opendir(DIR, $dir)) { - die("Failed opendir($dir)"); + die("opendir($dir): $!"); } + # Insert rant from getfiles here. $! = 0; @list = readdir(DIR); if ($! != 0) { - die("Failed readdir($dir)"); + die("readdir($dir): $!"); } closedir(DIR); @@ -338,92 +315,193 @@ sub get_highest_msgnum { } } - return $highest; + $msgnum_cache{$mhfolder} = $highest; + return $msgnum_cache{$mhfolder}++; +} + +sub mark { + my $folder = shift; + my $msgnum = shift; + my $seq = shift; + + my $fn = "$mh/$folder/.mh_sequences"; + my ($fh, $e) = lkopen_fcntl($fn, O_RDWR | O_CREAT, 0600); + + my @sequences = <$fh>; + chomp(@sequences); + + seek($fh, 0, SEEK_SET) or die("seek($fn): $!"); + truncate($fh, 0) or die("truncate($fn): $!"); + + my $marked = 0; + for $_ (@sequences) { + if (/^$seq: (.*)/) { + my @parts; + my @result; + my $done = 0; + for my $part (split(' ', $1)) { + if (not $done) { + my ($st, $en) = split('-', $part); + if ((defined($en) and ($msgnum >= $st and $msgnum <= $en)) + or $msgnum == $st) { + # It's already there. + $done = 1; + } + if (defined($en)) { + if ($st - 1 == $msgnum) { + $part = "$msgnum-$en"; + $done = 1; + } elsif ($en + 1 == $msgnum) { + $part = "$st-$msgnum"; + $done = 1; + } + } else { + if ($part - 1 == $msgnum) { + $part = "$msgnum-$part"; + $done = 1; + } elsif ($part + 1 == $msgnum) { + $part = "$part-$msgnum"; + $done = 1; + } + } + } + push(@result, $part); + } + if (not $done) { + push(@result, $msgnum); + } + print($fh "$seq: ", join(' ', @result), "\n"); + $marked = 1; + } else { + print($fh "$_\n"); + } + } + $marked or print($fh "$seq: $msgnum\n"); + close($fh) or die("close(>$fn): $!"); +} + +# Based on nmh's lkopen_fcntl +# Return 0 for success, errno on failure. +sub lkopen_fcntl { + my $fn = shift; + my $access = shift; + my $mode = shift; + my $errno; + + # The assumption here is that if you open the file for writing, you + # need an exclusive lock. + + my $tries = LOCK_TRIES; + for (;;) { + sysopen(my $fh, $fn, $access, $mode) or die("sysopen($fn): $!"); + + my $flk = File::FcntlLock->new; + $flk->l_start(0); + $flk->l_len(0); + $flk->l_type(($access & O_ACCMODE) == O_RDONLY ? F_RDLCK : F_WRLCK); + $flk->l_whence(SEEK_SET); + + # Really should only retry on EAGAIN and EINTR... + if ($flk->lock($fh, F_SETLK)) { + return $fh; + } + + $errno = $flk->lock_errno; + close($fh) or die("close($fn): $!"); + + if (--$tries == 0) { + last; + } + sleep(1); + } + + local $! = $errno; + die("failed to lock $fn: $!"); } +my @filtered; sub store_message { - my $msg = shift; + my $inbox = shift; + my $msg = shift; # rename $src_msg my $mhfolder = shift; - my $msgnum; - my $try; - my $mhmsg; - my $status; + my ($msgnum, $mhmsg); # rename $dst_msg and $dst_msgpath # We must do this even in -n mode because later steps fail without # it. This should be harmless. mkfolder($mhfolder); - # This loop is a modified version of the maildir delivery algorithm. - $msgnum = get_highest_msgnum($mhfolder); - for ($try = 0; ; $try++) { - $msgnum++; + if ("+$mhfolder" eq $inbox) { + # If @filtered is empty, this message already has the right number. + if (!($msgnum = shift(@filtered))) { + $msg =~ m|.*/(\d+)$|; + return $1; + } $mhmsg = "$mh/$mhfolder/$msgnum"; - - if (not stat($mhmsg)) { - if ($!{ENOENT}) { - # Now we have a non-existent file, let's try to create - # it. We must create a zero-byte file first because a - # file my appear between our happy stat results and - # our later rename(2), which would clobber said file. - # So attempt to create a file with this name. If it - # succeeds, in just a bit here we'll knowingly clobber - # this file with the rename(2) call. - - # Another way to do this is not to use rename(2), but - # use link(2) + unlink(2) instead. That's how the - # standard maildir algorithm does it. Each method has - # a disadvantage: the program may crash between the - # link(2) and unlink(2) calls. With the standard - # maildir algorithm, that means the message will end - # up duplicated. The advantage of creating an empty - # file followed by rename(2) is that an extra empty - # file is left behind as opposed to a duplicate - # message. This is more easily detected by the user. - if ($run) { - if (sysopen(MSG, "$mhmsg", - O_WRONLY | O_EXCL | O_CREAT, 0600)) { - close(MSG); + } else { + # This loop is a modified version of the maildir delivery algorithm. + $msgnum = get_highest_msgnum($mhfolder); + my $try; + for ($try = 0; ; $try++) { + $msgnum++; + $mhmsg = "$mh/$mhfolder/$msgnum"; + + if (not stat($mhmsg)) { + if ($!{ENOENT}) { + # Now we have a non-existent file, let's try to create + # it. We must create a zero-byte file first because a + # file my appear between our happy stat results and + # our later rename(2), which would clobber said file. + # So attempt to create a file with this name. If it + # succeeds, in just a bit here we'll knowingly clobber + # this file with the rename(2) call. + + # Another way to do this is not to use rename(2), but + # use link(2) + unlink(2) instead. That's how the + # standard maildir algorithm does it. Each method has + # a disadvantage: the program may crash between the + # link(2) and unlink(2) calls. With the standard + # maildir algorithm, that means the message will end + # up duplicated. The advantage of creating an empty + # file followed by rename(2) is that an extra empty + # file is left behind as opposed to a duplicate + # message. This is more easily detected by the user. + if ($run) { + if (sysopen(MSG, $mhmsg, + O_WRONLY | O_EXCL | O_CREAT, 0600)) { + close(MSG); + last; + } + } else { last; } - } else { - last; } } - } - # This algorithm is different from the maildir one; let's make - # 10 tries instead of 3. - if ($try == 9) { - die("Attempted filename $mhmsg exists."); - } + # This algorithm is different from the maildir one; let's make + # 10 tries instead of 3. + if ($try == 9) { + die("Attempted filename $mhmsg exists."); + } - # This algorithm is different; i don't think we need to sleep. - #sleep(2); - } + # This algorithm is different; i don't think we need to sleep. + #sleep(2); + } - if ($mhfolder ne 'SPAM') { - mylog('+', $mhfolder); + if ($run) { + # Mark each message as soon as we store it and bomb if that + # fails. While it is slow, it is not safe to store multiple + # messages and then have a failure before marking some (or + # all). + if ($mhfolder ne 'SPAM') { + mark($mhfolder, $msgnum, 'unseen'); + } + } } + push(@filtered, $msg); if ($run) { if (not rename($msg, $mhmsg)) { - die("Failed rename($msg, $mhmsg)"); - } - - # Mark each message as soon as we store it and bomb if that - # fails. While it is slow, it is not safe to store multiple - # messages and then have a failure before marking some (or - # all). - if ($mhfolder eq 'SPAM') { - push(@SPAM, $msgnum); - } else { - $status = system('mark', "+$mhfolder", "$msgnum", '-sequence', - 'unseen', '-add'); - if (not WIFEXITED($status)) { - die("Failed to run mark"); - } elsif (WEXITSTATUS($status) != 0) { - die("Failed to mark message unseen."); - } + die("rename($msg, $mhmsg): $!"); } } @@ -444,7 +522,7 @@ sub get_headers { my $fieldname; # unmolested header name my $contents; # contents of header - open(MSG, $msg); + open(MSG, $msg) or die("open(MSG, $msg): $!"); while () { chomp; if (length == 0) { @@ -455,8 +533,8 @@ sub get_headers { # folded header continuation if (not defined($current)) { - print(STDERR "Malformed message, cannot parse headers.\n"); - return (); + warn('Malformed message, cannot parse headers.'); + next; } @{$headers{$current}}[-1] .= $_; @@ -476,7 +554,7 @@ sub get_headers { } close(MSG); - return %headers; + return \%headers; } @@ -485,128 +563,349 @@ sub get_headers { sub find_mh_folder { my $msg = shift; - my %headers; - my $filterref; - my @filter; - my $header; - my $contents; - my $pair; - my $match; - my $expression; - my $result; - - %headers = get_headers($msg); - if (not %headers) { + my $header = shift; + + if (not %$header) { return 'malformed'; } - log_headers(%headers); - # Walk the list of filters. This structure is documented in # pod at the end of the program. - foreach $filterref (@FILTERS) { - @filter = @$filterref; - $header = shift(@filter); - - # Handle filters using the magic TO header specially. - if ($header eq $MAGIC_TO_TOKEN) { - foreach $header (keys(%headers)) { - if ($header =~ /$MAGIC_TO_REGEX/i) { - foreach $contents (@{$headers{lc($header)}}) { - foreach $pair (@filter) { - ($match, $expression) = @$pair; - if ($contents =~ /$match/) { - return $expression; - } - } - } - } - } - - # Now that it's been processed specially, skip normal handling. + for my $filterref (@FILTERS) { + if (ref($filterref) eq 'CODE') { + my $m = $filterref->($header, $msg); + $m && return $m; next; } - # Walk the list of message headers matching the filter's - # specified header. - foreach $contents (@{$headers{lc($header)}}) { - # Walk the filter's list of match/expression pairs. - foreach $pair (@filter) { - ($match, $expression) = @$pair; - if ($contents =~ /$match/i) { - if (eval "\$result = \"$expression\"") { - return $result; - } + my $m = match($header, @$filterref); + $m && return $m; + } + + return 'inbox'; +} + +# Test all the header fields against each [regexp, folder-expression] pair. +sub match { + my $header = shift; + my $filter_field = shift; + my @filters = @_; + + # Handle filters using the magic TO header specially. + if ($filter_field eq $MAGIC_TO_TOKEN) { + for my $field_name (keys(%$header)) { + if ($field_name =~ /$MAGIC_TO_REGEX/i) { + my $m = match_one_field($header->{$field_name}, \@filters); + $m && return $m; + } + } + # Now that it's been processed specially, skip normal handling. + return; + } + + # Walk the list of header fields matching the filter's specified header. + my $m = match_one_field($header->{lc($filter_field)}, \@filters); + $m && return $m; +} + +# Test all the values of one header field against each [regexp, +# folder-expression] pair. +sub match_one_field { + my $values = shift; + my $filters = shift; + for my $value (@$values) { + for my $pair (@$filters) { + my ($regexp, $expression) = @$pair; + if ($value =~ $regexp) { + my $result; + if (eval "\$result = \"$expression\"") { + return $result; } } } } +} - return 'inbox'; +sub scan_line { + my ($headers, $mhfolder, $msgnum, $nf, $nm, $nF, $ns) = @_; + my $from = ''; + my $subject = ''; + # Sometimes these headers are missing... + eval { $from = [@{$headers->{'from'}}]->[-1] }; + eval { $subject = [@{$headers->{'subject'}}]->[-1] }; + # Replace garbage characters. + for ($from, $subject) { + tr/\x00-\x1f\x80-\xff/?/; + } + return sprintf("\%-${nf}s \%${nm}d \%-${nF}s \%s", + substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm), + substr($from, 0, $nF), + substr($subject, 0, $ns)); } sub filter_mail { - my @msglist = @_; - my $msgcount = scalar(@msglist); - my $msgcountlen = length($msgcount); + my $inbox = shift; + @_ or return (); + my $msgcount = @_ - 2; # don't count . and .. + my $len = length($msgcount); my @baton; my $msg; my $mhfolder; my $spam = 0; my $saved = 0; - my %FOLDERS = ('SPAM'=>1); + my $msgnum; + my %folders; + + # XXX lame names + my $nf = int($COLUMNS * $SCAN_P_FOLDER); + my $nm = int($COLUMNS * $SCAN_P_MESSAGE); + my $nF = int($COLUMNS * $SCAN_P_FROM); + my $ns = $COLUMNS - $nf - $nm - $nF - 3; + + my %batons; + for my $hook (@start_hooks) { + my ($handle, @baton) = $hook->(); + if (defined($handle)) { + $batons{$handle} = [@baton]; + } + } - if (-f "$HOME/.mincspam") { - require "$HOME/.mincspam"; - } else { - eval "sub spam_start_hook { return (); }"; - eval "sub spam_stop_hook { }"; - eval "sub spam_check { return 0; }"; + my $tty = -t STDOUT; + if (not $tty) { + # Print a newline after the incomplete "N messages..." line. + print("\n"); } - @baton = spam_start_hook(); + for $msg (@_) { + ($msg eq '.' or $msg eq '..') and next; + + my $headers = get_headers($msg); + + undef($mhfolder); + for my $hook (@filter_hooks) { + my $result = $hook->(\%batons, $headers, $msg); + defined($result) and ($mhfolder = $result); + } + + defined($mhfolder) or ($mhfolder = find_mh_folder($msg, $headers)); + + $msgnum = store_message($inbox, $msg, $mhfolder); + $folders{$mhfolder}++; + + if ($tty) { + print("\r"); + } - foreach $msg (@msglist) { - if (spam_check($msg, @baton)) { - $mhfolder = 'SPAM'; - $spam = scalar(@SPAM)+ 1; + if ($mhfolder eq 'SPAM') { + $spam++; + $SPAM{$msgnum} = [$headers, undef]; } else { - $mhfolder = find_mh_folder($msg); $saved++; + print(scan_line($headers, $mhfolder, $msgnum, $nf, $nm, $nF, $ns), + "\n"); } - store_message($msg, $mhfolder); - - print("\r"); - if (not $FOLDERS{$mhfolder}) { - print(' ' x $msgcountlen); - print(" \r+$mhfolder\n"); - $FOLDERS{$mhfolder} = 1; + for my $hook (@post_store_hooks) { + $hook->(\%batons, $headers, $mhfolder, $msgnum); } - printf('%6d SPAM %6d saved %6d/%1d', - $spam, $saved, $spam + $saved, $msgcount); + if ($tty) { + printf(" \%${len}d SPAM \%${len}d saved \%${len}d/%1d", + $spam, $saved, $spam + $saved, $msgcount); + } defined($maxmsgs) and ($spam + $saved < $maxmsgs or last); } print("\n"); - spam_stop_hook(@baton); + for my $hook (@stop_hooks) { + $hook->(\%batons); + } + + return %folders; +} + +sub build_dot_folders { + my $folders = shift; + my $fh = shift; + my $fn; + + if (defined($fh)) { + while (<$fh>) { + chomp; + $folders->{$_}++; + } + } + + eval { ($fh, $fn) = tempfile("$mh/.folders.XXXXX") }; + if ($@) { + warn("$@"); + return; + } + + for my $folder (sort { $folder_sorter->($a,$b) } keys(%$folders)) { + print($fh "$folder\n"); + } + + if (not close($fh)) { + warn("close($fn): $!"); + unlink($fn) or warn("unlink($fn): $!"); + return; + } + + rename($fn, "$mh/.folders") or warn("rename($fn, $mh/.folders): $!"); +} + +sub create_dot_folders { + if (-f "$HOME/.minc") { + require "$HOME/.minc"; + } + + my %folders; + open(my $fh, '-|', 'folders', '-fast', '-recur') + or die("open(folders|): $!"); + build_dot_folders(\%folders, $fh); + return 0; +} + +sub update_dot_folders { + my $folders = shift; + my $fh; + + if (not open($fh, "$mh/.folders") and not $!{ENOENT}) { + # For ENOENT, we go ahead and create it, else we error and + # don't clobber it. + warn("open($mh/.folders): $!"); + return; + } + + build_dot_folders($folders, $fh); +} + +# XXX Could use some unification with getfiles... +sub maildir_spam { + my $dir = "$MAILDIR/spam/new"; + + if (not chdir($dir)) { + $!{ENOENT} or print(STDERR "skipping maildir spam: chdir($dir): $!\n"); + return; + } + + if (not opendir(DIR, '.')) { + print(STDERR "skipping maildir spam: opendir($dir): $!\n"); + return; + } + + $! = 0; + my @spams = readdir(DIR); + if ($! != 0) { + print(STDERR "skipping maildir spam: readdir($dir): $!\n"); + return; + } + + closedir(DIR); + + for my $msg (@spams) { + ($msg eq '.' or $msg eq '..') and next; + my $msgnum = store_message($msg, 'SPAM'); + # Store the original file name for scan_spam in -n mode. + $SPAM{$msgnum} = [undef, $msg]; + } +} + +sub scan_spam { + my ($msgnum, $header, $tuple, $msg); + + # Unlike filter_mail, we don't need to print the folder name. + # Calculate how many columns would be allocated to it... + my $nf = int($COLUMNS * $SCAN_P_FOLDER); + # ...and add that amount to COLUMNS to calculate the number of columns to + # allocate to msgnum and from snippet, thus filling the line without + # printing the folder name. + my $nm = int(($COLUMNS + $nf) * $SCAN_P_MESSAGE); + my $nF = int(($COLUMNS + $nf) * $SCAN_P_FROM); + my $ns = $COLUMNS - $nm - $nF - 3; + + for $msgnum (sort(keys(%SPAM))) { + $tuple = $SPAM{$msgnum}; + if (defined($tuple->[0])) { + # Filed by filter_mail, so we have the header. + $header = $tuple->[0]; + } elsif (defined($tuple->[1])) { + # Filed by maildir_spam, so we don't have the header. + if ($run) { + # The message has been filed, load it from $mh. + $msg = "$mh/SPAM/$msgnum"; + } else { + # The message has not been filed, load it from the maildir. + # $tuple->[1] is just a basename, not a path; this works + # because maildir_spam did chdir(Maildir/spam/new). + $msg = $tuple->[1]; + } + $header = get_headers($msg); + } else { + print(STDERR + "BUG: corrupt SPAM tuple, neither element defined", + " for message $msgnum\n"); + next; + } + print(scan_line($header, '', $msgnum, 0, $nm, $nF, $ns), + "\n"); + } } -MAIN: { - my @msglist; +if (!caller()) { my $st; if ($dumpfilters) { + require "$HOME/.minc"; $Data::Dumper::Indent = 1; print(Dumper(\@FILTERS)); exit; } - @msglist = ( - map { $_->[1] } + $rebuild_dot_folders and exit(create_dot_folders); + + if (-f "$HOME/.minc") { + require "$HOME/.minc"; + } + + my $inbox; + if ($MAILDIR) { + $inbox = $MAILDIR; + } elsif (-d "$HOME/Maildir") { + $inbox = "$HOME/Maildir"; + } + + if (@ARGV > 0) { + $inbox = shift(@ARGV); + } + + my @files; + if (substr($inbox, 0, 1) eq '+') { + my @msgs = @ARGV; + if (!@msgs) { + @msgs = ('cur-last'); + } + open(my $fh, '-|', 'mhpath', @msgs) or die("open(mhpath|): $!"); + chomp(@files = <$fh>); + if (!close($fh)) { + $! == 0 or die("open(mhpath|): $!"); + die('mhpath '. exit_msg($?)); + } + @files > 0 or exit 0; + } else { + @ARGV == 0 or pod2usage(); + chdir("$inbox/new") or die("chdir($inbox/new): $!"); + @files = map { $_->[1] } + sort { $a->[0] <=> $b->[0] } + map { + $st = stat($_) or die("stat($_): $!"); + [ $st->mtime, $_ ] + } getfiles('.'); + + @files = map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { if (not ($st = stat($_))) { @@ -614,26 +913,31 @@ MAIN: { } [$st->mtime, $_] } - getfiles("$MAILDIR/new")); + getfiles('.'); + } - filter_mail(@msglist); + my %folders = filter_mail($inbox, @files); - @SPAM and (exec('scan', '+SPAM', @SPAM) or die); + $run and %folders and update_dot_folders(\%folders); + + maildir_spam(); + scan_spam(); } +1; + __END__ =head1 THE FILTERS STRUCTURE -The user's .mincfilter file must define the @FILTERS structure. This -structure is an array. Each element of @FILTERS is a filter. A -filter is itself an array. The first element of a filter is a string, -the name of the header this filter acts upon. The header name is not -case-sensitive. Each subsequent element of a filter is a pair (i.e. a -two-element array): first, a regular expression B uses to -determine whether this filter matches or not, and second, an -expression which B evaluates to get the folder name. +Each element of @FILTERS is a filter. A filter is itself an array. +The first element of a filter is a string, the name of the header this +filter acts upon. The header name is not case-sensitive. Each +subsequent element of a filter is a pair (i.e. a two-element array): +first, a regular expression B uses to determine whether this +filter matches or not, and second, an expression which B +evaluates to get the folder name. B decides where to store a message by iterating over the @FILTERS array. It tests each regexp of each filter against all @@ -656,6 +960,31 @@ header are matched against a collection of headers related to (and including) the To. Which headers to use is determined by a regular expression borrowed from procmail. +=head1 HOOKS + +Filter hooks take a reference to a hash of batons, a reference to a +hash of headers, and the message filename as arguments. It returns +undef to decline filtering of this message (thus falling back to +subsequent filter hooks, and finally @FILTERS), or the name of the +folder to store this message into. + +One of B's global variables is available to the user-defined +hooks: $run. This boolean should be honored; hooks should only take +real action (i.e. removing or creating files, running external +programs, etc.) if $run is non-zero. + +The baton hash is created simply from the start hooks; if the hook +returns at least one defined value, this value is used as the key and +all other return values are put into a list reference as the value. +This hash is then passed by reference to all filter, post-store, and +stop hooks. + +Post store hooks take a reference to a hash of batons, a reference to +a hash of headers, the folder this message was stored in, and its new +message number. + +XXX: need more details about the hook process; for now read the code. + =head1 EXAMPLES @FILTERS = ( @@ -691,8 +1020,7 @@ apache.org subdomain, with mailing lists hosted there. So, given a list such as dev@httpd.apache.org, this filter will create the folder name l/apache/httpd/dev. -For an example B function, see -LE<047>pretzelnet.orgE<047>cvsE<047>dotfilesE<047>.mincspam> +XXX Need hook examples. =head1 AUTHORS