-#! /usr/local/bin/perl
+#! /usr/bin/env perl
# $Id$
=head1 NAME
-B<minc> - Incorporate mail from a maildir into mh folders.
+B<minc> - incorporate mail from a maildir into mh folders
=head1 SYNOPSIS
-B<minc> [-B<dhns>]
+B<minc> [B<-m> I<MAX>] [B<-n>] [B<-p>]
+
+B<minc> B<-d>
+
+B<minc> B<-h>
=head1 DESCRIPTION
-B<minc> is a program for incorporating mail from a maildir to a mh
-folder hierarchy. It takes mail from a maildir folder (not a maildir
-folder hierarchy), checks for spam with razor (XXX crossref), and
-optionally filters mail into separate mh folders.
+B<minc> incorporates mail from a maildir to a mh folder hierarchy. It
+takes mail from a maildir folder (not a maildir folder hierarchy),
+optionally checks for spam with a user-defined spam-checking function,
+and optionally filters mail into separate mh folders.
The filtering is quite sophisticated, as it is done using real Perl
-substitution (s//) commands.
+matching (m//) commands.
=cut
use strict;
use warnings;
-require 'sysexits.ph';
-
use Data::Dumper;
-use File::Basename;
-use Getopt::Std;
-use Log::Dispatch;
-use Log::Dispatch::File;
+use Errno;
+use Fcntl qw(O_WRONLY O_EXCL O_CREAT);
+use FileHandle;
+use File::stat;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case);
use POSIX qw(strftime WEXITSTATUS WIFEXITED);
+use Pod::Usage;
+
+our $VERSION = 1;
+
+# If a filter set's header is $MAGIC_TO_TOKEN, that set is compared
+# against headers matching this regex (taken from procmail).
+my $MAGIC_TO_REGEX = '^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To)';
+my $MAGIC_TO_TOKEN = ' TO';
+
+# List of SPAM message numbers, scanned at the end so the user can
+# check for false positives.
+my @SPAM;
=head1 OPTIONS
=item B<-d>
-Dump (using Data::Dumper) the FILTERS hash and exit. This is useful
+Dump (using Data::Dumper) the FILTERS list and exit. This is useful
for testing the syntax of .mincfilter.
=item B<-h>
Show help.
+=item B<-m> I<MAX>
+
+Stop processing after I<MAX> messages.
+
=item B<-n>
Dry run; do not actually incorporate the mail, but log and report to
stdout/stderr as normal.
-=item B<-s>
+=item B<-p>
-Process SPAM only, then exit, leaving all non-spam messages in the
-maildir.
+Print the filename of each message before checking it for spam. This
+can be handy if a particular message is giving the spam checker a
+problem.
=back
=cut
-my $dumpfilters = 0;
-my $run = 1;
-my $spamonly = 0;
-
-our $opt_d;
-our $opt_h;
-our $opt_n;
-our $opt_s; # ;; # stupid cperl-mode
+my $dumpfilters;
+my $help;
+my $maxmsgs;
+my $norun;
+my $printfilenames;
-if (not getopts('dhns')) {
- exit(&EX_USAGE);
-}
+GetOptions(
+ 'd' => \$dumpfilters,
+ 'h|help' => \$help,
+ 'm=i' => \$maxmsgs,
+ 'n' => \$norun,
+ 'p' => \$printfilenames,
+ ) or pod2usage();
+$help and pod2usage(-exitstatus=>0, -verbose=>1);
+@ARGV == 0 or pod2usage();
-if ($opt_d) {
- $dumpfilters = 1;
-} elsif ($opt_h) {
- print("Sorry bub, no help.\n");
- exit(&EX_OK);
-} elsif ($opt_n) {
- $run = 0;
-}
-
-if ($opt_s) { # ))){ # stupid cperl-mode
- $spamonly = 1;
-}
+our $run = !$norun;
=head1 ENVIRONMENT
=cut
-use Env qw(HOME MAILDIR);
+use Env qw(COLUMNS HOME MAILDIR);
+
+$COLUMNS ||= 80;
if (not $HOME) {
die("HOME environment variable must be set.\n");
=item $HOME/.mincfilter
This file is Perl code (included via the 'require' directive) which is
-expected to define the FILTERS hash.
+expected to define the FILTERS list.
+
+=item $HOME/.mincspam
+
+If this file exists, B<minc> will include it with the expectation that
+it will define a B<spam_check> function. This function takes a
+message filename as an argument and returns 1 if the message is spam,
+else 0. If this file does not exist, B<minc> will define a simple
+function that always returns 0.
+
+One of B<minc>'s global variables is available to the user-defined
+B<spam_check> function: $run. This boolean should be honored;
+B<spam_check> should only take real action (i.e. removing or creating
+files, running external programs, etc.) if $run is non-zero.
+
+This file may also declare two other functions: B<spam_start_hook> and
+B<spam_stop_hook>. The former is passed no arguments and is expected
+to return a list. This list is a "baton" that will also be passed to
+B<spam_check> and B<spam_stop_hook>. It can hold anything
+B<spam_check> will need to do its job, whether network sockets, pipes,
+or whatever.
+
+XXX: need more details about the spam-handling process; for now read
+the code.
=item `mhpath +`/logs/minc.log
-Where minc logs what it does, unless in -n mode.
+Where B<minc> logs what it does, unless in -n mode.
=item `mhpath +`/logs/dryrun.log
-Where minc logs what it would do; used in -n mode.
+Where B<minc> logs what it would do; used in -n mode.
+
+=item `mhpath +`/.minc.context
+
+B<minc> uses this file for context (i.e. current folder) instead of
+`mhpath +`/context. This allows you some amount of freedom to use mh
+while B<minc> is running. For example, you can changed folders without
+causing a message to be stored in the wrong folder. Care must still
+be taken, however, about the unseen sequence; if you change it
+(whether via show, or mark, or anything else) while in the same folder
+as B<minc>, it is likely the new message B<minc> stores will not end up
+marked unseen.
+
+=back
=cut
-our %FILTERS;
-require "$HOME/.mincfilter";
+our @FILTERS;
+our (@start_hooks, @stop_hooks, @filter_hooks, @post_store_hooks);
my $mh;
my $logfile;
$logfile = $mh . '/logs/dryrun.log';
}
+$ENV{"MHCONTEXT"} = $mh . '/.minc.context';
+
\f
###############################################################################
# Logging
-# debuglevels for the logger
-use constant LOG_DEBUGLEVEL => 'info';
-use constant SCREEN_DEBUGLEVEL => 'debug';
-
-# characters used to wrap around the id field in the log so I can more
-# easily parse the msg flow
-use constant DEBUGCHAR => '%';
-use constant INFOCHAR => '=';
-use constant INCOMINGCHAR => '<';
-use constant SAVECHAR => '>';
-use constant WARNCHAR => 'W';
-use constant ERRORCHAR => 'E';
-
-my $logger = new Log::Dispatch;
-$logger->add(new Log::Dispatch::File (name=>'logfile',
- filename=>$logfile,
- min_level=>'info',
- mode=>'append'));
-
-# log alias, handles getting passed vars that are undef
sub mylog {
- my $level = shift;
- my $act = shift;
my $timestamp = strftime('%b %e %H:%M:%S', localtime);
my $msg;
foreach my $part (@_) {
}
# no newlines in the log message, thanks
$msg =~ s/\n/ /gm;
- $msg = sprintf("%s %s%s %s\n", $timestamp, $act, $act, $msg);
- $logger->log(level=>$level, message=>$msg);
- if ($act eq SAVECHAR) {
- $logger->log(level=>$level, message=>"\n");
- }
+ open(LOG, ">>$logfile") or die("open(>>$logfile): $!");
+ print(LOG "$timestamp $msg\n") or die("print(>>$logfile): $!");
+ close(LOG) or die("close($logfile): $!");
}
-sub logincoming { mylog('info', INCOMINGCHAR, @_); }
-sub logsave { mylog('notice', SAVECHAR, @_); }
-sub loginfo { mylog('info', INFOCHAR, @_); }
-sub logdebug { mylog('debug', DEBUGCHAR, @_); }
-sub logwarn { mylog('warning', WARNCHAR, @_); }
-sub logerr { mylog('error', ERRORCHAR, @_); }
+sub logheader {
+ my ($text, @contents) = @_;
+ my $last;
+
+ if (@contents) {
+ $last = $contents[-1];
+ } else {
+ $last = '';
+ }
+
+ mylog('<< ', $text, $last);
+}
sub log_headers {
my %headers = @_;
- logincoming('From: ', $headers{'return-path'});
- logincoming('To: ', $headers{'to'});
- logincoming('Subject: ', $headers{'subject'});
- logincoming('Message-Id: ', $headers{'message-id'});
+ # For an explanation of the %headers structure, see the
+ # get_headers function below.
+ logheader('From: ', @{$headers{'return-path'}});
+ logheader('To: ', @{$headers{'to'}});
+ logheader('Subject: ', @{$headers{'subject'}});
+ logheader('Message-Id: ', @{$headers{'message-id'}});
}
\f
###############################################################################
# Utility procedures
-sub _errprint {
- printf('%s:%s', __FILE__, __LINE__);
+sub mkfolder {
+ my $folder = shift;
+ my $target;
+ my $component;
- if (@_) {
- print(': ');
- foreach (@_) {
- print;
- }
+ $target = $mh;
+ foreach $component (split('/', $folder)) {
+ $target = join('/', $target, $component);
+ -d $target or mkdir($target) or die("mkdir($target): $!");
}
}
-sub err {
- my $ex = shift;
+sub getfiles {
+ my $dir = shift;
+ my @result;
- _errprint(@_);
- print(": $!\n");
+ if (not opendir(DIR, $dir)) {
+ die("opendir($dir): $!");
+ }
- exit($ex);
-}
+ # Initialize $! to 0 (success) because stupid stupid Perl provides
+ # no way to distinguish an error from an empty directory; that
+ # means setting $! to 0 and testing it afterwards is the only way
+ # to detect an error. Real Programmers don't handle errors,
+ # right? >sigh<
+ $! = 0;
+ @result = readdir(DIR);
+ if ($! != 0) {
+ die("readdir($dir): $!");
+ }
-sub errx {
- my $ex = shift;
+ if (@result <= 2) {
+ exit(0);
+ }
- _errprint(@_);
- print("\n");
+ STDOUT->autoflush(1);
+ print(@result - 2, " messages...");
- exit($ex);
+ closedir(DIR);
+
+ return @result;
}
-sub mkfolder {
+sub get_highest_msgnum {
my $mhfolder = shift;
- my $folder;
- my $target;
- my $component;
+ my $dir;
+ my @list;
+ my $highest;
+ my $msgnum;
- $folder = $mh . '/' . $mhfolder;
- $target = '';
+ $dir = "$mh/$mhfolder";
+ if (not opendir(DIR, $dir)) {
+ die("opendir($dir): $!");
+ }
- if (not -d $folder) {
- foreach $component (split('/', $folder)) {
- if (defined($component) and length($component) >= 1) {
- $target = $target . '/' . $component;
- if (-d $target or mkdir($target)) {
- next;
- } else {
- err(&EX_TEMPFAIL,
- "Failed to create +$mhfolder ($component)");
- }
+ # Insert rant from getfiles here.
+ $! = 0;
+ @list = readdir(DIR);
+
+ if ($! != 0) {
+ die("readdir($dir): $!");
+ }
+
+ closedir(DIR);
+
+ $highest = 0;
+ foreach $msgnum (@list) {
+ # Look for integers.
+ if ($msgnum =~ /^[0-9]+$/) {
+ if ($msgnum > $highest) {
+ $highest = $msgnum;
}
}
}
-}
-# We want to print the name of each list that has new mail only once,
-# so use this hash to store the lists that have already been printed.
-# Start the list out with SPAM already in it, since we don't care when
-# new messages are added to it.
-my %FOLDERS = ('SPAM'=>1);
+ return $highest;
+}
sub store_message {
my $msg = shift;
my $mhfolder = shift;
- my $mhmsg;
my $msgnum;
+ my $try;
+ my $mhmsg;
my $status;
# We must do this even in -n mode because later steps fail without
# it. This should be harmless.
mkfolder($mhfolder);
- # XXX: Grab the 'mhpath new' algorithm and implement it internally.
- $mhmsg = `mhpath +$mhfolder new`;
- chomp($mhmsg);
- $msgnum = basename($mhmsg);
+ # This loop is a modified version of the maildir delivery algorithm.
+ $msgnum = get_highest_msgnum($mhfolder);
+ for ($try = 0; ; $try++) {
+ $msgnum++;
+ $mhmsg = "$mh/$mhfolder/$msgnum";
+
+ if (not stat($mhmsg)) {
+ if ($!{ENOENT}) {
+ # Now we have a non-existent file, let's try to create
+ # it. We must create a zero-byte file first because a
+ # file my appear between our happy stat results and
+ # our later rename(2), which would clobber said file.
+ # So attempt to create a file with this name. If it
+ # succeeds, in just a bit here we'll knowingly clobber
+ # this file with the rename(2) call.
+
+ # Another way to do this is not to use rename(2), but
+ # use link(2) + unlink(2) instead. That's how the
+ # standard maildir algorithm does it. Each method has
+ # a disadvantage: the program may crash between the
+ # link(2) and unlink(2) calls. With the standard
+ # maildir algorithm, that means the message will end
+ # up duplicated. The advantage of creating an empty
+ # file followed by rename(2) is that an extra empty
+ # file is left behind as opposed to a duplicate
+ # message. This is more easily detected by the user.
+ if ($run) {
+ if (sysopen(MSG, $mhmsg,
+ O_WRONLY | O_EXCL | O_CREAT, 0600)) {
+ close(MSG);
+ last;
+ }
+ } else {
+ last;
+ }
+ }
+ }
+
+ # This algorithm is different from the maildir one; let's make
+ # 10 tries instead of 3.
+ if ($try == 9) {
+ die("Attempted filename $mhmsg exists.");
+ }
+
+ # This algorithm is different; i don't think we need to sleep.
+ #sleep(2);
+ }
- logsave("+$mhfolder");
+ if ($mhfolder ne 'SPAM') {
+ mylog('+', $mhfolder);
+ }
if ($run) {
if (not rename($msg, $mhmsg)) {
- err(&EX_TEMPFAIL, "Failed rename($msg, $mhmsg)");
+ die("rename($msg, $mhmsg): $!");
}
- # XXX: Lame! Instead, keep a hash of arrays. Keys are folder
- # names, array elements are message numbers in that list.
- # Then after all the messages have been sorted, run one mark
- # command per folder, marking all messages for that folder in
- # one sweep.
- if ($mhfolder ne 'SPAM') {
- $status = system("mark +$mhfolder $msgnum -sequence unseen -add");
- if (not (WIFEXITED($status) and WEXITSTATUS($status) == 0)) {
- errx(&EX_TEMPFAIL, "Failed to mark message unseen");
+ # Mark each message as soon as we store it and bomb if that
+ # fails. While it is slow, it is not safe to store multiple
+ # messages and then have a failure before marking some (or
+ # all).
+ if ($mhfolder eq 'SPAM') {
+ push(@SPAM, $msgnum);
+ } else {
+ $status = system('mark', "+$mhfolder", "$msgnum", '-sequence',
+ 'unseen', '-add');
+ # XXX need to handle signalled and stopped, and print
+ # the exit code or signal number.
+ if (not WIFEXITED($status)) {
+ die("Failed to run mark");
+ } elsif (WEXITSTATUS($status) != 0) {
+ die("Failed to mark message unseen.");
}
}
}
- if (not $FOLDERS{$mhfolder}) {
- print("+$mhfolder\n");
- $FOLDERS{$mhfolder} = 1;
- }
+ return $msgnum;
}
+# Parse a message file into a structure describing the headers. The
+# structure is a hash of arrays. The hash keys are the names of the
+# headers found in the message, made all lower-case. Each item in
+# the hash is an array of header text. The array has one element
+# per occurrence of the header. Most headers will only have a single
+# element because they occur only once. The Received header is the
+# most common header with multiple texts.
sub get_headers {
my $msg = shift;
my %headers;
my $current; # current header, used for unfolding lines
my $fieldname; # unmolested header name
+ my $contents; # contents of header
- open(MSG, $msg);
+ open(MSG, $msg) or die("open(MSG, $msg): $!");
while (<MSG>) {
chomp;
if (length == 0) {
# folded header continuation
if (not defined($current)) {
- print("Malformed message, cannot parse headers.\n");
- return ();
+ warn('Malformed message, cannot parse headers.');
+ next;
}
- $headers{$current} .= $_;
+ @{$headers{$current}}[-1] .= $_;
} else {
($fieldname) = split(/:/);
$current = lc($fieldname);
- (undef, $headers{$current}) = split(/^$fieldname:\s*/);
+ (undef, $contents) = split(/^\Q$fieldname\E:\s*/);
+
+ if (defined($headers{$current})) {
+ # multiple occurence of same header
+ push(@{$headers{$current}}, $contents);
+ } else {
+ # first occurence of a header
+ $headers{$current} = [$contents];
+ }
}
}
close(MSG);
\f
###############################################################################
-# Spam handling
+# Filtering
-sub is_spam {
+sub find_mh_folder {
my $msg = shift;
- my $err;
- my $sig;
- my $status;
-
- $err = $sig = $msg;
- $err =~ s|/new/|/err/|;
- $sig =~ s|/new/|/sig/|;
- if (-f $err) {
- my $line;
- my $message;
-
- open(ERR, $err);
- $line = <ERR>;
- close(ERR);
-
- chomp($line);
- ($message, $!) = split(/:/, $line);
+ my %headers = @_;
+ my $filterref;
+ my @filter;
+ my $header;
+ my $contents;
+ my $pair;
+ my $match;
+ my $expression;
+ my $result;
- logerr("$err: $message: $!");
+ if (not %headers) {
+ return 'malformed';
+ }
- if ($run) {
- if (unlink($err) != 1) {
- err(&EX_TEMPFAIL, "Failed unlink($err)");
+ # Walk the list of filters. This structure is documented in
+ # pod at the end of the program.
+ foreach $filterref (@FILTERS) {
+ @filter = @$filterref;
+ $header = shift(@filter);
+
+ # Handle filters using the magic TO header specially.
+ if ($header eq $MAGIC_TO_TOKEN) {
+ foreach $header (keys(%headers)) {
+ if ($header =~ /$MAGIC_TO_REGEX/i) {
+ foreach $contents (@{$headers{lc($header)}}) {
+ foreach $pair (@filter) {
+ ($match, $expression) = @$pair;
+ if ($contents =~ /$match/) {
+ return $expression;
+ }
+ }
+ }
+ }
}
+
+ # Now that it's been processed specially, skip normal handling.
+ next;
}
- } elsif (-f $sig) {
- # This is supposed to be a signature created with razor-check
- # directly after delivery. Currently this isn't supported
- # because it isn't clear to me how to get that signature back
- # into razor-check. For now, just unlink any sig files we
- # find and proceed with full razor-check mode.
-
- if ($run) {
- if (unlink($sig) != 1) {
- err(&EX_TEMPFAIL, "Failed unlink($sig)");
+
+ # Walk the list of message headers matching the filter's
+ # specified header.
+ foreach $contents (@{$headers{lc($header)}}) {
+ # Walk the filter's list of match/expression pairs.
+ foreach $pair (@filter) {
+ ($match, $expression) = @$pair;
+ if ($contents =~ /$match/i) {
+ if (eval "\$result = \"$expression\"") {
+ return $result;
+ }
+ }
}
}
}
- if ($run) {
- $status = system("razor-check < $msg");
- if (not WIFEXITED($status)) {
- err(&EX_TEMPFAIL, "Failed to run razor-check < $msg");
- } elsif (WEXITSTATUS($status) == 0) {
- return 1;
- } else {
- return 0;
- }
- } else {
- return 0;
- }
+ return 'inbox';
}
-sub kill_spam {
+sub filter_mail {
my @msglist = @_;
- my @result;
+ my $msgcount = @msglist - 2; # don't count . and ..
+ my $len = length($msgcount);
+ my @baton;
my $msg;
- my $i;
+ my $mhfolder;
+ my $spam = 0;
+ my $saved = 0;
+ my $msgnum;
+ my %FOLDERS = ('SPAM'=>1);
- @result = ();
+ # XXX lame names and hard-coded proportions.
+ my $nf = $COLUMNS * 0.1;
+ my $nm = $COLUMNS * 0.0625;
+ my $nF = $COLUMNS * 0.175;
+ my $ns = $COLUMNS - $nf - $nm - $nF - 3;
- $i = 0;
- foreach $msg (@msglist) {
- printf('%sChecking for spam... %6d/%d',
- "\r", ++$i, scalar(@msglist));
- if (is_spam($msg)) {
- print(" SPAM\n");
- store_message($msg, 'SPAM');
- } else {
- push(@result, $msg);
+ if (-f "$HOME/.minc") {
+ require "$HOME/.minc";
+ }
+
+ my %batons;
+ for my $hook (@start_hooks) {
+ my ($handle, @baton) = $hook->();
+ if (defined($handle)) {
+ $batons{$handle} = [@baton];
}
}
- print("\nDone: ", scalar(@result), " survivors\n");
- return @result;
-}
+ foreach $msg (@msglist) {
+ ($msg eq '.' or $msg eq '..') and next;
-\f
-###############################################################################
-# Filtering
+ if ($printfilenames) {
+ print("$msg\n");
+ }
-sub find_mh_folder {
- my $msg = shift;
- my %headers;
- my $header;
- my $contents;
- my $regex;
- my $subst;
- my $try;
+ my %headers = get_headers($msg);
+ log_headers(%headers);
- %headers = get_headers($msg);
- if (not %headers) {
- return 'inbox';
- }
+ undef($mhfolder);
+ for my $hook (@filter_hooks) {
+ my $result = $hook->(\%batons, \%headers, $msg);
+ defined($result) and ($mhfolder = $result);
+ }
- log_headers(%headers);
+ defined($mhfolder) or ($mhfolder = find_mh_folder($msg, %headers));
- foreach $header (keys(%FILTERS)) {
- $contents = $headers{lc($header)};
+ $msgnum = store_message($msg, $mhfolder);
- if (defined($contents)) {
- foreach $regex (keys(%{$FILTERS{$header}})) {
- $subst = $FILTERS{$header}->{$regex};
+ if ($mhfolder eq 'SPAM') {
+ $spam++;
+ } else {
+ $saved++;
+ print("\r");
+ print(' ' x $COLUMNS);
+ printf("\r\%-${nf}s \%${nm}d \%-${nF}s \%s\n",
+ substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
+ substr(pop(@{$headers{'from'}}), 0, $nF),
+ substr(pop(@{$headers{'subject'}}), 0, $ns))
+ }
- $try = '$contents =~';
- $try .= " s|$regex|$subst|is";
- if (eval $try) {
- return $contents;
- }
- }
+ for my $hook (@post_store_hooks) {
+ $hook->(\%batons, \%headers, $mhfolder, $msgnum);
}
- }
- return 'inbox';
-}
+ printf(" \%${len}d SPAM \%${len}d saved \%${len}d/%1d",
+ $spam, $saved, $spam + $saved, $msgcount);
-sub filter_mail {
- my @msglist = @_;
- my $msg;
- my $mhfolder;
+ defined($maxmsgs) and ($spam + $saved < $maxmsgs or last);
+ }
+ print("\n");
- foreach $msg (@msglist) {
- $mhfolder = find_mh_folder($msg);
- store_message($msg, $mhfolder);
+ for my $hook (@stop_hooks) {
+ $hook->(\%batons);
}
}
\f
MAIN: {
- my @msglist;
+ my $st;
if ($dumpfilters) {
+ require "$HOME/.minc";
$Data::Dumper::Indent = 1;
- print(Dumper(\%FILTERS));
- exit(&EX_OK);
+ print(Dumper(\@FILTERS));
+ exit;
}
- @msglist = kill_spam(glob("$MAILDIR/new/*"));
+ chdir("$MAILDIR/new") or die("chdir($MAILDIR/new): $!");
+ filter_mail(map { $_->[1] }
+ sort { $a->[0] <=> $b->[0] }
+ map {
+ if (not ($st = stat($_))) {
+ die("stat($_): $!");
+ }
+ [$st->mtime, $_]
+ }
+ getfiles('.'));
- if (not $spamonly) {
- filter_mail(@msglist);
- }
+ @SPAM and (exec('scan', '+SPAM', @SPAM) or die);
}
+\f
__END__
+=head1 THE FILTERS STRUCTURE
+
+The user's .mincfilter file must define the @FILTERS structure. This
+structure is an array. Each element of @FILTERS is a filter. A
+filter is itself an array. The first element of a filter is a string,
+the name of the header this filter acts upon. The header name is not
+case-sensitive. Each subsequent element of a filter is a pair (i.e. a
+two-element array): first, a regular expression B<minc> uses to
+determine whether this filter matches or not, and second, an
+expression which B<minc> evaluates to get the folder name.
+
+B<minc> decides where to store a message by iterating over the
+@FILTERS array. It tests each regexp of each filter against all
+headers matching that filter's specified header. As soon as a match
+is found, B<minc> evaluates the second part of the pair. This part
+may contain positional parameters from the matched regexp ($1, $2,
+etc.). The result of this expression is used as the folder name.
+
+Multiple occurrences of the same header are preserved and tested
+individually against the filters. That means, for example, that
+filters can search all the Received headers for a certain string.
+
+It is important to note that all the arrays of the @FILTERS structure
+are traversed I<in order>. This means the filters can be arranged so
+that some have priority over others. XXX: get Doug to write an
+example of having the same header matched more than once.
+
+Lastly, B<minc> supports a magic ' TO' header. Filters using this
+header are matched against a collection of headers related to (and
+including) the To. Which headers to use is determined by a regular
+expression borrowed from procmail.
+
=head1 EXAMPLES
-%FILTERS =
- ('List-Id:'=>
- {'.*<([[:graph:]]+)-([[:graph:]]+)\.lists\.sourceforge\.net>.*'=>'l/$1/$2',
- '.*<([[:graph:]]+?)(-list)?\.freedesktop\.org>.*'=>'l/freedesktop/$1',
- '.*<swig\.cs\.uchicago\.edu>.*'=>'l/swig'},
+@FILTERS = (
- 'List-Post:'=>
- {'<mailto:([[:graph:]]+)+@([[:graph:]]+)\.apache\.org>'=>'l/apache/$2/$1'},
+ ['List-Id',
+ ['<([[:graph:]]+)-([[:graph:]]+)\.lists\.sourceforge\.net>', 'l/$1/$2'],
+ ['<([[:graph:]]+?)(-list)?\.freedesktop\.org>', 'l/freedesktop/$1'],
+ ['<swig\.cs\.uchicago\.edu>', 'l/swig'],
+ ],
- 'To:'=>
- {'root'=>'root'});
+ ['List-Post',
+ ['<mailto:([[:graph:]]+)+@([[:graph:]]+)\.apache\.org>', 'l/apache/$2/$1'],
+ ],
+
+ ['To',
+ ['root', 'root'],
+ ],
+);
The first List-Id filter is a surprisingly effective one which creates
appropriate folder names (such as l/htdig/updates and l/rox/devel) for
list such as dev@httpd.apache.org, this filter will create the folder
name l/apache/httpd/dev.
+For an example B<spam_check> function, see
+L<http://pretzelnet.org/cvs/dotfiles/.mincspam>
+
=head1 AUTHORS
-Written by Eric Gillespie <epg@pretzelnet.org> with logging code
-stolen from Adam Lazur <adam@lazur.org>.
+Written by Eric Gillespie <epg@pretzelnet.org>. Design by Eric
+Gillespie and Doug Porter <dsp@waterspout.com>.
-Design by Eric Gillespie and Doug Porter <dsp@waterspout.com>.
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
=cut
+
+# Local variables:
+# cperl-indent-level: 4
+# perl-indent-level: 4
+# indent-tabs-mode: nil
+# End:
+
+# vi: set tabstop=4 expandtab: