-#! /usr/bin/env perl
-
-# $Id$
+#!/usr/bin/perl
=head1 NAME
use Data::Dumper;
use Errno;
-use Fcntl qw(O_WRONLY O_EXCL O_CREAT);
+use Fcntl;
use FileHandle;
+use File::FcntlLock;
use File::Temp qw(tempfile);
use File::stat;
use Getopt::Long qw(:config gnu_getopt no_ignore_case);
-use POSIX qw(strftime WEXITSTATUS WIFEXITED);
+use POSIX qw(WIFEXITED WEXITSTATUS WIFSIGNALED WTERMSIG);
use Pod::Usage;
+use constant LOCK_TRIES => 60;
+
our $VERSION = 1;
# If a filter set's header is $MAGIC_TO_TOKEN, that set is compared
my $MAGIC_TO_REGEX = '^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To)';
my $MAGIC_TO_TOKEN = ' TO';
-# List of SPAM message numbers, scanned at the end so the user can
-# check for false positives.
-my @SPAM;
+# Mapping of message numbers to array references. The first element is set by
+# filter_mail to a reference to a header hash for the message; the second is
+# set by maildier_spam to the name of the message file in the spam maildir.
+# scan_spam scans this at the end so the user can check for false positives.
+my %SPAM;
=head1 OPTIONS
my $norun;
my $rebuild_dot_folders;
+if (!caller()) {
GetOptions(
'd' => \$dumpfilters,
'h|help' => \$help,
'r' => \$rebuild_dot_folders,
) or pod2usage();
$help and pod2usage(-exitstatus=>0, -verbose=>1);
-@ARGV == 0 or pod2usage();
+}
our $run = !$norun;
if (not $HOME) {
die("HOME environment variable must be set.\n");
}
-if (not $MAILDIR) {
- $MAILDIR = "$HOME/Maildir";
-}
=head1 FILES
B<minc> adds all folders it filters into to this file, which is used
by lukem's B<new(1)> (XXX need a link).
-=item `mhpath +`/logs/minc.log
-
-Where B<minc> logs what it does, unless in -n mode.
-
=item `mhpath +`/.minc.context
B<minc> uses this file for context (i.e. current folder) instead of
my $mh;
-$mh = `mhpath +`;
+$mh = `mhpath +`; $? >= 0 or die;
chomp($mh);
-
-my $logfile = $mh . '/logs/minc.log';
-
-$ENV{"MHCONTEXT"} = $mh . '/.minc.context';
-
-\f
-###############################################################################
-# Logging
-
-sub mylog {
- $run or return;
-
- my $timestamp = strftime('%b %e %H:%M:%S', localtime);
- my $msg;
- foreach my $part (@_) {
- if (defined($part)) {
- $msg .= $part;
- }
- }
- # no newlines in the log message, thanks
- $msg =~ s/\n/ /gm;
-
- open(LOG, ">>$logfile") or die("open(>>$logfile): $!");
- print(LOG "$timestamp $msg\n") or die("print(>>$logfile): $!");
- close(LOG) or die("close($logfile): $!");
+if (!$mh) {
+ die('mhpath did not give MH mail path: ' . exit_msg($?));
}
-sub logheader {
- my ($text, @contents) = @_;
- my $last;
-
- if (@contents) {
- $last = $contents[-1];
- } else {
- $last = '';
- }
-
- mylog('<< ', $text, $last);
-}
-
-sub log_headers {
- my %headers = @_;
+$ENV{"MHCONTEXT"} = $mh . '/.minc.context';
- # For an explanation of the %headers structure, see the
- # get_headers function below.
- logheader('From: ', @{$headers{'return-path'}});
- logheader('To: ', @{$headers{'to'}});
- logheader('Subject: ', @{$headers{'subject'}});
- logheader('Message-Id: ', @{$headers{'message-id'}});
+# TODO: Support dot-locking too. This works on FreeBSD and Linux.
+my $locking = `mhparam datalocking`;
+$? < 0 && die;
+$? == 0 || die('mhparam datalocking: ' . exit_msg($?));
+chomp($locking);
+if ($locking ne 'fcntl') {
+ die("mh locking $locking not supported; only fcntl");
}
\f
###############################################################################
# Utility procedures
+sub exit_msg {
+ my $status = shift;
+ WIFEXITED($status) && return 'exited with status ' . WEXITSTATUS($status);
+ WIFSIGNALED($status) && return 'killed with signal ' . WTERMSIG($status);
+ # WTF
+ "died ($status)";
+}
+
sub sort_by_list {
my $a = shift;
my $b = shift;
}
if (@result <= 2) {
- exit(0);
+ return ();
}
STDOUT->autoflush(1);
- print(@result - 2, " messages...");
+ if (@result == 3) {
+ print('1 message...');
+ } else {
+ print(@result - 2, ' messages...');
+ }
closedir(DIR);
my $folder = shift;
my $msgnum = shift;
my $seq = shift;
+
my $fn = "$mh/$folder/.mh_sequences";
- my $fh;
- my @sequences;
+ my ($fh, $e) = lkopen_fcntl($fn, O_RDWR | O_CREAT, 0600);
- if (not open($fh, $fn)) {
- $!{ENOENT} or die("open($fn): $!");
- } else {
- @sequences = <$fh>;
- chomp(@sequences);
- }
+ my @sequences = <$fh>;
+ chomp(@sequences);
+
+ seek($fh, 0, SEEK_SET) or die("seek($fn): $!");
+ truncate($fh, 0) or die("truncate($fn): $!");
my $marked = 0;
- open($fh, '>', $fn) or die("open(>$fn): $!");
for $_ (@sequences) {
if (/^$seq: (.*)/) {
my @parts;
close($fh) or die("close(>$fn): $!");
}
+# Based on nmh's lkopen_fcntl
+# Return 0 for success, errno on failure.
+sub lkopen_fcntl {
+ my $fn = shift;
+ my $access = shift;
+ my $mode = shift;
+ my $errno;
+
+ # The assumption here is that if you open the file for writing, you
+ # need an exclusive lock.
+
+ my $tries = LOCK_TRIES;
+ for (;;) {
+ sysopen(my $fh, $fn, $access, $mode) or die("sysopen($fn): $!");
+
+ my $flk = File::FcntlLock->new;
+ $flk->l_start(0);
+ $flk->l_len(0);
+ $flk->l_type(($access & O_ACCMODE) == O_RDONLY ? F_RDLCK : F_WRLCK);
+ $flk->l_whence(SEEK_SET);
+
+ # Really should only retry on EAGAIN and EINTR...
+ if ($flk->lock($fh, F_SETLK)) {
+ return $fh;
+ }
+
+ $errno = $flk->lock_errno;
+ close($fh) or die("close($fn): $!");
+
+ if (--$tries == 0) {
+ last;
+ }
+ sleep(1);
+ }
+
+ local $! = $errno;
+ die("failed to lock $fn: $!");
+}
+
+my @filtered;
sub store_message {
- my $msg = shift;
+ my $inbox = shift;
+ my $msg = shift; # rename $src_msg
my $mhfolder = shift;
- my $msgnum;
- my $try;
- my $mhmsg;
- my $status;
+ my ($msgnum, $mhmsg); # rename $dst_msg and $dst_msgpath
# We must do this even in -n mode because later steps fail without
# it. This should be harmless.
mkfolder($mhfolder);
- # This loop is a modified version of the maildir delivery algorithm.
- $msgnum = get_highest_msgnum($mhfolder);
- for ($try = 0; ; $try++) {
- $msgnum++;
+ if ("+$mhfolder" eq $inbox) {
+ # If @filtered is empty, this message already has the right number.
+ if (!($msgnum = shift(@filtered))) {
+ $msg =~ m|.*/(\d+)$|;
+ return $1;
+ }
$mhmsg = "$mh/$mhfolder/$msgnum";
-
- if (not stat($mhmsg)) {
- if ($!{ENOENT}) {
- # Now we have a non-existent file, let's try to create
- # it. We must create a zero-byte file first because a
- # file my appear between our happy stat results and
- # our later rename(2), which would clobber said file.
- # So attempt to create a file with this name. If it
- # succeeds, in just a bit here we'll knowingly clobber
- # this file with the rename(2) call.
-
- # Another way to do this is not to use rename(2), but
- # use link(2) + unlink(2) instead. That's how the
- # standard maildir algorithm does it. Each method has
- # a disadvantage: the program may crash between the
- # link(2) and unlink(2) calls. With the standard
- # maildir algorithm, that means the message will end
- # up duplicated. The advantage of creating an empty
- # file followed by rename(2) is that an extra empty
- # file is left behind as opposed to a duplicate
- # message. This is more easily detected by the user.
- if ($run) {
- if (sysopen(MSG, $mhmsg,
- O_WRONLY | O_EXCL | O_CREAT, 0600)) {
- close(MSG);
+ } else {
+ # This loop is a modified version of the maildir delivery algorithm.
+ $msgnum = get_highest_msgnum($mhfolder);
+ my $try;
+ for ($try = 0; ; $try++) {
+ $msgnum++;
+ $mhmsg = "$mh/$mhfolder/$msgnum";
+
+ if (not stat($mhmsg)) {
+ if ($!{ENOENT}) {
+ # Now we have a non-existent file, let's try to create
+ # it. We must create a zero-byte file first because a
+ # file my appear between our happy stat results and
+ # our later rename(2), which would clobber said file.
+ # So attempt to create a file with this name. If it
+ # succeeds, in just a bit here we'll knowingly clobber
+ # this file with the rename(2) call.
+
+ # Another way to do this is not to use rename(2), but
+ # use link(2) + unlink(2) instead. That's how the
+ # standard maildir algorithm does it. Each method has
+ # a disadvantage: the program may crash between the
+ # link(2) and unlink(2) calls. With the standard
+ # maildir algorithm, that means the message will end
+ # up duplicated. The advantage of creating an empty
+ # file followed by rename(2) is that an extra empty
+ # file is left behind as opposed to a duplicate
+ # message. This is more easily detected by the user.
+ if ($run) {
+ if (sysopen(MSG, $mhmsg,
+ O_WRONLY | O_EXCL | O_CREAT, 0600)) {
+ close(MSG);
+ last;
+ }
+ } else {
last;
}
- } else {
- last;
}
}
- }
- # This algorithm is different from the maildir one; let's make
- # 10 tries instead of 3.
- if ($try == 9) {
- die("Attempted filename $mhmsg exists.");
+ # This algorithm is different from the maildir one; let's make
+ # 10 tries instead of 3.
+ if ($try == 9) {
+ die("Attempted filename $mhmsg exists.");
+ }
+
+ # This algorithm is different; i don't think we need to sleep.
+ #sleep(2);
}
- # This algorithm is different; i don't think we need to sleep.
- #sleep(2);
+ if ($run) {
+ # Mark each message as soon as we store it and bomb if that
+ # fails. While it is slow, it is not safe to store multiple
+ # messages and then have a failure before marking some (or
+ # all).
+ if ($mhfolder ne 'SPAM') {
+ mark($mhfolder, $msgnum, 'unseen');
+ }
+ }
}
-
- mylog('+', $mhfolder);
+ push(@filtered, $msg);
if ($run) {
if (not rename($msg, $mhmsg)) {
die("rename($msg, $mhmsg): $!");
}
-
- # Mark each message as soon as we store it and bomb if that
- # fails. While it is slow, it is not safe to store multiple
- # messages and then have a failure before marking some (or
- # all).
- if ($mhfolder eq 'SPAM') {
- push(@SPAM, $msgnum);
- } else {
- mark($mhfolder, $msgnum, 'unseen');
- }
}
return $msgnum;
}
close(MSG);
- return %headers;
+ return \%headers;
}
\f
sub find_mh_folder {
my $msg = shift;
- my %headers = @_;
- my $filterref;
- my @filter;
- my $header;
- my $contents;
- my $pair;
- my $regexp;
- my $expression;
- my $result;
-
- if (not %headers) {
+ my $header = shift;
+
+ if (not %$header) {
return 'malformed';
}
# Walk the list of filters. This structure is documented in
# pod at the end of the program.
- foreach $filterref (@FILTERS) {
- @filter = @$filterref;
- $header = shift(@filter);
-
- # Handle filters using the magic TO header specially.
- if ($header eq $MAGIC_TO_TOKEN) {
- foreach $header (keys(%headers)) {
- if ($header =~ /$MAGIC_TO_REGEX/i) {
- foreach $contents (@{$headers{lc($header)}}) {
- foreach $pair (@filter) {
- ($regexp, $expression) = @$pair;
- if ($contents =~ $regexp) {
- if (eval "\$result = \"$expression\"") {
- return $result;
- }
- }
- }
- }
- }
- }
-
- # Now that it's been processed specially, skip normal handling.
+ for my $filterref (@FILTERS) {
+ if (ref($filterref) eq 'CODE') {
+ my $m = $filterref->($header, $msg);
+ $m && return $m;
next;
}
- # Walk the list of message headers matching the filter's
- # specified header.
- foreach $contents (@{$headers{lc($header)}}) {
- # Walk the filter's list of regexp/expression pairs.
- foreach $pair (@filter) {
- ($regexp, $expression) = @$pair;
- if ($contents =~ $regexp) {
- if (eval "\$result = \"$expression\"") {
- return $result;
- }
+ my $m = match($header, @$filterref);
+ $m && return $m;
+ }
+
+ return 'inbox';
+}
+
+# Test all the header fields against each [regexp, folder-expression] pair.
+sub match {
+ my $header = shift;
+ my $filter_field = shift;
+ my @filters = @_;
+
+ # Handle filters using the magic TO header specially.
+ if ($filter_field eq $MAGIC_TO_TOKEN) {
+ for my $field_name (keys(%$header)) {
+ if ($field_name =~ /$MAGIC_TO_REGEX/i) {
+ my $m = match_one_field($header->{$field_name}, \@filters);
+ $m && return $m;
+ }
+ }
+ # Now that it's been processed specially, skip normal handling.
+ return;
+ }
+
+ # Walk the list of header fields matching the filter's specified header.
+ my $m = match_one_field($header->{lc($filter_field)}, \@filters);
+ $m && return $m;
+}
+
+# Test all the values of one header field against each [regexp,
+# folder-expression] pair.
+sub match_one_field {
+ my $values = shift;
+ my $filters = shift;
+ for my $value (@$values) {
+ for my $pair (@$filters) {
+ my ($regexp, $expression) = @$pair;
+ if ($value =~ $regexp) {
+ my $result;
+ if (eval "\$result = \"$expression\"") {
+ return $result;
}
}
}
}
+}
- return 'inbox';
+sub scan_line {
+ my ($headers, $mhfolder, $msgnum, $nf, $nm, $nF, $ns) = @_;
+ my $from = '';
+ my $subject = '';
+ # Sometimes these headers are missing...
+ eval { $from = [@{$headers->{'from'}}]->[-1] };
+ eval { $subject = [@{$headers->{'subject'}}]->[-1] };
+ # Replace garbage characters.
+ for ($from, $subject) {
+ tr/\x00-\x1f\x80-\xff/?/;
+ }
+ return sprintf("\%-${nf}s \%${nm}d \%-${nF}s \%s",
+ substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
+ substr($from, 0, $nF),
+ substr($subject, 0, $ns));
}
sub filter_mail {
+ my $inbox = shift;
+ @_ or return ();
my $msgcount = @_ - 2; # don't count . and ..
my $len = length($msgcount);
my @baton;
my $msgnum;
my %folders;
- if (-f "$HOME/.minc") {
- require "$HOME/.minc";
- }
-
# XXX lame names
my $nf = int($COLUMNS * $SCAN_P_FOLDER);
my $nm = int($COLUMNS * $SCAN_P_MESSAGE);
}
}
+ my $tty = -t STDOUT;
+ if (not $tty) {
+ # Print a newline after the incomplete "N messages..." line.
+ print("\n");
+ }
+
for $msg (@_) {
($msg eq '.' or $msg eq '..') and next;
- my %headers = get_headers($msg);
- log_headers(%headers);
+ my $headers = get_headers($msg);
undef($mhfolder);
for my $hook (@filter_hooks) {
- my $result = $hook->(\%batons, \%headers, $msg);
+ my $result = $hook->(\%batons, $headers, $msg);
defined($result) and ($mhfolder = $result);
}
- defined($mhfolder) or ($mhfolder = find_mh_folder($msg, %headers));
+ defined($mhfolder) or ($mhfolder = find_mh_folder($msg, $headers));
- $msgnum = store_message($msg, $mhfolder);
+ $msgnum = store_message($inbox, $msg, $mhfolder);
$folders{$mhfolder}++;
+ if ($tty) {
+ print("\r");
+ }
+
if ($mhfolder eq 'SPAM') {
$spam++;
+ $SPAM{$msgnum} = [$headers, undef];
} else {
$saved++;
- my $from = '';
- my $subject = '';
- # Sometimes these headers are missing...
- eval { $from = [@{$headers{'from'}}]->[-1] };
- eval { $subject = [@{$headers{'subject'}}]->[-1] };
- for ($from, $subject) {
- tr/\x00-\x1f\x80-\xff/ /;
- }
- printf("\r\%-${nf}s \%${nm}d \%-${nF}s \%s\n",
- substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
- substr($from, 0, $nF),
- substr($subject, 0, $ns));
+ print(scan_line($headers, $mhfolder, $msgnum, $nf, $nm, $nF, $ns),
+ "\n");
}
for my $hook (@post_store_hooks) {
- $hook->(\%batons, \%headers, $mhfolder, $msgnum);
+ $hook->(\%batons, $headers, $mhfolder, $msgnum);
}
- printf(" \%${len}d SPAM \%${len}d saved \%${len}d/%1d",
- $spam, $saved, $spam + $saved, $msgcount);
+ if ($tty) {
+ printf(" \%${len}d SPAM \%${len}d saved \%${len}d/%1d",
+ $spam, $saved, $spam + $saved, $msgcount);
+ }
defined($maxmsgs) and ($spam + $saved < $maxmsgs or last);
}
build_dot_folders($folders, $fh);
}
+# XXX Could use some unification with getfiles...
+sub maildir_spam {
+ my $dir = "$MAILDIR/spam/new";
+
+ if (not chdir($dir)) {
+ $!{ENOENT} or print(STDERR "skipping maildir spam: chdir($dir): $!\n");
+ return;
+ }
+
+ if (not opendir(DIR, '.')) {
+ print(STDERR "skipping maildir spam: opendir($dir): $!\n");
+ return;
+ }
+
+ $! = 0;
+ my @spams = readdir(DIR);
+ if ($! != 0) {
+ print(STDERR "skipping maildir spam: readdir($dir): $!\n");
+ return;
+ }
+
+ closedir(DIR);
+
+ for my $msg (@spams) {
+ ($msg eq '.' or $msg eq '..') and next;
+ my $msgnum = store_message($msg, 'SPAM');
+ # Store the original file name for scan_spam in -n mode.
+ $SPAM{$msgnum} = [undef, $msg];
+ }
+}
+
+sub scan_spam {
+ my ($msgnum, $header, $tuple, $msg);
+
+ # Unlike filter_mail, we don't need to print the folder name.
+ # Calculate how many columns would be allocated to it...
+ my $nf = int($COLUMNS * $SCAN_P_FOLDER);
+ # ...and add that amount to COLUMNS to calculate the number of columns to
+ # allocate to msgnum and from snippet, thus filling the line without
+ # printing the folder name.
+ my $nm = int(($COLUMNS + $nf) * $SCAN_P_MESSAGE);
+ my $nF = int(($COLUMNS + $nf) * $SCAN_P_FROM);
+ my $ns = $COLUMNS - $nm - $nF - 3;
+
+ for $msgnum (sort(keys(%SPAM))) {
+ $tuple = $SPAM{$msgnum};
+ if (defined($tuple->[0])) {
+ # Filed by filter_mail, so we have the header.
+ $header = $tuple->[0];
+ } elsif (defined($tuple->[1])) {
+ # Filed by maildir_spam, so we don't have the header.
+ if ($run) {
+ # The message has been filed, load it from $mh.
+ $msg = "$mh/SPAM/$msgnum";
+ } else {
+ # The message has not been filed, load it from the maildir.
+ # $tuple->[1] is just a basename, not a path; this works
+ # because maildir_spam did chdir(Maildir/spam/new).
+ $msg = $tuple->[1];
+ }
+ $header = get_headers($msg);
+ } else {
+ print(STDERR
+ "BUG: corrupt SPAM tuple, neither element defined",
+ " for message $msgnum\n");
+ next;
+ }
+ print(scan_line($header, '', $msgnum, 0, $nm, $nF, $ns),
+ "\n");
+ }
+}
+
\f
-MAIN: {
+if (!caller()) {
my $st;
if ($dumpfilters) {
$rebuild_dot_folders and exit(create_dot_folders);
- chdir("$MAILDIR/new") or die("chdir($MAILDIR/new): $!");
- my %folders = filter_mail(map { $_->[1] }
+ if (-f "$HOME/.minc") {
+ require "$HOME/.minc";
+ }
+
+ my $inbox;
+ if ($MAILDIR) {
+ $inbox = $MAILDIR;
+ } elsif (-d "$HOME/Maildir") {
+ $inbox = "$HOME/Maildir";
+ }
+
+ if (@ARGV > 0) {
+ $inbox = shift(@ARGV);
+ }
+
+ my @files;
+ if (substr($inbox, 0, 1) eq '+') {
+ my @msgs = @ARGV;
+ if (!@msgs) {
+ @msgs = ('cur-last');
+ }
+ open(my $fh, '-|', 'mhpath', @msgs) or die("open(mhpath|): $!");
+ chomp(@files = <$fh>);
+ if (!close($fh)) {
+ $! == 0 or die("open(mhpath|): $!");
+ die('mhpath '. exit_msg($?));
+ }
+ @files > 0 or exit 0;
+ } else {
+ @ARGV == 0 or pod2usage();
+ chdir("$inbox/new") or die("chdir($inbox/new): $!");
+ @files = map { $_->[1] }
+ sort { $a->[0] <=> $b->[0] }
+ map {
+ $st = stat($_) or die("stat($_): $!");
+ [ $st->mtime, $_ ]
+ } getfiles('.');
+
+ @files = map { $_->[1] }
sort { $a->[0] <=> $b->[0] }
map {
if (not ($st = stat($_))) {
}
[$st->mtime, $_]
}
- getfiles('.'));
+ getfiles('.');
+ }
- $run and update_dot_folders(\%folders);
+ my %folders = filter_mail($inbox, @files);
- @SPAM and (exec('scan', '+SPAM', @SPAM) or die);
+ $run and %folders and update_dot_folders(\%folders);
+
+ maildir_spam();
+ scan_spam();
}
+1;
+
\f
__END__