-#! /usr/local/bin/perl
+#! /usr/bin/env perl
# $Id$
=head1 NAME
-B<minc> - Incorporate mail from a maildir into mh folders.
+B<minc> - incorporate mail from a maildir into mh folders
=head1 SYNOPSIS
-B<minc> [-B<dhns>]
+B<minc> [B<-m> I<MAX>] [B<-n>] [B<-p>]
+
+B<minc> B<-d>
+
+B<minc> B<-h>
=head1 DESCRIPTION
-B<minc> is a program for incorporating mail from a maildir to a mh
-folder hierarchy. It takes mail from a maildir folder (not a maildir
-folder hierarchy), optionally checks for spam with a user-defined
-spam-checking function, and optionally filters mail into separate mh
-folders.
+B<minc> incorporates mail from a maildir to an mh folder hierarchy. It
+takes mail from a maildir folder (not a maildir folder hierarchy),
+optionally checks for spam with a user-defined spam-checking function,
+and optionally filters mail into separate mh folders.
The filtering is quite sophisticated, as it is done using real Perl
matching (m//) commands.
use strict;
use warnings;
-require 'sysexits.ph';
-
use Data::Dumper;
use Errno;
use Fcntl qw(O_WRONLY O_EXCL O_CREAT);
use FileHandle;
-use File::Basename;
-use Getopt::Std;
-use Log::Dispatch;
-use Log::Dispatch::File;
+use File::stat;
+use Getopt::Long qw(:config gnu_getopt no_ignore_case);
use POSIX qw(strftime WEXITSTATUS WIFEXITED);
+use Pod::Usage;
-# Autoflush STDOUT for the benefit of the status reporting in kill_spam().
-STDOUT->autoflush(1);
+our $VERSION = 1;
# If a filter set's header is $MAGIC_TO_TOKEN, that set is compared
# against headers matching this regex (taken from procmail).
my $MAGIC_TO_REGEX = '^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To)';
my $MAGIC_TO_TOKEN = ' TO';
+# List of SPAM message numbers, scanned at the end so the user can
+# check for false positives.
+my @SPAM;
+
=head1 OPTIONS
=over 4
Dump (using Data::Dumper) the FILTERS list and exit. This is useful
for testing the syntax of .mincfilter.
-=item B<-f>
-
-Filter only, then exit. This is useful after running B<minc -s>.
-
=item B<-h>
Show help.
+=item B<-m> I<MAX>
+
+Stop processing after I<MAX> messages.
+
=item B<-n>
Dry run; do not actually incorporate the mail, but log and report to
can be handy if a particular message is giving the spam checker a
problem.
-=item B<-s>
-
-Process SPAM only, then exit, leaving all non-spam messages in the
-maildir.
-
=back
=cut
-my $dumpfilters = 0;
-my $filteronly = 0;
-our $run = 1;
-my $printfilenames = 0;
-my $spamonly = 0;
-
-our $opt_d;
-our $opt_f;
-our $opt_h;
-our $opt_n;
-our $opt_p;
-our $opt_s; # ;; # stupid cperl-mode
-
-if (not getopts('dfhnps')) {
- exit(&EX_USAGE);
-}
+my $dumpfilters;
+my $help;
+my $maxmsgs;
+my $norun;
+my $printfilenames;
-if ($opt_d) {
- $dumpfilters = 1;
-} elsif ($opt_h) {
- print("Sorry bub, no help.\n");
- exit(&EX_OK);
-} elsif ($opt_n) {
- $run = 0;
-}
+GetOptions(
+ 'd' => \$dumpfilters,
+ 'h|help' => \$help,
+ 'm=i' => \$maxmsgs,
+ 'n' => \$norun,
+ 'p' => \$printfilenames,
+ ) or pod2usage();
+$help and pod2usage(-exitstatus=>0, -verbose=>1);
+@ARGV == 0 or pod2usage();
-if ($opt_p) {
- $printfilenames = 1;
-}
-
-if ($opt_f) {
- $filteronly = 1;
-} elsif ($opt_s) { # ))){ # stupid cperl-mode
- $spamonly = 1;
-}
+our $run = !$norun;
=head1 ENVIRONMENT
=cut
-use Env qw(HOME MAILDIR);
+use Env qw(COLUMNS HOME MAILDIR);
+
+$COLUMNS ||= 80;
if (not $HOME) {
die("HOME environment variable must be set.\n");
=cut
our @FILTERS;
-require "$HOME/.mincfilter";
-
-if (-f "$HOME/.mincspam") {
- require "$HOME/.mincspam";
-} else {
- eval "sub spam_start_hook { return (); }";
- eval "sub spam_stop_hook { }";
- eval "sub spam_check { return 0; }";
-}
+our (@start_hooks, @stop_hooks, @filter_hooks, @post_store_hooks);
my $mh;
my $logfile;
###############################################################################
# Logging
-# debuglevels for the logger
-use constant LOG_DEBUGLEVEL => 'info';
-use constant SCREEN_DEBUGLEVEL => 'debug';
-
-# characters used to wrap around the id field in the log so I can more
-# easily parse the msg flow
-use constant DEBUGCHAR => '%';
-use constant INFOCHAR => '=';
-use constant INCOMINGCHAR => '<';
-use constant SAVECHAR => '>';
-use constant WARNCHAR => 'W';
-use constant ERRORCHAR => 'E';
-
-my $logger = new Log::Dispatch;
-$logger->add(new Log::Dispatch::File (name=>'logfile',
- filename=>$logfile,
- min_level=>'info',
- mode=>'append'));
-
-# log alias, handles getting passed vars that are undef
sub mylog {
- my $level = shift;
- my $act = shift;
my $timestamp = strftime('%b %e %H:%M:%S', localtime);
my $msg;
foreach my $part (@_) {
}
# no newlines in the log message, thanks
$msg =~ s/\n/ /gm;
- $msg = sprintf("%s %s%s %s\n", $timestamp, $act, $act, $msg);
- $logger->log(level=>$level, message=>$msg);
- if ($act eq SAVECHAR) {
- $logger->log(level=>$level, message=>"\n");
- }
+ open(LOG, ">>$logfile") or die("open(>>$logfile): $!");
+ print(LOG "$timestamp $msg\n") or die("print(>>$logfile): $!");
+ close(LOG) or die("close($logfile): $!");
}
-sub logsave { mylog('notice', SAVECHAR, @_); }
-sub loginfo { mylog('info', INFOCHAR, @_); }
-sub logdebug { mylog('debug', DEBUGCHAR, @_); }
-sub logwarn { mylog('warning', WARNCHAR, @_); }
-sub logerr { mylog('error', ERRORCHAR, @_); }
-
-sub logincoming {
+sub logheader {
my ($text, @contents) = @_;
my $last;
$last = '';
}
- mylog('info', INCOMINGCHAR, $text, $last);
+ mylog('<< ', $text, $last);
}
sub log_headers {
# For an explanation of the %headers structure, see the
# get_headers function below.
- logincoming('From: ', @{$headers{'return-path'}});
- logincoming('To: ', @{$headers{'to'}});
- logincoming('Subject: ', @{$headers{'subject'}});
- logincoming('Message-Id: ', @{$headers{'message-id'}});
+ logheader('From: ', @{$headers{'return-path'}});
+ logheader('To: ', @{$headers{'to'}});
+ logheader('Subject: ', @{$headers{'subject'}});
+ logheader('Message-Id: ', @{$headers{'message-id'}});
}
\f
###############################################################################
# Utility procedures
-sub _errprint {
- printf(STDERR '%s:%s', __FILE__, __LINE__);
-
- if (@_) {
- print(STDERR ': ');
- foreach (@_) {
- print(STDERR);
- }
- }
-}
-
-sub err {
- my $ex = shift;
-
- _errprint(@_);
- print(STDERR ": $!\n");
-
- exit($ex);
-}
-
-sub errx {
- my $ex = shift;
-
- _errprint(@_);
- print(STDERR "\n");
-
- exit($ex);
-}
-
sub mkfolder {
- my $mhfolder = shift;
- my $folder;
+ my $folder = shift;
my $target;
my $component;
- $folder = $mh . '/' . $mhfolder;
- $target = '';
-
- if (not -d $folder) {
- foreach $component (split('/', $folder)) {
- if (defined($component) and length($component) >= 1) {
- $target = $target . '/' . $component;
- if (-d $target or mkdir($target)) {
- next;
- } else {
- err(&EX_OSERR,
- "Failed to create +$mhfolder ($component)");
- }
- }
- }
+ $target = $mh;
+ foreach $component (split('/', $folder)) {
+ $target = join('/', $target, $component);
+ -d $target or mkdir($target) or die("mkdir($target): $!");
}
}
my @result;
if (not opendir(DIR, $dir)) {
- err(&EX_OSERR, "Failed opendir($dir)");
+ die("opendir($dir): $!");
}
# Initialize $! to 0 (success) because stupid stupid Perl provides
# to detect an error. Real Programmers don't handle errors,
# right? >sigh<
$! = 0;
- @result = grep {
- ($_ ne '.' and $_ ne '..')
- and $_ = "$MAILDIR/new/$_"
- } readdir(DIR);
-
+ @result = readdir(DIR);
if ($! != 0) {
- err(&EX_OSERR, "Failed readdir($dir)");
+ die("readdir($dir): $!");
}
- if (scalar(@result) == 0) {
+ if (@result <= 2) {
exit(0);
}
+ STDOUT->autoflush(1);
+ print(@result - 2, " messages...");
+
closedir(DIR);
return @result;
$dir = "$mh/$mhfolder";
if (not opendir(DIR, $dir)) {
- err(&EX_OSERR, "Failed opendir($dir)");
+ die("opendir($dir): $!");
}
+ # As in getfiles: clear $! first, because readdir reports errors only via $!.
$! = 0;
@list = readdir(DIR);
if ($! != 0) {
- err(&EX_OSERR, "Failed readdir($dir)");
+ die("readdir($dir): $!");
}
closedir(DIR);
return $highest;
}
-# We want to print the name of each list that has new mail only once,
-# so use this hash to store the lists that have already been printed.
-# Start the list out with SPAM already in it, since we don't care when
-# new messages are added to it.
-my %FOLDERS = ('SPAM'=>1);
-
sub store_message {
my $msg = shift;
my $mhfolder = shift;
# up duplicated. The advantage of creating an empty
# file followed by rename(2) is that an extra empty
# file is left behind as opposed to a duplicate
- # message. This is more easiliy detected by the user.
+ # message. This is more easily detected by the user.
if ($run) {
- if (sysopen(MSG, "$mhmsg",
+ if (sysopen(MSG, $mhmsg,
O_WRONLY | O_EXCL | O_CREAT, 0600)) {
close(MSG);
last;
# This algorithm is different from the maildir one; let's make
# 10 tries instead of 3.
if ($try == 9) {
- errx(&EX_TEMPFAIL, "Attempted filename $mhmsg exists.");
+ die("Attempted filename $mhmsg exists.");
}
# This algorithm is different; i don't think we need to sleep.
}
if ($mhfolder ne 'SPAM') {
- logsave("+$mhfolder");
+ mylog('+', $mhfolder);
}
if ($run) {
if (not rename($msg, $mhmsg)) {
- err(&EX_OSERR, "Failed rename($msg, $mhmsg)");
+ die("rename($msg, $mhmsg): $!");
}
# Mark each message as soon as we store it and bomb if that
# fails. While it is slow, it is not safe to store multiple
# messages and then have a failure before marking some (or
# all).
- if ($mhfolder ne 'SPAM') {
+ if ($mhfolder eq 'SPAM') {
+ push(@SPAM, $msgnum);
+ } else {
$status = system('mark', "+$mhfolder", "$msgnum", '-sequence',
'unseen', '-add');
+ # XXX: also need to handle mark being killed by a signal or stopped,
+ # and include the exit code or signal number in the error message.
if (not WIFEXITED($status)) {
- err(&EX_OSERR, "Failed to run mark");
+ die("Failed to run mark");
} elsif (WEXITSTATUS($status) != 0) {
- errx(&EX_SOFTWARE, "Failed to mark message unseen.");
+ die("Failed to mark message unseen.");
}
}
}
- if (not $FOLDERS{$mhfolder}) {
- print("+$mhfolder\n");
- $FOLDERS{$mhfolder} = 1;
- }
+ return $msgnum;
}
# Parse a message file into a structure describing the headers. The
my $fieldname; # unmolested header name
my $contents; # contents of header
- open(MSG, $msg);
+ open(MSG, $msg) or die("open(MSG, $msg): $!");
while (<MSG>) {
chomp;
if (length == 0) {
# folded header continuation
if (not defined($current)) {
- print(STDERR "Malformed message, cannot parse headers.\n");
- return ();
+ warn('Malformed message, cannot parse headers.');
+ next;
}
@{$headers{$current}}[-1] .= $_;
return %headers;
}
-###############################################################################
-# Spam handling, sorting, etc.
-
-sub preprocess {
- my @msglist = @_;
- my @baton;
- my $msg;
- my $i;
- my @result;
-
- @baton = spam_start_hook();
-
- foreach $msg (@msglist) {
- printf('%sChecking for spam... %6d/%d',
- "\r", ++$i, scalar(@msglist));
- if ($printfilenames) {
- print($msg);
- }
- if (spam_check($msg, @baton)) {
- print(" SPAM\n");
- store_message($msg, 'SPAM');
- } else {
- push(@result, $msg);
- }
- }
- print("\nDone: ", scalar(@result), " survivors\n");
-
- spam_stop_hook(@baton);
-
- return @result;
-}
-
\f
###############################################################################
# Filtering
sub find_mh_folder {
my $msg = shift;
- my %headers;
+ my %headers = @_;
my $filterref;
my @filter;
my $header;
my $expression;
my $result;
- %headers = get_headers($msg);
if (not %headers) {
return 'malformed';
}
- log_headers(%headers);
-
# Walk the list of filters. This structure is documented in
# pod at the end of the program.
foreach $filterref (@FILTERS) {
sub filter_mail {
my @msglist = @_;
+ my $msgcount = @msglist - 2; # don't count . and ..
+ my $len = length($msgcount);
+ my @baton;
my $msg;
my $mhfolder;
+ my $spam = 0;
+ my $saved = 0;
+ my $msgnum;
+ my %FOLDERS = ('SPAM'=>1);
+
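+ # XXX: column widths for the per-message status line (folder, message
+ # number, From, Subject); the names are terse and the proportions hard-coded.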
+ my $nf = $COLUMNS * 0.1;
+ my $nm = $COLUMNS * 0.0625;
+ my $nF = $COLUMNS * 0.175;
+ my $ns = $COLUMNS - $nf - $nm - $nF - 3;
+
+ if (-f "$HOME/.minc") {
+ require "$HOME/.minc";
+ }
+
+ my %batons;
+ for my $hook (@start_hooks) {
+ my ($handle, @baton) = $hook->();
+ if (defined($handle)) {
+ $batons{$handle} = [@baton];
+ }
+ }
foreach $msg (@msglist) {
- $mhfolder = find_mh_folder($msg);
- store_message($msg, $mhfolder);
+ ($msg eq '.' or $msg eq '..') and next;
+
+ if ($printfilenames) {
+ print("$msg\n");
+ }
+
+ my %headers = get_headers($msg);
+ log_headers(%headers);
+
+ undef($mhfolder);
+ for my $hook (@filter_hooks) {
+ my $result = $hook->(\%batons, \%headers, $msg);
+ defined($result) and ($mhfolder = $result);
+ }
+
+ defined($mhfolder) or ($mhfolder = find_mh_folder($msg, %headers));
+
+ $msgnum = store_message($msg, $mhfolder);
+
+ if ($mhfolder eq 'SPAM') {
+ $spam++;
+ } else {
+ $saved++;
+ print("\r");
+ print(' ' x $COLUMNS);
+ printf("\r\%-${nf}s \%${nm}d \%-${nF}s \%s\n",
+ substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
+ substr(pop(@{$headers{'from'}}), 0, $nF),
+ substr(pop(@{$headers{'subject'}}), 0, $ns))
+ }
+
+ for my $hook (@post_store_hooks) {
+ $hook->(\%batons, \%headers, $mhfolder, $msgnum);
+ }
+
+ printf(" \%${len}d SPAM \%${len}d saved \%${len}d/%1d",
+ $spam, $saved, $spam + $saved, $msgcount);
+
+ defined($maxmsgs) and ($spam + $saved < $maxmsgs or last);
+ }
+ print("\n");
+
+ for my $hook (@stop_hooks) {
+ $hook->(\%batons);
}
}
\f
MAIN: {
- my @msglist;
+ my $st;
if ($dumpfilters) {
+ require "$HOME/.minc";
$Data::Dumper::Indent = 1;
print(Dumper(\@FILTERS));
- exit(&EX_OK);
+ exit;
}
- @msglist = getfiles("$MAILDIR/new");
-
- if (not $filteronly) {
- @msglist = preprocess(@msglist);
- }
+ chdir("$MAILDIR/new") or die("chdir($MAILDIR/new): $!");
+ filter_mail(map { $_->[1] }
+ sort { $a->[0] <=> $b->[0] }
+ map {
+ if (not ($st = stat($_))) {
+ die("stat($_): $!");
+ }
+ [$st->mtime, $_]
+ }
+ getfiles('.'));
- if (not $spamonly) {
- filter_mail(@msglist);
- }
+ @SPAM and (exec('scan', '+SPAM', @SPAM) or die);
}
\f
name l/apache/httpd/dev.
For an example B<spam_check> function, see
-L<http:E<047>E<047>pretzelnet.orgE<047>cvsE<047>dotfilesE<047>.mincspam>
+L<http://pretzelnet.org/cvs/dotfiles/.mincspam>
=head1 AUTHORS
-Written by Eric Gillespie <epg@pretzelnet.org> with logging code
-stolen from Adam Lazur <adam@lazur.org>.
+Written by Eric Gillespie <epg@pretzelnet.org>. Design by Eric
+Gillespie and Doug Porter <dsp@waterspout.com>.
-Design by Eric Gillespie and Doug Porter <dsp@waterspout.com>.
+This program is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
=cut