#! /usr/local/bin/perl # $Id$ =head1 NAME B - Incorporate mail from a maildir into mh folders. =head1 SYNOPSIS B [-B] =head1 DESCRIPTION B is a program for incorporating mail from a maildir to a mh folder hierarchy. It takes mail from a maildir folder (not a maildir folder hierarchy), checks for spam with razor (XXX crossref), and optionally filters mail into separate mh folders. The filtering is quite sophisticated, as it is done using real Perl substitution (s//) commands. =cut use strict; use warnings; require 'sysexits.ph'; use Data::Dumper; use File::Basename; use Getopt::Std; use Log::Dispatch; use Log::Dispatch::File; use POSIX qw(strftime WEXITSTATUS WIFEXITED); # debuglevels for the logger use constant LOG_DEBUGLEVEL => 'info'; use constant SCREEN_DEBUGLEVEL => 'debug'; # characters used to wrap around the id field in the log so I can more # easily parse the msg flow use constant DEBUGCHAR => '%'; use constant INFOCHAR => '='; use constant INCOMINGCHAR => '<'; use constant SAVECHAR => '>'; use constant WARNCHAR => 'W'; use constant ERRORCHAR => 'E'; =head1 OPTIONS =over 4 =item B<-d> Dump (using Data::Dumper) the FILTERS hash and exit. This is useful for testing the syntax of .mincfilter. =item B<-h> Show help. =item B<-n> Dry run; do not actually incorporate the mail, but log and report to stdout/stderr as normal. =item B<-s> Process SPAM only, then exit, leaving all non-spam messages in the maildir. =back =cut my $dumpfilters = 0; my $run = 1; my $spamonly = 0; our $opt_d; our $opt_h; our $opt_n; our $opt_s; # ;; # stupid cperl-mode if (!getopts('dhns')) { exit(&EX_USAGE); } if ($opt_d) { $dumpfilters = 1; } elsif ($opt_h) { print("Sorry bub, no help.\n"); exit(&EX_OK); } elsif ($opt_n) { $run = 0; } if ($opt_s) { # ))){ # stupid cperl-mode $spamonly = 1; } =head1 ENVIRONMENT =over 4 =item HOME Where configuration files (.mincfilter) are found. =item MAILDIR Where mail is delivered. =back =cut use Env qw(HOME); use Env qw(MAILDIR); if (not $HOME) { die("HOME environment variable must be set.\n"); } if (not $MAILDIR) { $MAILDIR = "$HOME/Maildir"; } =head1 FILES =over 4 =item $HOME/.mincfilter This file is Perl code (included via the 'require' directive) which is expected to define the FILTERS hash. =item `mhpath +`/logs/minc.log Where minc logs what it does, unless in -n mode. =item `mhpath +`/logs/dryrun.log Where minc logs what it would do; used in -n mode. =cut our %FILTERS; require "$HOME/.mincfilter"; my $mh; my $logfile; $mh = `mhpath +`; chomp($mh); if ($run) { $logfile = $mh . '/logs/minc.log'; } else { $logfile = $mh . '/logs/dryrun.log'; } ############################################################################### # Logging my $logger = new Log::Dispatch; $logger->add(new Log::Dispatch::File (name=>'logfile', filename=>$logfile, min_level=>'info', mode=>'append')); # log alias, handles getting passed vars that are undef sub mylog { my $level = shift; my $act = shift; my $timestamp = strftime('%b %e %H:%M:%S', localtime); my $msg; foreach my $part (@_) { if (defined($part)) { $msg .= $part; } } # no newlines in the log message, thanks $msg =~ s/\n/ /gm; $msg = sprintf("%s %s%s %s\n", $timestamp, $act, $act, $msg); $logger->log(level=>$level, message=>$msg); if ($act eq SAVECHAR) { $logger->log(level=>$level, message=>"\n"); } } sub logincoming { mylog('info', INCOMINGCHAR, @_); } sub logsave { mylog('notice', SAVECHAR, @_); } sub loginfo { mylog('info', INFOCHAR, @_); } sub logdebug { mylog('debug', DEBUGCHAR, @_); } sub logwarn { mylog('warning', WARNCHAR, @_); } sub logerr { mylog('error', ERRORCHAR, @_); } sub log_headers { my %headers = @_; logincoming('From: ', $headers{'return-path'}); logincoming('To: ', $headers{'to'}); logincoming('Subject: ', $headers{'subject'}); logincoming('Message-Id: ', $headers{'message-id'}); } ############################################################################### # Utility procedures sub _errprint { printf("%s:%s", __FILE__, __LINE__); if (@_) { print(': '); foreach (@_) { print; } } } sub err { my $ex = shift; _errprint(@_); print(": $!\n"); exit($ex); } sub errx { my $ex = shift; _errprint(@_); print("\n"); exit($ex); } sub mkfolder { my $mhfolder = shift; my $folder; my $target; my $component; $folder = $mh . '/' . $mhfolder; $target = ''; if (! -d $folder) { foreach $component (split('/', $folder)) { if (defined($component) and length($component) >= 1) { $target = $target . '/' . $component; if (-d $target or mkdir($target)) { next; } else { err(&EX_TEMPFAIL, "Failed to create +$mhfolder ($component)"); } } } } } # We want to print the name of each list that has new mail only once, # so use this hash to store the lists that have already been printed. # Start the list out with SPAM already in it, since we don't care when # new messages are added to it. my %FOLDERS = ('SPAM'=>1); sub store_message { my $msg = shift; my $mhfolder = shift; my $mhmsg; my $msgnum; # We must do this even in -n mode because later steps fail without # it. This should be harmless. mkfolder($mhfolder); $mhmsg = `mhpath +$mhfolder new`; chomp($mhmsg); $msgnum = basename($mhmsg); logsave("+$mhfolder"); if ($run) { if (not rename($msg, $mhmsg)) { err(&EX_TEMPFAIL, "Failed rename($msg, $mhmsg)"); } if ($mhfolder ne 'SPAM') { system("mark +$mhfolder $msgnum -sequence unseen -add"); } } if (not $FOLDERS{$mhfolder}) { print("+$mhfolder\n"); $FOLDERS{$mhfolder} = 1; } } sub get_headers { my $msg = shift; my %headers; my $current; # current header, used for unfolding lines my $fieldname; # unmolested header name open(MSG, $msg); while () { chomp; if (length == 0) { last; } if (/^\s/) { # folded header continuation if (!defined($current)) { print("Malformed message, cannot parse headers.\n"); return (); } $headers{$current} .= $_; } else { ($fieldname) = split(/:/); $current = lc($fieldname); (undef, $headers{$current}) = split(/^$fieldname:\s*/); } } close(MSG); return %headers; } ############################################################################### # Spam handling sub is_spam { my $msg = shift; my $err; my $sig; my $status; $err = $sig = $msg; $err =~ s|/new/|/err/|; $sig =~ s|/new/|/sig/|; if (-f $err) { my $line; my $message; open(ERR, $err); $line = ; close(ERR); chomp($line); ($message, $!) = split(/:/, $line); logerr("$err: $message: $!"); if ($run) { if (unlink($err) != 1) { err(&EX_TEMPFAIL, "Failed unlink($err)"); } } } elsif (-f $sig) { # This is supposed to be a signature created with razor-check # directly after delivery. Currently this isn't supported # because it isn't clear to me how to get that signature back # into razor-check. For now, just unlink any sig files we # find and proceed with full razor-check mode. if ($run) { if (unlink($sig) != 1) { err(&EX_TEMPFAIL, "Failed unlink($sig)"); } } } if ($run) { $status = system("razor-check < $msg"); if (WIFEXITED($status) and WEXITSTATUS($status) == 0) { return 1; } else { return 0; } } else { return 0; } } sub kill_spam { my @msglist = @_; my @result; my $msg; my $i; @result = (); $i = 0; foreach $msg (@msglist) { printf('%sChecking for spam... %6d/%d', "\r", ++$i, scalar(@msglist)); if (is_spam($msg)) { print(" SPAM\n"); store_message($msg, 'SPAM'); } else { push(@result, $msg); } } print("\nDone: ", scalar(@result), "\n"); return @result; } ############################################################################### # Filtering sub find_mh_folder { my $msg = shift; my %headers; my $header; my $contents; my $regex; my $subst; my $try; %headers = get_headers($msg); if (not %headers) { return 'inbox'; } log_headers(%headers); foreach $header (keys(%FILTERS)) { $header = lc($header); $contents = $headers{$header}; if (defined($contents)) { foreach $regex (keys(%{$FILTERS{$header}})) { $subst = $FILTERS{$header}->{$regex}; $try = '$contents =~'; $try .= " s|$regex|$subst|is"; if (eval $try) { return $contents; } } } } return 'inbox'; } sub filter_mail { my @msglist = @_; my $msg; my $mhfolder; foreach $msg (@msglist) { $mhfolder = find_mh_folder($msg); store_message($msg, $mhfolder); } } MAIN: { my @msglist; if ($dumpfilters) { $Data::Dumper::Indent = 1; print(Dumper(\%FILTERS)); exit(&EX_OK); } @msglist = kill_spam(glob("$MAILDIR/new/*")); if (not $spamonly) { filter_mail(@msglist); } } __END__ =head1 EXAMPLES %FILTERS = ('List-Id:'=> {'.*<([[:graph:]]+)-([[:graph:]]+)\.lists\.sourceforge\.net>.*'=>'l/$1/$2', '.*<([[:graph:]]+?)(-list)?\.freedesktop\.org>.*'=>'l/freedesktop/$1', '.*.*'=>'l/swig'}, 'List-Post:'=> {''=>'l/apache/$2/$1'}, 'To:'=> {'root'=>'root'}); The first List-Id filter is a surprisingly effective one which creates appropriate folder names (such as l/htdig/updates and l/rox/devel) for all Sourceforge lists to which i subscribe. Certainly there are lists hosted at Sourceforge which do not work well with this, and this is where it is important to remember that the first match is the one uses. Simply put the more specific rules before this one. The next List-Id example is simple. The swig example demonstrates that the folder name does not have to use a portion of the matched header; you can just hardcode the folder name. The List-Post example is a nice one. Most ASF projects have their own apache.org subdomain, with mailing lists hosted there. So, given a list such as dev@httpd.apache.org, this filter will create the folder name l/apache/httpd/dev. =head1 AUTHORS Written by Eric Gillespie with logging code stolen from Adam Lazur . Design by Eric Gillespie and Doug Porter . =cut