--- /dev/null
+#! /usr/local/bin/perl
+
+# $Id$
+
+=head1 NAME
+
+B<minc> - Incorporate mail from a maildir into mh folders.
+
+=head1 SYNOPSIS
+
+B<minc> [-B<dhn>]
+
+=head1 DESCRIPTION
+
+B<minc> is a program for incorporating mail from a maildir to a mh
+folder hierarchy. It takes mail from a maildir folder (not a maildir
+folder hierarchy), checks for spam with razor (XXX crossref), and
+optionally filters mail into separate mh folders.
+
+The filtering is quite sophisticated, as it is done using real Perl
+substitution (s//) commands.
+
+=cut
+
+use strict;
+use warnings;
+
+require 'sysexits.ph';
+
+use Data::Dumper;
+use File::Basename;
+use Getopt::Std;
+use Log::Dispatch;
+use Log::Dispatch::File;
+use POSIX qw(strftime WEXITSTATUS WIFEXITED);
+
+# debuglevels for the logger
+use constant LOG_DEBUGLEVEL => 'info';
+use constant SCREEN_DEBUGLEVEL => 'debug';
+
+# characters used to wrap around the id field in the log so I can more
+# easily parse the msg flow
+use constant DEBUGCHAR => '%';
+use constant INFOCHAR => '=';
+use constant INCOMINGCHAR => '<';
+use constant SAVECHAR => '>';
+use constant WARNCHAR => 'W';
+use constant ERRORCHAR => 'E';
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-d>
+
+Dump (using Data::Dumper) the FILTERS hash and exit. This is useful
+for testing the syntax of .mincfilter.
+
+=item B<-h>
+
+Show help.
+
+=item B<-n>
+
+Dry run; do not actually incorporate the mail, but log and report to
+stdout/stderr as normal.
+
+=back
+
+=cut
+
+my $dumpfilters = 0;
+my $run = 1;
+our $opt_d;
+our $opt_h;
+our $opt_n;
+if (!getopts('dhn')) {
+ exit(&EX_USAGE);
+}
+if ($opt_d) {
+ $dumpfilters = 1;
+} elsif ($opt_h) {
+ print("Sorry bub, no help.\n");
+ exit(&EX_OK);
+} elsif ($opt_n) {
+ $run = 0;
+}
+
+=head1 ENVIRONMENT
+
+=over 4
+
+=item HOME
+
+Where configuration files (.mincfilter) are found.
+
+=item MAILDIR
+
+Where mail is delivered.
+
+=back
+
+=cut
+
+use Env qw(HOME);
+use Env qw(MAILDIR);
+
+=head1 FILES
+
+=over 4
+
+=item $HOME/.mincfilter
+
+This file is Perl code (included via the 'require' directive) which is
+expected to define the FILTERS hash.
+
+=item `mhpath +`/logs/minc.log
+
+Where minc logs what it does, unless in -n mode.
+
+=item `mhpath +`/logs/dryrun.log
+
+Where minc logs what it would do; used in -n mode.
+
+=cut
+
+our %FILTERS;
+require "$HOME/.mincfilter";
+
+my $mh;
+my $logfile;
+
+$mh = `mhpath +`;
+chomp($mh);
+
+if ($run) {
+ $logfile = $mh . '/logs/minc.log';
+} else {
+ $logfile = $mh . '/logs/dryrun.log';
+}
+
+\f
+###############################################################################
+# Logging
+
+my $logger = new Log::Dispatch;
+$logger->add(new Log::Dispatch::File (name=>'logfile',
+ filename=>$logfile,
+ min_level=>'info',
+ mode=>'append'));
+
+# log alias, handles getting passed vars that are undef
+sub mylog {
+ my $level = shift;
+ my $act = shift;
+ my $timestamp = strftime('%b %e %H:%M:%S', localtime);
+ my $msg;
+ foreach my $part (@_) {
+ if (defined($part)) {
+ $msg .= $part;
+ }
+ }
+ # no newlines in the log message, thanks
+ $msg =~ s/\n/ /gm;
+ $msg = sprintf("%s %s%s %s\n", $timestamp, $act, $act, $msg);
+ $logger->log(level=>$level, message=>$msg);
+
+ if ($act eq SAVECHAR) {
+ $logger->log(level=>$level, message=>"\n");
+ }
+}
+
+sub logincoming { mylog('info', INCOMINGCHAR, @_); }
+sub logsave { mylog('notice', SAVECHAR, @_); }
+sub loginfo { mylog('info', INFOCHAR, @_); }
+sub logdebug { mylog('debug', DEBUGCHAR, @_); }
+sub logwarn { mylog('warning', WARNCHAR, @_); }
+sub logerr { mylog('error', ERRORCHAR, @_); }
+
+sub log_headers {
+ my %headers = @_;
+
+ logincoming('From: ', $headers{'Return-Path'});
+ logincoming('To: ', $headers{'To'});
+ logincoming('Subject: ', $headers{'Subject'});
+ logincoming('Message-Id: ', $headers{'Message-Id'});
+}
+
+\f
+###############################################################################
+# Utility procedures
+
+sub mkfolder {
+ my $mhfolder = shift;
+ my $folder;
+ my $target;
+ my $component;
+
+ $folder = $mh . '/' . $mhfolder;
+ $target = '';
+
+ if (! -d $folder) {
+ foreach $component (split('/', $folder)) {
+ if (defined($component) and length($component) >= 1) {
+ $target = $target . '/' . $component;
+ if (-d $target or mkdir($target)) {
+ next;
+ } else {
+ die("Failed to create +$mhfolder ($component): $!\n");
+ }
+ }
+ }
+ }
+}
+
+# We want to print the name of each list that has new mail only once,
+# so use this hash to store the lists that have already been printed.
+# Start the list out with SPAM already in it, since we don't care when
+# new messages are added to it.
+my %FOLDERS = ('SPAM'=>1);
+
+sub store_message {
+ my $msg = shift;
+ my $mhfolder = shift;
+ my $mhmsg;
+ my $msgnum;
+
+ mkfolder($mhfolder);
+
+ $mhmsg = `mhpath +$mhfolder new`;
+ chomp($mhmsg);
+ $msgnum = basename($mhmsg);
+
+ logsave("+$mhfolder");
+
+ if ($run) {
+ if (not rename($msg, $mhmsg)) {
+ die("Rename failed: $!\n");
+ }
+
+ if ($mhfolder ne 'SPAM') {
+ system("mark +$mhfolder $msgnum -sequence unseen -add");
+ }
+ }
+
+ if (not $FOLDERS{$mhfolder}) {
+ print("+$mhfolder\n");
+ $FOLDERS{$mhfolder} = 1;
+ }
+}
+
+sub get_headers {
+ my $msg = shift;
+ my %headers;
+ my $current; # current header, used for unfolding lines
+
+ open(MSG, $msg);
+ while (<MSG>) {
+ chomp;
+ if (length == 0) {
+ last;
+ }
+
+ if (/^\s/) {
+ # folded header continuation
+
+ if (!defined($current)) {
+ # XXX: Malformed message; log to stderr
+ return undef;
+ }
+
+ $headers{$current} .= $_;
+ } else {
+ ($current) = split(/:/);
+
+ # XXX: Is it correct simply to eat all whitespace between
+ # the colon and the first text for the header?
+ # Furthermore, is any space at all required? Or is
+ # '^Subject:hey$' a perfectly valid header?
+ (undef, $headers{$current}) = split(/^$current:\s*/);
+ }
+ }
+ close(MSG);
+
+ return %headers;
+}
+
+\f
+###############################################################################
+# Spam handling
+
+sub is_spam {
+ my $msg = shift;
+ my $err;
+ my $sig;
+ my $status;
+
+ $err = $sig = $msg;
+ $err =~ s|/new/|/err/|;
+ $sig =~ s|/new/|/sig/|;
+ if (-f $err) {
+ my $line;
+ my $message;
+
+ open(ERR, $err);
+ $line = <ERR>;
+ close(ERR);
+
+ chomp($line);
+ ($message, $!) = split(/:/, $line);
+
+ logerr("$err: $message: $!");
+
+ if (unlink($err) != 1) {
+ die("Failed unlink($err): $!\n");
+ }
+ } elsif (-f $sig) {
+ # This is supposed to be a signature created with razor-check
+ # directly after delivery. Currently this isn't supported
+ # because it isn't clear to me how to get that signature back
+ # into razor-check. For now, just unlink any sig files we
+ # find and proceed with full razor-check mode.
+
+ if (unlink($sig) != 1) {
+ die("Failed unlink($sig): $!\n");
+ }
+ }
+
+ #$status = system("razor-check < $msg");
+ $status = system('false');
+ if (WIFEXITED($status) and WEXITSTATUS($status) == 0) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+sub kill_spam {
+ my @msglist = @_;
+ my @result;
+ my $msg;
+
+ @result = ();
+
+ foreach $msg (@msglist) {
+ if (is_spam($msg)) {
+ store_message($msg, 'SPAM');
+ } else {
+ push(@result, $msg);
+ }
+ }
+
+ return @result;
+}
+
+\f
+###############################################################################
+# Filtering
+
+sub find_mh_folder {
+ my $msg = shift;
+ my %headers;
+ my $header;
+ my $contents;
+ my $regex;
+ my $subst;
+ my $try;
+
+ %headers = get_headers($msg);
+
+ log_headers(%headers);
+
+ foreach $header (keys(%FILTERS)) {
+ $contents = $headers{$header};
+
+ if (defined($contents)) {
+ foreach $regex (keys(%{$FILTERS{$header}})) {
+ $subst = $FILTERS{$header}->{$regex};
+
+ $try = '$contents =~';
+ $try .= " s|$regex|$subst|is";
+ if (eval $try) {
+ return $contents;
+ }
+ }
+ }
+ }
+
+ return 'inbox';
+}
+
+sub filter_mail {
+ my @msglist = @_;
+ my $msg;
+ my $mhfolder;
+
+ foreach $msg (@msglist) {
+ $mhfolder = find_mh_folder($msg);
+ store_message($msg, $mhfolder);
+ }
+}
+
+\f
+MAIN: {
+ my @msglist;
+
+ if ($dumpfilters) {
+ $Data::Dumper::Indent = 1;
+ print(Dumper(\%FILTERS));
+ exit(&EX_OK);
+ }
+
+ @msglist = kill_spam(glob("$MAILDIR/new/*"));
+ filter_mail(@msglist);
+}
+
+__END__
+
+=head1 EXAMPLES
+
+%FILTERS =
+ ('List-Id:'=>
+ {'.*<([[:graph:]]+)-([[:graph:]]+)\.lists\.sourceforge\.net>.*'=>'l/$1/$2',
+ '.*<([[:graph:]]+?)(-list)?\.freedesktop\.org>.*'=>'l/freedesktop/$1',
+ '.*<swig\.cs\.uchicago\.edu>.*'=>'l/swig'},
+
+ 'List-Post:'=>
+ {'<mailto:([[:graph:]]+)+@([[:graph:]]+)\.apache\.org>'=>'l/apache/$2/$1'},
+
+ 'To:'=>
+ {'root'=>'root'});
+
+The first List-Id filter is a surprisingly effective one which creates
+appropriate folder names (such as l/htdig/updates and l/rox/devel) for
+all Sourceforge lists to which i subscribe. Certainly there are lists
+hosted at Sourceforge which do not work well with this, and this is
+where it is important to remember that the first match is the one
+uses. Simply put the more specific rules before this one.
+
+The next List-Id example is simple. The swig example demonstrates
+that the folder name does not have to use a portion of the matched
+header; you can just hardcode the folder name.
+
+The List-Post example is a nice one. Most ASF projects have their own
+apache.org subdomain, with mailing lists hosted there. So, given a
+list such as dev@httpd.apache.org, this filter will create the folder
+name l/apache/httpd/dev.
+
+=head1 AUTHORS
+
+Written by Eric Gillespie <epg@pretzelnet.org> with logging code
+stolen from Adam Lazur <adam@lazur.org>.
+
+Design by Eric Gillespie and Doug Porter <dsp@waterspout.com>.
+
+=cut