From: epg <> Date: Sun, 25 Aug 2002 08:55:35 +0000 (+0000) Subject: Add minc program, based on an earlier minc which was not developed X-Git-Url: https://diplodocus.org/git/minc/commitdiff_plain/a7d1112ba54d41c787b184a3941ddb566d546842?hp=a0a3c6772f470a20fbd082d7fd5b0e7318bb6bda Add minc program, based on an earlier minc which was not developed very far. --- diff --git a/README b/README new file mode 100644 index 0000000..b3b33db --- /dev/null +++ b/README @@ -0,0 +1,13 @@ +$Id$ + +minc requires the following non-standard Perl modules and headers: + +Log::Dispatch +sysexits.ph + +Log::Dispatch is available on CPAN. You will almost certainly have to +generate sysexits.ph from yourself (using h2ph(1)). +While is not part of POSIX, it is common enough (i have +yet to encounter the UNIX system which does not have it). If you +cannot find a header, you may simply define appropriate +values for the EX_* symbols used by minc. diff --git a/minc b/minc new file mode 100644 index 0000000..931dd26 --- /dev/null +++ b/minc @@ -0,0 +1,456 @@ +#! /usr/local/bin/perl + +# $Id$ + +=head1 NAME + +B - Incorporate mail from a maildir into mh folders. + +=head1 SYNOPSIS + +B [-B] + +=head1 DESCRIPTION + +B is a program for incorporating mail from a maildir to a mh +folder hierarchy. It takes mail from a maildir folder (not a maildir +folder hierarchy), checks for spam with razor (XXX crossref), and +optionally filters mail into separate mh folders. + +The filtering is quite sophisticated, as it is done using real Perl +substitution (s//) commands. + +=cut + +use strict; +use warnings; + +require 'sysexits.ph'; + +use Data::Dumper; +use File::Basename; +use Getopt::Std; +use Log::Dispatch; +use Log::Dispatch::File; +use POSIX qw(strftime WEXITSTATUS WIFEXITED); + +# debuglevels for the logger +use constant LOG_DEBUGLEVEL => 'info'; +use constant SCREEN_DEBUGLEVEL => 'debug'; + +# characters used to wrap around the id field in the log so I can more +# easily parse the msg flow +use constant DEBUGCHAR => '%'; +use constant INFOCHAR => '='; +use constant INCOMINGCHAR => '<'; +use constant SAVECHAR => '>'; +use constant WARNCHAR => 'W'; +use constant ERRORCHAR => 'E'; + +=head1 OPTIONS + +=over 4 + +=item B<-d> + +Dump (using Data::Dumper) the FILTERS hash and exit. This is useful +for testing the syntax of .mincfilter. + +=item B<-h> + +Show help. + +=item B<-n> + +Dry run; do not actually incorporate the mail, but log and report to +stdout/stderr as normal. + +=back + +=cut + +my $dumpfilters = 0; +my $run = 1; +our $opt_d; +our $opt_h; +our $opt_n; +if (!getopts('dhn')) { + exit(&EX_USAGE); +} +if ($opt_d) { + $dumpfilters = 1; +} elsif ($opt_h) { + print("Sorry bub, no help.\n"); + exit(&EX_OK); +} elsif ($opt_n) { + $run = 0; +} + +=head1 ENVIRONMENT + +=over 4 + +=item HOME + +Where configuration files (.mincfilter) are found. + +=item MAILDIR + +Where mail is delivered. + +=back + +=cut + +use Env qw(HOME); +use Env qw(MAILDIR); + +=head1 FILES + +=over 4 + +=item $HOME/.mincfilter + +This file is Perl code (included via the 'require' directive) which is +expected to define the FILTERS hash. + +=item `mhpath +`/logs/minc.log + +Where minc logs what it does, unless in -n mode. + +=item `mhpath +`/logs/dryrun.log + +Where minc logs what it would do; used in -n mode. + +=cut + +our %FILTERS; +require "$HOME/.mincfilter"; + +my $mh; +my $logfile; + +$mh = `mhpath +`; +chomp($mh); + +if ($run) { + $logfile = $mh . '/logs/minc.log'; +} else { + $logfile = $mh . '/logs/dryrun.log'; +} + + +############################################################################### +# Logging + +my $logger = new Log::Dispatch; +$logger->add(new Log::Dispatch::File (name=>'logfile', + filename=>$logfile, + min_level=>'info', + mode=>'append')); + +# log alias, handles getting passed vars that are undef +sub mylog { + my $level = shift; + my $act = shift; + my $timestamp = strftime('%b %e %H:%M:%S', localtime); + my $msg; + foreach my $part (@_) { + if (defined($part)) { + $msg .= $part; + } + } + # no newlines in the log message, thanks + $msg =~ s/\n/ /gm; + $msg = sprintf("%s %s%s %s\n", $timestamp, $act, $act, $msg); + $logger->log(level=>$level, message=>$msg); + + if ($act eq SAVECHAR) { + $logger->log(level=>$level, message=>"\n"); + } +} + +sub logincoming { mylog('info', INCOMINGCHAR, @_); } +sub logsave { mylog('notice', SAVECHAR, @_); } +sub loginfo { mylog('info', INFOCHAR, @_); } +sub logdebug { mylog('debug', DEBUGCHAR, @_); } +sub logwarn { mylog('warning', WARNCHAR, @_); } +sub logerr { mylog('error', ERRORCHAR, @_); } + +sub log_headers { + my %headers = @_; + + logincoming('From: ', $headers{'Return-Path'}); + logincoming('To: ', $headers{'To'}); + logincoming('Subject: ', $headers{'Subject'}); + logincoming('Message-Id: ', $headers{'Message-Id'}); +} + + +############################################################################### +# Utility procedures + +sub mkfolder { + my $mhfolder = shift; + my $folder; + my $target; + my $component; + + $folder = $mh . '/' . $mhfolder; + $target = ''; + + if (! -d $folder) { + foreach $component (split('/', $folder)) { + if (defined($component) and length($component) >= 1) { + $target = $target . '/' . $component; + if (-d $target or mkdir($target)) { + next; + } else { + die("Failed to create +$mhfolder ($component): $!\n"); + } + } + } + } +} + +# We want to print the name of each list that has new mail only once, +# so use this hash to store the lists that have already been printed. +# Start the list out with SPAM already in it, since we don't care when +# new messages are added to it. +my %FOLDERS = ('SPAM'=>1); + +sub store_message { + my $msg = shift; + my $mhfolder = shift; + my $mhmsg; + my $msgnum; + + mkfolder($mhfolder); + + $mhmsg = `mhpath +$mhfolder new`; + chomp($mhmsg); + $msgnum = basename($mhmsg); + + logsave("+$mhfolder"); + + if ($run) { + if (not rename($msg, $mhmsg)) { + die("Rename failed: $!\n"); + } + + if ($mhfolder ne 'SPAM') { + system("mark +$mhfolder $msgnum -sequence unseen -add"); + } + } + + if (not $FOLDERS{$mhfolder}) { + print("+$mhfolder\n"); + $FOLDERS{$mhfolder} = 1; + } +} + +sub get_headers { + my $msg = shift; + my %headers; + my $current; # current header, used for unfolding lines + + open(MSG, $msg); + while () { + chomp; + if (length == 0) { + last; + } + + if (/^\s/) { + # folded header continuation + + if (!defined($current)) { + # XXX: Malformed message; log to stderr + return undef; + } + + $headers{$current} .= $_; + } else { + ($current) = split(/:/); + + # XXX: Is it correct simply to eat all whitespace between + # the colon and the first text for the header? + # Furthermore, is any space at all required? Or is + # '^Subject:hey$' a perfectly valid header? + (undef, $headers{$current}) = split(/^$current:\s*/); + } + } + close(MSG); + + return %headers; +} + + +############################################################################### +# Spam handling + +sub is_spam { + my $msg = shift; + my $err; + my $sig; + my $status; + + $err = $sig = $msg; + $err =~ s|/new/|/err/|; + $sig =~ s|/new/|/sig/|; + if (-f $err) { + my $line; + my $message; + + open(ERR, $err); + $line = ; + close(ERR); + + chomp($line); + ($message, $!) = split(/:/, $line); + + logerr("$err: $message: $!"); + + if (unlink($err) != 1) { + die("Failed unlink($err): $!\n"); + } + } elsif (-f $sig) { + # This is supposed to be a signature created with razor-check + # directly after delivery. Currently this isn't supported + # because it isn't clear to me how to get that signature back + # into razor-check. For now, just unlink any sig files we + # find and proceed with full razor-check mode. + + if (unlink($sig) != 1) { + die("Failed unlink($sig): $!\n"); + } + } + + #$status = system("razor-check < $msg"); + $status = system('false'); + if (WIFEXITED($status) and WEXITSTATUS($status) == 0) { + return 1; + } else { + return 0; + } +} + +sub kill_spam { + my @msglist = @_; + my @result; + my $msg; + + @result = (); + + foreach $msg (@msglist) { + if (is_spam($msg)) { + store_message($msg, 'SPAM'); + } else { + push(@result, $msg); + } + } + + return @result; +} + + +############################################################################### +# Filtering + +sub find_mh_folder { + my $msg = shift; + my %headers; + my $header; + my $contents; + my $regex; + my $subst; + my $try; + + %headers = get_headers($msg); + + log_headers(%headers); + + foreach $header (keys(%FILTERS)) { + $contents = $headers{$header}; + + if (defined($contents)) { + foreach $regex (keys(%{$FILTERS{$header}})) { + $subst = $FILTERS{$header}->{$regex}; + + $try = '$contents =~'; + $try .= " s|$regex|$subst|is"; + if (eval $try) { + return $contents; + } + } + } + } + + return 'inbox'; +} + +sub filter_mail { + my @msglist = @_; + my $msg; + my $mhfolder; + + foreach $msg (@msglist) { + $mhfolder = find_mh_folder($msg); + store_message($msg, $mhfolder); + } +} + + +MAIN: { + my @msglist; + + if ($dumpfilters) { + $Data::Dumper::Indent = 1; + print(Dumper(\%FILTERS)); + exit(&EX_OK); + } + + @msglist = kill_spam(glob("$MAILDIR/new/*")); + filter_mail(@msglist); +} + +__END__ + +=head1 EXAMPLES + +%FILTERS = + ('List-Id:'=> + {'.*<([[:graph:]]+)-([[:graph:]]+)\.lists\.sourceforge\.net>.*'=>'l/$1/$2', + '.*<([[:graph:]]+?)(-list)?\.freedesktop\.org>.*'=>'l/freedesktop/$1', + '.*.*'=>'l/swig'}, + + 'List-Post:'=> + {''=>'l/apache/$2/$1'}, + + 'To:'=> + {'root'=>'root'}); + +The first List-Id filter is a surprisingly effective one which creates +appropriate folder names (such as l/htdig/updates and l/rox/devel) for +all Sourceforge lists to which i subscribe. Certainly there are lists +hosted at Sourceforge which do not work well with this, and this is +where it is important to remember that the first match is the one +uses. Simply put the more specific rules before this one. + +The next List-Id example is simple. The swig example demonstrates +that the folder name does not have to use a portion of the matched +header; you can just hardcode the folder name. + +The List-Post example is a nice one. Most ASF projects have their own +apache.org subdomain, with mailing lists hosted there. So, given a +list such as dev@httpd.apache.org, this filter will create the folder +name l/apache/httpd/dev. + +=head1 AUTHORS + +Written by Eric Gillespie with logging code +stolen from Adam Lazur . + +Design by Eric Gillespie and Doug Porter . + +=cut