]>
diplodocus.org Git - minc/blob - minc
7 B<minc> - Incorporate mail from a maildir into mh folders.
15 B<minc> is a program for incorporating mail from a maildir to a mh
16 folder hierarchy. It takes mail from a maildir folder (not a maildir
17 folder hierarchy), checks for spam with razor (XXX crossref), and
18 optionally filters mail into separate mh folders.
20 The filtering is quite sophisticated, as it is done using real Perl
21 substitution (s//) commands.
28 require 'sysexits.ph';
34 use Log
::Dispatch
::File
;
35 use POSIX
qw(strftime WEXITSTATUS WIFEXITED);
43 Dump (using Data::Dumper) the FILTERS hash and exit. This is useful
44 for testing the syntax of .mincfilter.
52 Dry run; do not actually incorporate the mail, but log and report to
53 stdout/stderr as normal.
57 Process SPAM only, then exit, leaving all non-spam messages in the
71 our $opt_s; # ;; # stupid cperl-mode
73 if (!getopts
('dhns')) {
80 print("Sorry bub, no help.\n");
86 if ($opt_s) { # ))){ # stupid cperl-mode
96 Where configuration files (.mincfilter) are found. Also,
97 $HOME/Maildir is used for the maildir if MAILDIR is not set.
101 Where mail is delivered.
107 use Env
qw(HOME MAILDIR);
110 die("HOME environment variable must be set.\n");
113 $MAILDIR = "$HOME/Maildir";
120 =item $HOME/.mincfilter
122 This file is Perl code (included via the 'require' directive) which is
123 expected to define the FILTERS hash.
125 =item `mhpath +`/logs/minc.log
127 Where minc logs what it does, unless in -n mode.
129 =item `mhpath +`/logs/dryrun.log
131 Where minc logs what it would do; used in -n mode.
136 require "$HOME/.mincfilter";
145 $logfile = $mh . '/logs/minc.log';
147 $logfile = $mh . '/logs/dryrun.log';
151 ###############################################################################
154 # debuglevels for the logger
155 use constant LOG_DEBUGLEVEL
=> 'info';
156 use constant SCREEN_DEBUGLEVEL
=> 'debug';
158 # characters used to wrap around the id field in the log so I can more
159 # easily parse the msg flow
160 use constant DEBUGCHAR
=> '%';
161 use constant INFOCHAR
=> '=';
162 use constant INCOMINGCHAR
=> '<';
163 use constant SAVECHAR
=> '>';
164 use constant WARNCHAR
=> 'W';
165 use constant ERRORCHAR
=> 'E';
167 my $logger = new Log
::Dispatch
;
168 $logger->add(new Log
::Dispatch
::File
(name
=>'logfile',
173 # log alias, handles getting passed vars that are undef
177 my $timestamp = strftime
('%b %e %H:%M:%S', localtime);
179 foreach my $part (@_) {
180 if (defined($part)) {
184 # no newlines in the log message, thanks
186 $msg = sprintf("%s %s%s %s\n", $timestamp, $act, $act, $msg);
187 $logger->log(level
=>$level, message
=>$msg);
189 if ($act eq SAVECHAR
) {
190 $logger->log(level
=>$level, message
=>"\n");
194 sub logincoming
{ mylog
('info', INCOMINGCHAR
, @_); }
195 sub logsave
{ mylog
('notice', SAVECHAR
, @_); }
196 sub loginfo
{ mylog
('info', INFOCHAR
, @_); }
197 sub logdebug
{ mylog
('debug', DEBUGCHAR
, @_); }
198 sub logwarn
{ mylog
('warning', WARNCHAR
, @_); }
199 sub logerr
{ mylog
('error', ERRORCHAR
, @_); }
204 logincoming
('From: ', $headers{'return-path'});
205 logincoming
('To: ', $headers{'to'});
206 logincoming
('Subject: ', $headers{'subject'});
207 logincoming
('Message-Id: ', $headers{'message-id'});
211 ###############################################################################
215 printf('%s:%s', __FILE__
, __LINE__
);
244 my $mhfolder = shift;
249 $folder = $mh . '/' . $mhfolder;
253 foreach $component (split('/', $folder)) {
254 if (defined($component) and length($component) >= 1) {
255 $target = $target . '/' . $component;
256 if (-d
$target or mkdir($target)) {
260 "Failed to create +$mhfolder ($component)");
267 # We want to print the name of each list that has new mail only once,
268 # so use this hash to store the lists that have already been printed.
269 # Start the list out with SPAM already in it, since we don't care when
270 # new messages are added to it.
271 my %FOLDERS = ('SPAM'=>1);
275 my $mhfolder = shift;
280 # We must do this even in -n mode because later steps fail without
281 # it. This should be harmless.
284 # XXX: Grab the 'mhpath new' algorithm and implement it internally.
285 $mhmsg = `mhpath +$mhfolder new`;
287 $msgnum = basename
($mhmsg);
289 logsave
("+$mhfolder");
292 if (not rename($msg, $mhmsg)) {
293 err
(&EX_TEMPFAIL
, "Failed rename($msg, $mhmsg)");
296 # XXX: Lame! Instead, keep a hash of arrays. Keys are folder
297 # names, array elements are message numbers in that list.
298 # Then after all the messages have been sorted, run one mark
299 # command per folder, marking all messages for that folder in
301 if ($mhfolder ne 'SPAM') {
302 $status = system("mark +$mhfolder $msgnum -sequence unseen -add");
303 if (not (WIFEXITED
($status) and WEXITSTATUS
($status) == 0)) {
304 errx
(&EX_TEMPFAIL
, "Failed to mark message unseen");
309 if (not $FOLDERS{$mhfolder}) {
310 print("+$mhfolder\n");
311 $FOLDERS{$mhfolder} = 1;
318 my $current; # current header, used for unfolding lines
319 my $fieldname; # unmolested header name
329 # folded header continuation
331 if (!defined($current)) {
332 print("Malformed message, cannot parse headers.\n");
336 $headers{$current} .= $_;
338 ($fieldname) = split(/:/);
339 $current = lc($fieldname);
340 (undef, $headers{$current}) = split(/^$fieldname:\s*/);
349 ###############################################################################
359 $err =~ s
|/new/|/err/|;
360 $sig =~ s
|/new/|/sig/|;
370 ($message, $!) = split(/:/, $line);
372 logerr
("$err: $message: $!");
375 if (unlink($err) != 1) {
376 err
(&EX_TEMPFAIL
, "Failed unlink($err)");
380 # This is supposed to be a signature created with razor-check
381 # directly after delivery. Currently this isn't supported
382 # because it isn't clear to me how to get that signature back
383 # into razor-check. For now, just unlink any sig files we
384 # find and proceed with full razor-check mode.
387 if (unlink($sig) != 1) {
388 err
(&EX_TEMPFAIL
, "Failed unlink($sig)");
394 $status = system("razor-check < $msg");
395 if (not WIFEXITED
($status)) {
396 err
(&EX_TEMPFAIL
, "Failed to run razor-check < $msg");
397 } elsif (WEXITSTATUS
($status) == 0) {
416 foreach $msg (@msglist) {
417 printf('%sChecking for spam... %6d/%d',
418 "\r", ++$i, scalar(@msglist));
421 store_message
($msg, 'SPAM');
426 print("\nDone: ", scalar(@result), " survivors\n");
432 ###############################################################################
444 %headers = get_headers
($msg);
449 log_headers
(%headers);
451 foreach $header (keys(%FILTERS)) {
452 $contents = $headers{lc($header)};
454 if (defined($contents)) {
455 foreach $regex (keys(%{$FILTERS{$header}})) {
456 $subst = $FILTERS{$header}->{$regex};
458 $try = '$contents =~';
459 $try .= " s|$regex|$subst|is";
475 foreach $msg (@msglist) {
476 $mhfolder = find_mh_folder
($msg);
477 store_message
($msg, $mhfolder);
486 $Data::Dumper
::Indent
= 1;
487 print(Dumper
(\
%FILTERS));
491 @msglist = kill_spam
(glob("$MAILDIR/new/*"));
494 filter_mail
(@msglist);
504 {'.*<([[:graph:]]+)-([[:graph:]]+)\.lists\.sourceforge\.net>.*'=>'l/$1/$2',
505 '.*<([[:graph:]]+?)(-list)?\.freedesktop\.org>.*'=>'l/freedesktop/$1',
506 '.*<swig\.cs\.uchicago\.edu>.*'=>'l/swig'},
509 {'<mailto:([[:graph:]]+)+@([[:graph:]]+)\.apache\.org>'=>'l/apache/$2/$1'},
514 The first List-Id filter is a surprisingly effective one which creates
515 appropriate folder names (such as l/htdig/updates and l/rox/devel) for
516 all Sourceforge lists to which i subscribe. Certainly there are lists
517 hosted at Sourceforge which do not work well with this, and this is
518 where it is important to remember that the first match is the one
519 uses. Simply put the more specific rules before this one.
521 The next List-Id example is simple. The swig example demonstrates
522 that the folder name does not have to use a portion of the matched
523 header; you can just hardcode the folder name.
525 The List-Post example is a nice one. Most ASF projects have their own
526 apache.org subdomain, with mailing lists hosted there. So, given a
527 list such as dev@httpd.apache.org, this filter will create the folder
528 name l/apache/httpd/dev.
532 Written by Eric Gillespie <epg@pretzelnet.org> with logging code
533 stolen from Adam Lazur <adam@lazur.org>.
535 Design by Eric Gillespie and Doug Porter <dsp@waterspout.com>.