From: epg@google.com <>
Date: Wed, 1 Apr 2009 23:40:27 +0000 (-0700)
Subject: Implement custom scan of SPAM messages instead of using scan(1).
X-Git-Url: https://diplodocus.org/git/minc/commitdiff_plain/6d0449f39ed28ad05afe9806afc664d6bc204fc6?ds=inline;hp=fbeff8e387d16985ccfbacd67f013703a32301f0

Implement custom scan of SPAM messages instead of using scan(1).

If the header of a message contains raw garbage bytes (actual multi-byte
garbage, not RFC 2047-encoded text), as Chinese or Russian spam often does,
scan(1) will happily print them to the tty, which hurts.

(%SPAM): Replace @SPAM global list with a hash.
(store_message): Drop unused $status variable.  Don't update @SPAM .
(scan_line): Factor out the actual scan line formatting from filter_mail to
  this new function.
(filter_mail): Append message number and header hash to %SPAM .
(maildir_spam): Append message number and spam maildir file name to %SPAM .
(scan_spam): Add function to scan spam messages with scan_line .  Deal with 3
  cases: spam filtered via filter_mail and spam filtered via maildir_spam in
  either run or -n mode.  For the latter two, open the message file from the
  mh SPAM folder or from the spam maildir, respectively, to load the header.
---

diff --git a/minc b/minc
index eec56e4..9b6f791 100755
--- a/minc
+++ b/minc
@@ -54,9 +54,13 @@ our $VERSION = 1;
 my $MAGIC_TO_REGEX = '^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To)';
 my $MAGIC_TO_TOKEN = ' TO';
 
-# List of SPAM message numbers, scanned at the end so the user can
-# check for false positives.
-my @SPAM;
+# XXX re-document
+
+# Mapping of message numbers to array references.  The first element is set by
+# filter_mail to a reference to a header hash for the message; the second is
+# set by maildir_spam to the name of the message file in the spam maildir.
+# scan_spam scans this at the end so the user can check for false positives.
+my %SPAM;
 
 =head1 OPTIONS
 
@@ -417,7 +421,6 @@ sub store_message {
     my $msgnum;
     my $try;
     my $mhmsg;
-    my $status;
 
     # We must do this even in -n mode because later steps fail without
     # it.  This should be harmless.
@@ -482,9 +485,7 @@ sub store_message {
         # fails.  While it is slow, it is not safe to store multiple
         # messages and then have a failure before marking some (or
         # all).
-        if ($mhfolder eq 'SPAM') {
-            push(@SPAM, $msgnum);
-        } else {
+        if ($mhfolder ne 'SPAM') {
             mark($mhfolder, $msgnum, 'unseen');
         }
     }
@@ -606,6 +607,23 @@ sub find_mh_folder {
     return 'inbox';
 }
 
+sub scan_line {
+    my ($headers, $mhfolder, $msgnum, $nf, $nm, $nF, $ns) = @_;
+    my $from = '';
+    my $subject = '';
+    # Take the last value of each header; eval guards against a missing one.
+    eval { $from = [@{$headers->{'from'}}]->[-1] };
+    eval { $subject = [@{$headers->{'subject'}}]->[-1] };
+    # Replace control (0x00-0x1f) and 8-bit (0x80-0xff) characters with '?'.
+    for ($from, $subject) {
+        tr/\x00-\x1f\x80-\xff/?/;
+    }
+    return sprintf("\%-${nf}s \%${nm}d \%-${nF}s \%s",
+                   substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
+                   substr($from, 0, $nF),
+                   substr($subject, 0, $ns));
+}
+
 sub filter_mail {
     @_ or return ();
     my $msgcount = @_ - 2; # don't count . and ..
@@ -665,20 +683,11 @@ sub filter_mail {
 
         if ($mhfolder eq 'SPAM') {
             $spam++;
+            $SPAM{$msgnum} = [\%headers, undef];
         } else {
             $saved++;
-            my $from = '';
-            my $subject = '';
-            # Sometimes these headers are missing...
-            eval { $from = [@{$headers{'from'}}]->[-1] };
-            eval { $subject = [@{$headers{'subject'}}]->[-1] };
-            for ($from, $subject) {
-                tr/\x00-\x1f\x80-\xff/ /;
-            }
-            printf("\%-${nf}s \%${nm}d \%-${nF}s \%s\n",
-                   substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
-                   substr($from, 0, $nF),
-                   substr($subject, 0, $ns));
+            print(scan_line(\%headers, $mhfolder, $msgnum, $nf, $nm, $nF, $ns),
+                  "\n");
         }
 
         for my $hook (@post_store_hooks) {
@@ -783,7 +792,50 @@ sub maildir_spam {
 
     for my $msg (@spams) {
         ($msg eq '.' or $msg eq '..') and next;
-        store_message($msg, 'SPAM');
+        my $msgnum = store_message($msg, 'SPAM');
+        # Store the original file name for scan_spam in -n mode.
+        $SPAM{$msgnum} = [undef, $msg];
+    }
+}
+
+sub scan_spam {
+    # Print a scan line for every message recorded in %SPAM, in ascending
+    # numeric message order, so the user can check for false positives.
+    my (%header, $msg);
+
+    # Unlike filter_mail, we don't need to print the folder name.
+    # Calculate how many columns would be allocated to it...
+    my $nf = int($COLUMNS * $SCAN_P_FOLDER);
+    # ...and add that amount to COLUMNS to calculate the number of columns to
+    # allocate to msgnum and from snippet, thus filling the line without
+    # printing the folder name.
+    my $nm = int(($COLUMNS + $nf) * $SCAN_P_MESSAGE);
+    my $nF = int(($COLUMNS + $nf) * $SCAN_P_FROM);
+    my $ns = $COLUMNS - $nm - $nF - 3;
+
+    for my $msgnum (sort { $a <=> $b } keys(%SPAM)) {
+        my $tuple = $SPAM{$msgnum};
+        if (defined($tuple->[0])) {
+            # Filed by filter_mail, so we have the header.
+            %header = %{$tuple->[0]};
+        } elsif (defined($tuple->[1])) {
+            # Filed by maildir_spam, so we don't have the header.
+            if ($run) {
+                # The message has been filed, load it from $mh.
+                $msg = "$mh/SPAM/$msgnum";
+            } else {
+                # The message has not been filed, load it from the maildir.
+                # $tuple->[1] is just a basename, not a path; this works
+                # because maildir_spam did chdir(Maildir/spam/new).
+                $msg = $tuple->[1];
+            }
+            %header = get_headers($msg);
+        } else {
+            print(STDERR "BUG: corrupt SPAM tuple, neither element defined",
+                  " for message $msgnum\n");
+            next;
+        }
+        print(scan_line(\%header, '', $msgnum, 0, $nm, $nF, $ns), "\n");
     }
 }
 
@@ -814,8 +866,7 @@ MAIN: {
     $run and %folders and update_dot_folders(\%folders);
 
     maildir_spam();
-
-    @SPAM and (exec('scan', '+SPAM', @SPAM) or die);
+    scan_spam();
 }