use lkopen_fcntl in mark

[minc] / minc
diff --git a/minc b/minc

index eec56e447881dc8d72b843c32674313290536cd6..f0541594d7d05d76ac4f72959304d28fcd9eba6b 100755 (executable)
--- a/minc
+++ b/minc
@@ -39,14 +39,17 @@ $SIG{'PIPE'} = 'IGNORE';
  
  use Data::Dumper;
  use Errno;
-use Fcntl qw(O_WRONLY O_EXCL O_CREAT);
+use Fcntl;
  use FileHandle;
+use File::FcntlLock;
  use File::Temp qw(tempfile);
  use File::stat;
  use Getopt::Long qw(:config gnu_getopt no_ignore_case);
-use POSIX qw(strftime WEXITSTATUS WIFEXITED);
+use POSIX qw(WIFEXITED WEXITSTATUS WIFSIGNALED WTERMSIG);
  use Pod::Usage;
  
+use constant LOCK_TRIES => 60;
+
  our $VERSION = 1;
  
  # If a filter set's header is $MAGIC_TO_TOKEN, that set is compared
@@ -54,9 +57,11 @@ our $VERSION = 1;
  my $MAGIC_TO_REGEX = '^((Original-)?(Resent-)?(To|Cc|Bcc)|(X-Envelope |Apparently(-Resent)?)-To)';
  my $MAGIC_TO_TOKEN = ' TO';
  
-# List of SPAM message numbers, scanned at the end so the user can
-# check for false positives.
-my @SPAM;
+# Mapping of message numbers to array references.  The first element is set by
+# filter_mail to a reference to a header hash for the message; the second is
+# set by maildier_spam to the name of the message file in the spam maildir.
+# scan_spam scans this at the end so the user can check for false positives.
+my %SPAM;
  
  =head1 OPTIONS
  
@@ -94,6 +99,7 @@ my $maxmsgs;
  my $norun;
  my $rebuild_dot_folders;
  
+if (!caller()) {
  GetOptions(
             'd' => \$dumpfilters,
             'h|help' => \$help,
@@ -103,6 +109,7 @@ GetOptions(
            ) or pod2usage();
  $help and pod2usage(-exitstatus=>0, -verbose=>1);
  @ARGV == 0 or pod2usage();
+}
  
  our $run = !$norun;
  
@@ -154,10 +161,6 @@ may define the FILTERS list, @start_hooks, @filter_hooks,
  B<minc> adds all folders it filters into to this file, which is used
  by lukem's B<new(1)> (XXX need a link).
  
-=item `mhpath +`/logs/minc.log
-
-Where B<minc> logs what it does, unless in -n mode.
-
  =item `mhpath +`/.minc.context
  
  B<minc> uses this file for context (i.e. current folder) instead of
@@ -183,63 +186,26 @@ our $folder_sorter = sub { sort_by_list(@_, @folder_sort_list) };
  
  my $mh;
  
-$mh = `mhpath +`;
+$mh = `mhpath +`; $? >= 0 or die;
  chomp($mh);
-
-my $logfile = $mh . '/logs/minc.log';
+if (!$mh) {
+    die('mhpath did not give MH mail path: ' . exit_msg($?));
+}
  
  $ENV{"MHCONTEXT"} = $mh . '/.minc.context';
  
  \f
  ###############################################################################
-# Logging
-
-sub mylog {
-    $run or return;
-
-    my $timestamp = strftime('%b %e %H:%M:%S', localtime);
-    my $msg;
-    foreach my $part (@_) {
-        if (defined($part)) {
-            $msg .= $part;
-        }
-    }
-    # no newlines in the log message, thanks
-    $msg =~ s/\n/ /gm;
-
-    open(LOG, ">>$logfile") or die("open(>>$logfile): $!");
-    print(LOG "$timestamp $msg\n") or die("print(>>$logfile): $!");
-    close(LOG) or die("close($logfile): $!");
-}
-
-sub logheader {
-    my ($text, @contents) = @_;
-    my $last;
-
-    if (@contents) {
-        $last = $contents[-1];
-    } else {
-        $last = '';
-    }
-
-    mylog('<< ', $text, $last);
-}
-
-sub log_headers {
-    my %headers = @_;
+# Utility procedures
  
-    # For an explanation of the %headers structure, see the
-    # get_headers function below.
-    logheader('From:       ', @{$headers{'return-path'}});
-    logheader('To:         ', @{$headers{'to'}});
-    logheader('Subject:    ', @{$headers{'subject'}});
-    logheader('Message-Id: ', @{$headers{'message-id'}});
+sub exit_msg {
+    my $status = shift;
+    WIFEXITED($status) && return 'exited with status ' . WEXITSTATUS($status);
+    WIFSIGNALED($status) && return 'killed with signal ' . WTERMSIG($status);
+    # WTF
+    "died ($status)";
  }
  
-\f
-###############################################################################
-# Utility procedures
-
  sub sort_by_list {
      my $a = shift;
      my $b = shift;
@@ -352,19 +318,16 @@ sub mark {
      my $folder = shift;
      my $msgnum = shift;
      my $seq = shift;
+
      my $fn = "$mh/$folder/.mh_sequences";
-    my $fh;
-    my @sequences;
+    my ($fh, $e) = lkopen_fcntl($fn, O_RDWR | O_CREAT, 0600);
  
-    if (not open($fh, $fn)) {
-        $!{ENOENT} or die("open($fn): $!");
-    } else {
-        @sequences = <$fh>;
-        chomp(@sequences);
-    }
+    my @sequences = <$fh>;
+    chomp(@sequences);
+
+    truncate($fh, 0) or die("truncate($fn): $!");
  
      my $marked = 0;
-    open($fh, '>', $fn) or die("open(>$fn): $!");
      for $_ (@sequences) {
          if (/^$seq: (.*)/) {
              my @parts;
@@ -411,13 +374,51 @@ sub mark {
      close($fh) or die("close(>$fn): $!");
  }
  
+# Based on nmh's lkopen_fcntl
+# Return 0 for success, errno on failure.
+sub lkopen_fcntl {
+    my $fn = shift;
+    my $access = shift;
+    my $mode = shift;
+    my $errno;
+
+    # The assumption here is that if you open the file for writing, you
+    # need an exclusive lock.
+
+    my $tries = LOCK_TRIES;
+    for (;;) {
+        sysopen(my $fh, $fn, $access, $mode) or die("sysopen($fn): $!");
+
+        my $flk = File::FcntlLock->new;
+        $flk->l_start(0);
+        $flk->l_len(0);
+        $flk->l_type(($access & O_ACCMODE) == O_RDONLY ? F_RDLCK : F_WRLCK);
+        $flk->l_whence(SEEK_SET);
+
+        # Really should only retry on EAGAIN and EINTR...
+        if ($flk->lock($fh, F_SETLK)) {
+            return $fh;
+        }
+
+        $errno = $flk->lock_errno;
+        close($fh) or die("close($fn): $!");
+
+        if (--$tries == 0) {
+            last;
+        }
+        sleep(1);
+    }
+
+    local $! = $errno;
+    die("failed to lock $fn: $!");
+}
+
  sub store_message {
      my $msg = shift;
      my $mhfolder = shift;
      my $msgnum;
      my $try;
      my $mhmsg;
-    my $status;
  
      # We must do this even in -n mode because later steps fail without
      # it.  This should be harmless.
@@ -471,8 +472,6 @@ sub store_message {
          #sleep(2);
      }
  
-    mylog('+', $mhfolder);
-
      if ($run) {
          if (not rename($msg, $mhmsg)) {
              die("rename($msg, $mhmsg): $!");
@@ -482,9 +481,7 @@ sub store_message {
          # fails.  While it is slow, it is not safe to store multiple
          # messages and then have a failure before marking some (or
          # all).
-        if ($mhfolder eq 'SPAM') {
-            push(@SPAM, $msgnum);
-        } else {
+        if ($mhfolder ne 'SPAM') {
              mark($mhfolder, $msgnum, 'unseen');
          }
      }
@@ -538,7 +535,7 @@ sub get_headers {
      }
      close(MSG);
  
-    return %headers;
+    return \%headers;
  }
  
  \f
@@ -547,63 +544,84 @@ sub get_headers {
  
  sub find_mh_folder {
      my $msg = shift;
-    my %headers = @_;
-    my $filterref;
-    my @filter;
-    my $header;
-    my $contents;
-    my $pair;
-    my $regexp;
-    my $expression;
-    my $result;
-
-    if (not %headers) {
+    my $header = shift;
+
+    if (not %$header) {
          return 'malformed';
      }
  
      # Walk the list of filters.  This structure is documented in
      # pod at the end of the program.
-    foreach $filterref (@FILTERS) {
-        @filter = @$filterref;
-        $header = shift(@filter);
-
-        # Handle filters using the magic TO header specially.
-        if ($header eq $MAGIC_TO_TOKEN) {
-            foreach $header (keys(%headers)) {
-                if ($header =~ /$MAGIC_TO_REGEX/i) {
-                    foreach $contents (@{$headers{lc($header)}}) {
-                        foreach $pair (@filter) {
-                            ($regexp, $expression) = @$pair;
-                            if ($contents =~ $regexp) {
-                                if (eval "\$result = \"$expression\"") {
-                                    return $result;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            # Now that it's been processed specially, skip normal handling.
+    for my $filterref (@FILTERS) {
+        if (ref($filterref) eq 'CODE') {
+            my $m = $filterref->($header, $msg);
+            $m && return $m;
              next;
          }
  
-        # Walk the list of message headers matching the filter's
-        # specified header.
-        foreach $contents (@{$headers{lc($header)}}) {
-            # Walk the filter's list of regexp/expression pairs.
-            foreach $pair (@filter) {
-                ($regexp, $expression) = @$pair;
-                if ($contents =~ $regexp) {
-                    if (eval "\$result = \"$expression\"") {
-                        return $result;
-                    }
+        my $m = match($header, @$filterref);
+        $m && return $m;
+    }
+
+    return 'inbox';
+}
+
+# Test all the header fields against each [regexp, folder-expression] pair.
+sub match {
+    my $header = shift;
+    my $filter_field = shift;
+    my @filters = @_;
+
+    # Handle filters using the magic TO header specially.
+    if ($filter_field eq $MAGIC_TO_TOKEN) {
+        for my $field_name (keys(%$header)) {
+            if ($field_name =~ /$MAGIC_TO_REGEX/i) {
+                my $m = match_one_field($header->{$field_name}, \@filters);
+                $m && return $m;
+            }
+        }
+        # Now that it's been processed specially, skip normal handling.
+        return;
+    }
+
+    # Walk the list of header fields matching the filter's specified header.
+    my $m = match_one_field($header->{lc($filter_field)}, \@filters);
+    $m && return $m;
+}
+
+# Test all the values of one header field against each [regexp,
+# folder-expression] pair.
+sub match_one_field {
+    my $values = shift;
+    my $filters = shift;
+    for my $value (@$values) {
+        for my $pair (@$filters) {
+            my ($regexp, $expression) = @$pair;
+            if ($value =~ $regexp) {
+                my $result;
+                if (eval "\$result = \"$expression\"") {
+                    return $result;
                  }
              }
          }
      }
+}
  
-    return 'inbox';
+sub scan_line {
+    my ($headers, $mhfolder, $msgnum, $nf, $nm, $nF, $ns) = @_;
+    my $from = '';
+    my $subject = '';
+    # Sometimes these headers are missing...
+    eval { $from = [@{$headers->{'from'}}]->[-1] };
+    eval { $subject = [@{$headers->{'subject'}}]->[-1] };
+    # Replace garbage characters.
+    for ($from, $subject) {
+        tr/\x00-\x1f\x80-\xff/?/;
+    }
+    return sprintf("\%-${nf}s \%${nm}d \%-${nF}s \%s",
+                   substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
+                   substr($from, 0, $nF),
+                   substr($subject, 0, $ns));
  }
  
  sub filter_mail {
@@ -645,16 +663,15 @@ sub filter_mail {
      for $msg (@_) {
          ($msg eq '.' or $msg eq '..') and next;
  
-        my %headers = get_headers($msg);
-        log_headers(%headers);
+        my $headers = get_headers($msg);
  
          undef($mhfolder);
          for my $hook (@filter_hooks) {
-            my $result = $hook->(\%batons, \%headers, $msg);
+            my $result = $hook->(\%batons, $headers, $msg);
              defined($result) and ($mhfolder = $result);
          }
  
-        defined($mhfolder) or ($mhfolder = find_mh_folder($msg, %headers));
+        defined($mhfolder) or ($mhfolder = find_mh_folder($msg, $headers));
  
          $msgnum = store_message($msg, $mhfolder);
          $folders{$mhfolder}++;
@@ -665,24 +682,15 @@ sub filter_mail {
  
          if ($mhfolder eq 'SPAM') {
              $spam++;
+            $SPAM{$msgnum} = [$headers, undef];
          } else {
              $saved++;
-            my $from = '';
-            my $subject = '';
-            # Sometimes these headers are missing...
-            eval { $from = [@{$headers{'from'}}]->[-1] };
-            eval { $subject = [@{$headers{'subject'}}]->[-1] };
-            for ($from, $subject) {
-                tr/\x00-\x1f\x80-\xff/ /;
-            }
-            printf("\%-${nf}s \%${nm}d \%-${nF}s \%s\n",
-                   substr($mhfolder, 0, $nf), substr($msgnum, 0, $nm),
-                   substr($from, 0, $nF),
-                   substr($subject, 0, $ns));
+            print(scan_line($headers, $mhfolder, $msgnum, $nf, $nm, $nF, $ns),
+                  "\n");
          }
  
          for my $hook (@post_store_hooks) {
-            $hook->(\%batons, \%headers, $mhfolder, $msgnum);
+            $hook->(\%batons, $headers, $mhfolder, $msgnum);
          }
  
          if ($tty) {
@@ -783,12 +791,55 @@ sub maildir_spam {
  
      for my $msg (@spams) {
          ($msg eq '.' or $msg eq '..') and next;
-        store_message($msg, 'SPAM');
+        my $msgnum = store_message($msg, 'SPAM');
+        # Store the original file name for scan_spam in -n mode.
+        $SPAM{$msgnum} = [undef, $msg];
+    }
+}
+
+sub scan_spam {
+    my ($msgnum, $header, $tuple, $msg);
+
+    # Unlike filter_mail, we don't need to print the folder name.
+    # Calculate how many columns would be allocated to it...
+    my $nf = int($COLUMNS * $SCAN_P_FOLDER);
+    # ...and add that amount to COLUMNS to calculate the number of columns to
+    # allocate to msgnum and from snippet, thus filling the line without
+    # printing the folder name.
+    my $nm = int(($COLUMNS + $nf) * $SCAN_P_MESSAGE);
+    my $nF = int(($COLUMNS + $nf) * $SCAN_P_FROM);
+    my $ns = $COLUMNS - $nm - $nF - 3;
+
+    for $msgnum (sort(keys(%SPAM))) {
+        $tuple = $SPAM{$msgnum};
+        if (defined($tuple->[0])) {
+            # Filed by filter_mail, so we have the header.
+            $header = $tuple->[0];
+        } elsif (defined($tuple->[1])) {
+            # Filed by maildir_spam, so we don't have the header.
+            if ($run) {
+                # The message has been filed, load it from $mh.
+                $msg = "$mh/SPAM/$msgnum";
+            } else {
+                # The message has not been filed, load it from the maildir.
+                # $tuple->[1] is just a basename, not a path; this works
+                # because maildir_spam did chdir(Maildir/spam/new).
+                $msg = $tuple->[1];
+            }
+            $header = get_headers($msg);
+        } else {
+            print(STDERR
+                  "BUG: corrupt SPAM tuple, neither element defined",
+                  " for message $msgnum\n");
+            next;
+        }
+        print(scan_line($header, '', $msgnum, 0, $nm, $nF, $ns),
+              "\n");
      }
  }
  
  \f
-MAIN: {
+if (!caller()) {
      my $st;
  
      if ($dumpfilters) {
@@ -814,10 +865,11 @@ MAIN: {
      $run and %folders and update_dot_folders(\%folders);
  
      maildir_spam();
-
-    @SPAM and (exec('scan', '+SPAM', @SPAM) or die);
+    scan_spam();
  }
  
+1;
+
  \f
  __END__