#!/usr/bin/perl -w

=pod

=head1 NAME

spamular - Report SpamAssassin rule averages for mailboxes

=head1 SYNOPSIS

    perl spamular /path/to/maildir/*

=head1 DESCRIPTION

This program analyzes a Maildir-style mailbox, incrementing counters for
each SpamAssassin test that was hit.  Totals are then sent to STDOUT: the
test name, the number and percentage of analyzed messages in which the test
hit, and the description of the test.

=head1 BUGS

Assumes /var/lib/spamassassin is the location of the SpamAssassin rules.

=head1 NOTES

My girlfriend is gone for a couple weeks, and I'm smoking in the house.

=head1 AUTHOR

Johnny Cuervo

=head1 LICENSE

Released into the public domain 17 Dec 2007 by the author.

=cut

use strict;
use warnings;

use Mail::Header;

# Spinner frames.  baton() rotates the list so element 0 is always the
# frame that will be drawn next.
my @baton_frames = qw(/ - \ |);

# Test name => description text gathered from "describe" lines.
my %test_description;

# Test name => number of messages whose X-Spam-Status listed that test.
my %hits;

#
# baton()
#
# Draws one frame of the progress spinner on STDOUT.  It backs up over the
# previously printed character, blanks it, backs up again and prints the
# current frame, then advances to the next frame.
#
sub baton {
    print "\b \b", $baton_frames[0];
    push @baton_frames, shift @baton_frames;
    return;
}

#
# load_test_descriptions([$path])
#
# Fills %test_description from "describe TEST_NAME text" lines.
#
#   $path - file or directory to read; defaults to /var/lib/spamassassin.
#
# A regular file is scanned line by line.  A directory is walked
# recursively, skipping every entry whose name starts with a dot.  Anything
# that cannot be opened, or that is neither a file nor a directory, is
# silently ignored.  When the same test is described more than once, the
# description texts are concatenated.
#
# Returns nothing.
#
sub load_test_descriptions {
    my $path = shift || '/var/lib/spamassassin';

    if (-f $path) {
        open my $cf, '<', $path or return;
        while (my $line = <$cf>) {
            if ($line =~ /^describe\s+(\S+)\s+(.*?)$/) {
                my ($test, $text) = ($1, $2);
                $test_description{$test} ||= '';
                $test_description{$test} .= $text;
            }
        }
        close $cf;
        return;
    }

    return if !-d $path;

    opendir my $dirhandle, $path or return;
    while (defined(my $entry = readdir $dirhandle)) {
        # Skips ".", ".." and hidden entries alike.
        next if substr($entry, 0, 1) eq '.';
        load_test_descriptions(join '/', $path, $entry);
    }
    closedir $dirhandle;

    return;
}

#
# main
#

$| = 1;    # Unbuffered STDOUT so the spinner animates.

load_test_descriptions();

# Number of messages that carried a usable X-Spam-Status header.
my $total = 0;

print " ";    # Placeholder character for the first baton() call to erase.

MESSAGE:
for my $msg (@ARGV) {
    my $fh;
    if (!open $fh, '<', $msg) {
        warn "$msg: $!";
        next MESSAGE;
    }

    baton();

    my $hdr  = Mail::Header->new($fh, 'Modify' => 1);
    my $spam = $hdr->get('X-Spam-Status');
    close $fh;

    if (!$spam) {
        warn "Couldn't get X-Spam-Status header from $msg\n";
        next MESSAGE;
    }

    #
    # Normalize the (possibly folded) header value into one line of
    # space-separated key=value words.
    #
    $spam =~ s/\n//g;              # Want one line
    $spam =~ s/\s+/ /g;            # With single spaces
    $spam =~ s/,\s+/,/g;           # And no spaces after commas
    $spam =~ s/^(?:Yes|No),//g;    # Drop the verdict; it is not key=value

    #
    # Walk the words looking for "tests=A,B,C" and count every test named
    # there.  Words without a key or without a value are skipped.
    #
    for my $word (split /\s+/, $spam) {
        my ($left, $right) = split /=/, $word;
        next if !defined $left || $left ne 'tests';
        next if !defined $right;
        $hits{$_}++ for split /,/, $right;
    }

    $total++;
}

print "\b \b";    # Kill the baton

#
# Report, most frequently hit test first.  Ties are broken by test name so
# the output order is deterministic.  %hits is only populated for messages
# that also incremented $total, so $total is non-zero whenever this loop
# body runs.
#
for my $test (sort { $hits{$b} <=> $hits{$a} or $a cmp $b } keys %hits) {
    printf "%22s %3d %.1f %s\n",
        $test,
        $hits{$test},
        ($hits{$test} / $total) * 100,
        ($test_description{$test} || "No description");
}

__END__