| #!/usr/bin/perl -w |
| # Copyright (C) all contributors <meta@public-inbox.org> |
| # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> |
| # |
| # Used for training spam (via SpamAssassin) and removing messages from a |
| # public-inbox |
# Usage text for this script.  It is passed to die() (and so written to
# stderr) when option parsing fails, when the action argument is missing,
# and -- appended to the error -- when the action is not recognized.
| my $help = <<EOF; |
| usage: public-inbox-learn [OPTIONS] [spam|ham|rm] </path/to/RFC2822_message |
| |
| required action argument: |
| |
| spam unindex the message and train as spam |
| rm remove the message without training as spam |
| ham index the message (based on To:/Cc: headers) and train as ham |
| |
| options: |
| |
| --all scan all inboxes on `rm' |
| -k keep going after errors (for read-only inboxes) |
| |
| See public-inbox-learn(1) man page for full documentation. |
| EOF |
| use v5.12; |
| use PublicInbox::Config; |
| use PublicInbox::InboxWritable; |
| use PublicInbox::Eml; |
| use PublicInbox::Address; |
| use PublicInbox::Spamcheck::Spamc; |
| use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); |
# Command-line switches: --all, -k/--keep-going and -h/--help.
# A parse failure aborts with the usage text on stderr.
my %opt = (all => 0);
GetOptions(\%opt, qw(all keep-going|k help|h)) or die $help;

# An explicit request for help is not an error: show the usage text on
# stdout and exit successfully instead of falling through to the
# "missing action argument" failure path.
if ($opt{help}) {
    print $help;
    exit 0;
}
| use PublicInbox::Import; |
| |
# The first positional argument selects the action; it must be one of
# ham, spam or rm.  --all is only meaningful together with `rm'.
my $train = shift;
die $help unless $train;
unless ($train =~ /\A(?:ham|spam|rm)\z/) {
    die "`$train' not recognized.\n$help";
}
if ($opt{all} && $train ne 'rm') {
    die "--all only works with `rm'\n";
}
| |
# spamc wrapper used for ham/spam training, and the public-inbox config
# used to look up inboxes.
my $spamc = PublicInbox::Spamcheck::Spamc->new();
my $pi_cfg = PublicInbox::Config->new();

# Localize the Import package variable before the import settings are
# loaded from the config.
# NOTE(review): presumably load_config() may assign to it -- confirm.
local $PublicInbox::Import::DROP_UNIQUE_UNSUB;
PublicInbox::Import::load_config($pi_cfg);

my $err;       # spamc training error (if any), reported just before exit
my @fail_ibx;  # inboxes which failed while running with --keep-going
# Read the whole message from stdin, strip the leading mbox "From " line,
# feed the raw text to spamc for training (unless we are only removing),
# and finally parse it.  A training failure is remembered in $err rather
# than aborting, so the inbox operations below still run.
my $mime = PublicInbox::Eml->new(do {
    my $raw = PublicInbox::IO::read_all(\*STDIN);
    PublicInbox::Eml::strip_from($raw);

    unless ($train eq 'rm') {
        eval {
            # at most one of these fires: $train is fixed for the run
            $spamc->hamlearn(\$raw) if $train eq 'ham';
            $spamc->spamlearn(\$raw) if $train eq 'spam';
            die "E: spamc failed with: $?\n" if $?;
        };
        $err = $@;
    }
    \$raw;
});
| |
# Report a failed inbox operation.  The message is built from $@, so this
# must be called right after the failing eval, before anything can reset
# $@.  Without --keep-going the error is fatal; with it, we warn and
# remember the inbox so the exit status can reflect the failure.
my $ibx_fail = sub {
    my $ibx = shift;
    my $msg = "E: $@ ($ibx->{inboxdir})\n";
    if ($opt{'keep-going'}) {
        warn $msg;
        push @fail_ibx, $ibx;
    } else {
        die $msg;
    }
};
| |
# Apply the requested action to a single inbox:
#   rm  => remove $mime from the inbox
#   ham => set list headers on $mime and add it to the inbox
# $addr is the address the inbox was matched by; it becomes the inbox's
# primary address unless GIT_COMMITTER_EMAIL is set.  Any exception is
# handed to $ibx_fail.
my $remove_or_add = sub {
    my ($ibx, $addr) = @_;
    eval {
        # The GIT_COMMITTER_* environment is read but never modified, so
        # the resulting history records who did the training.
        my $committer = $ENV{GIT_COMMITTER_NAME} // $ibx->{name};
        my $email = $ENV{GIT_COMMITTER_EMAIL} // $addr;
        $ibx->{name} = $committer;
        $ibx->{-primary_address} = $email;

        $ibx = PublicInbox::InboxWritable->new($ibx);
        $ibx->{indexlevel} = $ibx->detect_indexlevel;
        my $importer = $ibx->importer(0);

        if ($train eq 'ham') {
            # A human has reviewed the message by now, so there is no
            # spam check here.
            PublicInbox::MDA->set_list_headers($mime, $ibx);

            # Ham is trained when a message is marked into a SEEN state,
            # so adding it again is expected to be idempotent.
            $importer->add($mime);
        } elsif ($train eq 'rm') {
            # Removal has to be idempotent: an inotify-based trainer may
            # run once per cross-posted copy, and this script already
            # visits every list in ~/.public-inbox/config.
            $importer->remove($mime, $train);
        }
        $importer->done;
    };
    if ($@) {
        $ibx_fail->($ibx);
    }
};
| |
# each_inbox callback: remove $mime from the given inbox, handing any
# exception to $ibx_fail.
my $remove_all = sub {
    my $ibx = shift;
    eval {
        $ibx = PublicInbox::InboxWritable->new($ibx);
        my $importer = $ibx->importer(0);
        $importer->remove($mime, $train);
        $importer->done;
    };
    if ($@) {
        $ibx_fail->($ibx);
    }
};
| |
# Spam is frequently Bcc:-ed, so it is removed from every known inbox;
# `rm --all' takes the same route.  Everything else only touches the
# inboxes the message was actually addressed to.
my $scan_all = $train eq 'spam' || ($train eq 'rm' && $opt{all});
if ($scan_all) {
    $pi_cfg->each_inbox($remove_all);
} else {
    require PublicInbox::MDA;

    # Map every Cc:/To: address to its inbox, or to 0 when the address
    # does not belong to a configured inbox.
    my %dests;
    for my $hdr (map { $mime->header($_) } qw(Cc To)) {
        for my $email (PublicInbox::Address::emails($hdr)) {
            my $addr = lc $email;
            $dests{$addr} //= $pi_cfg->lookup($addr) // 0;
        }
    }

    # The message may be cross-posted to several inboxes, and one inbox
    # may be reachable via several addresses: handle each inbox once,
    # keyed by the numeric value of its reference.
    my %seen;
    for my $addr (keys %dests) {
        my $ibx = $dests{$addr};
        next unless ref $ibx;    # 0 => no inbox for this address
        next if $seen{0 + $ibx}++;
        $remove_or_add->($ibx, $addr);
    }

    # Also cover inboxes matched through the List-Id lookup that were
    # not already handled via an address.
    my $by_list_id = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime);
    for my $ibx (@$by_list_id) {
        next if $seen{0 + $ibx}++;
        $remove_or_add->($ibx, $ibx->{-primary_address});
    }
}
| |
# Exit non-zero if spamc training failed ($err) or if any inbox operation
# failed while running with --keep-going (@fail_ibx).  Each condition is
# reported only when it actually occurred, so a pure spamc failure no
# longer prints a misleading "0 inbox(es) failed" line.
if ($err || @fail_ibx) {
    warn $err if $err;
    warn 'E: ', scalar(@fail_ibx), " inbox(es) failed\n" if @fail_ibx;
    exit 1;
}