| # Copyright (C) all contributors <meta@public-inbox.org> |
| # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> |
| use strict; |
| use warnings; |
| use Cwd qw(getcwd); |
| use PublicInbox::MID qw(mid2path); |
| use PublicInbox::Git; |
| use PublicInbox::InboxWritable; |
| use PublicInbox::TestCommon; |
| use PublicInbox::Import; |
| use PublicInbox::IO qw(write_file); |
| use File::Path qw(remove_tree); |
# Scratch layout for this test: a fake $HOME holding the public-inbox
# config, plus the bare repository that deliveries are committed to.
my ($tmpdir, $for_destroy) = tmpdir();
my $home = "$tmpdir/pi-home";
my $pi_home = "$home/.public-inbox";
my $pi_config = "$pi_home/config";
my $maindir = "$tmpdir/main.git";

# Directories holding the mock spamc executables are resolved against the
# current working directory and placed in front of the inherited PATH.
my $main_bin = getcwd() . '/t/main-bin';
my $main_path = join(':', $main_bin, $ENV{PATH}); # for spamc ham mock
my $fail_bin = getcwd() . '/t/fail-bin';
my $fail_path = join(':', $fail_bin, $ENV{PATH}); # for spamc spam mock

# Inbox identity, config section prefix, the directory later exported as
# PI_EMERGENCY, and a handle on the repository used for inspection.
my $addr = 'test-public@example.com';
my $cfgpfx = 'publicinbox.test';
my $faildir = "$home/faildir/";
my $git = PublicInbox::Git->new($maindir);
| |
# Feed a message to -mda that must be accepted without error but must NOT
# be committed, and verify it ends up in the emergency directory instead.
#
# Arguments:
#   $good_rev - the `git rev-list HEAD' output expected before and after
#   $msg      - short label appended to every test description
#   $in       - raw message passed to -mda on stdin
#
# Returns an array ref [ $in, $out, $err ] so the caller can inspect what
# -mda wrote to stdout and stderr.
#
# No prototype is declared: prototypes have no effect on calls made
# through a code reference, which is the only way this sub is invoked.
my $fail_bad_header = sub {
	my ($good_rev, $msg, $in) = @_;

	# start from an empty faildir so the count checked below is exact
	my @f = glob("$faildir/*/*");
	unlink @f if @f;

	my ($out, $err) = ("", "");
	my $opt = { 0 => \$in, 1 => \$out, 2 => \$err };
	local $ENV{PATH} = $main_path;
	ok(run_script(['-mda'], undef, $opt),
		"no error on undeliverable ($msg)");

	my $rev = $git->qx(qw(rev-list HEAD));
	chomp $rev;
	is($rev, $good_rev, "bad revision not committed ($msg)");

	@f = glob("$faildir/*/*");
	is(scalar @f, 1, "faildir written to ($msg)");
	return [ $in, $out, $err ];
};
| |
# One-time fixture setup: check that both spamc mocks are executable,
# create the fake home directory tree, initialize the bare repository and
# append the inbox section to the public-inbox config file.
{
	ok(-x "$main_bin/spamc",
		"spamc ham mock found (run in top of source tree)");
	ok(-x "$fail_bin/spamc",
		"spamc spam mock found (run in top of source tree)");

	# is() takes ($got, $expected, $name); keeping that order makes the
	# diagnostics on failure read the right way around.
	is(mkdir($home, 0755), 1, "setup ~/ for testing");
	is(mkdir($pi_home, 0755), 1, "setup ~/.public-inbox");
	PublicInbox::Import::init_bare($maindir);

	write_file('>>', $pi_config, <<EOF);
[publicinbox "test"]
address = $addr
inboxdir = $maindir
EOF
}
| |
# Sanity-check address and date parsing on the UTF-8 fixture, then export
# the parsed author name as GIT_COMMITTER_NAME for the rest of the script.
# NOTE: the `use' statements are processed at compile time, so the eval
# only traps runtime failures of the statements that follow them; any such
# failure is re-thrown by the `die' after the block.
local $ENV{GIT_COMMITTER_NAME} = eval {
	use PublicInbox::MDA;
	use PublicInbox::Address;
	use Encode qw/encode/;
	my $msg = eml_load('t/utf8.eml');
	my $from = $msg->header('From');
	my ($author) = PublicInbox::Address::names($from);
	my ($email) = PublicInbox::Address::emails($from);
	my $date = $msg->header('Date');

	# With Encode::HTMLCREF, characters that us-ascii cannot represent
	# are emitted as decimal character references, so the expected
	# value has to be spelled with the reference rather than the
	# literal accented character.  A copy ($tmp) is encoded because
	# encode() with a CHECK value may modify its source argument.
	is(encode('us-ascii', my $tmp = $author, Encode::HTMLCREF),
		'El&#233;anor', 'HTML conversion is correct');
	is($email, 'e@example.com', 'email parsed correctly');
	is($date, 'Thu, 01 Jan 1970 00:00:00 +0000',
		'message date parsed correctly');
	$author;
};
die $@ if $@;
| |
# Core -mda behaviour: usage output, a missing recipient, one successful
# delivery, a delivery rejected via the failing spamc mock, and a series
# of messages that must end up in the emergency directory uncommitted.
{
	my $good_rev; # rev-list output after the single good delivery
	local $ENV{PI_EMERGENCY} = $faildir;
	local $ENV{HOME} = $home;
	local $ENV{ORIGINAL_RECIPIENT} = $addr;
	ok(run_script([qw(-mda --help)], undef,
		{ 1 => \my $out, 2 => \my $err }), '-mda --help');
	like($out, qr/usage:/, 'usage shown w/ --help');
	ok(!run_script([qw(-mda --bogus)], undef,
		{ 1 => \$out, 2 => \$err }), '-mda --bogus fails');
	like($err, qr/usage:/, 'usage shown on bogus switch');

	my $in = <<EOF;
From: Me <me\@example.com>
To: You <you\@example.com>
Cc: $addr
Message-Id: <blah\@example.com>
Subject: hihi
Date: Thu, 01 Jan 1970 00:00:00 +0000

EOF
	{
		local $ENV{PATH} = $main_path;
		# ORIGINAL_RECIPIENT is passed as undef for this run only
		ok(!run_script(['-mda'], { ORIGINAL_RECIPIENT => undef },
			{ 0 => \$in, 2 => \$err }),
			'missing ORIGINAL_RECIPIENT fails');
		is($? >> 8, 67, 'got EX_NOUSER');
		like($err, qr/\bORIGINAL_RECIPIENT\b/,
			'ORIGINAL_RECIPIENT noted in stderr');
		is(unlink(glob("$faildir/*/*")), 1, 'unlinked failed message');
	}

	# ensure successful message delivery
	{
		local $ENV{PATH} = $main_path;
		ok(run_script(['-mda'], undef, { 0 => \$in }),
			'-mda delivers good message');
		my $rev = $git->qx(qw(rev-list HEAD));
		like($rev, qr/\A[a-f0-9]{40,64}/, "good revision committed");
		chomp $rev;
		my $cmt = $git->cat_file($rev);
		like($$cmt, qr/^author Me <me\@example\.com> 0 \+0000\n/m,
			"author info set correctly");
		like($$cmt, qr/^committer test <test-public\@example\.com>/m,
			"committer info set correctly");
		$good_rev = $rev;
	}

	# ensure failures work, fail with bad spamc
	{
		my @prev = glob("$faildir/new/*");
		is(scalar @prev, 0 , "nothing in PI_EMERGENCY before");
		local $ENV{PATH} = $fail_path;
		ok(run_script(['-mda'], undef, { 0 => \$in }),
			'-mda exits successfully with failing spamc');
		my @revs = $git->qx(qw(rev-list HEAD));
		is(scalar @revs, 1, "bad revision not committed");
		my @new = glob("$faildir/new/*");
		is(scalar @new, 1, "PI_EMERGENCY is written to");
	}

	# Each <<"" here-document below ends at the first empty line.
	$fail_bad_header->($good_rev, "bad recipient", <<"");
From: Me <me\@example.com>
To: You <you\@example.com>
Message-Id: <bad-recipient\@example.com>
Subject: hihi
Date: Thu, 01 Jan 1970 00:00:00 +0000

	my $fail = $fail_bad_header->($good_rev, "duplicate Message-ID", <<"");
From: Me <me\@example.com>
To: You <you\@example.com>
Cc: $addr
Message-ID: <blah\@example.com>
Subject: hihi
Date: Thu, 01 Jan 1970 00:00:00 +0000

	like($fail->[2], qr/CONFLICT/, "duplicate Message-ID message");

	$fail_bad_header->($good_rev, "missing From:", <<"");
To: $addr
Message-ID: <missing-from\@example.com>
Subject: hihi
Date: Thu, 01 Jan 1970 00:00:00 +0000

	$fail_bad_header->($good_rev, "short subject:", <<"");
To: $addr
From: cat\@example.com
Message-ID: <short-subject\@example.com>
Subject: a
Date: Thu, 01 Jan 1970 00:00:00 +0000

	$fail_bad_header->($good_rev, "no date", <<"");
To: $addr
From: u\@example.com
Message-ID: <no-date\@example.com>
Subject: hihi

	$fail_bad_header->($good_rev, "bad date", <<"");
To: $addr
From: u\@example.com
Message-ID: <bad-date\@example.com>
Subject: hihi
Date: deadbeef

}
| |
# spam training: deliver a message with -mda, then feed the stored copy to
# `-learn spam' twice to check that repeating the operation does not fail.
{
	local $ENV{PI_EMERGENCY} = $faildir;
	local $ENV{HOME} = $home;
	local $ENV{ORIGINAL_RECIPIENT} = $addr;
	local $ENV{PATH} = $main_path;
	my $mid = 'spam-train@example.com';
	my $in = <<EOF;
From: Spammer <spammer\@example.com>
To: You <you\@example.com>
Cc: $addr
Message-ID: <$mid>
Subject: this message will be trained as spam
Date: Thu, 01 Jan 1970 00:00:00 +0000

EOF
	{
		# deliver the spam message, first
		ok(run_script(['-mda'], undef, { 0 => \$in }),
			'-mda delivers message to be trained as spam');
		my $path = mid2path($mid);
		my $msg = $git->cat_file("HEAD:$path");
		like($$msg, qr/\Q$mid\E/, "message delivered");

		# now train it
		local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com';
		local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com';
		# localize first so the outer value is restored on scope exit,
		# then remove the variable from the environment entirely
		local $ENV{GIT_COMMITTER_NAME};
		delete $ENV{GIT_COMMITTER_NAME};
		# $msg is already a scalar reference, so it is passed as-is
		ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }),
			"no failure from learning spam");
		ok(run_script(['-learn', 'spam'], undef, { 0 => $msg }),
			"no failure from learning spam idempotently");
	}
}
| |
# train ham message: `-learn ham' must store the message in the inbox, may
# be repeated without failing, and must not leave <html> in the stored
# copy of the MIME fixture.
{
	# trainer identity; these should be overridden
	local $ENV{GIT_COMMITTER_EMAIL} = 'trainer@example.com';
	local $ENV{GIT_AUTHOR_EMAIL} = 'trainer@example.com';
	local $ENV{PATH} = $main_path;
	local $ENV{ORIGINAL_RECIPIENT} = $addr;
	local $ENV{HOME} = $home;
	local $ENV{PI_EMERGENCY} = $faildir;

	my $ham_mid = 'ham-train@example.com';
	my $ham = <<EOF;
From: False-positive <hammer\@example.com>
To: You <you\@example.com>
Cc: $addr
Message-ID: <$ham_mid>
Subject: this message will be trained as spam
Date: Thu, 01 Jan 1970 00:00:00 +0000

EOF

	# first pass imports the message, second pass must be harmless
	ok(run_script(['-learn', 'ham'], undef, { 0 => \$ham }),
		"learned ham without failure");
	my $stored = $git->cat_file('HEAD:' . mid2path($ham_mid));
	like($$stored, qr/\Q$ham_mid\E/, "ham message delivered");
	ok(run_script(['-learn', 'ham'], undef, { 0 => \$ham }),
		"learned ham idempotently ");

	# ensure trained email is filtered, too
	my $eml = eml_load('t/mda-mime.eml');
	my ($mime_mid) = ($eml->header_raw('message-id') =~ /<([^>]+)>/);
	my $mime_raw = $eml->as_string;
	ok(run_script(['-learn', 'ham'], undef, { 0 => \$mime_raw }),
		"learned ham without failure");
	my $filtered = $git->cat_file('HEAD:' . mid2path($mime_mid));
	like($$filtered, qr/<\Q$mime_mid\E>/, "ham message delivered");
	unlike($$filtered, qr/<html>/i, '<html> filtered');
}
| |
# List-ID based delivery: with ORIGINAL_RECIPIENT removed from the
# environment, messages are delivered by matching their List-Id header
# against the configured listid.  Also covers --no-precheck, multiple
# List-ID headers, `-learn rm' after an address change, and the
# dropUniqueUnsubscribe import option.
{
	local $ENV{PI_EMERGENCY} = $faildir;
	local $ENV{HOME} = $home;
	# localize first so the outer value is restored on scope exit,
	# then remove the variable from the environment entirely
	local $ENV{ORIGINAL_RECIPIENT} = undef;
	delete $ENV{ORIGINAL_RECIPIENT};
	local $ENV{PATH} = $main_path;
	my $list_id = 'foo.example.com';
	my $mid = 'list-id-delivery@example.com';
	my $in = <<EOF;
From: user <user\@example.com>
To: You <you\@example.com>
Cc: $addr
Message-ID: <$mid>
List-Id: <$list_id>
Subject: this message will be trained as spam
Date: Thu, 01 Jan 1970 00:00:00 +0000

EOF
	# the configured value is upper-cased while the header is not
	xsys(qw(git config --file), $pi_config, "$cfgpfx.listid", uc $list_id);
	$? == 0 or die "failed to set listid $?";
	ok(run_script(['-mda'], undef, { 0 => \$in }),
		'mda OK with List-Id match');
	my $path = mid2path($mid);
	my $msg = $git->cat_file("HEAD:$path");
	like($$msg, qr/\Q$list_id\E/, 'delivered message w/ List-ID matches');

	# try a message w/o precheck
	$in = <<EOF;
To: You <you\@example.com>
List-Id: <$list_id>

this message would not be accepted without --no-precheck
EOF
	my ($out, $err) = ('', '');
	# $rdr references $in/$out/$err, so later assignments to those
	# variables are seen by every subsequent run_script call using it
	my $rdr = { 0 => \$in, 1 => \$out, 2 => \$err };
	ok(run_script(['-mda', '--no-precheck'], undef, $rdr),
		'mda OK with List-Id match and --no-precheck');
	my $cur = $git->qx(qw(diff HEAD~1..HEAD));
	like($cur, qr/this message would not be accepted without --no-precheck/,
		'--no-precheck delivered message anyways');

	# try a message with multiple List-ID headers
	$in = <<EOF;
List-ID: <foo.bar>
List-ID: <$list_id>
Message-ID: <2lids\@example>
Subject: two List-IDs
From: user <user\@example.com>
To: $addr
Date: Fri, 02 Oct 1993 00:00:00 +0000

EOF
	($out, $err) = ('', '');
	ok(run_script(['-mda'], undef, $rdr),
		'mda OK with multiple List-Id matches');
	$cur = $git->qx(qw(diff HEAD~1..HEAD));
	like($cur, qr/^\+Message-ID: <2lids\@example>/sm,
		'multi List-ID match delivered');
	like($err, qr/multiple List-ID/, 'warned about multiple List-ID');

	# ensure -learn rm works after inbox address is updated
	($out, $err) = ('', '');
	# $addr is a file-level variable; later blocks rely on the new value
	$addr = 'updated-address@example.com';
	xsys(qw(git config --file), $pi_config, "$cfgpfx.address", $addr);
	$? == 0 or die "failed to set address $?";
	ok(run_script(['-learn', 'rm'], undef, $rdr), 'rm-ed via -learn');
	$cur = $git->qx(qw(diff HEAD~1..HEAD));
	like($cur, qr/^-Message-ID: <2lids\@example>/sm, 'changed in git');

	# ensure we can strip List-Unsubscribe
	$in = <<EOF;
To: You <you\@example.com>
List-Id: <$list_id>
Message-ID: <unsubscribe-1\@example>
Subject: unsubscribe-1
From: user <user\@example.com>
To: $addr
Date: Fri, 02 Oct 1993 00:00:00 +0000
List-Unsubscribe: <https://example.com/some-UUID-here/listname>
List-Unsubscribe-Post: List-Unsubscribe=One-Click

List-Unsubscribe should be stripped
EOF
	write_file('>>', $pi_config, <<EOM);
[publicinboxImport]
dropUniqueUnsubscribe
EOM
	$out = $err = '';
	ok(run_script([qw(-mda)], undef, $rdr), 'mda w/ dropUniqueUnsubscribe');
	# only the added ('+') lines of the newest commit are of interest
	$cur = join('', grep(/^\+/, $git->qx(qw(diff HEAD~1..HEAD))));
	like($cur, qr/Message-ID: <unsubscribe-1/, 'imported new message');
	unlike($cur, qr/some-UUID-here/, 'List-Unsubscribe gone');
	unlike($cur, qr/List-Unsubscribe-Post/i, 'List-Unsubscribe-Post gone');

	$in =~ s/unsubscribe-1/unsubscribe-2/g or xbail('BUG: s// fail');
	ok(run_script([qw(-learn ham)], undef, $rdr),
		'learn ham w/ dropUniqueUnsubscribe');
	$cur = join('', grep(/^\+/, $git->qx(qw(diff HEAD~1..HEAD))));
	like($cur, qr/Message-ID: <unsubscribe-2/, 'learn ham');
	unlike($cur, qr/some-UUID-here/, 'List-Unsubscribe gone on learn ham');
	unlike($cur, qr/List-Unsubscribe-Post/i,
		'List-Unsubscribe-Post gone on learn ham');
}
| |
# Index-level checks: index the inbox at "medium", then confirm that
# neither -learn nor -mda create Xapian files once they are removed, and
# that -mda leaves an inbox without public-inbox/ unindexed.
SKIP: {
	require_mods(qw(DBD::SQLite Xapian), 1);
	local $ENV{PI_EMERGENCY} = $faildir;
	local $ENV{HOME} = $home;
	local $ENV{PATH} = $main_path;

	# entries below public-inbox/xapian*/ other than over.sqlite3
	my $xap_files = sub {
		grep { !m!/over\.sqlite3! }
			glob("$maindir/public-inbox/xapian*/*");
	};

	my ($out, $err) = ('', '');
	my $rdr = { 1 => \$out, 2 => \$err };
	ok(run_script([qw(-index -L medium), $maindir], undef, $rdr),
		'index inbox');

	my $in = <<'EOM';
From: a@example.com
To: updated-address@example.com
Subject: this is a ham message for learn
Date: Fri, 02 Oct 1993 00:00:00 +0000
Message-ID: <medium-ham@example>

yum
EOM
	# stdin is attached by reference, so reassigning $in below changes
	# what later runs read
	$rdr->{0} = \$in;
	ok(run_script([qw(-learn ham)], undef, $rdr), 'learn medium ham');
	is($err, '', 'nothing in stderr after medium -learn');
	my $msg = $git->cat_file('HEAD:' . mid2path('medium-ham@example'));
	like($$msg, qr/medium-ham/, 'medium ham added via -learn');

	my @xap = $xap_files->();
	ok(remove_tree(@xap), 'rm Xapian files to convert to indexlevel=basic');
	$in =~ s/medium-ham/basic-ham/g or xbail('BUG: no s//');
	ok(run_script([qw(-learn ham)], undef, $rdr), 'learn basic ham');
	is($err, '', 'nothing in stderr after basic -learn');
	$msg = $git->cat_file('HEAD:' . mid2path('basic-ham@example'));
	like($$msg, qr/basic-ham/, 'basic ham added via -learn');
	is_deeply([ $xap_files->() ], [], 'no Xapian files created by -learn');

	$in = <<'EOM';
From: a@example.com
To: updated-address@example.com
Subject: basic message for mda
Date: Fri, 02 Oct 1993 00:00:00 +0000
Message-ID: <basic-for-mda@example>

basic
EOM
	# only the -mda runs from here on see ORIGINAL_RECIPIENT
	local $ENV{ORIGINAL_RECIPIENT} = $addr;
	ok(run_script(['-mda'], undef, $rdr), '-mda for basic');
	is_deeply([ $xap_files->() ], [], 'no Xapian files created by -mda');

	# try ensure completely unindexed v1 stays unindexed
	remove_tree("$maindir/public-inbox");
	$in = <<'EOM';
From: a@example.com
To: updated-address@example.com
Subject: unnidexed message for mda
Date: Fri, 02 Oct 1993 00:00:00 +0000
Message-ID: <unindexed-for-mda@example>

unindexed
EOM

	ok(run_script(['-mda'], undef, $rdr), '-mda for unindexed');
	ok(!-e "$maindir/public-inbox", 'no v1 index created by default');
};

done_testing();