| #!perl -w |
| # Copyright (C) all contributors <meta@public-inbox.org> |
| # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> |
| use v5.12; |
| use PublicInbox::TestCommon; |
| use Benchmark qw(:all :hireswallclock); |
| use PublicInbox::Inbox; |
| use PublicInbox::ViewDiff; |
| use PublicInbox::MsgIter qw(msg_part_text); |
| my $nr = $ENV{NR} // 5; |
| my $inboxdir = $ENV{GIANT_INBOX_DIR}; |
| plan skip_all => "GIANT_INBOX_DIR not defined for $0" unless $inboxdir; |
| |
| my @cat = qw(cat-file --buffer --batch-check --batch-all-objects); |
| if (require_git(v2.19, 1)) { |
| push @cat, '--unordered'; |
| } else { |
| warn |
| "git <2.19, cat-file lacks --unordered, locality suffers\n"; |
| } |
| my $ibx = PublicInbox::Inbox->new({ inboxdir => $inboxdir, name => 'name' }); |
| my $git = $ibx->git; |
| my ($eml, $res, $oid, $type, $n, $m); |
| my ($part, $s, $err, @top); |
| sub text_part { |
| $part = $_[0]->[0]; |
| ($s, $err) = msg_part_text($part, $part->content_type || 'text/plain'); |
| $s // return; |
| $s =~ s/\r+\n/\n/sg; |
| } |
| |
| my %extract_cb = ( |
| var => sub { # callback for Eml->each_part |
| text_part(@_) // return; |
| my @top = split($PublicInbox::ViewDiff::EXTRACT_DIFFS, $s); |
| }, |
| slash => sub { # callback for Eml->each_part |
| text_part(@_) // return; |
| my @top = split(/$PublicInbox::ViewDiff::EXTRACT_DIFFS/, $s); |
| }, |
| slash_o => sub { # callback for Eml->each_part |
| text_part(@_) // return; |
| my @top = split(/$PublicInbox::ViewDiff::EXTRACT_DIFFS/o, $s); |
| }, |
| ); |
| |
| my $oid_cb = sub { |
| my ($bref, undef, undef, undef, $cb) = @_; |
| ++$m; |
| $eml = PublicInbox::Eml->new($bref); |
| $eml->each_part($cb); |
| }; |
| |
| # ensure all --batch-check processes are ready |
| my @cats = map { |
| my $fh = $git->popen(@cat); |
| vec(my $vec = '', fileno($fh), 1) = 1; |
| select($vec, undef, undef, 60) or |
| xbail 'timed out waiting for --batch-check'; |
| $fh |
| } (1..((scalar keys %extract_cb) * $nr)); |
| |
| my $time; |
| while (my ($name, $eml_cb) = each %extract_cb) { |
| $time->{$name} = sub { |
| my $fh = shift @cats // xbail "no --batch-check for $name"; |
| $n = $m = 0; |
| while (<$fh>) { |
| ($oid, $type) = split / /; |
| next if $type ne 'blob'; |
| ++$n; |
| $git->cat_async($oid, $oid_cb, $eml_cb); |
| } |
| $git->async_wait_all; |
| is $n, $m, "$n of $m messages scanned ($name)"; |
| }; |
| } |
| |
| timethese($nr, $time); |
| done_testing; |