| #!perl -w |
| # Copyright (C) all contributors <meta@public-inbox.org> |
| # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> |
| use v5.12; |
| use PublicInbox::TestCommon; |
| use Cwd qw(getcwd); |
| use List::Util qw(sum); |
| use autodie qw(close mkdir open rename); |
| require_mods(qw(json Xapian +SCM_RIGHTS DBD::SQLite)); |
| use_ok 'PublicInbox::CodeSearchIdx'; |
| use PublicInbox::Import; |
# Scratch area for every work tree and index created by this script.
# NOTE(review): $for_destroy is presumably a cleanup guard for $tmp --
# confirm in PublicInbox::TestCommon::tmpdir.
my ($tmp, $for_destroy) = tmpdir();

# remembered so "$pwd/COPYING" can be read from inside coderepo callbacks
my $pwd = getcwd();

# metadata keys that $no_metadata_set expects to be empty in every shard
my @unused_keys = qw(last_commit has_threadid skip_docdata);

local $ENV{PI_CONFIG} = '/dev/null';
# local $ENV{TAIL_ALL} = $ENV{TAIL_ALL} // 1; # while features are unstable

# redirect map handed to run_script; key 2 is inspected as stderr below,
# key 1 is presumably stdout
my ($cidx_out, $cidx_err);
my $opt = { 1 => \$cidx_out, 2 => \$cidx_err };
| |
# CodeSearchIdx->shard_worker was reworked to handle empty trees such as
# the one in the initial commit cvs2svn generated for xapian.git.
# This coderepo reproduces that shape: a root commit pointing at the empty
# tree, followed by one ordinary commit adding `f'.  core.sharedRepository
# is set to 0600 so the mode checks on the internal index have something
# to verify.
create_coderepo 'empty-tree-root-0600', tmpdir => "$tmp/wt0", sub {
	# Every step is chained with `&&' so the shell exits non-zero as
	# soon as any single git command fails; without the `&&' after
	# `git config', a failed init/config would go unnoticed.
	xsys_e([qw(/bin/sh -c), <<'EOM']);
git init -q &&
git config core.sharedRepository 0600 &&
tree=$(git mktree </dev/null) &&
head=$(git symbolic-ref HEAD) &&
cmt=$(echo 'empty root' | git commit-tree $tree) &&
git update-ref $head $cmt &&
echo hi >f &&
git add f &&
git commit -q -m hi &&
git gc -q
EOM
}; # /create_coderepo
| |
# Index wt0 into its own $GIT_DIR ("internal" cindex) and verify the
# modes of what got created.
ok(run_script([qw(-cindex --dangerous -q -g), "$tmp/wt0"]), 'cindex internal');
{
	my $cidx_dir = "$tmp/wt0/.git/public-inbox-cindex";

	# `_' reuses the stat buffer filled in by the -e test
	my $lock_exists = -e "$cidx_dir/cidx.lock";
	my $lock_mode = (stat(_))[2];
	ok($lock_exists, 'internal dir created');
	is($lock_mode & 0600, 0600, 'mode respects core.sharedRepository');

	my $dir_mode = (stat($cidx_dir))[2];
	is($dir_mode & 0700, 0700, 'dir mode respects core.sharedRepository');
}
| |
# git can emit NUL characters in diffs
# (see c4201214cbf10636e2c1ab9131573f735b42c8d4 in linux.git).
# This coderepo has two commits: one adding a copy of COPYING with a NUL
# byte spliced in, and one removing that NUL byte again.
my $zp = create_coderepo 'NUL in patch', sub {
	# run a snippet through /bin/sh in the new repo
	my $sh = sub { xsys_e([qw(/bin/sh -c), $_[0]]) };

	my $text = PublicInbox::IO::try_cat("$pwd/COPYING");
	xsys_e([qw(git init -q)]);

	# the NUL needs to be further than FIRST_FEW_BYTES (8000) in git.git
	unless ($text =~ s/\b(Limitation of Liability\.)\n\n/$1\n\0\n/s) {
		xbail "BUG: no `\\n\\n' in $pwd/COPYING";
	}
	PublicInbox::IO::write_file('>', 'f', $text);
	$sh->(<<'EOM');
git add f &&
git commit -q -m 'initial with NUL character'
EOM

	# second commit: take the NUL back out
	unless ($text =~ s/\n\0\n/\n\n/) {
		xbail "BUG: no `\\n\\0\\n'";
	}
	PublicInbox::IO::write_file('>', 'f', $text);
	$sh->(<<'EOM');
git add f &&
git commit -q -m 'remove NUL character' &&
git gc -q
EOM
}; # /create_coderepo
| |
# Build two external indices covering both coderepos: "$tmp/ext" at the
# default index level and "$tmp/med" at -L medium.
$zp = File::Spec->rel2abs($zp);
{
	my @g = ('-g', $zp, '-g', "$tmp/wt0");

	my $ext_cmd = [ qw(-cindex --dangerous -q -d), "$tmp/ext", @g ];
	ok(run_script($ext_cmd), 'cindex external');
	ok(-e "$tmp/ext/cidx.lock", 'external dir created');
	ok(!-d "$zp/.git/public-inbox-cindex", 'no cindex in original coderepo');

	my $med_cmd = [ qw(-cindex -L medium --dangerous -q -d),
			"$tmp/med", @g ];
	ok(run_script($med_cmd), 'cindex external medium');
}

# -compact must succeed on both index levels
SKIP: {
	have_xapian_compact 2;
	my @todo = (ext => 'full', med => 'medium');
	while (my ($dir, $lvl) = splice(@todo, 0, 2)) {
		ok(run_script([qw(-compact -q), "$tmp/$dir"]),
			"compact on $lvl");
	}
}
| |
# Asserts that every key in @unused_keys, plus every key listed in
# @$extra, has an undefined or empty metadata value in each shard.
#   $shard_nr - number used in the test name for the first shard;
#               incremented once per shard
#   $extra    - array ref of additional keys to check
#   $shards   - array ref of objects providing ->get_metadata
my $no_metadata_set = sub {
	my ($shard_nr, $extra, $shards) = @_;
	my @keys = (@unused_keys, @$extra);
	foreach my $shard (@$shards) {
		foreach my $key (@keys) {
			my $val = $shard->get_metadata($key);
			$val = '' unless defined $val;
			is($val, '', "metadata $key unset in shard #$shard_nr");
		}
		$shard_nr++;
	}
};
| |
# The default-level index must be larger on disk than the medium one,
# both before and after --reindex; afterwards check which shards carry
# the `indexlevel' metadata key.
{
	# sum of the sizes of all files matching $tmp/$dir/cidx*/*/*
	my $shard_bytes = sub {
		my ($dir) = @_;
		sum(map { -s $_ } glob("$tmp/$dir/cidx*/*/*"));
	};

	my $mid_size = $shard_bytes->('med');
	my $full_size = $shard_bytes->('ext');
	unless (ok($full_size > $mid_size, 'full size > mid size')) {
		diag "full=$full_size mid=$mid_size";
	}

	foreach my $dir ('med', 'ext') {
		my $cmd = [ qw(-cindex -q --reindex -u -d), "$tmp/$dir" ];
		ok(run_script($cmd), "reindex $dir");
	}

	$mid_size = $shard_bytes->('med');
	$full_size = $shard_bytes->('ext');
	unless (ok($full_size > $mid_size,
			'full size > mid size after reindex')) {
		diag "full=$full_size mid=$mid_size";
	}

	# at medium level, only shard #0 records indexlevel
	my $csrch = PublicInbox::CodeSearch->new("$tmp/med");
	my ($first, @rest) = $csrch->xdb_shards_flat;
	$no_metadata_set->(0, [], [ $first ]);
	is($first->get_metadata('indexlevel'), 'medium',
		'indexlevel set in shard #0');
	$no_metadata_set->(1, ['indexlevel'], \@rest);

	# once reindexed as full, no shard records indexlevel
	my $as_full = [ qw(-cindex -q -L full --reindex -u -d), "$tmp/med" ];
	ok(run_script($as_full), 'reindex medium as full');
	my @all = $csrch->xdb_shards_flat;
	$no_metadata_set->(0, ['indexlevel'], \@all);
}
| |
# NOTE(review): PublicInbox::CodeSearch->new is already called earlier in
# this file, so the class is presumably loaded by then -- confirm.
use_ok 'PublicInbox::CodeSearch';

# shared with $test_xhc further down
my @xh_args;
my $exp = [ 'initial with NUL character', 'remove NUL character' ];
my $zp_git = "$zp/.git";

{ # multi-repo search
	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");

	# sorted document data of every item in an mset
	my $subjects = sub {
		my ($mset) = @_;
		[ sort(map { $_->get_document->get_data } $mset->items) ];
	};

	# unfiltered: both commits of the `NUL in patch' repo match
	my $mset = $csrch->mset('NUL');
	is(scalar($mset->items), 2, 'got results');
	is_xdeeply($subjects->($mset), $exp, 'got expected subjects');

	# filtering on the other coderepo yields nothing
	$mset = $csrch->mset('NUL', { git_dir => "$tmp/wt0/.git" });
	is(scalar($mset->items), 0, 'no results with other GIT_DIR');

	# filtering on the matching coderepo yields the same two subjects
	$mset = $csrch->mset('NUL', { git_dir => $zp_git });
	is_xdeeply($subjects->($mset), $exp,
		'got expected subjects w/ GIT_DIR filter');

	my @shards = $csrch->xdb_shards_flat;
	$no_metadata_set->(0, ['indexlevel'], \@shards);
	@xh_args = $csrch->xh_args;
}
| |
# Exercises one already-started search helper client against the external
# index in "$tmp/ext":
#   - an `mset' request limited to $zp_git must report 2 hits whose
#     document data equal $exp
#   - the same request limited to "$tmp/wt0/.git" must report 0 hits and
#     no further lines
#   - closing the helper's IO handle must return false and leave
#     66 << 8 (EX_NOINPUT) in $?
# Arguments:
#   $xhc   - helper client; ->{impl}, ->{io} and ->mkreq are used
#   $csrch - optional PublicInbox::CodeSearch for "$tmp/ext"; callers
#            already pass one, a fresh handle is opened when omitted
my $test_xhc = sub {
	my ($xhc, $csrch) = @_;
	$csrch //= PublicInbox::CodeSearch->new("$tmp/ext");
	my $impl = $xhc->{impl};

	my $r = $xhc->mkreq([], qw(mset -c -g), $zp_git, @xh_args, 'NUL');
	chomp(my @l = <$r>);
	like shift(@l), qr/\bmset\.size=2\b/, "got expected header $impl";

	my (%docid2data, @got);
	for my $line (@l) {
		# result lines are NUL-delimited: docid, percent, rank
		my ($docid, $pct, $rank, @extra) = split(/\0/, $line);
		ok $pct >= 0 && $pct <= 100, 'pct in range';
		ok $rank >= 0 && $rank <= 100000, 'rank ok';
		is scalar(@extra), 0, 'no extra fields';
		my $data = $csrch->xdb->get_document($docid)->get_data;
		$docid2data{$docid} = $data;
		push @got, $data;
	}
	@got = sort @got;
	is_deeply(\@got, $exp, "expected doc_data $impl");

	$r = $xhc->mkreq([], qw(mset -c -g), "$tmp/wt0/.git", @xh_args, 'NUL');
	chomp(@l = <$r>);
	# the dot is escaped so only a literal `mset.size=0' header matches
	like shift(@l), qr/\bmset\.size=0\b/, "got miss in wrong dir $impl";
	is_deeply(\@l, [], "no extra lines $impl");

	# the document data must still be retrievable after the second request
	while (my ($did, $expect) = each %docid2data) {
		is_deeply($csrch->xdb->get_document($did)->get_data,
			$expect, "docid=$did data matches");
	}
	ok(!$xhc->{io}->close, "$impl close");
	is($?, 66 << 8, "got EX_NOINPUT from $impl exit");
};
| |
# Run $test_xhc against whichever helper implementation start_helper picks
# first, then, if that was the C++ one, once more with PI_NO_CXX set.
SKIP: {
	require_mods('+SCM_RIGHTS', 1);
	require PublicInbox::XapClient;
	my $xhc = PublicInbox::XapClient::start_helper('-j0');
	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
	$test_xhc->($xhc, $csrch);

	# the second pass only makes sense when the first one used C++
	if ($ENV{PI_NO_CXX}) {
		skip 'PI_NO_CXX set', 1;
	}
	unless ($xhc->{impl} =~ /Cxx/) {
		skip 'C++ compiler or xapian development libs missing', 1;
	}
	if ($ENV{TEST_XH_CXX_ONLY}) {
		skip 'TEST_XH_CXX_ONLY set', 1;
	}

	local $ENV{PI_NO_CXX} = 1; # force XS or SWIG binding test
	$xhc = PublicInbox::XapClient::start_helper('-j0');
	$test_xhc->($xhc, $csrch);
}
| |
{ # --update
	# A file committed to wt0 after the initial index must only become
	# searchable once `-cindex -u' has run, and must stay searchable
	# across --reindex.
	my $query = 'dfn:for-update';
	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");
	my $mset = $csrch->mset($query);
	is(scalar($mset->items), 0, 'no result before update');

	my $commit_env = \%PublicInbox::TestCommon::COMMIT_ENV;
	my $in_wt0 = { -C => "$tmp/wt0" };
	xsys_e([qw(/bin/sh -c), <<'EOM'], $commit_env, $in_wt0);
>for-update && git add for-update && git commit -q -m updated
EOM

	ok(run_script([qw(-cindex -qu -d), "$tmp/ext"]), '-cindex -u');
	$mset = $csrch->reopen->mset($query);
	is(scalar($mset->items), 1, 'got updated result');

	ok(run_script([qw(-cindex -qu --reindex -d), "$tmp/ext"]), 'reindex');
	$mset = $csrch->reopen->mset($query);
	is(scalar($mset->items), 1, 'same result after reindex');
}
| |
SKIP: { # --prune
	require_cmd($ENV{XAPIAN_DELVE} || 'xapian-delve', 1);
	require_git v2.6, 1;

	my $git_dir = "$tmp/wt0/.git";
	my $hidden = "$tmp/wt0/.giit";
	my $csrch = PublicInbox::CodeSearch->new("$tmp/ext");

	# number of results for a query against the current view of the index
	my $hits = sub { scalar($csrch->mset($_[0])->items) };
	is($hits->('s:hi'), 1, 'got hit');

	# move the GIT_DIR out of the way so --prune drops its documents
	rename($git_dir, $hidden);
	ok(run_script([qw(-cindex -q --prune -d), "$tmp/ext"], undef, $opt),
		'prune');
	is(${$opt->{2}}, '', 'nothing in stderr') or diag explain($opt);
	$csrch->reopen;
	is($hits->('s:hi'), 0, 'hit pruned');

	# putting the GIT_DIR back and updating must not resurrect the hit
	rename($hidden, $git_dir);
	ok(run_script([qw(-cindex -qu -d), "$tmp/ext"]), 'update');
	$csrch->reopen;
	is($hits->('s:hi'), 0,
		'hit stays pruned since GIT_DIR was previously pruned');
	isnt($hits->('s:NUL'), 0,
		'prune did not clobber entire index');
}
| |
# Recreate "$tmp/ext" by hand with unusual permissions and make sure an
# index created inside the pre-existing directory derives the mode of its
# lock file from it.
# NOTE(review): the expected 0604 appears to assume the process umask
# strips the other-write bit from the requested 0707 -- confirm.
{
	my $ext = "$tmp/ext";
	File::Path::remove_tree($ext);
	mkdir($ext, 0707);
	my $cmd = [ qw(-cindex --dangerous -q -d), $ext, '-g', $zp ];
	ok(run_script($cmd), 'external on existing dir');

	my $lock_mode = (stat("$ext/cidx.lock"))[2];
	is($lock_mode & 0777, 0604, 'created lock respects odd permissions');
}
| |
# -xcpdb must work on a cindex: plain upgrade, resharding, and (when
# xapian-compact is usable) compaction with and without resharding.
{
	my $ext = "$tmp/ext";
	ok(run_script([ '-xcpdb', $ext ]), 'xcpdb upgrade');
	ok(run_script([ '-xcpdb', '-R4', $ext ]), 'xcpdb reshard');
}

SKIP: {
	have_xapian_compact 2;
	my $ext = "$tmp/ext";
	ok(run_script([ qw(-xcpdb -R2 --compact), $ext ]),
		'xcpdb reshard+compact');
	ok(run_script([ qw(-xcpdb --compact), $ext ]), 'xcpdb compact');
};
| |
# --join against an inbox created with indexlevel=basic must still
# succeed, and must warn on stderr that the inbox is not indexed for
# search.
SKIP: {
	require_cmd('join', 1);
	my $basic = create_inbox 'basic', indexlevel => 'basic', sub {
		my ($im) = @_;
		$im->add(eml_load('t/plack-qp.eml'));
	};

	my $pi_config = "$tmp/pi_config";
	PublicInbox::IO::write_file('>', $pi_config, <<EOM);
[publicinbox "basictest"]
inboxdir = $basic->{inboxdir}
address = basic\@example.com
EOM
	my $env = { PI_CONFIG => $pi_config };

	my @join = ('--join=aggressive,dt:19700101000000..now',
			'-I', $basic->{inboxdir});
	my $cmd = [ qw(-cindex -u --all -d), "$tmp/ext", @join ];

	# start from empty capture buffers so only this run is inspected
	$cidx_out = '';
	$cidx_err = '';
	ok(run_script($cmd, $env, $opt), 'join w/o search');
	like($cidx_err, qr/W: \Q$basic->{inboxdir}\E not indexed for search/s,
		'non-Xapian-enabled inbox noted');
}
| |
# We need to support blank section names for top-level repos
# (e.g. <https://example.com/my-project>).
# git.kernel.org could use "pub" as the section name, though, since all
# of its git repos are currently under //git.kernel.org/pub/**/*
{
	my $d = "$tmp/blanksection";
	mkdir($d);
	my $cfg = cfg_new($d, <<EOM);
[cindex ""]
topdir = $tmp/ext
localprefix = $tmp
EOM

	# direct lookup by the empty name
	my $csrch = $cfg->lookup_cindex('');
	is ref($csrch), 'PublicInbox::CodeSearch', 'codesearch w/ blank name';
	is_deeply $csrch->{localprefix}, [ "$tmp" ], 'localprefix respected';

	# iteration must visit that same cindex exactly once, passing the
	# extra argument through to the callback
	my $nr = 0;
	my $visit = sub {
		my ($cs, @rest) = @_;
		is $cs->{topdir}, $csrch->{topdir}, 'each_cindex works';
		is_deeply \@rest, [ '.' ], 'got expected arg';
		++$nr;
	};
	$cfg->each_cindex($visit, '.');
	is $nr, 1, 'iterated through cindices';

	# a full-length object ID is usable with the dfpost: prefix
	my $oid = 'dba13ed2ddf783ee8118c6a581dbf75305f816a3';
	my $mset = $csrch->mset("dfpost:$oid");
	is $mset->size, 1, 'got result from full OID search';
}

done_testing;