| #!perl -w |
| # Copyright (C) all contributors <meta@public-inbox.org> |
| # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> |
| use strict; |
| use v5.10.1; |
| use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); |
| my $help = <<EOF; # usage text: printed for --help, passed to die() on usage errors; should fit w/o scrolling in 80x24 term: |
| usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...] |
| |
| Create and update external (detached) search indices |
| |
| --no-fsync speed up indexing, risk corruption on power outage |
| --watch run persistently and watch for inbox updates |
| -L LEVEL `medium', or `full' (default: full) |
| --all index all configured inboxes |
| --jobs=NUM set or disable parallelization (NUM=0) |
| --batch-size=BYTES flush changes to OS after a given number of bytes |
| --max-size=BYTES do not index messages larger than the given size |
| --gc perform garbage collection instead of indexing |
| --dedupe[=MSGID] fix prior deduplication errors (may be repeated) |
| --reindex index previously indexed inboxes |
| --fast only reindex unseen/stale messages |
| --verbose | -v increase verbosity (may be repeated) |
| --dry-run | -n dry-run on --dedupe |
| |
| BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) |
| See public-inbox-extindex(1) man page for full documentation. |
| EOF |
| # option defaults; GetOptions() stores every parsed switch into this same hash |
| my $opt = { fsync => 1, scan => 1, compact => 0, quiet => -1 }; |
| GetOptions($opt, |
| qw(verbose|v+ reindex rethread compact|c+ jobs|j=i), |
| qw(fsync|sync! fast dangerous), |
| qw(indexlevel|index-level|L=s max_size|max-size=s), |
| qw(batch_size|batch-size=s), |
| qw(dedupe:s@ gc commit-interval=i watch scan! dry-run|n), |
| qw(multi-pack-index! all C=s@ help|h), |
| ) or die $help; # unparsable command line: usage goes through die() |
| # --help is a success path: usage is printed to stdout and we exit 0 |
| $opt->{help} and do { print $help; exit 0 }; |
| # --jobs is optional; only an explicitly negative count is rejected |
| die "--jobs must be >= 0\n" if ($opt->{jobs} // 0) < 0; |
| require IO::Handle; # for autoflush() on the standard handles |
| # unbuffer stdout first, then stderr |
| $_->autoflush(1) for (\*STDOUT, \*STDERR); |
| local $SIG{'USR1'} = 'IGNORE'; # to be overridden in eidx_sync |
| # PublicInbox modules are required lazily so that --help stays fast |
| require PublicInbox::Admin; |
| PublicInbox::Admin::do_chdir(delete($opt->{C})); |
| my $cfg = PublicInbox::Config->new(); |
| # EXTINDEX_DIR is the first positional argument. When it is absent, |
| # --all may fall back to the topdir reported by $cfg->ALL; any other |
| # combination dies with the usage text. |
| my $eidx_dir = shift(@ARGV) // do { |
| $opt->{all} && $cfg->ALL or die "E: $help"; |
| $cfg->ALL->{topdir}; |
| }; |
| # inboxes to operate on; stays empty under --gc |
| my @ibxs; |
| if (!$opt->{gc}) { |
| @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); |
| } else { # --gc takes no inbox paths and rejects the switches listed below |
| @ARGV and die "E: inbox paths must not be specified with --gc\n"; |
| # report the first conflicting switch, in the order listed |
| my ($bad) = grep { $opt->{$_} } qw(all watch dry-run dedupe); |
| defined($bad) and die "E: --$bad is not compatible with --gc\n"; |
| } |
| # switches which only act as modifiers of another switch |
| die "E: --dry-run only affects --dedupe\n" |
| if $opt->{'dry-run'} && !$opt->{dedupe}; |
| die "E: --fast only affects --reindex\n" |
| if $opt->{fast} && !$opt->{reindex}; |
| |
| # bail out early when prerequisites are missing |
| PublicInbox::Admin::require_or_die('-search'); |
| die "Cpanel::JSON::XS or similar missing\n" unless PublicInbox::Config::json(); |
| PublicInbox::Admin::progress_prepare($opt); |
| my $env = PublicInbox::Admin::index_prepare($opt, $cfg); |
| # overlay whatever index_prepare returned on top of the current environment |
| local %ENV = (%ENV, %$env) if $env; |
| require PublicInbox::ExtSearchIdx; |
| my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt); |
| if (!$opt->{gc}) { |
| # --all attaches the config alone; otherwise the resolved inboxes go along |
| $eidx->attach_config($cfg, $opt->{all} ? () : \@ibxs); |
| if (!$opt->{watch}) { |
| $eidx->eidx_sync($opt); |
| } else { |
| $cfg = undef; # save memory only after SIGHUP |
| $eidx->eidx_watch($opt); |
| } |
| } else { # --gc: attach the config alone, then collect garbage instead of indexing |
| $eidx->attach_config($cfg); |
| $eidx->eidx_gc($opt); |
| } |