| #!perl -w |
| # Copyright (C) all contributors <meta@public-inbox.org> |
| # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> |
| use strict; |
| use v5.10.1; |
| use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; |
| use autodie qw(open chmod close rename); |
| use Fcntl qw(:DEFAULT); |
# Usage text: printed to stdout for --help and to stderr (via $usage_cb) on
# argument errors.  Option descriptions start in a fixed column.
my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term:
usage: public-inbox-init NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..]

  Initialize a public-inbox

required arguments:

  NAME                the name of the inbox
  INBOX_DIR           pathname of the inbox
  HTTP_URL            HTTP (or HTTPS) URL
  ADDRESS             email address(es), may be specified multiple times

options:

  -V2                 use scalable public-inbox-v2-format(5)
  -L LEVEL            index level `basic', `medium', or `full' (default: full)
  --ng NEWSGROUP      set NNTP newsgroup name
  -c KEY=VALUE        set additional config option(s)
  --skip-artnum=NUM   NNTP article numbers to skip
  --skip-epoch=NUM    epochs to skip (-V2 only)
  -j JOBS             number of indexing jobs (-V2 only), (default: 4)

See public-inbox-init(1) man page for full documentation.
EOF
| |
# Load the admin helpers first.  require_or_die presumably aborts when the
# '-base' dependency set is unavailable -- confirm in PublicInbox::Admin.
require PublicInbox::Admin;
PublicInbox::Admin::require_or_die('-base');

# Command-line state.  Everything except $ng starts out undefined (or
# empty) so later code can tell "not given" from an explicit value.
my ($version, $indexlevel, $skip_epoch, $skip_artnum, $jobs, $show_help);
my $skip_docdata;
my $ng = '';
my (@c_extra, @chdir);

# Getopt::Long specification => destination
my %opts = (
	'help|h' => \$show_help,
	'V|version=i' => \$version,
	'L|index-level|indexlevel=s' => \$indexlevel,
	'j|jobs=i' => \$jobs,
	'ng|newsgroup=s' => \$ng,
	'S|skip|skip-epoch=i' => \$skip_epoch,
	'skip-artnum=i' => \$skip_artnum,
	'skip-docdata' => \$skip_docdata,
	'c=s@' => \@c_extra,
	'C=s@' => \@chdir,
);

# Usage errors: help text goes to stderr and the exit status is 1.
my $usage_cb = sub { print STDERR $help; exit(1) };

$usage_cb->() unless GetOptions(%opts);

# An explicit --help is not an error: stdout, exit status 0.
if ($show_help) {
	print $help;
	exit(0);
}

# The three leading positional arguments are mandatory; a missing (or
# false) value triggers the usage message.
my ($name, $inboxdir, $http_url);
for my $arg_ref (\$name, \$inboxdir, \$http_url) {
	$$arg_ref = shift(@ARGV) or $usage_cb->();
}

# Whatever remains is the address list; at least one is required.
my @address = @ARGV;
$usage_cb->() unless @address;

# Hand the -C arguments to the admin helper.
PublicInbox::Admin::do_chdir(\@chdir);
| |
# Sift the -c KEY=VALUE pairs.  Keys with dedicated handling (newsgroup,
# address, indexlevel) are folded into their own variables and dropped,
# inboxdir/mainrepo are rejected, and every other pair stays in @c_extra
# exactly as typed.
{
	my @keep;
	for my $pair (@c_extra) {
		my ($key, $val) = split(/=/, $pair, 2);
		die "Usage: -c KEY=VALUE\n" unless defined $val;
		die "$key contains invalid characters\n"
			unless $key =~ /\A[a-z]+\z/i;

		# keys are compared case-insensitively
		$key = lc $key;
		if ($key eq 'newsgroup') {
			die "newsgroup already set ($ng)\n" if $ng ne '';
			$ng = $val;
		} elsif ($key eq 'address') {
			push @address, $val; # for conflict checking
		} elsif ($key =~ /\A(?:inboxdir|mainrepo)\z/) {
			die "$key not allowed via -c $pair\n";
		} elsif ($key eq 'indexlevel') {
			die "indexlevel already set ($indexlevel)\n"
				if defined $indexlevel;
			$indexlevel = $val;
		} else {
			push @keep, $pair;
		}
	}
	@c_extra = @keep;
}
| |
# Validate option values before the config directory, lock files or inbox
# are created.  The index level is only checked when one was given.
if (defined $indexlevel) {
	PublicInbox::Admin::indexlevel_ok_or_die($indexlevel);
}

# An empty $ng (no newsgroup requested) passes both checks below.
if ($ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]!) {
	die "--newsgroup `$ng' is not valid\n";
}
if ($ng =~ m!\A\.! || $ng =~ m!\.\z!) {
	die "--newsgroup `$ng' must not start or end with `.'\n";
}
| |
# Locate the config file, make sure its directory exists, then take both
# locks and create the temporary file that git-config(1) will edit.
require PublicInbox::Config;
my $pi_config = PublicInbox::Config->default_file;

# Directory part of the config path, including the trailing slash.  A path
# without any `/' (a bare filename relative to the current directory) does
# not match the pattern; fall back to `./' so that mkpath and File::Temp
# below always get a defined directory instead of undef.
my ($dir) = ($pi_config =~ m!(.*?/)[^/]+\z!);
$dir //= './';
require File::Path;
File::Path::mkpath($dir); # will croak on fatal errors

# first, we grab a flock to prevent simultaneous public-inbox-init
# processes from trampling over each other, or exiting with 255 on
# O_EXCL failure below.  This gets unlocked automatically on exit:
require PublicInbox::Lock;
my $lock_obj = { lock_path => "$pi_config.flock" };
PublicInbox::Lock::lock_acquire($lock_obj);

# git-config will operate on this (and rename on success).  It lives in
# the same directory as the real config file.
require File::Temp;
my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXX', DIR => $dir);

# Now, we grab another lock to use git-config(1) locking, so it won't
# wait on the lock, unlike some of our internal flock()-based locks.
# This is to prevent direct git-config(1) usage from clobbering our
# changes.
my $lockfile = "$pi_config.lock";
my $lockfh;
sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do {
	# O_EXCL: somebody else already holds $lockfile
	warn "could not open config file: $lockfile: $!\n";
	exit(255);
};

# Remove $lockfile again once $auto_unlink goes away (it is explicitly
# undef'ed at the very end of the script).
require PublicInbox::OnDestroy;
my $auto_unlink = PublicInbox::OnDestroy::on_destroy(sub { unlink $lockfile });
# Mode for a brand-new config file; replaced below by the mode of an
# existing config so the rewritten file keeps its permissions.
my $perm = 0644 & ~umask;

# Lowercased addresses which the existing config already maps to $name.
my %seen;
if (-e $pi_config) {
	require PublicInbox::IO;
	open(my $oh, '<', $pi_config);
	my @st = stat($oh) or die "(f)stat failed on $pi_config: $!\n";
	$perm = $st[2];
	chmod($perm & 07777, $fh);

	# seed the temporary file with the current config contents
	print $fh PublicInbox::IO::read_all($oh);
	close $oh;

	# yes, this conflict checking is racy if multiple instances of this
	# script are run by the same $PI_DIR
	my $cfg = PublicInbox::Config->new;
	my $conflict;
	foreach my $addr (@address) {
		my $found = $cfg->lookup($addr);
		if ($found) {
			if ($found->{name} ne $name) {
				# report every conflicting address before
				# giving up
				print STDERR
					"`$addr' already defined for ",
					"`$found->{name}',\n",
					"does not match intended `$name'\n";
				$conflict = 1;
			} else {
				$seen{lc($addr)} = 1;
			}
		}
	}

	exit(1) if $conflict;

	# re-running against an existing inbox: keep its configured
	# indexlevel unless one was given on the command line
	my $ibx = $cfg->lookup_name($name);
	$indexlevel //= $ibx->{indexlevel} if $ibx;
}
# Remember the temporary file's path, then close our handle on it.
my $pi_config_tmp = $fh->filename;
close $fh;

# Config section for this inbox and the git-config(1) command prefix that
# operates on the temporary file.
my $pfx = "publicinbox.$name";
my @x = ('git', 'config', "--file=$pi_config_tmp");

# Normalize the inbox path and refuse newlines in it.
$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir);
if ($inboxdir =~ tr/\n//) {
	die "`\\n' not allowed in `$inboxdir'\n";
}

# Detect the format of an already-existing inbox: "inbox.lock" marks -V2,
# otherwise an "objects" directory marks -V1.  An explicit -V must agree
# with what is on disk.
if (-f "$inboxdir/inbox.lock") {
	$version //= 2;
	$version == 2 or
		die "$inboxdir is a -V2 inbox, -V$version specified\n";
} elsif (-d "$inboxdir/objects") {
	$version //= 1;
	$version == 1 or
		die "$inboxdir is a -V1 inbox, -V$version specified\n";
}

# Nothing on disk and no -V given: default to version 1.
$version //= 1;

die "--skip-epoch is only supported for -V2 inboxes\n"
	if defined($skip_epoch) && $version == 1;
| |
# Describe the inbox in memory; indexlevel may still be undef here.
my $ibx = PublicInbox::Inbox->new({
	name => $name,
	version => $version,
	inboxdir => $inboxdir,
	indexlevel => $indexlevel,
	-primary_address => $address[0],
});

# Options handed to InboxWritable; only --jobs contributes (as "nproc").
my $creat_opt = {};
if (defined $jobs) {
	$version == 1 and
		die "--jobs is only supported for -V2 inboxes\n";
	$jobs > 0 or die "--jobs=$jobs must be >= 1\n";
	$creat_opt->{nproc} = $jobs;
}

require PublicInbox::InboxWritable;
$ibx = PublicInbox::InboxWritable->new($ibx, $creat_opt);
if ($skip_docdata) {
	$ibx->{indexlevel} //= 'full'; # ensure init_inbox writes xdb
	die "--skip-docdata ignored with --indexlevel=basic\n"
		if $ibx->{indexlevel} eq 'basic';
	$ibx->{-skip_docdata} = $skip_docdata;
}
$ibx->init_inbox(0, $skip_epoch, $skip_artnum);

# Best effort: O_EXCL makes sysopen fail when a description file already
# exists, in which case it is left alone.  $fh is reused as the handle.
my $f = "$inboxdir/description";
if (sysopen($fh, $f, O_WRONLY|O_CREAT|O_EXCL)) {
	print $fh "public inbox for $address[0]\n";
	close($fh);
}
| |
# Write the core publicinbox.$name.* entries into the temporary config
# through git-config(1); run_die is expected to die if a command fails.

# needed for git prior to v2.1.0
umask(0077);

require PublicInbox::Spawn;
PublicInbox::Spawn->import(qw(run_die));

foreach my $addr (@address) {
	# Skip addresses the existing config already has for this inbox and
	# addresses repeated within this invocation (given twice, or given
	# both positionally and via -c address=...), so that `--add' never
	# creates duplicate entries.
	next if $seen{lc($addr)}++;
	run_die([@x, "--add", "$pfx.address", $addr]);
}
run_die([@x, "$pfx.url", $http_url]);
run_die([@x, "$pfx.inboxdir", $inboxdir]);

# indexlevel and newsgroup are only written when they have a value
if (defined($indexlevel)) {
	run_die([@x, "$pfx.indexlevel", $indexlevel]);
}
run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne '';
| |
# Apply the remaining -c KEY=VALUE pairs, then move the finished temporary
# file into place.
foreach my $pair (@c_extra) {
	my ($key, $val) = split(/=/, $pair, 2);
	# git 2.30+ has --fixed-value for idempotent invocations,
	# but that's too new to depend on in 2021.  Perl quotemeta
	# seems compatible enough for POSIX ERE which git uses, so
	# build an anchored pattern that matches exactly this value.
	my $exact = '^' . quotemeta($val) . '$';
	run_die([ @x, '--replace-all', "$pfx.$key", $val, $exact ]);
}

# needed for git prior to v2.1.0
chmod($perm & 07777, $pi_config_tmp);
rename($pi_config_tmp, $pi_config);
undef($auto_unlink); # trigger ->DESTROY