| #!/usr/bin/perl -w |
| # Copyright (C) all contributors <meta@public-inbox.org> |
| # License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt> |
| use strict; |
| use v5.10.1; |
| use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); |
# Usage text, printed for --help and used as the error message for any
# command-line mistake.  It should fit w/o scrolling in an 80x24 term.
my $help = <<EOF;
usage: public-inbox-convert [options] OLD NEW

convert v1 format inboxes to v2

options:

--no-index do not index after conversion
--jobs=NUM set shards (NUM=0)
--verbose | -v increase verbosity (may be repeated)

index options (see public-inbox-index(1) man page for full description):

--no-fsync speed up indexing, risk corruption on power outage
-L LEVEL `basic', `medium', or `full' (default: full)
--compact | -c run public-inbox-compact(1) after indexing
--sequential-shard index Xapian shards sequentially for slow storage
--batch-size=BYTES flush changes to OS after a given number of bytes
--max-size=BYTES do not index messages larger than the given size

See public-inbox-convert(1) man page for full documentation.
EOF

# Option defaults.  Everything after `index' is a default for the
# indexing step; this same hash is later handed to the indexing calls.
my $opt = {
	index => 1,
	quiet => -1,
	compact => 0,
	maxsize => undef,
	fsync => 1,
	reindex => 1, # conversion always reindexes
};
my @convert_spec = qw(jobs|j=i index! help|h C=s@);
my @index_spec = qw(
	verbose|v+ rethread compact|c+ fsync|sync!
	indexlevel|index-level|L=s max_size|max-size=s
	batch_size|batch-size=s
	sequential-shard|seq-shard
);
GetOptions($opt, @convert_spec, @index_spec) or die $help;
if ($opt->{help}) {
	print $help;
	exit 0;
}
require PublicInbox::Admin;
PublicInbox::Admin::do_chdir(delete $opt->{C});

# Exactly two positional arguments are accepted: OLD and NEW.
my ($old_dir, $new_dir) = splice(@ARGV, 0, 2);
$_ //= '' for ($old_dir, $new_dir);
die $help if @ARGV || $new_dir eq '' || $old_dir eq '';
-d $new_dir and die "$new_dir exists\n";
-d $old_dir or die "$old_dir not a directory\n";
| |
require PublicInbox::Config;
require PublicInbox::InboxWritable;

# OLD must resolve to exactly one inbox, and that inbox must be v1.
my $cfg = PublicInbox::Config->new;
my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg);
if (@old > 1) {
	die "BUG: resolved several inboxes from $old_dir:\n",
		map { "\t$_->{inboxdir}\n" } @old;
}
my $old = PublicInbox::InboxWritable->new($old[0]);
if (delete $old->{-unconfigured}) {
	my $cfg_file = PublicInbox::Config::default_file();
	warn "W: $old_dir not configured in $cfg_file\n";
}
if ($old->version >= 2) {
	die "Only conversion from v1 inboxes is supported\n";
}

# An explicitly set indexlevel wins; otherwise use the detected one.
my $detected = $old->detect_indexlevel;
$old->{indexlevel} //= $detected;

# Indexing prerequisites are only loaded when indexing was not disabled.
my $env;
if ($opt->{index}) {
	my %need;
	PublicInbox::Admin::scan_ibx_modules(\%need, $old);
	PublicInbox::Admin::require_or_die(keys %need);
	PublicInbox::Admin::progress_prepare($opt);
	$env = PublicInbox::Admin::index_prepare($opt, $cfg);
}
# Variables already present in %ENV take precedence over those in $env.
local %ENV = (%$env, %ENV) if $env;

# The new inbox starts as a shallow copy of the old one's fields,
# with only the directory and the version replaced.
my $new_abs = PublicInbox::Config::rel2abs_collapsed($new_dir);
my $new = PublicInbox::InboxWritable->new(
	+{ %$old, inboxdir => $new_abs, version => 2 },
	{ nproc => $opt->{jobs} });
$new->{-no_fsync} = 1 unless $opt->{fsync};
my $v2w;
| |
# link_or_copy($src, $dst): hard-link $src to $dst, falling back to a
# copy when linking fails.  A warning is emitted for any link failure
# other than EXDEV.  Dies if the fallback copy fails as well.
sub link_or_copy ($$) {
	my ($from, $to) = @_;
	return if link($from, $to);
	warn "link $from, $to failed: $!, trying cp\n" unless $!{EXDEV};
	require File::Copy; # cp() preserves permissions
	File::Copy::cp($from, $to) or die "cp $from, $to failed: $!\n";
}
| |
# Create the v2 inbox and carry over the v1 config, any altid
# databases and the description.  The umask value and GIT_CONFIG
# override only last for the duration of this block.
{
	my $restore = $old->with_umask;
	my $old_cfg = "$old->{inboxdir}/config";
	local $ENV{GIT_CONFIG} = $old_cfg;
	my $new_cfg = "$new->{inboxdir}/all.git/config";
	$v2w = $new->importer(1);
	$v2w->init_inbox(delete $opt->{jobs});

	# whatever config file is at $new_cfg is replaced by the old one
	unlink $new_cfg;
	link_or_copy($old_cfg, $new_cfg);

	if (my $alt = $new->{altid}) {
		require PublicInbox::AltId;
		for my $spec (@$alt) {
			# specs without a source database are skipped
			my $src = PublicInbox::AltId->new($old, $spec, 0)->mm_alt
				or next;
			$src = $src->{dbh}->sqlite_db_filename;
			my $dst_mm = PublicInbox::AltId->new($new, $spec, 1)->mm_alt;
			$dst_mm->{dbh}->sqlite_backup_from_file($src);
		}
	}

	my $desc = "$old->{inboxdir}/description";
	if (-e $desc) {
		link_or_copy($desc, "$new->{inboxdir}/description");
	}

	# cloneurl is deliberately not carried over, only warned about
	my $clone = "$old->{inboxdir}/cloneurl";
	if (-e $clone) {
		warn "$clone may not be valid after migrating to v2, not copying\n";
	}
}
# Stream `git fast-export' output from the old inbox into the new
# inbox's fast-import handle, rewriting file changes inside commits:
#   "M 100644 :MARK xx/yyyy..." becomes "M 100644 :MARK m"
#   "D xx/yyyy..."              becomes "M 100644 :MARK d", using the
#                               mark last recorded for that path
# Whenever the emitted name switches between `m' and `d', the previous
# name is deleted first.  All other lines are passed through unchanged.
my $state = '';
my $head = $old->{ref_head} || 'HEAD';
my $rd = $old->git->popen(qw(fast-export --use-done-feature), $head);
$v2w->idx_init($opt);
my $im = $v2w->importer;
my $io = $im->gfi_start;
my $h = '[0-9a-f]';
my $mod_re = qr{^M 100644 :([0-9]+) (${h}{2}/${h}{38})};
my $del_re = qr{^D (${h}{2}/${h}{38})};
my %D; # v1 path => mark of the blob last written there
my $last; # name emitted by the previous rewrite: `m', `d' or undef

# emit one rewritten file change under the name $name
my $emit = sub {
	my ($mark, $name) = @_;
	if ($last && $last ne $name) {
		print {$io} "D $last\n" or $im->wfail;
	}
	print {$io} "M 100644 :$mark $name\n" or $im->wfail;
	$last = $name;
};

while (defined(my $line = <$rd>)) {
	if ($line eq "blob\n") {
		$state = 'blob';
	} elsif ($line =~ /^commit /) {
		$state = 'commit';
	} elsif ($line =~ /^data ([0-9]+)/) {
		# copy the header and exactly that many payload bytes verbatim
		print {$io} $line or $im->wfail;
		print {$io} PublicInbox::IO::read_all($rd, $1) or $im->wfail;
		next;
	} elsif ($state eq 'commit') {
		if ($line =~ $mod_re) {
			my ($mark, $path) = ($1, $2);
			$D{$path} = $mark;
			$emit->($mark, 'm');
			next;
		}
		if ($line =~ $del_re) {
			my $path = $1;
			my $mark = delete $D{$path};
			defined $mark or die "undeleted path: $path\n";
			$emit->($mark, 'd');
			next;
		}
	}
	last if $line eq "done\n";
	print {$io} $line or $im->wfail;
}
$rd->close or die "fast-export: \$?=$? \$!=$!\n";
$io = undef;
$v2w->done;
# When the v1 inbox has a msgmap, copy it into the new inbox and
# record the converted head as the last commit of epoch 0.
if (my $v1_mm = $old->mm) {
	$old->cleanup;
	my $mm_file = $v1_mm->{dbh}->sqlite_db_filename;
	undef $v1_mm; # only the filename is needed from here on

	# we want to trigger a reindex, not a from scratch index if
	# we're reusing the msgmap from an existing v1 installation.
	$v2w->idx_init($opt);
	$v2w->{mm}->{dbh}->sqlite_backup_from_file($mm_file);

	my $epoch0 = PublicInbox::Git->new($v2w->{mg}->add_epoch(0));
	my $cmt = $epoch0->qx(qw(rev-parse --verify), $head);
	chop($cmt); # drops the final character of the rev-parse output
	$v2w->last_epoch_commit(0, $cmt);
}
if (delete $opt->{index}) {
	$v2w->index_sync($opt);
}
$v2w->done;