| #!/usr/bin/perl -T |
| ## ----------------------------------------------------------------------- |
| ## |
| ## Copyright 2011 Intel Corporation; author: H. Peter Anvin |
| ## |
| ## This program is free software; you can redistribute it and/or |
| ## modify it under the terms of the GNU General Public License as |
| ## published by the Free Software Foundation, Inc.; either version 2 |
| ## of the License, or (at your option) any later version; |
| ## incorporated herein by reference. |
| ## |
| ## ----------------------------------------------------------------------- |
| |
| # |
| # kernel.org bulk file upload client |
| # |
| |
| use strict; |
| use warnings; |
| use bytes; |
| use Encode qw(encode decode); |
| use File::Spec; |
| use Config::Simple; |
| |
| my $blksiz = 1024*1024; |
| |
| # Global options |
| my %opt = ( |
| 'rsh' => 'ssh -a -x -k -T', |
| 'host' => undef, |
| 'batch' => 0, |
| 'verbose' => 0, |
| ); |
| |
| # Read the config file settings and override the above |
| my $cfg_file = $ENV{'HOME'}.'/.kuprc'; |
| my $cfg = new Config::Simple($cfg_file); |
| |
| if (defined($cfg)) { |
| # Update %opt with cfgfile settings (only rsh and host vars) |
| my %cfg_opt = $cfg->vars(); |
| |
| if (defined($cfg_opt{'default.host'})) { |
| $opt{'host'} = $cfg_opt{'default.host'}; |
| } |
| |
| if (defined($cfg_opt{'default.rsh'})) { |
| $opt{'rsh'} = $cfg_opt{'default.rsh'}; |
| } |
| } |
| |
| # If anyone's ssh is somewhere other than /bin:/usr/bin, they can specify |
| # where it is by setting up their .kuprc. This also lets us run with -T |
| # without playing untaint tricks. |
| # |
| $ENV{'PATH'} = '/bin:/usr/bin'; |
| |
| if (defined $ENV{'KUP_RSH'}) { |
| $opt{'rsh'} = $ENV{'KUP_RSH'}; |
| } |
| if (defined $ENV{'KUP_HOST'}) { |
| $opt{'host'} = $ENV{'KUP_HOST'}; |
| } |
| delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer |
| |
| # We process the command set twice, once as a dry run and one for real, |
| # to catch as many errors as early as possible |
| my @args; |
| my $real; |
| |
| # Usage description |
| sub usage($) { |
| my($err) = @_; |
| |
| print STDERR "Usage: $0 [global options] command [-- command...]\n"; |
| print STDERR "\n"; |
| print STDERR "Global options:\n"; |
| print STDERR " -b --batch Output command stream to stdout\n"; |
| print STDERR " -e --rsh=command Send output to command, override KUP_RSH\n"; |
| print STDERR " -o --host=[user@]host Connect to [user@]host, override KUP_HOST\n"; |
| print STDERR " -v --verbose Print each command to stderr as it is sent\n"; |
| print STDERR "\n"; |
| print STDERR "Commands:\n"; |
| print STDERR " put local_file signature remote_path\n"; |
| print STDERR " put --tar [--prefix=] remote_tree ref signature remote_path\n"; |
| print STDERR " put --diff remote_tree ref1 ref2 signature remote_path\n"; |
| print STDERR " mkdir remote_path\n"; |
| print STDERR " mv|move old_path new_path\n"; |
| print STDERR " ln|link old_path new_path\n"; |
| print STDERR " rm|del|delete old_path\n"; |
| print STDERR " ls|dir path...\n"; |
| |
| exit $err; |
| } |
| |
| # Return true if the supplied string is valid UTF-8 without special |
| # characters |
| sub is_clean_string($) |
| { |
| no bytes; |
| # use feature 'unicode_strings'; -- is this needed here? |
| |
| my($b) = @_; |
| my $f = decode('UTF-8', $b, Encode::FB_DEFAULT); |
| |
| return 0 if ($f =~ m:[\x{0000}-\x{001f}\x{007f}-\x{00a0}\x{fffd}-\x{ffff}]:); |
| return 1; |
| } |
| |
| # This returns true if the given argument is a valid filename in its |
| # canonical form. Double slashes, relative paths, dot files, control |
| # characters, and malformed UTF-8 is not permitted. We cap the length |
| # of each pathname component to 100 bytes so we can add an extension |
| # without worrying about it, and the entire pathname to 1024 bytes. |
| sub is_valid_filename($) |
| { |
| use bytes; |
| |
| my($f) = @_; |
| |
| return 0 if (!defined($f)); # If undefined, clearly not valid |
| |
| return 0 if (length($f) > 1024); # Reject ridiculously long paths |
| return 0 if (!is_clean_string($f)); # Reject bad UTF-8 and control characters |
| return 0 if ($f !~ m:^/:); # Reject relative paths |
| return 0 if ($f =~ m:/$:); # Reject paths ending in / |
| return 0 if ($f =~ m://:); # Reject double slashes |
| |
| # Reject filename components starting with dot or dash, covers . and .. |
| return 0 if ($f =~ m:/[\.\-]:); |
| |
| # Reject undesirable filename characters anywhere in the name. |
| # This isn't inherently security-critical, and could be tuned if |
| # users need it... |
| return 0 if ($f =~ m:[\!\"\$\&\'\*\;\<\>\?\\\`\|]:); |
| |
| # Make sure we can create a filename after adding .bz2 or similar. |
| # We can't use the obvious regexp here, because regexps operate on |
| # characters, not bytes. The limit of 100 is semi-arbitrary, but |
| # we shouldn't need filenames that long. |
| my $n = 0; |
| my $nmax = 0; |
| for (my $i = 0; $i < length($f); $i++) { |
| my $c = substr($f, $i, 1); |
| $n = ($c eq '/') ? 0 : $n+1; |
| $nmax = ($n > $nmax) ? $n : $nmax; |
| } |
| return 0 if ($nmax > 100); |
| |
| return 1; |
| } |
| |
| # Clean up a filename so that it is more likely to pass the |
| # canonicalization test. An optional second argument is used with |
| # two-filename commands (move, link); it should be the already |
| # canonicalized first argument. |
| # |
| # This can return undef for some invalid pathnames. This needs to be |
| # caught by is_valid_filename(). |
| sub canonicalize_path($;$) |
| { |
| my($file, $root) = @_; |
| |
| $root = '/' unless defined($root); |
| |
| my $tail = ''; |
| if ($root =~ m:^(.*/)([^/]+)$:) { |
| $root = $1; |
| $tail = $2; |
| } |
| |
| if ($root !~ m:^/: || $root !~ m:/$:) { |
| die "$0: internal error: non-canonical root\n"; |
| } |
| |
| if ($file !~ m:^/:) { |
| $file = $root . $file; |
| } |
| if ($file =~ m:/$:) { |
| $file .= $tail; |
| } |
| |
| my @path = (); |
| my $wasspc = 1; |
| # The -1 argument to split means "preserve trailing empty fields" |
| foreach my $s (split(/\//, $file, -1)) { |
| if ($s eq '' || $s eq '.') { |
| $wasspc = 1; |
| } elsif ($s eq '..') { |
| # If this ran off the root, error |
| return undef if (!defined(pop(@path))); |
| $wasspc = 1; |
| } else { |
| push(@path, $s); |
| $wasspc = 0; |
| } |
| } |
| |
| # If this ended in a special component, error |
| return undef if ($wasspc); |
| |
| # The initial '' forces the result to begin with a slash |
| return join('/', '', @path); |
| } |
| |
| # Parse global options |
| sub parse_global_options() |
| { |
| while (scalar @ARGV && $ARGV[0] =~ /^-/) { |
| my $arg = shift(@ARGV); |
| |
| if ($arg eq '-b' || $arg eq '--batch') { |
| $opt{'batch'} = 1; |
| } elsif ($arg eq '-e' || $arg eq '--rsh' || $arg eq '--ssh') { |
| $opt{'rsh'} = shift(@ARGV); |
| } elsif ($arg =~ /^--rsh=(.+)$/) { |
| $opt{'rsh'} = $1; |
| } elsif ($arg eq '-o' || $arg eq '--host') { |
| $opt{'host'} = shift(@ARGV); |
| } elsif ($arg =~ /^--host=(.+)$/) { |
| $opt{'host'} = $1; |
| } elsif ($arg eq '-v' || $arg eq '--verbose') { |
| $opt{'verbose'}++; |
| } elsif ($arg eq '-h' || $arg eq '--help') { |
| usage(0); |
| } else { |
| die "$0: unknown option: $arg\n"; |
| } |
| } |
| } |
| |
| # Encode a string |
| sub url_encode($) |
| { |
| my($s) = @_; |
| |
| # Hack to encode an empty string |
| return '%' if ($s eq ''); |
| |
| my $o = ''; |
| |
| foreach my $c (unpack("C*", $s)) { |
| if ($c > 32 && $c < 126 && $c != 37 && $c != 43) { |
| $o .= chr($c); |
| } elsif ($c == 32) { |
| $o .= '+'; |
| } else { |
| $o .= sprintf("%%%02X", $c); |
| } |
| } |
| |
| return $o; |
| } |
| |
| # Configure the output stream |
| sub setup_output() |
| { |
| # In batch mode, we dump the output to stdout so the user can |
| # aggregate it best they wish |
| unless ($opt{'batch'}) { |
| if ($opt{'rsh'} !~ /^([-a-zA-Z0-9._=\@:\s\/]+)$/) { |
| die "$0: suspicious KUP_RSH setting\n"; |
| } |
| my $rsh = $1; |
| if ($opt{'host'} !~ /^([-a-zA-Z0-9._\@]+)$/) { |
| die "$0: suspicious KUP_HOST\n"; |
| } |
| $rsh .= " \Q$1"; |
| open(STDOUT, '|-', $rsh) |
| or die "$0: cannot execute rsh command ", $rsh, "\n"; |
| } |
| binmode(STDOUT); |
| } |
| |
| # Terminate the output process |
| sub close_output() |
| { |
| $| = 1; # Flush STDOUT |
| unless ($opt{'batch'}) { |
| close(STDOUT); |
| } |
| } |
| |
| # Print a command to STDOUT, and if requested, to STDERR |
| sub command(@) |
| { |
| if ($real) { |
| my $cmd = join(' ', @_); |
| |
| print STDERR $cmd, "\n" if ($opt{'verbose'}); |
| print $cmd, "\n"; |
| } |
| } |
| |
| sub cat_file($$$) |
| { |
| my($cmd, $file, $fmt) = @_; |
| |
| if (!defined($fmt)) { |
| if ($file =~ /\.((gz|bz2|xz))$/) { |
| $fmt = $1; |
| } else { |
| $fmt = '%'; |
| } |
| } |
| |
| my $data; |
| open($data, '<', $file) |
| or die "$0: cannot open: $file: $!\n"; |
| if (! -f $data) { |
| die "$0: not a plain file: $file\n"; |
| } |
| my $size = -s _; |
| |
| binmode($data); |
| |
| if ($real) { |
| if ($size < 2) { |
| # Must be a plain file |
| $fmt = '%'; |
| } |
| |
| if (defined($fmt)) { |
| command($cmd, $size, $fmt); |
| } |
| |
| my $blk; |
| my $len; |
| |
| while ($size) { |
| $len = ($size < $blksiz) ? $size : $blksiz; |
| $len = read($data, $blk, $len); |
| |
| if (!$len) { |
| die "$0: premature end of data (file changed?): $file\n"; |
| } |
| |
| print $blk; |
| $size -= $len; |
| } |
| } |
| |
| close($data); |
| } |
| |
| # PUT command |
| sub cmd_put() |
| { |
| my $file = shift @args; |
| my $file_tail = undef; |
| |
| if ($file eq '-t' || $file eq '--tar') { |
| # tar hack |
| |
| my $remote_tree = shift @args; |
| my $prefix = ''; |
| |
| if ($remote_tree eq '-p' || $remote_tree eq '--prefix') { |
| $prefix = shift @args; |
| $remote_tree = shift @args; |
| } elsif ($remote_tree =~ /^--prefix=(.+)$/) { |
| $prefix = $1; |
| $remote_tree = shift @args; |
| } |
| |
| my $ref = shift(@args); |
| |
| if (!defined($ref)) { |
| usage(1); |
| } |
| |
| my $xrt = $remote_tree; |
| $remote_tree = canonicalize_path($remote_tree); |
| if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) { |
| die "$0: invalid path name for git tree: $xrt\n"; |
| } |
| if (!is_clean_string($ref)) { |
| die "$0: invalid ref: $ref\n"; |
| } |
| |
| command('TAR', url_encode($remote_tree), |
| url_encode($ref), url_encode($prefix)); |
| } elsif ($file eq '-d' || $file eq '--diff') { |
| # diff hack |
| |
| my $remote_tree = shift @args; |
| my $prefix = ''; |
| |
| my $ref1 = shift(@args); |
| my $ref2 = shift(@args); |
| |
| if (!defined($ref2)) { |
| usage(1); |
| } |
| |
| my $xrt = $remote_tree; |
| $remote_tree = canonicalize_path($remote_tree); |
| if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) { |
| die "$0: invalid path name for git tree: $xrt\n"; |
| } |
| if (!is_clean_string($ref1)) { |
| die "$0: invalid ref: $ref1\n"; |
| } |
| if (!is_clean_string($ref2)) { |
| die "$0: invalid ref: $ref2\n"; |
| } |
| |
| command('DIFF', url_encode($remote_tree), url_encode($ref1), |
| url_encode($ref2)); |
| } elsif ($file =~ /^-/) { |
| die "$0: unknown option to put command: $file\n"; |
| } else { |
| # Plain data blob. We don't actively attempt to compress it |
| # since ssh usually has a layer of compression, but if it is |
| # already a compressed file we send it as-is and let the |
| # server decompress it. |
| |
| cat_file('DATA', $file, undef); |
| |
| # Get the local filename without directory |
| my($vol, $dir); |
| ($vol, $dir, $file_tail) = File::Spec->splitpath($file); |
| } |
| |
| my $sign = shift @args; |
| my $remote = shift @args; |
| |
| if (!defined($remote)) { |
| usage(1); |
| } |
| |
| # This allows the user to not specify the filename if it is |
| # the same as on the local filesystem by ending the pathname |
| # with a slash |
| if ($remote =~ m:/$: && defined($file_tail)) { |
| $remote .= $file_tail; |
| } |
| |
| my $xrt = $remote; |
| $remote = canonicalize_path($remote); |
| if (!is_valid_filename($remote)) { |
| die "$0: invalid pathname: $xrt\n"; |
| } |
| |
| if ($remote =~ /\.sign$/) { |
| die "$0: target filename cannot end in .sign\n"; |
| } |
| |
| # DWIM: .bz2, .xz -> .gz |
| $remote =~ s/\.(bz2|xz)$/.gz/; |
| |
| cat_file('SIGN', $sign, undef); |
| command('PUT', url_encode($remote)); |
| } |
| |
| # MKDIR command |
| sub cmd_mkdir() |
| { |
| my $remote = shift @args; |
| |
| if (!defined($remote)) { |
| usage(1); |
| } |
| |
| my $xrt = $remote; |
| $remote = canonicalize_path($remote); |
| if (!is_valid_filename($remote)) { |
| die "$0: invalid pathname: $xrt\n"; |
| } |
| |
| if ($remote =~ /\.(sign|gz|bz2|xz)$/) { |
| die "$0: a directory name cannot end in .sign, .gz, .bz2, .xz\n"; |
| } |
| |
| command('MKDIR', url_encode($remote)); |
| } |
| |
| # DELETE command |
| sub cmd_delete() |
| { |
| my $remote = shift @args; |
| |
| if (!defined($remote)) { |
| usage(1); |
| } |
| |
| my $xrt = $remote; |
| $remote = canonicalize_path($remote); |
| if (!is_valid_filename($remote)) { |
| die "$0: invalid pathname: $xrt\n"; |
| } |
| |
| if ($remote =~ /\.sign$/) { |
| die "$0: cannot delete .sign files directly\n"; |
| } |
| |
| # DWIM: .bz2, .xz -> .gz |
| $remote =~ s/\.(bz2|xz)$/.gz/; |
| |
| command('DELETE', url_encode($remote)); |
| } |
| |
| # MOVE or LINK command |
| sub cmd_move_link($) |
| { |
| my($cmd) = @_; |
| |
| my $from = shift @args; |
| my $to = shift @args; |
| |
| if (!defined($to)) { |
| usage(1); |
| } |
| |
| my $xrt = $from; |
| $from = canonicalize_path($from); |
| if (!is_valid_filename($from)) { |
| die "$0: invalid pathname: $xrt\n"; |
| } |
| |
| $xrt = $to; |
| $to = canonicalize_path($to, $from); |
| if (!is_valid_filename($to)) { |
| die "$0: invalid pathname: $xrt\n"; |
| } |
| |
| if ($from =~ /\.sign$/ || $to =~ /\.sign$/) { |
| die "$0: cannot explicitly move .sign files\n"; |
| } |
| if ($from =~ /\.(gz|bz2|xz)$/ && $to =~ /\.(gz|bz2|xz)$/) { |
| $from =~ s/\.(bz2|xz)$/.gz/; |
| $to =~ s/\.(bz2|xz)$/.gz/; |
| } elsif ($from =~ /\.(gz|bz2|xz)$/ || $to =~ /\.(gz|bz2|xz)$/) { |
| die "$0: cannot move to or from compressed filenames\n"; |
| } |
| |
| if ($from eq $to) { |
| die "$0: moving filename to self: $from\n"; |
| } |
| |
| command($cmd, url_encode($from), url_encode($to)); |
| } |
| |
| # DIR command (supports arbitrary number of arguments) |
| sub cmd_dir() |
| { |
| while (defined($args[0]) && $args[0] ne '--') { |
| my $d = shift @args; |
| $d =~ s:/$::g; |
| if ($d ne '') { |
| my $xrt = $d; |
| $d = canonicalize_path($d); |
| if (!is_valid_filename($d)) { |
| die "$0: invalid pathname: $xrt\n"; |
| } |
| } |
| $d .= '/'; |
| |
| command('DIR', $d); |
| } |
| } |
| |
| # Process commands |
| sub process_commands() |
| { |
| while (1) { |
| my $cmd = shift(@args); |
| |
| if (!defined($cmd)) { |
| usage(1); |
| } |
| |
| $cmd = "\L${cmd}"; |
| |
| if ($cmd eq 'put') { |
| cmd_put(); |
| } elsif ($cmd eq 'mkdir') { |
| cmd_mkdir(); |
| } elsif ($cmd eq 'move' || $cmd eq 'mv') { |
| cmd_move_link('MOVE'); |
| } elsif ($cmd eq 'link' || $cmd eq 'ln') { |
| cmd_move_link('LINK'); |
| } elsif ($cmd eq 'delete' || $cmd eq 'del' || $cmd eq 'rm') { |
| cmd_delete(); |
| } elsif ($cmd eq 'ls' || $cmd eq 'dir') { |
| cmd_dir(); |
| } else { |
| die "$0: unknown command: $cmd\n"; |
| } |
| |
| my $sep = shift(@args); |
| |
| last if (!defined($sep)); # End of command line |
| |
| if ($sep ne '--') { |
| die "$0: garbage at end of $cmd command\n"; |
| } |
| } |
| } |
| |
| # Main program |
| parse_global_options(); |
| |
| if (!defined($opt{'host'})) { |
| die "$0: please specify --host, KUP_HOST, or set up ~/.kuprc\n"; |
| } |
| |
| # "Dry run" pass |
| $real = 0; |
| @args = @ARGV; |
| process_commands(); |
| |
| # Establish output stream |
| setup_output(); |
| |
| # "Real" pass |
| $real = 1; |
| @args = @ARGV; |
| process_commands(); |
| |
| # Close the output to allow the child process to complete |
| close_output(); |
| |
| exit 0; |
| |
| # vim: noet |