| #!/usr/bin/perl -T |
| # |
| # kernel.org bulk file upload client |
| # |
| |
| use strict; |
| use warnings; |
| use bytes; |
| use Encode qw(encode decode); |
| |
| my $blksiz = 1024*1024; |
| |
| # Global options |
| my %opt = ( |
| 'rsh' => 'ssh -a -x -k -T upload.kernel.org', |
| 'batch' => 0, |
| ); |
| |
| # This is a client, and so running with tainting on is a bit overly |
| # paranoid. As a result we have to explicitly untaint certain bits from |
| # the environment. |
| sub untaint($) { |
| my($s) = @_; |
| |
| $s =~ /^(.*)$/; |
| return $1; |
| } |
| |
| $ENV{'PATH'} = untaint($ENV{'PATH'}); |
| |
| if (defined $ENV{'KUP_RSH'}) { |
| $opt{'rsh'} = untaint($ENV{'KUP_RSH'}); |
| } |
| |
| # We process the command set twice, once as a dry run and one for real, |
| # to catch as many errors as early as possible |
| my @args; |
| my $real; |
| |
| # Usage description |
| sub usage($) { |
| my($err) = @_; |
| |
| print STDERR "Usage: $0 [global options] command [-- command...]\n"; |
| print STDERR "\n"; |
| print STDERR "Global options:\n"; |
| print STDERR " -b --batch Output command stream to stdout\n"; |
| print STDERR " -e --rsh=command Send output to command, override KUP_RSH\n"; |
| print STDERR "\n"; |
| print STDERR "Commands:\n"; |
| print STDERR " put local_file signature remote_path\n"; |
| print STDERR " put --tar [--prefix=] remote_tree ref signature remote_path\n"; |
| print STDERR " put --diff remote_tree ref1 ref2 signature remote_path\n"; |
| print STDERR " mkdir remote_path\n"; |
| print STDERR " mv|move old_path new_path\n"; |
| print STDERR " ln|link old_path new_path\n"; |
| print STDERR " rm|delete old_path\n"; |
| |
| exit $err; |
| } |
| |
| # Return true if the supplied string is valid UTF-8 without special |
| # characters |
| sub is_clean_string($) |
| { |
| no bytes; |
| # use feature 'unicode_strings'; -- is this needed here? |
| |
| my($b) = @_; |
| my $f = decode('UTF-8', $b, Encode::FB_DEFAULT); |
| |
| return 0 if ($f =~ m:[\x{0000}-\x{001f}\x{007f}-\x{00a0}\x{fffd}-\x{ffff}]:); |
| return 1; |
| } |
| |
| # This returns true if the given argument is a valid filename in its |
| # canonical form. Double slashes, relative paths, dot files, control |
| # characters, and malformed UTF-8 is not permitted. We cap the length |
| # of each pathname component to 100 bytes to we can add an extension |
| # without worrying about it, and the entire pathname to 1024 bytes. |
| sub is_valid_filename($) |
| { |
| use bytes; |
| |
| my($f) = @_; |
| |
| return 0 if (length($f) > 1024); # Reject ridiculously long paths |
| return 0 if (!is_clean_string($f)); # Reject bad UTF-8 and control characters |
| return 0 if ($f !~ m:^/:); # Reject relative paths |
| return 0 if ($f =~ m:/$:); # Reject paths ending in / |
| return 0 if ($f =~ m://:); # Reject double slashes |
| |
| # Reject filename components starting with dot or dash, covers . and .. |
| return 0 if ($f =~ m:/[\.\-]:); |
| |
| # Reject undesirable filename characters anywhere in the name. |
| # This isn't inherently security-critical, and could be tuned if |
| # users need it... |
| return 0 if ($f =~ m:[\!\"\$\&\'\*\;\<\>\?\\\`\|]:); |
| |
| # Make sure we can create a filename after adding .bz2 or similar. |
| # We can't use the obvious regexp here, because regexps operate on |
| # characters, not bytes. The limit of 100 is semi-arbitrary, but |
| # we shouldn't need filenames that long. |
| my $n = 0; |
| my $nmax = 0; |
| for (my $i = 0; $i < length($f); $i++) { |
| my $c = substr($f, $i, 1); |
| $n = ($c eq '/') ? 0 : $n+1; |
| $nmax = ($n > $nmax) ? $n : $nmax; |
| } |
| return 0 if ($nmax > 100); |
| |
| return 1; |
| } |
| |
| # Parse global options |
| sub parse_global_options() |
| { |
| while (scalar @ARGV && $ARGV[0] =~ /^-/) { |
| my $arg = shift(@ARGV); |
| |
| if ($arg eq '-b' || $arg eq '--batch') { |
| $opt{'batch'} = 1; |
| } elsif ($arg eq '-e' || $arg eq '--rsh') { |
| $opt{'rsh'} = shift(@ARGV); |
| } elsif ($arg eq '-h' || $arg eq '--help') { |
| usage(0); |
| } elsif ($arg =~ /^(\-e|\-\-rsh\=)(.+)$/) { |
| $opt{'rsh'} = $2; |
| } else { |
| die "$0: unknown option: $arg\n"; |
| } |
| } |
| } |
| |
| # Encode a string |
| sub url_encode($) |
| { |
| my($s) = @_; |
| |
| # Hack to encode an empty string |
| return '%' if ($s eq ''); |
| |
| my $o = ''; |
| |
| foreach my $c (unpack("C*", $s)) { |
| if ($c > 32 && $c < 126 && $c != 37 && $c != 43) { |
| $o .= chr($c); |
| } elsif ($c == 32) { |
| $o .= '+'; |
| } else { |
| $o .= sprintf("%%%02X", $c); |
| } |
| } |
| |
| return $o; |
| } |
| |
| # Configure the output stream |
| sub setup_output() |
| { |
| # In batch mode, we dump the output to stdout so the user can |
| # aggregate it best they wish |
| unless ($opt{'batch'}) { |
| open(STDOUT, '|-', $opt{'rsh'}) |
| or die "$0: cannot execute rsh command ", $opt{'rsh'}, "\n"; |
| } |
| binmode(STDOUT); |
| } |
| |
| # Terminate the output process |
| sub close_output() |
| { |
| $| = 1; # Flush STDOUT |
| unless ($opt{'batch'}) { |
| close(STDOUT); |
| } |
| } |
| |
| sub get_data_format($) |
| { |
| my($data) = @_; |
| |
| my $magic2 = substr($data, 0, 2); |
| my $magic4 = substr($data, 0, 4); |
| my $magic6 = substr($data, 0, 6); |
| |
| my $fmt = '%'; # Meaning straight binary |
| |
| if ($magic2 eq "\037\213") { |
| $fmt = 'gz'; |
| } elsif ($magic4 =~ /^BZh[1-9]$/) { |
| # The primary bzip2 magic is so crappy, so look |
| # for the magic number of the first packet |
| # (either a compression packet or an end of file packet.) |
| # Funny enough, the magics on the packets are better |
| # than the magics on the file format, and even so |
| # they managed to pick a magic for the compression |
| # packet which has no non-ASCII bytes in it... |
| |
| my $submagic = substr($data, 4, 6); |
| |
| if ($submagic eq "\x31\x41\x59\x26\x53\x59" || |
| $submagic eq "\x17\x72\x45\x38\x50\x90") { |
| $fmt = 'bz2'; |
| } |
| } elsif ($magic6 eq "\x{fd}7zXZ\0") { |
| $fmt = 'xz'; |
| } |
| |
| return $fmt; |
| } |
| |
| sub cat_file($$$) |
| { |
| my($cmd, $file, $fmt) = @_; |
| |
| my $data; |
| open($data, '<', $file) |
| or die "$0: cannot open: $file: $!\n"; |
| if (! -f $data) { |
| die "$0: not a plain file: $file\n"; |
| } |
| my $size = -s _; |
| |
| binmode($data); |
| |
| if ($real) { |
| if ($size < 2) { |
| # Must be a plain file |
| $fmt = '%'; |
| } |
| |
| if (defined($fmt)) { |
| print "${cmd} ${size} ${fmt}\n"; |
| } |
| |
| my $blk; |
| my $len; |
| |
| while ($size) { |
| $len = ($size < $blksiz) ? $size : $blksiz; |
| $len = read($data, $blk, $len); |
| |
| if (!$len) { |
| die "$0: premature end of data (file changed?): $file\n"; |
| } |
| |
| if (!defined($fmt)) { |
| $fmt = get_data_format($blk); |
| print "${cmd} ${file} ${fmt}\n"; |
| } |
| |
| print $blk; |
| $size -= $len; |
| } |
| } |
| |
| close($data); |
| } |
| |
| # PUT command |
| sub cmd_put() |
| { |
| my $file = shift @args; |
| |
| if ($file eq '-t' || $file eq '--tar') { |
| # tar hack |
| |
| my $remote_tree = shift @args; |
| my $prefix = ''; |
| |
| if ($remote_tree eq '-p' || $remote_tree eq '--prefix') { |
| $prefix = shift @args; |
| $remote_tree = shift @args; |
| } elsif ($remote_tree =~ /^(\-p|\-\-prefix=)(.+)$/) { |
| $prefix = $2; |
| $remote_tree = shift @args; |
| } |
| |
| my $ref = shift(@args); |
| |
| if (!defined($ref)) { |
| usage(1); |
| } |
| if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) { |
| die "$0: invalid path name for git tree: $remote_tree\n"; |
| } |
| if (!is_clean_string($ref)) { |
| die "$0: invalid ref: $ref\n"; |
| } |
| |
| if ($real) { |
| print 'TAR ', url_encode($remote_tree), ' ', |
| url_encode($ref), ' ', url_encode($prefix), "\n"; |
| } |
| } elsif ($file eq '-d' || $file eq '--diff') { |
| # diff hack |
| |
| my $remote_tree = shift @args; |
| my $prefix = ''; |
| |
| my $ref1 = shift(@args); |
| my $ref2 = shift(@args); |
| |
| if (!defined($ref2)) { |
| usage(1); |
| } |
| if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) { |
| die "$0: invalid path name for git tree: $remote_tree\n"; |
| } |
| if (!is_clean_string($ref1)) { |
| die "$0: invalid ref: $ref1\n"; |
| } |
| if (!is_clean_string($ref2)) { |
| die "$0: invalid ref: $ref2\n"; |
| } |
| |
| if ($real) { |
| print 'DIFF ', url_encode($remote_tree), ' ', |
| url_encode($ref1), ' ', url_encode($ref2), "\n"; |
| } |
| } elsif ($file =~ /^\-/) { |
| die "$0: unknown option to put command: $file\n"; |
| } else { |
| # Plain data blob. We don't actively attempt to compress it |
| # since ssh usually has a layer of compression, but if it is |
| # already a compressed file we send it as-is and let the |
| # server decompress it. |
| |
| cat_file('DATA', $file, undef); |
| } |
| |
| my $sign = shift @args; |
| my $remote = shift @args; |
| |
| if (!defined($remote)) { |
| usage(1); |
| } |
| |
| if (!is_valid_filename($remote)) { |
| die "$0: invalid pathname: $remote\n"; |
| } |
| |
| if ($remote =~ /\.sign$/) { |
| die "$0: target filename cannot end in .sign\n"; |
| } |
| |
| # DWIM: .bz2, .xz -> .gz |
| $remote =~ s/\.(bz2|xz)$/\.gz/; |
| |
| cat_file('SIGN', $sign, undef); |
| |
| if ($real) { |
| print 'PUT ', url_encode($remote), "\n"; |
| } |
| } |
| |
| # MKDIR command |
| sub cmd_mkdir() |
| { |
| my $remote = shift @args; |
| |
| if (!defined($remote)) { |
| usage(1); |
| } |
| |
| if (!is_valid_filename($remote)) { |
| die "$0: invalid pathname: $remote\n"; |
| } |
| |
| if ($remote =~ /\.(sign|gz|bz2|xz)$/) { |
| die "$0: a directory name cannot end in .sign, .gz, .bz2, .xz\n"; |
| } |
| |
| if ($real) { |
| print 'MKDIR ', url_encode($remote), "\n"; |
| } |
| } |
| |
| # DELETE command |
| sub cmd_delete() |
| { |
| my $remote = shift @args; |
| |
| if (!defined($remote)) { |
| usage(1); |
| } |
| |
| if (!is_valid_filename($remote)) { |
| die "$0: invalid pathname: $remote\n"; |
| } |
| |
| if ($remote =~ /\.sign$/) { |
| die "$0: cannot delete .sign files directly\n"; |
| } |
| |
| # DWIM: .bz2, .xz -> .gz |
| $remote =~ s/\.(bz2|xz)$/\.gz/; |
| |
| if ($real) { |
| print 'DELETE ', url_encode($remote), "\n"; |
| } |
| } |
| |
| # MOVE or LINK command |
| sub cmd_move_link($) |
| { |
| my($cmd) = @_; |
| |
| my $from = shift @args; |
| my $to = shift @args; |
| |
| if (!defined($to)) { |
| usage(1); |
| } |
| |
| if (!is_valid_filename($from)) { |
| die "$0: invalid pathname: $from\n"; |
| } |
| |
| if ($to =~ /\/$/) { |
| # Syntactic sugar: allow specifying a directory only by ending in / |
| if ($from =~ m:^(.*/|)([^/]+)$:) { |
| $to .= $2; |
| } |
| } |
| |
| if (!is_valid_filename($to)) { |
| die "$0: invalid pathname: $to\n"; |
| } |
| |
| if ($from =~ /\.sign$/ || $to =~ /\.sign$/) { |
| die "$0: cannot explicitly move .sign files\n"; |
| } |
| if ($from =~ /\.(gz|bz2|xz)$/ && $to =~ /\.(gz|bz2|xz)$/) { |
| $from =~ s/\.(bz|bz2|xz)$/\.gz/; |
| $to =~ s/\.(bz|bz2|xz)$/\.gz/; |
| } elsif ($from =~ /\.(gz|bz2|xz)$/ || $to =~ /\.(gz|bz2|xz)$/) { |
| die "$0: cannot move to or from compressed filenames\n"; |
| } |
| |
| if ($from eq $to) { |
| die "$0: moving filename to self: $from\n"; |
| } |
| |
| if ($real) { |
| print $cmd, ' ', url_encode($from), ' ', url_encode($to), "\n"; |
| } |
| } |
| |
| # Process commands |
| sub process_commands() |
| { |
| while (1) { |
| my $cmd = shift(@args); |
| |
| if (!defined($cmd)) { |
| usage(1); |
| } |
| |
| $cmd = "\L${cmd}"; |
| |
| if ($cmd eq 'put') { |
| cmd_put(); |
| } elsif ($cmd eq 'mkdir') { |
| cmd_mkdir(); |
| } elsif ($cmd eq 'move' || $cmd eq 'mv') { |
| cmd_move_link('MOVE'); |
| } elsif ($cmd eq 'link' || $cmd eq 'ln') { |
| cmd_move_link('LINK'); |
| } elsif ($cmd eq 'delete' || $cmd eq 'del' || $cmd eq 'rm') { |
| cmd_delete(); |
| } else { |
| die "$0: unknown command: $cmd\n"; |
| } |
| |
| my $sep = shift(@args); |
| |
| last if (!defined($sep)); # End of command line |
| |
| if ($sep ne '--') { |
| die "$0: garbage at end of $cmd command\n"; |
| } |
| } |
| } |
| |
| # Main program |
| parse_global_options(); |
| |
| # "Dry run" pass |
| $real = 0; |
| @args = @ARGV; |
| process_commands(); |
| |
| # Establish output stream |
| setup_output(); |
| |
| # "Real" pass |
| $real = 1; |
| @args = @ARGV; |
| process_commands(); |
| |
| # Close the output to allow the child process to complete |
| close_output(); |
| |
| exit 0; |