blob: 8b8436fe8130a737bb4b756269ac5b345a0114da [file] [log] [blame]
#!/usr/bin/perl -T
#
# kernel.org bulk file upload client
#
use strict;
use warnings;
use bytes;
use Encode qw(encode decode);
my $blksiz = 1024*1024;
# Global options
my %opt = (
'rsh' => 'ssh -a -x -k -T upload.kernel.org',
'batch' => 0,
);
# This is a client, and so running with tainting on is a bit overly
# paranoid. As a result we have to explicitly untaint certain bits from
# the environment.
sub untaint($) {
my($s) = @_;
$s =~ /^(.*)$/;
return $1;
}
$ENV{'PATH'} = untaint($ENV{'PATH'});
if (defined $ENV{'KUP_RSH'}) {
$opt{'rsh'} = untaint($ENV{'KUP_RSH'});
}
# We process the command set twice, once as a dry run and one for real,
# to catch as many errors as early as possible
my @args;
my $real;
# Usage description
sub usage($) {
my($err) = @_;
print STDERR "Usage: $0 [global options] command [-- command...]\n";
print STDERR "\n";
print STDERR "Global options:\n";
print STDERR " -b --batch Output command stream to stdout\n";
print STDERR " -e --rsh=command Send output to command, override KUP_RSH\n";
print STDERR "\n";
print STDERR "Commands:\n";
print STDERR " put local_file signature remote_path\n";
print STDERR " put --tar [--prefix=] remote_tree ref signature remote_path\n";
print STDERR " put --diff remote_tree ref1 ref2 signature remote_path\n";
print STDERR " mkdir remote_path\n";
print STDERR " mv|move old_path new_path\n";
print STDERR " ln|link old_path new_path\n";
print STDERR " rm|delete old_path\n";
exit $err;
}
# Return true if the supplied string is valid UTF-8 without special
# characters
sub is_clean_string($)
{
no bytes;
# use feature 'unicode_strings'; -- is this needed here?
my($b) = @_;
my $f = decode('UTF-8', $b, Encode::FB_DEFAULT);
return 0 if ($f =~ m:[\x{0000}-\x{001f}\x{007f}-\x{00a0}\x{fffd}-\x{ffff}]:);
return 1;
}
# This returns true if the given argument is a valid filename in its
# canonical form. Double slashes, relative paths, dot files, control
# characters, and malformed UTF-8 is not permitted. We cap the length
# of each pathname component to 100 bytes to we can add an extension
# without worrying about it, and the entire pathname to 1024 bytes.
sub is_valid_filename($)
{
use bytes;
my($f) = @_;
return 0 if (length($f) > 1024); # Reject ridiculously long paths
return 0 if (!is_clean_string($f)); # Reject bad UTF-8 and control characters
return 0 if ($f !~ m:^/:); # Reject relative paths
return 0 if ($f =~ m:/$:); # Reject paths ending in /
return 0 if ($f =~ m://:); # Reject double slashes
# Reject filename components starting with dot or dash, covers . and ..
return 0 if ($f =~ m:/[\.\-]:);
# Reject undesirable filename characters anywhere in the name.
# This isn't inherently security-critical, and could be tuned if
# users need it...
return 0 if ($f =~ m:[\!\"\$\&\'\*\;\<\>\?\\\`\|]:);
# Make sure we can create a filename after adding .bz2 or similar.
# We can't use the obvious regexp here, because regexps operate on
# characters, not bytes. The limit of 100 is semi-arbitrary, but
# we shouldn't need filenames that long.
my $n = 0;
my $nmax = 0;
for (my $i = 0; $i < length($f); $i++) {
my $c = substr($f, $i, 1);
$n = ($c eq '/') ? 0 : $n+1;
$nmax = ($n > $nmax) ? $n : $nmax;
}
return 0 if ($nmax > 100);
return 1;
}
# Parse global options
sub parse_global_options()
{
while (scalar @ARGV && $ARGV[0] =~ /^-/) {
my $arg = shift(@ARGV);
if ($arg eq '-b' || $arg eq '--batch') {
$opt{'batch'} = 1;
} elsif ($arg eq '-e' || $arg eq '--rsh') {
$opt{'rsh'} = shift(@ARGV);
} elsif ($arg eq '-h' || $arg eq '--help') {
usage(0);
} elsif ($arg =~ /^(\-e|\-\-rsh\=)(.+)$/) {
$opt{'rsh'} = $2;
} else {
die "$0: unknown option: $arg\n";
}
}
}
# Encode a string
sub url_encode($)
{
my($s) = @_;
# Hack to encode an empty string
return '%' if ($s eq '');
my $o = '';
foreach my $c (unpack("C*", $s)) {
if ($c > 32 && $c < 126 && $c != 37 && $c != 43) {
$o .= chr($c);
} elsif ($c == 32) {
$o .= '+';
} else {
$o .= sprintf("%%%02X", $c);
}
}
return $o;
}
# Configure the output stream
sub setup_output()
{
# In batch mode, we dump the output to stdout so the user can
# aggregate it best they wish
unless ($opt{'batch'}) {
open(STDOUT, '|-', $opt{'rsh'})
or die "$0: cannot execute rsh command ", $opt{'rsh'}, "\n";
}
binmode(STDOUT);
}
# Terminate the output process
sub close_output()
{
$| = 1; # Flush STDOUT
unless ($opt{'batch'}) {
close(STDOUT);
}
}
sub get_data_format($)
{
my($data) = @_;
my $magic2 = substr($data, 0, 2);
my $magic4 = substr($data, 0, 4);
my $magic6 = substr($data, 0, 6);
my $fmt = '%'; # Meaning straight binary
if ($magic2 eq "\037\213") {
$fmt = 'gz';
} elsif ($magic4 =~ /^BZh[1-9]$/) {
# The primary bzip2 magic is so crappy, so look
# for the magic number of the first packet
# (either a compression packet or an end of file packet.)
# Funny enough, the magics on the packets are better
# than the magics on the file format, and even so
# they managed to pick a magic for the compression
# packet which has no non-ASCII bytes in it...
my $submagic = substr($data, 4, 6);
if ($submagic eq "\x31\x41\x59\x26\x53\x59" ||
$submagic eq "\x17\x72\x45\x38\x50\x90") {
$fmt = 'bz2';
}
} elsif ($magic6 eq "\x{fd}7zXZ\0") {
$fmt = 'xz';
}
return $fmt;
}
sub cat_file($$$)
{
my($cmd, $file, $fmt) = @_;
my $data;
open($data, '<', $file)
or die "$0: cannot open: $file: $!\n";
if (! -f $data) {
die "$0: not a plain file: $file\n";
}
my $size = -s _;
binmode($data);
if ($real) {
if ($size < 2) {
# Must be a plain file
$fmt = '%';
}
if (defined($fmt)) {
print "${cmd} ${size} ${fmt}\n";
}
my $blk;
my $len;
while ($size) {
$len = ($size < $blksiz) ? $size : $blksiz;
$len = read($data, $blk, $len);
if (!$len) {
die "$0: premature end of data (file changed?): $file\n";
}
if (!defined($fmt)) {
$fmt = get_data_format($blk);
print "${cmd} ${file} ${fmt}\n";
}
print $blk;
$size -= $len;
}
}
close($data);
}
# PUT command
sub cmd_put()
{
my $file = shift @args;
if ($file eq '-t' || $file eq '--tar') {
# tar hack
my $remote_tree = shift @args;
my $prefix = '';
if ($remote_tree eq '-p' || $remote_tree eq '--prefix') {
$prefix = shift @args;
$remote_tree = shift @args;
} elsif ($remote_tree =~ /^(\-p|\-\-prefix=)(.+)$/) {
$prefix = $2;
$remote_tree = shift @args;
}
my $ref = shift(@args);
if (!defined($ref)) {
usage(1);
}
if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) {
die "$0: invalid path name for git tree: $remote_tree\n";
}
if (!is_clean_string($ref)) {
die "$0: invalid ref: $ref\n";
}
if ($real) {
print 'TAR ', url_encode($remote_tree), ' ',
url_encode($ref), ' ', url_encode($prefix), "\n";
}
} elsif ($file eq '-d' || $file eq '--diff') {
# diff hack
my $remote_tree = shift @args;
my $prefix = '';
my $ref1 = shift(@args);
my $ref2 = shift(@args);
if (!defined($ref2)) {
usage(1);
}
if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) {
die "$0: invalid path name for git tree: $remote_tree\n";
}
if (!is_clean_string($ref1)) {
die "$0: invalid ref: $ref1\n";
}
if (!is_clean_string($ref2)) {
die "$0: invalid ref: $ref2\n";
}
if ($real) {
print 'DIFF ', url_encode($remote_tree), ' ',
url_encode($ref1), ' ', url_encode($ref2), "\n";
}
} elsif ($file =~ /^\-/) {
die "$0: unknown option to put command: $file\n";
} else {
# Plain data blob. We don't actively attempt to compress it
# since ssh usually has a layer of compression, but if it is
# already a compressed file we send it as-is and let the
# server decompress it.
cat_file('DATA', $file, undef);
}
my $sign = shift @args;
my $remote = shift @args;
if (!defined($remote)) {
usage(1);
}
if (!is_valid_filename($remote)) {
die "$0: invalid pathname: $remote\n";
}
if ($remote =~ /\.sign$/) {
die "$0: target filename cannot end in .sign\n";
}
# DWIM: .bz2, .xz -> .gz
$remote =~ s/\.(bz2|xz)$/\.gz/;
cat_file('SIGN', $sign, undef);
if ($real) {
print 'PUT ', url_encode($remote), "\n";
}
}
# MKDIR command
sub cmd_mkdir()
{
my $remote = shift @args;
if (!defined($remote)) {
usage(1);
}
if (!is_valid_filename($remote)) {
die "$0: invalid pathname: $remote\n";
}
if ($remote =~ /\.(sign|gz|bz2|xz)$/) {
die "$0: a directory name cannot end in .sign, .gz, .bz2, .xz\n";
}
if ($real) {
print 'MKDIR ', url_encode($remote), "\n";
}
}
# DELETE command
sub cmd_delete()
{
my $remote = shift @args;
if (!defined($remote)) {
usage(1);
}
if (!is_valid_filename($remote)) {
die "$0: invalid pathname: $remote\n";
}
if ($remote =~ /\.sign$/) {
die "$0: cannot delete .sign files directly\n";
}
# DWIM: .bz2, .xz -> .gz
$remote =~ s/\.(bz2|xz)$/\.gz/;
if ($real) {
print 'DELETE ', url_encode($remote), "\n";
}
}
# MOVE or LINK command
sub cmd_move_link($)
{
my($cmd) = @_;
my $from = shift @args;
my $to = shift @args;
if (!defined($to)) {
usage(1);
}
if (!is_valid_filename($from)) {
die "$0: invalid pathname: $from\n";
}
if ($to =~ /\/$/) {
# Syntactic sugar: allow specifying a directory only by ending in /
if ($from =~ m:^(.*/|)([^/]+)$:) {
$to .= $2;
}
}
if (!is_valid_filename($to)) {
die "$0: invalid pathname: $to\n";
}
if ($from =~ /\.sign$/ || $to =~ /\.sign$/) {
die "$0: cannot explicitly move .sign files\n";
}
if ($from =~ /\.(gz|bz2|xz)$/ && $to =~ /\.(gz|bz2|xz)$/) {
$from =~ s/\.(bz|bz2|xz)$/\.gz/;
$to =~ s/\.(bz|bz2|xz)$/\.gz/;
} elsif ($from =~ /\.(gz|bz2|xz)$/ || $to =~ /\.(gz|bz2|xz)$/) {
die "$0: cannot move to or from compressed filenames\n";
}
if ($from eq $to) {
die "$0: moving filename to self: $from\n";
}
if ($real) {
print $cmd, ' ', url_encode($from), ' ', url_encode($to), "\n";
}
}
# Process commands
sub process_commands()
{
while (1) {
my $cmd = shift(@args);
if (!defined($cmd)) {
usage(1);
}
$cmd = "\L${cmd}";
if ($cmd eq 'put') {
cmd_put();
} elsif ($cmd eq 'mkdir') {
cmd_mkdir();
} elsif ($cmd eq 'move' || $cmd eq 'mv') {
cmd_move_link('MOVE');
} elsif ($cmd eq 'link' || $cmd eq 'ln') {
cmd_move_link('LINK');
} elsif ($cmd eq 'delete' || $cmd eq 'del' || $cmd eq 'rm') {
cmd_delete();
} else {
die "$0: unknown command: $cmd\n";
}
my $sep = shift(@args);
last if (!defined($sep)); # End of command line
if ($sep ne '--') {
die "$0: garbage at end of $cmd command\n";
}
}
}
# Main program
parse_global_options();
# "Dry run" pass
$real = 0;
@args = @ARGV;
process_commands();
# Establish output stream
setup_output();
# "Real" pass
$real = 1;
@args = @ARGV;
process_commands();
# Close the output to allow the child process to complete
close_output();
exit 0;