kup - pub/scm/utils/kup/kup - Git at Google

 #!/usr/bin/perl -T
 ## -----------------------------------------------------------------------
 ##
 ##   Copyright 2011 Intel Corporation; author: H. Peter Anvin
 ##
 ##   This program is free software; you can redistribute it and/or
 ##   modify it under the terms of the GNU General Public License as
 ##   published by the Free Software Foundation, Inc.; either version 2
 ##   of the License, or (at your option) any later version;
 ##   incorporated herein by reference.
 ##
 ## -----------------------------------------------------------------------

 #
 # kernel.org bulk file upload client
 #

 use strict;
 use warnings;
 use bytes;
 use Encode qw(encode decode);
 use File::Spec;
 use Config::Simple;

 my $blksiz = 1024*1024;

 # Global options
 my %opt = (
 	'rsh'	  => 'ssh -a -x -k -T',
 	'host'    => undef,
 	'subcmd'  => undef,
 	'batch'   => 0,
 	'verbose' => 0,
 	);

 # Read the config file settings and override the above
 my $cfg_file = $ENV{'HOME'}.'/.kuprc';
 my $cfg = new Config::Simple($cfg_file);

 if (defined($cfg)) {
 	# Update %opt with cfgfile settings (only rsh, subcmd, and host vars)
 	my %cfg_opt = $cfg->vars();

 	if (defined($cfg_opt{'default.host'})) {
 		$opt{'host'} = $cfg_opt{'default.host'};
 	}

 	if (defined($cfg_opt{'default.subcmd'})) {
 		$opt{'subcmd'} = $cfg_opt{'default.subcmd'};
 	}

 	if (defined($cfg_opt{'default.rsh'})) {
 		$opt{'rsh'} = $cfg_opt{'default.rsh'};
 	}
 }

 # If anyone's ssh is somewhere other than /bin:/usr/bin, they can specify
 # where it is by setting up their .kuprc. This also lets us run with -T
 # without playing untaint tricks.
 #
 $ENV{'PATH'} = '/bin:/usr/bin';

 if (defined $ENV{'KUP_RSH'}) {
 	$opt{'rsh'} = $ENV{'KUP_RSH'};
 }
 if (defined $ENV{'KUP_HOST'}) {
 	$opt{'host'} = $ENV{'KUP_HOST'};
 }
 if (defined $ENV{'KUP_SUBCMD'}) {
 	$opt{'subcmd'} = $ENV{'KUP_SUBCMD'};
 }
 delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};   # Make %ENV safer

 # We process the command set twice, once as a dry run and one for real,
 # to catch as many errors as early as possible
 my @args;
 my $real;

 # Usage description
 sub usage($) {
 	my($err) = @_;

 	print STDERR "Usage: $0 [global options] command [-- command...]\n";
 	print STDERR "\n";
 	print STDERR "Global options:\n";
 	print STDERR "   -b  --batch            Output command stream to stdout\n";
 	print STDERR "   -e  --rsh=command      Send output to command, override KUP_RSH\n";
 	print STDERR "   -o  --host=[user@]host Connect to [user@]host, override KUP_HOST\n";
 	print STDERR "   -c  --subcmd=cmd       After connecting via ssh, issue this subcommand\n";
 	print STDERR "   -v  --verbose          Print each command to stderr as it is sent\n";
 	print STDERR "\n";
 	print STDERR "Commands:\n";
 	print STDERR "   put local_file signature remote_path\n";
 	print STDERR "   put --tar [--prefix=] remote_tree ref signature remote_path\n";
 	print STDERR "   put --diff remote_tree ref1 ref2 signature remote_path\n";
 	print STDERR "   mkdir remote_path\n";
 	print STDERR "   mv|move old_path new_path\n";
 	print STDERR "   ln|link old_path new_path\n";
 	print STDERR "   rm|del|delete old_path\n";
 	print STDERR "   ls|dir path...\n";
 	print STDERR "   info\n";

 	exit $err;
 }

 # Return true if the supplied string is valid UTF-8 without special
 # characters
 sub is_clean_string($)
 {
 	no bytes;
 	# use feature 'unicode_strings';		-- is this needed here?

 	my($b) = @_;
 	my $f = decode('UTF-8', $b, Encode::FB_DEFAULT);

 	return 0 if ($f =~ m:[\x{0000}-\x{001f}\x{007f}-\x{00a0}\x{fffd}-\x{ffff}]:);
 	return 1;
 }

 # This returns true if the given argument is a valid filename in its
 # canonical form.  Double slashes, relative paths, dot files, control
 # characters, and malformed UTF-8 is not permitted.  We cap the length
 # of each pathname component to 100 bytes so we can add an extension
 # without worrying about it, and the entire pathname to 1024 bytes.
 sub is_valid_filename($)
 {
 	use bytes;

 	my($f) = @_;

 	return 0 if (!defined($f));		# If undefined, clearly not valid

 	return 0 if (length($f) > 1024); # Reject ridiculously long paths
 	return 0 if (!is_clean_string($f)); # Reject bad UTF-8 and control characters
 	return 0 if ($f !~ m:^/:);		# Reject relative paths
 	return 0 if ($f =~ m:/$:);		# Reject paths ending in /
 	return 0 if ($f =~ m://:);		# Reject double slashes

 	# Reject filename components starting with dot or dash, covers . and ..
 	return 0 if ($f =~ m:/[\.\-]:);

 	# Reject undesirable filename characters anywhere in the name.
 	# This isn't inherently security-critical, and could be tuned if
 	# users need it...
 	return 0 if ($f =~ m:[\!\"\$\&\'\*\;\<\>\?\\\`\|]:);

 	# Make sure we can create a filename after adding .bz2 or similar.
 	# We can't use the obvious regexp here, because regexps operate on
 	# characters, not bytes.  The limit of 100 is semi-arbitrary, but
 	# we shouldn't need filenames that long.
 	my $n = 0;
 	my $nmax = 0;
 	for (my $i = 0; $i < length($f); $i++) {
 		my $c = substr($f, $i, 1);
 		$n = ($c eq '/') ? 0 : $n+1;
 		$nmax = ($n > $nmax) ? $n : $nmax;
 	}
 	return 0 if ($nmax > 100);

 	return 1;
 }

 # Clean up a filename so that it is more likely to pass the
 # canonicalization test.  An optional second argument is used with
 # two-filename commands (move, link); it should be the already
 # canonicalized first argument.
 #
 # This can return undef for some invalid pathnames.  This needs to be
 # caught by is_valid_filename().
 sub canonicalize_path($;$)
 {
 	my($file, $root) = @_;

 	$root = '/' unless defined($root);

 	my $tail = '';
 	if ($root =~ m:^(.*/)([^/]+)$:) {
 		$root = $1;
 		$tail = $2;
 	}

 	if ($root !~ m:^/: || $root !~ m:/$:) {
 		die "$0: internal error: non-canonical root\n";
 	}

 	if ($file !~ m:^/:) {
 		$file = $root . $file;
 	}
 	if ($file =~ m:/$:) {
 		$file .= $tail;
 	}

 	my @path = ();
 	my $wasspc = 1;
 	# The -1 argument to split means "preserve trailing empty fields"
 	foreach my $s (split(/\//, $file, -1)) {
 		if ($s eq '' || $s eq '.') {
 			$wasspc = 1;
 		} elsif ($s eq '..') {
 			# If this ran off the root, error
 			return undef if (!defined(pop(@path)));
 			$wasspc = 1;
 		} else {
 			push(@path, $s);
 			$wasspc = 0;
 		}
 	}

 	# If this ended in a special component, error
 	return undef if ($wasspc);

 	# The initial '' forces the result to begin with a slash
 	return join('/', '', @path);
 }

 # Parse global options
 sub parse_global_options()
 {
 	while (scalar @ARGV && $ARGV[0] =~ /^-/) {
 		my $arg = shift(@ARGV);

 		if ($arg eq '-b' || $arg eq '--batch') {
 			$opt{'batch'} = 1;
 		} elsif ($arg eq '-e' || $arg eq '--rsh' || $arg eq '--ssh') {
 			$opt{'rsh'} = shift(@ARGV);
 		} elsif ($arg =~ /^--rsh=(.+)$/) {
 			$opt{'rsh'} = $1;
 		} elsif ($arg eq '-o' || $arg eq '--host') {
 			$opt{'host'} = shift(@ARGV);
 		} elsif ($arg =~ /^--host=(.+)$/) {
 			$opt{'host'} = $1;
 		} elsif ($arg eq '-c' || $arg eq '--subcmd') {
 			$opt{'subcmd'} = shift(@ARGV);
 		} elsif ($arg =~ /^--subcmd=(.+)$/) {
 			$opt{'subcmd'} = $1;
 		} elsif ($arg eq '-v' || $arg eq '--verbose') {
 			$opt{'verbose'}++;
 		} elsif ($arg eq '-h' || $arg eq '--help') {
 			usage(0);
 		} else {
 			die "$0: unknown option: $arg\n";
 		}
 	}
 }

 # Encode a string
 sub url_encode($)
 {
 	my($s) = @_;

 	# Hack to encode an empty string
 	return '%' if ($s eq '');

 	my $o = '';

 	foreach my $c (unpack("C*", $s)) {
 		if ($c > 32 && $c < 126 && $c != 37 && $c != 43) {
 			$o .= chr($c);
 		} elsif ($c == 32) {
 			$o .= '+';
 		} else {
 			$o .= sprintf("%%%02X", $c);
 		}
 	}

 	return $o;
 }

 # Configure the output stream
 sub setup_output()
 {
 	# In batch mode, we dump the output to stdout so the user can
 	# aggregate it best they wish
 	unless ($opt{'batch'}) {
 		if ($opt{'rsh'} !~ /^([-a-zA-Z0-9._=\@:\s\/]+)$/) {
 			die "$0: suspicious KUP_RSH setting\n";
 		}
 		my $rsh = $1;
 		if ($opt{'host'} !~ /^([-a-zA-Z0-9._\@]+)$/) {
 			die "$0: suspicious KUP_HOST\n";
 		}
 		$rsh .= " \Q$1";
 		if ($opt{'subcmd'}) {
 			if ($opt{'subcmd'} !~ /^([-a-zA-Z0-9_]+)$/) {
 				die "$0: suspicious KUP_SUBCMD\n";
 			}
 			# Add the subcommand for the receiving server
 			$rsh .= " " . $opt{'subcmd'}
 		}
 		open(STDOUT, '|-', $rsh)
 			or die "$0: cannot execute rsh command ", $rsh, "\n";
 	}
 	binmode(STDOUT);
 }

 # Terminate the output process
 sub close_output()
 {
 	$| = 1;						# Flush STDOUT
 	unless ($opt{'batch'}) {
 		close(STDOUT);
 	}
 }

 # Print a command to STDOUT, and if requested, to STDERR
 sub command(@)
 {
 	if ($real) {
 		my $cmd = join(' ', @_);

 		print STDERR $cmd, "\n" if ($opt{'verbose'});
 		print $cmd, "\n";
 	}
 }

 sub cat_file($$$)
 {
 	my($cmd, $file, $fmt) = @_;

 	if (!defined($fmt)) {
 		if ($file =~ /\.((gz|bz2|xz))$/) {
 			$fmt = $1;
 		} else {
 			$fmt = '%';
 		}
 	}

 	my $data;
 	open($data, '<', $file)
 		or die "$0: cannot open: $file: $!\n";
 	if (! -f $data) {
 		die "$0: not a plain file: $file\n";
 	}
 	my $size = -s _;

 	binmode($data);

 	if ($real) {
 		if ($size < 2) {
 			# Must be a plain file
 			$fmt = '%';
 		}

 		if (defined($fmt)) {
 			command($cmd, $size, $fmt);
 		}

 		my $blk;
 		my $len;

 		while ($size) {
 			$len = ($size < $blksiz) ? $size : $blksiz;
 			$len = read($data, $blk, $len);

 			if (!$len) {
 				die "$0: premature end of data (file changed?): $file\n";
 			}

 			print $blk;
 			$size -= $len;
 		}
 	}

 	close($data);
 }

 # PUT command
 sub cmd_put()
 {
 	my $file = shift @args;
 	my $file_tail = undef;

 	if ($file eq '-t' || $file eq '--tar') {
 		# tar hack

 		my $remote_tree = shift @args;
 		my $prefix = '';

 		if ($remote_tree eq '-p' || $remote_tree eq '--prefix') {
 			$prefix = shift @args;
 			$remote_tree = shift @args;
 		} elsif ($remote_tree =~ /^--prefix=(.+)$/) {
 			$prefix = $1;
 			$remote_tree = shift @args;
 		}

 		my $ref = shift(@args);

 		if (!defined($ref)) {
 			usage(1);
 		}

 		my $xrt = $remote_tree;
 		$remote_tree = canonicalize_path($remote_tree);
 		if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) {
 			die "$0: invalid path name for git tree: $xrt\n";
 		}
 		if (!is_clean_string($ref)) {
 			die "$0: invalid ref: $ref\n";
 		}

 		command('TAR', url_encode($remote_tree),
 				url_encode($ref), url_encode($prefix));
 	} elsif ($file eq '-d' || $file eq '--diff') {
 		# diff hack

 		my $remote_tree = shift @args;
 		my $prefix = '';

 		my $ref1 = shift(@args);
 		my $ref2 = shift(@args);

 		if (!defined($ref2)) {
 			usage(1);
 		}

 		my $xrt = $remote_tree;
 		$remote_tree = canonicalize_path($remote_tree);
 		if (!is_valid_filename($remote_tree) || $remote_tree !~ /\.git$/) {
 			die "$0: invalid path name for git tree: $xrt\n";
 		}
 		if (!is_clean_string($ref1)) {
 			die "$0: invalid ref: $ref1\n";
 		}
 		if (!is_clean_string($ref2)) {
 			die "$0: invalid ref: $ref2\n";
 		}

 		command('DIFF', url_encode($remote_tree), url_encode($ref1),
 				url_encode($ref2));
 	} elsif ($file =~ /^-/) {
 		die "$0: unknown option to put command: $file\n";
 	} else {
 		# Plain data blob.  We don't actively attempt to compress it
 		# since ssh usually has a layer of compression, but if it is
 		# already a compressed file we send it as-is and let the
 		# server decompress it.

 		cat_file('DATA', $file, undef);

 		# Get the local filename without directory
 		my($vol, $dir);
 		($vol, $dir, $file_tail) = File::Spec->splitpath($file);
 	}

 	my $sign   = shift @args;
 	my $remote = shift @args;

 	if (!defined($remote)) {
 		usage(1);
 	}

 	# This allows the user to not specify the filename if it is
 	# the same as on the local filesystem by ending the pathname
 	# with a slash
 	if ($remote =~ m:/$: && defined($file_tail)) {
 		$remote .= $file_tail;
 	}

 	my $xrt = $remote;
 	$remote = canonicalize_path($remote);
 	if (!is_valid_filename($remote)) {
 		die "$0: invalid pathname: $xrt\n";
 	}

 	if ($remote =~ /\.sign$/) {
 		die "$0: target filename cannot end in .sign\n";
 	}

 	# DWIM: .bz2, .xz -> .gz
 	$remote =~ s/\.(bz2|xz)$/.gz/;

 	cat_file('SIGN', $sign, undef);
 	command('PUT', url_encode($remote));
 }

 # MKDIR command
 sub cmd_mkdir()
 {
 	my $remote = shift @args;

 	if (!defined($remote)) {
 		usage(1);
 	}

 	my $xrt = $remote;
 	$remote = canonicalize_path($remote);
 	if (!is_valid_filename($remote)) {
 		die "$0: invalid pathname: $xrt\n";
 	}

 	if ($remote =~ /\.(sign|gz|bz2|xz)$/) {
 		die "$0: a directory name cannot end in .sign, .gz, .bz2, .xz\n";
 	}

 	command('MKDIR', url_encode($remote));
 }

 # DELETE command
 sub cmd_delete()
 {
 	my $remote = shift @args;

 	if (!defined($remote)) {
 		usage(1);
 	}

 	my $xrt = $remote;
 	$remote = canonicalize_path($remote);
 	if (!is_valid_filename($remote)) {
 		die "$0: invalid pathname: $xrt\n";
 	}

 	if ($remote =~ /\.sign$/) {
 		die "$0: cannot delete .sign files directly\n";
 	}

 	# DWIM: .bz2, .xz -> .gz
 	$remote =~ s/\.(bz2|xz)$/.gz/;

 	command('DELETE', url_encode($remote));
 }

 # MOVE or LINK command
 sub cmd_move_link($)
 {
 	my($cmd) = @_;

 	my $from = shift @args;
 	my $to   = shift @args;

 	if (!defined($to)) {
 		usage(1);
 	}

 	my $xrt = $from;
 	$from = canonicalize_path($from);
 	if (!is_valid_filename($from)) {
 		die "$0: invalid pathname: $xrt\n";
 	}

 	$xrt = $to;
 	$to = canonicalize_path($to, $from);
 	if (!is_valid_filename($to)) {
 		die "$0: invalid pathname: $xrt\n";
 	}

 	if ($from =~ /\.sign$/ || $to =~ /\.sign$/) {
 		die "$0: cannot explicitly move .sign files\n";
 	}
 	if ($from =~ /\.(gz|bz2|xz)$/ && $to =~ /\.(gz|bz2|xz)$/) {
 		$from =~ s/\.(bz2|xz)$/.gz/;
 		$to   =~ s/\.(bz2|xz)$/.gz/;
 	} elsif ($from =~ /\.(gz|bz2|xz)$/ || $to =~ /\.(gz|bz2|xz)$/) {
 		die "$0: cannot move to or from compressed filenames\n";
 	}

 	if ($from eq $to) {
 		die "$0: moving filename to self: $from\n";
 	}

 	command($cmd, url_encode($from), url_encode($to));
 }

 # DIR command (supports arbitrary number of arguments)
 sub cmd_dir()
 {
 	while (defined($args[0]) && $args[0] ne '--') {
 		my $d = shift @args;
 		$d =~ s:/$::g;
 		if ($d ne '') {
 			my $xrt = $d;
 			$d = canonicalize_path($d);
 			if (!is_valid_filename($d)) {
 				die "$0: invalid pathname: $xrt\n";
 			}
 		}
 		$d .= '/';

 		command('DIR', $d);
 	}
 }

 # INFO command (no arguments)
 sub cmd_info()
 {
 	command('INFO');
 }

 # Process commands
 sub process_commands()
 {
 	while (1) {
 		my $cmd = shift(@args);

 		if (!defined($cmd)) {
 			usage(1);
 		}

 		$cmd = "\L${cmd}";

 		if ($cmd eq 'put') {
 			cmd_put();
 		} elsif ($cmd eq 'mkdir') {
 			cmd_mkdir();
 		} elsif ($cmd eq 'move' || $cmd eq 'mv') {
 			cmd_move_link('MOVE');
 		} elsif ($cmd eq 'link' || $cmd eq 'ln') {
 			cmd_move_link('LINK');
 		} elsif ($cmd eq 'delete' || $cmd eq 'del' || $cmd eq 'rm') {
 			cmd_delete();
 		} elsif ($cmd eq 'ls' || $cmd eq 'dir') {
 			cmd_dir();
 		} elsif ($cmd eq 'info') {
 			cmd_info();
 		} else {
 			die "$0: unknown command: $cmd\n";
 		}

 		my $sep = shift(@args);

 		last if (!defined($sep)); # End of command line

 		if ($sep ne '--') {
 			die "$0: garbage at end of $cmd command\n";
 		}
 	}
 }

 # Main program
 parse_global_options();

 if (!defined($opt{'host'})) {
 	die "$0: please specify --host, KUP_HOST, or set up ~/.kuprc\n";
 }

 # "Dry run" pass
 $real = 0;
 @args = @ARGV;
 process_commands();

 # Establish output stream
 setup_output();

 # "Real" pass
 $real = 1;
 @args = @ARGV;
 process_commands();

 # Close the output to allow the child process to complete
 close_output();

 exit 0;

 # vim: noet
	#!/usr/bin/perl -T
	## -----------------------------------------------------------------------
	##
	## Copyright 2011 Intel Corporation; author: H. Peter Anvin
	##
	## This program is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation, Inc.; either version 2
	## of the License, or (at your option) any later version;
	## incorporated herein by reference.
	##
	## -----------------------------------------------------------------------

	#
	# kernel.org bulk file upload client
	#

	use strict;
	use warnings;
	use bytes;
	use Encode qw(encode decode);
	use File::Spec;
	use Config::Simple;

	my $blksiz = 1024*1024;

	# Global options
	my %opt = (
	'rsh' => 'ssh -a -x -k -T',
	'host' => undef,
	'subcmd' => undef,
	'batch' => 0,
	'verbose' => 0,
	);

	# Read the config file settings and override the above
	my $cfg_file = $ENV{'HOME'}.'/.kuprc';
	my $cfg = new Config::Simple($cfg_file);

	if (defined($cfg)) {
	# Update %opt with cfgfile settings (only rsh, subcmd, and host vars)
	my %cfg_opt = $cfg->vars();

	if (defined($cfg_opt{'default.host'})) {
	$opt{'host'} = $cfg_opt{'default.host'};
	}

	if (defined($cfg_opt{'default.subcmd'})) {
	$opt{'subcmd'} = $cfg_opt{'default.subcmd'};
	}

	if (defined($cfg_opt{'default.rsh'})) {
	$opt{'rsh'} = $cfg_opt{'default.rsh'};
	}
	}

	# If anyone's ssh is somewhere other than /bin:/usr/bin, they can specify
	# where it is by setting up their .kuprc. This also lets us run with -T
	# without playing untaint tricks.
	#
	$ENV{'PATH'} = '/bin:/usr/bin';

	if (defined $ENV{'KUP_RSH'}) {
	$opt{'rsh'} = $ENV{'KUP_RSH'};
	}
	if (defined $ENV{'KUP_HOST'}) {
	$opt{'host'} = $ENV{'KUP_HOST'};
	}
	if (defined $ENV{'KUP_SUBCMD'}) {
	$opt{'subcmd'} = $ENV{'KUP_SUBCMD'};
	}
	delete @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer

	# We process the command set twice, once as a dry run and one for real,
	# to catch as many errors as early as possible
	my @args;
	my $real;

	# Usage description
	sub usage($) {
	my($err) = @_;

	print STDERR "Usage: $0 [global options] command [-- command...]\n";
	print STDERR "\n";
	print STDERR "Global options:\n";
	print STDERR " -b --batch Output command stream to stdout\n";
	print STDERR " -e --rsh=command Send output to command, override KUP_RSH\n";
	print STDERR " -o --host=[user@]host Connect to [user@]host, override KUP_HOST\n";
	print STDERR " -c --subcmd=cmd After connecting via ssh, issue this subcommand\n";
	print STDERR " -v --verbose Print each command to stderr as it is sent\n";
	print STDERR "\n";
	print STDERR "Commands:\n";
	print STDERR " put local_file signature remote_path\n";
	print STDERR " put --tar [--prefix=] remote_tree ref signature remote_path\n";
	print STDERR " put --diff remote_tree ref1 ref2 signature remote_path\n";
	print STDERR " mkdir remote_path\n";
	print STDERR " mv\|move old_path new_path\n";
	print STDERR " ln\|link old_path new_path\n";
	print STDERR " rm\|del\|delete old_path\n";
	print STDERR " ls\|dir path...\n";
	print STDERR " info\n";

	exit $err;
	}

	# Return true if the supplied string is valid UTF-8 without special
	# characters
	sub is_clean_string($)
	{
	no bytes;
	# use feature 'unicode_strings'; -- is this needed here?

	my($b) = @_;
	my $f = decode('UTF-8', $b, Encode::FB_DEFAULT);

	return 0 if ($f =~ m:[\x{0000}-\x{001f}\x{007f}-\x{00a0}\x{fffd}-\x{ffff}]:);
	return 1;
	}

	# This returns true if the given argument is a valid filename in its
	# canonical form. Double slashes, relative paths, dot files, control
	# characters, and malformed UTF-8 is not permitted. We cap the length
	# of each pathname component to 100 bytes so we can add an extension
	# without worrying about it, and the entire pathname to 1024 bytes.
	sub is_valid_filename($)
	{
	use bytes;

	my($f) = @_;

	return 0 if (!defined($f)); # If undefined, clearly not valid

	return 0 if (length($f) > 1024); # Reject ridiculously long paths
	return 0 if (!is_clean_string($f)); # Reject bad UTF-8 and control characters
	return 0 if ($f !~ m:^/:); # Reject relative paths
	return 0 if ($f =~ m:/$:); # Reject paths ending in /
	return 0 if ($f =~ m://:); # Reject double slashes

	# Reject filename components starting with dot or dash, covers . and ..
	return 0 if ($f =~ m:/[\.\-]:);

	# Reject undesirable filename characters anywhere in the name.
	# This isn't inherently security-critical, and could be tuned if
	# users need it...
	return 0 if ($f =~ m:[\!\"\$\&\'\*\;\<\>\?\\\`\\|]:);

	# Make sure we can create a filename after adding .bz2 or similar.
	# We can't use the obvious regexp here, because regexps operate on
	# characters, not bytes. The limit of 100 is semi-arbitrary, but
	# we shouldn't need filenames that long.
	my $n = 0;
	my $nmax = 0;
	for (my $i = 0; $i < length($f); $i++) {
	my $c = substr($f, $i, 1);
	$n = ($c eq '/') ? 0 : $n+1;
	$nmax = ($n > $nmax) ? $n : $nmax;
	}
	return 0 if ($nmax > 100);

	return 1;
	}

	# Clean up a filename so that it is more likely to pass the
	# canonicalization test. An optional second argument is used with
	# two-filename commands (move, link); it should be the already
	# canonicalized first argument.
	#
	# This can return undef for some invalid pathnames. This needs to be
	# caught by is_valid_filename().
	sub canonicalize_path($;$)
	{
	my($file, $root) = @_;

	$root = '/' unless defined($root);

	my $tail = '';
	if ($root =~ m:^(.*/)([^/]+)$:) {
	$root = $1;
	$tail = $2;
	}

	if ($root !~ m:^/: \|\| $root !~ m:/$:) {
	die "$0: internal error: non-canonical root\n";
	}

	if ($file !~ m:^/:) {
	$file = $root . $file;
	}
	if ($file =~ m:/$:) {
	$file .= $tail;
	}

	my @path = ();
	my $wasspc = 1;
	# The -1 argument to split means "preserve trailing empty fields"
	foreach my $s (split(/\//, $file, -1)) {
	if ($s eq '' \|\| $s eq '.') {
	$wasspc = 1;
	} elsif ($s eq '..') {
	# If this ran off the root, error
	return undef if (!defined(pop(@path)));
	$wasspc = 1;
	} else {
	push(@path, $s);
	$wasspc = 0;
	}
	}

	# If this ended in a special component, error
	return undef if ($wasspc);

	# The initial '' forces the result to begin with a slash
	return join('/', '', @path);
	}

	# Parse global options
	sub parse_global_options()
	{
	while (scalar @ARGV && $ARGV[0] =~ /^-/) {
	my $arg = shift(@ARGV);

	if ($arg eq '-b' \|\| $arg eq '--batch') {
	$opt{'batch'} = 1;
	} elsif ($arg eq '-e' \|\| $arg eq '--rsh' \|\| $arg eq '--ssh') {
	$opt{'rsh'} = shift(@ARGV);
	} elsif ($arg =~ /^--rsh=(.+)$/) {
	$opt{'rsh'} = $1;
	} elsif ($arg eq '-o' \|\| $arg eq '--host') {
	$opt{'host'} = shift(@ARGV);
	} elsif ($arg =~ /^--host=(.+)$/) {
	$opt{'host'} = $1;
	} elsif ($arg eq '-c' \|\| $arg eq '--subcmd') {
	$opt{'subcmd'} = shift(@ARGV);
	} elsif ($arg =~ /^--subcmd=(.+)$/) {
	$opt{'subcmd'} = $1;
	} elsif ($arg eq '-v' \|\| $arg eq '--verbose') {
	$opt{'verbose'}++;
	} elsif ($arg eq '-h' \|\| $arg eq '--help') {
	usage(0);
	} else {
	die "$0: unknown option: $arg\n";
	}
	}
	}

	# Encode a string
	sub url_encode($)
	{
	my($s) = @_;

	# Hack to encode an empty string
	return '%' if ($s eq '');

	my $o = '';

	foreach my $c (unpack("C*", $s)) {
	if ($c > 32 && $c < 126 && $c != 37 && $c != 43) {
	$o .= chr($c);
	} elsif ($c == 32) {
	$o .= '+';
	} else {
	$o .= sprintf("%%%02X", $c);
	}
	}

	return $o;
	}

	# Configure the output stream
	sub setup_output()
	{
	# In batch mode, we dump the output to stdout so the user can
	# aggregate it best they wish
	unless ($opt{'batch'}) {
	if ($opt{'rsh'} !~ /^([-a-zA-Z0-9._=\@:\s\/]+)$/) {
	die "$0: suspicious KUP_RSH setting\n";
	}
	my $rsh = $1;
	if ($opt{'host'} !~ /^([-a-zA-Z0-9._\@]+)$/) {
	die "$0: suspicious KUP_HOST\n";
	}
	$rsh .= " \Q$1";
	if ($opt{'subcmd'}) {
	if ($opt{'subcmd'} !~ /^([-a-zA-Z0-9_]+)$/) {
	die "$0: suspicious KUP_SUBCMD\n";
	}
	# Add the subcommand for the receiving server
	$rsh .= " " . $opt{'subcmd'}
	}
	open(STDOUT, '\|-', $rsh)
	or die "$0: cannot execute rsh command ", $rsh, "\n";
	}
	binmode(STDOUT);
	}

	# Terminate the output process
	sub close_output()
	{
	$\| = 1; # Flush STDOUT
	unless ($opt{'batch'}) {
	close(STDOUT);
	}
	}

	# Print a command to STDOUT, and if requested, to STDERR
	sub command(@)
	{
	if ($real) {
	my $cmd = join(' ', @_);

	print STDERR $cmd, "\n" if ($opt{'verbose'});
	print $cmd, "\n";
	}
	}

	sub cat_file($$$)
	{
	my($cmd, $file, $fmt) = @_;

	if (!defined($fmt)) {
	if ($file =~ /\.((gz\|bz2\|xz))$/) {
	$fmt = $1;
	} else {
	$fmt = '%';
	}
	}

	my $data;
	open($data, '<', $file)
	or die "$0: cannot open: $file: $!\n";
	if (! -f $data) {
	die "$0: not a plain file: $file\n";
	}
	my $size = -s _;

	binmode($data);

	if ($real) {
	if ($size < 2) {
	# Must be a plain file
	$fmt = '%';
	}

	if (defined($fmt)) {
	command($cmd, $size, $fmt);
	}

	my $blk;
	my $len;

	while ($size) {
	$len = ($size < $blksiz) ? $size : $blksiz;
	$len = read($data, $blk, $len);

	if (!$len) {
	die "$0: premature end of data (file changed?): $file\n";
	}

	print $blk;
	$size -= $len;
	}
	}

	close($data);
	}

	# PUT command
	sub cmd_put()
	{
	my $file = shift @args;
	my $file_tail = undef;

	if ($file eq '-t' \|\| $file eq '--tar') {
	# tar hack

	my $remote_tree = shift @args;
	my $prefix = '';

	if ($remote_tree eq '-p' \|\| $remote_tree eq '--prefix') {
	$prefix = shift @args;
	$remote_tree = shift @args;
	} elsif ($remote_tree =~ /^--prefix=(.+)$/) {
	$prefix = $1;
	$remote_tree = shift @args;
	}

	my $ref = shift(@args);

	if (!defined($ref)) {
	usage(1);
	}

	my $xrt = $remote_tree;
	$remote_tree = canonicalize_path($remote_tree);
	if (!is_valid_filename($remote_tree) \|\| $remote_tree !~ /\.git$/) {
	die "$0: invalid path name for git tree: $xrt\n";
	}
	if (!is_clean_string($ref)) {
	die "$0: invalid ref: $ref\n";
	}

	command('TAR', url_encode($remote_tree),
	url_encode($ref), url_encode($prefix));
	} elsif ($file eq '-d' \|\| $file eq '--diff') {
	# diff hack

	my $remote_tree = shift @args;
	my $prefix = '';

	my $ref1 = shift(@args);
	my $ref2 = shift(@args);

	if (!defined($ref2)) {
	usage(1);
	}

	my $xrt = $remote_tree;
	$remote_tree = canonicalize_path($remote_tree);
	if (!is_valid_filename($remote_tree) \|\| $remote_tree !~ /\.git$/) {
	die "$0: invalid path name for git tree: $xrt\n";
	}
	if (!is_clean_string($ref1)) {
	die "$0: invalid ref: $ref1\n";
	}
	if (!is_clean_string($ref2)) {
	die "$0: invalid ref: $ref2\n";
	}

	command('DIFF', url_encode($remote_tree), url_encode($ref1),
	url_encode($ref2));
	} elsif ($file =~ /^-/) {
	die "$0: unknown option to put command: $file\n";
	} else {
	# Plain data blob. We don't actively attempt to compress it
	# since ssh usually has a layer of compression, but if it is
	# already a compressed file we send it as-is and let the
	# server decompress it.

	cat_file('DATA', $file, undef);

	# Get the local filename without directory
	my($vol, $dir);
	($vol, $dir, $file_tail) = File::Spec->splitpath($file);
	}

	my $sign = shift @args;
	my $remote = shift @args;

	if (!defined($remote)) {
	usage(1);
	}

	# This allows the user to not specify the filename if it is
	# the same as on the local filesystem by ending the pathname
	# with a slash
	if ($remote =~ m:/$: && defined($file_tail)) {
	$remote .= $file_tail;
	}

	my $xrt = $remote;
	$remote = canonicalize_path($remote);
	if (!is_valid_filename($remote)) {
	die "$0: invalid pathname: $xrt\n";
	}

	if ($remote =~ /\.sign$/) {
	die "$0: target filename cannot end in .sign\n";
	}

	# DWIM: .bz2, .xz -> .gz
	$remote =~ s/\.(bz2\|xz)$/.gz/;

	cat_file('SIGN', $sign, undef);
	command('PUT', url_encode($remote));
	}

	# MKDIR command
	sub cmd_mkdir()
	{
	my $remote = shift @args;

	if (!defined($remote)) {
	usage(1);
	}

	my $xrt = $remote;
	$remote = canonicalize_path($remote);
	if (!is_valid_filename($remote)) {
	die "$0: invalid pathname: $xrt\n";
	}

	if ($remote =~ /\.(sign\|gz\|bz2\|xz)$/) {
	die "$0: a directory name cannot end in .sign, .gz, .bz2, .xz\n";
	}

	command('MKDIR', url_encode($remote));
	}

	# DELETE command
	sub cmd_delete()
	{
	my $remote = shift @args;

	if (!defined($remote)) {
	usage(1);
	}

	my $xrt = $remote;
	$remote = canonicalize_path($remote);
	if (!is_valid_filename($remote)) {
	die "$0: invalid pathname: $xrt\n";
	}

	if ($remote =~ /\.sign$/) {
	die "$0: cannot delete .sign files directly\n";
	}

	# DWIM: .bz2, .xz -> .gz
	$remote =~ s/\.(bz2\|xz)$/.gz/;

	command('DELETE', url_encode($remote));
	}

	# MOVE or LINK command
	sub cmd_move_link($)
	{
	my($cmd) = @_;

	my $from = shift @args;
	my $to = shift @args;

	if (!defined($to)) {
	usage(1);
	}

	my $xrt = $from;
	$from = canonicalize_path($from);
	if (!is_valid_filename($from)) {
	die "$0: invalid pathname: $xrt\n";
	}

	$xrt = $to;
	$to = canonicalize_path($to, $from);
	if (!is_valid_filename($to)) {
	die "$0: invalid pathname: $xrt\n";
	}

	if ($from =~ /\.sign$/ \|\| $to =~ /\.sign$/) {
	die "$0: cannot explicitly move .sign files\n";
	}
	if ($from =~ /\.(gz\|bz2\|xz)$/ && $to =~ /\.(gz\|bz2\|xz)$/) {
	$from =~ s/\.(bz2\|xz)$/.gz/;
	$to =~ s/\.(bz2\|xz)$/.gz/;
	} elsif ($from =~ /\.(gz\|bz2\|xz)$/ \|\| $to =~ /\.(gz\|bz2\|xz)$/) {
	die "$0: cannot move to or from compressed filenames\n";
	}

	if ($from eq $to) {
	die "$0: moving filename to self: $from\n";
	}

	command($cmd, url_encode($from), url_encode($to));
	}

	# DIR command (supports arbitrary number of arguments)
	sub cmd_dir()
	{
	while (defined($args[0]) && $args[0] ne '--') {
	my $d = shift @args;
	$d =~ s:/$::g;
	if ($d ne '') {
	my $xrt = $d;
	$d = canonicalize_path($d);
	if (!is_valid_filename($d)) {
	die "$0: invalid pathname: $xrt\n";
	}
	}
	$d .= '/';

	command('DIR', $d);
	}
	}

	# INFO command (no arguments)
	sub cmd_info()
	{
	command('INFO');
	}

	# Process commands
	sub process_commands()
	{
	while (1) {
	my $cmd = shift(@args);

	if (!defined($cmd)) {
	usage(1);
	}

	$cmd = "\L${cmd}";

	if ($cmd eq 'put') {
	cmd_put();
	} elsif ($cmd eq 'mkdir') {
	cmd_mkdir();
	} elsif ($cmd eq 'move' \|\| $cmd eq 'mv') {
	cmd_move_link('MOVE');
	} elsif ($cmd eq 'link' \|\| $cmd eq 'ln') {
	cmd_move_link('LINK');
	} elsif ($cmd eq 'delete' \|\| $cmd eq 'del' \|\| $cmd eq 'rm') {
	cmd_delete();
	} elsif ($cmd eq 'ls' \|\| $cmd eq 'dir') {
	cmd_dir();
	} elsif ($cmd eq 'info') {
	cmd_info();
	} else {
	die "$0: unknown command: $cmd\n";
	}

	my $sep = shift(@args);

	last if (!defined($sep)); # End of command line

	if ($sep ne '--') {
	die "$0: garbage at end of $cmd command\n";
	}
	}
	}

	# Main program
	parse_global_options();

	if (!defined($opt{'host'})) {
	die "$0: please specify --host, KUP_HOST, or set up ~/.kuprc\n";
	}

	# "Dry run" pass
	$real = 0;
	@args = @ARGV;
	process_commands();

	# Establish output stream
	setup_output();

	# "Real" pass
	$real = 1;
	@args = @ARGV;
	process_commands();

	# Close the output to allow the child process to complete
	close_output();

	exit 0;

	# vim: noet