blob: db7900ff1ec1e1e02f04daac9dc1adec3b9018c9 [file] [log] [blame]
#!/usr/bin/perl
#
# Copyright 2014 Intel Corporation
# Author: Jacob Keller
# License: GPLv2
#
# Find the most recent commit to which a given mbox (*with* index information!)
# can be applied, assuming that our tree has the blobs available. Passes all
# unrecognized options directly to git-log, so you can shorten or filter which
# commits to check as you desire, such as limiting how far back the check will
# run, and so on.
use 5.016;
use warnings;
use strict;
use Getopt::Long;
use File::Basename;
# Name this script was invoked under; interpolated into the usage text.
my $PROG = basename($0);

# Print the usage/help text to standard error.
#
# Takes no arguments. The text goes to STDERR, in line with the usage text's
# own statement that standard output only ever carries a commit id. Returns
# whatever print returns.
sub show_usage {
    print STDERR <<"END";
Usage: $PROG [options] -- [options and arguments to git-log] ...
This is a specialized extension of git, which can be used to help find a commit
to which a given mbox is applicable. The mbox is expected to be passed in via
standard input.
This works by parsing diff and index information from the patch
or series of patches, in order to determine what initial index it should check
for. By default it will search the entire history (each commit, going backwards
from HEAD). You may pass arguments to git-log which limit this search. Detailed
explanation of the various git-log options which may be useful here, is beyond
the scope of this usage output, however a few examples are provided below.
Examples:
; check only the most recent commit, and stop if it fails.
git find-base -- -1 HEAD < "mbox-file"
; check the most recent commit of a branch
git find-base -- -1 branch < "mbox-file"
; check commits between two branches
git find-base -- master..devel < "mbox-file"
Essentially, the arguments are passed to generate a list of commit objects to
check, and you can use the powerful options in git-log to craft this list to
what you want to check against.
The tool works by checking index information, and will return the first commit
from git-log for which the mbox passed has matching initial index information.
This means that the mbox *will* apply cleanly to that commit, because it has
the exact initial index that was expected. It does *not* require that the patch be
based exactly on the commit that was supplied, but only that the files it
modified are exactly what it thought.
Warnings and errors are printed to the standard error, and the only output to
standard out will be a single commit id. If nothing was found, no standard
output will be generated, and this utility will exit with a non-zero exit code.
Options:
-k, --keep Keep duplicate diff chunks.
-?, -h Show this text and exit.
END
}
# Decide whether two object ids agree when either one may be an abbreviation
# of the other.
#
# Parameters:
#   $x, $y - object id strings, possibly abbreviated.
#
# Returns true when one string is a prefix of the other, false otherwise. An
# empty string is a prefix of every string, so it matches anything.
sub match_index {
    my ($x, $y) = @_;

    # Use `||`, not `or`: `return A or B` parses as `(return A) or B`, which
    # returns A alone and never evaluates the second comparison.
    return index($x, $y) == 0 || index($y, $x) == 0;
}
# Compare two flat hashes for equality.
#
# Parameters (the prototype takes both hashes by reference, so callers write
# hash_comp(%first, %second)):
#   %x, %y - hashes whose values are plain scalars or undef.
#
# Returns 1 when both hashes have exactly the same keys and every pair of
# values is either equal as strings or undefined on both sides; returns the
# empty string otherwise.
sub hash_comp(\%\%) {
    my ($x, $y) = @_;

    # Different key counts can never be equal. This also makes the test
    # symmetric: a hash is not "equal" to another that merely contains it.
    return '' if scalar(keys %$x) != scalar(keys %$y);

    for my $key (keys %$x) {
        return '' unless exists $y->{$key};

        my ($left, $right) = ($x->{$key}, $y->{$key});

        # Two missing values agree; comparing them with eq would only warn.
        next if !defined $left && !defined $right;

        return '' unless defined $left && defined $right && $left eq $right;
    }

    return 1;
}
# Tell whether a path is currently present in a tree view.
#
# Parameters (the prototype takes the hash by reference, so callers write
# path_exists(%tree, $path)):
#   %tree - maps a path to a hash reference that has a "status" field.
#   $path - the path to look up.
#
# Returns true only when the path has an entry and that entry's status is the
# empty string; entries carrying any other status do not count as existing.
# The lookup does not create an entry for a missing path.
sub path_exists(\%$) {
    my ($tree, $path) = @_;

    # Use `&&`, not `and`: `return A and B` parses as `(return A) and B`,
    # which returns the exists test alone and never looks at the status.
    return exists($tree->{$path}) && $tree->{$path}->{status} eq "";
}
# False until --keep is given; when true, duplicate diff chunks are kept.
my $duplicates = '';

# Anything Getopt::Long does not recognize stays in @ARGV instead of being
# reported as an error.
Getopt::Long::Configure("pass_through");

# Option table: -h / -? print the usage text and exit successfully, and
# --keep (negatable) toggles $duplicates.
my @option_spec = (
    'h|?' => sub {
        show_usage();
        exit 0;
    },
    'keep!' => \$duplicates,
);
GetOptions(@option_spec);
# Slurp the contents into $mbox for processing. Fall back to an empty string
# should the read yield undef, so the matching below never sees undef.
my $mbox = do { local $/; <STDIN> } // '';

# Array of hash references, one per diff chunk, in the order they were found.
# Each holds oldpath, newpath, oldindex, newindex and action, plus oldmode,
# newmode and similarity when the matching headers are present.
my @chunks = ();

# The possible list of extended headers supported by git-diff output
my $extended_headers = qr/(old mode|new mode|deleted file mode|new file mode|copy from|copy to|rename from|rename to|similarity index|dissimilarity index|index)/;

# Split mbox apart by diff header chunks, finding a diff line followed by any
# number of extended header lines. A chunk ends just before the first line
# that does not start with one of the extended headers.
while ($mbox =~ /^(?<chunk>diff (?s:.*?))(?=^(?!$extended_headers))/gm) {
    # Capture the block right away: the matches below overwrite %+.
    my $rawchunk = $+{chunk};
    print STDERR "Found a diff chunk\n";
    print STDERR $rawchunk;

    # Check whether it has expected format
    if ( $rawchunk =~ /^diff --git [iwcoab]\/(?<oldpath>\S+) [iwcoab]\/(?<newpath>\S+)$/m ) {
        # We have a standard git diff chunk. Now, we need to parse the extended
        # headers from the section.
        my %chunk = (
            oldpath  => $+{oldpath},
            newpath  => $+{newpath},
            oldindex => "",
            newindex => "",
            action   => "none",
        );

        # The index line carries the blob ids and, optionally, a mode that
        # applies to both sides. Without a mode both fields become undef.
        if ( $rawchunk =~ /^index (?<oldindex>[0-9a-fA-F]+)[.]{2}(?<newindex>[0-9a-fA-F]+)( (?<mode>[0-7]{6}))?$/m ) {
            $chunk{oldindex} = $+{oldindex};
            $chunk{newindex} = $+{newindex};
            $chunk{oldmode}  = $+{mode};
            $chunk{newmode}  = $+{mode};
        }

        # Explicit mode headers override whatever the index line supplied.
        if ( $rawchunk =~ /^old mode (?<mode>[0-7]{6})$/m ) {
            $chunk{oldmode} = $+{mode};
        }
        if ( $rawchunk =~ /^new mode (?<mode>[0-7]{6})$/m ) {
            $chunk{newmode} = $+{mode};
        }
        if ( $rawchunk =~ /^deleted file mode (?<mode>[0-7]{6})$/m ) {
            $chunk{oldmode} = $+{mode};
            $chunk{action}  = "delete";
        }
        if ( $rawchunk =~ /^new file mode (?<mode>[0-7]{6})$/m ) {
            $chunk{newmode} = $+{mode};
            $chunk{action}  = "create";
        }

        # Either rename header marks the chunk as a rename, provided it names
        # the same path as the diff line.
        if ( $rawchunk =~ /^rename from \Q$chunk{oldpath}\E$/m ) {
            $chunk{action} = "rename";
        }
        if ( $rawchunk =~ /^rename to \Q$chunk{newpath}\E$/m ) {
            $chunk{action} = "rename";
        }

        if ( $rawchunk =~ /^similarity index (?<similarity>[0-9]{1,3}%)$/m ) {
            $chunk{similarity} = $+{similarity};
        }

        # Capture only the digits so the subtraction is purely numeric, then
        # put the percent sign back so the value has the same "NN%" form that
        # the similarity header stores.
        if ( $rawchunk =~ /^dissimilarity index (?<dissimilarity>[0-9]{1,3})%$/m ) {
            $chunk{similarity} = ( 100 - $+{dissimilarity} ) . "%";
        }

        if ( not $duplicates and ( grep { hash_comp ( %$_, %chunk ) } @chunks ) > 0 ) {
            print STDERR "Skipping duplicate diff chunk. Disable this behavior with --keep.\n";
        } else {
            push (@chunks, \%chunk);
        }
    } elsif ( $rawchunk =~ /^diff --(combined|cc) (?<newfile>\S+)$/m ) {
        # We can't use combined diff formats, since these are used for multiple
        # parents, and are not suitable for this process
        print STDERR "Found a combined diff format, indicating a merge. We can't find a base commit for a merge!\n";
        exit 1;
    } else {
        # Non git-formats are not supported, as we need the index information
        print STDERR "Found a diff chunk, but it does not have a recognized format.\n";
        exit 1;
    }
}
# We have collated all the chunks. Now we need to loop over a series of commits
# based on user input. For each commit, we will try to build up the list of
# changes and see if it is applicable.

# Check whether every collected diff chunk is consistent with one commit.
#
# Parameters:
#   $commit - the commit id handed to git-ls-tree.
#
# Walks @chunks in order, keeping a private view of the paths they touch.
# Returns 1 when every chunk fits, and 0 as soon as one does not, or when
# git-ls-tree fails or prints something unexpected. The reason for a rejection
# is printed to STDERR. Dies if the pipe to git-ls-tree cannot be opened.
sub check_commit {
    my ( $commit ) = @_;

    # Our current view of the tree: path => { mode, index, status }. An empty
    # status marks a path that is present; "renamed" and "deleted" mark paths
    # that an earlier chunk took away.
    my %tree = ();

    # True when the path has an entry that has not been renamed or deleted.
    my $is_live = sub {
        my ( $path ) = @_;
        return exists( $tree{$path} ) && $tree{$path}->{status} eq "";
    };

    # For each chunk, we need to build up the tree, looking up from git-ls-tree
    # the first time we find a path. We want to see if our patch could cleanly
    # apply to the given commit.
    for my $chunk ( @chunks ) {
        my $oldpath = $chunk->{oldpath};
        my $newpath = $chunk->{newpath};

        # If the path doesn't exist yet, just fill in some information about it
        # from the real tree
        if ( not exists $tree{$oldpath} ) {
            open my $ls_tree, '-|', 'git', 'ls-tree', '--full-tree', $commit, '--', $oldpath
                or die "Couldn't open pipe to git-ls-tree: ", $!;
            my $ls_tree_output = <$ls_tree>;
            close $ls_tree or do {
                print STDERR "git-ls-tree failed: ", $? >> 8, "\n";
                return 0;
            };

            # Only add the tree object if we actually have output
            if ( defined $ls_tree_output ) {
                chomp $ls_tree_output;
                $ls_tree_output =~ /\A(?<mode>[0-7]{6}) (?:blob|tree|commit) (?<id>\S+)/ or do {
                    print STDERR "Unexpected git-ls-tree output.\n";
                    return 0;
                };
                $tree{$oldpath} = {
                    mode   => $+{mode},
                    index  => $+{id},
                    status => "",
                };
            }
        }

        # We have now added any known information about this path to the tree.
        # We will now attempt to modify the tree based on the contents of the
        # chunk.
        if ( $chunk->{action} eq "create" ) {
            if ( $is_live->($oldpath) ) {
                # This path already exists, so we can't add it!
                print STDERR "$oldpath already exists.\n";
                return 0;
            }

            # The path either never existed or was renamed or deleted by an
            # earlier chunk, so it can be added. Build a fresh entry rather
            # than editing a leftover one in place.
            $tree{$oldpath} = {
                mode   => $chunk->{newmode},
                index  => $chunk->{newindex},
                status => "",
            };
            next;
        }

        if ( not $is_live->($oldpath) ) {
            # This path no longer exists, we can't modify it.
            print STDERR "$oldpath does not exist.\n";
            return 0;
        }

        my $entry = $tree{$oldpath};
        if ( not match_index( $entry->{index}, $chunk->{oldindex} ) ) {
            print STDERR "$oldpath does not have matching index.\n";
            return 0;
        }

        # Chunks without an index line or a mode leave those fields as is.
        if ( $chunk->{newindex} ) {
            $entry->{index} = $chunk->{newindex};
        }
        if ( $chunk->{newmode} ) {
            $entry->{mode} = $chunk->{newmode};
        }

        # Handle special case here for rename and delete actions
        if ( $chunk->{action} eq "rename" ) {
            # NOTE(review): the rename target is only checked against paths
            # already seen in this run; it is never looked up in the commit.
            if ( $is_live->($newpath) ) {
                print STDERR "$newpath already exists.\n";
                return 0;
            }

            # Store a copy under the new path. Sharing one hash between both
            # paths would mark the new path as renamed too.
            $tree{$newpath} = { %{$entry} };
            $entry->{status} = "renamed";
        } elsif ( $chunk->{action} eq "delete" ) {
            $entry->{status} = "deleted";
        }
    }

    # If we get here, that means we had no issues verifying each chunk, and we
    # can exit true.
    return 1;
}
# Build the git-log argument list from whatever option parsing left in @ARGV.
my @log_args = @ARGV;

# With pass_through enabled, Getopt::Long leaves the "--" options terminator
# in @ARGV. Drop it, otherwise git-log would read every following argument as
# a path instead of as a revision or option.
shift @log_args if @log_args and $log_args[0] eq '--';

# Our output format goes after the user's options, so it is the last format
# given, but before any "--" path separator so that it is not taken for a path.
my ($separator) = grep { $log_args[$_] eq '--' } 0 .. $#log_args;
splice @log_args, $separator // scalar(@log_args), 0, '--pretty=%H';

# Open the git-log pipe. It prints one full commit hash per line.
open my $log, '-|', 'git', 'log', @log_args
    or die "Couldn't open pipe to git-log: ", $!;

# Loop through each commit in the list, checking if the diff chunks can apply
# cleanly to the commit. Easily allow modifying which commits are checked via
# options to the git-log command, which allows limiting what can be checked.
while ( my $commit = <$log> ) {
    chomp $commit;
    if ( check_commit($commit) ) {
        # Print the commit hash we found, and exit with a good return status.
        print "$commit\n";
        exit 0;
    }
}

# Closing a pipe fails when the command exited with a non-zero status; report
# that so a bad git-log invocation is not mistaken for "no commit matched".
close $log
    or print STDERR "git-log failed: ", $? >> 8, "\n";

# We failed to find a commit, so exit 1
print STDERR "Failed to find matching base commit.\n";
exit 1;