Merge branch 'td/ref-filter-memoize-contains' into seen * td/ref-filter-memoize-contains: : 'git branch --contains' and 'git for-each-ref --contains' have been : optimized to use the memoized commit traversal previously used only by : 'git tag --contains', significantly speeding up connectivity checks : across many candidate refs with shared history. ref-filter: memoize --contains with generations commit-reach: handle cycles in contains walk
diff --git a/commit-reach.c b/commit-reach.c index 026be5c..067f5d2 100644 --- a/commit-reach.c +++ b/commit-reach.c
@@ -757,7 +757,8 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) /* * Test whether the candidate is contained in the list. - * Do not recurse to find out, though, but return -1 if inconclusive. + * Do not recurse to find out, though, but return CONTAINS_UNKNOWN if + * inconclusive. */ static enum contains_result contains_test(struct commit *candidate, const struct commit_list *want, @@ -793,7 +794,7 @@ static void push_to_contains_stack(struct commit *candidate, struct contains_sta } static enum contains_result contains_tag_algo(struct commit *candidate, - const struct commit_list *want, + struct commit_list *want, struct contains_cache *cache) { struct contains_stack contains_stack = { 0, 0, NULL }; @@ -814,6 +815,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, if (result != CONTAINS_UNKNOWN) return result; + *contains_cache_at(cache, candidate) = CONTAINS_IN_PROGRESS; push_to_contains_stack(candidate, &contains_stack); while (contains_stack.nr) { struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1]; @@ -825,8 +827,8 @@ static enum contains_result contains_tag_algo(struct commit *candidate, contains_stack.nr--; } /* - * If we just popped the stack, parents->item has been marked, - * therefore contains_test will return a meaningful yes/no. + * A parent may have just been popped and marked, or may still + * be active when replacement refs create a cycle. */ else switch (contains_test(parents->item, want, cache, cutoff)) { case CONTAINS_YES: @@ -836,21 +838,50 @@ static enum contains_result contains_tag_algo(struct commit *candidate, case CONTAINS_NO: entry->parents = parents->next; break; + case CONTAINS_IN_PROGRESS: + /* + * Partial negative answers are not safe across a cycle. + * Discard them and use the cycle-safe reachability walk. + */ + goto cycle; case CONTAINS_UNKNOWN: + *contains_cache_at(cache, parents->item) = + CONTAINS_IN_PROGRESS; push_to_contains_stack(parents->item, &contains_stack); break; } } free(contains_stack.contains_stack); return contains_test(candidate, want, cache, cutoff); + +cycle: + free(contains_stack.contains_stack); + clear_contains_cache(cache); + init_contains_cache(cache); + + result = repo_is_descendant_of(the_repository, candidate, want); + if (result < 0) + exit(128); + *contains_cache_at(cache, candidate) = + result ? CONTAINS_YES : CONTAINS_NO; + return result ? CONTAINS_YES : CONTAINS_NO; } int commit_contains(struct ref_filter *filter, struct commit *commit, struct commit_list *list, struct contains_cache *cache) { - if (filter->with_commit_tag_algo) + int result; + + if (!list) + return 1; + if (filter->with_commit_tag_algo || + generation_numbers_enabled(the_repository)) return contains_tag_algo(commit, list, cache) == CONTAINS_YES; - return repo_is_descendant_of(the_repository, commit, list); + + result = repo_is_descendant_of(the_repository, commit, list); + if (result < 0) + exit(128); + return result; } int can_all_from_reach_with_flag(struct object_array *from,
diff --git a/commit-reach.h b/commit-reach.h index b3e7051..60e564f 100644 --- a/commit-reach.h +++ b/commit-reach.h
@@ -73,11 +73,19 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid); enum contains_result { CONTAINS_UNKNOWN = 0, CONTAINS_NO, - CONTAINS_YES + CONTAINS_YES, + CONTAINS_IN_PROGRESS }; define_commit_slab(contains_cache, enum contains_result); +/* + * Return whether "commit" is a descendant of any commit in "list". An empty + * list matches. + * + * The memoized traversal records answers in "cache" for one fixed "list". + * Clear it before changing the list. + */ int commit_contains(struct ref_filter *filter, struct commit *commit, struct commit_list *list, struct contains_cache *cache);
diff --git a/t/perf/p1500-graph-walks.sh b/t/perf/p1500-graph-walks.sh index 5b23ce5..99b54e2 100755 --- a/t/perf/p1500-graph-walks.sh +++ b/t/perf/p1500-graph-walks.sh
@@ -32,12 +32,47 @@ echo "X:$line" >>test-tool-tags || return 1 done && - commit=$(git commit-tree $(git rev-parse HEAD^{tree})) && + git rev-list --first-parent --max-count=8192 HEAD >contains-commits && + test_file_not_empty contains-commits && + git update-ref refs/contains-perf-base "$(tail -n 1 contains-commits)" && + awk "{ + printf \"update refs/contains-perf/%04d %s\\n\", NR, \$1 + }" contains-commits | + git update-ref --stdin && + git pack-refs --include "refs/contains-perf/*" && + + tree=$(git rev-parse HEAD^{tree}) && + base=$(git rev-parse HEAD) && + target=$(echo target | git commit-tree "$tree" -p "$base") && + git update-ref refs/contains-diverged/target "$target" && + for i in $(test_seq 1 4) + do + commit=$(echo candidate-$i | + git commit-tree "$tree" -p "$base") && + git update-ref refs/contains-diverged/candidate-$i "$commit" || + return 1 + done && + + commit=$(git commit-tree "$tree") && git update-ref refs/heads/disjoint-base $commit && git commit-graph write --reachable ' +test_expect_success 'verify contains results' ' + git for-each-ref --contains=refs/contains-perf-base \ + refs/contains-perf/ >actual && + test_line_count = $(wc -l <contains-commits) actual && + + echo refs/contains-diverged/target >expect && + GIT_TEST_COMMIT_GRAPH=0 \ + git -c core.commitGraph=false for-each-ref \ + --format="%(refname)" \ + --contains=refs/contains-diverged/target \ + refs/contains-diverged/ >actual && + test_cmp expect actual +' + test_perf 'ahead-behind counts: git for-each-ref' ' git for-each-ref --format="%(ahead-behind:HEAD)" --stdin <refs ' @@ -62,6 +97,18 @@ xargs git tag --merged=HEAD <tags ' +test_perf 'contains: git for-each-ref --contains' ' + git for-each-ref --contains=refs/contains-perf-base \ + refs/contains-perf/ >/dev/null +' + +test_perf 'contains without generations: divergent refs' ' + GIT_TEST_COMMIT_GRAPH=0 \ + git -c core.commitGraph=false for-each-ref \ + --contains=refs/contains-diverged/target \ + refs/contains-diverged/ >/dev/null +' + test_perf 'is-base check: test-tool reach (refs)' ' test-tool reach get_branch_base_for_tip <test-tool-refs '
diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh index e06feb0..72b27c8 100755 --- a/t/t6301-for-each-ref-errors.sh +++ b/t/t6301-for-each-ref-errors.sh
@@ -52,6 +52,28 @@ test_must_be_empty brief-err ' +test_expect_success 'missing ancestors are reported by contains filters' ' + test_when_finished "git update-ref -d refs/heads/missing-parent" && + { + echo "tree $(git rev-parse HEAD^{tree})" && + echo "parent $MISSING" && + git cat-file commit HEAD | + sed -n -e "/^author /p" -e "/^committer /p" && + echo && + echo "missing parent" + } >commit && + broken=$(git hash-object -t commit -w commit) && + git update-ref refs/heads/missing-parent "$broken" && + for option in --contains --no-contains + do + test_must_fail git for-each-ref "$option=HEAD" \ + refs/heads/missing-parent >out 2>err && + test_must_be_empty out && + test_grep "parse commit $MISSING" err || + return 1 + done +' + test_expect_success 'ahead-behind requires an argument' ' test_must_fail git for-each-ref \ --format="%(ahead-behind)" 2>err &&
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh index d918005..1ed91bb 100755 --- a/t/t7004-tag.sh +++ b/t/t7004-tag.sh
@@ -1611,6 +1611,27 @@ test_cmp expected actual ' +test_expect_success 'tag --contains handles cyclic replacement histories' ' + first=$(git rev-parse HEAD~2) && + second=$(git rev-parse HEAD~) && + third=$(git rev-parse HEAD) && + test_when_finished " + git replace -d $first + git replace -d $third + git tag -d cycle-a cycle-b + " && + git tag cycle-a "$first" && + git tag cycle-b "$third" && + git replace --graft "$first" "$third" "$second" && + git replace --graft "$third" "$first" && + cat >expected <<-\EOF && + cycle-a + cycle-b + EOF + git tag --contains="$second" --list "cycle-*" >actual && + test_cmp expected actual +' + # other ways of specifying the commit test_expect_success 'checking that first commit is in all tags (tag)' ' cat >expected <<-\EOF &&