From a937b37e766479c8e780b17cce9c4b252fd97e40 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 13 Oct 2017 11:27:45 -0400 Subject: revision: quit pruning diff more quickly when possible When the revision traversal machinery is given a pathspec, we must compute the parent-diff for each commit to determine which ones are TREESAME. We set the QUICK diff flag to avoid looking at more entries than we need; we really just care whether there are any changes at all. But there is one case where we want to know a bit more: if --remove-empty is set, we care about finding cases where the change consists only of added entries (in which case we may prune the parent in try_to_simplify_commit()). To cover that case, our file_add_remove() callback does not quit the diff upon seeing an added entry; it keeps looking for other types of entries. But this means when --remove-empty is not set (and it is not by default), we compute more of the diff than is necessary. You can see this in a pathological case where a commit adds a very large number of entries, and we limit based on a broad pathspec. E.g.: perl -e ' chomp(my $blob = `git hash-object -w --stdin remove_empty_trees. This callback parameter could be passed to the "add_remove" and "change" callbacks, but there's not much point. They already receive the diff_options struct, and doing it this way avoids having to update the function signature of the other callbacks (arguably the format_callback and output_prefix functions could benefit from the same simplification). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'revision.c') diff --git a/revision.c b/revision.c index 771d079f6e..7c23ab7afe 100644 --- a/revision.c +++ b/revision.c @@ -394,8 +394,16 @@ static struct commit *one_relevant_parent(const struct rev_info *revs, * if the whole diff is removal of old data, and otherwise * REV_TREE_DIFFERENT (of course if the trees are the same we * want REV_TREE_SAME). - * That means that once we get to REV_TREE_DIFFERENT, we do not - * have to look any further. + * + * The only time we care about the distinction is when + * remove_empty_trees is in effect, in which case we care only about + * whether the whole change is REV_TREE_NEW, or if there's another type + * of change. Which means we can stop the diff early in either of these + * cases: + * + * 1. We're not using remove_empty_trees at all. + * + * 2. We saw anything except REV_TREE_NEW. */ static int tree_difference = REV_TREE_SAME; @@ -406,9 +414,10 @@ static void file_add_remove(struct diff_options *options, const char *fullpath, unsigned dirty_submodule) { int diff = addremove == '+' ? REV_TREE_NEW : REV_TREE_OLD; + struct rev_info *revs = options->change_fn_data; tree_difference |= diff; - if (tree_difference == REV_TREE_DIFFERENT) + if (!revs->remove_empty_trees || tree_difference != REV_TREE_NEW) DIFF_OPT_SET(options, HAS_CHANGES); } @@ -1346,6 +1355,7 @@ void init_revisions(struct rev_info *revs, const char *prefix) DIFF_OPT_SET(&revs->pruning, QUICK); revs->pruning.add_remove = file_add_remove; revs->pruning.change = file_change; + revs->pruning.change_fn_data = revs; revs->sort_order = REV_SORT_IN_GRAPH_ORDER; revs->dense = 1; revs->prefix = prefix; -- cgit v1.2.1