diff options
author | Junio C Hamano <gitster@pobox.com> | 2016-10-26 13:14:47 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2016-10-26 13:14:47 -0700 |
commit | 9fcd14491d32d76c3533ba0b1dfe7cabf31fe852 (patch) | |
tree | 4411906265b2adcf0bac4d2b68bece4828ec6371 | |
parent | 92657ea597310c05b8f4ac3496ba90496286b5d6 (diff) | |
parent | 5827a03545663f6d6b491a35edb313900608568b (diff) | |
download | git-9fcd14491d32d76c3533ba0b1dfe7cabf31fe852.tar.gz |
Merge branch 'jk/fetch-quick-tag-following'
When fetching from a remote that has many tags that are irrelevant
to branches we are following, we used to waste way too many cycles
when checking if the object pointed at by a tag (that we are not
going to fetch!) exists in our repository too carefully.
* jk/fetch-quick-tag-following:
fetch: use "quick" has_sha1_file for tag following
-rw-r--r-- | builtin/fetch.c | 11 | ||||
-rw-r--r-- | cache.h | 1 | ||||
-rw-r--r-- | sha1_file.c | 5 | ||||
-rwxr-xr-x | t/perf/p5550-fetch-tags.sh | 99 |
4 files changed, 112 insertions, 4 deletions
diff --git a/builtin/fetch.c b/builtin/fetch.c index d5329f915e..74c0546362 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -241,9 +241,10 @@ static void find_non_local_tags(struct transport *transport, * as one to ignore by setting util to NULL. */ if (ends_with(ref->name, "^{}")) { - if (item && !has_object_file(&ref->old_oid) && + if (item && + !has_object_file_with_flags(&ref->old_oid, HAS_SHA1_QUICK) && !will_fetch(head, ref->old_oid.hash) && - !has_sha1_file(item->util) && + !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) && !will_fetch(head, item->util)) item->util = NULL; item = NULL; @@ -256,7 +257,8 @@ static void find_non_local_tags(struct transport *transport, * to check if it is a lightweight tag that we want to * fetch. */ - if (item && !has_sha1_file(item->util) && + if (item && + !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) && !will_fetch(head, item->util)) item->util = NULL; @@ -276,7 +278,8 @@ static void find_non_local_tags(struct transport *transport, * We may have a final lightweight tag that needs to be * checked to see if it needs fetching. */ - if (item && !has_sha1_file(item->util) && + if (item && + !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) && !will_fetch(head, item->util)) item->util = NULL; @@ -1157,6 +1157,7 @@ static inline int has_sha1_file(const unsigned char *sha1) /* Same as the above, except for struct object_id. */ extern int has_object_file(const struct object_id *oid); +extern int has_object_file_with_flags(const struct object_id *oid, int flags); /* * Return true iff an alternate object database has a loose object diff --git a/sha1_file.c b/sha1_file.c index 266152de36..2eda9291ee 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3335,6 +3335,11 @@ int has_object_file(const struct object_id *oid) return has_sha1_file(oid->hash); } +int has_object_file_with_flags(const struct object_id *oid, int flags) +{ + return has_sha1_file_with_flags(oid->hash, flags); +} + static void check_tree(const void *buf, size_t size) { struct tree_desc desc; diff --git a/t/perf/p5550-fetch-tags.sh b/t/perf/p5550-fetch-tags.sh new file mode 100755 index 0000000000..a5dc39f86a --- /dev/null +++ b/t/perf/p5550-fetch-tags.sh @@ -0,0 +1,99 @@ +#!/bin/sh + +test_description='performance of tag-following with many tags + +This tests a fairly pathological case, so rather than rely on a real-world +case, we will construct our own repository. The situation is roughly as +follows. + +The parent repository has a large number of tags which are disconnected from +the rest of history. That makes them candidates for tag-following, but we never +actually grab them (and thus they will impact each subsequent fetch). + +The child repository is a clone of parent, without the tags, and is at least +one commit behind the parent (meaning that we will fetch one object and then +examine the tags to see if they need followed). Furthermore, it has a large +number of packs. + +The exact values of "large" here are somewhat arbitrary; I picked values that +start to show a noticeable performance problem on my machine, but without +taking too long to set up and run the tests. +' +. ./perf-lib.sh + +# make a long nonsense history on branch $1, consisting of $2 commits, each +# with a unique file pointing to the blob at $2. +create_history () { + perl -le ' + my ($branch, $n, $blob) = @ARGV; + for (1..$n) { + print "commit refs/heads/$branch"; + print "committer nobody <nobody@example.com> now"; + print "data 4"; + print "foo"; + print "M 100644 $blob $_"; + } + ' "$@" | + git fast-import --date-format=now +} + +# make a series of tags, one per commit in the revision range given by $@ +create_tags () { + git rev-list "$@" | + perl -lne 'print "create refs/tags/$. $_"' | + git update-ref --stdin +} + +# create $1 nonsense packs, each with a single blob +create_packs () { + perl -le ' + my ($n) = @ARGV; + for (1..$n) { + print "blob"; + print "data <<EOF"; + print "$_"; + print "EOF"; + } + ' "$@" | + git fast-import && + + git cat-file --batch-all-objects --batch-check='%(objectname)' | + while read sha1 + do + echo $sha1 | git pack-objects .git/objects/pack/pack + done +} + +test_expect_success 'create parent and child' ' + git init parent && + git -C parent commit --allow-empty -m base && + git clone parent child && + git -C parent commit --allow-empty -m trigger-fetch +' + +test_expect_success 'populate parent tags' ' + ( + cd parent && + blob=$(echo content | git hash-object -w --stdin) && + create_history cruft 3000 $blob && + create_tags cruft && + git branch -D cruft + ) +' + +test_expect_success 'create child packs' ' + ( + cd child && + git config gc.auto 0 && + git config gc.autopacklimit 0 && + create_packs 500 + ) +' + +test_perf 'fetch' ' + # make sure there is something to fetch on each iteration + git -C child update-ref -d refs/remotes/origin/master && + git -C child fetch +' + +test_done |