summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com>, 2016-10-26 13:14:47 -0700
committer: Junio C Hamano <gitster@pobox.com>, 2016-10-26 13:14:47 -0700
commit: 9fcd14491d32d76c3533ba0b1dfe7cabf31fe852 (patch)
tree: 4411906265b2adcf0bac4d2b68bece4828ec6371
parent: 92657ea597310c05b8f4ac3496ba90496286b5d6 (diff)
parent: 5827a03545663f6d6b491a35edb313900608568b (diff)
download: git-9fcd14491d32d76c3533ba0b1dfe7cabf31fe852.tar.gz
Merge branch 'jk/fetch-quick-tag-following'
When fetching from a remote that has many tags that are irrelevant to the branches we are following, we used to waste far too many cycles checking too carefully whether the object pointed at by a tag (that we are not going to fetch!) exists in our repository.

* jk/fetch-quick-tag-following:
  fetch: use "quick" has_sha1_file for tag following
-rw-r--r-- builtin/fetch.c 11
-rw-r--r-- cache.h 1
-rw-r--r-- sha1_file.c 5
-rwxr-xr-x t/perf/p5550-fetch-tags.sh 99
4 files changed, 112 insertions, 4 deletions
diff --git a/builtin/fetch.c b/builtin/fetch.c
index d5329f915e..74c0546362 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -241,9 +241,10 @@ static void find_non_local_tags(struct transport *transport,
* as one to ignore by setting util to NULL.
*/
if (ends_with(ref->name, "^{}")) {
- if (item && !has_object_file(&ref->old_oid) &&
+ if (item &&
+ !has_object_file_with_flags(&ref->old_oid, HAS_SHA1_QUICK) &&
!will_fetch(head, ref->old_oid.hash) &&
- !has_sha1_file(item->util) &&
+ !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;
item = NULL;
@@ -256,7 +257,8 @@ static void find_non_local_tags(struct transport *transport,
* to check if it is a lightweight tag that we want to
* fetch.
*/
- if (item && !has_sha1_file(item->util) &&
+ if (item &&
+ !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;
@@ -276,7 +278,8 @@ static void find_non_local_tags(struct transport *transport,
* We may have a final lightweight tag that needs to be
* checked to see if it needs fetching.
*/
- if (item && !has_sha1_file(item->util) &&
+ if (item &&
+ !has_sha1_file_with_flags(item->util, HAS_SHA1_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;
diff --git a/cache.h b/cache.h
index 5f2f03090f..f7ee414563 100644
--- a/cache.h
+++ b/cache.h
@@ -1157,6 +1157,7 @@ static inline int has_sha1_file(const unsigned char *sha1)
/* Same as the above, except for struct object_id. */
extern int has_object_file(const struct object_id *oid);
+extern int has_object_file_with_flags(const struct object_id *oid, int flags);
/*
* Return true iff an alternate object database has a loose object
diff --git a/sha1_file.c b/sha1_file.c
index 266152de36..2eda9291ee 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3335,6 +3335,11 @@ int has_object_file(const struct object_id *oid)
return has_sha1_file(oid->hash);
}
+int has_object_file_with_flags(const struct object_id *oid, int flags)
+{
+ return has_sha1_file_with_flags(oid->hash, flags);
+}
+
static void check_tree(const void *buf, size_t size)
{
struct tree_desc desc;
diff --git a/t/perf/p5550-fetch-tags.sh b/t/perf/p5550-fetch-tags.sh
new file mode 100755
index 0000000000..a5dc39f86a
--- /dev/null
+++ b/t/perf/p5550-fetch-tags.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+test_description='performance of tag-following with many tags
+
+This tests a fairly pathological case, so rather than rely on a real-world
+case, we will construct our own repository. The situation is roughly as
+follows.
+
+The parent repository has a large number of tags which are disconnected from
+the rest of history. That makes them candidates for tag-following, but we never
+actually grab them (and thus they will impact each subsequent fetch).
+
+The child repository is a clone of parent, without the tags, and is at least
+one commit behind the parent (meaning that we will fetch one object and then
+examine the tags to see if they need followed). Furthermore, it has a large
+number of packs.
+
+The exact values of "large" here are somewhat arbitrary; I picked values that
+start to show a noticeable performance problem on my machine, but without
+taking too long to set up and run the tests.
+'
+. ./perf-lib.sh
+
+# make a long nonsense history on branch $1, consisting of $2 commits, each
+# with a unique file pointing to the blob at $3.
+create_history () {
+ perl -le '
+ my ($branch, $n, $blob) = @ARGV;
+ for (1..$n) {
+ print "commit refs/heads/$branch";
+ print "committer nobody <nobody@example.com> now";
+ print "data 4";
+ print "foo";
+ print "M 100644 $blob $_";
+ }
+ ' "$@" |
+ git fast-import --date-format=now
+}
+
+# make a series of tags, one per commit in the revision range given by $@
+create_tags () {
+ git rev-list "$@" |
+ perl -lne 'print "create refs/tags/$. $_"' |
+ git update-ref --stdin
+}
+
+# create $1 nonsense packs, each with a single blob
+create_packs () {
+ perl -le '
+ my ($n) = @ARGV;
+ for (1..$n) {
+ print "blob";
+ print "data <<EOF";
+ print "$_";
+ print "EOF";
+ }
+ ' "$@" |
+ git fast-import &&
+
+ git cat-file --batch-all-objects --batch-check='%(objectname)' |
+ while read sha1
+ do
+ echo $sha1 | git pack-objects .git/objects/pack/pack
+ done
+}
+
+test_expect_success 'create parent and child' '
+ git init parent &&
+ git -C parent commit --allow-empty -m base &&
+ git clone parent child &&
+ git -C parent commit --allow-empty -m trigger-fetch
+'
+
+test_expect_success 'populate parent tags' '
+ (
+ cd parent &&
+ blob=$(echo content | git hash-object -w --stdin) &&
+ create_history cruft 3000 $blob &&
+ create_tags cruft &&
+ git branch -D cruft
+ )
+'
+
+test_expect_success 'create child packs' '
+ (
+ cd child &&
+ git config gc.auto 0 &&
+ git config gc.autopacklimit 0 &&
+ create_packs 500
+ )
+'
+
+test_perf 'fetch' '
+ # make sure there is something to fetch on each iteration
+ git -C child update-ref -d refs/remotes/origin/master &&
+ git -C child fetch
+'
+
+test_done