summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2016-08-12 09:47:39 -0700
committerJunio C Hamano <gitster@pobox.com>2016-08-12 09:47:39 -0700
commitdd610aeda684e42d3933a719cbd59ffbcdfdbcaa (patch)
tree3001212a96785e66a7d82e18116badd5007bc0ef
parent3787e3c16ced0e3a614766dfbb55f8cbd70762c1 (diff)
parentb3dfeebb92630c54db1e4f03dbcff0e05208c4c1 (diff)
downloadgit-dd610aeda684e42d3933a719cbd59ffbcdfdbcaa.tar.gz
Merge branch 'kw/patch-ids-optim'
When "git rebase" tries to compare set of changes on the updated upstream and our own branch, it computes patch-id for all of these changes and attempts to find matches. This has been optimized by lazily computing the full patch-id (which is expensive) to be compared only for changes that touch the same set of paths. * kw/patch-ids-optim: rebase: avoid computing unnecessary patch IDs patch-ids: add flag to create the diff patch id using header only data patch-ids: replace the seen indicator with a commit pointer patch-ids: stop using a hand-rolled hashmap implementation
-rw-r--r--builtin/log.c2
-rw-r--r--diff.c16
-rw-r--r--diff.h2
-rw-r--r--patch-ids.c109
-rw-r--r--patch-ids.h11
-rw-r--r--revision.c18
-rw-r--r--t/perf/p3400-rebase.sh36
-rwxr-xr-xt/t6007-rev-list-cherry-pick-file.sh21
8 files changed, 128 insertions, 87 deletions
diff --git a/builtin/log.c b/builtin/log.c
index 1f116bea8c..92dc34dcb0 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -1343,7 +1343,7 @@ static void prepare_bases(struct base_tree_info *bases,
struct object_id *patch_id;
if (commit->util)
continue;
- if (commit_patch_id(commit, &diffopt, sha1))
+ if (commit_patch_id(commit, &diffopt, sha1, 0))
die(_("cannot get patch id"));
ALLOC_GROW(bases->patch_id, bases->nr_patch_id + 1, bases->alloc_patch_id);
patch_id = bases->patch_id + bases->nr_patch_id;
diff --git a/diff.c b/diff.c
index b43d3dd2ec..534c12e28e 100644
--- a/diff.c
+++ b/diff.c
@@ -4462,7 +4462,7 @@ static void patch_id_consume(void *priv, char *line, unsigned long len)
}
/* returns 0 upon success, and writes result into sha1 */
-static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
+static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only)
{
struct diff_queue_struct *q = &diff_queued_diff;
int i;
@@ -4497,9 +4497,6 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
diff_fill_sha1_info(p->one);
diff_fill_sha1_info(p->two);
- if (fill_mmfile(&mf1, p->one) < 0 ||
- fill_mmfile(&mf2, p->two) < 0)
- return error("unable to read files to diff");
len1 = remove_space(p->one->path, strlen(p->one->path));
len2 = remove_space(p->two->path, strlen(p->two->path));
@@ -4534,6 +4531,13 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
len2, p->two->path);
git_SHA1_Update(&ctx, buffer, len1);
+ if (diff_header_only)
+ continue;
+
+ if (fill_mmfile(&mf1, p->one) < 0 ||
+ fill_mmfile(&mf2, p->two) < 0)
+ return error("unable to read files to diff");
+
if (diff_filespec_is_binary(p->one) ||
diff_filespec_is_binary(p->two)) {
git_SHA1_Update(&ctx, oid_to_hex(&p->one->oid),
@@ -4556,11 +4560,11 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
return 0;
}
-int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1)
+int diff_flush_patch_id(struct diff_options *options, unsigned char *sha1, int diff_header_only)
{
struct diff_queue_struct *q = &diff_queued_diff;
int i;
- int result = diff_get_patch_id(options, sha1);
+ int result = diff_get_patch_id(options, sha1, diff_header_only);
for (i = 0; i < q->nr; i++)
diff_free_filepair(q->queue[i]);
diff --git a/diff.h b/diff.h
index 125447be09..7883729edf 100644
--- a/diff.h
+++ b/diff.h
@@ -342,7 +342,7 @@ extern int run_diff_files(struct rev_info *revs, unsigned int option);
extern int run_diff_index(struct rev_info *revs, int cached);
extern int do_diff_cache(const unsigned char *, struct diff_options *);
-extern int diff_flush_patch_id(struct diff_options *, unsigned char *);
+extern int diff_flush_patch_id(struct diff_options *, unsigned char *, int);
extern int diff_result_code(struct diff_options *, int);
diff --git a/patch-ids.c b/patch-ids.c
index a4d0016664..082412aca6 100644
--- a/patch-ids.c
+++ b/patch-ids.c
@@ -5,7 +5,7 @@
#include "patch-ids.h"
int commit_patch_id(struct commit *commit, struct diff_options *options,
- unsigned char *sha1)
+ unsigned char *sha1, int diff_header_only)
{
if (commit->parents)
diff_tree_sha1(commit->parents->item->object.oid.hash,
@@ -13,93 +13,86 @@ int commit_patch_id(struct commit *commit, struct diff_options *options,
else
diff_root_tree_sha1(commit->object.oid.hash, "", options);
diffcore_std(options);
- return diff_flush_patch_id(options, sha1);
+ return diff_flush_patch_id(options, sha1, diff_header_only);
}
-static const unsigned char *patch_id_access(size_t index, void *table)
+/*
+ * When we cannot load the full patch-id for both commits for whatever
+ * reason, the function returns -1 (i.e. return error(...)). Despite
+ * the "cmp" in the name of this function, the caller only cares about
+ * the return value being zero (a and b are equivalent) or non-zero (a
+ * and b are different), and returning non-zero would keep both in the
+ * result, even if they actually were equivalent, in order to err on
+ * the side of safety. The actual value being negative does not have
+ * any significance; only that it is non-zero matters.
+ */
+static int patch_id_cmp(struct patch_id *a,
+ struct patch_id *b,
+ struct diff_options *opt)
{
- struct patch_id **id_table = table;
- return id_table[index]->patch_id;
+ if (is_null_sha1(a->patch_id) &&
+ commit_patch_id(a->commit, opt, a->patch_id, 0))
+ return error("Could not get patch ID for %s",
+ oid_to_hex(&a->commit->object.oid));
+ if (is_null_sha1(b->patch_id) &&
+ commit_patch_id(b->commit, opt, b->patch_id, 0))
+ return error("Could not get patch ID for %s",
+ oid_to_hex(&b->commit->object.oid));
+ return hashcmp(a->patch_id, b->patch_id);
}
-static int patch_pos(struct patch_id **table, int nr, const unsigned char *id)
-{
- return sha1_pos(id, table, nr, patch_id_access);
-}
-
-#define BUCKET_SIZE 190 /* 190 * 21 = 3990, with slop close enough to 4K */
-struct patch_id_bucket {
- struct patch_id_bucket *next;
- int nr;
- struct patch_id bucket[BUCKET_SIZE];
-};
-
int init_patch_ids(struct patch_ids *ids)
{
memset(ids, 0, sizeof(*ids));
diff_setup(&ids->diffopts);
DIFF_OPT_SET(&ids->diffopts, RECURSIVE);
diff_setup_done(&ids->diffopts);
+ hashmap_init(&ids->patches, (hashmap_cmp_fn)patch_id_cmp, 256);
return 0;
}
int free_patch_ids(struct patch_ids *ids)
{
- struct patch_id_bucket *next, *patches;
-
- free(ids->table);
- for (patches = ids->patches; patches; patches = next) {
- next = patches->next;
- free(patches);
- }
+ hashmap_free(&ids->patches, 1);
return 0;
}
-static struct patch_id *add_commit(struct commit *commit,
- struct patch_ids *ids,
- int no_add)
+static int init_patch_id_entry(struct patch_id *patch,
+ struct commit *commit,
+ struct patch_ids *ids)
{
- struct patch_id_bucket *bucket;
- struct patch_id *ent;
- unsigned char sha1[20];
- int pos;
+ unsigned char header_only_patch_id[GIT_SHA1_RAWSZ];
- if (commit_patch_id(commit, &ids->diffopts, sha1))
- return NULL;
- pos = patch_pos(ids->table, ids->nr, sha1);
- if (0 <= pos)
- return ids->table[pos];
- if (no_add)
- return NULL;
+ patch->commit = commit;
+ if (commit_patch_id(commit, &ids->diffopts, header_only_patch_id, 1))
+ return -1;
- pos = -1 - pos;
-
- bucket = ids->patches;
- if (!bucket || (BUCKET_SIZE <= bucket->nr)) {
- bucket = xcalloc(1, sizeof(*bucket));
- bucket->next = ids->patches;
- ids->patches = bucket;
- }
- ent = &bucket->bucket[bucket->nr++];
- hashcpy(ent->patch_id, sha1);
-
- ALLOC_GROW(ids->table, ids->nr + 1, ids->alloc);
- if (pos < ids->nr)
- memmove(ids->table + pos + 1, ids->table + pos,
- sizeof(ent) * (ids->nr - pos));
- ids->nr++;
- ids->table[pos] = ent;
- return ids->table[pos];
+ hashmap_entry_init(patch, sha1hash(header_only_patch_id));
+ return 0;
}
struct patch_id *has_commit_patch_id(struct commit *commit,
struct patch_ids *ids)
{
- return add_commit(commit, ids, 1);
+ struct patch_id patch;
+
+ memset(&patch, 0, sizeof(patch));
+ if (init_patch_id_entry(&patch, commit, ids))
+ return NULL;
+
+ return hashmap_get(&ids->patches, &patch, &ids->diffopts);
}
struct patch_id *add_commit_patch_id(struct commit *commit,
struct patch_ids *ids)
{
- return add_commit(commit, ids, 0);
+ struct patch_id *key = xcalloc(1, sizeof(*key));
+
+ if (init_patch_id_entry(key, commit, ids)) {
+ free(key);
+ return NULL;
+ }
+
+ hashmap_add(&ids->patches, key);
+ return key;
}
diff --git a/patch-ids.h b/patch-ids.h
index eeb56b307f..0f34ea11ea 100644
--- a/patch-ids.h
+++ b/patch-ids.h
@@ -2,19 +2,18 @@
#define PATCH_IDS_H
struct patch_id {
- unsigned char patch_id[20];
- char seen;
+ struct hashmap_entry ent;
+ unsigned char patch_id[GIT_SHA1_RAWSZ];
+ struct commit *commit;
};
struct patch_ids {
+ struct hashmap patches;
struct diff_options diffopts;
- int nr, alloc;
- struct patch_id **table;
- struct patch_id_bucket *patches;
};
int commit_patch_id(struct commit *commit, struct diff_options *options,
- unsigned char *sha1);
+ unsigned char *sha1, int);
int init_patch_ids(struct patch_ids *);
int free_patch_ids(struct patch_ids *);
struct patch_id *add_commit_patch_id(struct commit *, struct patch_ids *);
diff --git a/revision.c b/revision.c
index 15873bf24d..8a29cb03c5 100644
--- a/revision.c
+++ b/revision.c
@@ -846,7 +846,7 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs)
*/
if (left_first != !!(flags & SYMMETRIC_LEFT))
continue;
- commit->util = add_commit_patch_id(commit, &ids);
+ add_commit_patch_id(commit, &ids);
}
/* either cherry_mark or cherry_pick are true */
@@ -873,21 +873,9 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs)
id = has_commit_patch_id(commit, &ids);
if (!id)
continue;
- id->seen = 1;
- commit->object.flags |= cherry_flag;
- }
- /* Now check the original side for seen ones */
- for (p = list; p; p = p->next) {
- struct commit *commit = p->item;
- struct patch_id *ent;
-
- ent = commit->util;
- if (!ent)
- continue;
- if (ent->seen)
- commit->object.flags |= cherry_flag;
- commit->util = NULL;
+ commit->object.flags |= cherry_flag;
+ id->commit->object.flags |= cherry_flag;
}
free_patch_ids(&ids);
diff --git a/t/perf/p3400-rebase.sh b/t/perf/p3400-rebase.sh
new file mode 100644
index 0000000000..b3e7d525d2
--- /dev/null
+++ b/t/perf/p3400-rebase.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+test_description='Tests rebase performance'
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+test_expect_success 'setup' '
+ git checkout -f -b base &&
+ git checkout -b to-rebase &&
+ git checkout -b upstream &&
+ for i in $(seq 100)
+ do
+ # simulate huge diffs
+ echo change$i >unrelated-file$i &&
+ seq 1000 >>unrelated-file$i &&
+ git add unrelated-file$i &&
+ test_tick &&
+ git commit -m commit$i unrelated-file$i &&
+ echo change$i >unrelated-file$i &&
+ seq 1000 | tac >>unrelated-file$i &&
+ git add unrelated-file$i &&
+ test_tick &&
+ git commit -m commit$i-reverse unrelated-file$i ||
+ break
+ done &&
+ git checkout to-rebase &&
+ test_commit our-patch interesting-file
+'
+
+test_perf 'rebase on top of a lot of unrelated changes' '
+ git rebase --onto upstream HEAD^ &&
+ git rebase --onto base HEAD^
+'
+
+test_done
diff --git a/t/t6007-rev-list-cherry-pick-file.sh b/t/t6007-rev-list-cherry-pick-file.sh
index 28d4f6b259..1408b608eb 100755
--- a/t/t6007-rev-list-cherry-pick-file.sh
+++ b/t/t6007-rev-list-cherry-pick-file.sh
@@ -207,4 +207,25 @@ test_expect_success '--count --left-right' '
test_cmp expect actual
'
+# Corrupt the object store deliberately to make sure
+# the object is not even checked for its existence.
+remove_loose_object () {
+ sha1="$(git rev-parse "$1")" &&
+ remainder=${sha1#??} &&
+ firsttwo=${sha1%$remainder} &&
+ rm .git/objects/$firsttwo/$remainder
+}
+
+test_expect_success '--cherry-pick avoids looking at full diffs' '
+ git checkout -b shy-diff &&
+ test_commit dont-look-at-me &&
+ echo Hello >dont-look-at-me.t &&
+ test_tick &&
+ git commit -m tip dont-look-at-me.t &&
+ git checkout -b mainline HEAD^ &&
+ test_commit to-cherry-pick &&
+ remove_loose_object shy-diff^:dont-look-at-me.t &&
+ git rev-list --cherry-pick ...shy-diff
+'
+
test_done