summary refs log tree commit diff
diff options
context:
space:
mode:
author Junio C Hamano <gitster@pobox.com> 2020-04-28 15:49:58 -0700
committer Junio C Hamano <gitster@pobox.com> 2020-04-28 15:49:58 -0700
commit 6ae3c797883b1ce8d4138ce08f4ec4901e4dbe50 (patch)
tree 6596006db7d773663c8f55cc0eddcdaeca02de18
parent 342bc9e29fa462b127d9e1c02aa58349e58e56f0 (diff)
parent d8410a816bc0ccaa8532cb201ee8073e0c457d39 (diff)
download git-6ae3c797883b1ce8d4138ce08f4ec4901e4dbe50.tar.gz
Merge branch 'jk/fast-import-use-hashmap'
The custom hash function used by "git fast-import" has been replaced
with the one from hashmap.c, which gave us a nice performance boost.

* jk/fast-import-use-hashmap:
  fast-import: replace custom hash with hashmap.c
-rw-r--r-- fast-import.c 61
-rwxr-xr-x t/perf/p9300-fast-import-export.sh 23
2 files changed, 59 insertions, 25 deletions
diff --git a/fast-import.c b/fast-import.c
index 202dda11a6..c98970274c 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -39,12 +39,28 @@
struct object_entry {
struct pack_idx_entry idx;
- struct object_entry *next;
+ struct hashmap_entry ent;
uint32_t type : TYPE_BITS,
pack_id : PACK_ID_BITS,
depth : DEPTH_BITS;
};
+static int object_entry_hashcmp(const void *map_data,
+ const struct hashmap_entry *eptr,
+ const struct hashmap_entry *entry_or_key,
+ const void *keydata)
+{
+ const struct object_id *oid = keydata;
+ const struct object_entry *e1, *e2;
+
+ e1 = container_of(eptr, const struct object_entry, ent);
+ if (oid)
+ return oidcmp(&e1->idx.oid, oid);
+
+ e2 = container_of(entry_or_key, const struct object_entry, ent);
+ return oidcmp(&e1->idx.oid, &e2->idx.oid);
+}
+
struct object_entry_pool {
struct object_entry_pool *next_pool;
struct object_entry *next_free;
@@ -178,7 +194,7 @@ static off_t pack_size;
/* Table of objects we've written. */
static unsigned int object_entry_alloc = 5000;
static struct object_entry_pool *blocks;
-static struct object_entry *object_table[1 << 16];
+static struct hashmap object_table;
static struct mark_set *marks;
static const char *export_marks_file;
static const char *import_marks_file;
@@ -455,44 +471,37 @@ static struct object_entry *new_object(struct object_id *oid)
static struct object_entry *find_object(struct object_id *oid)
{
- unsigned int h = oid->hash[0] << 8 | oid->hash[1];
- struct object_entry *e;
- for (e = object_table[h]; e; e = e->next)
- if (oideq(oid, &e->idx.oid))
- return e;
- return NULL;
+ return hashmap_get_entry_from_hash(&object_table, oidhash(oid), oid,
+ struct object_entry, ent);
}
static struct object_entry *insert_object(struct object_id *oid)
{
- unsigned int h = oid->hash[0] << 8 | oid->hash[1];
- struct object_entry *e = object_table[h];
+ struct object_entry *e;
+ unsigned int hash = oidhash(oid);
- while (e) {
- if (oideq(oid, &e->idx.oid))
- return e;
- e = e->next;
+ e = hashmap_get_entry_from_hash(&object_table, hash, oid,
+ struct object_entry, ent);
+ if (!e) {
+ e = new_object(oid);
+ e->idx.offset = 0;
+ hashmap_entry_init(&e->ent, hash);
+ hashmap_add(&object_table, &e->ent);
}
- e = new_object(oid);
- e->next = object_table[h];
- e->idx.offset = 0;
- object_table[h] = e;
return e;
}
static void invalidate_pack_id(unsigned int id)
{
- unsigned int h;
unsigned long lu;
struct tag *t;
+ struct hashmap_iter iter;
+ struct object_entry *e;
- for (h = 0; h < ARRAY_SIZE(object_table); h++) {
- struct object_entry *e;
-
- for (e = object_table[h]; e; e = e->next)
- if (e->pack_id == id)
- e->pack_id = MAX_PACK_ID;
+ hashmap_for_each_entry(&object_table, &iter, e, ent) {
+ if (e->pack_id == id)
+ e->pack_id = MAX_PACK_ID;
}
for (lu = 0; lu < branch_table_sz; lu++) {
@@ -3511,6 +3520,8 @@ int cmd_main(int argc, const char **argv)
avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
marks = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set));
+ hashmap_init(&object_table, object_entry_hashcmp, NULL, 0);
+
/*
* We don't parse most options until after we've seen the set of
* "feature" lines at the start of the stream (which allows the command
diff --git a/t/perf/p9300-fast-import-export.sh b/t/perf/p9300-fast-import-export.sh
new file mode 100755
index 0000000000..586161e9ad
--- /dev/null
+++ b/t/perf/p9300-fast-import-export.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+test_description='test fast-import and fast-export performance'
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+# Use --no-data here to produce a vastly smaller export file.
+# This is much cheaper to work with but should still exercise
+# fast-import pretty well (we'll still process all commits and
+# trees, which account for 60% or more of objects in most repos).
+#
+# Use --reencode to avoid the default of aborting on non-utf8 commits,
+# which lets this test run against a wider variety of sample repos.
+test_perf 'export (no-blobs)' '
+ git fast-export --reencode=yes --no-data HEAD >export
+'
+
+test_perf 'import (no-blobs)' '
+ git fast-import --force <export
+'
+
+test_done