diff options
author | Junio C Hamano <gitster@pobox.com> | 2020-04-28 15:49:58 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2020-04-28 15:49:58 -0700 |
commit | 6ae3c797883b1ce8d4138ce08f4ec4901e4dbe50 (patch) | |
tree | 6596006db7d773663c8f55cc0eddcdaeca02de18 | |
parent | 342bc9e29fa462b127d9e1c02aa58349e58e56f0 (diff) | |
parent | d8410a816bc0ccaa8532cb201ee8073e0c457d39 (diff) | |
download | git-6ae3c797883b1ce8d4138ce08f4ec4901e4dbe50.tar.gz |
Merge branch 'jk/fast-import-use-hashmap'
The custom hash function used by "git fast-import" has been
replaced with the one from hashmap.c, which gave us a nice
performance boost.
* jk/fast-import-use-hashmap:
fast-import: replace custom hash with hashmap.c
-rw-r--r-- | fast-import.c | 61 | ||||
-rwxr-xr-x | t/perf/p9300-fast-import-export.sh | 23 |
2 files changed, 59 insertions, 25 deletions
diff --git a/fast-import.c b/fast-import.c index 202dda11a6..c98970274c 100644 --- a/fast-import.c +++ b/fast-import.c @@ -39,12 +39,28 @@ struct object_entry { struct pack_idx_entry idx; - struct object_entry *next; + struct hashmap_entry ent; uint32_t type : TYPE_BITS, pack_id : PACK_ID_BITS, depth : DEPTH_BITS; }; +static int object_entry_hashcmp(const void *map_data, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata) +{ + const struct object_id *oid = keydata; + const struct object_entry *e1, *e2; + + e1 = container_of(eptr, const struct object_entry, ent); + if (oid) + return oidcmp(&e1->idx.oid, oid); + + e2 = container_of(entry_or_key, const struct object_entry, ent); + return oidcmp(&e1->idx.oid, &e2->idx.oid); +} + struct object_entry_pool { struct object_entry_pool *next_pool; struct object_entry *next_free; @@ -178,7 +194,7 @@ static off_t pack_size; /* Table of objects we've written. */ static unsigned int object_entry_alloc = 5000; static struct object_entry_pool *blocks; -static struct object_entry *object_table[1 << 16]; +static struct hashmap object_table; static struct mark_set *marks; static const char *export_marks_file; static const char *import_marks_file; @@ -455,44 +471,37 @@ static struct object_entry *new_object(struct object_id *oid) static struct object_entry *find_object(struct object_id *oid) { - unsigned int h = oid->hash[0] << 8 | oid->hash[1]; - struct object_entry *e; - for (e = object_table[h]; e; e = e->next) - if (oideq(oid, &e->idx.oid)) - return e; - return NULL; + return hashmap_get_entry_from_hash(&object_table, oidhash(oid), oid, + struct object_entry, ent); } static struct object_entry *insert_object(struct object_id *oid) { - unsigned int h = oid->hash[0] << 8 | oid->hash[1]; - struct object_entry *e = object_table[h]; + struct object_entry *e; + unsigned int hash = oidhash(oid); - while (e) { - if (oideq(oid, &e->idx.oid)) - return e; - e = e->next; + e = hashmap_get_entry_from_hash(&object_table, hash, oid, + struct object_entry, ent); + if (!e) { + e = new_object(oid); + e->idx.offset = 0; + hashmap_entry_init(&e->ent, hash); + hashmap_add(&object_table, &e->ent); } - e = new_object(oid); - e->next = object_table[h]; - e->idx.offset = 0; - object_table[h] = e; return e; } static void invalidate_pack_id(unsigned int id) { - unsigned int h; unsigned long lu; struct tag *t; + struct hashmap_iter iter; + struct object_entry *e; - for (h = 0; h < ARRAY_SIZE(object_table); h++) { - struct object_entry *e; - - for (e = object_table[h]; e; e = e->next) - if (e->pack_id == id) - e->pack_id = MAX_PACK_ID; + hashmap_for_each_entry(&object_table, &iter, e, ent) { + if (e->pack_id == id) + e->pack_id = MAX_PACK_ID; } for (lu = 0; lu < branch_table_sz; lu++) { @@ -3511,6 +3520,8 @@ int cmd_main(int argc, const char **argv) avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*)); marks = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set)); + hashmap_init(&object_table, object_entry_hashcmp, NULL, 0); + /* * We don't parse most options until after we've seen the set of * "feature" lines at the start of the stream (which allows the command diff --git a/t/perf/p9300-fast-import-export.sh b/t/perf/p9300-fast-import-export.sh new file mode 100755 index 0000000000..586161e9ad --- /dev/null +++ b/t/perf/p9300-fast-import-export.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +test_description='test fast-import and fast-export performance' +. ./perf-lib.sh + +test_perf_default_repo + +# Use --no-data here to produce a vastly smaller export file. +# This is much cheaper to work with but should still exercise +# fast-import pretty well (we'll still process all commits and +# trees, which account for 60% or more of objects in most repos). +# +# Use --reencode to avoid the default of aborting on non-utf8 commits, +# which lets this test run against a wider variety of sample repos. +test_perf 'export (no-blobs)' ' + git fast-export --reencode=yes --no-data HEAD >export +' + +test_perf 'import (no-blobs)' ' + git fast-import --force <export +' + +test_done |