summaryrefslogtreecommitdiff
path: root/object-store.h
diff options
context:
space:
mode:
authorColin Stolley <cstolley@runbox.com>2019-11-27 16:24:53 -0600
committerJunio C Hamano <gitster@pobox.com>2019-12-03 07:59:45 -0800
commitec48540fe8c387cf7424d5387ddbd53e89bb9d51 (patch)
tree1b2111770e24c35a2591d4458c6c6e3fba124e8b /object-store.h
parentd9f6f3b6195a0ca35642561e530798ad1469bd41 (diff)
downloadgit-ec48540fe8c387cf7424d5387ddbd53e89bb9d51.tar.gz
packfile.c: speed up loading lots of packfiles
When loading packfiles on start-up, we traverse the internal packfile
list once per file to avoid reloading packfiles that have already
been loaded. This check runs in quadratic time, so for poorly
maintained repos with a large number of packfiles, it can be pretty
slow.

Add a hashmap containing the packfile names as we load them so that
the average runtime cost of checking for already-loaded packs becomes
constant.

Add a perf test to p5303 to show speed-up.

The existing p5303 test runtimes are dominated by other factors and do
not show an appreciable speed-up. The new test in p5303 clearly exposes
a speed-up in bad cases. In this test we create 10,000 packfiles and
measure the start-up time of git rev-parse, which does little else
besides load in the packs.

Here are the numbers for the new p5303 test:

Test                         HEAD^             HEAD
---------------------------------------------------------------------
5303.12: load 10,000 packs   1.03(0.92+0.10)   0.12(0.02+0.09) -88.3%

Signed-off-by: Colin Stolley <cstolley@runbox.com>
Helped-by: Jeff King <peff@peff.net>
[jc: squashed the change to call hashmap in install_packed_git() by peff]
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'object-store.h')
-rw-r--r--object-store.h21
1 files changed, 21 insertions, 0 deletions
diff --git a/object-store.h b/object-store.h
index 7f7b3cdd80..55ee639350 100644
--- a/object-store.h
+++ b/object-store.h
@@ -60,6 +60,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb,
void odb_clear_loose_cache(struct object_directory *odb);
struct packed_git {
+ struct hashmap_entry packmap_ent;
struct packed_git *next;
struct list_head mru;
struct pack_window *windows;
@@ -88,6 +89,20 @@ struct packed_git {
struct multi_pack_index;
+static inline int pack_map_entry_cmp(const void *unused_cmp_data, /* Compare two packs by pack_name; unused_cmp_data is ignored. Presumably the comparison function for pack_map (registration is not shown here). */
+ const struct hashmap_entry *entry,
+ const struct hashmap_entry *entry2,
+ const void *keydata) /* optional NUL-terminated name to compare against; may be NULL */
+{
+ const char *key = keydata;
+ const struct packed_git *pg1, *pg2;
+
+ pg1 = container_of(entry, const struct packed_git, packmap_ent); /* recover the packed_git that embeds each hashmap entry */
+ pg2 = container_of(entry2, const struct packed_git, packmap_ent);
+
+ return strcmp(pg1->pack_name, key ? key : pg2->pack_name); /* strcmp result: 0 when names match; a non-NULL key is used in place of pg2's name */
+}
+
struct raw_object_store {
/*
* Set of all object directories; the main directory is first (and
@@ -132,6 +147,12 @@ struct raw_object_store {
struct list_head packed_git_mru;
/*
+ * A map of packfile names to packed_git structs for tracking which
+ * packs have been loaded already.
+ */
+ struct hashmap pack_map;
+
+ /*
* A fast, rough count of the number of objects in the repository.
* These two fields are not meant for direct access. Use
* approximate_object_count() instead.