diff options
author | Junio C Hamano <gitster@pobox.com> | 2016-10-10 14:03:46 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2016-10-10 14:03:47 -0700 |
commit | e6e24c94df9df6d39f2316113c14fe07d2ab03d7 (patch) | |
tree | 1f883be2ac3f3a84ce5af3cd1e9e32c45648e21e /builtin/pack-objects.c | |
parent | b8688adb12d086b161aa7c369126bdd56843a01b (diff) | |
parent | c9af708b1a5a92cbde2a0c9a75b13530f792ac84 (diff) | |
download | git-e6e24c94df9df6d39f2316113c14fe07d2ab03d7.tar.gz |
Merge branch 'jk/pack-objects-optim-mru'
"git pack-objects" in a repository with many packfiles used to
spend a lot of time looking for/at objects in them; the accesses to
the packfiles are now optimized by checking the most-recently-used
packfile first.
* jk/pack-objects-optim-mru:
pack-objects: use mru list when iterating over packs
pack-objects: break delta cycles before delta-search phase
sha1_file: make packed_object_info public
provide an initializer for "struct object_info"
Diffstat (limited to 'builtin/pack-objects.c')
-rw-r--r-- | builtin/pack-objects.c | 92 |
1 files changed, 90 insertions, 2 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 8aeba6a6e1..1e7c2a98a5 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -23,6 +23,7 @@ #include "reachable.h" #include "sha1-array.h" #include "argv-array.h" +#include "mru.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"), @@ -994,7 +995,7 @@ static int want_object_in_pack(const unsigned char *sha1, struct packed_git **found_pack, off_t *found_offset) { - struct packed_git *p; + struct mru_entry *entry; int want; if (!exclude && local && has_loose_object_nonlocal(sha1)) @@ -1011,7 +1012,8 @@ static int want_object_in_pack(const unsigned char *sha1, return want; } - for (p = packed_git; p; p = p->next) { + for (entry = packed_git_mru->head; entry; entry = entry->next) { + struct packed_git *p = entry->item; off_t offset; if (p == *found_pack) @@ -1027,6 +1029,8 @@ static int want_object_in_pack(const unsigned char *sha1, *found_pack = p; } want = want_found_object(exclude, p); + if (!exclude && want > 0) + mru_mark(packed_git_mru, entry); if (want != -1) return want; } @@ -1527,6 +1531,83 @@ static int pack_offset_sort(const void *_a, const void *_b) (a->in_pack_offset > b->in_pack_offset); } +/* + * Drop an on-disk delta we were planning to reuse. Naively, this would + * just involve blanking out the "delta" field, but we have to deal + * with some extra book-keeping: + * + * 1. Removing ourselves from the delta_sibling linked list. + * + * 2. Updating our size/type to the non-delta representation. These were + * either not recorded initially (size) or overwritten with the delta type + * (type) when check_object() decided to reuse the delta. + */ +static void drop_reused_delta(struct object_entry *entry) +{ + struct object_entry **p = &entry->delta->delta_child; + struct object_info oi = OBJECT_INFO_INIT; + + while (*p) { + if (*p == entry) + *p = (*p)->delta_sibling; + else + p = &(*p)->delta_sibling; + } + entry->delta = NULL; + + oi.sizep = &entry->size; + oi.typep = &entry->type; + if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) { + /* + * We failed to get the info from this pack for some reason; + * fall back to sha1_object_info, which may find another copy. + * And if that fails, the error will be recorded in entry->type + * and dealt with in prepare_pack(). + */ + entry->type = sha1_object_info(entry->idx.sha1, &entry->size); + } +} + +/* + * Follow the chain of deltas from this entry onward, throwing away any links + * that cause us to hit a cycle (as determined by the DFS state flags in + * the entries). + */ +static void break_delta_chains(struct object_entry *entry) +{ + /* If it's not a delta, it can't be part of a cycle. */ + if (!entry->delta) { + entry->dfs_state = DFS_DONE; + return; + } + + switch (entry->dfs_state) { + case DFS_NONE: + /* + * This is the first time we've seen the object. We mark it as + * part of the active potential cycle and recurse. + */ + entry->dfs_state = DFS_ACTIVE; + break_delta_chains(entry->delta); + entry->dfs_state = DFS_DONE; + break; + + case DFS_DONE: + /* object already examined, and not part of a cycle */ + break; + + case DFS_ACTIVE: + /* + * We found a cycle that needs broken. It would be correct to + * break any link in the chain, but it's convenient to + * break this one. + */ + drop_reused_delta(entry); + entry->dfs_state = DFS_DONE; + break; + } +} + static void get_object_details(void) { uint32_t i; @@ -1544,6 +1625,13 @@ static void get_object_details(void) entry->no_try_delta = 1; } + /* + * This must happen in a second pass, since we rely on the delta + * information for the whole list being completed. + */ + for (i = 0; i < to_pack.nr_objects; i++) + break_delta_chains(&to_pack.objects[i]); + free(sorted_by_offset); } |