diff options
-rw-r--r-- | Documentation/config/pack.txt | 9 | ||||
-rw-r--r-- | Documentation/git-pack-objects.txt | 11 | ||||
-rw-r--r-- | bisect.c | 2 | ||||
-rw-r--r-- | builtin/pack-objects.c | 10 | ||||
-rw-r--r-- | builtin/rev-list.c | 2 | ||||
-rw-r--r-- | http-push.c | 2 | ||||
-rw-r--r-- | list-objects.c | 70 | ||||
-rw-r--r-- | list-objects.h | 4 | ||||
-rw-r--r-- | revision.c | 143 | ||||
-rw-r--r-- | revision.h | 2 | ||||
-rw-r--r-- | t/README | 4 | ||||
-rwxr-xr-x | t/t5322-pack-objects-sparse.sh | 136 |
12 files changed, 378 insertions, 17 deletions
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index edac75c83f..425c73aa52 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -105,6 +105,15 @@ pack.useBitmaps:: true. You should not generally need to turn this off unless you are debugging pack bitmaps. +pack.useSparse:: + When true, git will default to using the '--sparse' option in + 'git pack-objects' when the '--revs' option is present. This + algorithm only walks trees that appear in paths that introduce new + objects. This can have significant performance benefits when + computing a pack to send a small change. However, it is possible + that extra objects are added to the pack-file if the included + commits contain certain types of direct renames. + pack.writeBitmaps (deprecated):: This is a deprecated synonym for `repack.writeBitmaps`. diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 40c825c381..e45f3e680d 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -14,7 +14,7 @@ SYNOPSIS [--local] [--incremental] [--window=<n>] [--depth=<n>] [--revs [--unpacked | --all]] [--keep-pack=<pack-name>] [--stdout [--filter=<filter-spec>] | base-name] - [--shallow] [--keep-true-parents] < object-list + [--shallow] [--keep-true-parents] [--sparse] < object-list DESCRIPTION @@ -196,6 +196,15 @@ depth is 4095. Add --no-reuse-object if you want to force a uniform compression level on all data no matter the source. +--sparse:: + Use the "sparse" algorithm to determine which objects to include in + the pack, when combined with the "--revs" option. This algorithm + only walks trees that appear in paths that introduce new objects. + This can have significant performance benefits when computing + a pack to send a small change. However, it is possible that extra + objects are added to the pack-file if the included commits contain + certain types of direct renames. + --thin:: Create a "thin" pack by omitting the common objects between a sender and a receiver in order to reduce network transfer. This @@ -658,7 +658,7 @@ static void bisect_common(struct rev_info *revs) if (prepare_revision_walk(revs)) die("revision walk setup failed"); if (revs->tree_objects) - mark_edges_uninteresting(revs, NULL); + mark_edges_uninteresting(revs, NULL, 0); } static void exit_if_skipped_commits(struct commit_list *tried, diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ffef8dcf2f..bd67491c16 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -84,6 +84,7 @@ static unsigned long pack_size_limit; static int depth = 50; static int delta_search_threads; static int pack_to_stdout; +static int sparse; static int thin; static int num_preferred_base; static struct progress *progress_state; @@ -2703,6 +2704,10 @@ static int git_pack_config(const char *k, const char *v, void *cb) use_bitmap_index_default = git_config_bool(k, v); return 0; } + if (!strcmp(k, "pack.usesparse")) { + sparse = git_config_bool(k, v); + return 0; + } if (!strcmp(k, "pack.threads")) { delta_search_threads = git_config_int(k, v); if (delta_search_threads < 0) @@ -3130,7 +3135,7 @@ static void get_object_list(int ac, const char **av) if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); - mark_edges_uninteresting(&revs, show_edge); + mark_edges_uninteresting(&revs, show_edge, sparse); if (!fn_show_object) fn_show_object = show_object; @@ -3287,6 +3292,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"), N_("unpack unreachable objects newer than <time>"), PARSE_OPT_OPTARG, option_parse_unpack_unreachable }, + OPT_BOOL(0, "sparse", &sparse, + N_("use the sparse reachability algorithm")), OPT_BOOL(0, "thin", &thin, N_("create thin packs")), OPT_BOOL(0, "shallow", &shallow, @@ -3319,6 +3326,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) read_replace_refs = 0; + sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0); reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 14ef659c12..5b5b6dbb1c 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -546,7 +546,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (prepare_revision_walk(&revs)) die("revision walk setup failed"); if (revs.tree_objects) - mark_edges_uninteresting(&revs, show_edge); + mark_edges_uninteresting(&revs, show_edge, 0); if (bisect_list) { int reaches, all; diff --git a/http-push.c b/http-push.c index bb802d80ee..77e2e22852 100644 --- a/http-push.c +++ b/http-push.c @@ -1933,7 +1933,7 @@ int cmd_main(int argc, const char **argv) pushing = 0; if (prepare_revision_walk(&revs)) die("revision walk setup failed"); - mark_edges_uninteresting(&revs, NULL); + mark_edges_uninteresting(&revs, NULL, 0); objects_to_send = get_delta(&revs, ref_lock); finish_all_active_slots(); diff --git a/list-objects.c b/list-objects.c index a2296a8e7b..dc77361e11 100644 --- a/list-objects.c +++ b/list-objects.c @@ -226,25 +226,73 @@ static void mark_edge_parents_uninteresting(struct commit *commit, } } -void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge) +static void add_edge_parents(struct commit *commit, + struct rev_info *revs, + show_edge_fn show_edge, + struct oidset *set) +{ + struct commit_list *parents; + + for (parents = commit->parents; parents; parents = parents->next) { + struct commit *parent = parents->item; + struct tree *tree = get_commit_tree(parent); + + if (!tree) + continue; + + oidset_insert(set, &tree->object.oid); + + if (!(parent->object.flags & UNINTERESTING)) + continue; + tree->object.flags |= UNINTERESTING; + + if (revs->edge_hint && !(parent->object.flags & SHOWN)) { + parent->object.flags |= SHOWN; + show_edge(parent); + } + } +} + +void mark_edges_uninteresting(struct rev_info *revs, + show_edge_fn show_edge, + int sparse) { struct commit_list *list; int i; - for (list = revs->commits; list; list = list->next) { - struct commit *commit = list->item; + if (sparse) { + struct oidset set; + oidset_init(&set, 16); - if (commit->object.flags & UNINTERESTING) { - mark_tree_uninteresting(revs->repo, - get_commit_tree(commit)); - if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) { - commit->object.flags |= SHOWN; - show_edge(commit); + for (list = revs->commits; list; list = list->next) { + struct commit *commit = list->item; + struct tree *tree = get_commit_tree(commit); + + if (commit->object.flags & UNINTERESTING) + tree->object.flags |= UNINTERESTING; + + oidset_insert(&set, &tree->object.oid); + add_edge_parents(commit, revs, show_edge, &set); + } + + mark_trees_uninteresting_sparse(revs->repo, &set); + oidset_clear(&set); + } else { + for (list = revs->commits; list; list = list->next) { + struct commit *commit = list->item; + if (commit->object.flags & UNINTERESTING) { + mark_tree_uninteresting(revs->repo, + get_commit_tree(commit)); + if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) { + commit->object.flags |= SHOWN; + show_edge(commit); + } + continue; } - continue; + mark_edge_parents_uninteresting(commit, revs, show_edge); } - mark_edge_parents_uninteresting(commit, revs, show_edge); } + if (revs->edge_hint_aggressive) { for (i = 0; i < revs->cmdline.nr; i++) { struct object *obj = revs->cmdline.rev[i].item; diff --git a/list-objects.h b/list-objects.h index ad40762926..a952680e46 100644 --- a/list-objects.h +++ b/list-objects.h @@ -10,7 +10,9 @@ typedef void (*show_object_fn)(struct object *, const char *, void *); void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *); typedef void (*show_edge_fn)(struct commit *); -void mark_edges_uninteresting(struct rev_info *, show_edge_fn); +void mark_edges_uninteresting(struct rev_info *revs, + show_edge_fn show_edge, + int sparse); struct oidset; struct list_objects_filter_options; diff --git a/revision.c b/revision.c index 5c7d3c75d7..162d511d46 100644 --- a/revision.c +++ b/revision.c @@ -27,6 +27,7 @@ #include "commit-reach.h" #include "commit-graph.h" #include "prio-queue.h" +#include "hashmap.h" volatile show_early_output_fn_t show_early_output; @@ -99,6 +100,148 @@ void mark_tree_uninteresting(struct repository *r, struct tree *tree) mark_tree_contents_uninteresting(r, tree); } +struct path_and_oids_entry { + struct hashmap_entry ent; + char *path; + struct oidset trees; +}; + +static int path_and_oids_cmp(const void *hashmap_cmp_fn_data, + const struct path_and_oids_entry *e1, + const struct path_and_oids_entry *e2, + const void *keydata) +{ + return strcmp(e1->path, e2->path); +} + +static void paths_and_oids_init(struct hashmap *map) +{ + hashmap_init(map, (hashmap_cmp_fn) path_and_oids_cmp, NULL, 0); +} + +static void paths_and_oids_clear(struct hashmap *map) +{ + struct hashmap_iter iter; + struct path_and_oids_entry *entry; + hashmap_iter_init(map, &iter); + + while ((entry = (struct path_and_oids_entry *)hashmap_iter_next(&iter))) { + oidset_clear(&entry->trees); + free(entry->path); + } + + hashmap_free(map, 1); +} + +static void paths_and_oids_insert(struct hashmap *map, + const char *path, + const struct object_id *oid) +{ + int hash = strhash(path); + struct path_and_oids_entry key; + struct path_and_oids_entry *entry; + + hashmap_entry_init(&key, hash); + + /* use a shallow copy for the lookup */ + key.path = (char *)path; + oidset_init(&key.trees, 0); + + if (!(entry = (struct path_and_oids_entry *)hashmap_get(map, &key, NULL))) { + entry = xcalloc(1, sizeof(struct path_and_oids_entry)); + hashmap_entry_init(entry, hash); + entry->path = xstrdup(key.path); + oidset_init(&entry->trees, 16); + hashmap_put(map, entry); + } + + oidset_insert(&entry->trees, oid); +} + +static void add_children_by_path(struct repository *r, + struct tree *tree, + struct hashmap *map) +{ + struct tree_desc desc; + struct name_entry entry; + + if (!tree) + return; + + if (parse_tree_gently(tree, 1) < 0) + return; + + init_tree_desc(&desc, tree->buffer, tree->size); + while (tree_entry(&desc, &entry)) { + switch (object_type(entry.mode)) { + case OBJ_TREE: + paths_and_oids_insert(map, entry.path, &entry.oid); + + if (tree->object.flags & UNINTERESTING) { + struct tree *child = lookup_tree(r, &entry.oid); + if (child) + child->object.flags |= UNINTERESTING; + } + break; + case OBJ_BLOB: + if (tree->object.flags & UNINTERESTING) { + struct blob *child = lookup_blob(r, &entry.oid); + if (child) + child->object.flags |= UNINTERESTING; + } + break; + default: + /* Subproject commit - not in this repository */ + break; + } + } + + free_tree_buffer(tree); +} + +void mark_trees_uninteresting_sparse(struct repository *r, + struct oidset *trees) +{ + unsigned has_interesting = 0, has_uninteresting = 0; + struct hashmap map; + struct hashmap_iter map_iter; + struct path_and_oids_entry *entry; + struct object_id *oid; + struct oidset_iter iter; + + oidset_iter_init(trees, &iter); + while ((!has_interesting || !has_uninteresting) && + (oid = oidset_iter_next(&iter))) { + struct tree *tree = lookup_tree(r, oid); + + if (!tree) + continue; + + if (tree->object.flags & UNINTERESTING) + has_uninteresting = 1; + else + has_interesting = 1; + } + + /* Do not walk unless we have both types of trees. */ + if (!has_uninteresting || !has_interesting) + return; + + paths_and_oids_init(&map); + + oidset_iter_init(trees, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct tree *tree = lookup_tree(r, oid); + add_children_by_path(r, tree, &map); + } + + hashmap_iter_init(&map, &map_iter); + while ((entry = hashmap_iter_next(&map_iter))) + mark_trees_uninteresting_sparse(r, &entry->trees); + + paths_and_oids_clear(&map); +} + struct commit_stack { struct commit **items; size_t nr, alloc; diff --git a/revision.h b/revision.h index 52e5a88ff5..d32d62abc6 100644 --- a/revision.h +++ b/revision.h @@ -67,6 +67,7 @@ struct rev_cmdline_info { #define REVISION_WALK_NO_WALK_SORTED 1 #define REVISION_WALK_NO_WALK_UNSORTED 2 +struct oidset; struct topo_walk_info; struct rev_info { @@ -327,6 +328,7 @@ void put_revision_mark(const struct rev_info *revs, void mark_parents_uninteresting(struct commit *commit); void mark_tree_uninteresting(struct repository *r, struct tree *tree); +void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees); void show_object_with_name(FILE *, struct object *, const char *); @@ -358,6 +358,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path for the index version specified. Can be set to any valid version (currently 2, 3, or 4). +GIT_TEST_PACK_SPARSE=<boolean> if enabled will default the pack-objects +builtin to use the sparse object walk. This can still be overridden by +the --no-sparse command-line argument. + GIT_TEST_PRELOAD_INDEX=<boolean> exercises the preload-index code path by overriding the minimum number of cache entries required per thread. diff --git a/t/t5322-pack-objects-sparse.sh b/t/t5322-pack-objects-sparse.sh new file mode 100755 index 0000000000..7124b5581a --- /dev/null +++ b/t/t5322-pack-objects-sparse.sh @@ -0,0 +1,136 @@ +#!/bin/sh + +test_description='pack-objects object selection using sparse algorithm' +. ./test-lib.sh + +test_expect_success 'setup repo' ' + test_commit initial && + for i in $(test_seq 1 3) + do + mkdir f$i && + for j in $(test_seq 1 3) + do + mkdir f$i/f$j && + echo $j >f$i/f$j/data.txt + done + done && + git add . && + git commit -m "Initialized trees" && + for i in $(test_seq 1 3) + do + git checkout -b topic$i master && + echo change-$i >f$i/f$i/data.txt && + git commit -a -m "Changed f$i/f$i/data.txt" + done && + cat >packinput.txt <<-EOF && + topic1 + ^topic2 + ^topic3 + EOF + git rev-parse \ + topic1 \ + topic1^{tree} \ + topic1:f1 \ + topic1:f1/f1 \ + topic1:f1/f1/data.txt | sort >expect_objects.txt +' + +test_expect_success 'non-sparse pack-objects' ' + git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack && + git index-pack -o nonsparse.idx nonsparse.pack && + git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt && + test_cmp expect_objects.txt nonsparse_objects.txt +' + +test_expect_success 'sparse pack-objects' ' + git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack && + git index-pack -o sparse.idx sparse.pack && + git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt && + test_cmp expect_objects.txt sparse_objects.txt +' + +test_expect_success 'duplicate a folder from f3 and commit to topic1' ' + git checkout topic1 && + echo change-3 >f3/f3/data.txt && + git commit -a -m "Changed f3/f3/data.txt" && + git rev-parse \ + topic1~1 \ + topic1~1^{tree} \ + topic1^{tree} \ + topic1 \ + topic1:f1 \ + topic1:f1/f1 \ + topic1:f1/f1/data.txt | sort >required_objects.txt +' + +test_expect_success 'non-sparse pack-objects' ' + git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack && + git index-pack -o nonsparse.idx nonsparse.pack && + git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt && + comm -1 -2 required_objects.txt nonsparse_objects.txt >nonsparse_required_objects.txt && + test_cmp required_objects.txt nonsparse_required_objects.txt +' + +test_expect_success 'sparse pack-objects' ' + git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack && + git index-pack -o sparse.idx sparse.pack && + git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt && + comm -1 -2 required_objects.txt sparse_objects.txt >sparse_required_objects.txt && + test_cmp required_objects.txt sparse_required_objects.txt +' + +# Demonstrate that the algorithms differ when we copy a tree wholesale +# from one folder to another. + +test_expect_success 'duplicate a folder from f1 into f3' ' + mkdir f3/f4 && + cp -r f1/f1/* f3/f4 && + git add f3/f4 && + git commit -m "Copied f1/f1 to f3/f4" && + cat >packinput.txt <<-EOF && + topic1 + ^topic1~1 + EOF + git rev-parse \ + topic1 \ + topic1^{tree} \ + topic1:f3 | sort >required_objects.txt +' + +test_expect_success 'non-sparse pack-objects' ' + git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack && + git index-pack -o nonsparse.idx nonsparse.pack && + git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt && + comm -1 -2 required_objects.txt nonsparse_objects.txt >nonsparse_required_objects.txt && + test_cmp required_objects.txt nonsparse_required_objects.txt +' + +test_expect_success 'sparse pack-objects' ' + git rev-parse \ + topic1 \ + topic1^{tree} \ + topic1:f3 \ + topic1:f3/f4 \ + topic1:f3/f4/data.txt | sort >expect_sparse_objects.txt && + git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack && + git index-pack -o sparse.idx sparse.pack && + git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt && + test_cmp expect_sparse_objects.txt sparse_objects.txt +' + +test_expect_success 'pack.useSparse enables algorithm' ' + git config pack.useSparse true && + git pack-objects --stdout --revs <packinput.txt >sparse.pack && + git index-pack -o sparse.idx sparse.pack && + git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt && + test_cmp expect_sparse_objects.txt sparse_objects.txt +' + +test_expect_success 'pack.useSparse overridden' ' + git pack-objects --stdout --revs --no-sparse <packinput.txt >sparse.pack && + git index-pack -o sparse.idx sparse.pack && + git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt && + test_cmp required_objects.txt sparse_objects.txt +' + +test_done |