diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2015-03-17 10:06:50 -0400 |
---|---|---|
committer | Edward Thomson <ethomson@edwardthomson.com> | 2015-03-17 10:06:50 -0400 |
commit | 7800048afbb002d0003b54fcc09c98d0d3249949 (patch) | |
tree | c68b437313065b3a46c987bd2983f6a511f28194 | |
parent | 828e595969efee951d67f23cfd9f4b8461ce71fb (diff) | |
parent | e68b31a1a9f338908a4c72d48734b30c303f0901 (diff) | |
download | libgit2-7800048afbb002d0003b54fcc09c98d0d3249949.tar.gz |
Merge pull request #2972 from libgit2/cmn/pack-objects-walk
[WIP] Smarter pack-building
-rw-r--r-- | include/git2/pack.h | 13 | ||||
-rw-r--r-- | src/describe.c | 2 | ||||
-rw-r--r-- | src/indexer.c | 2 | ||||
-rw-r--r-- | src/pack-objects.c | 245 | ||||
-rw-r--r-- | src/pack-objects.h | 10 | ||||
-rw-r--r-- | src/pack.c | 3 | ||||
-rw-r--r-- | src/pack.h | 4 | ||||
-rw-r--r-- | src/revwalk.c | 2 | ||||
-rw-r--r-- | src/revwalk.h | 2 | ||||
-rw-r--r-- | src/transports/local.c | 42 |
10 files changed, 288 insertions, 37 deletions
diff --git a/include/git2/pack.h b/include/git2/pack.h index e7f060d12..4cf426273 100644 --- a/include/git2/pack.h +++ b/include/git2/pack.h @@ -115,6 +115,19 @@ GIT_EXTERN(int) git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid * GIT_EXTERN(int) git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *id); /** + * Insert objects as given by the walk + * + * Those commits and all objects they reference will be inserted into + * the packbuilder. + * + * @param pb the packbuilder + * @param walk the revwalk to use to fill the packbuilder + * + * @return 0 or an error code + */ +GIT_EXTERN(int) git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk); + +/** * Write the contents of the packfile to an in-memory buffer * * The contents of the buffer will become a valid packfile, even though there diff --git a/src/describe.c b/src/describe.c index d4c0dea78..68bac2d2f 100644 --- a/src/describe.c +++ b/src/describe.c @@ -19,6 +19,8 @@ #include "vector.h" #include "repository.h" +GIT__USE_OIDMAP; + /* Ported from https://github.com/git/git/blob/89dde7882f71f846ccd0359756d27bebc31108de/builtin/describe.c */ struct commit_name { diff --git a/src/indexer.c b/src/indexer.c index 4fb33b6f1..665d50fcd 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -18,6 +18,8 @@ #include "oidmap.h" #include "zstream.h" +GIT__USE_OIDMAP; + extern git_mutex git__mwindow_mutex; #define UINT31_MAX (0x7FFFFFFF) diff --git a/src/pack-objects.c b/src/pack-objects.c index f644520ac..0f43b98e0 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -15,6 +15,8 @@ #include "thread-utils.h" #include "tree.h" #include "util.h" +#include "revwalk.h" +#include "commit_list.h" #include "git2/pack.h" #include "git2/commit.h" @@ -39,6 +41,8 @@ struct pack_write_context { git_transfer_progress *stats; }; +GIT__USE_OIDMAP; + #ifdef GIT_THREADS #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \ @@ -124,10 +128,16 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo) GITERR_CHECK_ALLOC(pb); pb->object_ix = git_oidmap_alloc(); - if (!pb->object_ix) goto on_error; + pb->walk_objects = git_oidmap_alloc(); + if (!pb->walk_objects) + goto on_error; + + if (git_pool_init(&pb->object_pool, sizeof(git_walk_object), 0) < 0) + goto on_error; + pb->repo = repo; pb->nr_threads = 1; /* do not spawn any thread by default */ @@ -1345,6 +1355,7 @@ const git_oid *git_packbuilder_hash(git_packbuilder *pb) return &pb->pack_oid; } + static int cb_tree_walk( const char *root, const git_tree_entry *entry, void *payload) { @@ -1403,6 +1414,235 @@ uint32_t git_packbuilder_written(git_packbuilder *pb) return pb->nr_written; } +int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id) +{ + git_walk_object *obj; + + obj = git_pool_mallocz(&pb->object_pool, 1); + if (!obj) { + giterr_set_oom(); + return -1; + } + + git_oid_cpy(&obj->id, id); + + *out = obj; + return 0; +} + +static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id) +{ + int error; + khiter_t pos; + git_walk_object *obj; + + pos = git_oidmap_lookup_index(pb->walk_objects, id); + if (git_oidmap_valid_index(pb->walk_objects, pos)) { + obj = git_oidmap_value_at(pb->walk_objects, pos); + } else { + if ((error = lookup_walk_object(&obj, pb, id)) < 0) + return error; + + git_oidmap_insert(pb->walk_objects, &obj->id, obj, error); + } + + *out = obj; + return 0; +} + +static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id) +{ + int error; + git_walk_object *obj; + + if ((error = retrieve_object(&obj, pb, id)) < 0) + return error; + + obj->uninteresting = 1; + + return 0; +} + +static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id) +{ + git_walk_object *obj; + git_tree *tree; + int error; + size_t i; + + if ((error = retrieve_object(&obj, pb, id)) < 0) + return error; + + if (obj->uninteresting) + return 0; + + obj->uninteresting = 1; + + if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0) + return error; + + for (i = 0; i < git_tree_entrycount(tree); i++) { + const git_tree_entry *entry = git_tree_entry_byindex(tree, i); + const git_oid *entry_id = git_tree_entry_id(entry); + switch (git_tree_entry_type(entry)) { + case GIT_OBJ_TREE: + if ((error = mark_tree_uninteresting(pb, entry_id)) < 0) + goto cleanup; + break; + case GIT_OBJ_BLOB: + if ((error = mark_blob_uninteresting(pb, entry_id)) < 0) + goto cleanup; + break; + default: + /* it's a submodule or something unknown, we don't want it */ + ; + } + } + +cleanup: + git_tree_free(tree); + return error; +} + +/* + * Mark the edges of the graph uninteresting. Since we start from a + * git_revwalk, the commits are already uninteresting, but we need to + * mark the trees and blobs. + */ +static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits) +{ + int error; + git_commit_list *list; + git_commit *commit; + + for (list = commits; list; list = list->next) { + if (!list->item->uninteresting) + continue; + + if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0) + return error; + + error = mark_tree_uninteresting(pb, git_commit_tree_id(commit)); + git_commit_free(commit); + + if (error < 0) + return error; + } + + return 0; +} + +int insert_tree(git_packbuilder *pb, git_tree *tree) +{ + size_t i; + int error; + git_tree *subtree; + git_walk_object *obj; + const char *name; + + if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0) + return error; + + if (obj->seen) + return 0; + + obj->seen = 1; + + if ((error = git_packbuilder_insert(pb, &obj->id, NULL))) + return error; + + for (i = 0; i < git_tree_entrycount(tree); i++) { + const git_tree_entry *entry = git_tree_entry_byindex(tree, i); + const git_oid *entry_id = git_tree_entry_id(entry); + switch (git_tree_entry_type(entry)) { + case GIT_OBJ_TREE: + if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0) + return error; + + error = insert_tree(pb, subtree); + git_tree_free(subtree); + + if (error < 0) + return error; + + break; + case GIT_OBJ_BLOB: + name = git_tree_entry_name(entry); + if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0) + return error; + break; + default: + /* it's a submodule or something unknown, we don't want it */ + ; + } + } + + + return error; +} + +int insert_commit(git_packbuilder *pb, git_walk_object *obj) +{ + int error; + git_commit *commit = NULL; + git_tree *tree = NULL; + + obj->seen = 1; + + if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0) + return error; + + if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0) + return error; + + if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0) + goto cleanup; + + if ((error = insert_tree(pb, tree)) < 0) + goto cleanup; + +cleanup: + git_commit_free(commit); + git_tree_free(tree); + return error; +} + +int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk) +{ + int error; + git_oid id; + git_walk_object *obj; + + assert(pb && walk); + + if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0) + return error; + + /* + * TODO: git marks the parents of the edges + * uninteresting. This may provide a speed advantage, but does + * seem to assume the remote does not have a single-commit + * history on the other end. + */ + + /* walk down each tree up to the blobs and insert them, stopping when uninteresting */ + while ((error = git_revwalk_next(&id, walk)) == 0) { + if ((error = retrieve_object(&obj, pb, &id)) < 0) + return error; + + if (obj->seen || obj->uninteresting) + continue; + + if ((error = insert_commit(pb, obj)) < 0) + return error; + } + + if (error == GIT_ITEROVER) + error = 0; + + return 0; +} + int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload) { if (!pb) @@ -1436,6 +1676,9 @@ void git_packbuilder_free(git_packbuilder *pb) if (pb->object_list) git__free(pb->object_list); + git_oidmap_free(pb->walk_objects); + git_pool_clear(&pb->object_pool); + git_hash_ctx_cleanup(&pb->ctx); git_zstream_free(&pb->zstream); diff --git a/src/pack-objects.h b/src/pack-objects.h index 4647df75a..9af5c0b09 100644 --- a/src/pack-objects.h +++ b/src/pack-objects.h @@ -15,6 +15,7 @@ #include "oidmap.h" #include "netops.h" #include "zstream.h" +#include "pool.h" #include "git2/oid.h" #include "git2/pack.h" @@ -50,6 +51,12 @@ typedef struct git_pobject { filled:1; } git_pobject; +typedef struct { + git_oid id; + unsigned int uninteresting:1, + seen:1; +} git_walk_object; + struct git_packbuilder { git_repository *repo; /* associated repository */ git_odb *odb; /* associated object database */ @@ -66,6 +73,9 @@ struct git_packbuilder { git_oidmap *object_ix; + git_oidmap *walk_objects; + git_pool object_pool; + git_oid pack_oid; /* hash of written pack */ /* synchronization objects */ diff --git a/src/pack.c b/src/pack.c index 3c646b778..5d0a27b91 100644 --- a/src/pack.c +++ b/src/pack.c @@ -16,6 +16,9 @@ #include <zlib.h> +GIT__USE_OFFMAP; +GIT__USE_OIDMAP; + static int packfile_open(struct git_pack_file *p); static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n); int packfile_unpack_compressed( diff --git a/src/pack.h b/src/pack.h index 3cebd10ac..b3d5b2993 100644 --- a/src/pack.h +++ b/src/pack.h @@ -71,9 +71,7 @@ struct pack_chain_elem { typedef git_array_t(struct pack_chain_elem) git_dependency_chain; #include "offmap.h" - -GIT__USE_OFFMAP -GIT__USE_OIDMAP +#include "oidmap.h" #define GIT_PACK_CACHE_MEMORY_LIMIT 16 * 1024 * 1024 #define GIT_PACK_CACHE_SIZE_LIMIT 1024 * 1024 /* don't bother caching anything over 1MB */ diff --git a/src/revwalk.c b/src/revwalk.c index 2ba000c6b..a6d823ec8 100644 --- a/src/revwalk.c +++ b/src/revwalk.c @@ -14,6 +14,8 @@ #include "git2/revparse.h" #include "merge.h" +GIT__USE_OIDMAP; + git_commit_list_node *git_revwalk__commit_lookup( git_revwalk *walk, const git_oid *oid) { diff --git a/src/revwalk.h b/src/revwalk.h index 1148a2ac9..6b363d40f 100644 --- a/src/revwalk.h +++ b/src/revwalk.h @@ -14,7 +14,7 @@ #include "pool.h" #include "vector.h" -GIT__USE_OIDMAP +#include "oidmap.h" struct git_revwalk { git_repository *repo; diff --git a/src/transports/local.c b/src/transports/local.c index 588b7ce17..64ddbd970 100644 --- a/src/transports/local.c +++ b/src/transports/local.c @@ -513,7 +513,6 @@ static int local_download_pack( git_remote_head *rhead; unsigned int i; int error = -1; - git_oid oid; git_packbuilder *pack = NULL; git_odb_writepack *writepack = NULL; git_odb *odb = NULL; @@ -539,15 +538,22 @@ static int local_download_pack( if (git_object_type(obj) == GIT_OBJ_COMMIT) { /* Revwalker includes only wanted commits */ error = git_revwalk_push(walk, &rhead->oid); - if (!git_oid_iszero(&rhead->loid)) + if (!error && !git_oid_iszero(&rhead->loid)) { error = git_revwalk_hide(walk, &rhead->loid); + if (error == GIT_ENOTFOUND) + error = 0; + } } else { - /* Tag or some other wanted object. Add it on its own */ error = git_packbuilder_insert(pack, &rhead->oid, rhead->name); } git_object_free(obj); + if (error < 0) + goto cleanup; } + if ((error = git_packbuilder_insert_walk(pack, walk))) + goto cleanup; + if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0) goto cleanup; @@ -559,35 +565,6 @@ static int local_download_pack( if ((error = git_repository_odb__weakptr(&odb, repo)) < 0) goto cleanup; - while ((error = git_revwalk_next(&oid, walk)) == 0) { - git_commit *commit; - - /* Skip commits we already have */ - if (git_odb_exists(odb, &oid)) continue; - - if (!git_object_lookup((git_object**)&commit, t->repo, &oid, GIT_OBJ_COMMIT)) { - const git_oid *tree_oid = git_commit_tree_id(commit); - - /* Add the commit and its tree */ - if ((error = git_packbuilder_insert(pack, &oid, NULL)) < 0 || - (error = git_packbuilder_insert_tree(pack, tree_oid)) < 0) { - git_commit_free(commit); - goto cleanup; - } - - git_commit_free(commit); - - git_buf_clear(&progress_info); - if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0) - goto cleanup; - - if (t->progress_cb && - (error = t->progress_cb(git_buf_cstr(&progress_info), git_buf_len(&progress_info), t->message_cb_payload)) < 0) - goto cleanup; - - } - } - /* One last one with the newline */ git_buf_clear(&progress_info); git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack)); @@ -615,6 +592,7 @@ static int local_download_pack( if ((error = git_packbuilder_foreach(pack, foreach_cb, &data)) != 0) goto cleanup; } + error = writepack->commit(writepack, stats); cleanup: |