summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEdward Thomson <ethomson@edwardthomson.com>2015-03-17 10:06:50 -0400
committerEdward Thomson <ethomson@edwardthomson.com>2015-03-17 10:06:50 -0400
commit7800048afbb002d0003b54fcc09c98d0d3249949 (patch)
treec68b437313065b3a46c987bd2983f6a511f28194
parent828e595969efee951d67f23cfd9f4b8461ce71fb (diff)
parente68b31a1a9f338908a4c72d48734b30c303f0901 (diff)
downloadlibgit2-7800048afbb002d0003b54fcc09c98d0d3249949.tar.gz
Merge pull request #2972 from libgit2/cmn/pack-objects-walk
[WIP] Smarter pack-building
-rw-r--r--include/git2/pack.h13
-rw-r--r--src/describe.c2
-rw-r--r--src/indexer.c2
-rw-r--r--src/pack-objects.c245
-rw-r--r--src/pack-objects.h10
-rw-r--r--src/pack.c3
-rw-r--r--src/pack.h4
-rw-r--r--src/revwalk.c2
-rw-r--r--src/revwalk.h2
-rw-r--r--src/transports/local.c42
10 files changed, 288 insertions, 37 deletions
diff --git a/include/git2/pack.h b/include/git2/pack.h
index e7f060d12..4cf426273 100644
--- a/include/git2/pack.h
+++ b/include/git2/pack.h
@@ -115,6 +115,19 @@ GIT_EXTERN(int) git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *
GIT_EXTERN(int) git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *id);
/**
+ * Insert objects as given by the walk
+ *
+ * Those commits and all objects they reference will be inserted into
+ * the packbuilder.
+ *
+ * The walk is consumed by this call: it is iterated until it yields
+ * no more commits.
+ *
+ * @param pb the packbuilder
+ * @param walk the revwalk to use to fill the packbuilder
+ *
+ * @return 0 or an error code
+ */
+GIT_EXTERN(int) git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk);
+
+/**
* Write the contents of the packfile to an in-memory buffer
*
* The contents of the buffer will become a valid packfile, even though there
diff --git a/src/describe.c b/src/describe.c
index d4c0dea78..68bac2d2f 100644
--- a/src/describe.c
+++ b/src/describe.c
@@ -19,6 +19,8 @@
#include "vector.h"
#include "repository.h"
+GIT__USE_OIDMAP;
+
/* Ported from https://github.com/git/git/blob/89dde7882f71f846ccd0359756d27bebc31108de/builtin/describe.c */
struct commit_name {
diff --git a/src/indexer.c b/src/indexer.c
index 4fb33b6f1..665d50fcd 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -18,6 +18,8 @@
#include "oidmap.h"
#include "zstream.h"
+GIT__USE_OIDMAP;
+
extern git_mutex git__mwindow_mutex;
#define UINT31_MAX (0x7FFFFFFF)
diff --git a/src/pack-objects.c b/src/pack-objects.c
index f644520ac..0f43b98e0 100644
--- a/src/pack-objects.c
+++ b/src/pack-objects.c
@@ -15,6 +15,8 @@
#include "thread-utils.h"
#include "tree.h"
#include "util.h"
+#include "revwalk.h"
+#include "commit_list.h"
#include "git2/pack.h"
#include "git2/commit.h"
@@ -39,6 +41,8 @@ struct pack_write_context {
git_transfer_progress *stats;
};
+GIT__USE_OIDMAP;
+
#ifdef GIT_THREADS
#define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
@@ -124,10 +128,16 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
GITERR_CHECK_ALLOC(pb);
pb->object_ix = git_oidmap_alloc();
-
if (!pb->object_ix)
goto on_error;
+ pb->walk_objects = git_oidmap_alloc();
+ if (!pb->walk_objects)
+ goto on_error;
+
+ if (git_pool_init(&pb->object_pool, sizeof(git_walk_object), 0) < 0)
+ goto on_error;
+
pb->repo = repo;
pb->nr_threads = 1; /* do not spawn any thread by default */
@@ -1345,6 +1355,7 @@ const git_oid *git_packbuilder_hash(git_packbuilder *pb)
return &pb->pack_oid;
}
+
static int cb_tree_walk(
const char *root, const git_tree_entry *entry, void *payload)
{
@@ -1403,6 +1414,235 @@ uint32_t git_packbuilder_written(git_packbuilder *pb)
return pb->nr_written;
}
+/*
+ * Allocate a fresh walk object for `id` out of the packbuilder's
+ * object pool and hand it back through `out`.
+ *
+ * Returns 0 on success; on allocation failure sets the OOM error and
+ * returns -1.
+ */
+int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
+{
+	git_walk_object *entry = git_pool_mallocz(&pb->object_pool, 1);
+
+	if (entry == NULL) {
+		giterr_set_oom();
+		return -1;
+	}
+
+	git_oid_cpy(&entry->id, id);
+	*out = entry;
+
+	return 0;
+}
+
+/*
+ * Fetch the walk object tracking `id`. If pb->walk_objects has no
+ * entry for it yet, one is created through lookup_walk_object() and
+ * registered in the map.
+ *
+ * Returns 0 with *out set, or a negative value if the object could not
+ * be allocated or the map could not take the new entry.
+ */
+static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
+{
+	int error;
+	khiter_t pos;
+	git_walk_object *obj;
+
+	pos = git_oidmap_lookup_index(pb->walk_objects, id);
+	if (git_oidmap_valid_index(pb->walk_objects, pos)) {
+		obj = git_oidmap_value_at(pb->walk_objects, pos);
+	} else {
+		if ((error = lookup_walk_object(&obj, pb, id)) < 0)
+			return error;
+
+		/*
+		 * The insert writes its status into `error`; a negative
+		 * value means the entry was not stored, so any flag set
+		 * on `obj` would be lost to later lookups.
+		 */
+		git_oidmap_insert(pb->walk_objects, &obj->id, obj, error);
+		if (error < 0) {
+			giterr_set_oom();
+			return -1;
+		}
+	}
+
+	*out = obj;
+	return 0;
+}
+
+/* Flag the blob `id` as uninteresting in the packbuilder's walk bookkeeping. */
+static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
+{
+	git_walk_object *blob;
+	int error = retrieve_object(&blob, pb, id);
+
+	if (error < 0)
+		return error;
+
+	blob->uninteresting = 1;
+	return 0;
+}
+
+/*
+ * Flag the tree `id` as uninteresting and recurse into its entries,
+ * flagging every subtree and blob below it as well.
+ */
+static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
+{
+	git_walk_object *node;
+	git_tree *tree;
+	size_t idx;
+	int error = retrieve_object(&node, pb, id);
+
+	if (error < 0)
+		return error;
+
+	/* already flagged on an earlier visit; do not descend again */
+	if (node->uninteresting)
+		return 0;
+
+	node->uninteresting = 1;
+
+	error = git_tree_lookup(&tree, pb->repo, id);
+	if (error < 0)
+		return error;
+
+	for (idx = 0; idx < git_tree_entrycount(tree); idx++) {
+		const git_tree_entry *entry = git_tree_entry_byindex(tree, idx);
+		const git_oid *child_id = git_tree_entry_id(entry);
+
+		switch (git_tree_entry_type(entry)) {
+		case GIT_OBJ_TREE:
+			error = mark_tree_uninteresting(pb, child_id);
+			break;
+		case GIT_OBJ_BLOB:
+			error = mark_blob_uninteresting(pb, child_id);
+			break;
+		default:
+			/* it's a submodule or something unknown, we don't want it */
+			continue;
+		}
+
+		if (error < 0)
+			break;
+	}
+
+	git_tree_free(tree);
+	return error;
+}
+
+/*
+ * Flag the edges of the graph as uninteresting. The commits come from
+ * a git_revwalk and already carry their own uninteresting bit, so only
+ * the root tree of each such commit (and everything below it) needs to
+ * be marked here.
+ */
+static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
+{
+	git_commit_list *cur;
+
+	for (cur = commits; cur != NULL; cur = cur->next) {
+		git_commit *commit;
+		int error;
+
+		if (cur->item->uninteresting) {
+			error = git_commit_lookup(&commit, pb->repo, &cur->item->oid);
+			if (error < 0)
+				return error;
+
+			error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
+			git_commit_free(commit);
+
+			if (error < 0)
+				return error;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Insert `tree` and, recursively, the subtrees and blobs it lists
+ * into the packbuilder.
+ *
+ * A tree that was already inserted (seen) or that has been marked
+ * uninteresting is skipped together with everything below it, and
+ * blobs marked uninteresting are left out. Entries that are neither
+ * trees nor blobs are ignored.
+ *
+ * Returns 0 on success or the error code of the failing call.
+ */
+int insert_tree(git_packbuilder *pb, git_tree *tree)
+{
+	size_t i;
+	int error;
+	git_tree *subtree;
+	git_walk_object *obj, *blob;
+	const char *name;
+
+	if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
+		return error;
+
+	/* honour the marks left by mark_tree_uninteresting() */
+	if (obj->seen || obj->uninteresting)
+		return 0;
+
+	obj->seen = 1;
+
+	if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
+		return error;
+
+	for (i = 0; i < git_tree_entrycount(tree); i++) {
+		const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
+		const git_oid *entry_id = git_tree_entry_id(entry);
+		switch (git_tree_entry_type(entry)) {
+		case GIT_OBJ_TREE:
+			if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
+				return error;
+
+			error = insert_tree(pb, subtree);
+			git_tree_free(subtree);
+
+			if (error < 0)
+				return error;
+
+			break;
+		case GIT_OBJ_BLOB:
+			if ((error = retrieve_object(&blob, pb, entry_id)) < 0)
+				return error;
+
+			/* honour the marks left by mark_blob_uninteresting() */
+			if (blob->uninteresting)
+				break;
+
+			name = git_tree_entry_name(entry);
+			if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
+				return error;
+			break;
+		default:
+			/* it's a submodule or something unknown, we don't want it */
+			;
+		}
+	}
+
+	return error;
+}
+
+/*
+ * Insert the commit tracked by `obj` into the packbuilder, then its
+ * root tree and whatever insert_tree() pulls in below it. The object
+ * is flagged as seen before anything else happens.
+ */
+int insert_commit(git_packbuilder *pb, git_walk_object *obj)
+{
+	git_commit *commit = NULL;
+	git_tree *root = NULL;
+	int error;
+
+	obj->seen = 1;
+
+	error = git_packbuilder_insert(pb, &obj->id, NULL);
+	if (error < 0)
+		return error;
+
+	error = git_commit_lookup(&commit, pb->repo, &obj->id);
+	if (error < 0)
+		return error;
+
+	error = git_tree_lookup(&root, pb->repo, git_commit_tree_id(commit));
+	if (error >= 0)
+		error = insert_tree(pb, root);
+
+	git_commit_free(commit);
+	git_tree_free(root);
+	return error;
+}
+
+/*
+ * Fill the packbuilder from a revwalk.
+ *
+ * The trees of the commits in walk->user_input that are flagged
+ * uninteresting are marked first; then the walk is iterated and every
+ * commit it yields that is neither seen nor uninteresting is inserted
+ * through insert_commit().
+ *
+ * Returns 0 once the walk is exhausted, or the error code of whichever
+ * step failed (including a failure of the walk itself).
+ */
+int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
+{
+	int error;
+	git_oid id;
+	git_walk_object *obj;
+
+	assert(pb && walk);
+
+	if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
+		return error;
+
+	/*
+	 * TODO: git marks the parents of the edges
+	 * uninteresting. This may provide a speed advantage, but does
+	 * seem to assume the remote does not have a single-commit
+	 * history on the other end.
+	 */
+
+	/* walk down each tree up to the blobs and insert them, stopping when uninteresting */
+	while ((error = git_revwalk_next(&id, walk)) == 0) {
+		if ((error = retrieve_object(&obj, pb, &id)) < 0)
+			return error;
+
+		if (obj->seen || obj->uninteresting)
+			continue;
+
+		if ((error = insert_commit(pb, obj)) < 0)
+			return error;
+	}
+
+	/* running out of commits is the normal way to finish */
+	if (error == GIT_ITEROVER)
+		error = 0;
+
+	/* anything else that stopped the walk must reach the caller */
+	return error;
+}
+
int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
{
if (!pb)
@@ -1436,6 +1676,9 @@ void git_packbuilder_free(git_packbuilder *pb)
if (pb->object_list)
git__free(pb->object_list);
+ git_oidmap_free(pb->walk_objects);
+ git_pool_clear(&pb->object_pool);
+
git_hash_ctx_cleanup(&pb->ctx);
git_zstream_free(&pb->zstream);
diff --git a/src/pack-objects.h b/src/pack-objects.h
index 4647df75a..9af5c0b09 100644
--- a/src/pack-objects.h
+++ b/src/pack-objects.h
@@ -15,6 +15,7 @@
#include "oidmap.h"
#include "netops.h"
#include "zstream.h"
+#include "pool.h"
#include "git2/oid.h"
#include "git2/pack.h"
@@ -50,6 +51,12 @@ typedef struct git_pobject {
filled:1;
} git_pobject;
+/* Per-object bookkeeping kept by the packbuilder while inserting from a revwalk. */
+typedef struct {
+ git_oid id; /* id of the object this entry tracks */
+ /* uninteresting: set by the mark_*_uninteresting helpers in pack-objects.c */
+ /* seen: set once insert_commit/insert_tree has handled the object */
+ unsigned int uninteresting:1,
+ seen:1;
+} git_walk_object;
+
struct git_packbuilder {
git_repository *repo; /* associated repository */
git_odb *odb; /* associated object database */
@@ -66,6 +73,9 @@ struct git_packbuilder {
git_oidmap *object_ix;
+ git_oidmap *walk_objects;
+ git_pool object_pool;
+
git_oid pack_oid; /* hash of written pack */
/* synchronization objects */
diff --git a/src/pack.c b/src/pack.c
index 3c646b778..5d0a27b91 100644
--- a/src/pack.c
+++ b/src/pack.c
@@ -16,6 +16,9 @@
#include <zlib.h>
+GIT__USE_OFFMAP;
+GIT__USE_OIDMAP;
+
static int packfile_open(struct git_pack_file *p);
static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
int packfile_unpack_compressed(
diff --git a/src/pack.h b/src/pack.h
index 3cebd10ac..b3d5b2993 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -71,9 +71,7 @@ struct pack_chain_elem {
typedef git_array_t(struct pack_chain_elem) git_dependency_chain;
#include "offmap.h"
-
-GIT__USE_OFFMAP
-GIT__USE_OIDMAP
+#include "oidmap.h"
#define GIT_PACK_CACHE_MEMORY_LIMIT 16 * 1024 * 1024
#define GIT_PACK_CACHE_SIZE_LIMIT 1024 * 1024 /* don't bother caching anything over 1MB */
diff --git a/src/revwalk.c b/src/revwalk.c
index 2ba000c6b..a6d823ec8 100644
--- a/src/revwalk.c
+++ b/src/revwalk.c
@@ -14,6 +14,8 @@
#include "git2/revparse.h"
#include "merge.h"
+GIT__USE_OIDMAP;
+
git_commit_list_node *git_revwalk__commit_lookup(
git_revwalk *walk, const git_oid *oid)
{
diff --git a/src/revwalk.h b/src/revwalk.h
index 1148a2ac9..6b363d40f 100644
--- a/src/revwalk.h
+++ b/src/revwalk.h
@@ -14,7 +14,7 @@
#include "pool.h"
#include "vector.h"
-GIT__USE_OIDMAP
+#include "oidmap.h"
struct git_revwalk {
git_repository *repo;
diff --git a/src/transports/local.c b/src/transports/local.c
index 588b7ce17..64ddbd970 100644
--- a/src/transports/local.c
+++ b/src/transports/local.c
@@ -513,7 +513,6 @@ static int local_download_pack(
git_remote_head *rhead;
unsigned int i;
int error = -1;
- git_oid oid;
git_packbuilder *pack = NULL;
git_odb_writepack *writepack = NULL;
git_odb *odb = NULL;
@@ -539,15 +538,22 @@ static int local_download_pack(
if (git_object_type(obj) == GIT_OBJ_COMMIT) {
/* Revwalker includes only wanted commits */
error = git_revwalk_push(walk, &rhead->oid);
- if (!git_oid_iszero(&rhead->loid))
+ if (!error && !git_oid_iszero(&rhead->loid)) {
error = git_revwalk_hide(walk, &rhead->loid);
+ if (error == GIT_ENOTFOUND)
+ error = 0;
+ }
} else {
- /* Tag or some other wanted object. Add it on its own */
error = git_packbuilder_insert(pack, &rhead->oid, rhead->name);
}
git_object_free(obj);
+ if (error < 0)
+ goto cleanup;
}
+ if ((error = git_packbuilder_insert_walk(pack, walk)))
+ goto cleanup;
+
if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0)
goto cleanup;
@@ -559,35 +565,6 @@ static int local_download_pack(
if ((error = git_repository_odb__weakptr(&odb, repo)) < 0)
goto cleanup;
- while ((error = git_revwalk_next(&oid, walk)) == 0) {
- git_commit *commit;
-
- /* Skip commits we already have */
- if (git_odb_exists(odb, &oid)) continue;
-
- if (!git_object_lookup((git_object**)&commit, t->repo, &oid, GIT_OBJ_COMMIT)) {
- const git_oid *tree_oid = git_commit_tree_id(commit);
-
- /* Add the commit and its tree */
- if ((error = git_packbuilder_insert(pack, &oid, NULL)) < 0 ||
- (error = git_packbuilder_insert_tree(pack, tree_oid)) < 0) {
- git_commit_free(commit);
- goto cleanup;
- }
-
- git_commit_free(commit);
-
- git_buf_clear(&progress_info);
- if ((error = git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack))) < 0)
- goto cleanup;
-
- if (t->progress_cb &&
- (error = t->progress_cb(git_buf_cstr(&progress_info), git_buf_len(&progress_info), t->message_cb_payload)) < 0)
- goto cleanup;
-
- }
- }
-
/* One last one with the newline */
git_buf_clear(&progress_info);
git_buf_printf(&progress_info, counting_objects_fmt, git_packbuilder_object_count(pack));
@@ -615,6 +592,7 @@ static int local_download_pack(
if ((error = git_packbuilder_foreach(pack, foreach_cb, &data)) != 0)
goto cleanup;
}
+
error = writepack->commit(writepack, stats);
cleanup: