summary | refs | log | tree | commit | diff
path: root/src/indexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/indexer.c')
-rw-r--r-- src/indexer.c | 152
1 files changed, 151 insertions, 1 deletions
diff --git a/src/indexer.c b/src/indexer.c
index c8fd9c25d..1a59f03f1 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -10,6 +10,9 @@
#include "git2/indexer.h"
#include "git2/object.h"
+#include "commit.h"
+#include "tree.h"
+#include "tag.h"
#include "pack.h"
#include "mwindow.h"
#include "posix.h"
@@ -36,12 +39,15 @@ struct git_indexer {
pack_committed :1,
have_stream :1,
have_delta :1,
- do_fsync :1;
+ do_fsync :1,
+ do_verify :1;
struct git_pack_header hdr;
struct git_pack_file *pack;
unsigned int mode;
git_off_t off;
git_off_t entry_start;
+ git_otype entry_type;
+ git_buf entry_data;
git_packfile_stream stream;
size_t nr_objects;
git_vector objects;
@@ -53,6 +59,9 @@ struct git_indexer {
void *progress_payload;
char objbuf[8*1024];
+ /* OIDs referenced from pack objects. Used for verification. */
+ git_oidmap *expected_oids;
+
/* Needed to look up objects which we want to inject to fix a thin pack */
git_odb *odb;
@@ -125,6 +134,11 @@ int git_indexer_new(
idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
git_hash_ctx_init(&idx->hash_ctx);
git_hash_ctx_init(&idx->trailer);
+ git_buf_init(&idx->entry_data, 0);
+ idx->expected_oids = git_oidmap_alloc();
+ GITERR_CHECK_ALLOC(idx->expected_oids);
+
+ idx->do_verify = !!idx->odb;
if (git_repository__fsync_gitdir)
idx->do_fsync = 1;
@@ -210,6 +224,9 @@ static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
break;
+ if (idx->do_verify)
+ git_buf_put(&idx->entry_data, idx->objbuf, read);
+
git_hash_update(&idx->hash_ctx, idx->objbuf, read);
} while (read > 0);
@@ -279,6 +296,97 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start,
return 0;
}
+/*
+ * Record `oid` as an object that is referenced from the pack and therefore
+ * expected to show up before indexing finishes.
+ *
+ * Nothing is recorded when the OID is already present in the ODB, in the
+ * pack's idx_cache, or in idx->expected_oids. Otherwise a heap-allocated copy
+ * of the OID is inserted as the map key; the code that later removes the key
+ * is responsible for freeing it.
+ *
+ * The function has no way to report failure: if the copy cannot be allocated
+ * or cannot be inserted, the OID is simply not recorded.
+ */
+static void add_expected_oid(git_indexer *idx, const git_oid *oid)
+{
+	git_oid *dup;
+	int ret;
+
+	/*
+	 * If we know about that object because it is stored in our ODB or
+	 * because we have already processed it as part of our pack file, we do
+	 * not have to expect it.
+	 */
+	if (git_odb_exists(idx->odb, oid) ||
+	    git_oidmap_exists(idx->pack->idx_cache, oid) ||
+	    git_oidmap_exists(idx->expected_oids, oid))
+		return;
+
+	/* Never dereference a failed allocation. */
+	dup = git__malloc(sizeof(*dup));
+	if (dup == NULL)
+		return;
+
+	git_oid_cpy(dup, oid);
+	git_oidmap_put(idx->expected_oids, dup, &ret);
+
+	/*
+	 * A negative status means the key was not stored, so nobody else owns
+	 * the copy; release it here instead of leaking it.
+	 */
+	if (ret < 0)
+		git__free(dup);
+}
+
+/*
+ * Update the set of expected OIDs for one object read from the pack.
+ *
+ * Objects whose type is not blob, tree, commit or tag are ignored. For the
+ * others, a git_object is built from the raw data with git_object__from_raw,
+ * and its own OID is removed from idx->expected_oids (freeing the stored
+ * key). Unless the object already exists in the ODB, every OID it references
+ * is then handed to add_expected_oid: tree entries for a tree, parents and
+ * root tree for a commit, the target for a tag.
+ *
+ * Returns 0 for ignored types and for objects already in the ODB, a negative
+ * value if git_object__from_raw fails, and otherwise the (non-negative)
+ * result of git_object__from_raw.
+ */
+static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
+{
+	git_object *object = NULL;
+	size_t keyidx;
+	int error;
+
+	if (obj->type != GIT_OBJ_BLOB &&
+	    obj->type != GIT_OBJ_TREE &&
+	    obj->type != GIT_OBJ_COMMIT &&
+	    obj->type != GIT_OBJ_TAG)
+		return 0;
+
+	/*
+	 * No object was handed to us on failure, so there is nothing to free;
+	 * return directly rather than running the cleanup path.
+	 */
+	if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0)
+		return error;
+
+	/* The object has arrived: it is no longer "expected". */
+	keyidx = git_oidmap_lookup_index(idx->expected_oids, &object->cached.oid);
+	if (git_oidmap_valid_index(idx->expected_oids, keyidx)) {
+		const git_oid *key = git_oidmap_key(idx->expected_oids, keyidx);
+		git__free((git_oid *) key);
+		git_oidmap_delete_at(idx->expected_oids, keyidx);
+	}
+
+	/*
+	 * Check whether this is a known object. If so, we can just continue as
+	 * we assume that the ODB has a complete graph. Leave through the
+	 * cleanup path so that the freshly created object is released.
+	 */
+	if (git_odb_exists(idx->odb, &object->cached.oid)) {
+		error = 0;
+		goto out;
+	}
+
+	switch (obj->type) {
+	case GIT_OBJ_TREE:
+	{
+		git_tree *tree = (git_tree *) object;
+		git_tree_entry *entry;
+		size_t i;
+
+		git_array_foreach(tree->entries, i, entry)
+			add_expected_oid(idx, entry->oid);
+
+		break;
+	}
+	case GIT_OBJ_COMMIT:
+	{
+		git_commit *commit = (git_commit *) object;
+		git_oid *parent_oid;
+		size_t i;
+
+		git_array_foreach(commit->parent_ids, i, parent_oid)
+			add_expected_oid(idx, parent_oid);
+
+		add_expected_oid(idx, &commit->tree_id);
+
+		break;
+	}
+	case GIT_OBJ_TAG:
+	{
+		git_tag *tag = (git_tag *) object;
+
+		add_expected_oid(idx, &tag->target);
+
+		break;
+	}
+	case GIT_OBJ_BLOB:
+	default:
+		/* Blobs reference no other objects. */
+		break;
+	}
+
+out:
+	git_object_free(object);
+
+	return error;
+}
+
static int store_object(git_indexer *idx)
{
int i, error;
@@ -304,6 +412,17 @@ static int store_object(git_indexer *idx)
entry->offset = (uint32_t)entry_start;
}
+ if (idx->do_verify) {
+ git_rawobj rawobj = {
+ idx->entry_data.ptr,
+ idx->entry_data.size,
+ idx->entry_type
+ };
+
+ if ((error = check_object_connectivity(idx, &rawobj)) < 0)
+ goto on_error;
+ }
+
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
@@ -549,6 +668,7 @@ static int read_stream_object(git_indexer *idx, git_transfer_progress *stats)
git_mwindow_close(&w);
idx->entry_start = entry_start;
git_hash_init(&idx->hash_ctx);
+ git_buf_clear(&idx->entry_data);
if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
error = advance_delta_offset(idx, type);
@@ -569,6 +689,7 @@ static int read_stream_object(git_indexer *idx, git_transfer_progress *stats)
}
idx->have_stream = 1;
+ idx->entry_type = type;
error = git_packfile_stream_open(stream, idx->pack, idx->off);
if (error < 0)
@@ -884,6 +1005,10 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
return -1;
}
+ if (idx->do_verify && check_object_connectivity(idx, &obj) < 0)
+ /* TODO: error? continue? */
+ continue;
+
if (hash_and_save(idx, &obj, delta->delta_off) < 0)
continue;
@@ -1014,6 +1139,18 @@ int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
}
+ /*
+ * Is the resulting graph fully connected or are we still
+ * missing some objects? In the second case, we can
+ * bail out due to an incomplete and thus corrupt
+ * packfile.
+ */
+ if (git_oidmap_size(idx->expected_oids) > 0) {
+ giterr_set(GITERR_INDEXER, "packfile is missing %"PRIuZ" objects",
+ git_oidmap_size(idx->expected_oids));
+ return -1;
+ }
+
git_vector_sort(&idx->objects);
/* Use the trailer hash as the pack file name to ensure
@@ -1143,6 +1280,8 @@ on_error:
void git_indexer_free(git_indexer *idx)
{
+ khiter_t pos;
+
if (idx == NULL)
return;
@@ -1170,7 +1309,18 @@ void git_indexer_free(git_indexer *idx)
git_mutex_unlock(&git__mwindow_mutex);
}
+ for (pos = git_oidmap_begin(idx->expected_oids);
+ pos != git_oidmap_end(idx->expected_oids); pos++)
+ {
+ if (git_oidmap_has_data(idx->expected_oids, pos)) {
+ git__free((git_oid *) git_oidmap_key(idx->expected_oids, pos));
+ git_oidmap_delete_at(idx->expected_oids, pos);
+ }
+ }
+
git_hash_ctx_cleanup(&idx->trailer);
git_hash_ctx_cleanup(&idx->hash_ctx);
+ git_buf_dispose(&idx->entry_data);
+ git_oidmap_free(idx->expected_oids);
git__free(idx);
}