summary | refs | log | tree | commit | diff
path: root/src/indexer.c
diff options
context:
space:
mode:
author: Edward Thomson <ethomson@edwardthomson.com> 2018-08-26 11:26:45 +0100
committer: GitHub <noreply@github.com> 2018-08-26 11:26:45 +0100
commit: 50186ce88bcca5d7a23d6365acb3e9a818474f6f (patch)
tree: 3d8d98da06557dc2a3bf759943e038bc69a26b4e /src/indexer.c
parent: 8856337b35ad417db9fa5604f76086528cb0436b (diff)
parent: 261267e0f1d36435e3832c4988cb2298b68dc7c2 (diff)
download: libgit2-50186ce88bcca5d7a23d6365acb3e9a818474f6f.tar.gz
Merge pull request #4374 from pks-t/pks/pack-file-verify
Pack file verification
Diffstat (limited to 'src/indexer.c')
-rw-r--r-- src/indexer.c | 348
1 files changed, 259 insertions, 89 deletions
diff --git a/src/indexer.c b/src/indexer.c
index 4f0c5ca5b..a5a4eb18d 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -10,6 +10,9 @@
#include "git2/indexer.h"
#include "git2/object.h"
+#include "commit.h"
+#include "tree.h"
+#include "tag.h"
#include "pack.h"
#include "mwindow.h"
#include "posix.h"
@@ -38,12 +41,15 @@ struct git_indexer {
pack_committed :1,
have_stream :1,
have_delta :1,
- do_fsync :1;
+ do_fsync :1,
+ do_verify :1;
struct git_pack_header hdr;
struct git_pack_file *pack;
unsigned int mode;
git_off_t off;
git_off_t entry_start;
+ git_otype entry_type;
+ git_buf entry_data;
git_packfile_stream stream;
size_t nr_objects;
git_vector objects;
@@ -55,6 +61,9 @@ struct git_indexer {
void *progress_payload;
char objbuf[8*1024];
+ /* OIDs referenced from pack objects. Used for verification. */
+ git_oidmap *expected_oids;
+
/* Needed to look up objects which we want to inject to fix a thin pack */
git_odb *odb;
@@ -106,27 +115,42 @@ static int objects_cmp(const void *a, const void *b)
return git_oid__cmp(&entrya->oid, &entryb->oid);
}
+int git_indexer_init_options(git_indexer_options *opts, unsigned int version)
+{ /*
+   * Fill the caller-provided `opts` from the GIT_INDEXER_OPTIONS_INIT
+   * template, handing `version` to GIT_INIT_STRUCTURE_FROM_TEMPLATE.
+   *
+   * NOTE(review): the macro is defined outside this view; presumably it
+   * validates `version` and may return early on a mismatch - confirm.
+   */
+ GIT_INIT_STRUCTURE_FROM_TEMPLATE(
+ opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT);
+ return 0; /* reached once the macro above has fallen through */
+}
+
int git_indexer_new(
git_indexer **out,
const char *prefix,
unsigned int mode,
git_odb *odb,
- git_transfer_progress_cb progress_cb,
- void *progress_payload)
+ git_indexer_options *in_opts)
{
+ git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
git_indexer *idx;
git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
static const char suff[] = "/pack";
int error, fd = -1;
+ if (in_opts)
+ memcpy(&opts, in_opts, sizeof(opts));
+
idx = git__calloc(1, sizeof(git_indexer));
GITERR_CHECK_ALLOC(idx);
idx->odb = odb;
- idx->progress_cb = progress_cb;
- idx->progress_payload = progress_payload;
+ idx->progress_cb = opts.progress_cb;
+ idx->progress_payload = opts.progress_cb_payload;
idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
git_hash_ctx_init(&idx->hash_ctx);
git_hash_ctx_init(&idx->trailer);
+ git_buf_init(&idx->entry_data, 0);
+ idx->expected_oids = git_oidmap_alloc();
+ GITERR_CHECK_ALLOC(idx->expected_oids);
+
+ idx->do_verify = opts.verify;
if (git_repository__fsync_gitdir)
idx->do_fsync = 1;
@@ -212,6 +236,9 @@ static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
break;
+ if (idx->do_verify)
+ git_buf_put(&idx->entry_data, idx->objbuf, read);
+
git_hash_update(&idx->hash_ctx, idx->objbuf, read);
} while (read > 0);
@@ -281,6 +308,97 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start,
return 0;
}
+/*
+ * Remember `oid` as an object that something in the pack refers to and
+ * that we therefore expect to encounter before the pack is committed.
+ *
+ * The map takes ownership of a heap-allocated copy of `oid`; whoever
+ * removes the entry is responsible for freeing that key.
+ *
+ * The function cannot report failure to its callers. If the copy cannot
+ * be allocated or inserted, the OID is simply not recorded.
+ * NOTE(review): presumably git__malloc has already set the OOM error
+ * state in that case - confirm.
+ */
+static void add_expected_oid(git_indexer *idx, const git_oid *oid)
+{
+	git_oid *dup;
+	int ret;
+
+	/*
+	 * If we know about that object because it is stored in our ODB or
+	 * because we have already processed it as part of our pack file, we do
+	 * not have to expect it.
+	 */
+	if (idx->odb && git_odb_exists(idx->odb, oid))
+		return;
+
+	/* Already indexed from this pack, or already on the expected list. */
+	if (git_oidmap_exists(idx->pack->idx_cache, oid) ||
+	    git_oidmap_exists(idx->expected_oids, oid))
+		return;
+
+	dup = git__malloc(sizeof(*dup));
+	if (dup == NULL)
+		return; /* do not hand a NULL destination to git_oid_cpy */
+
+	git_oid_cpy(dup, oid);
+	git_oidmap_put(idx->expected_oids, dup, &ret);
+
+	/*
+	 * ret < 0: the insert failed; ret == 0: the key was already present.
+	 * Either way the map did not store `dup`, so release it here.
+	 */
+	if (ret <= 0)
+		git__free(dup);
+}
+
+/*
+ * Update the set of expected OIDs for one fully inflated object.
+ *
+ * The raw object is parsed, its own OID is struck from
+ * `idx->expected_oids` (it has now been seen), and every OID it refers
+ * to is handed to add_expected_oid(): tree entries, commit parents and
+ * tree, or the tag target. Blobs reference nothing. Objects whose type
+ * is not blob/tree/commit/tag are ignored.
+ *
+ * Returns 0 when the object type is ignored or the object already
+ * exists in the ODB, a negative value if parsing the raw object fails,
+ * and otherwise the (non-negative) result of git_object__from_raw.
+ */
+static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
+{
+	/* NULL so that the shared cleanup below never sees garbage. */
+	git_object *object = NULL;
+	size_t keyidx;
+	int error;
+
+	if (obj->type != GIT_OBJ_BLOB &&
+	    obj->type != GIT_OBJ_TREE &&
+	    obj->type != GIT_OBJ_COMMIT &&
+	    obj->type != GIT_OBJ_TAG)
+		return 0;
+
+	if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0)
+		goto out;
+
+	/* This object has arrived, so it is no longer outstanding. */
+	keyidx = git_oidmap_lookup_index(idx->expected_oids, &object->cached.oid);
+	if (git_oidmap_valid_index(idx->expected_oids, keyidx)) {
+		const git_oid *key = git_oidmap_key(idx->expected_oids, keyidx);
+		git__free((git_oid *) key);
+		git_oidmap_delete_at(idx->expected_oids, keyidx);
+	}
+
+	/*
+	 * Check whether this is a known object. If so, we can just continue as
+	 * we assume that the ODB has a complete graph. Leave through the
+	 * common exit so the parsed object is released.
+	 */
+	if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid)) {
+		error = 0;
+		goto out;
+	}
+
+	switch (obj->type) {
+	case GIT_OBJ_TREE:
+	{
+		git_tree *tree = (git_tree *) object;
+		git_tree_entry *entry;
+		size_t i;
+
+		git_array_foreach(tree->entries, i, entry)
+			add_expected_oid(idx, entry->oid);
+
+		break;
+	}
+	case GIT_OBJ_COMMIT:
+	{
+		git_commit *commit = (git_commit *) object;
+		git_oid *parent_oid;
+		size_t i;
+
+		git_array_foreach(commit->parent_ids, i, parent_oid)
+			add_expected_oid(idx, parent_oid);
+
+		add_expected_oid(idx, &commit->tree_id);
+
+		break;
+	}
+	case GIT_OBJ_TAG:
+	{
+		git_tag *tag = (git_tag *) object;
+
+		add_expected_oid(idx, &tag->target);
+
+		break;
+	}
+	case GIT_OBJ_BLOB:
+	default:
+		break;
+	}
+
+out:
+	git_object_free(object);
+
+	return error;
+}
+
static int store_object(git_indexer *idx)
{
int i, error;
@@ -306,6 +424,17 @@ static int store_object(git_indexer *idx)
entry->offset = (uint32_t)entry_start;
}
+ if (idx->do_verify) {
+ git_rawobj rawobj = {
+ idx->entry_data.ptr,
+ idx->entry_data.size,
+ idx->entry_type
+ };
+
+ if ((error = check_object_connectivity(idx, &rawobj)) < 0)
+ goto on_error;
+ }
+
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
@@ -527,17 +656,103 @@ static int append_to_pack(git_indexer *idx, const void *data, size_t size)
return write_at(idx, data, idx->pack->mwf.size, size);
}
+/*
+ * Consume one pack entry from the data appended so far.
+ *
+ * The function is resumable: `idx->have_stream` records whether the
+ * entry header has already been parsed and the inflate stream opened by
+ * an earlier call. GIT_EBUFS is returned whenever more input is needed;
+ * while the header is still being parsed, `idx->off` is rewound to the
+ * start of the entry first. On success the counters in `stats` are
+ * bumped and the result of the progress callback is returned.
+ */
+static int read_stream_object(git_indexer *idx, git_transfer_progress *stats)
+{
+	git_packfile_stream *stream = &idx->stream;
+	git_off_t start = idx->off;
+	int error;
+
+	if (idx->pack->mwf.size <= idx->off + 20)
+		return GIT_EBUFS;
+
+	if (!idx->have_stream) {
+		git_mwindow *w = NULL;
+		size_t entry_size;
+		git_otype type;
+
+		error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off);
+		if (error == GIT_EBUFS) {
+			/* Header incomplete: retry from the entry start later. */
+			idx->off = start;
+			return error;
+		}
+		if (error < 0)
+			return error;
+
+		git_mwindow_close(&w);
+		idx->entry_start = start;
+		git_hash_init(&idx->hash_ctx);
+		git_buf_clear(&idx->entry_data);
+
+		if (type != GIT_OBJ_REF_DELTA && type != GIT_OBJ_OFS_DELTA) {
+			/* Plain object: its header takes part in the hash. */
+			idx->have_delta = 0;
+
+			if ((error = hash_header(&idx->hash_ctx, entry_size, type)) < 0)
+				return error;
+		} else {
+			/* Delta: step over the base reference first. */
+			error = advance_delta_offset(idx, type);
+			if (error == GIT_EBUFS) {
+				idx->off = start;
+				return error;
+			}
+			if (error < 0)
+				return error;
+
+			idx->have_delta = 1;
+		}
+
+		idx->have_stream = 1;
+		idx->entry_type = type;
+
+		if ((error = git_packfile_stream_open(stream, idx->pack, idx->off)) < 0)
+			return error;
+	}
+
+	error = idx->have_delta
+		? read_object_stream(idx, stream)
+		: hash_object_stream(idx, stream);
+
+	idx->off = stream->curpos;
+	if (error == GIT_EBUFS)
+		return error;
+
+	/* Whatever happened, the stream resources are released here. */
+	idx->have_stream = 0;
+	git_packfile_stream_dispose(stream);
+
+	if (error < 0)
+		return error;
+
+	error = idx->have_delta ? store_delta(idx) : store_object(idx);
+	if (error < 0)
+		return error;
+
+	/* Deltas are received here but only count as indexed once resolved. */
+	if (!idx->have_delta)
+		stats->indexed_objects++;
+	stats->received_objects++;
+
+	return do_progress_callback(idx, stats);
+}
+
int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
{
int error = -1;
- size_t processed;
struct git_pack_header *hdr = &idx->hdr;
git_mwindow_file *mwf = &idx->pack->mwf;
assert(idx && data && stats);
- processed = stats->indexed_objects;
-
if ((error = append_to_pack(idx, data, size)) < 0)
return error;
@@ -580,7 +795,7 @@ int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_tran
stats->local_objects = 0;
stats->total_deltas = 0;
stats->indexed_deltas = 0;
- processed = stats->indexed_objects = 0;
+ stats->indexed_objects = 0;
stats->total_objects = total_objects;
if ((error = do_progress_callback(idx, stats)) != 0)
@@ -592,87 +807,13 @@ int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_tran
/* As the file grows any windows we try to use will be out of date */
git_mwindow_free_all(mwf);
- while (processed < idx->nr_objects) {
- git_packfile_stream *stream = &idx->stream;
- git_off_t entry_start = idx->off;
- size_t entry_size;
- git_otype type;
- git_mwindow *w = NULL;
-
- if (idx->pack->mwf.size <= idx->off + 20)
- return 0;
-
- if (!idx->have_stream) {
- error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return 0;
- }
- if (error < 0)
- goto on_error;
-
- git_mwindow_close(&w);
- idx->entry_start = entry_start;
- git_hash_init(&idx->hash_ctx);
-
- if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
- error = advance_delta_offset(idx, type);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return 0;
- }
- if (error < 0)
- goto on_error;
-
- idx->have_delta = 1;
- } else {
- idx->have_delta = 0;
-
- error = hash_header(&idx->hash_ctx, entry_size, type);
- if (error < 0)
- goto on_error;
- }
-
- idx->have_stream = 1;
-
- error = git_packfile_stream_open(stream, idx->pack, idx->off);
- if (error < 0)
+ while (stats->indexed_objects < idx->nr_objects) {
+ if ((error = read_stream_object(idx, stats)) != 0) {
+ if (error == GIT_EBUFS)
+ break;
+ else
goto on_error;
}
-
- if (idx->have_delta) {
- error = read_object_stream(idx, stream);
- } else {
- error = hash_object_stream(idx, stream);
- }
-
- idx->off = stream->curpos;
- if (error == GIT_EBUFS)
- return 0;
-
- /* We want to free the stream reasorces no matter what here */
- idx->have_stream = 0;
- git_packfile_stream_dispose(stream);
-
- if (error < 0)
- goto on_error;
-
- if (idx->have_delta) {
- error = store_delta(idx);
- } else {
- error = store_object(idx);
- }
-
- if (error < 0)
- goto on_error;
-
- if (!idx->have_delta) {
- stats->indexed_objects = (unsigned int)++processed;
- }
- stats->received_objects++;
-
- if ((error = do_progress_callback(idx, stats)) != 0)
- goto on_error;
}
return 0;
@@ -861,7 +1002,7 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
progressed = 0;
non_null = 0;
git_vector_foreach(&idx->deltas, i, delta) {
- git_rawobj obj = {NULL};
+ git_rawobj obj = {0};
if (!delta)
continue;
@@ -876,6 +1017,10 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
return -1;
}
+ if (idx->do_verify && check_object_connectivity(idx, &obj) < 0)
+ /* TODO: error? continue? */
+ continue;
+
if (hash_and_save(idx, &obj, delta->delta_off) < 0)
continue;
@@ -1006,6 +1151,18 @@ int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
}
+ /*
+ * Is the resulting graph fully connected or are we still
+ * missing some objects? In the second case, we can
+ * bail out due to an incomplete and thus corrupt
+ * packfile.
+ */
+ if (git_oidmap_size(idx->expected_oids) > 0) {
+ giterr_set(GITERR_INDEXER, "packfile is missing %"PRIuZ" objects",
+ git_oidmap_size(idx->expected_oids));
+ return -1;
+ }
+
git_vector_sort(&idx->objects);
/* Use the trailer hash as the pack file name to ensure
@@ -1135,6 +1292,8 @@ on_error:
void git_indexer_free(git_indexer *idx)
{
+ khiter_t pos;
+
if (idx == NULL)
return;
@@ -1162,7 +1321,18 @@ void git_indexer_free(git_indexer *idx)
git_mutex_unlock(&git__mwindow_mutex);
}
+ for (pos = git_oidmap_begin(idx->expected_oids);
+ pos != git_oidmap_end(idx->expected_oids); pos++)
+ {
+ if (git_oidmap_has_data(idx->expected_oids, pos)) {
+ git__free((git_oid *) git_oidmap_key(idx->expected_oids, pos));
+ git_oidmap_delete_at(idx->expected_oids, pos);
+ }
+ }
+
git_hash_ctx_cleanup(&idx->trailer);
git_hash_ctx_cleanup(&idx->hash_ctx);
+ git_buf_dispose(&idx->entry_data);
+ git_oidmap_free(idx->expected_oids);
git__free(idx);
}