diff options
Diffstat (limited to 'src/indexer.c')
| -rw-r--r-- | src/indexer.c | 152 |
1 files changed, 151 insertions, 1 deletions
diff --git a/src/indexer.c b/src/indexer.c index c8fd9c25d..1a59f03f1 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -10,6 +10,9 @@ #include "git2/indexer.h" #include "git2/object.h" +#include "commit.h" +#include "tree.h" +#include "tag.h" #include "pack.h" #include "mwindow.h" #include "posix.h" @@ -36,12 +39,15 @@ struct git_indexer { pack_committed :1, have_stream :1, have_delta :1, - do_fsync :1; + do_fsync :1, + do_verify :1; struct git_pack_header hdr; struct git_pack_file *pack; unsigned int mode; git_off_t off; git_off_t entry_start; + git_otype entry_type; + git_buf entry_data; git_packfile_stream stream; size_t nr_objects; git_vector objects; @@ -53,6 +59,9 @@ struct git_indexer { void *progress_payload; char objbuf[8*1024]; + /* OIDs referenced from pack objects. Used for verification. */ + git_oidmap *expected_oids; + /* Needed to look up objects which we want to inject to fix a thin pack */ git_odb *odb; @@ -125,6 +134,11 @@ int git_indexer_new( idx->mode = mode ? mode : GIT_PACK_FILE_MODE; git_hash_ctx_init(&idx->hash_ctx); git_hash_ctx_init(&idx->trailer); + git_buf_init(&idx->entry_data, 0); + idx->expected_oids = git_oidmap_alloc(); + GITERR_CHECK_ALLOC(idx->expected_oids); + + idx->do_verify = !!idx->odb; if (git_repository__fsync_gitdir) idx->do_fsync = 1; @@ -210,6 +224,9 @@ static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream) if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0) break; + if (idx->do_verify) + git_buf_put(&idx->entry_data, idx->objbuf, read); + git_hash_update(&idx->hash_ctx, idx->objbuf, read); } while (read > 0); @@ -279,6 +296,97 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, return 0; } +static void add_expected_oid(git_indexer *idx, const git_oid *oid) +{ + int ret; + + /* + * If we know about that object because it is stored in our ODB or + * because we have already processed it as part of our pack file, we do + * not have to expect it. + */ + if (!git_odb_exists(idx->odb, oid) && + !git_oidmap_exists(idx->pack->idx_cache, oid) && + !git_oidmap_exists(idx->expected_oids, oid)) { + git_oid *dup = git__malloc(sizeof(*oid)); + git_oid_cpy(dup, oid); + git_oidmap_put(idx->expected_oids, dup, &ret); + } +} + +static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj) +{ + git_object *object; + size_t keyidx; + int error; + + if (obj->type != GIT_OBJ_BLOB && + obj->type != GIT_OBJ_TREE && + obj->type != GIT_OBJ_COMMIT && + obj->type != GIT_OBJ_TAG) + return 0; + + if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0) + goto out; + + keyidx = git_oidmap_lookup_index(idx->expected_oids, &object->cached.oid); + if (git_oidmap_valid_index(idx->expected_oids, keyidx)) { + const git_oid *key = git_oidmap_key(idx->expected_oids, keyidx); + git__free((git_oid *) key); + git_oidmap_delete_at(idx->expected_oids, keyidx); + } + + /* + * Check whether this is a known object. If so, we can just continue as + * we assume that the ODB has a complete graph. + */ + if (git_odb_exists(idx->odb, &object->cached.oid)) + return 0; + + switch (obj->type) { + case GIT_OBJ_TREE: + { + git_tree *tree = (git_tree *) object; + git_tree_entry *entry; + size_t i; + + git_array_foreach(tree->entries, i, entry) + add_expected_oid(idx, entry->oid); + + break; + } + case GIT_OBJ_COMMIT: + { + git_commit *commit = (git_commit *) object; + git_oid *parent_oid; + size_t i; + + git_array_foreach(commit->parent_ids, i, parent_oid) + add_expected_oid(idx, parent_oid); + + add_expected_oid(idx, &commit->tree_id); + + break; + } + case GIT_OBJ_TAG: + { + git_tag *tag = (git_tag *) object; + + add_expected_oid(idx, &tag->target); + + break; + } + case GIT_OBJ_BLOB: + default: + break; + } + +out: + git_object_free(object); + + return error; +} + static int store_object(git_indexer *idx) { int i, error; @@ -304,6 +412,17 @@ static int store_object(git_indexer *idx) entry->offset = (uint32_t)entry_start; } + if (idx->do_verify) { + git_rawobj rawobj = { + idx->entry_data.ptr, + idx->entry_data.size, + idx->entry_type + }; + + if ((error = check_object_connectivity(idx, &rawobj)) < 0) + goto on_error; + } + git_oid_cpy(&pentry->sha1, &oid); pentry->offset = entry_start; @@ -549,6 +668,7 @@ static int read_stream_object(git_indexer *idx, git_transfer_progress *stats) git_mwindow_close(&w); idx->entry_start = entry_start; git_hash_init(&idx->hash_ctx); + git_buf_clear(&idx->entry_data); if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) { error = advance_delta_offset(idx, type); @@ -569,6 +689,7 @@ static int read_stream_object(git_indexer *idx, git_transfer_progress *stats) } idx->have_stream = 1; + idx->entry_type = type; error = git_packfile_stream_open(stream, idx->pack, idx->off); if (error < 0) @@ -884,6 +1005,10 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats) return -1; } + if (idx->do_verify && check_object_connectivity(idx, &obj) < 0) + /* TODO: error? continue? */ + continue; + if (hash_and_save(idx, &obj, delta->delta_off) < 0) continue; @@ -1014,6 +1139,18 @@ int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats) write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ); } + /* + * Is the resulting graph fully connected or are we still + * missing some objects? In the second case, we can + * bail out due to an incomplete and thus corrupt + * packfile. + */ + if (git_oidmap_size(idx->expected_oids) > 0) { + giterr_set(GITERR_INDEXER, "packfile is missing %"PRIuZ" objects", + git_oidmap_size(idx->expected_oids)); + return -1; + } + git_vector_sort(&idx->objects); /* Use the trailer hash as the pack file name to ensure @@ -1143,6 +1280,8 @@ on_error: void git_indexer_free(git_indexer *idx) { + khiter_t pos; + if (idx == NULL) return; @@ -1170,7 +1309,18 @@ void git_indexer_free(git_indexer *idx) git_mutex_unlock(&git__mwindow_mutex); } + for (pos = git_oidmap_begin(idx->expected_oids); + pos != git_oidmap_end(idx->expected_oids); pos++) + { + if (git_oidmap_has_data(idx->expected_oids, pos)) { + git__free((git_oid *) git_oidmap_key(idx->expected_oids, pos)); + git_oidmap_delete_at(idx->expected_oids, pos); + } + } + git_hash_ctx_cleanup(&idx->trailer); git_hash_ctx_cleanup(&idx->hash_ctx); + git_buf_dispose(&idx->entry_data); + git_oidmap_free(idx->expected_oids); git__free(idx); } |
