diff options
| author | Edward Thomson <ethomson@edwardthomson.com> | 2018-08-26 11:26:45 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-08-26 11:26:45 +0100 |
| commit | 50186ce88bcca5d7a23d6365acb3e9a818474f6f (patch) | |
| tree | 3d8d98da06557dc2a3bf759943e038bc69a26b4e /src/indexer.c | |
| parent | 8856337b35ad417db9fa5604f76086528cb0436b (diff) | |
| parent | 261267e0f1d36435e3832c4988cb2298b68dc7c2 (diff) | |
| download | libgit2-50186ce88bcca5d7a23d6365acb3e9a818474f6f.tar.gz | |
Merge pull request #4374 from pks-t/pks/pack-file-verify
Pack file verification
Diffstat (limited to 'src/indexer.c')
| -rw-r--r-- | src/indexer.c | 348 |
1 files changed, 259 insertions, 89 deletions
diff --git a/src/indexer.c b/src/indexer.c index 4f0c5ca5b..a5a4eb18d 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -10,6 +10,9 @@ #include "git2/indexer.h" #include "git2/object.h" +#include "commit.h" +#include "tree.h" +#include "tag.h" #include "pack.h" #include "mwindow.h" #include "posix.h" @@ -38,12 +41,15 @@ struct git_indexer { pack_committed :1, have_stream :1, have_delta :1, - do_fsync :1; + do_fsync :1, + do_verify :1; struct git_pack_header hdr; struct git_pack_file *pack; unsigned int mode; git_off_t off; git_off_t entry_start; + git_otype entry_type; + git_buf entry_data; git_packfile_stream stream; size_t nr_objects; git_vector objects; @@ -55,6 +61,9 @@ struct git_indexer { void *progress_payload; char objbuf[8*1024]; + /* OIDs referenced from pack objects. Used for verification. */ + git_oidmap *expected_oids; + /* Needed to look up objects which we want to inject to fix a thin pack */ git_odb *odb; @@ -106,27 +115,42 @@ static int objects_cmp(const void *a, const void *b) return git_oid__cmp(&entrya->oid, &entryb->oid); } +int git_indexer_init_options(git_indexer_options *opts, unsigned int version) +{ + GIT_INIT_STRUCTURE_FROM_TEMPLATE( + opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT); + return 0; +} + int git_indexer_new( git_indexer **out, const char *prefix, unsigned int mode, git_odb *odb, - git_transfer_progress_cb progress_cb, - void *progress_payload) + git_indexer_options *in_opts) { + git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; git_indexer *idx; git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT; static const char suff[] = "/pack"; int error, fd = -1; + if (in_opts) + memcpy(&opts, in_opts, sizeof(opts)); + idx = git__calloc(1, sizeof(git_indexer)); GITERR_CHECK_ALLOC(idx); idx->odb = odb; - idx->progress_cb = progress_cb; - idx->progress_payload = progress_payload; + idx->progress_cb = opts.progress_cb; + idx->progress_payload = opts.progress_cb_payload; idx->mode = mode ? mode : GIT_PACK_FILE_MODE; git_hash_ctx_init(&idx->hash_ctx); git_hash_ctx_init(&idx->trailer); + git_buf_init(&idx->entry_data, 0); + idx->expected_oids = git_oidmap_alloc(); + GITERR_CHECK_ALLOC(idx->expected_oids); + + idx->do_verify = opts.verify; if (git_repository__fsync_gitdir) idx->do_fsync = 1; @@ -212,6 +236,9 @@ static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream) if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0) break; + if (idx->do_verify) + git_buf_put(&idx->entry_data, idx->objbuf, read); + git_hash_update(&idx->hash_ctx, idx->objbuf, read); } while (read > 0); @@ -281,6 +308,97 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, return 0; } +static void add_expected_oid(git_indexer *idx, const git_oid *oid) +{ + int ret; + + /* + * If we know about that object because it is stored in our ODB or + * because we have already processed it as part of our pack file, we do + * not have to expect it. + */ + if ((!idx->odb || !git_odb_exists(idx->odb, oid)) && + !git_oidmap_exists(idx->pack->idx_cache, oid) && + !git_oidmap_exists(idx->expected_oids, oid)) { + git_oid *dup = git__malloc(sizeof(*oid)); + git_oid_cpy(dup, oid); + git_oidmap_put(idx->expected_oids, dup, &ret); + } +} + +static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj) +{ + git_object *object; + size_t keyidx; + int error; + + if (obj->type != GIT_OBJ_BLOB && + obj->type != GIT_OBJ_TREE && + obj->type != GIT_OBJ_COMMIT && + obj->type != GIT_OBJ_TAG) + return 0; + + if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0) + goto out; + + keyidx = git_oidmap_lookup_index(idx->expected_oids, &object->cached.oid); + if (git_oidmap_valid_index(idx->expected_oids, keyidx)) { + const git_oid *key = git_oidmap_key(idx->expected_oids, keyidx); + git__free((git_oid *) key); + git_oidmap_delete_at(idx->expected_oids, keyidx); + } + + /* + * Check whether this is a known object. If so, we can just continue as + * we assume that the ODB has a complete graph. + */ + if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid)) + return 0; + + switch (obj->type) { + case GIT_OBJ_TREE: + { + git_tree *tree = (git_tree *) object; + git_tree_entry *entry; + size_t i; + + git_array_foreach(tree->entries, i, entry) + add_expected_oid(idx, entry->oid); + + break; + } + case GIT_OBJ_COMMIT: + { + git_commit *commit = (git_commit *) object; + git_oid *parent_oid; + size_t i; + + git_array_foreach(commit->parent_ids, i, parent_oid) + add_expected_oid(idx, parent_oid); + + add_expected_oid(idx, &commit->tree_id); + + break; + } + case GIT_OBJ_TAG: + { + git_tag *tag = (git_tag *) object; + + add_expected_oid(idx, &tag->target); + + break; + } + case GIT_OBJ_BLOB: + default: + break; + } + +out: + git_object_free(object); + + return error; +} + static int store_object(git_indexer *idx) { int i, error; @@ -306,6 +424,17 @@ static int store_object(git_indexer *idx) entry->offset = (uint32_t)entry_start; } + if (idx->do_verify) { + git_rawobj rawobj = { + idx->entry_data.ptr, + idx->entry_data.size, + idx->entry_type + }; + + if ((error = check_object_connectivity(idx, &rawobj)) < 0) + goto on_error; + } + git_oid_cpy(&pentry->sha1, &oid); pentry->offset = entry_start; @@ -527,17 +656,103 @@ static int append_to_pack(git_indexer *idx, const void *data, size_t size) return write_at(idx, data, idx->pack->mwf.size, size); } +static int read_stream_object(git_indexer *idx, git_transfer_progress *stats) +{ + git_packfile_stream *stream = &idx->stream; + git_off_t entry_start = idx->off; + size_t entry_size; + git_otype type; + git_mwindow *w = NULL; + int error; + + if (idx->pack->mwf.size <= idx->off + 20) + return GIT_EBUFS; + + if (!idx->have_stream) { + error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off); + if (error == GIT_EBUFS) { + idx->off = entry_start; + return error; + } + if (error < 0) + return error; + + git_mwindow_close(&w); + idx->entry_start = entry_start; + git_hash_init(&idx->hash_ctx); + git_buf_clear(&idx->entry_data); + + if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) { + error = advance_delta_offset(idx, type); + if (error == GIT_EBUFS) { + idx->off = entry_start; + return error; + } + if (error < 0) + return error; + + idx->have_delta = 1; + } else { + idx->have_delta = 0; + + error = hash_header(&idx->hash_ctx, entry_size, type); + if (error < 0) + return error; + } + + idx->have_stream = 1; + idx->entry_type = type; + + error = git_packfile_stream_open(stream, idx->pack, idx->off); + if (error < 0) + return error; + } + + if (idx->have_delta) { + error = read_object_stream(idx, stream); + } else { + error = hash_object_stream(idx, stream); + } + + idx->off = stream->curpos; + if (error == GIT_EBUFS) + return error; + + /* We want to free the stream reasorces no matter what here */ + idx->have_stream = 0; + git_packfile_stream_dispose(stream); + + if (error < 0) + return error; + + if (idx->have_delta) { + error = store_delta(idx); + } else { + error = store_object(idx); + } + + if (error < 0) + return error; + + if (!idx->have_delta) { + stats->indexed_objects++; + } + stats->received_objects++; + + if ((error = do_progress_callback(idx, stats)) != 0) + return error; + + return 0; +} + int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats) { int error = -1; - size_t processed; struct git_pack_header *hdr = &idx->hdr; git_mwindow_file *mwf = &idx->pack->mwf; assert(idx && data && stats); - processed = stats->indexed_objects; - if ((error = append_to_pack(idx, data, size)) < 0) return error; @@ -580,7 +795,7 @@ int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_tran stats->local_objects = 0; stats->total_deltas = 0; stats->indexed_deltas = 0; - processed = stats->indexed_objects = 0; + stats->indexed_objects = 0; stats->total_objects = total_objects; if ((error = do_progress_callback(idx, stats)) != 0) @@ -592,87 +807,13 @@ int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_tran /* As the file grows any windows we try to use will be out of date */ git_mwindow_free_all(mwf); - while (processed < idx->nr_objects) { - git_packfile_stream *stream = &idx->stream; - git_off_t entry_start = idx->off; - size_t entry_size; - git_otype type; - git_mwindow *w = NULL; - - if (idx->pack->mwf.size <= idx->off + 20) - return 0; - - if (!idx->have_stream) { - error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off); - if (error == GIT_EBUFS) { - idx->off = entry_start; - return 0; - } - if (error < 0) - goto on_error; - - git_mwindow_close(&w); - idx->entry_start = entry_start; - git_hash_init(&idx->hash_ctx); - - if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) { - error = advance_delta_offset(idx, type); - if (error == GIT_EBUFS) { - idx->off = entry_start; - return 0; - } - if (error < 0) - goto on_error; - - idx->have_delta = 1; - } else { - idx->have_delta = 0; - - error = hash_header(&idx->hash_ctx, entry_size, type); - if (error < 0) - goto on_error; - } - - idx->have_stream = 1; - - error = git_packfile_stream_open(stream, idx->pack, idx->off); - if (error < 0) + while (stats->indexed_objects < idx->nr_objects) { + if ((error = read_stream_object(idx, stats)) != 0) { + if (error == GIT_EBUFS) + break; + else goto on_error; } - - if (idx->have_delta) { - error = read_object_stream(idx, stream); - } else { - error = hash_object_stream(idx, stream); - } - - idx->off = stream->curpos; - if (error == GIT_EBUFS) - return 0; - - /* We want to free the stream reasorces no matter what here */ - idx->have_stream = 0; - git_packfile_stream_dispose(stream); - - if (error < 0) - goto on_error; - - if (idx->have_delta) { - error = store_delta(idx); - } else { - error = store_object(idx); - } - - if (error < 0) - goto on_error; - - if (!idx->have_delta) { - stats->indexed_objects = (unsigned int)++processed; - } - stats->received_objects++; - - if ((error = do_progress_callback(idx, stats)) != 0) - goto on_error; } return 0; @@ -861,7 +1002,7 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats) progressed = 0; non_null = 0; git_vector_foreach(&idx->deltas, i, delta) { - git_rawobj obj = {NULL}; + git_rawobj obj = {0}; if (!delta) continue; @@ -876,6 +1017,10 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats) return -1; } + if (idx->do_verify && check_object_connectivity(idx, &obj) < 0) + /* TODO: error? continue? */ + continue; + if (hash_and_save(idx, &obj, delta->delta_off) < 0) continue; @@ -1006,6 +1151,18 @@ int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats) write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ); } + /* + * Is the resulting graph fully connected or are we still + * missing some objects? In the second case, we can + * bail out due to an incomplete and thus corrupt + * packfile. + */ + if (git_oidmap_size(idx->expected_oids) > 0) { + giterr_set(GITERR_INDEXER, "packfile is missing %"PRIuZ" objects", + git_oidmap_size(idx->expected_oids)); + return -1; + } + git_vector_sort(&idx->objects); /* Use the trailer hash as the pack file name to ensure @@ -1135,6 +1292,8 @@ on_error: void git_indexer_free(git_indexer *idx) { + khiter_t pos; + if (idx == NULL) return; @@ -1162,7 +1321,18 @@ void git_indexer_free(git_indexer *idx) git_mutex_unlock(&git__mwindow_mutex); } + for (pos = git_oidmap_begin(idx->expected_oids); + pos != git_oidmap_end(idx->expected_oids); pos++) + { + if (git_oidmap_has_data(idx->expected_oids, pos)) { + git__free((git_oid *) git_oidmap_key(idx->expected_oids, pos)); + git_oidmap_delete_at(idx->expected_oids, pos); + } + } + git_hash_ctx_cleanup(&idx->trailer); git_hash_ctx_cleanup(&idx->hash_ctx); + git_buf_dispose(&idx->entry_data); + git_oidmap_free(idx->expected_oids); git__free(idx); } |
