diff options
author | Edward Thomson <ethomson@edwardthomson.com> | 2018-08-26 11:26:45 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-08-26 11:26:45 +0100 |
commit | 50186ce88bcca5d7a23d6365acb3e9a818474f6f (patch) | |
tree | 3d8d98da06557dc2a3bf759943e038bc69a26b4e | |
parent | 8856337b35ad417db9fa5604f76086528cb0436b (diff) | |
parent | 261267e0f1d36435e3832c4988cb2298b68dc7c2 (diff) | |
download | libgit2-50186ce88bcca5d7a23d6365acb3e9a818474f6f.tar.gz |
Merge pull request #4374 from pks-t/pks/pack-file-verify
Pack file verification
-rw-r--r-- | examples/network/index-pack.c | 2 | ||||
-rw-r--r-- | include/git2/indexer.h | 34 | ||||
-rw-r--r-- | src/blob.c | 40 | ||||
-rw-r--r-- | src/blob.h | 11 | ||||
-rw-r--r-- | src/commit.c | 13 | ||||
-rw-r--r-- | src/commit.h | 1 | ||||
-rw-r--r-- | src/indexer.c | 348 | ||||
-rw-r--r-- | src/object.c | 65 | ||||
-rw-r--r-- | src/object.h | 11 | ||||
-rw-r--r-- | src/odb_pack.c | 6 | ||||
-rw-r--r-- | src/pack-objects.c | 32 | ||||
-rw-r--r-- | src/pack-objects.h | 6 | ||||
-rw-r--r-- | src/tag.c | 5 | ||||
-rw-r--r-- | src/tag.h | 1 | ||||
-rw-r--r-- | src/tree.c | 25 | ||||
-rw-r--r-- | src/tree.h | 1 | ||||
-rw-r--r-- | tests/pack/indexer.c | 72 | ||||
-rw-r--r-- | tests/pack/packbuilder.c | 6 |
18 files changed, 529 insertions, 150 deletions
diff --git a/examples/network/index-pack.c b/examples/network/index-pack.c index e9261027c..128c7ebf5 100644 --- a/examples/network/index-pack.c +++ b/examples/network/index-pack.c @@ -48,7 +48,7 @@ int index_pack(git_repository *repo, int argc, char **argv) return EXIT_FAILURE; } - if (git_indexer_new(&idx, ".", 0, NULL, NULL, NULL) < 0) { + if (git_indexer_new(&idx, ".", 0, NULL, NULL) < 0) { puts("bad idx"); return -1; } diff --git a/include/git2/indexer.h b/include/git2/indexer.h index d2d315e47..94d8785c0 100644 --- a/include/git2/indexer.h +++ b/include/git2/indexer.h @@ -15,6 +15,33 @@ GIT_BEGIN_DECL typedef struct git_indexer git_indexer; +typedef struct git_indexer_options { + unsigned int version; + + /** progress_cb function to call with progress information */ + git_transfer_progress_cb progress_cb; + /** progress_cb_payload payload for the progress callback */ + void *progress_cb_payload; + + /** Do connectivity checks for the received pack */ + unsigned char verify; +} git_indexer_options; + +#define GIT_INDEXER_OPTIONS_VERSION 1 +#define GIT_INDEXER_OPTIONS_INIT { GIT_INDEXER_OPTIONS_VERSION } + +/** + * Initializes a `git_indexer_options` with default values. Equivalent to + * creating an instance with GIT_INDEXER_OPTIONS_INIT. + * + * @param opts the `git_indexer_options` struct to initialize. + * @param version Version of struct; pass `GIT_INDEXER_OPTIONS_VERSION` + * @return Zero on success; -1 on failure. + */ +GIT_EXTERN(int) git_indexer_init_options( + git_indexer_options *opts, + unsigned int version); + /** * Create a new indexer instance * @@ -24,16 +51,15 @@ typedef struct git_indexer git_indexer; * @param odb object database from which to read base objects when * fixing thin packs. Pass NULL if no thin pack is expected (an error * will be returned if there are bases missing) - * @param progress_cb function to call with progress information - * @param progress_cb_payload payload for the progress callback + * @param opts Optional structure containing additional options. See + * `git_indexer_options` above. */ GIT_EXTERN(int) git_indexer_new( git_indexer **out, const char *path, unsigned int mode, git_odb *odb, - git_transfer_progress_cb progress_cb, - void *progress_cb_payload); + git_indexer_options *opts); /** * Add data to the indexer diff --git a/src/blob.c b/src/blob.c index 86ec95c48..bcd3f41e1 100644 --- a/src/blob.c +++ b/src/blob.c @@ -19,34 +19,54 @@ const void *git_blob_rawcontent(const git_blob *blob) { assert(blob); - return git_odb_object_data(blob->odb_object); + if (blob->raw) + return blob->data.raw.data; + else + return git_odb_object_data(blob->data.odb); } git_off_t git_blob_rawsize(const git_blob *blob) { assert(blob); - return (git_off_t)git_odb_object_size(blob->odb_object); + if (blob->raw) + return blob->data.raw.size; + else + return (git_off_t)git_odb_object_size(blob->data.odb); } int git_blob__getbuf(git_buf *buffer, git_blob *blob) { return git_buf_set( buffer, - git_odb_object_data(blob->odb_object), - git_odb_object_size(blob->odb_object)); + git_blob_rawcontent(blob), + git_blob_rawsize(blob)); } -void git_blob__free(void *blob) +void git_blob__free(void *_blob) { - git_odb_object_free(((git_blob *)blob)->odb_object); + git_blob *blob = (git_blob *) _blob; + if (!blob->raw) + git_odb_object_free(blob->data.odb); git__free(blob); } -int git_blob__parse(void *blob, git_odb_object *odb_obj) +int git_blob__parse_raw(void *_blob, const char *data, size_t size) { + git_blob *blob = (git_blob *) _blob; + assert(blob); + blob->raw = 1; + blob->data.raw.data = data; + blob->data.raw.size = size; + return 0; +} + +int git_blob__parse(void *_blob, git_odb_object *odb_obj) +{ + git_blob *blob = (git_blob *) _blob; assert(blob); git_cached_obj_incref((git_cached_obj *)odb_obj); - ((git_blob *)blob)->odb_object = odb_obj; + blob->raw = 0; + blob->data.odb = odb_obj; return 0; } @@ -372,8 +392,8 @@ int git_blob_is_binary(const git_blob *blob) assert(blob); - git_buf_attach_notowned(&content, blob->odb_object->buffer, - min(blob->odb_object->cached.size, + git_buf_attach_notowned(&content, git_blob_rawcontent(blob), + min(git_blob_rawsize(blob), GIT_FILTER_BYTES_TO_CHECK_NUL)); return git_buf_text_is_binary(&content); } diff --git a/src/blob.h b/src/blob.h index 3f1f97719..f644ec583 100644 --- a/src/blob.h +++ b/src/blob.h @@ -16,11 +16,20 @@ struct git_blob { git_object object; - git_odb_object *odb_object; + + union { + git_odb_object *odb; + struct { + const char *data; + git_off_t size; + } raw; + } data; + unsigned int raw:1; }; void git_blob__free(void *blob); int git_blob__parse(void *blob, git_odb_object *obj); +int git_blob__parse_raw(void *blob, const char *data, size_t size); int git_blob__getbuf(git_buf *buffer, git_blob *blob); extern int git_blob__create_from_paths( diff --git a/src/commit.c b/src/commit.c index e0ba51d47..97ac2a189 100644 --- a/src/commit.c +++ b/src/commit.c @@ -383,11 +383,11 @@ int git_commit_amend( return error; } -int git_commit__parse(void *_commit, git_odb_object *odb_obj) +int git_commit__parse_raw(void *_commit, const char *data, size_t size) { git_commit *commit = _commit; - const char *buffer_start = git_odb_object_data(odb_obj), *buffer; - const char *buffer_end = buffer_start + git_odb_object_size(odb_obj); + const char *buffer_start = data, *buffer; + const char *buffer_end = buffer_start + size; git_oid parent_id; size_t header_len; git_signature dummy_sig; @@ -477,6 +477,13 @@ bad_buffer: return -1; } +int git_commit__parse(void *_commit, git_odb_object *odb_obj) +{ + return git_commit__parse_raw(_commit, + git_odb_object_data(odb_obj), + git_odb_object_size(odb_obj)); +} + #define GIT_COMMIT_GETTER(_rvalue, _name, _return) \ _rvalue git_commit_##_name(const git_commit *commit) \ {\ diff --git a/src/commit.h b/src/commit.h index 781809d70..9137a8fad 100644 --- a/src/commit.h +++ b/src/commit.h @@ -35,5 +35,6 @@ struct git_commit { void git_commit__free(void *commit); int git_commit__parse(void *commit, git_odb_object *obj); +int git_commit__parse_raw(void *commit, const char *data, size_t size); #endif diff --git a/src/indexer.c b/src/indexer.c index 4f0c5ca5b..a5a4eb18d 100644 --- a/src/indexer.c +++ b/src/indexer.c @@ -10,6 +10,9 @@ #include "git2/indexer.h" #include "git2/object.h" +#include "commit.h" +#include "tree.h" +#include "tag.h" #include "pack.h" #include "mwindow.h" #include "posix.h" @@ -38,12 +41,15 @@ struct git_indexer { pack_committed :1, have_stream :1, have_delta :1, - do_fsync :1; + do_fsync :1, + do_verify :1; struct git_pack_header hdr; struct git_pack_file *pack; unsigned int mode; git_off_t off; git_off_t entry_start; + git_otype entry_type; + git_buf entry_data; git_packfile_stream stream; size_t nr_objects; git_vector objects; @@ -55,6 +61,9 @@ struct git_indexer { void *progress_payload; char objbuf[8*1024]; + /* OIDs referenced from pack objects. Used for verification. */ + git_oidmap *expected_oids; + /* Needed to look up objects which we want to inject to fix a thin pack */ git_odb *odb; @@ -106,27 +115,42 @@ static int objects_cmp(const void *a, const void *b) return git_oid__cmp(&entrya->oid, &entryb->oid); } +int git_indexer_init_options(git_indexer_options *opts, unsigned int version) +{ + GIT_INIT_STRUCTURE_FROM_TEMPLATE( + opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT); + return 0; +} + int git_indexer_new( git_indexer **out, const char *prefix, unsigned int mode, git_odb *odb, - git_transfer_progress_cb progress_cb, - void *progress_payload) + git_indexer_options *in_opts) { + git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; git_indexer *idx; git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT; static const char suff[] = "/pack"; int error, fd = -1; + if (in_opts) + memcpy(&opts, in_opts, sizeof(opts)); + idx = git__calloc(1, sizeof(git_indexer)); GITERR_CHECK_ALLOC(idx); idx->odb = odb; - idx->progress_cb = progress_cb; - idx->progress_payload = progress_payload; + idx->progress_cb = opts.progress_cb; + idx->progress_payload = opts.progress_cb_payload; idx->mode = mode ? mode : GIT_PACK_FILE_MODE; git_hash_ctx_init(&idx->hash_ctx); git_hash_ctx_init(&idx->trailer); + git_buf_init(&idx->entry_data, 0); + idx->expected_oids = git_oidmap_alloc(); + GITERR_CHECK_ALLOC(idx->expected_oids); + + idx->do_verify = opts.verify; if (git_repository__fsync_gitdir) idx->do_fsync = 1; @@ -212,6 +236,9 @@ static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream) if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0) break; + if (idx->do_verify) + git_buf_put(&idx->entry_data, idx->objbuf, read); + git_hash_update(&idx->hash_ctx, idx->objbuf, read); } while (read > 0); @@ -281,6 +308,97 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, return 0; } +static void add_expected_oid(git_indexer *idx, const git_oid *oid) +{ + int ret; + + /* + * If we know about that object because it is stored in our ODB or + * because we have already processed it as part of our pack file, we do + * not have to expect it. + */ + if ((!idx->odb || !git_odb_exists(idx->odb, oid)) && + !git_oidmap_exists(idx->pack->idx_cache, oid) && + !git_oidmap_exists(idx->expected_oids, oid)) { + git_oid *dup = git__malloc(sizeof(*oid)); + git_oid_cpy(dup, oid); + git_oidmap_put(idx->expected_oids, dup, &ret); + } +} + +static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj) +{ + git_object *object; + size_t keyidx; + int error; + + if (obj->type != GIT_OBJ_BLOB && + obj->type != GIT_OBJ_TREE && + obj->type != GIT_OBJ_COMMIT && + obj->type != GIT_OBJ_TAG) + return 0; + + if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0) + goto out; + + keyidx = git_oidmap_lookup_index(idx->expected_oids, &object->cached.oid); + if (git_oidmap_valid_index(idx->expected_oids, keyidx)) { + const git_oid *key = git_oidmap_key(idx->expected_oids, keyidx); + git__free((git_oid *) key); + git_oidmap_delete_at(idx->expected_oids, keyidx); + } + + /* + * Check whether this is a known object. If so, we can just continue as + * we assume that the ODB has a complete graph. + */ + if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid)) + return 0; + + switch (obj->type) { + case GIT_OBJ_TREE: + { + git_tree *tree = (git_tree *) object; + git_tree_entry *entry; + size_t i; + + git_array_foreach(tree->entries, i, entry) + add_expected_oid(idx, entry->oid); + + break; + } + case GIT_OBJ_COMMIT: + { + git_commit *commit = (git_commit *) object; + git_oid *parent_oid; + size_t i; + + git_array_foreach(commit->parent_ids, i, parent_oid) + add_expected_oid(idx, parent_oid); + + add_expected_oid(idx, &commit->tree_id); + + break; + } + case GIT_OBJ_TAG: + { + git_tag *tag = (git_tag *) object; + + add_expected_oid(idx, &tag->target); + + break; + } + case GIT_OBJ_BLOB: + default: + break; + } + +out: + git_object_free(object); + + return error; +} + static int store_object(git_indexer *idx) { int i, error; @@ -306,6 +424,17 @@ static int store_object(git_indexer *idx) entry->offset = (uint32_t)entry_start; } + if (idx->do_verify) { + git_rawobj rawobj = { + idx->entry_data.ptr, + idx->entry_data.size, + idx->entry_type + }; + + if ((error = check_object_connectivity(idx, &rawobj)) < 0) + goto on_error; + } + git_oid_cpy(&pentry->sha1, &oid); pentry->offset = entry_start; @@ -527,17 +656,103 @@ static int append_to_pack(git_indexer *idx, const void *data, size_t size) return write_at(idx, data, idx->pack->mwf.size, size); } +static int read_stream_object(git_indexer *idx, git_transfer_progress *stats) +{ + git_packfile_stream *stream = &idx->stream; + git_off_t entry_start = idx->off; + size_t entry_size; + git_otype type; + git_mwindow *w = NULL; + int error; + + if (idx->pack->mwf.size <= idx->off + 20) + return GIT_EBUFS; + + if (!idx->have_stream) { + error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off); + if (error == GIT_EBUFS) { + idx->off = entry_start; + return error; + } + if (error < 0) + return error; + + git_mwindow_close(&w); + idx->entry_start = entry_start; + git_hash_init(&idx->hash_ctx); + git_buf_clear(&idx->entry_data); + + if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) { + error = advance_delta_offset(idx, type); + if (error == GIT_EBUFS) { + idx->off = entry_start; + return error; + } + if (error < 0) + return error; + + idx->have_delta = 1; + } else { + idx->have_delta = 0; + + error = hash_header(&idx->hash_ctx, entry_size, type); + if (error < 0) + return error; + } + + idx->have_stream = 1; + idx->entry_type = type; + + error = git_packfile_stream_open(stream, idx->pack, idx->off); + if (error < 0) + return error; + } + + if (idx->have_delta) { + error = read_object_stream(idx, stream); + } else { + error = hash_object_stream(idx, stream); + } + + idx->off = stream->curpos; + if (error == GIT_EBUFS) + return error; + + /* We want to free the stream reasorces no matter what here */ + idx->have_stream = 0; + git_packfile_stream_dispose(stream); + + if (error < 0) + return error; + + if (idx->have_delta) { + error = store_delta(idx); + } else { + error = store_object(idx); + } + + if (error < 0) + return error; + + if (!idx->have_delta) { + stats->indexed_objects++; + } + stats->received_objects++; + + if ((error = do_progress_callback(idx, stats)) != 0) + return error; + + return 0; +} + int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats) { int error = -1; - size_t processed; struct git_pack_header *hdr = &idx->hdr; git_mwindow_file *mwf = &idx->pack->mwf; assert(idx && data && stats); - processed = stats->indexed_objects; - if ((error = append_to_pack(idx, data, size)) < 0) return error; @@ -580,7 +795,7 @@ int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_tran stats->local_objects = 0; stats->total_deltas = 0; stats->indexed_deltas = 0; - processed = stats->indexed_objects = 0; + stats->indexed_objects = 0; stats->total_objects = total_objects; if ((error = do_progress_callback(idx, stats)) != 0) @@ -592,87 +807,13 @@ int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_tran /* As the file grows any windows we try to use will be out of date */ git_mwindow_free_all(mwf); - while (processed < idx->nr_objects) { - git_packfile_stream *stream = &idx->stream; - git_off_t entry_start = idx->off; - size_t entry_size; - git_otype type; - git_mwindow *w = NULL; - - if (idx->pack->mwf.size <= idx->off + 20) - return 0; - - if (!idx->have_stream) { - error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off); - if (error == GIT_EBUFS) { - idx->off = entry_start; - return 0; - } - if (error < 0) - goto on_error; - - git_mwindow_close(&w); - idx->entry_start = entry_start; - git_hash_init(&idx->hash_ctx); - - if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) { - error = advance_delta_offset(idx, type); - if (error == GIT_EBUFS) { - idx->off = entry_start; - return 0; - } - if (error < 0) - goto on_error; - - idx->have_delta = 1; - } else { - idx->have_delta = 0; - - error = hash_header(&idx->hash_ctx, entry_size, type); - if (error < 0) - goto on_error; - } - - idx->have_stream = 1; - - error = git_packfile_stream_open(stream, idx->pack, idx->off); - if (error < 0) + while (stats->indexed_objects < idx->nr_objects) { + if ((error = read_stream_object(idx, stats)) != 0) { + if (error == GIT_EBUFS) + break; + else goto on_error; } - - if (idx->have_delta) { - error = read_object_stream(idx, stream); - } else { - error = hash_object_stream(idx, stream); - } - - idx->off = stream->curpos; - if (error == GIT_EBUFS) - return 0; - - /* We want to free the stream reasorces no matter what here */ - idx->have_stream = 0; - git_packfile_stream_dispose(stream); - - if (error < 0) - goto on_error; - - if (idx->have_delta) { - error = store_delta(idx); - } else { - error = store_object(idx); - } - - if (error < 0) - goto on_error; - - if (!idx->have_delta) { - stats->indexed_objects = (unsigned int)++processed; - } - stats->received_objects++; - - if ((error = do_progress_callback(idx, stats)) != 0) - goto on_error; } return 0; @@ -861,7 +1002,7 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats) progressed = 0; non_null = 0; git_vector_foreach(&idx->deltas, i, delta) { - git_rawobj obj = {NULL}; + git_rawobj obj = {0}; if (!delta) continue; @@ -876,6 +1017,10 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats) return -1; } + if (idx->do_verify && check_object_connectivity(idx, &obj) < 0) + /* TODO: error? continue? */ + continue; + if (hash_and_save(idx, &obj, delta->delta_off) < 0) continue; @@ -1006,6 +1151,18 @@ int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats) write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ); } + /* + * Is the resulting graph fully connected or are we still + * missing some objects? In the second case, we can + * bail out due to an incomplete and thus corrupt + * packfile. + */ + if (git_oidmap_size(idx->expected_oids) > 0) { + giterr_set(GITERR_INDEXER, "packfile is missing %"PRIuZ" objects", + git_oidmap_size(idx->expected_oids)); + return -1; + } + git_vector_sort(&idx->objects); /* Use the trailer hash as the pack file name to ensure @@ -1135,6 +1292,8 @@ on_error: void git_indexer_free(git_indexer *idx) { + khiter_t pos; + if (idx == NULL) return; @@ -1162,7 +1321,18 @@ void git_indexer_free(git_indexer *idx) git_mutex_unlock(&git__mwindow_mutex); } + for (pos = git_oidmap_begin(idx->expected_oids); + pos != git_oidmap_end(idx->expected_oids); pos++) + { + if (git_oidmap_has_data(idx->expected_oids, pos)) { + git__free((git_oid *) git_oidmap_key(idx->expected_oids, pos)); + git_oidmap_delete_at(idx->expected_oids, pos); + } + } + git_hash_ctx_cleanup(&idx->trailer); git_hash_ctx_cleanup(&idx->hash_ctx); + git_buf_dispose(&idx->entry_data); + git_oidmap_free(idx->expected_oids); git__free(idx); } diff --git a/src/object.c b/src/object.c index 48f561384..c1f3ea919 100644 --- a/src/object.c +++ b/src/object.c @@ -12,6 +12,7 @@ #include "repository.h" #include "commit.h" +#include "hash.h" #include "tree.h" #include "blob.h" #include "oid.h" @@ -19,38 +20,86 @@ bool git_object__strict_input_validation = true; +extern int git_odb_hash(git_oid *out, const void *data, size_t len, git_otype type); + typedef struct { const char *str; /* type name string */ size_t size; /* size in bytes of the object structure */ int (*parse)(void *self, git_odb_object *obj); + int (*parse_raw)(void *self, const char *data, size_t size); void (*free)(void *self); } git_object_def; static git_object_def git_objects_table[] = { /* 0 = GIT_OBJ__EXT1 */ - { "", 0, NULL, NULL }, + { "", 0, NULL, NULL, NULL }, /* 1 = GIT_OBJ_COMMIT */ - { "commit", sizeof(git_commit), git_commit__parse, git_commit__free }, + { "commit", sizeof(git_commit), git_commit__parse, git_commit__parse_raw, git_commit__free }, /* 2 = GIT_OBJ_TREE */ - { "tree", sizeof(git_tree), git_tree__parse, git_tree__free }, + { "tree", sizeof(git_tree), git_tree__parse, git_tree__parse_raw, git_tree__free }, /* 3 = GIT_OBJ_BLOB */ - { "blob", sizeof(git_blob), git_blob__parse, git_blob__free }, + { "blob", sizeof(git_blob), git_blob__parse, git_blob__parse_raw, git_blob__free }, /* 4 = GIT_OBJ_TAG */ - { "tag", sizeof(git_tag), git_tag__parse, git_tag__free }, + { "tag", sizeof(git_tag), git_tag__parse, git_tag__parse_raw, git_tag__free }, /* 5 = GIT_OBJ__EXT2 */ - { "", 0, NULL, NULL }, + { "", 0, NULL, NULL, NULL }, /* 6 = GIT_OBJ_OFS_DELTA */ - { "OFS_DELTA", 0, NULL, NULL }, + { "OFS_DELTA", 0, NULL, NULL, NULL }, /* 7 = GIT_OBJ_REF_DELTA */ - { "REF_DELTA", 0, NULL, NULL }, + { "REF_DELTA", 0, NULL, NULL, NULL }, }; +int git_object__from_raw( + git_object **object_out, + const char *data, + size_t size, + git_otype type) +{ + git_object_def *def; + git_object *object; + size_t object_size; + int error; + + assert(object_out); + *object_out = NULL; + + /* Validate type match */ + if (type != GIT_OBJ_BLOB && type != GIT_OBJ_TREE && type != GIT_OBJ_COMMIT && type != GIT_OBJ_TAG) { + giterr_set(GITERR_INVALID, "the requested type is invalid"); + return GIT_ENOTFOUND; + } + + if ((object_size = git_object__size(type)) == 0) { + giterr_set(GITERR_INVALID, "the requested type is invalid"); + return GIT_ENOTFOUND; + } + + /* Allocate and initialize base object */ + object = git__calloc(1, object_size); + GITERR_CHECK_ALLOC(object); + object->cached.flags = GIT_CACHE_STORE_PARSED; + object->cached.type = type; + git_odb_hash(&object->cached.oid, data, size, type); + + /* Parse raw object data */ + def = &git_objects_table[type]; + assert(def->free && def->parse_raw); + + if ((error = def->parse_raw(object, data, size)) < 0) + def->free(object); + + git_cached_obj_incref(object); + *object_out = object; + + return 0; +} + int git_object__from_odb_object( git_object **object_out, git_repository *repo, diff --git a/src/object.h b/src/object.h index e46c9cafa..f5cbbf763 100644 --- a/src/object.h +++ b/src/object.h @@ -22,6 +22,17 @@ struct git_object { /* fully free the object; internal method, DO NOT EXPORT */ void git_object__free(void *object); +/* + * Parse object from raw data. Note that the resulting object is + * tied to the lifetime of the data, as some objects simply point + * into it. + */ +int git_object__from_raw( + git_object **object_out, + const char *data, + size_t size, + git_otype type); + int git_object__from_odb_object( git_object **object_out, git_repository *repo, diff --git a/src/odb_pack.c b/src/odb_pack.c index 2c3048034..2892aa1e2 100644 --- a/src/odb_pack.c +++ b/src/odb_pack.c @@ -519,6 +519,7 @@ static int pack_backend__writepack(struct git_odb_writepack **out, git_transfer_progress_cb progress_cb, void *progress_payload) { + git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; struct pack_backend *backend; struct pack_writepack *writepack; @@ -526,13 +527,16 @@ static int pack_backend__writepack(struct git_odb_writepack **out, *out = NULL; + opts.progress_cb = progress_cb; + opts.progress_cb_payload = progress_payload; + backend = (struct pack_backend *)_backend; writepack = git__calloc(1, sizeof(struct pack_writepack)); GITERR_CHECK_ALLOC(writepack); if (git_indexer_new(&writepack->indexer, - backend->pack_folder, 0, odb, progress_cb, progress_payload) < 0) { + backend->pack_folder, 0, odb, &opts) < 0) { git__free(writepack); return -1; } diff --git a/src/pack-objects.c b/src/pack-objects.c index bc5fb2e1f..2b786df33 100644 --- a/src/pack-objects.c +++ b/src/pack-objects.c @@ -41,6 +41,12 @@ struct pack_write_context { git_transfer_progress *stats; }; +struct walk_object { + git_oid id; + unsigned int uninteresting:1, + seen:1; +}; + #ifdef GIT_THREADS #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \ @@ -143,7 +149,7 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo) if (!pb->walk_objects) goto on_error; - git_pool_init(&pb->object_pool, sizeof(git_walk_object)); + git_pool_init(&pb->object_pool, sizeof(struct walk_object)); pb->repo = repo; pb->nr_threads = 1; /* do not spawn any thread by default */ @@ -1382,6 +1388,7 @@ int git_packbuilder_write( git_transfer_progress_cb progress_cb, void *progress_cb_payload) { + git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; git_indexer *indexer; git_transfer_progress stats; struct pack_write_context ctx; @@ -1389,8 +1396,11 @@ int git_packbuilder_write( PREPARE_PACK; + opts.progress_cb = progress_cb; + opts.progress_cb_payload = progress_cb_payload; + if (git_indexer_new( - &indexer, path, mode, pb->odb, progress_cb, progress_cb_payload) < 0) + &indexer, path, mode, pb->odb, &opts) < 0) return -1; if (!git_repository__cvar(&t, pb->repo, GIT_CVAR_FSYNCOBJECTFILES) && t) @@ -1513,9 +1523,9 @@ size_t git_packbuilder_written(git_packbuilder *pb) return pb->nr_written; } -int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id) +static int lookup_walk_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id) { - git_walk_object *obj; + struct walk_object *obj; obj = git_pool_mallocz(&pb->object_pool, 1); if (!obj) { @@ -1529,11 +1539,11 @@ int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid return 0; } -static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id) +static int retrieve_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id) { int error; khiter_t pos; - git_walk_object *obj; + struct walk_object *obj; pos = git_oidmap_lookup_index(pb->walk_objects, id); if (git_oidmap_valid_index(pb->walk_objects, pos)) { @@ -1552,7 +1562,7 @@ static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id) { int error; - git_walk_object *obj; + struct walk_object *obj; if ((error = retrieve_object(&obj, pb, id)) < 0) return error; @@ -1564,7 +1574,7 @@ static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id) static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id) { - git_walk_object *obj; + struct walk_object *obj; git_tree *tree; int error; size_t i; @@ -1636,7 +1646,7 @@ int insert_tree(git_packbuilder *pb, git_tree *tree) size_t i; int error; git_tree *subtree; - git_walk_object *obj; + struct walk_object *obj; const char *name; if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0) @@ -1684,7 +1694,7 @@ int insert_tree(git_packbuilder *pb, git_tree *tree) return error; } -int insert_commit(git_packbuilder *pb, git_walk_object *obj) +int insert_commit(git_packbuilder *pb, struct walk_object *obj) { int error; git_commit *commit = NULL; @@ -1714,7 +1724,7 @@ int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk) { int error; git_oid id; - git_walk_object *obj; + struct walk_object *obj; assert(pb && walk); diff --git a/src/pack-objects.h b/src/pack-objects.h index c9cd5777a..a931f3f86 100644 --- a/src/pack-objects.h +++ b/src/pack-objects.h @@ -52,12 +52,6 @@ typedef struct git_pobject { filled:1; } git_pobject; -typedef struct { - git_oid id; - unsigned int uninteresting:1, - seen:1; -} git_walk_object; - struct git_packbuilder { git_repository *repo; /* associated repository */ git_odb *odb; /* associated object database */ @@ -159,6 +159,11 @@ static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end) return 0; } +int git_tag__parse_raw(void *_tag, const char *data, size_t size) +{ + return tag_parse(_tag, data, data + size); +} + int git_tag__parse(void *_tag, git_odb_object *odb_obj) { git_tag *tag = _tag; @@ -26,5 +26,6 @@ struct git_tag { void git_tag__free(void *tag); int git_tag__parse(void *tag, git_odb_object *obj); +int git_tag__parse_raw(void *tag, const char *data, size_t size); #endif diff --git a/src/tree.c b/src/tree.c index be0f528c2..823385722 100644 --- a/src/tree.c +++ b/src/tree.c @@ -375,18 +375,16 @@ static int parse_mode(unsigned int *modep, const char *buffer, const char **buff return 0; } -int git_tree__parse(void *_tree, git_odb_object *odb_obj) +int git_tree__parse_raw(void *_tree, const char *data, size_t size) { git_tree *tree = _tree; const char *buffer; const char *buffer_end; - if (git_odb_object_dup(&tree->odb_obj, odb_obj) < 0) - return -1; - - buffer = git_odb_object_data(tree->odb_obj); - buffer_end = buffer + git_odb_object_size(tree->odb_obj); + buffer = data; + buffer_end = buffer + size; + tree->odb_obj = NULL; git_array_init_to_size(tree->entries, DEFAULT_TREE_SIZE); GITERR_CHECK_ARRAY(tree->entries); @@ -426,6 +424,21 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj) return 0; } +int git_tree__parse(void *_tree, git_odb_object *odb_obj) +{ + git_tree *tree = _tree; + + if ((git_tree__parse_raw(tree, + git_odb_object_data(odb_obj), + git_odb_object_size(odb_obj))) < 0) + return -1; + + if (git_odb_object_dup(&tree->odb_obj, odb_obj) < 0) + return -1; + + return 0; +} + static size_t find_next_dir(const char *dirname, git_index *index, size_t start) { size_t dirlen, i, entries = git_index_entrycount(index); diff --git a/src/tree.h b/src/tree.h index fbee5efe1..973ba15d0 100644 --- a/src/tree.h +++ b/src/tree.h @@ -41,6 +41,7 @@ GIT_INLINE(bool) git_tree_entry__is_tree(const struct git_tree_entry *e) void git_tree__free(void *tree); int git_tree__parse(void *tree, git_odb_object *obj); +int git_tree__parse_raw(void *_tree, const char *data, size_t size); /** * Write a tree to the given repository diff --git a/tests/pack/indexer.c b/tests/pack/indexer.c index 453399809..3a5021223 100644 --- a/tests/pack/indexer.c +++ b/tests/pack/indexer.c @@ -74,6 +74,24 @@ static const unsigned char leaky_pack[] = { }; static const unsigned int leaky_pack_len = 33; +/* + * Packfile with a three objects. The first one is a tree referencing two blobs, + * the second object is one of those blobs. The second blob is missing. + */ +unsigned char incomplete_pack[] = { + 0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, + 0xae, 0x03, 0x78, 0x9c, 0x33, 0x34, 0x30, 0x30, 0x33, 0x31, 0x51, 0x48, + 0x4a, 0x2c, 0x62, 0x08, 0x17, 0x3b, 0x15, 0xd9, 0x7e, 0xfa, 0x67, 0x6d, + 0xf6, 0x56, 0x4f, 0x85, 0x7d, 0xcb, 0xd6, 0xde, 0x53, 0xd1, 0x6d, 0x7f, + 0x66, 0x08, 0x91, 0x4e, 0xcb, 0xcf, 0x67, 0x50, 0xad, 0x39, 0x9a, 0xa2, + 0xb3, 0x71, 0x41, 0xc8, 0x87, 0x9e, 0x13, 0xf6, 0xba, 0x53, 0xec, 0xc2, + 0xfe, 0xda, 0xed, 0x9b, 0x09, 0x00, 0xe8, 0xc8, 0x19, 0xab, 0x34, 0x78, + 0x9c, 0x4b, 0x4a, 0x2c, 0xe2, 0x02, 0x00, 0x03, 0x9d, 0x01, 0x40, 0x4b, + 0x72, 0xa2, 0x6f, 0xb6, 0x88, 0x2d, 0x6c, 0xa5, 0x07, 0xb2, 0xa5, 0x45, + 0xe8, 0xdb, 0xe6, 0x53, 0xb3, 0x52, 0xe2 +}; +unsigned int incomplete_pack_len = 115; + static const unsigned char base_obj[] = { 07, 076 }; static const unsigned int base_obj_len = 2; @@ -82,7 +100,7 @@ void test_pack_indexer__out_of_order(void) git_indexer *idx = 0; git_transfer_progress stats = { 0 }; - cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL)); cl_git_pass(git_indexer_append( idx, out_of_order_pack, out_of_order_pack_len, &stats)); cl_git_pass(git_indexer_commit(idx, &stats)); @@ -99,7 +117,7 @@ void test_pack_indexer__missing_trailer(void) git_indexer *idx = 0; git_transfer_progress stats = { 0 }; - cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL)); cl_git_pass(git_indexer_append( idx, missing_trailer_pack, missing_trailer_pack_len, &stats)); cl_git_fail(git_indexer_commit(idx, &stats)); @@ -115,7 +133,7 @@ void test_pack_indexer__leaky(void) git_indexer *idx = 0; git_transfer_progress stats = { 0 }; - cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL)); cl_git_pass(git_indexer_append( idx, leaky_pack, leaky_pack_len, &stats)); cl_git_fail(git_indexer_commit(idx, &stats)); @@ -142,7 +160,7 @@ void test_pack_indexer__fix_thin(void) git_oid_fromstr(&should_id, "e68fe8129b546b101aee9510c5328e7f21ca1d18"); cl_assert_equal_oid(&should_id, &id); - cl_git_pass(git_indexer_new(&idx, ".", 0, odb, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, odb, NULL)); cl_git_pass(git_indexer_append(idx, thin_pack, thin_pack_len, &stats)); cl_git_pass(git_indexer_commit(idx, &stats)); @@ -175,7 +193,7 @@ void test_pack_indexer__fix_thin(void) cl_git_pass(p_stat(name, &st)); - cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL)); read = p_read(fd, buffer, sizeof(buffer)); cl_assert(read != -1); p_close(fd); @@ -208,7 +226,7 @@ void test_pack_indexer__corrupt_length(void) git_oid_fromstr(&should_id, "e68fe8129b546b101aee9510c5328e7f21ca1d18"); cl_assert_equal_oid(&should_id, &id); - cl_git_pass(git_indexer_new(&idx, ".", 0, odb, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, odb, NULL)); cl_git_pass(git_indexer_append( idx, corrupt_thin_pack, corrupt_thin_pack_len, &stats)); cl_git_fail(git_indexer_commit(idx, &stats)); @@ -221,6 +239,46 @@ void test_pack_indexer__corrupt_length(void) git_repository_free(repo); } +void test_pack_indexer__incomplete_pack_fails_with_strict(void) +{ + git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; + git_indexer *idx = 0; + git_transfer_progress stats = { 0 }; + + opts.verify = 1; + + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, &opts)); + cl_git_pass(git_indexer_append( + idx, incomplete_pack, incomplete_pack_len, &stats)); + cl_git_fail(git_indexer_commit(idx, &stats)); + + cl_assert_equal_i(stats.total_objects, 2); + cl_assert_equal_i(stats.received_objects, 2); + cl_assert_equal_i(stats.indexed_objects, 2); + + git_indexer_free(idx); +} + +void test_pack_indexer__out_of_order_with_connectivity_checks(void) +{ + git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; + git_indexer *idx = 0; + git_transfer_progress stats = { 0 }; + + opts.verify = 1; + + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, &opts)); + cl_git_pass(git_indexer_append( + idx, out_of_order_pack, out_of_order_pack_len, &stats)); + cl_git_pass(git_indexer_commit(idx, &stats)); + + cl_assert_equal_i(stats.total_objects, 3); + cl_assert_equal_i(stats.received_objects, 3); + cl_assert_equal_i(stats.indexed_objects, 3); + + git_indexer_free(idx); +} + static int find_tmp_file_recurs(void *opaque, git_buf *path) { int error = 0; @@ -252,7 +310,7 @@ void test_pack_indexer__no_tmp_files(void) git_buf_dispose(&path); cl_assert(git_buf_len(&first_tmp_file) == 0); - cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL)); git_indexer_free(idx); cl_git_pass(git_buf_sets(&path, clar_sandbox_path())); diff --git a/tests/pack/packbuilder.c b/tests/pack/packbuilder.c index 932cb3861..bd2cebed2 100644 --- a/tests/pack/packbuilder.c +++ b/tests/pack/packbuilder.c @@ -100,7 +100,7 @@ void test_pack_packbuilder__create_pack(void) seed_packbuilder(); - cl_git_pass(git_indexer_new(&_indexer, ".", 0, NULL, NULL, NULL)); + cl_git_pass(git_indexer_new(&_indexer, ".", 0, NULL, NULL)); cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats)); cl_git_pass(git_indexer_commit(_indexer, &stats)); @@ -237,7 +237,7 @@ void test_pack_packbuilder__foreach(void) git_indexer *idx; seed_packbuilder(); - cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL)); cl_git_pass(git_packbuilder_foreach(_packbuilder, foreach_cb, idx)); cl_git_pass(git_indexer_commit(idx, &_stats)); git_indexer_free(idx); @@ -255,7 +255,7 @@ void test_pack_packbuilder__foreach_with_cancel(void) git_indexer *idx; seed_packbuilder(); - cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL)); + cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL)); cl_git_fail_with( git_packbuilder_foreach(_packbuilder, foreach_cancel_cb, idx), -1111); git_indexer_free(idx); |