summary refs log tree commit diff
diff options
context:
space:
mode:
author Edward Thomson <ethomson@edwardthomson.com> 2018-08-26 11:26:45 +0100
committer GitHub <noreply@github.com> 2018-08-26 11:26:45 +0100
commit 50186ce88bcca5d7a23d6365acb3e9a818474f6f (patch)
tree 3d8d98da06557dc2a3bf759943e038bc69a26b4e
parent 8856337b35ad417db9fa5604f76086528cb0436b (diff)
parent 261267e0f1d36435e3832c4988cb2298b68dc7c2 (diff)
download libgit2-50186ce88bcca5d7a23d6365acb3e9a818474f6f.tar.gz
Merge pull request #4374 from pks-t/pks/pack-file-verify
Pack file verification
-rw-r--r-- examples/network/index-pack.c 2
-rw-r--r-- include/git2/indexer.h 34
-rw-r--r-- src/blob.c 40
-rw-r--r-- src/blob.h 11
-rw-r--r-- src/commit.c 13
-rw-r--r-- src/commit.h 1
-rw-r--r-- src/indexer.c 348
-rw-r--r-- src/object.c 65
-rw-r--r-- src/object.h 11
-rw-r--r-- src/odb_pack.c 6
-rw-r--r-- src/pack-objects.c 32
-rw-r--r-- src/pack-objects.h 6
-rw-r--r-- src/tag.c 5
-rw-r--r-- src/tag.h 1
-rw-r--r-- src/tree.c 25
-rw-r--r-- src/tree.h 1
-rw-r--r-- tests/pack/indexer.c 72
-rw-r--r-- tests/pack/packbuilder.c 6
18 files changed, 529 insertions, 150 deletions
diff --git a/examples/network/index-pack.c b/examples/network/index-pack.c
index e9261027c..128c7ebf5 100644
--- a/examples/network/index-pack.c
+++ b/examples/network/index-pack.c
@@ -48,7 +48,7 @@ int index_pack(git_repository *repo, int argc, char **argv)
return EXIT_FAILURE;
}
- if (git_indexer_new(&idx, ".", 0, NULL, NULL, NULL) < 0) {
+ if (git_indexer_new(&idx, ".", 0, NULL, NULL) < 0) {
puts("bad idx");
return -1;
}
diff --git a/include/git2/indexer.h b/include/git2/indexer.h
index d2d315e47..94d8785c0 100644
--- a/include/git2/indexer.h
+++ b/include/git2/indexer.h
@@ -15,6 +15,33 @@ GIT_BEGIN_DECL
typedef struct git_indexer git_indexer;
+typedef struct git_indexer_options {
+ unsigned int version;
+
+ /** Function to call with progress information */
+ git_transfer_progress_cb progress_cb;
+ /** Payload passed to the progress callback */
+ void *progress_cb_payload;
+
+ /** Do connectivity checks for the received pack */
+ unsigned char verify;
+} git_indexer_options;
+
+#define GIT_INDEXER_OPTIONS_VERSION 1
+#define GIT_INDEXER_OPTIONS_INIT { GIT_INDEXER_OPTIONS_VERSION }
+
+/**
+ * Initializes a `git_indexer_options` with default values. Equivalent to
+ * creating an instance with GIT_INDEXER_OPTIONS_INIT.
+ *
+ * @param opts the `git_indexer_options` struct to initialize.
+ * @param version Version of struct; pass `GIT_INDEXER_OPTIONS_VERSION`
+ * @return Zero on success; -1 on failure.
+ */
+GIT_EXTERN(int) git_indexer_init_options(
+ git_indexer_options *opts,
+ unsigned int version);
+
/**
* Create a new indexer instance
*
@@ -24,16 +51,15 @@ typedef struct git_indexer git_indexer;
* @param odb object database from which to read base objects when
* fixing thin packs. Pass NULL if no thin pack is expected (an error
* will be returned if there are bases missing)
- * @param progress_cb function to call with progress information
- * @param progress_cb_payload payload for the progress callback
+ * @param opts Optional structure containing additional options. See
+ * `git_indexer_options` above.
*/
GIT_EXTERN(int) git_indexer_new(
git_indexer **out,
const char *path,
unsigned int mode,
git_odb *odb,
- git_transfer_progress_cb progress_cb,
- void *progress_cb_payload);
+ git_indexer_options *opts);
/**
* Add data to the indexer
diff --git a/src/blob.c b/src/blob.c
index 86ec95c48..bcd3f41e1 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -19,34 +19,54 @@
const void *git_blob_rawcontent(const git_blob *blob)
{
assert(blob);
- return git_odb_object_data(blob->odb_object);
+ if (blob->raw)
+ return blob->data.raw.data;
+ else
+ return git_odb_object_data(blob->data.odb);
}
git_off_t git_blob_rawsize(const git_blob *blob)
{
assert(blob);
- return (git_off_t)git_odb_object_size(blob->odb_object);
+ if (blob->raw)
+ return blob->data.raw.size;
+ else
+ return (git_off_t)git_odb_object_size(blob->data.odb);
}
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
{
return git_buf_set(
buffer,
- git_odb_object_data(blob->odb_object),
- git_odb_object_size(blob->odb_object));
+ git_blob_rawcontent(blob),
+ git_blob_rawsize(blob));
}
-void git_blob__free(void *blob)
+void git_blob__free(void *_blob)
{
- git_odb_object_free(((git_blob *)blob)->odb_object);
+ git_blob *blob = (git_blob *) _blob;
+ if (!blob->raw)
+ git_odb_object_free(blob->data.odb);
git__free(blob);
}
-int git_blob__parse(void *blob, git_odb_object *odb_obj)
+int git_blob__parse_raw(void *_blob, const char *data, size_t size)
{
+ git_blob *blob = (git_blob *) _blob;
+ assert(blob);
+ blob->raw = 1;
+ blob->data.raw.data = data;
+ blob->data.raw.size = size;
+ return 0;
+}
+
+int git_blob__parse(void *_blob, git_odb_object *odb_obj)
+{
+ git_blob *blob = (git_blob *) _blob;
assert(blob);
git_cached_obj_incref((git_cached_obj *)odb_obj);
- ((git_blob *)blob)->odb_object = odb_obj;
+ blob->raw = 0;
+ blob->data.odb = odb_obj;
return 0;
}
@@ -372,8 +392,8 @@ int git_blob_is_binary(const git_blob *blob)
assert(blob);
- git_buf_attach_notowned(&content, blob->odb_object->buffer,
- min(blob->odb_object->cached.size,
+ git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
+ min(git_blob_rawsize(blob),
GIT_FILTER_BYTES_TO_CHECK_NUL));
return git_buf_text_is_binary(&content);
}
diff --git a/src/blob.h b/src/blob.h
index 3f1f97719..f644ec583 100644
--- a/src/blob.h
+++ b/src/blob.h
@@ -16,11 +16,20 @@
struct git_blob {
git_object object;
- git_odb_object *odb_object;
+
+ union {
+ git_odb_object *odb;
+ struct {
+ const char *data;
+ git_off_t size;
+ } raw;
+ } data;
+ unsigned int raw:1;
};
void git_blob__free(void *blob);
int git_blob__parse(void *blob, git_odb_object *obj);
+int git_blob__parse_raw(void *blob, const char *data, size_t size);
int git_blob__getbuf(git_buf *buffer, git_blob *blob);
extern int git_blob__create_from_paths(
diff --git a/src/commit.c b/src/commit.c
index e0ba51d47..97ac2a189 100644
--- a/src/commit.c
+++ b/src/commit.c
@@ -383,11 +383,11 @@ int git_commit_amend(
return error;
}
-int git_commit__parse(void *_commit, git_odb_object *odb_obj)
+int git_commit__parse_raw(void *_commit, const char *data, size_t size)
{
git_commit *commit = _commit;
- const char *buffer_start = git_odb_object_data(odb_obj), *buffer;
- const char *buffer_end = buffer_start + git_odb_object_size(odb_obj);
+ const char *buffer_start = data, *buffer;
+ const char *buffer_end = buffer_start + size;
git_oid parent_id;
size_t header_len;
git_signature dummy_sig;
@@ -477,6 +477,13 @@ bad_buffer:
return -1;
}
+int git_commit__parse(void *_commit, git_odb_object *odb_obj)
+{
+ return git_commit__parse_raw(_commit,
+ git_odb_object_data(odb_obj),
+ git_odb_object_size(odb_obj));
+}
+
#define GIT_COMMIT_GETTER(_rvalue, _name, _return) \
_rvalue git_commit_##_name(const git_commit *commit) \
{\
diff --git a/src/commit.h b/src/commit.h
index 781809d70..9137a8fad 100644
--- a/src/commit.h
+++ b/src/commit.h
@@ -35,5 +35,6 @@ struct git_commit {
void git_commit__free(void *commit);
int git_commit__parse(void *commit, git_odb_object *obj);
+int git_commit__parse_raw(void *commit, const char *data, size_t size);
#endif
diff --git a/src/indexer.c b/src/indexer.c
index 4f0c5ca5b..a5a4eb18d 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -10,6 +10,9 @@
#include "git2/indexer.h"
#include "git2/object.h"
+#include "commit.h"
+#include "tree.h"
+#include "tag.h"
#include "pack.h"
#include "mwindow.h"
#include "posix.h"
@@ -38,12 +41,15 @@ struct git_indexer {
pack_committed :1,
have_stream :1,
have_delta :1,
- do_fsync :1;
+ do_fsync :1,
+ do_verify :1;
struct git_pack_header hdr;
struct git_pack_file *pack;
unsigned int mode;
git_off_t off;
git_off_t entry_start;
+ git_otype entry_type;
+ git_buf entry_data;
git_packfile_stream stream;
size_t nr_objects;
git_vector objects;
@@ -55,6 +61,9 @@ struct git_indexer {
void *progress_payload;
char objbuf[8*1024];
+ /* OIDs referenced from pack objects. Used for verification. */
+ git_oidmap *expected_oids;
+
/* Needed to look up objects which we want to inject to fix a thin pack */
git_odb *odb;
@@ -106,27 +115,42 @@ static int objects_cmp(const void *a, const void *b)
return git_oid__cmp(&entrya->oid, &entryb->oid);
}
+int git_indexer_init_options(git_indexer_options *opts, unsigned int version)
+{
+ GIT_INIT_STRUCTURE_FROM_TEMPLATE(
+ opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT);
+ return 0;
+}
+
int git_indexer_new(
git_indexer **out,
const char *prefix,
unsigned int mode,
git_odb *odb,
- git_transfer_progress_cb progress_cb,
- void *progress_payload)
+ git_indexer_options *in_opts)
{
+ git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
git_indexer *idx;
git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
static const char suff[] = "/pack";
int error, fd = -1;
+ if (in_opts)
+ memcpy(&opts, in_opts, sizeof(opts));
+
idx = git__calloc(1, sizeof(git_indexer));
GITERR_CHECK_ALLOC(idx);
idx->odb = odb;
- idx->progress_cb = progress_cb;
- idx->progress_payload = progress_payload;
+ idx->progress_cb = opts.progress_cb;
+ idx->progress_payload = opts.progress_cb_payload;
idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
git_hash_ctx_init(&idx->hash_ctx);
git_hash_ctx_init(&idx->trailer);
+ git_buf_init(&idx->entry_data, 0);
+ idx->expected_oids = git_oidmap_alloc();
+ GITERR_CHECK_ALLOC(idx->expected_oids);
+
+ idx->do_verify = opts.verify;
if (git_repository__fsync_gitdir)
idx->do_fsync = 1;
@@ -212,6 +236,9 @@ static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
break;
+ if (idx->do_verify)
+ git_buf_put(&idx->entry_data, idx->objbuf, read);
+
git_hash_update(&idx->hash_ctx, idx->objbuf, read);
} while (read > 0);
@@ -281,6 +308,97 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start,
return 0;
}
+static void add_expected_oid(git_indexer *idx, const git_oid *oid)
+{
+ int ret;
+
+ /*
+ * If we know about that object because it is stored in our ODB or
+ * because we have already processed it as part of our pack file, we do
+ * not have to expect it.
+ */
+ if ((!idx->odb || !git_odb_exists(idx->odb, oid)) &&
+ !git_oidmap_exists(idx->pack->idx_cache, oid) &&
+ !git_oidmap_exists(idx->expected_oids, oid)) {
+ git_oid *dup = git__malloc(sizeof(*oid));
+ git_oid_cpy(dup, oid);
+ git_oidmap_put(idx->expected_oids, dup, &ret);
+ }
+}
+
+static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
+{
+ git_object *object;
+ size_t keyidx;
+ int error;
+
+ if (obj->type != GIT_OBJ_BLOB &&
+ obj->type != GIT_OBJ_TREE &&
+ obj->type != GIT_OBJ_COMMIT &&
+ obj->type != GIT_OBJ_TAG)
+ return 0;
+
+ if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0)
+ goto out;
+
+ keyidx = git_oidmap_lookup_index(idx->expected_oids, &object->cached.oid);
+ if (git_oidmap_valid_index(idx->expected_oids, keyidx)) {
+ const git_oid *key = git_oidmap_key(idx->expected_oids, keyidx);
+ git__free((git_oid *) key);
+ git_oidmap_delete_at(idx->expected_oids, keyidx);
+ }
+
+ /*
+ * Check whether this is a known object. If so, we can just continue as
+ * we assume that the ODB has a complete graph.
+ */
+ if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid))
+ return 0;
+
+ switch (obj->type) {
+ case GIT_OBJ_TREE:
+ {
+ git_tree *tree = (git_tree *) object;
+ git_tree_entry *entry;
+ size_t i;
+
+ git_array_foreach(tree->entries, i, entry)
+ add_expected_oid(idx, entry->oid);
+
+ break;
+ }
+ case GIT_OBJ_COMMIT:
+ {
+ git_commit *commit = (git_commit *) object;
+ git_oid *parent_oid;
+ size_t i;
+
+ git_array_foreach(commit->parent_ids, i, parent_oid)
+ add_expected_oid(idx, parent_oid);
+
+ add_expected_oid(idx, &commit->tree_id);
+
+ break;
+ }
+ case GIT_OBJ_TAG:
+ {
+ git_tag *tag = (git_tag *) object;
+
+ add_expected_oid(idx, &tag->target);
+
+ break;
+ }
+ case GIT_OBJ_BLOB:
+ default:
+ break;
+ }
+
+out:
+ git_object_free(object);
+
+ return error;
+}
+
static int store_object(git_indexer *idx)
{
int i, error;
@@ -306,6 +424,17 @@ static int store_object(git_indexer *idx)
entry->offset = (uint32_t)entry_start;
}
+ if (idx->do_verify) {
+ git_rawobj rawobj = {
+ idx->entry_data.ptr,
+ idx->entry_data.size,
+ idx->entry_type
+ };
+
+ if ((error = check_object_connectivity(idx, &rawobj)) < 0)
+ goto on_error;
+ }
+
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
@@ -527,17 +656,103 @@ static int append_to_pack(git_indexer *idx, const void *data, size_t size)
return write_at(idx, data, idx->pack->mwf.size, size);
}
+static int read_stream_object(git_indexer *idx, git_transfer_progress *stats)
+{
+ git_packfile_stream *stream = &idx->stream;
+ git_off_t entry_start = idx->off;
+ size_t entry_size;
+ git_otype type;
+ git_mwindow *w = NULL;
+ int error;
+
+ if (idx->pack->mwf.size <= idx->off + 20)
+ return GIT_EBUFS;
+
+ if (!idx->have_stream) {
+ error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off);
+ if (error == GIT_EBUFS) {
+ idx->off = entry_start;
+ return error;
+ }
+ if (error < 0)
+ return error;
+
+ git_mwindow_close(&w);
+ idx->entry_start = entry_start;
+ git_hash_init(&idx->hash_ctx);
+ git_buf_clear(&idx->entry_data);
+
+ if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
+ error = advance_delta_offset(idx, type);
+ if (error == GIT_EBUFS) {
+ idx->off = entry_start;
+ return error;
+ }
+ if (error < 0)
+ return error;
+
+ idx->have_delta = 1;
+ } else {
+ idx->have_delta = 0;
+
+ error = hash_header(&idx->hash_ctx, entry_size, type);
+ if (error < 0)
+ return error;
+ }
+
+ idx->have_stream = 1;
+ idx->entry_type = type;
+
+ error = git_packfile_stream_open(stream, idx->pack, idx->off);
+ if (error < 0)
+ return error;
+ }
+
+ if (idx->have_delta) {
+ error = read_object_stream(idx, stream);
+ } else {
+ error = hash_object_stream(idx, stream);
+ }
+
+ idx->off = stream->curpos;
+ if (error == GIT_EBUFS)
+ return error;
+
+ /* We want to free the stream resources no matter what here */
+ idx->have_stream = 0;
+ git_packfile_stream_dispose(stream);
+
+ if (error < 0)
+ return error;
+
+ if (idx->have_delta) {
+ error = store_delta(idx);
+ } else {
+ error = store_object(idx);
+ }
+
+ if (error < 0)
+ return error;
+
+ if (!idx->have_delta) {
+ stats->indexed_objects++;
+ }
+ stats->received_objects++;
+
+ if ((error = do_progress_callback(idx, stats)) != 0)
+ return error;
+
+ return 0;
+}
+
int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
{
int error = -1;
- size_t processed;
struct git_pack_header *hdr = &idx->hdr;
git_mwindow_file *mwf = &idx->pack->mwf;
assert(idx && data && stats);
- processed = stats->indexed_objects;
-
if ((error = append_to_pack(idx, data, size)) < 0)
return error;
@@ -580,7 +795,7 @@ int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_tran
stats->local_objects = 0;
stats->total_deltas = 0;
stats->indexed_deltas = 0;
- processed = stats->indexed_objects = 0;
+ stats->indexed_objects = 0;
stats->total_objects = total_objects;
if ((error = do_progress_callback(idx, stats)) != 0)
@@ -592,87 +807,13 @@ int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_tran
/* As the file grows any windows we try to use will be out of date */
git_mwindow_free_all(mwf);
- while (processed < idx->nr_objects) {
- git_packfile_stream *stream = &idx->stream;
- git_off_t entry_start = idx->off;
- size_t entry_size;
- git_otype type;
- git_mwindow *w = NULL;
-
- if (idx->pack->mwf.size <= idx->off + 20)
- return 0;
-
- if (!idx->have_stream) {
- error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return 0;
- }
- if (error < 0)
- goto on_error;
-
- git_mwindow_close(&w);
- idx->entry_start = entry_start;
- git_hash_init(&idx->hash_ctx);
-
- if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
- error = advance_delta_offset(idx, type);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return 0;
- }
- if (error < 0)
- goto on_error;
-
- idx->have_delta = 1;
- } else {
- idx->have_delta = 0;
-
- error = hash_header(&idx->hash_ctx, entry_size, type);
- if (error < 0)
- goto on_error;
- }
-
- idx->have_stream = 1;
-
- error = git_packfile_stream_open(stream, idx->pack, idx->off);
- if (error < 0)
+ while (stats->indexed_objects < idx->nr_objects) {
+ if ((error = read_stream_object(idx, stats)) != 0) {
+ if (error == GIT_EBUFS)
+ break;
+ else
goto on_error;
}
-
- if (idx->have_delta) {
- error = read_object_stream(idx, stream);
- } else {
- error = hash_object_stream(idx, stream);
- }
-
- idx->off = stream->curpos;
- if (error == GIT_EBUFS)
- return 0;
-
- /* We want to free the stream reasorces no matter what here */
- idx->have_stream = 0;
- git_packfile_stream_dispose(stream);
-
- if (error < 0)
- goto on_error;
-
- if (idx->have_delta) {
- error = store_delta(idx);
- } else {
- error = store_object(idx);
- }
-
- if (error < 0)
- goto on_error;
-
- if (!idx->have_delta) {
- stats->indexed_objects = (unsigned int)++processed;
- }
- stats->received_objects++;
-
- if ((error = do_progress_callback(idx, stats)) != 0)
- goto on_error;
}
return 0;
@@ -861,7 +1002,7 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
progressed = 0;
non_null = 0;
git_vector_foreach(&idx->deltas, i, delta) {
- git_rawobj obj = {NULL};
+ git_rawobj obj = {0};
if (!delta)
continue;
@@ -876,6 +1017,10 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
return -1;
}
+ if (idx->do_verify && check_object_connectivity(idx, &obj) < 0)
+ /* TODO: error? continue? */
+ continue;
+
if (hash_and_save(idx, &obj, delta->delta_off) < 0)
continue;
@@ -1006,6 +1151,18 @@ int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
}
+ /*
+ * Is the resulting graph fully connected, or are we still
+ * missing some objects? If any expected object is still
+ * missing, the packfile is incomplete and thus corrupt,
+ * so we bail out.
+ */
+ if (git_oidmap_size(idx->expected_oids) > 0) {
+ giterr_set(GITERR_INDEXER, "packfile is missing %"PRIuZ" objects",
+ git_oidmap_size(idx->expected_oids));
+ return -1;
+ }
+
git_vector_sort(&idx->objects);
/* Use the trailer hash as the pack file name to ensure
@@ -1135,6 +1292,8 @@ on_error:
void git_indexer_free(git_indexer *idx)
{
+ khiter_t pos;
+
if (idx == NULL)
return;
@@ -1162,7 +1321,18 @@ void git_indexer_free(git_indexer *idx)
git_mutex_unlock(&git__mwindow_mutex);
}
+ for (pos = git_oidmap_begin(idx->expected_oids);
+ pos != git_oidmap_end(idx->expected_oids); pos++)
+ {
+ if (git_oidmap_has_data(idx->expected_oids, pos)) {
+ git__free((git_oid *) git_oidmap_key(idx->expected_oids, pos));
+ git_oidmap_delete_at(idx->expected_oids, pos);
+ }
+ }
+
git_hash_ctx_cleanup(&idx->trailer);
git_hash_ctx_cleanup(&idx->hash_ctx);
+ git_buf_dispose(&idx->entry_data);
+ git_oidmap_free(idx->expected_oids);
git__free(idx);
}
diff --git a/src/object.c b/src/object.c
index 48f561384..c1f3ea919 100644
--- a/src/object.c
+++ b/src/object.c
@@ -12,6 +12,7 @@
#include "repository.h"
#include "commit.h"
+#include "hash.h"
#include "tree.h"
#include "blob.h"
#include "oid.h"
@@ -19,38 +20,86 @@
bool git_object__strict_input_validation = true;
+extern int git_odb_hash(git_oid *out, const void *data, size_t len, git_otype type);
+
typedef struct {
const char *str; /* type name string */
size_t size; /* size in bytes of the object structure */
int (*parse)(void *self, git_odb_object *obj);
+ int (*parse_raw)(void *self, const char *data, size_t size);
void (*free)(void *self);
} git_object_def;
static git_object_def git_objects_table[] = {
/* 0 = GIT_OBJ__EXT1 */
- { "", 0, NULL, NULL },
+ { "", 0, NULL, NULL, NULL },
/* 1 = GIT_OBJ_COMMIT */
- { "commit", sizeof(git_commit), git_commit__parse, git_commit__free },
+ { "commit", sizeof(git_commit), git_commit__parse, git_commit__parse_raw, git_commit__free },
/* 2 = GIT_OBJ_TREE */
- { "tree", sizeof(git_tree), git_tree__parse, git_tree__free },
+ { "tree", sizeof(git_tree), git_tree__parse, git_tree__parse_raw, git_tree__free },
/* 3 = GIT_OBJ_BLOB */
- { "blob", sizeof(git_blob), git_blob__parse, git_blob__free },
+ { "blob", sizeof(git_blob), git_blob__parse, git_blob__parse_raw, git_blob__free },
/* 4 = GIT_OBJ_TAG */
- { "tag", sizeof(git_tag), git_tag__parse, git_tag__free },
+ { "tag", sizeof(git_tag), git_tag__parse, git_tag__parse_raw, git_tag__free },
/* 5 = GIT_OBJ__EXT2 */
- { "", 0, NULL, NULL },
+ { "", 0, NULL, NULL, NULL },
/* 6 = GIT_OBJ_OFS_DELTA */
- { "OFS_DELTA", 0, NULL, NULL },
+ { "OFS_DELTA", 0, NULL, NULL, NULL },
/* 7 = GIT_OBJ_REF_DELTA */
- { "REF_DELTA", 0, NULL, NULL },
+ { "REF_DELTA", 0, NULL, NULL, NULL },
};
+int git_object__from_raw(
+ git_object **object_out,
+ const char *data,
+ size_t size,
+ git_otype type)
+{
+ git_object_def *def;
+ git_object *object;
+ size_t object_size;
+ int error;
+
+ assert(object_out);
+ *object_out = NULL;
+
+ /* Validate type match */
+ if (type != GIT_OBJ_BLOB && type != GIT_OBJ_TREE && type != GIT_OBJ_COMMIT && type != GIT_OBJ_TAG) {
+ giterr_set(GITERR_INVALID, "the requested type is invalid");
+ return GIT_ENOTFOUND;
+ }
+
+ if ((object_size = git_object__size(type)) == 0) {
+ giterr_set(GITERR_INVALID, "the requested type is invalid");
+ return GIT_ENOTFOUND;
+ }
+
+ /* Allocate and initialize base object */
+ object = git__calloc(1, object_size);
+ GITERR_CHECK_ALLOC(object);
+ object->cached.flags = GIT_CACHE_STORE_PARSED;
+ object->cached.type = type;
+ git_odb_hash(&object->cached.oid, data, size, type);
+
+ /* Parse raw object data */
+ def = &git_objects_table[type];
+ assert(def->free && def->parse_raw);
+
+ if ((error = def->parse_raw(object, data, size)) < 0)
+ def->free(object);
+
+ git_cached_obj_incref(object);
+ *object_out = object;
+
+ return 0;
+}
+
int git_object__from_odb_object(
git_object **object_out,
git_repository *repo,
diff --git a/src/object.h b/src/object.h
index e46c9cafa..f5cbbf763 100644
--- a/src/object.h
+++ b/src/object.h
@@ -22,6 +22,17 @@ struct git_object {
/* fully free the object; internal method, DO NOT EXPORT */
void git_object__free(void *object);
+/*
+ * Parse object from raw data. Note that the resulting object is
+ * tied to the lifetime of the data, as some objects simply point
+ * into it.
+ */
+int git_object__from_raw(
+ git_object **object_out,
+ const char *data,
+ size_t size,
+ git_otype type);
+
int git_object__from_odb_object(
git_object **object_out,
git_repository *repo,
diff --git a/src/odb_pack.c b/src/odb_pack.c
index 2c3048034..2892aa1e2 100644
--- a/src/odb_pack.c
+++ b/src/odb_pack.c
@@ -519,6 +519,7 @@ static int pack_backend__writepack(struct git_odb_writepack **out,
git_transfer_progress_cb progress_cb,
void *progress_payload)
{
+ git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
struct pack_backend *backend;
struct pack_writepack *writepack;
@@ -526,13 +527,16 @@ static int pack_backend__writepack(struct git_odb_writepack **out,
*out = NULL;
+ opts.progress_cb = progress_cb;
+ opts.progress_cb_payload = progress_payload;
+
backend = (struct pack_backend *)_backend;
writepack = git__calloc(1, sizeof(struct pack_writepack));
GITERR_CHECK_ALLOC(writepack);
if (git_indexer_new(&writepack->indexer,
- backend->pack_folder, 0, odb, progress_cb, progress_payload) < 0) {
+ backend->pack_folder, 0, odb, &opts) < 0) {
git__free(writepack);
return -1;
}
diff --git a/src/pack-objects.c b/src/pack-objects.c
index bc5fb2e1f..2b786df33 100644
--- a/src/pack-objects.c
+++ b/src/pack-objects.c
@@ -41,6 +41,12 @@ struct pack_write_context {
git_transfer_progress *stats;
};
+struct walk_object {
+ git_oid id;
+ unsigned int uninteresting:1,
+ seen:1;
+};
+
#ifdef GIT_THREADS
#define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
@@ -143,7 +149,7 @@ int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
if (!pb->walk_objects)
goto on_error;
- git_pool_init(&pb->object_pool, sizeof(git_walk_object));
+ git_pool_init(&pb->object_pool, sizeof(struct walk_object));
pb->repo = repo;
pb->nr_threads = 1; /* do not spawn any thread by default */
@@ -1382,6 +1388,7 @@ int git_packbuilder_write(
git_transfer_progress_cb progress_cb,
void *progress_cb_payload)
{
+ git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
git_indexer *indexer;
git_transfer_progress stats;
struct pack_write_context ctx;
@@ -1389,8 +1396,11 @@ int git_packbuilder_write(
PREPARE_PACK;
+ opts.progress_cb = progress_cb;
+ opts.progress_cb_payload = progress_cb_payload;
+
if (git_indexer_new(
- &indexer, path, mode, pb->odb, progress_cb, progress_cb_payload) < 0)
+ &indexer, path, mode, pb->odb, &opts) < 0)
return -1;
if (!git_repository__cvar(&t, pb->repo, GIT_CVAR_FSYNCOBJECTFILES) && t)
@@ -1513,9 +1523,9 @@ size_t git_packbuilder_written(git_packbuilder *pb)
return pb->nr_written;
}
-int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
+static int lookup_walk_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
{
- git_walk_object *obj;
+ struct walk_object *obj;
obj = git_pool_mallocz(&pb->object_pool, 1);
if (!obj) {
@@ -1529,11 +1539,11 @@ int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid
return 0;
}
-static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
+static int retrieve_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
{
int error;
khiter_t pos;
- git_walk_object *obj;
+ struct walk_object *obj;
pos = git_oidmap_lookup_index(pb->walk_objects, id);
if (git_oidmap_valid_index(pb->walk_objects, pos)) {
@@ -1552,7 +1562,7 @@ static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git
static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
{
int error;
- git_walk_object *obj;
+ struct walk_object *obj;
if ((error = retrieve_object(&obj, pb, id)) < 0)
return error;
@@ -1564,7 +1574,7 @@ static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
{
- git_walk_object *obj;
+ struct walk_object *obj;
git_tree *tree;
int error;
size_t i;
@@ -1636,7 +1646,7 @@ int insert_tree(git_packbuilder *pb, git_tree *tree)
size_t i;
int error;
git_tree *subtree;
- git_walk_object *obj;
+ struct walk_object *obj;
const char *name;
if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
@@ -1684,7 +1694,7 @@ int insert_tree(git_packbuilder *pb, git_tree *tree)
return error;
}
-int insert_commit(git_packbuilder *pb, git_walk_object *obj)
+int insert_commit(git_packbuilder *pb, struct walk_object *obj)
{
int error;
git_commit *commit = NULL;
@@ -1714,7 +1724,7 @@ int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
{
int error;
git_oid id;
- git_walk_object *obj;
+ struct walk_object *obj;
assert(pb && walk);
diff --git a/src/pack-objects.h b/src/pack-objects.h
index c9cd5777a..a931f3f86 100644
--- a/src/pack-objects.h
+++ b/src/pack-objects.h
@@ -52,12 +52,6 @@ typedef struct git_pobject {
filled:1;
} git_pobject;
-typedef struct {
- git_oid id;
- unsigned int uninteresting:1,
- seen:1;
-} git_walk_object;
-
struct git_packbuilder {
git_repository *repo; /* associated repository */
git_odb *odb; /* associated object database */
diff --git a/src/tag.c b/src/tag.c
index a7c55d0aa..663c7dabd 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -159,6 +159,11 @@ static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end)
return 0;
}
+int git_tag__parse_raw(void *_tag, const char *data, size_t size)
+{
+ return tag_parse(_tag, data, data + size);
+}
+
int git_tag__parse(void *_tag, git_odb_object *odb_obj)
{
git_tag *tag = _tag;
diff --git a/src/tag.h b/src/tag.h
index 8aae37840..734770abd 100644
--- a/src/tag.h
+++ b/src/tag.h
@@ -26,5 +26,6 @@ struct git_tag {
void git_tag__free(void *tag);
int git_tag__parse(void *tag, git_odb_object *obj);
+int git_tag__parse_raw(void *tag, const char *data, size_t size);
#endif
diff --git a/src/tree.c b/src/tree.c
index be0f528c2..823385722 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -375,18 +375,16 @@ static int parse_mode(unsigned int *modep, const char *buffer, const char **buff
return 0;
}
-int git_tree__parse(void *_tree, git_odb_object *odb_obj)
+int git_tree__parse_raw(void *_tree, const char *data, size_t size)
{
git_tree *tree = _tree;
const char *buffer;
const char *buffer_end;
- if (git_odb_object_dup(&tree->odb_obj, odb_obj) < 0)
- return -1;
-
- buffer = git_odb_object_data(tree->odb_obj);
- buffer_end = buffer + git_odb_object_size(tree->odb_obj);
+ buffer = data;
+ buffer_end = buffer + size;
+ tree->odb_obj = NULL;
git_array_init_to_size(tree->entries, DEFAULT_TREE_SIZE);
GITERR_CHECK_ARRAY(tree->entries);
@@ -426,6 +424,21 @@ int git_tree__parse(void *_tree, git_odb_object *odb_obj)
return 0;
}
+int git_tree__parse(void *_tree, git_odb_object *odb_obj)
+{
+ git_tree *tree = _tree;
+
+ if ((git_tree__parse_raw(tree,
+ git_odb_object_data(odb_obj),
+ git_odb_object_size(odb_obj))) < 0)
+ return -1;
+
+ if (git_odb_object_dup(&tree->odb_obj, odb_obj) < 0)
+ return -1;
+
+ return 0;
+}
+
static size_t find_next_dir(const char *dirname, git_index *index, size_t start)
{
size_t dirlen, i, entries = git_index_entrycount(index);
diff --git a/src/tree.h b/src/tree.h
index fbee5efe1..973ba15d0 100644
--- a/src/tree.h
+++ b/src/tree.h
@@ -41,6 +41,7 @@ GIT_INLINE(bool) git_tree_entry__is_tree(const struct git_tree_entry *e)
void git_tree__free(void *tree);
int git_tree__parse(void *tree, git_odb_object *obj);
+int git_tree__parse_raw(void *_tree, const char *data, size_t size);
/**
* Write a tree to the given repository
diff --git a/tests/pack/indexer.c b/tests/pack/indexer.c
index 453399809..3a5021223 100644
--- a/tests/pack/indexer.c
+++ b/tests/pack/indexer.c
@@ -74,6 +74,24 @@ static const unsigned char leaky_pack[] = {
};
static const unsigned int leaky_pack_len = 33;
+/*
+ * Packfile with two objects. The first one is a tree referencing two blobs,
+ * the second object is one of those blobs. The second blob is missing.
+ */
+unsigned char incomplete_pack[] = {
+ 0x50, 0x41, 0x43, 0x4b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02,
+ 0xae, 0x03, 0x78, 0x9c, 0x33, 0x34, 0x30, 0x30, 0x33, 0x31, 0x51, 0x48,
+ 0x4a, 0x2c, 0x62, 0x08, 0x17, 0x3b, 0x15, 0xd9, 0x7e, 0xfa, 0x67, 0x6d,
+ 0xf6, 0x56, 0x4f, 0x85, 0x7d, 0xcb, 0xd6, 0xde, 0x53, 0xd1, 0x6d, 0x7f,
+ 0x66, 0x08, 0x91, 0x4e, 0xcb, 0xcf, 0x67, 0x50, 0xad, 0x39, 0x9a, 0xa2,
+ 0xb3, 0x71, 0x41, 0xc8, 0x87, 0x9e, 0x13, 0xf6, 0xba, 0x53, 0xec, 0xc2,
+ 0xfe, 0xda, 0xed, 0x9b, 0x09, 0x00, 0xe8, 0xc8, 0x19, 0xab, 0x34, 0x78,
+ 0x9c, 0x4b, 0x4a, 0x2c, 0xe2, 0x02, 0x00, 0x03, 0x9d, 0x01, 0x40, 0x4b,
+ 0x72, 0xa2, 0x6f, 0xb6, 0x88, 0x2d, 0x6c, 0xa5, 0x07, 0xb2, 0xa5, 0x45,
+ 0xe8, 0xdb, 0xe6, 0x53, 0xb3, 0x52, 0xe2
+};
+unsigned int incomplete_pack_len = 115;
+
static const unsigned char base_obj[] = { 07, 076 };
static const unsigned int base_obj_len = 2;
@@ -82,7 +100,7 @@ void test_pack_indexer__out_of_order(void)
git_indexer *idx = 0;
git_transfer_progress stats = { 0 };
- cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL));
cl_git_pass(git_indexer_append(
idx, out_of_order_pack, out_of_order_pack_len, &stats));
cl_git_pass(git_indexer_commit(idx, &stats));
@@ -99,7 +117,7 @@ void test_pack_indexer__missing_trailer(void)
git_indexer *idx = 0;
git_transfer_progress stats = { 0 };
- cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL));
cl_git_pass(git_indexer_append(
idx, missing_trailer_pack, missing_trailer_pack_len, &stats));
cl_git_fail(git_indexer_commit(idx, &stats));
@@ -115,7 +133,7 @@ void test_pack_indexer__leaky(void)
git_indexer *idx = 0;
git_transfer_progress stats = { 0 };
- cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL));
cl_git_pass(git_indexer_append(
idx, leaky_pack, leaky_pack_len, &stats));
cl_git_fail(git_indexer_commit(idx, &stats));
@@ -142,7 +160,7 @@ void test_pack_indexer__fix_thin(void)
git_oid_fromstr(&should_id, "e68fe8129b546b101aee9510c5328e7f21ca1d18");
cl_assert_equal_oid(&should_id, &id);
- cl_git_pass(git_indexer_new(&idx, ".", 0, odb, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, odb, NULL));
cl_git_pass(git_indexer_append(idx, thin_pack, thin_pack_len, &stats));
cl_git_pass(git_indexer_commit(idx, &stats));
@@ -175,7 +193,7 @@ void test_pack_indexer__fix_thin(void)
cl_git_pass(p_stat(name, &st));
- cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL));
read = p_read(fd, buffer, sizeof(buffer));
cl_assert(read != -1);
p_close(fd);
@@ -208,7 +226,7 @@ void test_pack_indexer__corrupt_length(void)
git_oid_fromstr(&should_id, "e68fe8129b546b101aee9510c5328e7f21ca1d18");
cl_assert_equal_oid(&should_id, &id);
- cl_git_pass(git_indexer_new(&idx, ".", 0, odb, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, odb, NULL));
cl_git_pass(git_indexer_append(
idx, corrupt_thin_pack, corrupt_thin_pack_len, &stats));
cl_git_fail(git_indexer_commit(idx, &stats));
@@ -221,6 +239,46 @@ void test_pack_indexer__corrupt_length(void)
git_repository_free(repo);
}
+void test_pack_indexer__incomplete_pack_fails_with_strict(void)
+{
+ git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
+ git_indexer *idx = 0;
+ git_transfer_progress stats = { 0 };
+ /* Index incomplete_pack with the `verify` option switched on. */
+ opts.verify = 1;
+ /* Appending the data is asserted to succeed; only the final commit is asserted to fail. */
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, &opts));
+ cl_git_pass(git_indexer_append(
+ idx, incomplete_pack, incomplete_pack_len, &stats));
+ cl_git_fail(git_indexer_commit(idx, &stats));
+ /* Despite the failed commit, every progress counter is expected to read 2. */
+ cl_assert_equal_i(stats.total_objects, 2);
+ cl_assert_equal_i(stats.received_objects, 2);
+ cl_assert_equal_i(stats.indexed_objects, 2);
+ /* The indexer is freed here even though the commit did not succeed. */
+ git_indexer_free(idx);
+}
+
+void test_pack_indexer__out_of_order_with_connectivity_checks(void)
+{
+ git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
+ git_indexer *idx = 0;
+ git_transfer_progress stats = { 0 };
+ /* Index out_of_order_pack (defined elsewhere in this file) with `verify` switched on. */
+ opts.verify = 1;
+ /* Unlike the incomplete-pack test, create, append and commit are all asserted to succeed. */
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, &opts));
+ cl_git_pass(git_indexer_append(
+ idx, out_of_order_pack, out_of_order_pack_len, &stats));
+ cl_git_pass(git_indexer_commit(idx, &stats));
+ /* Every progress counter is expected to read 3 after the commit. */
+ cl_assert_equal_i(stats.total_objects, 3);
+ cl_assert_equal_i(stats.received_objects, 3);
+ cl_assert_equal_i(stats.indexed_objects, 3);
+ /* Release the indexer created above. */
+ git_indexer_free(idx);
+}
+
static int find_tmp_file_recurs(void *opaque, git_buf *path)
{
int error = 0;
@@ -252,7 +310,7 @@ void test_pack_indexer__no_tmp_files(void)
git_buf_dispose(&path);
cl_assert(git_buf_len(&first_tmp_file) == 0);
- cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL));
git_indexer_free(idx);
cl_git_pass(git_buf_sets(&path, clar_sandbox_path()));
diff --git a/tests/pack/packbuilder.c b/tests/pack/packbuilder.c
index 932cb3861..bd2cebed2 100644
--- a/tests/pack/packbuilder.c
+++ b/tests/pack/packbuilder.c
@@ -100,7 +100,7 @@ void test_pack_packbuilder__create_pack(void)
seed_packbuilder();
- cl_git_pass(git_indexer_new(&_indexer, ".", 0, NULL, NULL, NULL));
+ cl_git_pass(git_indexer_new(&_indexer, ".", 0, NULL, NULL));
cl_git_pass(git_packbuilder_foreach(_packbuilder, feed_indexer, &stats));
cl_git_pass(git_indexer_commit(_indexer, &stats));
@@ -237,7 +237,7 @@ void test_pack_packbuilder__foreach(void)
git_indexer *idx;
seed_packbuilder();
- cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL));
cl_git_pass(git_packbuilder_foreach(_packbuilder, foreach_cb, idx));
cl_git_pass(git_indexer_commit(idx, &_stats));
git_indexer_free(idx);
@@ -255,7 +255,7 @@ void test_pack_packbuilder__foreach_with_cancel(void)
git_indexer *idx;
seed_packbuilder();
- cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL, NULL));
+ cl_git_pass(git_indexer_new(&idx, ".", 0, NULL, NULL));
cl_git_fail_with(
git_packbuilder_foreach(_packbuilder, foreach_cancel_cb, idx), -1111);
git_indexer_free(idx);