diff options
author | Junio C Hamano <gitster@pobox.com> | 2008-04-09 00:44:17 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2008-04-09 00:44:17 -0700 |
commit | 3c993de9f25d052b791c55609468c59bcd47fc19 (patch) | |
tree | aa47776e396aa2f480350d5463a76eb70d4c6c62 | |
parent | 769f60aed3906ba9cd1151731f2551087cc8502f (diff) | |
parent | 38a5739dfadd41c9e45bb05ed6fe1559895360cb (diff) | |
download | git-3c993de9f25d052b791c55609468c59bcd47fc19.tar.gz |
Merge branch 'mk/unpack-careful'
* mk/unpack-careful:
t5300: add test for "index-pack --strict"
receive-pack: allow using --strict mode for unpacking objects
unpack-objects: fix --strict handling
t5300: add test for "unpack-objects --strict"
unpack-objects: prevent writing of inconsistent objects
-rw-r--r-- | Documentation/config.txt | 6 | ||||
-rw-r--r-- | Documentation/git-unpack-objects.txt | 3 | ||||
-rw-r--r-- | builtin-unpack-objects.c | 177 | ||||
-rw-r--r-- | receive-pack.c | 36 | ||||
-rwxr-xr-x | t/t5300-pack-object.sh | 95 |
5 files changed, 284 insertions, 33 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index 04c01c5fdc..fe43b12572 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -991,6 +991,12 @@ imap:: The configuration variables in the 'imap' section are described in linkgit:git-imap-send[1]. +receive.fsckObjects:: + If it is set to true, git-receive-pack will check all received + objects. It will abort in the case of a malformed object or a + broken link. The result of an abort are only dangling objects. + Defaults to false. + receive.unpackLimit:: If the number of objects received in a push is below this limit then the objects will be unpacked into loose object diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt index b79be3fd4c..3697896a06 100644 --- a/Documentation/git-unpack-objects.txt +++ b/Documentation/git-unpack-objects.txt @@ -40,6 +40,9 @@ OPTIONS and make the best effort to recover as many objects as possible. +--strict:: + Don't write objects with broken content or links. + Author ------ diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index 50e07faa12..fecf0be779 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -7,11 +7,13 @@ #include "commit.h" #include "tag.h" #include "tree.h" +#include "tree-walk.h" #include "progress.h" #include "decorate.h" +#include "fsck.h" -static int dry_run, quiet, recover, has_errors; -static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file"; +static int dry_run, quiet, recover, has_errors, strict; +static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] [--strict] < pack-file"; /* We always read in 4kB chunks. */ static unsigned char buffer[4096]; @@ -19,6 +21,11 @@ static unsigned int offset, len; static off_t consumed_bytes; static SHA_CTX ctx; +/* + * When running under --strict mode, objects whose reachability are + * suspect are kept in core without getting written in the object + * store. + */ struct obj_buffer { char *buffer; unsigned long size; @@ -31,6 +38,16 @@ static struct obj_buffer *lookup_object_buffer(struct object *base) return lookup_decoration(&obj_decorate, base); } +static void add_object_buffer(struct object *object, char *buffer, unsigned long size) +{ + struct obj_buffer *obj; + obj = xcalloc(1, sizeof(struct obj_buffer)); + obj->buffer = buffer; + obj->size = size; + if (add_decoration(&obj_decorate, object, obj)) + die("object %s tried to add buffer twice!", sha1_to_hex(object->sha1)); +} + /* * Make sure at least "min" bytes are available in the buffer, and * return the pointer to the buffer. @@ -134,19 +151,110 @@ static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1, struct obj_info { off_t offset; unsigned char sha1[20]; + struct object *obj; }; +#define FLAG_OPEN (1u<<20) +#define FLAG_WRITTEN (1u<<21) + static struct obj_info *obj_list; +unsigned nr_objects; + +/* + * Called only from check_object() after it verified this object + * is Ok. + */ +static void write_cached_object(struct object *obj) +{ + unsigned char sha1[20]; + struct obj_buffer *obj_buf = lookup_object_buffer(obj); + if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0) + die("failed to write object %s", sha1_to_hex(obj->sha1)); + obj->flags |= FLAG_WRITTEN; +} + +/* + * At the very end of the processing, write_rest() scans the objects + * that have reachability requirements and calls this function. + * Verify its reachability and validity recursively and write it out. + */ +static int check_object(struct object *obj, int type, void *data) +{ + if (!obj) + return 0; + + if (obj->flags & FLAG_WRITTEN) + return 1; + + if (type != OBJ_ANY && obj->type != type) + die("object type mismatch"); + + if (!(obj->flags & FLAG_OPEN)) { + unsigned long size; + int type = sha1_object_info(obj->sha1, &size); + if (type != obj->type || type <= 0) + die("object of unexpected type"); + obj->flags |= FLAG_WRITTEN; + return 1; + } + + if (fsck_object(obj, 1, fsck_error_function)) + die("Error in object"); + if (!fsck_walk(obj, check_object, 0)) + die("Error on reachable objects of %s", sha1_to_hex(obj->sha1)); + write_cached_object(obj); + return 1; +} + +static void write_rest(void) +{ + unsigned i; + for (i = 0; i < nr_objects; i++) + check_object(obj_list[i].obj, OBJ_ANY, 0); +} static void added_object(unsigned nr, enum object_type type, void *data, unsigned long size); +/* + * Write out nr-th object from the list, now we know the contents + * of it. Under --strict, this buffers structured objects in-core, + * to be checked at the end. + */ static void write_object(unsigned nr, enum object_type type, void *buf, unsigned long size) { - if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) - die("failed to write object"); - added_object(nr, type, buf, size); + if (!strict) { + if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) + die("failed to write object"); + added_object(nr, type, buf, size); + free(buf); + obj_list[nr].obj = NULL; + } else if (type == OBJ_BLOB) { + struct blob *blob; + if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) + die("failed to write object"); + added_object(nr, type, buf, size); + free(buf); + + blob = lookup_blob(obj_list[nr].sha1); + if (blob) + blob->object.flags |= FLAG_WRITTEN; + else + die("invalid blob object"); + obj_list[nr].obj = NULL; + } else { + struct object *obj; + int eaten; + hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1); + added_object(nr, type, buf, size); + obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten); + if (!obj) + die("invalid %s", typename(type)); + add_object_buffer(obj, buf, size); + obj->flags |= FLAG_OPEN; + obj_list[nr].obj = obj; + } } static void resolve_delta(unsigned nr, enum object_type type, @@ -163,9 +271,12 @@ static void resolve_delta(unsigned nr, enum object_type type, die("failed to apply delta"); free(delta); write_object(nr, type, result, result_size); - free(result); } +/* + * We now know the contents of an object (which is nr-th in the pack); + * resolve all the deltified objects that are based on it. + */ static void added_object(unsigned nr, enum object_type type, void *data, unsigned long size) { @@ -193,7 +304,24 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size, if (!dry_run && buf) write_object(nr, type, buf, size); - free(buf); + else + free(buf); +} + +static int resolve_against_held(unsigned nr, const unsigned char *base, + void *delta_data, unsigned long delta_size) +{ + struct object *obj; + struct obj_buffer *obj_buffer; + obj = lookup_object(base); + if (!obj) + return 0; + obj_buffer = lookup_object_buffer(obj); + if (!obj_buffer) + return 0; + resolve_delta(nr, obj->type, obj_buffer->buffer, + obj_buffer->size, delta_data, delta_size); + return 1; } static void unpack_delta_entry(enum object_type type, unsigned long delta_size, @@ -202,7 +330,6 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, void *delta_data, *base; unsigned long base_size; unsigned char base_sha1[20]; - struct object *obj; if (type == OBJ_REF_DELTA) { hashcpy(base_sha1, fill(20)); @@ -212,7 +339,13 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, free(delta_data); return; } - if (!has_sha1_file(base_sha1)) { + if (has_sha1_file(base_sha1)) + ; /* Ok we have this one */ + else if (resolve_against_held(nr, base_sha1, + delta_data, delta_size)) + return; /* we are done */ + else { + /* cannot resolve yet --- queue it */ hashcpy(obj_list[nr].sha1, null_sha1); add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size); return; @@ -258,22 +391,18 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, } } if (!base_found) { - /* The delta base object is itself a delta that - has not been resolved yet. */ + /* + * The delta base object is itself a delta that + * has not been resolved yet. + */ hashcpy(obj_list[nr].sha1, null_sha1); add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size); return; } } - obj = lookup_object(base_sha1); - if (obj) { - struct obj_buffer *obj_buf = lookup_object_buffer(obj); - if (obj_buf) { - resolve_delta(nr, obj->type, obj_buf->buffer, obj_buf->size, delta_data, delta_size); - return; - } - } + if (resolve_against_held(nr, base_sha1, delta_data, delta_size)) + return; base = read_sha1_file(base_sha1, &type, &base_size); if (!base) { @@ -336,7 +465,8 @@ static void unpack_all(void) int i; struct progress *progress = NULL; struct pack_header *hdr = fill(sizeof(struct pack_header)); - unsigned nr_objects = ntohl(hdr->hdr_entries); + + nr_objects = ntohl(hdr->hdr_entries); if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE) die("bad pack file"); @@ -347,6 +477,7 @@ static void unpack_all(void) if (!quiet) progress = start_progress("Unpacking objects", nr_objects); obj_list = xmalloc(nr_objects * sizeof(*obj_list)); + memset(obj_list, 0, nr_objects * sizeof(*obj_list)); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); @@ -382,6 +513,10 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) recover = 1; continue; } + if (!strcmp(arg, "--strict")) { + strict = 1; + continue; + } if (!prefixcmp(arg, "--pack_header=")) { struct pack_header *hdr; char *c; @@ -407,6 +542,8 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) unpack_all(); SHA1_Update(&ctx, buffer, offset); SHA1_Final(sha1, &ctx); + if (strict) + write_rest(); if (hashcmp(fill(20), sha1)) die("final sha1 did not match"); use(20); diff --git a/receive-pack.c b/receive-pack.c index f83ae87e15..828d49001d 100644 --- a/receive-pack.c +++ b/receive-pack.c @@ -10,6 +10,7 @@ static const char receive_pack_usage[] = "git-receive-pack <git-dir>"; static int deny_non_fast_forwards = 0; +static int receive_fsck_objects; static int receive_unpack_limit = -1; static int transfer_unpack_limit = -1; static int unpack_limit = 100; @@ -35,6 +36,11 @@ static int receive_pack_config(const char *var, const char *value) return 0; } + if (strcmp(var, "receive.fsckobjects") == 0) { + receive_fsck_objects = git_config_bool(var, value); + return 0; + } + return git_default_config(var, value); } @@ -368,11 +374,13 @@ static const char *unpack(void) ntohl(hdr.hdr_version), ntohl(hdr.hdr_entries)); if (ntohl(hdr.hdr_entries) < unpack_limit) { - int code; - const char *unpacker[3]; - unpacker[0] = "unpack-objects"; - unpacker[1] = hdr_arg; - unpacker[2] = NULL; + int code, i = 0; + const char *unpacker[4]; + unpacker[i++] = "unpack-objects"; + if (receive_fsck_objects) + unpacker[i++] = "--strict"; + unpacker[i++] = hdr_arg; + unpacker[i++] = NULL; code = run_command_v_opt(unpacker, RUN_GIT_CMD); switch (code) { case 0: @@ -393,8 +401,8 @@ static const char *unpack(void) return "unpacker exited with error code"; } } else { - const char *keeper[6]; - int s, status; + const char *keeper[7]; + int s, status, i = 0; char keep_arg[256]; struct child_process ip; @@ -402,12 +410,14 @@ static const char *unpack(void) if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) strcpy(keep_arg + s, "localhost"); - keeper[0] = "index-pack"; - keeper[1] = "--stdin"; - keeper[2] = "--fix-thin"; - keeper[3] = hdr_arg; - keeper[4] = keep_arg; - keeper[5] = NULL; + keeper[i++] = "index-pack"; + keeper[i++] = "--stdin"; + if (receive_fsck_objects) + keeper[i++] = "--strict"; + keeper[i++] = "--fix-thin"; + keeper[i++] = hdr_arg; + keeper[i++] = keep_arg; + keeper[i++] = NULL; memset(&ip, 0, sizeof(ip)); ip.argv = keeper; ip.out = -1; diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index c955fe44f5..983a39398f 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -274,4 +274,99 @@ test_expect_success \ packname_4=$(git pack-objects test-4 <obj-list) && test 3 = $(ls test-4-*.pack | wc -l)' +test_expect_success 'unpacking with --strict' ' + + git config --unset pack.packsizelimit && + for j in a b c d e f g + do + for i in 0 1 2 3 4 5 6 7 8 9 + do + o=$(echo $j$i | git hash-object -w --stdin) && + echo "100644 $o 0 $j$i" + done + done >LIST && + rm -f .git/index && + git update-index --index-info <LIST && + LIST=$(git write-tree) && + rm -f .git/index && + head -n 10 LIST | git update-index --index-info && + LI=$(git write-tree) && + rm -f .git/index && + tail -n 10 LIST | git update-index --index-info && + ST=$(git write-tree) && + PACK5=$( git rev-list --objects "$LIST" "$LI" "$ST" | \ + git pack-objects test-5 ) && + PACK6=$( ( + echo "$LIST" + echo "$LI" + echo "$ST" + ) | git pack-objects test-6 ) && + test_create_repo test-5 && + ( + cd test-5 && + git unpack-objects --strict <../test-5-$PACK5.pack && + git ls-tree -r $LIST && + git ls-tree -r $LI && + git ls-tree -r $ST + ) && + test_create_repo test-6 && + ( + # tree-only into empty repo -- many unreachables + cd test-6 && + test_must_fail git unpack-objects --strict <../test-6-$PACK6.pack + ) && + ( + # already populated -- no unreachables + cd test-5 && + git unpack-objects --strict <../test-6-$PACK6.pack + ) +' + +test_expect_success 'index-pack with --strict' ' + + for j in a b c d e f g + do + for i in 0 1 2 3 4 5 6 7 8 9 + do + o=$(echo $j$i | git hash-object -w --stdin) && + echo "100644 $o 0 $j$i" + done + done >LIST && + rm -f .git/index && + git update-index --index-info <LIST && + LIST=$(git write-tree) && + rm -f .git/index && + head -n 10 LIST | git update-index --index-info && + LI=$(git write-tree) && + rm -f .git/index && + tail -n 10 LIST | git update-index --index-info && + ST=$(git write-tree) && + PACK5=$( git rev-list --objects "$LIST" "$LI" "$ST" | \ + git pack-objects test-5 ) && + PACK6=$( ( + echo "$LIST" + echo "$LI" + echo "$ST" + ) | git pack-objects test-6 ) && + test_create_repo test-7 && + ( + cd test-7 && + git index-pack --strict --stdin <../test-5-$PACK5.pack && + git ls-tree -r $LIST && + git ls-tree -r $LI && + git ls-tree -r $ST + ) && + test_create_repo test-8 && + ( + # tree-only into empty repo -- many unreachables + cd test-8 && + test_must_fail git index-pack --strict --stdin <../test-6-$PACK6.pack + ) && + ( + # already populated -- no unreachables + cd test-7 && + git index-pack --strict --stdin <../test-6-$PACK6.pack + ) +' + test_done |