diff options
author | Junio C Hamano <gitster@pobox.com> | 2017-01-31 13:14:57 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2017-01-31 13:14:57 -0800 |
commit | 42ace93e41da0abe5a264fb8661f1c7de88206ec (patch) | |
tree | 937299b26d761c4b1729f86de23c7cad833261cc | |
parent | e8272fd5fbb4f3a6d264fe0721247efe08aada8f (diff) | |
parent | cce044df7f2392d0c6cb21d6dca94f01ff838727 (diff) | |
download | git-42ace93e41da0abe5a264fb8661f1c7de88206ec.tar.gz |
Merge branch 'jk/loose-object-fsck'
"git fsck" inspects loose objects more carefully now.
* jk/loose-object-fsck:
fsck: detect trailing garbage in all object types
fsck: parse loose object paths directly
sha1_file: add read_loose_object() function
t1450: test fsck of packed objects
sha1_file: fix error message for alternate objects
t1450: refactor loose-object removal
-rw-r--r-- | builtin/fsck.c | 46 | ||||
-rw-r--r-- | cache.h | 13 | ||||
-rw-r--r-- | sha1_file.c | 180 | ||||
-rwxr-xr-x | t/t1450-fsck.sh | 86 |
4 files changed, 284 insertions, 41 deletions
diff --git a/builtin/fsck.c b/builtin/fsck.c index f01b81eebf..4b91ee95e6 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -362,18 +362,6 @@ static int fsck_obj(struct object *obj) return 0; } -static int fsck_sha1(const unsigned char *sha1) -{ - struct object *obj = parse_object(sha1); - if (!obj) { - errors_found |= ERROR_OBJECT; - return error("%s: object corrupt or missing", - sha1_to_hex(sha1)); - } - obj->flags |= HAS_OBJ; - return fsck_obj(obj); -} - static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, unsigned long size, void *buffer, int *eaten) { @@ -488,9 +476,41 @@ static void get_default_heads(void) } } +static struct object *parse_loose_object(const unsigned char *sha1, + const char *path) +{ + struct object *obj; + void *contents; + enum object_type type; + unsigned long size; + int eaten; + + if (read_loose_object(path, sha1, &type, &size, &contents) < 0) + return NULL; + + if (!contents && type != OBJ_BLOB) + die("BUG: read_loose_object streamed a non-blob"); + + obj = parse_object_buffer(sha1, type, size, contents, &eaten); + + if (!eaten) + free(contents); + return obj; +} + static int fsck_loose(const unsigned char *sha1, const char *path, void *data) { - if (fsck_sha1(sha1)) + struct object *obj = parse_loose_object(sha1, path); + + if (!obj) { + errors_found |= ERROR_OBJECT; + error("%s: object corrupt or missing: %s", + sha1_to_hex(sha1), path); + return 0; /* keep checking other objects */ + } + + obj->flags = HAS_OBJ; + if (fsck_obj(obj)) errors_found |= ERROR_OBJECT; return 0; } @@ -1143,6 +1143,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename); extern int has_sha1_pack(const unsigned char *sha1); /* + * Open the loose object at path, check its sha1, and return the contents, + * type, and size. If the object is a blob, then "contents" may return NULL, + * to allow streaming of large blobs. + * + * Returns 0 on success, negative on error (details may be written to stderr). + */ +int read_loose_object(const char *path, + const unsigned char *expected_sha1, + enum object_type *type, + unsigned long *size, + void **contents); + +/* * Return true iff we have an object named sha1, whether local or in * an alternate object database, and whether packed or loose. This * function does not respect replace references. diff --git a/sha1_file.c b/sha1_file.c index b5e827ac9e..c40ef7111c 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1630,39 +1630,54 @@ int git_open_cloexec(const char *name, int flags) return fd; } -static int stat_sha1_file(const unsigned char *sha1, struct stat *st) +/* + * Find "sha1" as a loose object in the local repository or in an alternate. + * Returns 0 on success, negative on failure. + * + * The "path" out-parameter will give the path of the object we found (if any). + * Note that it may point to static storage and is only valid until another + * call to sha1_file_name(), etc. + */ +static int stat_sha1_file(const unsigned char *sha1, struct stat *st, + const char **path) { struct alternate_object_database *alt; - if (!lstat(sha1_file_name(sha1), st)) + *path = sha1_file_name(sha1); + if (!lstat(*path, st)) return 0; prepare_alt_odb(); errno = ENOENT; for (alt = alt_odb_list; alt; alt = alt->next) { - const char *path = alt_sha1_path(alt, sha1); - if (!lstat(path, st)) + *path = alt_sha1_path(alt, sha1); + if (!lstat(*path, st)) return 0; } return -1; } -static int open_sha1_file(const unsigned char *sha1) +/* + * Like stat_sha1_file(), but actually open the object and return the + * descriptor. See the caveats on the "path" parameter above. + */ +static int open_sha1_file(const unsigned char *sha1, const char **path) { int fd; struct alternate_object_database *alt; int most_interesting_errno; - fd = git_open(sha1_file_name(sha1)); + *path = sha1_file_name(sha1); + fd = git_open(*path); if (fd >= 0) return fd; most_interesting_errno = errno; prepare_alt_odb(); for (alt = alt_odb_list; alt; alt = alt->next) { - const char *path = alt_sha1_path(alt, sha1); - fd = git_open(path); + *path = alt_sha1_path(alt, sha1); + fd = git_open(*path); if (fd >= 0) return fd; if (most_interesting_errno == ENOENT) @@ -1672,12 +1687,21 @@ static int open_sha1_file(const unsigned char *sha1) return -1; } -void *map_sha1_file(const unsigned char *sha1, unsigned long *size) +/* + * Map the loose object at "path" if it is not NULL, or the path found by + * searching for a loose object named "sha1". + */ +static void *map_sha1_file_1(const char *path, + const unsigned char *sha1, + unsigned long *size) { void *map; int fd; - fd = open_sha1_file(sha1); + if (path) + fd = git_open(path); + else + fd = open_sha1_file(sha1, &path); map = NULL; if (fd >= 0) { struct stat st; @@ -1686,7 +1710,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) *size = xsize_t(st.st_size); if (!*size) { /* mmap() is forbidden on empty files */ - error("object file %s is empty", sha1_file_name(sha1)); + error("object file %s is empty", path); return NULL; } map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); @@ -1696,6 +1720,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) return map; } +void *map_sha1_file(const unsigned char *sha1, unsigned long *size) +{ + return map_sha1_file_1(NULL, sha1, size); +} + unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) { @@ -2806,8 +2835,9 @@ static int sha1_loose_object_info(const unsigned char *sha1, * object even exists. */ if (!oi->typep && !oi->typename && !oi->sizep) { + const char *path; struct stat st; - if (stat_sha1_file(sha1, &st) < 0) + if (stat_sha1_file(sha1, &st, &path) < 0) return -1; if (oi->disk_sizep) *oi->disk_sizep = st.st_size; @@ -3003,6 +3033,8 @@ void *read_sha1_file_extended(const unsigned char *sha1, { void *data; const struct packed_git *p; + const char *path; + struct stat st; const unsigned char *repl = lookup_replace_object_extended(sha1, flag); errno = 0; @@ -3018,12 +3050,9 @@ void *read_sha1_file_extended(const unsigned char *sha1, die("replacement %s not found for %s", sha1_to_hex(repl), sha1_to_hex(sha1)); - if (has_loose_object(repl)) { - const char *path = sha1_file_name(sha1); - + if (!stat_sha1_file(repl, &st, &path)) die("loose object %s (stored in %s) is corrupt", sha1_to_hex(repl), path); - } if ((p = has_packed_and_bad(repl)) != NULL) die("packed object %s (stored in %s) is corrupt", @@ -3793,3 +3822,122 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags) } return r ? r : pack_errors; } + +static int check_stream_sha1(git_zstream *stream, + const char *hdr, + unsigned long size, + const char *path, + const unsigned char *expected_sha1) +{ + git_SHA_CTX c; + unsigned char real_sha1[GIT_SHA1_RAWSZ]; + unsigned char buf[4096]; + unsigned long total_read; + int status = Z_OK; + + git_SHA1_Init(&c); + git_SHA1_Update(&c, hdr, stream->total_out); + + /* + * We already read some bytes into hdr, but the ones up to the NUL + * do not count against the object's content size. + */ + total_read = stream->total_out - strlen(hdr) - 1; + + /* + * This size comparison must be "<=" to read the final zlib packets; + * see the comment in unpack_sha1_rest for details. + */ + while (total_read <= size && + (status == Z_OK || status == Z_BUF_ERROR)) { + stream->next_out = buf; + stream->avail_out = sizeof(buf); + if (size - total_read < stream->avail_out) + stream->avail_out = size - total_read; + status = git_inflate(stream, Z_FINISH); + git_SHA1_Update(&c, buf, stream->next_out - buf); + total_read += stream->next_out - buf; + } + git_inflate_end(stream); + + if (status != Z_STREAM_END) { + error("corrupt loose object '%s'", sha1_to_hex(expected_sha1)); + return -1; + } + if (stream->avail_in) { + error("garbage at end of loose object '%s'", + sha1_to_hex(expected_sha1)); + return -1; + } + + git_SHA1_Final(real_sha1, &c); + if (hashcmp(expected_sha1, real_sha1)) { + error("sha1 mismatch for %s (expected %s)", path, + sha1_to_hex(expected_sha1)); + return -1; + } + + return 0; +} + +int read_loose_object(const char *path, + const unsigned char *expected_sha1, + enum object_type *type, + unsigned long *size, + void **contents) +{ + int ret = -1; + int fd = -1; + void *map = NULL; + unsigned long mapsize; + git_zstream stream; + char hdr[32]; + + *contents = NULL; + + map = map_sha1_file_1(path, NULL, &mapsize); + if (!map) { + error_errno("unable to mmap %s", path); + goto out; + } + + if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) { + error("unable to unpack header of %s", path); + goto out; + } + + *type = parse_sha1_header(hdr, size); + if (*type < 0) { + error("unable to parse header of %s", path); + git_inflate_end(&stream); + goto out; + } + + if (*type == OBJ_BLOB) { + if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0) + goto out; + } else { + *contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1); + if (!*contents) { + error("unable to unpack contents of %s", path); + git_inflate_end(&stream); + goto out; + } + if (check_sha1_signature(expected_sha1, *contents, + *size, typename(*type))) { + error("sha1 mismatch for %s (expected %s)", path, + sha1_to_hex(expected_sha1)); + free(*contents); + goto out; + } + } + + ret = 0; /* everything checks out */ + +out: + if (map) + munmap(map, mapsize); + if (fd >= 0) + close(fd); + return ret; +} diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index ee7d4736db..8975b4d1bc 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -43,13 +43,13 @@ test_expect_success 'HEAD is part of refs, valid objects appear valid' ' test_expect_success 'setup: helpers for corruption tests' ' sha1_file() { - echo "$*" | sed "s#..#.git/objects/&/#" + remainder=${1#??} && + firsttwo=${1%$remainder} && + echo ".git/objects/$firsttwo/$remainder" } && remove_object() { - file=$(sha1_file "$*") && - test -e "$file" && - rm -f "$file" + rm "$(sha1_file "$1")" } ' @@ -535,13 +535,6 @@ test_expect_success 'fsck --connectivity-only' ' ) ' -remove_loose_object () { - sha1="$(git rev-parse "$1")" && - remainder=${sha1#??} && - firsttwo=${sha1%$remainder} && - rm .git/objects/$firsttwo/$remainder -} - test_expect_success 'fsck --name-objects' ' rm -rf name-objects && git init name-objects && @@ -550,11 +543,80 @@ test_expect_success 'fsck --name-objects' ' test_commit julius caesar.t && test_commit augustus && test_commit caesar && - remove_loose_object $(git rev-parse julius:caesar.t) && + remove_object $(git rev-parse julius:caesar.t) && test_must_fail git fsck --name-objects >out && tree=$(git rev-parse --verify julius:) && grep "$tree (\(refs/heads/master\|HEAD\)@{[0-9]*}:" out ) ' +test_expect_success 'alternate objects are correctly blamed' ' + test_when_finished "rm -rf alt.git .git/objects/info/alternates" && + git init --bare alt.git && + echo "../../alt.git/objects" >.git/objects/info/alternates && + mkdir alt.git/objects/12 && + >alt.git/objects/12/34567890123456789012345678901234567890 && + test_must_fail git fsck >out 2>&1 && + grep alt.git out +' + +test_expect_success 'fsck errors in packed objects' ' + git cat-file commit HEAD >basis && + sed "s/</one/" basis >one && + sed "s/</foo/" basis >two && + one=$(git hash-object -t commit -w one) && + two=$(git hash-object -t commit -w two) && + pack=$( + { + echo $one && + echo $two + } | git pack-objects .git/objects/pack/pack + ) && + test_when_finished "rm -f .git/objects/pack/pack-$pack.*" && + remove_object $one && + remove_object $two && + test_must_fail git fsck 2>out && + grep "error in commit $one.* - bad name" out && + grep "error in commit $two.* - bad name" out && + ! grep corrupt out +' + +test_expect_success 'fsck finds problems in duplicate loose objects' ' + rm -rf broken-duplicate && + git init broken-duplicate && + ( + cd broken-duplicate && + test_commit duplicate && + # no "-d" here, so we end up with duplicates + git repack && + # now corrupt the loose copy + file=$(sha1_file "$(git rev-parse HEAD)") && + rm "$file" && + echo broken >"$file" && + test_must_fail git fsck + ) +' + +test_expect_success 'fsck detects trailing loose garbage (commit)' ' + git cat-file commit HEAD >basis && + echo bump-commit-sha1 >>basis && + commit=$(git hash-object -w -t commit basis) && + file=$(sha1_file $commit) && + test_when_finished "remove_object $commit" && + chmod +w "$file" && + echo garbage >>"$file" && + test_must_fail git fsck 2>out && + test_i18ngrep "garbage.*$commit" out +' + +test_expect_success 'fsck detects trailing loose garbage (blob)' ' + blob=$(echo trailing | git hash-object -w --stdin) && + file=$(sha1_file $blob) && + test_when_finished "remove_object $blob" && + chmod +w "$file" && + echo garbage >>"$file" && + test_must_fail git fsck 2>out && + test_i18ngrep "garbage.*$blob" out +' + test_done |