summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2015-03-28 09:33:06 -0700
committer: Junio C Hamano <gitster@pobox.com> 2015-03-28 09:33:06 -0700
commit: 9f389aa4920f147a6314719741b47074b2e4b727 (patch)
tree: edf7abf348e4b2ba4fe128801346357c512ce173
parent: b37996e251872891f71b833c425acd05cf263547 (diff)
parent: ea56c4e02fc1d5ce6a6b5083c284e32ffc6367e6 (diff)
download: git-9f389aa4920f147a6314719741b47074b2e4b727.tar.gz
Merge branch 'jk/prune-with-corrupt-refs' into maint

"git prune" used to largely ignore broken refs when deciding which
objects are still being used, which could spread an existing small
damage and make it a larger one.

* jk/prune-with-corrupt-refs:
  refs.c: drop curate_packed_refs
  repack: turn on "ref paranoia" when doing a destructive repack
  prune: turn on ref_paranoia flag
  refs: introduce a "ref paranoia" flag
  t5312: test object deletion code paths in a corrupted repository
-rw-r--r-- Documentation/git.txt | 11
-rw-r--r-- builtin/prune.c | 1
-rw-r--r-- builtin/repack.c | 8
-rw-r--r-- cache.h | 8
-rw-r--r-- environment.c | 1
-rw-r--r-- refs.c | 72
-rwxr-xr-x t/t5312-prune-corruption.sh | 114
7 files changed, 147 insertions, 68 deletions
diff --git a/Documentation/git.txt b/Documentation/git.txt
index 9c75617051..a2596dc734 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -1027,6 +1027,17 @@ GIT_ICASE_PATHSPECS::
variable when it is invoked as the top level command by the
end user, to be recorded in the body of the reflog.
+`GIT_REF_PARANOIA`::
+ If set to `1`, include broken or badly named refs when iterating
+ over lists of refs. In a normal, non-corrupted repository, this
+ does nothing. However, enabling it may help git to detect and
+ abort some operations in the presence of broken refs. Git sets
+ this variable automatically when performing destructive
+ operations like linkgit:git-prune[1]. You should not need to set
+ it yourself unless you want to be paranoid about making sure
+ an operation has touched every ref (e.g., because you are
+ cloning a repository to make a backup).
+
Discussion[[Discussion]]
------------------------
diff --git a/builtin/prune.c b/builtin/prune.c
index 04d3b12ae4..17094ad954 100644
--- a/builtin/prune.c
+++ b/builtin/prune.c
@@ -115,6 +115,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
expire = ULONG_MAX;
save_commit_buffer = 0;
check_replace_refs = 0;
+ ref_paranoia = 1;
init_revisions(&revs, prefix);
argc = parse_options(argc, argv, prefix, options, prune_usage, 0);
diff --git a/builtin/repack.c b/builtin/repack.c
index 3f852f35d1..2fe1b30d71 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -228,13 +228,17 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
get_non_kept_pack_filenames(&existing_packs);
if (existing_packs.nr && delete_redundant) {
- if (unpack_unreachable)
+ if (unpack_unreachable) {
argv_array_pushf(&cmd.args,
"--unpack-unreachable=%s",
unpack_unreachable);
- else if (pack_everything & LOOSEN_UNREACHABLE)
+ argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
+ } else if (pack_everything & LOOSEN_UNREACHABLE) {
argv_array_push(&cmd.args,
"--unpack-unreachable");
+ } else {
+ argv_array_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
+ }
}
} else {
argv_array_push(&cmd.args, "--unpacked");
diff --git a/cache.h b/cache.h
index 4d02efc905..23806394eb 100644
--- a/cache.h
+++ b/cache.h
@@ -614,6 +614,14 @@ extern int protect_hfs;
extern int protect_ntfs;
/*
+ * Include broken refs in all ref iterations, which will
+ * generally choke dangerous operations rather than letting
+ * them silently proceed without taking the broken ref into
+ * account.
+ */
+extern int ref_paranoia;
+
+/*
* The character that begins a commented line in user-editable file
* that is subject to stripspace.
*/
diff --git a/environment.c b/environment.c
index 1ade5c9684..a40044c3bf 100644
--- a/environment.c
+++ b/environment.c
@@ -24,6 +24,7 @@ int is_bare_repository_cfg = -1; /* unspecified */
int log_all_ref_updates = -1; /* unspecified */
int warn_ambiguous_refs = 1;
int warn_on_object_refname_ambiguity = 1;
+int ref_paranoia = -1;
int repository_format_version;
const char *git_commit_encoding;
const char *git_log_output_encoding;
diff --git a/refs.c b/refs.c
index 9edf18b04e..3a26ad4e65 100644
--- a/refs.c
+++ b/refs.c
@@ -1907,6 +1907,11 @@ static int do_for_each_ref(struct ref_cache *refs, const char *base,
data.fn = fn;
data.cb_data = cb_data;
+ if (ref_paranoia < 0)
+ ref_paranoia = git_env_bool("GIT_REF_PARANOIA", 0);
+ if (ref_paranoia)
+ data.flags |= DO_FOR_EACH_INCLUDE_BROKEN;
+
return do_for_each_entry(refs, base, do_one_ref, &data);
}
@@ -2591,68 +2596,10 @@ int pack_refs(unsigned int flags)
return 0;
}
-/*
- * If entry is no longer needed in packed-refs, add it to the string
- * list pointed to by cb_data. Reasons for deleting entries:
- *
- * - Entry is broken.
- * - Entry is overridden by a loose ref.
- * - Entry does not point at a valid object.
- *
- * In the first and third cases, also emit an error message because these
- * are indications of repository corruption.
- */
-static int curate_packed_ref_fn(struct ref_entry *entry, void *cb_data)
-{
- struct string_list *refs_to_delete = cb_data;
-
- if (entry->flag & REF_ISBROKEN) {
- /* This shouldn't happen to packed refs. */
- error("%s is broken!", entry->name);
- string_list_append(refs_to_delete, entry->name);
- return 0;
- }
- if (!has_sha1_file(entry->u.value.sha1)) {
- unsigned char sha1[20];
- int flags;
-
- if (read_ref_full(entry->name, 0, sha1, &flags))
- /* We should at least have found the packed ref. */
- die("Internal error");
- if ((flags & REF_ISSYMREF) || !(flags & REF_ISPACKED)) {
- /*
- * This packed reference is overridden by a
- * loose reference, so it is OK that its value
- * is no longer valid; for example, it might
- * refer to an object that has been garbage
- * collected. For this purpose we don't even
- * care whether the loose reference itself is
- * invalid, broken, symbolic, etc. Silently
- * remove the packed reference.
- */
- string_list_append(refs_to_delete, entry->name);
- return 0;
- }
- /*
- * There is no overriding loose reference, so the fact
- * that this reference doesn't refer to a valid object
- * indicates some kind of repository corruption.
- * Report the problem, then omit the reference from
- * the output.
- */
- error("%s does not point to a valid object!", entry->name);
- string_list_append(refs_to_delete, entry->name);
- return 0;
- }
-
- return 0;
-}
-
int repack_without_refs(struct string_list *refnames, struct strbuf *err)
{
struct ref_dir *packed;
- struct string_list refs_to_delete = STRING_LIST_INIT_DUP;
- struct string_list_item *refname, *ref_to_delete;
+ struct string_list_item *refname;
int ret, needs_repacking = 0, removed = 0;
assert(err);
@@ -2688,13 +2635,6 @@ int repack_without_refs(struct string_list *refnames, struct strbuf *err)
return 0;
}
- /* Remove any other accumulated cruft */
- do_for_each_entry_in_dir(packed, 0, curate_packed_ref_fn, &refs_to_delete);
- for_each_string_list_item(ref_to_delete, &refs_to_delete) {
- if (remove_entry(packed, ref_to_delete->string) == -1)
- die("internal error");
- }
-
/* Write what remains */
ret = commit_packed_refs();
if (ret)
diff --git a/t/t5312-prune-corruption.sh b/t/t5312-prune-corruption.sh
new file mode 100755
index 0000000000..8e98b44083
--- /dev/null
+++ b/t/t5312-prune-corruption.sh
@@ -0,0 +1,114 @@
+#!/bin/sh
+
+test_description='
+Test pruning of repositories with minor corruptions. The goal
+here is that we should always be erring on the side of safety. So
+if we see, for example, a ref with a bogus name, it is OK either to
+bail out or to proceed using it as a reachable tip, but it is _not_
+OK to proceed as if it did not exist. Otherwise we might silently
+delete objects that cannot be recovered.
+'
+. ./test-lib.sh
+
+test_expect_success 'disable reflogs' '
+ git config core.logallrefupdates false &&
+ rm -rf .git/logs
+'
+
+test_expect_success 'create history reachable only from a bogus-named ref' '
+ test_tick && git commit --allow-empty -m master &&
+ base=$(git rev-parse HEAD) &&
+ test_tick && git commit --allow-empty -m bogus &&
+ bogus=$(git rev-parse HEAD) &&
+ git cat-file commit $bogus >saved &&
+ echo $bogus >.git/refs/heads/bogus..name &&
+ git reset --hard HEAD^
+'
+
+test_expect_success 'pruning does not drop bogus object' '
+ test_when_finished "git hash-object -w -t commit saved" &&
+ test_might_fail git prune --expire=now &&
+ verbose git cat-file -e $bogus
+'
+
+test_expect_success 'put bogus object into pack' '
+ git tag reachable $bogus &&
+ git repack -ad &&
+ git tag -d reachable &&
+ verbose git cat-file -e $bogus
+'
+
+test_expect_success 'destructive repack keeps packed object' '
+ test_might_fail git repack -Ad --unpack-unreachable=now &&
+ verbose git cat-file -e $bogus &&
+ test_might_fail git repack -ad &&
+ verbose git cat-file -e $bogus
+'
+
+# subsequent tests will have different corruptions
+test_expect_success 'clean up bogus ref' '
+ rm .git/refs/heads/bogus..name
+'
+
+# We create two new objects here, "one" and "two". Our
+# master branch points to "two", which is deleted,
+# corrupting the repository. But we'd like to make sure
+# that the otherwise unreachable "one" is not pruned
+# (since it is the user's best bet for recovering
+# from the corruption).
+#
+# Note that we also point HEAD somewhere besides "two",
+# as we want to make sure we test the case where we
+# pick up the reference to "two" by iterating the refs,
+# not by resolving HEAD.
+test_expect_success 'create history with missing tip commit' '
+ test_tick && git commit --allow-empty -m one &&
+ recoverable=$(git rev-parse HEAD) &&
+ git cat-file commit $recoverable >saved &&
+ test_tick && git commit --allow-empty -m two &&
+ missing=$(git rev-parse HEAD) &&
+ git checkout --detach $base &&
+ rm .git/objects/$(echo $missing | sed "s,..,&/,") &&
+ test_must_fail git cat-file -e $missing
+'
+
+test_expect_success 'pruning with a corrupted tip does not drop history' '
+ test_when_finished "git hash-object -w -t commit saved" &&
+ test_might_fail git prune --expire=now &&
+ verbose git cat-file -e $recoverable
+'
+
+test_expect_success 'pack-refs does not silently delete broken loose ref' '
+ git pack-refs --all --prune &&
+ echo $missing >expect &&
+ git rev-parse refs/heads/master >actual &&
+ test_cmp expect actual
+'
+
+# we do not want to count on running pack-refs to
+# actually pack it, as it is perfectly reasonable to
+# skip processing a broken ref
+test_expect_success 'create packed-refs file with broken ref' '
+ rm -f .git/refs/heads/master &&
+ cat >.git/packed-refs <<-EOF &&
+ $missing refs/heads/master
+ $recoverable refs/heads/other
+ EOF
+ echo $missing >expect &&
+ git rev-parse refs/heads/master >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'pack-refs does not silently delete broken packed ref' '
+ git pack-refs --all --prune &&
+ git rev-parse refs/heads/master >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'pack-refs does not drop broken refs during deletion' '
+ git update-ref -d refs/heads/other &&
+ git rev-parse refs/heads/master >actual &&
+ test_cmp expect actual
+'
+
+test_done