summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--builtin/pack-objects.c84
-rw-r--r--pack-objects.h9
-rwxr-xr-xt/t5314-pack-cycle-detection.sh113
3 files changed, 206 insertions, 0 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index c4c2a3c79d..ed77355c6e 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1494,6 +1494,83 @@ static int pack_offset_sort(const void *_a, const void *_b)
(a->in_pack_offset > b->in_pack_offset);
}
+/*
+ * Drop an on-disk delta we were planning to reuse. Naively, this would
+ * just involve blanking out the "delta" field, but we have to deal
+ * with some extra book-keeping:
+ *
+ * 1. Removing ourselves from the delta_sibling linked list.
+ *
+ * 2. Updating our size/type to the non-delta representation. These were
+ * either not recorded initially (size) or overwritten with the delta type
+ * (type) when check_object() decided to reuse the delta.
+ */
+static void drop_reused_delta(struct object_entry *entry)
+{
+ struct object_entry **p = &entry->delta->delta_child;
+ struct object_info oi = OBJECT_INFO_INIT;
+
+ while (*p) {
+ if (*p == entry)
+ *p = (*p)->delta_sibling;
+ else
+ p = &(*p)->delta_sibling;
+ }
+ entry->delta = NULL;
+
+ oi.sizep = &entry->size;
+ oi.typep = &entry->type;
+ if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
+ /*
+ * We failed to get the info from this pack for some reason;
+ * fall back to sha1_object_info, which may find another copy.
+ * And if that fails, the error will be recorded in entry->type
+ * and dealt with in prepare_pack().
+ */
+ entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
+ }
+}
+
+/*
+ * Follow the chain of deltas from this entry onward, throwing away any links
+ * that cause us to hit a cycle (as determined by the DFS state flags in
+ * the entries).
+ */
+static void break_delta_chains(struct object_entry *entry)
+{
+ /* If it's not a delta, it can't be part of a cycle. */
+ if (!entry->delta) {
+ entry->dfs_state = DFS_DONE;
+ return;
+ }
+
+ switch (entry->dfs_state) {
+ case DFS_NONE:
+ /*
+ * This is the first time we've seen the object. We mark it as
+ * part of the active potential cycle and recurse.
+ */
+ entry->dfs_state = DFS_ACTIVE;
+ break_delta_chains(entry->delta);
+ entry->dfs_state = DFS_DONE;
+ break;
+
+ case DFS_DONE:
+ /* object already examined, and not part of a cycle */
+ break;
+
+ case DFS_ACTIVE:
+ /*
+ * We found a cycle that needs broken. It would be correct to
+ * break any link in the chain, but it's convenient to
+ * break this one.
+ */
+ drop_reused_delta(entry);
+ entry->dfs_state = DFS_DONE;
+ break;
+ }
+}
+
static void get_object_details(void)
{
uint32_t i;
@@ -1511,6 +1588,13 @@ static void get_object_details(void)
entry->no_try_delta = 1;
}
+ /*
+ * This must happen in a second pass, since we rely on the delta
+ * information for the whole list being completed.
+ */
+ for (i = 0; i < to_pack.nr_objects; i++)
+ break_delta_chains(&to_pack.objects[i]);
+
free(sorted_by_offset);
}
diff --git a/pack-objects.h b/pack-objects.h
index d1b98b30ff..cc9b9a9b90 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -27,6 +27,15 @@ struct object_entry {
unsigned no_try_delta:1;
unsigned tagged:1; /* near the very tip of refs */
unsigned filled:1; /* assigned write-order */
+
+ /*
+ * State flags for depth-first search used for analyzing delta cycles.
+ */
+ enum {
+ DFS_NONE = 0,
+ DFS_ACTIVE,
+ DFS_DONE
+ } dfs_state;
};
struct packing_data {
diff --git a/t/t5314-pack-cycle-detection.sh b/t/t5314-pack-cycle-detection.sh
new file mode 100755
index 0000000000..f7dbdfb412
--- /dev/null
+++ b/t/t5314-pack-cycle-detection.sh
@@ -0,0 +1,113 @@
+#!/bin/sh
+
+test_description='test handling of inter-pack delta cycles during repack
+
+The goal here is to create a situation where we have two blobs, A and B, with A
+as a delta against B in one pack, and vice versa in the other. Then if we can
+persuade a full repack to find A from one pack and B from the other, that will
+give us a cycle when we attempt to reuse those deltas.
+
+The trick is in the "persuade" step, as it depends on the internals of how
+pack-objects picks which pack to reuse the deltas from. But we can assume
+that it does so in one of two general strategies:
+
+ 1. Using a static ordering of packs. In this case, no inter-pack cycles can
+ happen. Any objects with a delta relationship must be present in the same
+ pack (i.e., no "--thin" packs on disk), so we will find all related objects
+ from that pack. So assuming there are no cycles within a single pack (and
+ we avoid generating them via pack-objects or importing them via
+ index-pack), then our result will have no cycles.
+
+ So this case should pass the tests no matter how we arrange things.
+
+ 2. Picking the next pack to examine based on locality (i.e., where we found
+ something else recently).
+
+ In this case, we want to make sure that we find the delta versions of A and
+ B and not their base versions. We can do this by putting two blobs in each
+ pack. The first is a "dummy" blob that can only be found in the pack in
+ question. And then the second is the actual delta we want to find.
+
+ The two blobs must be present in the same tree, not present in other trees,
+ and the dummy pathname must sort before the delta path.
+
+The setup below focuses on case 2. We have two commits HEAD and HEAD^, each
+which has two files: "dummy" and "file". Then we can make two packs which
+contain:
+
+ [pack one]
+ HEAD:dummy
+ HEAD:file (as delta against HEAD^:file)
+ HEAD^:file (as base)
+
+ [pack two]
+ HEAD^:dummy
+ HEAD^:file (as delta against HEAD:file)
+ HEAD:file (as base)
+
+Then no matter which order we start looking at the packs in, we know that we
+will always find a delta for "file", because its lookup will always come
+immediately after the lookup for "dummy".
+'
+. ./test-lib.sh
+
+
+
+# Create a pack containing the the tree $1 and blob $1:file, with
+# the latter stored as a delta against $2:file.
+#
+# We convince pack-objects to make the delta in the direction of our choosing
+# by marking $2 as a preferred-base edge. That results in $1:file as a thin
+# delta, and index-pack completes it by adding $2:file as a base.
+#
+# Note that the two variants of "file" must be similar enough to convince git
+# to create the delta.
+make_pack () {
+ {
+ printf '%s\n' "-$(git rev-parse $2)"
+ printf '%s dummy\n' "$(git rev-parse $1:dummy)"
+ printf '%s file\n' "$(git rev-parse $1:file)"
+ } |
+ git pack-objects --stdout |
+ git index-pack --stdin --fix-thin
+}
+
+test_expect_success 'setup' '
+ test-genrandom base 4096 >base &&
+ for i in one two
+ do
+ # we want shared content here to encourage deltas...
+ cp base file &&
+ echo $i >>file &&
+
+ # ...whereas dummy should be short, because we do not want
+ # deltas that would create duplicates when we --fix-thin
+ echo $i >dummy &&
+
+ git add file dummy &&
+ test_tick &&
+ git commit -m $i ||
+ return 1
+ done &&
+
+ make_pack HEAD^ HEAD &&
+ make_pack HEAD HEAD^
+'
+
+test_expect_success 'repack' '
+ # We first want to check that we do not have any internal errors,
+ # and also that we do not hit the last-ditch cycle-breaking code
+ # in write_object(), which will issue a warning to stderr.
+ >expect &&
+ git repack -ad 2>stderr &&
+ test_cmp expect stderr &&
+
+ # And then double-check that the resulting pack is usable (i.e.,
+ # we did not fail to notice any cycles). We know we are accessing
+ # the objects via the new pack here, because "repack -d" will have
+ # removed the others.
+ git cat-file blob HEAD:file >/dev/null &&
+ git cat-file blob HEAD^:file >/dev/null
+'
+
+test_done