summary refs log tree commit diff
diff options
context:
space:
mode:
author Jeff King <peff@peff.net> 2015-02-26 20:42:27 -0500
committer Junio C Hamano <gitster@pobox.com> 2015-02-27 13:41:29 -0800
commit 4d6be03b95c3db21db1bb8fee01128c1b13f70e7 (patch)
tree 0f7900c095a79b828c04f25577896f8163518d13
parent f98c2f7e53062a59f67914337c0b45c82393e11f (diff)
download git-jk/diffcore-rename-duplicate.tar.gz
diffcore-rename: avoid processing duplicate destinations [jk/diffcore-rename-duplicate]
The rename code cannot handle an input where we have duplicate destinations (i.e., more than one diff_filepair in the queue with the same string in its pair->two->path). We end up allocating only one slot in the rename_dst mapping. If we fill in the diff_filepair for that slot, when we re-queue the results, we may queue that filepair multiple times. When the diff is finally flushed, the filepair is processed and free()d multiple times, leading to heap corruption.

This situation should only happen when a tree diff sees duplicates in one of the trees (see the added test for a detailed example). Rather than handle it, the sanest thing is just to turn off rename detection altogether for the diff.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- diffcore-rename.c 8
-rwxr-xr-x t/t4058-diff-duplicates.sh 79
2 files changed, 85 insertions, 2 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 0afe903de9..361eed9fbc 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -467,8 +467,12 @@ void diffcore_rename(struct diff_options *options)
else if (!DIFF_OPT_TST(options, RENAME_EMPTY) &&
is_empty_blob_sha1(p->two->sha1))
continue;
- else
- add_rename_dst(p->two);
+ else if (add_rename_dst(p->two) < 0) {
+ warning("skipping rename detection, detected"
+ " duplicate destination '%s'",
+ p->two->path);
+ goto cleanup;
+ }
}
else if (!DIFF_OPT_TST(options, RENAME_EMPTY) &&
is_empty_blob_sha1(p->one->sha1))
diff --git a/t/t4058-diff-duplicates.sh b/t/t4058-diff-duplicates.sh
new file mode 100755
index 0000000000..0a23242cb6
--- /dev/null
+++ b/t/t4058-diff-duplicates.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+test_description='test tree diff when trees have duplicate entries'
+. ./test-lib.sh
+
+# make_tree_entry <mode> <name> <sha1>
+#
+# We have to rely on perl here because not all printfs understand
+# hex escapes (only octal), and xxd is not portable.
+make_tree_entry () {
+ printf '%s %s\0' "$1" "$2" &&
+ perl -e 'print chr(hex($_)) for ($ARGV[0] =~ /../g)' "$3"
+}
+
+# Like git-mktree, but without all of the pesky sanity checking.
+# Arguments come in groups of three, each group specifying a single
+# tree entry (see make_tree_entry above).
+make_tree () {
+ while test $# -gt 2; do
+ make_tree_entry "$1" "$2" "$3"
+ shift; shift; shift
+ done |
+ git hash-object -w -t tree --stdin
+}
+
+# this is kind of a convoluted setup, but matches
+# a real-world case. Each tree contains four entries
+# for the given path, one with one sha1, and three with
+# the other. The first tree has them split across
+# two subtrees (which are themselves duplicate entries in
+# the root tree), and the second has them all in a single subtree.
+test_expect_success 'create trees with duplicate entries' '
+ blob_one=$(echo one | git hash-object -w --stdin) &&
+ blob_two=$(echo two | git hash-object -w --stdin) &&
+ inner_one_a=$(make_tree \
+ 100644 inner $blob_one
+ ) &&
+ inner_one_b=$(make_tree \
+ 100644 inner $blob_two \
+ 100644 inner $blob_two \
+ 100644 inner $blob_two
+ ) &&
+ outer_one=$(make_tree \
+ 040000 outer $inner_one_a \
+ 040000 outer $inner_one_b
+ ) &&
+ inner_two=$(make_tree \
+ 100644 inner $blob_one \
+ 100644 inner $blob_two \
+ 100644 inner $blob_two \
+ 100644 inner $blob_two
+ ) &&
+ outer_two=$(make_tree \
+ 040000 outer $inner_two
+ ) &&
+ git tag one $outer_one &&
+ git tag two $outer_two
+'
+
+test_expect_success 'diff-tree between trees' '
+ {
+ printf ":000000 100644 $_z40 $blob_two A\touter/inner\n" &&
+ printf ":000000 100644 $_z40 $blob_two A\touter/inner\n" &&
+ printf ":000000 100644 $_z40 $blob_two A\touter/inner\n" &&
+ printf ":100644 000000 $blob_two $_z40 D\touter/inner\n" &&
+ printf ":100644 000000 $blob_two $_z40 D\touter/inner\n" &&
+ printf ":100644 000000 $blob_two $_z40 D\touter/inner\n"
+ } >expect &&
+ git diff-tree -r --no-abbrev one two >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'diff-tree with renames' '
+ # same expectation as above, since we disable rename detection
+ git diff-tree -M -r --no-abbrev one two >actual &&
+ test_cmp expect actual
+'
+
+test_done