summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- diffcore-rename.c 53
-rwxr-xr-x t/t4001-diff-rename.sh 7
2 files changed, 52 insertions, 8 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 266d4fae48..41558185ae 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -379,7 +379,6 @@ static const char *get_basename(const char *filename)
return base ? base + 1 : filename;
}
-MAYBE_UNUSED
static int find_basename_matches(struct diff_options *options,
int minimum_score)
{
@@ -716,11 +715,55 @@ void diffcore_rename(struct diff_options *options)
if (minimum_score == MAX_SCORE)
goto cleanup;
- /* Calculate how many renames are left */
- num_destinations = (rename_dst_nr - rename_count);
- remove_unneeded_paths_from_src(want_copies);
num_sources = rename_src_nr;
+ if (want_copies || break_idx) {
+ /*
+ * Cull sources:
+ * - remove ones corresponding to exact renames
+ */
+ trace2_region_enter("diff", "cull after exact", options->repo);
+ remove_unneeded_paths_from_src(want_copies);
+ trace2_region_leave("diff", "cull after exact", options->repo);
+ } else {
+ /* Determine minimum score to match basenames */
+ double factor = 0.5;
+ char *basename_factor = getenv("GIT_BASENAME_FACTOR");
+ int min_basename_score;
+
+ if (basename_factor)
+ factor = strtol(basename_factor, NULL, 10)/100.0;
+ assert(factor >= 0.0 && factor <= 1.0);
+ min_basename_score = minimum_score +
+ (int)(factor * (MAX_SCORE - minimum_score));
+
+ /*
+ * Cull sources:
+ * - remove ones involved in renames (found via exact match)
+ */
+ trace2_region_enter("diff", "cull after exact", options->repo);
+ remove_unneeded_paths_from_src(want_copies);
+ trace2_region_leave("diff", "cull after exact", options->repo);
+
+ /* Utilize file basenames to quickly find renames. */
+ trace2_region_enter("diff", "basename matches", options->repo);
+ rename_count += find_basename_matches(options,
+ min_basename_score);
+ trace2_region_leave("diff", "basename matches", options->repo);
+
+ /*
+ * Cull sources, again:
+ * - remove ones involved in renames (found via basenames)
+ */
+ trace2_region_enter("diff", "cull basename", options->repo);
+ remove_unneeded_paths_from_src(want_copies);
+ trace2_region_leave("diff", "cull basename", options->repo);
+ }
+
+ /* Calculate how many rename destinations are left */
+ num_destinations = (rename_dst_nr - rename_count);
+ num_sources = rename_src_nr; /* rename_src_nr reflects lower number */
+
/* All done? */
if (!num_destinations || !num_sources)
goto cleanup;
@@ -751,7 +794,7 @@ void diffcore_rename(struct diff_options *options)
struct diff_score *m;
if (rename_dst[i].is_rename)
- continue; /* dealt with exact match already. */
+ continue; /* exact or basename match already handled */
m = &mx[dst_cnt * NUM_CANDIDATE_PER_DST];
for (j = 0; j < NUM_CANDIDATE_PER_DST; j++)
diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh
index 0f97858197..99a5d1bd1c 100755
--- a/t/t4001-diff-rename.sh
+++ b/t/t4001-diff-rename.sh
@@ -277,10 +277,11 @@ test_expect_success 'basename similarity vs best similarity' '
git add file.txt file.md &&
git commit -a -m "rename" &&
git diff-tree -r -M --name-status HEAD^ HEAD >actual &&
- # subdir/file.txt is 88% similar to file.md and 78% similar to file.txt
+ # subdir/file.txt is 88% similar to file.md, 78% similar to file.txt,
+ # but since same basenames are checked first...
cat >expected <<-\EOF &&
- R088 subdir/file.txt file.md
- A file.txt
+ A file.md
+ R078 subdir/file.txt file.txt
EOF
test_cmp expected actual
'