summary refs log tree commit diff
diff options
context:
space:
mode:
author Russell Belfer <rb@github.com> 2013-05-22 10:37:12 -0700
committer Russell Belfer <rb@github.com> 2013-05-22 10:37:12 -0700
commit a21cbb12db62426ca789045d5ac5c96ca069f0ea (patch)
tree 73885773654c6869003fb4d855383165d14d6a5e
parent 4742148d54334629495eeaf0382e6c9da8786f17 (diff)
download libgit2-a21cbb12db62426ca789045d5ac5c96ca069f0ea.tar.gz
Significant rename detection rewrite
This flips rename detection around: instead of creating a forward mapping from deltas to possible rename targets, it creates a reverse mapping, looking at possible targets and trying to find a source that each one could have been renamed or copied from. This is important because each output can only have a single source, but a given source can map to multiple outputs (in the form of COPIED records). Additionally, this makes a couple of tweaks to the public rename detection APIs, mostly renaming a couple of options that control the behavior so that they make more sense and are more like core Git. I walked through the tests looking at the exact results and updated the expectations based on what I saw. The new code differs from the old in that it cannot give some nonsense results (like A was renamed to both B and C) which were part of the outputs previously.
-rw-r--r-- include/git2/diff.h 15
-rw-r--r-- src/diff.h 10
-rw-r--r-- src/diff_tform.c 479
-rw-r--r-- tests-clar/diff/rename.c 34
-rw-r--r-- tests-clar/object/raw/convert.c 1
5 files changed, 319 insertions, 220 deletions
diff --git a/include/git2/diff.h b/include/git2/diff.h
index 31f6e0591..6939f6a2e 100644
--- a/include/git2/diff.h
+++ b/include/git2/diff.h
@@ -429,8 +429,8 @@ typedef enum {
GIT_DIFF_FIND_AND_BREAK_REWRITES =
(GIT_DIFF_FIND_REWRITES | GIT_DIFF_BREAK_REWRITES),
- /** consider untracked files as rename/copy targets */
- GIT_DIFF_FIND_FROM_UNTRACKED = (1 << 6),
+ /** find renames/copies for untracked items in working directory */
+ GIT_DIFF_FIND_FOR_UNTRACKED = (1 << 6),
/** turn on all finding features */
GIT_DIFF_FIND_ALL = (0x0ff),
@@ -469,7 +469,10 @@ typedef struct {
* - `copy_threshold` is the same as the -C option with a value
* - `rename_from_rewrite_threshold` matches the top of the -B option
* - `break_rewrite_threshold` matches the bottom of the -B option
- * - `target_limit` matches the -l option (approximately)
+ * - `rename_limit` is the maximum number of matches to consider for
+ * a particular file. This is a little different from the `-l` option
+ * to regular Git because we will still process up to this many matches
+ * before abandoning the search.
*
* The `metric` option allows you to plug in a custom similarity metric.
* Set it to NULL for the default internal metric which is based on sampling
@@ -492,10 +495,10 @@ typedef struct {
/** Similarity to split modify into delete/add pair (default 60) */
uint16_t break_rewrite_threshold;
- /** Maximum similarity sources to examine (a la diff's `-l` option or
- * the `diff.renameLimit` config) (default 200)
+ /** Maximum similarity sources to examine for a file (somewhat like
+ * git-diff's `-l` option or `diff.renameLimit` config) (default 200)
*/
- size_t target_limit;
+ size_t rename_limit;
/** Pluggable similarity metric; pass NULL to use internal metric */
git_diff_similarity_metric *metric;
diff --git a/src/diff.h b/src/diff.h
index 16df431ed..a9a543ecd 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -34,10 +34,16 @@ enum {
GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */
GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */
GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */
- GIT_DIFF_FLAG__TO_DELETE = (1 << 11), /* delete entry during rename det. */
- GIT_DIFF_FLAG__TO_SPLIT = (1 << 12), /* split entry during rename det. */
+
+ GIT_DIFF_FLAG__TO_DELETE = (1 << 16), /* delete entry during rename det. */
+ GIT_DIFF_FLAG__TO_SPLIT = (1 << 17), /* split entry during rename det. */
+ GIT_DIFF_FLAG__IS_RENAME_TARGET = (1 << 18),
+ GIT_DIFF_FLAG__IS_RENAME_SOURCE = (1 << 19),
+ GIT_DIFF_FLAG__HAS_SELF_SIMILARITY = (1 << 20),
};
+#define GIT_DIFF_FLAG__CLEAR_INTERNAL(F) (F) = ((F) & 0x00FFFF)
+
struct git_diff_list {
git_refcount rc;
git_repository *repo;
diff --git a/src/diff_tform.c b/src/diff_tform.c
index d5e56ac60..a3afe0d7a 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -222,7 +222,7 @@ int git_diff_find_similar__calc_similarity(
#define DEFAULT_THRESHOLD 50
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60
-#define DEFAULT_TARGET_LIMIT 200
+#define DEFAULT_RENAME_LIMIT 200
static int normalize_find_opts(
git_diff_list *diff,
@@ -290,15 +290,15 @@ static int normalize_find_opts(
#undef USE_DEFAULT
- if (!opts->target_limit) {
+ if (!opts->rename_limit) {
int32_t limit = 0;
- opts->target_limit = DEFAULT_TARGET_LIMIT;
+ opts->rename_limit = DEFAULT_RENAME_LIMIT;
if (git_config_get_int32(&limit, cfg, "diff.renameLimit") < 0)
giterr_clear();
else if (limit > 0)
- opts->target_limit = limit;
+ opts->rename_limit = limit;
}
/* assign the internal metric with whitespace flag as payload */
@@ -322,27 +322,6 @@ static int normalize_find_opts(
return 0;
}
-static void validate_delta(git_diff_delta *delta)
-{
- assert(delta);
- return;
-/*
- switch (delta->status) {
- case GIT_DELTA_ADDED:
- case GIT_DELTA_UNTRACKED:
- case GIT_DELTA_IGNORED:
- assert(delta->new_file.path);
- break;
- case GIT_DELTA_DELETED:
- assert(delta->old_file.path);
- break;
- default:
- assert(delta->old_file.path && delta->new_file.path);
- break;
- }
-*/
-}
-
static int apply_splits_and_deletes(
git_diff_list *diff, size_t expected_size, bool actually_split)
{
@@ -358,16 +337,7 @@ static int apply_splits_and_deletes(
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
continue;
- if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
-
- /* just leave delta flagged with score if not actually splitting */
- if (!actually_split) {
- delta->flags = (delta->flags & ~GIT_DIFF_FLAG__TO_SPLIT);
- if (delta->status != GIT_DELTA_MODIFIED)
- delta->similarity = 0;
- continue;
- }
-
+ if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0 && actually_split) {
delta->similarity = 0;
/* make new record for DELETED side of split */
@@ -378,7 +348,6 @@ static int apply_splits_and_deletes(
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
deleted->new_file.path = deleted->old_file.path;
deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
- validate_delta(deleted);
if (git_vector_insert(&onto, deleted) < 0)
goto on_error;
@@ -390,7 +359,6 @@ static int apply_splits_and_deletes(
memset(&delta->old_file, 0, sizeof(delta->old_file));
delta->old_file.path = delta->new_file.path;
delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
- validate_delta(delta);
}
if (git_vector_insert(&onto, delta) < 0)
@@ -398,13 +366,22 @@ static int apply_splits_and_deletes(
}
/* cannot return an error past this point */
- git_vector_foreach(&diff->deltas, i, delta)
+ git_vector_foreach(&diff->deltas, i, delta) {
if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
git__free(delta);
+ GIT_DIFF_FLAG__CLEAR_INTERNAL(delta->flags);
+
+ if (delta->status != GIT_DELTA_COPIED &&
+ delta->status != GIT_DELTA_RENAMED &&
+ (delta->status != GIT_DELTA_MODIFIED || actually_split))
+ delta->similarity = 0;
+ }
+
/* swap new delta list into place */
git_vector_swap(&diff->deltas, &onto);
git_vector_free(&onto);
+ git_vector_sort(&diff->deltas);
return 0;
@@ -424,7 +401,7 @@ GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
static int similarity_calc(
git_diff_list *diff,
- git_diff_find_options *opts,
+ const git_diff_find_options *opts,
size_t file_idx,
void **cache)
{
@@ -473,7 +450,7 @@ static int similarity_calc(
return error;
}
-#define FLAG_SET(opts,flag_name) (((opts).flags & flag_name) != 0)
+#define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0)
/* - score < 0 means files cannot be compared
* - score >= 100 means files are exact match
@@ -482,14 +459,14 @@ static int similarity_calc(
static int similarity_measure(
int *score,
git_diff_list *diff,
- git_diff_find_options *opts,
+ const git_diff_find_options *opts,
void **cache,
size_t a_idx,
size_t b_idx)
{
git_diff_file *a_file = similarity_get_file(diff, a_idx);
git_diff_file *b_file = similarity_get_file(diff, b_idx);
- bool exact_match = FLAG_SET(*opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
+ bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
*score = -1;
@@ -539,28 +516,152 @@ static int similarity_measure(
score, cache[a_idx], cache[b_idx], opts->metric->payload);
}
-static void convert_to_rename_and_add(
+static int calc_self_similarity(
git_diff_list *diff,
- git_diff_delta *from,
- git_diff_delta *to,
- int similarity)
+ const git_diff_find_options *opts,
+ size_t delta_idx,
+ void **cache)
{
- to->status = GIT_DELTA_RENAMED;
- to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; /* ensure no split */
- to->similarity = (uint32_t)similarity;
- memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
- validate_delta(to);
-
- if (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR)
- from->status = GIT_DELTA_UNTRACKED;
- else
- from->status = GIT_DELTA_ADDED;
- from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT; /* ensure no split */
- from->similarity = 0;
- memset(&from->old_file, 0, sizeof(from->old_file));
- from->old_file.path = from->new_file.path;
- from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
- validate_delta(from);
+ int error, similarity = -1;
+ git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
+
+ if ((delta->flags & GIT_DIFF_FLAG__HAS_SELF_SIMILARITY) != 0)
+ return 0;
+
+ error = similarity_measure(
+ &similarity, diff, opts, cache, 2 * delta_idx, 2 * delta_idx + 1);
+ if (error < 0)
+ return error;
+
+ if (similarity >= 0) {
+ delta->similarity = (uint32_t)similarity;
+ delta->flags |= GIT_DIFF_FLAG__HAS_SELF_SIMILARITY;
+ }
+
+ return 0;
+}
+
+static bool is_rename_target(
+ git_diff_list *diff,
+ const git_diff_find_options *opts,
+ size_t delta_idx,
+ void **cache)
+{
+ git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
+
+ /* skip things that aren't plain blobs */
+ if (!GIT_MODE_ISBLOB(delta->new_file.mode))
+ return false;
+
+ /* only consider ADDED, RENAMED, COPIED, and split MODIFIED as
+ * targets; maybe include UNTRACKED and IGNORED if requested.
+ */
+ switch (delta->status) {
+ case GIT_DELTA_UNMODIFIED:
+ case GIT_DELTA_DELETED:
+ return false;
+
+ case GIT_DELTA_MODIFIED:
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) &&
+ !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES))
+ return false;
+
+ if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
+ return false;
+
+ if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
+ delta->similarity < opts->break_rewrite_threshold) {
+ delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
+ break;
+ }
+ if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
+ delta->similarity < opts->rename_from_rewrite_threshold)
+ break;
+
+ return false;
+
+ case GIT_DELTA_UNTRACKED:
+ case GIT_DELTA_IGNORED:
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_FOR_UNTRACKED))
+ return false;
+ break;
+
+ default: /* all other status values should be checked */
+ break;
+ }
+
+ delta->flags |= GIT_DIFF_FLAG__IS_RENAME_TARGET;
+ return true;
+}
+
+static bool is_rename_source(
+ git_diff_list *diff,
+ const git_diff_find_options *opts,
+ size_t delta_idx,
+ void **cache)
+{
+ git_diff_delta *delta = GIT_VECTOR_GET(&diff->deltas, delta_idx);
+
+ /* skip things that aren't blobs */
+ if (!GIT_MODE_ISBLOB(delta->old_file.mode))
+ return false;
+
+ switch (delta->status) {
+ case GIT_DELTA_ADDED:
+ case GIT_DELTA_UNTRACKED:
+ case GIT_DELTA_IGNORED:
+ return false;
+
+ case GIT_DELTA_DELETED:
+ case GIT_DELTA_TYPECHANGE:
+ break;
+
+ case GIT_DELTA_UNMODIFIED:
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
+ return false;
+ break;
+
+ default: /* MODIFIED, RENAMED, COPIED */
+ /* if we're finding copies, this could be a source */
+ if (FLAG_SET(opts, GIT_DIFF_FIND_COPIES))
+ break;
+
+ /* otherwise, this is only a source if we can split it */
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_REWRITES) &&
+ !FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES))
+ return false;
+
+ if (calc_self_similarity(diff, opts, delta_idx, cache) < 0)
+ return false;
+
+ if (FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES) &&
+ delta->similarity < opts->break_rewrite_threshold) {
+ delta->flags |= GIT_DIFF_FLAG__TO_SPLIT;
+ break;
+ }
+
+ if (FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
+ delta->similarity < opts->rename_from_rewrite_threshold)
+ break;
+
+ return false;
+ }
+
+ delta->flags |= GIT_DIFF_FLAG__IS_RENAME_SOURCE;
+ return true;
+}
+
+GIT_INLINE(bool) delta_is_split(git_diff_delta *delta)
+{
+ return (delta->status == GIT_DELTA_TYPECHANGE ||
+ (delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0);
+}
+
+GIT_INLINE(bool) delta_is_new_only(git_diff_delta *delta)
+{
+ return (delta->status == GIT_DELTA_ADDED ||
+ delta->status == GIT_DELTA_UNTRACKED ||
+ delta->status == GIT_DELTA_IGNORED);
}
typedef struct {
@@ -583,7 +684,7 @@ int git_diff_find_similar(
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error;
- /* TODO: maybe abort if deltas.length > target_limit ??? */
+ /* TODO: maybe abort if deltas.length > rename_limit ??? */
if (!git__is_uint32(diff->deltas.length))
return 0;
@@ -594,103 +695,40 @@ int git_diff_find_similar(
matches = git__calloc(diff->deltas.length, sizeof(diff_find_match));
GITERR_CHECK_ALLOC(matches);
- /* first mark MODIFIED deltas to split if too different (if requested) */
-
- if (FLAG_SET(opts, GIT_DIFF_FIND_REWRITES)) {
- git_vector_foreach(&diff->deltas, i, from) {
- if (from->status != GIT_DELTA_MODIFIED)
- continue;
-
- /* skip things that aren't plain blobs */
- if (!GIT_MODE_ISBLOB(from->old_file.mode))
- continue;
-
- /* measure similarity from old_file to new_file */
- if ((error = similarity_measure(
- &similarity, diff, &opts, cache, 2 * i, 2 * i + 1)) < 0)
- goto cleanup;
-
- if (similarity < 0)
- continue;
- if (similarity < (int)opts.break_rewrite_threshold) {
- from->similarity = (uint32_t)similarity;
- from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
- num_rewrites++;
- }
- }
- }
-
/* next find the most similar delta for each rename / copy candidate */
- git_vector_foreach(&diff->deltas, i, from) {
- size_t tried_targets = 0;
+ git_vector_foreach(&diff->deltas, i, to) {
+ size_t tried_sources = 0;
matches[i].idx = i;
matches[i].similarity = 0;
- /* skip things that aren't plain blobs */
- if (!GIT_MODE_ISBLOB(from->old_file.mode))
- continue;
-
- /* don't check UNMODIFIED files as source unless given option */
- if (from->status == GIT_DELTA_UNMODIFIED &&
- !FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
- continue;
-
- /* don't check UNTRACKED files as source unless given option */
- if ((from->status == GIT_DELTA_UNTRACKED ||
- from->status == GIT_DELTA_IGNORED) &&
- !FLAG_SET(opts, GIT_DIFF_FIND_FROM_UNTRACKED))
+ /* skip things that are not rename targets */
+ if (!is_rename_target(diff, &opts, i, cache))
continue;
- /* only use DELETED (or split MODIFIED) unless copy detection on */
- if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES) &&
- from->status != GIT_DELTA_DELETED &&
- (from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
- continue;
-
- git_vector_foreach(&diff->deltas, j, to) {
+ git_vector_foreach(&diff->deltas, j, from) {
if (i == j)
continue;
- /* skip things that aren't blobs */
- if (!GIT_MODE_ISBLOB(to->new_file.mode))
+ /* skip things that are not rename sources */
+ if (!is_rename_source(diff, &opts, j, cache))
continue;
- /* only consider ADDED, RENAMED, COPIED, and split MODIFIED as
- * targets; maybe include UNTRACKED and IGNORED if requested.
- */
- switch (to->status) {
- case GIT_DELTA_ADDED:
- case GIT_DELTA_RENAMED:
- case GIT_DELTA_COPIED:
- break;
- case GIT_DELTA_MODIFIED:
- if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
- continue;
- break;
- case GIT_DELTA_UNTRACKED:
- case GIT_DELTA_IGNORED:
- if (!FLAG_SET(opts, GIT_DIFF_FIND_FROM_UNTRACKED))
- continue;
- break;
- default:
- /* all other status values will be skipped */
- continue;
- }
-
- /* cap on maximum targets we'll examine (per "from" file) */
- if (++tried_targets > opts.target_limit)
+ /* cap on maximum sources we'll examine (per "to" file) */
+ if (++tried_sources > opts.rename_limit)
break;
/* calculate similarity for this pair and find best match */
if ((error = similarity_measure(
- &similarity, diff, &opts, cache, 2 * i, 2 * j + 1)) < 0)
+ &similarity, diff, &opts, cache, 2 * j, 2 * i + 1)) < 0)
goto cleanup;
- if (similarity < 0) {
- --tried_targets;
+
+ if (similarity < 0) { /* not actually comparable */
+ --tried_sources;
continue;
}
+
if (matches[i].similarity < (uint32_t)similarity) {
matches[i].similarity = (uint32_t)similarity;
matches[i].idx = j;
@@ -700,97 +738,128 @@ int git_diff_find_similar(
/* next rewrite the diffs with renames / copies */
- git_vector_foreach(&diff->deltas, i, from) {
- if (!matches[i].similarity)
- continue;
+ git_vector_foreach(&diff->deltas, i, to) {
- to = GIT_VECTOR_GET(&diff->deltas, matches[i].idx);
- assert(to);
+ /* check if this delta was matched to another one */
+ if ((similarity = (int)matches[i].similarity) <= 0)
+ continue;
+ assert(to && (to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) != 0);
- similarity = (int)matches[i].similarity;
+ from = GIT_VECTOR_GET(&diff->deltas, matches[i].idx);
+ assert(from && (from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) != 0);
- /*
- * Four possible outcomes here:
+ /* possible scenarios:
+ * 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
+ * 2. from DELETE to SPLIT/TYPECHANGE = RENAME + DELETE
+ * 3. from SPLIT/TYPECHANGE to ADD/UNTRACK/IGNORE = ADD + RENAME
+ * 4. from SPLIT/TYPECHANGE to SPLIT/TYPECHANGE = RENAME + SPLIT
+ * 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY
*/
- /* 1. DELETED "from" with match over rename threshold becomes
- * RENAMED "from" record (and "to" record goes away)
- */
if (from->status == GIT_DELTA_DELETED) {
- if (similarity < (int)opts.rename_threshold)
- continue;
- to->flags |= GIT_DIFF_FLAG__TO_DELETE;
+ if (delta_is_new_only(to)) {
- from->status = GIT_DELTA_RENAMED;
- from->similarity = (uint32_t)similarity;
- memcpy(&from->new_file, &to->new_file, sizeof(to->new_file));
- validate_delta(from);
+ if (similarity < (int)opts.rename_threshold)
+ continue;
- num_rewrites++;
- continue;
- }
+ from->status = GIT_DELTA_RENAMED;
+ from->similarity = (uint32_t)similarity;
+ memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
- /* 2. SPLIT MODIFIED "from" with match over rename threshold becomes
- * ADDED "from" record (with no SPLIT) and RENAMED "to" record
- */
- if (from->status == GIT_DELTA_MODIFIED &&
- (from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
+ to->flags |= GIT_DIFF_FLAG__TO_DELETE;
- if (similarity < (int)opts.rename_threshold)
- continue;
+ num_rewrites++;
+ } else {
+ assert(delta_is_split(from));
- convert_to_rename_and_add(diff, from, to, similarity);
- num_rewrites--;
- num_updates++;
- continue;
- }
+ if (similarity < (int)opts.rename_from_rewrite_threshold)
+ continue;
- /* 3. MODIFIED "from" with FIND_RENAMES_FROM_REWRITES with similar
- * "to" and self-similarity below rename_from_rewrite_threshold
- * becomes newly ADDED "from" and RENAMED "to".
- */
- if (from->status == GIT_DELTA_MODIFIED &&
- FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
- similarity > (int)opts.rename_threshold)
- {
- int self_similarity;
-
- if ((error = similarity_measure(&self_similarity,
- diff, &opts, cache, 2 * i, 2 * i + 1)) < 0)
- goto cleanup;
+ from->status = GIT_DELTA_RENAMED;
+ from->similarity = (uint32_t)similarity;
+ memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
- if (self_similarity >= 0 &&
- self_similarity < (int)opts.rename_from_rewrite_threshold) {
+ to->status = GIT_DELTA_DELETED;
+ memset(&to->new_file, 0, sizeof(to->new_file));
+ to->new_file.path = to->old_file.path;
+ to->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
+ if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
+ to->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ num_rewrites--;
+ }
- convert_to_rename_and_add(diff, from, to, similarity);
num_updates++;
- continue;
}
}
- /* 4. if "from" -> "to" over copy threshold, "to" becomes COPIED */
- if (similarity < (int)opts.copy_threshold)
- continue;
+ else if (delta_is_split(from)) {
+ git_diff_file swap;
+
+ if (delta_is_new_only(to)) {
- /* convert "to" to a COPIED record */
- to->status = GIT_DELTA_COPIED;
- to->similarity = (uint32_t)similarity;
- memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
- validate_delta(to);
+ if (similarity < (int)opts.rename_threshold)
+ continue;
- validate_delta(from);
+ memcpy(&swap, &from->new_file, sizeof(swap));
+
+ from->status = GIT_DELTA_RENAMED;
+ from->similarity = (uint32_t)similarity;
+ memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
+ if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
+ from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ num_rewrites--;
+ }
- num_updates++;
+ to->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
+ GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
+ memcpy(&to->new_file, &swap, sizeof(to->new_file));
+ to->old_file.path = to->new_file.path;
+
+ num_updates++;
+ } else {
+ assert(delta_is_split(from));
+
+ if (similarity < (int)opts.rename_from_rewrite_threshold)
+ continue;
+
+ memcpy(&swap, &from->new_file, sizeof(swap));
+
+ from->status = GIT_DELTA_RENAMED;
+ from->similarity = (uint32_t)similarity;
+ memcpy(&from->new_file, &to->new_file, sizeof(from->new_file));
+ if ((from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
+ from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ num_rewrites--;
+ }
+
+ memcpy(&to->new_file, &swap, sizeof(to->new_file));
+ if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0) {
+ to->flags |= GIT_DIFF_FLAG__TO_SPLIT;
+ num_rewrites++;
+ }
+
+ num_updates++;
+ }
+ }
+
+ else if (delta_is_new_only(to)) {
+ if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
+ similarity < (int)opts.copy_threshold)
+ continue;
+
+ to->status = GIT_DELTA_COPIED;
+ to->similarity = (uint32_t)similarity;
+ memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
+
+ num_updates++;
+ }
}
- if (num_rewrites > 0)
+ if (num_rewrites > 0 || num_updates > 0)
error = apply_splits_and_deletes(
diff, diff->deltas.length - num_rewrites,
- FLAG_SET(opts, GIT_DIFF_BREAK_REWRITES));
-
- if (num_rewrites > 0 || num_updates > 0)
- git_vector_sort(&diff->deltas);
+ FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup:
git__free(matches);
diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c
index a78e33939..b4f9df713 100644
--- a/tests-clar/diff/rename.c
+++ b/tests-clar/diff/rename.c
@@ -15,6 +15,18 @@ void test_diff_rename__cleanup(void)
}
/*
+static int debug_print(
+ const git_diff_delta *delta, const git_diff_range *range, char usage,
+ const char *line, size_t line_len, void *data)
+{
+ GIT_UNUSED(delta); GIT_UNUSED(range); GIT_UNUSED(usage);
+ GIT_UNUSED(line_len); GIT_UNUSED(data);
+ fputs(line, stderr);
+ return 0;
+}
+*/
+
+/*
* Renames repo has:
*
* commit 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 -
@@ -72,8 +84,10 @@ void test_diff_rename__match_oid(void)
/* git diff 31e47d8c1fa36d7f8d537b96158e3f024de0a9f2 \
* 2bc7f351d20b53f1c72c16c4b036e491c478c49a
+ * don't use NULL opts to avoid config `diff.renames` contamination
*/
- cl_git_pass(git_diff_find_similar(diff, NULL));
+ opts.flags = GIT_DIFF_FIND_RENAMES;
+ cl_git_pass(git_diff_find_similar(diff, &opts));
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
@@ -243,8 +257,8 @@ void test_diff_rename__not_exact_match(void)
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNMODIFIED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
- cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_ADDED]);
+ cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_COPIED]);
git_diff_list_free(diff);
@@ -429,8 +443,8 @@ void test_diff_rename__working_directory_changes(void)
cl_assert_equal_i(6, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
- cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_DELETED]);
+ cl_assert_equal_i(3, exp.file_status[GIT_DELTA_UNTRACKED]);
/* git diff -M 2bc7f351d20b53f1c72c16c4b036e491c478c49a */
opts.flags = GIT_DIFF_FIND_ALL;
@@ -441,7 +455,8 @@ void test_diff_rename__working_directory_changes(void)
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
- cl_assert_equal_i(3, exp.file_status[GIT_DELTA_RENAMED]);
+ cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
+ cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
@@ -466,7 +481,8 @@ void test_diff_rename__working_directory_changes(void)
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
- cl_assert_equal_i(3, exp.file_status[GIT_DELTA_RENAMED]);
+ cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
+ cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
@@ -521,13 +537,19 @@ void test_diff_rename__working_directory_changes(void)
opts.flags = GIT_DIFF_FIND_ALL | GIT_DIFF_FIND_EXACT_MATCH_ONLY;
cl_git_pass(git_diff_find_similar(diff, &opts));
+ /*
+ fprintf(stderr, "\n\n");
+ cl_git_pass(git_diff_print_raw(diff, debug_print, NULL));
+ */
+
memset(&exp, 0, sizeof(exp));
cl_git_pass(git_diff_foreach(
diff, diff_file_cb, diff_hunk_cb, diff_line_cb, &exp));
cl_assert_equal_i(5, exp.files);
cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
- cl_assert_equal_i(2, exp.file_status[GIT_DELTA_RENAMED]);
+ cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
+ cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
cl_assert_equal_i(2, exp.file_status[GIT_DELTA_UNTRACKED]);
git_diff_list_free(diff);
diff --git a/tests-clar/object/raw/convert.c b/tests-clar/object/raw/convert.c
index 86f0d74a9..88b1380a4 100644
--- a/tests-clar/object/raw/convert.c
+++ b/tests-clar/object/raw/convert.c
@@ -87,7 +87,6 @@ void test_object_raw_convert__convert_oid_partially(void)
const char *exp = "16a0123456789abcdef4b775213c23a8bd74f5e0";
git_oid in;
char big[GIT_OID_HEXSZ + 1 + 3]; /* note + 4 => big buffer */
- char *str;
cl_git_pass(git_oid_fromstr(&in, exp));