summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/diff_tform.c145
-rw-r--r--tests-clar/diff/rename.c50
2 files changed, 146 insertions, 49 deletions
diff --git a/src/diff_tform.c b/src/diff_tform.c
index cabcd1f80..08b0d5c38 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -408,54 +408,90 @@ GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
return (idx & 1) ? &delta->new_file : &delta->old_file;
}
-static int similarity_calc(
- git_diff_list *diff,
- const git_diff_find_options *opts,
- size_t file_idx,
- void **cache)
+typedef struct {
+ size_t idx;
+ git_iterator_type_t src;
+ git_repository *repo;
+ git_diff_file *file;
+ git_buf data;
+ git_blob *blob;
+ int loaded;
+} similarity_info;
+
+static void similarity_init(
+ similarity_info *info, git_diff_list *diff, size_t file_idx)
+{
+ info->idx = file_idx;
+ info->src = (file_idx & 1) ? diff->new_src : diff->old_src;
+ info->repo = diff->repo;
+ info->file = similarity_get_file(diff, file_idx);
+ info->blob = NULL;
+ info->loaded = 0;
+ git_buf_init(&info->data, 0);
+}
+
+static int similarity_load(similarity_info *info)
{
int error = 0;
- git_diff_file *file = similarity_get_file(diff, file_idx);
- git_iterator_type_t src = (file_idx & 1) ? diff->new_src : diff->old_src;
+ git_diff_file *file = info->file;
- if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */
- git_buf path = GIT_BUF_INIT;
+ if (info->src == GIT_ITERATOR_TYPE_WORKDIR) {
+ error = git_buf_joinpath(
+ &info->data, git_repository_workdir(info->repo), file->path);
- /* TODO: apply wd-to-odb filters to file data if necessary */
+ /* if path is not a regular file, just skip this item */
+ if (!error && !git_path_isfile(info->data.ptr))
+ git_buf_free(&info->data);
+ } else if (git_blob_lookup(&info->blob, info->repo, &file->oid) < 0) {
+ /* if lookup fails, just skip this item in similarity calc */
+ giterr_clear();
+ } else {
+ if (!file->size)
+ file->size = git_blob_rawsize(info->blob);
+ assert(file->size == git_blob_rawsize(info->blob));
- if ((error = git_buf_joinpath(
- &path, git_repository_workdir(diff->repo), file->path)) < 0)
- return error;
+ info->data.size = (size_t)(git__is_sizet(file->size) ? file->size : -1);
+ info->data.ptr = (char *)git_blob_rawcontent(info->blob);
+ }
- /* if path is not a regular file, just skip this item */
- if (git_path_isfile(path.ptr))
- error = opts->metric->file_signature(
- &cache[file_idx], file, path.ptr, opts->metric->payload);
+ info->loaded = 1;
- git_buf_free(&path);
- } else { /* compute hashsig from blob buffer */
- git_blob *blob = NULL;
- git_off_t blobsize;
+ return error;
+}
- /* TODO: add max size threshold a la diff? */
+static void similarity_unload(similarity_info *info)
+{
+ if (info->blob)
+ git_blob_free(info->blob);
+ else
+ git_buf_free(&info->data);
- if (git_blob_lookup(&blob, diff->repo, &file->oid) < 0) {
- /* if lookup fails, just skip this item in similarity calc */
- giterr_clear();
- return 0;
- }
+ info->loaded = 0;
+}
- blobsize = git_blob_rawsize(blob);
- if (!file->size)
- file->size = blobsize;
- if (!git__is_sizet(blobsize)) /* ? what to do ? */
- blobsize = (size_t)-1;
+static int similarity_calc(
+ similarity_info *info,
+ const git_diff_find_options *opts,
+ void **cache)
+{
+ int error = 0;
- error = opts->metric->buffer_signature(
- &cache[file_idx], file, git_blob_rawcontent(blob),
- (size_t)blobsize, opts->metric->payload);
+ if (!info->loaded && (error = similarity_load(info)) < 0)
+ return error;
- git_blob_free(blob);
+ if (!info->data.size)
+ return 0;
+
+ if (info->src == GIT_ITERATOR_TYPE_WORKDIR) {
+ /* TODO: apply wd-to-odb filters to file data if necessary */
+
+ error = opts->metric->file_signature(
+ &cache[info->idx], info->file,
+ info->data.ptr, opts->metric->payload);
+ } else {
+ error = opts->metric->buffer_signature(
+ &cache[info->idx], info->file,
+ info->data.ptr, info->data.size, opts->metric->payload);
}
return error;
@@ -478,6 +514,8 @@ static int similarity_measure(
git_diff_file *a_file = similarity_get_file(diff, a_idx);
git_diff_file *b_file = similarity_get_file(diff, b_idx);
bool exact_match = FLAG_SET(opts, GIT_DIFF_FIND_EXACT_MATCH_ONLY);
+ int error = 0;
+ similarity_info a_info, b_info;
*score = -1;
@@ -512,26 +550,39 @@ static int similarity_measure(
return 0;
}
+ similarity_init(&a_info, diff, a_idx);
+ similarity_init(&b_info, diff, b_idx);
+
+ if (!a_file->size && (error = similarity_load(&a_info)) < 0)
+ goto done;
+ if (!b_file->size && (error = similarity_load(&b_info)) < 0)
+ goto done;
+
/* check if file sizes are nowhere near each other */
if (a_file->size > 127 &&
b_file->size > 127 &&
(a_file->size > (b_file->size << 4) ||
b_file->size > (a_file->size << 4)))
- return 0;
+ goto done;
/* update signature cache if needed */
- if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0)
- return -1;
- if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0)
- return -1;
+ if (!cache[a_idx] && (error = similarity_calc(&a_info, opts, cache)) < 0)
+ goto done;
+ if (!cache[b_idx] && (error = similarity_calc(&b_info, opts, cache)) < 0)
+ goto done;
- /* some metrics may not wish to process this file (too big / too small) */
- if (!cache[a_idx] || !cache[b_idx])
- return 0;
+ /* calculate similarity provided that the metric choose to process
+ * both the a and b files (some may not if file is too big, etc).
+ */
+ if (cache[a_idx] && cache[b_idx])
+ error = opts->metric->similarity(
+ score, cache[a_idx], cache[b_idx], opts->metric->payload);
- /* compare signatures */
- return opts->metric->similarity(
- score, cache[a_idx], cache[b_idx], opts->metric->payload);
+done:
+ similarity_unload(&a_info);
+ similarity_unload(&b_info);
+
+ return error;
}
static int calc_self_similarity(
diff --git a/tests-clar/diff/rename.c b/tests-clar/diff/rename.c
index ed58b7aa9..79c89e362 100644
--- a/tests-clar/diff/rename.c
+++ b/tests-clar/diff/rename.c
@@ -1126,7 +1126,7 @@ void test_diff_rename__unmodified_can_be_renamed(void)
void test_diff_rename__many_files(void)
{
git_index *index;
- git_tree *tree;
+ git_tree *tree, *new_tree;
git_diff_list *diff = NULL;
diff_expects exp;
git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
@@ -1176,6 +1176,52 @@ void test_diff_rename__many_files(void)
cl_assert_equal_i(51, exp.files);
git_diff_list_free(diff);
- git_index_free(index);
+
+ {
+ git_object *parent;
+ git_signature *sig;
+ git_oid tree_id, commit_id;
+ git_reference *ref;
+
+ cl_git_pass(git_index_write_tree(&tree_id, index));
+ cl_git_pass(git_tree_lookup(&new_tree, g_repo, &tree_id));
+
+ cl_git_pass(git_revparse_ext(&parent, &ref, g_repo, "HEAD"));
+ cl_git_pass(git_signature_new(
+ &sig, "Sm Test", "sm@tester.test", 1372350000, 480));
+
+ cl_git_pass(git_commit_create_v(
+ &commit_id, g_repo, git_reference_name(ref), sig, sig,
+ NULL, "yoyoyo", new_tree, 1, parent));
+
+ git_object_free(parent);
+ git_reference_free(ref);
+ git_signature_free(sig);
+ }
+
+ cl_git_pass(git_diff_tree_to_tree(
+ &diff, g_repo, tree, new_tree, &diffopts));
+
+ memset(&exp, 0, sizeof(exp));
+ cl_git_pass(git_diff_foreach(
+ diff, diff_file_cb, NULL, NULL, &exp));
+ cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
+ cl_assert_equal_i(51, exp.file_status[GIT_DELTA_ADDED]);
+ cl_assert_equal_i(52, exp.files);
+
+ opts.flags = GIT_DIFF_FIND_ALL;
+ cl_git_pass(git_diff_find_similar(diff, &opts));
+
+ memset(&exp, 0, sizeof(exp));
+ cl_git_pass(git_diff_foreach(
+ diff, diff_file_cb, NULL, NULL, &exp));
+ cl_assert_equal_i(1, exp.file_status[GIT_DELTA_RENAMED]);
+ cl_assert_equal_i(50, exp.file_status[GIT_DELTA_ADDED]);
+ cl_assert_equal_i(51, exp.files);
+
+ git_diff_list_free(diff);
+
+ git_tree_free(new_tree);
git_tree_free(tree);
+ git_index_free(index);
}