diff options
author | Russell Belfer <rb@github.com> | 2012-10-23 16:40:51 -0700 |
---|---|---|
committer | Russell Belfer <rb@github.com> | 2012-10-23 16:40:51 -0700 |
commit | b4f5bb074721823cc016b66a9984abe2c271cb1f (patch) | |
tree | 946a7522e701442786cdec870a9035f4bde097a4 /src | |
parent | 5b67d145d8f465ed0c7ed9c07d331aae29c2713b (diff) | |
download | libgit2-b4f5bb074721823cc016b66a9984abe2c271cb1f.tar.gz |
Initial implementation of diff rename detection
This implements the basis for diff rename and copy detection,
although it is based on simple SHA comparison right now instead
of using a matching algorithm. Just as `git_diff_merge` can be
used as a post-pass on diffs to emulate certain command line
behaviors, there is a new API `git_diff_detect` which will
update a diff list in-place, adjusting some deltas to RENAMED
or COPIED state (and also, eventually, splitting MODIFIED deltas
where the change is too large into DELETED/ADDED pairs).
This also adds a new test repo that will hold rename/copy/split
scenarios. Right now, it just has exact-match rename and copy,
but the tests are written to use tree diffs, so we should be able
to add new test scenarios easily without breaking tests.
Diffstat (limited to 'src')
-rw-r--r-- | src/diff.c | 147 | ||||
-rw-r--r-- | src/diff.h | 3 |
2 files changed, 150 insertions, 0 deletions
diff --git a/src/diff.c b/src/diff.c index 9f693bebf..e2649ff3b 100644 --- a/src/diff.c +++ b/src/diff.c @@ -378,12 +378,23 @@ static git_diff_list *git_diff_list_alloc( diff->diffcaps = diff->diffcaps | GIT_DIFFCAPS_TRUST_CTIME; /* Don't set GIT_DIFFCAPS_USE_DEV - compile time option in core git */ + /* TODO: there are certain config settings where even if we were + * not given an options structure, we need the diff list to have one + * so that we can store the altered default values. + * + * - diff.ignoreSubmodules + * - diff.mnemonicprefix + * - diff.noprefix + */ + if (opts == NULL) return diff; memcpy(&diff->opts, opts, sizeof(git_diff_options)); memset(&diff->opts.pathspec, 0, sizeof(diff->opts.pathspec)); + /* TODO: handle config diff.mnemonicprefix, diff.noprefix */ + diff->opts.old_prefix = diff_strdup_prefix(&diff->pool, opts->old_prefix ? opts->old_prefix : DIFF_OLD_PREFIX_DEFAULT); diff->opts.new_prefix = diff_strdup_prefix(&diff->pool, @@ -1082,3 +1093,139 @@ int git_diff_merge( return error; } + +#define DEFAULT_THRESHOLD 50 +#define DEFAULT_TARGET_LIMIT 200 + +int git_diff_detect( + git_diff_list *diff, + git_diff_detect_options *opts) +{ + int error = 0; + unsigned int i, j; + git_diff_delta *from, *to; + bool check_unmodified = opts && + (opts->flags & GIT_DIFF_DETECT_COPIES_FROM_UNMODIFIED) != 0; + int max_targets = (opts && opts->target_limit > 0) ? + opts->target_limit : DEFAULT_TARGET_LIMIT; + unsigned int rename_threshold = (opts && opts->rename_threshold > 0) ? + opts->rename_threshold : DEFAULT_THRESHOLD; + unsigned int copy_threshold = (opts && opts->copy_threshold > 0) ? 
+ opts->copy_threshold : DEFAULT_THRESHOLD; + int num_deletes = 0, num_splits = 0; + + /* TODO: update opts from config diff.renameLimit / diff.renames */ + + git_vector_foreach(&diff->deltas, i, from) { + int tried_targets = 0; + + git_vector_foreach(&diff->deltas, j, to) { + unsigned int similarity = 0; + + if (i == j) + continue; + + switch (to->status) { + case GIT_DELTA_ADDED: + case GIT_DELTA_UNTRACKED: + case GIT_DELTA_RENAMED: + case GIT_DELTA_COPIED: + break; + default: + /* only those status values should be checked */ + continue; + } + + /* don't check UNMODIFIED files as source unless given option */ + if (from->status == GIT_DELTA_UNMODIFIED && !check_unmodified) + continue; + + /* cap on maximum files we'll examine */ + if (++tried_targets > max_targets) + break; + + /* calculate similarity and see if this pair beats the + * similarity score of the current best pair. + */ + if (git_oid_cmp(&from->old_file.oid, &to->new_file.oid) == 0) + similarity = 100; + /* TODO: insert actual similarity algo here */ + + if (similarity <= to->similarity) + continue; + + if (from->status == GIT_DELTA_DELETED) { + if (similarity < rename_threshold) + continue; + + /* merge "from" & "to" to a RENAMED record */ + to->status = GIT_DELTA_RENAMED; + memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); + + from->status = GIT_DELTA__TO_DELETE; + num_deletes++; + } else { + if (similarity < copy_threshold) + continue; + + /* convert "to" to a COPIED record */ + to->status = GIT_DELTA_COPIED; + memcpy(&to->old_file, &from->old_file, sizeof(to->old_file)); + } + } + + if (from->status == GIT_DELTA_MODIFIED && + opts && (opts->flags & GIT_DIFF_DETECT_BREAK_REWRITES) != 0) + { + /* TODO: calculate similarity and maybe mark for split */ + + /* from->status = GIT_DELTA__TO_SPLIT; */ + /* num_splits++; */ + } + } + + if (num_deletes > 0 || num_splits > 0) { + git_vector onto = GIT_VECTOR_INIT; + size_t new_size = diff->deltas.length + num_splits - num_deletes; + + if 
(git_vector_init(&onto, new_size, diff_delta__cmp) < 0) + return -1; + + /* build new delta list without TO_DELETE and splitting TO_SPLIT */ + git_vector_foreach(&diff->deltas, i, from) { + if (from->status == GIT_DELTA__TO_DELETE) { + git__free(from); + continue; + } + + if (from->status == GIT_DELTA__TO_SPLIT) { + git_diff_delta *deleted = diff_delta__dup(from, &diff->pool); + if (!deleted) + return -1; + + deleted->status = GIT_DELTA_DELETED; + memset(&deleted->new_file, 0, sizeof(deleted->new_file)); + deleted->new_file.path = deleted->old_file.path; + deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID; + + git_vector_insert(&onto, deleted); + + from->status = GIT_DELTA_ADDED; + memset(&from->old_file, 0, sizeof(from->old_file)); + from->old_file.path = from->new_file.path; + from->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + } + + git_vector_insert(&onto, from); + } + + /* swap new delta list into place */ + + git_vector_sort(&onto); + git_vector_swap(&diff->deltas, &onto); + git_vector_free(&onto); + } + + return error; +} + diff --git a/src/diff.h b/src/diff.h index c6a26aee7..61723bc9e 100644 --- a/src/diff.h +++ b/src/diff.h @@ -28,6 +28,9 @@ enum { GIT_DIFFCAPS_USE_DEV = (1 << 4), /* use st_dev? */ }; +#define GIT_DELTA__TO_DELETE 10 +#define GIT_DELTA__TO_SPLIT 11 + struct git_diff_list { git_refcount rc; git_repository *repo; |