summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/diff.h29
-rw-r--r--src/diff_file.c14
-rw-r--r--src/diff_tform.c116
3 files changed, 89 insertions, 70 deletions
diff --git a/src/diff.h b/src/diff.h
index d09a130bc..d1bec00c6 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -16,6 +16,7 @@
#include "iterator.h"
#include "repository.h"
#include "pool.h"
+#include "odb.h"
#define DIFF_OLD_PREFIX_DEFAULT "a/"
#define DIFF_NEW_PREFIX_DEFAULT "b/"
@@ -108,5 +109,33 @@ extern void git_diff_find_similar__hashsig_free(void *sig, void *payload);
extern int git_diff_find_similar__calc_similarity(
int *score, void *siga, void *sigb, void *payload);
+/*
+ * Sometimes a git_diff_file will have a zero size; this attempts to
+ * fill in the size without loading the blob if possible. If that is
+ * not possible, then it will return the git_odb_object that had to be
+ * loaded and the caller can use it or dispose of it as needed.
+ */
+GIT_INLINE(int) git_diff_file__resolve_zero_size(
+ git_diff_file *file, git_odb_object **odb_obj, git_repository *repo)
+{
+ int error;
+ git_odb *odb;
+ size_t len;
+ git_otype type;
+
+ if ((error = git_repository_odb(&odb, repo)) < 0)
+ return error;
+
+ error = git_odb__read_header_or_object(
+ odb_obj, &len, &type, odb, &file->oid);
+
+ git_odb_free(odb);
+
+ if (!error)
+ file->size = (git_off_t)len;
+
+ return error;
+}
+
#endif
diff --git a/src/diff_file.c b/src/diff_file.c
index 19bcf2d45..bcfef13cd 100644
--- a/src/diff_file.c
+++ b/src/diff_file.c
@@ -241,19 +241,9 @@ static int diff_file_content_load_blob(git_diff_file_content *fc)
/* if we don't know size, try to peek at object header first */
if (!fc->file->size) {
- git_odb *odb;
- size_t len;
- git_otype type;
-
- if (!(error = git_repository_odb__weakptr(&odb, fc->repo))) {
- error = git_odb__read_header_or_object(
- &odb_obj, &len, &type, odb, &fc->file->oid);
- git_odb_free(odb);
- }
- if (error)
+ if ((error = git_diff_file__resolve_zero_size(
+ fc->file, &odb_obj, fc->repo)) < 0)
return error;
-
- fc->file->size = len;
}
if (diff_file_content_binary_by_size(fc))
diff --git a/src/diff_tform.c b/src/diff_tform.c
index 08b0d5c38..8fd2a4fe9 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -414,59 +414,26 @@ typedef struct {
git_repository *repo;
git_diff_file *file;
git_buf data;
+ git_odb_object *odb_obj;
git_blob *blob;
- int loaded;
} similarity_info;
-static void similarity_init(
+static int similarity_init(
similarity_info *info, git_diff_list *diff, size_t file_idx)
{
info->idx = file_idx;
info->src = (file_idx & 1) ? diff->new_src : diff->old_src;
info->repo = diff->repo;
info->file = similarity_get_file(diff, file_idx);
+ info->odb_obj = NULL;
info->blob = NULL;
- info->loaded = 0;
git_buf_init(&info->data, 0);
-}
-
-static int similarity_load(similarity_info *info)
-{
- int error = 0;
- git_diff_file *file = info->file;
-
- if (info->src == GIT_ITERATOR_TYPE_WORKDIR) {
- error = git_buf_joinpath(
- &info->data, git_repository_workdir(info->repo), file->path);
-
- /* if path is not a regular file, just skip this item */
- if (!error && !git_path_isfile(info->data.ptr))
- git_buf_free(&info->data);
- } else if (git_blob_lookup(&info->blob, info->repo, &file->oid) < 0) {
- /* if lookup fails, just skip this item in similarity calc */
- giterr_clear();
- } else {
- if (!file->size)
- file->size = git_blob_rawsize(info->blob);
- assert(file->size == git_blob_rawsize(info->blob));
-
- info->data.size = (size_t)(git__is_sizet(file->size) ? file->size : -1);
- info->data.ptr = (char *)git_blob_rawcontent(info->blob);
- }
-
- info->loaded = 1;
- return error;
-}
-
-static void similarity_unload(similarity_info *info)
-{
- if (info->blob)
- git_blob_free(info->blob);
- else
- git_buf_free(&info->data);
+ if (info->file->size > 0)
+ return 0;
- info->loaded = 0;
+ return git_diff_file__resolve_zero_size(
+ info->file, &info->odb_obj, info->repo);
}
static int similarity_calc(
@@ -475,28 +442,59 @@ static int similarity_calc(
void **cache)
{
int error = 0;
+ git_diff_file *file = info->file;
- if (!info->loaded && (error = similarity_load(info)) < 0)
- return error;
+ if (info->src == GIT_ITERATOR_TYPE_WORKDIR) {
+ if ((error = git_buf_joinpath(
+ &info->data, git_repository_workdir(info->repo), file->path)) < 0)
+ return error;
- if (!info->data.size)
- return 0;
+ /* if path is not a regular file, just skip this item */
+ if (!git_path_isfile(info->data.ptr))
+ return 0;
- if (info->src == GIT_ITERATOR_TYPE_WORKDIR) {
/* TODO: apply wd-to-odb filters to file data if necessary */
error = opts->metric->file_signature(
&cache[info->idx], info->file,
info->data.ptr, opts->metric->payload);
} else {
- error = opts->metric->buffer_signature(
- &cache[info->idx], info->file,
- info->data.ptr, info->data.size, opts->metric->payload);
+ /* if we didn't initially know the size, we might have an odb_obj
+ * around from earlier, so convert that, otherwise load the blob now
+ */
+ if (info->odb_obj != NULL)
+ error = git_object__from_odb_object(
+ (git_object **)&info->blob, info->repo,
+ info->odb_obj, GIT_OBJ_BLOB);
+ else
+ error = git_blob_lookup(&info->blob, info->repo, &file->oid);
+
+ if (error < 0) {
+ /* if lookup fails, just skip this item in similarity calc */
+ giterr_clear();
+ } else {
+ size_t sz = (size_t)(git__is_sizet(file->size) ? file->size : -1);
+
+ error = opts->metric->buffer_signature(
+ &cache[info->idx], info->file,
+ git_blob_rawcontent(info->blob), sz, opts->metric->payload);
+ }
}
return error;
}
+static void similarity_unload(similarity_info *info)
+{
+ if (info->odb_obj)
+ git_odb_object_free(info->odb_obj);
+
+ if (info->blob)
+ git_blob_free(info->blob);
+ else
+ git_buf_free(&info->data);
+}
+
#define FLAG_SET(opts,flag_name) (((opts)->flags & flag_name) != 0)
/* - score < 0 means files cannot be compared
@@ -550,26 +548,28 @@ static int similarity_measure(
return 0;
}
- similarity_init(&a_info, diff, a_idx);
- similarity_init(&b_info, diff, b_idx);
+ memset(&a_info, 0, sizeof(a_info));
+ memset(&b_info, 0, sizeof(b_info));
- if (!a_file->size && (error = similarity_load(&a_info)) < 0)
- goto done;
- if (!b_file->size && (error = similarity_load(&b_info)) < 0)
- goto done;
+ /* set up similarity data (will try to update missing file sizes) */
+ if (!cache[a_idx] && (error = similarity_init(&a_info, diff, a_idx)) < 0)
+ return error;
+ if (!cache[b_idx] && (error = similarity_init(&b_info, diff, b_idx)) < 0)
+ goto cleanup;
/* check if file sizes are nowhere near each other */
if (a_file->size > 127 &&
b_file->size > 127 &&
(a_file->size > (b_file->size << 4) ||
b_file->size > (a_file->size << 4)))
- goto done;
+ goto cleanup;
/* update signature cache if needed */
if (!cache[a_idx] && (error = similarity_calc(&a_info, opts, cache)) < 0)
- goto done;
+ goto cleanup;
+
if (!cache[b_idx] && (error = similarity_calc(&b_info, opts, cache)) < 0)
- goto done;
+ goto cleanup;
/* calculate similarity provided that the metric choose to process
* both the a and b files (some may not if file is too big, etc).
@@ -578,7 +578,7 @@ static int similarity_measure(
error = opts->metric->similarity(
score, cache[a_idx], cache[b_idx], opts->metric->payload);
-done:
+cleanup:
similarity_unload(&a_info);
similarity_unload(&b_info);