summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/checkout.c11
-rw-r--r--src/clone.c11
-rw-r--r--src/common.h6
-rw-r--r--src/config.c6
-rw-r--r--src/diff.c8
-rw-r--r--src/diff.h10
-rw-r--r--src/diff_output.c207
-rw-r--r--src/diff_tform.c324
-rw-r--r--src/fileops.c35
-rw-r--r--src/fileops.h10
-rw-r--r--src/hash/hash_generic.c2
-rw-r--r--src/hashsig.c365
-rw-r--r--src/hashsig.h72
-rw-r--r--src/index.c43
-rw-r--r--src/index.h2
-rw-r--r--src/indexer.c371
-rw-r--r--src/pack-objects.c2
-rw-r--r--src/pack.c11
-rw-r--r--src/pack.h4
-rw-r--r--src/repository.c1
-rw-r--r--src/tree.c16
-rw-r--r--src/win32/git2.rc4
-rw-r--r--src/win32/msvc-compat.h9
-rw-r--r--src/win32/posix_w32.c6
24 files changed, 977 insertions, 559 deletions
diff --git a/src/checkout.c b/src/checkout.c
index 59cd218a9..19ac913d3 100644
--- a/src/checkout.c
+++ b/src/checkout.c
@@ -78,7 +78,7 @@ static int checkout_notify(
git_oid_cpy(&wdfile.oid, &wditem->oid);
wdfile.path = wditem->path;
wdfile.size = wditem->file_size;
- wdfile.flags = GIT_DIFF_FILE_VALID_OID;
+ wdfile.flags = GIT_DIFF_FLAG_VALID_OID;
wdfile.mode = wditem->mode;
workdir = &wdfile;
@@ -456,7 +456,7 @@ static int checkout_action(
while (1) {
if (!wd)
return checkout_action_no_wd(data, delta);
-
+
cmp = strcomp(wd->path, delta->old_file.path);
/* 1. wd before delta ("a/a" before "a/b")
@@ -475,6 +475,8 @@ static int checkout_action(
/* case 2 - entry prefixed by workdir tree */
if (git_iterator_advance_into_directory(workdir, &wd) < 0)
goto fail;
+
+ *wditem_ptr = wd;
continue;
}
@@ -608,7 +610,7 @@ static int checkout_get_actions(
if (act & CHECKOUT_ACTION__CONFLICT)
counts[CHECKOUT_ACTION__CONFLICT]++;
}
-
+
error = checkout_remaining_wd_items(data, workdir, wditem, &pathspec);
if (error < 0)
goto fail;
@@ -1141,6 +1143,9 @@ static int checkout_data_init(
if ((error = git_repository_index(&data->index, data->repo)) < 0 ||
(error = git_index_read(data->index)) < 0)
goto cleanup;
+
+ /* clear the REUC when doing a tree or commit checkout */
+ git_index_reuc_clear(data->index);
}
}
diff --git a/src/clone.c b/src/clone.c
index 409a77f92..0bbccd44b 100644
--- a/src/clone.c
+++ b/src/clone.c
@@ -429,6 +429,7 @@ int git_clone(
int retcode = GIT_ERROR;
git_repository *repo = NULL;
git_clone_options normOptions;
+ int remove_directory_on_failure = 0;
assert(out && url && local_path);
@@ -439,11 +440,19 @@ int git_clone(
return GIT_ERROR;
}
+ /* Only remove the directory on failure if we create it */
+ remove_directory_on_failure = !git_path_exists(local_path);
+
if (!(retcode = git_repository_init(&repo, local_path, normOptions.bare))) {
if ((retcode = setup_remotes_and_fetch(repo, url, &normOptions)) < 0) {
/* Failed to fetch; clean up */
git_repository_free(repo);
- git_futils_rmdir_r(local_path, NULL, GIT_RMDIR_REMOVE_FILES);
+
+ if (remove_directory_on_failure)
+ git_futils_rmdir_r(local_path, NULL, GIT_RMDIR_REMOVE_FILES);
+ else
+ git_futils_cleanupdir_r(local_path);
+
} else {
*out = repo;
retcode = 0;
diff --git a/src/common.h b/src/common.h
index ca203ee5c..e3a9e1984 100644
--- a/src/common.h
+++ b/src/common.h
@@ -33,14 +33,14 @@
# include "win32/pthread.h"
#endif
-# define snprintf _snprintf
-
#else
-# include <unistd.h>
+# include <unistd.h>
# ifdef GIT_THREADS
# include <pthread.h>
# endif
+#define GIT_STDLIB_CALL
+
#endif
#include "git2/types.h"
diff --git a/src/config.c b/src/config.c
index ce105089e..d6aa3078c 100644
--- a/src/config.c
+++ b/src/config.c
@@ -426,8 +426,6 @@ static int get_string(const char **out, const git_config *cfg, const char *name)
file_internal *internal;
unsigned int i;
- assert(cfg->files.length);
-
git_vector_foreach(&cfg->files, i, internal) {
int res = get_string_at_file(out, internal->file, name);
@@ -466,8 +464,6 @@ int git_config_get_entry(const git_config_entry **out, const git_config *cfg, co
file_internal *internal;
unsigned int i;
- assert(cfg->files.length);
-
*out = NULL;
git_vector_foreach(&cfg->files, i, internal) {
@@ -488,8 +484,6 @@ int git_config_get_multivar(const git_config *cfg, const char *name, const char
int ret = GIT_ENOTFOUND;
size_t i;
- assert(cfg->files.length);
-
/*
* This loop runs the "wrong" way 'round because we need to
* look at every value from the most general to most specific
diff --git a/src/diff.c b/src/diff.c
index d9bc32a37..0861b13eb 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -92,11 +92,11 @@ static int diff_delta__from_one(
git_oid_cpy(&delta->new_file.oid, &entry->oid);
}
- delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
+ delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if (delta->status == GIT_DELTA_DELETED ||
!git_oid_iszero(&delta->new_file.oid))
- delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
+ delta->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
notify_res = diff_notify(diff, delta, matched_pathspec);
@@ -142,7 +142,7 @@ static int diff_delta__from_two(
git_oid_cpy(&delta->old_file.oid, &old_entry->oid);
delta->old_file.size = old_entry->file_size;
delta->old_file.mode = old_mode;
- delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
+ delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
git_oid_cpy(&delta->new_file.oid, &new_entry->oid);
delta->new_file.size = new_entry->file_size;
@@ -156,7 +156,7 @@ static int diff_delta__from_two(
}
if (new_oid || !git_oid_iszero(&new_entry->oid))
- delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
+ delta->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
notify_res = diff_notify(diff, delta, matched_pathspec);
diff --git a/src/diff.h b/src/diff.h
index 16fbf71e6..8e3cbcd46 100644
--- a/src/diff.h
+++ b/src/diff.h
@@ -28,8 +28,14 @@ enum {
GIT_DIFFCAPS_USE_DEV = (1 << 4), /* use st_dev? */
};
-#define GIT_DELTA__TO_DELETE 10
-#define GIT_DELTA__TO_SPLIT 11
+enum {
+ GIT_DIFF_FLAG__FREE_PATH = (1 << 7), /* `path` is allocated memory */
+ GIT_DIFF_FLAG__FREE_DATA = (1 << 8), /* internal file data is allocated */
+ GIT_DIFF_FLAG__UNMAP_DATA = (1 << 9), /* internal file data is mmap'ed */
+ GIT_DIFF_FLAG__NO_DATA = (1 << 10), /* file data should not be loaded */
+ GIT_DIFF_FLAG__TO_DELETE = (1 << 11), /* delete entry during rename det. */
+ GIT_DIFF_FLAG__TO_SPLIT = (1 << 12), /* split entry during rename det. */
+};
struct git_diff_list {
git_refcount rc;
diff --git a/src/diff_output.c b/src/diff_output.c
index 88ccc9d45..209a6e017 100644
--- a/src/diff_output.c
+++ b/src/diff_output.c
@@ -52,8 +52,8 @@ static int parse_hunk_header(git_diff_range *range, const char *header)
return 0;
}
-#define KNOWN_BINARY_FLAGS (GIT_DIFF_FILE_BINARY|GIT_DIFF_FILE_NOT_BINARY)
-#define NOT_BINARY_FLAGS (GIT_DIFF_FILE_NOT_BINARY|GIT_DIFF_FILE_NO_DATA)
+#define KNOWN_BINARY_FLAGS (GIT_DIFF_FLAG_BINARY|GIT_DIFF_FLAG_NOT_BINARY)
+#define NOT_BINARY_FLAGS (GIT_DIFF_FLAG_NOT_BINARY|GIT_DIFF_FLAG__NO_DATA)
static int update_file_is_binary_by_attr(
git_repository *repo, git_diff_file *file)
@@ -68,9 +68,9 @@ static int update_file_is_binary_by_attr(
return -1;
if (GIT_ATTR_FALSE(value))
- file->flags |= GIT_DIFF_FILE_BINARY;
+ file->flags |= GIT_DIFF_FLAG_BINARY;
else if (GIT_ATTR_TRUE(value))
- file->flags |= GIT_DIFF_FILE_NOT_BINARY;
+ file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
/* otherwise leave file->flags alone */
return 0;
@@ -78,15 +78,15 @@ static int update_file_is_binary_by_attr(
static void update_delta_is_binary(git_diff_delta *delta)
{
- if ((delta->old_file.flags & GIT_DIFF_FILE_BINARY) != 0 ||
- (delta->new_file.flags & GIT_DIFF_FILE_BINARY) != 0)
- delta->binary = 1;
+ if ((delta->old_file.flags & GIT_DIFF_FLAG_BINARY) != 0 ||
+ (delta->new_file.flags & GIT_DIFF_FLAG_BINARY) != 0)
+ delta->flags |= GIT_DIFF_FLAG_BINARY;
else if ((delta->old_file.flags & NOT_BINARY_FLAGS) != 0 &&
(delta->new_file.flags & NOT_BINARY_FLAGS) != 0)
- delta->binary = 0;
+ delta->flags |= GIT_DIFF_FLAG_NOT_BINARY;
- /* otherwise leave delta->binary value untouched */
+ /* otherwise leave delta->flags binary value untouched */
}
/* returns if we forced binary setting (and no further checks needed) */
@@ -95,24 +95,24 @@ static bool diff_delta_is_binary_forced(
git_diff_delta *delta)
{
/* return true if binary-ness has already been settled */
- if (delta->binary != -1)
+ if ((delta->flags & KNOWN_BINARY_FLAGS) != 0)
return true;
/* make sure files are conceivably mmap-able */
if ((git_off_t)((size_t)delta->old_file.size) != delta->old_file.size ||
(git_off_t)((size_t)delta->new_file.size) != delta->new_file.size)
{
- delta->old_file.flags |= GIT_DIFF_FILE_BINARY;
- delta->new_file.flags |= GIT_DIFF_FILE_BINARY;
- delta->binary = 1;
+ delta->old_file.flags |= GIT_DIFF_FLAG_BINARY;
+ delta->new_file.flags |= GIT_DIFF_FLAG_BINARY;
+ delta->flags |= GIT_DIFF_FLAG_BINARY;
return true;
}
/* check if user is forcing us to text diff these files */
if (ctxt->opts && (ctxt->opts->flags & GIT_DIFF_FORCE_TEXT) != 0) {
- delta->old_file.flags |= GIT_DIFF_FILE_NOT_BINARY;
- delta->new_file.flags |= GIT_DIFF_FILE_NOT_BINARY;
- delta->binary = 0;
+ delta->old_file.flags |= GIT_DIFF_FLAG_NOT_BINARY;
+ delta->new_file.flags |= GIT_DIFF_FLAG_NOT_BINARY;
+ delta->flags |= GIT_DIFF_FLAG_NOT_BINARY;
return true;
}
@@ -125,8 +125,6 @@ static int diff_delta_is_binary_by_attr(
int error = 0, mirror_new;
git_diff_delta *delta = patch->delta;
- delta->binary = -1;
-
if (diff_delta_is_binary_forced(ctxt, delta))
return 0;
@@ -152,23 +150,21 @@ static int diff_delta_is_binary_by_content(
git_diff_file *file,
const git_map *map)
{
+ const git_buf search = { map->data, 0, min(map->len, 4000) };
+
if (diff_delta_is_binary_forced(ctxt, delta))
return 0;
- if ((file->flags & KNOWN_BINARY_FLAGS) == 0) {
- const git_buf search = { map->data, 0, min(map->len, 4000) };
-
- /* TODO: provide encoding / binary detection callbacks that can
- * be UTF-8 aware, etc. For now, instead of trying to be smart,
- * let's just use the simple NUL-byte detection that core git uses.
- */
+ /* TODO: provide encoding / binary detection callbacks that can
+ * be UTF-8 aware, etc. For now, instead of trying to be smart,
+ * let's just use the simple NUL-byte detection that core git uses.
+ */
- /* previously was: if (git_buf_text_is_binary(&search)) */
- if (git_buf_text_contains_nul(&search))
- file->flags |= GIT_DIFF_FILE_BINARY;
- else
- file->flags |= GIT_DIFF_FILE_NOT_BINARY;
- }
+ /* previously was: if (git_buf_text_is_binary(&search)) */
+ if (git_buf_text_contains_nul(&search))
+ file->flags |= GIT_DIFF_FLAG_BINARY;
+ else
+ file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
update_delta_is_binary(delta);
@@ -192,7 +188,7 @@ static int diff_delta_is_binary_by_size(
}
if (file->size > threshold)
- file->flags |= GIT_DIFF_FILE_BINARY;
+ file->flags |= GIT_DIFF_FLAG_BINARY;
update_delta_is_binary(delta);
@@ -247,7 +243,7 @@ static int get_blob_content(
map->data = git_buf_detach(&content);
map->len = strlen(map->data);
- file->flags |= GIT_DIFF_FILE_FREE_DATA;
+ file->flags |= GIT_DIFF_FLAG__FREE_DATA;
return 0;
}
@@ -270,7 +266,7 @@ static int get_blob_content(
/* if blob is too large to diff, mark as binary */
if ((error = diff_delta_is_binary_by_size(ctxt, delta, file)) < 0)
return error;
- if (delta->binary == 1)
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0)
return 0;
if (odb_obj != NULL) {
@@ -306,14 +302,14 @@ static int get_workdir_sm_content(
return error;
/* update OID if we didn't have it previously */
- if ((file->flags & GIT_DIFF_FILE_VALID_OID) == 0) {
+ if ((file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) {
const git_oid* sm_head;
if ((sm_head = git_submodule_wd_id(sm)) != NULL ||
(sm_head = git_submodule_head_id(sm)) != NULL)
{
git_oid_cpy(&file->oid, sm_head);
- file->flags |= GIT_DIFF_FILE_VALID_OID;
+ file->flags |= GIT_DIFF_FLAG_VALID_OID;
}
}
@@ -329,7 +325,7 @@ static int get_workdir_sm_content(
map->data = git_buf_detach(&content);
map->len = strlen(map->data);
- file->flags |= GIT_DIFF_FILE_FREE_DATA;
+ file->flags |= GIT_DIFF_FLAG__FREE_DATA;
return 0;
}
@@ -356,8 +352,8 @@ static int get_workdir_content(
if (S_ISLNK(file->mode)) {
ssize_t alloc_len, read_len;
- file->flags |= GIT_DIFF_FILE_FREE_DATA;
- file->flags |= GIT_DIFF_FILE_BINARY;
+ file->flags |= GIT_DIFF_FLAG__FREE_DATA;
+ file->flags |= GIT_DIFF_FLAG_BINARY;
/* link path on disk could be UTF-16, so prepare a buffer that is
* big enough to handle some UTF-8 data expansion
@@ -389,7 +385,7 @@ static int get_workdir_content(
file->size = git_futils_filesize(fd);
if ((error = diff_delta_is_binary_by_size(ctxt, delta, file)) < 0 ||
- delta->binary == 1)
+ (delta->flags & GIT_DIFF_FLAG_BINARY) != 0)
goto close_and_cleanup;
error = git_filters_load(
@@ -402,7 +398,7 @@ static int get_workdir_content(
goto close_and_cleanup;
error = git_futils_mmap_ro(map, fd, 0, (size_t)file->size);
- file->flags |= GIT_DIFF_FILE_UNMAP_DATA;
+ file->flags |= GIT_DIFF_FLAG__UNMAP_DATA;
} else {
git_buf raw = GIT_BUF_INIT, filtered = GIT_BUF_INIT;
@@ -412,7 +408,7 @@ static int get_workdir_content(
map->len = git_buf_len(&filtered);
map->data = git_buf_detach(&filtered);
- file->flags |= GIT_DIFF_FILE_FREE_DATA;
+ file->flags |= GIT_DIFF_FLAG__FREE_DATA;
}
git_buf_free(&raw);
@@ -425,11 +421,11 @@ close_and_cleanup:
}
/* once data is loaded, update OID if we didn't have it previously */
- if (!error && (file->flags & GIT_DIFF_FILE_VALID_OID) == 0) {
+ if (!error && (file->flags & GIT_DIFF_FLAG_VALID_OID) == 0) {
error = git_odb_hash(
&file->oid, map->data, map->len, GIT_OBJ_BLOB);
if (!error)
- file->flags |= GIT_DIFF_FILE_VALID_OID;
+ file->flags |= GIT_DIFF_FLAG_VALID_OID;
}
if (!error)
@@ -445,22 +441,22 @@ static void release_content(git_diff_file *file, git_map *map, git_blob *blob)
if (blob != NULL)
git_blob_free(blob);
- if (file->flags & GIT_DIFF_FILE_FREE_DATA) {
+ if (file->flags & GIT_DIFF_FLAG__FREE_DATA) {
git__free(map->data);
map->data = "";
map->len = 0;
- file->flags &= ~GIT_DIFF_FILE_FREE_DATA;
+ file->flags &= ~GIT_DIFF_FLAG__FREE_DATA;
}
- else if (file->flags & GIT_DIFF_FILE_UNMAP_DATA) {
+ else if (file->flags & GIT_DIFF_FLAG__UNMAP_DATA) {
git_futils_mmap_free(map);
map->data = "";
map->len = 0;
- file->flags &= ~GIT_DIFF_FILE_UNMAP_DATA;
+ file->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA;
}
}
-static void diff_context_init(
+static int diff_context_init(
diff_context *ctxt,
git_diff_list *diff,
git_repository *repo,
@@ -472,6 +468,12 @@ static void diff_context_init(
{
memset(ctxt, 0, sizeof(diff_context));
+ if (!repo && diff)
+ repo = diff->repo;
+
+ if (!opts && diff)
+ opts = &diff->opts;
+
ctxt->repo = repo;
ctxt->diff = diff;
ctxt->opts = opts;
@@ -482,6 +484,8 @@ static void diff_context_init(
ctxt->error = 0;
setup_xdiff_options(ctxt->opts, &ctxt->xdiff_config, &ctxt->xdiff_params);
+
+ return 0;
}
static int diff_delta_file_callback(
@@ -555,7 +559,7 @@ static int diff_patch_load(
patch->new_data.len = 0;
patch->new_blob = NULL;
- if (delta->binary == 1)
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0)
goto cleanup;
if (!ctxt->hunk_cb &&
@@ -565,25 +569,25 @@ static int diff_patch_load(
switch (delta->status) {
case GIT_DELTA_ADDED:
- delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA;
+ delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA;
break;
case GIT_DELTA_DELETED:
- delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA;
+ delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA;
break;
case GIT_DELTA_MODIFIED:
break;
case GIT_DELTA_UNTRACKED:
- delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA;
+ delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA;
if ((ctxt->opts->flags & GIT_DIFF_INCLUDE_UNTRACKED_CONTENT) == 0)
- delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA;
+ delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA;
break;
default:
- delta->new_file.flags |= GIT_DIFF_FILE_NO_DATA;
- delta->old_file.flags |= GIT_DIFF_FILE_NO_DATA;
+ delta->new_file.flags |= GIT_DIFF_FLAG__NO_DATA;
+ delta->old_file.flags |= GIT_DIFF_FLAG__NO_DATA;
break;
}
-#define CHECK_UNMODIFIED (GIT_DIFF_FILE_NO_DATA | GIT_DIFF_FILE_VALID_OID)
+#define CHECK_UNMODIFIED (GIT_DIFF_FLAG__NO_DATA | GIT_DIFF_FLAG_VALID_OID)
check_if_unmodified =
(delta->old_file.flags & CHECK_UNMODIFIED) == 0 &&
@@ -594,41 +598,41 @@ static int diff_patch_load(
* memory footprint during diff.
*/
- if ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 &&
+ if ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 &&
patch->old_src == GIT_ITERATOR_TYPE_WORKDIR) {
if ((error = get_workdir_content(
ctxt, delta, &delta->old_file, &patch->old_data)) < 0)
goto cleanup;
- if (delta->binary == 1)
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0)
goto cleanup;
}
- if ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 &&
+ if ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 &&
patch->new_src == GIT_ITERATOR_TYPE_WORKDIR) {
if ((error = get_workdir_content(
ctxt, delta, &delta->new_file, &patch->new_data)) < 0)
goto cleanup;
- if (delta->binary == 1)
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0)
goto cleanup;
}
- if ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 &&
+ if ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 &&
patch->old_src != GIT_ITERATOR_TYPE_WORKDIR) {
if ((error = get_blob_content(
ctxt, delta, &delta->old_file,
&patch->old_data, &patch->old_blob)) < 0)
goto cleanup;
- if (delta->binary == 1)
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0)
goto cleanup;
}
- if ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0 &&
+ if ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0 &&
patch->new_src != GIT_ITERATOR_TYPE_WORKDIR) {
if ((error = get_blob_content(
ctxt, delta, &delta->new_file,
&patch->new_data, &patch->new_blob)) < 0)
goto cleanup;
- if (delta->binary == 1)
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) != 0)
goto cleanup;
}
@@ -646,13 +650,13 @@ static int diff_patch_load(
}
cleanup:
- if (delta->binary == -1)
+ if ((delta->flags & KNOWN_BINARY_FLAGS) == 0)
update_delta_is_binary(delta);
if (!error) {
patch->flags |= GIT_DIFF_PATCH_LOADED;
- if (delta->binary != 1 &&
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0 &&
delta->status != GIT_DELTA_UNMODIFIED &&
(patch->old_data.len || patch->new_data.len) &&
!git_oid_equal(&delta->old_file.oid, &delta->new_file.oid))
@@ -926,6 +930,15 @@ static int diff_patch_line_cb(
return 0;
}
+static int diff_required(git_diff_list *diff, const char *action)
+{
+ if (!diff) {
+ giterr_set(GITERR_INVALID, "Must provide valid diff to %s", action);
+ return -1;
+ }
+
+ return 0;
+}
int git_diff_foreach(
git_diff_list *diff,
@@ -939,9 +952,12 @@ int git_diff_foreach(
size_t idx;
git_diff_patch patch;
- diff_context_init(
- &ctxt, diff, diff->repo, &diff->opts,
- file_cb, hunk_cb, data_cb, payload);
+ if (diff_required(diff, "git_diff_foreach") < 0)
+ return -1;
+
+ if (diff_context_init(
+ &ctxt, diff, NULL, NULL, file_cb, hunk_cb, data_cb, payload) < 0)
+ return -1;
diff_patch_init(&ctxt, &patch);
@@ -1138,7 +1154,7 @@ static int print_patch_file(
newpath = "/dev/null";
}
- if (delta->binary != 1) {
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0) {
git_buf_printf(pi->buf, "--- %s%s\n", oldpfx, oldpath);
git_buf_printf(pi->buf, "+++ %s%s\n", newpfx, newpath);
}
@@ -1153,7 +1169,7 @@ static int print_patch_file(
return GIT_EUSER;
}
- if (delta->binary != 1)
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0)
return 0;
git_buf_clear(pi->buf);
@@ -1268,7 +1284,7 @@ static void set_data_from_blob(
map->data = (char *)git_blob_rawcontent(blob);
} else {
file->size = 0;
- file->flags |= GIT_DIFF_FILE_NO_DATA;
+ file->flags |= GIT_DIFF_FLAG__NO_DATA;
map->len = 0;
map->data = "";
@@ -1283,7 +1299,7 @@ static void set_data_from_buffer(
map->len = buffer_len;
if (!buffer) {
- file->flags |= GIT_DIFF_FILE_NO_DATA;
+ file->flags |= GIT_DIFF_FLAG__NO_DATA;
map->data = NULL;
} else {
map->data = (char *)buffer;
@@ -1310,8 +1326,10 @@ static int diff_single_init(
memset(data, 0, sizeof(*data));
- diff_context_init(
- &data->ctxt, NULL, repo, opts, file_cb, hunk_cb, data_cb, payload);
+ if (diff_context_init(
+ &data->ctxt, NULL, repo, opts,
+ file_cb, hunk_cb, data_cb, payload) < 0)
+ return -1;
diff_patch_init(&data->ctxt, &data->patch);
@@ -1322,13 +1340,13 @@ static int diff_single_apply(diff_single_data *data)
{
int error;
git_diff_delta *delta = &data->delta;
- bool has_old = ((delta->old_file.flags & GIT_DIFF_FILE_NO_DATA) == 0);
- bool has_new = ((delta->new_file.flags & GIT_DIFF_FILE_NO_DATA) == 0);
+ bool has_old = ((delta->old_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0);
+ bool has_new = ((delta->new_file.flags & GIT_DIFF_FLAG__NO_DATA) == 0);
/* finish setting up fake git_diff_delta record and loaded data */
data->patch.delta = delta;
- delta->binary = -1;
+ delta->flags = delta->flags & ~KNOWN_BINARY_FLAGS;
delta->status = has_new ?
(has_old ? GIT_DELTA_MODIFIED : GIT_DELTA_ADDED) :
@@ -1345,7 +1363,8 @@ static int diff_single_apply(diff_single_data *data)
data->patch.flags |= GIT_DIFF_PATCH_LOADED;
- if (delta->binary != 1 && delta->status != GIT_DELTA_UNMODIFIED)
+ if ((delta->flags & GIT_DIFF_FLAG_BINARY) == 0 &&
+ delta->status != GIT_DELTA_UNMODIFIED)
data->patch.flags |= GIT_DIFF_PATCH_DIFFABLE;
/* do diffs */
@@ -1377,6 +1396,9 @@ int git_diff_blobs(
new_blob ? git_object_owner((const git_object *)new_blob) :
old_blob ? git_object_owner((const git_object *)old_blob) : NULL;
+ if (!repo) /* Hmm, given two NULL blobs, silently do no callbacks? */
+ return 0;
+
if ((error = diff_single_init(
&d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0)
return error;
@@ -1408,6 +1430,9 @@ int git_diff_blob_to_buffer(
git_repository *repo =
old_blob ? git_object_owner((const git_object *)old_blob) : NULL;
+ if (!repo && !buf) /* Hmm, given NULLs, silently do no callbacks? */
+ return 0;
+
if ((error = diff_single_init(
&d, repo, options, file_cb, hunk_cb, data_cb, payload)) < 0)
return error;
@@ -1456,11 +1481,19 @@ int git_diff_get_patch(
if (patch_ptr)
*patch_ptr = NULL;
+ if (delta_ptr)
+ *delta_ptr = NULL;
+
+ if (diff_required(diff, "git_diff_get_patch") < 0)
+ return -1;
+
+ if (diff_context_init(
+ &ctxt, diff, NULL, NULL,
+ NULL, diff_patch_hunk_cb, diff_patch_line_cb, NULL) < 0)
+ return -1;
delta = git_vector_get(&diff->deltas, idx);
if (!delta) {
- if (delta_ptr)
- *delta_ptr = NULL;
giterr_set(GITERR_INVALID, "Index out of range for delta in diff");
return GIT_ENOTFOUND;
}
@@ -1469,17 +1502,17 @@ int git_diff_get_patch(
*delta_ptr = delta;
if (!patch_ptr &&
- (delta->binary != -1 ||
+ ((delta->flags & KNOWN_BINARY_FLAGS) != 0 ||
(diff->opts.flags & GIT_DIFF_SKIP_BINARY_CHECK) != 0))
return 0;
- diff_context_init(
- &ctxt, diff, diff->repo, &diff->opts,
- NULL, diff_patch_hunk_cb, diff_patch_line_cb, NULL);
-
if (git_diff_delta__should_skip(ctxt.opts, delta))
return 0;
+ /* Don't load the patch if the user doesn't want it */
+ if (!patch_ptr)
+ return 0;
+
patch = diff_patch_alloc(&ctxt, delta);
if (!patch)
return -1;
diff --git a/src/diff_tform.c b/src/diff_tform.c
index 2c2e1fb19..958d2bfec 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -7,6 +7,8 @@
#include "common.h"
#include "diff.h"
#include "git2/config.h"
+#include "git2/blob.h"
+#include "hashsig.h"
static git_diff_delta *diff_delta__dup(
const git_diff_delta *d, git_pool *pool)
@@ -168,6 +170,36 @@ int git_diff_merge(
return error;
}
+static int find_similar__hashsig_for_file(
+ void **out, const git_diff_file *f, const char *path, void *p)
+{
+ git_hashsig_option_t opt = (git_hashsig_option_t)p;
+ GIT_UNUSED(f);
+ return git_hashsig_create_fromfile((git_hashsig **)out, path, opt);
+}
+
+static int find_similar__hashsig_for_buf(
+ void **out, const git_diff_file *f, const char *buf, size_t len, void *p)
+{
+ git_hashsig_option_t opt = (git_hashsig_option_t)p;
+ GIT_UNUSED(f);
+ return git_hashsig_create((git_hashsig **)out, buf, len, opt);
+}
+
+static void find_similar__hashsig_free(void *sig, void *payload)
+{
+ GIT_UNUSED(payload);
+ git_hashsig_free(sig);
+}
+
+static int find_similar__calc_similarity(
+ int *score, void *siga, void *sigb, void *payload)
+{
+ GIT_UNUSED(payload);
+ *score = git_hashsig_compare(siga, sigb);
+ return 0;
+}
+
#define DEFAULT_THRESHOLD 50
#define DEFAULT_BREAK_REWRITE_THRESHOLD 60
#define DEFAULT_TARGET_LIMIT 200
@@ -178,7 +210,6 @@ static int normalize_find_opts(
git_diff_find_options *given)
{
git_config *cfg = NULL;
- const char *val;
if (diff->repo != NULL &&
git_repository_config__weakptr(&cfg, diff->repo) < 0)
@@ -187,8 +218,9 @@ static int normalize_find_opts(
if (given != NULL)
memcpy(opts, given, sizeof(*opts));
else {
- git_diff_find_options init = GIT_DIFF_FIND_OPTIONS_INIT;
- memmove(opts, &init, sizeof(init));
+ const char *val = NULL;
+
+ GIT_INIT_STRUCTURE(opts, GIT_DIFF_FIND_OPTIONS_VERSION);
opts->flags = GIT_DIFF_FIND_RENAMES;
@@ -236,6 +268,24 @@ static int normalize_find_opts(
opts->target_limit = limit;
}
+ /* assign the internal metric with whitespace flag as payload */
+ if (!opts->metric) {
+ opts->metric = git__malloc(sizeof(git_diff_similarity_metric));
+ GITERR_CHECK_ALLOC(opts->metric);
+
+ opts->metric->file_signature = find_similar__hashsig_for_file;
+ opts->metric->buffer_signature = find_similar__hashsig_for_buf;
+ opts->metric->free_signature = find_similar__hashsig_free;
+ opts->metric->similarity = find_similar__calc_similarity;
+
+ if (opts->flags & GIT_DIFF_FIND_IGNORE_WHITESPACE)
+ opts->metric->payload = (void *)GIT_HASHSIG_IGNORE_WHITESPACE;
+ else if (opts->flags & GIT_DIFF_FIND_DONT_IGNORE_WHITESPACE)
+ opts->metric->payload = (void *)GIT_HASHSIG_NORMAL;
+ else
+ opts->metric->payload = (void *)GIT_HASHSIG_SMART_WHITESPACE;
+ }
+
return 0;
}
@@ -250,10 +300,10 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
/* build new delta list without TO_DELETE and splitting TO_SPLIT */
git_vector_foreach(&diff->deltas, i, delta) {
- if (delta->status == GIT_DELTA__TO_DELETE)
+ if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
continue;
- if (delta->status == GIT_DELTA__TO_SPLIT) {
+ if ((delta->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0) {
git_diff_delta *deleted = diff_delta__dup(delta, &diff->pool);
if (!deleted)
goto on_error;
@@ -261,7 +311,7 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
deleted->status = GIT_DELTA_DELETED;
memset(&deleted->new_file, 0, sizeof(deleted->new_file));
deleted->new_file.path = deleted->old_file.path;
- deleted->new_file.flags |= GIT_DIFF_FILE_VALID_OID;
+ deleted->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
if (git_vector_insert(&onto, deleted) < 0)
goto on_error;
@@ -269,7 +319,7 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
delta->status = GIT_DELTA_ADDED;
memset(&delta->old_file, 0, sizeof(delta->old_file));
delta->old_file.path = delta->new_file.path;
- delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
+ delta->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
}
if (git_vector_insert(&onto, delta) < 0)
@@ -278,7 +328,7 @@ static int apply_splits_and_deletes(git_diff_list *diff, size_t expected_size)
/* cannot return an error past this point */
git_vector_foreach(&diff->deltas, i, delta)
- if (delta->status == GIT_DELTA__TO_DELETE)
+ if ((delta->flags & GIT_DIFF_FLAG__TO_DELETE) != 0)
git__free(delta);
/* swap new delta list into place */
@@ -297,17 +347,86 @@ on_error:
return -1;
}
-static unsigned int calc_similarity(
- void *cache, git_diff_file *old_file, git_diff_file *new_file)
+GIT_INLINE(git_diff_file *) similarity_get_file(git_diff_list *diff, size_t idx)
+{
+ git_diff_delta *delta = git_vector_get(&diff->deltas, idx / 2);
+ return (idx & 1) ? &delta->new_file : &delta->old_file;
+}
+
+static int similarity_calc(
+ git_diff_list *diff,
+ git_diff_find_options *opts,
+ size_t file_idx,
+ void **cache)
{
- GIT_UNUSED(cache);
+ int error = 0;
+ git_diff_file *file = similarity_get_file(diff, file_idx);
+ git_iterator_type_t src = (file_idx & 1) ? diff->old_src : diff->new_src;
+
+ if (src == GIT_ITERATOR_TYPE_WORKDIR) { /* compute hashsig from file */
+ git_buf path = GIT_BUF_INIT;
+
+ /* TODO: apply wd-to-odb filters to file data if necessary */
- if (git_oid_cmp(&old_file->oid, &new_file->oid) == 0)
+ if (!(error = git_buf_joinpath(
+ &path, git_repository_workdir(diff->repo), file->path)))
+ error = opts->metric->file_signature(
+ &cache[file_idx], file, path.ptr, opts->metric->payload);
+
+ git_buf_free(&path);
+ } else { /* compute hashsig from blob buffer */
+ git_blob *blob = NULL;
+
+ /* TODO: add max size threshold a la diff? */
+
+ if ((error = git_blob_lookup(&blob, diff->repo, &file->oid)) < 0)
+ return error;
+
+ error = opts->metric->buffer_signature(
+ &cache[file_idx], file, git_blob_rawcontent(blob),
+ git_blob_rawsize(blob), opts->metric->payload);
+
+ git_blob_free(blob);
+ }
+
+ return error;
+}
+
+static int similarity_measure(
+ git_diff_list *diff,
+ git_diff_find_options *opts,
+ void **cache,
+ size_t a_idx,
+ size_t b_idx)
+{
+ int score = 0;
+ git_diff_file *a_file = similarity_get_file(diff, a_idx);
+ git_diff_file *b_file = similarity_get_file(diff, b_idx);
+
+ if (GIT_MODE_TYPE(a_file->mode) != GIT_MODE_TYPE(b_file->mode))
+ return 0;
+
+ if (git_oid_cmp(&a_file->oid, &b_file->oid) == 0)
return 100;
- /* TODO: insert actual similarity algo here */
+ /* update signature cache if needed */
+ if (!cache[a_idx] && similarity_calc(diff, opts, a_idx, cache) < 0)
+ return -1;
+ if (!cache[b_idx] && similarity_calc(diff, opts, b_idx, cache) < 0)
+ return -1;
- return 0;
+ /* compare signatures */
+ if (opts->metric->similarity(
+ &score, cache[a_idx], cache[b_idx], opts->metric->payload) < 0)
+ return -1;
+
+ /* clip score */
+ if (score < 0)
+ score = 0;
+ else if (score > 100)
+ score = 100;
+
+ return score;
}
#define FLAG_SET(opts,flag_name) ((opts.flags & flag_name) != 0)
@@ -316,109 +435,135 @@ int git_diff_find_similar(
git_diff_list *diff,
git_diff_find_options *given_opts)
{
- unsigned int i, j, similarity;
+ size_t i, j, cache_size, *matches;
+ int error = 0, similarity;
git_diff_delta *from, *to;
git_diff_find_options opts;
- unsigned int tried_targets, num_changes = 0;
- git_vector matches = GIT_VECTOR_INIT;
+ size_t tried_targets, num_rewrites = 0;
+ void **cache;
- if (normalize_find_opts(diff, &opts, given_opts) < 0)
- return -1;
+ if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
+ return error;
- /* first do splits if requested */
+ /* TODO: maybe abort if deltas.length > target_limit ??? */
+
+ cache_size = diff->deltas.length * 2; /* must store b/c length may change */
+ cache = git__calloc(cache_size, sizeof(void *));
+ GITERR_CHECK_ALLOC(cache);
+
+ matches = git__calloc(diff->deltas.length, sizeof(size_t));
+ GITERR_CHECK_ALLOC(matches);
+
+ /* first break MODIFIED records that are too different (if requested) */
if (FLAG_SET(opts, GIT_DIFF_FIND_AND_BREAK_REWRITES)) {
git_vector_foreach(&diff->deltas, i, from) {
if (from->status != GIT_DELTA_MODIFIED)
continue;
- /* Right now, this doesn't work right because the similarity
- * algorithm isn't actually implemented...
- */
- similarity = 100;
- /* calc_similarity(NULL, &from->old_file, from->new_file); */
+ similarity = similarity_measure(
+ diff, &opts, cache, 2 * i, 2 * i + 1);
- if (similarity < opts.break_rewrite_threshold) {
- from->status = GIT_DELTA__TO_SPLIT;
- num_changes++;
+ if (similarity < 0) {
+ error = similarity;
+ goto cleanup;
}
- }
- /* apply splits as needed */
- if (num_changes > 0 &&
- apply_splits_and_deletes(
- diff, diff->deltas.length + num_changes) < 0)
- return -1;
+ if ((unsigned int)similarity < opts.break_rewrite_threshold) {
+ from->flags |= GIT_DIFF_FLAG__TO_SPLIT;
+ num_rewrites++;
+ }
+ }
}
/* next find the most similar delta for each rename / copy candidate */
- if (git_vector_init(&matches, diff->deltas.length, git_diff_delta__cmp) < 0)
- return -1;
-
git_vector_foreach(&diff->deltas, i, from) {
tried_targets = 0;
+ /* skip things that aren't blobs */
+ if (GIT_MODE_TYPE(from->old_file.mode) !=
+ GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
+ continue;
+
+ /* don't check UNMODIFIED files as source unless given option */
+ if (from->status == GIT_DELTA_UNMODIFIED &&
+ !FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
+ continue;
+
+ /* skip all but DELETED files unless copy detection is on */
+ if (!FLAG_SET(opts, GIT_DIFF_FIND_COPIES) &&
+ from->status != GIT_DELTA_DELETED &&
+ (from->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
+ continue;
+
git_vector_foreach(&diff->deltas, j, to) {
if (i == j)
continue;
+ /* skip things that aren't blobs */
+ if (GIT_MODE_TYPE(to->new_file.mode) !=
+ GIT_MODE_TYPE(GIT_FILEMODE_BLOB))
+ continue;
+
switch (to->status) {
case GIT_DELTA_ADDED:
case GIT_DELTA_UNTRACKED:
case GIT_DELTA_RENAMED:
case GIT_DELTA_COPIED:
break;
+ case GIT_DELTA_MODIFIED:
+ if ((to->flags & GIT_DIFF_FLAG__TO_SPLIT) == 0)
+ continue;
+ break;
default:
/* only the above status values should be checked */
continue;
}
- /* skip all but DELETED files unless copy detection is on */
- if (from->status != GIT_DELTA_DELETED &&
- !FLAG_SET(opts, GIT_DIFF_FIND_COPIES))
- continue;
-
- /* don't check UNMODIFIED files as source unless given option */
- if (from->status == GIT_DELTA_UNMODIFIED &&
- !FLAG_SET(opts, GIT_DIFF_FIND_COPIES_FROM_UNMODIFIED))
- continue;
-
- /* cap on maximum files we'll examine */
+ /* cap on maximum files we'll examine (per "from" file) */
if (++tried_targets > opts.target_limit)
break;
/* calculate similarity and see if this pair beats the
* similarity score of the current best pair.
*/
- similarity = calc_similarity(NULL, &from->old_file, &to->new_file);
+ similarity = similarity_measure(
+ diff, &opts, cache, 2 * i, 2 * j + 1);
+
+ if (similarity < 0) {
+ error = similarity;
+ goto cleanup;
+ }
- if (to->similarity < similarity) {
- to->similarity = similarity;
- if (git_vector_set(NULL, &matches, j, from) < 0)
- return -1;
+ if (to->similarity < (unsigned int)similarity) {
+ to->similarity = (unsigned int)similarity;
+ matches[j] = i + 1;
}
}
}
/* next rewrite the diffs with renames / copies */
- num_changes = 0;
-
git_vector_foreach(&diff->deltas, j, to) {
- from = GIT_VECTOR_GET(&matches, j);
- if (!from) {
+ if (!matches[j]) {
assert(to->similarity == 0);
continue;
}
- /* three possible outcomes here:
+ i = matches[j] - 1;
+ from = GIT_VECTOR_GET(&diff->deltas, i);
+ assert(from);
+
+ /* four possible outcomes here:
* 1. old DELETED and if over rename threshold,
* new becomes RENAMED and old goes away
- * 2. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and
+ * 2. old SPLIT and if over rename threshold,
+ * new becomes RENAMED and old becomes ADDED (clear SPLIT)
+ * 3. old was MODIFIED but FIND_RENAMES_FROM_REWRITES is on and
* old is more similar to new than it is to itself, in which
* case, new becomes RENAMED and old becomed ADDED
- * 3. otherwise if over copy threshold, new becomes COPIED
+ * 4. otherwise if over copy threshold, new becomes COPIED
*/
if (from->status == GIT_DELTA_DELETED) {
@@ -430,8 +575,27 @@ int git_diff_find_similar(
to->status = GIT_DELTA_RENAMED;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
- from->status = GIT_DELTA__TO_DELETE;
- num_changes++;
+ from->flags |= GIT_DIFF_FLAG__TO_DELETE;
+ num_rewrites++;
+
+ continue;
+ }
+
+ if (from->status == GIT_DELTA_MODIFIED &&
+ (from->flags & GIT_DIFF_FLAG__TO_SPLIT) != 0)
+ {
+ if (to->similarity < opts.rename_threshold) {
+ to->similarity = 0;
+ continue;
+ }
+
+ to->status = GIT_DELTA_RENAMED;
+ memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
+
+ from->status = GIT_DELTA_ADDED;
+ from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ memset(&from->old_file, 0, sizeof(from->old_file));
+ num_rewrites--;
continue;
}
@@ -440,17 +604,22 @@ int git_diff_find_similar(
FLAG_SET(opts, GIT_DIFF_FIND_RENAMES_FROM_REWRITES) &&
to->similarity > opts.rename_threshold)
{
- similarity = 100;
- /* calc_similarity(NULL, &from->old_file, from->new_file); */
+ similarity = similarity_measure(
+ diff, &opts, cache, 2 * i, 2 * i + 1);
+
+ if (similarity < 0) {
+ error = similarity;
+ goto cleanup;
+ }
- if (similarity < opts.rename_from_rewrite_threshold) {
+ if ((unsigned int)similarity < opts.rename_from_rewrite_threshold) {
to->status = GIT_DELTA_RENAMED;
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
from->status = GIT_DELTA_ADDED;
memset(&from->old_file, 0, sizeof(from->old_file));
from->old_file.path = to->old_file.path;
- from->old_file.flags |= GIT_DIFF_FILE_VALID_OID;
+ from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
continue;
}
@@ -466,17 +635,26 @@ int git_diff_find_similar(
memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
}
- git_vector_free(&matches);
+ if (num_rewrites > 0) {
+ assert(num_rewrites < diff->deltas.length);
- if (num_changes > 0) {
- assert(num_changes < diff->deltas.length);
+ error = apply_splits_and_deletes(
+ diff, diff->deltas.length - num_rewrites);
+ }
+
+cleanup:
+ git__free(matches);
- if (apply_splits_and_deletes(
- diff, diff->deltas.length - num_changes) < 0)
- return -1;
+ for (i = 0; i < cache_size; ++i) {
+ if (cache[i] != NULL)
+ opts.metric->free_signature(cache[i], opts.metric->payload);
}
+ git__free(cache);
- return 0;
+ if (!given_opts || !given_opts->metric)
+ git__free(opts.metric);
+
+ return error;
}
#undef FLAG_SET
diff --git a/src/fileops.c b/src/fileops.c
index 90ca11fb7..c1824e812 100644
--- a/src/fileops.c
+++ b/src/fileops.c
@@ -523,6 +523,41 @@ int git_futils_rmdir_r(
return error;
}
+/* Remove all files and directories beneath `path`.
+ *
+ * `path` must exist and must be a directory; otherwise an error message
+ * is set and GIT_ERROR is returned.  Each entry found directly under
+ * `path` is handed to futils__rmdir_recurs_foreach with the
+ * GIT_RMDIR_REMOVE_FILES flag.
+ *
+ * Returns 0 on success or an error code on failure.  When the directory
+ * walk stops with GIT_EUSER, the error recorded by the callback is
+ * returned instead.
+ */
+int git_futils_cleanupdir_r(const char *path)
+{
+	int error;
+	git_buf fullpath = GIT_BUF_INIT;
+	futils__rmdir_data data;
+
+	/* The comparison must be applied to the value stored in `error`;
+	 * with the parenthesis one position later, `error` would receive the
+	 * 0/1 result of `< 0` and a failed copy would surface as +1.
+	 */
+	if ((error = git_buf_put(&fullpath, path, strlen(path))) < 0)
+		goto clean_up;
+
+	data.base = "";
+	data.baselen = 0;
+	data.flags = GIT_RMDIR_REMOVE_FILES;
+	data.error = 0;
+
+	if (!git_path_exists(path)) {
+		giterr_set(GITERR_OS, "Path does not exist: %s" , path);
+		error = GIT_ERROR;
+		goto clean_up;
+	}
+
+	if (!git_path_isdir(path)) {
+		giterr_set(GITERR_OS, "Path is not a directory: %s" , path);
+		error = GIT_ERROR;
+		goto clean_up;
+	}
+
+	error = git_path_direach(&fullpath, futils__rmdir_recurs_foreach, &data);
+	if (error == GIT_EUSER)
+		error = data.error;
+
+clean_up:
+	git_buf_free(&fullpath);
+	return error;
+}
+
int git_futils_find_system_file(git_buf *path, const char *filename)
{
#ifdef GIT_WIN32
diff --git a/src/fileops.h b/src/fileops.h
index 988cc661a..7ba99d3d9 100644
--- a/src/fileops.h
+++ b/src/fileops.h
@@ -130,7 +130,7 @@ typedef enum {
/**
* Remove path and any files and directories beneath it.
*
- * @param path Path to to top level directory to process.
+ * @param path Path to the top level directory to process.
* @param base Root for relative path.
* @param flags Combination of git_futils_rmdir_flags values
* @return 0 on success; -1 on error.
@@ -138,6 +138,14 @@ typedef enum {
extern int git_futils_rmdir_r(const char *path, const char *base, uint32_t flags);
/**
+ * Remove all files and directories beneath the specified path.
+ *
+ * @param path Path to the top level directory to process.
+ * @return 0 on success; -1 on error.
+ */
+extern int git_futils_cleanupdir_r(const char *path);
+
+/**
* Create and open a temporary file with a `_git2_` suffix.
* Writes the filename into path_out.
* @return On success, an open file descriptor, else an error code < 0.
diff --git a/src/hash/hash_generic.c b/src/hash/hash_generic.c
index 0723bfaf9..32fcd869c 100644
--- a/src/hash/hash_generic.c
+++ b/src/hash/hash_generic.c
@@ -232,7 +232,7 @@ int git_hash_init(git_hash_ctx *ctx)
ctx->H[3] = 0x10325476;
ctx->H[4] = 0xc3d2e1f0;
- return 0;
+ return 0;
}
int git_hash_update(git_hash_ctx *ctx, const void *data, size_t len)
diff --git a/src/hashsig.c b/src/hashsig.c
new file mode 100644
index 000000000..e9c5164a4
--- /dev/null
+++ b/src/hashsig.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#include "hashsig.h"
+#include "fileops.h"
+
+typedef uint32_t hashsig_t;
+typedef uint64_t hashsig_state;
+
+#define HASHSIG_SCALE 100
+
+#define HASHSIG_HASH_WINDOW 32
+#define HASHSIG_HASH_START 0
+#define HASHSIG_HASH_SHIFT 5
+#define HASHSIG_HASH_MASK 0x7FFFFFFF
+
+#define HASHSIG_HEAP_SIZE ((1 << 7) - 1)
+
+typedef int (GIT_STDLIB_CALL *hashsig_cmp)(const void *a, const void *b);
+
+/* Fixed-capacity binary heap of hash values.
+ *
+ * `size` is the number of live entries and `asize` the capacity
+ * (hashsig_heap_init sets it to HASHSIG_HEAP_SIZE).  `cmp` defines the
+ * heap order and is also the comparator later handed to qsort().
+ */
+typedef struct {
+ int size, asize;
+ hashsig_cmp cmp;
+ hashsig_t values[HASHSIG_HEAP_SIZE];
+} hashsig_heap;
+
+/* Rolling-hash state carried across successive chunks of input.
+ *
+ * `state` is the current hash of the window, `shift_n` the multiplier
+ * applied to the outgoing character when it is removed from `state`,
+ * `window` a circular buffer of the characters currently hashed,
+ * `win_len` how many characters have been collected while the first
+ * window is being filled, `win_pos` the slot of the next character to be
+ * replaced, and `saw_lf` the flag used by GIT_HASHSIG_SMART_WHITESPACE
+ * to drop whitespace that follows a line feed.
+ */
+typedef struct {
+ hashsig_state state, shift_n;
+ char window[HASHSIG_HASH_WINDOW];
+ int win_len, win_pos, saw_lf;
+} hashsig_in_progress;
+
+/* Field order: state, shift_n, window, win_len, win_pos, saw_lf.
+ * saw_lf starts at 1, so leading whitespace is treated as if it followed
+ * a line feed.
+ */
+#define HASHSIG_IN_PROGRESS_INIT { HASHSIG_HASH_START, 1, {0}, 0, 0, 1 }
+
+/* A similarity signature: two bounded heaps of window hashes.
+ *
+ * hashsig_alloc orders `mins` with hashsig_cmp_min and `maxs` with
+ * hashsig_cmp_max.  `opt` holds the whitespace handling flags and
+ * `considered` counts the window hashes offered to the heaps.
+ */
+struct git_hashsig {
+ hashsig_heap mins;
+ hashsig_heap maxs;
+ git_hashsig_option_t opt;
+ int considered;
+};
+
+#define HEAP_LCHILD_OF(I) (((I)*2)+1)
+#define HEAP_RCHILD_OF(I) (((I)*2)+2)
+#define HEAP_PARENT_OF(I) (((I)-1)>>1)
+
+/* Reset `h` to an empty heap ordered by `cmp`, with a capacity of
+ * HASHSIG_HEAP_SIZE entries.  The values[] storage is not touched.
+ */
+static void hashsig_heap_init(hashsig_heap *h, hashsig_cmp cmp)
+{
+	h->cmp   = cmp;
+	h->asize = HASHSIG_HEAP_SIZE;
+	h->size  = 0;
+}
+
+/* Ascending comparator over hashsig_t values: returns -1, 0 or 1 when
+ * `*a` is less than, equal to or greater than `*b`.
+ */
+static int GIT_STDLIB_CALL hashsig_cmp_max(const void *a, const void *b)
+{
+	const hashsig_t lhs = *(const hashsig_t *)a;
+	const hashsig_t rhs = *(const hashsig_t *)b;
+
+	if (lhs < rhs)
+		return -1;
+	if (lhs > rhs)
+		return 1;
+	return 0;
+}
+
+/* Descending comparator over hashsig_t values: returns -1, 0 or 1 when
+ * `*a` is greater than, equal to or less than `*b`.
+ */
+static int GIT_STDLIB_CALL hashsig_cmp_min(const void *a, const void *b)
+{
+	const hashsig_t lhs = *(const hashsig_t *)a;
+	const hashsig_t rhs = *(const hashsig_t *)b;
+
+	if (lhs > rhs)
+		return -1;
+	if (lhs < rhs)
+		return 1;
+	return 0;
+}
+
+/* Sift the entry at index `el` towards the root: while its parent
+ * compares greater than it under h->cmp, exchange the two and continue
+ * from the parent's position.
+ */
+static void hashsig_heap_up(hashsig_heap *h, int el)
+{
+	while (el > 0) {
+		int up = HEAP_PARENT_OF(el);
+		hashsig_t tmp;
+
+		/* parent already ordered before (or equal to) child: done */
+		if (h->cmp(&h->values[up], &h->values[el]) <= 0)
+			break;
+
+		tmp = h->values[el];
+		h->values[el] = h->values[up];
+		h->values[up] = tmp;
+
+		el = up;
+	}
+}
+
+/* Sift the entry at index `el` away from the root until it compares
+ * strictly lower (under h->cmp) than both of its children, exchanging it
+ * each round with whichever child compares lower.
+ */
+static void hashsig_heap_down(hashsig_heap *h, int el)
+{
+ hashsig_t v, lv, rv;
+
+ /* 'el < h->size / 2' holds while el still has a left child inside
+ * the live heap, i.e. while el is above the bottom row.
+ *
+ * NOTE(review): when h->size is even, the right child index of the
+ * last such el equals h->size, one slot past the live entries.  The
+ * only caller visible here (hashsig_heap_insert) decrements size from
+ * asize first, so that slot is still inside values[] but holds a stale
+ * value - confirm this is intended.
+ */
+
+ while (el < h->size / 2) {
+ int lel = HEAP_LCHILD_OF(el), rel = HEAP_RCHILD_OF(el), swapel;
+
+ v = h->values[el];
+ lv = h->values[lel];
+ rv = h->values[rel];
+
+ /* stop once the entry orders strictly before both children */
+ if (h->cmp(&v, &lv) < 0 && h->cmp(&v, &rv) < 0)
+ break;
+
+ /* descend towards the child that compares lower */
+ swapel = (h->cmp(&lv, &rv) < 0) ? lel : rel;
+
+ h->values[el] = h->values[swapel];
+ h->values[swapel] = v;
+
+ el = swapel;
+ }
+}
+
+/* Sort the live entries of `h` in place using the heap's own comparator.
+ * This is only needed once a signature is complete, because
+ * hashsig_heap_compare walks the values as sorted lists.
+ */
+static void hashsig_heap_sort(hashsig_heap *h)
+{
+	qsort(h->values, h->size, sizeof(h->values[0]), h->cmp);
+}
+
+/* Offer `val` to the bounded heap.
+ *
+ * While there is room the value is simply added.  Once the heap is
+ * full, the value is accepted only if it compares greater than the root
+ * under h->cmp, in which case the root is discarded to make room;
+ * otherwise the value is dropped.
+ */
+static void hashsig_heap_insert(hashsig_heap *h, hashsig_t val)
+{
+	if (h->size == h->asize) {
+		/* full: keep the current contents unless val outranks the root */
+		if (h->cmp(&val, &h->values[0]) <= 0)
+			return;
+
+		/* pop the root by moving the last entry up and sifting it down */
+		h->size--;
+		h->values[0] = h->values[h->size];
+		hashsig_heap_down(h, 0);
+	}
+
+	if (h->size >= h->asize)
+		return;
+
+	/* append at the end, then restore heap order */
+	h->values[h->size++] = val;
+	hashsig_heap_up(h, h->size - 1);
+}
+
+/* Decide whether `ch` takes part in the hash under the options `opt`.
+ *
+ * With GIT_HASHSIG_IGNORE_WHITESPACE every whitespace character is
+ * skipped.  With GIT_HASHSIG_SMART_WHITESPACE, '\r' is skipped and so is
+ * any whitespace seen while `*saw_lf` is set; for every character that
+ * is kept in that mode, `*saw_lf` is updated to record whether it was a
+ * line feed.
+ */
+GIT_INLINE(bool) hashsig_include_char(
+	char ch, git_hashsig_option_t opt, int *saw_lf)
+{
+	if ((opt & GIT_HASHSIG_IGNORE_WHITESPACE) != 0 && git__isspace(ch))
+		return false;
+
+	/* nothing else to filter unless smart whitespace handling is on */
+	if ((opt & GIT_HASHSIG_SMART_WHITESPACE) == 0)
+		return true;
+
+	if (ch == '\r')
+		return false;
+
+	if (*saw_lf && git__isspace(ch))
+		return false;
+
+	*saw_lf = (ch == '\n');
+
+	return true;
+}
+
+/* Fill the first hash window from the input.
+ *
+ * Consumes characters from `*data` (at most `size` of them) until
+ * HASHSIG_HASH_WINDOW included characters have been collected in
+ * prog->window, folding each one into the running hash.  When the window
+ * becomes full the first hash is inserted into both heaps of `sig`.
+ * On return `*data` points just past the last character consumed and
+ * the updated state is stored back in `prog`, so a window that is still
+ * incomplete can be continued by a later call.
+ */
+static void hashsig_initial_window(
+ git_hashsig *sig,
+ const char **data,
+ size_t size,
+ hashsig_in_progress *prog)
+{
+ hashsig_state state, shift_n;
+ int win_len;
+ const char *scan, *end;
+
+ /* init until we have processed at least HASHSIG_HASH_WINDOW data */
+
+ if (prog->win_len >= HASHSIG_HASH_WINDOW)
+ return;
+
+ state = prog->state;
+ win_len = prog->win_len;
+ shift_n = prog->shift_n;
+
+ scan = *data;
+ end = scan + size;
+
+ while (scan < end && win_len < HASHSIG_HASH_WINDOW) {
+ char ch = *scan++;
+
+ if (!hashsig_include_char(ch, sig->opt, &prog->saw_lf))
+ continue;
+
+ state = (state * HASHSIG_HASH_SHIFT + ch) & HASHSIG_HASH_MASK;
+
+ /* shift_n tracks the (masked) weight that the first character of
+ * the window carries inside state: 1 after one character, then
+ * multiplied by HASHSIG_HASH_SHIFT for each further character.
+ */
+ if (!win_len)
+ shift_n = 1;
+ else
+ shift_n = (shift_n * HASHSIG_HASH_SHIFT) & HASHSIG_HASH_MASK;
+
+ prog->window[win_len++] = ch;
+ }
+
+ /* insert initial hash if we just finished */
+
+ if (win_len == HASHSIG_HASH_WINDOW) {
+ hashsig_heap_insert(&sig->mins, (hashsig_t)state);
+ hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
+ sig->considered = 1;
+ }
+
+ prog->state = state;
+ prog->win_len = win_len;
+ prog->shift_n = shift_n;
+
+ *data = scan;
+}
+
+/* Feed `size` bytes at `data` into the signature.
+ *
+ * If the first window is not yet complete it is filled first.  After
+ * that, every included character slides the window forward by one: the
+ * oldest character is removed from the hash, the new one is added, and
+ * the resulting hash is offered to both heaps.  The rolling state is
+ * kept in `prog` so the input may be supplied in several chunks.
+ *
+ * Always returns 0.
+ */
+static int hashsig_add_hashes(
+ git_hashsig *sig,
+ const char *data,
+ size_t size,
+ hashsig_in_progress *prog)
+{
+ const char *scan = data, *end = data + size;
+ hashsig_state state, shift_n, rmv;
+
+ /* may advance scan; if the chunk runs out first, scan == end and the
+ * loop below does nothing
+ */
+ if (prog->win_len < HASHSIG_HASH_WINDOW)
+ hashsig_initial_window(sig, &scan, size, prog);
+
+ state = prog->state;
+ shift_n = prog->shift_n;
+
+ /* advance window, adding new chars and removing old */
+
+ for (; scan < end; ++scan) {
+ char ch = *scan;
+
+ if (!hashsig_include_char(ch, sig->opt, &prog->saw_lf))
+ continue;
+
+ /* contribution of the character about to leave the window */
+ rmv = shift_n * prog->window[prog->win_pos];
+
+ state = (state - rmv) & HASHSIG_HASH_MASK;
+ state = (state * HASHSIG_HASH_SHIFT) & HASHSIG_HASH_MASK;
+ state = (state + ch) & HASHSIG_HASH_MASK;
+
+ hashsig_heap_insert(&sig->mins, (hashsig_t)state);
+ hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
+ sig->considered++;
+
+ /* overwrite the slot just vacated and move on around the ring */
+ prog->window[prog->win_pos] = ch;
+ prog->win_pos = (prog->win_pos + 1) % HASHSIG_HASH_WINDOW;
+ }
+
+ prog->state = state;
+
+ return 0;
+}
+
+/* Finish a signature once all input has been hashed.
+ *
+ * A signature is only usable when the `mins` heap holds at least
+ * HASHSIG_HEAP_SIZE entries; in that case both heaps are sorted and 0 is
+ * returned.  Otherwise an error message is set and GIT_EBUFS is
+ * returned.
+ */
+static int hashsig_finalize_hashes(git_hashsig *sig)
+{
+	if (sig->mins.size >= HASHSIG_HEAP_SIZE) {
+		hashsig_heap_sort(&sig->mins);
+		hashsig_heap_sort(&sig->maxs);
+		return 0;
+	}
+
+	giterr_set(GITERR_INVALID,
+		"File too small for similarity signature calculation");
+	return GIT_EBUFS;
+}
+
+/* Allocate a zeroed signature, set up its two heaps (`mins` ordered by
+ * hashsig_cmp_min, `maxs` by hashsig_cmp_max) and record `opts`.
+ * Returns NULL when the allocation fails.
+ */
+static git_hashsig *hashsig_alloc(git_hashsig_option_t opts)
+{
+	git_hashsig *sig = git__calloc(1, sizeof(git_hashsig));
+
+	if (sig != NULL) {
+		hashsig_heap_init(&sig->mins, hashsig_cmp_min);
+		hashsig_heap_init(&sig->maxs, hashsig_cmp_max);
+		sig->opt = opts;
+	}
+
+	return sig;
+}
+
+/* Build a similarity signature for the `buflen` bytes at `buf`.
+ *
+ * On success the new signature is stored in `*out` and 0 is returned.
+ * On failure the partially built signature is freed, `*out` is left
+ * untouched and the error code is returned.
+ */
+int git_hashsig_create(
+	git_hashsig **out,
+	const char *buf,
+	size_t buflen,
+	git_hashsig_option_t opts)
+{
+	int error;
+	hashsig_in_progress prog = HASHSIG_IN_PROGRESS_INIT;
+	git_hashsig *sig = hashsig_alloc(opts);
+	GITERR_CHECK_ALLOC(sig);
+
+	error = hashsig_add_hashes(sig, buf, buflen, &prog);
+	if (error == 0)
+		error = hashsig_finalize_hashes(sig);
+
+	if (error != 0) {
+		git_hashsig_free(sig);
+		return error;
+	}
+
+	*out = sig;
+	return 0;
+}
+
+/* Build a similarity signature from the contents of the file at `path`.
+ *
+ * The file is opened read-only and hashed in chunks of up to 4096 bytes.
+ * On success the new signature is stored in `*out` and 0 is returned.
+ * If the file cannot be opened, the negative value from the open call is
+ * returned; a read error sets an error message and returns the negative
+ * read result.  On any failure the signature is released and `*out` is
+ * left untouched.
+ */
+int git_hashsig_create_fromfile(
+	git_hashsig **out,
+	const char *path,
+	git_hashsig_option_t opts)
+{
+	char chunk[4096];
+	ssize_t nread = 0;
+	int error = 0, fd;
+	hashsig_in_progress prog = HASHSIG_IN_PROGRESS_INIT;
+	git_hashsig *sig = hashsig_alloc(opts);
+	GITERR_CHECK_ALLOC(sig);
+
+	fd = git_futils_open_ro(path);
+	if (fd < 0) {
+		git__free(sig);
+		return fd;
+	}
+
+	for (;;) {
+		nread = p_read(fd, chunk, sizeof(chunk));
+
+		/* zero means end of file, negative means the read failed */
+		if (nread <= 0) {
+			error = (int)nread;
+			if (error < 0)
+				giterr_set(GITERR_OS,
+					"Read error on '%s' calculating similarity hashes", path);
+			break;
+		}
+
+		error = hashsig_add_hashes(sig, chunk, nread, &prog);
+		if (error)
+			break;
+	}
+
+	p_close(fd);
+
+	if (error == 0)
+		error = hashsig_finalize_hashes(sig);
+
+	if (error != 0) {
+		git_hashsig_free(sig);
+		return error;
+	}
+
+	*out = sig;
+	return 0;
+}
+
+/* Release a signature.  A signature is a single allocation (see
+ * hashsig_alloc), so one free releases both heaps as well.
+ */
+void git_hashsig_free(git_hashsig *sig)
+{
+ git__free(sig);
+}
+
+/* Measure the overlap of two sorted heaps on a 0..HASHSIG_SCALE scale.
+ *
+ * Both heaps must use the same comparator and must already be sorted
+ * with it.  The two value lists are walked in step, counting entries
+ * that compare equal; the result is twice that count, scaled by
+ * HASHSIG_SCALE, divided by the combined number of entries.
+ */
+static int hashsig_heap_compare(const hashsig_heap *a, const hashsig_heap *b)
+{
+	int ai = 0, bi = 0, shared = 0;
+
+	assert(a->cmp == b->cmp);
+
+	while (ai < a->size && bi < b->size) {
+		int order = a->cmp(&a->values[ai], &b->values[bi]);
+
+		if (order == 0) {
+			/* same hash on both sides: count it and advance both */
+			shared++;
+			ai++;
+			bi++;
+		} else if (order < 0) {
+			ai++;
+		} else {
+			bi++;
+		}
+	}
+
+	return HASHSIG_SCALE * (shared * 2) / (a->size + b->size);
+}
+
+/* Similarity of two signatures: the average of the overlap score of
+ * their `mins` heaps and the overlap score of their `maxs` heaps.
+ */
+int git_hashsig_compare(const git_hashsig *a, const git_hashsig *b)
+{
+	int min_score = hashsig_heap_compare(&a->mins, &b->mins);
+	int max_score = hashsig_heap_compare(&a->maxs, &b->maxs);
+
+	return (min_score + max_score) / 2;
+}
+
diff --git a/src/hashsig.h b/src/hashsig.h
new file mode 100644
index 000000000..8c920cbf1
--- /dev/null
+++ b/src/hashsig.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_hashsig_h__
+#define INCLUDE_hashsig_h__
+
+#include "common.h"
+
+/**
+ * Similarity signature of line hashes for a buffer
+ */
+typedef struct git_hashsig git_hashsig;
+
+typedef enum {
+ GIT_HASHSIG_NORMAL = 0, /* use all data */
+ GIT_HASHSIG_IGNORE_WHITESPACE = 1, /* ignore whitespace */
+ GIT_HASHSIG_SMART_WHITESPACE = 2, /* ignore \r and all space after \n */
+} git_hashsig_option_t;
+
+/**
+ * Build a similarity signature for a buffer
+ *
+ * If one of the whitespace-ignoring options is given, the affected
+ * whitespace characters are skipped while the buffer is being hashed.
+ * The buffer itself is not modified.
+ *
+ * This will return an error if the buffer doesn't contain enough data to
+ * compute a valid signature.
+ *
+ * @param out Receives the newly allocated signature on success
+ * @param buf The contents of the file to hash
+ * @param buflen The length of the data at `buf`
+ * @param opts Whitespace handling options (git_hashsig_option_t values)
+ * @return 0 on success, or an error code (GIT_EBUFS when the buffer is
+ * too small for a signature)
+ */
+extern int git_hashsig_create(
+ git_hashsig **out,
+ const char *buf,
+ size_t buflen,
+ git_hashsig_option_t opts);
+
+/**
+ * Build a similarity signature from a file
+ *
+ * This walks through the file, only loading a maximum of 4K of file data at
+ * a time. Otherwise, it acts just like `git_hashsig_create`.
+ *
+ * This will return an error if the file doesn't contain enough data to
+ * compute a valid signature.
+ *
+ * @param out Receives the newly allocated signature on success
+ * @param path Path of the file to read and hash
+ * @param opts Whitespace handling options (git_hashsig_option_t values)
+ * @return 0 on success, or an error code if the file cannot be opened or
+ * read, or is too small for a signature
+ */
+extern int git_hashsig_create_fromfile(
+ git_hashsig **out,
+ const char *path,
+ git_hashsig_option_t opts);
+
+/**
+ * Release memory for a content similarity signature
+ */
+extern void git_hashsig_free(git_hashsig *sig);
+
+/**
+ * Measure similarity between two files
+ *
+ * @return <0 for error, [0 to 100] as similarity score
+ */
+extern int git_hashsig_compare(
+ const git_hashsig *a,
+ const git_hashsig *b);
+
+#endif
diff --git a/src/index.c b/src/index.c
index 59649083b..4deafd77f 100644
--- a/src/index.c
+++ b/src/index.c
@@ -242,8 +242,10 @@ static unsigned int index_merge_mode(
return index_create_mode(mode);
}
-static void index_set_ignore_case(git_index *index, bool ignore_case)
+void git_index__set_ignore_case(git_index *index, bool ignore_case)
{
+ index->ignore_case = ignore_case;
+
index->entries._cmp = ignore_case ? index_icmp : index_cmp;
index->entries_cmp_path = ignore_case ? index_icmp_path : index_cmp_path;
index->entries_search = ignore_case ? index_isrch : index_srch;
@@ -297,18 +299,8 @@ int git_index_new(git_index **out)
static void index_free(git_index *index)
{
- git_index_entry *e;
- git_index_reuc_entry *reuc;
- size_t i;
-
git_index_clear(index);
- git_vector_foreach(&index->entries, i, e) {
- index_entry_free(e);
- }
git_vector_free(&index->entries);
- git_vector_foreach(&index->reuc, i, reuc) {
- index_entry_reuc_free(reuc);
- }
git_vector_free(&index->reuc);
git__free(index->index_file_path);
@@ -335,16 +327,10 @@ void git_index_clear(git_index *index)
git__free(e->path);
git__free(e);
}
-
- for (i = 0; i < index->reuc.length; ++i) {
- git_index_reuc_entry *e;
- e = git_vector_get(&index->reuc, i);
- git__free(e->path);
- git__free(e);
- }
-
git_vector_clear(&index->entries);
- git_vector_clear(&index->reuc);
+
+ git_index_reuc_clear(index);
+
git_futils_filestamp_set(&index->stamp, NULL);
git_tree_cache_free(index->tree);
@@ -388,7 +374,7 @@ int git_index_set_caps(git_index *index, unsigned int caps)
}
if (old_ignore_case != index->ignore_case) {
- index_set_ignore_case(index, index->ignore_case);
+ git_index__set_ignore_case(index, index->ignore_case);
}
return 0;
@@ -1151,6 +1137,21 @@ int git_index_reuc_remove(git_index *index, size_t position)
return error;
}
+/* Drop every REUC entry of `index`: free each entry's path and the entry
+ * itself, then empty the reuc vector.
+ */
+void git_index_reuc_clear(git_index *index)
+{
+	size_t pos;
+
+	assert(index);
+
+	for (pos = 0; pos < index->reuc.length; ++pos) {
+		git_index_reuc_entry *entry = git_vector_get(&index->reuc, pos);
+
+		git__free(entry->path);
+		git__free(entry);
+	}
+
+	git_vector_clear(&index->reuc);
+}
+
static int index_error_invalid(const char *message)
{
giterr_set(GITERR_INDEX, "Invalid data in index - %s", message);
diff --git a/src/index.h b/src/index.h
index 9304b5539..2beaa6375 100644
--- a/src/index.h
+++ b/src/index.h
@@ -48,6 +48,8 @@ extern size_t git_index__prefix_position(git_index *index, const char *path);
extern int git_index_entry__cmp(const void *a, const void *b);
extern int git_index_entry__cmp_icase(const void *a, const void *b);
+extern void git_index__set_ignore_case(git_index *index, bool ignore_case);
+
extern int git_index_read_tree_match(
git_index *index, git_tree *tree, git_strarray *strspec);
diff --git a/src/indexer.c b/src/indexer.c
index c4648e400..c7e142baf 100644
--- a/src/indexer.c
+++ b/src/indexer.c
@@ -17,6 +17,7 @@
#include "posix.h"
#include "pack.h"
#include "filebuf.h"
+#include "oidmap.h"
#define UINT31_MAX (0x7FFFFFFF)
@@ -27,15 +28,6 @@ struct entry {
uint64_t offset_long;
};
-struct git_indexer {
- struct git_pack_file *pack;
- size_t nr_objects;
- git_vector objects;
- git_filebuf file;
- unsigned int fanout[256];
- git_oid hash;
-};
-
struct git_indexer_stream {
unsigned int parsed_header :1,
opened_pack :1,
@@ -61,11 +53,6 @@ struct delta_info {
git_off_t delta_off;
};
-const git_oid *git_indexer_hash(const git_indexer *idx)
-{
- return &idx->hash;
-}
-
const git_oid *git_indexer_stream_hash(const git_indexer_stream *idx)
{
return &idx->hash;
@@ -136,14 +123,6 @@ static int objects_cmp(const void *a, const void *b)
return git_oid_cmp(&entrya->oid, &entryb->oid);
}
-static int cache_cmp(const void *a, const void *b)
-{
- const struct git_pack_entry *ea = a;
- const struct git_pack_entry *eb = b;
-
- return git_oid_cmp(&ea->sha1, &eb->sha1);
-}
-
int git_indexer_stream_new(
git_indexer_stream **out,
const char *prefix,
@@ -285,7 +264,8 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start,
static int store_object(git_indexer_stream *idx)
{
- int i;
+ int i, error;
+ khiter_t k;
git_oid oid;
struct entry *entry;
git_off_t entry_size;
@@ -310,11 +290,15 @@ static int store_object(git_indexer_stream *idx)
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
- if (git_vector_insert(&idx->pack->cache, pentry) < 0) {
+
+ k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
+ if (!error) {
git__free(pentry);
goto on_error;
}
+ kh_value(idx->pack->idx_cache, k) = pentry;
+
git_oid_cpy(&entry->oid, &oid);
if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
@@ -338,7 +322,8 @@ on_error:
static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
{
- int i;
+ int i, error;
+ khiter_t k;
git_oid oid;
size_t entry_size;
struct entry *entry;
@@ -365,11 +350,14 @@ static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t ent
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
- if (git_vector_insert(&idx->pack->cache, pentry) < 0) {
+ k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
+ if (!error) {
git__free(pentry);
goto on_error;
}
+ kh_value(idx->pack->idx_cache, k) = pentry;
+
git_oid_cpy(&entry->oid, &oid);
entry->crc = crc32(0L, Z_NULL, 0);
@@ -440,8 +428,8 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
/* for now, limit to 2^32 objects */
assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
- if (git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp) < 0)
- return -1;
+ idx->pack->idx_cache = git_oidmap_alloc();
+ GITERR_CHECK_ALLOC(idx->pack->idx_cache);
idx->pack->has_cache = 1;
if (git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp) < 0)
@@ -451,7 +439,7 @@ int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t siz
return -1;
stats->received_objects = 0;
- stats->indexed_objects = 0;
+ processed = stats->indexed_objects = 0;
stats->total_objects = (unsigned int)idx->nr_objects;
do_progress_callback(idx, stats);
}
@@ -732,9 +720,9 @@ on_error:
void git_indexer_stream_free(git_indexer_stream *idx)
{
+ khiter_t k;
unsigned int i;
struct entry *e;
- struct git_pack_entry *pe;
struct delta_info *delta;
if (idx == NULL)
@@ -743,11 +731,16 @@ void git_indexer_stream_free(git_indexer_stream *idx)
git_vector_foreach(&idx->objects, i, e)
git__free(e);
git_vector_free(&idx->objects);
+
if (idx->pack) {
- git_vector_foreach(&idx->pack->cache, i, pe)
- git__free(pe);
- git_vector_free(&idx->pack->cache);
+ for (k = kh_begin(idx->pack->idx_cache); k != kh_end(idx->pack->idx_cache); k++) {
+ if (kh_exist(idx->pack->idx_cache, k))
+ git__free(kh_value(idx->pack->idx_cache, k));
+ }
+
+ git_oidmap_free(idx->pack->idx_cache);
}
+
git_vector_foreach(&idx->deltas, i, delta)
git__free(delta);
git_vector_free(&idx->deltas);
@@ -755,315 +748,3 @@ void git_indexer_stream_free(git_indexer_stream *idx)
git_filebuf_cleanup(&idx->pack_file);
git__free(idx);
}
-
-int git_indexer_new(git_indexer **out, const char *packname)
-{
- git_indexer *idx;
- struct git_pack_header hdr;
- int error;
-
- assert(out && packname);
-
- idx = git__calloc(1, sizeof(git_indexer));
- GITERR_CHECK_ALLOC(idx);
-
- open_pack(&idx->pack, packname);
-
- if ((error = parse_header(&hdr, idx->pack)) < 0)
- goto cleanup;
-
- idx->nr_objects = ntohl(hdr.hdr_entries);
-
- /* for now, limit to 2^32 objects */
- assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
-
- error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp);
- if (error < 0)
- goto cleanup;
-
- idx->pack->has_cache = 1;
- error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp);
- if (error < 0)
- goto cleanup;
-
- *out = idx;
-
- return 0;
-
-cleanup:
- git_indexer_free(idx);
-
- return -1;
-}
-
-static int index_path(git_buf *path, git_indexer *idx)
-{
- const char prefix[] = "pack-", suffix[] = ".idx";
- size_t slash = (size_t)path->size;
-
- /* search backwards for '/' */
- while (slash > 0 && path->ptr[slash - 1] != '/')
- slash--;
-
- if (git_buf_grow(path, slash + 1 + strlen(prefix) +
- GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
- return -1;
-
- git_buf_truncate(path, slash);
- git_buf_puts(path, prefix);
- git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
- path->size += GIT_OID_HEXSZ;
- git_buf_puts(path, suffix);
-
- return git_buf_oom(path) ? -1 : 0;
-}
-
-int git_indexer_write(git_indexer *idx)
-{
- git_mwindow *w = NULL;
- int error;
- unsigned int i, long_offsets = 0, left;
- struct git_pack_idx_header hdr;
- git_buf filename = GIT_BUF_INIT;
- struct entry *entry;
- void *packfile_hash;
- git_oid file_hash;
- git_hash_ctx ctx;
-
- if (git_hash_ctx_init(&ctx) < 0)
- return -1;
-
- git_vector_sort(&idx->objects);
-
- git_buf_sets(&filename, idx->pack->pack_name);
- git_buf_truncate(&filename, filename.size - strlen("pack"));
- git_buf_puts(&filename, "idx");
- if (git_buf_oom(&filename))
- return -1;
-
- error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS);
- if (error < 0)
- goto cleanup;
-
- /* Write out the header */
- hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
- hdr.idx_version = htonl(2);
- error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
- if (error < 0)
- goto cleanup;
-
- /* Write out the fanout table */
- for (i = 0; i < 256; ++i) {
- uint32_t n = htonl(idx->fanout[i]);
- error = git_filebuf_write(&idx->file, &n, sizeof(n));
- if (error < 0)
- goto cleanup;
- }
-
- /* Write out the object names (SHA-1 hashes) */
- git_vector_foreach(&idx->objects, i, entry) {
- if ((error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid))) < 0 ||
- (error = git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ)) < 0)
- goto cleanup;
- }
-
- if ((error = git_hash_final(&idx->hash, &ctx)) < 0)
- goto cleanup;
-
- /* Write out the CRC32 values */
- git_vector_foreach(&idx->objects, i, entry) {
- error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
- if (error < 0)
- goto cleanup;
- }
-
- /* Write out the offsets */
- git_vector_foreach(&idx->objects, i, entry) {
- uint32_t n;
-
- if (entry->offset == UINT32_MAX)
- n = htonl(0x80000000 | long_offsets++);
- else
- n = htonl(entry->offset);
-
- error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
- if (error < 0)
- goto cleanup;
- }
-
- /* Write out the long offsets */
- git_vector_foreach(&idx->objects, i, entry) {
- uint32_t split[2];
-
- if (entry->offset != UINT32_MAX)
- continue;
-
- split[0] = htonl(entry->offset_long >> 32);
- split[1] = htonl(entry->offset_long & 0xffffffff);
-
- error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
- if (error < 0)
- goto cleanup;
- }
-
- /* Write out the packfile trailer */
-
- packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
- git_mwindow_close(&w);
- if (packfile_hash == NULL) {
- error = -1;
- goto cleanup;
- }
-
- memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
-
- git_mwindow_close(&w);
-
- error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
- if (error < 0)
- goto cleanup;
-
- /* Write out the index sha */
- error = git_filebuf_hash(&file_hash, &idx->file);
- if (error < 0)
- goto cleanup;
-
- error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
- if (error < 0)
- goto cleanup;
-
- /* Figure out what the final name should be */
- error = index_path(&filename, idx);
- if (error < 0)
- goto cleanup;
-
- /* Commit file */
- error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE);
-
-cleanup:
- git_mwindow_free_all(&idx->pack->mwf);
- git_mwindow_file_deregister(&idx->pack->mwf);
- if (error < 0)
- git_filebuf_cleanup(&idx->file);
- git_buf_free(&filename);
- git_hash_ctx_cleanup(&ctx);
-
- return error;
-}
-
-int git_indexer_run(git_indexer *idx, git_transfer_progress *stats)
-{
- git_mwindow_file *mwf;
- git_off_t off = sizeof(struct git_pack_header);
- int error;
- struct entry *entry;
- unsigned int left, processed;
-
- assert(idx && stats);
-
- mwf = &idx->pack->mwf;
- error = git_mwindow_file_register(mwf);
- if (error < 0)
- return error;
-
- stats->total_objects = (unsigned int)idx->nr_objects;
- stats->indexed_objects = processed = 0;
-
- while (processed < idx->nr_objects) {
- git_rawobj obj;
- git_oid oid;
- struct git_pack_entry *pentry;
- git_mwindow *w = NULL;
- int i;
- git_off_t entry_start = off;
- void *packed;
- size_t entry_size;
- char fmt[GIT_OID_HEXSZ] = {0};
-
- entry = git__calloc(1, sizeof(*entry));
- GITERR_CHECK_ALLOC(entry);
-
- if (off > UINT31_MAX) {
- entry->offset = UINT32_MAX;
- entry->offset_long = off;
- } else {
- entry->offset = (uint32_t)off;
- }
-
- error = git_packfile_unpack(&obj, idx->pack, &off);
- if (error < 0)
- goto cleanup;
-
- /* FIXME: Parse the object instead of hashing it */
- error = git_odb__hashobj(&oid, &obj);
- if (error < 0) {
- giterr_set(GITERR_INDEXER, "Failed to hash object");
- goto cleanup;
- }
-
- pentry = git__malloc(sizeof(struct git_pack_entry));
- if (pentry == NULL) {
- error = -1;
- goto cleanup;
- }
-
- git_oid_cpy(&pentry->sha1, &oid);
- pentry->offset = entry_start;
- git_oid_fmt(fmt, &oid);
- error = git_vector_insert(&idx->pack->cache, pentry);
- if (error < 0)
- goto cleanup;
-
- git_oid_cpy(&entry->oid, &oid);
- entry->crc = crc32(0L, Z_NULL, 0);
-
- entry_size = (size_t)(off - entry_start);
- packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
- if (packed == NULL) {
- error = -1;
- goto cleanup;
- }
- entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
- git_mwindow_close(&w);
-
- /* Add the object to the list */
- error = git_vector_insert(&idx->objects, entry);
- if (error < 0)
- goto cleanup;
-
- for (i = oid.id[0]; i < 256; ++i) {
- idx->fanout[i]++;
- }
-
- git__free(obj.data);
-
- stats->indexed_objects = ++processed;
- }
-
-cleanup:
- git_mwindow_free_all(mwf);
-
- return error;
-
-}
-
-void git_indexer_free(git_indexer *idx)
-{
- unsigned int i;
- struct entry *e;
- struct git_pack_entry *pe;
-
- if (idx == NULL)
- return;
-
- git_mwindow_file_deregister(&idx->pack->mwf);
- git_vector_foreach(&idx->objects, i, e)
- git__free(e);
- git_vector_free(&idx->objects);
- git_vector_foreach(&idx->pack->cache, i, pe)
- git__free(pe);
- git_vector_free(&idx->pack->cache);
- git_packfile_free(idx->pack);
- git__free(idx);
-}
-
diff --git a/src/pack-objects.c b/src/pack-objects.c
index e4b67192d..459201f58 100644
--- a/src/pack-objects.c
+++ b/src/pack-objects.c
@@ -21,8 +21,6 @@
#include "git2/indexer.h"
#include "git2/config.h"
-GIT__USE_OIDMAP;
-
struct unpacked {
git_pobject *object;
void *data;
diff --git a/src/pack.c b/src/pack.c
index f36f3cf6b..75ac98186 100644
--- a/src/pack.c
+++ b/src/pack.c
@@ -760,13 +760,14 @@ git_off_t get_delta_base(
} else if (type == GIT_OBJ_REF_DELTA) {
/* If we have the cooperative cache, search in it first */
if (p->has_cache) {
- size_t pos;
- struct git_pack_entry key;
+ khiter_t k;
+ git_oid oid;
- git_oid_fromraw(&key.sha1, base_info);
- if (!git_vector_bsearch(&pos, &p->cache, &key)) {
+ git_oid_fromraw(&oid, base_info);
+ k = kh_get(oid, p->idx_cache, &oid);
+ if (k != kh_end(p->idx_cache)) {
*curpos += 20;
- return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset;
+ return ((struct git_pack_entry *)kh_value(p->idx_cache, k))->offset;
}
}
/* The base entry _must_ be in the same pack */
diff --git a/src/pack.h b/src/pack.h
index 6c43d8f5b..8d7e33dfe 100644
--- a/src/pack.h
+++ b/src/pack.h
@@ -16,6 +16,7 @@
#include "map.h"
#include "mwindow.h"
#include "odb.h"
+#include "oidmap.h"
#define GIT_PACK_FILE_MODE 0444
@@ -62,6 +63,7 @@ typedef struct git_pack_cache_entry {
#include "offmap.h"
GIT__USE_OFFMAP;
+GIT__USE_OIDMAP;
#define GIT_PACK_CACHE_MEMORY_LIMIT 16 * 1024 * 1024
#define GIT_PACK_CACHE_SIZE_LIMIT 1024 * 1024 /* don't bother caching anything over 1MB */
@@ -86,7 +88,7 @@ struct git_pack_file {
git_time_t mtime;
unsigned pack_local:1, pack_keep:1, has_cache:1;
git_oid sha1;
- git_vector cache;
+ git_oidmap *idx_cache;
git_oid **oids;
git_pack_cache bases; /* delta base cache */
diff --git a/src/repository.c b/src/repository.c
index cd1e658cf..278abfaf2 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -553,6 +553,7 @@ void git_repository_set_config(git_repository *repo, git_config *config)
repo->_config = config;
GIT_REFCOUNT_OWN(repo->_config, repo);
+ GIT_REFCOUNT_INC(repo->_config);
}
int git_repository_odb__weakptr(git_odb **out, git_repository *repo)
diff --git a/src/tree.c b/src/tree.c
index ec57e8bb8..11123a18a 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -566,6 +566,7 @@ int git_tree__write_index(
git_oid *oid, git_index *index, git_repository *repo)
{
int ret;
+ bool old_ignore_case = false;
assert(oid && index && repo);
@@ -580,8 +581,21 @@ int git_tree__write_index(
return 0;
}
- /* The tree cache didn't help us */
+ /* The tree cache didn't help us; we'll have to write
+ * out a tree. If the index is ignore_case, we must
+ * make it case-sensitive for the duration of the tree-write
+ * operation. */
+
+ if (index->ignore_case) {
+ old_ignore_case = true;
+ git_index__set_ignore_case(index, false);
+ }
+
ret = write_tree(oid, repo, index, "", 0);
+
+ if (old_ignore_case)
+ git_index__set_ignore_case(index, true);
+
return ret < 0 ? ret : 0;
}
diff --git a/src/win32/git2.rc b/src/win32/git2.rc
index 892008b77..436913228 100644
--- a/src/win32/git2.rc
+++ b/src/win32/git2.rc
@@ -12,13 +12,13 @@ VS_VERSION_INFO VERSIONINFO MOVEABLE IMPURE LOADONCALL DISCARDABLE
PRODUCTVERSION LIBGIT2_VER_MAJOR,LIBGIT2_VER_MINOR,LIBGIT2_VER_REVISION,0
FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
#ifdef _DEBUG
- FILEFLAGS 1
+ FILEFLAGS VS_FF_DEBUG
#else
FILEFLAGS 0
#endif
FILEOS VOS_NT_WINDOWS32
FILETYPE VFT_DLL
- FILESUBTYPE 0 // not used
+ FILESUBTYPE VFT2_UNKNOWN
BEGIN
BLOCK "StringFileInfo"
BEGIN
diff --git a/src/win32/msvc-compat.h b/src/win32/msvc-compat.h
index 714a85e21..50865ed17 100644
--- a/src/win32/msvc-compat.h
+++ b/src/win32/msvc-compat.h
@@ -37,6 +37,15 @@
/* MSVC doesn't define ssize_t at all */
typedef SSIZE_T ssize_t;
+/* define snprintf using variadic macro support if available */
+#if _MSC_VER >= 1400
+# define snprintf(BUF, SZ, FMT, ...) _snprintf_s(BUF, SZ, _TRUNCATE, FMT, __VA_ARGS__)
+#else
+# define snprintf _snprintf
#endif
+#endif
+
+#define GIT_STDLIB_CALL __cdecl
+
#endif /* INCLUDE_msvc_compat__ */
diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c
index f533eaa5e..4d56299f7 100644
--- a/src/win32/posix_w32.c
+++ b/src/win32/posix_w32.c
@@ -375,7 +375,8 @@ int p_vsnprintf(char *buffer, size_t count, const char *format, va_list argptr)
#ifdef _MSC_VER
int len;
- if (count == 0 || (len = _vsnprintf(buffer, count, format, argptr)) < 0)
+ if (count == 0 ||
+ (len = _vsnprintf_s(buffer, count, _TRUNCATE, format, argptr)) < 0)
return _vscprintf(format, argptr);
return len;
@@ -487,11 +488,14 @@ p_gmtime_r (const time_t *timer, struct tm *result)
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
+#ifndef _TIMEZONE_DEFINED
+#define _TIMEZONE_DEFINED
struct timezone
{
int tz_minuteswest; /* minutes W of Greenwich */
int tz_dsttime; /* type of dst correction */
};
+#endif
int p_gettimeofday(struct timeval *tv, struct timezone *tz)
{