summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/diff_tform.c219
-rw-r--r--src/hashsig.c218
-rw-r--r--src/util.h5
3 files changed, 228 insertions, 214 deletions
diff --git a/src/diff_tform.c b/src/diff_tform.c
index d4b8cf30a..92c4036fb 100644
--- a/src/diff_tform.c
+++ b/src/diff_tform.c
@@ -436,7 +436,7 @@ static int similarity_init(
info->file, &info->odb_obj, info->repo);
}
-static int similarity_calc(
+static int similarity_sig(
similarity_info *info,
const git_diff_find_options *opts,
void **cache)
@@ -566,16 +566,19 @@ static int similarity_measure(
/* check if file sizes are nowhere near each other */
if (a_file->size > 127 &&
b_file->size > 127 &&
- (a_file->size > (b_file->size << 4) ||
- b_file->size > (a_file->size << 4)))
+ (a_file->size > (b_file->size << 3) ||
+ b_file->size > (a_file->size << 3)))
goto cleanup;
/* update signature cache if needed */
- if (!cache[a_idx] && (error = similarity_calc(&a_info, opts, cache)) < 0)
- goto cleanup;
-
- if (!cache[b_idx] && (error = similarity_calc(&b_info, opts, cache)) < 0)
- goto cleanup;
+ if (!cache[a_idx]) {
+ if ((error = similarity_sig(&a_info, opts, cache)) < 0)
+ goto cleanup;
+ }
+ if (!cache[b_idx]) {
+ if ((error = similarity_sig(&b_info, opts, cache)) < 0)
+ goto cleanup;
+ }
/* calculate similarity provided that the metric choose to process
* both the a and b files (some may not if file is too big, etc).
@@ -759,25 +762,30 @@ int git_diff_find_similar(
git_diff_list *diff,
git_diff_find_options *given_opts)
{
- size_t i, j, sigcache_size;
+ size_t s, t;
int error = 0, similarity;
- git_diff_delta *from, *to;
+ git_diff_delta *src, *tgt;
git_diff_find_options opts;
- size_t num_srcs = 0, num_tgts = 0, tried_srcs = 0, tried_tgts = 0;
+ size_t num_deltas, num_srcs = 0, num_tgts = 0;
+ size_t tried_srcs = 0, tried_tgts = 0;
size_t num_rewrites = 0, num_updates = 0, num_bumped = 0;
void **sigcache; /* cache of similarity metric file signatures */
- diff_find_match *match_srcs = NULL, *match_tgts = NULL, *best_match;
+ diff_find_match *tgt2src = NULL;
+ diff_find_match *src2tgt = NULL;
+ diff_find_match *tgt2src_copy = NULL;
+ diff_find_match *best_match;
git_diff_file swap;
if ((error = normalize_find_opts(diff, &opts, given_opts)) < 0)
return error;
+ num_deltas = diff->deltas.length;
+
/* TODO: maybe abort if deltas.length > rename_limit ??? */
- if (!git__is_uint32(diff->deltas.length))
+ if (!git__is_uint32(num_deltas))
return 0;
- sigcache_size = diff->deltas.length * 2; /* keep size b/c diff may change */
- sigcache = git__calloc(sigcache_size, sizeof(void *));
+ sigcache = git__calloc(num_deltas * 2, sizeof(void *));
GITERR_CHECK_ALLOC(sigcache);
/* Label rename sources and targets
@@ -785,11 +793,11 @@ int git_diff_find_similar(
* This will also set self-similarity scores for MODIFIED files and
* mark them for splitting if break-rewrites is enabled
*/
- git_vector_foreach(&diff->deltas, i, to) {
- if (is_rename_source(diff, &opts, i, sigcache))
+ git_vector_foreach(&diff->deltas, t, tgt) {
+ if (is_rename_source(diff, &opts, t, sigcache))
++num_srcs;
- if (is_rename_target(diff, &opts, i, sigcache))
+ if (is_rename_target(diff, &opts, t, sigcache))
++num_tgts;
}
@@ -797,10 +805,15 @@ int git_diff_find_similar(
if (!num_srcs || !num_tgts)
goto cleanup;
- match_tgts = git__calloc(diff->deltas.length, sizeof(diff_find_match));
- GITERR_CHECK_ALLOC(match_tgts);
- match_srcs = git__calloc(diff->deltas.length, sizeof(diff_find_match));
- GITERR_CHECK_ALLOC(match_srcs);
+ src2tgt = git__calloc(num_deltas, sizeof(diff_find_match));
+ GITERR_CHECK_ALLOC(src2tgt);
+ tgt2src = git__calloc(num_deltas, sizeof(diff_find_match));
+ GITERR_CHECK_ALLOC(tgt2src);
+
+ if (FLAG_SET(&opts, GIT_DIFF_FIND_COPIES)) {
+ tgt2src_copy = git__calloc(num_deltas, sizeof(diff_find_match));
+ GITERR_CHECK_ALLOC(tgt2src_copy);
+ }
/*
* Find best-fit matches for rename / copy candidates
@@ -809,47 +822,61 @@ int git_diff_find_similar(
find_best_matches:
tried_tgts = num_bumped = 0;
- git_vector_foreach(&diff->deltas, i, to) {
+ git_vector_foreach(&diff->deltas, t, tgt) {
/* skip things that are not rename targets */
- if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
+ if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue;
tried_srcs = 0;
- git_vector_foreach(&diff->deltas, j, from) {
+ git_vector_foreach(&diff->deltas, s, src) {
/* skip things that are not rename sources */
- if ((from->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
+ if ((src->flags & GIT_DIFF_FLAG__IS_RENAME_SOURCE) == 0)
continue;
/* calculate similarity for this pair and find best match */
- if (i == j)
+ if (s == t)
similarity = -1; /* don't measure self-similarity here */
else if ((error = similarity_measure(
- &similarity, diff, &opts, sigcache, 2 * j, 2 * i + 1)) < 0)
+ &similarity, diff, &opts, sigcache, 2 * s, 2 * t + 1)) < 0)
goto cleanup;
- /* if this pairing is better for the src and the tgt, keep it */
- if (similarity > 0 &&
- match_tgts[i].similarity < (uint32_t)similarity &&
- match_srcs[j].similarity < (uint32_t)similarity)
+ if (similarity < 0)
+ continue;
+
+ /* is this a better rename? */
+ if (tgt2src[t].similarity < (uint32_t)similarity &&
+ src2tgt[s].similarity < (uint32_t)similarity)
{
- if (match_tgts[i].similarity > 0) {
- match_tgts[match_srcs[j].idx].similarity = 0;
- match_srcs[match_tgts[i].idx].similarity = 0;
- ++num_bumped;
+ /* eject old mapping */
+ if (src2tgt[s].similarity > 0) {
+ tgt2src[src2tgt[s].idx].similarity = 0;
+ num_bumped++;
+ }
+ if (tgt2src[t].similarity > 0) {
+ src2tgt[tgt2src[t].idx].similarity = 0;
+ num_bumped++;
}
- match_tgts[i].similarity = (uint32_t)similarity;
- match_tgts[i].idx = (uint32_t)j;
+ /* write new mapping */
+ tgt2src[t].idx = s;
+ tgt2src[t].similarity = (uint32_t)similarity;
+ src2tgt[s].idx = t;
+ src2tgt[s].similarity = (uint32_t)similarity;
+ }
- match_srcs[j].similarity = (uint32_t)similarity;
- match_srcs[j].idx = (uint32_t)i;
+ /* keep best absolute match for copies */
+ if (tgt2src_copy != NULL &&
+ tgt2src_copy[t].similarity < (uint32_t)similarity)
+ {
+ tgt2src_copy[t].idx = s;
+ tgt2src_copy[t].similarity = (uint32_t)similarity;
}
if (++tried_srcs >= num_srcs)
break;
- /* cap on maximum targets we'll examine (per "to" file) */
+ /* cap on maximum targets we'll examine (per "tgt" file) */
if (tried_srcs > opts.rename_limit)
break;
}
@@ -867,18 +894,21 @@ find_best_matches:
tried_tgts = 0;
- git_vector_foreach(&diff->deltas, i, to) {
+ git_vector_foreach(&diff->deltas, t, tgt) {
/* skip things that are not rename targets */
- if ((to->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
+ if ((tgt->flags & GIT_DIFF_FLAG__IS_RENAME_TARGET) == 0)
continue;
/* check if this delta was the target of a similarity */
- best_match = &match_tgts[i];
- if (!best_match->similarity)
+ if (tgt2src[t].similarity)
+ best_match = &tgt2src[t];
+ else if (tgt2src_copy && tgt2src_copy[t].similarity)
+ best_match = &tgt2src_copy[t];
+ else
continue;
- j = best_match->idx;
- from = GIT_VECTOR_GET(&diff->deltas, j);
+ s = best_match->idx;
+ src = GIT_VECTOR_GET(&diff->deltas, s);
/* possible scenarios:
* 1. from DELETE to ADD/UNTRACK/IGNORE = RENAME
@@ -888,101 +918,107 @@ find_best_matches:
* 5. from OTHER to ADD/UNTRACK/IGNORE = OTHER + COPY
*/
- if (from->status == GIT_DELTA_DELETED) {
+ if (src->status == GIT_DELTA_DELETED) {
- if (delta_is_new_only(to)) {
+ if (delta_is_new_only(tgt)) {
if (best_match->similarity < opts.rename_threshold)
continue;
- delta_make_rename(to, from, best_match->similarity);
+ delta_make_rename(tgt, src, best_match->similarity);
- from->flags |= GIT_DIFF_FLAG__TO_DELETE;
+ src->flags |= GIT_DIFF_FLAG__TO_DELETE;
num_rewrites++;
} else {
- assert(delta_is_split(to));
+ assert(delta_is_split(tgt));
if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue;
- memcpy(&swap, &to->old_file, sizeof(swap));
+ memcpy(&swap, &tgt->old_file, sizeof(swap));
- delta_make_rename(to, from, best_match->similarity);
+ delta_make_rename(tgt, src, best_match->similarity);
num_rewrites--;
- from->status = GIT_DELTA_DELETED;
- memcpy(&from->old_file, &swap, sizeof(from->old_file));
- memset(&from->new_file, 0, sizeof(from->new_file));
- from->new_file.path = from->old_file.path;
- from->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
+ src->status = GIT_DELTA_DELETED;
+ memcpy(&src->old_file, &swap, sizeof(src->old_file));
+ memset(&src->new_file, 0, sizeof(src->new_file));
+ src->new_file.path = src->old_file.path;
+ src->new_file.flags |= GIT_DIFF_FLAG_VALID_OID;
num_updates++;
}
}
- else if (delta_is_split(from)) {
+ else if (delta_is_split(src)) {
- if (delta_is_new_only(to)) {
+ if (delta_is_new_only(tgt)) {
if (best_match->similarity < opts.rename_threshold)
continue;
- delta_make_rename(to, from, best_match->similarity);
+ delta_make_rename(tgt, src, best_match->similarity);
- from->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
+ src->status = (diff->new_src == GIT_ITERATOR_TYPE_WORKDIR) ?
GIT_DELTA_UNTRACKED : GIT_DELTA_ADDED;
- memset(&from->old_file, 0, sizeof(from->old_file));
- from->old_file.path = from->new_file.path;
- from->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
+ memset(&src->old_file, 0, sizeof(src->old_file));
+ src->old_file.path = src->new_file.path;
+ src->old_file.flags |= GIT_DIFF_FLAG_VALID_OID;
- from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
num_updates++;
} else {
- assert(delta_is_split(from));
+ assert(delta_is_split(src));
if (best_match->similarity < opts.rename_from_rewrite_threshold)
continue;
- memcpy(&swap, &to->old_file, sizeof(swap));
+ memcpy(&swap, &tgt->old_file, sizeof(swap));
- delta_make_rename(to, from, best_match->similarity);
+ delta_make_rename(tgt, src, best_match->similarity);
num_rewrites--;
num_updates++;
- memcpy(&from->old_file, &swap, sizeof(from->old_file));
+ memcpy(&src->old_file, &swap, sizeof(src->old_file));
/* if we've just swapped the new element into the correct
* place, clear the SPLIT flag
*/
- if (match_tgts[j].idx == i &&
- match_tgts[j].similarity >
+ if (tgt2src[s].idx == t &&
+ tgt2src[s].similarity >
opts.rename_from_rewrite_threshold) {
-
- from->status = GIT_DELTA_RENAMED;
- from->similarity = match_tgts[j].similarity;
- match_tgts[j].similarity = 0;
- from->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
+ src->status = GIT_DELTA_RENAMED;
+ src->similarity = tgt2src[s].similarity;
+ tgt2src[s].similarity = 0;
+ src->flags &= ~GIT_DIFF_FLAG__TO_SPLIT;
num_rewrites--;
}
/* otherwise, if we just overwrote a source, update mapping */
- else if (j > i && match_srcs[i].similarity > 0) {
- match_tgts[match_srcs[i].idx].idx = (uint32_t)j;
+ else if (s > t && src2tgt[t].similarity > 0) {
+ /* what used to be at src t is now at src s */
+ tgt2src[src2tgt[t].idx].idx = (uint32_t)s;
}
num_updates++;
}
}
- else if (delta_is_new_only(to)) {
- if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES) ||
- best_match->similarity < opts.copy_threshold)
+ else if (delta_is_new_only(tgt)) {
+ if (!FLAG_SET(&opts, GIT_DIFF_FIND_COPIES))
+ continue;
+
+ if (tgt2src_copy[t].similarity < opts.copy_threshold)
continue;
- to->status = GIT_DELTA_COPIED;
- to->similarity = best_match->similarity;
- memcpy(&to->old_file, &from->old_file, sizeof(to->old_file));
+ /* always use best possible source for copy */
+ best_match = &tgt2src_copy[t];
+ src = GIT_VECTOR_GET(&diff->deltas, best_match->idx);
+
+ tgt->status = GIT_DELTA_COPIED;
+ tgt->similarity = best_match->similarity;
+ memcpy(&tgt->old_file, &src->old_file, sizeof(tgt->old_file));
num_updates++;
}
@@ -998,12 +1034,13 @@ find_best_matches:
FLAG_SET(&opts, GIT_DIFF_BREAK_REWRITES));
cleanup:
- git__free(match_srcs);
- git__free(match_tgts);
+ git__free(tgt2src);
+ git__free(src2tgt);
+ git__free(tgt2src_copy);
- for (i = 0; i < sigcache_size; ++i) {
- if (sigcache[i] != NULL)
- opts.metric->free_signature(sigcache[i], opts.metric->payload);
+ for (t = 0; t < num_deltas * 2; ++t) {
+ if (sigcache[t] != NULL)
+ opts.metric->free_signature(sigcache[t], opts.metric->payload);
}
git__free(sigcache);
diff --git a/src/hashsig.c b/src/hashsig.c
index a11c4bee7..109f966ba 100644
--- a/src/hashsig.c
+++ b/src/hashsig.c
@@ -13,12 +13,15 @@ typedef uint64_t hashsig_state;
#define HASHSIG_SCALE 100
-#define HASHSIG_HASH_WINDOW 32
-#define HASHSIG_HASH_START 0
+#define HASHSIG_MAX_RUN 80
+#define HASHSIG_HASH_START 0x012345678ABCDEF0LL
#define HASHSIG_HASH_SHIFT 5
-#define HASHSIG_HASH_MASK 0x7FFFFFFF
+
+#define HASHSIG_HASH_MIX(S,CH) \
+ (S) = ((S) << HASHSIG_HASH_SHIFT) - (S) + (hashsig_state)(CH)
#define HASHSIG_HEAP_SIZE ((1 << 7) - 1)
+#define HASHSIG_HEAP_MIN_SIZE 4
typedef int (*hashsig_cmp)(const void *a, const void *b, void *);
@@ -28,14 +31,6 @@ typedef struct {
hashsig_t values[HASHSIG_HEAP_SIZE];
} hashsig_heap;
-typedef struct {
- hashsig_state state, shift_n;
- char window[HASHSIG_HASH_WINDOW];
- int win_len, win_pos, saw_lf;
-} hashsig_in_progress;
-
-#define HASHSIG_IN_PROGRESS_INIT { HASHSIG_HASH_START, 1, {0}, 0, 0, 1 }
-
struct git_hashsig {
hashsig_heap mins;
hashsig_heap maxs;
@@ -115,142 +110,109 @@ static void hashsig_heap_sort(hashsig_heap *h)
static void hashsig_heap_insert(hashsig_heap *h, hashsig_t val)
{
- /* if heap is full, pop top if new element should replace it */
- if (h->size == h->asize && h->cmp(&val, &h->values[0], NULL) > 0) {
- h->size--;
- h->values[0] = h->values[h->size];
- hashsig_heap_down(h, 0);
- }
-
/* if heap is not full, insert new element */
if (h->size < h->asize) {
h->values[h->size++] = val;
hashsig_heap_up(h, h->size - 1);
}
-}
-
-GIT_INLINE(bool) hashsig_include_char(
- char ch, git_hashsig_option_t opt, int *saw_lf)
-{
- if ((opt & GIT_HASHSIG_IGNORE_WHITESPACE) && git__isspace(ch))
- return false;
-
- if (opt & GIT_HASHSIG_SMART_WHITESPACE) {
- if (ch == '\r' || (*saw_lf && git__isspace(ch)))
- return false;
- *saw_lf = (ch == '\n');
+ /* if heap is full, pop top if new element should replace it */
+ else if (h->cmp(&val, &h->values[0], NULL) > 0) {
+ h->size--;
+ h->values[0] = h->values[h->size];
+ hashsig_heap_down(h, 0);
}
- return true;
}
-static void hashsig_initial_window(
- git_hashsig *sig,
- const char **data,
- size_t size,
- hashsig_in_progress *prog)
-{
- hashsig_state state, shift_n;
- int win_len, saw_lf = prog->saw_lf;
- const char *scan, *end;
- char *window = &prog->window[0];
-
- /* init until we have processed at least HASHSIG_HASH_WINDOW data */
-
- if (prog->win_len >= HASHSIG_HASH_WINDOW)
- return;
-
- state = prog->state;
- win_len = prog->win_len;
- shift_n = prog->shift_n;
-
- scan = *data;
- end = scan + size;
-
- while (scan < end && win_len < HASHSIG_HASH_WINDOW) {
- char ch = *scan++;
-
- if (!hashsig_include_char(ch, sig->opt, &saw_lf))
- continue;
-
- state = (state * HASHSIG_HASH_SHIFT + ch) & HASHSIG_HASH_MASK;
-
- if (!win_len)
- shift_n = 1;
- else
- shift_n = (shift_n * HASHSIG_HASH_SHIFT) & HASHSIG_HASH_MASK;
-
- window[win_len++] = ch;
- }
-
- /* insert initial hash if we just finished */
+typedef struct {
+ int use_ignores;
+ uint8_t ignore_ch[256];
+} hashsig_in_progress;
- if (win_len == HASHSIG_HASH_WINDOW) {
- hashsig_heap_insert(&sig->mins, (hashsig_t)state);
- hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
- sig->considered = 1;
+static void hashsig_in_progress_init(
+ hashsig_in_progress *prog, git_hashsig *sig)
+{
+ int i;
+
+ switch (sig->opt) {
+ case GIT_HASHSIG_IGNORE_WHITESPACE:
+ for (i = 0; i < 256; ++i)
+ prog->ignore_ch[i] = git__isspace_nonlf(i);
+ prog->use_ignores = 1;
+ break;
+ case GIT_HASHSIG_SMART_WHITESPACE:
+ for (i = 0; i < 256; ++i)
+ prog->ignore_ch[i] = git__isspace(i);
+ prog->use_ignores = 1;
+ break;
+ default:
+ memset(prog, 0, sizeof(*prog));
+ break;
}
-
- prog->state = state;
- prog->win_len = win_len;
- prog->shift_n = shift_n;
- prog->saw_lf = saw_lf;
-
- *data = scan;
}
+#define HASHSIG_IN_PROGRESS_INIT { 1 }
+
static int hashsig_add_hashes(
git_hashsig *sig,
- const char *data,
+ const uint8_t *data,
size_t size,
hashsig_in_progress *prog)
{
- const char *scan = data, *end = data + size;
- hashsig_state state, shift_n, rmv;
- int win_pos, saw_lf;
- char *window = &prog->window[0];
-
- if (prog->win_len < HASHSIG_HASH_WINDOW)
- hashsig_initial_window(sig, &scan, size, prog);
-
- state = prog->state;
- shift_n = prog->shift_n;
- saw_lf = prog->saw_lf;
- win_pos = prog->win_pos;
-
- /* advance window, adding new chars and removing old */
-
- for (; scan < end; ++scan) {
- char ch = *scan;
-
- if (!hashsig_include_char(ch, sig->opt, &saw_lf))
- continue;
-
- rmv = shift_n * window[win_pos];
+ const uint8_t *scan = data, *end = data + size;
+ hashsig_state state = HASHSIG_HASH_START;
+ int use_ignores = prog->use_ignores, len;
+ uint8_t ch;
+
+ while (scan < end) {
+ state = HASHSIG_HASH_START;
+
+ for (len = 0; scan < end && len < HASHSIG_MAX_RUN; ) {
+ ch = *scan;
+
+ if (use_ignores)
+ for (; scan < end && git__isspace_nonlf(ch); ch = *scan)
+ ++scan;
+ else if (sig->opt != GIT_HASHSIG_NORMAL)
+ for (; scan < end && ch == '\r'; ch = *scan)
+ ++scan;
+
+ /* peek at next character to decide what to do next */
+ if (sig->opt == GIT_HASHSIG_SMART_WHITESPACE)
+ use_ignores = (ch == '\n');
+
+ if (scan >= end)
+ break;
+ ++scan;
+
+ /* check run terminator */
+ if (ch == '\n' || ch == '\0')
+ break;
+
+ ++len;
+ HASHSIG_HASH_MIX(state, ch);
+ }
- state = (state - rmv) & HASHSIG_HASH_MASK;
- state = (state * HASHSIG_HASH_SHIFT) & HASHSIG_HASH_MASK;
- state = (state + ch) & HASHSIG_HASH_MASK;
+ if (len > 0) {
+ hashsig_heap_insert(&sig->mins, (hashsig_t)state);
+ hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
- hashsig_heap_insert(&sig->mins, (hashsig_t)state);
- hashsig_heap_insert(&sig->maxs, (hashsig_t)state);
- sig->considered++;
+ sig->considered++;
- window[win_pos] = ch;
- win_pos = (win_pos + 1) % HASHSIG_HASH_WINDOW;
+ while (scan < end && (*scan == '\n' || !*scan))
+ ++scan;
+ }
}
- prog->state = state;
- prog->saw_lf = saw_lf;
- prog->win_pos = win_pos;
+ prog->use_ignores = use_ignores;
return 0;
}
static int hashsig_finalize_hashes(git_hashsig *sig)
{
- if (sig->mins.size < HASHSIG_HEAP_SIZE) {
+ if (sig->mins.size < HASHSIG_HEAP_MIN_SIZE) {
giterr_set(GITERR_INVALID,
"File too small for similarity signature calculation");
return GIT_EBUFS;
@@ -282,11 +244,13 @@ int git_hashsig_create(
git_hashsig_option_t opts)
{
int error;
- hashsig_in_progress prog = HASHSIG_IN_PROGRESS_INIT;
+ hashsig_in_progress prog;
git_hashsig *sig = hashsig_alloc(opts);
GITERR_CHECK_ALLOC(sig);
- error = hashsig_add_hashes(sig, buf, buflen, &prog);
+ hashsig_in_progress_init(&prog, sig);
+
+ error = hashsig_add_hashes(sig, (const uint8_t *)buf, buflen, &prog);
if (!error)
error = hashsig_finalize_hashes(sig);
@@ -304,10 +268,10 @@ int git_hashsig_create_fromfile(
const char *path,
git_hashsig_option_t opts)
{
- char buf[0x1000];
+ uint8_t buf[0x1000];
ssize_t buflen = 0;
int error = 0, fd;
- hashsig_in_progress prog = HASHSIG_IN_PROGRESS_INIT;
+ hashsig_in_progress prog;
git_hashsig *sig = hashsig_alloc(opts);
GITERR_CHECK_ALLOC(sig);
@@ -316,6 +280,8 @@ int git_hashsig_create_fromfile(
return fd;
}
+ hashsig_in_progress_init(&prog, sig);
+
while (!error) {
if ((buflen = p_read(fd, buf, sizeof(buf))) <= 0) {
if ((error = (int)buflen) < 0)
@@ -370,6 +336,12 @@ static int hashsig_heap_compare(const hashsig_heap *a, const hashsig_heap *b)
int git_hashsig_compare(const git_hashsig *a, const git_hashsig *b)
{
- return (hashsig_heap_compare(&a->mins, &b->mins) +
- hashsig_heap_compare(&a->maxs, &b->maxs)) / 2;
+ /* if we have fewer than the maximum number of elements, then just use
+ * one array since the two arrays will be the same
+ */
+ if (a->mins.size < HASHSIG_HEAP_SIZE)
+ return hashsig_heap_compare(&a->mins, &b->mins);
+ else
+ return (hashsig_heap_compare(&a->mins, &b->mins) +
+ hashsig_heap_compare(&a->maxs, &b->maxs)) / 2;
}
diff --git a/src/util.h b/src/util.h
index a97c9bf39..ed9624770 100644
--- a/src/util.h
+++ b/src/util.h
@@ -294,6 +294,11 @@ GIT_INLINE(bool) git__isspace(int c)
return (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == '\v' || c == 0x85 /* Unicode CR+LF */);
}
+GIT_INLINE(bool) git__isspace_nonlf(int c)
+{
+ return (c == ' ' || c == '\t' || c == '\f' || c == '\r' || c == '\v' || c == 0x85 /* Unicode CR+LF */);
+}
+
GIT_INLINE(bool) git__iswildcard(int c)
{
return (c == '*' || c == '?' || c == '[');