summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVicent Martí <tanoku@gmail.com>2011-07-06 10:55:06 -0700
committerVicent Martí <tanoku@gmail.com>2011-07-06 10:55:06 -0700
commitbf9a2e98b83c8de2b8664a91167b5529ae1d9d39 (patch)
tree6db283bad395273d08b96f779ff349d00530f948
parentc68dee2a74fc6c5a2adab12857e9840254647a4b (diff)
parent245adf4f3cc1d47352e626ef53372d07761d84cf (diff)
downloadlibgit2-bf9a2e98b83c8de2b8664a91167b5529ae1d9d39.tar.gz
Merge pull request #296 from kiryl/index-optimization
Index optimization
-rw-r--r--include/git2/index.h7
-rw-r--r--src/index.c39
-rw-r--r--src/util.c64
-rw-r--r--src/util.h3
-rw-r--r--src/vector.c22
-rw-r--r--src/vector.h2
-rw-r--r--tests/t00-core.c23
7 files changed, 142 insertions, 18 deletions
diff --git a/include/git2/index.h b/include/git2/index.h
index 83a18e160..d34ed0a92 100644
--- a/include/git2/index.h
+++ b/include/git2/index.h
@@ -173,6 +173,13 @@ GIT_EXTERN(int) git_index_write(git_index *index);
GIT_EXTERN(int) git_index_find(git_index *index, const char *path);
/**
+ * Remove all entries with equal path except last added
+ *
+ * @param index an existing index object
+ */
+GIT_EXTERN(void) git_index_uniq(git_index *index);
+
+/**
* Add or update an index entry from a file in disk
*
* The file `path` must be relative to the repository's
diff --git a/src/index.c b/src/index.c
index 06fa95c7b..dd33db92a 100644
--- a/src/index.c
+++ b/src/index.c
@@ -348,6 +348,7 @@ static int index_insert(git_index *index, const git_index_entry *source_entry, i
git_index_entry *entry;
size_t path_length;
int position;
+ git_index_entry **entry_array;
assert(index && source_entry);
@@ -375,27 +376,28 @@ static int index_insert(git_index *index, const git_index_entry *source_entry, i
else
entry->flags |= GIT_IDXENTRY_NAMEMASK;;
+ /*
+ * replacing is not requested: just insert entry at the end;
+ * the index is no longer sorted
+ */
+ if (!replace)
+ return git_vector_insert(&index->entries, entry);
/* look if an entry with this path already exists */
position = git_index_find(index, source_entry->path);
- /* if no entry exists and replace is not set,
- * add the entry at the end;
- * the index is no longer sorted */
- if (!replace || position == GIT_ENOTFOUND) {
- if (git_vector_insert(&index->entries, entry) < GIT_SUCCESS)
- return GIT_ENOMEM;
+ /*
+ * if no entry exists add the entry at the end;
+ * the index is no longer sorted
+ */
+ if (position == GIT_ENOTFOUND)
+ return git_vector_insert(&index->entries, entry);
- /* if a previous entry exists and replace is set,
- * replace it */
- } else {
- git_index_entry **entry_array = (git_index_entry **)index->entries.contents;
-
- free((char *)entry_array[position]->path);
- free(entry_array[position]);
-
- entry_array[position] = entry;
- }
+ /* exists, replace it */
+ entry_array = (git_index_entry **) index->entries.contents;
+ free((char *)entry_array[position]->path);
+ free(entry_array[position]);
+ entry_array[position] = entry;
return GIT_SUCCESS;
}
@@ -485,6 +487,11 @@ int git_index_find(git_index *index, const char *path)
return git_vector_bsearch2(&index->entries, index_srch, path);
}
+void git_index_uniq(git_index *index)
+{
+ git_vector_uniq(&index->entries);
+}
+
const git_index_entry_unmerged *git_index_get_unmerged_bypath(git_index *index, const char *path)
{
int pos;
diff --git a/src/util.c b/src/util.c
index 7bfc08be4..03e911241 100644
--- a/src/util.c
+++ b/src/util.c
@@ -330,3 +330,67 @@ uint32_t git__hash(const void *key, int len, uint32_t seed)
return h1;
}
#endif
+
+/*
+ * A merge sort implementation, simplified from the qsort implementation
+ * by Mike Haertel, which is a part of the GNU C Library.
+ */
+static void msort_with_tmp(void *b, size_t n, size_t s,
+ int (*cmp)(const void *, const void *),
+ char *t)
+{
+ char *tmp;
+ char *b1, *b2;
+ size_t n1, n2;
+
+ if (n <= 1)
+ return;
+
+ n1 = n / 2;
+ n2 = n - n1;
+ b1 = b;
+ b2 = (char *)b + (n1 * s);
+
+ msort_with_tmp(b1, n1, s, cmp, t);
+ msort_with_tmp(b2, n2, s, cmp, t);
+
+ tmp = t;
+
+ while (n1 > 0 && n2 > 0) {
+ if (cmp(b1, b2) <= 0) {
+ memcpy(tmp, b1, s);
+ tmp += s;
+ b1 += s;
+ --n1;
+ } else {
+ memcpy(tmp, b2, s);
+ tmp += s;
+ b2 += s;
+ --n2;
+ }
+ }
+ if (n1 > 0)
+ memcpy(tmp, b1, n1 * s);
+ memcpy(b, t, (n - n2) * s);
+}
+
+int git__msort(void *b, size_t n, size_t s,
+ int (*cmp)(const void *, const void *))
+{
+ const size_t size = n * s;
+ char buf[1024];
+
+ if (size < sizeof(buf)) {
+ /* The temporary array fits on the small on-stack buffer. */
+ msort_with_tmp(b, n, s, cmp, buf);
+ } else {
+ /* It's somewhat large, so malloc it. */
+ char *tmp = git__malloc(size);
+ if (tmp == NULL)
+ return GIT_ENOMEM;
+ msort_with_tmp(b, n, s, cmp, tmp);
+ free(tmp);
+ }
+
+ return GIT_SUCCESS;
+}
diff --git a/src/util.h b/src/util.h
index 0faf7f69c..c9ca4dec0 100644
--- a/src/util.h
+++ b/src/util.h
@@ -118,4 +118,7 @@ extern int git__fnmatch(const char *pattern, const char *name, int flags);
} \
} while (0)
+extern int git__msort(void *b, size_t n, size_t s,
+ int (*cmp)(const void *, const void *));
+
#endif /* INCLUDE_util_h__ */
diff --git a/src/vector.c b/src/vector.c
index 0451eb082..2fc051f0c 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -94,7 +94,7 @@ void git_vector_sort(git_vector *v)
if (v->sorted || v->_cmp == NULL)
return;
- qsort(v->contents, v->length, sizeof(void *), v->_cmp);
+ git__msort(v->contents, v->length, sizeof(void *), v->_cmp);
v->sorted = 1;
}
@@ -162,6 +162,26 @@ int git_vector_remove(git_vector *v, unsigned int idx)
return GIT_SUCCESS;
}
+void git_vector_uniq(git_vector *v)
+{
+ git_vector_cmp cmp;
+ unsigned int i, j;
+
+ if (v->length <= 1)
+ return;
+
+ git_vector_sort(v);
+ cmp = v->_cmp ? v->_cmp : strict_comparison;
+
+ for (i = 0, j = 1 ; j < v->length; ++j)
+ if (!cmp(v->contents + i, v->contents + j))
+ v->contents[i] = v->contents[j];
+ else
+ v->contents[++i] = v->contents[j];
+
+ v->length -= j - i - 1;
+}
+
void git_vector_clear(git_vector *v)
{
assert(v);
diff --git a/src/vector.h b/src/vector.h
index 256452ee5..76778ba4e 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -32,5 +32,5 @@ GIT_INLINE(void *) git_vector_get(git_vector *v, unsigned int position)
int git_vector_insert(git_vector *v, void *element);
int git_vector_remove(git_vector *v, unsigned int idx);
-
+void git_vector_uniq(git_vector *v);
#endif
diff --git a/tests/t00-core.c b/tests/t00-core.c
index c01518b28..ee1eb6df2 100644
--- a/tests/t00-core.c
+++ b/tests/t00-core.c
@@ -73,6 +73,28 @@ BEGIN_TEST(vector1, "don't read past array bounds on remove()")
git_vector_free(&x);
END_TEST
+static int test_cmp(const void *a, const void *b)
+{
+ int n1 = *(int *)a;
+ int n2 = *(int *)b;
+
+ return n1 - n2;
+}
+
+BEGIN_TEST(vector2, "remove duplicates")
+ git_vector x;
+ must_pass(git_vector_init(&x, 5, test_cmp));
+ must_pass(git_vector_insert(&x, (void *) 0xdeadbeef));
+ must_pass(git_vector_insert(&x, (void *) 0xcafebabe));
+ must_pass(git_vector_insert(&x, (void *) 0xcafebabe));
+ must_pass(git_vector_insert(&x, (void *) 0xdeadbeef));
+ must_pass(git_vector_insert(&x, (void *) 0xcafebabe));
+ must_be_true(x.length == 5);
+ git_vector_uniq(&x);
+ must_be_true(x.length == 2);
+ git_vector_free(&x);
+END_TEST
+
BEGIN_TEST(path0, "get the dirname of a path")
char dir[64], *dir2;
@@ -531,6 +553,7 @@ BEGIN_SUITE(core)
ADD_TEST(vector0);
ADD_TEST(vector1);
+ ADD_TEST(vector2);
ADD_TEST(path0);
ADD_TEST(path1);