summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/iterator.c138
-rw-r--r--src/iterator.h1
-rw-r--r--tests/diff/workdir.c24
-rw-r--r--tests/repo/iterator.c70
4 files changed, 147 insertions, 86 deletions
diff --git a/src/iterator.c b/src/iterator.c
index 9bf56b650..e35c8dc85 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -75,7 +75,8 @@ static int iterator_pathlist__init(git_iterator *iter, git_strarray *pathspec)
{
size_t i;
- if (git_vector_init(&iter->pathlist, pathspec->count, iter->strcomp) < 0)
+ if (git_vector_init(&iter->pathlist, pathspec->count,
+ (git_vector_cmp)iter->strcomp) < 0)
return -1;
for (i = 0; i < pathspec->count; i++) {
@@ -98,7 +99,8 @@ static iterator_pathlist__match_t iterator_pathlist__match(
size_t idx;
int error;
- error = git_vector_bsearch2(&idx, &iter->pathlist, iter->strcomp, path);
+ error = git_vector_bsearch2(&idx, &iter->pathlist,
+ (git_vector_cmp)iter->strcomp, path);
if (error == 0)
return ITERATOR_PATHLIST_MATCH;
@@ -116,10 +118,7 @@ static iterator_pathlist__match_t iterator_pathlist__match(
/* is this a literal directory entry (eg `foo/`) or a file beneath */
if (p[path_len] == '/') {
- while (p[path_len] == '/')
- path_len++;
-
- return (p[path_len] == '\0') ?
+ return (p[path_len+1] == '\0') ?
ITERATOR_PATHLIST_MATCH_DIRECTORY :
ITERATOR_PATHLIST_MATCH_CHILD;
}
@@ -133,10 +132,68 @@ static iterator_pathlist__match_t iterator_pathlist__match(
return ITERATOR_PATHLIST_NONE;
}
+static void iterator_pathlist_walk__reset(git_iterator *iter)
+{
+ iter->pathlist_walk_idx = 0;
+}
+
+/* walker for the index iterator that allows it to walk the sorted pathlist
+ * entries alongside the sorted index entries. the `iter->pathlist_walk_idx`
+ * stores the starting position for subsequent calls, the position is advanced
+ * along with the index iterator, with a special case for handling directories
+ * in the pathlist that are specified without trailing '/'. (eg, `foo`).
+ * we do not advance over these entries until we're certain that the index
+ * iterator will not ask us for a file beneath that directory (eg, `foo/bar`).
+ */
+static bool iterator_pathlist_walk__contains(git_iterator *iter, const char *path)
+{
+ size_t i;
+ char *p;
+ size_t p_len;
+ int cmp;
+
+ for (i = iter->pathlist_walk_idx; i < iter->pathlist.length; i++) {
+ p = iter->pathlist.contents[i];
+ p_len = strlen(p);
+
+ /* see if the pathlist entry is a prefix of this path */
+ cmp = iter->strncomp(p, path, p_len);
+
+ /* this pathlist entry sorts before the given path, try the next */
+ if (!p_len || cmp < 0)
+ iter->pathlist_walk_idx++;
+
+ /* this pathlist sorts after the given path, no match. */
+ else if (cmp > 0)
+ return false;
+
+ /* match! an exact match (`foo` vs `foo`), the path is a child of an
+ * explicit directory in the pathlist (`foo/` vs `foo/bar`) or the path
+ * is a child of an entry in the pathlist (`foo` vs `foo/bar`)
+ */
+ else if (path[p_len] == '\0' || p[p_len - 1] == '/' || path[p_len] == '/')
+ return true;
+
+ /* only advance the start index for future callers if we know that we
+ * will not see a child of this path. eg, a pathlist entry `foo` is
+ * a prefix for `foo.txt` and `foo/bar`. don't advance the start
+ * pathlist index when we see `foo.txt` or we would miss a subsequent
+ * inspection of `foo/bar`. only advance when there are no more
+ * potential children.
+ */
+ else if (path[p_len] > '/')
+ iter->pathlist_walk_idx++;
+ }
+
+ return false;
+}
+
static void iterator_pathlist__update_ignore_case(git_iterator *iter)
{
- git_vector_set_cmp(&iter->pathlist, iter->strcomp);
+ git_vector_set_cmp(&iter->pathlist, (git_vector_cmp)iter->strcomp);
git_vector_sort(&iter->pathlist);
+
+ iter->pathlist_walk_idx = 0;
}
@@ -583,13 +640,13 @@ static int tree_iterator__current_internal(
return 0;
}
-int tree_iterator__advance(
+static int tree_iterator__advance(
const git_index_entry **out, git_iterator *self);
static int tree_iterator__current(
const git_index_entry **out, git_iterator *self)
{
- git_index_entry *entry = NULL;
+ const git_index_entry *entry = NULL;
iterator_pathlist__match_t m;
int error;
@@ -797,9 +854,7 @@ static const git_index_entry *index_iterator__advance_over_unwanted(
index_iterator *ii)
{
const git_index_entry *ie = index_iterator__index_entry(ii);
- const char *p;
- size_t p_len;
- int cmp;
+ bool match;
while (ie) {
if (!iterator__include_conflicts(ii) &&
@@ -810,53 +865,17 @@ static const git_index_entry *index_iterator__advance_over_unwanted(
}
/* if we have a pathlist, this entry's path must be in it to be
- * returned. otherwise, advance the pathlist entry or the iterator
- * until we find the next path that we want to return.
+ * returned. walk the pathlist in unison with the index to
+ * compare paths.
*/
if (ii->base.pathlist.length) {
+ match = iterator_pathlist_walk__contains(&ii->base, ie->path);
- if (ii->pathlist_idx >= ii->base.pathlist.length) {
- ii->current = SIZE_MAX;
- ie = NULL;
- break;
- }
-
- p = git_vector_get(&ii->base.pathlist, ii->pathlist_idx);
-
- /* trim trailing slashes that indicate an exact directory match */
- p_len = strlen(p);
-
- while (p_len && p[p_len-1] == '/')
- p_len--;
-
- cmp = ii->base.strncomp(ie->path, p, p_len);
-
- /* we've matched the prefix - if the pathlist entry is equal to
- * this entry, or if the pathlist entry is a folder (eg `foo/`)
- * and this entry was beneath that, then continue. otherwise,
- * sort the index entry path against the pathlist entry.
- */
- if (cmp == 0) {
- if (ie->path[p_len] == 0)
- ;
- else if (ie->path[p_len] == '/')
- ;
- else if (ie->path[p_len] < '/')
- cmp = -1;
- else if (ie->path[p_len] > '/')
- cmp = 1;
- }
-
- if (cmp < 0) {
+ if (!match) {
ii->current++;
ie = index_iterator__index_entry(ii);
continue;
}
-
- if (cmp > 0) {
- ii->pathlist_idx++;
- continue;
- }
}
break;
@@ -1006,7 +1025,8 @@ static int index_iterator__reset(
return -1;
ii->current = 0;
- ii->pathlist_idx = 0;
+
+ iterator_pathlist_walk__reset(self);
/* if we're given a start prefix, find it; if we're given a pathlist, find
* the first of those. start at the later of the two.
@@ -1193,7 +1213,7 @@ static void fs_iterator__seek_frame_start(
ff->index = 0;
}
-static int dirload_with_stat(git_vector *contents, size_t *filtered, fs_iterator *fi)
+static int dirload_with_stat(git_vector *contents, fs_iterator *fi)
{
git_path_diriter diriter = GIT_PATH_DIRITER_INIT;
const char *path;
@@ -1204,8 +1224,6 @@ static int dirload_with_stat(git_vector *contents, size_t *filtered, fs_iterator
iterator_pathlist__match_t pathlist_match = ITERATOR_PATHLIST_MATCH;
int error;
- *filtered = 0;
-
/* Any error here is equivalent to the dir not existing, skip over it */
if ((error = git_path_diriter_init(
&diriter, fi->path.ptr, fi->dirload_flags)) < 0) {
@@ -1241,11 +1259,8 @@ static int dirload_with_stat(git_vector *contents, size_t *filtered, fs_iterator
if (fi->base.pathlist.length &&
fi->pathlist_match != ITERATOR_PATHLIST_MATCH &&
fi->pathlist_match != ITERATOR_PATHLIST_MATCH_DIRECTORY &&
- !(pathlist_match = iterator_pathlist__match(&fi->base, path, path_len))) {
-
- *filtered++;
+ !(pathlist_match = iterator_pathlist__match(&fi->base, path, path_len)))
continue;
- }
/* Make sure to append two bytes, one for the path's null
* termination, one for a possible trailing '/' for folders.
@@ -1313,7 +1328,6 @@ static int fs_iterator__expand_dir(fs_iterator *fi)
{
int error;
fs_iterator_frame *ff;
- size_t filtered = 0;
if (fi->depth > FS_MAX_DEPTH) {
giterr_set(GITERR_REPOSITORY,
@@ -1324,7 +1338,7 @@ static int fs_iterator__expand_dir(fs_iterator *fi)
ff = fs_iterator__alloc_frame(fi);
GITERR_CHECK_ALLOC(ff);
- error = dirload_with_stat(&ff->entries, &filtered, fi);
+ error = dirload_with_stat(&ff->entries, fi);
if (error < 0) {
git_error_state last_error = { 0 };
diff --git a/src/iterator.h b/src/iterator.h
index d2d61fbff..59f87e9de 100644
--- a/src/iterator.h
+++ b/src/iterator.h
@@ -70,6 +70,7 @@ struct git_iterator {
char *start;
char *end;
git_vector pathlist;
+ size_t pathlist_walk_idx;
int (*strcomp)(const char *a, const char *b);
int (*strncomp)(const char *a, const char *b, size_t n);
int (*prefixcomp)(const char *str, const char *prefix);
diff --git a/tests/diff/workdir.c b/tests/diff/workdir.c
index 336f959f6..e87769170 100644
--- a/tests/diff/workdir.c
+++ b/tests/diff/workdir.c
@@ -581,30 +581,6 @@ void test_diff_workdir__to_index_with_pathlist_disabling_fnmatch(void)
git_diff_free(diff);
- /* ensure that multiple trailing slashes are ignored */
- pathspec = "subdir//////";
-
- cl_git_pass(git_diff_index_to_workdir(&diff, g_repo, NULL, &opts));
-
- for (use_iterator = 0; use_iterator <= 1; use_iterator++) {
- memset(&exp, 0, sizeof(exp));
-
- if (use_iterator)
- cl_git_pass(diff_foreach_via_iterator(
- diff, diff_file_cb, NULL, NULL, NULL, &exp));
- else
- cl_git_pass(git_diff_foreach(diff, diff_file_cb, NULL, NULL, NULL, &exp));
-
- cl_assert_equal_i(3, exp.files);
- cl_assert_equal_i(0, exp.file_status[GIT_DELTA_ADDED]);
- cl_assert_equal_i(1, exp.file_status[GIT_DELTA_DELETED]);
- cl_assert_equal_i(1, exp.file_status[GIT_DELTA_MODIFIED]);
- cl_assert_equal_i(0, exp.file_status[GIT_DELTA_IGNORED]);
- cl_assert_equal_i(1, exp.file_status[GIT_DELTA_UNTRACKED]);
- }
-
- git_diff_free(diff);
-
/* ensure that fnmatching is completely disabled */
pathspec = "subdir/*";
diff --git a/tests/repo/iterator.c b/tests/repo/iterator.c
index cb9d4cd4b..8eeb7d376 100644
--- a/tests/repo/iterator.c
+++ b/tests/repo/iterator.c
@@ -1162,6 +1162,76 @@ void test_repo_iterator__indexfilelist_2(void)
git_vector_free(&filelist);
}
+void test_repo_iterator__indexfilelist_3(void)
+{
+ git_iterator *i;
+ git_iterator_options i_opts = GIT_ITERATOR_OPTIONS_INIT;
+ git_index *index;
+ git_vector filelist = GIT_VECTOR_INIT;
+
+ g_repo = cl_git_sandbox_init("icase");
+
+ cl_git_pass(git_repository_index(&index, g_repo));
+
+ cl_git_pass(git_vector_init(&filelist, 100, &git__strcmp_cb));
+ cl_git_pass(git_vector_insert(&filelist, "0"));
+ cl_git_pass(git_vector_insert(&filelist, "c"));
+ cl_git_pass(git_vector_insert(&filelist, "D"));
+ cl_git_pass(git_vector_insert(&filelist, "e"));
+ cl_git_pass(git_vector_insert(&filelist, "k/"));
+ cl_git_pass(git_vector_insert(&filelist, "k.a"));
+ cl_git_pass(git_vector_insert(&filelist, "k.b"));
+ cl_git_pass(git_vector_insert(&filelist, "kZZZZZZZ"));
+
+ i_opts.pathlist.strings = (char **)filelist.contents;
+ i_opts.pathlist.count = filelist.length;
+
+ i_opts.start = "b";
+ i_opts.end = "k/D";
+
+ cl_git_pass(git_iterator_for_index(&i, index, &i_opts));
+ expect_iterator_items(i, 8, NULL, 8, NULL);
+ git_iterator_free(i);
+
+ git_index_free(index);
+ git_vector_free(&filelist);
+}
+
+void test_repo_iterator__indexfilelist_4(void)
+{
+ git_iterator *i;
+ git_iterator_options i_opts = GIT_ITERATOR_OPTIONS_INIT;
+ git_index *index;
+ git_vector filelist = GIT_VECTOR_INIT;
+
+ g_repo = cl_git_sandbox_init("icase");
+
+ cl_git_pass(git_repository_index(&index, g_repo));
+
+ cl_git_pass(git_vector_init(&filelist, 100, &git__strcmp_cb));
+ cl_git_pass(git_vector_insert(&filelist, "0"));
+ cl_git_pass(git_vector_insert(&filelist, "c"));
+ cl_git_pass(git_vector_insert(&filelist, "D"));
+ cl_git_pass(git_vector_insert(&filelist, "e"));
+ cl_git_pass(git_vector_insert(&filelist, "k"));
+ cl_git_pass(git_vector_insert(&filelist, "k.a"));
+ cl_git_pass(git_vector_insert(&filelist, "k.b"));
+ cl_git_pass(git_vector_insert(&filelist, "kZZZZZZZ"));
+
+ i_opts.pathlist.strings = (char **)filelist.contents;
+ i_opts.pathlist.count = filelist.length;
+
+ i_opts.start = "b";
+ i_opts.end = "k/D";
+
+ cl_git_pass(git_iterator_for_index(&i, index, &i_opts));
+ expect_iterator_items(i, 8, NULL, 8, NULL);
+ git_iterator_free(i);
+
+ git_index_free(index);
+ git_vector_free(&filelist);
+}
+
void test_repo_iterator__indexfilelist_icase(void)
{
git_iterator *i;