summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEdward Thomson <ethomson@edwardthomson.com>2015-08-04 16:51:00 -0500
committerEdward Thomson <ethomson@microsoft.com>2015-09-08 11:32:40 -0400
commit280adb3f942a1ce4f4939b7058209d0cd0467062 (patch)
tree519e9f4965dfaac6f969c0f135373c16b669fc2f
parente1d27bcaafaadf4ef5eeae19c96835c6663c4289 (diff)
downloadlibgit2-280adb3f942a1ce4f4939b7058209d0cd0467062.tar.gz
index: canonicalize directory case when adding
On case-insensitive systems, when given a user-provided path in the higher-level index addition functions (e.g. `git_index_add_bypath` / `git_index_add_frombuffer`), examine the index to try to match the given path to an existing directory. Various mechanisms can cause the on-disk representation of a folder to not match the representation in HEAD or the index - for example, a case-changing rename of some file `a/file.txt` to `A/file.txt` will update the paths in the index, but not rename the folder on disk. If a user subsequently adds `a/other.txt`, then this should be stored in the index as `A/other.txt`.
-rw-r--r--src/index.c95
-rw-r--r--tests/index/bypath.c148
2 files changed, 236 insertions, 7 deletions
diff --git a/src/index.c b/src/index.c
index 6be73d2c7..be86f16cb 100644
--- a/src/index.c
+++ b/src/index.c
@@ -1102,6 +1102,74 @@ static int check_file_directory_collision(git_index *index,
return 0;
}
+/*
+ * Rewrite the case of `entry->path` in place so that it agrees with paths
+ * already present in `index`.  Does nothing unless `index->ignore_case`
+ * is set.
+ *
+ * If `git_index_get_bypath` already finds an entry for this path, the whole
+ * path takes on that entry's spelling.  Otherwise the nearest enclosing
+ * directory prefix shared with some stage-0 entry is located and only that
+ * leading prefix of `entry->path` is overwritten; a byte-for-byte match of
+ * the prefix is preferred over one that differs only in case.
+ *
+ * Returns 0 on success (including when nothing was changed), or -1 if the
+ * scratch copy of the path cannot be allocated.
+ */
+static int canonicalize_directory_path(
+	git_index *index, git_index_entry *entry)
+{
+	const git_index_entry *match, *best = NULL;
+	char *search, *sep;
+	size_t pos, search_len, best_len = 0;
+
+	if (!index->ignore_case)
+		return 0;
+
+	/* item already exists in the index, simply re-use the existing case */
+	if ((match = git_index_get_bypath(index, entry->path, 0)) != NULL) {
+		memcpy((char *)entry->path, match->path, strlen(entry->path));
+		return 0;
+	}
+
+	/* nothing to do: the path has no directory component */
+	if (strchr(entry->path, '/') == NULL)
+		return 0;
+
+	if ((search = git__strdup(entry->path)) == NULL)
+		return -1;
+
+	/* starting at the parent directory and ascending towards the root,
+	 * find the deepest directory prefix that the index already knows.
+	 */
+	while (!best && (sep = strrchr(search, '/'))) {
+		/* keep the trailing '/' so only whole directory names match */
+		sep[1] = '\0';
+
+		search_len = strlen(search);
+
+		/* the result is ignored; only `pos` is used as the place to
+		 * start walking from (presumably the insertion point on a miss)
+		 */
+		git_vector_bsearch2(
+			&pos, &index->entries, index->entries_search_path, search);
+
+		while ((match = git_vector_get(&index->entries, pos))) {
+			if (GIT_IDXENTRY_STAGE(match) != 0) {
+				/* conflicts do not contribute to canonical paths */
+			} else if (strncmp(search, match->path, search_len) == 0) {
+				/* prefer an exact match to the input filename.
+				 * strncmp, not memcmp: match->path may be shorter
+				 * than search_len and must not be read past its
+				 * terminator.
+				 */
+				best = match;
+				best_len = search_len;
+				break;
+			} else if (strncasecmp(search, match->path, search_len) == 0) {
+				/* continue walking, there may be a path with an exact
+				 * (case sensitive) match later in the index, but use this
+				 * as the best match until that happens.
+				 */
+				if (!best) {
+					best = match;
+					best_len = search_len;
+				}
+			} else {
+				break;
+			}
+
+			pos++;
+		}
+
+		/* drop the last component so the next pass tries the parent */
+		sep[0] = '\0';
+	}
+
+	/* overwrite only the directory prefix; the rest keeps the given case */
+	if (best)
+		memcpy((char *)entry->path, best->path, best_len);
+
+	git__free(search);
+	return 0;
+}
+
static int index_no_dups(void **old, void *new)
{
const git_index_entry *entry = new;
@@ -1115,10 +1183,17 @@ static int index_no_dups(void **old, void *new)
* it, then it will return an error **and also free the entry**. When
* it replaces an existing entry, it will update the entry_ptr with the
* actual entry in the index (and free the passed in one).
+ * trust_path is whether we use the given path, or whether (on case
+ * insensitive systems only) we try to canonicalize the given path to
+ * be within an existing directory.
* trust_mode is whether we trust the mode in entry_ptr.
*/
static int index_insert(
- git_index *index, git_index_entry **entry_ptr, int replace, bool trust_mode)
+ git_index *index,
+ git_index_entry **entry_ptr,
+ int replace,
+ bool trust_path,
+ bool trust_mode)
{
int error = 0;
size_t path_length, position;
@@ -1156,8 +1231,14 @@ static int index_insert(
entry->mode = index_merge_mode(index, existing, entry->mode);
}
+ /* canonicalize the directory name */
+ if (!trust_path)
+ error = canonicalize_directory_path(index, entry);
+
/* look for tree / blob name collisions, removing conflicts if requested */
- error = check_file_directory_collision(index, entry, position, replace);
+ if (!error)
+ error = check_file_directory_collision(index, entry, position, replace);
+
if (error < 0)
/* skip changes */;
@@ -1258,7 +1339,7 @@ int git_index_add_frombuffer(
git_oid_cpy(&entry->id, &id);
entry->file_size = len;
- if ((error = index_insert(index, &entry, 1, true)) < 0)
+ if ((error = index_insert(index, &entry, 1, true, true)) < 0)
return error;
/* Adding implies conflict was resolved, move conflict entries to REUC */
@@ -1317,7 +1398,7 @@ int git_index_add_bypath(git_index *index, const char *path)
assert(index && path);
if ((ret = index_entry_init(&entry, index, path)) == 0)
- ret = index_insert(index, &entry, 1, false);
+ ret = index_insert(index, &entry, 1, false, false);
/* If we were given a directory, let's see if it's a submodule */
if (ret < 0 && ret != GIT_EDIRECTORY)
@@ -1343,7 +1424,7 @@ int git_index_add_bypath(git_index *index, const char *path)
if ((ret = add_repo_as_submodule(&entry, index, path)) < 0)
return ret;
- if ((ret = index_insert(index, &entry, 1, false)) < 0)
+ if ((ret = index_insert(index, &entry, 1, false, false)) < 0)
return ret;
} else if (ret < 0) {
return ret;
@@ -1394,7 +1475,7 @@ int git_index_add(git_index *index, const git_index_entry *source_entry)
}
if ((ret = index_entry_dup(&entry, INDEX_OWNER(index), source_entry)) < 0 ||
- (ret = index_insert(index, &entry, 1, true)) < 0)
+ (ret = index_insert(index, &entry, 1, true, true)) < 0)
return ret;
git_tree_cache_invalidate_path(index->tree, entry->path);
@@ -1555,7 +1636,7 @@ int git_index_conflict_add(git_index *index,
/* Make sure stage is correct */
GIT_IDXENTRY_STAGE_SET(entries[i], i + 1);
- if ((ret = index_insert(index, &entries[i], 0, true)) < 0)
+ if ((ret = index_insert(index, &entries[i], 0, true, true)) < 0)
goto on_error;
entries[i] = NULL; /* don't free if later entry fails */
diff --git a/tests/index/bypath.c b/tests/index/bypath.c
index b607e1732..17bba6ad5 100644
--- a/tests/index/bypath.c
+++ b/tests/index/bypath.c
@@ -72,3 +72,151 @@ void test_index_bypath__add_hidden(void)
cl_assert_equal_i(GIT_FILEMODE_BLOB, entry->mode);
#endif
}
+/* Checks that git_index_add_bypath stores a new path under the directory
+ * spelling already present in the index, keeps the file name as given, and
+ * keeps an existing entry's spelling when the same file is added again.
+ * Skipped unless core.ignorecase is set for the repository.
+ */
+void test_index_bypath__add_honors_existing_case(void)
+{
+ const git_index_entry *entry;
+
+ if (!cl_repo_get_bool(g_repo, "core.ignorecase"))
+ clar__skip();
+
+ cl_git_mkfile("submod2/just_a_dir/file1.txt", "This is a file");
+ cl_git_mkfile("submod2/just_a_dir/file2.txt", "This is another file");
+ cl_git_mkfile("submod2/just_a_dir/file3.txt", "This is another file");
+ cl_git_mkfile("submod2/just_a_dir/file4.txt", "And another file");
+ /* add three of the files, each with a different directory spelling */
+ cl_git_pass(git_index_add_bypath(g_idx, "just_a_dir/File1.txt"));
+ cl_git_pass(git_index_add_bypath(g_idx, "JUST_A_DIR/file2.txt"));
+ cl_git_pass(git_index_add_bypath(g_idx, "Just_A_Dir/FILE3.txt"));
+ /* every entry is expected under the first-added directory spelling, with the file name as passed in */
+ cl_assert(entry = git_index_get_bypath(g_idx, "just_a_dir/File1.txt", 0));
+ cl_assert_equal_s("just_a_dir/File1.txt", entry->path);
+
+ cl_assert(entry = git_index_get_bypath(g_idx, "JUST_A_DIR/file2.txt", 0));
+ cl_assert_equal_s("just_a_dir/file2.txt", entry->path);
+
+ cl_assert(entry = git_index_get_bypath(g_idx, "Just_A_Dir/FILE3.txt", 0));
+ cl_assert_equal_s("just_a_dir/FILE3.txt", entry->path);
+ /* re-adding file3 with a different file-name case must keep the stored path */
+ cl_git_rewritefile("submod2/just_a_dir/file3.txt", "Rewritten");
+ cl_git_pass(git_index_add_bypath(g_idx, "Just_A_Dir/file3.txt"));
+
+ cl_assert(entry = git_index_get_bypath(g_idx, "Just_A_Dir/file3.txt", 0));
+ cl_assert_equal_s("just_a_dir/FILE3.txt", entry->path);
+}
+/* Checks that, when the index already holds several spellings of the same
+ * directory, git_index_add_bypath keeps the caller's directory spelling if
+ * an entry with exactly that spelling exists.
+ * Skipped unless core.ignorecase is set for the repository.
+ */
+void test_index_bypath__add_honors_existing_case_2(void)
+{
+ git_index_entry dummy = { { 0 } };
+ const git_index_entry *entry;
+
+ if (!cl_repo_get_bool(g_repo, "core.ignorecase"))
+ clar__skip();
+
+ dummy.mode = GIT_FILEMODE_BLOB;
+
+ /* note that `git_index_add` does not canonicalize directory names, so each spelling below is stored as given */
+ dummy.path = "Just_a_dir/file0.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "just_a_dir/fileA.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "Just_A_Dir/fileB.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "JUST_A_DIR/fileC.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "just_A_dir/fileD.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "JUST_a_DIR/fileE.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ cl_git_mkfile("submod2/just_a_dir/file1.txt", "This is a file");
+ cl_git_mkfile("submod2/just_a_dir/file2.txt", "This is another file");
+ cl_git_mkfile("submod2/just_a_dir/file3.txt", "This is another file");
+ cl_git_mkfile("submod2/just_a_dir/file4.txt", "And another file");
+ /* FILE4 uses a spelling with no exact directory match in the index; its stored path is not asserted below */
+ cl_git_pass(git_index_add_bypath(g_idx, "just_a_dir/File1.txt"));
+ cl_git_pass(git_index_add_bypath(g_idx, "JUST_A_DIR/file2.txt"));
+ cl_git_pass(git_index_add_bypath(g_idx, "Just_A_Dir/FILE3.txt"));
+ cl_git_pass(git_index_add_bypath(g_idx, "JusT_A_DIR/FILE4.txt"));
+ /* each path keeps its directory spelling because the index already holds an entry under that exact spelling */
+ cl_assert(entry = git_index_get_bypath(g_idx, "just_a_dir/File1.txt", 0));
+ cl_assert_equal_s("just_a_dir/File1.txt", entry->path);
+
+ cl_assert(entry = git_index_get_bypath(g_idx, "JUST_A_DIR/file2.txt", 0));
+ cl_assert_equal_s("JUST_A_DIR/file2.txt", entry->path);
+
+ cl_assert(entry = git_index_get_bypath(g_idx, "Just_A_Dir/FILE3.txt", 0));
+ cl_assert_equal_s("Just_A_Dir/FILE3.txt", entry->path);
+ /* re-adding file3 with a different file-name case must keep the stored path */
+ cl_git_rewritefile("submod2/just_a_dir/file3.txt", "Rewritten");
+ cl_git_pass(git_index_add_bypath(g_idx, "Just_A_Dir/file3.txt"));
+
+ cl_assert(entry = git_index_get_bypath(g_idx, "Just_A_Dir/file3.txt", 0));
+ cl_assert_equal_s("Just_A_Dir/FILE3.txt", entry->path);
+}
+/* Checks that a path added with git_index_add_bypath keeps the directory
+ * spelling passed by the caller when the index holds an entry under that
+ * exact spelling, even though the file was created under another spelling.
+ * Skipped unless core.ignorecase is set for the repository.
+ */
+void test_index_bypath__add_honors_existing_case_3(void)
+{
+ git_index_entry dummy = { { 0 } };
+ const git_index_entry *entry;
+
+ if (!cl_repo_get_bool(g_repo, "core.ignorecase"))
+ clar__skip();
+
+ dummy.mode = GIT_FILEMODE_BLOB;
+ /* seed the index with four spellings of the same directory */
+ dummy.path = "just_a_dir/filea.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "Just_A_Dir/fileB.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "just_A_DIR/FILEC.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "Just_a_DIR/FileD.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+ /* the file is created under a directory spelling that differs from all four index entries */
+ cl_git_mkfile("submod2/JuSt_A_DiR/fILEE.txt", "This is a file");
+
+ cl_git_pass(git_index_add_bypath(g_idx, "just_a_dir/fILEE.txt"));
+ /* looked up under yet another spelling; the stored directory matches the `just_a_dir/filea.txt` entry exactly */
+ cl_assert(entry = git_index_get_bypath(g_idx, "JUST_A_DIR/fILEE.txt", 0));
+ cl_assert_equal_s("just_a_dir/fILEE.txt", entry->path);
+}
+/* Checks canonicalization of a deeply nested path: only the leading
+ * directories that already appear in the index take the index's spelling,
+ * while deeper components keep the spelling passed to git_index_add_bypath.
+ * Skipped unless core.ignorecase is set for the repository.
+ */
+void test_index_bypath__add_honors_existing_case_4(void)
+{
+ git_index_entry dummy = { { 0 } };
+ const git_index_entry *entry;
+
+ if (!cl_repo_get_bool(g_repo, "core.ignorecase"))
+ clar__skip();
+
+ dummy.mode = GIT_FILEMODE_BLOB;
+ /* seed two entries that share `just_a_dir/a/` and differ in case below it */
+ dummy.path = "just_a_dir/a/b/c/d/e/file1.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+
+ dummy.path = "just_a_dir/a/B/C/D/E/file2.txt";
+ cl_git_pass(git_index_add(g_idx, &dummy));
+ /* create the nested directories one level at a time */
+ cl_must_pass(p_mkdir("submod2/just_a_dir/a", 0777));
+ cl_must_pass(p_mkdir("submod2/just_a_dir/a/b", 0777));
+ cl_must_pass(p_mkdir("submod2/just_a_dir/a/b/z", 0777));
+ cl_must_pass(p_mkdir("submod2/just_a_dir/a/b/z/y", 0777));
+ cl_must_pass(p_mkdir("submod2/just_a_dir/a/b/z/y/x", 0777));
+
+ cl_git_mkfile("submod2/just_a_dir/a/b/z/y/x/FOO.txt", "This is a file");
+
+ cl_git_pass(git_index_add_bypath(g_idx, "just_a_dir/A/b/Z/y/X/foo.txt"));
+ /* only the leading `just_a_dir/a/b/` comes from the index; the deeper components keep the spelling passed to add */
+ cl_assert(entry = git_index_get_bypath(g_idx, "just_a_dir/A/b/Z/y/X/foo.txt", 0));
+ cl_assert_equal_s("just_a_dir/a/b/Z/y/X/foo.txt", entry->path);
+}
+