summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Russell Belfer <rb@github.com> 2014-05-12 10:51:56 -0700
committer Russell Belfer <rb@github.com> 2014-05-12 10:51:56 -0700
commit df3419269bb3a7dba18f1df8a31d7d79e0a20475 (patch)
tree 38684dcd60eb4f2ce3b634f09915f4236ef32e08 /src
parent af567e8853e359df3b3abb7d3f7c15c3b1b391c7 (diff)
parent 8a2ef218b99c41923dc32e25dc0915f68e2e4bca (diff)
download libgit2-df3419269bb3a7dba18f1df8a31d7d79e0a20475.tar.gz
Merge pull request #2336 from libgit2/rb/unicode-branch-names
Pass unconverted Unicode path data when iconv doesn't like it
Diffstat (limited to 'src')
-rw-r--r-- src/path.c 63
-rw-r--r-- src/path.h 2
-rw-r--r-- src/repository.c 57
3 files changed, 66 insertions, 56 deletions
diff --git a/src/path.c b/src/path.c
index 2690cd8e8..e0b00a086 100644
--- a/src/path.c
+++ b/src/path.c
@@ -799,8 +799,11 @@ int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen)
if (rv != (size_t)-1)
break;
+ /* if we cannot convert the data (probably because iconv thinks
+ * it is not valid UTF-8 source data), then use original data
+ */
if (errno != E2BIG)
- goto fail;
+ return 0;
/* make space for 2x the remaining data to be converted
* (with per retry overhead to avoid infinite loops)
@@ -823,6 +826,64 @@ fail:
return -1;
}
+static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX";
+static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX";
+
+/* Check if the platform is decomposing unicode data for us. We will
+ * emulate core Git and prefer to use precomposed unicode data internally
+ * on these platforms, composing the decomposed unicode on the fly.
+ *
+ * This mainly happens on the Mac where HFS+ stores filenames as
+ * decomposed unicode. Even on VFAT and SAMBA file systems, the Mac will
+ * return decomposed unicode from readdir() even when the actual
+ * filesystem is storing precomposed unicode.
+ */
+bool git_path_does_fs_decompose_unicode(const char *root)
+{
+ git_buf path = GIT_BUF_INIT;
+ int fd;
+ bool found_decomposed = false;
+ char tmp[6];
+
+ /* Create a file using a precomposed path and then try to find it
+ * using the decomposed name. If the lookup succeeds, then we will mark
+ * that we should precompose unicode for this repository.
+ */
+ if (git_buf_joinpath(&path, root, nfc_file) < 0 ||
+ (fd = p_mkstemp(path.ptr)) < 0)
+ goto done;
+ p_close(fd);
+
+ /* record trailing characters that mkstemp substituted for XXXXXX */
+ memcpy(tmp, path.ptr + path.size - sizeof(tmp), sizeof(tmp));
+
+ /* try to look up as NFD path */
+ if (git_buf_joinpath(&path, root, nfd_file) < 0)
+ goto done;
+ memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp));
+
+ found_decomposed = git_path_exists(path.ptr);
+
+ /* remove temporary file (using original precomposed path) */
+ if (git_buf_joinpath(&path, root, nfc_file) < 0)
+ goto done;
+ memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp));
+
+ (void)p_unlink(path.ptr);
+
+done:
+ git_buf_free(&path);
+ return found_decomposed;
+}
+
+#else
+
+bool git_path_does_fs_decompose_unicode(const char *root)
+{
+ GIT_UNUSED(root);
+ return false;
+}
+
#endif
#if defined(__sun) || defined(__GNU__)
diff --git a/src/path.h b/src/path.h
index 2367d707b..3213c5104 100644
--- a/src/path.h
+++ b/src/path.h
@@ -436,4 +436,6 @@ extern int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen);
#endif /* GIT_USE_ICONV */
+extern bool git_path_does_fs_decompose_unicode(const char *root);
+
#endif
diff --git a/src/repository.c b/src/repository.c
index 43a476016..7d055e28e 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -889,60 +889,6 @@ static bool are_symlinks_supported(const char *wd_path)
return symlinks_supported;
}
-#ifdef GIT_USE_ICONV
-
-static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX";
-static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX";
-
-/* Check if the platform is decomposing unicode data for us. We will
- * emulate core Git and prefer to use precomposed unicode data internally
- * on these platforms, composing the decomposed unicode on the fly.
- *
- * This mainly happens on the Mac where HDFS stores filenames as
- * decomposed unicode. Even on VFAT and SAMBA file systems, the Mac will
- * return decomposed unicode from readdir() even when the actual
- * filesystem is storing precomposed unicode.
- */
-static bool does_fs_decompose_unicode_paths(const char *wd_path)
-{
- git_buf path = GIT_BUF_INIT;
- int fd;
- bool found_decomposed = false;
- char tmp[6];
-
- /* Create a file using a precomposed path and then try to find it
- * using the decomposed name. If the lookup fails, then we will mark
- * that we should precompose unicode for this repository.
- */
- if (git_buf_joinpath(&path, wd_path, nfc_file) < 0 ||
- (fd = p_mkstemp(path.ptr)) < 0)
- goto done;
- p_close(fd);
-
- /* record trailing digits generated by mkstemp */
- memcpy(tmp, path.ptr + path.size - sizeof(tmp), sizeof(tmp));
-
- /* try to look up as NFD path */
- if (git_buf_joinpath(&path, wd_path, nfd_file) < 0)
- goto done;
- memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp));
-
- found_decomposed = git_path_exists(path.ptr);
-
- /* remove temporary file (using original precomposed path) */
- if (git_buf_joinpath(&path, wd_path, nfc_file) < 0)
- goto done;
- memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp));
-
- (void)p_unlink(path.ptr);
-
-done:
- git_buf_free(&path);
- return found_decomposed;
-}
-
-#endif
-
static int create_empty_file(const char *path, mode_t mode)
{
int fd;
@@ -1033,8 +979,9 @@ static int repo_init_fs_configs(
#ifdef GIT_USE_ICONV
if ((error = git_config_set_bool(
cfg, "core.precomposeunicode",
- does_fs_decompose_unicode_paths(work_dir))) < 0)
+ git_path_does_fs_decompose_unicode(work_dir))) < 0)
return error;
+ /* on non-iconv platforms, don't even set core.precomposeunicode */
#endif
return 0;