diff options
author | Russell Belfer <rb@github.com> | 2014-05-12 10:51:56 -0700 |
---|---|---|
committer | Russell Belfer <rb@github.com> | 2014-05-12 10:51:56 -0700 |
commit | df3419269bb3a7dba18f1df8a31d7d79e0a20475 (patch) | |
tree | 38684dcd60eb4f2ce3b634f09915f4236ef32e08 /src | |
parent | af567e8853e359df3b3abb7d3f7c15c3b1b391c7 (diff) | |
parent | 8a2ef218b99c41923dc32e25dc0915f68e2e4bca (diff) | |
download | libgit2-df3419269bb3a7dba18f1df8a31d7d79e0a20475.tar.gz |
Merge pull request #2336 from libgit2/rb/unicode-branch-names
Pass unconverted Unicode path data when iconv doesn't like it
Diffstat (limited to 'src')
-rw-r--r-- | src/path.c | 63 | ||||
-rw-r--r-- | src/path.h | 2 | ||||
-rw-r--r-- | src/repository.c | 57 |
3 files changed, 66 insertions, 56 deletions
diff --git a/src/path.c b/src/path.c index 2690cd8e8..e0b00a086 100644 --- a/src/path.c +++ b/src/path.c @@ -799,8 +799,11 @@ int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen) if (rv != (size_t)-1) break; + /* if we cannot convert the data (probably because iconv thinks + * it is not valid UTF-8 source data), then use original data + */ if (errno != E2BIG) - goto fail; + return 0; /* make space for 2x the remaining data to be converted * (with per retry overhead to avoid infinite loops) @@ -823,6 +826,64 @@ fail: return -1; } +static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX"; +static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX"; + +/* Check if the platform is decomposing unicode data for us. We will + * emulate core Git and prefer to use precomposed unicode data internally + * on these platforms, composing the decomposed unicode on the fly. + * + * This mainly happens on the Mac where HDFS stores filenames as + * decomposed unicode. Even on VFAT and SAMBA file systems, the Mac will + * return decomposed unicode from readdir() even when the actual + * filesystem is storing precomposed unicode. + */ +bool git_path_does_fs_decompose_unicode(const char *root) +{ + git_buf path = GIT_BUF_INIT; + int fd; + bool found_decomposed = false; + char tmp[6]; + + /* Create a file using a precomposed path and then try to find it + * using the decomposed name. If the lookup fails, then we will mark + * that we should precompose unicode for this repository. + */ + if (git_buf_joinpath(&path, root, nfc_file) < 0 || + (fd = p_mkstemp(path.ptr)) < 0) + goto done; + p_close(fd); + + /* record trailing digits generated by mkstemp */ + memcpy(tmp, path.ptr + path.size - sizeof(tmp), sizeof(tmp)); + + /* try to look up as NFD path */ + if (git_buf_joinpath(&path, root, nfd_file) < 0) + goto done; + memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp)); + + found_decomposed = git_path_exists(path.ptr); + + /* remove temporary file (using original precomposed path) */ + if (git_buf_joinpath(&path, root, nfc_file) < 0) + goto done; + memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp)); + + (void)p_unlink(path.ptr); + +done: + git_buf_free(&path); + return found_decomposed; +} + +#else + +bool git_path_does_fs_decompose_unicode(const char *root) +{ + GIT_UNUSED(root); + return false; +} + #endif #if defined(__sun) || defined(__GNU__) diff --git a/src/path.h b/src/path.h index 2367d707b..3213c5104 100644 --- a/src/path.h +++ b/src/path.h @@ -436,4 +436,6 @@ extern int git_path_iconv(git_path_iconv_t *ic, char **in, size_t *inlen); #endif /* GIT_USE_ICONV */ +extern bool git_path_does_fs_decompose_unicode(const char *root); + #endif diff --git a/src/repository.c b/src/repository.c index 43a476016..7d055e28e 100644 --- a/src/repository.c +++ b/src/repository.c @@ -889,60 +889,6 @@ static bool are_symlinks_supported(const char *wd_path) return symlinks_supported; } -#ifdef GIT_USE_ICONV - -static const char *nfc_file = "\xC3\x85\x73\x74\x72\xC3\xB6\x6D.XXXXXX"; -static const char *nfd_file = "\x41\xCC\x8A\x73\x74\x72\x6F\xCC\x88\x6D.XXXXXX"; - -/* Check if the platform is decomposing unicode data for us. We will - * emulate core Git and prefer to use precomposed unicode data internally - * on these platforms, composing the decomposed unicode on the fly. - * - * This mainly happens on the Mac where HDFS stores filenames as - * decomposed unicode. Even on VFAT and SAMBA file systems, the Mac will - * return decomposed unicode from readdir() even when the actual - * filesystem is storing precomposed unicode. - */ -static bool does_fs_decompose_unicode_paths(const char *wd_path) -{ - git_buf path = GIT_BUF_INIT; - int fd; - bool found_decomposed = false; - char tmp[6]; - - /* Create a file using a precomposed path and then try to find it - * using the decomposed name. If the lookup fails, then we will mark - * that we should precompose unicode for this repository. - */ - if (git_buf_joinpath(&path, wd_path, nfc_file) < 0 || - (fd = p_mkstemp(path.ptr)) < 0) - goto done; - p_close(fd); - - /* record trailing digits generated by mkstemp */ - memcpy(tmp, path.ptr + path.size - sizeof(tmp), sizeof(tmp)); - - /* try to look up as NFD path */ - if (git_buf_joinpath(&path, wd_path, nfd_file) < 0) - goto done; - memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp)); - - found_decomposed = git_path_exists(path.ptr); - - /* remove temporary file (using original precomposed path) */ - if (git_buf_joinpath(&path, wd_path, nfc_file) < 0) - goto done; - memcpy(path.ptr + path.size - sizeof(tmp), tmp, sizeof(tmp)); - - (void)p_unlink(path.ptr); - -done: - git_buf_free(&path); - return found_decomposed; -} - -#endif - static int create_empty_file(const char *path, mode_t mode) { int fd; @@ -1033,8 +979,9 @@ static int repo_init_fs_configs( #ifdef GIT_USE_ICONV if ((error = git_config_set_bool( cfg, "core.precomposeunicode", - does_fs_decompose_unicode_paths(work_dir))) < 0) + git_path_does_fs_decompose_unicode(work_dir))) < 0) return error; + /* on non-iconv platforms, don't even set core.precomposeunicode */ #endif return 0; |