diff options
author | Edward Thomson <ethomson@microsoft.com> | 2014-12-01 13:09:58 -0500 |
---|---|---|
committer | Edward Thomson <ethomson@microsoft.com> | 2014-12-16 10:08:43 -0600 |
commit | cceae9a25d0bed8b00f4981e051d5f380ef54401 (patch) | |
tree | 37fc849d60ba24f6c6ac896322d6e8ac5f7d902c | |
parent | 09debe1213b9c979e21106ccbe9d420f8511f4eb (diff) | |
download | libgit2-cceae9a25d0bed8b00f4981e051d5f380ef54401.tar.gz |
win32: use NT-prefixed "\\?\" paths
When turning UTF-8 paths into UCS-2 paths for Windows, always emit
the \\?\-prefixed form. Because that prefix bypasses the system's
path canonicalization, we must perform the canonicalization ourselves.
We must:
1. always use a backslash as a directory separator
2. only use a single backslash between directories
3. not rely on the system to translate "." and ".." in paths
4. remove trailing backslashes, except at the drive root (C:\)
-rw-r--r-- | src/win32/findfile.c | 1 | ||||
-rw-r--r-- | src/win32/path_w32.c | 269 | ||||
-rw-r--r-- | src/win32/path_w32.h | 65 | ||||
-rw-r--r-- | src/win32/posix.h | 1 | ||||
-rw-r--r-- | src/win32/posix_w32.c | 67 | ||||
-rw-r--r-- | src/win32/utf-conv.c | 42 | ||||
-rw-r--r-- | src/win32/utf-conv.h | 39 | ||||
-rw-r--r-- | src/win32/w32_util.h | 1 | ||||
-rw-r--r-- | tests/core/link.c | 19 | ||||
-rw-r--r-- | tests/path/win32.c | 190 |
10 files changed, 591 insertions, 103 deletions
diff --git a/src/win32/findfile.c b/src/win32/findfile.c index 86d4ef5bd..de27dd060 100644 --- a/src/win32/findfile.c +++ b/src/win32/findfile.c @@ -5,6 +5,7 @@ * a Linking Exception. For full terms see the included COPYING file. */ +#include "path_w32.h" #include "utf-conv.h" #include "path.h" #include "findfile.h" diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c new file mode 100644 index 000000000..f0eacaa63 --- /dev/null +++ b/src/win32/path_w32.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "common.h" +#include "path.h" +#include "path_w32.h" +#include "utf-conv.h" + +#define PATH__NT_NAMESPACE L"\\\\?\\" +#define PATH__NT_NAMESPACE_LEN 4 + +#define PATH__ABSOLUTE_LEN 3 + +#define path__is_dirsep(p) ((p) == '/' || (p) == '\\') + +#define path__is_absolute(p) \ + (git__isalpha((p)[0]) && (p)[1] == ':' && ((p)[2] == '\\' || (p)[2] == '/')) + +#define path__is_nt_namespace(p) \ + (((p)[0] == '\\' && (p)[1] == '\\' && (p)[2] == '?' && (p)[3] == '\\') || \ + ((p)[0] == '/' && (p)[1] == '/' && (p)[2] == '?' && (p)[3] == '/')) + +#define path__is_unc(p) \ + (((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/')) + +GIT_INLINE(int) path__cwd(wchar_t *path, int size) +{ + int len; + + if ((len = GetCurrentDirectoryW(size, path)) == 0) { + errno = GetLastError() == ERROR_ACCESS_DENIED ? EACCES : ENOENT; + return -1; + } else if (len > size) { + errno = ENAMETOOLONG; + return -1; + } + + /* The Win32 APIs may return "\\?\" once you've used it first. + * But it may not. What a gloriously predictible API! 
+ */ + if (wcsncmp(path, PATH__NT_NAMESPACE, PATH__NT_NAMESPACE_LEN)) + return len; + + len -= PATH__NT_NAMESPACE_LEN; + + memmove(path, path + PATH__NT_NAMESPACE_LEN, sizeof(wchar_t) * len); + return len; +} + +static wchar_t *path__skip_server(wchar_t *path) +{ + wchar_t *c; + + for (c = path; *c; c++) { + if (path__is_dirsep(*c)) + return c + 1; + } + + return c; +} + +static wchar_t *path__skip_prefix(wchar_t *path) +{ + if (path__is_nt_namespace(path)) { + path += PATH__NT_NAMESPACE_LEN; + + if (wcsncmp(path, L"UNC\\", 4) == 0) + path = path__skip_server(path + 4); + else if (path__is_absolute(path)) + path += PATH__ABSOLUTE_LEN; + } else if (path__is_absolute(path)) { + path += PATH__ABSOLUTE_LEN; + } else if (path__is_unc(path)) { + path = path__skip_server(path + 2); + } + + return path; +} + +int git_win32_path_canonicalize(git_win32_path path) +{ + wchar_t *base, *from, *to, *next; + size_t len; + + base = to = path__skip_prefix(path); + + /* Unposixify if the prefix */ + for (from = path; from < to; from++) { + if (*from == L'/') + *from = L'\\'; + } + + while (*from) { + for (next = from; *next; ++next) { + if (*next == L'/') { + *next = L'\\'; + break; + } + + if (*next == L'\\') + break; + } + + len = next - from; + + if (len == 1 && from[0] == L'.') + /* do nothing with singleton dot */; + + else if (len == 2 && from[0] == L'.' 
&& from[1] == L'.') { + if (to == base) { + /* no more path segments to strip, eat the "../" */ + if (*next == L'\\') + len++; + + base = to; + } else { + /* back up a path segment */ + while (to > base && to[-1] == L'\\') to--; + while (to > base && to[-1] != L'\\') to--; + } + } else { + if (*next == L'\\' && *from != L'\\') + len++; + + if (to != from) + memmove(to, from, sizeof(wchar_t) * len); + + to += len; + } + + from += len; + + while (*from == L'\\') from++; + } + + /* Strip trailing backslashes */ + while (to > base && to[-1] == L'\\') to--; + + *to = L'\0'; + + return (to - path); +} + +int git_win32_path__cwd(wchar_t *out, size_t len) +{ + int cwd_len; + + if ((cwd_len = path__cwd(out, len)) < 0) + return -1; + + /* UNC paths */ + if (wcsncmp(L"\\\\", out, 2) == 0) { + /* Our buffer must be at least 5 characters larger than the + * current working directory: we swallow one of the leading + * '\'s, but we we add a 'UNC' specifier to the path, plus + * a trailing directory separator, plus a NUL. + */ + if (cwd_len > MAX_PATH - 4) { + errno = ENAMETOOLONG; + return -1; + } + + memmove(out+2, out, sizeof(wchar_t) * cwd_len); + out[0] = L'U'; + out[1] = L'N'; + out[2] = L'C'; + + cwd_len += 2; + } + + /* Our buffer must be at least 2 characters larger than the current + * working directory. (One character for the directory separator, + * one for the null. + */ + else if (cwd_len > MAX_PATH - 2) { + errno = ENAMETOOLONG; + return -1; + } + + return cwd_len; +} + +int git_win32_path_from_utf8(git_win32_path out, const char *src) +{ + wchar_t *dest = out; + + /* All win32 paths are in NT-prefixed format, beginning with "\\?\". 
*/ + memcpy(dest, PATH__NT_NAMESPACE, sizeof(wchar_t) * PATH__NT_NAMESPACE_LEN); + dest += PATH__NT_NAMESPACE_LEN; + + /* See if this is an absolute path (beginning with a drive letter) */ + if (path__is_absolute(src)) { + if (git__utf8_to_16(dest, MAX_PATH, src) < 0) + return -1; + } + /* File-prefixed NT-style paths beginning with \\?\ */ + else if (path__is_nt_namespace(src)) { + /* Skip the NT prefix, the destination already contains it */ + if (git__utf8_to_16(dest, MAX_PATH, src + PATH__NT_NAMESPACE_LEN) < 0) + return -1; + } + /* UNC paths */ + else if (path__is_unc(src)) { + memcpy(dest, L"UNC\\", sizeof(wchar_t) * 4); + dest += 4; + + /* Skip the leading "\\" */ + if (git__utf8_to_16(dest, MAX_PATH - 2, src + 2) < 0) + return -1; + } + /* Absolute paths omitting the drive letter */ + else if (src[0] == '\\' || src[0] == '/') { + if (path__cwd(dest, MAX_PATH) < 0) + return -1; + + if (!path__is_absolute(dest)) { + errno = ENOENT; + return -1; + } + + /* Skip the drive letter specification ("C:") */ + if (git__utf8_to_16(dest + 2, MAX_PATH - 2, src) < 0) + return -1; + } + /* Relative paths */ + else { + int cwd_len; + + if ((cwd_len = git_win32_path__cwd(dest, MAX_PATH)) < 0) + return -1; + + dest[cwd_len++] = L'\\'; + + if (git__utf8_to_16(dest + cwd_len, MAX_PATH - cwd_len, src) < 0) + return -1; + } + + return git_win32_path_canonicalize(out); +} + +int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src) +{ + char *out = dest; + int len; + + /* Strip NT namespacing "\\?\" */ + if (path__is_nt_namespace(src)) { + src += 4; + + /* "\\?\UNC\server\share" -> "\\server\share" */ + if (wcsncmp(src, L"UNC\\", 4) == 0) { + src += 4; + + memcpy(dest, "\\\\", 2); + out = dest + 2; + } + } + + if ((len = git__utf16_to_8(out, GIT_WIN_PATH_UTF8, src)) < 0) + return len; + + git_path_mkposix(dest); + + return len; +} diff --git a/src/win32/path_w32.h b/src/win32/path_w32.h new file mode 100644 index 000000000..dc7a68e59 --- /dev/null +++ 
b/src/win32/path_w32.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_git_path_w32_h__ +#define INCLUDE_git_path_w32_h__ + +/* + * Provides a large enough buffer to support Windows paths: MAX_PATH is + * 260, corresponding to a maximum path length of 259 characters plus a + * NULL terminator. Prefixing with "\\?\" adds 4 characters, but if the + * original was a UNC path, then we turn "\\server\share" into + * "\\?\UNC\server\share". So we replace the first two characters with + * 8 characters, a net gain of 6, so the maximum length is MAX_PATH+6. + */ +#define GIT_WIN_PATH_UTF16 MAX_PATH+6 + +/* Maximum size of a UTF-8 Win32 path. We remove the "\\?\" or "\\?\UNC\" + * prefixes for presentation, bringing us back to 259 (non-NULL) + * characters. UTF-8 does have 4-byte sequences, but they are encoded in + * UTF-16 using surrogate pairs, which takes up the space of two characters. + * Two characters in the range U+0800 -> U+FFFF take up more space in UTF-8 + * (6 bytes) than one surrogate pair (4 bytes). + */ +#define GIT_WIN_PATH_UTF8 (259 * 3 + 1) + +/* Win32 path types */ +typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16]; +typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8]; + +/** + * Create a Win32 path (in UCS-2 format) from a UTF-8 string. + * + * @param dest The buffer to receive the wide string. + * @param src The UTF-8 string to convert. 
+ * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure + */ +extern int git_win32_path_from_utf8(git_win32_path dest, const char *src); + +/** + * Canonicalize a Win32 UCS-2 path so that it is suitable for delivery to the + * Win32 APIs: remove multiple directory separators, squashing to a single one, + * strip trailing directory separators, ensure directory separators are all + * canonical (always backslashes, never forward slashes) and process any + * directory entries of '.' or '..'. + * + * This processes the buffer in place. + * + * @param path The buffer to process + * @return The new length of the buffer, in wchar_t's (not counting the NULL terminator) + */ +extern int git_win32_path_canonicalize(git_win32_path path); + +/** + * Create an internal format (posix-style) UTF-8 path from a Win32 UCS-2 path. + * + * @param dest The buffer to receive the UTF-8 string. + * @param src The wide string to convert. + * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure + */ +extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src); + +#endif diff --git a/src/win32/posix.h b/src/win32/posix.h index e055a77d0..104966edc 100644 --- a/src/win32/posix.h +++ b/src/win32/posix.h @@ -9,6 +9,7 @@ #include "common.h" #include "../posix.h" +#include "path_w32.h" #include "utf-conv.h" #include "dir.h" diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c index 7b4555719..e446ccab0 100644 --- a/src/win32/posix_w32.c +++ b/src/win32/posix_w32.c @@ -7,6 +7,7 @@ #include "../posix.h" #include "../fileops.h" #include "path.h" +#include "path_w32.h" #include "utf-conv.h" #include "repository.h" #include "reparse.h" @@ -35,22 +36,6 @@ /* GetFinalPathNameByHandleW signature */ typedef DWORD(WINAPI *PFGetFinalPathNameByHandleW)(HANDLE, LPWSTR, DWORD, DWORD); -/* Helper function which converts UTF-8 paths to UTF-16. - * On failure, errno is set. 
*/ -static int utf8_to_16_with_errno(git_win32_path dest, const char *src) -{ - int len = git_win32_path_from_utf8(dest, src); - - if (len < 0) { - if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) - errno = ENAMETOOLONG; - else - errno = EINVAL; /* Bad code point, presumably */ - } - - return len; -} - int p_ftruncate(int fd, long size) { #if defined(_MSC_VER) && _MSC_VER >= 1500 @@ -66,7 +51,7 @@ int p_mkdir(const char *path, mode_t mode) GIT_UNUSED(mode); - if (utf8_to_16_with_errno(buf, path) < 0) + if (git_win32_path_from_utf8(buf, path) < 0) return -1; return _wmkdir(buf); @@ -85,7 +70,7 @@ int p_unlink(const char *path) git_win32_path buf; int error; - if (utf8_to_16_with_errno(buf, path) < 0) + if (git_win32_path_from_utf8(buf, path) < 0) return -1; error = _wunlink(buf); @@ -292,7 +277,7 @@ static int do_lstat(const char *path, struct stat *buf, bool posixly_correct) git_win32_path path_w; int len; - if ((len = utf8_to_16_with_errno(path_w, path)) < 0) + if ((len = git_win32_path_from_utf8(path_w, path)) < 0) return -1; git_win32__path_trim_end(path_w, len); @@ -323,7 +308,7 @@ int p_readlink(const char *path, char *buf, size_t bufsiz) * could occur in the middle of the encoding of a code point, * we need to buffer the result on the stack. */ - if (utf8_to_16_with_errno(path_w, path) < 0 || + if (git_win32_path_from_utf8(path_w, path) < 0 || readlink_w(target_w, path_w) < 0 || (len = git_win32_path_to_utf8(target, target_w)) < 0) return -1; @@ -347,7 +332,7 @@ int p_open(const char *path, int flags, ...) 
git_win32_path buf; mode_t mode = 0; - if (utf8_to_16_with_errno(buf, path) < 0) + if (git_win32_path_from_utf8(buf, path) < 0) return -1; if (flags & O_CREAT) { @@ -365,7 +350,7 @@ int p_creat(const char *path, mode_t mode) { git_win32_path buf; - if (utf8_to_16_with_errno(buf, path) < 0) + if (git_win32_path_from_utf8(buf, path) < 0) return -1; return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | STANDARD_OPEN_FLAGS, mode); @@ -463,7 +448,7 @@ int p_stat(const char* path, struct stat* buf) git_win32_path path_w; int len; - if ((len = utf8_to_16_with_errno(path_w, path)) < 0) + if ((len = git_win32_path_from_utf8(path_w, path)) < 0) return -1; git_win32__path_trim_end(path_w, len); @@ -483,7 +468,7 @@ int p_chdir(const char* path) { git_win32_path buf; - if (utf8_to_16_with_errno(buf, path) < 0) + if (git_win32_path_from_utf8(buf, path) < 0) return -1; return _wchdir(buf); @@ -493,7 +478,7 @@ int p_chmod(const char* path, mode_t mode) { git_win32_path buf; - if (utf8_to_16_with_errno(buf, path) < 0) + if (git_win32_path_from_utf8(buf, path) < 0) return -1; return _wchmod(buf, mode); @@ -504,7 +489,7 @@ int p_rmdir(const char* path) git_win32_path buf; int error; - if (utf8_to_16_with_errno(buf, path) < 0) + if (git_win32_path_from_utf8(buf, path) < 0) return -1; error = _wrmdir(buf); @@ -533,7 +518,7 @@ char *p_realpath(const char *orig_path, char *buffer) { git_win32_path orig_path_w, buffer_w; - if (utf8_to_16_with_errno(orig_path_w, orig_path) < 0) + if (git_win32_path_from_utf8(orig_path_w, orig_path) < 0) return NULL; /* Note that if the path provided is a relative path, then the current directory @@ -554,20 +539,17 @@ char *p_realpath(const char *orig_path, char *buffer) return NULL; } - /* Convert the path to UTF-8. */ - if (buffer) { - /* If the caller provided a buffer, then it is assumed to be GIT_WIN_PATH_UTF8 - * characters in size. If it isn't, then we may overflow. 
*/ - if (git__utf16_to_8(buffer, GIT_WIN_PATH_UTF8, buffer_w) < 0) - return NULL; - } else { - /* If the caller did not provide a buffer, then we allocate one for the caller - * from the heap. */ - if (git__utf16_to_8_alloc(&buffer, buffer_w) < 0) - return NULL; + if (!buffer && !(buffer = git__malloc(GIT_WIN_PATH_UTF8))) { + errno = ENOMEM; + return NULL; } - /* Convert backslashes to forward slashes */ + /* Convert the path to UTF-8. If the caller provided a buffer, then it + * is assumed to be GIT_WIN_PATH_UTF8 characters in size. If it isn't, + * then we may overflow. */ + if (git_win32_path_to_utf8(buffer, buffer_w) < 0) + return NULL; + git_path_mkposix(buffer); return buffer; @@ -608,6 +590,7 @@ int p_snprintf(char *buffer, size_t count, const char *format, ...) return r; } +/* TODO: wut? */ int p_mkstemp(char *tmp_path) { #if defined(_MSC_VER) && _MSC_VER >= 1500 @@ -625,7 +608,7 @@ int p_access(const char* path, mode_t mode) { git_win32_path buf; - if (utf8_to_16_with_errno(buf, path) < 0) + if (git_win32_path_from_utf8(buf, path) < 0) return -1; return _waccess(buf, mode); @@ -664,8 +647,8 @@ int p_rename(const char *from, const char *to) int rename_succeeded; int error; - if (utf8_to_16_with_errno(wfrom, from) < 0 || - utf8_to_16_with_errno(wto, to) < 0) + if (git_win32_path_from_utf8(wfrom, from) < 0 || + git_win32_path_from_utf8(wto, to) < 0) return -1; /* wait up to 50ms if file is locked by another thread or process */ diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c index b9ccfb5e5..b0205b019 100644 --- a/src/win32/utf-conv.c +++ b/src/win32/utf-conv.c @@ -26,6 +26,14 @@ GIT_INLINE(DWORD) get_wc_flags(void) return flags; } +GIT_INLINE(void) git__set_errno(void) +{ + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) + errno = ENAMETOOLONG; + else + errno = EINVAL; +} + /** * Converts a UTF-8 string to wide characters. 
* @@ -36,10 +44,15 @@ GIT_INLINE(DWORD) get_wc_flags(void) */ int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src) { + int len; + /* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's * length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */ - return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1; + if ((len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1) < 0) + git__set_errno(); + + return len; } /** @@ -52,10 +65,15 @@ int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src) */ int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src) { + int len; + /* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's * length. 
WideCharToMultiByte never returns int's minvalue, so underflow is not possible */ - return WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1; + if ((len = WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1) < 0) + git__set_errno(); + + return len; } /** @@ -76,17 +94,23 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src) /* Length of -1 indicates NULL termination of the input string */ utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0); - if (!utf16_size) + if (!utf16_size) { + git__set_errno(); return -1; + } *dest = git__malloc(utf16_size * sizeof(wchar_t)); - if (!*dest) + if (!*dest) { + errno = ENOMEM; return -1; + } utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, *dest, utf16_size); if (!utf16_size) { + git__set_errno(); + git__free(*dest); *dest = NULL; } @@ -116,17 +140,23 @@ int git__utf16_to_8_alloc(char **dest, const wchar_t *src) /* Length of -1 indicates NULL termination of the input string */ utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, NULL, 0, NULL, NULL); - if (!utf8_size) + if (!utf8_size) { + git__set_errno(); return -1; + } *dest = git__malloc(utf8_size); - if (!*dest) + if (!*dest) { + errno = ENOMEM; return -1; + } utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, *dest, utf8_size, NULL, NULL); if (!utf8_size) { + git__set_errno(); + git__free(*dest); *dest = NULL; } diff --git a/src/win32/utf-conv.h b/src/win32/utf-conv.h index a480cd93e..89cdb96da 100644 --- a/src/win32/utf-conv.h +++ b/src/win32/utf-conv.h @@ -10,21 +10,6 @@ #include <wchar.h> #include "common.h" -/* Equal to the Win32 MAX_PATH constant. The maximum path length is 259 - * characters plus a NULL terminator. */ -#define GIT_WIN_PATH_UTF16 260 - -/* Maximum size of a UTF-8 Win32 path. 
UTF-8 does have 4-byte sequences, - * but they are encoded in UTF-16 using surrogate pairs, which takes up - * the space of two characters. Two characters in the range U+0800 -> - * U+FFFF take up more space in UTF-8 (6 bytes) than one surrogate pair - * (4 bytes). */ -#define GIT_WIN_PATH_UTF8 (259 * 3 + 1) - -/* Win32 path types */ -typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16]; -typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8]; - /** * Converts a UTF-8 string to wide characters. * @@ -67,28 +52,4 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src); */ int git__utf16_to_8_alloc(char **dest, const wchar_t *src); -/** - * Converts a UTF-8 Win32 path to wide characters. - * - * @param dest The buffer to receive the wide string. - * @param src The UTF-8 string to convert. - * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure - */ -GIT_INLINE(int) git_win32_path_from_utf8(git_win32_path dest, const char *src) -{ - return git__utf8_to_16(dest, GIT_WIN_PATH_UTF16, src); -} - -/** - * Converts a wide Win32 path to UTF-8. - * - * @param dest The buffer to receive the UTF-8 string. - * @param src The wide string to convert. 
- * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure - */ -GIT_INLINE(int) git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src) -{ - return git__utf16_to_8(dest, GIT_WIN_PATH_UTF8, src); -} - #endif diff --git a/src/win32/w32_util.h b/src/win32/w32_util.h index a1d388af5..9c1b94359 100644 --- a/src/win32/w32_util.h +++ b/src/win32/w32_util.h @@ -9,6 +9,7 @@ #define INCLUDE_w32_util_h__ #include "utf-conv.h" +#include "path_w32.h" GIT_INLINE(bool) git_win32__isalpha(wchar_t c) { diff --git a/tests/core/link.c b/tests/core/link.c index 83999ebdf..ec85ec4e0 100644 --- a/tests/core/link.c +++ b/tests/core/link.c @@ -197,19 +197,6 @@ static void do_custom_reparse(const char *path) #endif -git_buf *unslashify(git_buf *buf) -{ -#ifdef GIT_WIN32 - size_t i; - - for (i = 0; i < buf->size; i++) - if (buf->ptr[i] == '/') - buf->ptr[i] = '\\'; -#endif - - return buf; -} - void test_core_link__stat_regular_file(void) { struct stat st; @@ -584,7 +571,7 @@ void test_core_link__readlink_symlink(void) buf[len] = 0; - cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf); + cl_assert_equal_s(git_buf_cstr(&target_path), buf); git_buf_free(&target_path); } @@ -607,7 +594,7 @@ void test_core_link__readlink_dangling(void) buf[len] = 0; - cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf); + cl_assert_equal_s(git_buf_cstr(&target_path), buf); git_buf_free(&target_path); } @@ -636,7 +623,7 @@ void test_core_link__readlink_multiple(void) buf[len] = 0; - cl_assert_equal_s(git_buf_cstr(unslashify(&path2)), buf); + cl_assert_equal_s(git_buf_cstr(&path2), buf); git_buf_free(&path1); git_buf_free(&path2); diff --git a/tests/path/win32.c b/tests/path/win32.c new file mode 100644 index 000000000..ef0b5d2f2 --- /dev/null +++ b/tests/path/win32.c @@ -0,0 +1,190 @@ + +#include "clar_libgit2.h" +#include "path.h" + +#ifdef GIT_WIN32 +#include "win32/path_w32.h" +#endif + +void test_utf8_to_utf16(const char 
*utf8_in, const wchar_t *utf16_expected) +{ +#ifdef GIT_WIN32 + git_win32_path path_utf16; + int path_utf16len; + + cl_assert((path_utf16len = git_win32_path_from_utf8(path_utf16, utf8_in)) >= 0); + cl_assert_equal_wcs(utf16_expected, path_utf16); + cl_assert_equal_i(wcslen(utf16_expected), path_utf16len); +#else + GIT_UNUSED(utf8_in); + GIT_UNUSED(utf16_expected); +#endif +} + +void test_path_win32__utf8_to_utf16(void) +{ +#ifdef GIT_WIN32 + test_utf8_to_utf16("C:\\", L"\\\\?\\C:\\"); + test_utf8_to_utf16("c:\\", L"\\\\?\\c:\\"); + test_utf8_to_utf16("C:/", L"\\\\?\\C:\\"); + test_utf8_to_utf16("c:/", L"\\\\?\\c:\\"); +#endif +} + +void test_path_win32__removes_trailing_slash(void) +{ +#ifdef GIT_WIN32 + test_utf8_to_utf16("C:\\Foo\\", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("C:\\Foo\\\\", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("C:\\Foo\\\\", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("C:/Foo/", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("C:/Foo///", L"\\\\?\\C:\\Foo"); +#endif +} + +void test_path_win32__squashes_multiple_slashes(void) +{ +#ifdef GIT_WIN32 + test_utf8_to_utf16("C:\\\\Foo\\Bar\\\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); + test_utf8_to_utf16("C://Foo/Bar///Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); +#endif +} + +void test_path_win32__unc(void) +{ +#ifdef GIT_WIN32 + test_utf8_to_utf16("\\\\server\\c$\\unc\\path", L"\\\\?\\UNC\\server\\c$\\unc\\path"); + test_utf8_to_utf16("//server/git/style/unc/path", L"\\\\?\\UNC\\server\\git\\style\\unc\\path"); +#endif +} + +void test_path_win32__honors_max_path(void) +{ +#ifdef GIT_WIN32 + git_win32_path path_utf16; + + test_utf8_to_utf16("C:\\This path is 259 chars and is the max length in windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij", + L"\\\\?\\C:\\This path is 259 chars and is the max length in 
windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"); + test_utf8_to_utf16("\\\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij", + L"\\\\?\\UNC\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"); + + cl_check_fail(git_win32_path_from_utf8(path_utf16, "C:\\This path is 260 chars and is sadly too long for windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij")); + cl_check_fail(git_win32_path_from_utf8(path_utf16, "\\\\unc\\paths are also bound by 260 character restrictions\\including the server name portion\\bcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij")); +#endif +} + +void test_path_win32__dot_and_dotdot(void) +{ +#ifdef GIT_WIN32 + test_utf8_to_utf16("C:\\Foo\\..\\Foobar", L"\\\\?\\C:\\Foobar"); + test_utf8_to_utf16("C:\\Foo\\Bar\\..\\Foobar", L"\\\\?\\C:\\Foo\\Foobar"); + test_utf8_to_utf16("C:\\Foo\\Bar\\..\\Foobar\\..", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("C:\\Foobar\\..", L"\\\\?\\C:\\"); + test_utf8_to_utf16("C:/Foo/Bar/../Foobar", L"\\\\?\\C:\\Foo\\Foobar"); + test_utf8_to_utf16("C:/Foo/Bar/../Foobar/../Asdf/", L"\\\\?\\C:\\Foo\\Asdf"); + test_utf8_to_utf16("C:/Foo/Bar/../Foobar/..", L"\\\\?\\C:\\Foo"); 
+ test_utf8_to_utf16("C:/Foo/..", L"\\\\?\\C:\\"); + + test_utf8_to_utf16("C:\\Foo\\Bar\\.\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); + test_utf8_to_utf16("C:\\.\\Foo\\.\\Bar\\.\\Foobar\\.\\", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); + test_utf8_to_utf16("C:/Foo/Bar/./Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); + test_utf8_to_utf16("C:/Foo/../Bar/./Foobar/../", L"\\\\?\\C:\\Bar"); + + test_utf8_to_utf16("C:\\Foo\\..\\..\\Bar", L"\\\\?\\C:\\Bar"); +#endif +} + +void test_path_win32__absolute_from_no_drive_letter(void) +{ +#ifdef GIT_WIN32 + test_utf8_to_utf16("\\Foo", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar"); + test_utf8_to_utf16("/Foo/Bar", L"\\\\?\\C:\\Foo\\Bar"); +#endif +} + +void test_path_win32__absolute_from_relative(void) +{ +#ifdef GIT_WIN32 + char cwd_backup[MAX_PATH]; + + cl_must_pass(p_getcwd(cwd_backup, MAX_PATH)); + cl_must_pass(p_chdir("C:/")); + + test_utf8_to_utf16("Foo", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("..\\..\\Foo", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("Foo\\..", L"\\\\?\\C:\\"); + test_utf8_to_utf16("Foo\\..\\..", L"\\\\?\\C:\\"); + test_utf8_to_utf16("", L"\\\\?\\C:\\"); + + cl_must_pass(p_chdir("C:/Windows")); + + test_utf8_to_utf16("Foo", L"\\\\?\\C:\\Windows\\Foo"); + test_utf8_to_utf16("Foo\\Bar", L"\\\\?\\C:\\Windows\\Foo\\Bar"); + test_utf8_to_utf16("..\\Foo", L"\\\\?\\C:\\Foo"); + test_utf8_to_utf16("Foo\\..\\Bar", L"\\\\?\\C:\\Windows\\Bar"); + test_utf8_to_utf16("", L"\\\\?\\C:\\Windows"); + + cl_must_pass(p_chdir(cwd_backup)); +#endif +} + +void test_canonicalize(const wchar_t *in, const wchar_t *expected) +{ +#ifdef GIT_WIN32 + git_win32_path canonical; + + cl_assert(wcslen(in) < MAX_PATH); + wcscpy(canonical, in); + + cl_must_pass(git_win32_path_canonicalize(canonical)); + cl_assert_equal_wcs(expected, canonical); +#else + GIT_UNUSED(in); + GIT_UNUSED(expected); +#endif +} + +void test_path_win32__canonicalize(void) +{ +#ifdef GIT_WIN32 + test_canonicalize(L"C:\\Foo\\Bar", L"C:\\Foo\\Bar"); + 
test_canonicalize(L"C:\\Foo\\", L"C:\\Foo"); + test_canonicalize(L"C:\\Foo\\\\", L"C:\\Foo"); + test_canonicalize(L"C:\\Foo\\..\\Bar", L"C:\\Bar"); + test_canonicalize(L"C:\\Foo\\..\\..\\Bar", L"C:\\Bar"); + test_canonicalize(L"C:\\Foo\\..\\..\\..\\..\\", L"C:\\"); + test_canonicalize(L"C:/Foo/Bar", L"C:\\Foo\\Bar"); + test_canonicalize(L"C:/", L"C:\\"); + + test_canonicalize(L"Foo\\\\Bar\\\\Asdf\\\\", L"Foo\\Bar\\Asdf"); + test_canonicalize(L"Foo\\\\Bar\\\\..\\\\Asdf\\", L"Foo\\Asdf"); + test_canonicalize(L"Foo\\\\Bar\\\\.\\\\Asdf\\", L"Foo\\Bar\\Asdf"); + test_canonicalize(L"Foo\\\\..\\Bar\\\\.\\\\Asdf\\", L"Bar\\Asdf"); + test_canonicalize(L"\\", L""); + test_canonicalize(L"", L""); + test_canonicalize(L"Foo\\..\\..\\..\\..", L""); + test_canonicalize(L"..\\..\\..\\..", L""); + test_canonicalize(L"\\..\\..\\..\\..", L""); + + test_canonicalize(L"\\\\?\\C:\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar"); + test_canonicalize(L"\\\\?\\C:\\Foo\\Bar\\", L"\\\\?\\C:\\Foo\\Bar"); + test_canonicalize(L"\\\\?\\C:\\\\Foo\\.\\Bar\\\\..\\", L"\\\\?\\C:\\Foo"); + test_canonicalize(L"\\\\?\\C:\\\\", L"\\\\?\\C:\\"); + test_canonicalize(L"//?/C:/", L"\\\\?\\C:\\"); + test_canonicalize(L"//?/C:/../../Foo/", L"\\\\?\\C:\\Foo"); + test_canonicalize(L"//?/C:/Foo/../../", L"\\\\?\\C:\\"); + + test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder", L"\\\\?\\UNC\\server\\C$\\folder"); + test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder"); + test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder"); + test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\..\\..\\..\\..\\share\\", L"\\\\?\\UNC\\server\\share"); + + test_canonicalize(L"\\\\server\\share", L"\\\\server\\share"); + test_canonicalize(L"\\\\server\\share\\", L"\\\\server\\share"); + test_canonicalize(L"\\\\server\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar"); + test_canonicalize(L"\\\\server\\\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar"); + 
test_canonicalize(L"\\\\server\\share\\..\\foo", L"\\\\server\\foo"); + test_canonicalize(L"\\\\server\\..\\..\\share\\.\\foo", L"\\\\server\\share\\foo"); +#endif +} |