diff options
| author | Edward Thomson <ethomson@microsoft.com> | 2014-12-01 13:09:58 -0500 | 
|---|---|---|
| committer | Edward Thomson <ethomson@microsoft.com> | 2014-12-16 10:08:43 -0600 | 
| commit | cceae9a25d0bed8b00f4981e051d5f380ef54401 (patch) | |
| tree | 37fc849d60ba24f6c6ac896322d6e8ac5f7d902c | |
| parent | 09debe1213b9c979e21106ccbe9d420f8511f4eb (diff) | |
| download | libgit2-cceae9a25d0bed8b00f4981e051d5f380ef54401.tar.gz | |
win32: use NT-prefixed "\\?\" paths
When turning UTF-8 paths into UCS-2 paths for Windows, always use
the \\?\-prefixed paths.  Because this bypasses the system's
path canonicalization, handle the canonicalization functions ourselves.
We must:
 1. always use a backslash as a directory separator
 2. only use a single backslash between directories
 3. not rely on the system to translate "." and ".." in paths
 4. remove trailing backslashes, except at the drive root (C:\)
| -rw-r--r-- | src/win32/findfile.c | 1 | ||||
| -rw-r--r-- | src/win32/path_w32.c | 269 | ||||
| -rw-r--r-- | src/win32/path_w32.h | 65 | ||||
| -rw-r--r-- | src/win32/posix.h | 1 | ||||
| -rw-r--r-- | src/win32/posix_w32.c | 67 | ||||
| -rw-r--r-- | src/win32/utf-conv.c | 42 | ||||
| -rw-r--r-- | src/win32/utf-conv.h | 39 | ||||
| -rw-r--r-- | src/win32/w32_util.h | 1 | ||||
| -rw-r--r-- | tests/core/link.c | 19 | ||||
| -rw-r--r-- | tests/path/win32.c | 190 | 
10 files changed, 591 insertions, 103 deletions
| diff --git a/src/win32/findfile.c b/src/win32/findfile.c index 86d4ef5bd..de27dd060 100644 --- a/src/win32/findfile.c +++ b/src/win32/findfile.c @@ -5,6 +5,7 @@   * a Linking Exception. For full terms see the included COPYING file.   */ +#include "path_w32.h"  #include "utf-conv.h"  #include "path.h"  #include "findfile.h" diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c new file mode 100644 index 000000000..f0eacaa63 --- /dev/null +++ b/src/win32/path_w32.c @@ -0,0 +1,269 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ + +#include "common.h" +#include "path.h" +#include "path_w32.h" +#include "utf-conv.h" + +#define PATH__NT_NAMESPACE     L"\\\\?\\" +#define PATH__NT_NAMESPACE_LEN 4 + +#define PATH__ABSOLUTE_LEN     3 + +#define path__is_dirsep(p) ((p) == '/' || (p) == '\\') + +#define path__is_absolute(p) \ +	(git__isalpha((p)[0]) && (p)[1] == ':' && ((p)[2] == '\\' || (p)[2] == '/')) + +#define path__is_nt_namespace(p) \ +	(((p)[0] == '\\' && (p)[1] == '\\' && (p)[2] == '?' && (p)[3] == '\\') || \ +	 ((p)[0] == '/' && (p)[1] == '/' && (p)[2] == '?' && (p)[3] == '/')) + +#define path__is_unc(p) \ +	(((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/')) + +GIT_INLINE(int) path__cwd(wchar_t *path, int size) +{ +	int len; + +	if ((len = GetCurrentDirectoryW(size, path)) == 0) { +		errno = GetLastError() == ERROR_ACCESS_DENIED ? EACCES : ENOENT; +		return -1; +	} else if (len > size) { +		errno = ENAMETOOLONG; +		return -1; +	} + +	/* The Win32 APIs may return "\\?\" once you've used it first. +	 * But it may not.  What a gloriously predictable API! 
+	 */ +	if (wcsncmp(path, PATH__NT_NAMESPACE, PATH__NT_NAMESPACE_LEN)) +		return len; + +	len -= PATH__NT_NAMESPACE_LEN; + +	memmove(path, path + PATH__NT_NAMESPACE_LEN, sizeof(wchar_t) * len); +	return len; +} + +static wchar_t *path__skip_server(wchar_t *path) +{ +	wchar_t *c; + +	for (c = path; *c; c++) { +		if (path__is_dirsep(*c)) +			return c + 1; +	} + +	return c; +} + +static wchar_t *path__skip_prefix(wchar_t *path) +{ +	if (path__is_nt_namespace(path)) { +		path += PATH__NT_NAMESPACE_LEN; + +		if (wcsncmp(path, L"UNC\\", 4) == 0) +			path = path__skip_server(path + 4); +		else if (path__is_absolute(path)) +			path += PATH__ABSOLUTE_LEN; +	} else if (path__is_absolute(path)) { +		path += PATH__ABSOLUTE_LEN; +	} else if (path__is_unc(path)) { +		path = path__skip_server(path + 2); +	} + +	return path; +} + +int git_win32_path_canonicalize(git_win32_path path) +{ +	wchar_t *base, *from, *to, *next; +	size_t len; + +	base = to = path__skip_prefix(path); + +	/* Unposixify the prefix */ +	for (from = path; from < to; from++) { +		if (*from == L'/') +			*from = L'\\'; +	} + +	while (*from) { +		for (next = from; *next; ++next) { +			if (*next == L'/') { +				*next = L'\\'; +				break; +			} + +			if (*next == L'\\') +				break; +		} + +		len = next - from; + +		if (len == 1 && from[0] == L'.') +			/* do nothing with singleton dot */; + +		else if (len == 2 && from[0] == L'.' 
&& from[1] == L'.') { +			if (to == base) { +				/* no more path segments to strip, eat the "../" */ +				if (*next == L'\\') +					len++; + +				base = to; +			} else { +				/* back up a path segment */ +				while (to > base && to[-1] == L'\\') to--; +				while (to > base && to[-1] != L'\\') to--; +			} +		} else { +			if (*next == L'\\' && *from != L'\\') +				len++; + +			if (to != from) +				memmove(to, from, sizeof(wchar_t) * len); + +			to += len; +		} + +		from += len; + +		while (*from == L'\\') from++; +	} + +	/* Strip trailing backslashes */ +	while (to > base && to[-1] == L'\\') to--; + +	*to = L'\0'; + +	return (to - path); +} + +int git_win32_path__cwd(wchar_t *out, size_t len) +{ +	int cwd_len; + +	if ((cwd_len = path__cwd(out, len)) < 0) +		return -1; + +	/* UNC paths */ +	if (wcsncmp(L"\\\\", out, 2) == 0) { +		/* Our buffer must be at least 5 characters larger than the +		 * current working directory:  we swallow one of the leading +		 * '\'s, but we add a 'UNC' specifier to the path, plus +		 * a trailing directory separator, plus a NUL. +		 */ +		if (cwd_len > MAX_PATH - 4) { +			errno = ENAMETOOLONG; +			return -1; +		} + +		memmove(out+2, out, sizeof(wchar_t) * cwd_len); +		out[0] = L'U'; +		out[1] = L'N'; +		out[2] = L'C'; + +		cwd_len += 2; +	} + +	/* Our buffer must be at least 2 characters larger than the current +	 * working directory.  (One character for the directory separator, +	 * one for the NUL.) +	 */ +	else if (cwd_len > MAX_PATH - 2) { +		errno = ENAMETOOLONG; +		return -1; +	} + +	return cwd_len; +} + +int git_win32_path_from_utf8(git_win32_path out, const char *src) +{ +	wchar_t *dest = out; + +	/* All win32 paths are in NT-prefixed format, beginning with "\\?\". 
*/ +	memcpy(dest, PATH__NT_NAMESPACE, sizeof(wchar_t) * PATH__NT_NAMESPACE_LEN); +	dest += PATH__NT_NAMESPACE_LEN; + +	/* See if this is an absolute path (beginning with a drive letter) */ +	if (path__is_absolute(src)) { +		if (git__utf8_to_16(dest, MAX_PATH, src) < 0) +			return -1; +	} +	/* File-prefixed NT-style paths beginning with \\?\ */ +	else if (path__is_nt_namespace(src)) { +		/* Skip the NT prefix, the destination already contains it */ +		if (git__utf8_to_16(dest, MAX_PATH, src + PATH__NT_NAMESPACE_LEN) < 0) +			return -1; +	} +	/* UNC paths */ +	else if (path__is_unc(src)) { +		memcpy(dest, L"UNC\\", sizeof(wchar_t) * 4); +		dest += 4; + +		/* Skip the leading "\\" */ +		if (git__utf8_to_16(dest, MAX_PATH - 2, src + 2) < 0) +			return -1; +	} +	/* Absolute paths omitting the drive letter */ +	else if (src[0] == '\\' || src[0] == '/') { +		if (path__cwd(dest, MAX_PATH) < 0) +			return -1; + +		if (!path__is_absolute(dest)) { +			errno = ENOENT; +			return -1; +		} + +		/* Skip the drive letter specification ("C:") */	 +		if (git__utf8_to_16(dest + 2, MAX_PATH - 2, src) < 0) +			return -1; +	} +	/* Relative paths */ +	else { +		int cwd_len; + +		if ((cwd_len = git_win32_path__cwd(dest, MAX_PATH)) < 0) +			return -1; + +		dest[cwd_len++] = L'\\'; + +		if (git__utf8_to_16(dest + cwd_len, MAX_PATH - cwd_len, src) < 0) +			return -1; +	} + +	return git_win32_path_canonicalize(out); +} + +int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src) +{ +	char *out = dest; +	int len; + +	/* Strip NT namespacing "\\?\" */ +	if (path__is_nt_namespace(src)) { +		src += 4; + +		/* "\\?\UNC\server\share" -> "\\server\share" */ +		if (wcsncmp(src, L"UNC\\", 4) == 0) { +			src += 4; + +			memcpy(dest, "\\\\", 2); +			out = dest + 2; +		} +	} + +	if ((len = git__utf16_to_8(out, GIT_WIN_PATH_UTF8, src)) < 0) +		return len; + +	git_path_mkposix(dest); + +	return len; +} diff --git a/src/win32/path_w32.h b/src/win32/path_w32.h new file mode 100644 index 
000000000..dc7a68e59 --- /dev/null +++ b/src/win32/path_w32.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) the libgit2 contributors. All rights reserved. + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. + */ +#ifndef INCLUDE_git_path_w32_h__ +#define INCLUDE_git_path_w32_h__ + +/* + * Provides a large enough buffer to support Windows paths:  MAX_PATH is + * 260, corresponding to a maximum path length of 259 characters plus a + * NULL terminator.  Prefixing with "\\?\" adds 4 characters, but if the + * original was a UNC path, then we turn "\\server\share" into + * "\\?\UNC\server\share".  So we replace the first two characters with + * 8 characters, a net gain of 6, so the maximum length is MAX_PATH+6. + */ +#define GIT_WIN_PATH_UTF16		MAX_PATH+6 + +/* Maximum size of a UTF-8 Win32 path.  We remove the "\\?\" or "\\?\UNC\" + * prefixes for presentation, bringing us back to 259 (non-NULL) + * characters.  UTF-8 does have 4-byte sequences, but they are encoded in + * UTF-16 using surrogate pairs, which takes up the space of two characters. + * Two characters in the range U+0800 -> U+FFFF take up more space in UTF-8 + * (6 bytes) than one surrogate pair (4 bytes). + */ +#define GIT_WIN_PATH_UTF8		(259 * 3 + 1) + +/* Win32 path types */ +typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16]; +typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8]; + +/** + * Create a Win32 path (in UCS-2 format) from a UTF-8 string. + * + * @param dest The buffer to receive the wide string. + * @param src The UTF-8 string to convert. 
+ * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure + */ +extern int git_win32_path_from_utf8(git_win32_path dest, const char *src); + +/** + * Canonicalize a Win32 UCS-2 path so that it is suitable for delivery to the + * Win32 APIs: remove multiple directory separators, squashing to a single one, + * strip trailing directory separators, ensure directory separators are all + * canonical (always backslashes, never forward slashes) and process any + * directory entries of '.' or '..'. + * + * This processes the buffer in place. + * + * @param path The buffer to process + * @return The new length of the buffer, in wchar_t's (not counting the NULL terminator) + */ +extern int git_win32_path_canonicalize(git_win32_path path); + +/** + * Create an internal format (posix-style) UTF-8 path from a Win32 UCS-2 path. + * + * @param dest The buffer to receive the UTF-8 string. + * @param src The wide string to convert. + * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure + */ +extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src); + +#endif diff --git a/src/win32/posix.h b/src/win32/posix.h index e055a77d0..104966edc 100644 --- a/src/win32/posix.h +++ b/src/win32/posix.h @@ -9,6 +9,7 @@  #include "common.h"  #include "../posix.h" +#include "path_w32.h"  #include "utf-conv.h"  #include "dir.h" diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c index 7b4555719..e446ccab0 100644 --- a/src/win32/posix_w32.c +++ b/src/win32/posix_w32.c @@ -7,6 +7,7 @@  #include "../posix.h"  #include "../fileops.h"  #include "path.h" +#include "path_w32.h"  #include "utf-conv.h"  #include "repository.h"  #include "reparse.h" @@ -35,22 +36,6 @@  /* GetFinalPathNameByHandleW signature */  typedef DWORD(WINAPI *PFGetFinalPathNameByHandleW)(HANDLE, LPWSTR, DWORD, DWORD); -/* Helper function which converts UTF-8 paths to UTF-16. 
- * On failure, errno is set. */ -static int utf8_to_16_with_errno(git_win32_path dest, const char *src) -{ -	int len = git_win32_path_from_utf8(dest, src); - -	if (len < 0) { -		if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) -			errno = ENAMETOOLONG; -		else -			errno = EINVAL; /* Bad code point, presumably */ -	} - -	return len; -} -  int p_ftruncate(int fd, long size)  {  #if defined(_MSC_VER) && _MSC_VER >= 1500 @@ -66,7 +51,7 @@ int p_mkdir(const char *path, mode_t mode)  	GIT_UNUSED(mode); -	if (utf8_to_16_with_errno(buf, path) < 0) +	if (git_win32_path_from_utf8(buf, path) < 0)  		return -1;  	return _wmkdir(buf); @@ -85,7 +70,7 @@ int p_unlink(const char *path)  	git_win32_path buf;  	int error; -	if (utf8_to_16_with_errno(buf, path) < 0) +	if (git_win32_path_from_utf8(buf, path) < 0)  		return -1;  	error = _wunlink(buf); @@ -292,7 +277,7 @@ static int do_lstat(const char *path, struct stat *buf, bool posixly_correct)  	git_win32_path path_w;  	int len; -	if ((len = utf8_to_16_with_errno(path_w, path)) < 0) +	if ((len = git_win32_path_from_utf8(path_w, path)) < 0)  		return -1;  	git_win32__path_trim_end(path_w, len); @@ -323,7 +308,7 @@ int p_readlink(const char *path, char *buf, size_t bufsiz)  	 * could occur in the middle of the encoding of a code point,  	 * we need to buffer the result on the stack. */ -	if (utf8_to_16_with_errno(path_w, path) < 0 || +	if (git_win32_path_from_utf8(path_w, path) < 0 ||  		readlink_w(target_w, path_w) < 0 ||  		(len = git_win32_path_to_utf8(target, target_w)) < 0)  		return -1; @@ -347,7 +332,7 @@ int p_open(const char *path, int flags, ...)  	
git_win32_path buf;  	mode_t mode = 0; -	if (utf8_to_16_with_errno(buf, path) < 0) +	if (git_win32_path_from_utf8(buf, path) < 0)  		return -1;  	if (flags & O_CREAT) { @@ -365,7 +350,7 @@ int p_creat(const char *path, mode_t mode)  {  	git_win32_path buf; -	if (utf8_to_16_with_errno(buf, path) < 0) +	if (git_win32_path_from_utf8(buf, path) < 0)  		return -1;  	return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | STANDARD_OPEN_FLAGS, mode); @@ -463,7 +448,7 @@ int p_stat(const char* path, struct stat* buf)  	git_win32_path path_w;  	int len; -	if ((len = utf8_to_16_with_errno(path_w, path)) < 0) +	if ((len = git_win32_path_from_utf8(path_w, path)) < 0)  		return -1;  	git_win32__path_trim_end(path_w, len); @@ -483,7 +468,7 @@ int p_chdir(const char* path)  {  	git_win32_path buf; -	if (utf8_to_16_with_errno(buf, path) < 0) +	if (git_win32_path_from_utf8(buf, path) < 0)  		return -1;  	return _wchdir(buf); @@ -493,7 +478,7 @@ int p_chmod(const char* path, mode_t mode)  {  	git_win32_path buf; -	if (utf8_to_16_with_errno(buf, path) < 0) +	if (git_win32_path_from_utf8(buf, path) < 0)  		return -1;  	return _wchmod(buf, mode); @@ -504,7 +489,7 @@ int p_rmdir(const char* path)  	git_win32_path buf;  	int error; -	if (utf8_to_16_with_errno(buf, path) < 0) +	if (git_win32_path_from_utf8(buf, path) < 0)  		return -1;  	error = _wrmdir(buf); @@ -533,7 +518,7 @@ char *p_realpath(const char *orig_path, char *buffer)  {  	git_win32_path orig_path_w, buffer_w; -	if (utf8_to_16_with_errno(orig_path_w, orig_path) < 0) +	if (git_win32_path_from_utf8(orig_path_w, orig_path) < 0)  		return NULL;  	/* Note that if the path provided is a relative path, then the current directory @@ -554,20 +539,17 @@ char *p_realpath(const char *orig_path, char *buffer)  		return NULL;  	} -	/* Convert the path to UTF-8. */ -	if (buffer) { -		/* If the caller provided a buffer, then it is assumed to be GIT_WIN_PATH_UTF8 -		 * characters in size. If it isn't, then we may overflow. 
*/ -		if (git__utf16_to_8(buffer, GIT_WIN_PATH_UTF8, buffer_w) < 0) -			return NULL; -	} else { -		/* If the caller did not provide a buffer, then we allocate one for the caller -		 * from the heap. */ -		if (git__utf16_to_8_alloc(&buffer, buffer_w) < 0) -			return NULL; +	if (!buffer && !(buffer = git__malloc(GIT_WIN_PATH_UTF8))) { +		errno = ENOMEM; +		return NULL;  	} -	/* Convert backslashes to forward slashes */ +	/* Convert the path to UTF-8. If the caller provided a buffer, then it +	 * is assumed to be GIT_WIN_PATH_UTF8 characters in size. If it isn't, +	 * then we may overflow. */ +	if (git_win32_path_to_utf8(buffer, buffer_w) < 0) +		return NULL; +  	git_path_mkposix(buffer);  	return buffer; @@ -608,6 +590,7 @@ int p_snprintf(char *buffer, size_t count, const char *format, ...)  	return r;  } +/* TODO: wut? */  int p_mkstemp(char *tmp_path)  {  #if defined(_MSC_VER) && _MSC_VER >= 1500 @@ -625,7 +608,7 @@ int p_access(const char* path, mode_t mode)  {  	git_win32_path buf; -	if (utf8_to_16_with_errno(buf, path) < 0) +	if (git_win32_path_from_utf8(buf, path) < 0)  		return -1;  	return _waccess(buf, mode); @@ -664,8 +647,8 @@ int p_rename(const char *from, const char *to)  	int rename_succeeded;  	int error; -	if (utf8_to_16_with_errno(wfrom, from) < 0 || -		utf8_to_16_with_errno(wto, to) < 0) +	if (git_win32_path_from_utf8(wfrom, from) < 0 || +		git_win32_path_from_utf8(wto, to) < 0)  		return -1;  	/* wait up to 50ms if file is locked by another thread or process */ diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c index b9ccfb5e5..b0205b019 100644 --- a/src/win32/utf-conv.c +++ b/src/win32/utf-conv.c @@ -26,6 +26,14 @@ GIT_INLINE(DWORD) get_wc_flags(void)  	return flags;  } +GIT_INLINE(void) git__set_errno(void) +{ +	if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) +		errno = ENAMETOOLONG; +	else +		errno = EINVAL; +} +  /**   * Converts a UTF-8 string to wide characters.   
* @@ -36,10 +44,15 @@ GIT_INLINE(DWORD) get_wc_flags(void)   */  int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)  { +	int len; +  	/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to  	* turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's  	* length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */ -	return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1; +	if ((len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1) < 0) +		git__set_errno(); + +	return len;  }  /** @@ -52,10 +65,15 @@ int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)   */  int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src)  { +	int len; +  	/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to  	 * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's  	 * length. 
WideCharToMultiByte never returns int's minvalue, so underflow is not possible */ -	return WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1; +	if ((len = WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1) < 0) +		git__set_errno(); + +	return len;  }  /** @@ -76,17 +94,23 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src)  	/* Length of -1 indicates NULL termination of the input string */  	utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0); -	if (!utf16_size) +	if (!utf16_size) { +		git__set_errno();  		return -1; +	}  	*dest = git__malloc(utf16_size * sizeof(wchar_t)); -	if (!*dest) +	if (!*dest) { +		errno = ENOMEM;  		return -1; +	}  	utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, *dest, utf16_size);  	if (!utf16_size) { +		git__set_errno(); +  		git__free(*dest);  		*dest = NULL;  	} @@ -116,17 +140,23 @@ int git__utf16_to_8_alloc(char **dest, const wchar_t *src)  	/* Length of -1 indicates NULL termination of the input string */  	utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, NULL, 0, NULL, NULL); -	if (!utf8_size) +	if (!utf8_size) { +		git__set_errno();  		return -1; +	}  	*dest = git__malloc(utf8_size); -	if (!*dest) +	if (!*dest) { +		errno = ENOMEM;  		return -1; +	}  	utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, *dest, utf8_size, NULL, NULL);  	if (!utf8_size) { +		git__set_errno(); +  		git__free(*dest);  		*dest = NULL;  	} diff --git a/src/win32/utf-conv.h b/src/win32/utf-conv.h index a480cd93e..89cdb96da 100644 --- a/src/win32/utf-conv.h +++ b/src/win32/utf-conv.h @@ -10,21 +10,6 @@  #include <wchar.h>  #include "common.h" -/* Equal to the Win32 MAX_PATH constant. The maximum path length is 259 - * characters plus a NULL terminator. */ -#define GIT_WIN_PATH_UTF16		260 - -/* Maximum size of a UTF-8 Win32 path. 
UTF-8 does have 4-byte sequences, - * but they are encoded in UTF-16 using surrogate pairs, which takes up - * the space of two characters. Two characters in the range U+0800 -> - * U+FFFF take up more space in UTF-8 (6 bytes) than one surrogate pair - * (4 bytes). */ -#define GIT_WIN_PATH_UTF8		(259 * 3 + 1) - -/* Win32 path types */ -typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16]; -typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8]; -  /**   * Converts a UTF-8 string to wide characters.   * @@ -67,28 +52,4 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src);   */  int git__utf16_to_8_alloc(char **dest, const wchar_t *src); -/** - * Converts a UTF-8 Win32 path to wide characters. - * - * @param dest The buffer to receive the wide string. - * @param src The UTF-8 string to convert. - * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure - */ -GIT_INLINE(int) git_win32_path_from_utf8(git_win32_path dest, const char *src) -{ -	return git__utf8_to_16(dest, GIT_WIN_PATH_UTF16, src); -} - -/** - * Converts a wide Win32 path to UTF-8. - * - * @param dest The buffer to receive the UTF-8 string. - * @param src The wide string to convert. 
- * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure - */ -GIT_INLINE(int) git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src) -{ -	return git__utf16_to_8(dest, GIT_WIN_PATH_UTF8, src); -} -  #endif diff --git a/src/win32/w32_util.h b/src/win32/w32_util.h index a1d388af5..9c1b94359 100644 --- a/src/win32/w32_util.h +++ b/src/win32/w32_util.h @@ -9,6 +9,7 @@  #define INCLUDE_w32_util_h__  #include "utf-conv.h" +#include "path_w32.h"  GIT_INLINE(bool) git_win32__isalpha(wchar_t c)  { diff --git a/tests/core/link.c b/tests/core/link.c index 83999ebdf..ec85ec4e0 100644 --- a/tests/core/link.c +++ b/tests/core/link.c @@ -197,19 +197,6 @@ static void do_custom_reparse(const char *path)  #endif -git_buf *unslashify(git_buf *buf) -{ -#ifdef GIT_WIN32 -	size_t i; - -	for (i = 0; i < buf->size; i++) -		if (buf->ptr[i] == '/') -			buf->ptr[i] = '\\'; -#endif - -	return buf; -} -  void test_core_link__stat_regular_file(void)  {  	struct stat st; @@ -584,7 +571,7 @@ void test_core_link__readlink_symlink(void)  	buf[len] = 0; -	cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf); +	cl_assert_equal_s(git_buf_cstr(&target_path), buf);  	git_buf_free(&target_path);  } @@ -607,7 +594,7 @@ void test_core_link__readlink_dangling(void)  	buf[len] = 0; -	cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf); +	cl_assert_equal_s(git_buf_cstr(&target_path), buf);  	git_buf_free(&target_path);  } @@ -636,7 +623,7 @@ void test_core_link__readlink_multiple(void)  	buf[len] = 0; -	cl_assert_equal_s(git_buf_cstr(unslashify(&path2)), buf); +	cl_assert_equal_s(git_buf_cstr(&path2), buf);  	git_buf_free(&path1);  	git_buf_free(&path2); diff --git a/tests/path/win32.c b/tests/path/win32.c new file mode 100644 index 000000000..ef0b5d2f2 --- /dev/null +++ b/tests/path/win32.c @@ -0,0 +1,190 @@ + +#include "clar_libgit2.h" +#include "path.h" + +#ifdef GIT_WIN32 +#include "win32/path_w32.h" +#endif + +void 
test_utf8_to_utf16(const char *utf8_in, const wchar_t *utf16_expected) +{ +#ifdef GIT_WIN32 +	git_win32_path path_utf16; +	int path_utf16len; + +	cl_assert((path_utf16len = git_win32_path_from_utf8(path_utf16, utf8_in)) >= 0); +	cl_assert_equal_wcs(utf16_expected, path_utf16); +	cl_assert_equal_i(wcslen(utf16_expected), path_utf16len); +#else +	GIT_UNUSED(utf8_in); +	GIT_UNUSED(utf16_expected); +#endif +} + +void test_path_win32__utf8_to_utf16(void) +{ +#ifdef GIT_WIN32 +	test_utf8_to_utf16("C:\\", L"\\\\?\\C:\\"); +	test_utf8_to_utf16("c:\\", L"\\\\?\\c:\\"); +	test_utf8_to_utf16("C:/", L"\\\\?\\C:\\"); +	test_utf8_to_utf16("c:/", L"\\\\?\\c:\\"); +#endif +} + +void test_path_win32__removes_trailing_slash(void) +{ +#ifdef GIT_WIN32 +	test_utf8_to_utf16("C:\\Foo\\", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("C:\\Foo\\\\", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("C:\\Foo\\\\", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("C:/Foo/", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("C:/Foo///", L"\\\\?\\C:\\Foo"); +#endif +} + +void test_path_win32__squashes_multiple_slashes(void) +{ +#ifdef GIT_WIN32 +	test_utf8_to_utf16("C:\\\\Foo\\Bar\\\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); +	test_utf8_to_utf16("C://Foo/Bar///Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); +#endif +} + +void test_path_win32__unc(void) +{ +#ifdef GIT_WIN32 +	test_utf8_to_utf16("\\\\server\\c$\\unc\\path", L"\\\\?\\UNC\\server\\c$\\unc\\path"); +	test_utf8_to_utf16("//server/git/style/unc/path", L"\\\\?\\UNC\\server\\git\\style\\unc\\path"); +#endif +} + +void test_path_win32__honors_max_path(void) +{ +#ifdef GIT_WIN32 +	git_win32_path path_utf16; + +	test_utf8_to_utf16("C:\\This path is 259 chars and is the max length in windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij", +		L"\\\\?\\C:\\This path is 259 chars and is the max length in 
windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"); +	test_utf8_to_utf16("\\\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij", +		L"\\\\?\\UNC\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"); + +	cl_check_fail(git_win32_path_from_utf8(path_utf16, "C:\\This path is 260 chars and is sadly too long for windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij")); +	cl_check_fail(git_win32_path_from_utf8(path_utf16, "\\\\unc\\paths are also bound by 260 character restrictions\\including the server name portion\\bcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij")); +#endif +} + +void test_path_win32__dot_and_dotdot(void) +{ +#ifdef GIT_WIN32 +	test_utf8_to_utf16("C:\\Foo\\..\\Foobar", L"\\\\?\\C:\\Foobar"); +	test_utf8_to_utf16("C:\\Foo\\Bar\\..\\Foobar", L"\\\\?\\C:\\Foo\\Foobar"); +	test_utf8_to_utf16("C:\\Foo\\Bar\\..\\Foobar\\..", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("C:\\Foobar\\..", L"\\\\?\\C:\\"); +	test_utf8_to_utf16("C:/Foo/Bar/../Foobar", L"\\\\?\\C:\\Foo\\Foobar"); +	test_utf8_to_utf16("C:/Foo/Bar/../Foobar/../Asdf/", L"\\\\?\\C:\\Foo\\Asdf"); +	test_utf8_to_utf16("C:/Foo/Bar/../Foobar/..", L"\\\\?\\C:\\Foo"); 
+	test_utf8_to_utf16("C:/Foo/..", L"\\\\?\\C:\\"); + +	test_utf8_to_utf16("C:\\Foo\\Bar\\.\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); +	test_utf8_to_utf16("C:\\.\\Foo\\.\\Bar\\.\\Foobar\\.\\", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); +	test_utf8_to_utf16("C:/Foo/Bar/./Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar"); +	test_utf8_to_utf16("C:/Foo/../Bar/./Foobar/../", L"\\\\?\\C:\\Bar"); + +	test_utf8_to_utf16("C:\\Foo\\..\\..\\Bar", L"\\\\?\\C:\\Bar"); +#endif +} + +void test_path_win32__absolute_from_no_drive_letter(void) +{ +#ifdef GIT_WIN32 +	test_utf8_to_utf16("\\Foo", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar"); +	test_utf8_to_utf16("/Foo/Bar", L"\\\\?\\C:\\Foo\\Bar"); +#endif +} + +void test_path_win32__absolute_from_relative(void) +{ +#ifdef GIT_WIN32 +	char cwd_backup[MAX_PATH]; + +	cl_must_pass(p_getcwd(cwd_backup, MAX_PATH)); +	cl_must_pass(p_chdir("C:/")); + +	test_utf8_to_utf16("Foo", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("..\\..\\Foo", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("Foo\\..", L"\\\\?\\C:\\"); +	test_utf8_to_utf16("Foo\\..\\..", L"\\\\?\\C:\\"); +	test_utf8_to_utf16("", L"\\\\?\\C:\\"); + +	cl_must_pass(p_chdir("C:/Windows")); + +	test_utf8_to_utf16("Foo", L"\\\\?\\C:\\Windows\\Foo"); +	test_utf8_to_utf16("Foo\\Bar", L"\\\\?\\C:\\Windows\\Foo\\Bar"); +	test_utf8_to_utf16("..\\Foo", L"\\\\?\\C:\\Foo"); +	test_utf8_to_utf16("Foo\\..\\Bar", L"\\\\?\\C:\\Windows\\Bar"); +	test_utf8_to_utf16("", L"\\\\?\\C:\\Windows"); + +	cl_must_pass(p_chdir(cwd_backup)); +#endif +} + +void test_canonicalize(const wchar_t *in, const wchar_t *expected) +{ +#ifdef GIT_WIN32 +	git_win32_path canonical; + +	cl_assert(wcslen(in) < MAX_PATH); +	wcscpy(canonical, in); + +	cl_must_pass(git_win32_path_canonicalize(canonical)); +	cl_assert_equal_wcs(expected, canonical); +#else +	GIT_UNUSED(in); +	GIT_UNUSED(expected); +#endif +} + +void test_path_win32__canonicalize(void) +{ +#ifdef GIT_WIN32 +	test_canonicalize(L"C:\\Foo\\Bar", L"C:\\Foo\\Bar"); +	
test_canonicalize(L"C:\\Foo\\", L"C:\\Foo"); +	test_canonicalize(L"C:\\Foo\\\\", L"C:\\Foo"); +	test_canonicalize(L"C:\\Foo\\..\\Bar", L"C:\\Bar"); +	test_canonicalize(L"C:\\Foo\\..\\..\\Bar", L"C:\\Bar"); +	test_canonicalize(L"C:\\Foo\\..\\..\\..\\..\\", L"C:\\"); +	test_canonicalize(L"C:/Foo/Bar", L"C:\\Foo\\Bar"); +	test_canonicalize(L"C:/", L"C:\\"); + +	test_canonicalize(L"Foo\\\\Bar\\\\Asdf\\\\", L"Foo\\Bar\\Asdf"); +	test_canonicalize(L"Foo\\\\Bar\\\\..\\\\Asdf\\", L"Foo\\Asdf"); +	test_canonicalize(L"Foo\\\\Bar\\\\.\\\\Asdf\\", L"Foo\\Bar\\Asdf"); +	test_canonicalize(L"Foo\\\\..\\Bar\\\\.\\\\Asdf\\", L"Bar\\Asdf"); +	test_canonicalize(L"\\", L""); +	test_canonicalize(L"", L""); +	test_canonicalize(L"Foo\\..\\..\\..\\..", L""); +	test_canonicalize(L"..\\..\\..\\..", L""); +	test_canonicalize(L"\\..\\..\\..\\..", L""); + +	test_canonicalize(L"\\\\?\\C:\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar"); +	test_canonicalize(L"\\\\?\\C:\\Foo\\Bar\\", L"\\\\?\\C:\\Foo\\Bar"); +	test_canonicalize(L"\\\\?\\C:\\\\Foo\\.\\Bar\\\\..\\", L"\\\\?\\C:\\Foo"); +	test_canonicalize(L"\\\\?\\C:\\\\", L"\\\\?\\C:\\"); +	test_canonicalize(L"//?/C:/", L"\\\\?\\C:\\"); +	test_canonicalize(L"//?/C:/../../Foo/", L"\\\\?\\C:\\Foo"); +	test_canonicalize(L"//?/C:/Foo/../../", L"\\\\?\\C:\\"); + +	test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder", L"\\\\?\\UNC\\server\\C$\\folder"); +	test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder"); +	test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder"); +	test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\..\\..\\..\\..\\share\\", L"\\\\?\\UNC\\server\\share"); + +	test_canonicalize(L"\\\\server\\share", L"\\\\server\\share"); +	test_canonicalize(L"\\\\server\\share\\", L"\\\\server\\share"); +	test_canonicalize(L"\\\\server\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar"); +	test_canonicalize(L"\\\\server\\\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar"); +	
test_canonicalize(L"\\\\server\\share\\..\\foo", L"\\\\server\\foo"); +	test_canonicalize(L"\\\\server\\..\\..\\share\\.\\foo", L"\\\\server\\share\\foo"); +#endif +} | 
