summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEdward Thomson <ethomson@microsoft.com>2014-12-01 13:09:58 -0500
committerEdward Thomson <ethomson@microsoft.com>2014-12-17 14:58:13 -0600
commitd1dd81b3d154264bb06e28fbf5277d3683e59be6 (patch)
tree29180ecf1304c79fc6c8e69174322850f6124c98
parentbf4f50ac6ae54c605cd2aaeff0cad7e2180d9e96 (diff)
downloadlibgit2-d1dd81b3d154264bb06e28fbf5277d3683e59be6.tar.gz
win32: use NT-prefixed "\\?\" paths
When turning UTF-8 paths into UCS-2 paths for Windows, always use the \\?\-prefixed paths. Because this bypasses the system's path canonicalization, handle the canonicalization functions ourselves. We must: 1. always use a backslash as a directory separator 2. only use a single backslash between directories 3. not rely on the system to translate "." and ".." in paths 4. remove trailing backslashes, except at the drive root (C:\)
-rw-r--r--src/win32/findfile.c1
-rw-r--r--src/win32/path_w32.c269
-rw-r--r--src/win32/path_w32.h65
-rw-r--r--src/win32/posix.h1
-rw-r--r--src/win32/posix_w32.c67
-rw-r--r--src/win32/utf-conv.c42
-rw-r--r--src/win32/utf-conv.h39
-rw-r--r--src/win32/w32_util.h1
-rw-r--r--tests/core/link.c19
-rw-r--r--tests/path/win32.c190
10 files changed, 591 insertions, 103 deletions
diff --git a/src/win32/findfile.c b/src/win32/findfile.c
index 86d4ef5bd..de27dd060 100644
--- a/src/win32/findfile.c
+++ b/src/win32/findfile.c
@@ -5,6 +5,7 @@
* a Linking Exception. For full terms see the included COPYING file.
*/
+#include "path_w32.h"
#include "utf-conv.h"
#include "path.h"
#include "findfile.h"
diff --git a/src/win32/path_w32.c b/src/win32/path_w32.c
new file mode 100644
index 000000000..f0eacaa63
--- /dev/null
+++ b/src/win32/path_w32.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "common.h"
+#include "path.h"
+#include "path_w32.h"
+#include "utf-conv.h"
+
+#define PATH__NT_NAMESPACE L"\\\\?\\" /* the four-character NT namespace prefix \\?\ as a wide string */
+#define PATH__NT_NAMESPACE_LEN 4 /* characters in PATH__NT_NAMESPACE, not counting the NUL */
+
+#define PATH__ABSOLUTE_LEN 3 /* characters tested by path__is_absolute: letter, ':', separator */
+
+#define path__is_dirsep(p) ((p) == '/' || (p) == '\\') /* true for either separator; evaluates p twice */
+
+#define path__is_absolute(p) \
+ (git__isalpha((p)[0]) && (p)[1] == ':' && ((p)[2] == '\\' || (p)[2] == '/')) /* drive-rooted, e.g. C:\ or C:/ */
+
+#define path__is_nt_namespace(p) \
+ (((p)[0] == '\\' && (p)[1] == '\\' && (p)[2] == '?' && (p)[3] == '\\') || \
+ ((p)[0] == '/' && (p)[1] == '/' && (p)[2] == '?' && (p)[3] == '/')) /* starts with \\?\ or //?/ (no mixing) */
+
+#define path__is_unc(p) \
+ (((p)[0] == '\\' && (p)[1] == '\\') || ((p)[0] == '/' && (p)[1] == '/')) /* two identical leading separators; also matches the NT prefix, so test that first */
+
+/*
+ * Read the current working directory into `path` (a buffer of `size`
+ * wide characters), with any leading "\\?\" namespace prefix removed.
+ *
+ * Returns the length of the resulting string in characters (not counting
+ * the NUL terminator).  On failure returns -1 and sets errno: EACCES when
+ * Windows reports ERROR_ACCESS_DENIED, ENOENT for any other failure, and
+ * ENAMETOOLONG when the directory does not fit in `size` characters.
+ */
+GIT_INLINE(int) path__cwd(wchar_t *path, int size)
+{
+	int len;
+
+	if ((len = GetCurrentDirectoryW(size, path)) == 0) {
+		errno = GetLastError() == ERROR_ACCESS_DENIED ? EACCES : ENOENT;
+		return -1;
+	} else if (len > size) {
+		/* On a too-small buffer the API reports the required size instead */
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	/* The Win32 APIs may return "\\?\" once you've used it first.
+	 * But it may not.  What a gloriously predictable API!
+	 */
+	if (wcsncmp(path, PATH__NT_NAMESPACE, PATH__NT_NAMESPACE_LEN))
+		return len;
+
+	len -= PATH__NT_NAMESPACE_LEN;
+
+	/* Move the terminating NUL along with the text (hence len + 1) so that
+	 * the buffer holds exactly `len` characters and no stale tail.
+	 */
+	memmove(path, path + PATH__NT_NAMESPACE_LEN, sizeof(wchar_t) * (len + 1));
+	return len;
+}
+
+/*
+ * Step over the first path component (the server name of a UNC path).
+ * Returns a pointer to the character following the first directory
+ * separator, or to the terminating NUL when there is no separator.
+ */
+static wchar_t *path__skip_server(wchar_t *path)
+{
+	wchar_t *cur = path;
+
+	while (*cur != L'\0') {
+		wchar_t ch = *cur++;
+
+		if (path__is_dirsep(ch))
+			return cur;
+	}
+
+	return cur;
+}
+
+/*
+ * Return a pointer just past the root prefix of `path`, i.e. past the
+ * part that canonicalization must leave in place:
+ *   - "\\?\" optionally followed by "UNC\<server>\" or a drive root,
+ *   - a plain drive root such as "C:\",
+ *   - the "\\<server>\" of a UNC path.
+ * A path with none of these is returned unchanged.
+ */
+static wchar_t *path__skip_prefix(wchar_t *path)
+{
+	if (path__is_nt_namespace(path)) {
+		wchar_t *rest = path + PATH__NT_NAMESPACE_LEN;
+
+		if (wcsncmp(rest, L"UNC\\", 4) == 0)
+			return path__skip_server(rest + 4);
+
+		if (path__is_absolute(rest))
+			return rest + PATH__ABSOLUTE_LEN;
+
+		return rest;
+	}
+
+	if (path__is_absolute(path))
+		return path + PATH__ABSOLUTE_LEN;
+
+	if (path__is_unc(path))
+		return path__skip_server(path + 2);
+
+	return path;
+}
+
+int git_win32_path_canonicalize(git_win32_path path)
+{
+ wchar_t *base, *from, *to, *next;
+ size_t len;
+ /* Rewrites `path` in place: `from` reads segments, `to` is the write
+ * position, and `base` marks the end of the root prefix that ".." and
+ * trailing-separator stripping may never cross. */
+ base = to = path__skip_prefix(path);
+
+ /* Unposixify the prefix: turn its forward slashes into backslashes */
+ for (from = path; from < to; from++) {
+ if (*from == L'/')
+ *from = L'\\';
+ }
+
+ while (*from) {
+ for (next = from; *next; ++next) { /* find the end of this segment */
+ if (*next == L'/') {
+ *next = L'\\'; /* normalize the separator as we go */
+ break;
+ }
+
+ if (*next == L'\\')
+ break;
+ }
+
+ len = next - from; /* segment length, separator not included */
+
+ if (len == 1 && from[0] == L'.')
+ /* a lone "." segment contributes nothing to the output */;
+
+ else if (len == 2 && from[0] == L'.' && from[1] == L'.') {
+ if (to == base) {
+ /* no more path segments to strip, eat the "../" */
+ if (*next == L'\\')
+ len++;
+
+ base = to; /* no effect: to == base in this branch */
+ } else {
+ /* back up a path segment: first over the separator, then over the name */
+ while (to > base && to[-1] == L'\\') to--;
+ while (to > base && to[-1] != L'\\') to--;
+ }
+ } else {
+ if (*next == L'\\' && *from != L'\\')
+ len++; /* copy the trailing separator together with the segment */
+
+ if (to != from)
+ memmove(to, from, sizeof(wchar_t) * len);
+
+ to += len;
+ }
+
+ from += len;
+
+ while (*from == L'\\') from++; /* collapse runs of backslashes */
+ }
+
+ /* Strip trailing backslashes, but never those belonging to the prefix */
+ while (to > base && to[-1] == L'\\') to--;
+
+ *to = L'\0';
+
+ return (to - path); /* new length in wchar_t's, not counting the NUL */
+}
+
+int git_win32_path__cwd(wchar_t *out, size_t len)
+{
+ int cwd_len;
+ /* Fetches the current directory into `out` in the form that follows a
+ * "\\?\" prefix: a "\\server\share" directory becomes "UNC\server\share",
+ * anything else is left as path__cwd produced it. Returns the length in
+ * characters, or -1 with errno set.
+ * NOTE(review): the limits below use MAX_PATH rather than `len`, and in
+ * the UNC case no NUL is written at the returned length - confirm that
+ * every caller passes MAX_PATH and terminates the string itself. */
+ if ((cwd_len = path__cwd(out, len)) < 0)
+ return -1;
+
+ /* UNC paths */
+ if (wcsncmp(L"\\\\", out, 2) == 0) {
+ /* Our buffer must be at least 4 characters larger than the
+ * current working directory: we swallow one of the leading
+ * '\'s (-1), but we add a 'UNC' specifier to the path (+3), plus
+ * a trailing directory separator (+1), plus a NUL (+1).
+ */
+ if (cwd_len > MAX_PATH - 4) {
+ errno = ENAMETOOLONG;
+ return -1;
+ }
+
+ memmove(out+2, out, sizeof(wchar_t) * cwd_len); /* make room; out[3] keeps one '\' */
+ out[0] = L'U';
+ out[1] = L'N';
+ out[2] = L'C';
+
+ cwd_len += 2;
+ }
+
+ /* Our buffer must be at least 2 characters larger than the current
+ * working directory. (One character for the directory separator,
+ * one for the NUL.)
+ */
+ else if (cwd_len > MAX_PATH - 2) {
+ errno = ENAMETOOLONG;
+ return -1;
+ }
+
+ return cwd_len;
+}
+
+int git_win32_path_from_utf8(git_win32_path out, const char *src)
+{
+ wchar_t *dest = out;
+ /* Builds "\\?\" + <absolute form of src> in `out`, then canonicalizes it.
+ * Returns the canonicalized length, or -1 if a conversion or the
+ * current-directory lookup fails. */
+ /* All win32 paths are in NT-prefixed format, beginning with "\\?\". */
+ memcpy(dest, PATH__NT_NAMESPACE, sizeof(wchar_t) * PATH__NT_NAMESPACE_LEN);
+ dest += PATH__NT_NAMESPACE_LEN;
+
+ /* See if this is an absolute path (beginning with a drive letter) */
+ if (path__is_absolute(src)) {
+ if (git__utf8_to_16(dest, MAX_PATH, src) < 0)
+ return -1;
+ }
+ /* File-prefixed NT-style paths beginning with \\?\ */
+ else if (path__is_nt_namespace(src)) {
+ /* Skip the NT prefix, the destination already contains it */
+ if (git__utf8_to_16(dest, MAX_PATH, src + PATH__NT_NAMESPACE_LEN) < 0)
+ return -1;
+ }
+ /* UNC paths: "\\server\share" is stored as "\\?\UNC\server\share" */
+ else if (path__is_unc(src)) {
+ memcpy(dest, L"UNC\\", sizeof(wchar_t) * 4);
+ dest += 4;
+
+ /* Skip the leading "\\" */
+ if (git__utf8_to_16(dest, MAX_PATH - 2, src + 2) < 0)
+ return -1;
+ }
+ /* Absolute paths omitting the drive letter: borrow the drive of the cwd */
+ else if (src[0] == '\\' || src[0] == '/') {
+ if (path__cwd(dest, MAX_PATH) < 0)
+ return -1;
+
+ if (!path__is_absolute(dest)) { /* cwd does not start with a drive root, so there is no drive to reuse */
+ errno = ENOENT;
+ return -1;
+ }
+
+ /* Skip the drive letter specification ("C:") and overwrite the rest of the cwd */
+ if (git__utf8_to_16(dest + 2, MAX_PATH - 2, src) < 0)
+ return -1;
+ }
+ /* Relative paths: cwd + '\' + src */
+ else {
+ int cwd_len;
+
+ if ((cwd_len = git_win32_path__cwd(dest, MAX_PATH)) < 0)
+ return -1;
+
+ dest[cwd_len++] = L'\\';
+
+ if (git__utf8_to_16(dest + cwd_len, MAX_PATH - cwd_len, src) < 0)
+ return -1;
+ }
+
+ return git_win32_path_canonicalize(out); /* resolves "."/"..", separators and trailing slashes */
+}
+
+/*
+ * Convert a wide Win32 path into a posix-style (forward slash) UTF-8
+ * path.  A leading "\\?\" namespace prefix is dropped, and
+ * "\\?\UNC\server\share" is turned back into "//server/share".
+ *
+ * Returns the length in bytes of the string stored in `dest` (not
+ * counting the NUL terminator), or < 0 if the conversion fails.
+ */
+int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src)
+{
+	char *out = dest;
+	int len;
+
+	/* Strip NT namespacing "\\?\" */
+	if (path__is_nt_namespace(src)) {
+		src += PATH__NT_NAMESPACE_LEN;
+
+		/* "\\?\UNC\server\share" -> "\\server\share" */
+		if (wcsncmp(src, L"UNC\\", 4) == 0) {
+			src += 4;
+
+			memcpy(dest, "\\\\", 2);
+			out = dest + 2;
+		}
+	}
+
+	/* `out` may sit two bytes into `dest`; shrink the usable size to match
+	 * so the conversion cannot write past the end of the buffer.
+	 */
+	if ((len = git__utf16_to_8(out, GIT_WIN_PATH_UTF8 - (size_t)(out - dest), src)) < 0)
+		return len;
+
+	git_path_mkposix(dest);
+
+	/* Count the UNC "\\" written ahead of the converted text as well */
+	return len + (int)(out - dest);
+}
diff --git a/src/win32/path_w32.h b/src/win32/path_w32.h
new file mode 100644
index 000000000..dc7a68e59
--- /dev/null
+++ b/src/win32/path_w32.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+#ifndef INCLUDE_git_path_w32_h__
+#define INCLUDE_git_path_w32_h__
+
+/*
+ * Provides a large enough buffer to support Windows paths: MAX_PATH is
+ * 260, corresponding to a maximum path length of 259 characters plus a
+ * NUL terminator. Prefixing with "\\?\" adds 4 characters, but if the
+ * original was a UNC path, then we turn "\\server\share" into
+ * "\\?\UNC\server\share". So we replace the first two characters with
+ * 8 characters, a net gain of 6, so the maximum length is MAX_PATH+6.
+ *
+ * The expansion is parenthesized so that the macro keeps its value when
+ * used inside a larger expression (e.g. multiplied by sizeof(wchar_t)).
+ */
+#define GIT_WIN_PATH_UTF16 (MAX_PATH+6)
+
+/* Maximum size of a UTF-8 Win32 path. We remove the "\\?\" or "\\?\UNC\"
+ * prefixes for presentation, bringing us back to 259 (non-NUL)
+ * characters. UTF-8 does have 4-byte sequences, but they are encoded in
+ * UTF-16 using surrogate pairs, which takes up the space of two characters.
+ * Two characters in the range U+0800 -> U+FFFF take up more space in UTF-8
+ * (6 bytes) than one surrogate pair (4 bytes).
+ */
+#define GIT_WIN_PATH_UTF8 (259 * 3 + 1)
+
+/* Win32 path types */
+typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16];
+typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8];
+
+/**
+ * Create a Win32 path (wide characters, UTF-16) from a UTF-8 string. The
+ * result always begins with the "\\?\" prefix and has been canonicalized.
+ *
+ * @param dest The buffer to receive the wide string.
+ * @param src The UTF-8 string to convert.
+ * @return The length of the wide string, in characters (not counting the NUL terminator), or < 0 for failure
+ */
+extern int git_win32_path_from_utf8(git_win32_path dest, const char *src);
+
+/**
+ * Canonicalize a wide-character Win32 path so that it is suitable for delivery
+ * to the Win32 APIs: remove multiple directory separators, squashing to a single
+ * one, strip trailing directory separators, ensure directory separators are all
+ * canonical (always backslashes, never forward slashes) and process any
+ * directory entries of '.' or '..'.
+ *
+ * This processes the buffer in place.
+ *
+ * @param path The buffer to process
+ * @return The new length of the buffer, in wchar_t's (not counting the NUL terminator)
+ */
+extern int git_win32_path_canonicalize(git_win32_path path);
+
+/**
+ * Create an internal format (posix-style) UTF-8 path from a wide-character Win32 path.
+ *
+ * @param dest The buffer to receive the UTF-8 string.
+ * @param src The wide string to convert.
+ * @return The length of the UTF-8 string, in bytes (not counting the NUL terminator), or < 0 for failure
+ */
+extern int git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src);
+
+#endif
diff --git a/src/win32/posix.h b/src/win32/posix.h
index e055a77d0..104966edc 100644
--- a/src/win32/posix.h
+++ b/src/win32/posix.h
@@ -9,6 +9,7 @@
#include "common.h"
#include "../posix.h"
+#include "path_w32.h"
#include "utf-conv.h"
#include "dir.h"
diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c
index 7b4555719..e446ccab0 100644
--- a/src/win32/posix_w32.c
+++ b/src/win32/posix_w32.c
@@ -7,6 +7,7 @@
#include "../posix.h"
#include "../fileops.h"
#include "path.h"
+#include "path_w32.h"
#include "utf-conv.h"
#include "repository.h"
#include "reparse.h"
@@ -35,22 +36,6 @@
/* GetFinalPathNameByHandleW signature */
typedef DWORD(WINAPI *PFGetFinalPathNameByHandleW)(HANDLE, LPWSTR, DWORD, DWORD);
-/* Helper function which converts UTF-8 paths to UTF-16.
- * On failure, errno is set. */
-static int utf8_to_16_with_errno(git_win32_path dest, const char *src)
-{
- int len = git_win32_path_from_utf8(dest, src);
-
- if (len < 0) {
- if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
- errno = ENAMETOOLONG;
- else
- errno = EINVAL; /* Bad code point, presumably */
- }
-
- return len;
-}
-
int p_ftruncate(int fd, long size)
{
#if defined(_MSC_VER) && _MSC_VER >= 1500
@@ -66,7 +51,7 @@ int p_mkdir(const char *path, mode_t mode)
GIT_UNUSED(mode);
- if (utf8_to_16_with_errno(buf, path) < 0)
+ if (git_win32_path_from_utf8(buf, path) < 0)
return -1;
return _wmkdir(buf);
@@ -85,7 +70,7 @@ int p_unlink(const char *path)
git_win32_path buf;
int error;
- if (utf8_to_16_with_errno(buf, path) < 0)
+ if (git_win32_path_from_utf8(buf, path) < 0)
return -1;
error = _wunlink(buf);
@@ -292,7 +277,7 @@ static int do_lstat(const char *path, struct stat *buf, bool posixly_correct)
git_win32_path path_w;
int len;
- if ((len = utf8_to_16_with_errno(path_w, path)) < 0)
+ if ((len = git_win32_path_from_utf8(path_w, path)) < 0)
return -1;
git_win32__path_trim_end(path_w, len);
@@ -323,7 +308,7 @@ int p_readlink(const char *path, char *buf, size_t bufsiz)
* could occur in the middle of the encoding of a code point,
* we need to buffer the result on the stack. */
- if (utf8_to_16_with_errno(path_w, path) < 0 ||
+ if (git_win32_path_from_utf8(path_w, path) < 0 ||
readlink_w(target_w, path_w) < 0 ||
(len = git_win32_path_to_utf8(target, target_w)) < 0)
return -1;
@@ -347,7 +332,7 @@ int p_open(const char *path, int flags, ...)
git_win32_path buf;
mode_t mode = 0;
- if (utf8_to_16_with_errno(buf, path) < 0)
+ if (git_win32_path_from_utf8(buf, path) < 0)
return -1;
if (flags & O_CREAT) {
@@ -365,7 +350,7 @@ int p_creat(const char *path, mode_t mode)
{
git_win32_path buf;
- if (utf8_to_16_with_errno(buf, path) < 0)
+ if (git_win32_path_from_utf8(buf, path) < 0)
return -1;
return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | STANDARD_OPEN_FLAGS, mode);
@@ -463,7 +448,7 @@ int p_stat(const char* path, struct stat* buf)
git_win32_path path_w;
int len;
- if ((len = utf8_to_16_with_errno(path_w, path)) < 0)
+ if ((len = git_win32_path_from_utf8(path_w, path)) < 0)
return -1;
git_win32__path_trim_end(path_w, len);
@@ -483,7 +468,7 @@ int p_chdir(const char* path)
{
git_win32_path buf;
- if (utf8_to_16_with_errno(buf, path) < 0)
+ if (git_win32_path_from_utf8(buf, path) < 0)
return -1;
return _wchdir(buf);
@@ -493,7 +478,7 @@ int p_chmod(const char* path, mode_t mode)
{
git_win32_path buf;
- if (utf8_to_16_with_errno(buf, path) < 0)
+ if (git_win32_path_from_utf8(buf, path) < 0)
return -1;
return _wchmod(buf, mode);
@@ -504,7 +489,7 @@ int p_rmdir(const char* path)
git_win32_path buf;
int error;
- if (utf8_to_16_with_errno(buf, path) < 0)
+ if (git_win32_path_from_utf8(buf, path) < 0)
return -1;
error = _wrmdir(buf);
@@ -533,7 +518,7 @@ char *p_realpath(const char *orig_path, char *buffer)
{
git_win32_path orig_path_w, buffer_w;
- if (utf8_to_16_with_errno(orig_path_w, orig_path) < 0)
+ if (git_win32_path_from_utf8(orig_path_w, orig_path) < 0)
return NULL;
/* Note that if the path provided is a relative path, then the current directory
@@ -554,20 +539,17 @@ char *p_realpath(const char *orig_path, char *buffer)
return NULL;
}
- /* Convert the path to UTF-8. */
- if (buffer) {
- /* If the caller provided a buffer, then it is assumed to be GIT_WIN_PATH_UTF8
- * characters in size. If it isn't, then we may overflow. */
- if (git__utf16_to_8(buffer, GIT_WIN_PATH_UTF8, buffer_w) < 0)
- return NULL;
- } else {
- /* If the caller did not provide a buffer, then we allocate one for the caller
- * from the heap. */
- if (git__utf16_to_8_alloc(&buffer, buffer_w) < 0)
- return NULL;
+ if (!buffer && !(buffer = git__malloc(GIT_WIN_PATH_UTF8))) {
+ errno = ENOMEM;
+ return NULL;
}
- /* Convert backslashes to forward slashes */
+ /* Convert the path to UTF-8. If the caller provided a buffer, then it
+ * is assumed to be GIT_WIN_PATH_UTF8 characters in size. If it isn't,
+ * then we may overflow. */
+ if (git_win32_path_to_utf8(buffer, buffer_w) < 0)
+ return NULL;
+
git_path_mkposix(buffer);
return buffer;
@@ -608,6 +590,7 @@ int p_snprintf(char *buffer, size_t count, const char *format, ...)
return r;
}
+/* TODO: wut? */
int p_mkstemp(char *tmp_path)
{
#if defined(_MSC_VER) && _MSC_VER >= 1500
@@ -625,7 +608,7 @@ int p_access(const char* path, mode_t mode)
{
git_win32_path buf;
- if (utf8_to_16_with_errno(buf, path) < 0)
+ if (git_win32_path_from_utf8(buf, path) < 0)
return -1;
return _waccess(buf, mode);
@@ -664,8 +647,8 @@ int p_rename(const char *from, const char *to)
int rename_succeeded;
int error;
- if (utf8_to_16_with_errno(wfrom, from) < 0 ||
- utf8_to_16_with_errno(wto, to) < 0)
+ if (git_win32_path_from_utf8(wfrom, from) < 0 ||
+ git_win32_path_from_utf8(wto, to) < 0)
return -1;
/* wait up to 50ms if file is locked by another thread or process */
diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c
index b9ccfb5e5..b0205b019 100644
--- a/src/win32/utf-conv.c
+++ b/src/win32/utf-conv.c
@@ -26,6 +26,14 @@ GIT_INLINE(DWORD) get_wc_flags(void)
return flags;
}
+/*
+ * Translate the calling thread's last Win32 error into errno after a
+ * failed conversion: ERROR_INSUFFICIENT_BUFFER becomes ENAMETOOLONG and
+ * every other error becomes EINVAL.
+ */
+GIT_INLINE(void) git__set_errno(void)
+{
+	const DWORD last_error = GetLastError();
+
+	errno = (last_error == ERROR_INSUFFICIENT_BUFFER) ? ENAMETOOLONG : EINVAL;
+}
+
/**
* Converts a UTF-8 string to wide characters.
*
@@ -36,10 +44,15 @@ GIT_INLINE(DWORD) get_wc_flags(void)
*/
int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
{
+ int len;
+
/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
* turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
* length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */
- return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1;
+ if ((len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1) < 0)
+ git__set_errno();
+
+ return len;
}
/**
@@ -52,10 +65,15 @@ int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
*/
int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src)
{
+ int len;
+
/* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
* turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
* length. WideCharToMultiByte never returns int's minvalue, so underflow is not possible */
- return WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1;
+ if ((len = WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1) < 0)
+ git__set_errno();
+
+ return len;
}
/**
@@ -76,17 +94,23 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src)
/* Length of -1 indicates NULL termination of the input string */
utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0);
- if (!utf16_size)
+ if (!utf16_size) {
+ git__set_errno();
return -1;
+ }
*dest = git__malloc(utf16_size * sizeof(wchar_t));
- if (!*dest)
+ if (!*dest) {
+ errno = ENOMEM;
return -1;
+ }
utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, *dest, utf16_size);
if (!utf16_size) {
+ git__set_errno();
+
git__free(*dest);
*dest = NULL;
}
@@ -116,17 +140,23 @@ int git__utf16_to_8_alloc(char **dest, const wchar_t *src)
/* Length of -1 indicates NULL termination of the input string */
utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, NULL, 0, NULL, NULL);
- if (!utf8_size)
+ if (!utf8_size) {
+ git__set_errno();
return -1;
+ }
*dest = git__malloc(utf8_size);
- if (!*dest)
+ if (!*dest) {
+ errno = ENOMEM;
return -1;
+ }
utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, *dest, utf8_size, NULL, NULL);
if (!utf8_size) {
+ git__set_errno();
+
git__free(*dest);
*dest = NULL;
}
diff --git a/src/win32/utf-conv.h b/src/win32/utf-conv.h
index a480cd93e..89cdb96da 100644
--- a/src/win32/utf-conv.h
+++ b/src/win32/utf-conv.h
@@ -10,21 +10,6 @@
#include <wchar.h>
#include "common.h"
-/* Equal to the Win32 MAX_PATH constant. The maximum path length is 259
- * characters plus a NULL terminator. */
-#define GIT_WIN_PATH_UTF16 260
-
-/* Maximum size of a UTF-8 Win32 path. UTF-8 does have 4-byte sequences,
- * but they are encoded in UTF-16 using surrogate pairs, which takes up
- * the space of two characters. Two characters in the range U+0800 ->
- * U+FFFF take up more space in UTF-8 (6 bytes) than one surrogate pair
- * (4 bytes). */
-#define GIT_WIN_PATH_UTF8 (259 * 3 + 1)
-
-/* Win32 path types */
-typedef wchar_t git_win32_path[GIT_WIN_PATH_UTF16];
-typedef char git_win32_utf8_path[GIT_WIN_PATH_UTF8];
-
/**
* Converts a UTF-8 string to wide characters.
*
@@ -67,28 +52,4 @@ int git__utf8_to_16_alloc(wchar_t **dest, const char *src);
*/
int git__utf16_to_8_alloc(char **dest, const wchar_t *src);
-/**
- * Converts a UTF-8 Win32 path to wide characters.
- *
- * @param dest The buffer to receive the wide string.
- * @param src The UTF-8 string to convert.
- * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
- */
-GIT_INLINE(int) git_win32_path_from_utf8(git_win32_path dest, const char *src)
-{
- return git__utf8_to_16(dest, GIT_WIN_PATH_UTF16, src);
-}
-
-/**
- * Converts a wide Win32 path to UTF-8.
- *
- * @param dest The buffer to receive the UTF-8 string.
- * @param src The wide string to convert.
- * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
- */
-GIT_INLINE(int) git_win32_path_to_utf8(git_win32_utf8_path dest, const wchar_t *src)
-{
- return git__utf16_to_8(dest, GIT_WIN_PATH_UTF8, src);
-}
-
#endif
diff --git a/src/win32/w32_util.h b/src/win32/w32_util.h
index a1d388af5..9c1b94359 100644
--- a/src/win32/w32_util.h
+++ b/src/win32/w32_util.h
@@ -9,6 +9,7 @@
#define INCLUDE_w32_util_h__
#include "utf-conv.h"
+#include "path_w32.h"
GIT_INLINE(bool) git_win32__isalpha(wchar_t c)
{
diff --git a/tests/core/link.c b/tests/core/link.c
index 1794a3893..2674e35fc 100644
--- a/tests/core/link.c
+++ b/tests/core/link.c
@@ -196,19 +196,6 @@ static void do_custom_reparse(const char *path)
#endif
-git_buf *unslashify(git_buf *buf)
-{
-#ifdef GIT_WIN32
- size_t i;
-
- for (i = 0; i < buf->size; i++)
- if (buf->ptr[i] == '/')
- buf->ptr[i] = '\\';
-#endif
-
- return buf;
-}
-
void test_core_link__stat_regular_file(void)
{
struct stat st;
@@ -547,7 +534,7 @@ void test_core_link__readlink_symlink(void)
buf[len] = 0;
- cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf);
+ cl_assert_equal_s(git_buf_cstr(&target_path), buf);
git_buf_free(&target_path);
}
@@ -567,7 +554,7 @@ void test_core_link__readlink_dangling(void)
buf[len] = 0;
- cl_assert_equal_s(git_buf_cstr(unslashify(&target_path)), buf);
+ cl_assert_equal_s(git_buf_cstr(&target_path), buf);
git_buf_free(&target_path);
}
@@ -593,7 +580,7 @@ void test_core_link__readlink_multiple(void)
buf[len] = 0;
- cl_assert_equal_s(git_buf_cstr(unslashify(&path2)), buf);
+ cl_assert_equal_s(git_buf_cstr(&path2), buf);
git_buf_free(&path1);
git_buf_free(&path2);
diff --git a/tests/path/win32.c b/tests/path/win32.c
new file mode 100644
index 000000000..ef0b5d2f2
--- /dev/null
+++ b/tests/path/win32.c
@@ -0,0 +1,190 @@
+
+#include "clar_libgit2.h"
+#include "path.h"
+
+#ifdef GIT_WIN32
+#include "win32/path_w32.h"
+#endif
+/* Helper: converts utf8_in with git_win32_path_from_utf8 and asserts that the
+ * resulting wide string and the returned length both match utf16_expected.
+ * On non-Windows builds it only marks its arguments as unused. */
+void test_utf8_to_utf16(const char *utf8_in, const wchar_t *utf16_expected)
+{
+#ifdef GIT_WIN32
+ git_win32_path path_utf16;
+ int path_utf16len;
+
+ cl_assert((path_utf16len = git_win32_path_from_utf8(path_utf16, utf8_in)) >= 0);
+ cl_assert_equal_wcs(utf16_expected, path_utf16);
+ cl_assert_equal_i(wcslen(utf16_expected), path_utf16len);
+#else
+ GIT_UNUSED(utf8_in);
+ GIT_UNUSED(utf16_expected);
+#endif
+}
+/* Drive roots: the "\\?\" prefix is added, the drive letter's case is kept,
+ * and the root's separator comes out as a backslash. */
+void test_path_win32__utf8_to_utf16(void)
+{
+#ifdef GIT_WIN32
+ test_utf8_to_utf16("C:\\", L"\\\\?\\C:\\");
+ test_utf8_to_utf16("c:\\", L"\\\\?\\c:\\");
+ test_utf8_to_utf16("C:/", L"\\\\?\\C:\\");
+ test_utf8_to_utf16("c:/", L"\\\\?\\c:\\");
+#endif
+}
+
+/* Trailing separators (one or several, of either kind) are removed. */
+void test_path_win32__removes_trailing_slash(void)
+{
+#ifdef GIT_WIN32
+	test_utf8_to_utf16("C:\\Foo\\", L"\\\\?\\C:\\Foo");
+	test_utf8_to_utf16("C:\\Foo\\\\", L"\\\\?\\C:\\Foo");
+	/* three trailing backslashes, mirroring the "C:/Foo///" case below */
+	test_utf8_to_utf16("C:\\Foo\\\\\\", L"\\\\?\\C:\\Foo");
+	test_utf8_to_utf16("C:/Foo/", L"\\\\?\\C:\\Foo");
+	test_utf8_to_utf16("C:/Foo///", L"\\\\?\\C:\\Foo");
+#endif
+}
+/* Runs of separators inside a path collapse to a single backslash. */
+void test_path_win32__squashes_multiple_slashes(void)
+{
+#ifdef GIT_WIN32
+ test_utf8_to_utf16("C:\\\\Foo\\Bar\\\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+ test_utf8_to_utf16("C://Foo/Bar///Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+#endif
+}
+/* UNC paths, written with either separator, become "\\?\UNC\server\...". */
+void test_path_win32__unc(void)
+{
+#ifdef GIT_WIN32
+ test_utf8_to_utf16("\\\\server\\c$\\unc\\path", L"\\\\?\\UNC\\server\\c$\\unc\\path");
+ test_utf8_to_utf16("//server/git/style/unc/path", L"\\\\?\\UNC\\server\\git\\style\\unc\\path");
+#endif
+}
+/* Length limits: per the strings themselves, 259-character inputs must
+ * convert and 260-character inputs must be rejected, for drive and UNC paths. */
+void test_path_win32__honors_max_path(void)
+{
+#ifdef GIT_WIN32
+ git_win32_path path_utf16;
+ /* longest accepted inputs */
+ test_utf8_to_utf16("C:\\This path is 259 chars and is the max length in windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij",
+ L"\\\\?\\C:\\This path is 259 chars and is the max length in windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij");
+ test_utf8_to_utf16("\\\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij",
+ L"\\\\?\\UNC\\unc\\paths may also be 259 characters including the server\\123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij");
+ /* one character too long: conversion must fail */
+ cl_check_fail(git_win32_path_from_utf8(path_utf16, "C:\\This path is 260 chars and is sadly too long for windows\\0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"));
+ cl_check_fail(git_win32_path_from_utf8(path_utf16, "\\\\unc\\paths are also bound by 260 character restrictions\\including the server name portion\\bcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789abcdefghij"));
+#endif
+}
+/* "." segments are dropped and ".." removes the preceding segment, without
+ * ever climbing above the drive root. */
+void test_path_win32__dot_and_dotdot(void)
+{
+#ifdef GIT_WIN32
+ test_utf8_to_utf16("C:\\Foo\\..\\Foobar", L"\\\\?\\C:\\Foobar");
+ test_utf8_to_utf16("C:\\Foo\\Bar\\..\\Foobar", L"\\\\?\\C:\\Foo\\Foobar");
+ test_utf8_to_utf16("C:\\Foo\\Bar\\..\\Foobar\\..", L"\\\\?\\C:\\Foo");
+ test_utf8_to_utf16("C:\\Foobar\\..", L"\\\\?\\C:\\");
+ test_utf8_to_utf16("C:/Foo/Bar/../Foobar", L"\\\\?\\C:\\Foo\\Foobar");
+ test_utf8_to_utf16("C:/Foo/Bar/../Foobar/../Asdf/", L"\\\\?\\C:\\Foo\\Asdf");
+ test_utf8_to_utf16("C:/Foo/Bar/../Foobar/..", L"\\\\?\\C:\\Foo");
+ test_utf8_to_utf16("C:/Foo/..", L"\\\\?\\C:\\");
+ /* single-dot segments */
+ test_utf8_to_utf16("C:\\Foo\\Bar\\.\\Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+ test_utf8_to_utf16("C:\\.\\Foo\\.\\Bar\\.\\Foobar\\.\\", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+ test_utf8_to_utf16("C:/Foo/Bar/./Foobar", L"\\\\?\\C:\\Foo\\Bar\\Foobar");
+ test_utf8_to_utf16("C:/Foo/../Bar/./Foobar/../", L"\\\\?\\C:\\Bar");
+ /* more ".." than there are segments: the extras are ignored */
+ test_utf8_to_utf16("C:\\Foo\\..\\..\\Bar", L"\\\\?\\C:\\Bar");
+#endif
+}
+/* Rooted paths without a drive letter are given a drive letter.
+ * NOTE(review): the expected strings hard-code "C:", so this presumably
+ * requires the test process's current directory to be on drive C: - confirm. */
+void test_path_win32__absolute_from_no_drive_letter(void)
+{
+#ifdef GIT_WIN32
+ test_utf8_to_utf16("\\Foo", L"\\\\?\\C:\\Foo");
+ test_utf8_to_utf16("\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar");
+ test_utf8_to_utf16("/Foo/Bar", L"\\\\?\\C:\\Foo\\Bar");
+#endif
+}
+/* Relative paths are resolved against the current directory; the test changes
+ * directory to known locations and restores the original one at the end. */
+void test_path_win32__absolute_from_relative(void)
+{
+#ifdef GIT_WIN32
+ char cwd_backup[MAX_PATH];
+ /* remember where we started, then work from the drive root */
+ cl_must_pass(p_getcwd(cwd_backup, MAX_PATH));
+ cl_must_pass(p_chdir("C:/"));
+
+ test_utf8_to_utf16("Foo", L"\\\\?\\C:\\Foo");
+ test_utf8_to_utf16("..\\..\\Foo", L"\\\\?\\C:\\Foo");
+ test_utf8_to_utf16("Foo\\..", L"\\\\?\\C:\\");
+ test_utf8_to_utf16("Foo\\..\\..", L"\\\\?\\C:\\");
+ test_utf8_to_utf16("", L"\\\\?\\C:\\");
+ /* same checks from a subdirectory */
+ cl_must_pass(p_chdir("C:/Windows"));
+
+ test_utf8_to_utf16("Foo", L"\\\\?\\C:\\Windows\\Foo");
+ test_utf8_to_utf16("Foo\\Bar", L"\\\\?\\C:\\Windows\\Foo\\Bar");
+ test_utf8_to_utf16("..\\Foo", L"\\\\?\\C:\\Foo");
+ test_utf8_to_utf16("Foo\\..\\Bar", L"\\\\?\\C:\\Windows\\Bar");
+ test_utf8_to_utf16("", L"\\\\?\\C:\\Windows");
+
+ cl_must_pass(p_chdir(cwd_backup));
+#endif
+}
+/* Helper: copies `in` into a git_win32_path buffer, canonicalizes it in place
+ * and asserts that the result equals `expected`. No-op off Windows. */
+void test_canonicalize(const wchar_t *in, const wchar_t *expected)
+{
+#ifdef GIT_WIN32
+ git_win32_path canonical;
+
+ cl_assert(wcslen(in) < MAX_PATH); /* guard the wcscpy below */
+ wcscpy(canonical, in);
+
+ cl_must_pass(git_win32_path_canonicalize(canonical));
+ cl_assert_equal_wcs(expected, canonical);
+#else
+ GIT_UNUSED(in);
+ GIT_UNUSED(expected);
+#endif
+}
+
+/* Exercises git_win32_path_canonicalize directly on drive-rooted, relative,
+ * NT-namespaced ("\\?\") and UNC inputs. */
+void test_path_win32__canonicalize(void)
+{
+#ifdef GIT_WIN32
+	/* drive-rooted paths */
+	test_canonicalize(L"C:\\Foo\\Bar", L"C:\\Foo\\Bar");
+	test_canonicalize(L"C:\\Foo\\", L"C:\\Foo");
+	test_canonicalize(L"C:\\Foo\\\\", L"C:\\Foo");
+	test_canonicalize(L"C:\\Foo\\..\\Bar", L"C:\\Bar");
+	test_canonicalize(L"C:\\Foo\\..\\..\\Bar", L"C:\\Bar");
+	test_canonicalize(L"C:\\Foo\\..\\..\\..\\..\\", L"C:\\");
+	test_canonicalize(L"C:/Foo/Bar", L"C:\\Foo\\Bar");
+	test_canonicalize(L"C:/", L"C:\\");
+
+	/* paths without a root prefix */
+	test_canonicalize(L"Foo\\\\Bar\\\\Asdf\\\\", L"Foo\\Bar\\Asdf");
+	test_canonicalize(L"Foo\\\\Bar\\\\..\\\\Asdf\\", L"Foo\\Asdf");
+	test_canonicalize(L"Foo\\\\Bar\\\\.\\\\Asdf\\", L"Foo\\Bar\\Asdf");
+	test_canonicalize(L"Foo\\\\..\\Bar\\\\.\\\\Asdf\\", L"Bar\\Asdf");
+	test_canonicalize(L"\\", L"");
+	test_canonicalize(L"", L"");
+	test_canonicalize(L"Foo\\..\\..\\..\\..", L"");
+	test_canonicalize(L"..\\..\\..\\..", L"");
+	test_canonicalize(L"\\..\\..\\..\\..", L"");
+
+	/* NT-namespaced drive paths */
+	test_canonicalize(L"\\\\?\\C:\\Foo\\Bar", L"\\\\?\\C:\\Foo\\Bar");
+	test_canonicalize(L"\\\\?\\C:\\Foo\\Bar\\", L"\\\\?\\C:\\Foo\\Bar");
+	test_canonicalize(L"\\\\?\\C:\\\\Foo\\.\\Bar\\\\..\\", L"\\\\?\\C:\\Foo");
+	test_canonicalize(L"\\\\?\\C:\\\\", L"\\\\?\\C:\\");
+	test_canonicalize(L"//?/C:/", L"\\\\?\\C:\\");
+	test_canonicalize(L"//?/C:/../../Foo/", L"\\\\?\\C:\\Foo");
+	test_canonicalize(L"//?/C:/Foo/../../", L"\\\\?\\C:\\");
+
+	/* NT-namespaced UNC paths */
+	test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder", L"\\\\?\\UNC\\server\\C$\\folder");
+	test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\", L"\\\\?\\UNC\\server\\C$\\folder");
+	/* two trailing separators (this line previously repeated the case above) */
+	test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\\\", L"\\\\?\\UNC\\server\\C$\\folder");
+	test_canonicalize(L"\\\\?\\UNC\\server\\C$\\folder\\..\\..\\..\\..\\share\\", L"\\\\?\\UNC\\server\\share");
+
+	/* plain UNC paths */
+	test_canonicalize(L"\\\\server\\share", L"\\\\server\\share");
+	test_canonicalize(L"\\\\server\\share\\", L"\\\\server\\share");
+	test_canonicalize(L"\\\\server\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar");
+	test_canonicalize(L"\\\\server\\\\share\\\\foo\\\\bar", L"\\\\server\\share\\foo\\bar");
+	test_canonicalize(L"\\\\server\\share\\..\\foo", L"\\\\server\\foo");
+	test_canonicalize(L"\\\\server\\..\\..\\share\\.\\foo", L"\\\\server\\share\\foo");
+#endif
+}