summaryrefslogtreecommitdiff
path: root/src/win32/utf-conv.c
diff options
context:
space:
mode:
authorPhilip Kelley <phkelley@hotmail.com>2014-04-19 18:05:31 -0400
committerPhilip Kelley <phkelley@hotmail.com>2014-04-19 21:50:44 -0400
commitc2c8161541e54689926ec1f463569d5d1b975503 (patch)
treed3d1f7096c6449a9d9e19747508ad08cc910d8fe /src/win32/utf-conv.c
parentbfc50f83f829afe52c10f615c4f7efa478a83098 (diff)
downloadlibgit2-c2c8161541e54689926ec1f463569d5d1b975503.tar.gz
Win32: UTF-8 <-> WCHAR conversion overhaul
Diffstat (limited to 'src/win32/utf-conv.c')
-rw-r--r--src/win32/utf-conv.c133
1 files changed, 130 insertions, 3 deletions
diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c
index a96385f10..fe94701a8 100644
--- a/src/win32/utf-conv.c
+++ b/src/win32/utf-conv.c
@@ -8,12 +8,139 @@
#include "common.h"
#include "utf-conv.h"
-int git__utf8_to_16(wchar_t * dest, size_t dest_size, const char *src)
+#ifndef WC_ERR_INVALID_CHARS
+#define WC_ERR_INVALID_CHARS 0x80
+#endif
+
+GIT_INLINE(DWORD) get_wc_flags(void)
{
- return MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, (int)dest_size);
+ static char inited = 0;
+ static DWORD flags;
+
+ /* Invalid code point check (WC_ERR_INVALID_CHARS) is supported on Vista+ only, so the version probe runs once and its result is cached in the statics above; older versions get 0 (no flags). NOTE(review): the cache is unsynchronized - confirm concurrent first calls are acceptable */
+ if (!inited) {
+ flags = git_has_win32_version(6, 0, 0) ? WC_ERR_INVALID_CHARS : 0;
+ inited = 1;
+ }
+
+ return flags;
}
+/**
+ * Converts a UTF-8 string to wide characters.
+ *
+ * @param dest The buffer to receive the wide string.
+ * @param dest_size The size of the buffer, in characters (narrowed to int for the Win32 call).
+ * @param src The NUL-terminated UTF-8 string to convert (converted with MB_ERR_INVALID_CHARS).
+ * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
+ */
+int git__utf8_to_16(wchar_t *dest, size_t dest_size, const char *src)
+{
+ /* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
+ * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
+ * length. MultiByteToWideChar never returns int's minvalue, so underflow is not possible */
+ return MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, dest, (int)dest_size) - 1;
+}
+
+/**
+ * Converts a wide string to UTF-8.
+ *
+ * @param dest The buffer to receive the UTF-8 string.
+ * @param dest_size The size of the buffer, in bytes (narrowed to int for the Win32 call).
+ * @param src The NUL-terminated wide string to convert (conversion flags come from get_wc_flags()).
+ * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
+ */
int git__utf16_to_8(char *dest, size_t dest_size, const wchar_t *src)
{
- return WideCharToMultiByte(CP_UTF8, 0, src, -1, dest, (int)dest_size, NULL, NULL);
+ /* Length of -1 indicates NULL termination of the input string. Subtract 1 from the result to
+ * turn 0 into -1 (an error code) and to not count the NULL terminator as part of the string's
+ * length. WideCharToMultiByte never returns int's minvalue, so underflow is not possible */
+ return WideCharToMultiByte(CP_UTF8, get_wc_flags(), src, -1, dest, (int)dest_size, NULL, NULL) - 1;
+}
+
+/**
+ * Converts a UTF-8 string to wide characters.
+ * Memory is allocated to hold the converted string.
+ * The caller is responsible for freeing the string with git__free.
+ *
+ * @param dest Receives a pointer to the wide string; set to NULL on every failure path.
+ * @param src The NUL-terminated UTF-8 string to convert (converted with MB_ERR_INVALID_CHARS).
+ * @return The length of the wide string, in characters (not counting the NULL terminator), or < 0 for failure
+ */
+int git__utf8_to_16_alloc(wchar_t **dest, const char *src)
+{
+ int utf16_size;
+
+ *dest = NULL;
+
+ /* Length of -1 indicates NULL termination of the input string. This first call passes no output buffer; its result is used below as the allocation size, in wchar_t units */
+ utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, NULL, 0);
+
+ if (!utf16_size)
+ return -1;
+
+ *dest = git__malloc(utf16_size * sizeof(wchar_t));
+
+ if (!*dest)
+ return -1;
+
+ utf16_size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, src, -1, *dest, utf16_size);
+
+ if (!utf16_size) {
+ /* Don't let git__free stomp on the thread-local last error code,
+ * so that the caller can call giterr_set(GITERR_OS, ...) */
+ DWORD last_error = GetLastError();
+ git__free(*dest);
+ *dest = NULL;
+ SetLastError(last_error);
+ }
+
+ /* Subtract 1 from the result to turn 0 into -1 (an error code) and to not count the NULL
+ * terminator as part of the string's length. MultiByteToWideChar never returns int's minvalue,
+ * so underflow is not possible */
+ return utf16_size - 1;
+}
+
+/**
+ * Converts a wide string to UTF-8.
+ * Memory is allocated to hold the converted string.
+ * The caller is responsible for freeing the string with git__free.
+ *
+ * @param dest Receives a pointer to the UTF-8 string; set to NULL on every failure path.
+ * @param src The NUL-terminated wide string to convert (conversion flags come from get_wc_flags()).
+ * @return The length of the UTF-8 string, in bytes (not counting the NULL terminator), or < 0 for failure
+ */
+int git__utf16_to_8_alloc(char **dest, const wchar_t *src)
+{
+ int utf8_size;
+ DWORD dwFlags = get_wc_flags();
+
+ *dest = NULL;
+
+ /* Length of -1 indicates NULL termination of the input string. This first call passes no output buffer; its result is used below as the allocation size, in bytes */
+ utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, NULL, 0, NULL, NULL);
+
+ if (!utf8_size)
+ return -1;
+
+ *dest = git__malloc(utf8_size);
+
+ if (!*dest)
+ return -1;
+
+ utf8_size = WideCharToMultiByte(CP_UTF8, dwFlags, src, -1, *dest, utf8_size, NULL, NULL);
+
+ if (!utf8_size) {
+ /* Don't let git__free stomp on the thread-local last error code,
+ * so that the caller can call giterr_set(GITERR_OS, ...) */
+ DWORD last_error = GetLastError();
+ git__free(*dest);
+ *dest = NULL;
+ SetLastError(last_error);
+ }
+
+ /* Subtract 1 from the result to turn 0 into -1 (an error code) and to not count the NULL
+ * terminator as part of the string's length. WideCharToMultiByte never returns int's minvalue,
+ * so underflow is not possible */
+ return utf8_size - 1;
}