diff options
author | Vicent Marti <tanoku@gmail.com> | 2012-08-28 22:19:08 -0700 |
---|---|---|
committer | Vicent Marti <tanoku@gmail.com> | 2012-08-28 23:26:00 -0700 |
commit | 0f4c61754bd123b3bee997b397187c9b813ca3e4 (patch) | |
tree | 4278875a298e527b27a4517f5a054b7e2cf87cee | |
parent | 6813169ac9fe2558e4503f0149f22c5fad9d61c1 (diff) | |
download | libgit2-0f4c61754bd123b3bee997b397187c9b813ca3e4.tar.gz |
Add bounds checking to UTF-8 conversion
-rw-r--r-- | src/fileops.c | 9 | ||||
-rw-r--r-- | src/path.c | 5 | ||||
-rw-r--r-- | src/win32/dir.c | 4 | ||||
-rw-r--r-- | src/win32/posix.h | 2 | ||||
-rw-r--r-- | src/win32/posix_w32.c | 26 | ||||
-rw-r--r-- | src/win32/utf-conv.c | 61 | ||||
-rw-r--r-- | src/win32/utf-conv.h | 2 | ||||
-rw-r--r-- | tests-clar/clar_helpers.c | 6 |
8 files changed, 45 insertions, 70 deletions
diff --git a/src/fileops.c b/src/fileops.c index 6adccdd9..95eacb5f 100644 --- a/src/fileops.c +++ b/src/fileops.c @@ -56,7 +56,7 @@ int git_futils_creat_locked(const char *path, const mode_t mode) #ifdef GIT_WIN32 wchar_t buf[GIT_WIN_PATH]; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); fd = _wopen(buf, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL, mode); #else fd = open(path, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL, mode); @@ -381,7 +381,7 @@ static int win32_expand_path(struct win32_path *s_root, const wchar_t *templ) static int win32_find_file(git_buf *path, const struct win32_path *root, const char *filename) { - size_t len; + size_t len, alloc_len; wchar_t *file_utf16 = NULL; char file_utf8[GIT_PATH_MAX]; @@ -389,7 +389,8 @@ static int win32_find_file(git_buf *path, const struct win32_path *root, const c return GIT_ENOTFOUND; /* allocate space for wchar_t path to file */ - file_utf16 = git__calloc(root->len + len + 2, sizeof(wchar_t)); + alloc_len = root->len + len + 2; + file_utf16 = git__calloc(alloc_len, sizeof(wchar_t)); GITERR_CHECK_ALLOC(file_utf16); /* append root + '\\' + filename as wchar_t */ @@ -398,7 +399,7 @@ static int win32_find_file(git_buf *path, const struct win32_path *root, const c if (*filename == '/' || *filename == '\\') filename++; - git__utf8_to_16(file_utf16 + root->len - 1, filename); + git__utf8_to_16(file_utf16 + root->len - 1, alloc_len, filename); /* check access */ if (_waccess(file_utf16, F_OK) < 0) { @@ -432,14 +432,14 @@ bool git_path_is_empty_dir(const char *path) { git_buf pathbuf = GIT_BUF_INIT; HANDLE hFind = INVALID_HANDLE_VALUE; - wchar_t *wbuf; + wchar_t wbuf[GIT_WIN_PATH]; WIN32_FIND_DATAW ffd; bool retval = true; if (!git_path_isdir(path)) return false; git_buf_printf(&pathbuf, "%s\\*", path); - wbuf = gitwin_to_utf16(git_buf_cstr(&pathbuf)); + git__utf8_to_16(wbuf, GIT_WIN_PATH, git_buf_cstr(&pathbuf)); hFind = FindFirstFileW(wbuf, &ffd); if (INVALID_HANDLE_VALUE == hFind) { @@ -455,7 +455,6 @@ bool git_path_is_empty_dir(const char *path) FindClose(hFind); git_buf_free(&pathbuf); - git__free(wbuf); return retval; } diff --git a/src/win32/dir.c b/src/win32/dir.c index 8b4f8962..5cb1082b 100644 --- a/src/win32/dir.c +++ b/src/win32/dir.c @@ -40,7 +40,7 @@ git__DIR *git__opendir(const char *dir) if (!new->dir) goto fail; - git__utf8_to_16(filter_w, filter); + git__utf8_to_16(filter_w, GIT_WIN_PATH, filter); new->h = FindFirstFileW(filter_w, &new->f); if (new->h == INVALID_HANDLE_VALUE) { @@ -116,7 +116,7 @@ void git__rewinddir(git__DIR *d) if (!init_filter(filter, sizeof(filter), d->dir)) return; - git__utf8_to_16(filter_w, filter); + git__utf8_to_16(filter_w, GIT_WIN_PATH, filter); d->h = FindFirstFileW(filter_w, &d->f); if (d->h == INVALID_HANDLE_VALUE) diff --git a/src/win32/posix.h b/src/win32/posix.h index ddab88a3..da46cf51 100644 --- a/src/win32/posix.h +++ b/src/win32/posix.h @@ -23,7 +23,7 @@ GIT_INLINE(int) p_mkdir(const char *path, mode_t mode) { wchar_t buf[GIT_WIN_PATH]; GIT_UNUSED(mode); - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); return _wmkdir(buf); } diff --git a/src/win32/posix_w32.c b/src/win32/posix_w32.c index 682a40ad..649fe9b9 100644 --- a/src/win32/posix_w32.c +++ b/src/win32/posix_w32.c @@ -16,7 +16,7 @@ int p_unlink(const char *path) { wchar_t buf[GIT_WIN_PATH]; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); _wchmod(buf, 0666); return _wunlink(buf); } @@ -58,7 +58,7 @@ static int do_lstat(const char *file_name, struct stat *buf) wchar_t fbuf[GIT_WIN_PATH]; DWORD last_error; - git__utf8_to_16(fbuf, file_name); + git__utf8_to_16(fbuf, GIT_WIN_PATH, file_name); if (GetFileAttributesExW(fbuf, GetFileExInfoStandard, &fdata)) { int fMode = S_IREAD; @@ -157,7 +157,7 @@ int p_readlink(const char *link, char *target, size_t target_len) } } - git__utf8_to_16(link_w, link); + git__utf8_to_16(link_w, GIT_WIN_PATH, link); hFile = CreateFileW(link_w, // file to open GENERIC_READ, // open for reading @@ -226,7 +226,7 @@ int p_open(const char *path, int flags, ...) wchar_t buf[GIT_WIN_PATH]; mode_t mode = 0; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); if (flags & O_CREAT) { va_list arg_list; @@ -242,7 +242,7 @@ int p_open(const char *path, int flags, ...) int p_creat(const char *path, mode_t mode) { wchar_t buf[GIT_WIN_PATH]; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); return _wopen(buf, _O_WRONLY | _O_CREAT | _O_TRUNC | _O_BINARY, mode); } @@ -274,28 +274,28 @@ int p_stat(const char* path, struct stat* buf) int p_chdir(const char* path) { wchar_t buf[GIT_WIN_PATH]; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); return _wchdir(buf); } int p_chmod(const char* path, mode_t mode) { wchar_t buf[GIT_WIN_PATH]; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); return _wchmod(buf, mode); } int p_rmdir(const char* path) { wchar_t buf[GIT_WIN_PATH]; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); return _wrmdir(buf); } int p_hide_directory__w32(const char *path) { wchar_t buf[GIT_WIN_PATH]; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); return (SetFileAttributesW(buf, FILE_ATTRIBUTE_HIDDEN) != 0) ? 0 : -1; } @@ -305,7 +305,7 @@ char *p_realpath(const char *orig_path, char *buffer) wchar_t orig_path_w[GIT_WIN_PATH]; wchar_t buffer_w[GIT_WIN_PATH]; - git__utf8_to_16(orig_path_w, orig_path); + git__utf8_to_16(orig_path_w, GIT_WIN_PATH, orig_path); ret = GetFullPathNameW(orig_path_w, GIT_WIN_PATH, buffer_w, NULL); /* According to MSDN, a return value equals to zero means a failure. */ @@ -399,7 +399,7 @@ int p_setenv(const char* name, const char* value, int overwrite) int p_access(const char* path, mode_t mode) { wchar_t buf[GIT_WIN_PATH]; - git__utf8_to_16(buf, path); + git__utf8_to_16(buf, GIT_WIN_PATH, path); return _waccess(buf, mode); } @@ -408,8 +408,8 @@ int p_rename(const char *from, const char *to) wchar_t wfrom[GIT_WIN_PATH]; wchar_t wto[GIT_WIN_PATH]; - git__utf8_to_16(wfrom, from); - git__utf8_to_16(wto, to); + git__utf8_to_16(wfrom, GIT_WIN_PATH, from); + git__utf8_to_16(wto, GIT_WIN_PATH, to); return MoveFileExW(wfrom, wto, MOVEFILE_REPLACE_EXISTING | MOVEFILE_COPY_ALLOWED) ? 0 : -1; } diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c index a98e814f..88a84141 100644 --- a/src/win32/utf-conv.c +++ b/src/win32/utf-conv.c @@ -11,83 +11,52 @@ #define U16_LEAD(c) (wchar_t)(((c)>>10)+0xd7c0) #define U16_TRAIL(c) (wchar_t)(((c)&0x3ff)|0xdc00) -void git__utf8_to_16(wchar_t *dest, const char *src) +#if 0 +void git__utf8_to_16(wchar_t *dest, size_t length, const char *src) { wchar_t *pDest = dest; uint32_t ch; const uint8_t* pSrc = (uint8_t*) src; - const uint8_t *pSrcLimit = pSrc + strlen(src); - assert(dest && src); + assert(dest && src && length); - if ((pSrcLimit - pSrc) >= 4) { - pSrcLimit -= 3; /* temporarily reduce pSrcLimit */ + length--; - /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ - do { - ch = *pSrc++; - if(ch < 0xc0) { - /* - * ASCII, or a trail byte in lead position which is treated like - * a single-byte sequence for better character boundary - * resynchronization after illegal sequences. - */ - *pDest++=(wchar_t)ch; - } else if(ch < 0xe0) { /* U+0080..U+07FF */ - /* 0x3080 = (0xc0 << 6) + 0x80 */ - *pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080); - } else if(ch < 0xf0) { /* U+0800..U+FFFF */ - /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ - /* 0x2080 = (0x80 << 6) + 0x80 */ - ch = (ch << 12) + (*pSrc++ << 6); - *pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080); - } else /* f0..f4 */ { /* U+10000..U+10FFFF */ - /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ - ch = (ch << 18) + (*pSrc++ << 12); - ch += *pSrc++ << 6; - ch += *pSrc++ - 0x3c82080; - *(pDest++) = U16_LEAD(ch); - *(pDest++) = U16_TRAIL(ch); - } - } while(pSrc < pSrcLimit); - - pSrcLimit += 3; /* restore original pSrcLimit */ - } - - while(pSrc < pSrcLimit) { + while(*pSrc && length > 0) { ch = *pSrc++; + length--; + if(ch < 0xc0) { /* * ASCII, or a trail byte in lead position which is treated like * a single-byte sequence for better character boundary * resynchronization after illegal sequences. */ - *pDest++=(wchar_t)ch; + *pDest++ = (wchar_t)ch; continue; } else if(ch < 0xe0) { /* U+0080..U+07FF */ - if(pSrc < pSrcLimit) { + if (pSrc[0]) { /* 0x3080 = (0xc0 << 6) + 0x80 */ *pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080); continue; } } else if(ch < 0xf0) { /* U+0800..U+FFFF */ - if((pSrcLimit - pSrc) >= 2) { + if (pSrc[0] && pSrc[1]) { /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ /* 0x2080 = (0x80 << 6) + 0x80 */ ch = (ch << 12) + (*pSrc++ << 6); *pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080); - pSrc += 3; continue; } } else /* f0..f4 */ { /* U+10000..U+10FFFF */ - if((pSrcLimit - pSrc) >= 3) { + if (length >= 1 && pSrc[0] && pSrc[1] && pSrc[2]) { /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ ch = (ch << 18) + (*pSrc++ << 12); ch += *pSrc++ << 6; ch += *pSrc++ - 0x3c82080; *(pDest++) = U16_LEAD(ch); *(pDest++) = U16_TRAIL(ch); - pSrc += 4; + length--; /* two bytes for this character */ continue; } } @@ -99,6 +68,12 @@ void git__utf8_to_16(wchar_t *dest, const char *src) *pDest++ = 0x0; } +#endif + +void git__utf8_to_16(wchar_t *dest, size_t length, const char *src) +{ + MultiByteToWideChar(CP_UTF8, 0, src, -1, dest, length); +} void git__utf16_to_8(char *out, const wchar_t *input) { diff --git a/src/win32/utf-conv.h b/src/win32/utf-conv.h index c0cfffe4..3bd1549b 100644 --- a/src/win32/utf-conv.h +++ b/src/win32/utf-conv.h @@ -12,7 +12,7 @@ #define GIT_WIN_PATH (260 + 1) -void git__utf8_to_16(wchar_t *dest, const char *src); +void git__utf8_to_16(wchar_t *dest, size_t length, const char *src); void git__utf16_to_8(char *dest, const wchar_t *src); #endif diff --git a/tests-clar/clar_helpers.c b/tests-clar/clar_helpers.c index 125f7855..fa48ac8f 100644 --- a/tests-clar/clar_helpers.c +++ b/tests-clar/clar_helpers.c @@ -60,7 +60,7 @@ char *cl_getenv(const char *name) wchar_t *value_utf16; char *value_utf8; - git__utf8_to_16(name_utf16, name); + git__utf8_to_16(name_utf16, GIT_WIN_PATH, name); alloc_len = GetEnvironmentVariableW(name_utf16, NULL, 0); if (alloc_len <= 0) return NULL; @@ -83,10 +83,10 @@ int cl_setenv(const char *name, const char *value) wchar_t name_utf16[GIT_WIN_PATH]; wchar_t value_utf16[GIT_WIN_PATH]; - git__utf8_to_16(name_utf16, name); + git__utf8_to_16(name_utf16, GIT_WIN_PATH, name); if (value != NULL) - git__utf8_to_16(value_utf16, value); + git__utf8_to_16(value_utf16, GIT_WIN_PATH, value); cl_assert(SetEnvironmentVariableW(name_utf16, value ? value_utf16 : NULL)); return 0; |