summaryrefslogtreecommitdiff
path: root/src/win32/utf-conv.c
diff options
context:
space:
mode:
authorVicent Martí <tanoku@gmail.com>2012-04-25 16:26:12 -0700
committerVicent Marti <tanoku@gmail.com>2012-08-28 23:09:42 -0700
commit3b73a03497e4fd67459960318308c9265bcd7805 (patch)
treeceb79d99b6f98145a2e5166709abcaf9cd98d8f6 /src/win32/utf-conv.c
parent319ad0ba208ec67523a808269b8662c88727a0c9 (diff)
downloadlibgit2-3b73a03497e4fd67459960318308c9265bcd7805.tar.gz
UTF-8 changes yo
Diffstat (limited to 'src/win32/utf-conv.c')
-rw-r--r--src/win32/utf-conv.c92
1 files changed, 92 insertions, 0 deletions
diff --git a/src/win32/utf-conv.c b/src/win32/utf-conv.c
index 0a705c0ad..4b95001d2 100644
--- a/src/win32/utf-conv.c
+++ b/src/win32/utf-conv.c
@@ -29,6 +29,98 @@ void gitwin_set_utf8(void)
_active_codepage = CP_UTF8;
}
+#define U16_LEAD(c) (wchar_t)(((c)>>10)+0xd7c0)
+#define U16_TRAIL(c) (wchar_t)(((c)&0x3ff)|0xdc00)
+
+void git__utf8_to_16(wchar_t *dest, const char *src, size_t srcLength)
+{
+ wchar_t *pDest = dest;
+ uint32_t ch;
+ const uint8_t* pSrc = (uint8_t*) src;
+ const uint8_t *pSrcLimit = pSrc + srcLength;
+
+ assert(dest && src && srcLength > 0);
+
+ if ((pSrcLimit - pSrc) >= 4) {
+ pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
+
+ /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
+ do {
+ ch = *pSrc++;
+ if(ch < 0xc0) {
+ /*
+ * ASCII, or a trail byte in lead position which is treated like
+ * a single-byte sequence for better character boundary
+ * resynchronization after illegal sequences.
+ */
+ *pDest++=(wchar_t)ch;
+ } else if(ch < 0xe0) { /* U+0080..U+07FF */
+ /* 0x3080 = (0xc0 << 6) + 0x80 */
+ *pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
+ } else if(ch < 0xf0) { /* U+0800..U+FFFF */
+ /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+ /* 0x2080 = (0x80 << 6) + 0x80 */
+ ch = (ch << 12) + (*pSrc++ << 6);
+ *pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
+ } else /* f0..f4 */ { /* U+10000..U+10FFFF */
+ /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+ ch = (ch << 18) + (*pSrc++ << 12);
+ ch += *pSrc++ << 6;
+ ch += *pSrc++ - 0x3c82080;
+ *(pDest++) = U16_LEAD(ch);
+ *(pDest++) = U16_TRAIL(ch);
+ }
+ } while(pSrc < pSrcLimit);
+
+ pSrcLimit += 3; /* restore original pSrcLimit */
+ }
+
+ while(pSrc < pSrcLimit) {
+ ch = *pSrc++;
+ if(ch < 0xc0) {
+ /*
+ * ASCII, or a trail byte in lead position which is treated like
+ * a single-byte sequence for better character boundary
+ * resynchronization after illegal sequences.
+ */
+ *pDest++=(wchar_t)ch;
+ continue;
+ } else if(ch < 0xe0) { /* U+0080..U+07FF */
+ if(pSrc < pSrcLimit) {
+ /* 0x3080 = (0xc0 << 6) + 0x80 */
+ *pDest++ = (wchar_t)((ch << 6) + *pSrc++ - 0x3080);
+ continue;
+ }
+ } else if(ch < 0xf0) { /* U+0800..U+FFFF */
+ if((pSrcLimit - pSrc) >= 2) {
+ /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
+ /* 0x2080 = (0x80 << 6) + 0x80 */
+ ch = (ch << 12) + (*pSrc++ << 6);
+ *pDest++ = (wchar_t)(ch + *pSrc++ - 0x2080);
+ pSrc += 3;
+ continue;
+ }
+ } else /* f0..f4 */ { /* U+10000..U+10FFFF */
+ if((pSrcLimit - pSrc) >= 3) {
+ /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+ ch = (ch << 18) + (*pSrc++ << 12);
+ ch += *pSrc++ << 6;
+ ch += *pSrc++ - 0x3c82080;
+ *(pDest++) = U16_LEAD(ch);
+ *(pDest++) = U16_TRAIL(ch);
+ pSrc += 4;
+ continue;
+ }
+ }
+
+ /* truncated character at the end */
+ *pDest++ = 0xfffd;
+ break;
+ }
+
+ *pDest++ = 0x0;
+}
+
wchar_t* gitwin_to_utf16(const char* str)
{
wchar_t* ret;