/*-
 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "test.h"
__FBSDID("$FreeBSD$");

#include <locale.h>

#define __LIBARCHIVE_TEST
#include "archive_string.h"

/*
Execute the following to rebuild the data for this program:
   tail -n +36 test_archive_string_conversion.c | /bin/sh
#
# This requires http://unicode.org/Public/6.0.0/ucd/NormalizationTest.txt
#
if="NormalizationTest.txt"
if [ ! -f ${if} ]; then
  echo "Not found: \"${if}\""
  exit 0
fi
of=test_archive_string_conversion.txt.Z
echo "\$FreeBSD\$" > ${of}.uu
awk -F ';' '$0 ~/^[0-9A-F]+/ {printf "%s;%s\n", $2, $3}' ${if} | compress | uuencode ${of} >> ${of}.uu
exit 1
*/

/*
 * Encode the code point `uc` as UTF-8 at `p`.
 *
 * Writes one to four bytes (no NUL terminator) and returns the number of
 * bytes written.  The value is not validated: everything above U+FFFF is
 * emitted in the four-byte form.
 */
static int
unicode_to_utf8(char *p, uint32_t uc)
{
	char *start = p;

	/* Translate code point to UTF8 */
	if (uc <= 0x7f) {
		*p++ = (char)uc;
	} else if (uc <= 0x7ff) {
		*p++ = (char)(0xc0 | ((uc >> 6) & 0x1f));
		*p++ = (char)(0x80 | (uc & 0x3f));
	} else if (uc <= 0xffff) {
		*p++ = (char)(0xe0 | ((uc >> 12) & 0x0f));
		*p++ = (char)(0x80 | ((uc >> 6) & 0x3f));
		*p++ = (char)(0x80 | (uc & 0x3f));
	} else {
		*p++ = (char)(0xf0 | ((uc >> 18) & 0x07));
		*p++ = (char)(0x80 | ((uc >> 12) & 0x3f));
		*p++ = (char)(0x80 | ((uc >> 6) & 0x3f));
		*p++ = (char)(0x80 | (uc & 0x3f));
	}
	return ((int)(p - start));
}

/*
 * Store the 16-bit value `u` at `pp` in big-endian byte order.
 */
static void
archive_be16enc(void *pp, uint16_t u)
{
	unsigned char *p = (unsigned char *)pp;

	p[0] = (u >> 8) & 0xff;
	p[1] = u & 0xff;
}

/*
 * Encode the code point `uc` as UTF-16BE at `p`.
 *
 * Returns the number of bytes written: 2 for a code point that fits in
 * 16 bits, 4 when a surrogate pair is needed.  No terminator is written.
 */
static int
unicode_to_utf16be(char *p, uint32_t uc)
{
	char *utf16 = p;

	if (uc > 0xffff) {
		/* We have a code point that won't fit into a
		 * single 16-bit unit; convert it to a surrogate pair. */
		uc -= 0x10000;
		archive_be16enc(utf16,
		    (uint16_t)(((uc >> 10) & 0x3ff) + 0xD800));
		archive_be16enc(utf16 + 2,
		    (uint16_t)((uc & 0x3ff) + 0xDC00));
		return (4);
	} else {
		archive_be16enc(utf16, (uint16_t)uc);
		return (2);
	}
}

/*
 * Store the 16-bit value `u` at `pp` in little-endian byte order.
 */
static void
archive_le16enc(void *pp, uint16_t u)
{
	unsigned char *p = (unsigned char *)pp;

	p[0] = u & 0xff;
	p[1] = (u >> 8) & 0xff;
}

/*
 * Encode the code point `uc` as UTF-16LE at `p`.
 *
 * Returns the number of bytes written: 2 for a code point that fits in
 * 16 bits, 4 when a surrogate pair is needed.  No terminator is written.
 * The return type matches unicode_to_utf16be().
 */
static int
unicode_to_utf16le(char *p, uint32_t uc)
{
	char *utf16 = p;

	if (uc > 0xffff) {
		/* We have a code point that won't fit into a
		 * single 16-bit unit; convert it to a surrogate pair. */
		uc -= 0x10000;
		archive_le16enc(utf16,
		    (uint16_t)(((uc >> 10) & 0x3ff) + 0xD800));
		archive_le16enc(utf16 + 2,
		    (uint16_t)((uc & 0x3ff) + 0xDC00));
		return (4);
	} else {
		archive_le16enc(utf16, (uint16_t)uc);
		return (2);
	}
}

/*
 * Size of wchar_t in bytes on this platform.
 */
static int
wc_size(void)
{
	return ((int)sizeof(wchar_t));
}

/*
 * Store the code point `uc` at `wp` as wide character(s).
 *
 * When wchar_t is four bytes wide the code point is stored directly.
 * Otherwise code points above U+FFFF are split into a surrogate pair.
 * Returns the number of wchar_t elements written (1 or 2); no terminator
 * is written.
 */
static int
unicode_to_wc(wchar_t *wp, uint32_t uc)
{
	if (wc_size() == 4) {
		*wp = (wchar_t)uc;
		return (1);
	}
	if (uc > 0xffff) {
		/* We have a code point that won't fit into a
		 * wchar_t; convert it to a surrogate pair. */
		uc -= 0x10000;
		*wp++ = (wchar_t)(((uc >> 10) & 0x3ff) + 0xD800);
		*wp = (wchar_t)((uc & 0x3ff) + 0xDC00);
		return (2);
	} else {
		*wp = (wchar_t)uc;
		return (1);
	}
}

/*
 * Convert a textual Unicode pattern into four encodings at once.
 *
 * `pattern` is a sequence of code points written in upper-case hex
 * ([0-9A-F]); any other character acts as a delimiter, and the
 * terminating NUL ends the last code point.  Each code point is appended
 * to `out` (UTF-8), `wout` (wide characters), `u16be` (UTF-16BE) and
 * `u16le` (UTF-16LE).  Every output is terminated (two zero bytes for the
 * UTF-16 buffers).  No bounds checking is done; the caller must provide
 * buffers large enough for the pattern.
 *
 * When `mac_nfd` is non-zero, the FIRST code point of the pattern is
 * checked against the list of characters that Mac OS leaves in composed
 * form; the function returns 1 if it is on that list and 0 otherwise.
 * With `mac_nfd` zero the return value is always 0.
 *
 * Note: U+2000 - U+2FFF, U+F900 - U+FAFF and U+2F800 - U+2FAFF are not
 * converted to NFD on Mac OS.
 * see also http://developer.apple.com/library/mac/#qa/qa2001/qa1173.html
 */
static int
scan_unicode_pattern(char *out, wchar_t *wout, char *u16be, char *u16le,
    const char *pattern, int mac_nfd)
{
	unsigned uc = 0;
	const char *p = pattern;
	char *op = out;
	wchar_t *owp = wout;
	char *op16be = u16be;
	char *op16le = u16le;
	int ret = 0;

	for (;;) {
		if (*p >= '0' && *p <= '9')
			uc = (uc << 4) + (*p - '0');
		else if (*p >= 'A' && *p <= 'F')
			uc = (uc << 4) + (*p - 'A' + 0x0a);
		else {
			/* `op == out` holds only before anything has been
			 * emitted, i.e. for the first code point. */
			if (mac_nfd && op == out) {
				/*
				 * These are not converted to NFD on Mac OS.
				 * U+2000 - U+2FFF
				 * U+F900 - U+FAFF
				 * U+2F800 - U+2FAFF
				 */
				switch (uc) {
				case 0x2194: case 0x219A: case 0x219B:
				case 0x21AE: case 0x21CD: case 0x21CE:
				case 0x21CF: case 0x2204: case 0x2209:
				case 0x220C: case 0x2224: case 0x2226:
				case 0x2241: case 0x2244: case 0x2247:
				case 0x2249: case 0x2260: case 0x2262:
				case 0x226D: case 0x226E: case 0x226F:
				case 0x2270: case 0x2271: case 0x2274:
				case 0x2275: case 0x2276: case 0x2278:
				case 0x2279: case 0x227A: case 0x227B:
				case 0x2280: case 0x2281: case 0x2284:
				case 0x2285: case 0x2288: case 0x2289:
				case 0x22AC: case 0x22AD: case 0x22AE:
				case 0x22AF: case 0x22E0: case 0x22E1:
				case 0x22E2: case 0x22E3: case 0x22EA:
				case 0x22EB: case 0x22EC: case 0x22ED:

				/*
				 * Those code points are not converted to
				 * NFD on Mac OS. I do not know the reason
				 * because it is undocumented.
				 *   NFC        NFD
				 *   1109A  ==> 11099 110BA
				 *   1109C  ==> 1109B 110BA
				 *   110AB  ==> 110A5 110BA
				 */
				case 0x1109A: case 0x1109C: case 0x110AB:
					ret = 1;
					break;
				default:
					break;
				}
			}
			op16be += unicode_to_utf16be(op16be, uc);
			op16le += unicode_to_utf16le(op16le, uc);
			owp += unicode_to_wc(owp, uc);
			op += unicode_to_utf8(op, uc);
			if (!*p) {
				/* End of pattern: terminate all outputs. */
				*op16be++ = 0;
				*op16be = 0;
				*op16le++ = 0;
				*op16le = 0;
				*owp = L'\0';
				*op = '\0';
				break;
			}
			uc = 0;
		}
		p++;
	}
	return (ret);
}

/*
 * Report whether wide characters can be relied upon to be Unicode
 * regardless of the current locale: 1 on native Windows builds
 * (not Cygwin), 0 everywhere else.
 */
static int
is_wc_unicode(void)
{
#if defined(_WIN32) && !defined(__CYGWIN__)
	return (1);
#else
	return (0);
#endif
}

/*
 * Check that strings are normalized to Form C (NFC) when the conversion
 * objects are configured with SCONV_SET_OPT_NORMALIZATION_C.
 *
 * `testdata` names a text file of "NFC;NFD" pattern pairs.  For each pair
 * the NFD form, supplied as UTF-8, UTF-16BE and UTF-16LE, must come out as
 * the NFC form, and the NFC form must pass through unchanged.
 *
 * The test is skipped when neither an en_US.UTF-8 locale nor a Unicode
 * wchar_t is available.
 */
static void
test_archive_string_normalization_nfc(const char *testdata)
{
	struct archive *a, *a2;
	struct archive_string utf8;
	struct archive_mstring mstr;
	struct archive_string_conv *f_sconv8, *t_sconv8;
	struct archive_string_conv *f_sconv16be, *f_sconv16le;
	FILE *fp = NULL;
	char buff[512];
	int line = 0;
	int locale_is_utf8, wc_is_unicode;
	int sconv_opt = SCONV_SET_OPT_NORMALIZATION_C;

	locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
	wc_is_unicode = is_wc_unicode();
	/* If it doesn't exist, just warn and return. */
	if (!locale_is_utf8 && !wc_is_unicode) {
		skipping("A test of string normalization for NFC requires "
		    "a suitable locale; en_US.UTF-8 not available on this "
		    "system");
		return;
	}

	archive_string_init(&utf8);
	memset(&mstr, 0, sizeof(mstr));

	/*
	 * Create string conversion objects.
	 */
	assert((a = archive_read_new()) != NULL);
	assertA(NULL != (f_sconv8 =
	    archive_string_conversion_from_charset(a, "UTF-8", 0)));
	assertA(NULL != (f_sconv16be =
	    archive_string_conversion_from_charset(a, "UTF-16BE", 0)));
	assertA(NULL != (f_sconv16le =
	    archive_string_conversion_from_charset(a, "UTF-16LE", 0)));
	assert((a2 = archive_write_new()) != NULL);
	assertA(NULL != (t_sconv8 =
	    archive_string_conversion_to_charset(a2, "UTF-8", 0)));
	if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
	    t_sconv8 == NULL) {
		/* We cannot continue this test.  Release everything
		 * (both archive handles), not just `a`. */
		goto cleanup;
	}
	archive_string_conversion_set_opt(f_sconv8, sconv_opt);
	archive_string_conversion_set_opt(f_sconv16be, sconv_opt);
	archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
	archive_string_conversion_set_opt(t_sconv8, sconv_opt);

	/* Open a test pattern file. */
	assert((fp = fopen(testdata, "r")) != NULL);
	if (fp == NULL) {
		/* The assertion above records the failure but does not
		 * stop execution; never hand a NULL stream to fgets(). */
		goto cleanup;
	}

	/*
	 * Read test data.
	 *  Test data format:
	 *     <NFC Unicode pattern> ';' <NFD Unicode pattern> '\n'
	 *  Unicode pattern format:
	 *     [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,}
	 */
	while (fgets(buff, sizeof(buff), fp) != NULL) {
		char nfc[80], nfd[80];
		char utf8_nfc[80], utf8_nfd[80];
		char utf16be_nfc[80], utf16be_nfd[80];
		char utf16le_nfc[80], utf16le_nfd[80];
		wchar_t wc_nfc[40], wc_nfd[40];
		char *e, *p;
		const wchar_t *wp;
		const char *mp;
		size_t mplen;

		line++;
		if (buff[0] == '#')
			continue;
		p = strchr(buff, ';');
		if (p == NULL)
			continue;
		*p++ = '\0';
		/* Copy an NFC pattern */
		strncpy(nfc, buff, sizeof(nfc)-1);
		nfc[sizeof(nfc)-1] = '\0';
		e = p;
		p = strchr(p, '\n');
		if (p == NULL)
			continue;
		*p = '\0';
		/* Copy an NFD pattern */
		strncpy(nfd, e, sizeof(nfd)-1);
		nfd[sizeof(nfd)-1] = '\0';

		/*
		 * Get an NFC pattern.
		 */
		scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc,
		    utf16le_nfc, nfc, 0);

		/*
		 * Get an NFD pattern.
		 */
		scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd,
		    utf16le_nfd, nfd, 0);

		if (locale_is_utf8) {
			/*
			 * Normalize an NFD string for import.
			 */
			assertEqualInt(0, archive_strcpy_l(
			    &utf8, utf8_nfd, f_sconv8));
			failure("NFD(%s) should be converted to NFC(%s):%d",
			    nfd, nfc, line);
			assertEqualUTF8String(utf8_nfc, utf8.s);

			/*
			 * Normalize an NFC string for import.
			 */
			assertEqualInt(0, archive_strcpy_l(
			    &utf8, utf8_nfc, f_sconv8));
			failure("NFC(%s) should not be any changed:%d",
			    nfc, line);
			assertEqualUTF8String(utf8_nfc, utf8.s);

			/*
			 * Copy an NFC string for export.
			 */
			assertEqualInt(0, archive_strcpy_l(
			    &utf8, utf8_nfc, t_sconv8));
			failure("NFC(%s) should not be any changed:%d",
			    nfc, line);
			assertEqualUTF8String(utf8_nfc, utf8.s);

			/*
			 * Normalize an NFD string in UTF-16BE for import.
			 */
			assertEqualInt(0, archive_strncpy_l(
			    &utf8, utf16be_nfd, 100000, f_sconv16be));
			failure("NFD(%s) should be converted to NFC(%s):%d",
			    nfd, nfc, line);
			assertEqualUTF8String(utf8_nfc, utf8.s);

			/*
			 * Normalize an NFD string in UTF-16LE for import.
			 */
			assertEqualInt(0, archive_strncpy_l(
			    &utf8, utf16le_nfd, 100000, f_sconv16le));
			failure("NFD(%s) should be converted to NFC(%s):%d",
			    nfd, nfc, line);
			assertEqualUTF8String(utf8_nfc, utf8.s);
		}

		/*
		 * Test for archive_mstring interface.
		 * In specific, Windows platform UTF-16BE is directly
		 * converted to/from wide-character to avoid the effect of
		 * current locale since windows platform cannot make
		 * locale UTF-8.
		 */
		if (locale_is_utf8 || wc_is_unicode) {
			/*
			 * Normalize an NFD string in UTF-8 for import.
			 */
			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
			    &mstr, utf8_nfd, 100000, f_sconv8));
			assertEqualInt(0,
			    archive_mstring_get_wcs(a, &mstr, &wp));
			failure("UTF-8 NFD(%s) should be converted "
			    "to WCS NFC(%s):%d", nfd, nfc, line);
			assertEqualWString(wc_nfc, wp);

			/*
			 * Normalize an NFD string in UTF-16BE for import.
			 */
			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
			    &mstr, utf16be_nfd, 100000, f_sconv16be));
			assertEqualInt(0,
			    archive_mstring_get_wcs(a, &mstr, &wp));
			failure("UTF-16BE NFD(%s) should be converted "
			    "to WCS NFC(%s):%d", nfd, nfc, line);
			assertEqualWString(wc_nfc, wp);

			/*
			 * Normalize an NFD string in UTF-16LE for import.
			 */
			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
			    &mstr, utf16le_nfd, 100000, f_sconv16le));
			assertEqualInt(0,
			    archive_mstring_get_wcs(a, &mstr, &wp));
			failure("UTF-16LE NFD(%s) should be converted "
			    "to WCS NFC(%s):%d", nfd, nfc, line);
			assertEqualWString(wc_nfc, wp);

			/*
			 * Copy an NFC wide-string for export.
			 */
			assertEqualInt(0,
			    archive_mstring_copy_wcs(&mstr, wc_nfc));
			assertEqualInt(0, archive_mstring_get_mbs_l(
			    a, &mstr, &mp, &mplen, t_sconv8));
			failure("WCS NFC(%s) should be UTF-8 NFC:%d"
			    ,nfc, line);
			assertEqualUTF8String(utf8_nfc, mp);
		}
	}

cleanup:
	archive_string_free(&utf8);
	archive_mstring_clean(&mstr);
	if (fp != NULL)
		fclose(fp);
	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
	assertEqualInt(ARCHIVE_OK, archive_write_free(a2));
}

/*
 * Check that strings are normalized to Form D (NFD) the way Mac OS does
 * when the conversion objects are configured with
 * SCONV_SET_OPT_NORMALIZATION_D.
 *
 * `testdata` names a text file of "NFC;NFD" pattern pairs.  For each pair
 * the NFC form, supplied as UTF-8, UTF-16BE and UTF-16LE, must come out as
 * the NFD form -- except for the characters scan_unicode_pattern() flags
 * as left composed on Mac OS, which must pass through unchanged.  The NFD
 * form itself must always pass through unchanged.
 *
 * The test is skipped when neither an en_US.UTF-8 locale nor a Unicode
 * wchar_t is available.
 */
static void
test_archive_string_normalization_mac_nfd(const char *testdata)
{
	struct archive *a, *a2;
	struct archive_string utf8;
	struct archive_mstring mstr;
	struct archive_string_conv *f_sconv8, *t_sconv8;
	struct archive_string_conv *f_sconv16be, *f_sconv16le;
	FILE *fp = NULL;
	char buff[512];
	int line = 0;
	int locale_is_utf8, wc_is_unicode;
	int sconv_opt = SCONV_SET_OPT_NORMALIZATION_D;

	locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
	wc_is_unicode = is_wc_unicode();
	/* If it doesn't exist, just warn and return. */
	if (!locale_is_utf8 && !wc_is_unicode) {
		skipping("A test of string normalization for NFD requires "
		    "a suitable locale; en_US.UTF-8 not available on this "
		    "system");
		return;
	}

	archive_string_init(&utf8);
	memset(&mstr, 0, sizeof(mstr));

	/*
	 * Create string conversion objects.
	 */
	assert((a = archive_read_new()) != NULL);
	assertA(NULL != (f_sconv8 =
	    archive_string_conversion_from_charset(a, "UTF-8", 0)));
	assertA(NULL != (f_sconv16be =
	    archive_string_conversion_from_charset(a, "UTF-16BE", 0)));
	assertA(NULL != (f_sconv16le =
	    archive_string_conversion_from_charset(a, "UTF-16LE", 0)));
	assert((a2 = archive_write_new()) != NULL);
	assertA(NULL != (t_sconv8 =
	    archive_string_conversion_to_charset(a2, "UTF-8", 0)));
	if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
	    t_sconv8 == NULL) {
		/* We cannot continue this test.  Release everything
		 * (both archive handles), not just `a`. */
		goto cleanup;
	}
	archive_string_conversion_set_opt(f_sconv8, sconv_opt);
	archive_string_conversion_set_opt(f_sconv16be, sconv_opt);
	archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
	archive_string_conversion_set_opt(t_sconv8, sconv_opt);

	/* Open a test pattern file. */
	assert((fp = fopen(testdata, "r")) != NULL);
	if (fp == NULL) {
		/* The assertion above records the failure but does not
		 * stop execution; never hand a NULL stream to fgets(). */
		goto cleanup;
	}

	/*
	 * Read test data.
	 *  Test data format:
	 *     <NFC Unicode pattern> ';' <NFD Unicode pattern> '\n'
	 *  Unicode pattern format:
	 *     [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,}
	 */
	while (fgets(buff, sizeof(buff), fp) != NULL) {
		char nfc[80], nfd[80];
		char utf8_nfc[80], utf8_nfd[80];
		char utf16be_nfc[80], utf16be_nfd[80];
		char utf16le_nfc[80], utf16le_nfd[80];
		wchar_t wc_nfc[40], wc_nfd[40];
		char *e, *p;
		const wchar_t *wp;
		const char *mp;
		size_t mplen;
		int should_be_nfc;

		line++;
		if (buff[0] == '#')
			continue;
		p = strchr(buff, ';');
		if (p == NULL)
			continue;
		*p++ = '\0';
		/* Copy an NFC pattern */
		strncpy(nfc, buff, sizeof(nfc)-1);
		nfc[sizeof(nfc)-1] = '\0';
		e = p;
		p = strchr(p, '\n');
		if (p == NULL)
			continue;
		*p = '\0';
		/* Copy an NFD pattern */
		strncpy(nfd, e, sizeof(nfd)-1);
		nfd[sizeof(nfd)-1] = '\0';

		/*
		 * Get an NFC pattern, and learn whether Mac OS leaves
		 * its first code point composed.
		 */
		should_be_nfc = scan_unicode_pattern(utf8_nfc, wc_nfc,
		    utf16be_nfc, utf16le_nfc, nfc, 1);

		/*
		 * Get an NFD pattern.
		 */
		scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd,
		    utf16le_nfd, nfd, 0);

		if (locale_is_utf8) {
			/*
			 * Normalize an NFC string for import.
			 */
			assertEqualInt(0, archive_strcpy_l(
			    &utf8, utf8_nfc, f_sconv8));
			if (should_be_nfc) {
				failure("NFC(%s) should not be converted to"
				    " NFD(%s):%d", nfc, nfd, line);
				assertEqualUTF8String(utf8_nfc, utf8.s);
			} else {
				failure("NFC(%s) should be converted to"
				    " NFD(%s):%d", nfc, nfd, line);
				assertEqualUTF8String(utf8_nfd, utf8.s);
			}

			/*
			 * Normalize an NFD string for import.
			 */
			assertEqualInt(0, archive_strcpy_l(
			    &utf8, utf8_nfd, f_sconv8));
			failure("NFD(%s) should not be any changed:%d",
			    nfd, line);
			assertEqualUTF8String(utf8_nfd, utf8.s);

			/*
			 * Copy an NFD string for export.
			 */
			assertEqualInt(0, archive_strcpy_l(
			    &utf8, utf8_nfd, t_sconv8));
			failure("NFD(%s) should not be any changed:%d",
			    nfd, line);
			assertEqualUTF8String(utf8_nfd, utf8.s);

			/*
			 * Normalize an NFC string in UTF-16BE for import.
			 */
			assertEqualInt(0, archive_strncpy_l(
			    &utf8, utf16be_nfc, 100000, f_sconv16be));
			if (should_be_nfc) {
				failure("NFC(%s) should not be converted to"
				    " NFD(%s):%d", nfc, nfd, line);
				assertEqualUTF8String(utf8_nfc, utf8.s);
			} else {
				failure("NFC(%s) should be converted to"
				    " NFD(%s):%d", nfc, nfd, line);
				assertEqualUTF8String(utf8_nfd, utf8.s);
			}

			/*
			 * Normalize an NFC string in UTF-16LE for import.
			 */
			assertEqualInt(0, archive_strncpy_l(
			    &utf8, utf16le_nfc, 100000, f_sconv16le));
			if (should_be_nfc) {
				failure("NFC(%s) should not be converted to"
				    " NFD(%s):%d", nfc, nfd, line);
				assertEqualUTF8String(utf8_nfc, utf8.s);
			} else {
				failure("NFC(%s) should be converted to"
				    " NFD(%s):%d", nfc, nfd, line);
				assertEqualUTF8String(utf8_nfd, utf8.s);
			}
		}

		/*
		 * Test for archive_mstring interface.
		 * In specific, Windows platform UTF-16BE is directly
		 * converted to/from wide-character to avoid the effect of
		 * current locale since windows platform cannot make
		 * locale UTF-8.
		 */
		if (locale_is_utf8 || wc_is_unicode) {
			/*
			 * Normalize an NFC string in UTF-8 for import.
			 */
			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
			    &mstr, utf8_nfc, 100000, f_sconv8));
			assertEqualInt(0,
			    archive_mstring_get_wcs(a, &mstr, &wp));
			if (should_be_nfc) {
				failure("UTF-8 NFC(%s) should not be converted "
				    "to WCS NFD(%s):%d", nfc, nfd, line);
				assertEqualWString(wc_nfc, wp);
			} else {
				failure("UTF-8 NFC(%s) should be converted "
				    "to WCS NFD(%s):%d", nfc, nfd, line);
				assertEqualWString(wc_nfd, wp);
			}

			/*
			 * Normalize an NFC string in UTF-16BE for import.
			 */
			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
			    &mstr, utf16be_nfc, 100000, f_sconv16be));
			assertEqualInt(0,
			    archive_mstring_get_wcs(a, &mstr, &wp));
			if (should_be_nfc) {
				failure("UTF-16BE NFC(%s) should not be "
				    "converted to WCS NFD(%s):%d",
				    nfc, nfd, line);
				assertEqualWString(wc_nfc, wp);
			} else {
				failure("UTF-16BE NFC(%s) should be converted "
				    "to WCS NFD(%s):%d", nfc, nfd, line);
				assertEqualWString(wc_nfd, wp);
			}

			/*
			 * Normalize an NFC string in UTF-16LE for import.
			 */
			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
			    &mstr, utf16le_nfc, 100000, f_sconv16le));
			assertEqualInt(0,
			    archive_mstring_get_wcs(a, &mstr, &wp));
			if (should_be_nfc) {
				failure("UTF-16LE NFC(%s) should not be "
				    "converted to WCS NFD(%s):%d",
				    nfc, nfd, line);
				assertEqualWString(wc_nfc, wp);
			} else {
				failure("UTF-16LE NFC(%s) should be converted "
				    "to WCS NFD(%s):%d", nfc, nfd, line);
				assertEqualWString(wc_nfd, wp);
			}

			/*
			 * Copy an NFD wide-string for export.
			 */
			assertEqualInt(0, archive_mstring_copy_wcs(
			    &mstr, wc_nfd));
			assertEqualInt(0, archive_mstring_get_mbs_l(
			    a, &mstr, &mp, &mplen, t_sconv8));
			failure("WCS NFD(%s) should be UTF-8 NFD:%d"
			    ,nfd, line);
			assertEqualUTF8String(utf8_nfd, mp);
		}
	}

cleanup:
	archive_string_free(&utf8);
	archive_mstring_clean(&mstr);
	if (fp != NULL)
		fclose(fp);
	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
	assertEqualInt(ARCHIVE_OK, archive_write_free(a2));
}

/*
 * Check that alternative spellings of a charset name ("UTF8", "utf8",
 * "UTF16BE", ...) are reported back under their canonical names
 * ("UTF-8", "UTF-16BE", "UTF-16LE").
 */
static void
test_archive_string_canonicalization(void)
{
	struct archive *a;
	struct archive_string_conv *sconv;

	setlocale(LC_ALL, "en_US.UTF-8");

	assert((a = archive_read_new()) != NULL);

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "UTF-8", 1)));
	failure("Charset name should be UTF-8");
	assertEqualString("UTF-8",
	    archive_string_conversion_charset_name(sconv));

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "UTF8", 1)));
	failure("Charset name should be UTF-8");
	assertEqualString("UTF-8",
	    archive_string_conversion_charset_name(sconv));

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "utf8", 1)));
	failure("Charset name should be UTF-8");
	assertEqualString("UTF-8",
	    archive_string_conversion_charset_name(sconv));

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "UTF-16BE", 1)));
	failure("Charset name should be UTF-16BE");
	assertEqualString("UTF-16BE",
	    archive_string_conversion_charset_name(sconv));

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "UTF16BE", 1)));
	failure("Charset name should be UTF-16BE");
	assertEqualString("UTF-16BE",
	    archive_string_conversion_charset_name(sconv));

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "utf16be", 1)));
	failure("Charset name should be UTF-16BE");
	assertEqualString("UTF-16BE",
	    archive_string_conversion_charset_name(sconv));

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "UTF-16LE", 1)));
	failure("Charset name should be UTF-16LE");
	assertEqualString("UTF-16LE",
	    archive_string_conversion_charset_name(sconv));

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "UTF16LE", 1)));
	failure("Charset name should be UTF-16LE");
	assertEqualString("UTF-16LE",
	    archive_string_conversion_charset_name(sconv));

	assertA(NULL != (sconv =
	    archive_string_conversion_to_charset(a, "utf16le", 1)));
	failure("Charset name should be UTF-16LE");
	assertEqualString("UTF-16LE",
	    archive_string_conversion_charset_name(sconv));

	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
}

/*
 * Verify that `mstr` can be read back through every getter.
 *
 * `exp` is the expected result of the MBS, UTF-8 and charset-converted
 * (`sc`) getters; `wexp` is the expected wide-character result.  `mstr`
 * itself is never queried: each getter runs on a fresh copy.
 */
static void
check_string(struct archive *a, struct archive_mstring *mstr,
    struct archive_string_conv *sc, const char *exp, const wchar_t *wexp)
{
	/* Do all the tests on a copy so that we can have a clear initial
	 * state every time */
	struct archive_mstring mstr2;
	const char *p = NULL;
	const wchar_t *wp = NULL;
	size_t len = 0;

	memset(&mstr2, 0, sizeof(mstr2));

	archive_mstring_copy(&mstr2, mstr);
	assertEqualInt(0, archive_mstring_get_mbs(a, &mstr2, &p));
	assertEqualString(exp, p);
	p = NULL;

	archive_mstring_copy(&mstr2, mstr);
	assertEqualInt(0, archive_mstring_get_utf8(a, &mstr2, &p));
	assertEqualString(exp, p);
	p = NULL;

	archive_mstring_copy(&mstr2, mstr);
	assertEqualInt(0, archive_mstring_get_wcs(a, &mstr2, &wp));
	assertEqualWString(wexp, wp);
	wp = NULL;

	archive_mstring_copy(&mstr2, mstr);
	assertEqualInt(0, archive_mstring_get_mbs_l(a, &mstr2, &p, &len, sc));
	assertEqualString(exp, p);
	assertEqualInt(len, strlen(exp));
	p = NULL;
	len = 0;

	archive_mstring_clean(&mstr2);
}

/*
 * Make sure no matter what the input encoding is, the string can be
 * converted to all the output encodings.
 */
static void
test_archive_string_set_get(void)
{
	struct archive *a;
	struct archive_mstring mstr;
	struct archive_string_conv *sc;

	setlocale(LC_ALL, "en_US.UTF-8");

	assert((a = archive_read_new()) != NULL);
	memset(&mstr, 0, sizeof(mstr));

	assertA(NULL != (sc =
	    archive_string_conversion_to_charset(a, "UTF-8", 1)));
	failure("Charset name should be UTF-8");
	assertEqualString("UTF-8",
	    archive_string_conversion_charset_name(sc));

	/* Set through each setter in turn, then read back every way. */
	assertEqualInt(0, archive_mstring_copy_mbs(&mstr, "AAA"));
	check_string(a, &mstr, sc, "AAA", L"AAA");
	/* This test expects the UTF-8 setter to return 4 here, the
	 * length of "BBBB", where the other setters return 0. */
	assertEqualInt(4, archive_mstring_copy_utf8(&mstr, "BBBB"));
	check_string(a, &mstr, sc, "BBBB", L"BBBB");
	assertEqualInt(0, archive_mstring_copy_wcs(&mstr, L"CCC12"));
	check_string(a, &mstr, sc, "CCC12", L"CCC12");
	assertEqualInt(0, archive_mstring_copy_mbs_len_l(&mstr, "DDDD-l", 6, sc));
	check_string(a, &mstr, sc, "DDDD-l", L"DDDD-l");
	assertEqualInt(0, archive_mstring_update_utf8(a, &mstr, "EEEEE---H"));
	check_string(a, &mstr, sc, "EEEEE---H", L"EEEEE---H");

	archive_mstring_clean(&mstr);
	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
}

/*
 * Test entry point: unpack the compressed reference data into a plain
 * text file, then run the normalization, canonicalization and set/get
 * tests.
 */
DEFINE_TEST(test_archive_string_conversion)
{
	static const char reffile[] = "test_archive_string_conversion.txt.Z";
	static const char testdata[] = "testdata.txt";
	struct archive *a;
	struct archive_entry *ae;
	char buff[512];
	ssize_t size;
	FILE *fp;

	/*
	 * Extract a test pattern file.
	 */
	extract_reference_file(reffile);
	assert((a = archive_read_new()) != NULL);
	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
	assertEqualIntA(a, ARCHIVE_OK,
	    archive_read_open_filename(a, reffile, 512));

	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
	assert((fp = fopen(testdata, "w")) != NULL);
	if (fp != NULL) {
		/* Only touch the stream when it was really opened; the
		 * assertion above reports a failure without stopping. */
		while ((size = archive_read_data(a, buff, sizeof(buff))) > 0)
			assertEqualInt(size, fwrite(buff, 1, size, fp));
		assertEqualInt(0, fclose(fp));
	}
	assertEqualInt(ARCHIVE_OK, archive_read_free(a));

	test_archive_string_normalization_nfc(testdata);
	test_archive_string_normalization_mac_nfd(testdata);
	test_archive_string_canonicalization();
	test_archive_string_set_get();
}