summary refs log tree commit diff
path: root/subversion/tests/libsvn_subr/utf-test.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/tests/libsvn_subr/utf-test.c')
-rw-r--r-- subversion/tests/libsvn_subr/utf-test.c 549
1 files changed, 546 insertions, 3 deletions
diff --git a/subversion/tests/libsvn_subr/utf-test.c b/subversion/tests/libsvn_subr/utf-test.c
index 2028e14..dd81ccd 100644
--- a/subversion/tests/libsvn_subr/utf-test.c
+++ b/subversion/tests/libsvn_subr/utf-test.c
@@ -25,6 +25,7 @@
#include "svn_utf.h"
#include "svn_pools.h"
+#include "private/svn_string_private.h"
#include "private/svn_utf_private.h"
/* Random number seed. Yes, it's global, just pretend you can't see it. */
@@ -226,7 +227,7 @@ test_utf_cstring_to_utf8_ex2(apr_pool_t *pool)
const char *expected_result;
const char *from_page;
} tests[] = {
- {"ascii text\n", "ascii text\n", "unexistant-page"},
+ {"ascii text\n", "ascii text\n", "unexistent-page"},
{"Edelwei\xdf", "Edelwei\xc3\x9f", "ISO-8859-1"}
};
@@ -266,7 +267,7 @@ test_utf_cstring_from_utf8_ex2(apr_pool_t *pool)
const char *expected_result;
const char *to_page;
} tests[] = {
- {"ascii text\n", "ascii text\n", "unexistant-page"},
+ {"ascii text\n", "ascii text\n", "unexistent-page"},
{"Edelwei\xc3\x9f", "Edelwei\xdf", "ISO-8859-1"}
};
@@ -294,10 +295,540 @@ test_utf_cstring_from_utf8_ex2(apr_pool_t *pool)
return SVN_NO_ERROR;
}
+/* Test normalization-independent UTF-8 string comparison.
+ *
+ * Each table row names two UTF-8 strings and the relation ('=', '<' or
+ * '>') that svn_utf__normcmp() is expected to report between them.  The
+ * test returns SVN_ERR_TEST_FAILED for the first row whose reported
+ * relation differs from the expected one; the message identifies the
+ * operands by their tags. */
+static svn_error_t *
+test_utf_collated_compare(apr_pool_t *pool)
+{
+  /* Normalized: NFC */
+  static const char nfc[] =
+    "\xe1\xb9\xa8" /* S with dot above and below */
+    "\xc5\xaf" /* u with ring */
+    "\xe1\xb8\x87" /* b with macron below */
+    "\xe1\xb9\xbd" /* v with tilde */
+    "\xe1\xb8\x9d" /* e with breve and cedilla */
+    "\xc8\x91" /* r with double grave */
+    "\xc5\xa1" /* s with caron */
+    "\xe1\xb8\xaf" /* i with diaeresis and acute */
+    "\xe1\xbb\x9d" /* o with grave and hook */
+    "\xe1\xb9\x8b"; /* n with circumflex below */
+
+  /* Normalized: NFD */
+  static const char nfd[] =
+    "S\xcc\xa3\xcc\x87" /* S with dot above and below */
+    "u\xcc\x8a" /* u with ring */
+    "b\xcc\xb1" /* b with macron below */
+    "v\xcc\x83" /* v with tilde */
+    "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+    "r\xcc\x8f" /* r with double grave */
+    "s\xcc\x8c" /* s with caron */
+    "i\xcc\x88\xcc\x81" /* i with diaeresis and acute */
+    "o\xcc\x9b\xcc\x80" /* o with grave and hook */
+    "n\xcc\xad"; /* n with circumflex below */
+
+  /* Mixed, denormalized */
+  static const char mixup[] =
+    "S\xcc\x87\xcc\xa3" /* S with dot above and below */
+    "\xc5\xaf" /* u with ring */
+    "b\xcc\xb1" /* b with macron below */
+    "\xe1\xb9\xbd" /* v with tilde */
+    "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+    "\xc8\x91" /* r with double grave */
+    "s\xcc\x8c" /* s with caron */
+    "\xe1\xb8\xaf" /* i with diaeresis and acute */
+    "o\xcc\x80\xcc\x9b" /* o with grave and hook */
+    "\xe1\xb9\x8b"; /* n with circumflex below */
+
+  /* Same as NFC, with one extra trailing character */
+  static const char longer[] =
+    "\xe1\xb9\xa8" /* S with dot above and below */
+    "\xc5\xaf" /* u with ring */
+    "\xe1\xb8\x87" /* b with macron below */
+    "\xe1\xb9\xbd" /* v with tilde */
+    "\xe1\xb8\x9d" /* e with breve and cedilla */
+    "\xc8\x91" /* r with double grave */
+    "\xc5\xa1" /* s with caron */
+    "\xe1\xb8\xaf" /* i with diaeresis and acute */
+    "\xe1\xbb\x9d" /* o with grave and hook */
+    "\xe1\xb9\x8b" /* n with circumflex below */
+    "X";
+
+  /* Same as NFC, with the last character removed */
+  static const char shorter[] =
+    "\xe1\xb9\xa8" /* S with dot above and below */
+    "\xc5\xaf" /* u with ring */
+    "\xe1\xb8\x87" /* b with macron below */
+    "\xe1\xb9\xbd" /* v with tilde */
+    "\xe1\xb8\x9d" /* e with breve and cedilla */
+    "\xc8\x91" /* r with double grave */
+    "\xc5\xa1" /* s with caron */
+    "\xe1\xb8\xaf" /* i with diaeresis and acute */
+    "\xe1\xbb\x9d"; /* o with grave and hook */
+
+  /* Same as the mixed string, but starting with a lowercase 's' */
+  static const char lowcase[] =
+    "s\xcc\x87\xcc\xa3" /* s with dot above and below */
+    "\xc5\xaf" /* u with ring */
+    "b\xcc\xb1" /* b with macron below */
+    "\xe1\xb9\xbd" /* v with tilde */
+    "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+    "\xc8\x91" /* r with double grave */
+    "s\xcc\x8c" /* s with caron */
+    "\xe1\xb8\xaf" /* i with diaeresis and acute */
+    "o\xcc\x80\xcc\x9b" /* o with grave and hook */
+    "\xe1\xb9\x8b"; /* n with circumflex below */
+
+  static const struct utfcmp_test_t {
+    const char *stra;   /* left operand; NULL terminates the table */
+    char op;            /* expected relation: '=', '<' or '>' */
+    const char *strb;   /* right operand */
+    const char *taga;   /* printable name of STRA for error messages */
+    const char *tagb;   /* printable name of STRB for error messages */
+  } utfcmp_tests[] = {
+    /* Empty key */
+    {"", '=', "", "empty", "empty"},
+    {"", '<', "a", "empty", "nonempty"},
+    {"a", '>', "", "nonempty", "empty"},
+
+    /* Deterministic ordering */
+    {"a", '<', "b", "a", "b"},
+    {"b", '<', "c", "b", "c"},
+    {"a", '<', "c", "a", "c"},
+
+    /* Normalized equality */
+    {nfc, '=', nfd, "nfc", "nfd"},
+    {nfd, '=', nfc, "nfd", "nfc"},
+    {nfc, '=', mixup, "nfc", "mixup"},
+    {nfd, '=', mixup, "nfd", "mixup"},
+    {mixup, '=', nfd, "mixup", "nfd"},
+    {mixup, '=', nfc, "mixup", "nfc"},
+
+    /* Key length */
+    {nfc, '<', longer, "nfc", "longer"},
+    {longer, '>', nfc, "longer", "nfc"},
+    {nfd, '>', shorter, "nfd", "shorter"},
+    {shorter, '<', nfd, "shorter", "nfd"},
+    /* Letter case: the strings differ only in the leading 'S' / 's' */
+    {mixup, '<', lowcase, "mixup", "lowcase"},
+    {lowcase, '>', mixup, "lowcase", "mixup"},
+
+    {NULL, 0, NULL, NULL, NULL}
+  };
+
+
+  const struct utfcmp_test_t *ut;
+  svn_membuf_t bufa, bufb;
+  svn_membuf__create(&bufa, 0, pool);
+  svn_membuf__create(&bufb, 0, pool);
+
+  /* Fixed seed, so the choice between explicit and implicit string
+     lengths below is the same on every run. */
+  srand(111);
+  for (ut = utfcmp_tests; ut->stra; ++ut)
+    {
+      /* Exercise both calling conventions: either pass the real byte
+         lengths, or pass SVN_UTF__UNKNOWN_LENGTH for both operands. */
+      const svn_boolean_t implicit_size = (rand() % 17) & 1;
+      const apr_size_t lena = (implicit_size
+                               ? SVN_UTF__UNKNOWN_LENGTH : strlen(ut->stra));
+      const apr_size_t lenb = (implicit_size
+                               ? SVN_UTF__UNKNOWN_LENGTH : strlen(ut->strb));
+      int result;
+
+      SVN_ERR(svn_utf__normcmp(&result,
+                               ut->stra, lena, ut->strb, lenb,
+                               &bufa, &bufb));
+
+      /* UCS-4 debugging dump of the decomposed strings
+      {
+        const apr_int32_t *const ucsbufa = bufa.data;
+        const apr_int32_t *const ucsbufb = bufb.data;
+        apr_size_t i;
+
+        printf("(%c)%7s %c %s\n", ut->op,
+               ut->taga, (!result ? '=' : (result < 0 ? '<' : '>')), ut->tagb);
+
+        for (i = 0; i < bufa.size || i < bufb.size; ++i)
+          {
+            if (i < bufa.size && i < bufb.size)
+              printf(" U+%04X U+%04X\n", ucsbufa[i], ucsbufb[i]);
+            else if (i < bufa.size)
+              printf(" U+%04X\n", ucsbufa[i]);
+            else
+              printf(" U+%04X\n", ucsbufb[i]);
+          }
+      }
+      */
+
+      /* Only the sign of RESULT is compared with the expected relation. */
+      if (('=' == ut->op && 0 != result)
+          || ('<' == ut->op && 0 <= result)
+          || ('>' == ut->op && 0 >= result))
+        {
+          return svn_error_createf
+            (SVN_ERR_TEST_FAILED, NULL,
+             "Expected '%s' %c '%s' but got '%s' %c '%s'",
+             ut->taga, ut->op, ut->tagb,
+             ut->taga, (!result ? '=' : (result < 0 ? '<' : '>')), ut->tagb);
+        }
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Test svn_utf__glob() in both SQL LIKE and glob mode.
+ *
+ * Every table row supplies a pattern, a subject string, an optional
+ * escape string and the expected outcome.  A row fails the test when
+ * the call returns an error although a match was expected, when the
+ * reported match flag differs from the expected one, or when a glob-mode
+ * call that was given an escape string returns without an error. */
+static svn_error_t *
+test_utf_pattern_match(apr_pool_t *pool)
+{
+ static const struct glob_test_t {
+ svn_boolean_t sql_like; /* forwarded to svn_utf__glob(): LIKE vs. glob */
+ svn_boolean_t matches; /* expected match result */
+ const char *pattern; /* pattern to match; NULL terminates the table */
+ const char *string; /* subject string */
+ const char *escape; /* escape string, or NULL for none */
+ } glob_tests[] = {
+#define LIKE_MATCH TRUE, TRUE
+#define LIKE_FAIL TRUE, FALSE
+#define GLOB_MATCH FALSE, TRUE
+#define GLOB_FAIL FALSE, FALSE
+ /* Literal matching, including precomposed vs. decomposed characters */
+ {LIKE_FAIL, "", "test", NULL},
+ {GLOB_FAIL, "", "test", NULL},
+ {LIKE_FAIL, "", "%", NULL},
+ {GLOB_FAIL, "", "*", NULL},
+ {LIKE_FAIL, "test", "%", NULL},
+ {GLOB_FAIL, "test", "*", NULL},
+ {LIKE_MATCH, "test", "test", NULL},
+ {GLOB_MATCH, "test", "test", NULL},
+ {LIKE_MATCH, "t\xe1\xb8\x9dst", "te\xcc\xa7\xcc\x86st", NULL},
+ {GLOB_MATCH, "te\xcc\xa7\xcc\x86st", "t\xe1\xb8\x9dst", NULL},
+
+ {LIKE_FAIL, "test", "test", "\xe1\xb8\x9d"}, /* escape char not ascii */
+ {LIKE_FAIL, "test", "test", ""}, /* empty escape string */
+ /* Escaped literals and escaped wildcards */
+ {LIKE_MATCH, "te#st", "test", "#"},
+ {LIKE_FAIL, "te#st", "test", NULL},
+ {GLOB_MATCH, "te\\st", "test", NULL},
+ {LIKE_MATCH, "te##st", "te#st", "#"},
+ {LIKE_FAIL, "te##st", "te#st", NULL},
+ {GLOB_MATCH, "te\\\\st", "te\\st", NULL},
+ {GLOB_FAIL, "te\\\\st", "te\\st", "\\"}, /* escape char with glob */
+ {LIKE_FAIL, "te#%t", "te%t", NULL},
+ {LIKE_MATCH, "te#%t", "te%t", "#"},
+ {GLOB_MATCH, "te\\*t", "te*t", NULL},
+ {LIKE_FAIL, "te#%t", "test", NULL},
+ {GLOB_FAIL, "te\\*t", "test", NULL},
+ {LIKE_FAIL, "te#_t", "te_t", NULL},
+ {LIKE_MATCH, "te#_t", "te_t", "#"},
+ {GLOB_MATCH, "te\\?t", "te?t", NULL},
+ {LIKE_FAIL, "te#_t", "test", NULL},
+ {LIKE_FAIL, "te#_t", "test", "#"},
+ {GLOB_FAIL, "te\\?t", "test", NULL},
+ /* Single-character wildcards: '_' in LIKE mode, '?' in glob mode */
+ {LIKE_MATCH, "_est", "test", NULL},
+ {GLOB_MATCH, "?est", "test", NULL},
+ {LIKE_MATCH, "te_t", "test", NULL},
+ {GLOB_MATCH, "te?t", "test", NULL},
+ {LIKE_MATCH, "tes_", "test", NULL},
+ {GLOB_MATCH, "tes?", "test", NULL},
+ {LIKE_FAIL, "test_", "test", NULL},
+ {GLOB_FAIL, "test?", "test", NULL},
+ /* Square brackets: expected to be literal for LIKE, special for glob */
+ {LIKE_MATCH, "[s%n]", "[subversion]", NULL},
+ {GLOB_FAIL, "[s*n]", "[subversion]", NULL},
+ {LIKE_MATCH, "#[s%n]", "[subversion]", "#"},
+ {GLOB_MATCH, "\\[s*n]", "[subversion]", NULL},
+ /* Glob bracket expressions: sets, negation, ranges, malformed input */
+ {GLOB_MATCH, ".[\\-\\t]", ".t", NULL},
+ {GLOB_MATCH, "test*?*[a-z]*", "testgoop", NULL},
+ {GLOB_MATCH, "te[^x]t", "test", NULL},
+ {GLOB_MATCH, "te[^abc]t", "test", NULL},
+ {GLOB_MATCH, "te[^x]t", "test", NULL},
+ {GLOB_MATCH, "te[!x]t", "test", NULL},
+ {GLOB_FAIL, "te[^x]t", "text", NULL},
+ {GLOB_FAIL, "te[^\\x]t", "text", NULL},
+ {GLOB_FAIL, "te[^x\\", "text", NULL},
+ {GLOB_FAIL, "te[/]t", "text", NULL},
+ {GLOB_MATCH, "te[r-t]t", "test", NULL},
+ {GLOB_MATCH, "te[r-Tz]t", "tezt", NULL},
+ {GLOB_FAIL, "te[R-T]t", "tent", NULL},
+/* {GLOB_MATCH, "tes[]t]", "test", NULL}, */
+ {GLOB_MATCH, "tes[t-]", "test", NULL},
+ {GLOB_MATCH, "tes[t-]]", "test]", NULL},
+ {GLOB_FAIL, "tes[t-]]", "test", NULL},
+ {GLOB_FAIL, "tes[u-]", "test", NULL},
+ {GLOB_FAIL, "tes[t-]", "tes[t-]", NULL},
+ {GLOB_MATCH, "test[/-/]", "test/", NULL},
+ {GLOB_MATCH, "test[\\/-/]", "test/", NULL},
+ {GLOB_MATCH, "test[/-\\/]", "test/", NULL},
+
+#undef LIKE_MATCH
+#undef LIKE_FAIL
+#undef GLOB_MATCH
+#undef GLOB_FAIL
+
+ {FALSE, FALSE, NULL, NULL, NULL}
+ };
+
+ const struct glob_test_t *gt;
+ svn_membuf_t bufa, bufb, bufc;
+ svn_membuf__create(&bufa, 0, pool);
+ svn_membuf__create(&bufb, 0, pool);
+ svn_membuf__create(&bufc, 0, pool);
+ /* Fixed seed: the explicit/implicit length choice below is repeatable. */
+ srand(79);
+ for (gt = glob_tests; gt->pattern; ++gt)
+ {
+ const svn_boolean_t implicit_size = (rand() % 13) & 1;
+ const apr_size_t lenptn = (implicit_size
+ ? SVN_UTF__UNKNOWN_LENGTH
+ : strlen(gt->pattern));
+ const apr_size_t lenstr = (implicit_size
+ ? SVN_UTF__UNKNOWN_LENGTH
+ : strlen(gt->string));
+ const apr_size_t lenesc = (implicit_size
+ ? SVN_UTF__UNKNOWN_LENGTH
+ : (gt->escape ? strlen(gt->escape) : 0));
+ svn_boolean_t match;
+ svn_error_t *err;
+
+ /* The error is captured, not propagated: some rows expect a failure. */
+ err = svn_utf__glob(&match,
+ gt->pattern, lenptn,
+ gt->string, lenstr,
+ gt->escape, lenesc,
+ gt->sql_like, &bufa, &bufb, &bufc);
+ /* A glob-mode call that was given an escape string must report an error. */
+ if (!gt->sql_like && gt->escape && !err)
+ return svn_error_create
+ (SVN_ERR_TEST_FAILED, err, "Failed to detect GLOB ESCAPE");
+ /* MATCH is only read when no error was returned. */
+ if ((err && gt->matches)
+ || (!err && !match != !gt->matches))
+ {
+ if (gt->sql_like)
+ return svn_error_createf
+ (SVN_ERR_TEST_FAILED, err,
+ "Wrong result: %s'%s' LIKE '%s'%s%s%s%s",
+ (gt->matches ? "NOT " : ""), gt->string, gt->pattern,
+ (gt->escape ? " ESCAPE " : ""), (gt->escape ? "'" : ""),
+ (gt->escape ? gt->escape : ""), (gt->escape ? "'" : ""));
+ else
+ return svn_error_createf
+ (SVN_ERR_TEST_FAILED, err, "Wrong result: %s%s GLOB %s",
+ (gt->matches ? "NOT " : ""), gt->string, gt->pattern);
+ }
+ /* An error on a row that expected no match is acceptable; discard it. */
+ if (err)
+ svn_error_clear(err);
+ }
+
+ return SVN_NO_ERROR;
+}
+
+/* Test svn_utf__fuzzy_escape().
+ *
+ * Escapes three inputs (accented Latin text in mixed normalization, the
+ * same text with a Greek omicron substituted, and a string containing
+ * invalid sequences plus an embedded NUL) and compares each result with
+ * the exact expected output string. */
+static svn_error_t *
+test_utf_fuzzy_escape(apr_pool_t *pool)
+{
+
+ /* Accented latin, mixed normalization */
+ static const char mixup[] =
+ "S\xcc\x87\xcc\xa3" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "b\xcc\xb1" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "s\xcc\x8c" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "o\xcc\x80\xcc\x9b" /* o with grave and hook */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+
+ /* As above, but latin lowercase 'o' replaced with Greek 'omicron' */
+ static const char greekish[] =
+ "S\xcc\x87\xcc\xa3" /* S with dot above and below */
+ "\xc5\xaf" /* u with ring */
+ "b\xcc\xb1" /* b with macron below */
+ "\xe1\xb9\xbd" /* v with tilde */
+ "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+ "\xc8\x91" /* r with double grave */
+ "s\xcc\x8c" /* s with caron */
+ "\xe1\xb8\xaf" /* i with diaeresis and acute */
+ "\xce\xbf\xcc\x80\xcc\x9b" /* omicron with grave and hook */
+ "\xe1\xb9\x8b"; /* n with circumflex below */
+
+ /* More interesting invalid characters. */
+ static const char invalid[] =
+ "Not Unicode: \xef\xb7\x91;" /* U+FDD1 */
+ "Out of range: \xf4\x90\x80\x81;" /* U+110001 */
+ "Not UTF-8: \xe6;"
+ "Null byte: \0;";
+
+ const char *fuzzy;
+ /* Expected: accents are dropped, leaving only the ASCII base letters. */
+ fuzzy = svn_utf__fuzzy_escape(mixup, strlen(mixup), pool);
+ SVN_TEST_ASSERT(0 == strcmp(fuzzy, "Subversion"));
+ /* Expected: the omicron is rendered as a {U+xxxx} code point escape. */
+ fuzzy = svn_utf__fuzzy_escape(greekish, strlen(greekish), pool);
+ SVN_TEST_ASSERT(0 == strcmp(fuzzy, "Subversi{U+03BF}n"));
+ /* sizeof - 1 instead of strlen(): the literal has an embedded NUL byte. */
+ fuzzy = svn_utf__fuzzy_escape(invalid, sizeof(invalid) - 1, pool);
+ /*fprintf(stderr, "%s\n", fuzzy);*/
+ SVN_TEST_ASSERT(0 == strcmp(fuzzy,
+ "Not Unicode: {U?FDD1};"
+ "Out of range: ?\\F4?\\90?\\80?\\81;"
+ "Not UTF-8: ?\\E6;"
+ "Null byte: \\0;"));
+
+ return SVN_NO_ERROR;
+}
+
+/* Test svn_utf__is_normalized().
+ *
+ * The function is expected to return TRUE for the NFC string only, and
+ * FALSE for the NFD string, the mixed-normalization string and the
+ * string containing an invalid UTF-8 byte. */
+static svn_error_t *
+test_utf_is_normalized(apr_pool_t *pool)
+{
+  /* Normalized: NFC */
+  static const char nfc[] =
+    "\xe1\xb9\xa8" /* S with dot above and below */
+    "\xc5\xaf" /* u with ring */
+    "\xe1\xb8\x87" /* b with macron below */
+    "\xe1\xb9\xbd" /* v with tilde */
+    "\xe1\xb8\x9d" /* e with breve and cedilla */
+    "\xc8\x91" /* r with double grave */
+    "\xc5\xa1" /* s with caron */
+    "\xe1\xb8\xaf" /* i with diaeresis and acute */
+    "\xe1\xbb\x9d" /* o with grave and hook */
+    "\xe1\xb9\x8b"; /* n with circumflex below */
+
+  /* Normalized: NFD */
+  static const char nfd[] =
+    "S\xcc\xa3\xcc\x87" /* S with dot above and below */
+    "u\xcc\x8a" /* u with ring */
+    "b\xcc\xb1" /* b with macron below */
+    "v\xcc\x83" /* v with tilde */
+    "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+    "r\xcc\x8f" /* r with double grave */
+    "s\xcc\x8c" /* s with caron */
+    "i\xcc\x88\xcc\x81" /* i with diaeresis and acute */
+    "o\xcc\x9b\xcc\x80" /* o with grave and hook */
+    "n\xcc\xad"; /* n with circumflex below */
+
+  /* Mixed, denormalized */
+  static const char mixup[] =
+    "S\xcc\x87\xcc\xa3" /* S with dot above and below */
+    "\xc5\xaf" /* u with ring */
+    "b\xcc\xb1" /* b with macron below */
+    "\xe1\xb9\xbd" /* v with tilde */
+    "e\xcc\xa7\xcc\x86" /* e with breve and cedilla */
+    "\xc8\x91" /* r with double grave */
+    "s\xcc\x8c" /* s with caron */
+    "\xe1\xb8\xaf" /* i with diaeresis and acute */
+    "o\xcc\x80\xcc\x9b" /* o with grave and hook */
+    "\xe1\xb9\x8b"; /* n with circumflex below */
+
+  /* Invalid UTF-8 */
+  static const char invalid[] =
+    "\xe1\xb9\xa8" /* S with dot above and below */
+    "\xc5\xaf" /* u with ring */
+    "\xe1\xb8\x87" /* b with macron below */
+    "\xe1\xb9\xbd" /* v with tilde */
+    "\xe1\xb8\x9d" /* e with breve and cedilla */
+    "\xc8\x91" /* r with double grave */
+    "\xc5\xa1" /* s with caron */
+    "\xe1\xb8\xaf" /* i with diaeresis and acute */
+    "\xe6" /* Invalid byte */
+    "\xe1\xb9\x8b"; /* n with circumflex below */
+
+  /* Use the test-suite assertion, as test_utf_fuzzy_escape() does, so
+     that a wrong answer is reported as a failure of this test. */
+  SVN_TEST_ASSERT(svn_utf__is_normalized(nfc, pool));
+  SVN_TEST_ASSERT(!svn_utf__is_normalized(nfd, pool));
+  SVN_TEST_ASSERT(!svn_utf__is_normalized(mixup, pool));
+  SVN_TEST_ASSERT(!svn_utf__is_normalized(invalid, pool));
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Test svn_utf__utf16_to_utf8() and svn_utf__utf32_to_utf8().
+ *
+ * Converts a table of zero-terminated UTF-16 and UTF-32 inputs, in both
+ * byte orders, and compares each result with the expected UTF-8 bytes.
+ * Swapped or unpaired UTF-16 surrogates are expected to be emitted as
+ * the three-byte encoding of each individual surrogate code unit.
+ * Finally, two counted inputs with an embedded NUL are converted.
+ *
+ * Returns SVN_ERR_TEST_FAILED on the first mismatch; for table rows the
+ * message carries the 1-based row number. */
+static svn_error_t *
+test_utf_conversions(apr_pool_t *pool)
+{
+  static const struct cvt_test_t
+  {
+    svn_boolean_t sixteenbit;   /* TRUE: UTF-16 input; FALSE: UTF-32 */
+    svn_boolean_t bigendian;    /* byte order flag passed to the converter */
+    const char *source;         /* raw input bytes; NULL ends the table */
+    const char *result;         /* expected UTF-8 output */
+  } tests[] = {
+
+#define UTF_32_LE FALSE, FALSE
+#define UTF_32_BE FALSE, TRUE
+#define UTF_16_LE TRUE, FALSE
+#define UTF_16_BE TRUE, TRUE
+
+    /* Normal character conversion */
+    { UTF_32_LE, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test" },
+    { UTF_32_BE, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test" },
+    { UTF_16_LE, "t\0" "e\0" "s\0" "t\0" "\0\0", "test" },
+    { UTF_16_BE, "\0t" "\0e" "\0s" "\0t" "\0\0", "test" },
+
+    /* Valid surrogate pairs */
+    { UTF_16_LE, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
+    { UTF_16_LE, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
+    { UTF_16_LE, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+
+    { UTF_16_BE, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
+    { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
+    { UTF_16_BE, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+
+    /* Swapped, single and trailing surrogate pairs */
+    { UTF_16_LE, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
+    { UTF_16_LE, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*" },
+    { UTF_16_LE, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*" },
+    { UTF_16_LE, "\x1E\xDD" "\0\0", "\xed\xb4\x9e" },
+    { UTF_16_LE, "\xFF\xDB" "\0\0", "\xed\xaf\xbf" },
+
+    { UTF_16_BE, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
+    { UTF_16_BE, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*" },
+    { UTF_16_BE, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*" },
+    { UTF_16_BE, "\xDD\x1E" "\0\0", "\xed\xb4\x9e" },
+    { UTF_16_BE, "\xDB\xFF" "\0\0", "\xed\xaf\xbf" },
+
+#undef UTF_32_LE
+#undef UTF_32_BE
+#undef UTF_16_LE
+#undef UTF_16_BE
+
+    { 0 }
+  };
+
+  const struct cvt_test_t *tc;
+  const svn_string_t *result;
+  int i;
+
+  /* I is the 1-based table row number, used only for failure reports. */
+  for (i = 1, tc = tests; tc->source; ++tc, ++i)
+    {
+      if (tc->sixteenbit)
+        SVN_ERR(svn_utf__utf16_to_utf8(&result, (const void*)tc->source,
+                                       SVN_UTF__UNKNOWN_LENGTH,
+                                       tc->bigendian, pool, pool));
+      else
+        SVN_ERR(svn_utf__utf32_to_utf8(&result, (const void*)tc->source,
+                                       SVN_UTF__UNKNOWN_LENGTH,
+                                       tc->bigendian, pool, pool));
+
+      if (0 != strcmp(result->data, tc->result))
+        return svn_error_createf
+          (SVN_ERR_TEST_FAILED, NULL,
+           "UTF-%s%s conversion test %d produced unexpected UTF-8",
+           (tc->sixteenbit ? "16" : "32"),
+           (tc->bigendian ? "BE" : "LE"), i);
+    }
+
+  /* Test counted strings with NUL characters.  A count of 3 is passed,
+     so the trailing '*' unit lies beyond the counted input; only the
+     first three output bytes are compared. */
+  SVN_ERR(svn_utf__utf16_to_utf8(
+            &result, (void*)("x\0" "\0\0" "y\0" "*\0"), 3,
+            FALSE, pool, pool));
+  SVN_TEST_ASSERT(0 == memcmp(result->data, "x\0y", 3));
+
+  SVN_ERR(svn_utf__utf32_to_utf8(
+            &result,
+            (void*)("\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*"), 3,
+            TRUE, pool, pool));
+  SVN_TEST_ASSERT(0 == memcmp(result->data, "x\0y", 3));
+
+  return SVN_NO_ERROR;
+}
+
+
/* The test table. */
-struct svn_test_descriptor_t test_funcs[] =
+static int max_threads = 1;
+
+static struct svn_test_descriptor_t test_funcs[] =
{
SVN_TEST_NULL,
SVN_TEST_PASS2(utf_validate,
@@ -308,5 +839,17 @@ struct svn_test_descriptor_t test_funcs[] =
"test svn_utf_cstring_to_utf8_ex2"),
SVN_TEST_PASS2(test_utf_cstring_from_utf8_ex2,
"test svn_utf_cstring_from_utf8_ex2"),
+ SVN_TEST_PASS2(test_utf_collated_compare,
+ "test svn_utf__normcmp"),
+ SVN_TEST_PASS2(test_utf_pattern_match,
+ "test svn_utf__glob"),
+ SVN_TEST_PASS2(test_utf_fuzzy_escape,
+ "test svn_utf__fuzzy_escape"),
+ SVN_TEST_PASS2(test_utf_is_normalized,
+ "test svn_utf__is_normalized"),
+ SVN_TEST_PASS2(test_utf_conversions,
+ "test svn_utf__utf{16,32}_to_utf8"),
SVN_TEST_NULL
};
+
+SVN_TEST_MAIN