1 files changed, 546 insertions, 3 deletions
diff --git a/subversion/tests/libsvn_subr/utf-test.c b/subversion/tests/libsvn_subr/utf-test.c
index 2028e14..dd81ccd 100644
--- a/subversion/tests/libsvn_subr/utf-test.c
+++ b/subversion/tests/libsvn_subr/utf-test.c
@@ -25,6 +25,7 @@
 #include "svn_utf.h"
 #include "svn_pools.h"
 
+#include "private/svn_string_private.h"
 #include "private/svn_utf_private.h"
 
 /* Random number seed.  Yes, it's global, just pretend you can't see it. */
@@ -226,7 +227,7 @@ test_utf_cstring_to_utf8_ex2(apr_pool_t *pool)
       const char *expected_result;
       const char *from_page;
   } tests[] = {
-      {"ascii text\n", "ascii text\n", "unexistant-page"},
+      {"ascii text\n", "ascii text\n", "unexistent-page"},
       {"Edelwei\xdf", "Edelwei\xc3\x9f", "ISO-8859-1"}
   };
 
@@ -266,7 +267,7 @@ test_utf_cstring_from_utf8_ex2(apr_pool_t *pool)
       const char *expected_result;
       const char *to_page;
   } tests[] = {
-      {"ascii text\n", "ascii text\n", "unexistant-page"},
+      {"ascii text\n", "ascii text\n", "unexistent-page"},
       {"Edelwei\xc3\x9f", "Edelwei\xdf", "ISO-8859-1"}
   };
 
@@ -294,10 +295,540 @@ test_utf_cstring_from_utf8_ex2(apr_pool_t *pool)
   return SVN_NO_ERROR;
 }
 
+/* Test svn_utf__normcmp: normalization-independent UTF-8 comparison. */
+static svn_error_t *
+test_utf_collated_compare(apr_pool_t *pool)
+{
+  /* Normalized: NFC */
+  static const char nfc[] =
+    "\xe1\xb9\xa8"              /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "\xe1\xb8\x87"              /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "\xe1\xb8\x9d"              /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "\xc5\xa1"                  /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "\xe1\xbb\x9d"              /* o with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  /* Normalized: NFD */
+  static const char nfd[] =
+    "S\xcc\xa3\xcc\x87"         /* S with dot above and below */
+    "u\xcc\x8a"                 /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "v\xcc\x83"                 /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "r\xcc\x8f"                 /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "i\xcc\x88\xcc\x81"         /* i with diaeresis and acute */
+    "o\xcc\x9b\xcc\x80"         /* o with grave and hook */
+    "n\xcc\xad";                /* n with circumflex below */
+
+  /* Mixed, denormalized */
+  static const char mixup[] =
+    "S\xcc\x87\xcc\xa3"         /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "o\xcc\x80\xcc\x9b"         /* o with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  static const char longer[] =        /* nfc followed by an extra "X" */
+    "\xe1\xb9\xa8"              /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "\xe1\xb8\x87"              /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "\xe1\xb8\x9d"              /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "\xc5\xa1"                  /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "\xe1\xbb\x9d"              /* o with grave and hook */
+    "\xe1\xb9\x8b"              /* n with circumflex below */
+    "X";
+
+  static const char shorter[] =       /* nfc without its final character */
+    "\xe1\xb9\xa8"              /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "\xe1\xb8\x87"              /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "\xe1\xb8\x9d"              /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "\xc5\xa1"                  /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "\xe1\xbb\x9d";             /* o with grave and hook */
+
+  static const char lowcase[] =       /* mixup, but starting with 's' */
+    "s\xcc\x87\xcc\xa3"         /* s with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "o\xcc\x80\xcc\x9b"         /* o with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  static const struct utfcmp_test_t {
+    const char *stra;
+    char op;                    /* expected relation: '=', '<' or '>' */
+    const char *strb;
+    const char *taga;           /* label for stra in failure messages */
+    const char *tagb;           /* label for strb in failure messages */
+  } utfcmp_tests[] = {
+    /* Empty key */
+    {"",  '=', "",  "empty",    "empty"},
+    {"",  '<', "a", "empty",    "nonempty"},
+    {"a", '>', "",  "nonempty", "empty"},
+
+    /* Deterministic ordering */
+    {"a", '<', "b", "a", "b"},
+    {"b", '<', "c", "b", "c"},
+    {"a", '<', "c", "a", "c"},
+
+    /* Normalized equality */
+    {nfc,   '=', nfd,    "nfc",   "nfd"},
+    {nfd,   '=', nfc,    "nfd",   "nfc"},
+    {nfc,   '=', mixup,  "nfc",   "mixup"},
+    {nfd,   '=', mixup,  "nfd",   "mixup"},
+    {mixup, '=', nfd,    "mixup", "nfd"},
+    {mixup, '=', nfc,    "mixup", "nfc"},
+
+    /* Key length, and uppercase ordering before lowercase */
+    {nfc,     '<', longer,    "nfc",     "longer"},
+    {longer,  '>', nfc,       "longer",  "nfc"},
+    {nfd,     '>', shorter,   "nfd",     "shorter"},
+    {shorter, '<', nfd,       "shorter", "nfd"},
+    {mixup,   '<', lowcase,   "mixup",   "lowcase"},
+    {lowcase, '>', mixup,     "lowcase", "mixup"},
+
+    {NULL, 0, NULL, NULL, NULL}
+  };
+
+  /* Table cursor and the scratch buffers passed to svn_utf__normcmp. */
+  const struct utfcmp_test_t *ut;
+  svn_membuf_t bufa, bufb;
+  svn_membuf__create(&bufa, 0, pool);
+  svn_membuf__create(&bufb, 0, pool);
+
+  srand(111);   /* fixed seed for the implicit/explicit length choice below */
+  for (ut = utfcmp_tests; ut->stra; ++ut)
+    {
+      const svn_boolean_t implicit_size = (rand() % 17) & 1;
+      const apr_size_t lena = (implicit_size
+                               ? SVN_UTF__UNKNOWN_LENGTH : strlen(ut->stra));
+      const apr_size_t lenb = (implicit_size
+                               ? SVN_UTF__UNKNOWN_LENGTH : strlen(ut->strb));
+      int result;
+
+      SVN_ERR(svn_utf__normcmp(&result,
+                               ut->stra, lena, ut->strb, lenb,
+                               &bufa, &bufb));
+
+      /* UCS-4 debugging dump of the decomposed strings
+      {
+        const apr_int32_t *const ucsbufa = bufa.data;
+        const apr_int32_t *const ucsbufb = bufb.data;
+        apr_size_t i;
+
+        printf("(%c)%7s %c %s\n", ut->op,
+               ut->taga, (!result ? '=' : (result < 0 ? '<' : '>')), ut->tagb);
+
+        for (i = 0; i < bufa.size || i < bufb.size; ++i)
+        {
+          if (i < bufa.size && i < bufb.size)
+            printf("    U+%04X   U+%04X\n", ucsbufa[i], ucsbufb[i]);
+          else if (i < bufa.size)
+            printf("    U+%04X\n", ucsbufa[i]);
+          else
+            printf("             U+%04X\n", ucsbufb[i]);
+        }
+      }
+      */
+
+      if (('=' == ut->op && 0 != result)
+          || ('<' == ut->op && 0 <= result)
+          || ('>' == ut->op && 0 >= result))
+        {
+          return svn_error_createf
+            (SVN_ERR_TEST_FAILED, NULL,
+             "Expected '%s' %c '%s' but got '%s' %c '%s'",
+             ut->taga, ut->op, ut->tagb,
+             ut->taga, (!result ? '=' : (result < 0 ? '<' : '>')), ut->tagb);
+        }
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
+/* Test svn_utf__glob in both its LIKE (sql_like) and GLOB modes. */
+static svn_error_t *
+test_utf_pattern_match(apr_pool_t *pool)
+{
+  static const struct glob_test_t {
+    svn_boolean_t sql_like;     /* TRUE: LIKE entry; FALSE: GLOB entry */
+    svn_boolean_t matches;      /* whether a match is expected */
+    const char *pattern;
+    const char *string;
+    const char *escape;         /* escape argument; NULL in most entries */
+  } glob_tests[] = {
+#define LIKE_MATCH TRUE, TRUE
+#define LIKE_FAIL  TRUE, FALSE
+#define GLOB_MATCH FALSE, TRUE
+#define GLOB_FAIL  FALSE, FALSE
+
+    {LIKE_FAIL,  "",     "test", NULL},
+    {GLOB_FAIL,  "",     "test", NULL},
+    {LIKE_FAIL,  "",     "%",    NULL},
+    {GLOB_FAIL,  "",     "*",    NULL},
+    {LIKE_FAIL,  "test", "%",    NULL},
+    {GLOB_FAIL,  "test", "*",    NULL},
+    {LIKE_MATCH, "test", "test", NULL},
+    {GLOB_MATCH, "test", "test", NULL},
+    {LIKE_MATCH, "t\xe1\xb8\x9dst", "te\xcc\xa7\xcc\x86st", NULL},
+    {GLOB_MATCH, "te\xcc\xa7\xcc\x86st", "t\xe1\xb8\x9dst", NULL},
+
+    {LIKE_FAIL,  "test", "test", "\xe1\xb8\x9d"}, /* escape char not ascii */
+    {LIKE_FAIL,  "test", "test", ""},             /* empty escape string */
+
+    {LIKE_MATCH, "te#st",    "test",   "#"},
+    {LIKE_FAIL,  "te#st",    "test",   NULL},
+    {GLOB_MATCH, "te\\st",   "test",   NULL},
+    {LIKE_MATCH, "te##st",   "te#st",  "#"},
+    {LIKE_FAIL,  "te##st",   "te#st",  NULL},
+    {GLOB_MATCH, "te\\\\st", "te\\st", NULL},
+    {GLOB_FAIL,  "te\\\\st", "te\\st", "\\"}, /* escape char with glob */
+    {LIKE_FAIL,  "te#%t",    "te%t",   NULL},
+    {LIKE_MATCH, "te#%t",    "te%t",   "#"},
+    {GLOB_MATCH, "te\\*t",   "te*t",   NULL},
+    {LIKE_FAIL,  "te#%t",    "test",   NULL},
+    {GLOB_FAIL,  "te\\*t",   "test",   NULL},
+    {LIKE_FAIL,  "te#_t",    "te_t",   NULL},
+    {LIKE_MATCH, "te#_t",    "te_t",   "#"},
+    {GLOB_MATCH, "te\\?t",   "te?t",   NULL},
+    {LIKE_FAIL,  "te#_t",    "test",   NULL},
+    {LIKE_FAIL,  "te#_t",    "test",   "#"},
+    {GLOB_FAIL,  "te\\?t",   "test",   NULL},
+
+    {LIKE_MATCH, "_est",     "test",   NULL},
+    {GLOB_MATCH, "?est",     "test",   NULL},
+    {LIKE_MATCH, "te_t",     "test",   NULL},
+    {GLOB_MATCH, "te?t",     "test",   NULL},
+    {LIKE_MATCH, "tes_",     "test",   NULL},
+    {GLOB_MATCH, "tes?",     "test",   NULL},
+    {LIKE_FAIL,  "test_",    "test",   NULL},
+    {GLOB_FAIL,  "test?",    "test",   NULL},
+
+    {LIKE_MATCH, "[s%n]",   "[subversion]", NULL},
+    {GLOB_FAIL,  "[s*n]",   "[subversion]", NULL},
+    {LIKE_MATCH, "#[s%n]",  "[subversion]", "#"},
+    {GLOB_MATCH, "\\[s*n]", "[subversion]", NULL},
+
+    {GLOB_MATCH, ".[\\-\\t]", ".t",           NULL},
+    {GLOB_MATCH, "test*?*[a-z]*", "testgoop", NULL},
+    {GLOB_MATCH, "te[^x]t", "test",           NULL},
+    {GLOB_MATCH, "te[^abc]t", "test",         NULL},
+    {GLOB_MATCH, "te[^x]t", "test",           NULL},
+    {GLOB_MATCH, "te[!x]t", "test",           NULL},
+    {GLOB_FAIL,  "te[^x]t", "text",           NULL},
+    {GLOB_FAIL,  "te[^\\x]t", "text",         NULL},
+    {GLOB_FAIL,  "te[^x\\", "text",           NULL},
+    {GLOB_FAIL,  "te[/]t", "text",            NULL},
+    {GLOB_MATCH, "te[r-t]t", "test",          NULL},
+    {GLOB_MATCH, "te[r-Tz]t", "tezt",         NULL},
+    {GLOB_FAIL,  "te[R-T]t", "tent",          NULL},
+/*  {GLOB_MATCH, "tes[]t]", "test",           NULL}, */
+    {GLOB_MATCH, "tes[t-]", "test",           NULL},
+    {GLOB_MATCH, "tes[t-]]", "test]",         NULL},
+    {GLOB_FAIL,  "tes[t-]]", "test",          NULL},
+    {GLOB_FAIL,  "tes[u-]", "test",           NULL},
+    {GLOB_FAIL,  "tes[t-]", "tes[t-]",        NULL},
+    {GLOB_MATCH, "test[/-/]", "test/",        NULL},
+    {GLOB_MATCH, "test[\\/-/]", "test/",      NULL},
+    {GLOB_MATCH, "test[/-\\/]", "test/",      NULL},
+
+#undef LIKE_MATCH
+#undef LIKE_FAIL
+#undef GLOB_MATCH
+#undef GLOB_FAIL
+
+    {FALSE, FALSE, NULL, NULL, NULL}
+  };
+
+  const struct glob_test_t *gt;
+  svn_membuf_t bufa, bufb, bufc;
+  svn_membuf__create(&bufa, 0, pool);
+  svn_membuf__create(&bufb, 0, pool);
+  svn_membuf__create(&bufc, 0, pool);
+
+  srand(79);    /* fixed seed for the implicit/explicit length choice below */
+  for (gt = glob_tests; gt->pattern; ++gt)
+    {
+      const svn_boolean_t implicit_size = (rand() % 13) & 1;
+      const apr_size_t lenptn = (implicit_size
+                                 ? SVN_UTF__UNKNOWN_LENGTH
+                                 : strlen(gt->pattern));
+      const apr_size_t lenstr = (implicit_size
+                                 ? SVN_UTF__UNKNOWN_LENGTH
+                                 : strlen(gt->string));
+      const apr_size_t lenesc = (implicit_size
+                                 ? SVN_UTF__UNKNOWN_LENGTH
+                                 : (gt->escape ? strlen(gt->escape) : 0));
+      svn_boolean_t match;
+      svn_error_t *err;
+
+      /* No SVN_ERR here: a GLOB call given an escape string must fail. */
+      err = svn_utf__glob(&match,
+                          gt->pattern, lenptn,
+                          gt->string, lenstr,
+                          gt->escape, lenesc,
+                          gt->sql_like, &bufa, &bufb, &bufc);
+
+      if (!gt->sql_like && gt->escape && !err)
+        return svn_error_create
+          (SVN_ERR_TEST_FAILED, err, "Failed to detect GLOB ESCAPE");
+
+      if ((err && gt->matches)
+          || (!err && !match != !gt->matches))
+        {
+          if (gt->sql_like)
+            return svn_error_createf
+              (SVN_ERR_TEST_FAILED, err,
+               "Wrong result: %s'%s' LIKE '%s'%s%s%s%s",
+               (gt->matches ? "NOT " : ""), gt->string, gt->pattern,
+               (gt->escape ? " ESCAPE " : ""), (gt->escape ? "'" : ""),
+               (gt->escape ? gt->escape : ""), (gt->escape ? "'" : ""));
+          else
+            return svn_error_createf
+              (SVN_ERR_TEST_FAILED, err, "Wrong result: %s%s GLOB %s",
+               (gt->matches ? "NOT " : ""), gt->string, gt->pattern);
+        }
+
+      if (err)
+        svn_error_clear(err);
+    }
+
+  return SVN_NO_ERROR;
+}
+
+/* Test svn_utf__fuzzy_escape on accented, mixed-script and invalid input. */
+static svn_error_t *
+test_utf_fuzzy_escape(apr_pool_t *pool)
+{
+
+  /* Accented latin, mixed normalization */
+  static const char mixup[] =
+    "S\xcc\x87\xcc\xa3"         /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "o\xcc\x80\xcc\x9b"         /* o with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  /* As above, but latin lowercase 'o' replaced with Greek 'omicron' */
+  static const char greekish[] =
+    "S\xcc\x87\xcc\xa3"         /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "\xce\xbf\xcc\x80\xcc\x9b"  /* omicron with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  /* More interesting invalid characters. */
+  static const char invalid[] =
+    "Not Unicode: \xef\xb7\x91;"      /* U+FDD1 */
+    "Out of range: \xf4\x90\x80\x81;" /* U+110001 */
+    "Not UTF-8: \xe6;"
+    "Null byte: \0;";
+
+  const char *fuzzy;
+  /* Accented Latin letters come back as their unaccented base letters. */
+  fuzzy = svn_utf__fuzzy_escape(mixup, strlen(mixup), pool);
+  SVN_TEST_ASSERT(0 == strcmp(fuzzy, "Subversion"));
+  /* The Greek omicron is not folded to 'o'; it comes back as {U+03BF}. */
+  fuzzy = svn_utf__fuzzy_escape(greekish, strlen(greekish), pool);
+  SVN_TEST_ASSERT(0 == strcmp(fuzzy, "Subversi{U+03BF}n"));
+  /* sizeof - 1 rather than strlen: INVALID contains an embedded NUL byte. */
+  fuzzy = svn_utf__fuzzy_escape(invalid, sizeof(invalid) - 1, pool);
+  /*fprintf(stderr, "%s\n", fuzzy);*/
+  SVN_TEST_ASSERT(0 == strcmp(fuzzy,
+                              "Not Unicode: {U?FDD1};"
+                              "Out of range: ?\\F4?\\90?\\80?\\81;"
+                              "Not UTF-8: ?\\E6;"
+                              "Null byte: \\0;"));
+
+  return SVN_NO_ERROR;
+}
+
+/* Test svn_utf__is_normalized: only the NFC string must be accepted. */
+static svn_error_t *
+test_utf_is_normalized(apr_pool_t *pool)
+{
+  /* Normalized: NFC */
+  static const char nfc[] =
+    "\xe1\xb9\xa8"              /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "\xe1\xb8\x87"              /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "\xe1\xb8\x9d"              /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "\xc5\xa1"                  /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "\xe1\xbb\x9d"              /* o with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  /* Normalized: NFD */
+  static const char nfd[] =
+    "S\xcc\xa3\xcc\x87"         /* S with dot above and below */
+    "u\xcc\x8a"                 /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "v\xcc\x83"                 /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "r\xcc\x8f"                 /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "i\xcc\x88\xcc\x81"         /* i with diaeresis and acute */
+    "o\xcc\x9b\xcc\x80"         /* o with grave and hook */
+    "n\xcc\xad";                /* n with circumflex below */
+
+  /* Mixed, denormalized */
+  static const char mixup[] =
+    "S\xcc\x87\xcc\xa3"         /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "b\xcc\xb1"                 /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "e\xcc\xa7\xcc\x86"         /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "s\xcc\x8c"                 /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "o\xcc\x80\xcc\x9b"         /* o with grave and hook */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  /* Invalid UTF-8 */
+  static const char invalid[] =
+    "\xe1\xb9\xa8"              /* S with dot above and below */
+    "\xc5\xaf"                  /* u with ring */
+    "\xe1\xb8\x87"              /* b with macron below */
+    "\xe1\xb9\xbd"              /* v with tilde */
+    "\xe1\xb8\x9d"              /* e with breve and cedilla */
+    "\xc8\x91"                  /* r with double grave */
+    "\xc5\xa1"                  /* s with caron */
+    "\xe1\xb8\xaf"              /* i with diaeresis and acute */
+    "\xe6"                      /* Invalid byte */
+    "\xe1\xb9\x8b";             /* n with circumflex below */
+
+  SVN_TEST_ASSERT(svn_utf__is_normalized(nfc, pool));
+  SVN_TEST_ASSERT(!svn_utf__is_normalized(nfd, pool));
+  SVN_TEST_ASSERT(!svn_utf__is_normalized(mixup, pool));
+  SVN_TEST_ASSERT(!svn_utf__is_normalized(invalid, pool));
+
+  return SVN_NO_ERROR;
+}
+
+/* Test svn_utf__utf16_to_utf8 and svn_utf__utf32_to_utf8. */
+static svn_error_t *
+test_utf_conversions(apr_pool_t *pool)
+{
+  static const struct cvt_test_t
+  {
+    svn_boolean_t sixteenbit;   /* TRUE: UTF-16 source; FALSE: UTF-32 source */
+    svn_boolean_t bigendian;    /* byte order of the source code units */
+    const char *source;
+    const char *result;         /* expected UTF-8 output */
+  } tests[] = {
+#define UTF_32_LE FALSE, FALSE
+#define UTF_32_BE FALSE, TRUE
+#define UTF_16_LE TRUE, FALSE
+#define UTF_16_BE TRUE, TRUE
+
+    /* Normal character conversion */
+    { UTF_32_LE, "t\0\0\0" "e\0\0\0" "s\0\0\0" "t\0\0\0" "\0\0\0\0", "test" },
+    { UTF_32_BE, "\0\0\0t" "\0\0\0e" "\0\0\0s" "\0\0\0t" "\0\0\0\0", "test" },
+    { UTF_16_LE, "t\0" "e\0" "s\0" "t\0" "\0\0", "test" },
+    { UTF_16_BE, "\0t" "\0e" "\0s" "\0t" "\0\0", "test" },
+
+    /* Valid surrogate pairs */
+    { UTF_16_LE, "\x00\xD8" "\x00\xDC" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
+    { UTF_16_LE, "\x34\xD8" "\x1E\xDD" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
+    { UTF_16_LE, "\xFF\xDB" "\xFD\xDF" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+
+    { UTF_16_BE, "\xD8\x00" "\xDC\x00" "\0\0", "\xf0\x90\x80\x80" }, /* U+010000 */
+    { UTF_16_BE, "\xD8\x34" "\xDD\x1E" "\0\0", "\xf0\x9d\x84\x9e" }, /* U+01D11E */
+    { UTF_16_BE, "\xDB\xFF" "\xDF\xFD" "\0\0", "\xf4\x8f\xbf\xbd" }, /* U+10FFFD */
+
+    /* Swapped, single and trailing surrogate pairs */
+    { UTF_16_LE, "*\0" "\x00\xDC" "\x00\xD8" "*\0\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
+    { UTF_16_LE, "*\0" "\x1E\xDD" "*\0\0\0", "*\xed\xb4\x9e*" },
+    { UTF_16_LE, "*\0" "\xFF\xDB" "*\0\0\0", "*\xed\xaf\xbf*" },
+    { UTF_16_LE, "\x1E\xDD" "\0\0", "\xed\xb4\x9e" },
+    { UTF_16_LE, "\xFF\xDB" "\0\0", "\xed\xaf\xbf" },
+
+    { UTF_16_BE, "\0*" "\xDC\x00" "\xD8\x00" "\0*\0\0", "*\xed\xb0\x80" "\xed\xa0\x80*" },
+    { UTF_16_BE, "\0*" "\xDD\x1E" "\0*\0\0", "*\xed\xb4\x9e*" },
+    { UTF_16_BE, "\0*" "\xDB\xFF" "\0*\0\0", "*\xed\xaf\xbf*" },
+    { UTF_16_BE, "\xDD\x1E" "\0\0", "\xed\xb4\x9e" },
+    { UTF_16_BE, "\xDB\xFF" "\0\0", "\xed\xaf\xbf" },
+
+#undef UTF_32_LE
+#undef UTF_32_BE
+#undef UTF_16_LE
+#undef UTF_16_BE
+
+    { 0 }
+  };
+
+  const struct cvt_test_t *tc;
+  const svn_string_t *result;
+  int i;                        /* 1-based table index, reported on failure */
+
+  for (i = 1, tc = tests; tc->source; ++tc, ++i)
+    {
+      if (tc->sixteenbit)
+        SVN_ERR(svn_utf__utf16_to_utf8(&result, (const void*)tc->source,
+                                       SVN_UTF__UNKNOWN_LENGTH,
+                                       tc->bigendian, pool, pool));
+      else
+        SVN_ERR(svn_utf__utf32_to_utf8(&result, (const void*)tc->source,
+                                       SVN_UTF__UNKNOWN_LENGTH,
+                                       tc->bigendian, pool, pool));
+      if (0 != strcmp(result->data, tc->result))
+        return svn_error_createf(SVN_ERR_TEST_FAILED, NULL,
+                                 "UTF conversion test %d failed", i);
+    }
+
+  /* Counted strings with an embedded NUL; the "*" lies beyond the count. */
+  SVN_ERR(svn_utf__utf16_to_utf8(
+              &result, (void*)("x\0" "\0\0" "y\0" "*\0"), 3,
+              FALSE, pool, pool));
+  SVN_TEST_ASSERT(0 == memcmp(result->data, "x\0y", 3));
+
+  SVN_ERR(svn_utf__utf32_to_utf8(
+              &result,
+              (void*)("\0\0\0x" "\0\0\0\0" "\0\0\0y" "\0\0\0*"), 3,
+              TRUE, pool, pool));
+  SVN_TEST_ASSERT(0 == memcmp(result->data, "x\0y", 3));
+
+  return SVN_NO_ERROR;
+}
 
 /* The test table.  */
 
-struct svn_test_descriptor_t test_funcs[] =
+static int max_threads = 1;
+
+static struct svn_test_descriptor_t test_funcs[] =
   {
     SVN_TEST_NULL,
     SVN_TEST_PASS2(utf_validate,
@@ -308,5 +839,17 @@ struct svn_test_descriptor_t test_funcs[] =
                    "test svn_utf_cstring_to_utf8_ex2"),
     SVN_TEST_PASS2(test_utf_cstring_from_utf8_ex2,
                    "test svn_utf_cstring_from_utf8_ex2"),
+    SVN_TEST_PASS2(test_utf_collated_compare,
+                   "test svn_utf__normcmp"),
+    SVN_TEST_PASS2(test_utf_pattern_match,
+                   "test svn_utf__glob"),
+    SVN_TEST_PASS2(test_utf_fuzzy_escape,
+                   "test svn_utf__fuzzy_escape"),
+    SVN_TEST_PASS2(test_utf_is_normalized,
+                   "test svn_utf__is_normalized"),
+    SVN_TEST_PASS2(test_utf_conversions,
+                   "test svn_utf__utf{16,32}_to_utf8"),
     SVN_TEST_NULL
   };
+
+SVN_TEST_MAIN