summaryrefslogtreecommitdiff
path: root/unittest
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.org>2015-07-06 15:50:56 +0400
committerAlexander Barkov <bar@mariadb.org>2015-07-06 15:50:56 +0400
commitb2e324a21fbf481bccd15f9ee4bab819afefbed6 (patch)
tree2b005ed9e20ed4ed1abb22314046e144f1c805d6 /unittest
parent35d8ac350d97557d06edd1cffe7ecc28fc68930a (diff)
downloadmariadb-git-b2e324a21fbf481bccd15f9ee4bab819afefbed6.tar.gz
MDEV-8416 ucs2: compare broken bytes as "greater than any non-broken character"
MDEV-8418 utf16: compare broken bytes as "greater than any non-broken character"
Diffstat (limited to 'unittest')
-rw-r--r--unittest/strings/strings-t.c132
1 files changed, 124 insertions, 8 deletions
diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c
index 4e9ca820981..51537e624f9 100644
--- a/unittest/strings/strings-t.c
+++ b/unittest/strings/strings-t.c
@@ -149,7 +149,7 @@ typedef struct
A1A1 - MB2 or 8BIT+8BIT
E0E0 - MB2
*/
-STRNNCOLL_PARAM strcoll_mb2_common[]=
+static STRNNCOLL_PARAM strcoll_mb2_common[]=
{
/* Compare two good sequences */
{CSTR(""), CSTR(""), 0},
@@ -210,7 +210,7 @@ STRNNCOLL_PARAM strcoll_mb2_common[]=
/*
For character sets that have good mb2 characters A1A1 and F9FE
*/
-STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
+static STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
{
/* Compare two good characters */
{CSTR(""), CSTR("\xF9\xFE"), -1},
@@ -246,7 +246,7 @@ STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
A1A1 - a good mb2 character
F9FE - a bad sequence
*/
-STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
+static STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
{
/* Compare a good character to an illegal or an incomplete sequence */
{CSTR(""), CSTR("\xF9\xFE"), -1},
@@ -283,7 +283,7 @@ STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
F9 - ILSEQ or H2
F9FE - a bad sequence (ILSEQ+XX or H2+ILSEQ)
*/
-STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
+static STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
{
/* Compare two good characters */
{CSTR(""), CSTR("\xA1"), -1},
@@ -323,7 +323,7 @@ STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
and sort in this order:
8181 < A1 < E0E0
*/
-STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
+static STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
{
{CSTR("\x81\x81"), CSTR("\xA1"), -1},
{CSTR("\x81\x81"), CSTR("\xE0\xE0"), -1},
@@ -336,7 +336,7 @@ STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
/*
A shared test for eucjpms and ujis.
*/
-STRNNCOLL_PARAM strcoll_ujis[]=
+static STRNNCOLL_PARAM strcoll_ujis[]=
{
{CSTR("\x8E\xA1"), CSTR("\x8E"), -1}, /* Good MB2 vs incomplete MB2 */
{CSTR("\x8E\xA1"), CSTR("\x8F\xA1"), -1}, /* Good MB2 vs incomplete MB3 */
@@ -347,7 +347,7 @@ STRNNCOLL_PARAM strcoll_ujis[]=
};
-STRNNCOLL_PARAM strcoll_utf8mb3_common[]=
+static STRNNCOLL_PARAM strcoll_utf8mb3_common[]=
{
{CSTR("\xC0"), CSTR("\xC1"), -1}, /* Unused byte vs unused byte */
{CSTR("\xC0"), CSTR("\xFF"), -1}, /* Unused byte vs unused byte */
@@ -369,7 +369,7 @@ STRNNCOLL_PARAM strcoll_utf8mb3_common[]=
};
-STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
+static STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
{
/* Minimum four-byte character: U+10000 == _utf8 0xF0908080 */
{CSTR("\xF0\x90\x80\x80"), CSTR("\xC0"), -1}, /* MB4 vs unused byte */
@@ -412,6 +412,101 @@ STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
};
+static STRNNCOLL_PARAM strcoll_ucs2_common[]=
+{
+ {CSTR("\xC0"), CSTR("\xC1"), -1}, /* Incomlete MB2 vs incomplete MB2 */
+ {CSTR("\xC0"), CSTR("\xFF"), -1}, /* Incomlete MB2 vs incomplete MB2 */
+ {CSTR("\xC2\xA1"), CSTR("\xC0"), -1}, /* MB2 vs incomplete MB2 */
+ {CSTR("\xC2\xA1"), CSTR("\xC2"), -1}, /* MB2 vs incomplete MB2 */
+ {CSTR("\xC2\xA0"), CSTR("\xC2\xA1"), -1}, /* MB2 vs MB2 */
+ {CSTR("\xC2\xA1"), CSTR("\xC2\xA2"), -1}, /* MB2 vs MB2 */
+
+ {CSTR("\xFF\xFF"), CSTR("\x00"),-1}, /* MB2 vs incomplete */
+ {CSTR("\xFF\xFF\xFF\xFF"), CSTR("\x00"),-1}, /* MB2+MB2 vs incomplete */
+ {CSTR("\xFF\xFF\xFF\xFF"), CSTR("\x00\x00\x00"), 1},/* MB2+MB2 vs MB2+incomplete */
+
+ {NULL, 0, NULL, 0, 0}
+};
+
+
+/* Tests that involve comparison to SPACE (explicit, or padded) */
+static STRNNCOLL_PARAM strcoll_ucs2_space[]=
+{
+ {CSTR("\x00\x1F"), CSTR("\x00\x20"), -1}, /* MB2 vs MB2 */
+ {CSTR("\x00\x20"), CSTR("\x00\x21"), -1}, /* MB2 vs MB2 */
+ {CSTR("\x00\x1F"), CSTR(""), -1}, /* MB2 vs empty */
+ {CSTR("\x00\x20"), CSTR(""), 0}, /* MB2 vs empty */
+ {CSTR("\x00\x21"), CSTR(""), 1}, /* MB2 vs empty */
+
+ {NULL, 0, NULL, 0, 0}
+};
+
+
+/* Tests that involve comparison to SPACE (explicit, or padded) */
+static STRNNCOLL_PARAM strcoll_utf16le_space[]=
+{
+ {CSTR("\x1F\x00"), CSTR("\x20\x00"), -1}, /* MB2 vs MB2 */
+ {CSTR("\x20\x00"), CSTR("\x21\x00"), -1}, /* MB2 vs MB2 */
+ {CSTR("\x1F\x00"), CSTR(""), -1}, /* MB2 vs empty */
+ {CSTR("\x20\x00"), CSTR(""), 0}, /* MB2 vs empty */
+ {CSTR("\x21\x00"), CSTR(""), 1}, /* MB2 vs empty */
+
+ {NULL, 0, NULL, 0, 0}
+};
+
+
+static STRNNCOLL_PARAM strcoll_utf16_common[]=
+{
+ /* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDB\x00"),-1},/* MB4 vs broken MB4 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xE0\x00"),-1},/* MB4 vs broken MB4 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xDC\x00"), -1},/* MB4 vs broken MB2 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC"), -1},/* MB4 vs incomplete MB4 */
+
+ /* Maximum four-byte character: U+10FFFF == _utf8 0xF48FBFBF */
+ {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
+ {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
+ {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xD8\x00\xDB\x00"),-1},/* MB4 vs broken MB4 */
+ {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xD8\x00\xE0\x00"),-1},/* MB4 vs broken MB4 */
+ {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\x00"), -1},/* MB4 vs broken MB2 */
+ {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\xFF\xDF"), -1},/* MB4 vs incomplete MB4 */
+
+ /* Broken MB4 vs broken MB4 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* Broken MB4 vs broken MB4 */
+ {CSTR("\xDB\xFF\xE0\xFE"), CSTR("\xDB\xFF\xE0\xFF"),-1},/* Broken MB4 vs broken MB4 */
+
+ {NULL, 0, NULL, 0, 0}
+};
+
+
+static STRNNCOLL_PARAM strcoll_utf16le_common[]=
+{
+ /* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00\xDB"),-1},/* MB4 vs broken MB4 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00\xD0"),-1},/* MB4 vs broken MB4 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xDC"), -1},/* MB4 vs broken MB2 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00"), -1},/* MB4 vs incomplete MB4 */
+
+ /* Maximum four-byte character: U+10FFFF == _utf8 0xF48FBFBF */
+ {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
+ {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
+ {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xD8\x00\xDB"),-1},/* MB4 vs broken MB4 */
+ {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xD8\x00\xE0"),-1},/* MB4 vs broken MB4 */
+ {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xDC"), -1},/* MB4 vs broken MB2 */
+ {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xFF\xDC\x00"), -1},/* MB4 vs incomplete MB4 */
+
+ /* Broken MB4 vs broken MB4 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"),-1},/* Broken MB4 vs broken MB4 */
+ {CSTR("\xFF\xDB\xFE\xE0"), CSTR("\xFF\xDB\xFF\xE0"),-1},/* Broken MB4 vs broken MB4 */
+
+ {NULL, 0, NULL, 0, 0}
+};
+
+
static void
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
{
@@ -528,6 +623,12 @@ test_strcollsp()
failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_8181_A1_E0E0);
failed+= strcollsp(&my_charset_sjis_bin, strcoll_8181_A1_E0E0);
#endif
+#ifdef HAVE_CHARSET_ucs2
+ failed+= strcollsp(&my_charset_ucs2_general_ci, strcoll_ucs2_common);
+ failed+= strcollsp(&my_charset_ucs2_general_ci, strcoll_ucs2_space);
+ failed+= strcollsp(&my_charset_ucs2_bin, strcoll_ucs2_common);
+ failed+= strcollsp(&my_charset_ucs2_bin, strcoll_ucs2_space);
+#endif
#ifdef HAVE_CHARSET_ujis
failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_mb2_common);
failed+= strcollsp(&my_charset_ujis_bin, strcoll_mb2_common);
@@ -536,6 +637,21 @@ test_strcollsp()
failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_ujis);
failed+= strcollsp(&my_charset_ujis_bin, strcoll_ujis);
#endif
+#ifdef HAVE_CHARSET_utf16
+ failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_common);
+ failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_space);
+ failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_common);
+ failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_common);
+ failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_space);
+ failed+= strcollsp(&my_charset_utf16_bin, strcoll_utf16_common);
+
+ failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_ucs2_common);
+ failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_space);
+ failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_common);
+ failed+= strcollsp(&my_charset_utf16le_bin, strcoll_ucs2_common);
+ failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_space);
+ failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_common);
+#endif
#ifdef HAVE_CHARSET_utf8
failed+= strcollsp(&my_charset_utf8_general_ci, strcoll_utf8mb3_common);
failed+= strcollsp(&my_charset_utf8_general_mysql500_ci, strcoll_utf8mb3_common);