diff options
author | Alexander Barkov <bar@mariadb.org> | 2015-07-06 10:47:39 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2015-07-06 10:47:39 +0400 |
commit | 35d8ac350d97557d06edd1cffe7ecc28fc68930a (patch) | |
tree | 512910172c0cd233303796e1dfb51cad3180e4ae /unittest | |
parent | 7ab7f5327a11c9d167fe4300b3d4abe38b704f48 (diff) | |
download | mariadb-git-35d8ac350d97557d06edd1cffe7ecc28fc68930a.tar.gz |
MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
Diffstat (limited to 'unittest')
-rw-r--r-- | unittest/strings/strings-t.c | 49 |
1 file changed, 49 insertions, 0 deletions
diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index 6da7a0cc72f..4e9ca820981 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -369,6 +369,49 @@ STRNNCOLL_PARAM strcoll_utf8mb3_common[]= }; +STRNNCOLL_PARAM strcoll_utf8mb4_common[]= +{ + /* Minimum four-byte character: U+10000 == _utf8 0xF0908080 */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xC0"), -1}, /* MB4 vs unused byte */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xC2"), -1}, /* MB4 vs incomplete MB2 */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0\x7F"),-1}, /* MB4 vs broken MB3 */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0\xC0"),-1}, /* MB4 vs broken MB3 */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0"), -1}, /* MB4 vs incomplete MB3 */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80"),-1}, /* MB4 vs incomplete MB4 */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\x7F"),-1},/* MB4 vs broken MB4 */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\xC0"),-1},/* MB4 vs broken MB4 */ + + /* Maximum four-byte character: U+10FFFF == _utf8 0xF48FBFBF */ + {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xC0"), -1}, /* MB4 vs unused byte */ + {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xC2"), -1}, /* MB4 vs incomplete MB2 */ + {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0\x7F"),-1}, /* MB4 vs broken MB3 */ + {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0\xC0"),-1}, /* MB4 vs broken MB3 */ + {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0"), -1}, /* MB4 vs incomplete MB3 */ + {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80"),-1}, /* MB4 vs incomplete MB4 */ + {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80\x7F"),-1},/* MB4 vs broken MB4 */ + {CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80\xC0"),-1},/* MB4 vs broken MB4 */ + + /* Broken MB4 vs incomplete/broken MB3 */ + {CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0"), 1}, /* Broken MB4 vs incomplete MB3 */ + {CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0\x7F"),1}, /* Broken MB4 vs broken MB3 */ + {CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0\xC0"),1}, /* Broken MB4 vs broken MB3 */ + + 
/* + Broken MB4 vs incomplete MB4: + The three leftmost bytes are compared binary, the fourth byte is compared + to auto-padded space. + */ + {CSTR("\xF0\x90\x80\x1F"), CSTR("\xF0\x90\x80"),-1}, /* Broken MB4 vs incomplete MB4 */ + {CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80"),1}, /* Broken MB4 vs incomplete MB4 */ + + /* Broken MB4 vs broken MB4 */ + {CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80\x7F"),-1},/* Broken MB4 vs broken MB4 */ + {CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80\xC0"),-1},/* Broken MB4 vs broken MB4 */ + + {NULL, 0, NULL, 0, 0} +}; + + static void str2hex(char *dst, size_t dstlen, const char *src, size_t srclen) { @@ -498,6 +541,12 @@ test_strcollsp() failed+= strcollsp(&my_charset_utf8_general_mysql500_ci, strcoll_utf8mb3_common); failed+= strcollsp(&my_charset_utf8_bin, strcoll_utf8mb3_common); #endif +#ifdef HAVE_CHARSET_utf8mb4 + failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb3_common); + failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb3_common); + failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_common); + failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb4_common); +#endif return failed; } |