diff options
-rw-r--r-- | mysql-test/include/ctype_utf8mb4.inc | 23 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf16.result | 61 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf16le.result | 61 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf8mb4_heap.result | 91 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf8mb4_innodb.result | 91 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf8mb4_myisam.result | 91 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf16.test | 28 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf16le.test | 28 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 4 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 5 | ||||
-rw-r--r-- | unittest/strings/strings-t.c | 41 |
11 files changed, 385 insertions, 139 deletions
diff --git a/mysql-test/include/ctype_utf8mb4.inc b/mysql-test/include/ctype_utf8mb4.inc index a1b7d144c5d..152316e6158 100644 --- a/mysql-test/include/ctype_utf8mb4.inc +++ b/mysql-test/include/ctype_utf8mb4.inc @@ -1808,16 +1808,21 @@ DROP TABLE t1; --echo # --echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character" --echo # -CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a)); -INSERT INTO t1 VALUES (0x61); -INSERT INTO t1 VALUES (0xC280),(0xDFBF); -INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF); -INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF); -SELECT HEX(a) FROM t1 ORDER BY a; -SELECT HEX(a) FROM t1 ORDER BY a DESC; +CREATE TABLE t1 ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id) +); +INSERT INTO t1 (a) VALUES (0x61); +INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF); +INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF); +INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF); +SELECT id,HEX(a) FROM t1 ORDER BY a,id; +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +SELECT COUNT(DISTINCT a) FROM t1; ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; -SELECT HEX(a) FROM t1 ORDER BY a; -SELECT HEX(a) FROM t1 ORDER BY a DESC; +SELECT id,HEX(a) FROM t1 ORDER BY a; +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +SELECT COUNT(DISTINCT a) FROM t1; DROP TABLE t1; --echo # diff --git a/mysql-test/r/ctype_utf16.result b/mysql-test/r/ctype_utf16.result index 2d91ce3dd6f..4d8f2d38a03 100644 --- a/mysql-test/r/ctype_utf16.result +++ b/mysql-test/r/ctype_utf16.result @@ -2127,3 +2127,64 @@ DEALLOCATE PREPARE stmt; # # End of 10.0 tests # +# +# Start of 10.1 tests +# +# +# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character" +# +CREATE TABLE t1 ( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +a VARCHAR(10) CHARACTER SET utf16, KEY(a,id) +); +INSERT INTO t1 (a) VALUES (_utf8mb4 0x61); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF); +SELECT id,HEX(a) FROM t1 ORDER BY a,id; +id HEX(a) +1 0061 +2 0080 +3 07FF +4 0800 +6 D800DC00 +7 DBFFDFFF +5 FFFF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +5 FFFF +7 DBFFDFFF +6 D800DC00 +4 0800 +3 07FF +2 0080 +1 0061 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +6 +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16 COLLATE utf16_bin; +SELECT id,HEX(a) FROM t1 ORDER BY a; +id HEX(a) +1 0061 +2 0080 +3 07FF +4 0800 +5 FFFF +6 D800DC00 +7 DBFFDFFF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +7 DBFFDFFF +6 D800DC00 +5 FFFF +4 0800 +3 07FF +2 0080 +1 0061 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +7 +DROP TABLE t1; +# +# End of 10.1 tests +# diff --git a/mysql-test/r/ctype_utf16le.result b/mysql-test/r/ctype_utf16le.result index 8098b0d1666..c980743ce94 100644 --- a/mysql-test/r/ctype_utf16le.result +++ b/mysql-test/r/ctype_utf16le.result @@ -2319,3 +2319,64 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF # # End of 5.6 tests # +# +# Start of 10.1 tests +# +# +# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character" +# +CREATE TABLE t1 ( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +a VARCHAR(10) CHARACTER SET utf16le, KEY(a,id) +); +INSERT INTO t1 (a) VALUES (_utf8mb4 0x61); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF); +SELECT id,HEX(a) FROM t1 ORDER BY a,id; +id HEX(a) +1 6100 +2 8000 +3 FF07 +4 0008 +6 00D800DC +7 FFDBFFDF +5 FFFF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +5 FFFF +7 FFDBFFDF +6 00D800DC +4 0008 +3 FF07 +2 8000 +1 6100 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +6 +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16le COLLATE utf16le_bin; +SELECT id,HEX(a) FROM t1 ORDER BY a; +id HEX(a) +1 6100 +2 8000 +3 FF07 +4 0008 +5 FFFF +6 00D800DC +7 FFDBFFDF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +7 FFDBFFDF +6 00D800DC +5 FFFF +4 0008 +3 FF07 +2 8000 +1 6100 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +7 +DROP TABLE t1; +# +# End of 10.1 tests +# diff --git a/mysql-test/r/ctype_utf8mb4_heap.result b/mysql-test/r/ctype_utf8mb4_heap.result index 78cfe1da597..f60bf124a9f 100644 --- a/mysql-test/r/ctype_utf8mb4_heap.result +++ b/mysql-test/r/ctype_utf8mb4_heap.result @@ -2500,48 +2500,57 @@ DROP TABLE t1; # # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character" # -CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a)); -INSERT INTO t1 VALUES (0x61); -INSERT INTO t1 VALUES (0xC280),(0xDFBF); -INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF); -INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF); -SELECT HEX(a) FROM t1 ORDER BY a; -HEX(a) -61 -C280 -DFBF -E0A080 -EFBFBF -F0908080 -F48FBFBF -SELECT HEX(a) FROM t1 ORDER BY a DESC; -HEX(a) -F48FBFBF -F0908080 -EFBFBF -E0A080 -DFBF -C280 -61 +CREATE TABLE t1 ( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id) +); +INSERT INTO t1 (a) VALUES (0x61); +INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF); +INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF); +INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF); +SELECT id,HEX(a) FROM t1 ORDER BY a,id; +id HEX(a) +1 61 +2 C280 +3 DFBF +4 E0A080 +6 F0908080 +7 F48FBFBF +5 EFBFBF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +5 EFBFBF +7 F48FBFBF +6 F0908080 +4 E0A080 +3 DFBF +2 C280 +1 61 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +6 ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; -SELECT HEX(a) FROM t1 ORDER BY a; -HEX(a) -61 -C280 -DFBF -E0A080 -EFBFBF -F0908080 -F48FBFBF -SELECT HEX(a) FROM t1 ORDER BY a DESC; -HEX(a) -F48FBFBF -F0908080 -EFBFBF -E0A080 -DFBF -C280 -61 +SELECT id,HEX(a) FROM t1 ORDER BY a; +id HEX(a) +1 61 +2 C280 +3 DFBF +4 E0A080 +5 EFBFBF +6 F0908080 +7 F48FBFBF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +7 F48FBFBF +6 F0908080 +5 EFBFBF +4 E0A080 +3 DFBF +2 C280 +1 61 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +7 DROP TABLE t1; # # ctype_utf8mb4.inc: End of 10.1 tests diff --git a/mysql-test/r/ctype_utf8mb4_innodb.result b/mysql-test/r/ctype_utf8mb4_innodb.result index 722c03bdff9..f904ff6f99e 100644 --- a/mysql-test/r/ctype_utf8mb4_innodb.result +++ b/mysql-test/r/ctype_utf8mb4_innodb.result @@ -2647,48 +2647,57 @@ DROP TABLE t1; # # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character" # -CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a)); -INSERT INTO t1 VALUES (0x61); -INSERT INTO t1 VALUES (0xC280),(0xDFBF); -INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF); -INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF); -SELECT HEX(a) FROM t1 ORDER BY a; -HEX(a) -61 -C280 -DFBF -E0A080 -EFBFBF -F0908080 -F48FBFBF -SELECT HEX(a) FROM t1 ORDER BY a DESC; -HEX(a) -F48FBFBF -F0908080 -EFBFBF -E0A080 -DFBF -C280 -61 +CREATE TABLE t1 ( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id) +); +INSERT INTO t1 (a) VALUES (0x61); +INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF); +INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF); +INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF); +SELECT id,HEX(a) FROM t1 ORDER BY a,id; +id HEX(a) +1 61 +2 C280 +3 DFBF +4 E0A080 +6 F0908080 +7 F48FBFBF +5 EFBFBF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +5 EFBFBF +7 F48FBFBF +6 F0908080 +4 E0A080 +3 DFBF +2 C280 +1 61 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +6 ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; -SELECT HEX(a) FROM t1 ORDER BY a; -HEX(a) -61 -C280 -DFBF -E0A080 -EFBFBF -F0908080 -F48FBFBF -SELECT HEX(a) FROM t1 ORDER BY a DESC; -HEX(a) -F48FBFBF -F0908080 -EFBFBF -E0A080 -DFBF -C280 -61 +SELECT id,HEX(a) FROM t1 ORDER BY a; +id HEX(a) +1 61 +2 C280 +3 DFBF +4 E0A080 +5 EFBFBF +6 F0908080 +7 F48FBFBF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +7 F48FBFBF +6 F0908080 +5 EFBFBF +4 E0A080 +3 DFBF +2 C280 +1 61 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +7 DROP TABLE t1; # # ctype_utf8mb4.inc: End of 10.1 tests diff --git a/mysql-test/r/ctype_utf8mb4_myisam.result b/mysql-test/r/ctype_utf8mb4_myisam.result index f391f3fbba1..e44421d1410 100644 --- a/mysql-test/r/ctype_utf8mb4_myisam.result +++ b/mysql-test/r/ctype_utf8mb4_myisam.result @@ -2647,48 +2647,57 @@ DROP TABLE t1; # # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character" # -CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a)); -INSERT INTO t1 VALUES (0x61); -INSERT INTO t1 VALUES (0xC280),(0xDFBF); -INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF); -INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF); -SELECT HEX(a) FROM t1 ORDER BY a; -HEX(a) -61 -C280 -DFBF -E0A080 -EFBFBF -F0908080 -F48FBFBF -SELECT HEX(a) FROM t1 ORDER BY a DESC; -HEX(a) -F48FBFBF -F0908080 -EFBFBF -E0A080 -DFBF -C280 -61 +CREATE TABLE t1 ( +id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, +a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id) +); +INSERT INTO t1 (a) VALUES (0x61); +INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF); +INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF); +INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF); +SELECT id,HEX(a) FROM t1 ORDER BY a,id; +id HEX(a) +1 61 +2 C280 +3 DFBF +4 E0A080 +6 F0908080 +7 F48FBFBF +5 EFBFBF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +5 EFBFBF +7 F48FBFBF +6 F0908080 +4 E0A080 +3 DFBF +2 C280 +1 61 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +6 ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; -SELECT HEX(a) FROM t1 ORDER BY a; -HEX(a) -61 -C280 -DFBF -E0A080 -EFBFBF -F0908080 -F48FBFBF -SELECT HEX(a) FROM t1 ORDER BY a DESC; -HEX(a) -F48FBFBF -F0908080 -EFBFBF -E0A080 -DFBF -C280 -61 +SELECT id,HEX(a) FROM t1 ORDER BY a; +id HEX(a) +1 61 +2 C280 +3 DFBF +4 E0A080 +5 EFBFBF +6 F0908080 +7 F48FBFBF +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +id HEX(a) +7 F48FBFBF +6 F0908080 +5 EFBFBF +4 E0A080 +3 DFBF +2 C280 +1 61 +SELECT COUNT(DISTINCT a) FROM t1; +COUNT(DISTINCT a) +7 DROP TABLE t1; # # ctype_utf8mb4.inc: End of 10.1 tests diff --git a/mysql-test/t/ctype_utf16.test b/mysql-test/t/ctype_utf16.test index e4305ed9879..8ea6ea67f1f 100644 --- a/mysql-test/t/ctype_utf16.test +++ b/mysql-test/t/ctype_utf16.test @@ -860,3 +860,31 @@ DEALLOCATE PREPARE stmt; --echo # --echo # End of 10.0 tests --echo # + +--echo # +--echo # Start of 10.1 tests +--echo # + +--echo # +--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character" +--echo # +CREATE TABLE t1 ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + a VARCHAR(10) CHARACTER SET utf16, KEY(a,id) +); +INSERT INTO t1 (a) VALUES (_utf8mb4 0x61); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF); +SELECT id,HEX(a) FROM t1 ORDER BY a,id; +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +SELECT COUNT(DISTINCT a) FROM t1; +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16 COLLATE utf16_bin; +SELECT id,HEX(a) FROM t1 ORDER BY a; +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +SELECT COUNT(DISTINCT a) FROM t1; +DROP TABLE t1; + +--echo # +--echo # End of 10.1 tests +--echo # diff --git a/mysql-test/t/ctype_utf16le.test b/mysql-test/t/ctype_utf16le.test index a8326900847..f5998fec18c 100644 --- a/mysql-test/t/ctype_utf16le.test +++ b/mysql-test/t/ctype_utf16le.test @@ -744,3 +744,31 @@ SET NAMES utf8, collation_connection=utf16le_bin; --echo # --echo # End of 5.6 tests --echo # + +--echo # +--echo # Start of 10.1 tests +--echo # + +--echo # +--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character" +--echo # +CREATE TABLE t1 ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + a VARCHAR(10) CHARACTER SET utf16le, KEY(a,id) +); +INSERT INTO t1 (a) VALUES (_utf8mb4 0x61); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF); +INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF); +SELECT id,HEX(a) FROM t1 ORDER BY a,id; +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +SELECT COUNT(DISTINCT a) FROM t1; +ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16le COLLATE utf16le_bin; +SELECT id,HEX(a) FROM t1 ORDER BY a; +SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC; +SELECT COUNT(DISTINCT a) FROM t1; +DROP TABLE t1; + +--echo # +--echo # End of 10.1 tests +--echo # diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 90aa1a93bed..41f6a90506a 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1216,7 +1216,7 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1) #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1) -#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3)) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER #include "strcoll.ic" #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin @@ -1665,7 +1665,7 @@ struct charset_info_st my_charset_utf16_bin= #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci #define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) #define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0) -#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2)) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER #include "strcoll.ic" #define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 2fc53e84b5c..259928130b9 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -7775,10 +7775,9 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs, #define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8_general_ci(b0,b1) #define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8_general_ci(b0,b1,b2) /* - There is no mapping between code point and weight for non-BMP characters - in utf8mb4_general_ci. Just using code point as weight. + All non-BMP characters have the same weight. */ -#define WEIGHT_MB4(b0,b1,b2,b3) UTF8MB4_CODE(b0,b1,b2,b3) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER #include "strcoll.ic" diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index 51537e624f9..65a7f1e1155 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -412,6 +412,18 @@ static STRNNCOLL_PARAM strcoll_utf8mb4_common[]= }; +static STRNNCOLL_PARAM strcoll_utf8mb4_general_ci[]= +{ + /* All non-BMP characters are equal in utf8mb4_general_ci */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\x81"),0},/* Non-BMB MB4 vs non-BMP MB4 */ + {CSTR("\xF0\x90\x80\x80"), CSTR("\xF4\x8F\xBF\xBF"),0},/* Non-BMB MB4 vs non-BMP MB4 */ + {CSTR("\x00"), CSTR("\xF0\x90\x80\x80"),-1},/* U+0000 vs non-BMP MB4 */ + {CSTR("\x00"), CSTR("\xF0\x90\x80\x81"),-1},/* U+0000 vs non-BMP MB4 */ + {CSTR("\x00"), CSTR("\xF4\x8F\xBF\xBF"),-1},/* U+0000 vs non-BMP MB4 */ + {NULL, 0, NULL, 0, 0} +}; + + static STRNNCOLL_PARAM strcoll_ucs2_common[]= { {CSTR("\xC0"), CSTR("\xC1"), -1}, /* Incomlete MB2 vs incomplete MB2 */ @@ -474,13 +486,24 @@ static STRNNCOLL_PARAM strcoll_utf16_common[]= {CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\xFF\xDF"), -1},/* MB4 vs incomplete MB4 */ /* Broken MB4 vs broken MB4 */ - {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* Broken MB4 vs broken MB4 */ + {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDB\x01"),-1},/* Broken MB4 vs broken MB4 */ {CSTR("\xDB\xFF\xE0\xFE"), CSTR("\xDB\xFF\xE0\xFF"),-1},/* Broken MB4 vs broken MB4 */ {NULL, 0, NULL, 0, 0} }; +static STRNNCOLL_PARAM strcoll_utf16_general_ci[]= +{ + /* All non-BMP characters are compared as equal */ + {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"), 0},/* Non-BMP MB4 vs non-BMP MB4 */ + {CSTR("\xD8\x00\xDC\x00"), CSTR("\xDB\xFF\xDF\xFF"), 0},/* Non-BMP MB4 vs non-BMP MB4 */ + {CSTR("\x00\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* U+0000 vs non-BMP MB4 */ + {CSTR("\x00\x00"), CSTR("\xDB\xFF\xDF\xFF"),-1},/* U+0000 vs non-BMP MB4 */ + {NULL, 0, NULL, 0, 0} +}; + + static STRNNCOLL_PARAM strcoll_utf16le_common[]= { /* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */ @@ -500,13 +523,24 @@ static STRNNCOLL_PARAM strcoll_utf16le_common[]= {CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xFF\xDC\x00"), -1},/* MB4 vs incomplete MB4 */ /* Broken MB4 vs broken MB4 */ - {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"),-1},/* Broken MB4 vs broken MB4 */ + {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDB"),-1},/* Broken MB4 vs broken MB4 */ {CSTR("\xFF\xDB\xFE\xE0"), CSTR("\xFF\xDB\xFF\xE0"),-1},/* Broken MB4 vs broken MB4 */ {NULL, 0, NULL, 0, 0} }; +static STRNNCOLL_PARAM strcoll_utf16le_general_ci[]= +{ + /* All non-BMP characters are compared as equal */ + {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"), 0},/* Non-BMP MB4 vs non-BMP MB4 */ + {CSTR("\x00\xD8\x00\xDC"), CSTR("\xFF\xDB\xFF\xDF"), 0},/* Non-BMP MB4 vs non-BMP MB4 */ + {CSTR("\x00\x00"), CSTR("\x00\xD8\x01\xDC"), -1},/* U+0000 vs non-BMP MB4 */ + {CSTR("\x00\x00"), CSTR("\xFF\xDB\xFF\xDF"), -1},/* U+0000 vs non-BMP MB4 */ + {NULL, 0, NULL, 0, 0} +}; + + static void str2hex(char *dst, size_t dstlen, const char *src, size_t srclen) { @@ -641,6 +675,7 @@ test_strcollsp() failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_common); failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_space); failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_common); + failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_general_ci); failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_common); failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_space); failed+= strcollsp(&my_charset_utf16_bin, strcoll_utf16_common); @@ -648,6 +683,7 @@ test_strcollsp() failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_ucs2_common); failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_space); failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_common); + failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_general_ci); failed+= strcollsp(&my_charset_utf16le_bin, strcoll_ucs2_common); failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_space); failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_common); @@ -661,6 +697,7 @@ test_strcollsp() failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb3_common); failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb3_common); failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_common); + failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_general_ci); failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb4_common); #endif return failed; |