summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mysql-test/include/ctype_utf8mb4.inc23
-rw-r--r--mysql-test/r/ctype_utf16.result61
-rw-r--r--mysql-test/r/ctype_utf16le.result61
-rw-r--r--mysql-test/r/ctype_utf8mb4_heap.result91
-rw-r--r--mysql-test/r/ctype_utf8mb4_innodb.result91
-rw-r--r--mysql-test/r/ctype_utf8mb4_myisam.result91
-rw-r--r--mysql-test/t/ctype_utf16.test28
-rw-r--r--mysql-test/t/ctype_utf16le.test28
-rw-r--r--strings/ctype-ucs2.c4
-rw-r--r--strings/ctype-utf8.c5
-rw-r--r--unittest/strings/strings-t.c41
11 files changed, 385 insertions, 139 deletions
diff --git a/mysql-test/include/ctype_utf8mb4.inc b/mysql-test/include/ctype_utf8mb4.inc
index a1b7d144c5d..152316e6158 100644
--- a/mysql-test/include/ctype_utf8mb4.inc
+++ b/mysql-test/include/ctype_utf8mb4.inc
@@ -1808,16 +1808,21 @@ DROP TABLE t1;
--echo #
--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
--echo #
-CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a));
-INSERT INTO t1 VALUES (0x61);
-INSERT INTO t1 VALUES (0xC280),(0xDFBF);
-INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF);
-INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF);
-SELECT HEX(a) FROM t1 ORDER BY a;
-SELECT HEX(a) FROM t1 ORDER BY a DESC;
+CREATE TABLE t1 (
+ id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id)
+);
+INSERT INTO t1 (a) VALUES (0x61);
+INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF);
+INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF);
+INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF);
+SELECT id,HEX(a) FROM t1 ORDER BY a,id;
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+SELECT COUNT(DISTINCT a) FROM t1;
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
-SELECT HEX(a) FROM t1 ORDER BY a;
-SELECT HEX(a) FROM t1 ORDER BY a DESC;
+SELECT id,HEX(a) FROM t1 ORDER BY a;
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+SELECT COUNT(DISTINCT a) FROM t1;
DROP TABLE t1;
--echo #
diff --git a/mysql-test/r/ctype_utf16.result b/mysql-test/r/ctype_utf16.result
index 2d91ce3dd6f..4d8f2d38a03 100644
--- a/mysql-test/r/ctype_utf16.result
+++ b/mysql-test/r/ctype_utf16.result
@@ -2127,3 +2127,64 @@ DEALLOCATE PREPARE stmt;
#
# End of 10.0 tests
#
+#
+# Start of 10.1 tests
+#
+#
+# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
+#
+CREATE TABLE t1 (
+id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+a VARCHAR(10) CHARACTER SET utf16, KEY(a,id)
+);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0x61);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF);
+SELECT id,HEX(a) FROM t1 ORDER BY a,id;
+id HEX(a)
+1 0061
+2 0080
+3 07FF
+4 0800
+6 D800DC00
+7 DBFFDFFF
+5 FFFF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+5 FFFF
+7 DBFFDFFF
+6 D800DC00
+4 0800
+3 07FF
+2 0080
+1 0061
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+6
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16 COLLATE utf16_bin;
+SELECT id,HEX(a) FROM t1 ORDER BY a;
+id HEX(a)
+1 0061
+2 0080
+3 07FF
+4 0800
+5 FFFF
+6 D800DC00
+7 DBFFDFFF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+7 DBFFDFFF
+6 D800DC00
+5 FFFF
+4 0800
+3 07FF
+2 0080
+1 0061
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+7
+DROP TABLE t1;
+#
+# End of 10.1 tests
+#
diff --git a/mysql-test/r/ctype_utf16le.result b/mysql-test/r/ctype_utf16le.result
index 8098b0d1666..c980743ce94 100644
--- a/mysql-test/r/ctype_utf16le.result
+++ b/mysql-test/r/ctype_utf16le.result
@@ -2319,3 +2319,64 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
#
# End of 5.6 tests
#
+#
+# Start of 10.1 tests
+#
+#
+# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
+#
+CREATE TABLE t1 (
+id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+a VARCHAR(10) CHARACTER SET utf16le, KEY(a,id)
+);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0x61);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF);
+SELECT id,HEX(a) FROM t1 ORDER BY a,id;
+id HEX(a)
+1 6100
+2 8000
+3 FF07
+4 0008
+6 00D800DC
+7 FFDBFFDF
+5 FFFF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+5 FFFF
+7 FFDBFFDF
+6 00D800DC
+4 0008
+3 FF07
+2 8000
+1 6100
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+6
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16le COLLATE utf16le_bin;
+SELECT id,HEX(a) FROM t1 ORDER BY a;
+id HEX(a)
+1 6100
+2 8000
+3 FF07
+4 0008
+5 FFFF
+6 00D800DC
+7 FFDBFFDF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+7 FFDBFFDF
+6 00D800DC
+5 FFFF
+4 0008
+3 FF07
+2 8000
+1 6100
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+7
+DROP TABLE t1;
+#
+# End of 10.1 tests
+#
diff --git a/mysql-test/r/ctype_utf8mb4_heap.result b/mysql-test/r/ctype_utf8mb4_heap.result
index 78cfe1da597..f60bf124a9f 100644
--- a/mysql-test/r/ctype_utf8mb4_heap.result
+++ b/mysql-test/r/ctype_utf8mb4_heap.result
@@ -2500,48 +2500,57 @@ DROP TABLE t1;
#
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
#
-CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a));
-INSERT INTO t1 VALUES (0x61);
-INSERT INTO t1 VALUES (0xC280),(0xDFBF);
-INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF);
-INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF);
-SELECT HEX(a) FROM t1 ORDER BY a;
-HEX(a)
-61
-C280
-DFBF
-E0A080
-EFBFBF
-F0908080
-F48FBFBF
-SELECT HEX(a) FROM t1 ORDER BY a DESC;
-HEX(a)
-F48FBFBF
-F0908080
-EFBFBF
-E0A080
-DFBF
-C280
-61
+CREATE TABLE t1 (
+id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id)
+);
+INSERT INTO t1 (a) VALUES (0x61);
+INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF);
+INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF);
+INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF);
+SELECT id,HEX(a) FROM t1 ORDER BY a,id;
+id HEX(a)
+1 61
+2 C280
+3 DFBF
+4 E0A080
+6 F0908080
+7 F48FBFBF
+5 EFBFBF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+5 EFBFBF
+7 F48FBFBF
+6 F0908080
+4 E0A080
+3 DFBF
+2 C280
+1 61
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+6
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
-SELECT HEX(a) FROM t1 ORDER BY a;
-HEX(a)
-61
-C280
-DFBF
-E0A080
-EFBFBF
-F0908080
-F48FBFBF
-SELECT HEX(a) FROM t1 ORDER BY a DESC;
-HEX(a)
-F48FBFBF
-F0908080
-EFBFBF
-E0A080
-DFBF
-C280
-61
+SELECT id,HEX(a) FROM t1 ORDER BY a;
+id HEX(a)
+1 61
+2 C280
+3 DFBF
+4 E0A080
+5 EFBFBF
+6 F0908080
+7 F48FBFBF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+7 F48FBFBF
+6 F0908080
+5 EFBFBF
+4 E0A080
+3 DFBF
+2 C280
+1 61
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+7
DROP TABLE t1;
#
# ctype_utf8mb4.inc: End of 10.1 tests
diff --git a/mysql-test/r/ctype_utf8mb4_innodb.result b/mysql-test/r/ctype_utf8mb4_innodb.result
index 722c03bdff9..f904ff6f99e 100644
--- a/mysql-test/r/ctype_utf8mb4_innodb.result
+++ b/mysql-test/r/ctype_utf8mb4_innodb.result
@@ -2647,48 +2647,57 @@ DROP TABLE t1;
#
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
#
-CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a));
-INSERT INTO t1 VALUES (0x61);
-INSERT INTO t1 VALUES (0xC280),(0xDFBF);
-INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF);
-INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF);
-SELECT HEX(a) FROM t1 ORDER BY a;
-HEX(a)
-61
-C280
-DFBF
-E0A080
-EFBFBF
-F0908080
-F48FBFBF
-SELECT HEX(a) FROM t1 ORDER BY a DESC;
-HEX(a)
-F48FBFBF
-F0908080
-EFBFBF
-E0A080
-DFBF
-C280
-61
+CREATE TABLE t1 (
+id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id)
+);
+INSERT INTO t1 (a) VALUES (0x61);
+INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF);
+INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF);
+INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF);
+SELECT id,HEX(a) FROM t1 ORDER BY a,id;
+id HEX(a)
+1 61
+2 C280
+3 DFBF
+4 E0A080
+6 F0908080
+7 F48FBFBF
+5 EFBFBF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+5 EFBFBF
+7 F48FBFBF
+6 F0908080
+4 E0A080
+3 DFBF
+2 C280
+1 61
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+6
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
-SELECT HEX(a) FROM t1 ORDER BY a;
-HEX(a)
-61
-C280
-DFBF
-E0A080
-EFBFBF
-F0908080
-F48FBFBF
-SELECT HEX(a) FROM t1 ORDER BY a DESC;
-HEX(a)
-F48FBFBF
-F0908080
-EFBFBF
-E0A080
-DFBF
-C280
-61
+SELECT id,HEX(a) FROM t1 ORDER BY a;
+id HEX(a)
+1 61
+2 C280
+3 DFBF
+4 E0A080
+5 EFBFBF
+6 F0908080
+7 F48FBFBF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+7 F48FBFBF
+6 F0908080
+5 EFBFBF
+4 E0A080
+3 DFBF
+2 C280
+1 61
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+7
DROP TABLE t1;
#
# ctype_utf8mb4.inc: End of 10.1 tests
diff --git a/mysql-test/r/ctype_utf8mb4_myisam.result b/mysql-test/r/ctype_utf8mb4_myisam.result
index f391f3fbba1..e44421d1410 100644
--- a/mysql-test/r/ctype_utf8mb4_myisam.result
+++ b/mysql-test/r/ctype_utf8mb4_myisam.result
@@ -2647,48 +2647,57 @@ DROP TABLE t1;
#
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
#
-CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a));
-INSERT INTO t1 VALUES (0x61);
-INSERT INTO t1 VALUES (0xC280),(0xDFBF);
-INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF);
-INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF);
-SELECT HEX(a) FROM t1 ORDER BY a;
-HEX(a)
-61
-C280
-DFBF
-E0A080
-EFBFBF
-F0908080
-F48FBFBF
-SELECT HEX(a) FROM t1 ORDER BY a DESC;
-HEX(a)
-F48FBFBF
-F0908080
-EFBFBF
-E0A080
-DFBF
-C280
-61
+CREATE TABLE t1 (
+id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id)
+);
+INSERT INTO t1 (a) VALUES (0x61);
+INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF);
+INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF);
+INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF);
+SELECT id,HEX(a) FROM t1 ORDER BY a,id;
+id HEX(a)
+1 61
+2 C280
+3 DFBF
+4 E0A080
+6 F0908080
+7 F48FBFBF
+5 EFBFBF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+5 EFBFBF
+7 F48FBFBF
+6 F0908080
+4 E0A080
+3 DFBF
+2 C280
+1 61
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+6
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
-SELECT HEX(a) FROM t1 ORDER BY a;
-HEX(a)
-61
-C280
-DFBF
-E0A080
-EFBFBF
-F0908080
-F48FBFBF
-SELECT HEX(a) FROM t1 ORDER BY a DESC;
-HEX(a)
-F48FBFBF
-F0908080
-EFBFBF
-E0A080
-DFBF
-C280
-61
+SELECT id,HEX(a) FROM t1 ORDER BY a;
+id HEX(a)
+1 61
+2 C280
+3 DFBF
+4 E0A080
+5 EFBFBF
+6 F0908080
+7 F48FBFBF
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+id HEX(a)
+7 F48FBFBF
+6 F0908080
+5 EFBFBF
+4 E0A080
+3 DFBF
+2 C280
+1 61
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+7
DROP TABLE t1;
#
# ctype_utf8mb4.inc: End of 10.1 tests
diff --git a/mysql-test/t/ctype_utf16.test b/mysql-test/t/ctype_utf16.test
index e4305ed9879..8ea6ea67f1f 100644
--- a/mysql-test/t/ctype_utf16.test
+++ b/mysql-test/t/ctype_utf16.test
@@ -860,3 +860,31 @@ DEALLOCATE PREPARE stmt;
--echo #
--echo # End of 10.0 tests
--echo #
+
+--echo #
+--echo # Start of 10.1 tests
+--echo #
+
+--echo #
+--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
+--echo #
+CREATE TABLE t1 (
+ id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ a VARCHAR(10) CHARACTER SET utf16, KEY(a,id)
+);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0x61);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF);
+SELECT id,HEX(a) FROM t1 ORDER BY a,id;
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+SELECT COUNT(DISTINCT a) FROM t1;
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16 COLLATE utf16_bin;
+SELECT id,HEX(a) FROM t1 ORDER BY a;
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+SELECT COUNT(DISTINCT a) FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # End of 10.1 tests
+--echo #
diff --git a/mysql-test/t/ctype_utf16le.test b/mysql-test/t/ctype_utf16le.test
index a8326900847..f5998fec18c 100644
--- a/mysql-test/t/ctype_utf16le.test
+++ b/mysql-test/t/ctype_utf16le.test
@@ -744,3 +744,31 @@ SET NAMES utf8, collation_connection=utf16le_bin;
--echo #
--echo # End of 5.6 tests
--echo #
+
+--echo #
+--echo # Start of 10.1 tests
+--echo #
+
+--echo #
+--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
+--echo #
+CREATE TABLE t1 (
+ id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ a VARCHAR(10) CHARACTER SET utf16le, KEY(a,id)
+);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0x61);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF);
+INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF);
+SELECT id,HEX(a) FROM t1 ORDER BY a,id;
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+SELECT COUNT(DISTINCT a) FROM t1;
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16le COLLATE utf16le_bin;
+SELECT id,HEX(a) FROM t1 ORDER BY a;
+SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
+SELECT COUNT(DISTINCT a) FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # End of 10.1 tests
+--echo #
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 90aa1a93bed..41f6a90506a 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1216,7 +1216,7 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
-#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3))
+#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin
@@ -1665,7 +1665,7 @@ struct charset_info_st my_charset_utf16_bin=
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
-#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2))
+#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 2fc53e84b5c..259928130b9 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -7775,10 +7775,9 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8_general_ci(b0,b1)
#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8_general_ci(b0,b1,b2)
/*
- There is no mapping between code point and weight for non-BMP characters
- in utf8mb4_general_ci. Just using code point as weight.
+ All non-BMP characters have the same weight.
*/
-#define WEIGHT_MB4(b0,b1,b2,b3) UTF8MB4_CODE(b0,b1,b2,b3)
+#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
#include "strcoll.ic"
diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c
index 51537e624f9..65a7f1e1155 100644
--- a/unittest/strings/strings-t.c
+++ b/unittest/strings/strings-t.c
@@ -412,6 +412,18 @@ static STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
};
+static STRNNCOLL_PARAM strcoll_utf8mb4_general_ci[]=
+{
+ /* All non-BMP characters are equal in utf8mb4_general_ci: WEIGHT_MB4 maps each of them to MY_CS_REPLACEMENT_CHARACTER */
+ {CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\x81"),0},/* Non-BMP MB4 vs non-BMP MB4 */
+ {CSTR("\xF0\x90\x80\x80"), CSTR("\xF4\x8F\xBF\xBF"),0},/* Non-BMP MB4 vs non-BMP MB4 */
+ {CSTR("\x00"), CSTR("\xF0\x90\x80\x80"),-1},/* U+0000 vs non-BMP MB4 */
+ {CSTR("\x00"), CSTR("\xF0\x90\x80\x81"),-1},/* U+0000 vs non-BMP MB4 */
+ {CSTR("\x00"), CSTR("\xF4\x8F\xBF\xBF"),-1},/* U+0000 vs non-BMP MB4 */
+ {NULL, 0, NULL, 0, 0}
+};
+
+
static STRNNCOLL_PARAM strcoll_ucs2_common[]=
{
{CSTR("\xC0"), CSTR("\xC1"), -1}, /* Incomlete MB2 vs incomplete MB2 */
@@ -474,13 +486,24 @@ static STRNNCOLL_PARAM strcoll_utf16_common[]=
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\xFF\xDF"), -1},/* MB4 vs incomplete MB4 */
/* Broken MB4 vs broken MB4 */
- {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* Broken MB4 vs broken MB4 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDB\x01"),-1},/* Broken MB4 vs broken MB4 */
{CSTR("\xDB\xFF\xE0\xFE"), CSTR("\xDB\xFF\xE0\xFF"),-1},/* Broken MB4 vs broken MB4 */
{NULL, 0, NULL, 0, 0}
};
+static STRNNCOLL_PARAM strcoll_utf16_general_ci[]=
+{
+ /* utf16_general_ci only: WEIGHT_MB4 gives every non-BMP character the weight MY_CS_REPLACEMENT_CHARACTER, so all of them are compared as equal */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
+ {CSTR("\xD8\x00\xDC\x00"), CSTR("\xDB\xFF\xDF\xFF"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
+ {CSTR("\x00\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* U+0000 vs non-BMP MB4 */
+ {CSTR("\x00\x00"), CSTR("\xDB\xFF\xDF\xFF"),-1},/* U+0000 vs non-BMP MB4 */
+ {NULL, 0, NULL, 0, 0}
+};
+
+
static STRNNCOLL_PARAM strcoll_utf16le_common[]=
{
/* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */
@@ -500,13 +523,24 @@ static STRNNCOLL_PARAM strcoll_utf16le_common[]=
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xFF\xDC\x00"), -1},/* MB4 vs incomplete MB4 */
/* Broken MB4 vs broken MB4 */
- {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"),-1},/* Broken MB4 vs broken MB4 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDB"),-1},/* Broken MB4 vs broken MB4 */
{CSTR("\xFF\xDB\xFE\xE0"), CSTR("\xFF\xDB\xFF\xE0"),-1},/* Broken MB4 vs broken MB4 */
{NULL, 0, NULL, 0, 0}
};
+static STRNNCOLL_PARAM strcoll_utf16le_general_ci[]=
+{
+ /* utf16le_general_ci only: WEIGHT_MB4 gives every non-BMP character the weight MY_CS_REPLACEMENT_CHARACTER, so all of them are compared as equal */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
+ {CSTR("\x00\xD8\x00\xDC"), CSTR("\xFF\xDB\xFF\xDF"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
+ {CSTR("\x00\x00"), CSTR("\x00\xD8\x01\xDC"), -1},/* U+0000 vs non-BMP MB4 */
+ {CSTR("\x00\x00"), CSTR("\xFF\xDB\xFF\xDF"), -1},/* U+0000 vs non-BMP MB4 */
+ {NULL, 0, NULL, 0, 0}
+};
+
+
static void
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
{
@@ -641,6 +675,7 @@ test_strcollsp()
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_common);
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_space);
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_common);
+ failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_general_ci);
failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_common);
failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_space);
failed+= strcollsp(&my_charset_utf16_bin, strcoll_utf16_common);
@@ -648,6 +683,7 @@ test_strcollsp()
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_ucs2_common);
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_space);
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_common);
+ failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_general_ci);
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_ucs2_common);
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_space);
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_common);
@@ -661,6 +697,7 @@ test_strcollsp()
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb3_common);
failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb3_common);
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_common);
+ failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_general_ci);
failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb4_common);
#endif
return failed;