diff options
-rw-r--r-- | include/m_ctype.h | 1
-rw-r--r-- | mysql-test/r/ctype_utf8mb4.result | 56
-rw-r--r-- | mysql-test/t/ctype_utf8mb4.test | 45
-rw-r--r-- | strings/ctype-uca.c | 2
-rw-r--r-- | strings/ctype-ucs2.c | 6
-rw-r--r-- | strings/ctype-utf8.c | 7
6 files changed, 108 insertions, 9 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index d97c0c87b6e..7c01ebe7e5a 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -38,6 +38,7 @@ extern "C" { #define my_wc_t ulong +#define MY_CS_REPLACEMENT_CHARACTER 0xFFFD /* On i386 we store Unicode->CS conversion tables for diff --git a/mysql-test/r/ctype_utf8mb4.result b/mysql-test/r/ctype_utf8mb4.result index 40c7ce79cd0..26ac58fc79f 100644 --- a/mysql-test/r/ctype_utf8mb4.result +++ b/mysql-test/r/ctype_utf8mb4.result @@ -2412,6 +2412,62 @@ SELECT CONCAT('a', _utf8 '') FROM t1; ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,COERCIBLE) and (utf8_general_ci,COERCIBLE) for operation 'concat' DROP TABLE t1; # +# Bug#51675 Server crashes on inserting 4 byte char. +# after ALTER TABLE to 'utf8mb4' +# +SET NAMES utf8; +CREATE TABLE t1 ( +subject varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci, +p VARCHAR(15) CHARACTER SET utf8 +) DEFAULT CHARSET=latin1; +ALTER TABLE t1 ADD INDEX (subject); +ALTER TABLE t1 +DEFAULT CHARACTER SET utf8, +MODIFY subject varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, +MODIFY p varchar(255) CHARACTER SET utf8; +Warnings: +Warning 1071 Specified key was too long; max key length is 1000 bytes +Warning 1071 Specified key was too long; max key length is 1000 bytes +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `subject` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, + `p` varchar(255) DEFAULT NULL, + KEY `subject` (`subject`(250)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 +INSERT INTO t1(subject) VALUES ('abcd'); +INSERT INTO t1(subject) VALUES(x'f0909080'); +DROP TABLE t1; +# +# Bug #51676 Server crashes on SELECT, ORDER BY on 'utf8mb4' column +# +SET NAMES utf8mb4; +CREATE TABLE t1 ( +subject varchar(255) character set utf8mb4 collate utf8mb4_unicode_ci, +p varchar(15) character set utf8mb4 +) DEFAULT CHARSET=latin1; +INSERT INTO t1(subject) VALUES(0xF0909080); +INSERT INTO t1(subject) 
VALUES(0x616263F0909080646566); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `subject` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL, + `p` varchar(15) CHARACTER SET utf8mb4 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +SELECT * FROM t1 ORDER BY 1; +subject p +𐐀 NULL +abc𐐀def NULL +SELECT hex(subject), length(subject), char_length(subject), octet_length(subject) FROM t1 ORDER BY 1; +hex(subject) length(subject) char_length(subject) octet_length(subject) +616263F0909080646566 10 7 10 +F0909080 4 1 4 +SELECT subject FROM t1 ORDER BY 1; +subject +𐐀 +abc𐐀def +DROP TABLE t1; +# # End of 5.5 tests # # diff --git a/mysql-test/t/ctype_utf8mb4.test b/mysql-test/t/ctype_utf8mb4.test index b4f5de96912..8916de670c1 100644 --- a/mysql-test/t/ctype_utf8mb4.test +++ b/mysql-test/t/ctype_utf8mb4.test @@ -1737,6 +1737,51 @@ SELECT CONCAT(utf8mb4, _utf8 '') FROM t1; SELECT CONCAT('a', _utf8 '') FROM t1; DROP TABLE t1; + +--echo # +--echo # Bug#51675 Server crashes on inserting 4 byte char. 
+--echo # after ALTER TABLE to 'utf8mb4' +--echo # +SET NAMES utf8; +CREATE TABLE t1 ( + subject varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci, + p VARCHAR(15) CHARACTER SET utf8 +) DEFAULT CHARSET=latin1; + +# Alter old table, add index +ALTER TABLE t1 ADD INDEX (subject); + +# Alter old 'utf8' table to new 'utf8mb4' +ALTER TABLE t1 + DEFAULT CHARACTER SET utf8, + MODIFY subject varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, + MODIFY p varchar(255) CHARACTER SET utf8; +SHOW CREATE TABLE t1; + +INSERT INTO t1(subject) VALUES ('abcd'); +# Insert 4 byte characters +INSERT INTO t1(subject) VALUES(x'f0909080'); +DROP TABLE t1; + +--echo # +--echo # Bug #51676 Server crashes on SELECT, ORDER BY on 'utf8mb4' column +--echo # +SET NAMES utf8mb4; +CREATE TABLE t1 ( + subject varchar(255) character set utf8mb4 collate utf8mb4_unicode_ci, + p varchar(15) character set utf8mb4 +) DEFAULT CHARSET=latin1; +# Insert 4 byte characters +# 4byte character +INSERT INTO t1(subject) VALUES(0xF0909080); +# mix of 3 byte & 4 byte +INSERT INTO t1(subject) VALUES(0x616263F0909080646566); +SHOW CREATE TABLE t1; +SELECT * FROM t1 ORDER BY 1; +SELECT hex(subject), length(subject), char_length(subject), octet_length(subject) FROM t1 ORDER BY 1; +SELECT subject FROM t1 ORDER BY 1; +DROP TABLE t1; + --echo # --echo # End of 5.5 tests --echo # diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index 7dbec5a1321..b6d413432ed 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -6983,6 +6983,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner) scanner->send)) <= 0)) return -1; + if (wc > 0xFFFF) + wc= MY_CS_REPLACEMENT_CHARACTER; scanner->page= wc >> 8; scanner->code= wc & 0xFF; scanner->sbeg+= mb_len; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 6de0ea8f7e8..e3e13af85ef 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -50,8 +50,6 @@ static unsigned long lfactor[9]= { 1L, 10L, 100L, 1000L, 10000L, 100000L, 
1000000L, 10000000L, 100000000L }; -#define REPLACEMENT_CHAR 0xFFFD; - #ifdef HAVE_CHARSET_mb2_or_mb4 @@ -1145,7 +1143,7 @@ my_tosort_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) } else { - *wc= REPLACEMENT_CHAR; + *wc= MY_CS_REPLACEMENT_CHARACTER; } } @@ -1874,7 +1872,7 @@ my_tosort_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) } else { - *wc= REPLACEMENT_CHAR; + *wc= MY_CS_REPLACEMENT_CHARACTER; } } diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 7de5cdd00ee..ace39130c12 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1720,9 +1720,6 @@ MY_UNICASE_INFO *my_unicase_turkish[256]= }; -#define REPLACEMENT_CHAR 0xFFFD; - - static inline void my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) { @@ -1734,7 +1731,7 @@ my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) } else { - *wc= REPLACEMENT_CHAR; + *wc= MY_CS_REPLACEMENT_CHARACTER; } } @@ -1757,7 +1754,7 @@ my_wildcmp_unicode(CHARSET_INFO *cs, { int result= -1; /* Not found, using wildcards */ my_wc_t s_wc, w_wc; - int scan, plane; + int scan; int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *, const uchar *); mb_wc= cs->cset->mb_wc; |