diff options
author | Alexander Barkov <bar@mariadb.com> | 2022-03-16 14:37:55 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2022-03-17 13:05:03 +0400 |
commit | 22fd31c5883622b5c7451cee74bc5d087d81e112 (patch) | |
tree | b6029b3a5b575f4e52f29f222d4abbfc72188e95 | |
parent | 118826d1734bc4f650f9ec96b3d0d885eedba9c1 (diff) | |
download | mariadb-git-22fd31c5883622b5c7451cee74bc5d087d81e112.tar.gz |
MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
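TYPELIBs for ENUM/SET columns could erroneously be hex-unescaped more than
once at table open time, because columns with equal value lists share a
single TYPELIB and each such column triggered its own unescaping pass.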
Fix:
- Prevent multiple unescaping of the same TYPELIB
- Prevent sharing TYPELIBs between columns with different mbminlen
-rw-r--r-- | mysql-test/r/ctype_utf32.result | 25 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf32.test | 19 | ||||
-rw-r--r-- | sql/table.cc | 20 | ||||
-rw-r--r-- | sql/unireg.cc | 11 |
4 files changed, 72 insertions, 3 deletions
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result index 143fff9e419..22cea274182 100644 --- a/mysql-test/r/ctype_utf32.result +++ b/mysql-test/r/ctype_utf32.result @@ -2913,5 +2913,30 @@ t1 CREATE TABLE `t1` ( DROP TABLE t1; SET NAMES utf8; # +# MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets +# +CREATE TABLE t1 ( +c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a', +c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a' +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c1` enum('a','b') CHARACTER SET utf32 DEFAULT 'a', + `c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +CREATE TABLE t1 ( +c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin, +c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c1` enum('00000061','00000062') CHARACTER SET latin1 COLLATE latin1_bin DEFAULT '00000061', + `c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +# # End of 10.2 tests # diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test index 46ff333b5f7..739096ae9cb 100644 --- a/mysql-test/t/ctype_utf32.test +++ b/mysql-test/t/ctype_utf32.test @@ -1068,5 +1068,24 @@ SET NAMES utf8; --echo # +--echo # MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets +--echo # + +CREATE TABLE t1 ( + c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a', + c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a' +); +SHOW CREATE TABLE t1; +DROP TABLE t1; + +CREATE TABLE t1 ( + c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin, + c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci +); +SHOW CREATE TABLE t1; +DROP TABLE t1; + + +--echo # --echo # End of 10.2 tests --echo # diff --git a/sql/table.cc b/sql/table.cc index ca6ce02e4f2..1f7b6452303 100644 --- a/sql/table.cc +++ 
b/sql/table.cc @@ -1229,6 +1229,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, plugin_ref se_plugin= 0; MEM_ROOT *old_root= thd->mem_root; Virtual_column_info **table_check_constraints; + bool *interval_unescaped= NULL; DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image"); keyinfo= &first_keyinfo; @@ -1686,6 +1687,13 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, goto err; + if (interval_count) + { + if (!(interval_unescaped= (bool*) my_alloca(interval_count * sizeof(bool)))) + goto err; + bzero(interval_unescaped, interval_count * sizeof(bool)); + } + field_ptr= share->field; table_check_constraints= share->check_constraints; read_length=(uint) (share->fields * field_pack_length + @@ -1956,11 +1964,17 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, if (share->mysql_version < 100200) pack_flag&= ~FIELDFLAG_LONG_DECIMAL; - if (interval_nr && charset->mbminlen > 1) + if (interval_nr && charset->mbminlen > 1 && + !interval_unescaped[interval_nr - 1]) { - /* Unescape UCS2 intervals from HEX notation */ + /* + Unescape UCS2/UTF16/UTF32 intervals from HEX notation. + Note, ENUM/SET columns with equal value list share a single + copy of TYPELIB. Unescape every TYPELIB only once. 
+ */ TYPELIB *interval= share->intervals + interval_nr - 1; unhex_type2(interval); + interval_unescaped[interval_nr - 1]= true; } #ifndef TO_BE_DELETED_ON_PRODUCTION @@ -2610,6 +2624,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, share->error= OPEN_FRM_OK; thd->status_var.opened_shares++; thd->mem_root= old_root; + my_afree(interval_unescaped); DBUG_RETURN(0); err: @@ -2623,6 +2638,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, open_table_error(share, OPEN_FRM_CORRUPTED, share->open_errno); thd->mem_root= old_root; + my_afree(interval_unescaped); DBUG_RETURN(HA_ERR_NOT_A_TABLE); } diff --git a/sql/unireg.cc b/sql/unireg.cc index 7974255af35..5471290651b 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -757,7 +757,16 @@ static uint get_interval_id(uint *int_count,List<Create_field> &create_fields, while ((field=it++) != last_field) { - if (field->interval_id && field->interval->count == interval->count) + /* + ENUM/SET columns with equal value lists share a single + copy of the underlying TYPELIB. + Fields with different mbminlen can't reuse TYPELIBs, because: + - mbminlen==1 are written to FRM as is + - mbminlen>1 are written to FRM in hex-encoded format + */ + if (field->interval_id && + field->interval->count == interval->count && + field->charset->mbminlen == last_field->charset->mbminlen) { const char **a,**b; for (a=field->interval->type_names, b=interval->type_names ; |