summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.com> 2022-03-16 14:37:55 +0400
committer Alexander Barkov <bar@mariadb.com> 2022-03-17 13:05:03 +0400
commit 22fd31c5883622b5c7451cee74bc5d087d81e112 (patch)
tree b6029b3a5b575f4e52f29f222d4abbfc72188e95
parent 118826d1734bc4f650f9ec96b3d0d885eedba9c1 (diff)
download mariadb-git-22fd31c5883622b5c7451cee74bc5d087d81e112.tar.gz
MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
TYPELIBs for ENUM/SET columns could erroneously undergo redundant hex-unescaping at table open time.

Fix:
- Prevent multiple unescaping of the same TYPELIB
- Prevent sharing TYPELIBs between columns with different mbminlen
-rw-r--r-- mysql-test/r/ctype_utf32.result | 25
-rw-r--r-- mysql-test/t/ctype_utf32.test | 19
-rw-r--r-- sql/table.cc | 20
-rw-r--r-- sql/unireg.cc | 11
4 files changed, 72 insertions, 3 deletions
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result
index 143fff9e419..22cea274182 100644
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@@ -2913,5 +2913,30 @@ t1 CREATE TABLE `t1` (
DROP TABLE t1;
SET NAMES utf8;
#
+# MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
+#
+CREATE TABLE t1 (
+c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a',
+c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a'
+);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `c1` enum('a','b') CHARACTER SET utf32 DEFAULT 'a',
+ `c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a'
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin,
+c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci
+);
+SHOW CREATE TABLE t1;
+Table Create Table
+t1 CREATE TABLE `t1` (
+ `c1` enum('00000061','00000062') CHARACTER SET latin1 COLLATE latin1_bin DEFAULT '00000061',
+ `c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a'
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE t1;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test
index 46ff333b5f7..739096ae9cb 100644
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@@ -1068,5 +1068,24 @@ SET NAMES utf8;
--echo #
+--echo # MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
+--echo #
+
+CREATE TABLE t1 (
+ c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a',
+ c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a'
+);
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+ c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin,
+ c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci
+);
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+
+--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/sql/table.cc b/sql/table.cc
index ca6ce02e4f2..1f7b6452303 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -1229,6 +1229,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
plugin_ref se_plugin= 0;
MEM_ROOT *old_root= thd->mem_root;
Virtual_column_info **table_check_constraints;
+ bool *interval_unescaped= NULL;
DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image");
keyinfo= &first_keyinfo;
@@ -1686,6 +1687,13 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
goto err;
+ if (interval_count)
+ {
+ if (!(interval_unescaped= (bool*) my_alloca(interval_count * sizeof(bool))))
+ goto err;
+ bzero(interval_unescaped, interval_count * sizeof(bool));
+ }
+
field_ptr= share->field;
table_check_constraints= share->check_constraints;
read_length=(uint) (share->fields * field_pack_length +
@@ -1956,11 +1964,17 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
if (share->mysql_version < 100200)
pack_flag&= ~FIELDFLAG_LONG_DECIMAL;
- if (interval_nr && charset->mbminlen > 1)
+ if (interval_nr && charset->mbminlen > 1 &&
+ !interval_unescaped[interval_nr - 1])
{
- /* Unescape UCS2 intervals from HEX notation */
+ /*
+ Unescape UCS2/UTF16/UTF32 intervals from HEX notation.
+ Note, ENUM/SET columns with equal value lists share a single
+ copy of TYPELIB. Unescape every TYPELIB only once.
+ */
TYPELIB *interval= share->intervals + interval_nr - 1;
unhex_type2(interval);
+ interval_unescaped[interval_nr - 1]= true;
}
#ifndef TO_BE_DELETED_ON_PRODUCTION
@@ -2610,6 +2624,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
share->error= OPEN_FRM_OK;
thd->status_var.opened_shares++;
thd->mem_root= old_root;
+ my_afree(interval_unescaped);
DBUG_RETURN(0);
err:
@@ -2623,6 +2638,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
open_table_error(share, OPEN_FRM_CORRUPTED, share->open_errno);
thd->mem_root= old_root;
+ my_afree(interval_unescaped);
DBUG_RETURN(HA_ERR_NOT_A_TABLE);
}
diff --git a/sql/unireg.cc b/sql/unireg.cc
index 7974255af35..5471290651b 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -757,7 +757,16 @@ static uint get_interval_id(uint *int_count,List<Create_field> &create_fields,
while ((field=it++) != last_field)
{
- if (field->interval_id && field->interval->count == interval->count)
+ /*
+ ENUM/SET columns with equal value lists share a single
+ copy of the underlying TYPELIB.
+ Fields with different mbminlen can't reuse TYPELIBs, because:
+ - mbminlen==1 are written to FRM as is
+ - mbminlen>1 are written to FRM in hex-encoded format
+ */
+ if (field->interval_id &&
+ field->interval->count == interval->count &&
+ field->charset->mbminlen == last_field->charset->mbminlen)
{
const char **a,**b;
for (a=field->interval->type_names, b=interval->type_names ;