MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets

TYPELIBs for ENUM/SET columns could erroneously undergo redundant hex-unescaping at table open time.

Fix:
- Prevent multiple unescaping of the same TYPELIB
- Prevent sharing TYPELIBs between columns with different mbminlen
author: Alexander Barkov <bar@mariadb.com> 2022-03-16 14:37:55 +0400
committer: Alexander Barkov <bar@mariadb.com> 2022-03-17 13:05:03 +0400
commit: 22fd31c5883622b5c7451cee74bc5d087d81e112 (patch)
tree: b6029b3a5b575f4e52f29f222d4abbfc72188e95
parent: 118826d1734bc4f650f9ec96b3d0d885eedba9c1 (diff)
download: mariadb-git-22fd31c5883622b5c7451cee74bc5d087d81e112.tar.gz
4 files changed, 72 insertions, 3 deletions
diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result
index 143fff9e419..22cea274182 100644
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@@ -2913,5 +2913,30 @@ t1	CREATE TABLE `t1` (
 DROP TABLE t1;
 SET NAMES utf8;
 #
+# MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
+#
+CREATE TABLE t1 (
+c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a',
+c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a'
+);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` enum('a','b') CHARACTER SET utf32 DEFAULT 'a',
+  `c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a'
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE t1;
+CREATE TABLE t1 (
+c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin,
+c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci
+);
+SHOW CREATE TABLE t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `c1` enum('00000061','00000062') CHARACTER SET latin1 COLLATE latin1_bin DEFAULT '00000061',
+  `c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a'
+) ENGINE=MyISAM DEFAULT CHARSET=latin1
+DROP TABLE t1;
+#
 # End of 10.2 tests
 #
diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test
index 46ff333b5f7..739096ae9cb 100644
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@@ -1068,5 +1068,24 @@ SET NAMES utf8;
 
 
 --echo #
+--echo # MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets
+--echo #
+
+CREATE TABLE t1 (
+  c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a',
+  c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a'
+);
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (
+  c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin,
+  c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci
+);
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+
+--echo #
 --echo # End of 10.2 tests
 --echo #
diff --git a/sql/table.cc b/sql/table.cc
index ca6ce02e4f2..1f7b6452303 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -1229,6 +1229,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
   plugin_ref se_plugin= 0;
   MEM_ROOT *old_root= thd->mem_root;
   Virtual_column_info **table_check_constraints;
+  bool *interval_unescaped= NULL;
   DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image");
 
   keyinfo= &first_keyinfo;
@@ -1686,6 +1687,13 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
 
     goto err;
 
+  if (interval_count)
+  {
+    if (!(interval_unescaped= (bool*) my_alloca(interval_count * sizeof(bool))))
+      goto err;
+    bzero(interval_unescaped, interval_count * sizeof(bool));
+  }
+
   field_ptr= share->field;
   table_check_constraints= share->check_constraints;
   read_length=(uint) (share->fields * field_pack_length +
@@ -1956,11 +1964,17 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
     if (share->mysql_version < 100200)
       pack_flag&= ~FIELDFLAG_LONG_DECIMAL;
 
-    if (interval_nr && charset->mbminlen > 1)
+    if (interval_nr && charset->mbminlen > 1 &&
+        !interval_unescaped[interval_nr - 1])
     {
-      /* Unescape UCS2 intervals from HEX notation */
+      /*
+        Unescape UCS2/UTF16/UTF32 intervals from HEX notation.
+        Note, ENUM/SET columns with equal value list share a single
+        copy of TYPELIB. Unescape every TYPELIB only once.
+      */
       TYPELIB *interval= share->intervals + interval_nr - 1;
       unhex_type2(interval);
+      interval_unescaped[interval_nr - 1]= true;
     }
 
 #ifndef TO_BE_DELETED_ON_PRODUCTION
@@ -2610,6 +2624,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
   share->error= OPEN_FRM_OK;
   thd->status_var.opened_shares++;
   thd->mem_root= old_root;
+  my_afree(interval_unescaped);
   DBUG_RETURN(0);
 
  err:
@@ -2623,6 +2638,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
     open_table_error(share, OPEN_FRM_CORRUPTED, share->open_errno);
 
   thd->mem_root= old_root;
+  my_afree(interval_unescaped);
   DBUG_RETURN(HA_ERR_NOT_A_TABLE);
 }
 
diff --git a/sql/unireg.cc b/sql/unireg.cc
index 7974255af35..5471290651b 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -757,7 +757,16 @@ static uint get_interval_id(uint *int_count,List<Create_field> &create_fields,
 
   while ((field=it++) != last_field)
   {
-    if (field->interval_id && field->interval->count == interval->count)
+    /*
+      ENUM/SET columns with equal value lists share a single
+      copy of the underlying TYPELIB.
+      Fields with different mbminlen can't reuse TYPELIBs, because:
+      - mbminlen==1 are written to FRM as is
+      - mbminlen>1 are written to FRM in hex-encoded format
+    */
+    if (field->interval_id &&
+        field->interval->count == interval->count &&
+        field->charset->mbminlen == last_field->charset->mbminlen)
     {
       const char **a,**b;
       for (a=field->interval->type_names, b=interval->type_names ;
author	Alexander Barkov <bar@mariadb.com>	2022-03-16 14:37:55 +0400
committer	Alexander Barkov <bar@mariadb.com>	2022-03-17 13:05:03 +0400
commit	22fd31c5883622b5c7451cee74bc5d087d81e112 (patch)
tree	b6029b3a5b575f4e52f29f222d4abbfc72188e95
parent	118826d1734bc4f650f9ec96b3d0d885eedba9c1 (diff)
download	mariadb-git-22fd31c5883622b5c7451cee74bc5d087d81e112.tar.gz