From 8d51c6d234b1730d4ff3b2c1fe7828eeca81998b Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 14 Dec 2022 18:46:27 +0400 Subject: MDEV-30164 System variable for default collations This patch adds a way to override default collations (or "character set collations") for desired character sets. The SQL standard says: > Each collation known in an SQL-environment is applicable to one > or more character sets, and for each character set, one or more > collations are applicable to it, one of which is associated with > it as its character set collation. In MariaDB, character set collations have been hard-coded so far, e.g. utf8mb4_general_ci has been a hard-coded character set collation for utf8mb4. This patch allows overriding (globally per server, or per session) character set collations, so for example, uca1400_ai_ci can be set as a character set collation for Unicode character sets (instead of the compiled-in xxx_general_ci). The array of overridden character set collations is stored in a new (session and global) system variable @@character_set_collations and can be set as a comma-separated list of charset=collation pairs, e.g.: SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci'; The variable is empty by default, which means that the hard-coded character set collations are used (e.g. utf8mb4_general_ci for utf8mb4). The variable can also be set globally by passing it on the server startup command line, and/or in my.cnf. 
--- client/mysqlbinlog.cc | 1 + libmysqld/CMakeLists.txt | 2 +- mysql-test/main/ctype_collate_implicit.result | 258 ++++++++++++++++++++ mysql-test/main/ctype_collate_implicit.test | 209 ++++++++++++++++ mysql-test/main/ctype_collate_implicit_def.opt | 1 + mysql-test/main/ctype_collate_implicit_def.result | 60 +++++ mysql-test/main/ctype_collate_implicit_def.test | 47 ++++ mysql-test/main/ctype_utf8.result | 1 + mysql-test/main/ctype_utf8mb4.result | 1 + mysql-test/main/ctype_utf8mb4_heap.result | 1 + mysql-test/main/ctype_utf8mb4_innodb.result | 1 + mysql-test/main/ctype_utf8mb4_myisam.result | 1 + mysql-test/main/mysqlbinlog.result | 1 + mysql-test/main/mysqld--help.result | 3 + .../suite/binlog/r/binlog_mysqlbinlog_row.result | 20 ++ .../binlog/r/binlog_mysqlbinlog_row_innodb.result | 6 + .../binlog/r/binlog_mysqlbinlog_row_myisam.result | 6 + .../binlog/r/binlog_mysqlbinlog_row_trans.result | 2 + .../suite/binlog/r/binlog_row_ctype_ucs.result | 1 + .../suite/binlog/r/binlog_stm_ctype_ucs.result | 3 + .../binlog_stm_mysqlbinlog_collate_implicit.result | 193 +++++++++++++++ .../t/binlog_stm_mysqlbinlog_collate_implicit.test | 77 ++++++ .../suite/rpl/r/rpl_ctype_collate_implicit.result | 83 +++++++ .../suite/rpl/t/rpl_ctype_collate_implicit.test | 60 +++++ .../sys_vars/r/sysvars_server_embedded.result | 10 + .../sys_vars/r/sysvars_server_notembedded.result | 10 + sql/CMakeLists.txt | 2 +- sql/charset_collations.cc | 107 +++++++++ sql/charset_collations.h | 265 +++++++++++++++++++++ sql/field.h | 9 +- sql/handler.h | 26 +- sql/item_func.h | 4 +- sql/item_strfunc.cc | 5 +- sql/json_table.cc | 6 +- sql/lex_charset.cc | 79 ++++-- sql/lex_charset.h | 88 +++++-- sql/log_event.cc | 15 +- sql/log_event.h | 13 +- sql/log_event_client.cc | 32 +++ sql/log_event_server.cc | 18 ++ sql/mysqld.cc | 22 ++ sql/simple_tokenizer.h | 83 +++++++ sql/sql_class.h | 29 ++- sql/sql_connect.cc | 3 + sql/sql_lex.h | 14 +- sql/sql_parse.cc | 21 ++ sql/sql_prepare.cc | 43 +++- sql/sql_show.cc 
| 15 +- sql/sql_table.cc | 14 +- sql/sql_type.cc | 22 +- sql/sql_yacc.yy | 86 +++++-- sql/structs.h | 7 +- sql/sys_vars.cc | 109 +++++++++ 53 files changed, 2098 insertions(+), 97 deletions(-) create mode 100644 mysql-test/main/ctype_collate_implicit.result create mode 100644 mysql-test/main/ctype_collate_implicit.test create mode 100644 mysql-test/main/ctype_collate_implicit_def.opt create mode 100644 mysql-test/main/ctype_collate_implicit_def.result create mode 100644 mysql-test/main/ctype_collate_implicit_def.test create mode 100644 mysql-test/suite/binlog/r/binlog_stm_mysqlbinlog_collate_implicit.result create mode 100644 mysql-test/suite/binlog/t/binlog_stm_mysqlbinlog_collate_implicit.test create mode 100644 mysql-test/suite/rpl/r/rpl_ctype_collate_implicit.result create mode 100644 mysql-test/suite/rpl/t/rpl_ctype_collate_implicit.test create mode 100644 sql/charset_collations.cc create mode 100644 sql/charset_collations.h create mode 100644 sql/simple_tokenizer.h diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index 67cf008d732..2b15209ec01 100644 --- a/client/mysqlbinlog.cc +++ b/client/mysqlbinlog.cc @@ -51,6 +51,7 @@ #include "sql_string.h" // needed for Rpl_filter #include "sql_list.h" // needed for Rpl_filter #include "rpl_filter.h" +#include "charset_collations.h" #include "mysqld.h" diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index 5d5cc35e1be..e2843a392f9 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -126,7 +126,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc ../sql/sql_analyze_stmt.cc ../sql/sql_analyze_stmt.h ../sql/compat56.cc ../sql/sql_schema.cc - ../sql/lex_charset.cc + ../sql/lex_charset.cc ../sql/charset_collations.cc ../sql/sql_type.cc ../sql/sql_type.h ../sql/sql_mode.cc ../sql/sql_type_string.cc diff --git a/mysql-test/main/ctype_collate_implicit.result b/mysql-test/main/ctype_collate_implicit.result new file mode 100644 index 00000000000..94fd05659e8 --- 
/dev/null +++ b/mysql-test/main/ctype_collate_implicit.result @@ -0,0 +1,258 @@ +# +# MDEV-30164 System variable for default collations +# +SET @@character_set_collations= ' utf8mb3 = utf8mb3_bin , LATIN1 = LATIN1_BIN '; +SELECT @@character_set_collations; +@@character_set_collations +latin1=latin1_bin,utf8mb3=utf8mb3_bin +SET @@character_set_collations=''; +SELECT @@character_set_collations; +@@character_set_collations + +SET @@character_set_collations='utf8mb3=utf8mb3_bin'; +SELECT @@character_set_collations; +@@character_set_collations +utf8mb3=utf8mb3_bin +SET @@character_set_collations=''; +SET @@character_set_collations='utf8mb3=utf8mb4_general_ci'; +ERROR 42000: COLLATION 'utf8mb4_general_ci' is not valid for CHARACTER SET 'utf8mb3' +SELECT @@character_set_collations; +@@character_set_collations + +SET @@character_set_collations='utf8mb4=utf8mb3_general_ci'; +ERROR 42000: COLLATION 'utf8mb3_general_ci' is not valid for CHARACTER SET 'utf8mb4' +SELECT @@character_set_collations; +@@character_set_collations + +SET @@character_set_collations='utf8mb3=utf8mb3_general_ci'; +SELECT @@character_set_collations; +@@character_set_collations +utf8mb3=utf8mb3_general_ci +SET @@character_set_collations='utf8mb4=utf8mb4_general_ci,latin1=latin1_bin'; +SELECT @@character_set_collations; +@@character_set_collations +latin1=latin1_bin,utf8mb4=utf8mb4_general_ci +SET @@character_set_collations='utf8mb4=uca1400_ai_ci,latin1=uca1400_ai_ci'; +ERROR 42000: COLLATION 'uca1400_ai_ci' is not valid for CHARACTER SET 'latin1' +SELECT @@character_set_collations; +@@character_set_collations +latin1=latin1_bin,utf8mb4=utf8mb4_general_ci +SELECT @@character_set_collations RLIKE 'utf8mb4=utf8mb4_general_ci' AS expect_true; +expect_true +1 +SET @@character_set_collations='utf8mb4=uca1400_ai_ci'; +SELECT @@character_set_collations; +@@character_set_collations +utf8mb4=utf8mb4_uca1400_ai_ci +SET NAMES utf8mb4; +SELECT @@collation_connection; +@@collation_connection +utf8mb4_uca1400_ai_ci 
+SELECT collation('literal'); +collation('literal') +utf8mb4_uca1400_ai_ci +EXECUTE IMMEDIATE 'SELECT COLLATION(?)' USING 'literal'; +COLLATION(?) +utf8mb4_uca1400_ai_ci +CREATE VIEW v1 AS SELECT 'literal', collation('literal') as cl; +SHOW CREATE VIEW v1; +View Create View character_set_client collation_connection +v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select 'literal' AS `literal`,collation('literal') AS `cl` utf8mb4 utf8mb4_uca1400_ai_ci +SELECT * FROM v1; +literal cl +literal utf8mb4_uca1400_ai_ci +DROP VIEW v1; +SET NAMES utf8mb4 COLLATE utf8mb4_general_ci; +CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` text CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +DROP TABLE t1; +CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4 COLLATE DEFAULT); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` text CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +DROP TABLE t1; +CREATE TABLE t1 (a TEXT COLLATE DEFAULT) CHARACTER SET utf8mb4; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` text DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci +DROP TABLE t1; +CREATE TABLE t1 (a TEXT) CHARACTER SET utf8mb4; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` text DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci +DROP TABLE t1; +CREATE DATABASE db1 CHARACTER SET utf8mb4; +SHOW CREATE DATABASE db1; +Database Create Database +db1 CREATE DATABASE `db1` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci */ +DROP DATABASE db1; +SET NAMES utf8mb4 COLLATE utf8mb4_general_ci; +SELECT +@@collation_connection AS conn, +COLLATION('a') AS lit, 
+COLLATION(CONCAT(1)) AS num, +COLLATION(CAST(123 AS CHAR)) AS casti, +COLLATION(_utf8mb4'a') AS litu, +COLLATION(_utf8mb4 0x62) AS lituh, +COLLATION(_utf8mb4 X'63') AS lituhs, +COLLATION(CAST(123 AS CHAR CHARACTER SET utf8mb4)) AS castic, +COLLATION(CHAR(0x61 USING utf8mb4)) AS chr, +COLLATION(CONVERT('a' USING utf8mb4)) AS conv;; +conn utf8mb4_general_ci +lit utf8mb4_general_ci +num utf8mb4_general_ci +casti utf8mb4_general_ci +litu utf8mb4_uca1400_ai_ci +lituh utf8mb4_uca1400_ai_ci +lituhs utf8mb4_uca1400_ai_ci +castic utf8mb4_uca1400_ai_ci +chr utf8mb4_uca1400_ai_ci +conv utf8mb4_uca1400_ai_ci +SET NAMES utf8mb4; +SELECT +@@collation_connection AS conn, +COLLATION('a') AS lit, +COLLATION(CONCAT(1)) AS num, +COLLATION(CAST(123 AS CHAR)) AS casti, +COLLATION(_utf8mb4'a') AS litu, +COLLATION(_utf8mb4 0x62) AS lituh, +COLLATION(_utf8mb4 X'63') AS lituhs, +COLLATION(CAST(123 AS CHAR CHARACTER SET utf8mb4)) AS castic, +COLLATION(CHAR(0x61 USING utf8mb4)) AS chr, +COLLATION(CONVERT('a' USING utf8mb4)) AS conv;; +conn utf8mb4_uca1400_ai_ci +lit utf8mb4_uca1400_ai_ci +num utf8mb4_uca1400_ai_ci +casti utf8mb4_uca1400_ai_ci +litu utf8mb4_uca1400_ai_ci +lituh utf8mb4_uca1400_ai_ci +lituhs utf8mb4_uca1400_ai_ci +castic utf8mb4_uca1400_ai_ci +chr utf8mb4_uca1400_ai_ci +conv utf8mb4_uca1400_ai_ci +SET character_set_collations='latin1=latin1_bin,utf8mb4=uca1400_ai_ci'; +SHOW CHARACTER SET LIKE 'latin1'; +Charset Description Default collation Maxlen +latin1 cp1252 West European latin1_bin 1 +SELECT * FROM INFORMATION_SCHEMA.CHARACTER_SETS +WHERE CHARACTER_SET_NAME='latin1'; +CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN +latin1 latin1_bin cp1252 West European 1 +SHOW COLLATION LIKE 'latin1%'; +Collation Charset Id Default Compiled Sortlen +latin1_german1_ci latin1 5 Yes 1 +latin1_swedish_ci latin1 8 Yes 1 +latin1_danish_ci latin1 15 Yes 1 +latin1_german2_ci latin1 31 Yes 2 +latin1_bin latin1 47 Yes Yes 1 +latin1_general_ci latin1 48 Yes 1 +latin1_general_cs latin1 
49 Yes 1 +latin1_spanish_ci latin1 94 Yes 1 +latin1_swedish_nopad_ci latin1 1032 Yes 1 +latin1_nopad_bin latin1 1071 Yes 1 +SELECT COLLATION_NAME, IS_DEFAULT +FROM INFORMATION_SCHEMA.COLLATIONS +WHERE CHARACTER_SET_NAME LIKE 'latin1%'; +COLLATION_NAME IS_DEFAULT +latin1_german1_ci +latin1_swedish_ci +latin1_danish_ci +latin1_german2_ci +latin1_bin Yes +latin1_general_ci +latin1_general_cs +latin1_spanish_ci +latin1_swedish_nopad_ci +latin1_nopad_bin +SELECT COLLATION_NAME, FULL_COLLATION_NAME, IS_DEFAULT +FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY +WHERE COLLATION_NAME LIKE 'latin1%'; +COLLATION_NAME FULL_COLLATION_NAME IS_DEFAULT +latin1_german1_ci latin1_german1_ci +latin1_swedish_ci latin1_swedish_ci +latin1_danish_ci latin1_danish_ci +latin1_german2_ci latin1_german2_ci +latin1_bin latin1_bin Yes +latin1_general_ci latin1_general_ci +latin1_general_cs latin1_general_cs +latin1_spanish_ci latin1_spanish_ci +latin1_swedish_nopad_ci latin1_swedish_nopad_ci +latin1_nopad_bin latin1_nopad_bin +SHOW CHARACTER SET LIKE 'utf8mb4'; +Charset Description Default collation Maxlen +utf8mb4 UTF-8 Unicode utf8mb4_uca1400_ai_ci 4 +SELECT * FROM INFORMATION_SCHEMA.CHARACTER_SETS +WHERE CHARACTER_SET_NAME='utf8mb4'; +CHARACTER_SET_NAME DEFAULT_COLLATE_NAME DESCRIPTION MAXLEN +utf8mb4 utf8mb4_uca1400_ai_ci UTF-8 Unicode 4 +SHOW COLLATION LIKE '%uca1400_ai_ci%'; +Collation Charset Id Default Compiled Sortlen +uca1400_ai_ci NULL NULL NULL Yes 8 +SELECT COLLATION_NAME, IS_DEFAULT +FROM INFORMATION_SCHEMA.COLLATIONS +WHERE COLLATION_NAME LIKE '%uca1400_ai_ci%'; +COLLATION_NAME IS_DEFAULT +uca1400_ai_ci NULL +SELECT COLLATION_NAME, FULL_COLLATION_NAME, IS_DEFAULT +FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY +WHERE COLLATION_NAME LIKE '%uca1400_ai_ci%'; +COLLATION_NAME FULL_COLLATION_NAME IS_DEFAULT +uca1400_ai_ci utf8mb3_uca1400_ai_ci +uca1400_ai_ci ucs2_uca1400_ai_ci +uca1400_ai_ci utf8mb4_uca1400_ai_ci Yes +uca1400_ai_ci utf16_uca1400_ai_ci 
+uca1400_ai_ci utf32_uca1400_ai_ci +SET @@character_set_collations=''; +PREPARE stmt FROM 'SELECT ' + 'COLLATION(CAST("x" AS CHAR CHARACTER SET utf8mb3)) AS a, ' + 'COLLATION(_utf8mb3"x") AS b'; +EXECUTE stmt; +a b +utf8mb3_general_ci utf8mb3_general_ci +SET @@character_set_collations='utf8mb3=utf8mb3_bin'; +EXECUTE stmt; +a b +utf8mb3_bin utf8mb3_bin +SET @@character_set_collations='utf8mb3=utf8mb3_bin'; +PREPARE stmt FROM 'SELECT ' + 'COLLATION(CAST("x" AS CHAR CHARACTER SET utf8mb3)) AS a, ' + 'COLLATION(_utf8mb3"x") AS b'; +EXECUTE stmt; +a b +utf8mb3_bin utf8mb3_bin +SET @@character_set_collations=DEFAULT; +EXECUTE stmt; +a b +utf8mb3_general_ci utf8mb3_general_ci +SET NAMES utf8mb3; +SET @@character_set_collations=''; +PREPARE stmt FROM 'CREATE TABLE t1 ' + '(a TEXT CHARACTER SET utf8mb3 COLLATE DEFAULT COLLATE utf8mb3_general_ci)'; +EXECUTE stmt; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` text CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +DROP TABLE t1; +SET @@character_set_collations='utf8mb3=utf8mb3_bin'; +EXECUTE stmt; +ERROR HY000: Conflicting declarations: 'COLLATE utf8mb3_bin' and 'COLLATE utf8mb3_general_ci' +SET @@character_set_collations=''; +EXECUTE stmt; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` text CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +DROP TABLE t1; diff --git a/mysql-test/main/ctype_collate_implicit.test b/mysql-test/main/ctype_collate_implicit.test new file mode 100644 index 00000000000..9636b04d577 --- /dev/null +++ b/mysql-test/main/ctype_collate_implicit.test @@ -0,0 +1,209 @@ +--source include/have_utf8.inc +--source include/have_utf8mb4.inc + +--echo # +--echo # MDEV-30164 System variable for default collations +--echo # + +SET @@character_set_collations= ' utf8mb3 = utf8mb3_bin , LATIN1 = LATIN1_BIN '; 
+SELECT @@character_set_collations; +SET @@character_set_collations=''; +SELECT @@character_set_collations; + +SET @@character_set_collations='utf8mb3=utf8mb3_bin'; +SELECT @@character_set_collations; +SET @@character_set_collations=''; + +--error ER_COLLATION_CHARSET_MISMATCH +SET @@character_set_collations='utf8mb3=utf8mb4_general_ci'; +SELECT @@character_set_collations; + +--error ER_COLLATION_CHARSET_MISMATCH +SET @@character_set_collations='utf8mb4=utf8mb3_general_ci'; +SELECT @@character_set_collations; + +SET @@character_set_collations='utf8mb3=utf8mb3_general_ci'; +SELECT @@character_set_collations; + +SET @@character_set_collations='utf8mb4=utf8mb4_general_ci,latin1=latin1_bin'; +SELECT @@character_set_collations; + +--error ER_COLLATION_CHARSET_MISMATCH +SET @@character_set_collations='utf8mb4=uca1400_ai_ci,latin1=uca1400_ai_ci'; + +# All or nothing is set. "Nothing" in this case because of the error on latin1. +# The "uca1400_ai_ci FOR utf8mb4" part was ignored. +SELECT @@character_set_collations; +SELECT @@character_set_collations RLIKE 'utf8mb4=utf8mb4_general_ci' AS expect_true; + + +SET @@character_set_collations='utf8mb4=uca1400_ai_ci'; +SELECT @@character_set_collations; + +SET NAMES utf8mb4; +SELECT @@collation_connection; + +# We have to disable --view-protocol for the following statement. +# 'mtr --view-protocol' creates a separate connection for these statements: +# CREATE VIEW mysqltest_tmp_sp AS ...; +# DROP VIEW mysqltest_tmp_sp; +# The current @@character_set_collations does not affect this connection. +# So --view-protocol would return the hard-coded character set collation here, +# instead of utf8mb4_uca1400_ai_ci + +--disable_view_protocol +SELECT collation('literal'); +--enable_view_protocol +EXECUTE IMMEDIATE 'SELECT COLLATION(?)' USING 'literal'; + +CREATE VIEW v1 AS SELECT 'literal', collation('literal') as cl; +SHOW CREATE VIEW v1; +SELECT * FROM v1; +DROP VIEW v1; + + +# Override @@collation_connection to utf8mb4_general_ci. 
+# Make sure that CREATE statements do not use @@collation_connection +# to detect implicit collations. +# Implicit collations are detected using @@character_set_collations! + +SET NAMES utf8mb4 COLLATE utf8mb4_general_ci; + +CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4); +SHOW CREATE TABLE t1; +DROP TABLE t1; + +CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4 COLLATE DEFAULT); +SHOW CREATE TABLE t1; +DROP TABLE t1; + +CREATE TABLE t1 (a TEXT COLLATE DEFAULT) CHARACTER SET utf8mb4; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +CREATE TABLE t1 (a TEXT) CHARACTER SET utf8mb4; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +CREATE DATABASE db1 CHARACTER SET utf8mb4; +SHOW CREATE DATABASE db1; +DROP DATABASE db1; + + +# Test how @@character_set_collations affects various expressions +# with implicit collations. + + +let query=SELECT + @@collation_connection AS conn, + COLLATION('a') AS lit, + COLLATION(CONCAT(1)) AS num, + COLLATION(CAST(123 AS CHAR)) AS casti, + COLLATION(_utf8mb4'a') AS litu, + COLLATION(_utf8mb4 0x62) AS lituh, + COLLATION(_utf8mb4 X'63') AS lituhs, + COLLATION(CAST(123 AS CHAR CHARACTER SET utf8mb4)) AS castic, + COLLATION(CHAR(0x61 USING utf8mb4)) AS chr, + COLLATION(CONVERT('a' USING utf8mb4)) AS conv; + +# The below SET NAMES sets @@collation_connection to utf8mb4_general_ci. +# But @@character_set_collations still contains utf8mb4=uca1400_ai_ci. 
+ +SET NAMES utf8mb4 COLLATE utf8mb4_general_ci; + +# Columns expected to print utf8mb4_general_ci +# because they use @@collation_connection: +# - String literals without introducers +# - Automatic number-to-string conversions +# - CAST(AS CHAR) - without CHARACTER SET +# +# Columns expected to print utf8mb4_uca1400_ai_ci +# because they use the current session default collation +# for the character set (as specified in @@character_set_collations): +# - String literals with introducers +# - CAST(AS CHAR CHARACTER SET cs) +# - CHAR() +# - CONVERT() + +--vertical_results +--eval $query; +--horizontal_results + +# This sets @@collation_connection to utf8mb4_uca1400_ai_ci +# according to @@character_set_collations. +SET NAMES utf8mb4; + +# Now all columns are expected to print utf8mb4_uca1400_ai_ci: +# - Some columns because @@collation_connection says so +# - Some columns because @@character_set_collations says so. + +--vertical_results +--eval $query; +--horizontal_results + + +# +# INFORMATION_SCHEMA +# + +SET character_set_collations='latin1=latin1_bin,utf8mb4=uca1400_ai_ci'; +SHOW CHARACTER SET LIKE 'latin1'; +SELECT * FROM INFORMATION_SCHEMA.CHARACTER_SETS +WHERE CHARACTER_SET_NAME='latin1'; + +SHOW COLLATION LIKE 'latin1%'; +SELECT COLLATION_NAME, IS_DEFAULT +FROM INFORMATION_SCHEMA.COLLATIONS +WHERE CHARACTER_SET_NAME LIKE 'latin1%'; +SELECT COLLATION_NAME, FULL_COLLATION_NAME, IS_DEFAULT +FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY +WHERE COLLATION_NAME LIKE 'latin1%'; + +SHOW CHARACTER SET LIKE 'utf8mb4'; +SELECT * FROM INFORMATION_SCHEMA.CHARACTER_SETS +WHERE CHARACTER_SET_NAME='utf8mb4'; + +SHOW COLLATION LIKE '%uca1400_ai_ci%'; +SELECT COLLATION_NAME, IS_DEFAULT +FROM INFORMATION_SCHEMA.COLLATIONS +WHERE COLLATION_NAME LIKE '%uca1400_ai_ci%'; +SELECT COLLATION_NAME, FULL_COLLATION_NAME, IS_DEFAULT +FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY +WHERE COLLATION_NAME LIKE '%uca1400_ai_ci%'; + +# +# Prepared statements: reprepare on 
@@character_set_collations change. +# + +SET @@character_set_collations=''; +PREPARE stmt FROM 'SELECT ' + 'COLLATION(CAST("x" AS CHAR CHARACTER SET utf8mb3)) AS a, ' + 'COLLATION(_utf8mb3"x") AS b'; +EXECUTE stmt; +SET @@character_set_collations='utf8mb3=utf8mb3_bin'; +EXECUTE stmt; + +SET @@character_set_collations='utf8mb3=utf8mb3_bin'; +PREPARE stmt FROM 'SELECT ' + 'COLLATION(CAST("x" AS CHAR CHARACTER SET utf8mb3)) AS a, ' + 'COLLATION(_utf8mb3"x") AS b'; +EXECUTE stmt; +SET @@character_set_collations=DEFAULT; +EXECUTE stmt; + +SET NAMES utf8mb3; +SET @@character_set_collations=''; +PREPARE stmt FROM 'CREATE TABLE t1 ' + '(a TEXT CHARACTER SET utf8mb3 COLLATE DEFAULT COLLATE utf8mb3_general_ci)'; +EXECUTE stmt; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +SET @@character_set_collations='utf8mb3=utf8mb3_bin'; +--error ER_CONFLICTING_DECLARATIONS +EXECUTE stmt; + +SET @@character_set_collations=''; +EXECUTE stmt; +SHOW CREATE TABLE t1; +DROP TABLE t1; diff --git a/mysql-test/main/ctype_collate_implicit_def.opt b/mysql-test/main/ctype_collate_implicit_def.opt new file mode 100644 index 00000000000..a5d4c19fbfe --- /dev/null +++ b/mysql-test/main/ctype_collate_implicit_def.opt @@ -0,0 +1 @@ +--character-set-collations=utf8mb3=uca1400_ai_ci,latin1=latin1_bin diff --git a/mysql-test/main/ctype_collate_implicit_def.result b/mysql-test/main/ctype_collate_implicit_def.result new file mode 100644 index 00000000000..d0dbd5f2064 --- /dev/null +++ b/mysql-test/main/ctype_collate_implicit_def.result @@ -0,0 +1,60 @@ +# +# MDEV-30164 System variable for default collations +# +SELECT @@global.character_set_collations; +@@global.character_set_collations +latin1=latin1_bin,utf8mb3=utf8mb3_uca1400_ai_ci +SELECT @@session.character_set_collations; +@@session.character_set_collations +latin1=latin1_bin,utf8mb3=utf8mb3_uca1400_ai_ci +SELECT COLLATION('literal'); +COLLATION('literal') +latin1_bin +SET NAMES utf8mb3; +SELECT COLLATION('literal'); +COLLATION('literal') 
+utf8mb3_uca1400_ai_ci +SET @@session.character_set_collations='latin1=latin1_german2_ci'; +SELECT @@session.character_set_collations; +@@session.character_set_collations +latin1=latin1_german2_ci +SET @@session.character_set_collations=DEFAULT; +SELECT @@session.character_set_collations; +@@session.character_set_collations +latin1=latin1_bin,utf8mb3=utf8mb3_uca1400_ai_ci +SET @@global.character_set_collations='utf8mb3=uca1400_as_ci,latin1=latin1_danish_ci'; +connect con1,localhost,root,,; +connection con1; +SELECT @@session.character_set_collations; +@@session.character_set_collations +latin1=latin1_danish_ci,utf8mb3=utf8mb3_uca1400_as_ci +SELECT COLLATION('literal'); +COLLATION('literal') +latin1_danish_ci +disconnect con1; +connection default; +SET @@global.character_set_collations=DEFAULT; +SELECT @@global.character_set_collations; +@@global.character_set_collations + +connect con2,localhost,root,,; +connection con2; +SELECT @@session.character_set_collations; +@@session.character_set_collations + +SELECT COLLATION('literal'); +COLLATION('literal') +latin1_swedish_ci +disconnect con2; +connection default; +SET @@global.character_set_collations='utf8mb3=uca1400_ai_ci,latin1=latin1_bin'; +connect con3,localhost,root,,; +connection con3; +SELECT @@session.character_set_collations; +@@session.character_set_collations +latin1=latin1_bin,utf8mb3=utf8mb3_uca1400_ai_ci +SELECT COLLATION('literal'); +COLLATION('literal') +latin1_bin +disconnect con3; +connection default; diff --git a/mysql-test/main/ctype_collate_implicit_def.test b/mysql-test/main/ctype_collate_implicit_def.test new file mode 100644 index 00000000000..5116702aecb --- /dev/null +++ b/mysql-test/main/ctype_collate_implicit_def.test @@ -0,0 +1,47 @@ +--source include/have_utf8.inc +--source include/have_utf8mb4.inc + +--echo # +--echo # MDEV-30164 System variable for default collations +--echo # + +SELECT @@global.character_set_collations; +SELECT @@session.character_set_collations; +SELECT 
COLLATION('literal'); +SET NAMES utf8mb3; +SELECT COLLATION('literal'); + +SET @@session.character_set_collations='latin1=latin1_german2_ci'; +SELECT @@session.character_set_collations; + +SET @@session.character_set_collations=DEFAULT; +SELECT @@session.character_set_collations; + +SET @@global.character_set_collations='utf8mb3=uca1400_as_ci,latin1=latin1_danish_ci'; + +--connect (con1,localhost,root,,) +--connection con1 +SELECT @@session.character_set_collations; +SELECT COLLATION('literal'); +--disconnect con1 +--connection default + +SET @@global.character_set_collations=DEFAULT; +SELECT @@global.character_set_collations; + +--connect (con2,localhost,root,,) +--connection con2 +SELECT @@session.character_set_collations; +SELECT COLLATION('literal'); +--disconnect con2 +--connection default + +# Set back to the command line value, to avoid mtr internal check failure. +SET @@global.character_set_collations='utf8mb3=uca1400_ai_ci,latin1=latin1_bin'; + +--connect (con3,localhost,root,,) +--connection con3 +SELECT @@session.character_set_collations; +SELECT COLLATION('literal'); +--disconnect con3 +--connection default diff --git a/mysql-test/main/ctype_utf8.result b/mysql-test/main/ctype_utf8.result index 42ab1decc4e..8fb32d62d04 100644 --- a/mysql-test/main/ctype_utf8.result +++ b/mysql-test/main/ctype_utf8.result @@ -1819,6 +1819,7 @@ SET CHARACTER SET utf8; SHOW VARIABLES LIKE 'character\_set\_%'; Variable_name Value character_set_client utf8mb3 +character_set_collations character_set_connection latin1 character_set_database latin1 character_set_filesystem binary diff --git a/mysql-test/main/ctype_utf8mb4.result b/mysql-test/main/ctype_utf8mb4.result index 26e8784e4f1..00d22c32b94 100644 --- a/mysql-test/main/ctype_utf8mb4.result +++ b/mysql-test/main/ctype_utf8mb4.result @@ -1844,6 +1844,7 @@ SET CHARACTER SET utf8mb4; SHOW VARIABLES LIKE 'character\_set\_%'; Variable_name Value character_set_client utf8mb4 +character_set_collations character_set_connection 
latin1 character_set_database latin1 character_set_filesystem binary diff --git a/mysql-test/main/ctype_utf8mb4_heap.result b/mysql-test/main/ctype_utf8mb4_heap.result index 657f06366e3..4b23040db9c 100644 --- a/mysql-test/main/ctype_utf8mb4_heap.result +++ b/mysql-test/main/ctype_utf8mb4_heap.result @@ -1676,6 +1676,7 @@ SET CHARACTER SET utf8mb4; SHOW VARIABLES LIKE 'character\_set\_%'; Variable_name Value character_set_client utf8mb4 +character_set_collations character_set_connection latin1 character_set_database latin1 character_set_filesystem binary diff --git a/mysql-test/main/ctype_utf8mb4_innodb.result b/mysql-test/main/ctype_utf8mb4_innodb.result index da6a465647e..0291823ded3 100644 --- a/mysql-test/main/ctype_utf8mb4_innodb.result +++ b/mysql-test/main/ctype_utf8mb4_innodb.result @@ -1802,6 +1802,7 @@ SET CHARACTER SET utf8mb4; SHOW VARIABLES LIKE 'character\_set\_%'; Variable_name Value character_set_client utf8mb4 +character_set_collations character_set_connection latin1 character_set_database latin1 character_set_filesystem binary diff --git a/mysql-test/main/ctype_utf8mb4_myisam.result b/mysql-test/main/ctype_utf8mb4_myisam.result index 7e579e609b0..0ccde4ccac9 100644 --- a/mysql-test/main/ctype_utf8mb4_myisam.result +++ b/mysql-test/main/ctype_utf8mb4_myisam.result @@ -1809,6 +1809,7 @@ SET CHARACTER SET utf8mb4; SHOW VARIABLES LIKE 'character\_set\_%'; Variable_name Value character_set_client utf8mb4 +character_set_collations character_set_connection latin1 character_set_database latin1 character_set_filesystem binary diff --git a/mysql-test/main/mysqlbinlog.result b/mysql-test/main/mysqlbinlog.result index f7c7b2c677e..71d955da15b 100644 --- a/mysql-test/main/mysqlbinlog.result +++ b/mysql-test/main/mysqlbinlog.result @@ -549,6 +549,7 @@ SET @@session.sql_mode=#/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C latin1 *//*!*/; SET 
@@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=#/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; create table t1 (a varchar(64) character set utf8) diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 5720988df2f..9c0ab88d361 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -144,6 +144,8 @@ The following specify which files/extra groups are read (specified before remain Don't ignore client side character set value sent during handshake. (Defaults to on; use --skip-character-set-client-handshake to disable.) + --character-set-collations=name + Set default collations for character sets. --character-set-filesystem=name Set the filesystem character set. -C, --character-set-server=name @@ -1532,6 +1534,7 @@ binlog-row-metadata NO_LOG binlog-stmt-cache-size 32768 bulk-insert-buffer-size 8388608 character-set-client-handshake TRUE +character-set-collations character-set-filesystem binary character-sets-dir MYSQL_CHARSETSDIR/ chroot (No default value) diff --git a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row.result b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row.result index b793887af0a..82b56eb4ad8 100644 --- a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row.result +++ b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row.result @@ -2320,6 +2320,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c37 NATIONAL CHAR) /*!*/; # at # @@ -2378,6 +2379,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET 
@@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c38 NATIONAL CHAR(0)) /*!*/; # at # @@ -2436,6 +2438,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c39 NATIONAL CHAR(1)) /*!*/; # at # @@ -2494,6 +2497,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c40 NATIONAL CHAR(255)) /*!*/; # at # @@ -2576,6 +2580,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c41 CHAR CHARACTER SET UCS2) /*!*/; # at # @@ -2634,6 +2639,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c42 CHAR(0) CHARACTER SET UCS2) /*!*/; # at # @@ -2692,6 +2698,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c43 CHAR(1) CHARACTER SET UCS2) /*!*/; # at # @@ -2750,6 +2757,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c44 CHAR(255) CHARACTER SET UCS2) /*!*/; # at # @@ -3064,6 +3072,7 @@ DROP TABLE `t1` 
/* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c49 NATIONAL VARCHAR(0)) /*!*/; # at # @@ -3122,6 +3131,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c50 NATIONAL VARCHAR(1)) /*!*/; # at # @@ -3180,6 +3190,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c51 NATIONAL VARCHAR(255)) /*!*/; # at # @@ -3262,6 +3273,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c52 NATIONAL VARCHAR(261)) /*!*/; # at # @@ -3344,6 +3356,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c53 VARCHAR(0) CHARACTER SET ucs2) /*!*/; # at # @@ -3402,6 +3415,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c54 VARCHAR(1) CHARACTER SET ucs2) /*!*/; # at # @@ -3460,6 +3474,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= 
SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c55 VARCHAR(255) CHARACTER SET ucs2) /*!*/; # at # @@ -3518,6 +3533,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c56 VARCHAR(261) CHARACTER SET ucs2) /*!*/; # at # @@ -4656,6 +4672,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c67 TINYTEXT CHARACTER SET UCS2) /*!*/; # at # @@ -4714,6 +4731,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c70 TEXT CHARACTER SET UCS2) /*!*/; # at # @@ -4772,6 +4790,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c73 MEDIUMTEXT CHARACTER SET UCS2) /*!*/; # at # @@ -4830,6 +4849,7 @@ DROP TABLE `t1` /* generated by server */ # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t1 (c76 LONGTEXT CHARACTER SET UCS2) /*!*/; # at # diff --git a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_innodb.result b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_innodb.result index 1340337c0bd..c555fe70a74 100644 --- a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_innodb.result +++ 
b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_innodb.result @@ -2275,6 +2275,7 @@ SET @@session.sql_mode=0/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( @@ -5284,6 +5285,7 @@ SET @@session.sql_mode=0/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( @@ -5684,6 +5686,7 @@ SET @@session.sql_mode=0/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( @@ -5698,6 +5701,7 @@ c_1_n INT -- row number # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t2 ( c_2_1 DATE, c_2_2 VARCHAR(255), @@ -5710,6 +5714,7 @@ c_2_n INT -- row number # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t3 ( c_3_1 DATE, c_3_2 VARCHAR(255), @@ -6322,6 +6327,7 @@ SET @@session.sql_mode=0/*!*/; SET @@session.auto_increment_increment=1, 
@@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( diff --git a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_myisam.result b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_myisam.result index ec145fa4059..0f4c328b2d3 100644 --- a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_myisam.result +++ b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_myisam.result @@ -2273,6 +2273,7 @@ SET @@session.sql_mode=0/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( @@ -5305,6 +5306,7 @@ SET @@session.sql_mode=0/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( @@ -5711,6 +5713,7 @@ SET @@session.sql_mode=0/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( @@ -5725,6 +5728,7 @@ c_1_n INT -- row number # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query 
thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t2 ( c_2_1 DATE, c_2_2 VARCHAR(255), @@ -5737,6 +5741,7 @@ c_2_n INT -- row number # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t3 ( c_3_1 DATE, c_3_2 VARCHAR(255), @@ -6359,6 +6364,7 @@ SET @@session.sql_mode=0/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=33,@@session.collation_connection=33,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( diff --git a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_trans.result b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_trans.result index 5735b9b804d..1a49ad29ed3 100644 --- a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_trans.result +++ b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_row_trans.result @@ -151,6 +151,7 @@ SET @@session.sql_mode=1411383296/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C latin1 *//*!*/; SET @@session.character_set_client=X,@@session.collation_connection=X,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 ( @@ -164,6 +165,7 @@ c2 VARCHAR(20) # at # #010909 4:46:40 server id 1 end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations=''/*!*/; CREATE TABLE t2 ( c1 INT, c2 VARCHAR(20) diff --git a/mysql-test/suite/binlog/r/binlog_row_ctype_ucs.result b/mysql-test/suite/binlog/r/binlog_row_ctype_ucs.result index 
c02912b85d9..de8a9e89992 100644 --- a/mysql-test/suite/binlog/r/binlog_row_ctype_ucs.result +++ b/mysql-test/suite/binlog/r/binlog_row_ctype_ucs.result @@ -97,6 +97,7 @@ SET @@session.sql_mode=1411383296/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=X,@@session.collation_connection=X,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8) diff --git a/mysql-test/suite/binlog/r/binlog_stm_ctype_ucs.result b/mysql-test/suite/binlog/r/binlog_stm_ctype_ucs.result index f9dc5d64753..380e6f86fdf 100644 --- a/mysql-test/suite/binlog/r/binlog_stm_ctype_ucs.result +++ b/mysql-test/suite/binlog/r/binlog_stm_ctype_ucs.result @@ -99,6 +99,7 @@ SET @@session.sql_mode=1411383296/*!*/; SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; /*!\C utf8mb3 *//*!*/; SET @@session.character_set_client=X,@@session.collation_connection=X,@@session.collation_server=X/*!*/; +SET @@session.character_set_collations=''/*!*/; SET @@session.lc_time_names=0/*!*/; SET @@session.collation_database=DEFAULT/*!*/; CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8) @@ -126,6 +127,7 @@ START TRANSACTION # at # #YYMMDD HH:MM:SS server id # end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=XXX/*!*/; +SET @@session.character_set_collations=''/*!*/; INSERT INTO t1 VALUES ('ä(i2)') /*!*/; # at # @@ -171,6 +173,7 @@ START TRANSACTION # at # #YYMMDD HH:MM:SS server id # end_log_pos # CRC32 XXX Query thread_id=# exec_time=# error_code=0 xid= SET TIMESTAMP=XXX/*!*/; +SET @@session.character_set_collations=''/*!*/; INSERT INTO t1 VALUES ('ä(p2)') /*!*/; # at # diff --git a/mysql-test/suite/binlog/r/binlog_stm_mysqlbinlog_collate_implicit.result 
b/mysql-test/suite/binlog/r/binlog_stm_mysqlbinlog_collate_implicit.result new file mode 100644 index 00000000000..0bcb572fb54 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_stm_mysqlbinlog_collate_implicit.result @@ -0,0 +1,193 @@ +RESET MASTER; +SET timestamp=1000000000; +# +# MDEV-30164 System variable for default collations +# +SET character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'; +CREATE TABLE t1 (a VARCHAR(20)); +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4); +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3 COLLATE utf8mb3_bin); +DROP TABLE t1; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3); +INSERT INTO t1 VALUES ('a00'); +INSERT INTO t1 VALUES (_utf8mb3'a01-utf8mb3'); +INSERT INTO t1 VALUES (_utf8mb4'a01-utf8mb4'); +PREPARE stmt FROM 'INSERT INTO t1 VALUES (?)'; +EXECUTE stmt USING _utf8mb3'a02-utf8mb3'; +EXECUTE stmt USING _utf8mb4'a02-utf8mb4'; +EXECUTE stmt USING CONVERT('a03-utf8mb3' USING utf8mb3); +EXECUTE stmt USING CONVERT('a03-utf8mb4' USING utf8mb4); +EXECUTE stmt USING IF(0,CONVERT('a04-utf8mb3' USING utf8mb3),CONVERT('a03-utf8mb4' USING utf8mb4)); +EXECUTE stmt USING IF(1,CONVERT('a04-utf8mb3' USING utf8mb3),CONVERT('a03-utf8mb4' USING utf8mb4)); +DEALLOCATE PREPARE stmt; +DROP TABLE t1; +PREPARE stmt FROM 'CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4)'; +EXECUTE stmt; +DROP TABLE t1; +DEALLOCATE PREPARE stmt; +PREPARE stmt FROM 'CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3)'; +EXECUTE stmt; +DROP TABLE t1; +DEALLOCATE PREPARE stmt; +EXECUTE IMMEDIATE 'CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4)'; +DROP TABLE t1; +EXECUTE IMMEDIATE 'CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3)'; +DROP TABLE t1; +FLUSH LOGS; + +--- ---- --- +/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=1*/; +/*!40019 SET @@session.max_delayed_threads=0*/; +/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/; +DELIMITER /*!*/; +ROLLBACK/*!*/; +use `test`/*!*/; 
+SET TIMESTAMP=1000000000/*!*/; +SET @@session.pseudo_thread_id=999999999/*!*/; +SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=0, @@session.unique_checks=1, @@session.autocommit=1, @@session.check_constraint_checks=1, @@session.sql_if_exists=0, @@session.explicit_defaults_for_timestamp=1, @@session.system_versioning_insert_history=0/*!*/; +SET @@session.sql_mode=#/*!*/; +SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/; +/*!\C latin1 *//*!*/; +SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=#/*!*/; +SET @@session.lc_time_names=0/*!*/; +SET @@session.collation_database=DEFAULT/*!*/; +CREATE TABLE t1 (a VARCHAR(20)) +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +DROP TABLE `t1` /* generated by server */ +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4) +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +DROP TABLE `t1` /* generated by server */ +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3 COLLATE utf8mb3_bin) +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +DROP TABLE `t1` /* generated by server */ +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3) +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +INSERT INTO t1 VALUES ('a00') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +INSERT INTO t1 VALUES (_utf8mb3'a01-utf8mb3') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +INSERT INTO t1 VALUES 
(_utf8mb4'a01-utf8mb4') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +INSERT INTO t1 VALUES ('a02-utf8mb3') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +INSERT INTO t1 VALUES ('a02-utf8mb4') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +INSERT INTO t1 VALUES ('a03-utf8mb3') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +INSERT INTO t1 VALUES ('a03-utf8mb4') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +INSERT INTO t1 VALUES ('a03-utf8mb4') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +START TRANSACTION +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +INSERT INTO t1 VALUES ('a04-utf8mb3') +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +COMMIT +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +DROP TABLE `t1` /* generated by server */ +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4) +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +DROP TABLE `t1` /* generated by server */ +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET 
utf8mb3) +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +DROP TABLE `t1` /* generated by server */ +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4) +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +DROP TABLE `t1` /* generated by server */ +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +SET @@session.character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'/*!*/; +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3) +/*!*/; +SET TIMESTAMP=1000000000/*!*/; +DROP TABLE `t1` /* generated by server */ +/*!*/; +DELIMITER ; +# End of log file +ROLLBACK /* added by mysqlbinlog */; +/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/; +/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/; diff --git a/mysql-test/suite/binlog/t/binlog_stm_mysqlbinlog_collate_implicit.test b/mysql-test/suite/binlog/t/binlog_stm_mysqlbinlog_collate_implicit.test new file mode 100644 index 00000000000..77dad9deb94 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_stm_mysqlbinlog_collate_implicit.test @@ -0,0 +1,77 @@ +-- source include/have_utf8.inc +-- source include/have_utf8mb4.inc +-- source include/have_ucs2.inc +-- source include/have_binlog_format_statement.inc +-- source include/have_log_bin.inc + +--disable_query_log +CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); +--enable_query_log + +RESET MASTER; +SET timestamp=1000000000; + +--echo # +--echo # MDEV-30164 System variable for default collations +--echo # + +SET character_set_collations='utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'; + +CREATE TABLE t1 (a VARCHAR(20)); +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4); +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3 COLLATE utf8mb3_bin); +DROP TABLE t1; + +CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3); + +INSERT INTO t1 VALUES ('a00'); +INSERT INTO t1 VALUES 
(_utf8mb3'a01-utf8mb3'); +INSERT INTO t1 VALUES (_utf8mb4'a01-utf8mb4'); + +PREPARE stmt FROM 'INSERT INTO t1 VALUES (?)'; +EXECUTE stmt USING _utf8mb3'a02-utf8mb3'; +EXECUTE stmt USING _utf8mb4'a02-utf8mb4'; + +EXECUTE stmt USING CONVERT('a03-utf8mb3' USING utf8mb3); +EXECUTE stmt USING CONVERT('a03-utf8mb4' USING utf8mb4); + +EXECUTE stmt USING IF(0,CONVERT('a04-utf8mb3' USING utf8mb3),CONVERT('a03-utf8mb4' USING utf8mb4)); +EXECUTE stmt USING IF(1,CONVERT('a04-utf8mb3' USING utf8mb3),CONVERT('a03-utf8mb4' USING utf8mb4)); + +DEALLOCATE PREPARE stmt; + +DROP TABLE t1; + +PREPARE stmt FROM 'CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4)'; +EXECUTE stmt; +DROP TABLE t1; +DEALLOCATE PREPARE stmt; + +PREPARE stmt FROM 'CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3)'; +EXECUTE stmt; +DROP TABLE t1; +DEALLOCATE PREPARE stmt; + + +EXECUTE IMMEDIATE 'CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb4)'; +DROP TABLE t1; + +EXECUTE IMMEDIATE 'CREATE TABLE t1 (a VARCHAR(20) CHARACTER SET utf8mb3)'; +DROP TABLE t1; + + +### Starting master-bin.000002 +FLUSH LOGS; + +--disable_query_log +SELECT "--- ---- ---" as ""; +--enable_query_log + +let $MYSQLD_DATADIR= `select @@datadir`; +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/ /@@session.sql_mode=\d+/@@session.sql_mode=#/ /collation_server=\d+/collation_server=#/ +--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ $MYSQLD_DATADIR/master-bin.000001 diff --git a/mysql-test/suite/rpl/r/rpl_ctype_collate_implicit.result b/mysql-test/suite/rpl/r/rpl_ctype_collate_implicit.result new file mode 100644 index 00000000000..0616b22e693 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_ctype_collate_implicit.result @@ -0,0 +1,83 @@ +include/master-slave.inc +[connection master] +# +# MDEV-30164 System variable for default collations +# +connection master; +SET @@character_set_collations='utf8mb3=uca1400_ai_ci,' + 
'utf8mb4=uca1400_ai_ci,' + 'ucs2=uca1400_ai_ci,' + 'utf16=uca1400_ai_ci,' + 'utf32=uca1400_ai_ci'; +connection master; +CREATE TABLE t1 AS SELECT CHAR(0x61 USING utf8mb4); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `CHAR(0x61 USING utf8mb4)` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +connection slave; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `CHAR(0x61 USING utf8mb4)` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +connection master; +DROP TABLE t1; +connection slave; +connection master; +CREATE TABLE t1 AS SELECT CONVERT('a' USING utf8mb4); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `CONVERT('a' USING utf8mb4)` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +connection slave; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `CONVERT('a' USING utf8mb4)` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +connection master; +DROP TABLE t1; +connection slave; +connection master; +CREATE TABLE t1 ( +c0 TEXT CHARACTER SET utf8mb3, +c1 TEXT CHARACTER SET utf8mb4, +c2 TEXT CHARACTER SET utf16, +c3 TEXT CHARACTER SET utf32, +c4 TEXT CHARACTER SET ucs2 +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c0` text CHARACTER SET utf8mb3 COLLATE utf8mb3_uca1400_ai_ci DEFAULT NULL, + `c1` text CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL, + `c2` text CHARACTER SET utf16 COLLATE utf16_uca1400_ai_ci DEFAULT NULL, + `c3` text CHARACTER SET utf32 COLLATE utf32_uca1400_ai_ci DEFAULT NULL, + `c4` text CHARACTER SET ucs2 COLLATE ucs2_uca1400_ai_ci DEFAULT NULL +) 
ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +connection slave; +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c0` text CHARACTER SET utf8mb3 COLLATE utf8mb3_uca1400_ai_ci DEFAULT NULL, + `c1` text CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci DEFAULT NULL, + `c2` text CHARACTER SET utf16 COLLATE utf16_uca1400_ai_ci DEFAULT NULL, + `c3` text CHARACTER SET utf32 COLLATE utf32_uca1400_ai_ci DEFAULT NULL, + `c4` text CHARACTER SET ucs2 COLLATE ucs2_uca1400_ai_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +connection master; +DROP TABLE t1; +connection slave; +connection master; +CREATE DATABASE db1 CHARACTER SET utf8mb4; +connection slave; +SHOW CREATE DATABASE db1; +Database Create Database +db1 CREATE DATABASE `db1` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_uca1400_ai_ci */ +connection master; +DROP DATABASE db1; +connection slave; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_ctype_collate_implicit.test b/mysql-test/suite/rpl/t/rpl_ctype_collate_implicit.test new file mode 100644 index 00000000000..ba4e3feda7a --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_ctype_collate_implicit.test @@ -0,0 +1,60 @@ +--source include/have_binlog_format_row.inc +--source include/master-slave.inc + +--echo # +--echo # MDEV-30164 System variable for default collations +--echo # + +--connection master +SET @@character_set_collations='utf8mb3=uca1400_ai_ci,' + 'utf8mb4=uca1400_ai_ci,' + 'ucs2=uca1400_ai_ci,' + 'utf16=uca1400_ai_ci,' + 'utf32=uca1400_ai_ci'; + +--connection master +CREATE TABLE t1 AS SELECT CHAR(0x61 USING utf8mb4); +SHOW CREATE TABLE t1; +--sync_slave_with_master +SHOW CREATE TABLE t1; +--connection master +DROP TABLE t1; +--sync_slave_with_master + + +--connection master +CREATE TABLE t1 AS SELECT CONVERT('a' USING utf8mb4); +SHOW CREATE TABLE t1; +--sync_slave_with_master +SHOW CREATE TABLE t1; +--connection master +DROP TABLE t1; +--sync_slave_with_master + 
+ +--connection master +CREATE TABLE t1 ( + c0 TEXT CHARACTER SET utf8mb3, + c1 TEXT CHARACTER SET utf8mb4, + c2 TEXT CHARACTER SET utf16, + c3 TEXT CHARACTER SET utf32, + c4 TEXT CHARACTER SET ucs2 +); +SHOW CREATE TABLE t1; +--sync_slave_with_master +SHOW CREATE TABLE t1; +--connection master +DROP TABLE t1; +--sync_slave_with_master + + +--connection master +CREATE DATABASE db1 CHARACTER SET utf8mb4; +--sync_slave_with_master +SHOW CREATE DATABASE db1; +--connection master +DROP DATABASE db1; +--sync_slave_with_master + + +--source include/rpl_end.inc diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index f896421559c..d0107933c8c 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -492,6 +492,16 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT NULL +VARIABLE_NAME CHARACTER_SET_COLLATIONS +VARIABLE_SCOPE SESSION +VARIABLE_TYPE VARCHAR +VARIABLE_COMMENT Default collations for character sets +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT NULL VARIABLE_NAME CHARACTER_SET_CONNECTION VARIABLE_SCOPE SESSION VARIABLE_TYPE ENUM diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 23942418b07..e66bee1985c 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -502,6 +502,16 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT NULL +VARIABLE_NAME CHARACTER_SET_COLLATIONS +VARIABLE_SCOPE SESSION +VARIABLE_TYPE VARCHAR +VARIABLE_COMMENT Default collations for character sets +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY NO 
+COMMAND_LINE_ARGUMENT NULL VARIABLE_NAME CHARACTER_SET_CONNECTION VARIABLE_SCOPE SESSION VARIABLE_TYPE ENUM diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 89b0bb21414..6f330e5bfe0 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -163,7 +163,7 @@ SET (SQL_SOURCE semisync.cc semisync_master.cc semisync_slave.cc semisync_master_ack_receiver.cc sql_schema.cc - lex_charset.cc + lex_charset.cc charset_collations.cc sql_type.cc sql_mode.cc sql_type_json.cc sql_type_string.cc sql_type_geom.cc diff --git a/sql/charset_collations.cc b/sql/charset_collations.cc new file mode 100644 index 00000000000..c2c2cc2e7d6 --- /dev/null +++ b/sql/charset_collations.cc @@ -0,0 +1,107 @@ +/* Copyright (c) 2023, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+
+#include "my_global.h"
+#include "my_sys.h"
+#include "lex_charset.h"
+#include "mysqld_error.h"
+#include "charset_collations.h"
+#include "simple_tokenizer.h"
+
+/*
+  Add (or overwrite) the default collation for one character set.
+
+  The collation is first resolved against the character set through
+  Lex_exact_charset_opt_extended_collate::merge_collation_override(),
+  then the resulting (charset, collation) pair is stored in the map.
+
+  @param charset    The character set whose default collation is being set.
+  @param collation  The requested collation for this character set.
+  @param error_on_conflicting_duplicate
+                    When true, an already stored entry for the same
+                    character set that points to a different collation
+                    is reported as ER_CONFLICTING_DECLARATIONS instead
+                    of being overwritten.
+
+  @retval false  The pair was stored.
+  @retval true   Error: the merge failed, a conflicting duplicate was
+                 found, or the underlying insert failed.
+*/
+bool Charset_collation_map_st::insert_or_replace(
+                                  const Lex_exact_charset &charset,
+                                  const Lex_extended_collation &collation,
+                                  bool error_on_conflicting_duplicate)
+{
+  Lex_exact_charset_opt_extended_collate res(charset);
+  Used used;
+  if (res.merge_collation_override(&used, *this, collation))
+    return true;
+
+  if (error_on_conflicting_duplicate)
+  {
+    const Elem_st *dup;
+    if ((dup= find_elem_by_charset_id(charset.charset_info()->number)) &&
+        dup->collation() != res.collation().charset_info())
+    {
+      my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
+               "", dup->collation()->coll_name.str,
+               "", res.collation().charset_info()->coll_name.str);
+      return true;
+    }
+  }
+  return insert_or_replace(Elem(charset.charset_info(),
+                                res.collation().charset_info()));
+}
+
+
+/*
+  Copy an identifier token into a fixed-size, NUL-terminated buffer.
+
+  strmake() writes "length" bytes plus the trailing '\0', so the length
+  passed to it must never exceed dst_size - 1. The copy is clamped here
+  so that an over-long token can never write past the end of dst.
+
+  @param dst       Destination buffer.
+  @param dst_size  Size of dst in bytes, including room for '\0'.
+  @param src       The token to copy (not necessarily NUL-terminated).
+
+  @retval false  The whole token fit into dst.
+  @retval true   The token was too long; dst holds a truncated prefix.
+*/
+static bool copy_ident(char *dst, size_t dst_size, const LEX_CSTRING &src)
+{
+  const bool truncated= src.length >= dst_size;
+  strmake(dst, src.str, truncated ? dst_size - 1 : src.length);
+  return truncated;
+}
+
+
+/*
+  Initialize the map from its text representation:
+
+    [ charset = collation [ , charset = collation ] ... ]
+
+  e.g. 'utf8mb3=utf8mb3_bin,ucs2=ucs2_bin'. Spaces are skipped around
+  names, '=' and ','. An empty (or all-spaces) string yields an empty map.
+
+  The map is reset with init() before parsing, so on error it may be left
+  holding only the pairs that were parsed before the failure.
+
+  @param str        The text to parse.
+  @param utf8_flag  Flags passed through to the charset and collation
+                    name lookups.
+
+  @retval false  Success.
+  @retval true   Error: bad syntax, an unknown or over-long character set
+                 or collation name, or a conflicting duplicate declaration
+                 for the same character set.
+*/
+bool Charset_collation_map_st::from_text(const LEX_CSTRING &str, myf utf8_flag)
+{
+  init();
+  Simple_tokenizer stream(str.str, str.length);
+
+  stream.get_spaces();
+  if (stream.eof())
+    return false; /* Empty string */
+
+  for ( ; ; )
+  {
+    LEX_CSTRING charset_name= stream.get_ident();
+    if (!charset_name.length)
+      return true;
+    stream.get_spaces();
+    if (stream.get_char('='))
+      return true;
+    stream.get_spaces();
+    LEX_CSTRING collation_name= stream.get_ident();
+    if (!collation_name.length)
+      return true;
+
+    /*
+      The token lengths come straight from the user-supplied value and
+      are not bounded by the tokenizer. A name that does not fit into
+      the buffer cannot be a known character set, so it is rejected
+      without being looked up (the error shows the truncated prefix).
+    */
+    char charset_name_c[MY_CS_CHARACTER_SET_NAME_SIZE + 1/*for '\0'*/];
+    const bool charset_name_too_long=
+      copy_ident(charset_name_c, sizeof(charset_name_c), charset_name);
+    CHARSET_INFO *cs= charset_name_too_long ? nullptr :
+                      get_charset_by_csname(charset_name_c,
+                                            MY_CS_PRIMARY, utf8_flag);
+    if (!cs)
+    {
+      my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), charset_name_c);
+      return true;
+    }
+
+    /* Same bound check for the collation name */
+    char collation_name_c[MY_CS_COLLATION_NAME_SIZE + 1/*for '\0'*/];
+    if (copy_ident(collation_name_c, sizeof(collation_name_c),
+                   collation_name))
+    {
+      my_error(ER_UNKNOWN_COLLATION, MYF(0), collation_name_c);
+      return true;
+    }
+
+    /*
+      tmp is created from a placeholder collation (my_charset_bin) and
+      then overwritten by the name lookup below.
+    */
+    Lex_exact_collation tmpec(&my_charset_bin);
+    Lex_extended_collation tmp(tmpec);
+    if (tmp.set_by_name(collation_name_c, utf8_flag))
+      return true;
+
+    /*
+      Don't allow duplicate conflicting declarations within the same string:
+        SET @@var='utf8mb3=utf8mb3_general_ci,utf8mb3=utf8mb3_bin';
+    */
+    if (insert_or_replace(Lex_exact_charset(cs), tmp, true/*err on dup*/))
+      return true;
+
+    /* After a pair: either the end of the string, or ',' and a new pair */
+    stream.get_spaces();
+    if (stream.eof())
+      break;
+    if (stream.ptr()[0] != ',')
+      return true;
+    stream.get_char(',');
+    stream.get_spaces();
+  }
+  return false;
+}
diff --git a/sql/charset_collations.h b/sql/charset_collations.h
new file mode 100644
index 00000000000..6d1a96c4151
--- /dev/null
+++ b/sql/charset_collations.h
@@ -0,0 +1,265 @@
+/* Copyright (c) 2023, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef LEX_CHARSET_COLLATIONS_INCLUDED +#define LEX_CHARSET_COLLATIONS_INCLUDED + +struct Charset_collation_map_st +{ +public: + + class Used + { + public: + enum map_used_t + { + USED_NONE= 0, + USED_COMPILED_COLLATION= 1 << 0, + USED_MAPPED_COLLATION= 1 << 1 + }; + protected: + map_used_t m_used; + public: + Used() + :m_used(USED_NONE) + { } + void add(map_used_t flag) + { + m_used= (map_used_t) ((uint) m_used | (uint) flag); + } + }; + + struct Elem_st + { + protected: + CHARSET_INFO *m_charset; + CHARSET_INFO *m_collation; + static size_t print_lex_string(char *dst, const LEX_CSTRING &str) + { + memcpy(dst, str.str, str.length); + return str.length; + } + public: + /* + Size in text format: 'utf8mb4=utf8mb4_unicode_ai_ci' + */ + static constexpr size_t text_size_max() + { + return MY_CS_CHARACTER_SET_NAME_SIZE + 1 + + MY_CS_COLLATION_NAME_SIZE; + } + CHARSET_INFO *charset() const + { + return m_charset; + } + CHARSET_INFO *collation() const + { + return m_collation; + } + void set_collation(CHARSET_INFO *cl) + { + m_collation= cl; + } + size_t print(char *dst) const + { + const char *dst0= dst; + dst+= print_lex_string(dst, m_charset->cs_name); + *dst++= '='; + dst+= print_lex_string(dst, m_collation->coll_name); + return (size_t) (dst - dst0); + } + int cmp_by_charset_id(const Elem_st &rhs) const + { + return m_charset->number < rhs.m_charset->number ? -1 : + m_charset->number > rhs.m_charset->number ? 
+1 : 0; + } + }; + class Elem: public Elem_st + { + public: + Elem(CHARSET_INFO *charset, CHARSET_INFO *collation) + { + m_charset= charset; + m_collation= collation; + } + }; +protected: + Elem_st m_element[8]; // Should be enough for now + uint m_count; + uint m_version; + + static int cmp_by_charset_id(const void *a, const void *b) + { + return static_cast(a)-> + cmp_by_charset_id(*static_cast(b)); + } + + void sort() + { + qsort(m_element, m_count, sizeof(Elem_st), cmp_by_charset_id); + } + + const Elem_st *find_elem_by_charset_id(uint id) const + { + if (!m_count) + return NULL; + int first= 0, last= ((int) m_count) - 1; + for ( ; first <= last; ) + { + const int middle= (first + last) / 2; + DBUG_ASSERT(middle >= 0); + DBUG_ASSERT(middle < (int) m_count); + const uint middle_id= m_element[middle].charset()->number; + if (middle_id == id) + return &m_element[middle]; + if (middle_id < id) + first= middle + 1; + else + last= middle - 1; + } + return NULL; + } + + bool insert(const Elem_st &elem) + { + DBUG_ASSERT(elem.charset()->state & MY_CS_PRIMARY); + if (m_count >= array_elements(m_element)) + return true; + m_element[m_count]= elem; + m_count++; + sort(); + return false; + } + + bool insert_or_replace(const Elem_st &elem) + { + DBUG_ASSERT(elem.charset()->state & MY_CS_PRIMARY); + const Elem_st *found= find_elem_by_charset_id(elem.charset()->number); + if (found) + { + const_cast(found)->set_collation(elem.collation()); + return false; + } + return insert(elem); + } + +public: + void init() + { + m_count= 0; + m_version= 0; + } + uint count() const + { + return m_count; + } + uint version() const + { + return m_version; + } + void set(const Charset_collation_map_st &rhs, uint version_increment) + { + uint version= m_version; + *this= rhs; + m_version= version + version_increment; + } + const Elem_st & operator[](uint pos) const + { + DBUG_ASSERT(pos < m_count); + return m_element[pos]; + } + bool insert_or_replace(const class Lex_exact_charset &cs, + const 
class Lex_extended_collation &cl, + bool error_on_conflicting_duplicate); + CHARSET_INFO *get_collation_for_charset(Used *used, + CHARSET_INFO *cs) const + { + DBUG_ASSERT(cs->state & MY_CS_PRIMARY); + const Elem_st *elem= find_elem_by_charset_id(cs->number); + if (elem) + { + used->add(Used::USED_MAPPED_COLLATION); + return elem->collation(); + } + used->add(Used::USED_COMPILED_COLLATION); + return cs; + } + size_t text_format_nbytes_needed() const + { + return (Elem_st::text_size_max() + 1/* for ',' */) * m_count; + } + size_t print(char *dst, size_t nbytes_available) const + { + const char *dst0= dst; + const char *end= dst + nbytes_available; + for (uint i= 0; i < m_count; i++) + { + if (Elem_st::text_size_max() + 1/* for ',' */ > (size_t) (end - dst)) + break; + if (i > 0) + *dst++= ','; + dst+= m_element[i].print(dst); + } + return dst - dst0; + } + static constexpr size_t binary_size_max() + { + return 1/*count*/ + 4 * array_elements(m_element); + } + size_t to_binary(char *dst) const + { + const char *dst0= dst; + *dst++= (char) (uchar) m_count; + for (uint i= 0; i < m_count; i++) + { + int2store(dst, (uint16) m_element[i].charset()->number); + dst+= 2; + int2store(dst, (uint16) m_element[i].collation()->number); + dst+= 2; + } + return (size_t) (dst - dst0); + } + size_t from_binary(const char *src, size_t srclen) + { + const char *src0= src; + init(); + if (!srclen) + return 0; // Empty + uint count= (uchar) *src++; + if (srclen < 1 + 4 * count) + return 0; + for (uint i= 0; i < count; i++, src+= 4) + { + CHARSET_INFO *cs, *cl; + if (!(cs= get_charset(uint2korr(src), MYF(0))) || + !(cl= get_charset(uint2korr(src + 2), MYF(0)))) + { + /* + Unpacking from binary format happens on the slave side. + If for some reasons the slave does not know about a + character set or a collation, just skip the pair here. + This pair might not even be needed. 
+ */ + continue; + } + insert_or_replace(Elem(cs, cl)); + } + return src - src0; + } + bool from_text(const LEX_CSTRING &str, myf utf8_flag); +}; + + +#endif // LEX_CHARSET_COLLATIONS_INCLUDED diff --git a/sql/field.h b/sql/field.h index 13d80099124..48dc8676699 100644 --- a/sql/field.h +++ b/sql/field.h @@ -5340,7 +5340,9 @@ public: - find a _bin collation if the BINARY comparison style was specified, e.g.: CREATE TABLE t1 (a VARCHAR(10) BINARY) CHARSET utf8; */ - bool prepare_charset_for_string(const Column_derived_attributes *dattr); + bool prepare_charset_for_string(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Column_derived_attributes *dattr); /** Prepare a SET/ENUM field. @@ -5497,10 +5499,11 @@ public: bool check_vcol_for_key(THD *thd) const; - void set_charset_collation_attrs(const + void set_charset_collation_attrs(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, const Lex_column_charset_collation_attrs_st &lc) { - charset= lc.charset_info(); + charset= lc.charset_info(used, map); if (lc.is_contextually_typed_collation()) flags|= CONTEXT_COLLATION_FLAG; else diff --git a/sql/handler.h b/sql/handler.h index 77c77c83c0f..6d35d6fd953 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -2347,32 +2347,42 @@ struct Table_specification_st: public HA_CREATE_INFO, convert_charset_collation.init(); } - bool add_table_option_convert_charset(CHARSET_INFO *cs) + bool add_table_option_convert_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + CHARSET_INFO *cs) { // cs can be NULL, e.g.: ALTER TABLE t1 CONVERT TO CHARACTER SET DEFAULT; used_fields|= (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET); return cs ? 
- convert_charset_collation.merge_exact_charset(Lex_exact_charset(cs)) : + convert_charset_collation.merge_exact_charset(used, map, + Lex_exact_charset(cs)) : convert_charset_collation.merge_charset_default(); } - bool add_table_option_convert_collation(const Lex_extended_collation_st &cl) + bool add_table_option_convert_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_extended_collation_st &cl) { used_fields|= (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET); - return convert_charset_collation.merge_collation(cl); + return convert_charset_collation.merge_collation(used, map, cl); } - bool add_table_option_default_charset(CHARSET_INFO *cs) + bool add_table_option_default_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + CHARSET_INFO *cs) { // cs can be NULL, e.g.: CREATE TABLE t1 (..) CHARACTER SET DEFAULT; used_fields|= HA_CREATE_USED_DEFAULT_CHARSET; return cs ? - default_charset_collation.merge_exact_charset(Lex_exact_charset(cs)) : + default_charset_collation.merge_exact_charset(used, map, + Lex_exact_charset(cs)) : default_charset_collation.merge_charset_default(); } - bool add_table_option_default_collation(const Lex_extended_collation_st &cl) + bool add_table_option_default_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_extended_collation_st &cl) { used_fields|= HA_CREATE_USED_DEFAULT_CHARSET; - return default_charset_collation.merge_collation(cl); + return default_charset_collation.merge_collation(used, map, cl); } bool resolve_to_charset_collation_context(THD *thd, diff --git a/sql/item_func.h b/sql/item_func.h index 6e714814526..157eb88406e 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -3784,10 +3784,12 @@ public: } bool set(const Type_handler *handler, const Lex_length_and_dec_st & length_and_dec, + Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, const 
Lex_column_charset_collation_attrs_st &cscl, CHARSET_INFO *defcs) { - CHARSET_INFO *tmp= cscl.resolved_to_character_set(defcs); + CHARSET_INFO *tmp= cscl.resolved_to_character_set(used, map, defcs); if (!tmp) return true; set(handler, length_and_dec, tmp); diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index b38d7086548..f02f0ba6056 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -3830,7 +3830,10 @@ bool Item_func_set_collation::fix_length_and_dec(THD *thd) if (agg_arg_charsets_for_string_result(collation, args, 1)) return true; Lex_exact_charset_opt_extended_collate cl(collation.collation, true); - if (cl.merge_collation_override(m_set_collation)) + Character_set_collations_used used(thd); + if (cl.merge_collation_override(&used, + thd->variables.character_set_collations, + m_set_collation)) return true; collation.set(cl.collation().charset_info(), DERIVATION_EXPLICIT, args[0]->collation.repertoire); diff --git a/sql/json_table.cc b/sql/json_table.cc index 05ee83bd3d8..c4f03201654 100644 --- a/sql/json_table.cc +++ b/sql/json_table.cc @@ -935,7 +935,11 @@ int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, return set(thd, ctype, path, nullptr); CHARSET_INFO *tmp; - if (!(tmp= cl.resolved_to_character_set(&my_charset_utf8mb4_general_ci))) + Character_set_collations_used used(thd); + if (!(tmp= cl.resolved_to_character_set( + &used, + thd->variables.character_set_collations, + &my_charset_utf8mb4_general_ci))) return 1; return set(thd, ctype, path, tmp); } diff --git a/sql/lex_charset.cc b/sql/lex_charset.cc index cfb74a0bf04..0673901bc44 100644 --- a/sql/lex_charset.cc +++ b/sql/lex_charset.cc @@ -197,7 +197,9 @@ Lex_context_collation::raise_if_not_equal(const Lex_context_collation &cl) const CREATE DATABASE db1 COLLATE DEFAULT CHARACTER SET latin1; */ bool Lex_exact_charset_opt_extended_collate:: - merge_context_collation_override(const Lex_context_collation &cl) + 
merge_context_collation_override(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_context_collation &cl) { DBUG_ASSERT(m_ci); @@ -215,7 +217,7 @@ bool Lex_exact_charset_opt_extended_collate:: // COLLATE DEFAULT if (cl.is_contextually_typed_collate_default()) { - CHARSET_INFO *ci= find_default_collation(); + CHARSET_INFO *ci= find_mapped_default_collation(used, map); DBUG_ASSERT(ci); if (!ci) return true; @@ -238,7 +240,9 @@ bool Lex_exact_charset_opt_extended_collate:: } -bool Lex_extended_collation_st::merge_exact_charset(const Lex_exact_charset &cs) +bool Lex_extended_collation_st::merge_exact_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_exact_charset &cs) { switch (m_type) { case TYPE_EXACT: @@ -250,7 +254,7 @@ bool Lex_extended_collation_st::merge_exact_charset(const Lex_exact_charset &cs) { // COLLATE DEFAULT .. CHARACTER SET latin1 Lex_exact_charset_opt_extended_collate tmp(cs); - if (tmp.merge_context_collation(Lex_context_collation(m_ci))) + if (tmp.merge_context_collation(used, map, Lex_context_collation(m_ci))) return true; *this= Lex_extended_collation(tmp.collation()); return false; @@ -419,7 +423,7 @@ CHARSET_INFO *Lex_exact_charset_opt_extended_collate::find_bin_collation() const CHARSET_INFO * -Lex_exact_charset_opt_extended_collate::find_default_collation() const +Lex_exact_charset_opt_extended_collate::find_compiled_default_collation() const { // See comments in find_bin_collation() DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4)); @@ -447,6 +451,17 @@ Lex_exact_charset_opt_extended_collate::find_default_collation() const } +CHARSET_INFO * +Lex_exact_charset_opt_extended_collate:: + find_mapped_default_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map) const +{ + CHARSET_INFO *cs= find_compiled_default_collation(); + if (!cs) + return nullptr; + return map.get_collation_for_charset(used, cs); 
+} + /* Resolve an empty or a contextually typed collation according to the upper level default character set (and optionally a collation), e.g.: @@ -459,7 +474,9 @@ Lex_exact_charset_opt_extended_collate::find_default_collation() const "def" is the upper level CHARACTER SET clause (e.g. of a table) */ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: - resolved_to_character_set(CHARSET_INFO *def) const + resolved_to_character_set(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + CHARSET_INFO *def) const { DBUG_ASSERT(def); @@ -467,6 +484,10 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: case TYPE_EMPTY: return def; case TYPE_CHARACTER_SET: + { + DBUG_ASSERT(m_ci); + return map.get_collation_for_charset(used, m_ci); + } case TYPE_CHARACTER_SET_COLLATE_EXACT: case TYPE_COLLATE_EXACT: DBUG_ASSERT(m_ci); @@ -474,7 +495,7 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: case TYPE_COLLATE_CONTEXTUALLY_TYPED: { Lex_exact_charset_opt_extended_collate tmp(def, true); - if (tmp.merge_context_collation_override(Lex_context_collation(m_ci))) + if (tmp.merge_context_collation_override(used, map, Lex_context_collation(m_ci))) return NULL; return tmp.collation().charset_info(); } @@ -526,7 +547,9 @@ bool Lex_exact_charset_extended_collation_attrs_st:: bool Lex_exact_charset_extended_collation_attrs_st:: - merge_context_collation(const Lex_context_collation &cl) + merge_context_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_context_collation &cl) { switch (m_type) { case TYPE_EMPTY: @@ -540,7 +563,7 @@ bool Lex_exact_charset_extended_collation_attrs_st:: { // CHARACTER SET latin1 .. 
COLLATE DEFAULT Lex_exact_charset_opt_extended_collate tmp(m_ci, false); - if (tmp.merge_context_collation(cl)) + if (tmp.merge_context_collation(used, map, cl)) return true; *this= Lex_exact_charset_extended_collation_attrs(tmp); return false; @@ -582,24 +605,29 @@ bool Lex_exact_charset_opt_extended_collate:: bool Lex_exact_charset_opt_extended_collate:: - merge_context_collation(const Lex_context_collation &cl) + merge_context_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_context_collation &cl) { // CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE DEFAULT if (m_with_collate) return Lex_exact_collation(m_ci). raise_if_conflicts_with_context_collation(cl, false); - return merge_context_collation_override(cl); + return merge_context_collation_override(used, map, cl); } bool Lex_exact_charset_extended_collation_attrs_st:: - merge_collation(const Lex_extended_collation_st &cl) + merge_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_extended_collation_st &cl) { switch (cl.type()) { case Lex_extended_collation_st::TYPE_EXACT: return merge_exact_collation(Lex_exact_collation(cl.charset_info())); case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: - return merge_context_collation(Lex_context_collation(cl.charset_info())); + return merge_context_collation(used, map, + Lex_context_collation(cl.charset_info())); } DBUG_ASSERT(0); return false; @@ -613,7 +641,9 @@ bool Lex_exact_charset_extended_collation_attrs_st:: @param cs - The "CHARACTER SET exact_charset_name". */ bool Lex_exact_charset_extended_collation_attrs_st:: - merge_exact_charset(const Lex_exact_charset &cs) + merge_exact_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_exact_charset &cs) { DBUG_ASSERT(cs.charset_info()); @@ -643,7 +673,7 @@ bool Lex_exact_charset_extended_collation_attrs_st:: // COLLATE DEFAULT .. 
CHARACTER SET cs { Lex_exact_charset_opt_extended_collate tmp(cs); - if (tmp.merge_context_collation(Lex_context_collation(m_ci))) + if (tmp.merge_context_collation(used, map, Lex_context_collation(m_ci))) return true; *this= Lex_exact_charset_extended_collation_attrs(tmp); return false; @@ -664,11 +694,14 @@ bool Lex_extended_charset_extended_collation_attrs_st::merge_charset_default() bool Lex_extended_charset_extended_collation_attrs_st:: - merge_exact_charset(const Lex_exact_charset &cs) + merge_exact_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_exact_charset &cs) { if (m_charset_order == CHARSET_TYPE_EMPTY) m_charset_order= CHARSET_TYPE_EXACT; - return Lex_exact_charset_extended_collation_attrs_st::merge_exact_charset(cs); + return Lex_exact_charset_extended_collation_attrs_st:: + merge_exact_charset(used, map, cs); } @@ -691,13 +724,16 @@ bool Lex_extended_charset_extended_collation_attrs_st:: CHARSET_INFO * Lex_extended_charset_extended_collation_attrs_st:: - resolved_to_context(const Charset_collation_context &ctx) const + resolved_to_context(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Charset_collation_context &ctx) const { if (Lex_opt_context_charset_st::is_empty()) { // Without CHARACTER SET DEFAULT return Lex_exact_charset_extended_collation_attrs_st:: - resolved_to_character_set(ctx.collate_default().charset_info()); + resolved_to_character_set(used, map, + ctx.collate_default().charset_info()); } // With CHARACTER SET DEFAULT @@ -767,8 +803,9 @@ Lex_extended_charset_extended_collation_attrs_st:: ALTER DATABASE db1 COLLATE DEFAULT CHARACTER SET DEFAULT; */ return Lex_exact_charset_extended_collation_attrs_st:: - resolved_to_character_set(ctx.charset_default(). - collation().charset_info()); + resolved_to_character_set(used, map, + ctx.charset_default(). 
+ collation().charset_info()); } DBUG_ASSERT(0); return NULL; diff --git a/sql/lex_charset.h b/sql/lex_charset.h index 2bbeff8a4a6..07617f03d7e 100644 --- a/sql/lex_charset.h +++ b/sql/lex_charset.h @@ -16,6 +16,7 @@ #ifndef LEX_CHARSET_INCLUDED #define LEX_CHARSET_INCLUDED +#include "charset_collations.h" /* An extention for Charset_loader_mysys, @@ -296,7 +297,9 @@ public: bool set_by_name(const char *name, myf my_flags); // e.g. MY_UTF8_IS_UTF8MB3 bool raise_if_conflicts_with_context_collation(const Lex_context_collation &) const; - bool merge_exact_charset(const Lex_exact_charset &rhs); + bool merge_exact_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_exact_charset &rhs); bool merge_exact_collation(const Lex_exact_collation &rhs); bool merge(const Lex_extended_collation_st &rhs); }; @@ -347,7 +350,10 @@ public: } bool with_collate() const { return m_with_collate; } CHARSET_INFO *find_bin_collation() const; - CHARSET_INFO *find_default_collation() const; + CHARSET_INFO *find_compiled_default_collation() const; + CHARSET_INFO *find_mapped_default_collation( + Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map) const; bool raise_if_charsets_differ(const Lex_exact_charset &cs) const; bool raise_if_not_applicable(const Lex_exact_collation &cl) const; /* @@ -355,18 +361,23 @@ public: So the full syntax looks like: CHARACTER SET cs [COLLATE cl] ... 
COLLATE cl2 */ - bool merge_collation(const Lex_extended_collation_st &cl) + bool merge_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_extended_collation_st &cl) { switch (cl.type()) { case Lex_extended_collation_st::TYPE_EXACT: return merge_exact_collation(Lex_exact_collation(cl.charset_info())); case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: - return merge_context_collation(Lex_context_collation(cl.charset_info())); + return merge_context_collation(used, map, + Lex_context_collation(cl.charset_info())); } DBUG_ASSERT(0); return false; } - bool merge_collation_override(const Lex_extended_collation_st &cl) + bool merge_collation_override(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_extended_collation_st &cl) { switch (cl.type()) { case Lex_extended_collation_st::TYPE_EXACT: @@ -374,7 +385,7 @@ public: Lex_exact_collation(cl.charset_info())); case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: return merge_context_collation_override( - Lex_context_collation(cl.charset_info())); + used, map, Lex_context_collation(cl.charset_info())); } DBUG_ASSERT(0); return false; @@ -383,8 +394,12 @@ public: Add a context collation: CHARACTER SET cs [COLLATE cl] ... COLLATE DEFAULT */ - bool merge_context_collation(const Lex_context_collation &cl); - bool merge_context_collation_override(const Lex_context_collation &cl); + bool merge_context_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_context_collation &cl); + bool merge_context_collation_override(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_context_collation &cl); /* Add an exact collation: CHARACTER SET cs [COLLATE cl] ... 
COLLATE latin1_bin @@ -399,7 +414,7 @@ public: { if ((m_ci->state & MY_CS_PRIMARY)) return Lex_exact_charset(m_ci); - return Lex_exact_charset(find_default_collation()); + return Lex_exact_charset(find_compiled_default_collation()); } }; @@ -507,11 +522,13 @@ public: m_ci= cs.charset_info(); m_type= TYPE_CHARACTER_SET; } - bool set_charset_collate_default(const Lex_exact_charset &cs) + bool set_charset_collate_default(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_exact_charset &cs) { CHARSET_INFO *ci; if (!(ci= Lex_exact_charset_opt_extended_collate(cs). - find_default_collation())) + find_mapped_default_collation(used, map))) return true; m_ci= ci; m_type= TYPE_CHARACTER_SET_COLLATE_EXACT; @@ -544,6 +561,21 @@ public: { return m_ci; } + CHARSET_INFO *charset_info(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map) const + { + switch (m_type) + { + case TYPE_CHARACTER_SET: + return map.get_collation_for_charset(used, m_ci); + case TYPE_EMPTY: + case TYPE_CHARACTER_SET_COLLATE_EXACT: + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + case TYPE_COLLATE_EXACT: + break; + } + return m_ci; + } Type type() const { return m_type; @@ -552,7 +584,9 @@ public: { return m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED; } - CHARSET_INFO *resolved_to_character_set(CHARSET_INFO *cs) const; + CHARSET_INFO *resolved_to_character_set(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + CHARSET_INFO *cs) const; /* Merge the column CHARACTER SET clause to: - an exact collation name @@ -561,6 +595,8 @@ public: "cl" corresponds to the COLLATE clause */ bool merge_column_charset_clause_and_collate_clause( + Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, const Lex_exact_charset_extended_collation_attrs_st &cl) { switch (cl.type()) { @@ -569,7 +605,8 @@ public: case TYPE_COLLATE_EXACT: return merge_exact_collation(Lex_exact_collation(cl.charset_info())); case 
TYPE_COLLATE_CONTEXTUALLY_TYPED: - return merge_context_collation(Lex_context_collation(cl.charset_info())); + return merge_context_collation(used, map, + Lex_context_collation(cl.charset_info())); case TYPE_CHARACTER_SET: case TYPE_CHARACTER_SET_COLLATE_EXACT: break; @@ -584,6 +621,8 @@ public: in an independent COLLATE clause in a column attribute. */ bool merge_column_collate_clause_and_collate_clause( + Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, const Lex_exact_charset_extended_collation_attrs_st &cl) { DBUG_ASSERT(m_type != TYPE_CHARACTER_SET); @@ -593,7 +632,8 @@ public: case TYPE_COLLATE_EXACT: return merge_exact_collation(Lex_exact_collation(cl.charset_info())); case TYPE_COLLATE_CONTEXTUALLY_TYPED: - return merge_context_collation(Lex_context_collation(cl.charset_info())); + return merge_context_collation(used, map, + Lex_context_collation(cl.charset_info())); case TYPE_CHARACTER_SET: case TYPE_CHARACTER_SET_COLLATE_EXACT: break; @@ -601,10 +641,16 @@ public: DBUG_ASSERT(0); return false; } - bool merge_exact_charset(const Lex_exact_charset &cs); + bool merge_exact_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_exact_charset &cs); bool merge_exact_collation(const Lex_exact_collation &cl); - bool merge_context_collation(const Lex_context_collation &cl); - bool merge_collation(const Lex_extended_collation_st &cl); + bool merge_context_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_context_collation &cl); + bool merge_collation(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_extended_collation_st &cl); }; @@ -713,9 +759,13 @@ public: } bool raise_if_charset_conflicts_with_default( const Lex_exact_charset_opt_extended_collate &def) const; - CHARSET_INFO *resolved_to_context(const Charset_collation_context &ctx) const; + CHARSET_INFO *resolved_to_context(Charset_collation_map_st::Used 
*used, + const Charset_collation_map_st &map, + const Charset_collation_context &ctx) const; bool merge_charset_default(); - bool merge_exact_charset(const Lex_exact_charset &cs); + bool merge_exact_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_exact_charset &cs); }; diff --git a/sql/log_event.cc b/sql/log_event.cc index 5e255646528..f9f52a88866 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -1379,6 +1379,7 @@ code_name(int code) case Q_HRNOW: return "Q_HRNOW"; case Q_XID: return "XID"; case Q_GTID_FLAGS3: return "Q_GTID_FLAGS3"; + case Q_CHARACTER_SET_COLLATIONS: return "Q_CHARACTER_SET_COLLATIONS"; } sprintf(buf, "CODE#%d", code); return buf; @@ -1424,7 +1425,8 @@ Query_log_event::Query_log_event(const uchar *buf, uint event_len, Log_event_type event_type) :Log_event(buf, description_event), data_buf(0), query(NullS), db(NullS), catalog_len(0), status_vars_len(0), - flags2_inited(0), sql_mode_inited(0), charset_inited(0), flags2(0), + flags2_inited(0), sql_mode_inited(0), charset_inited(0), + character_set_collations({0,0}), flags2(0), auto_increment_increment(1), auto_increment_offset(1), time_zone_len(0), lc_time_names_number(0), charset_database_number(0), table_map_for_update(0), xid(0), master_data_written(0), gtid_flags_extra(0), @@ -1552,6 +1554,17 @@ Query_log_event::Query_log_event(const uchar *buf, uint event_len, pos+= 6; break; } + case Q_CHARACTER_SET_COLLATIONS: + { + const uchar *pos0= pos; + CHECK_SPACE(pos, end, 1); + uint16 count= *pos++; + CHECK_SPACE(pos, end, count * 4); + pos+= count * 4; + character_set_collations= Lex_cstring((const char *) pos0, + (const char *) pos); + break; + } case Q_TIME_ZONE_CODE: { if (get_str_len_and_pointer(&pos, &time_zone_str, &time_zone_len, end)) diff --git a/sql/log_event.h b/sql/log_event.h index 0b1503a5b03..6942e4291b2 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -40,6 +40,7 @@ #include #include #include +#include #ifdef MYSQL_CLIENT 
#include "sql_const.h" @@ -229,7 +230,8 @@ class String; packet (i.e. a query) sent from client to master; First, an auxiliary log_event status vars estimation: */ -#define MAX_SIZE_LOG_EVENT_STATUS (1 + 4 /* type, flags2 */ + \ +#define MAX_SIZE_LOG_EVENT_STATUS (uint) \ + (1 + 4 /* type, flags2 */ + \ 1 + 8 /* type, sql_mode */ + \ 1 + 1 + 255 /* type, length, catalog */ + \ 1 + 4 /* type, auto_increment */ + \ @@ -241,7 +243,10 @@ class String; 1 + 4 /* type, master_data_written */ + \ 1 + 3 /* type, sec_part of NOW() */ + \ 1 + 16 + 1 + 60/* type, user_len, user, host_len, host */ + \ - 1 + 2 + 8 /* type, flags3, seq_no */) + 1 + 2 + 8 /* type, flags3, seq_no */ + \ + 1 + Charset_collation_map_st::binary_size_max() \ + /* type, map */ \ + ) #define MAX_LOG_EVENT_HEADER ( /* in order of Query_log_event::write */ \ LOG_EVENT_HEADER_LEN + /* write_header */ \ QUERY_HEADER_LEN + /* write_data */ \ @@ -323,6 +328,8 @@ class String; #define Q_XID 129 #define Q_GTID_FLAGS3 130 + +#define Q_CHARACTER_SET_COLLATIONS 131 /* Intvar event post-header */ /* Intvar event data */ @@ -2147,6 +2154,8 @@ public: bool sql_mode_inited; bool charset_inited; + LEX_CSTRING character_set_collations; + uint32 flags2; sql_mode_t sql_mode; ulong auto_increment_increment, auto_increment_offset; diff --git a/sql/log_event_client.cc b/sql/log_event_client.cc index 15d3ae8921b..e41e8aaf173 100644 --- a/sql/log_event_client.cc +++ b/sql/log_event_client.cc @@ -1982,6 +1982,38 @@ bool Query_log_event::print_query_header(IO_CACHE* file, memcpy(print_event_info->charset, charset, 6); print_event_info->charset_inited= 1; } + + if (character_set_collations.length) + { + Charset_collation_map_st map; + size_t length= map.from_binary(character_set_collations.str, + character_set_collations.length); + if (length == character_set_collations.length) + { + Binary_string str; + size_t nbytes= map.text_format_nbytes_needed(); + if (str.alloc(nbytes)) + goto err; + size_t text_length= map.print((char*) 
str.ptr(), nbytes); + str.length(text_length); + /* + my_b_printf() does not seem to support '%.*s' + so append a \0 terminator. + */ + str.append_char('\0'); + if (my_b_printf(file, "SET @@session.character_set_collations='%s'%s\n", + str.ptr(), print_event_info->delimiter)) + goto err; + } + else + { + if (my_b_printf(file, + "/* SET @@session.character_set_collations='%s' */\n", + "")) + goto err; + } + } + if (time_zone_len) { if (memcmp(print_event_info->time_zone_str, diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 535a2cf93de..7018f6a0d1d 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -1194,6 +1194,14 @@ bool Query_log_event::write() int2store(start+2, auto_increment_offset); start+= 4; } + + if (thd && (thd->used & THD::CHARACTER_SET_COLLATIONS_USED)) + { + *start++= Q_CHARACTER_SET_COLLATIONS; + size_t len= thd->variables.character_set_collations.to_binary((char*)start); + start+= len; + } + if (charset_inited) { *start++= Q_CHARSET_CODE; @@ -1989,6 +1997,16 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, thd->variables.sql_mode= (sql_mode_t) ((thd->variables.sql_mode & MODE_NO_DIR_IN_CREATE) | (sql_mode & ~(sql_mode_t) MODE_NO_DIR_IN_CREATE)); + + size_t cslen= thd->variables.character_set_collations.from_binary( + character_set_collations.str, + character_set_collations.length); + if (cslen != character_set_collations.length) + { + thd->variables.character_set_collations.init(); + goto compare_errors; // QQ: report an error here? 
+ } + if (charset_inited) { rpl_sql_thread_info *sql_info= thd->system_thread_info.rpl_sql_info; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 23f34dd0d84..f62f0dc1515 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -342,6 +342,7 @@ char *enforced_storage_engine=NULL; char *gtid_pos_auto_engines; plugin_ref *opt_gtid_pos_auto_plugins; static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME; +static const char *character_set_collations_str= ""; Thread_cache thread_cache; static bool binlog_format_used= false; LEX_STRING opt_init_connect, opt_init_slave; @@ -4271,6 +4272,18 @@ static int init_common_variables() */ myf utf8_flag= global_system_variables.old_behavior & OLD_MODE_UTF8_IS_UTF8MB3 ? MY_UTF8_IS_UTF8MB3 : 0; + + if (character_set_collations_str[0]) + { + Lex_cstring_strlen str(character_set_collations_str); + if (global_system_variables.character_set_collations. + from_text(str, utf8_flag)) + { + sql_print_error(ER_DEFAULT(ER_WRONG_VALUE_FOR_VAR), + "character_set_collations", character_set_collations_str); + } + } + for (;;) { char *next_character_set_name= strchr(default_character_set_name, ','); @@ -4289,7 +4302,13 @@ static int init_common_variables() return 1; // Eof of the list } else + { + Charset_collation_map_st::Used used; + default_charset_info= global_system_variables.character_set_collations. 
+ get_collation_for_charset(&used, + default_charset_info); break; + } } if (default_collation_name) @@ -6469,6 +6488,9 @@ struct my_option my_long_options[]= {"collation-server", 0, "Set the default collation.", &default_collation_name, &default_collation_name, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, + {"character-set-collations", 0, "Set default collations for character sets.", + &character_set_collations_str, &character_set_collations_str, + 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 }, {"console", OPT_CONSOLE, "Write error output on screen; don't remove the console window on windows.", &opt_console, &opt_console, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, diff --git a/sql/simple_tokenizer.h b/sql/simple_tokenizer.h new file mode 100644 index 00000000000..e0a7990a522 --- /dev/null +++ b/sql/simple_tokenizer.h @@ -0,0 +1,83 @@ +/* Copyright (c) 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef SIMPLE_TOKENIZER_INCLUDED +#define SIMPLE_TOKENIZER_INCLUDED + + +class Simple_tokenizer /* Forward-only cursor over the length bytes starting at str: skips spaces, reads ASCII identifiers and single expected characters. */ +{ + const char *m_ptr; /* current read position */ + const char *m_end; /* one past the last byte of the input */ +public: + Simple_tokenizer(const char *str, size_t length) + :m_ptr(str), m_end(str + length) + { } + const char *ptr() const /* returns the current read position */ + { + return m_ptr; + } + bool eof() const /* true once the read position has reached or passed m_end */ + { + return m_ptr >= m_end; + } + void get_spaces() /* skip a run of space characters (0x20); no other whitespace is skipped */ + { + for ( ; !eof(); m_ptr++) + { + if (m_ptr[0] != ' ') + break; + } + } + bool is_ident_start(char ch) const /* ch is an ASCII letter or an underscore */ + { + return (ch >= 'a' && ch <= 'z') || + (ch >= 'A' && ch <= 'Z') || + ch == '_'; + } + bool is_ident_body(char ch) const /* ch is an identifier-start character or an ASCII digit */ + { + return is_ident_start(ch) || + (ch >= '0' && ch <= '9'); + } + bool is_ident_start() const /* same test on the current character; false at eof */ + { + return !eof() && is_ident_start(*m_ptr); + } + bool is_ident_body() const /* same test on the current character; false at eof */ + { + return !eof() && is_ident_body(*m_ptr); + } + LEX_CSTRING get_ident() /* read an identifier at the current position; when none starts here, returns a zero-length string and does not advance */ + { + if (!is_ident_start()) + return {m_ptr,0}; + const char *start= m_ptr++; + for ( ; is_ident_body(); m_ptr++) /* consume the remaining identifier characters */ + { } + LEX_CSTRING res= {start, (size_t) (m_ptr - start)}; + return res; + } + bool get_char(char ch) /* consume ch when it is the next character. Returns true on FAILURE (eof or a different character), false on success */ + { + if (eof() || *m_ptr != ch) + return true; + m_ptr++; + return false; + } +}; + + +#endif // SIMPLE_TOKENIZER_INCLUDED diff --git a/sql/sql_class.h b/sql/sql_class.h index 54a213d8553..57eb31a3542 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -887,6 +887,8 @@ typedef struct system_variables vers_asof_timestamp_t vers_asof_timestamp; ulong vers_alter_history; my_bool binlog_alter_two_phase; + + Charset_collation_map_st character_set_collations; } SV; /** @@ -2925,7 +2927,9 @@ public: typedef uint used_t; enum { RAND_USED=1, TIME_ZONE_USED=2, QUERY_START_SEC_PART_USED=4, - THREAD_SPECIFIC_USED=8 }; + THREAD_SPECIFIC_USED=8, + CHARACTER_SET_COLLATIONS_USED= 16 + }; used_t used; @@ -5620,6
+5624,29 @@ public: }; +class Character_set_collations_used: public Charset_collation_map_st::Used /* Scoped tracker: callers pass its address to the charset to collation resolution calls, and its destructor copies the outcome into thd->used. */ +{ + THD *m_thd; /* session whose used flags are updated by the destructor */ +public: + Character_set_collations_used(THD *thd) + :m_thd(thd) + { } + ~Character_set_collations_used() + { + /* + Mark THD that the collation map was used, + no matter if a compiled or a mapped collation was + found during charset->collation resolution. + Even if the map was empty, we still need to print + SET @@session.character_set_collations=''; + in mariadb-binlog output. + */ + if (m_used) /* m_used is not declared in this class; presumably it comes from the Used base, confirm in charset_collations.h */ + m_thd->used|= THD::CHARACTER_SET_COLLATIONS_USED; + } +}; + + /* Start a new independent transaction for the THD. The old one is stored in this object and restored when calling diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc index 6b195ac9fe7..f0baeae1b83 100644 --- a/sql/sql_connect.cc +++ b/sql/sql_connect.cc @@ -801,6 +801,9 @@ bool thd_init_client_charset(THD *thd, uint cs_number) cs->cs_name.str); return true; } + Charset_collation_map_st::Used used; + cs= global_system_variables.character_set_collations.
+ get_collation_for_charset(&used, cs); thd->org_charset= cs; thd->update_charset(cs,cs,cs); } diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 2ddd40568f9..44104c97e84 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -4405,19 +4405,23 @@ public: bool add_alter_list(LEX_CSTRING par_name, Virtual_column_info *expr, bool par_exists); bool add_alter_list(LEX_CSTRING name, LEX_CSTRING new_name, bool exists); - bool add_alter_list_item_convert_to_charset(CHARSET_INFO *cs) + bool add_alter_list_item_convert_to_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + CHARSET_INFO *cs) { - if (create_info.add_table_option_convert_charset(cs)) + if (create_info.add_table_option_convert_charset(used, map, cs)) return true; alter_info.flags|= ALTER_CONVERT_TO; return false; } bool - add_alter_list_item_convert_to_charset(CHARSET_INFO *cs, + add_alter_list_item_convert_to_charset(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + CHARSET_INFO *cs, const Lex_extended_collation_st &cl) { - if (create_info.add_table_option_convert_charset(cs) || - create_info.add_table_option_convert_collation(cl)) + if (create_info.add_table_option_convert_charset(used, map, cs) || + create_info.add_table_option_convert_collation(used, map, cl)) return true; alter_info.flags|= ALTER_CONVERT_TO; return false; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index b5d93e6fd99..0e35beed8f7 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -6049,6 +6049,27 @@ finish: } thd->reset_kill_query(); } + + /* + If a non-default collation (in @@character_set_collations) + was used during the statement, the mysqlbinlog output for + the current statement will contain a sequence like this: + + SET character_set_collations='utf8mb3=utf8mb3_bin'; + INSERT INTO t1 VALUES (_utf8mb3'test'); + COMMIT; + + The statement (INSERT in this example) is already in binlog at this point, + and the "SET character_set_collations" is written inside a +
Q_CHARACTER_SET_COLLATIONS chunk in its log entry header. + The flag CHARACTER_SET_COLLATIONS_USED is not needed any more. + + Let's suppress the flag to avoid a Q_CHARACTER_SET_COLLATIONS chunk + inside the COMMIT log entry header - it would be useless and would + only waste space in the binary log. + */ + thd->used&= ~THD::CHARACTER_SET_COLLATIONS_USED; + if (unlikely(thd->is_error()) || (thd->variables.option_bits & OPTION_MASTER_SQL_ERROR)) { diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 1e84471eef2..7c1930dcbe9 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -209,6 +209,7 @@ public: inline bool is_sql_prepare() const { return flags & (uint) IS_SQL_PREPARE; } void set_sql_prepare() { flags|= (uint) IS_SQL_PREPARE; } bool prepare(const char *packet, uint packet_length); + bool reprepare(); bool execute_loop(String *expanded_query, bool open_cursor, uchar *packet_arg, uchar *packet_end_arg); @@ -221,6 +222,10 @@ public: /* Destroy this statement */ void deallocate(); bool execute_immediate(const char *query, uint query_length); + uint prepare_time_charset_collation_map_version() const + { + return m_prepare_time_charset_collation_map_version; + } private: /** The memory root to allocate parsed tree elements (instances of Item, @@ -228,13 +233,14 @@ private: */ MEM_ROOT main_mem_root; sql_mode_t m_sql_mode; + THD::used_t m_prepare_time_thd_used_flags; + uint m_prepare_time_charset_collation_map_version; private: bool set_db(const LEX_CSTRING *db); bool set_parameters(String *expanded_query, uchar *packet, uchar *packet_end); bool execute(String *expanded_query, bool open_cursor); void deallocate_immediate(); - bool reprepare(); bool validate_metadata(Prepared_statement *copy); void swap_prepared_statement(Prepared_statement *copy); }; @@ -3538,6 +3544,13 @@ static void mysql_stmt_execute_common(THD *thd, DBUG_VOID_RETURN; } + if (stmt->prepare_time_charset_collation_map_version() != + thd->variables.character_set_collations.version()) + { 
+ if (stmt->reprepare()) + DBUG_VOID_RETURN; + } + /* In case of direct execution application decides how many parameters to send. @@ -3628,6 +3641,13 @@ void mysql_sql_stmt_execute(THD *thd) DBUG_VOID_RETURN; } + if (stmt->prepare_time_charset_collation_map_version() != + thd->variables.character_set_collations.version()) + { + if (stmt->reprepare()) + DBUG_VOID_RETURN; + } + if (stmt->param_count != lex->prepared_stmt.param_count()) { my_error(ER_WRONG_ARGUMENTS, MYF(0), "EXECUTE"); @@ -4125,7 +4145,9 @@ Prepared_statement::Prepared_statement(THD *thd_arg) iterations(0), start_param(0), read_types(0), - m_sql_mode(thd->variables.sql_mode) + m_sql_mode(thd->variables.sql_mode), + m_prepare_time_thd_used_flags(0), + m_prepare_time_charset_collation_map_version(0) { init_sql_alloc(key_memory_prepared_statement_main_mem_root, &main_mem_root, thd_arg->variables.query_alloc_block_size, @@ -4505,6 +4527,9 @@ bool Prepared_statement::prepare(const char *packet, uint packet_len) } // The same format as for triggers to compare hr_prepare_time= my_hrtime(); + m_prepare_time_thd_used_flags= thd->used; + m_prepare_time_charset_collation_map_version= + thd->variables.character_set_collations.version(); DBUG_RETURN(error); } @@ -5060,6 +5085,13 @@ Prepared_statement::swap_prepared_statement(Prepared_statement *copy) /* Ditto */ swap_variables(LEX_CSTRING, db, copy->db); + swap_variables(uint, + m_prepare_time_charset_collation_map_version, + copy->m_prepare_time_charset_collation_map_version); + swap_variables(THD::used_t, + m_prepare_time_thd_used_flags, + copy->m_prepare_time_thd_used_flags); + DBUG_ASSERT(param_count == copy->param_count); DBUG_ASSERT(thd == copy->thd); last_error[0]= '\0'; @@ -5220,6 +5252,13 @@ bool Prepared_statement::execute(String *expanded_query, bool open_cursor) MYSQL_QUERY_EXEC_START(thd->query(), thd->thread_id, thd->get_db(), &thd->security_ctx->priv_user[0], (char *) thd->security_ctx->host_or_ip, 1); + /* + If PREPARE used 
@@character_set_collations, + then we need to make sure binary log writes + the map in the event header. + */ + thd->used|= m_prepare_time_thd_used_flags & + THD::CHARACTER_SET_COLLATIONS_USED; error= mysql_execute_command(thd, true); MYSQL_QUERY_EXEC_DONE(error); thd->update_server_status(); diff --git a/sql/sql_show.cc b/sql/sql_show.cc index 320f9f4f97c..84617c47b7a 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -6238,7 +6238,10 @@ int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond) const char *comment; restore_record(table, s->default_values); table->field[0]->store(&tmp_cs->cs_name, scs); - table->field[1]->store(&tmp_cs->coll_name, scs); + Character_set_collations_used used(thd); + CHARSET_INFO *def_cl= thd->variables.character_set_collations. + get_collation_for_charset(&used, tmp_cs); + table->field[1]->store(&def_cl->coll_name, scs); comment= tmp_cs->comment ? tmp_cs->comment : ""; table->field[2]->store(comment, strlen(comment), scs); table->field[3]->store((longlong) tmp_cs->mbmaxlen, TRUE); @@ -6341,6 +6344,9 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond) (tmp_cs->state & MY_CS_HIDDEN) || !(tmp_cs->state & MY_CS_PRIMARY)) continue; + Character_set_collations_used used(thd); + CHARSET_INFO *def_cl= thd->variables.character_set_collations. 
+ get_collation_for_charset(&used, tmp_cs); for (cl= all_charsets; cl < all_charsets + array_elements(all_charsets) ; cl ++) @@ -6381,7 +6387,7 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond) table->field[2]->store((longlong) tmp_cl->number, TRUE); table->field[3]->set_notnull(); // IS_DEFAULT table->field[3]->store( - Show::Yes_or_empty::value(tmp_cl->default_flag()), scs); + Show::Yes_or_empty::value(def_cl == tmp_cl), scs); } table->field[4]->store( Show::Yes_or_empty::value(tmp_cl->compiled_flag()), scs); @@ -6409,6 +6415,9 @@ int fill_schema_coll_charset_app(THD *thd, TABLE_LIST *tables, COND *cond) if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) || !(tmp_cs->state & MY_CS_PRIMARY)) continue; + Character_set_collations_used used(thd); + CHARSET_INFO *def_cl= thd->variables.character_set_collations. + get_collation_for_charset(&used, tmp_cs); for (cl= all_charsets; cl < all_charsets + array_elements(all_charsets) ; cl ++) @@ -6428,7 +6437,7 @@ int fill_schema_coll_charset_app(THD *thd, TABLE_LIST *tables, COND *cond) table->field[2]->store(full_collation_name, scs); table->field[3]->store(tmp_cl->number); table->field[4]->store( - Show::Yes_or_empty::value(tmp_cl->default_flag()), scs); + Show::Yes_or_empty::value(def_cl == tmp_cl), scs); if (schema_table_store_record(thd, table)) return 1; } diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 6f3f96d573f..924b6de0977 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -2200,10 +2200,12 @@ bool check_duplicates_in_interval(const char *set_or_name, Generates an error to the diagnostics area in case of a failure. */ bool Column_definition:: - prepare_charset_for_string(const Column_derived_attributes *dattr) + prepare_charset_for_string(Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Column_derived_attributes *dattr) { CHARSET_INFO *tmp= charset_collation_attrs(). 
- resolved_to_character_set(dattr->charset()); + resolved_to_character_set(used, map, dattr->charset()); if (!tmp) return true; charset= tmp; @@ -12457,8 +12459,10 @@ bool HA_CREATE_INFO:: { // Make sure we don't do double resolution in direct SQL execution DBUG_ASSERT(!default_table_charset || thd->stmt_arena->is_stmt_execute()); + Character_set_collations_used used(thd); if (!(default_table_charset= - default_cscl.resolved_to_context(ctx))) + default_cscl.resolved_to_context(&used, + thd->variables.character_set_collations, ctx))) return true; } @@ -12469,8 +12473,10 @@ bool HA_CREATE_INFO:: // Make sure we don't do double resolution in direct SQL execution DBUG_ASSERT(!alter_table_convert_to_charset || thd->stmt_arena->is_stmt_execute()); + Character_set_collations_used used(thd); if (!(alter_table_convert_to_charset= - convert_cscl.resolved_to_context(ctx))) + convert_cscl.resolved_to_context(&used, + thd->variables.character_set_collations, ctx))) return true; } return false; diff --git a/sql/sql_type.cc b/sql/sql_type.cc index 3759c0ba02f..156db218f9b 100644 --- a/sql/sql_type.cc +++ b/sql/sql_type.cc @@ -2718,7 +2718,10 @@ Type_handler::Column_definition_set_attributes(THD *thd, column_definition_type_t type) const { - def->set_charset_collation_attrs(attr.charset_collation_attrs()); + Character_set_collations_used used(thd); + def->set_charset_collation_attrs(&used, + thd->variables.character_set_collations, + attr.charset_collation_attrs()); def->set_length_and_dec(attr); return false; } @@ -3028,7 +3031,10 @@ bool Type_handler_null:: *derived_attr) const { - def->prepare_charset_for_string(derived_attr); + Character_set_collations_used used(thd); + def->prepare_charset_for_string(&used, + thd->variables.character_set_collations, + derived_attr); def->create_length_to_internal_length_null(); return false; } @@ -3116,7 +3122,11 @@ bool Type_handler_typelib:: *derived_attr) const { - return def->prepare_charset_for_string(derived_attr) || + 
Character_set_collations_used used(thd); + return def->prepare_charset_for_string(&used, + thd->variables. + character_set_collations, + derived_attr) || def->prepare_stage1_typelib(thd, mem_root, file, table_flags); } @@ -3131,7 +3141,11 @@ bool Type_handler_string_result:: *derived_attr) const { - return def->prepare_charset_for_string(derived_attr) || + Character_set_collations_used used(thd); + return def->prepare_charset_for_string(&used, + thd->variables. + character_set_collations, + derived_attr) || def->prepare_stage1_string(thd, mem_root, file, table_flags); } diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 0fed9186dd2..169a7e1a7ca 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -5536,7 +5536,9 @@ versioning_option: default_charset: opt_default charset opt_equal charset_name_or_default { - if (unlikely(Lex->create_info.add_table_option_default_charset($4))) + Character_set_collations_used used(thd); + if (unlikely(Lex->create_info.add_table_option_default_charset( + &used, thd->variables.character_set_collations, $4))) MYSQL_YYABORT; } ; @@ -5544,8 +5546,10 @@ default_charset: default_collation: opt_default COLLATE_SYM opt_equal collation_name_or_default { + Character_set_collations_used used(thd); Table_specification_st *cinfo= &Lex->create_info; - if (unlikely(cinfo->add_table_option_default_collation($4))) + if (unlikely(cinfo->add_table_option_default_collation( + &used, thd->variables.character_set_collations, $4))) MYSQL_YYABORT; } ; @@ -5797,10 +5801,14 @@ field_type_or_serial: } field_def { + Character_set_collations_used used(thd); auto tmp= $1.charset_collation_attrs(); - if (tmp.merge_column_charset_clause_and_collate_clause($3)) + if (tmp.merge_column_charset_clause_and_collate_clause( + &used, thd->variables.character_set_collations, $3)) MYSQL_YYABORT; - Lex->last_field->set_charset_collation_attrs(tmp); + Lex->last_field->set_charset_collation_attrs( + &used, thd->variables.character_set_collations, + tmp); } | SERIAL_SYM { @@ 
-5838,7 +5846,9 @@ field_def: | attribute_list compressed_deprecated_column_attribute { $$= $1; } | attribute_list compressed_deprecated_column_attribute attribute_list { - if (($$= $1).merge_column_collate_clause_and_collate_clause($3)) + Character_set_collations_used used(thd); + if (($$= $1).merge_column_collate_clause_and_collate_clause( + &used, thd->variables.character_set_collations, $3)) MYSQL_YYABORT; } | opt_generated_always AS virtual_column_func @@ -6318,8 +6328,10 @@ opt_precision: attribute_list: attribute_list attribute { - if (($$= $1).merge_column_collate_clause_and_collate_clause($2)) - MYSQL_YYABORT; + Character_set_collations_used used(thd); + if (($$= $1).merge_column_collate_clause_and_collate_clause( + &used, thd->variables.character_set_collations, $2)) + MYSQL_YYABORT; } | attribute ; @@ -6546,11 +6558,19 @@ binary: } | charset_or_alias COLLATE_SYM DEFAULT { - $$.set_charset_collate_default(Lex_exact_charset($1)); + Character_set_collations_used used(thd); + $$.set_charset_collate_default( + &used, + thd->variables.character_set_collations, + Lex_exact_charset($1)); } | charset_or_alias COLLATE_SYM collation_name { - if ($3.merge_exact_charset(Lex_exact_charset($1))) + Character_set_collations_used used(thd); + if ($3.merge_exact_charset( + &used, + thd->variables.character_set_collations, + Lex_exact_charset($1))) MYSQL_YYABORT; $$= Lex_exact_charset_extended_collation_attrs($3); } @@ -7630,13 +7650,17 @@ alter_list_item: } | CONVERT_SYM TO_SYM charset charset_name_or_default { - if (Lex->add_alter_list_item_convert_to_charset($4)) + Character_set_collations_used used(thd); + if (Lex->add_alter_list_item_convert_to_charset( + &used, thd->variables.character_set_collations, $4)) MYSQL_YYABORT; } | CONVERT_SYM TO_SYM charset charset_name_or_default COLLATE_SYM collation_name_or_default { - if (Lex->add_alter_list_item_convert_to_charset($4, $6)) + Character_set_collations_used used(thd); + if (Lex->add_alter_list_item_convert_to_charset( + 
&used, thd->variables.character_set_collations, $4, $6)) MYSQL_YYABORT; } | create_table_options_space_separated @@ -9507,7 +9531,10 @@ temporal_dyncol_type: string_dyncol_type: char opt_binary { - if ($$.set(DYN_COL_STRING, $2, thd->variables.collation_connection)) + Character_set_collations_used used(thd); + if ($$.set(DYN_COL_STRING, &used, + thd->variables.character_set_collations, + $2, thd->variables.collation_connection)) MYSQL_YYABORT; } | nchar @@ -9686,6 +9713,9 @@ column_default_non_parenthesized_expr: } | CONVERT_SYM '(' expr USING charset_name ')' { + Character_set_collations_used used(thd); + $5= thd->variables.character_set_collations. + get_collation_for_charset(&used, $5); $$= new (thd->mem_root) Item_func_conv_charset(thd, $3, $5); if (unlikely($$ == NULL)) MYSQL_YYABORT; @@ -9838,6 +9868,9 @@ function_call_keyword: } | CHAR_SYM '(' expr_list USING charset_name ')' { + Character_set_collations_used used(thd); + $5= thd->variables.character_set_collations. + get_collation_for_charset(&used, $5); $$= new (thd->mem_root) Item_func_char(thd, *$3, $5); if (unlikely($$ == NULL)) MYSQL_YYABORT; @@ -11128,19 +11161,25 @@ cast_type: { $$.set(&type_handler_long_blob, $2, &my_charset_bin); } | CHAR_SYM opt_field_length opt_binary { - if ($$.set(&type_handler_long_blob, $2, $3, + Character_set_collations_used used(thd); + if ($$.set(&type_handler_long_blob, + $2, &used, thd->variables.character_set_collations, $3, thd->variables.collation_connection)) MYSQL_YYABORT; } | VARCHAR field_length opt_binary { - if ($$.set(&type_handler_long_blob, $2, $3, + Character_set_collations_used used(thd); + if ($$.set(&type_handler_long_blob, + $2, &used, thd->variables.character_set_collations, $3, thd->variables.collation_connection)) MYSQL_YYABORT; } | VARCHAR2_ORACLE_SYM field_length opt_binary { - if ($$.set(&type_handler_long_blob, $2, $3, + Character_set_collations_used used(thd); + if ($$.set(&type_handler_long_blob, + $2, &used, 
thd->variables.character_set_collations, $3, thd->variables.collation_connection)) MYSQL_YYABORT; } @@ -14791,6 +14830,9 @@ text_literal: } | UNDERSCORE_CHARSET TEXT_STRING { + Character_set_collations_used used(thd); + $1= thd->variables.character_set_collations. + get_collation_for_charset(&used, $1); if (unlikely(!($$= thd->make_string_literal_charset($2, $1)))) MYSQL_YYABORT; } @@ -14930,6 +14972,9 @@ literal: Item_string_with_introducer *item_str; LEX_CSTRING tmp; $2->get_value(&tmp); + Character_set_collations_used used(thd); + $1= thd->variables.character_set_collations. + get_collation_for_charset(&used, $1); /* Pass NULL as name. Name will be set in the "select_item" rule and will include the introducer and the original hex/bin notation. @@ -16646,7 +16691,12 @@ option_value_no_option_type: { CHARSET_INFO *def= global_system_variables.character_set_client; Lex_exact_charset_opt_extended_collate tmp($2 ? $2 : def, false); - if (Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) + Lex_extended_collation_st cl; + cl.set_collate_default(); + Character_set_collations_used used(thd); + if (tmp.merge_collation(&used, thd->variables. + character_set_collations, cl) || + Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) MYSQL_YYABORT; } | NAMES_SYM charset_name_or_default @@ -16654,7 +16704,9 @@ option_value_no_option_type: { CHARSET_INFO *def= global_system_variables.character_set_client; Lex_exact_charset_opt_extended_collate tmp($2 ? $2 : def, false); - if (tmp.merge_collation($4) || + Character_set_collations_used used(thd); + if (tmp.merge_collation(&used, thd->variables. 
+ character_set_collations, $4) || Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) MYSQL_YYABORT; } diff --git a/sql/structs.h b/sql/structs.h index 0a71719376c..6ec498c61d8 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -773,10 +773,13 @@ public: m_ci= cs; Lex_length_and_dec_st::reset(); } - bool set(int type, const Lex_column_charset_collation_attrs_st &collation, + bool set(int type, + Charset_collation_map_st::Used *used, + const Charset_collation_map_st &map, + const Lex_column_charset_collation_attrs_st &collation, CHARSET_INFO *charset) { - CHARSET_INFO *tmp= collation.resolved_to_character_set(charset); + CHARSET_INFO *tmp= collation.resolved_to_character_set(used, map, charset); if (!tmp) return true; set(type, tmp); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 1ed3d61bcf8..c0943cd3048 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -436,6 +436,115 @@ static bool update_auto_increment_increment (sys_var *self, THD *thd, enum_var_t #endif /* WITH_WSREP */ + /* sys_var implementation for a Charset_collation_map_st value: parsed from text with from_text() and shown as text (SHOW_CHAR, GET_STR). */ +class Sys_var_charset_collation_map: public sys_var +{ +public: + Sys_var_charset_collation_map(const char *name_arg, const char *comment, + int flag_args, ptrdiff_t off, size_t size, + CMD_LINE getopt, + enum binlog_status_enum binlog_status_arg) + :sys_var(&all_sys_vars, name_arg, comment, + flag_args, off, getopt.id, getopt.arg_type, + SHOW_CHAR, + DEFAULT(0), nullptr, binlog_status_arg, + nullptr, nullptr, nullptr) + { + option.var_type|= GET_STR; + } + +private: + /* Parse the ASCII string value of item into *map. Returns true when the item yields no string; otherwise returns the from_text() result, which the callers below treat as an error flag. */ + static bool charset_collation_map_from_item(Charset_collation_map_st *map, + Item *item, + myf utf8_flag) + { + String *value, buffer; + if (!(value= item->val_str_ascii(&buffer))) + return true; + return map->from_text(value->to_lex_cstring(), utf8_flag); + } + /* Render map as text into memory obtained from thd and return the copy made by thd->strmake(). NOTE(review): the thd->alloc() result is passed to print() without a NULL check; confirm whether alloc() can fail here. */ + static const uchar *make_value_ptr(THD *thd, + const Charset_collation_map_st &map) + { + size_t nbytes= map.text_format_nbytes_needed(); + char *buf= (char *) thd->alloc(nbytes); + size_t length= map.print(buf, nbytes); + return (uchar *) thd->strmake(buf,
length); + } + + +private: + /* Validate the new value by parsing it into a scratch map that is then discarded. */ + bool do_check(THD *thd, set_var *var) override + { + Charset_collation_map_st map; + return charset_collation_map_from_item(&map, var->value, + thd->get_utf8_flag()); + } + /* Copy the global map into the session map. The meaning of the second set() argument (1) is not visible here; see Charset_collation_map_st::set(). */ + void session_save_default(THD *thd, set_var *var) override + { + thd->variables.character_set_collations.set( + global_system_variables.character_set_collations, 1); + } + /* Reset the global map by calling init() on it. */ + void global_save_default(THD *thd, set_var *var) override + { + global_system_variables.character_set_collations.init(); + } + /* Apply a new session value. A null var->value restores the default; otherwise the text is parsed into a scratch map first, so the session map is left untouched when parsing fails. */ + bool session_update(THD *thd, set_var *var) override + { + Charset_collation_map_st map; + if (!var->value) + { + session_save_default(thd, var); + return false; + } + if (charset_collation_map_from_item(&map, var->value, thd->get_utf8_flag())) + return true; + thd->variables.character_set_collations.set(map, 1); + return false; + } + /* Same flow for the global value, except that the parsed map is stored by plain assignment rather than through set(). */ + bool global_update(THD *thd, set_var *var) override + { + Charset_collation_map_st map; + if (!var->value) + { + global_save_default(thd, var); + return false; + } + if (charset_collation_map_from_item(&map, var->value, thd->get_utf8_flag())) + return true; + global_system_variables.character_set_collations= map; + return false; + } + /* Text form of the session map. */ + const uchar * + session_value_ptr(THD *thd, const LEX_CSTRING *base) const override + { + return make_value_ptr(thd, thd->variables.character_set_collations); + } + /* Text form of the global map. */ + const uchar * + global_value_ptr(THD *thd, const LEX_CSTRING *base) const override + { + return make_value_ptr(thd, global_system_variables. + character_set_collations); + } +}; + + /* Declared with NO_CMD_LINE: the character-set-collations startup option is listed separately in my_long_options in mysqld.cc. */ +static Sys_var_charset_collation_map Sys_character_set_collations( + "character_set_collations", + "Default collations for character sets", + SESSION_VAR(character_set_collations), + NO_CMD_LINE, NOT_IN_BINLOG); + + static Sys_var_double Sys_analyze_sample_percentage( "analyze_sample_percentage", "Percentage of rows from the table ANALYZE TABLE will sample " -- cgit v1.2.1