diff options
author | Alexander Barkov <bar@mariadb.com> | 2022-02-09 21:21:39 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2022-03-22 17:12:15 +0400 |
commit | 0c4c064f98120e179ddfa49a1010d465a07bdc0a (patch) | |
tree | e8b4414484306ff6eb817269d34a07912e1f49df /sql | |
parent | d25b10fede8926f63203dfd2040ec60549d10936 (diff) | |
download | mariadb-git-0c4c064f98120e179ddfa49a1010d465a07bdc0a.tar.gz |
MDEV-27743 Remove Lex::charset
This patch also fixes:
MDEV-27690 Crash on `CHARACTER SET csname COLLATE DEFAULT` in column definition
MDEV-27853 Wrong data type on column `COLLATE DEFAULT` and table `COLLATE some_non_default_collation`
MDEV-28067 Multiple conflicting column COLLATE clauses are not rejected
MDEV-28118 Wrong collation of `CAST(.. AS CHAR COLLATE DEFAULT)`
MDEV-28119 Wrong column collation on MODIFY + CONVERT
Diffstat (limited to 'sql')
-rw-r--r-- | sql/CMakeLists.txt | 1 | ||||
-rw-r--r-- | sql/field.cc | 4 | ||||
-rw-r--r-- | sql/field.h | 19 | ||||
-rw-r--r-- | sql/item_func.h | 27 | ||||
-rw-r--r-- | sql/json_table.cc | 22 | ||||
-rw-r--r-- | sql/json_table.h | 2 | ||||
-rw-r--r-- | sql/lex_charset.cc | 339 | ||||
-rw-r--r-- | sql/lex_charset.h | 199 | ||||
-rw-r--r-- | sql/sql_lex.cc | 51 | ||||
-rw-r--r-- | sql/sql_lex.h | 6 | ||||
-rw-r--r-- | sql/sql_parse.cc | 18 | ||||
-rw-r--r-- | sql/sql_parse.h | 1 | ||||
-rw-r--r-- | sql/sql_table.cc | 24 | ||||
-rw-r--r-- | sql/sql_type.cc | 14 | ||||
-rw-r--r-- | sql/sql_type.h | 4 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 281 | ||||
-rw-r--r-- | sql/structs.h | 75 |
17 files changed, 849 insertions, 238 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index a96ab1dfced..8d83ecb32de 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -162,6 +162,7 @@ SET (SQL_SOURCE semisync.cc semisync_master.cc semisync_slave.cc semisync_master_ack_receiver.cc sql_schema.cc + lex_charset.cc sql_type.cc sql_mode.cc sql_type_json.cc sql_type_string.cc sql_type_geom.cc diff --git a/sql/field.cc b/sql/field.cc index ae82d617ad3..a9f540e2348 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -10410,17 +10410,15 @@ bool Column_definition::prepare_interval_field(MEM_ROOT *mem_root, bool Column_definition::set_attributes(THD *thd, const Lex_field_type_st &def, - CHARSET_INFO *cs, column_definition_type_t type) { DBUG_ASSERT(type_handler() == &type_handler_null); - DBUG_ASSERT(charset == &my_charset_bin || charset == NULL); DBUG_ASSERT(length == 0); DBUG_ASSERT(decimals == 0); set_handler(def.type_handler()); return type_handler()->Column_definition_set_attributes(thd, this, - def, cs, type); + def, type); } diff --git a/sql/field.h b/sql/field.h index 2ed02b37cfd..37f004b6420 100644 --- a/sql/field.h +++ b/sql/field.h @@ -5299,7 +5299,6 @@ public: Column_definition(THD *thd, Field *field, Field *orig_field); bool set_attributes(THD *thd, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type); void create_length_to_internal_length_null() { @@ -5493,6 +5492,24 @@ public: { return compression_method_ptr; } bool check_vcol_for_key(THD *thd) const; + + void set_lex_charset_collation(const Lex_charset_collation_st &lc) + { + charset= lc.charset_collation(); + if (lc.is_contextually_typed_collation()) + flags|= CONTEXT_COLLATION_FLAG; + else + flags&= ~CONTEXT_COLLATION_FLAG; + } + Lex_charset_collation lex_charset_collation() const + { + return Lex_charset_collation( + charset, + !charset ? Lex_charset_collation_st::TYPE_EMPTY : + flags & CONTEXT_COLLATION_FLAG ? 
+ Lex_charset_collation_st::TYPE_COLLATE_CONTEXTUALLY_TYPED : + Lex_charset_collation_st::TYPE_CHARACTER_SET); + } }; diff --git a/sql/item_func.h b/sql/item_func.h index 99b9a075d2c..23879f0a733 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -3761,26 +3761,41 @@ struct Lex_cast_type_st: public Lex_length_and_dec_st { private: const Type_handler *m_type_handler; + CHARSET_INFO *m_charset; public: - void set(const Type_handler *handler, Lex_length_and_dec_st length_and_dec) + void set(const Type_handler *handler, + Lex_length_and_dec_st length_and_dec, + CHARSET_INFO *cs= NULL) { m_type_handler= handler; + m_charset= cs; Lex_length_and_dec_st::operator=(length_and_dec); } + bool set(const Type_handler *handler, + const Lex_length_and_dec_st & length_and_dec, + const Lex_charset_collation_st &cscl, + CHARSET_INFO *defcs) + { + CHARSET_INFO *tmp= cscl.resolved_to_character_set(defcs); + if (!tmp) + return true; + set(handler, length_and_dec, tmp); + return false; + } void set(const Type_handler *handler) { m_type_handler= handler; + m_charset= NULL; Lex_length_and_dec_st::reset(); } const Type_handler *type_handler() const { return m_type_handler; } - Item *create_typecast_item(THD *thd, Item *item, - CHARSET_INFO *cs= NULL) const + CHARSET_INFO *charset() const { return m_charset; } + Item *create_typecast_item(THD *thd, Item *item) const { return m_type_handler-> - create_typecast_item(thd, item, Type_cast_attributes(*this, cs)); + create_typecast_item(thd, item, Type_cast_attributes(*this, m_charset)); } - Item *create_typecast_item_or_error(THD *thd, Item *item, - CHARSET_INFO *cs= NULL) const; + Item *create_typecast_item_or_error(THD *thd, Item *item) const; }; diff --git a/sql/json_table.cc b/sql/json_table.cc index 4384da18833..cb3787e4f25 100644 --- a/sql/json_table.cc +++ b/sql/json_table.cc @@ -716,7 +716,7 @@ bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table, uint fieldnr= 0; MEM_ROOT *mem_root_save= thd->mem_root; 
List_iterator_fast<Json_table_column> jc_i(jt->m_columns); - Column_derived_attributes da(NULL); + Column_derived_attributes da(&my_charset_utf8mb4_general_ci); DBUG_ENTER("add_json_table_fields"); thd->mem_root= &table->mem_root; @@ -733,8 +733,6 @@ bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table, executing a prepared statement for the second time. */ sql_f->length= sql_f->char_length; - if (!sql_f->charset) - sql_f->charset= &my_charset_utf8mb4_general_ci; if (sql_f->prepare_stage1(thd, thd->mem_root, table->file, table->file->ha_table_flags(), &da)) @@ -873,6 +871,19 @@ int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, } +int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, + const Lex_charset_collation_st &cl) +{ + if (cl.is_empty() || cl.is_contextually_typed_collate_default()) + return set(thd, ctype, path, nullptr); + + CHARSET_INFO *tmp; + if (!(tmp= cl.resolved_to_character_set(&my_charset_utf8mb4_general_ci))) + return 1; + return set(thd, ctype, path, tmp); +} + + static int print_path(String *str, const json_path_t *p) { return str->append('\'') || @@ -915,7 +926,10 @@ int Json_table_column::print(THD *thd, Field **f, String *str) if (str->append(column_type) || ((*f)->has_charset() && m_explicit_cs && (str->append(STRING_WITH_LEN(" CHARSET ")) || - str->append(&m_explicit_cs->cs_name))) || + str->append(&m_explicit_cs->cs_name) || + (!(m_explicit_cs->state & MY_CS_PRIMARY) && + (str->append(STRING_WITH_LEN(" COLLATE ")) || + str->append(&m_explicit_cs->coll_name))))) || str->append(m_column_type == PATH ? 
&path : &exists_path) || print_path(str, &m_path)) return 1; diff --git a/sql/json_table.h b/sql/json_table.h index 52cdae13e9b..2cadb07961e 100644 --- a/sql/json_table.h +++ b/sql/json_table.h @@ -160,6 +160,8 @@ public: m_column_type= ctype; } int set(THD *thd, enum_type ctype, const LEX_CSTRING &path, CHARSET_INFO *cs); + int set(THD *thd, enum_type ctype, const LEX_CSTRING &path, + const Lex_charset_collation_st &cl); Json_table_column(Create_field *f, Json_table_nested_path *nest) : m_field(f), m_nest(nest), m_explicit_cs(NULL) { diff --git a/sql/lex_charset.cc b/sql/lex_charset.cc new file mode 100644 index 00000000000..c6c9e402731 --- /dev/null +++ b/sql/lex_charset.cc @@ -0,0 +1,339 @@ +/* Copyright (c) 2021, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include "my_global.h" +#include "my_sys.h" +#include "m_ctype.h" +#include "lex_charset.h" +#include "mysqld_error.h" + + +/** find a collation with binary comparison rules +*/ +CHARSET_INFO *Lex_charset_collation_st::find_bin_collation(CHARSET_INFO *cs) +{ + /* + We don't need to handle old_mode=UTF8_IS_UTF8MB3 here, + because "cs" points to a real character set name. + It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8". + No thd->get_utf8_flag() flag passed to get_charset_by_csname(). 
+ */ + DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4)); + /* + CREATE TABLE t1 (a CHAR(10) BINARY) + CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; + Nothing to do, we have the binary collation already. + */ + if (cs->state & MY_CS_BINSORT) + return cs; + + // CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4; + if (!(cs= get_charset_by_csname(cs->cs_name.str, MY_CS_BINSORT, MYF(0)))) + { + char tmp[65]; + strxnmov(tmp, sizeof(tmp)-1, cs->cs_name.str, "_bin", NULL); + my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp); + } + return cs; +} + + +CHARSET_INFO *Lex_charset_collation_st::find_default_collation(CHARSET_INFO *cs) +{ + // See comments in find_bin_collation() + DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4)); + /* + CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4; + Nothing to do, we have the default collation already. + */ + if (cs->state & MY_CS_PRIMARY) + return cs; + /* + CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) + CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; + + Don't need to handle old_mode=UTF8_IS_UTF8MB3 here. + See comments in find_bin_collation. + */ + cs= get_charset_by_csname(cs->cs_name.str, MY_CS_PRIMARY, MYF(MY_WME)); + /* + The above should never fail, as we have default collations for + all character sets. 
+ */ + DBUG_ASSERT(cs); + return cs; +} + + +bool Lex_charset_collation_st::set_charset_collate_exact(CHARSET_INFO *cs, + CHARSET_INFO *cl) +{ + DBUG_ASSERT(cs != nullptr && cl != nullptr); + if (!my_charset_same(cl, cs)) + { + my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0), + cl->coll_name.str, cs->cs_name.str); + return true; + } + set_collate_exact(cl); + return false; +} + + +/* + Resolve an empty or a contextually typed collation according to the + upper level default character set (and optionally a collation), e.g.: + CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin1; + CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1; + CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) + CHARACTER SET latin1 COLLATE latin1_bin; + + "this" is the COLLATE clause (e.g. of a column) + "def" is the upper level CHARACTER SET clause (e.g. of a table) +*/ +CHARSET_INFO * +Lex_charset_collation_st::resolved_to_character_set(CHARSET_INFO *def) const +{ + DBUG_ASSERT(def); + + switch (m_type) { + case TYPE_EMPTY: + return def; + case TYPE_CHARACTER_SET: + DBUG_ASSERT(m_ci); + return m_ci; + case TYPE_COLLATE_EXACT: + DBUG_ASSERT(m_ci); + return m_ci; + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + break; + } + + // Contextually typed + DBUG_ASSERT(m_ci); + + if (is_contextually_typed_binary_style()) // CHAR(10) BINARY + return find_bin_collation(def); + + if (is_contextually_typed_collate_default()) // CHAR(10) COLLATE DEFAULT + return find_default_collation(def); + + /* + Non-binary and non-default contextually typed collation. + We don't have such yet - the parser cannot produce this. + But will have soon, e.g. "uca1400_as_ci". 
+ */ + DBUG_ASSERT(0); + return NULL; +} + + +/* + Merge the CHARACTER SET clause to: + - an empty COLLATE clause + - an explicitly typed collation name + - a contextually typed collation + + "this" corresponds to `CHARACTER SET xxx [BINARY]` + "cl" corresponds to the COLLATE clause +*/ +bool +Lex_charset_collation_st:: + merge_charset_clause_and_collate_clause(const Lex_charset_collation_st &cl) +{ + if (cl.is_empty()) // No COLLATE clause + return false; + + switch (m_type) { + case TYPE_EMPTY: + /* + No CHARACTER SET clause + CHAR(10) NOT NULL COLLATE latin1_bin + CHAR(10) NOT NULL COLLATE DEFAULT + */ + *this= cl; + return false; + case TYPE_CHARACTER_SET: + case TYPE_COLLATE_EXACT: + { + Lex_explicit_charset_opt_collate ecs(m_ci, m_type == TYPE_COLLATE_EXACT); + if (ecs.merge_collate_or_error(cl)) + return true; + set_collate_exact(ecs.charset_and_collation()); + return false; + } + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + break; + } + + if (is_contextually_typed_collation()) + { + if (cl.is_contextually_typed_collation()) + { + /* + CONTEXT + CONTEXT: + CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser + CHAR(10) BINARY .. COLLATE uca1400_as_ci - not supported yet + */ + DBUG_ASSERT(0); // Not possible yet + return false; + } + + /* + CONTEXT + EXPLICIT + CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci + CHAR(10) BINARY .. COLLATE latin1_bin + CHAR(10) COLLATE uca1400_as_ci .. 
COLLATE latin1_bin + */ + if (is_contextually_typed_collate_default() && + !(cl.charset_collation()->state & MY_CS_PRIMARY)) + { + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "COLLATE ", "DEFAULT", "COLLATE ", + cl.charset_collation()->coll_name.str); + return true; + } + + if (is_contextually_typed_binary_style() && + !(cl.charset_collation()->state & MY_CS_BINSORT)) + { + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "", "BINARY", "COLLATE ", cl.charset_collation()->coll_name.str); + return true; + } + *this= cl; + return false; + } + + DBUG_ASSERT(0); + return false; +} + + +bool +Lex_explicit_charset_opt_collate:: + merge_collate_or_error(const Lex_charset_collation_st &cl) +{ + DBUG_ASSERT(cl.type() != Lex_charset_collation_st::TYPE_CHARACTER_SET); + + switch (cl.type()) { + case Lex_charset_collation_st::TYPE_EMPTY: + return false; + case Lex_charset_collation_st::TYPE_CHARACTER_SET: + DBUG_ASSERT(0); + return false; + case Lex_charset_collation_st::TYPE_COLLATE_EXACT: + /* + EXPLICIT + EXPLICIT + CHAR(10) CHARACTER SET latin1 .. COLLATE latin1_bin + CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin .. COLLATE latin1_bin + CHAR(10) COLLATE latin1_bin .. COLLATE latin1_bin + CHAR(10) COLLATE latin1_bin .. COLLATE latin1_bin + CHAR(10) CHARACTER SET latin1 BINARY .. 
COLLATE latin1_bin + */ + if (m_with_collate && m_ci != cl.charset_collation()) + { + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "COLLATE ", m_ci->coll_name.str, + "COLLATE ", cl.charset_collation()->coll_name.str); + return true; + } + if (!my_charset_same(m_ci, cl.charset_collation())) + { + my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0), + cl.charset_collation()->coll_name.str, m_ci->cs_name.str); + return true; + } + m_ci= cl.charset_collation(); + m_with_collate= true; + return false; + + case Lex_charset_collation_st::TYPE_COLLATE_CONTEXTUALLY_TYPED: + if (cl.is_contextually_typed_collate_default()) + { + /* + SET NAMES latin1 COLLATE DEFAULT; + ALTER TABLE t1 CONVERT TO CHARACTER SET latin1 COLLATE DEFAULT; + */ + CHARSET_INFO *tmp= Lex_charset_collation_st::find_default_collation(m_ci); + if (!tmp) + return true; + m_ci= tmp; + m_with_collate= true; + return false; + } + else + { + /* + EXPLICIT + CONTEXT + CHAR(10) COLLATE latin1_bin .. COLLATE DEFAULT not possible yet + CHAR(10) COLLATE latin1_bin .. COLLATE uca1400_as_ci + */ + + DBUG_ASSERT(0); // Not possible yet + return false; + } + } + DBUG_ASSERT(0); + return false; +} + + +/* + This method is used in the "attribute_list" rule to merge two independent + COLLATE clauses (not belonging to a CHARACTER SET clause). +*/ +bool +Lex_charset_collation_st:: + merge_collate_clause_and_collate_clause(const Lex_charset_collation_st &cl) +{ + /* + "BINARY" and "COLLATE DEFAULT" are not possible + in an independent COLLATE clause in a column attribute. 
+ */ + DBUG_ASSERT(!is_contextually_typed_collation()); + DBUG_ASSERT(!cl.is_contextually_typed_collation()); + + if (cl.is_empty()) + return false; + + switch (m_type) { + case TYPE_EMPTY: + *this= cl; + return false; + case TYPE_CHARACTER_SET: + DBUG_ASSERT(0); + return false; + case TYPE_COLLATE_EXACT: + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + break; + } + + /* + Two independent explicit collations: + CHAR(10) NOT NULL COLLATE latin1_bin DEFAULT 'a' COLLATE latin1_bin + Note, we should perhaps eventually disallow double COLLATE clauses. + But for now let's just disallow only conflicting ones. + */ + if (charset_collation() != cl.charset_collation()) + { + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "COLLATE ", charset_collation()->coll_name.str, + "COLLATE ", cl.charset_collation()->coll_name.str); + return true; + } + return false; +} diff --git a/sql/lex_charset.h b/sql/lex_charset.h new file mode 100644 index 00000000000..abbe761df36 --- /dev/null +++ b/sql/lex_charset.h @@ -0,0 +1,199 @@ +/* Copyright (c) 2021, 2022, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef LEX_CHARSET_INCLUDED +#define LEX_CHARSET_INCLUDED + +/* + Parse time character set and collation. + + Can be: + + 1. Empty (not specified on the column level): + CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin2; -- (1a) + CREATE TABLE t1 (a CHAR(10)); -- (1b) + + 2. 
Precisely typed: + CREATE TABLE t1 (a CHAR(10) COLLATE latin1_bin); -- (2a) + CREATE TABLE t1 ( + a CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin); -- (2b) + + 3. Contextually typed: + CREATE TABLE t2 (a CHAR(10) BINARY) CHARACTER SET latin2; -- (3a) + CREATE TABLE t2 (a CHAR(10) BINARY); -- (3b) + CREATE TABLE t2 (a CHAR(10) COLLATE DEFAULT) + CHARACTER SET latin2 COLLATE latin2_bin; -- (3c) + + In case of an empty or a contextually typed collation, + it is subject to later resolution, when the context + character set becomes known at the end of the CREATE statement: + - either after the explicit table level CHARACTER SET, like in (1a,3a,3c) + - or by the inherited database level CHARACTER SET, like in (1b,3b) + + Resolution happens in Type_handler::Column_definition_prepare_stage1(). +*/ +struct Lex_charset_collation_st +{ +public: + enum Type + { + TYPE_EMPTY= 0, + TYPE_CHARACTER_SET= 1, + TYPE_COLLATE_EXACT= 2, + TYPE_COLLATE_CONTEXTUALLY_TYPED= 3 + }; + +// Number of bits required to store enum Type values + +#define LEX_CHARSET_COLLATION_TYPE_BITS 2 + static_assert(((1<<LEX_CHARSET_COLLATION_TYPE_BITS)-1) >= + TYPE_COLLATE_CONTEXTUALLY_TYPED, + "Lex_charset_collation_st::Type bits check"); + +protected: + CHARSET_INFO *m_ci; + Type m_type; +public: + static CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs); + static CHARSET_INFO *find_default_collation(CHARSET_INFO *cs); +public: + void init() + { + m_ci= NULL; + m_type= TYPE_EMPTY; + } + bool is_empty() const + { + return m_type == TYPE_EMPTY; + } + void set_charset(CHARSET_INFO *cs) + { + DBUG_ASSERT(cs); + m_ci= cs; + m_type= TYPE_CHARACTER_SET; + } + void set_charset_collate_default(CHARSET_INFO *cs) + { + DBUG_ASSERT(cs); + m_ci= cs; + m_type= TYPE_COLLATE_EXACT; + } + bool set_charset_collate_binary(CHARSET_INFO *cs) + { + DBUG_ASSERT(cs); + if (!(cs= find_bin_collation(cs))) + return true; + m_ci= cs; + m_type= TYPE_COLLATE_EXACT; + return false; + } + bool set_charset_collate_exact(CHARSET_INFO 
*cs, + CHARSET_INFO *cl); + void set_collate_default() + { + m_ci= &my_collation_contextually_typed_default; + m_type= TYPE_COLLATE_CONTEXTUALLY_TYPED; + } + void set_contextually_typed_binary_style() + { + m_ci= &my_collation_contextually_typed_binary; + m_type= TYPE_COLLATE_CONTEXTUALLY_TYPED; + } + bool is_contextually_typed_collate_default() const + { + return m_ci == &my_collation_contextually_typed_default; + } + bool is_contextually_typed_binary_style() const + { + return m_ci == &my_collation_contextually_typed_binary; + } + void set_collate_exact(CHARSET_INFO *cl) + { + DBUG_ASSERT(cl); + m_ci= cl; + m_type= TYPE_COLLATE_EXACT; + } + CHARSET_INFO *charset_collation() const + { + return m_ci; + } + Type type() const + { + return m_type; + } + bool is_contextually_typed_collation() const + { + return m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED; + } + CHARSET_INFO *resolved_to_character_set(CHARSET_INFO *cs) const; + bool merge_charset_clause_and_collate_clause(const Lex_charset_collation_st &cl); + bool merge_collate_clause_and_collate_clause(const Lex_charset_collation_st &cl); +}; + + +/* + CHARACTER SET cs [COLLATE cl] +*/ +class Lex_explicit_charset_opt_collate +{ + CHARSET_INFO *m_ci; + bool m_with_collate; +public: + Lex_explicit_charset_opt_collate(CHARSET_INFO *ci, bool with_collate) + :m_ci(ci), m_with_collate(with_collate) + { + DBUG_ASSERT(m_ci); + // Item_func_set_collation uses non-default collations in "ci" + //DBUG_ASSERT(m_ci->default_flag() || m_with_collate); + } + /* + Merge to another COLLATE clause. So the full syntax looks like: + CHARACTER SET cs [COLLATE cl] ... 
COLLATE cl2 + */ + bool merge_collate_or_error(const Lex_charset_collation_st &cl); + bool merge_opt_collate_or_error(const Lex_charset_collation_st &cl) + { + if (cl.is_empty()) + return false; + return merge_collate_or_error(cl); + } + CHARSET_INFO *charset_and_collation() const { return m_ci; } + bool with_collate() const { return m_with_collate; } +}; + + +class Lex_charset_collation: public Lex_charset_collation_st +{ +public: + Lex_charset_collation() + { + init(); + } + Lex_charset_collation(CHARSET_INFO *collation, Type type) + { + DBUG_ASSERT(collation || type == TYPE_EMPTY); + m_ci= collation; + m_type= type; + } + static Lex_charset_collation national(bool bin_mod) + { + return bin_mod ? + Lex_charset_collation(&my_charset_utf8mb3_bin, TYPE_COLLATE_EXACT) : + Lex_charset_collation(&my_charset_utf8mb3_general_ci, TYPE_CHARACTER_SET); + } +}; + + +#endif // LEX_CHARSET_INCLUDED diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 8398b727841..86d4040a9be 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -543,36 +543,10 @@ bool LEX::add_alter_list(LEX_CSTRING name, LEX_CSTRING new_name, bool exists) void LEX::init_last_field(Column_definition *field, - const LEX_CSTRING *field_name, - const CHARSET_INFO *cs) + const LEX_CSTRING *field_name) { last_field= field; - field->field_name= *field_name; - - /* reset LEX fields that are used in Create_field::set_and_check() */ - charset= cs; -} - - -bool LEX::set_bincmp(CHARSET_INFO *cs, bool bin) -{ - /* - if charset is NULL - we're parsing a field declaration. - we cannot call find_bin_collation for a field here, because actual - field charset is determined in get_sql_field_charset() much later. - so we only set a flag. - */ - if (!charset) - { - charset= cs; - last_field->flags|= bin ? BINCMP_FLAG : 0; - return false; - } - - charset= bin ? find_bin_collation(cs ? cs : charset) - : cs ? 
cs : charset; - return charset == NULL; } @@ -6392,8 +6366,7 @@ sp_variable *LEX::sp_param_init(LEX_CSTRING *name) return NULL; } sp_variable *spvar= spcont->add_variable(thd, name); - init_last_field(&spvar->field_def, name, - thd->variables.collation_database); + init_last_field(&spvar->field_def, name); return spvar; } @@ -6402,8 +6375,7 @@ bool LEX::sp_param_fill_definition(sp_variable *spvar, const Lex_field_type_st &def) { return - last_field->set_attributes(thd, def, charset, - COLUMN_DEFINITION_ROUTINE_PARAM) || + last_field->set_attributes(thd, def, COLUMN_DEFINITION_ROUTINE_PARAM) || sphead->fill_spvar_definition(thd, last_field, &spvar->name); } @@ -6411,8 +6383,7 @@ bool LEX::sp_param_fill_definition(sp_variable *spvar, bool LEX::sf_return_fill_definition(const Lex_field_type_st &def) { return - last_field->set_attributes(thd, def, charset, - COLUMN_DEFINITION_FUNCTION_RETURN) || + last_field->set_attributes(thd, def, COLUMN_DEFINITION_FUNCTION_RETURN) || sphead->fill_field_definition(thd, last_field); } @@ -6492,8 +6463,7 @@ void LEX::sp_variable_declarations_init(THD *thd, int nvars) sphead->reset_lex(thd); spcont->declare_var_boundary(nvars); - thd->lex->init_last_field(&spvar->field_def, &spvar->name, - thd->variables.collation_database); + thd->lex->init_last_field(&spvar->field_def, &spvar->name); } @@ -11465,16 +11435,15 @@ Spvar_definition *LEX::row_field_name(THD *thd, const Lex_ident_sys_st &name) } if (unlikely(!(res= new (thd->mem_root) Spvar_definition()))) return NULL; - init_last_field(res, &name, thd->variables.collation_database); + init_last_field(res, &name); return res; } Item * -Lex_cast_type_st::create_typecast_item_or_error(THD *thd, Item *item, - CHARSET_INFO *cs) const +Lex_cast_type_st::create_typecast_item_or_error(THD *thd, Item *item) const { - Item *tmp= create_typecast_item(thd, item, cs); + Item *tmp= create_typecast_item(thd, item); if (!tmp) { Name name= m_type_handler->name(); @@ -11534,8 +11503,7 @@ bool 
LEX::set_field_type_udt(Lex_field_type_st *type, const Type_handler *h; if (!(h= Type_handler::handler_by_name_or_error(thd, name))) return true; - type->set(h, attr); - charset= &my_charset_bin; + type->set(h, attr, &my_charset_bin); return false; } @@ -11547,7 +11515,6 @@ bool LEX::set_cast_type_udt(Lex_cast_type_st *type, if (!(h= Type_handler::handler_by_name_or_error(thd, name))) return true; type->set(h); - charset= NULL; return false; } diff --git a/sql/sql_lex.h b/sql/sql_lex.h index e8bac90fe5a..3dfc7845a28 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -3192,8 +3192,6 @@ public: /* Query Plan Footprint of a currently running select */ Explain_query *explain; - // type information - CHARSET_INFO *charset; /* LEX which represents current statement (conventional, SP or PS) @@ -3800,14 +3798,12 @@ public: bool is_analyze, bool *printed_anything); bool restore_set_statement_var(); - void init_last_field(Column_definition *field, const LEX_CSTRING *name, - const CHARSET_INFO *cs); + void init_last_field(Column_definition *field, const LEX_CSTRING *name); bool last_field_generated_always_as_row_start_or_end(Lex_ident *p, const char *type, uint flags); bool last_field_generated_always_as_row_start(); bool last_field_generated_always_as_row_end(); - bool set_bincmp(CHARSET_INFO *cs, bool bin); bool new_sp_instr_stmt(THD *, const LEX_CSTRING &prefix, const LEX_CSTRING &suffix); diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index fe2307a2e91..44902f0f162 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -10463,24 +10463,6 @@ merge_charset_and_collation(CHARSET_INFO *cs, CHARSET_INFO *cl) return cs; } -/** find a collation with binary comparison rules -*/ -CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs) -{ - const char *csname= cs->cs_name.str; - THD *thd= current_thd; - myf utf8_flag= thd->get_utf8_flag(); - - cs= get_charset_by_csname(csname, MY_CS_BINSORT, MYF(utf8_flag)); - if (!cs) - { - char tmp[65]; - strxnmov(tmp, sizeof(tmp)-1, csname, "_bin", 
NULL); - my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp); - } - return cs; -} - void LEX::mark_first_table_as_inserting() { TABLE_LIST *t= first_select_lex()->table_list.first; diff --git a/sql/sql_parse.h b/sql/sql_parse.h index ebe3fe97114..9e1ec6fabbc 100644 --- a/sql/sql_parse.h +++ b/sql/sql_parse.h @@ -79,7 +79,6 @@ bool check_string_char_length(const LEX_CSTRING *str, uint err_msg, bool no_error); bool check_ident_length(const LEX_CSTRING *ident); CHARSET_INFO* merge_charset_and_collation(CHARSET_INFO *cs, CHARSET_INFO *cl); -CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs); bool check_host_name(LEX_CSTRING *str); bool check_identifier_name(LEX_CSTRING *str, uint max_char_length, uint err_code, const char *param_for_err_msg); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 8b3d1a6aefc..3186cd0b0db 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -2190,12 +2190,27 @@ bool check_duplicates_in_interval(const char *set_or_name, } +/* + Resolves the column collation if: + - it was not typed at all, or + - it was contextually typed + according to the table level character set. + Generates an error to the diagnostics area in case of a failure. +*/ bool Column_definition:: prepare_charset_for_string(const Column_derived_attributes *dattr) { - if (!charset) - charset= dattr->charset(); - return (flags & BINCMP_FLAG) && !(charset= find_bin_collation(charset)); + CHARSET_INFO *tmp= lex_charset_collation(). + resolved_to_character_set(dattr->charset()); + if (!tmp) + return true; + charset= tmp; + /* + Remove the "is contextually typed collation" indicator on success, + for safety. 
+ */ + flags&= ~CONTEXT_COLLATION_FLAG; + return false; } @@ -3959,8 +3974,7 @@ bool Column_definition::prepare_blob_field(THD *thd) bool Column_definition::sp_prepare_create_field(THD *thd, MEM_ROOT *mem_root) { - DBUG_ASSERT(charset); - const Column_derived_attributes dattr(&my_charset_bin); + const Column_derived_attributes dattr(thd->variables.collation_database); return prepare_stage1(thd, mem_root, NULL, HA_CAN_GEOMETRY, &dattr) || prepare_stage2(NULL, HA_CAN_GEOMETRY); } diff --git a/sql/sql_type.cc b/sql/sql_type.cc index 269a4b072d4..69ca474eee1 100644 --- a/sql/sql_type.cc +++ b/sql/sql_type.cc @@ -2713,11 +2713,10 @@ bool Type_handler::Column_definition_set_attributes(THD *thd, Column_definition *def, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type) const { - def->charset= cs; + def->set_lex_charset_collation(attr.lex_charset_collation()); def->set_length_and_dec(attr); return false; } @@ -2746,11 +2745,10 @@ Type_handler_string::Column_definition_set_attributes( THD *thd, Column_definition *def, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type) const { - Type_handler::Column_definition_set_attributes(thd, def, attr, cs, type); + Type_handler::Column_definition_set_attributes(thd, def, attr, type); if (attr.has_explicit_length()) return false; switch (type) { @@ -2778,11 +2776,10 @@ Type_handler_varchar::Column_definition_set_attributes( THD *thd, Column_definition *def, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type) const { - Type_handler::Column_definition_set_attributes(thd, def, attr, cs, type); + Type_handler::Column_definition_set_attributes(thd, def, attr, type); if (attr.has_explicit_length()) return false; switch (type) { @@ -3156,7 +3153,7 @@ bool Type_handler_general_purpose_string:: Change character sets for all varchar/char/text columns, but do not touch varbinary/binary/blob columns. 
*/ - if (defcs != &my_charset_bin) + if (!(def->flags & CONTEXT_COLLATION_FLAG) && defcs != &my_charset_bin) def->charset= bulk_alter_attr->alter_table_convert_to_charset(); return false; }; @@ -4267,10 +4264,9 @@ Type_handler_timestamp_common:: Column_definition_set_attributes(THD *thd, Column_definition *def, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type) const { - Type_handler::Column_definition_set_attributes(thd, def, attr, cs, type); + Type_handler::Column_definition_set_attributes(thd, def, attr, type); if (!opt_explicit_defaults_for_timestamp) def->flags|= NOT_NULL_FLAG; return false; diff --git a/sql/sql_type.h b/sql/sql_type.h index 94ba8f5ffbc..7ff4bc64679 100644 --- a/sql/sql_type.h +++ b/sql/sql_type.h @@ -3924,7 +3924,6 @@ public: virtual bool Column_definition_set_attributes(THD *thd, Column_definition *def, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type) const; // Fix attributes after the parser @@ -6659,7 +6658,6 @@ public: bool Column_definition_set_attributes(THD *thd, Column_definition *def, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type) const override; }; @@ -6912,7 +6910,6 @@ public: bool Column_definition_set_attributes(THD *thd, Column_definition *def, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type) const override; bool Column_definition_fix_attributes(Column_definition *c) const override; @@ -7009,7 +7006,6 @@ public: bool Column_definition_set_attributes(THD *thd, Column_definition *def, const Lex_field_type_st &attr, - CHARSET_INFO *cs, column_definition_type_t type) const override; bool Column_definition_fix_attributes(Column_definition *c) const override; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 3abad020007..be51b4120f5 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -193,14 +193,6 @@ void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)() #endif -#define bincmp_collation(X,Y) \ - 
do \ - { \ - if (unlikely(Lex->set_bincmp(X,Y))) \ - MYSQL_YYABORT; \ - } while(0) - - %} %union { int num; @@ -221,6 +213,7 @@ void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)() Lex_length_and_dec_st Lex_length_and_dec; Lex_cast_type_st Lex_cast_type; Lex_field_type_st Lex_field_type; + Lex_charset_collation_st Lex_charset_collation; Lex_dyncol_type_st Lex_dyncol_type; Lex_for_loop_st for_loop; Lex_for_loop_bounds_st for_loop_bounds; @@ -1386,6 +1379,15 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); field_type_misc json_table_field_type +%type <Lex_charset_collation> + binary + opt_binary + opt_binary_and_compression + attribute + attribute_list + field_def + + %type <Lex_dyncol_type> opt_dyncol_type dyncol_type numeric_dyncol_type temporal_dyncol_type string_dyncol_type @@ -1575,8 +1577,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); text_or_password %type <charset> - opt_collate - collate + opt_collate_or_default charset_name charset_or_alias charset_name_or_default @@ -1658,14 +1659,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); delete_limit_clause fields opt_values values no_braces_with_names opt_values_with_names values_with_names procedure_list procedure_list2 procedure_item - field_def handler opt_generated_always + handler opt_generated_always opt_ignore opt_column opt_restrict grant revoke set lock unlock string_list - opt_binary table_lock_list table_lock + table_lock_list table_lock ref_list opt_match_clause opt_on_update_delete use opt_delete_options opt_delete_option varchar nchar nvarchar opt_outer table_list table_name table_alias_ref_list table_alias_ref - attribute attribute_list compressed_deprecated_data_type_attribute compressed_deprecated_column_attribute grant_list @@ -3124,7 +3124,7 @@ optionally_qualified_column_ident: row_field_definition: row_field_name field_type { - Lex->last_field->set_attributes(thd, $2, Lex->charset, + Lex->last_field->set_attributes(thd, $2, 
COLUMN_DEFINITION_ROUTINE_LOCAL); } ; @@ -3157,7 +3157,7 @@ sp_decl_variable_list: sp_decl_idents_init_vars field_type { - Lex->last_field->set_attributes(thd, $2, Lex->charset, + Lex->last_field->set_attributes(thd, $2, COLUMN_DEFINITION_ROUTINE_LOCAL); } sp_opt_default @@ -5723,7 +5723,7 @@ field_spec: if (unlikely(!f)) MYSQL_YYABORT; - lex->init_last_field(f, &$1, NULL); + lex->init_last_field(f, &$1); $<create_field>$= f; lex->parsing_options.lookup_keywords_after_qualifier= true; } @@ -5751,10 +5751,16 @@ field_spec: field_type_or_serial: qualified_field_type { - Lex->last_field->set_attributes(thd, $1, Lex->charset, + Lex->last_field->set_attributes(thd, $1, COLUMN_DEFINITION_TABLE_FIELD); } field_def + { + Lex_charset_collation tmp= $1.lex_charset_collation(); + if (tmp.merge_charset_clause_and_collate_clause($3)) + MYSQL_YYABORT; + Lex->last_field->set_lex_charset_collation(tmp); + } | SERIAL_SYM { Lex->last_field->set_handler(&type_handler_ulonglong); @@ -5786,25 +5792,34 @@ opt_asrow_attribute_list: ; field_def: - /* empty */ { } + /* empty */ { $$.init(); } | attribute_list - | attribute_list compressed_deprecated_column_attribute + | attribute_list compressed_deprecated_column_attribute { $$= $1; } | attribute_list compressed_deprecated_column_attribute attribute_list + { + if (($$= $1).merge_collate_clause_and_collate_clause($3)) + MYSQL_YYABORT; + } | opt_generated_always AS virtual_column_func { Lex->last_field->vcol_info= $3; Lex->last_field->flags&= ~NOT_NULL_FLAG; // undo automatic NOT NULL for timestamps } vcol_opt_specifier vcol_opt_attribute + { + $$.init(); + } | opt_generated_always AS ROW_SYM START_SYM opt_asrow_attribute { if (Lex->last_field_generated_always_as_row_start()) MYSQL_YYABORT; + $$.init(); } | opt_generated_always AS ROW_SYM END opt_asrow_attribute { if (Lex->last_field_generated_always_as_row_end()) MYSQL_YYABORT; + $$.init(); } ; @@ -6017,49 +6032,46 @@ field_type_numeric: opt_binary_and_compression: - /* empty */ - | binary 
- | binary compressed_deprecated_data_type_attribute - | compressed opt_binary + /* empty */ { $$.init(); } + | binary { $$= $1; } + | binary compressed_deprecated_data_type_attribute { $$= $1; } + | compressed opt_binary { $$= $2; } ; field_type_string: char opt_field_length opt_binary { - $$.set(&type_handler_string, $2); + $$.set(&type_handler_string, $2, $3); } | nchar opt_field_length opt_bin_mod { - $$.set(&type_handler_string, $2); - bincmp_collation(national_charset_info, $3); + $$.set(&type_handler_string, $2, + Lex_charset_collation::national($3)); } | BINARY opt_field_length { - Lex->charset=&my_charset_bin; - $$.set(&type_handler_string, $2); + $$.set(&type_handler_string, $2, &my_charset_bin); } | varchar opt_field_length opt_binary_and_compression { - $$.set(&type_handler_varchar, $2); + $$.set(&type_handler_varchar, $2, $3); } | VARCHAR2_ORACLE_SYM opt_field_length opt_binary_and_compression { - $$.set(&type_handler_varchar, $2); + $$.set(&type_handler_varchar, $2, $3); } | nvarchar opt_field_length opt_compressed opt_bin_mod { - $$.set(&type_handler_varchar, $2); - bincmp_collation(national_charset_info, $4); + $$.set(&type_handler_varchar, $2, + Lex_charset_collation::national($4)); } | VARBINARY opt_field_length opt_compressed { - Lex->charset=&my_charset_bin; - $$.set(&type_handler_varchar, $2); + $$.set(&type_handler_varchar, $2, &my_charset_bin); } | RAW_ORACLE_SYM opt_field_length opt_compressed { - Lex->charset= &my_charset_bin; - $$.set(&type_handler_varchar, $2); + $$.set(&type_handler_varchar, $2, &my_charset_bin); } ; @@ -6105,65 +6117,57 @@ field_type_temporal: field_type_lob: TINYBLOB opt_compressed { - Lex->charset=&my_charset_bin; - $$.set(&type_handler_tiny_blob); + $$.set(&type_handler_tiny_blob, &my_charset_bin); } | BLOB_MARIADB_SYM opt_field_length opt_compressed { - Lex->charset=&my_charset_bin; - $$.set(&type_handler_blob, $2); + $$.set(&type_handler_blob, $2, &my_charset_bin); } | BLOB_ORACLE_SYM field_length opt_compressed { 
- Lex->charset=&my_charset_bin; - $$.set(&type_handler_blob, $2); + $$.set(&type_handler_blob, $2, &my_charset_bin); } | BLOB_ORACLE_SYM opt_compressed { - Lex->charset=&my_charset_bin; - $$.set(&type_handler_long_blob); + $$.set(&type_handler_long_blob, &my_charset_bin); } | MEDIUMBLOB opt_compressed { - Lex->charset=&my_charset_bin; - $$.set(&type_handler_medium_blob); + $$.set(&type_handler_medium_blob, &my_charset_bin); } | LONGBLOB opt_compressed { - Lex->charset=&my_charset_bin; - $$.set(&type_handler_long_blob); + $$.set(&type_handler_long_blob, &my_charset_bin); } | LONG_SYM VARBINARY opt_compressed { - Lex->charset=&my_charset_bin; - $$.set(&type_handler_medium_blob); + $$.set(&type_handler_medium_blob, &my_charset_bin); } | LONG_SYM varchar opt_binary_and_compression - { $$.set(&type_handler_medium_blob); } + { $$.set(&type_handler_medium_blob, $3); } | TINYTEXT opt_binary_and_compression - { $$.set(&type_handler_tiny_blob); } + { $$.set(&type_handler_tiny_blob, $2); } | TEXT_SYM opt_field_length opt_binary_and_compression - { $$.set(&type_handler_blob, $2); } + { $$.set(&type_handler_blob, $2, $3); } | MEDIUMTEXT opt_binary_and_compression - { $$.set(&type_handler_medium_blob); } + { $$.set(&type_handler_medium_blob, $2); } | LONGTEXT opt_binary_and_compression - { $$.set(&type_handler_long_blob); } + { $$.set(&type_handler_long_blob, $2); } | CLOB_ORACLE_SYM opt_binary_and_compression - { $$.set(&type_handler_long_blob); } + { $$.set(&type_handler_long_blob, $2); } | LONG_SYM opt_binary_and_compression - { $$.set(&type_handler_medium_blob); } + { $$.set(&type_handler_medium_blob, $2); } | JSON_SYM opt_compressed { - Lex->charset= &my_charset_utf8mb4_bin; - $$.set(&type_handler_long_blob_json); + $$.set(&type_handler_long_blob_json, &my_charset_utf8mb4_bin); } ; field_type_misc: ENUM '(' string_list ')' opt_binary - { $$.set(&type_handler_enum); } + { $$.set(&type_handler_enum, $5); } | SET '(' string_list ')' opt_binary - { $$.set(&type_handler_set); } 
+ { $$.set(&type_handler_set, $5); } ; char: @@ -6272,35 +6276,38 @@ opt_precision: attribute_list: - attribute_list attribute {} + attribute_list attribute + { + if (($$= $1).merge_collate_clause_and_collate_clause($2)) + MYSQL_YYABORT; + } | attribute ; attribute: - NULL_SYM { Lex->last_field->flags&= ~ NOT_NULL_FLAG; } - | DEFAULT column_default_expr { Lex->last_field->default_value= $2; } + NULL_SYM { Lex->last_field->flags&= ~ NOT_NULL_FLAG; $$.init(); } + | DEFAULT column_default_expr { Lex->last_field->default_value= $2; $$.init(); } | ON UPDATE_SYM NOW_SYM opt_default_time_precision { Item *item= new (thd->mem_root) Item_func_now_local(thd, $4); if (unlikely(item == NULL)) MYSQL_YYABORT; Lex->last_field->on_update= item; + $$.init(); } - | AUTO_INC { Lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG; } + | AUTO_INC { Lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG; $$.init(); } | SERIAL_SYM DEFAULT VALUE_SYM { LEX *lex=Lex; lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG | UNIQUE_KEY_FLAG; lex->alter_info.flags|= ALTER_ADD_INDEX; + $$.init(); } | COLLATE_SYM collation_name { - if (unlikely(Lex->charset && !my_charset_same(Lex->charset,$2))) - my_yyabort_error((ER_COLLATION_CHARSET_MISMATCH, MYF(0), - $2->coll_name.str, Lex->charset->cs_name.str)); - Lex->last_field->charset= $2; + $$.set_collate_exact($2); } - | serial_attribute + | serial_attribute { $$.init(); } ; opt_compression_method: @@ -6444,7 +6451,7 @@ collation_name: } ; -opt_collate: +opt_collate_or_default: /* empty */ { $$=NULL; } | COLLATE_SYM collation_name_or_default { $$=$2; } ; @@ -6469,27 +6476,36 @@ charset_or_alias: } ; -collate: COLLATE_SYM collation_name_or_default { $$= $2; } - ; - opt_binary: - /* empty */ { bincmp_collation(NULL, false); } - | binary {} + /* empty */ { $$.init(); } + | binary ; binary: - BYTE_SYM { bincmp_collation(&my_charset_bin, false); } - | charset_or_alias opt_bin_mod { bincmp_collation($1, $2); } - | BINARY { 
bincmp_collation(NULL, true); } - | BINARY charset_or_alias { bincmp_collation($2, true); } - | charset_or_alias collate + BYTE_SYM { $$.set_charset(&my_charset_bin); } + | charset_or_alias { $$.set_charset($1); } + | charset_or_alias BINARY { - if (!my_charset_same($2, $1)) - my_yyabort_error((ER_COLLATION_CHARSET_MISMATCH, MYF(0), - $2->coll_name.str, $1->cs_name.str)); - Lex->charset= $2; + if ($$.set_charset_collate_binary($1)) + MYSQL_YYABORT; + } + | BINARY { $$.set_contextually_typed_binary_style(); } + | BINARY charset_or_alias + { + if ($$.set_charset_collate_binary($2)) + MYSQL_YYABORT; + } + | charset_or_alias COLLATE_SYM DEFAULT + { + $$.set_charset_collate_default($1); + } + | charset_or_alias COLLATE_SYM collation_name + { + if ($$.set_charset_collate_exact($1, $3)) + MYSQL_YYABORT; } - | collate { Lex->charset= $1; } + | COLLATE_SYM collation_name { $$.set_collate_exact($2); } + | COLLATE_SYM DEFAULT { $$.set_collate_default(); } ; opt_bin_mod: @@ -7559,7 +7575,8 @@ alter_list_item: lex->alter_info.alter_rename_key_list.push_back(ak); lex->alter_info.flags|= ALTER_RENAME_INDEX; } - | CONVERT_SYM TO_SYM charset charset_name_or_default opt_collate + | CONVERT_SYM TO_SYM charset charset_name_or_default + opt_collate_or_default { if (!$4) { @@ -9407,15 +9424,14 @@ opt_dyncol_type: /* empty */ { $$.set(DYN_COL_NULL); /* automatic type */ - Lex->charset= NULL; } | AS dyncol_type { $$= $2; } ; dyncol_type: - numeric_dyncol_type { $$= $1; Lex->charset= NULL; } - | temporal_dyncol_type { $$= $1; Lex->charset= NULL; } - | string_dyncol_type { $$= $1; } + numeric_dyncol_type + | temporal_dyncol_type + | string_dyncol_type ; numeric_dyncol_type: @@ -9434,23 +9450,20 @@ temporal_dyncol_type: ; string_dyncol_type: - char - { Lex->charset= thd->variables.collation_connection; } - opt_binary + char opt_binary { - $$.set(DYN_COL_STRING); + if ($$.set(DYN_COL_STRING, $2, thd->variables.collation_connection)) + MYSQL_YYABORT; } | nchar { - $$.set(DYN_COL_STRING); - 
Lex->charset= national_charset_info; + $$.set(DYN_COL_STRING, national_charset_info); } ; dyncall_create_element: expr ',' expr opt_dyncol_type { - LEX *lex= Lex; $$= (DYNCALL_CREATE_DEF *) alloc_root(thd->mem_root, sizeof(DYNCALL_CREATE_DEF)); if (unlikely($$ == NULL)) @@ -9458,7 +9471,7 @@ dyncall_create_element: $$->key= $1; $$->value= $3; $$->type= (DYNAMIC_COLUMN_TYPE)$4.dyncol_type(); - $$->cs= lex->charset; + $$->cs= $4.charset_collation(); if ($4.has_explicit_length()) $$->len= $4.length(); else @@ -9597,8 +9610,7 @@ column_default_non_parenthesized_expr: } | CAST_SYM '(' expr AS cast_type ')' { - if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3, - Lex->charset)))) + if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3)))) MYSQL_YYABORT; } | CASE_SYM when_list_opt_else END @@ -9614,8 +9626,7 @@ column_default_non_parenthesized_expr: } | CONVERT_SYM '(' expr ',' cast_type ')' { - if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3, - Lex->charset)))) + if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3)))) MYSQL_YYABORT; } | CONVERT_SYM '(' expr USING charset_name ')' @@ -10151,9 +10162,8 @@ function_call_nonkeyword: | COLUMN_GET_SYM '(' expr ',' expr AS cast_type ')' { - LEX *lex= Lex; $$= create_func_dyncol_get(thd, $3, $5, $7.type_handler(), - $7, lex->charset); + $7, $7.charset()); if (unlikely($$ == NULL)) MYSQL_YYABORT; } @@ -11054,26 +11064,31 @@ in_sum_expr: cast_type: BINARY opt_field_length - { $$.set(&type_handler_long_blob, $2); Lex->charset= &my_charset_bin; } - | CHAR_SYM opt_field_length - { Lex->charset= thd->variables.collation_connection; } - opt_binary - { $$.set(&type_handler_long_blob, $2); } - | VARCHAR field_length - { Lex->charset= thd->variables.collation_connection; } - opt_binary - { $$.set(&type_handler_long_blob, $2); } - | VARCHAR2_ORACLE_SYM field_length - { Lex->charset= thd->variables.collation_connection; } - opt_binary - { $$.set(&type_handler_long_blob, $2); } + { 
$$.set(&type_handler_long_blob, $2, &my_charset_bin); } + | CHAR_SYM opt_field_length opt_binary + { + if ($$.set(&type_handler_long_blob, $2, $3, + thd->variables.collation_connection)) + MYSQL_YYABORT; + } + | VARCHAR field_length opt_binary + { + if ($$.set(&type_handler_long_blob, $2, $3, + thd->variables.collation_connection)) + MYSQL_YYABORT; + } + | VARCHAR2_ORACLE_SYM field_length opt_binary + { + if ($$.set(&type_handler_long_blob, $2, $3, + thd->variables.collation_connection)) + MYSQL_YYABORT; + } | NCHAR_SYM opt_field_length { - Lex->charset= national_charset_info; - $$.set(&type_handler_long_blob, $2); + $$.set(&type_handler_long_blob, $2, national_charset_info); } - | cast_type_numeric { $$= $1; Lex->charset= NULL; } - | cast_type_temporal { $$= $1; Lex->charset= NULL; } + | cast_type_numeric { $$= $1; } + | cast_type_temporal { $$= $1; } | IDENT_sys { if (Lex->set_cast_type_udt(&$$, $1)) @@ -11262,7 +11277,7 @@ json_table_column: !lex->json_table->m_cur_json_table_column)) MYSQL_YYABORT; - lex->init_last_field(f, &$1, NULL); + lex->init_last_field(f, &$1); } json_table_column_type { @@ -11293,7 +11308,7 @@ json_table_column_type: { Lex_field_type_st type; type.set(&type_handler_slong); - Lex->last_field->set_attributes(thd, type, Lex->charset, + Lex->last_field->set_attributes(thd, type, COLUMN_DEFINITION_TABLE_FIELD); Lex->json_table->m_cur_json_table_column-> set(Json_table_column::FOR_ORDINALITY); @@ -11301,20 +11316,23 @@ json_table_column_type: | json_table_field_type PATH_SYM json_text_literal json_opt_on_empty_or_error { - Lex->last_field->set_attributes(thd, $1, Lex->charset, + Lex->last_field->set_attributes(thd, $1, COLUMN_DEFINITION_TABLE_FIELD); if (Lex->json_table->m_cur_json_table_column-> - set(thd, Json_table_column::PATH, $3, Lex->charset)) + set(thd, Json_table_column::PATH, $3, + $1.lex_charset_collation())) { MYSQL_YYABORT; } } | json_table_field_type EXISTS PATH_SYM json_text_literal { - Lex->last_field->set_attributes(thd, $1, 
Lex->charset, + Lex->last_field->set_attributes(thd, $1, COLUMN_DEFINITION_TABLE_FIELD); - Lex->json_table->m_cur_json_table_column-> - set(thd, Json_table_column::EXISTS_PATH, $4, Lex->charset); + if (Lex->json_table->m_cur_json_table_column-> + set(thd, Json_table_column::EXISTS_PATH, $4, + $1.lex_charset_collation())) + MYSQL_YYABORT; } ; @@ -16484,7 +16502,7 @@ option_value_no_option_type: thd->parse_error(); MYSQL_YYABORT; } - | NAMES_SYM charset_name_or_default opt_collate + | NAMES_SYM charset_name_or_default opt_collate_or_default { if (sp_create_assignment_lex(thd, $1.pos())) MYSQL_YYABORT; @@ -17716,8 +17734,7 @@ sf_return_type: { LEX *lex= Lex; lex->init_last_field(&lex->sphead->m_return_field_def, - &empty_clex_str, - thd->variables.collation_database); + &empty_clex_str); } field_type { diff --git a/sql/structs.h b/sql/structs.h index d5c363cdd25..ff6d9b70eb0 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -28,6 +28,7 @@ #include "my_base.h" /* ha_rows, ha_key_alg */ #include <mysql_com.h> /* USERNAME_LENGTH */ #include "sql_bitmap.h" +#include "lex_charset.h" struct TABLE; class Type_handler; @@ -601,18 +602,24 @@ public: struct Lex_length_and_dec_st { -private: +protected: uint32 m_length; uint8 m_dec; + uint8 m_collation_type:LEX_CHARSET_COLLATION_TYPE_BITS; bool m_has_explicit_length:1; bool m_has_explicit_dec:1; bool m_length_overflowed:1; bool m_dec_overflowed:1; + + static_assert(LEX_CHARSET_COLLATION_TYPE_BITS <= 8, + "Lex_length_and_dec_st::m_collation_type bits check"); + public: void reset() { m_length= 0; m_dec= 0; + m_collation_type= 0; m_has_explicit_length= false; m_has_explicit_dec= false; m_length_overflowed= false; @@ -622,6 +629,7 @@ public: { m_length= length; m_dec= 0; + m_collation_type= 0; m_has_explicit_length= true; m_has_explicit_dec= false; m_length_overflowed= false; @@ -631,6 +639,7 @@ public: { m_length= 0; m_dec= dec; + m_collation_type= 0; m_has_explicit_length= false; m_has_explicit_dec= true; m_length_overflowed= 
false; @@ -640,6 +649,7 @@ public: { m_length= length; m_dec= dec; + m_collation_type= 0; m_has_explicit_length= true; m_has_explicit_dec= true; m_length_overflowed= false; @@ -677,11 +687,37 @@ struct Lex_field_type_st: public Lex_length_and_dec_st { private: const Type_handler *m_handler; + CHARSET_INFO *m_ci; public: - void set(const Type_handler *handler, Lex_length_and_dec_st length_and_dec) + void set(const Type_handler *handler, + Lex_length_and_dec_st length_and_dec, + CHARSET_INFO *cs= NULL) + { + m_handler= handler; + m_ci= cs; + Lex_length_and_dec_st::operator=(length_and_dec); + } + void set(const Type_handler *handler, + const Lex_length_and_dec_st &length_and_dec, + const Lex_charset_collation_st &coll) { m_handler= handler; + m_ci= coll.charset_collation(); Lex_length_and_dec_st::operator=(length_and_dec); + m_collation_type= ((uint8) coll.type()) & 0x3; + } + void set(const Type_handler *handler, const Lex_charset_collation_st &coll) + { + m_handler= handler; + m_ci= coll.charset_collation(); + Lex_length_and_dec_st::reset(); + m_collation_type= ((uint8) coll.type()) & 0x3; + } + void set(const Type_handler *handler, CHARSET_INFO *cs= NULL) + { + m_handler= handler; + m_ci= cs; + Lex_length_and_dec_st::reset(); } void set_handler_length_flags(const Type_handler *handler, const Lex_length_and_dec_st &length, @@ -689,18 +725,21 @@ public: void set_handler_length(const Type_handler *handler, uint32 length) { m_handler= handler; + m_ci= NULL; Lex_length_and_dec_st::set_length_only(length); } - void set(const Type_handler *handler) - { - m_handler= handler; - Lex_length_and_dec_st::reset(); - } void set_handler(const Type_handler *handler) { m_handler= handler; } const Type_handler *type_handler() const { return m_handler; } + CHARSET_INFO *charset_collation() const { return m_ci; } + Lex_charset_collation lex_charset_collation() const + { + return Lex_charset_collation(m_ci, + (Lex_charset_collation_st::Type) + m_collation_type); + } }; @@ -708,18 
+747,38 @@ struct Lex_dyncol_type_st: public Lex_length_and_dec_st { private: int m_type; // enum_dynamic_column_type is not visible here, so use int + CHARSET_INFO *m_ci; public: - void set(int type, Lex_length_and_dec_st length_and_dec) + void set(int type, Lex_length_and_dec_st length_and_dec, + CHARSET_INFO *cs= NULL) { m_type= type; + m_ci= cs; Lex_length_and_dec_st::operator=(length_and_dec); } void set(int type) { m_type= type; + m_ci= NULL; Lex_length_and_dec_st::reset(); } + void set(int type, CHARSET_INFO *cs) + { + m_type= type; + m_ci= cs; + Lex_length_and_dec_st::reset(); + } + bool set(int type, const Lex_charset_collation_st &collation, + CHARSET_INFO *charset) + { + CHARSET_INFO *tmp= collation.resolved_to_character_set(charset); + if (!tmp) + return true; + set(type, tmp); + return false; + } int dyncol_type() const { return m_type; } + CHARSET_INFO *charset_collation() const { return m_ci; } }; |