summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.com>2022-02-09 21:21:39 +0400
committerAlexander Barkov <bar@mariadb.com>2022-03-22 17:12:15 +0400
commit0c4c064f98120e179ddfa49a1010d465a07bdc0a (patch)
treee8b4414484306ff6eb817269d34a07912e1f49df /sql
parentd25b10fede8926f63203dfd2040ec60549d10936 (diff)
downloadmariadb-git-0c4c064f98120e179ddfa49a1010d465a07bdc0a.tar.gz
MDEV-27743 Remove Lex::charset
This patch also fixes: MDEV-27690 Crash on `CHARACTER SET csname COLLATE DEFAULT` in column definition MDEV-27853 Wrong data type on column `COLLATE DEFAULT` and table `COLLATE some_non_default_collation` MDEV-28067 Multiple conflicting column COLLATE clauses are not rejected MDEV-28118 Wrong collation of `CAST(.. AS CHAR COLLATE DEFAULT)` MDEV-28119 Wrong column collation on MODIFY + CONVERT
Diffstat (limited to 'sql')
-rw-r--r--sql/CMakeLists.txt1
-rw-r--r--sql/field.cc4
-rw-r--r--sql/field.h19
-rw-r--r--sql/item_func.h27
-rw-r--r--sql/json_table.cc22
-rw-r--r--sql/json_table.h2
-rw-r--r--sql/lex_charset.cc339
-rw-r--r--sql/lex_charset.h199
-rw-r--r--sql/sql_lex.cc51
-rw-r--r--sql/sql_lex.h6
-rw-r--r--sql/sql_parse.cc18
-rw-r--r--sql/sql_parse.h1
-rw-r--r--sql/sql_table.cc24
-rw-r--r--sql/sql_type.cc14
-rw-r--r--sql/sql_type.h4
-rw-r--r--sql/sql_yacc.yy281
-rw-r--r--sql/structs.h75
17 files changed, 849 insertions, 238 deletions
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index a96ab1dfced..8d83ecb32de 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -162,6 +162,7 @@ SET (SQL_SOURCE
semisync.cc semisync_master.cc semisync_slave.cc
semisync_master_ack_receiver.cc
sql_schema.cc
+ lex_charset.cc
sql_type.cc sql_mode.cc sql_type_json.cc
sql_type_string.cc
sql_type_geom.cc
diff --git a/sql/field.cc b/sql/field.cc
index ae82d617ad3..a9f540e2348 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -10410,17 +10410,15 @@ bool Column_definition::prepare_interval_field(MEM_ROOT *mem_root,
bool Column_definition::set_attributes(THD *thd,
const Lex_field_type_st &def,
- CHARSET_INFO *cs,
column_definition_type_t type)
{
DBUG_ASSERT(type_handler() == &type_handler_null);
- DBUG_ASSERT(charset == &my_charset_bin || charset == NULL);
DBUG_ASSERT(length == 0);
DBUG_ASSERT(decimals == 0);
set_handler(def.type_handler());
return type_handler()->Column_definition_set_attributes(thd, this,
- def, cs, type);
+ def, type);
}
diff --git a/sql/field.h b/sql/field.h
index 2ed02b37cfd..37f004b6420 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -5299,7 +5299,6 @@ public:
Column_definition(THD *thd, Field *field, Field *orig_field);
bool set_attributes(THD *thd,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type);
void create_length_to_internal_length_null()
{
@@ -5493,6 +5492,24 @@ public:
{ return compression_method_ptr; }
bool check_vcol_for_key(THD *thd) const;
+
+ /*
+   Store a parse-time character set / collation in this column definition.
+   The collation pointer goes into "charset"; CONTEXT_COLLATION_FLAG in
+   "flags" records whether the collation is contextually typed.
+ */
+ void set_lex_charset_collation(const Lex_charset_collation_st &lc)
+ {
+   charset= lc.charset_collation();
+   // Start from a cleared flag, then raise it only for contextual collations
+   flags&= ~CONTEXT_COLLATION_FLAG;
+   if (lc.is_contextually_typed_collation())
+     flags|= CONTEXT_COLLATION_FLAG;
+ }
+ /*
+   Rebuild a Lex_charset_collation from "charset" and CONTEXT_COLLATION_FLAG:
+   - no charset                   -> TYPE_EMPTY
+   - CONTEXT_COLLATION_FLAG set   -> TYPE_COLLATE_CONTEXTUALLY_TYPED
+   - otherwise                    -> TYPE_CHARACTER_SET
+ */
+ Lex_charset_collation lex_charset_collation() const
+ {
+   Lex_charset_collation_st::Type type;
+   if (!charset)
+     type= Lex_charset_collation_st::TYPE_EMPTY;
+   else if (flags & CONTEXT_COLLATION_FLAG)
+     type= Lex_charset_collation_st::TYPE_COLLATE_CONTEXTUALLY_TYPED;
+   else
+     type= Lex_charset_collation_st::TYPE_CHARACTER_SET;
+   return Lex_charset_collation(charset, type);
+ }
};
diff --git a/sql/item_func.h b/sql/item_func.h
index 99b9a075d2c..23879f0a733 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -3761,26 +3761,41 @@ struct Lex_cast_type_st: public Lex_length_and_dec_st
{
private:
const Type_handler *m_type_handler;
+ CHARSET_INFO *m_charset;
public:
- void set(const Type_handler *handler, Lex_length_and_dec_st length_and_dec)
+ void set(const Type_handler *handler,
+ Lex_length_and_dec_st length_and_dec,
+ CHARSET_INFO *cs= NULL)
{
m_type_handler= handler;
+ m_charset= cs;
Lex_length_and_dec_st::operator=(length_and_dec);
}
+ /*
+   Set the cast type together with a character set / collation clause.
+   "cscl" is resolved against the default "defcs" first.
+   Returns true if the resolution produced no collation (nothing is stored
+   in that case), false on success.
+ */
+ bool set(const Type_handler *handler,
+          const Lex_length_and_dec_st & length_and_dec,
+          const Lex_charset_collation_st &cscl,
+          CHARSET_INFO *defcs)
+ {
+   CHARSET_INFO *resolved= cscl.resolved_to_character_set(defcs);
+   if (resolved)
+     set(handler, length_and_dec, resolved);
+   return resolved == NULL;
+ }
void set(const Type_handler *handler)
{
m_type_handler= handler;
+ m_charset= NULL;
Lex_length_and_dec_st::reset();
}
const Type_handler *type_handler() const { return m_type_handler; }
- Item *create_typecast_item(THD *thd, Item *item,
- CHARSET_INFO *cs= NULL) const
+ CHARSET_INFO *charset() const { return m_charset; }
+ Item *create_typecast_item(THD *thd, Item *item) const
{
return m_type_handler->
- create_typecast_item(thd, item, Type_cast_attributes(*this, cs));
+ create_typecast_item(thd, item, Type_cast_attributes(*this, m_charset));
}
- Item *create_typecast_item_or_error(THD *thd, Item *item,
- CHARSET_INFO *cs= NULL) const;
+ Item *create_typecast_item_or_error(THD *thd, Item *item) const;
};
diff --git a/sql/json_table.cc b/sql/json_table.cc
index 4384da18833..cb3787e4f25 100644
--- a/sql/json_table.cc
+++ b/sql/json_table.cc
@@ -716,7 +716,7 @@ bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table,
uint fieldnr= 0;
MEM_ROOT *mem_root_save= thd->mem_root;
List_iterator_fast<Json_table_column> jc_i(jt->m_columns);
- Column_derived_attributes da(NULL);
+ Column_derived_attributes da(&my_charset_utf8mb4_general_ci);
DBUG_ENTER("add_json_table_fields");
thd->mem_root= &table->mem_root;
@@ -733,8 +733,6 @@ bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table,
executing a prepared statement for the second time.
*/
sql_f->length= sql_f->char_length;
- if (!sql_f->charset)
- sql_f->charset= &my_charset_utf8mb4_general_ci;
if (sql_f->prepare_stage1(thd, thd->mem_root, table->file,
table->file->ha_table_flags(), &da))
@@ -873,6 +871,19 @@ int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path,
}
+/*
+  Variant of set() taking a parse-time character set / collation clause.
+  An empty clause and COLLATE DEFAULT are forwarded as "no explicit
+  character set" (a null pointer). Any other clause is resolved against
+  utf8mb4_general_ci first.
+  Returns 1 if the resolution fails, otherwise whatever the
+  CHARSET_INFO-based set() returns.
+*/
+int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path,
+                           const Lex_charset_collation_st &cl)
+{
+  CHARSET_INFO *explicit_cs= nullptr;
+
+  if (!cl.is_empty() && !cl.is_contextually_typed_collate_default())
+  {
+    explicit_cs= cl.resolved_to_character_set(&my_charset_utf8mb4_general_ci);
+    if (!explicit_cs)
+      return 1;
+  }
+  return set(thd, ctype, path, explicit_cs);
+}
+
+
static int print_path(String *str, const json_path_t *p)
{
return str->append('\'') ||
@@ -915,7 +926,10 @@ int Json_table_column::print(THD *thd, Field **f, String *str)
if (str->append(column_type) ||
((*f)->has_charset() && m_explicit_cs &&
(str->append(STRING_WITH_LEN(" CHARSET ")) ||
- str->append(&m_explicit_cs->cs_name))) ||
+ str->append(&m_explicit_cs->cs_name) ||
+ (!(m_explicit_cs->state & MY_CS_PRIMARY) &&
+ (str->append(STRING_WITH_LEN(" COLLATE ")) ||
+ str->append(&m_explicit_cs->coll_name))))) ||
str->append(m_column_type == PATH ? &path : &exists_path) ||
print_path(str, &m_path))
return 1;
diff --git a/sql/json_table.h b/sql/json_table.h
index 52cdae13e9b..2cadb07961e 100644
--- a/sql/json_table.h
+++ b/sql/json_table.h
@@ -160,6 +160,8 @@ public:
m_column_type= ctype;
}
int set(THD *thd, enum_type ctype, const LEX_CSTRING &path, CHARSET_INFO *cs);
+ int set(THD *thd, enum_type ctype, const LEX_CSTRING &path,
+ const Lex_charset_collation_st &cl);
Json_table_column(Create_field *f, Json_table_nested_path *nest) :
m_field(f), m_nest(nest), m_explicit_cs(NULL)
{
diff --git a/sql/lex_charset.cc b/sql/lex_charset.cc
new file mode 100644
index 00000000000..c6c9e402731
--- /dev/null
+++ b/sql/lex_charset.cc
@@ -0,0 +1,339 @@
+/* Copyright (c) 2021, 2022, MariaDB Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+
+#include "my_global.h"
+#include "my_sys.h"
+#include "m_ctype.h"
+#include "lex_charset.h"
+#include "mysqld_error.h"
+
+
+/**
+  Find the collation with binary comparison rules for the character set
+  of "cs".
+
+  @param cs     A collation whose cs_name selects the character set to
+                search in. Must not be NULL.
+  @return       "cs" itself when it already has MY_CS_BINSORT set, otherwise
+                the result of the get_charset_by_csname() lookup.
+  @retval NULL  The lookup failed; ER_UNKNOWN_COLLATION was reported
+                through my_error().
+*/
+CHARSET_INFO *Lex_charset_collation_st::find_bin_collation(CHARSET_INFO *cs)
+{
+  /*
+    We don't need to handle old_mode=UTF8_IS_UTF8MB3 here,
+    because "cs" points to a real character set name.
+    It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8".
+    No thd->get_utf8_flag() flag passed to get_charset_by_csname().
+  */
+  DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4));
+  /*
+    CREATE TABLE t1 (a CHAR(10) BINARY)
+      CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+    Nothing to do, we have the binary collation already.
+  */
+  if (cs->state & MY_CS_BINSORT)
+    return cs;
+
+  // CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4;
+  /*
+    Keep the lookup result in a separate variable: "cs" is still needed
+    to build the error message when the lookup returns NULL.
+    Overwriting "cs" with the result would dereference a NULL pointer
+    on the error path.
+  */
+  CHARSET_INFO *bin= get_charset_by_csname(cs->cs_name.str,
+                                           MY_CS_BINSORT, MYF(0));
+  if (!bin)
+  {
+    char tmp[65];
+    strxnmov(tmp, sizeof(tmp)-1, cs->cs_name.str, "_bin", NULL);
+    my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp);
+  }
+  return bin;
+}
+
+
+/*
+  Return the collation flagged MY_CS_PRIMARY for the character set of "cs".
+  If "cs" already has that flag it is returned unchanged, e.g.:
+    CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4;
+  Otherwise the collation is looked up by character set name, e.g.:
+    CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
+      CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+*/
+CHARSET_INFO *Lex_charset_collation_st::find_default_collation(CHARSET_INFO *cs)
+{
+  /*
+    "cs" carries a real character set name, never the "utf8" alias,
+    so old_mode=UTF8_IS_UTF8MB3 needs no handling here
+    (same reasoning as in find_bin_collation()).
+  */
+  DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4));
+
+  const bool already_default= (cs->state & MY_CS_PRIMARY) != 0;
+  if (!already_default)
+  {
+    cs= get_charset_by_csname(cs->cs_name.str, MY_CS_PRIMARY, MYF(MY_WME));
+    /*
+      The lookup should never fail, as we have default collations
+      for all character sets.
+    */
+    DBUG_ASSERT(cs);
+  }
+  return cs;
+}
+
+
+/*
+  Handle "CHARACTER SET cs COLLATE cl" with an explicit collation name.
+  Stores "cl" as an exact collation when it belongs to the character set
+  of "cs". Otherwise reports ER_COLLATION_CHARSET_MISMATCH and returns true.
+*/
+bool Lex_charset_collation_st::set_charset_collate_exact(CHARSET_INFO *cs,
+                                                         CHARSET_INFO *cl)
+{
+  DBUG_ASSERT(cs != nullptr && cl != nullptr);
+  if (my_charset_same(cl, cs))
+  {
+    set_collate_exact(cl);
+    return false;
+  }
+  my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
+           cl->coll_name.str, cs->cs_name.str);
+  return true;
+}
+
+
+/*
+  Resolve an empty or a contextually typed collation against the upper
+  level default character set (and optionally a collation), e.g.:
+    CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin1;
+    CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
+    CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
+      CHARACTER SET latin1 COLLATE latin1_bin;
+
+  "this" is the COLLATE clause (e.g. of a column)
+  "def"  is the upper level CHARACTER SET clause (e.g. of a table)
+
+  Returns the resolved collation, or NULL if none could be produced.
+*/
+CHARSET_INFO *
+Lex_charset_collation_st::resolved_to_character_set(CHARSET_INFO *def) const
+{
+  DBUG_ASSERT(def);
+
+  // Nothing was typed: take the upper level default as is
+  if (m_type == TYPE_EMPTY)
+    return def;
+
+  // Explicitly typed: the stored collation is already final
+  if (m_type == TYPE_CHARACTER_SET || m_type == TYPE_COLLATE_EXACT)
+  {
+    DBUG_ASSERT(m_ci);
+    return m_ci;
+  }
+
+  // Contextually typed: the result depends on "def"
+  DBUG_ASSERT(m_ci);
+
+  // CHAR(10) BINARY
+  if (is_contextually_typed_binary_style())
+    return find_bin_collation(def);
+
+  // CHAR(10) COLLATE DEFAULT
+  if (is_contextually_typed_collate_default())
+    return find_default_collation(def);
+
+  /*
+    A contextually typed collation that is neither binary nor default.
+    The parser cannot produce one yet, but it will soon,
+    e.g. "uca1400_as_ci".
+  */
+  DBUG_ASSERT(0);
+  return NULL;
+}
+
+
+/*
+  Merge the CHARACTER SET clause with a COLLATE clause, which can be:
+  - empty
+  - an explicitly typed collation name
+  - a contextually typed collation
+
+  "this" corresponds to `CHARACTER SET xxx [BINARY]`
+  "cl"   corresponds to the COLLATE clause
+
+  Returns false on success, true on error.
+*/
+bool
+Lex_charset_collation_st::
+  merge_charset_clause_and_collate_clause(const Lex_charset_collation_st &cl)
+{
+  // No COLLATE clause: "this" stays as it is
+  if (cl.is_empty())
+    return false;
+
+  /*
+    No CHARACTER SET clause, the COLLATE clause wins:
+      CHAR(10) NOT NULL COLLATE latin1_bin
+      CHAR(10) NOT NULL COLLATE DEFAULT
+  */
+  if (m_type == TYPE_EMPTY)
+  {
+    *this= cl;
+    return false;
+  }
+
+  // Explicit character set, optionally with an exact collation already
+  if (m_type == TYPE_CHARACTER_SET || m_type == TYPE_COLLATE_EXACT)
+  {
+    Lex_explicit_charset_opt_collate ecs(m_ci, m_type == TYPE_COLLATE_EXACT);
+    if (ecs.merge_collate_or_error(cl))
+      return true;
+    set_collate_exact(ecs.charset_and_collation());
+    return false;
+  }
+
+  // Only the contextually typed kind is expected from here on
+  if (!is_contextually_typed_collation())
+  {
+    DBUG_ASSERT(0);
+    return false;
+  }
+
+  if (cl.is_contextually_typed_collation())
+  {
+    /*
+      CONTEXT + CONTEXT:
+        CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser
+        CHAR(10) BINARY .. COLLATE uca1400_as_ci - not supported yet
+    */
+    DBUG_ASSERT(0); // Not possible yet
+    return false;
+  }
+
+  /*
+    CONTEXT + EXPLICIT
+      CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci
+      CHAR(10) BINARY .. COLLATE latin1_bin
+      CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin
+  */
+  CHARSET_INFO *explicit_cl= cl.charset_collation();
+
+  // COLLATE DEFAULT only agrees with a collation flagged MY_CS_PRIMARY
+  if (is_contextually_typed_collate_default() &&
+      !(explicit_cl->state & MY_CS_PRIMARY))
+  {
+    my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
+             "COLLATE ", "DEFAULT", "COLLATE ",
+             explicit_cl->coll_name.str);
+    return true;
+  }
+
+  // BINARY only agrees with a collation flagged MY_CS_BINSORT
+  if (is_contextually_typed_binary_style() &&
+      !(explicit_cl->state & MY_CS_BINSORT))
+  {
+    my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
+             "", "BINARY", "COLLATE ", explicit_cl->coll_name.str);
+    return true;
+  }
+
+  *this= cl;
+  return false;
+}
+
+
+/*
+  Merge the COLLATE clause "cl" into "this", which holds
+  `CHARACTER SET cs [COLLATE cl0]`.
+  "cl" must not be of the TYPE_CHARACTER_SET kind.
+
+  Returns false on success, true on error.
+*/
+bool
+Lex_explicit_charset_opt_collate::
+  merge_collate_or_error(const Lex_charset_collation_st &cl)
+{
+  DBUG_ASSERT(cl.type() != Lex_charset_collation_st::TYPE_CHARACTER_SET);
+
+  const Lex_charset_collation_st::Type cl_type= cl.type();
+
+  if (cl_type == Lex_charset_collation_st::TYPE_COLLATE_EXACT)
+  {
+    /*
+      EXPLICIT + EXPLICIT
+        CHAR(10) CHARACTER SET latin1 .. COLLATE latin1_bin
+        CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin .. COLLATE latin1_bin
+        CHAR(10) COLLATE latin1_bin .. COLLATE latin1_bin
+        CHAR(10) CHARACTER SET latin1 BINARY .. COLLATE latin1_bin
+    */
+    CHARSET_INFO *exact= cl.charset_collation();
+
+    // Two COLLATE clauses must name the very same collation
+    if (m_with_collate && m_ci != exact)
+    {
+      my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
+               "COLLATE ", m_ci->coll_name.str,
+               "COLLATE ", exact->coll_name.str);
+      return true;
+    }
+    // The collation must belong to the character set
+    if (!my_charset_same(m_ci, exact))
+    {
+      my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
+               exact->coll_name.str, m_ci->cs_name.str);
+      return true;
+    }
+    m_ci= exact;
+    m_with_collate= true;
+    return false;
+  }
+
+  if (cl_type == Lex_charset_collation_st::TYPE_COLLATE_CONTEXTUALLY_TYPED)
+  {
+    if (!cl.is_contextually_typed_collate_default())
+    {
+      /*
+        EXPLICIT + CONTEXT
+          CHAR(10) COLLATE latin1_bin .. COLLATE DEFAULT not possible yet
+          CHAR(10) COLLATE latin1_bin .. COLLATE uca1400_as_ci
+      */
+      DBUG_ASSERT(0); // Not possible yet
+      return false;
+    }
+
+    /*
+      SET NAMES latin1 COLLATE DEFAULT;
+      ALTER TABLE t1 CONVERT TO CHARACTER SET latin1 COLLATE DEFAULT;
+    */
+    CHARSET_INFO *def= Lex_charset_collation_st::find_default_collation(m_ci);
+    if (!def)
+      return true;
+    m_ci= def;
+    m_with_collate= true;
+    return false;
+  }
+
+  // An empty COLLATE clause changes nothing
+  if (cl_type == Lex_charset_collation_st::TYPE_EMPTY)
+    return false;
+
+  // TYPE_CHARACTER_SET (or an unknown value) is not expected here
+  DBUG_ASSERT(0);
+  return false;
+}
+
+
+/*
+  Merge two independent COLLATE clauses (not belonging to a CHARACTER SET
+  clause). Used in the "attribute_list" rule.
+
+  Returns false on success, true if the two clauses name different
+  collations (ER_CONFLICTING_DECLARATIONS is reported).
+*/
+bool
+Lex_charset_collation_st::
+  merge_collate_clause_and_collate_clause(const Lex_charset_collation_st &cl)
+{
+  /*
+    An independent COLLATE clause in a column attribute
+    can be neither "BINARY" nor "COLLATE DEFAULT".
+  */
+  DBUG_ASSERT(!is_contextually_typed_collation());
+  DBUG_ASSERT(!cl.is_contextually_typed_collation());
+
+  // Nothing to merge
+  if (cl.is_empty())
+    return false;
+
+  // First COLLATE clause seen: just remember it
+  if (m_type == TYPE_EMPTY)
+  {
+    *this= cl;
+    return false;
+  }
+
+  // A CHARACTER SET clause is not expected on the left side
+  if (m_type == TYPE_CHARACTER_SET)
+  {
+    DBUG_ASSERT(0);
+    return false;
+  }
+
+  /*
+    Two independent explicit collations:
+      CHAR(10) NOT NULL COLLATE latin1_bin DEFAULT 'a' COLLATE latin1_bin
+    Note, we should perhaps eventually disallow double COLLATE clauses.
+    But for now let's just disallow only conflicting ones.
+  */
+  if (charset_collation() == cl.charset_collation())
+    return false;
+
+  my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
+           "COLLATE ", charset_collation()->coll_name.str,
+           "COLLATE ", cl.charset_collation()->coll_name.str);
+  return true;
+}
diff --git a/sql/lex_charset.h b/sql/lex_charset.h
new file mode 100644
index 00000000000..abbe761df36
--- /dev/null
+++ b/sql/lex_charset.h
@@ -0,0 +1,199 @@
+/* Copyright (c) 2021, 2022, MariaDB Corporation.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#ifndef LEX_CHARSET_INCLUDED
+#define LEX_CHARSET_INCLUDED
+
+/*
+ Parse time character set and collation.
+
+ Can be:
+
+ 1. Empty (not specified on the column level):
+ CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin2; -- (1a)
+ CREATE TABLE t1 (a CHAR(10)); -- (1b)
+
+ 2. Precisely typed:
+ CREATE TABLE t1 (a CHAR(10) COLLATE latin1_bin); -- (2a)
+ CREATE TABLE t1 (
+ a CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin); -- (2b)
+
+ 3. Contextually typed:
+ CREATE TABLE t2 (a CHAR(10) BINARY) CHARACTER SET latin2; -- (3a)
+ CREATE TABLE t2 (a CHAR(10) BINARY); -- (3b)
+ CREATE TABLE t2 (a CHAR(10) COLLATE DEFAULT)
+ CHARACTER SET latin2 COLLATE latin2_bin; -- (3c)
+
+ In case of an empty or a contextually typed collation,
+ it is subject to later resolution, when the context
+ character set becomes known at the end of the CREATE statement:
+ - either after the explicit table level CHARACTER SET, like in (1a,3a,3c)
+ - or by the inherited database level CHARACTER SET, like in (1b,3b)
+
+ Resolution happens in Type_handler::Column_definition_prepare_stage1().
+*/
+struct Lex_charset_collation_st
+{
+public:
+  // How the character set / collation was typed in the statement
+  enum Type
+  {
+    TYPE_EMPTY= 0,
+    TYPE_CHARACTER_SET= 1,
+    TYPE_COLLATE_EXACT= 2,
+    TYPE_COLLATE_CONTEXTUALLY_TYPED= 3
+  };
+
+// Number of bits required to store enum Type values
+
+#define LEX_CHARSET_COLLATION_TYPE_BITS 2
+  static_assert(((1<<LEX_CHARSET_COLLATION_TYPE_BITS)-1) >=
+                TYPE_COLLATE_CONTEXTUALLY_TYPED,
+                "Lex_charset_collation_st::Type bits check");
+
+protected:
+  /*
+    No constructors and no member initializers here: init() is the way
+    to reset an instance.
+  */
+  CHARSET_INFO *m_ci;
+  Type m_type;
+
+public:
+  // Lookup helpers, see lex_charset.cc
+  static CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs);
+  static CHARSET_INFO *find_default_collation(CHARSET_INFO *cs);
+
+public:
+  // Reset to the "nothing typed" state
+  void init()
+  {
+    m_ci= NULL;
+    m_type= TYPE_EMPTY;
+  }
+
+  /* Read-only accessors */
+  Type type() const { return m_type; }
+  CHARSET_INFO *charset_collation() const { return m_ci; }
+  bool is_empty() const { return m_type == TYPE_EMPTY; }
+  bool is_contextually_typed_collation() const
+  {
+    return m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED;
+  }
+  // True if "this" holds the COLLATE DEFAULT marker
+  bool is_contextually_typed_collate_default() const
+  {
+    return m_ci == &my_collation_contextually_typed_default;
+  }
+  // True if "this" holds the BINARY marker
+  bool is_contextually_typed_binary_style() const
+  {
+    return m_ci == &my_collation_contextually_typed_binary;
+  }
+
+  /* Setters for the explicitly typed kinds */
+
+  // Store "cs" as TYPE_CHARACTER_SET
+  void set_charset(CHARSET_INFO *cs)
+  {
+    DBUG_ASSERT(cs);
+    m_type= TYPE_CHARACTER_SET;
+    m_ci= cs;
+  }
+  /*
+    Store "cs" unchanged as TYPE_COLLATE_EXACT.
+    NOTE(review): presumably the caller passes the default collation
+    of the character set - confirm against the parser rules.
+  */
+  void set_charset_collate_default(CHARSET_INFO *cs)
+  {
+    DBUG_ASSERT(cs);
+    m_type= TYPE_COLLATE_EXACT;
+    m_ci= cs;
+  }
+  /*
+    Store the binary collation of the character set of "cs"
+    as TYPE_COLLATE_EXACT.
+    Returns true if find_bin_collation() found nothing; "this" is left
+    untouched in that case.
+  */
+  bool set_charset_collate_binary(CHARSET_INFO *cs)
+  {
+    DBUG_ASSERT(cs);
+    CHARSET_INFO *bin= find_bin_collation(cs);
+    if (!bin)
+      return true;
+    m_type= TYPE_COLLATE_EXACT;
+    m_ci= bin;
+    return false;
+  }
+  bool set_charset_collate_exact(CHARSET_INFO *cs,
+                                 CHARSET_INFO *cl);
+  // Store "cl" as TYPE_COLLATE_EXACT
+  void set_collate_exact(CHARSET_INFO *cl)
+  {
+    DBUG_ASSERT(cl);
+    m_type= TYPE_COLLATE_EXACT;
+    m_ci= cl;
+  }
+
+  /* Setters for the contextually typed kinds */
+
+  // COLLATE DEFAULT
+  void set_collate_default()
+  {
+    m_type= TYPE_COLLATE_CONTEXTUALLY_TYPED;
+    m_ci= &my_collation_contextually_typed_default;
+  }
+  // BINARY
+  void set_contextually_typed_binary_style()
+  {
+    m_type= TYPE_COLLATE_CONTEXTUALLY_TYPED;
+    m_ci= &my_collation_contextually_typed_binary;
+  }
+
+  /* Resolution and merging, see lex_charset.cc */
+  CHARSET_INFO *resolved_to_character_set(CHARSET_INFO *cs) const;
+  bool merge_charset_clause_and_collate_clause(const Lex_charset_collation_st &cl);
+  bool merge_collate_clause_and_collate_clause(const Lex_charset_collation_st &cl);
+};
+
+
+/*
+  CHARACTER SET cs [COLLATE cl]
+
+  m_ci holds the character set and collation.
+  m_with_collate tells whether a COLLATE clause has been seen.
+*/
+class Lex_explicit_charset_opt_collate
+{
+  CHARSET_INFO *m_ci;
+  bool m_with_collate;
+public:
+  Lex_explicit_charset_opt_collate(CHARSET_INFO *ci, bool with_collate)
+   :m_ci(ci), m_with_collate(with_collate)
+  {
+    DBUG_ASSERT(m_ci);
+    /*
+      There is deliberately no assert that "ci" is a default collation
+      when with_collate is false: Item_func_set_collation uses
+      non-default collations in "ci".
+    */
+  }
+
+  CHARSET_INFO *charset_and_collation() const { return m_ci; }
+  bool with_collate() const { return m_with_collate; }
+
+  /*
+    Merge to another COLLATE clause. So the full syntax looks like:
+      CHARACTER SET cs [COLLATE cl] ... COLLATE cl2
+  */
+  bool merge_collate_or_error(const Lex_charset_collation_st &cl);
+
+  // Same as above, but an empty "cl" is accepted without being merged
+  bool merge_opt_collate_or_error(const Lex_charset_collation_st &cl)
+  {
+    return !cl.is_empty() && merge_collate_or_error(cl);
+  }
+};
+
+
+/*
+  Lex_charset_collation_st with constructors added.
+*/
+class Lex_charset_collation: public Lex_charset_collation_st
+{
+public:
+  // Start in the "nothing typed" state
+  Lex_charset_collation()
+  {
+    m_ci= NULL;
+    m_type= TYPE_EMPTY;
+  }
+  // "collation" may be NULL only together with TYPE_EMPTY
+  Lex_charset_collation(CHARSET_INFO *collation, Type type)
+  {
+    DBUG_ASSERT(collation || type == TYPE_EMPTY);
+    m_type= type;
+    m_ci= collation;
+  }
+  /*
+    Build the value for a national character type:
+    utf8mb3_bin as an exact collation when "bin_mod" is set,
+    utf8mb3_general_ci as a character set otherwise.
+  */
+  static Lex_charset_collation national(bool bin_mod)
+  {
+    if (bin_mod)
+      return Lex_charset_collation(&my_charset_utf8mb3_bin,
+                                   TYPE_COLLATE_EXACT);
+    return Lex_charset_collation(&my_charset_utf8mb3_general_ci,
+                                 TYPE_CHARACTER_SET);
+  }
+};
+
+
+#endif // LEX_CHARSET_INCLUDED
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index 8398b727841..86d4040a9be 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -543,36 +543,10 @@ bool LEX::add_alter_list(LEX_CSTRING name, LEX_CSTRING new_name, bool exists)
void LEX::init_last_field(Column_definition *field,
- const LEX_CSTRING *field_name,
- const CHARSET_INFO *cs)
+ const LEX_CSTRING *field_name)
{
last_field= field;
-
field->field_name= *field_name;
-
- /* reset LEX fields that are used in Create_field::set_and_check() */
- charset= cs;
-}
-
-
-bool LEX::set_bincmp(CHARSET_INFO *cs, bool bin)
-{
- /*
- if charset is NULL - we're parsing a field declaration.
- we cannot call find_bin_collation for a field here, because actual
- field charset is determined in get_sql_field_charset() much later.
- so we only set a flag.
- */
- if (!charset)
- {
- charset= cs;
- last_field->flags|= bin ? BINCMP_FLAG : 0;
- return false;
- }
-
- charset= bin ? find_bin_collation(cs ? cs : charset)
- : cs ? cs : charset;
- return charset == NULL;
}
@@ -6392,8 +6366,7 @@ sp_variable *LEX::sp_param_init(LEX_CSTRING *name)
return NULL;
}
sp_variable *spvar= spcont->add_variable(thd, name);
- init_last_field(&spvar->field_def, name,
- thd->variables.collation_database);
+ init_last_field(&spvar->field_def, name);
return spvar;
}
@@ -6402,8 +6375,7 @@ bool LEX::sp_param_fill_definition(sp_variable *spvar,
const Lex_field_type_st &def)
{
return
- last_field->set_attributes(thd, def, charset,
- COLUMN_DEFINITION_ROUTINE_PARAM) ||
+ last_field->set_attributes(thd, def, COLUMN_DEFINITION_ROUTINE_PARAM) ||
sphead->fill_spvar_definition(thd, last_field, &spvar->name);
}
@@ -6411,8 +6383,7 @@ bool LEX::sp_param_fill_definition(sp_variable *spvar,
bool LEX::sf_return_fill_definition(const Lex_field_type_st &def)
{
return
- last_field->set_attributes(thd, def, charset,
- COLUMN_DEFINITION_FUNCTION_RETURN) ||
+ last_field->set_attributes(thd, def, COLUMN_DEFINITION_FUNCTION_RETURN) ||
sphead->fill_field_definition(thd, last_field);
}
@@ -6492,8 +6463,7 @@ void LEX::sp_variable_declarations_init(THD *thd, int nvars)
sphead->reset_lex(thd);
spcont->declare_var_boundary(nvars);
- thd->lex->init_last_field(&spvar->field_def, &spvar->name,
- thd->variables.collation_database);
+ thd->lex->init_last_field(&spvar->field_def, &spvar->name);
}
@@ -11465,16 +11435,15 @@ Spvar_definition *LEX::row_field_name(THD *thd, const Lex_ident_sys_st &name)
}
if (unlikely(!(res= new (thd->mem_root) Spvar_definition())))
return NULL;
- init_last_field(res, &name, thd->variables.collation_database);
+ init_last_field(res, &name);
return res;
}
Item *
-Lex_cast_type_st::create_typecast_item_or_error(THD *thd, Item *item,
- CHARSET_INFO *cs) const
+Lex_cast_type_st::create_typecast_item_or_error(THD *thd, Item *item) const
{
- Item *tmp= create_typecast_item(thd, item, cs);
+ Item *tmp= create_typecast_item(thd, item);
if (!tmp)
{
Name name= m_type_handler->name();
@@ -11534,8 +11503,7 @@ bool LEX::set_field_type_udt(Lex_field_type_st *type,
const Type_handler *h;
if (!(h= Type_handler::handler_by_name_or_error(thd, name)))
return true;
- type->set(h, attr);
- charset= &my_charset_bin;
+ type->set(h, attr, &my_charset_bin);
return false;
}
@@ -11547,7 +11515,6 @@ bool LEX::set_cast_type_udt(Lex_cast_type_st *type,
if (!(h= Type_handler::handler_by_name_or_error(thd, name)))
return true;
type->set(h);
- charset= NULL;
return false;
}
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index e8bac90fe5a..3dfc7845a28 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -3192,8 +3192,6 @@ public:
/* Query Plan Footprint of a currently running select */
Explain_query *explain;
- // type information
- CHARSET_INFO *charset;
/*
LEX which represents current statement (conventional, SP or PS)
@@ -3800,14 +3798,12 @@ public:
bool is_analyze, bool *printed_anything);
bool restore_set_statement_var();
- void init_last_field(Column_definition *field, const LEX_CSTRING *name,
- const CHARSET_INFO *cs);
+ void init_last_field(Column_definition *field, const LEX_CSTRING *name);
bool last_field_generated_always_as_row_start_or_end(Lex_ident *p,
const char *type,
uint flags);
bool last_field_generated_always_as_row_start();
bool last_field_generated_always_as_row_end();
- bool set_bincmp(CHARSET_INFO *cs, bool bin);
bool new_sp_instr_stmt(THD *, const LEX_CSTRING &prefix,
const LEX_CSTRING &suffix);
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index fe2307a2e91..44902f0f162 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -10463,24 +10463,6 @@ merge_charset_and_collation(CHARSET_INFO *cs, CHARSET_INFO *cl)
return cs;
}
-/** find a collation with binary comparison rules
-*/
-CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs)
-{
- const char *csname= cs->cs_name.str;
- THD *thd= current_thd;
- myf utf8_flag= thd->get_utf8_flag();
-
- cs= get_charset_by_csname(csname, MY_CS_BINSORT, MYF(utf8_flag));
- if (!cs)
- {
- char tmp[65];
- strxnmov(tmp, sizeof(tmp)-1, csname, "_bin", NULL);
- my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp);
- }
- return cs;
-}
-
void LEX::mark_first_table_as_inserting()
{
TABLE_LIST *t= first_select_lex()->table_list.first;
diff --git a/sql/sql_parse.h b/sql/sql_parse.h
index ebe3fe97114..9e1ec6fabbc 100644
--- a/sql/sql_parse.h
+++ b/sql/sql_parse.h
@@ -79,7 +79,6 @@ bool check_string_char_length(const LEX_CSTRING *str, uint err_msg,
bool no_error);
bool check_ident_length(const LEX_CSTRING *ident);
CHARSET_INFO* merge_charset_and_collation(CHARSET_INFO *cs, CHARSET_INFO *cl);
-CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs);
bool check_host_name(LEX_CSTRING *str);
bool check_identifier_name(LEX_CSTRING *str, uint max_char_length,
uint err_code, const char *param_for_err_msg);
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 8b3d1a6aefc..3186cd0b0db 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -2190,12 +2190,27 @@ bool check_duplicates_in_interval(const char *set_or_name,
}
+/*
+ Resolves the column collation if:
+ - it was not typed at all, or
+ - it was contextually typed
+ according to the table level character set.
+ Generates an error to the diagnostics area in case of a failure.
+*/
bool Column_definition::
prepare_charset_for_string(const Column_derived_attributes *dattr)
{
- if (!charset)
- charset= dattr->charset();
- return (flags & BINCMP_FLAG) && !(charset= find_bin_collation(charset));
+ CHARSET_INFO *tmp= lex_charset_collation().
+ resolved_to_character_set(dattr->charset());
+ if (!tmp)
+ return true;
+ charset= tmp;
+ /*
+ Remove the "is contextually typed collation" indicator on success,
+ for safety.
+ */
+ flags&= ~CONTEXT_COLLATION_FLAG;
+ return false;
}
@@ -3959,8 +3974,7 @@ bool Column_definition::prepare_blob_field(THD *thd)
bool Column_definition::sp_prepare_create_field(THD *thd, MEM_ROOT *mem_root)
{
- DBUG_ASSERT(charset);
- const Column_derived_attributes dattr(&my_charset_bin);
+ const Column_derived_attributes dattr(thd->variables.collation_database);
return prepare_stage1(thd, mem_root, NULL, HA_CAN_GEOMETRY, &dattr) ||
prepare_stage2(NULL, HA_CAN_GEOMETRY);
}
diff --git a/sql/sql_type.cc b/sql/sql_type.cc
index 269a4b072d4..69ca474eee1 100644
--- a/sql/sql_type.cc
+++ b/sql/sql_type.cc
@@ -2713,11 +2713,10 @@ bool
Type_handler::Column_definition_set_attributes(THD *thd,
Column_definition *def,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type)
const
{
- def->charset= cs;
+ def->set_lex_charset_collation(attr.lex_charset_collation());
def->set_length_and_dec(attr);
return false;
}
@@ -2746,11 +2745,10 @@ Type_handler_string::Column_definition_set_attributes(
THD *thd,
Column_definition *def,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type)
const
{
- Type_handler::Column_definition_set_attributes(thd, def, attr, cs, type);
+ Type_handler::Column_definition_set_attributes(thd, def, attr, type);
if (attr.has_explicit_length())
return false;
switch (type) {
@@ -2778,11 +2776,10 @@ Type_handler_varchar::Column_definition_set_attributes(
THD *thd,
Column_definition *def,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type)
const
{
- Type_handler::Column_definition_set_attributes(thd, def, attr, cs, type);
+ Type_handler::Column_definition_set_attributes(thd, def, attr, type);
if (attr.has_explicit_length())
return false;
switch (type) {
@@ -3156,7 +3153,7 @@ bool Type_handler_general_purpose_string::
Change character sets for all varchar/char/text columns,
but do not touch varbinary/binary/blob columns.
*/
- if (defcs != &my_charset_bin)
+ if (!(def->flags & CONTEXT_COLLATION_FLAG) && defcs != &my_charset_bin)
def->charset= bulk_alter_attr->alter_table_convert_to_charset();
return false;
};
@@ -4267,10 +4264,9 @@ Type_handler_timestamp_common::
Column_definition_set_attributes(THD *thd,
Column_definition *def,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type) const
{
- Type_handler::Column_definition_set_attributes(thd, def, attr, cs, type);
+ Type_handler::Column_definition_set_attributes(thd, def, attr, type);
if (!opt_explicit_defaults_for_timestamp)
def->flags|= NOT_NULL_FLAG;
return false;
diff --git a/sql/sql_type.h b/sql/sql_type.h
index 94ba8f5ffbc..7ff4bc64679 100644
--- a/sql/sql_type.h
+++ b/sql/sql_type.h
@@ -3924,7 +3924,6 @@ public:
virtual bool Column_definition_set_attributes(THD *thd,
Column_definition *def,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type)
const;
// Fix attributes after the parser
@@ -6659,7 +6658,6 @@ public:
bool Column_definition_set_attributes(THD *thd,
Column_definition *def,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type)
const override;
};
@@ -6912,7 +6910,6 @@ public:
bool Column_definition_set_attributes(THD *thd,
Column_definition *def,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type)
const override;
bool Column_definition_fix_attributes(Column_definition *c) const override;
@@ -7009,7 +7006,6 @@ public:
bool Column_definition_set_attributes(THD *thd,
Column_definition *def,
const Lex_field_type_st &attr,
- CHARSET_INFO *cs,
column_definition_type_t type)
const override;
bool Column_definition_fix_attributes(Column_definition *c) const override;
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 3abad020007..be51b4120f5 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -193,14 +193,6 @@ void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)()
#endif
-#define bincmp_collation(X,Y) \
- do \
- { \
- if (unlikely(Lex->set_bincmp(X,Y))) \
- MYSQL_YYABORT; \
- } while(0)
-
-
%}
%union {
int num;
@@ -221,6 +213,7 @@ void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)()
Lex_length_and_dec_st Lex_length_and_dec;
Lex_cast_type_st Lex_cast_type;
Lex_field_type_st Lex_field_type;
+ Lex_charset_collation_st Lex_charset_collation;
Lex_dyncol_type_st Lex_dyncol_type;
Lex_for_loop_st for_loop;
Lex_for_loop_bounds_st for_loop_bounds;
@@ -1386,6 +1379,15 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
field_type_misc
json_table_field_type
+%type <Lex_charset_collation>
+ binary
+ opt_binary
+ opt_binary_and_compression
+ attribute
+ attribute_list
+ field_def
+
+
%type <Lex_dyncol_type> opt_dyncol_type dyncol_type
numeric_dyncol_type temporal_dyncol_type string_dyncol_type
@@ -1575,8 +1577,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
text_or_password
%type <charset>
- opt_collate
- collate
+ opt_collate_or_default
charset_name
charset_or_alias
charset_name_or_default
@@ -1658,14 +1659,13 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
delete_limit_clause fields opt_values values
no_braces_with_names opt_values_with_names values_with_names
procedure_list procedure_list2 procedure_item
- field_def handler opt_generated_always
+ handler opt_generated_always
opt_ignore opt_column opt_restrict
grant revoke set lock unlock string_list
- opt_binary table_lock_list table_lock
+ table_lock_list table_lock
ref_list opt_match_clause opt_on_update_delete use
opt_delete_options opt_delete_option varchar nchar nvarchar
opt_outer table_list table_name table_alias_ref_list table_alias_ref
- attribute attribute_list
compressed_deprecated_data_type_attribute
compressed_deprecated_column_attribute
grant_list
@@ -3124,7 +3124,7 @@ optionally_qualified_column_ident:
row_field_definition:
row_field_name field_type
{
- Lex->last_field->set_attributes(thd, $2, Lex->charset,
+ Lex->last_field->set_attributes(thd, $2,
COLUMN_DEFINITION_ROUTINE_LOCAL);
}
;
@@ -3157,7 +3157,7 @@ sp_decl_variable_list:
sp_decl_idents_init_vars
field_type
{
- Lex->last_field->set_attributes(thd, $2, Lex->charset,
+ Lex->last_field->set_attributes(thd, $2,
COLUMN_DEFINITION_ROUTINE_LOCAL);
}
sp_opt_default
@@ -5723,7 +5723,7 @@ field_spec:
if (unlikely(!f))
MYSQL_YYABORT;
- lex->init_last_field(f, &$1, NULL);
+ lex->init_last_field(f, &$1);
$<create_field>$= f;
lex->parsing_options.lookup_keywords_after_qualifier= true;
}
@@ -5751,10 +5751,16 @@ field_spec:
field_type_or_serial:
qualified_field_type
{
- Lex->last_field->set_attributes(thd, $1, Lex->charset,
+ Lex->last_field->set_attributes(thd, $1,
COLUMN_DEFINITION_TABLE_FIELD);
}
field_def
+ {
+ Lex_charset_collation tmp= $1.lex_charset_collation();
+ if (tmp.merge_charset_clause_and_collate_clause($3))
+ MYSQL_YYABORT;
+ Lex->last_field->set_lex_charset_collation(tmp);
+ }
| SERIAL_SYM
{
Lex->last_field->set_handler(&type_handler_ulonglong);
@@ -5786,25 +5792,34 @@ opt_asrow_attribute_list:
;
field_def:
- /* empty */ { }
+ /* empty */ { $$.init(); }
| attribute_list
- | attribute_list compressed_deprecated_column_attribute
+ | attribute_list compressed_deprecated_column_attribute { $$= $1; }
| attribute_list compressed_deprecated_column_attribute attribute_list
+ {
+ if (($$= $1).merge_collate_clause_and_collate_clause($3))
+ MYSQL_YYABORT;
+ }
| opt_generated_always AS virtual_column_func
{
Lex->last_field->vcol_info= $3;
Lex->last_field->flags&= ~NOT_NULL_FLAG; // undo automatic NOT NULL for timestamps
}
vcol_opt_specifier vcol_opt_attribute
+ {
+ $$.init();
+ }
| opt_generated_always AS ROW_SYM START_SYM opt_asrow_attribute
{
if (Lex->last_field_generated_always_as_row_start())
MYSQL_YYABORT;
+ $$.init();
}
| opt_generated_always AS ROW_SYM END opt_asrow_attribute
{
if (Lex->last_field_generated_always_as_row_end())
MYSQL_YYABORT;
+ $$.init();
}
;
@@ -6017,49 +6032,46 @@ field_type_numeric:
opt_binary_and_compression:
- /* empty */
- | binary
- | binary compressed_deprecated_data_type_attribute
- | compressed opt_binary
+ /* empty */ { $$.init(); }
+ | binary { $$= $1; }
+ | binary compressed_deprecated_data_type_attribute { $$= $1; }
+ | compressed opt_binary { $$= $2; }
;
field_type_string:
char opt_field_length opt_binary
{
- $$.set(&type_handler_string, $2);
+ $$.set(&type_handler_string, $2, $3);
}
| nchar opt_field_length opt_bin_mod
{
- $$.set(&type_handler_string, $2);
- bincmp_collation(national_charset_info, $3);
+ $$.set(&type_handler_string, $2,
+ Lex_charset_collation::national($3));
}
| BINARY opt_field_length
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_string, $2);
+ $$.set(&type_handler_string, $2, &my_charset_bin);
}
| varchar opt_field_length opt_binary_and_compression
{
- $$.set(&type_handler_varchar, $2);
+ $$.set(&type_handler_varchar, $2, $3);
}
| VARCHAR2_ORACLE_SYM opt_field_length opt_binary_and_compression
{
- $$.set(&type_handler_varchar, $2);
+ $$.set(&type_handler_varchar, $2, $3);
}
| nvarchar opt_field_length opt_compressed opt_bin_mod
{
- $$.set(&type_handler_varchar, $2);
- bincmp_collation(national_charset_info, $4);
+ $$.set(&type_handler_varchar, $2,
+ Lex_charset_collation::national($4));
}
| VARBINARY opt_field_length opt_compressed
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_varchar, $2);
+ $$.set(&type_handler_varchar, $2, &my_charset_bin);
}
| RAW_ORACLE_SYM opt_field_length opt_compressed
{
- Lex->charset= &my_charset_bin;
- $$.set(&type_handler_varchar, $2);
+ $$.set(&type_handler_varchar, $2, &my_charset_bin);
}
;
@@ -6105,65 +6117,57 @@ field_type_temporal:
field_type_lob:
TINYBLOB opt_compressed
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_tiny_blob);
+ $$.set(&type_handler_tiny_blob, &my_charset_bin);
}
| BLOB_MARIADB_SYM opt_field_length opt_compressed
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_blob, $2);
+ $$.set(&type_handler_blob, $2, &my_charset_bin);
}
| BLOB_ORACLE_SYM field_length opt_compressed
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_blob, $2);
+ $$.set(&type_handler_blob, $2, &my_charset_bin);
}
| BLOB_ORACLE_SYM opt_compressed
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_long_blob);
+ $$.set(&type_handler_long_blob, &my_charset_bin);
}
| MEDIUMBLOB opt_compressed
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_medium_blob);
+ $$.set(&type_handler_medium_blob, &my_charset_bin);
}
| LONGBLOB opt_compressed
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_long_blob);
+ $$.set(&type_handler_long_blob, &my_charset_bin);
}
| LONG_SYM VARBINARY opt_compressed
{
- Lex->charset=&my_charset_bin;
- $$.set(&type_handler_medium_blob);
+ $$.set(&type_handler_medium_blob, &my_charset_bin);
}
| LONG_SYM varchar opt_binary_and_compression
- { $$.set(&type_handler_medium_blob); }
+ { $$.set(&type_handler_medium_blob, $3); }
| TINYTEXT opt_binary_and_compression
- { $$.set(&type_handler_tiny_blob); }
+ { $$.set(&type_handler_tiny_blob, $2); }
| TEXT_SYM opt_field_length opt_binary_and_compression
- { $$.set(&type_handler_blob, $2); }
+ { $$.set(&type_handler_blob, $2, $3); }
| MEDIUMTEXT opt_binary_and_compression
- { $$.set(&type_handler_medium_blob); }
+ { $$.set(&type_handler_medium_blob, $2); }
| LONGTEXT opt_binary_and_compression
- { $$.set(&type_handler_long_blob); }
+ { $$.set(&type_handler_long_blob, $2); }
| CLOB_ORACLE_SYM opt_binary_and_compression
- { $$.set(&type_handler_long_blob); }
+ { $$.set(&type_handler_long_blob, $2); }
| LONG_SYM opt_binary_and_compression
- { $$.set(&type_handler_medium_blob); }
+ { $$.set(&type_handler_medium_blob, $2); }
| JSON_SYM opt_compressed
{
- Lex->charset= &my_charset_utf8mb4_bin;
- $$.set(&type_handler_long_blob_json);
+ $$.set(&type_handler_long_blob_json, &my_charset_utf8mb4_bin);
}
;
field_type_misc:
ENUM '(' string_list ')' opt_binary
- { $$.set(&type_handler_enum); }
+ { $$.set(&type_handler_enum, $5); }
| SET '(' string_list ')' opt_binary
- { $$.set(&type_handler_set); }
+ { $$.set(&type_handler_set, $5); }
;
char:
@@ -6272,35 +6276,38 @@ opt_precision:
attribute_list:
- attribute_list attribute {}
+ attribute_list attribute
+ {
+ if (($$= $1).merge_collate_clause_and_collate_clause($2))
+ MYSQL_YYABORT;
+ }
| attribute
;
attribute:
- NULL_SYM { Lex->last_field->flags&= ~ NOT_NULL_FLAG; }
- | DEFAULT column_default_expr { Lex->last_field->default_value= $2; }
+ NULL_SYM { Lex->last_field->flags&= ~ NOT_NULL_FLAG; $$.init(); }
+ | DEFAULT column_default_expr { Lex->last_field->default_value= $2; $$.init(); }
| ON UPDATE_SYM NOW_SYM opt_default_time_precision
{
Item *item= new (thd->mem_root) Item_func_now_local(thd, $4);
if (unlikely(item == NULL))
MYSQL_YYABORT;
Lex->last_field->on_update= item;
+ $$.init();
}
- | AUTO_INC { Lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG; }
+ | AUTO_INC { Lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG; $$.init(); }
| SERIAL_SYM DEFAULT VALUE_SYM
{
LEX *lex=Lex;
lex->last_field->flags|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG | UNIQUE_KEY_FLAG;
lex->alter_info.flags|= ALTER_ADD_INDEX;
+ $$.init();
}
| COLLATE_SYM collation_name
{
- if (unlikely(Lex->charset && !my_charset_same(Lex->charset,$2)))
- my_yyabort_error((ER_COLLATION_CHARSET_MISMATCH, MYF(0),
- $2->coll_name.str, Lex->charset->cs_name.str));
- Lex->last_field->charset= $2;
+ $$.set_collate_exact($2);
}
- | serial_attribute
+ | serial_attribute { $$.init(); }
;
opt_compression_method:
@@ -6444,7 +6451,7 @@ collation_name:
}
;
-opt_collate:
+opt_collate_or_default:
/* empty */ { $$=NULL; }
| COLLATE_SYM collation_name_or_default { $$=$2; }
;
@@ -6469,27 +6476,36 @@ charset_or_alias:
}
;
-collate: COLLATE_SYM collation_name_or_default { $$= $2; }
- ;
-
opt_binary:
- /* empty */ { bincmp_collation(NULL, false); }
- | binary {}
+ /* empty */ { $$.init(); }
+ | binary
;
binary:
- BYTE_SYM { bincmp_collation(&my_charset_bin, false); }
- | charset_or_alias opt_bin_mod { bincmp_collation($1, $2); }
- | BINARY { bincmp_collation(NULL, true); }
- | BINARY charset_or_alias { bincmp_collation($2, true); }
- | charset_or_alias collate
+ BYTE_SYM { $$.set_charset(&my_charset_bin); }
+ | charset_or_alias { $$.set_charset($1); }
+ | charset_or_alias BINARY
{
- if (!my_charset_same($2, $1))
- my_yyabort_error((ER_COLLATION_CHARSET_MISMATCH, MYF(0),
- $2->coll_name.str, $1->cs_name.str));
- Lex->charset= $2;
+ if ($$.set_charset_collate_binary($1))
+ MYSQL_YYABORT;
+ }
+ | BINARY { $$.set_contextually_typed_binary_style(); }
+ | BINARY charset_or_alias
+ {
+ if ($$.set_charset_collate_binary($2))
+ MYSQL_YYABORT;
+ }
+ | charset_or_alias COLLATE_SYM DEFAULT
+ {
+ $$.set_charset_collate_default($1);
+ }
+ | charset_or_alias COLLATE_SYM collation_name
+ {
+ if ($$.set_charset_collate_exact($1, $3))
+ MYSQL_YYABORT;
}
- | collate { Lex->charset= $1; }
+ | COLLATE_SYM collation_name { $$.set_collate_exact($2); }
+ | COLLATE_SYM DEFAULT { $$.set_collate_default(); }
;
opt_bin_mod:
@@ -7559,7 +7575,8 @@ alter_list_item:
lex->alter_info.alter_rename_key_list.push_back(ak);
lex->alter_info.flags|= ALTER_RENAME_INDEX;
}
- | CONVERT_SYM TO_SYM charset charset_name_or_default opt_collate
+ | CONVERT_SYM TO_SYM charset charset_name_or_default
+ opt_collate_or_default
{
if (!$4)
{
@@ -9407,15 +9424,14 @@ opt_dyncol_type:
/* empty */
{
$$.set(DYN_COL_NULL); /* automatic type */
- Lex->charset= NULL;
}
| AS dyncol_type { $$= $2; }
;
dyncol_type:
- numeric_dyncol_type { $$= $1; Lex->charset= NULL; }
- | temporal_dyncol_type { $$= $1; Lex->charset= NULL; }
- | string_dyncol_type { $$= $1; }
+ numeric_dyncol_type
+ | temporal_dyncol_type
+ | string_dyncol_type
;
numeric_dyncol_type:
@@ -9434,23 +9450,20 @@ temporal_dyncol_type:
;
string_dyncol_type:
- char
- { Lex->charset= thd->variables.collation_connection; }
- opt_binary
+ char opt_binary
{
- $$.set(DYN_COL_STRING);
+ if ($$.set(DYN_COL_STRING, $2, thd->variables.collation_connection))
+ MYSQL_YYABORT;
}
| nchar
{
- $$.set(DYN_COL_STRING);
- Lex->charset= national_charset_info;
+ $$.set(DYN_COL_STRING, national_charset_info);
}
;
dyncall_create_element:
expr ',' expr opt_dyncol_type
{
- LEX *lex= Lex;
$$= (DYNCALL_CREATE_DEF *)
alloc_root(thd->mem_root, sizeof(DYNCALL_CREATE_DEF));
if (unlikely($$ == NULL))
@@ -9458,7 +9471,7 @@ dyncall_create_element:
$$->key= $1;
$$->value= $3;
$$->type= (DYNAMIC_COLUMN_TYPE)$4.dyncol_type();
- $$->cs= lex->charset;
+ $$->cs= $4.charset_collation();
if ($4.has_explicit_length())
$$->len= $4.length();
else
@@ -9597,8 +9610,7 @@ column_default_non_parenthesized_expr:
}
| CAST_SYM '(' expr AS cast_type ')'
{
- if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3,
- Lex->charset))))
+ if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3))))
MYSQL_YYABORT;
}
| CASE_SYM when_list_opt_else END
@@ -9614,8 +9626,7 @@ column_default_non_parenthesized_expr:
}
| CONVERT_SYM '(' expr ',' cast_type ')'
{
- if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3,
- Lex->charset))))
+ if (unlikely(!($$= $5.create_typecast_item_or_error(thd, $3))))
MYSQL_YYABORT;
}
| CONVERT_SYM '(' expr USING charset_name ')'
@@ -10151,9 +10162,8 @@ function_call_nonkeyword:
|
COLUMN_GET_SYM '(' expr ',' expr AS cast_type ')'
{
- LEX *lex= Lex;
$$= create_func_dyncol_get(thd, $3, $5, $7.type_handler(),
- $7, lex->charset);
+ $7, $7.charset());
if (unlikely($$ == NULL))
MYSQL_YYABORT;
}
@@ -11054,26 +11064,31 @@ in_sum_expr:
cast_type:
BINARY opt_field_length
- { $$.set(&type_handler_long_blob, $2); Lex->charset= &my_charset_bin; }
- | CHAR_SYM opt_field_length
- { Lex->charset= thd->variables.collation_connection; }
- opt_binary
- { $$.set(&type_handler_long_blob, $2); }
- | VARCHAR field_length
- { Lex->charset= thd->variables.collation_connection; }
- opt_binary
- { $$.set(&type_handler_long_blob, $2); }
- | VARCHAR2_ORACLE_SYM field_length
- { Lex->charset= thd->variables.collation_connection; }
- opt_binary
- { $$.set(&type_handler_long_blob, $2); }
+ { $$.set(&type_handler_long_blob, $2, &my_charset_bin); }
+ | CHAR_SYM opt_field_length opt_binary
+ {
+ if ($$.set(&type_handler_long_blob, $2, $3,
+ thd->variables.collation_connection))
+ MYSQL_YYABORT;
+ }
+ | VARCHAR field_length opt_binary
+ {
+ if ($$.set(&type_handler_long_blob, $2, $3,
+ thd->variables.collation_connection))
+ MYSQL_YYABORT;
+ }
+ | VARCHAR2_ORACLE_SYM field_length opt_binary
+ {
+ if ($$.set(&type_handler_long_blob, $2, $3,
+ thd->variables.collation_connection))
+ MYSQL_YYABORT;
+ }
| NCHAR_SYM opt_field_length
{
- Lex->charset= national_charset_info;
- $$.set(&type_handler_long_blob, $2);
+ $$.set(&type_handler_long_blob, $2, national_charset_info);
}
- | cast_type_numeric { $$= $1; Lex->charset= NULL; }
- | cast_type_temporal { $$= $1; Lex->charset= NULL; }
+ | cast_type_numeric { $$= $1; }
+ | cast_type_temporal { $$= $1; }
| IDENT_sys
{
if (Lex->set_cast_type_udt(&$$, $1))
@@ -11262,7 +11277,7 @@ json_table_column:
!lex->json_table->m_cur_json_table_column))
MYSQL_YYABORT;
- lex->init_last_field(f, &$1, NULL);
+ lex->init_last_field(f, &$1);
}
json_table_column_type
{
@@ -11293,7 +11308,7 @@ json_table_column_type:
{
Lex_field_type_st type;
type.set(&type_handler_slong);
- Lex->last_field->set_attributes(thd, type, Lex->charset,
+ Lex->last_field->set_attributes(thd, type,
COLUMN_DEFINITION_TABLE_FIELD);
Lex->json_table->m_cur_json_table_column->
set(Json_table_column::FOR_ORDINALITY);
@@ -11301,20 +11316,23 @@ json_table_column_type:
| json_table_field_type PATH_SYM json_text_literal
json_opt_on_empty_or_error
{
- Lex->last_field->set_attributes(thd, $1, Lex->charset,
+ Lex->last_field->set_attributes(thd, $1,
COLUMN_DEFINITION_TABLE_FIELD);
if (Lex->json_table->m_cur_json_table_column->
- set(thd, Json_table_column::PATH, $3, Lex->charset))
+ set(thd, Json_table_column::PATH, $3,
+ $1.lex_charset_collation()))
{
MYSQL_YYABORT;
}
}
| json_table_field_type EXISTS PATH_SYM json_text_literal
{
- Lex->last_field->set_attributes(thd, $1, Lex->charset,
+ Lex->last_field->set_attributes(thd, $1,
COLUMN_DEFINITION_TABLE_FIELD);
- Lex->json_table->m_cur_json_table_column->
- set(thd, Json_table_column::EXISTS_PATH, $4, Lex->charset);
+ if (Lex->json_table->m_cur_json_table_column->
+ set(thd, Json_table_column::EXISTS_PATH, $4,
+ $1.lex_charset_collation()))
+ MYSQL_YYABORT;
}
;
@@ -16484,7 +16502,7 @@ option_value_no_option_type:
thd->parse_error();
MYSQL_YYABORT;
}
- | NAMES_SYM charset_name_or_default opt_collate
+ | NAMES_SYM charset_name_or_default opt_collate_or_default
{
if (sp_create_assignment_lex(thd, $1.pos()))
MYSQL_YYABORT;
@@ -17716,8 +17734,7 @@ sf_return_type:
{
LEX *lex= Lex;
lex->init_last_field(&lex->sphead->m_return_field_def,
- &empty_clex_str,
- thd->variables.collation_database);
+ &empty_clex_str);
}
field_type
{
diff --git a/sql/structs.h b/sql/structs.h
index d5c363cdd25..ff6d9b70eb0 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -28,6 +28,7 @@
#include "my_base.h" /* ha_rows, ha_key_alg */
#include <mysql_com.h> /* USERNAME_LENGTH */
#include "sql_bitmap.h"
+#include "lex_charset.h"
struct TABLE;
class Type_handler;
@@ -601,18 +602,24 @@ public:
struct Lex_length_and_dec_st
{
-private:
+protected:
uint32 m_length;
uint8 m_dec;
+ uint8 m_collation_type:LEX_CHARSET_COLLATION_TYPE_BITS;
bool m_has_explicit_length:1;
bool m_has_explicit_dec:1;
bool m_length_overflowed:1;
bool m_dec_overflowed:1;
+
+ static_assert(LEX_CHARSET_COLLATION_TYPE_BITS <= 8,
+ "Lex_length_and_dec_st::m_collation_type bits check");
+
public:
void reset()
{
m_length= 0;
m_dec= 0;
+ m_collation_type= 0;
m_has_explicit_length= false;
m_has_explicit_dec= false;
m_length_overflowed= false;
@@ -622,6 +629,7 @@ public:
{
m_length= length;
m_dec= 0;
+ m_collation_type= 0;
m_has_explicit_length= true;
m_has_explicit_dec= false;
m_length_overflowed= false;
@@ -631,6 +639,7 @@ public:
{
m_length= 0;
m_dec= dec;
+ m_collation_type= 0;
m_has_explicit_length= false;
m_has_explicit_dec= true;
m_length_overflowed= false;
@@ -640,6 +649,7 @@ public:
{
m_length= length;
m_dec= dec;
+ m_collation_type= 0;
m_has_explicit_length= true;
m_has_explicit_dec= true;
m_length_overflowed= false;
@@ -677,11 +687,37 @@ struct Lex_field_type_st: public Lex_length_and_dec_st
{
private:
const Type_handler *m_handler;
+ CHARSET_INFO *m_ci;
public:
- void set(const Type_handler *handler, Lex_length_and_dec_st length_and_dec)
+ void set(const Type_handler *handler,
+ Lex_length_and_dec_st length_and_dec,
+ CHARSET_INFO *cs= NULL)
+ {
+ m_handler= handler;
+ m_ci= cs;
+ Lex_length_and_dec_st::operator=(length_and_dec);
+ }
+ void set(const Type_handler *handler,
+ const Lex_length_and_dec_st &length_and_dec,
+ const Lex_charset_collation_st &coll)
{
m_handler= handler;
+ m_ci= coll.charset_collation();
Lex_length_and_dec_st::operator=(length_and_dec);
+ m_collation_type= ((uint8) coll.type()) & 0x3;
+ }
+ void set(const Type_handler *handler, const Lex_charset_collation_st &coll)
+ {
+ m_handler= handler;
+ m_ci= coll.charset_collation();
+ Lex_length_and_dec_st::reset();
+ m_collation_type= ((uint8) coll.type()) & 0x3;
+ }
+ void set(const Type_handler *handler, CHARSET_INFO *cs= NULL)
+ {
+ m_handler= handler;
+ m_ci= cs;
+ Lex_length_and_dec_st::reset();
}
void set_handler_length_flags(const Type_handler *handler,
const Lex_length_and_dec_st &length,
@@ -689,18 +725,21 @@ public:
void set_handler_length(const Type_handler *handler, uint32 length)
{
m_handler= handler;
+ m_ci= NULL;
Lex_length_and_dec_st::set_length_only(length);
}
- void set(const Type_handler *handler)
- {
- m_handler= handler;
- Lex_length_and_dec_st::reset();
- }
void set_handler(const Type_handler *handler)
{
m_handler= handler;
}
const Type_handler *type_handler() const { return m_handler; }
+ CHARSET_INFO *charset_collation() const { return m_ci; }
+ Lex_charset_collation lex_charset_collation() const
+ {
+ return Lex_charset_collation(m_ci,
+ (Lex_charset_collation_st::Type)
+ m_collation_type);
+ }
};
@@ -708,18 +747,38 @@ struct Lex_dyncol_type_st: public Lex_length_and_dec_st
{
private:
int m_type; // enum_dynamic_column_type is not visible here, so use int
+ CHARSET_INFO *m_ci;
public:
- void set(int type, Lex_length_and_dec_st length_and_dec)
+ void set(int type, Lex_length_and_dec_st length_and_dec,
+ CHARSET_INFO *cs= NULL)
{
m_type= type;
+ m_ci= cs;
Lex_length_and_dec_st::operator=(length_and_dec);
}
void set(int type)
{
m_type= type;
+ m_ci= NULL;
Lex_length_and_dec_st::reset();
}
+ void set(int type, CHARSET_INFO *cs)
+ {
+ m_type= type;
+ m_ci= cs;
+ Lex_length_and_dec_st::reset();
+ }
+ bool set(int type, const Lex_charset_collation_st &collation,
+ CHARSET_INFO *charset)
+ {
+ CHARSET_INFO *tmp= collation.resolved_to_character_set(charset);
+ if (!tmp)
+ return true;
+ set(type, tmp);
+ return false;
+ }
int dyncol_type() const { return m_type; }
+ CHARSET_INFO *charset_collation() const { return m_ci; }
};