diff options
-rw-r--r-- | sql/field.h | 8 | ||||
-rw-r--r-- | sql/lex_charset.cc | 575 | ||||
-rw-r--r-- | sql/lex_charset.h | 348 | ||||
-rw-r--r-- | sql/sql_lex.cc | 15 | ||||
-rw-r--r-- | sql/sql_lex.h | 3 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 105 |
6 files changed, 764 insertions, 290 deletions
diff --git a/sql/field.h b/sql/field.h index dd8ffe7fea0..cb78bdccd4b 100644 --- a/sql/field.h +++ b/sql/field.h @@ -5514,11 +5514,9 @@ public: { if (!charset) return Lex_column_charset_collation_attrs(); - return Lex_column_charset_collation_attrs( - charset, - flags & CONTEXT_COLLATION_FLAG ? - Lex_column_charset_collation_attrs_st::TYPE_COLLATE_CONTEXTUALLY_TYPED : - Lex_column_charset_collation_attrs_st::TYPE_CHARACTER_SET); + if (flags & CONTEXT_COLLATION_FLAG) + return Lex_column_charset_collation_attrs(Lex_context_collation(charset)); + return Lex_column_charset_collation_attrs(Lex_exact_collation(charset)); } }; diff --git a/sql/lex_charset.cc b/sql/lex_charset.cc index 923e20001a3..1373091d222 100644 --- a/sql/lex_charset.cc +++ b/sql/lex_charset.cc @@ -21,49 +21,317 @@ #include "mysqld_error.h" +static void +raise_ER_CONFLICTING_DECLARATIONS(const char *clause1, + const char *name1, + const char *clause2, + const char *name2, + bool reverse_order) +{ + if (!reverse_order) + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + clause1, name1, clause2, name2); + else + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + clause2, name2, clause1, name1); +} + + +bool Lex_exact_charset::raise_if_not_equal(const Lex_exact_charset &rhs) const +{ + if (m_ci == rhs.m_ci) + return false; + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "CHARACTER SET ", m_ci->cs_name.str, + "CHARACTER SET ", rhs.m_ci->cs_name.str); + return true; +} + + +bool Lex_exact_charset:: + raise_if_not_applicable(const Lex_exact_collation &cl) const +{ + return Lex_exact_charset_opt_extended_collate(m_ci, false). 
+ raise_if_not_applicable(cl); +} + + +bool Lex_exact_charset_opt_extended_collate:: + raise_if_not_applicable(const Lex_exact_collation &cl) const +{ + if (!my_charset_same(m_ci, cl.charset_info())) + { + my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0), + cl.charset_info()->coll_name.str, m_ci->cs_name.str); + return true; + } + return false; +} + + +bool +Lex_exact_collation::raise_if_not_equal(const Lex_exact_collation &cl) const +{ + if (m_ci != cl.m_ci) + { + my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), + "COLLATE ", m_ci->coll_name.str, + "COLLATE ", cl.m_ci->coll_name.str); + return true; + } + return false; +} + + +/* + Merge an exact collation and a contextual collation. + @param cl - The contextual collation to merge to "this". + @param reverse_order - If the contextual collation is on the left side + + Use reverse_order as follows: + false: COLLATE latin1_swedish_ci COLLATE DEFAULT + true: COLLATE DEFAULT COLLATE latin1_swedish_ci +*/ +bool +Lex_exact_collation:: + raise_if_conflicts_with_context_collation(const Lex_context_collation &cl, + bool reverse_order) const +{ + if (cl.is_contextually_typed_collate_default() && + !(m_ci->state & MY_CS_PRIMARY)) + { + raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str, + "COLLATE ", "DEFAULT", reverse_order); + return true; + } + + if (cl.is_contextually_typed_binary_style() && + !(m_ci->state & MY_CS_BINSORT)) + { + raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str, + "", "BINARY", reverse_order); + return true; + } + return false; +} + + +bool +Lex_context_collation::raise_if_not_equal(const Lex_context_collation &cl) const +{ + /* + Only equal context collations are possible here so far: + - Column grammar only supports BINARY, but does not support COLLATE DEFAULT + - DB/Table grammar only supports COLLATE DEFAULT + But we'll have different collations here - uca140 is coming soon. 
+ */ + DBUG_ASSERT(m_ci == cl.m_ci); + return false; +} + + +/* + Resolve a context collation to the character set (when the former gets known): + CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1; + CREATE DATABASE db1 COLLATE DEFAULT CHARACTER SET latin1; +*/ +bool Lex_exact_charset_opt_extended_collate:: + merge_context_collation_override(const Lex_context_collation &cl) +{ + DBUG_ASSERT(m_ci); + + // CHAR(10) BINARY + if (cl.is_contextually_typed_binary_style()) + { + CHARSET_INFO *ci= find_bin_collation(); + if (!ci) + return true; + m_ci= ci; + m_with_collate= true; + return false; + } + + // COLLATE DEFAULT + if (cl.is_contextually_typed_collate_default()) + { + CHARSET_INFO *ci= find_default_collation(); + DBUG_ASSERT(ci); + if (!ci) + return true; + m_ci= ci; + m_with_collate= true; + return false; + } + + /* + A non-binary and non-default contextually typed collation. + We don't have such yet - the parser cannot produce this. + But we have "uca1400_as_ci" coming soon. + */ + DBUG_ASSERT(0); + return false; +} + + +bool Lex_extended_collation_st::merge_exact_charset(const Lex_exact_charset &cs) +{ + switch (m_type) { + case TYPE_EXACT: + { + // COLLATE latin1_swedish_ci .. CHARACTER SET latin1 + return cs.raise_if_not_applicable(Lex_exact_collation(m_ci)); + } + case TYPE_CONTEXTUALLY_TYPED: + { + // COLLATE DEFAULT .. CHARACTER SET latin1 + Lex_exact_charset_opt_extended_collate tmp(cs); + if (tmp.merge_context_collation(Lex_context_collation(m_ci))) + return true; + *this= Lex_extended_collation(tmp.collation()); + return false; + } + } + DBUG_ASSERT(0); + return false; +} + + +bool Lex_extended_collation_st:: + merge_exact_collation(const Lex_exact_collation &rhs) +{ + switch (m_type) { + + case TYPE_EXACT: + /* + EXACT + EXACT + COLLATE latin1_bin .. COLLATE latin1_bin + */ + return Lex_exact_collation(m_ci).raise_if_not_equal(rhs); + + case TYPE_CONTEXTUALLY_TYPED: + { + /* + CONTEXT + EXACT + CHAR(10) COLLATE DEFAULT .. 
COLLATE latin1_swedish_ci + CHAR(10) BINARY .. COLLATE latin1_bin + CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin - coming soon + */ + if (rhs.raise_if_conflicts_with_context_collation( + Lex_context_collation(m_ci), true)) + return true; + *this= Lex_extended_collation(rhs); + return false; + } + } + DBUG_ASSERT(0); + return false; +} + + +bool Lex_extended_collation_st:: + raise_if_conflicts_with_context_collation(const Lex_context_collation &rhs) + const +{ + switch (m_type) { + + case TYPE_EXACT: + /* + EXACT + CONTEXT + COLLATE latin1_swedish_ci .. COLLATE DEFAULT + */ + return Lex_exact_collation(m_ci). + raise_if_conflicts_with_context_collation(rhs, false); + + case TYPE_CONTEXTUALLY_TYPED: + { + /* + CONTEXT + CONTEXT: + CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser + CREATE DATABASE db1 COLLATE DEFAULT COLLATE DEFAULT; + */ + return Lex_context_collation(m_ci).raise_if_not_equal(rhs); + } + } + DBUG_ASSERT(0); + return false; +} + + +/* + Merge two non-empty COLLATE clauses. +*/ +bool Lex_extended_collation_st::merge(const Lex_extended_collation_st &rhs) +{ + switch (rhs.type()) { + case TYPE_EXACT: + /* + EXACT + EXACT + COLLATE latin1_swedish_ci .. COLLATE latin1_swedish_ci + + CONTEXT + EXACT + COLLATE DEFAULT .. COLLATE latin1_swedish_ci + CHAR(10) BINARY .. COLLATE latin1_bin + */ + return merge_exact_collation(Lex_exact_collation(rhs.m_ci)); + case TYPE_CONTEXTUALLY_TYPED: + /* + EXACT + CONTEXT + COLLATE latin1_swedish_ci .. COLLATE DEFAULT + + CONTEXT + CONTEXT + COLLATE DEFAULT .. COLLATE DEFAULT + CHAR(10) BINARY .. 
COLLATE DEFAULT + */ + return raise_if_conflicts_with_context_collation( + Lex_context_collation(rhs.m_ci)); + } + DBUG_ASSERT(0); + return false; +} + + /** find a collation with binary comparison rules */ -CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: - find_bin_collation(CHARSET_INFO *cs) +CHARSET_INFO *Lex_exact_charset_opt_extended_collate::find_bin_collation() const { /* We don't need to handle old_mode=UTF8_IS_UTF8MB3 here, - because "cs" points to a real character set name. + because "m_ci" points to a real character set name. It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8". No thd->get_utf8_flag() flag passed to get_charset_by_csname(). */ - DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4)); + DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4)); /* CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; Nothing to do, we have the binary collation already. */ - if (cs->state & MY_CS_BINSORT) - return cs; + if (m_ci->state & MY_CS_BINSORT) + return m_ci; // CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4; - const LEX_CSTRING &cs_name= cs->cs_name; - if (!(cs= get_charset_by_csname(cs->cs_name.str, MY_CS_BINSORT, MYF(0)))) + CHARSET_INFO *cs; + if (!(cs= get_charset_by_csname(m_ci->cs_name.str, MY_CS_BINSORT, MYF(0)))) { char tmp[65]; - strxnmov(tmp, sizeof(tmp)-1, cs_name.str, "_bin", NULL); + strxnmov(tmp, sizeof(tmp)-1, m_ci->cs_name.str, "_bin", NULL); my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp); } return cs; } -CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: - find_default_collation(CHARSET_INFO *cs) +CHARSET_INFO * +Lex_exact_charset_opt_extended_collate::find_default_collation() const { // See comments in find_bin_collation() - DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4)); + DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4)); /* CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) 
CHARACTER SET utf8mb4; Nothing to do, we have the default collation already. */ - if (cs->state & MY_CS_PRIMARY) - return cs; + if (m_ci->state & MY_CS_PRIMARY) + return m_ci; /* CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; @@ -71,7 +339,8 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: Don't need to handle old_mode=UTF8_IS_UTF8MB3 here. See comments in find_bin_collation. */ - cs= get_charset_by_csname(cs->cs_name.str, MY_CS_PRIMARY, MYF(MY_WME)); + CHARSET_INFO *cs= get_charset_by_csname(m_ci->cs_name.str, + MY_CS_PRIMARY, MYF(MY_WME)); /* The above should never fail, as we have default collations for all character sets. @@ -81,21 +350,6 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: } -bool Lex_exact_charset_extended_collation_attrs_st:: - set_charset_collate_exact(CHARSET_INFO *cs, CHARSET_INFO *cl) -{ - DBUG_ASSERT(cs != nullptr && cl != nullptr); - if (!my_charset_same(cl, cs)) - { - my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0), - cl->coll_name.str, cs->cs_name.str); - return true; - } - set_collate_exact(cl); - return false; -} - - /* Resolve an empty or a contextually typed collation according to the upper level default character set (and optionally a collation), e.g.: @@ -122,103 +376,84 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st:: DBUG_ASSERT(m_ci); return m_ci; case TYPE_COLLATE_CONTEXTUALLY_TYPED: - break; + { + Lex_exact_charset_opt_extended_collate tmp(def, true); + if (tmp.merge_context_collation_override(Lex_context_collation(m_ci))) + return NULL; + return tmp.collation().charset_info(); + } } - - // Contextually typed - DBUG_ASSERT(m_ci); - - if (is_contextually_typed_binary_style()) // CHAR(10) BINARY - return find_bin_collation(def); - - if (is_contextually_typed_collate_default()) // CHAR(10) COLLATE DEFAULT - return find_default_collation(def); - - /* - Non-binary and non-default contextually typed collation. 
- We don't have such yet - the parser cannot produce this. - But will have soon, e.g. "uca1400_as_ci". - */ DBUG_ASSERT(0); return NULL; } -/* - Merge the CHARACTER SET clause to: - - an empty COLLATE clause - - an explicitly typed collation name - - a contextually typed collation - - "this" corresponds to `CHARACTER SET xxx [BINARY]` - "cl" corresponds to the COLLATE clause -*/ bool Lex_exact_charset_extended_collation_attrs_st:: - merge_charset_clause_and_collate_clause( - const Lex_exact_charset_extended_collation_attrs_st &cl) + merge_exact_collation(const Lex_exact_collation &cl) { - if (cl.is_empty()) // No COLLATE clause - return false; - switch (m_type) { case TYPE_EMPTY: /* No CHARACTER SET clause CHAR(10) NOT NULL COLLATE latin1_bin - CHAR(10) NOT NULL COLLATE DEFAULT */ - *this= cl; + *this= Lex_exact_charset_extended_collation_attrs(cl); return false; case TYPE_CHARACTER_SET: - case TYPE_COLLATE_EXACT: { - Lex_exact_charset_opt_extended_collate ecs(m_ci, m_type == TYPE_COLLATE_EXACT); - if (ecs.merge_collate_or_error(cl)) + // CHARACTER SET latin1 .. COLLATE latin1_swedish_ci + if (Lex_exact_charset(m_ci).raise_if_not_applicable(cl)) return true; - set_collate_exact(ecs.charset_info()); + *this= Lex_exact_charset_extended_collation_attrs(cl); return false; } + case TYPE_COLLATE_EXACT: + { + // [CHARACTER SET latin1] COLLATE latin1_bin .. COLLATE latin1_bin + return Lex_exact_collation(m_ci).raise_if_not_equal(cl); + } case TYPE_COLLATE_CONTEXTUALLY_TYPED: - break; - } - - if (is_contextually_typed_collation()) - { - if (cl.is_contextually_typed_collation()) { - /* - CONTEXT + CONTEXT: - CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser - CHAR(10) BINARY .. COLLATE uca1400_as_ci - not supported yet - */ - DBUG_ASSERT(0); // Not possible yet + // COLLATE DEFAULT .. 
COLLATE latin1_swedish_ci + if (cl.raise_if_conflicts_with_context_collation( + Lex_context_collation(m_ci), true)) + return true; + *this= Lex_exact_charset_extended_collation_attrs(cl); return false; } + } + DBUG_ASSERT(0); + return false; +} + +bool Lex_exact_charset_extended_collation_attrs_st:: + merge_context_collation(const Lex_context_collation &cl) +{ + switch (m_type) { + case TYPE_EMPTY: /* - CONTEXT + EXPLICIT - CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci - CHAR(10) BINARY .. COLLATE latin1_bin - CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin + No CHARACTER SET clause + CHAR(10) NOT NULL .. COLLATE DEFAULT */ - if (is_contextually_typed_collate_default() && - !(cl.charset_info()->state & MY_CS_PRIMARY)) - { - my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), - "COLLATE ", "DEFAULT", "COLLATE ", - cl.charset_info()->coll_name.str); - return true; - } - - if (is_contextually_typed_binary_style() && - !(cl.charset_info()->state & MY_CS_BINSORT)) + *this= Lex_exact_charset_extended_collation_attrs(cl); + return false; + case TYPE_CHARACTER_SET: { - my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), - "", "BINARY", "COLLATE ", cl.charset_info()->coll_name.str); - return true; + // CHARACTER SET latin1 .. COLLATE DEFAULT + Lex_exact_charset_opt_extended_collate tmp(m_ci, false); + if (tmp.merge_context_collation(cl)) + return true; + *this= Lex_exact_charset_extended_collation_attrs(tmp.collation()); + return false; } - *this= cl; - return false; + case TYPE_COLLATE_EXACT: + // [CHARACTER SET latin1] COLLATE latin1_swedish_ci .. COLLATE DEFAULT + return Lex_exact_collation(m_ci). + raise_if_conflicts_with_context_collation(cl, false); + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + // COLLATE DEFAULT .. 
COLLATE DEFAULT + return Lex_context_collation(m_ci).raise_if_not_equal(cl); } DBUG_ASSERT(0); @@ -227,69 +462,37 @@ bool Lex_exact_charset_extended_collation_attrs_st:: bool Lex_exact_charset_opt_extended_collate:: - merge_collate_or_error( - const Lex_exact_charset_extended_collation_attrs_st &cl) + merge_exact_collation(const Lex_exact_collation &cl) { - DBUG_ASSERT(cl.type() != - Lex_exact_charset_extended_collation_attrs_st::TYPE_CHARACTER_SET); + // CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin + if (m_with_collate) + return Lex_exact_collation(m_ci).raise_if_not_equal(cl); + if (raise_if_not_applicable(cl)) + return true; + *this= Lex_exact_charset_opt_extended_collate(cl); + return false; +} - switch (cl.type()) { - case Lex_exact_charset_extended_collation_attrs_st::TYPE_EMPTY: - return false; - case Lex_exact_charset_extended_collation_attrs_st::TYPE_CHARACTER_SET: - DBUG_ASSERT(0); - return false; - case Lex_exact_charset_extended_collation_attrs_st::TYPE_COLLATE_EXACT: - /* - EXPLICIT + EXPLICIT - CHAR(10) CHARACTER SET latin1 .. COLLATE latin1_bin - CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin .. COLLATE latin1_bin - CHAR(10) COLLATE latin1_bin .. COLLATE latin1_bin - CHAR(10) COLLATE latin1_bin .. COLLATE latin1_bin - CHAR(10) CHARACTER SET latin1 BINARY .. 
COLLATE latin1_bin - */ - if (m_with_collate && m_ci != cl.charset_info()) - { - my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), - "COLLATE ", m_ci->coll_name.str, - "COLLATE ", cl.charset_info()->coll_name.str); - return true; - } - if (!my_charset_same(m_ci, cl.charset_info())) - { - my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0), - cl.charset_info()->coll_name.str, m_ci->cs_name.str); - return true; - } - m_ci= cl.charset_info(); - m_with_collate= true; - return false; - case Lex_exact_charset_extended_collation_attrs_st::TYPE_COLLATE_CONTEXTUALLY_TYPED: - if (cl.is_contextually_typed_collate_default()) - { - /* - SET NAMES latin1 COLLATE DEFAULT; - ALTER TABLE t1 CONVERT TO CHARACTER SET latin1 COLLATE DEFAULT; - */ - CHARSET_INFO *tmp= Lex_exact_charset_extended_collation_attrs_st::find_default_collation(m_ci); - if (!tmp) - return true; - m_ci= tmp; - m_with_collate= true; - return false; - } - else - { - /* - EXPLICIT + CONTEXT - CHAR(10) COLLATE latin1_bin .. COLLATE DEFAULT not possible yet - CHAR(10) COLLATE latin1_bin .. COLLATE uca1400_as_ci - */ +bool Lex_exact_charset_opt_extended_collate:: + merge_context_collation(const Lex_context_collation &cl) +{ + // CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE DEFAULT + if (m_with_collate) + return Lex_exact_collation(m_ci). 
+ raise_if_conflicts_with_context_collation(cl, false); + return merge_context_collation_override(cl); +} - DBUG_ASSERT(0); // Not possible yet - return false; - } + +bool Lex_exact_charset_extended_collation_attrs_st:: + merge_collation(const Lex_extended_collation_st &cl) +{ + switch (cl.type()) { + case Lex_extended_collation_st::TYPE_EXACT: + return merge_exact_collation(Lex_exact_collation(cl.charset_info())); + case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: + return merge_context_collation(Lex_context_collation(cl.charset_info())); } DBUG_ASSERT(0); return false; @@ -297,48 +500,40 @@ bool Lex_exact_charset_opt_extended_collate:: /* - This method is used in the "attribute_list" rule to merge two independent - COLLATE clauses (not belonging to a CHARACTER SET clause). + Mix an unordered combination of CHARACTER SET and COLLATE clauses + (i.e. COLLATE can come before CHARACTER SET). + Merge a CHARACTER SET clause. + @param cs - The "CHARACTER SET exact_charset_name". */ -bool -Lex_exact_charset_extended_collation_attrs_st:: - merge_collate_clause_and_collate_clause( - const Lex_exact_charset_extended_collation_attrs_st &cl) +bool Lex_exact_charset_extended_collation_attrs_st:: + merge_exact_charset(const Lex_exact_charset &cs) { - /* - "BINARY" and "COLLATE DEFAULT" are not possible - in an independent COLLATE clause in a column attribute. - */ - DBUG_ASSERT(!is_contextually_typed_collation()); - DBUG_ASSERT(!cl.is_contextually_typed_collation()); - - if (cl.is_empty()) - return false; + DBUG_ASSERT(cs.charset_info()); switch (m_type) { case TYPE_EMPTY: - *this= cl; + // CHARACTER SET cs + *this= Lex_exact_charset_extended_collation_attrs(cs); return false; + case TYPE_CHARACTER_SET: - DBUG_ASSERT(0); - return false; + // CHARACTER SET cs1 .. CHARACTER SET cs2 + return Lex_exact_charset(m_ci).raise_if_not_equal(cs); + case TYPE_COLLATE_EXACT: - case TYPE_COLLATE_CONTEXTUALLY_TYPED: - break; - } + // COLLATE latin1_bin .. 
CHARACTER SET cs + return cs.raise_if_not_applicable(Lex_exact_collation(m_ci)); - /* - Two independent explicit collations: - CHAR(10) NOT NULL COLLATE latin1_bin DEFAULT 'a' COLLATE latin1_bin - Note, we should perhaps eventually disallow double COLLATE clauses. - But for now let's just disallow only conflicting ones. - */ - if (charset_info() != cl.charset_info()) - { - my_error(ER_CONFLICTING_DECLARATIONS, MYF(0), - "COLLATE ", charset_info()->coll_name.str, - "COLLATE ", cl.charset_info()->coll_name.str); - return true; + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + // COLLATE DEFAULT .. CHARACTER SET cs + { + Lex_exact_charset_opt_extended_collate tmp(cs); + if (tmp.merge_context_collation(Lex_context_collation(m_ci))) + return true; + *this= Lex_exact_charset_extended_collation_attrs(tmp.collation()); + return false; + } } + DBUG_ASSERT(0); return false; } diff --git a/sql/lex_charset.h b/sql/lex_charset.h index 09dc51d193f..c6b8f6b17ac 100644 --- a/sql/lex_charset.h +++ b/sql/lex_charset.h @@ -16,8 +16,202 @@ #ifndef LEX_CHARSET_INCLUDED #define LEX_CHARSET_INCLUDED + /* - Parse time character set and collation. 
+ An exact character set, e.g: + CHARACTER SET latin1 +*/ +class Lex_exact_charset +{ + CHARSET_INFO *m_ci; +public: + explicit Lex_exact_charset(CHARSET_INFO *ci) + :m_ci(ci) + { + DBUG_ASSERT(m_ci); + DBUG_ASSERT(m_ci->state & MY_CS_PRIMARY); + } + CHARSET_INFO *charset_info() const { return m_ci; } + bool raise_if_not_equal(const Lex_exact_charset &rhs) const; + bool raise_if_not_applicable(const class Lex_exact_collation &cl) const; +}; + + +/* + A contextually typed collation, e.g.: + COLLATE DEFAULT + CHAR(10) BINARY +*/ +class Lex_context_collation +{ + CHARSET_INFO *m_ci; +public: + explicit Lex_context_collation(CHARSET_INFO *ci) + :m_ci(ci) + { + DBUG_ASSERT(ci); + } + CHARSET_INFO *charset_info() const { return m_ci; } + bool is_contextually_typed_collate_default() const + { + return m_ci == &my_collation_contextually_typed_default; + } + bool is_contextually_typed_binary_style() const + { + return m_ci == &my_collation_contextually_typed_binary; + } + bool raise_if_not_equal(const Lex_context_collation &cl) const; +}; + + +/* + An exact collation, e.g. 
+ COLLATE latin1_swedish_ci +*/ +class Lex_exact_collation +{ + CHARSET_INFO *m_ci; +public: + explicit Lex_exact_collation(CHARSET_INFO *ci) + :m_ci(ci) + { + DBUG_ASSERT(ci); + } + CHARSET_INFO *charset_info() const { return m_ci; } + // EXACT + EXACT + bool raise_if_not_equal(const Lex_exact_collation &cl) const; + // EXACT + CONTEXT + // CONTEXT + EXACT + bool raise_if_conflicts_with_context_collation(const Lex_context_collation &, + bool reverse_order) const; +}; + + +/* + Parse time COLLATE clause: + COLLATE collation_name + The collation can be either exact or contextual: + COLLATE latin1_bin + COLLATE DEFAULT +*/ +class Lex_extended_collation_st +{ +public: + enum Type + { + TYPE_EXACT, + TYPE_CONTEXTUALLY_TYPED + }; +protected: + CHARSET_INFO *m_ci; + Type m_type; +public: + void init(CHARSET_INFO *ci, Type type) + { + m_ci= ci; + m_type= type; + } + CHARSET_INFO *charset_info() const { return m_ci; } + Type type() const { return m_type; } + void set_collate_default() + { + m_ci= &my_collation_contextually_typed_default; + m_type= TYPE_CONTEXTUALLY_TYPED; + } + bool raise_if_conflicts_with_context_collation(const Lex_context_collation &) + const; + bool merge_exact_charset(const Lex_exact_charset &rhs); + bool merge_exact_collation(const Lex_exact_collation &rhs); + bool merge(const Lex_extended_collation_st &rhs); +}; + + +class Lex_extended_collation: public Lex_extended_collation_st +{ +public: + Lex_extended_collation(CHARSET_INFO *ci, Type type) + { + init(ci, type); + } + Lex_extended_collation(const Lex_exact_collation &rhs) + { + init(rhs.charset_info(), TYPE_EXACT); + } +}; + + +/* + CHARACTER SET cs_exact [COLLATE cl_exact_or_context] +*/ +class Lex_exact_charset_opt_extended_collate +{ + CHARSET_INFO *m_ci; + bool m_with_collate; +public: + Lex_exact_charset_opt_extended_collate(CHARSET_INFO *ci, bool with_collate) + :m_ci(ci), m_with_collate(with_collate) + { + DBUG_ASSERT(m_ci); + DBUG_ASSERT((m_ci->state & MY_CS_PRIMARY) || m_with_collate); + 
} + Lex_exact_charset_opt_extended_collate(const Lex_exact_charset &cs) + :m_ci(cs.charset_info()), m_with_collate(false) + { + DBUG_ASSERT(m_ci); + DBUG_ASSERT(m_ci->state & MY_CS_PRIMARY); + } + Lex_exact_charset_opt_extended_collate(const Lex_exact_collation &cl) + :m_ci(cl.charset_info()), m_with_collate(true) + { + DBUG_ASSERT(m_ci); + } + bool with_collate() const { return m_with_collate; } + CHARSET_INFO *find_bin_collation() const; + CHARSET_INFO *find_default_collation() const; + bool raise_if_not_applicable(const Lex_exact_collation &cl) const; + /* + Add another COLLATE clause (exact or context). + So the full syntax looks like: + CHARACTER SET cs [COLLATE cl] ... COLLATE cl2 + */ + bool merge_collation(const Lex_extended_collation_st &cl) + { + switch (cl.type()) { + case Lex_extended_collation_st::TYPE_EXACT: + return merge_exact_collation(Lex_exact_collation(cl.charset_info())); + case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: + return merge_context_collation(Lex_context_collation(cl.charset_info())); + } + DBUG_ASSERT(0); + return false; + } + /* + Add a context collation: + CHARACTER SET cs [COLLATE cl] ... COLLATE DEFAULT + */ + bool merge_context_collation(const Lex_context_collation &cl); + bool merge_context_collation_override(const Lex_context_collation &cl); + /* + Add an exact collation: + CHARACTER SET cs [COLLATE cl] ... 
COLLATE latin1_bin + */ + bool merge_exact_collation(const Lex_exact_collation &cl); + Lex_exact_collation collation() const + { + return Lex_exact_collation(m_ci); + } + Lex_exact_charset charset() const + { + if ((m_ci->state & MY_CS_PRIMARY)) + return Lex_exact_charset(m_ci); + return Lex_exact_charset(find_default_collation()); + } +}; + + +/* + Parse time character set and collation for: + [CHARACTER SET cs_exact] [COLLATE cl_exact_or_context] Can be: @@ -65,9 +259,18 @@ public: protected: CHARSET_INFO *m_ci; Type m_type; -public: - static CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs); - static CHARSET_INFO *find_default_collation(CHARSET_INFO *cs); +protected: + static Type type_from_lex_collation_type(Lex_extended_collation_st::Type type) + { + switch (type) { + case Lex_extended_collation_st::TYPE_EXACT: + return TYPE_COLLATE_EXACT; + case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED: + return TYPE_COLLATE_CONTEXTUALLY_TYPED; + } + DBUG_ASSERT(0); + return TYPE_COLLATE_EXACT; + } public: void init() { @@ -80,6 +283,21 @@ public: m_ci= cs; m_type= type; } + void init(const Lex_exact_charset &cs) + { + m_ci= cs.charset_info(); + m_type= TYPE_CHARACTER_SET; + } + void init(const Lex_exact_collation &cs) + { + m_ci= cs.charset_info(); + m_type= TYPE_COLLATE_EXACT; + } + void init(const Lex_exact_charset_opt_extended_collate &cscl) + { + cscl.with_collate() ? init(cscl.collation()) : + init(cscl.charset()); + } bool is_empty() const { return m_type == TYPE_EMPTY; @@ -90,23 +308,26 @@ public: m_ci= cs; m_type= TYPE_CHARACTER_SET; } - void set_charset_collate_default(CHARSET_INFO *cs) + bool set_charset_collate_default(CHARSET_INFO *cs) { DBUG_ASSERT(cs); + if (!(cs= Lex_exact_charset_opt_extended_collate(cs, true). 
+ find_default_collation())) + return true; m_ci= cs; m_type= TYPE_COLLATE_EXACT; + return false; } bool set_charset_collate_binary(CHARSET_INFO *cs) { DBUG_ASSERT(cs); - if (!(cs= find_bin_collation(cs))) + if (!(cs= Lex_exact_charset_opt_extended_collate(cs, true). + find_bin_collation())) return true; m_ci= cs; m_type= TYPE_COLLATE_EXACT; return false; } - bool set_charset_collate_exact(CHARSET_INFO *cs, - CHARSET_INFO *cl); void set_collate_default() { m_ci= &my_collation_contextually_typed_default; @@ -119,17 +340,7 @@ public: } bool is_contextually_typed_collate_default() const { - return m_ci == &my_collation_contextually_typed_default; - } - bool is_contextually_typed_binary_style() const - { - return m_ci == &my_collation_contextually_typed_binary; - } - void set_collate_exact(CHARSET_INFO *cl) - { - DBUG_ASSERT(cl); - m_ci= cl; - m_type= TYPE_COLLATE_EXACT; + return Lex_context_collation(m_ci).is_contextually_typed_collate_default(); } CHARSET_INFO *charset_info() const { @@ -144,44 +355,55 @@ public: return m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED; } CHARSET_INFO *resolved_to_character_set(CHARSET_INFO *cs) const; - bool merge_charset_clause_and_collate_clause( - const Lex_exact_charset_extended_collation_attrs_st &cl); - bool merge_collate_clause_and_collate_clause( - const Lex_exact_charset_extended_collation_attrs_st &cl); -}; - - -/* - CHARACTER SET cs_exact [COLLATE cl_exact_or_context] -*/ -class Lex_exact_charset_opt_extended_collate -{ - CHARSET_INFO *m_ci; - bool m_with_collate; -public: - Lex_exact_charset_opt_extended_collate(CHARSET_INFO *ci, bool with_collate) - :m_ci(ci), m_with_collate(with_collate) + /* + Merge the column CHARACTER SET clause to: + - an exact collation name + - a contextually typed collation + "this" corresponds to `CHARACTER SET xxx [BINARY]` + "cl" corresponds to the COLLATE clause + */ + bool merge_column_charset_clause_and_collate_clause( + const Lex_exact_charset_extended_collation_attrs_st &cl) { - 
DBUG_ASSERT(m_ci); - // Item_func_set_collation uses non-default collations in "ci" - //DBUG_ASSERT(m_ci->default_flag() || m_with_collate); + switch (cl.type()) { + case TYPE_EMPTY: + return false; + case TYPE_COLLATE_EXACT: + return merge_exact_collation(Lex_exact_collation(cl.charset_info())); + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + case TYPE_CHARACTER_SET: + break; + } + DBUG_ASSERT(0); + return false; } /* - Add another COLLATE clause (exact or context). - So the full syntax looks like: - CHARACTER SET cs [COLLATE cl] ... COLLATE cl2 + This method is used in the "attribute_list" rule to merge two independent + COLLATE clauses (not belonging to a CHARACTER SET clause). + "BINARY" and "COLLATE DEFAULT" are not possible + in an independent COLLATE clause in a column attribute. */ - bool merge_collate_or_error( - const Lex_exact_charset_extended_collation_attrs_st &cl); - bool merge_opt_collate_or_error( - const Lex_exact_charset_extended_collation_attrs_st &cl) + bool merge_column_collate_clause_and_collate_clause( + const Lex_exact_charset_extended_collation_attrs_st &cl) { - if (cl.is_empty()) + DBUG_ASSERT(m_type != TYPE_COLLATE_CONTEXTUALLY_TYPED); + DBUG_ASSERT(m_type != TYPE_CHARACTER_SET); + switch (cl.type()) { + case TYPE_EMPTY: return false; - return merge_collate_or_error(cl); + case TYPE_COLLATE_EXACT: + return merge_exact_collation(Lex_exact_collation(cl.charset_info())); + case TYPE_COLLATE_CONTEXTUALLY_TYPED: + case TYPE_CHARACTER_SET: + break; + } + DBUG_ASSERT(0); + return false; } - CHARSET_INFO *charset_info() const { return m_ci; } - bool with_collate() const { return m_with_collate; } + bool merge_exact_charset(const Lex_exact_charset &cs); + bool merge_exact_collation(const Lex_exact_collation &cl); + bool merge_context_collation(const Lex_context_collation &cl); + bool merge_collation(const Lex_extended_collation_st &cl); }; @@ -197,6 +419,32 @@ public: { init(collation, type); } + explicit + 
Lex_exact_charset_extended_collation_attrs(const Lex_exact_charset &cs) + { + init(cs.charset_info(), TYPE_CHARACTER_SET); + } + explicit + Lex_exact_charset_extended_collation_attrs(const Lex_exact_collation &cl) + { + init(cl.charset_info(), TYPE_COLLATE_EXACT); + } + explicit + Lex_exact_charset_extended_collation_attrs(const Lex_context_collation &cl) + { + init(cl.charset_info(), TYPE_COLLATE_CONTEXTUALLY_TYPED); + } + explicit + Lex_exact_charset_extended_collation_attrs( + const Lex_exact_charset_opt_extended_collate &cscl) + { + init(cscl); + } + explicit + Lex_exact_charset_extended_collation_attrs(const Lex_extended_collation_st &cl) + { + init(cl.charset_info(), type_from_lex_collation_type(cl.type())); + } static Lex_exact_charset_extended_collation_attrs national(bool bin_mod) { return bin_mod ? diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 53034fde142..5f2f072b348 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -11874,6 +11874,21 @@ bool LEX::sp_create_set_password_instr(THD *thd, } +bool LEX::set_names(const char *pos, + const Lex_exact_charset_opt_extended_collate &cscl, + bool no_lookahead) +{ + if (sp_create_assignment_lex(thd, pos)) + return true; + CHARSET_INFO *ci= cscl.collation().charset_info(); + set_var_collation_client *var; + var= new (thd->mem_root) set_var_collation_client(ci, ci, ci); + return unlikely(var == NULL) || + unlikely(thd->lex->var_list.push_back(var, thd->mem_root)) || + unlikely(sp_create_assignment_instr(thd, no_lookahead)); +} + + bool LEX::map_data_type(const Lex_ident_sys_st &schema_name, Lex_field_type_st *type) const { diff --git a/sql/sql_lex.h b/sql/sql_lex.h index a4dcfbbf99f..736a8c1fb03 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -3833,6 +3833,9 @@ public: int case_stmt_action_then(); bool setup_select_in_parentheses(); + bool set_names(const char *pos, + const Lex_exact_charset_opt_extended_collate &cs, + bool no_lookahead); bool set_trigger_new_row(const LEX_CSTRING *name, Item *val); bool 
set_trigger_field(const LEX_CSTRING *name1, const LEX_CSTRING *name2, Item *val); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index b7c3fa34c3b..610a7ee4c05 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -215,6 +215,7 @@ void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)() Lex_field_type_st Lex_field_type; Lex_exact_charset_extended_collation_attrs_st Lex_exact_charset_extended_collation_attrs; + Lex_extended_collation_st Lex_extended_collation; Lex_dyncol_type_st Lex_dyncol_type; Lex_for_loop_st for_loop; Lex_for_loop_bounds_st for_loop_bounds; @@ -1387,6 +1388,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); attribute_list field_def +%type <Lex_extended_collation> + collation_name + collation_name_or_default + %type <Lex_dyncol_type> opt_dyncol_type dyncol_type numeric_dyncol_type temporal_dyncol_type string_dyncol_type @@ -1579,14 +1584,11 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize); text_or_password %type <charset> - opt_collate_or_default charset_name charset_or_alias charset_name_or_default old_or_new_charset_name old_or_new_charset_name_or_default - collation_name - collation_name_or_default opt_load_data_charset UNDERSCORE_CHARSET @@ -5532,13 +5534,15 @@ default_collation: opt_default COLLATE_SYM opt_equal collation_name_or_default { HA_CREATE_INFO *cinfo= &Lex->create_info; + bool is_exact= $4.type() == Lex_extended_collation_st::TYPE_EXACT; + CHARSET_INFO *cl= is_exact ? 
$4.charset_info() : NULL; if (unlikely((cinfo->used_fields & HA_CREATE_USED_DEFAULT_CHARSET) && - cinfo->default_table_charset && $4 && - !($4= merge_charset_and_collation(cinfo->default_table_charset, - $4)))) + cinfo->default_table_charset && cl && + !(cl= merge_charset_and_collation(cinfo->default_table_charset, + cl)))) MYSQL_YYABORT; - Lex->create_info.default_table_charset= $4; + Lex->create_info.default_table_charset= cl; Lex->create_info.used_fields|= HA_CREATE_USED_DEFAULT_CHARSET; } ; @@ -5791,7 +5795,7 @@ field_type_or_serial: field_def { auto tmp= $1.charset_collation_attrs(); - if (tmp.merge_charset_clause_and_collate_clause($3)) + if (tmp.merge_column_charset_clause_and_collate_clause($3)) MYSQL_YYABORT; Lex->last_field->set_charset_collation_attrs(tmp); } @@ -5831,7 +5835,7 @@ field_def: | attribute_list compressed_deprecated_column_attribute { $$= $1; } | attribute_list compressed_deprecated_column_attribute attribute_list { - if (($$= $1).merge_collate_clause_and_collate_clause($3)) + if (($$= $1).merge_column_collate_clause_and_collate_clause($3)) MYSQL_YYABORT; } | opt_generated_always AS virtual_column_func @@ -6312,7 +6316,7 @@ opt_precision: attribute_list: attribute_list attribute { - if (($$= $1).merge_collate_clause_and_collate_clause($2)) + if (($$= $1).merge_column_collate_clause_and_collate_clause($2)) MYSQL_YYABORT; } | attribute @@ -6339,7 +6343,7 @@ attribute: } | COLLATE_SYM collation_name { - $$.set_collate_exact($2); + $$= Lex_exact_charset_extended_collation_attrs($2); } | serial_attribute { $$.init(); } ; @@ -6479,20 +6483,17 @@ old_or_new_charset_name_or_default: collation_name: ident_or_text { - if (unlikely(!($$= mysqld_collation_get_by_name($1.str, + CHARSET_INFO *cs; + if (unlikely(!(cs= mysqld_collation_get_by_name($1.str, thd->get_utf8_flag())))) MYSQL_YYABORT; + $$= Lex_extended_collation(Lex_exact_collation(cs)); } ; -opt_collate_or_default: - /* empty */ { $$=NULL; } - | COLLATE_SYM collation_name_or_default { $$=$2; } 
- ; - collation_name_or_default: collation_name { $$=$1; } - | DEFAULT { $$=NULL; } + | DEFAULT { $$.set_collate_default(); } ; opt_default: @@ -6535,11 +6536,18 @@ binary: } | charset_or_alias COLLATE_SYM collation_name { - if ($$.set_charset_collate_exact($1, $3)) + if ($3.merge_exact_charset(Lex_exact_charset($1))) MYSQL_YYABORT; + $$= Lex_exact_charset_extended_collation_attrs($3); + } + | COLLATE_SYM collation_name + { + $$= Lex_exact_charset_extended_collation_attrs($2); + } + | COLLATE_SYM DEFAULT + { + $$.set_collate_default(); } - | COLLATE_SYM collation_name { $$.set_collate_exact($2); } - | COLLATE_SYM DEFAULT { $$.set_collate_default(); } ; opt_bin_mod: @@ -7610,17 +7618,28 @@ alter_list_item: lex->alter_info.flags|= ALTER_RENAME_INDEX; } | CONVERT_SYM TO_SYM charset charset_name_or_default - opt_collate_or_default { if (!$4) { $4= thd->variables.collation_database; } - $5= $5 ? $5 : $4; - if (unlikely(!my_charset_same($4,$5))) + if (unlikely(Lex->create_info.add_alter_list_item_convert_to_charset($4))) + MYSQL_YYABORT; + Lex->alter_info.flags|= ALTER_CONVERT_TO; + } + | CONVERT_SYM TO_SYM charset charset_name_or_default + COLLATE_SYM collation_name_or_default + { + if (!$4) + { + $4= thd->variables.collation_database; + } + bool is_exact= $6.type() == Lex_extended_collation_st::TYPE_EXACT; + CHARSET_INFO *cl= is_exact ? 
$6.charset_info() : $4; + if (unlikely(!my_charset_same($4,cl))) my_yyabort_error((ER_COLLATION_CHARSET_MISMATCH, MYF(0), - $5->coll_name.str, $4->cs_name.str)); - if (unlikely(Lex->create_info.add_alter_list_item_convert_to_charset($5))) + cl->coll_name.str, $4->cs_name.str)); + if (unlikely(Lex->create_info.add_alter_list_item_convert_to_charset(cl))) MYSQL_YYABORT; Lex->alter_info.flags|= ALTER_CONVERT_TO; } @@ -9739,7 +9758,9 @@ string_factor_expr: primary_expr | string_factor_expr COLLATE_SYM collation_name { - if (unlikely(!($$= new (thd->mem_root) Item_func_set_collation(thd, $1, $3)))) + if (unlikely(!($$= new (thd->mem_root) + Item_func_set_collation(thd, $1, + $3.charset_info())))) MYSQL_YYABORT; } ; @@ -16581,26 +16602,20 @@ option_value_no_option_type: thd->parse_error(); MYSQL_YYABORT; } - | NAMES_SYM charset_name_or_default opt_collate_or_default + | NAMES_SYM charset_name_or_default { - if (sp_create_assignment_lex(thd, $1.pos())) + CHARSET_INFO *def= global_system_variables.character_set_client; + Lex_exact_charset_opt_extended_collate tmp($2 ? $2 : def, false); + if (Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) MYSQL_YYABORT; - LEX *lex= Lex; - CHARSET_INFO *cs2; - CHARSET_INFO *cs3; - cs2= $2 ? $2 : global_system_variables.character_set_client; - cs3= $3 ? $3 : cs2; - if (unlikely(!my_charset_same(cs2, cs3))) - { - my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0), - cs3->coll_name.str, cs2->cs_name.str); - MYSQL_YYABORT; - } - set_var_collation_client *var; - var= new (thd->mem_root) set_var_collation_client(cs3, cs3, cs3); - if (unlikely(var == NULL) || - unlikely(lex->var_list.push_back(var, thd->mem_root)) || - unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY))) + } + | NAMES_SYM charset_name_or_default + COLLATE_SYM collation_name_or_default + { + CHARSET_INFO *def= global_system_variables.character_set_client; + Lex_exact_charset_opt_extended_collate tmp($2 ? 
$2 : def, false); + if (tmp.merge_collation($4) || + Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) MYSQL_YYABORT; } | DEFAULT ROLE_SYM grant_role |