diff options
-rw-r--r-- | include/m_ctype.h | 8 | ||||
-rw-r--r-- | mysql-test/r/ctype_ucs.result | 7 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf8.result | 23 | ||||
-rw-r--r-- | mysql-test/t/ctype_ucs.test | 7 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf8.test | 18 | ||||
-rw-r--r-- | sql/item.cc | 75 | ||||
-rw-r--r-- | sql/item.h | 142 | ||||
-rw-r--r-- | sql/sql_string.cc | 2 | ||||
-rw-r--r-- | sql/sql_string.h | 2 | ||||
-rw-r--r-- | strings/ctype.c | 93 |
10 files changed, 255 insertions, 122 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 6f372002ebb..5994816cbfc 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -735,6 +735,14 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len); my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len); +typedef struct +{ + size_t char_length; + uint repertoire; +} MY_STRING_METADATA; + +void my_string_metadata_get(MY_STRING_METADATA *metadata, + CHARSET_INFO *cs, const char *str, size_t len); uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len); my_bool my_charset_is_ascii_based(CHARSET_INFO *cs); my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs); diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index a98295c10ce..3cfc076b8a0 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -5333,5 +5333,12 @@ SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI; PI pi=3.141593 # +# MDEV-6695 Bad column name for UCS2 string literals +# +SET NAMES utf8, character_set_connection=ucs2; +SELECT 'a','aa'; +a aa +a aa +# # End of 10.0 tests # diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 8a3fcd9dc0d..767f0b04b98 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -6008,5 +6008,28 @@ CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) aa DROP TABLE t1; # +# MDEV-6694 Illegal mix of collation with a PS parameter +# +SET NAMES utf8; +CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1); +INSERT INTO t1 VALUES (1,'a'); +SELECT CONCAT(b,IF(a,'b','b')) FROM t1; +CONCAT(b,IF(a,'b','b')) +ab +PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1"; +SET @b='b'; +EXECUTE stmt USING @b,@b; +CONCAT(b,IF(a,?,?)) +ab +SET @b=''; +EXECUTE stmt USING @b,@b; +CONCAT(b,IF(a,?,?)) +a +SET @b='я'; +EXECUTE stmt USING @b,@b; +ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat' +DEALLOCATE PREPARE stmt; +DROP TABLE t1; +# # End of 10.0 tests # diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index 94d79494502..d269fb35dfe 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -903,5 +903,12 @@ DROP TABLE t1; SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI; --echo # +--echo # MDEV-6695 Bad column name for UCS2 string literals +--echo # +SET NAMES utf8, character_set_connection=ucs2; +SELECT 'a','aa'; + + +--echo # --echo # End of 10.0 tests --echo # diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 75630cf9cd5..eca1be2b4e7 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -1719,6 +1719,24 @@ SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 X'61')) FROM t1; SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) FROM t1; DROP TABLE t1; +--echo # +--echo # MDEV-6694 Illegal mix of collation with a PS parameter +--echo # +SET NAMES utf8; +CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1); +INSERT INTO t1 VALUES (1,'a'); +SELECT CONCAT(b,IF(a,'b','b')) FROM t1; +PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1"; +SET @b='b'; +EXECUTE stmt USING @b,@b; +SET @b=''; +EXECUTE stmt USING @b,@b; +SET @b='я'; +--error ER_CANT_AGGREGATE_2COLLATIONS +EXECUTE stmt USING @b,@b; +DEALLOCATE PREPARE stmt; +DROP TABLE t1; + --echo # --echo # End of 10.0 tests diff --git a/sql/item.cc b/sql/item.cc index 5e6409caf67..28c234da523 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1073,10 +1073,14 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs) name_length= 0; return; } - if (cs->ctype) - { - const char *str_start= str; + const char *str_start= str; + if (!cs->ctype || cs->mbminlen > 1) + { + str+= cs->cset->scan(cs, str, str + length, MY_SEQ_SPACES); + } + else + { /* This will probably need a better implementation in the future: a function in CHARSET_INFO structure. @@ -1086,21 +1090,21 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs) length--; str++; } - if (str != str_start && !is_autogenerated_name) - { - char buff[SAFE_NAME_LEN]; - strmake(buff, str_start, - MY_MIN(sizeof(buff)-1, length + (int) (str-str_start))); - - if (length == 0) - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_NAME_BECOMES_EMPTY, ER(ER_NAME_BECOMES_EMPTY), - buff); - else - push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, - ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES), - buff); - } + } + if (str != str_start && !is_autogenerated_name) + { + char buff[SAFE_NAME_LEN]; + strmake(buff, str_start, + MY_MIN(sizeof(buff)-1, length + (int) (str-str_start))); + + if (length == 0) + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_NAME_BECOMES_EMPTY, ER(ER_NAME_BECOMES_EMPTY), + buff); + else + push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN, + ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES), + buff); } if (!my_charset_same(cs, system_charset_info)) { @@ -1269,27 +1273,11 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs) SET @@arg= 1; EXECUTE stms USING @arg; - result_type is STRING_RESULT at prepare time, + In the above example result_type is STRING_RESULT at prepare time, and INT_RESULT at execution time. */ - if (const_item()) - { - if (state == NULL_VALUE) - return this; - uint cnv_errors; - String *ostr= val_str(&cnvstr); - if (!needs_charset_converter(tocs)) - return this; - cnvitem->copy_value(ostr->ptr(), ostr->length(), - ostr->charset(), tocs, &cnv_errors); - if (cnv_errors) - return NULL; - if (ostr->charset() == &my_charset_bin && tocs != &my_charset_bin && - !cnvitem->check_well_formed_result(true)) - return NULL; - return cnvitem; - } - return this; + return !const_item() || state == NULL_VALUE ? + this : const_charset_converter(tocs, true); } @@ -3175,8 +3163,6 @@ Item_param::Item_param(uint pos_in_query_arg) : value is set. */ maybe_null= 1; - cnvitem= new Item_string("", 0, &my_charset_bin, DERIVATION_COERCIBLE); - cnvstr.set(cnvbuf, sizeof(cnvbuf), &my_charset_bin); } @@ -3736,18 +3722,14 @@ bool Item_param::convert_str_value(THD *thd) str_value.set_charset(value.cs_info.final_character_set_of_str_value); /* Here str_value is guaranteed to be in final_character_set_of_str_value */ - max_length= str_value.numchars() * str_value.charset()->mbmaxlen; - - /* For the strings converted to numeric form within some functions */ - decimals= NOT_FIXED_DEC; /* str_value_ptr is returned from val_str(). It must be not alloced to prevent it's modification by val_str() invoker. */ str_value_ptr.set(str_value.ptr(), str_value.length(), str_value.charset()); - /* Synchronize item charset with value charset */ - collation.set(str_value.charset(), DERIVATION_COERCIBLE); + /* Synchronize item charset and length with value charset */ + fix_charset_and_length_from_str_value(DERIVATION_COERCIBLE); } return rc; } @@ -3777,7 +3759,8 @@ Item_param::clone_item() case STRING_VALUE: case LONG_DATA_VALUE: return new Item_string(name, str_value.c_ptr_quick(), str_value.length(), - str_value.charset()); + str_value.charset(), + collation.derivation, collation.repertoire); case TIME_VALUE: break; case NO_VALUE: diff --git a/sql/item.h b/sql/item.h index 9d07fedfbce..70f5cd52bcc 100644 --- a/sql/item.h +++ b/sql/item.h @@ -1694,7 +1694,41 @@ class Item_basic_value :public Item value->bin_eq(other) : collation.collation == cs && value->eq(other, collation.collation); } + protected: + // Value metadata, e.g. to make string processing easier + class Metadata: private MY_STRING_METADATA + { + public: + Metadata(const String *str) + { + my_string_metadata_get(this, str->charset(), str->ptr(), str->length()); + } + Metadata(const String *str, uint repertoire) + { + MY_STRING_METADATA::repertoire= repertoire; + MY_STRING_METADATA::char_length= str->numchars(); + } + uint repertoire() const { return MY_STRING_METADATA::repertoire; } + size_t char_length() const { return MY_STRING_METADATA::char_length; } + }; + void fix_charset_and_length_from_str_value(Derivation dv, Metadata metadata) + { + /* + We have to have a different max_length than 'length' here to + ensure that we get the right length if we do use the item + to create a new table. In this case max_length must be the maximum + number of chars for a string of this type because we in Create_field:: + divide the max_length with mbmaxlen). + */ + collation.set(str_value.charset(), dv, metadata.repertoire()); + fix_char_length(metadata.char_length()); + decimals= NOT_FIXED_DEC; + } + void fix_charset_and_length_from_str_value(Derivation dv) + { + fix_charset_and_length_from_str_value(dv, Metadata(&str_value)); + } Item_basic_value(): Item() {} /* In the xxx_eq() methods below we need to cast off "const" to @@ -2374,10 +2408,6 @@ public: class Item_param :public Item_basic_value, private Settable_routine_parameter { - char cnvbuf[MAX_FIELD_WIDTH]; - String cnvstr; - Item_string *cnvitem; - public: enum enum_item_param_state { @@ -2727,40 +2757,16 @@ protected: { m_cs_specified= cs_specified; } - -public: - Item_string(const char *str,uint length, - CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE, - uint repertoire= MY_REPERTOIRE_UNICODE30) - : m_cs_specified(FALSE) + void fix_from_value(Derivation dv, const Metadata metadata) { - str_value.set_or_copy_aligned(str, length, cs); - collation.set(cs, dv, repertoire); - /* - We have to have a different max_length than 'length' here to - ensure that we get the right length if we do use the item - to create a new table. In this case max_length must be the maximum - number of chars for a string of this type because we in Create_field:: - divide the max_length with mbmaxlen). - */ - max_length= str_value.numchars()*cs->mbmaxlen; - set_name(str, length, cs); - decimals=NOT_FIXED_DEC; + fix_charset_and_length_from_str_value(dv, metadata); // it is constant => can be used without fix_fields (and frequently used) fixed= 1; } - Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors, - Derivation dv, uint repertoire) - :m_cs_specified(false) + void fix_and_set_name_from_value(Derivation dv, const Metadata metadata) { - if (str_value.copy(str, tocs, conv_errors)) - str_value.set("", 0, tocs); // EOM ? - str_value.mark_as_const(); - collation.set(tocs, dv, repertoire); - fix_char_length(str_value.numchars()); - set_name(str_value.ptr(), str_value.length(), tocs); - decimals= NOT_FIXED_DEC; - fixed= 1; + fix_from_value(dv, metadata); + set_name(str_value.ptr(), str_value.length(), str_value.charset()); } protected: /* Just create an item and do not fill string representation */ @@ -2769,51 +2775,55 @@ protected: { collation.set(cs, dv); max_length= 0; - set_name(NULL, 0, cs); + set_name(NULL, 0, system_charset_info); decimals= NOT_FIXED_DEC; fixed= 1; } public: - Item_string(const char *name_par, const char *str, uint length, - CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE, - uint repertoire= MY_REPERTOIRE_UNICODE30) + // Constructors with the item name set from its value + Item_string(const char *str, uint length, CHARSET_INFO *cs, + Derivation dv, uint repertoire) : m_cs_specified(FALSE) { str_value.set_or_copy_aligned(str, length, cs); - collation.set(cs, dv, repertoire); - max_length= str_value.numchars()*cs->mbmaxlen; - set_name(name_par, 0, cs); - decimals=NOT_FIXED_DEC; - // it is constant => can be used without fix_fields (and frequently used) - fixed= 1; + fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire)); + } + Item_string(const char *str, uint length, + CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) + : m_cs_specified(FALSE) + { + str_value.set_or_copy_aligned(str, length, cs); + fix_and_set_name_from_value(dv, Metadata(&str_value)); } - void copy_value(const char *str, uint32 length, CHARSET_INFO *fromcs, - CHARSET_INFO *tocs, uint *cnv_errors) + Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors, + Derivation dv, uint repertoire) + :m_cs_specified(false) { - str_value.copy(str, length, fromcs, tocs, cnv_errors); + if (str_value.copy(str, tocs, conv_errors)) + str_value.set("", 0, tocs); // EOM ? str_value.mark_as_const(); - collation.set(tocs); - fix_char_length(str_value.numchars()); + fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire)); } - - void print_value(String *to) const + // Constructors with an externally provided item name + Item_string(const char *name_par, const char *str, uint length, + CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) + :m_cs_specified(false) { - str_value.print(to); + str_value.set_or_copy_aligned(str, length, cs); + fix_from_value(dv, Metadata(&str_value)); + set_name(name_par, 0, system_charset_info); } - /* - This is used in stored procedures to avoid memory leaks and - does a deep copy of its argument. - */ - void set_str_with_copy(const char *str_arg, uint length_arg) + Item_string(const char *name_par, const char *str, uint length, + CHARSET_INFO *cs, Derivation dv, uint repertoire) + :m_cs_specified(false) { - str_value.copy(str_arg, length_arg, collation.collation); - max_length= str_value.numchars() * collation.collation->mbmaxlen; + str_value.set_or_copy_aligned(str, length, cs); + fix_from_value(dv, Metadata(&str_value, repertoire)); + set_name(name_par, 0, system_charset_info); } - void set_repertoire_from_value() + void print_value(String *to) const { - collation.repertoire= my_string_repertoire(str_value.charset(), - str_value.ptr(), - str_value.length()); + str_value.print(to); } enum Type type() const { return STRING_ITEM; } double val_real(); @@ -2835,7 +2845,7 @@ public: Item *clone_item() { return new Item_string(name, str_value.ptr(), - str_value.length(), collation.collation); + str_value.length(), collation.collation); } Item *safe_charset_converter(CHARSET_INFO *tocs) { @@ -2914,14 +2924,12 @@ public: Item_string_with_introducer(const char *str, uint length, CHARSET_INFO *cs) :Item_string(str, length, cs) { - set_repertoire_from_value(); - set_cs_specified(true); + set_cs_specified(true); } Item_string_with_introducer(const String *str, CHARSET_INFO *tocs) :Item_string(str->ptr(), str->length(), tocs) { - set_repertoire_from_value(); - set_cs_specified(true); + set_cs_specified(true); } }; diff --git a/sql/sql_string.cc b/sql/sql_string.cc index a573de13bdf..55bf0f9b1c7 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -580,7 +580,7 @@ bool String::append_with_prefill(const char *s,uint32 arg_length, return FALSE; } -uint32 String::numchars() +uint32 String::numchars() const { return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length); } diff --git a/sql/sql_string.h b/sql/sql_string.h index 4360e360590..8c7e69edf4b 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -411,7 +411,7 @@ public: friend int stringcmp(const String *a,const String *b); friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length); friend class Field; - uint32 numchars(); + uint32 numchars() const; int charpos(longlong i,uint32 offset=0); int reserve(uint32 space_needed) diff --git a/strings/ctype.c b/strings/ctype.c index 566dc0331c4..ba130078153 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -818,23 +818,102 @@ my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len) } +uint +my_string_repertoire_8bit(CHARSET_INFO *cs, const char *str, ulong length) +{ + const char *strend; + if ((cs->state & MY_CS_NONASCII) && length > 0) + return MY_REPERTOIRE_UNICODE30; + for (strend= str + length; str < strend; str++) + { + if (((uchar) *str) > 0x7F) + return MY_REPERTOIRE_UNICODE30; + } + return MY_REPERTOIRE_ASCII; +} + + +static void +my_string_metadata_init(MY_STRING_METADATA *metadata) +{ + metadata->repertoire= MY_REPERTOIRE_ASCII; + metadata->char_length= 0; +} + + +/** + This should probably eventually go as a virtual function into + MY_CHARSET_HANDLER or MY_COLLATION_HANDLER. +*/ +static void +my_string_metadata_get_mb(MY_STRING_METADATA *metadata, + CHARSET_INFO *cs, const char *str, ulong length) +{ + const char *strend= str + length; + for (my_string_metadata_init(metadata) ; + str < strend; + metadata->char_length++) + { + my_wc_t wc; + int mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) str, + (const uchar *) strend); + if (mblen > 0) /* Assigned character */ + { + if (wc > 0x7F) + metadata->repertoire|= MY_REPERTOIRE_EXTENDED; + str+= mblen; + } + else if (mblen == MY_CS_ILSEQ) /* Bad byte sequence */ + { + metadata->repertoire|= MY_REPERTOIRE_EXTENDED; + str++; + } + else if (mblen > MY_CS_TOOSMALL) /* Unassigned character */ + { + metadata->repertoire|= MY_REPERTOIRE_EXTENDED; + str+= (-mblen); + } + else /* Incomplete character, premature end-of-line */ + { + metadata->repertoire|= MY_REPERTOIRE_EXTENDED; /* Just in case */ + break; + } + } +} + + +/** + Collect string metadata: length in characters and repertoire. +*/ +void +my_string_metadata_get(MY_STRING_METADATA *metadata, + CHARSET_INFO *cs, const char *str, ulong length) +{ + if (cs->mbmaxlen == 1 && !(cs->state & MY_CS_NONASCII)) + { + metadata->char_length= length; + metadata->repertoire= my_string_repertoire_8bit(cs, str, length); + } + else + { + my_string_metadata_get_mb(metadata, cs, str, length); + } +} + + /* Check repertoire: detect pure ascii strings */ uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length) { - const char *strend= str + length; - if (cs->mbminlen == 1) + if (cs->mbminlen == 1 && !(cs->state & MY_CS_NONASCII)) { - for ( ; str < strend; str++) - { - if (((uchar) *str) > 0x7F) - return MY_REPERTOIRE_UNICODE30; - } + return my_string_repertoire_8bit(cs, str, length); } else { + const char *strend= str + length; my_wc_t wc; int chlen; for (; |