summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/m_ctype.h8
-rw-r--r--mysql-test/r/ctype_ucs.result7
-rw-r--r--mysql-test/r/ctype_utf8.result23
-rw-r--r--mysql-test/t/ctype_ucs.test7
-rw-r--r--mysql-test/t/ctype_utf8.test18
-rw-r--r--sql/item.cc75
-rw-r--r--sql/item.h142
-rw-r--r--sql/sql_string.cc2
-rw-r--r--sql/sql_string.h2
-rw-r--r--strings/ctype.c93
10 files changed, 255 insertions, 122 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 6f372002ebb..5994816cbfc 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -735,6 +735,14 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len);
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
+/* Metadata describing a string value, filled in by my_string_metadata_get(). */
+typedef struct
+{
+ /* Length of the string counted in characters */
+ size_t char_length;
+ /* Combination of MY_REPERTOIRE_xxx flags found in the string */
+ uint repertoire;
+} MY_STRING_METADATA;
+
+/*
+ Collect string metadata (length in characters and repertoire)
+ for the "len" bytes at "str" in character set "cs" and store it
+ into *metadata.
+*/
+void my_string_metadata_get(MY_STRING_METADATA *metadata,
+ CHARSET_INFO *cs, const char *str, size_t len);
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
index a98295c10ce..3cfc076b8a0 100644
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -5333,5 +5333,12 @@ SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
PI
pi=3.141593
#
+# MDEV-6695 Bad column name for UCS2 string literals
+#
+SET NAMES utf8, character_set_connection=ucs2;
+SELECT 'a','aa';
+a aa
+a aa
+#
# End of 10.0 tests
#
diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
index 8a3fcd9dc0d..767f0b04b98 100644
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -6008,5 +6008,28 @@ CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001'))
aa
DROP TABLE t1;
#
+# MDEV-6694 Illegal mix of collation with a PS parameter
+#
+SET NAMES utf8;
+CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
+INSERT INTO t1 VALUES (1,'a');
+SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
+CONCAT(b,IF(a,'b','b'))
+ab
+PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
+SET @b='b';
+EXECUTE stmt USING @b,@b;
+CONCAT(b,IF(a,?,?))
+ab
+SET @b='';
+EXECUTE stmt USING @b,@b;
+CONCAT(b,IF(a,?,?))
+a
+SET @b='я';
+EXECUTE stmt USING @b,@b;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1;
+#
# End of 10.0 tests
#
diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
index 94d79494502..d269fb35dfe 100644
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -903,5 +903,12 @@ DROP TABLE t1;
SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
--echo #
+--echo # MDEV-6695 Bad column name for UCS2 string literals
+--echo #
+SET NAMES utf8, character_set_connection=ucs2;
+SELECT 'a','aa';
+
+
+--echo #
--echo # End of 10.0 tests
--echo #
diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
index 75630cf9cd5..eca1be2b4e7 100644
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -1719,6 +1719,24 @@ SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 X'61')) FROM t1;
SELECT CONCAT(a, IF(b>10, _utf8 X'61', _utf8 B'01100001')) FROM t1;
DROP TABLE t1;
+--echo #
+--echo # MDEV-6694 Illegal mix of collation with a PS parameter
+--echo #
+SET NAMES utf8;
+CREATE TABLE t1 (a INT, b VARCHAR(10) CHARACTER SET latin1);
+INSERT INTO t1 VALUES (1,'a');
+SELECT CONCAT(b,IF(a,'b','b')) FROM t1;
+PREPARE stmt FROM "SELECT CONCAT(b,IF(a,?,?)) FROM t1";
+SET @b='b';
+EXECUTE stmt USING @b,@b;
+SET @b='';
+EXECUTE stmt USING @b,@b;
+SET @b='я';
+--error ER_CANT_AGGREGATE_2COLLATIONS
+EXECUTE stmt USING @b,@b;
+DEALLOCATE PREPARE stmt;
+DROP TABLE t1;
+
--echo #
--echo # End of 10.0 tests
diff --git a/sql/item.cc b/sql/item.cc
index 5e6409caf67..28c234da523 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -1073,10 +1073,14 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
name_length= 0;
return;
}
- if (cs->ctype)
- {
- const char *str_start= str;
+ const char *str_start= str;
+ if (!cs->ctype || cs->mbminlen > 1)
+ {
+ str+= cs->cset->scan(cs, str, str + length, MY_SEQ_SPACES);
+ }
+ else
+ {
/*
This will probably need a better implementation in the future:
a function in CHARSET_INFO structure.
@@ -1086,21 +1090,21 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
length--;
str++;
}
- if (str != str_start && !is_autogenerated_name)
- {
- char buff[SAFE_NAME_LEN];
- strmake(buff, str_start,
- MY_MIN(sizeof(buff)-1, length + (int) (str-str_start)));
-
- if (length == 0)
- push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NAME_BECOMES_EMPTY, ER(ER_NAME_BECOMES_EMPTY),
- buff);
- else
- push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
- ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
- buff);
- }
+ }
+ if (str != str_start && !is_autogenerated_name)
+ {
+ char buff[SAFE_NAME_LEN];
+ strmake(buff, str_start,
+ MY_MIN(sizeof(buff)-1, length + (int) (str-str_start)));
+
+ if (length == 0)
+ push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_NAME_BECOMES_EMPTY, ER(ER_NAME_BECOMES_EMPTY),
+ buff);
+ else
+ push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_REMOVED_SPACES, ER(ER_REMOVED_SPACES),
+ buff);
}
if (!my_charset_same(cs, system_charset_info))
{
@@ -1269,27 +1273,11 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
SET @@arg= 1;
EXECUTE stms USING @arg;
- result_type is STRING_RESULT at prepare time,
+ In the above example result_type is STRING_RESULT at prepare time,
and INT_RESULT at execution time.
*/
- if (const_item())
- {
- if (state == NULL_VALUE)
- return this;
- uint cnv_errors;
- String *ostr= val_str(&cnvstr);
- if (!needs_charset_converter(tocs))
- return this;
- cnvitem->copy_value(ostr->ptr(), ostr->length(),
- ostr->charset(), tocs, &cnv_errors);
- if (cnv_errors)
- return NULL;
- if (ostr->charset() == &my_charset_bin && tocs != &my_charset_bin &&
- !cnvitem->check_well_formed_result(true))
- return NULL;
- return cnvitem;
- }
- return this;
+ return !const_item() || state == NULL_VALUE ?
+ this : const_charset_converter(tocs, true);
}
@@ -3175,8 +3163,6 @@ Item_param::Item_param(uint pos_in_query_arg) :
value is set.
*/
maybe_null= 1;
- cnvitem= new Item_string("", 0, &my_charset_bin, DERIVATION_COERCIBLE);
- cnvstr.set(cnvbuf, sizeof(cnvbuf), &my_charset_bin);
}
@@ -3736,18 +3722,14 @@ bool Item_param::convert_str_value(THD *thd)
str_value.set_charset(value.cs_info.final_character_set_of_str_value);
/* Here str_value is guaranteed to be in final_character_set_of_str_value */
- max_length= str_value.numchars() * str_value.charset()->mbmaxlen;
-
- /* For the strings converted to numeric form within some functions */
- decimals= NOT_FIXED_DEC;
/*
str_value_ptr is returned from val_str(). It must be not alloced
to prevent it's modification by val_str() invoker.
*/
str_value_ptr.set(str_value.ptr(), str_value.length(),
str_value.charset());
- /* Synchronize item charset with value charset */
- collation.set(str_value.charset(), DERIVATION_COERCIBLE);
+ /* Synchronize item charset and length with value charset */
+ fix_charset_and_length_from_str_value(DERIVATION_COERCIBLE);
}
return rc;
}
@@ -3777,7 +3759,8 @@ Item_param::clone_item()
case STRING_VALUE:
case LONG_DATA_VALUE:
return new Item_string(name, str_value.c_ptr_quick(), str_value.length(),
- str_value.charset());
+ str_value.charset(),
+ collation.derivation, collation.repertoire);
case TIME_VALUE:
break;
case NO_VALUE:
diff --git a/sql/item.h b/sql/item.h
index 9d07fedfbce..70f5cd52bcc 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -1694,7 +1694,41 @@ class Item_basic_value :public Item
value->bin_eq(other) :
collation.collation == cs && value->eq(other, collation.collation);
}
+
protected:
+ // Value metadata (character length and repertoire of a String),
+ // e.g. to make string processing easier.
+ // The accessors below have the same names as the inherited data members
+ // and therefore hide them; this is why the members are always referred
+ // to with the explicit MY_STRING_METADATA:: qualification.
+ class Metadata: private MY_STRING_METADATA
+ {
+ public:
+ // Compute both the character length and the repertoire from the value
+ Metadata(const String *str)
+ {
+ my_string_metadata_get(this, str->charset(), str->ptr(), str->length());
+ }
+ // Use the repertoire supplied by the caller as is;
+ // only the character length is taken from the value.
+ Metadata(const String *str, uint repertoire)
+ {
+ MY_STRING_METADATA::repertoire= repertoire;
+ MY_STRING_METADATA::char_length= str->numchars();
+ }
+ // Read-only accessors
+ uint repertoire() const { return MY_STRING_METADATA::repertoire; }
+ size_t char_length() const { return MY_STRING_METADATA::char_length; }
+ };
+ /*
+ Set the collation, the length and the decimals of this item
+ from str_value.
+ dv - derivation to assign to the collation
+ metadata - repertoire and character length of str_value
+ */
+ void fix_charset_and_length_from_str_value(Derivation dv, Metadata metadata)
+ {
+ /*
+ We have to have a different max_length than 'length' here to
+ ensure that we get the right length if we use the item
+ to create a new table. In this case max_length must be the maximum
+ number of chars for a string of this type, because in Create_field
+ we divide max_length by mbmaxlen.
+ */
+ collation.set(str_value.charset(), dv, metadata.repertoire());
+ // NOTE(review): presumably derives max_length from the character count
+ // and the mbmaxlen of the collation set just above - confirm.
+ fix_char_length(metadata.char_length());
+ decimals= NOT_FIXED_DEC;
+ }
+ /*
+ Convenience overload: build the Metadata from str_value itself
+ and delegate to the two-argument version.
+ */
+ void fix_charset_and_length_from_str_value(Derivation dv)
+ {
+ fix_charset_and_length_from_str_value(dv, Metadata(&str_value));
+ }
Item_basic_value(): Item() {}
/*
In the xxx_eq() methods below we need to cast off "const" to
@@ -2374,10 +2408,6 @@ public:
class Item_param :public Item_basic_value,
private Settable_routine_parameter
{
- char cnvbuf[MAX_FIELD_WIDTH];
- String cnvstr;
- Item_string *cnvitem;
-
public:
enum enum_item_param_state
{
@@ -2727,40 +2757,16 @@ protected:
{
m_cs_specified= cs_specified;
}
-
-public:
- Item_string(const char *str,uint length,
- CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
- uint repertoire= MY_REPERTOIRE_UNICODE30)
- : m_cs_specified(FALSE)
+ void fix_from_value(Derivation dv, const Metadata metadata)
{
- str_value.set_or_copy_aligned(str, length, cs);
- collation.set(cs, dv, repertoire);
- /*
- We have to have a different max_length than 'length' here to
- ensure that we get the right length if we do use the item
- to create a new table. In this case max_length must be the maximum
- number of chars for a string of this type because we in Create_field::
- divide the max_length with mbmaxlen).
- */
- max_length= str_value.numchars()*cs->mbmaxlen;
- set_name(str, length, cs);
- decimals=NOT_FIXED_DEC;
+ fix_charset_and_length_from_str_value(dv, metadata);
// it is constant => can be used without fix_fields (and frequently used)
fixed= 1;
}
- Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
- Derivation dv, uint repertoire)
- :m_cs_specified(false)
+ void fix_and_set_name_from_value(Derivation dv, const Metadata metadata)
{
- if (str_value.copy(str, tocs, conv_errors))
- str_value.set("", 0, tocs); // EOM ?
- str_value.mark_as_const();
- collation.set(tocs, dv, repertoire);
- fix_char_length(str_value.numchars());
- set_name(str_value.ptr(), str_value.length(), tocs);
- decimals= NOT_FIXED_DEC;
- fixed= 1;
+ fix_from_value(dv, metadata);
+ set_name(str_value.ptr(), str_value.length(), str_value.charset());
}
protected:
/* Just create an item and do not fill string representation */
@@ -2769,51 +2775,55 @@ protected:
{
collation.set(cs, dv);
max_length= 0;
- set_name(NULL, 0, cs);
+ set_name(NULL, 0, system_charset_info);
decimals= NOT_FIXED_DEC;
fixed= 1;
}
public:
- Item_string(const char *name_par, const char *str, uint length,
- CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
- uint repertoire= MY_REPERTOIRE_UNICODE30)
+ // Constructors with the item name set from its value
+ Item_string(const char *str, uint length, CHARSET_INFO *cs,
+ Derivation dv, uint repertoire)
: m_cs_specified(FALSE)
{
str_value.set_or_copy_aligned(str, length, cs);
- collation.set(cs, dv, repertoire);
- max_length= str_value.numchars()*cs->mbmaxlen;
- set_name(name_par, 0, cs);
- decimals=NOT_FIXED_DEC;
- // it is constant => can be used without fix_fields (and frequently used)
- fixed= 1;
+ fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire));
+ }
+ Item_string(const char *str, uint length,
+ CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+ : m_cs_specified(FALSE)
+ {
+ str_value.set_or_copy_aligned(str, length, cs);
+ fix_and_set_name_from_value(dv, Metadata(&str_value));
}
- void copy_value(const char *str, uint32 length, CHARSET_INFO *fromcs,
- CHARSET_INFO *tocs, uint *cnv_errors)
+ Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
+ Derivation dv, uint repertoire)
+ :m_cs_specified(false)
{
- str_value.copy(str, length, fromcs, tocs, cnv_errors);
+ if (str_value.copy(str, tocs, conv_errors))
+ str_value.set("", 0, tocs); // EOM ?
str_value.mark_as_const();
- collation.set(tocs);
- fix_char_length(str_value.numchars());
+ fix_and_set_name_from_value(dv, Metadata(&str_value, repertoire));
}
-
- void print_value(String *to) const
+ // Constructors with an externally provided item name
+ Item_string(const char *name_par, const char *str, uint length,
+ CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+ :m_cs_specified(false)
{
- str_value.print(to);
+ str_value.set_or_copy_aligned(str, length, cs);
+ fix_from_value(dv, Metadata(&str_value));
+ set_name(name_par, 0, system_charset_info);
}
- /*
- This is used in stored procedures to avoid memory leaks and
- does a deep copy of its argument.
- */
- void set_str_with_copy(const char *str_arg, uint length_arg)
+ Item_string(const char *name_par, const char *str, uint length,
+ CHARSET_INFO *cs, Derivation dv, uint repertoire)
+ :m_cs_specified(false)
{
- str_value.copy(str_arg, length_arg, collation.collation);
- max_length= str_value.numchars() * collation.collation->mbmaxlen;
+ str_value.set_or_copy_aligned(str, length, cs);
+ fix_from_value(dv, Metadata(&str_value, repertoire));
+ set_name(name_par, 0, system_charset_info);
}
- void set_repertoire_from_value()
+ void print_value(String *to) const
{
- collation.repertoire= my_string_repertoire(str_value.charset(),
- str_value.ptr(),
- str_value.length());
+ str_value.print(to);
}
enum Type type() const { return STRING_ITEM; }
double val_real();
@@ -2835,7 +2845,7 @@ public:
Item *clone_item()
{
return new Item_string(name, str_value.ptr(),
- str_value.length(), collation.collation);
+ str_value.length(), collation.collation);
}
Item *safe_charset_converter(CHARSET_INFO *tocs)
{
@@ -2914,14 +2924,12 @@ public:
Item_string_with_introducer(const char *str, uint length, CHARSET_INFO *cs)
:Item_string(str, length, cs)
{
- set_repertoire_from_value();
- set_cs_specified(true);
+ set_cs_specified(true);
}
Item_string_with_introducer(const String *str, CHARSET_INFO *tocs)
:Item_string(str->ptr(), str->length(), tocs)
{
- set_repertoire_from_value();
- set_cs_specified(true);
+ set_cs_specified(true);
}
};
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index a573de13bdf..55bf0f9b1c7 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -580,7 +580,7 @@ bool String::append_with_prefill(const char *s,uint32 arg_length,
return FALSE;
}
-uint32 String::numchars()
+uint32 String::numchars() const
{
return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
}
diff --git a/sql/sql_string.h b/sql/sql_string.h
index 4360e360590..8c7e69edf4b 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -411,7 +411,7 @@ public:
friend int stringcmp(const String *a,const String *b);
friend String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
friend class Field;
- uint32 numchars();
+ uint32 numchars() const;
int charpos(longlong i,uint32 offset=0);
int reserve(uint32 space_needed)
diff --git a/strings/ctype.c b/strings/ctype.c
index 566dc0331c4..ba130078153 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -818,23 +818,102 @@ my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len)
}
+/**
+  Detect the repertoire of a string stored one byte per character.
+
+  @param cs      Character set of the string
+  @param str     String data
+  @param length  Length of str in bytes
+
+  @return MY_REPERTOIRE_ASCII if the string is empty, or if the character
+          set does not have MY_CS_NONASCII in its state and every byte is
+          in the range 0x00..0x7F; MY_REPERTOIRE_UNICODE30 otherwise.
+*/
+uint
+my_string_repertoire_8bit(CHARSET_INFO *cs, const char *str, ulong length)
+{
+  const uchar *pos= (const uchar *) str;
+  const uchar *end= pos + length;
+
+  /* In a MY_CS_NONASCII character set any non-empty string is non-ASCII */
+  if (length > 0 && (cs->state & MY_CS_NONASCII))
+    return MY_REPERTOIRE_UNICODE30;
+
+  while (pos < end)
+  {
+    if (*pos++ > 0x7F)
+      return MY_REPERTOIRE_UNICODE30;
+  }
+  return MY_REPERTOIRE_ASCII;
+}
+
+
+/**
+  Reset metadata to its starting state:
+  zero characters and MY_REPERTOIRE_ASCII.
+*/
+static void
+my_string_metadata_init(MY_STRING_METADATA *metadata)
+{
+  metadata->char_length= 0;
+  metadata->repertoire= MY_REPERTOIRE_ASCII;
+}
+
+
+/**
+  Collect the character length and the repertoire of a string by
+  decoding it character by character with the mb_wc() handler of cs.
+
+  This should probably eventually go as a virtual function into
+  MY_CHARSET_HANDLER or MY_COLLATION_HANDLER.
+
+  @param[out] metadata  Receives char_length and repertoire
+  @param      cs        Character set of the string
+  @param      str       String data
+  @param      length    Length of str in bytes
+*/
+static void
+my_string_metadata_get_mb(MY_STRING_METADATA *metadata,
+                          CHARSET_INFO *cs, const char *str, ulong length)
+{
+  const uchar *pos= (const uchar *) str;
+  const uchar *end= pos + length;
+
+  my_string_metadata_init(metadata);
+
+  while (pos < end)
+  {
+    my_wc_t wc;
+    int rc= cs->cset->mb_wc(cs, &wc, pos, end);
+
+    if (rc > 0)
+    {
+      /* Assigned character: extended only if outside of 0x00..0x7F */
+      if (wc > 0x7F)
+        metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
+      pos+= rc;
+    }
+    else if (rc == MY_CS_ILSEQ)
+    {
+      /* Bad byte sequence: count it as one character and skip one byte */
+      metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
+      pos++;
+    }
+    else if (rc > MY_CS_TOOSMALL)
+    {
+      /* Unassigned character: -rc is the number of bytes to skip */
+      metadata->repertoire|= MY_REPERTOIRE_EXTENDED;
+      pos+= -rc;
+    }
+    else
+    {
+      /*
+        Incomplete character, premature end-of-line:
+        stop here without counting it.
+      */
+      metadata->repertoire|= MY_REPERTOIRE_EXTENDED; /* Just in case */
+      break;
+    }
+    metadata->char_length++;
+  }
+}
+
+
+/**
+  Collect string metadata: length in characters and repertoire.
+
+  @param[out] metadata  Receives char_length and repertoire
+  @param      cs        Character set of the string
+  @param      str       String data
+  @param      length    Length of str in bytes
+
+  The length is declared as size_t so that this definition agrees with
+  the prototype in m_ctype.h and with the type of
+  MY_STRING_METADATA::char_length. On platforms where "unsigned long"
+  is narrower than size_t a ulong parameter would make the definition
+  conflict with the prototype.
+*/
+void
+my_string_metadata_get(MY_STRING_METADATA *metadata,
+                       CHARSET_INFO *cs, const char *str, size_t length)
+{
+  if (cs->mbmaxlen == 1 && !(cs->state & MY_CS_NONASCII))
+  {
+    /* One byte per character: the character length is the byte length */
+    metadata->char_length= length;
+    /* The helper takes ulong, hence the explicit cast */
+    metadata->repertoire= my_string_repertoire_8bit(cs, str, (ulong) length);
+  }
+  else
+  {
+    /* The helper takes ulong, hence the explicit cast */
+    my_string_metadata_get_mb(metadata, cs, str, (ulong) length);
+  }
+}
+
+
/*
Check repertoire: detect pure ascii strings
*/
uint
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
{
- const char *strend= str + length;
- if (cs->mbminlen == 1)
+ if (cs->mbminlen == 1 && !(cs->state & MY_CS_NONASCII))
{
- for ( ; str < strend; str++)
- {
- if (((uchar) *str) > 0x7F)
- return MY_REPERTOIRE_UNICODE30;
- }
+ return my_string_repertoire_8bit(cs, str, length);
}
else
{
+ const char *strend= str + length;
my_wc_t wc;
int chlen;
for (;