summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorunknown <bar@mysql.com>2004-11-02 16:02:12 +0400
committerunknown <bar@mysql.com>2004-11-02 16:02:12 +0400
commitfc04692c8b5fb192b9be52d4672423aa41ebc6b1 (patch)
treebfda701b872e8feb1aace99aa6e20993e2150d6b /sql
parent0d92f0c777e170b4de4eff45e3f1ed08f3ccc836 (diff)
downloadmariadb-git-fc04692c8b5fb192b9be52d4672423aa41ebc6b1.tar.gz
Many files:
Allow mixing of different character sets for more SQL functions. item_func.h: Allow mixing of different character sets for more SQL functions.. sql/item_cmpfunc.cc: Allow mixing of different character sets for more SQL functions. sql/item_func.cc: Allow mixing of different character sets for more SQL functions. sql/item_func.h: Allow mixing of different character sets for more SQL functions.. sql/item_strfunc.cc: Allow mixing of different character sets for more SQL functions. sql/item.cc: Allow mixing of different character sets for more SQL functions. sql/item.h: Allow mixing of different character sets for more SQL functions. mysql-test/t/ctype_recoding.test: Allow mixing of different character sets for more SQL functions. mysql-test/r/ctype_recoding.result: Allow mixing of different character sets for more SQL functions.
Diffstat (limited to 'sql')
-rw-r--r--sql/item.cc41
-rw-r--r--sql/item.h18
-rw-r--r--sql/item_cmpfunc.cc96
-rw-r--r--sql/item_func.cc105
-rw-r--r--sql/item_func.h3
-rw-r--r--sql/item_strfunc.cc58
6 files changed, 194 insertions, 127 deletions
diff --git a/sql/item.cc b/sql/item.cc
index 46215fd78ed..7dc7e9e542c 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -205,6 +205,41 @@ bool Item::eq(const Item *item, bool binary_cmp) const
}
+Item *Item::safe_charset_converter(CHARSET_INFO *tocs)
+{
+ /*
+ Don't allow automatic conversion to non-Unicode charsets,
+ as it potentially loses data.
+ */
+ if (!(tocs->state & MY_CS_UNICODE))
+ return NULL; // safe conversion is not possible
+ return new Item_func_conv_charset(this, tocs);
+}
+
+
+Item *Item_string::safe_charset_converter(CHARSET_INFO *tocs)
+{
+ Item_string *conv;
+ uint conv_errors;
+ String tmp, cstr, *ostr= val_str(&tmp);
+ cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
+ if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(),
+ cstr.charset(),
+ collation.derivation)))
+ {
+ /*
+ Safe conversion is not possible (or EOM).
+ We could not convert a string into the requested character set
+ without data loss. The target charset does not cover all the
+ characters from the string. Operation cannot be done correctly.
+ */
+ return NULL;
+ }
+ conv->str_value.copy();
+ return conv;
+}
+
+
bool Item_string::eq(const Item *item, bool binary_cmp) const
{
if (type() == item->type())
@@ -723,6 +758,12 @@ String *Item_null::val_str(String *str)
}
+Item *Item_null::safe_charset_converter(CHARSET_INFO *tocs)
+{
+ collation.set(tocs);
+ return this;
+}
+
/*********************** Item_param related ******************************/
/*
diff --git a/sql/item.h b/sql/item.h
index 2c0c3306c44..fea3aa010a8 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -39,13 +39,22 @@ enum Derivation
/*
Flags for collation aggregation modes:
- allow conversion to a superset
- allow conversion of a coercible value (i.e. constant).
+ MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset
+ MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
+ (i.e. constant).
+ MY_COLL_ALLOW_CONV - allow any kind of conversion
+ (combintion of the above two)
+ MY_COLL_DISALLOW_NONE - don't allow return DERIVATION_NONE
+ (e.g. when aggregating for comparison)
+ MY_COLL_CMP_CONV - combination of MY_COLL_ALLOW_CONV
+ and MY_COLL_DISALLOW_NONE
*/
#define MY_COLL_ALLOW_SUPERSET_CONV 1
#define MY_COLL_ALLOW_COERCIBLE_CONV 2
-
+#define MY_COLL_ALLOW_CONV 3
+#define MY_COLL_DISALLOW_NONE 4
+#define MY_COLL_CMP_CONV 7
class DTCollation {
public:
@@ -302,6 +311,7 @@ public:
Field *tmp_table_field_from_field_type(TABLE *table);
virtual Item *neg_transformer(THD *thd) { return NULL; }
+ virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
void delete_self()
{
cleanup();
@@ -447,6 +457,7 @@ public:
Item *new_item() { return new Item_null(name); }
bool is_null() { return 1; }
void print(String *str) { str->append("NULL", 4); }
+ Item *safe_charset_converter(CHARSET_INFO *tocs);
};
@@ -717,6 +728,7 @@ public:
return new Item_string(name, str_value.ptr(),
str_value.length(), &my_charset_bin);
}
+ Item *safe_charset_converter(CHARSET_INFO *tocs);
String *const_string() { return &str_value; }
inline void append(char *str, uint length) { str_value.append(str, length); }
void print(String *str);
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index d9db07e2289..701894cacb5 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -173,89 +173,11 @@ void Item_bool_func2::fix_length_and_dec()
if (!args[0] || !args[1])
return;
- /*
- We allow to apply automatic character set conversion in some cases.
- The conditions when conversion is possible are:
- - arguments A and B have different charsets
- - A wins according to coercibility rules
- (i.e. a column is stronger than a string constant,
- an explicit COLLATE clause is stronger than a column)
- - character set of A is either superset for character set of B,
- or B is a string constant which can be converted into the
- character set of A without data loss.
-
- If all of the above is true, then it's possible to convert
- B into the character set of A, and then compare according
- to the collation of A.
- */
-
- uint32 dummy_offset;
DTCollation coll;
-
if (args[0]->result_type() == STRING_RESULT &&
args[1]->result_type() == STRING_RESULT &&
- String::needs_conversion(0, args[0]->collation.collation,
- args[1]->collation.collation,
- &dummy_offset) &&
- !coll.set(args[0]->collation, args[1]->collation,
- MY_COLL_ALLOW_SUPERSET_CONV |
- MY_COLL_ALLOW_COERCIBLE_CONV))
- {
- Item* conv= 0;
- Item_arena *arena= thd->current_arena, backup;
- uint strong= coll.strong;
- uint weak= strong ? 0 : 1;
- /*
- In case we're in statement prepare, create conversion item
- in its memory: it will be reused on each execute.
- */
- if (arena->is_stmt_prepare())
- thd->set_n_backup_item_arena(arena, &backup);
- if (args[weak]->type() == STRING_ITEM)
- {
- uint conv_errors;
- String tmp, cstr, *ostr= args[weak]->val_str(&tmp);
- cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(),
- args[strong]->collation.collation, &conv_errors);
- if (conv_errors)
- {
- /*
- We could not convert a string into the character set
- of the stronger side of the operation without data loss.
- It can happen if we tried to combine a column with a string
- constant, and the column charset does not cover all the
- characters from the string. Operation cannot be done
- correctly. Return an error.
- */
- my_coll_agg_error(args[0]->collation, args[1]->collation,
- func_name());
- return;
- }
- conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(),
- args[weak]->collation.derivation);
- ((Item_string*)conv)->str_value.copy();
- }
- else
- {
- if (!(coll.collation->state & MY_CS_UNICODE))
- {
- /*
- Don't allow automatic conversion to non-Unicode charsets,
- as it potentially loses data.
- */
- my_coll_agg_error(args[0]->collation, args[1]->collation,
- func_name());
- return;
- }
- conv= new Item_func_conv_charset(args[weak],
- args[strong]->collation.collation);
- conv->collation.set(args[weak]->collation.derivation);
- conv->fix_fields(thd, 0, &conv);
- }
- if (arena->is_stmt_prepare())
- thd->restore_backup_item_arena(arena, &backup);
- args[weak]= conv ? conv : args[weak];
- }
+ agg_arg_charsets(coll, args, 2, MY_COLL_CMP_CONV))
+ return;
// Make a special case of compare with fields to get nicer DATE comparisons
@@ -871,7 +793,7 @@ void Item_func_between::fix_length_and_dec()
return;
agg_cmp_type(&cmp_type, args, 3);
if (cmp_type == STRING_RESULT &&
- agg_arg_collations_for_comparison(cmp_collation, args, 3))
+ agg_arg_charsets(cmp_collation, args, 3, MY_COLL_CMP_CONV))
return;
/*
@@ -987,7 +909,7 @@ Item_func_ifnull::fix_length_and_dec()
decimals=max(args[0]->decimals,args[1]->decimals);
agg_result_type(&cached_result_type, args, 2);
if (cached_result_type == STRING_RESULT)
- agg_arg_collations(collation, args, arg_count);
+ agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV);
else if (cached_result_type != REAL_RESULT)
decimals= 0;
@@ -1083,7 +1005,7 @@ Item_func_if::fix_length_and_dec()
agg_result_type(&cached_result_type, args+1, 2);
if (cached_result_type == STRING_RESULT)
{
- if (agg_arg_collations(collation, args+1, 2))
+ if (agg_arg_charsets(collation, args+1, 2, MY_COLL_ALLOW_CONV))
return;
}
else
@@ -1354,7 +1276,7 @@ void Item_func_case::fix_length_and_dec()
agg_result_type(&cached_result_type, agg, nagg);
if ((cached_result_type == STRING_RESULT) &&
- agg_arg_collations(collation, agg, nagg))
+ agg_arg_charsets(collation, agg, nagg, MY_COLL_ALLOW_CONV))
return;
@@ -1370,7 +1292,7 @@ void Item_func_case::fix_length_and_dec()
nagg++;
agg_cmp_type(&cmp_type, agg, nagg);
if ((cmp_type == STRING_RESULT) &&
- agg_arg_collations_for_comparison(cmp_collation, agg, nagg))
+ agg_arg_charsets(cmp_collation, agg, nagg, MY_COLL_CMP_CONV))
return;
}
@@ -1477,7 +1399,7 @@ void Item_func_coalesce::fix_length_and_dec()
set_if_bigger(decimals,args[i]->decimals);
}
if (cached_result_type == STRING_RESULT)
- agg_arg_collations(collation, args, arg_count);
+ agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV);
else if (cached_result_type != REAL_RESULT)
decimals= 0;
}
@@ -2423,7 +2345,7 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
max_length= 1;
decimals= 0;
- if (agg_arg_collations(cmp_collation, args, 2))
+ if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV))
return 1;
used_tables_cache=args[0]->used_tables() | args[1]->used_tables();
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 50843d3bf76..09d7e50eaa3 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -90,6 +90,12 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count,
return TRUE;
}
}
+ if ((flags & MY_COLL_DISALLOW_NONE) &&
+ c.derivation == DERIVATION_NONE)
+ {
+ my_coll_agg_error(av, count, func_name());
+ return TRUE;
+ }
return FALSE;
}
@@ -98,15 +104,7 @@ bool Item_func::agg_arg_collations_for_comparison(DTCollation &c,
Item **av, uint count,
uint flags)
{
- if (agg_arg_collations(c, av, count, flags))
- return TRUE;
-
- if (c.derivation == DERIVATION_NONE)
- {
- my_coll_agg_error(av, count, func_name());
- return TRUE;
- }
- return FALSE;
+ return (agg_arg_collations(c, av, count, flags | MY_COLL_DISALLOW_NONE));
}
@@ -119,6 +117,89 @@ eval_const_cond(COND *cond)
}
+
+/*
+ Collect arguments' character sets together.
+ We allow to apply automatic character set conversion in some cases.
+ The conditions when conversion is possible are:
+ - arguments A and B have different charsets
+ - A wins according to coercibility rules
+ (i.e. a column is stronger than a string constant,
+ an explicit COLLATE clause is stronger than a column)
+ - character set of A is either superset for character set of B,
+ or B is a string constant which can be converted into the
+ character set of A without data loss.
+
+ If all of the above is true, then it's possible to convert
+ B into the character set of A, and then compare according
+ to the collation of A.
+
+ For functions with more than two arguments:
+
+ collect(A,B,C) ::= collect(collect(A,B),C)
+*/
+
+bool Item_func::agg_arg_charsets(DTCollation &coll,
+ Item **args, uint nargs, uint flags)
+{
+ Item **arg, **last, *safe_args[2];
+ if (agg_arg_collations(coll, args, nargs, flags))
+ return TRUE;
+
+ /*
+ For better error reporting: save the first and the second argument.
+ We need this only if the the number of args is 3 or 2:
+ - for a longer argument list, "Illegal mix of collations"
+ doesn't display each argument's characteristics.
+ - if nargs is 1, then this error cannot happen.
+ */
+ if (nargs >=2 && nargs <= 3)
+ {
+ safe_args[0]= args[0];
+ safe_args[1]= args[1];
+ }
+
+ THD *thd= current_thd;
+ Item_arena *arena= thd->current_arena, backup;
+ bool res= FALSE;
+ /*
+ In case we're in statement prepare, create conversion item
+ in its memory: it will be reused on each execute.
+ */
+ if (arena->is_stmt_prepare())
+ thd->set_n_backup_item_arena(arena, &backup);
+
+ for (arg= args, last= args + nargs; arg < last; arg++)
+ {
+ Item* conv;
+ uint dummy_offset;
+ if (!String::needs_conversion(0, coll.collation,
+ (*arg)->collation.collation,
+ &dummy_offset))
+ continue;
+
+ if (!(conv= (*arg)->safe_charset_converter(coll.collation)))
+ {
+ if (nargs >=2 && nargs <= 3)
+ {
+ /* restore the original arguments for better error message */
+ args[0]= safe_args[0];
+ args[1]= safe_args[1];
+ }
+ my_coll_agg_error(args, nargs, func_name());
+ res= TRUE;
+ break; // we cannot return here, we need to restore "arena".
+ }
+ conv->fix_fields(thd, 0, &conv);
+ *arg= conv;
+ }
+ if (arena->is_stmt_prepare())
+ thd->restore_backup_item_arena(arena, &backup);
+ return res;
+}
+
+
+
void Item_func::set_arguments(List<Item> &list)
{
allowed_arg_cols= 1;
@@ -1105,7 +1186,7 @@ void Item_func_min_max::fix_length_and_dec()
cmp_type=item_cmp_type(cmp_type,args[i]->result_type());
}
if (cmp_type == STRING_RESULT)
- agg_arg_collations_for_comparison(collation, args, arg_count);
+ agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV);
}
@@ -1259,7 +1340,7 @@ longlong Item_func_coercibility::val_int()
void Item_func_locate::fix_length_and_dec()
{
maybe_null=0; max_length=11;
- agg_arg_collations_for_comparison(cmp_collation, args, 2);
+ agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV);
}
@@ -1358,7 +1439,7 @@ void Item_func_field::fix_length_and_dec()
for (uint i=1; i < arg_count ; i++)
cmp_type= item_cmp_type(cmp_type, args[i]->result_type());
if (cmp_type == STRING_RESULT)
- agg_arg_collations_for_comparison(cmp_collation, args, arg_count);
+ agg_arg_charsets(cmp_collation, args, arg_count, MY_COLL_CMP_CONV);
}
diff --git a/sql/item_func.h b/sql/item_func.h
index 963038227a2..ce2b34499d6 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -145,7 +145,8 @@ public:
bool agg_arg_collations_for_comparison(DTCollation &c,
Item **items, uint nitems,
uint flags= 0);
-
+ bool agg_arg_charsets(DTCollation &c, Item **items, uint nitems,
+ uint flags= 0);
bool walk(Item_processor processor, byte *arg);
};
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 5eda89ef21e..81fff899ec7 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -346,7 +346,7 @@ void Item_func_concat::fix_length_and_dec()
{
max_length=0;
- if (agg_arg_collations(collation, args, arg_count))
+ if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV))
return;
for (uint i=0 ; i < arg_count ; i++)
@@ -640,7 +640,7 @@ void Item_func_concat_ws::fix_length_and_dec()
{
max_length=0;
- if (agg_arg_collations(collation, args, arg_count))
+ if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV))
return;
/*
@@ -848,7 +848,7 @@ void Item_func_replace::fix_length_and_dec()
maybe_null=1;
}
- if (agg_arg_collations_for_comparison(collation, args, 3))
+ if (agg_arg_charsets(collation, args, 3, MY_COLL_CMP_CONV))
return;
}
@@ -893,11 +893,13 @@ null:
void Item_func_insert::fix_length_and_dec()
{
- if (collation.set(args[0]->collation, args[3]->collation))
- {
- my_coll_agg_error(args[0]->collation, args[3]->collation, func_name());
- return;
- }
+ Item *cargs[2];
+ cargs[0]= args[0];
+ cargs[1]= args[3];
+ if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV))
+ return;
+ args[0]= cargs[0];
+ args[3]= cargs[1];
max_length=args[0]->max_length+args[3]->max_length;
if (max_length > MAX_BLOB_WIDTH)
{
@@ -1063,7 +1065,7 @@ void Item_func_substr_index::fix_length_and_dec()
{
max_length= args[0]->max_length;
- if (agg_arg_collations_for_comparison(collation, args, 2))
+ if (agg_arg_charsets(collation, args, 2, MY_COLL_CMP_CONV))
return;
}
@@ -1355,10 +1357,14 @@ void Item_func_trim::fix_length_and_dec()
remove.set_ascii(" ",1);
}
else
- if (collation.set(args[1]->collation, args[0]->collation) ||
- collation.derivation == DERIVATION_NONE)
{
- my_coll_agg_error(args[1]->collation, args[0]->collation, func_name());
+ Item *cargs[2];
+ cargs[0]= args[1];
+ cargs[1]= args[0];
+ if (agg_arg_charsets(collation, cargs, 2, MY_COLL_CMP_CONV))
+ return;
+ args[0]= cargs[1];
+ args[1]= cargs[0];
}
}
@@ -1679,7 +1685,7 @@ void Item_func_elt::fix_length_and_dec()
max_length=0;
decimals=0;
- if (agg_arg_collations(collation, args+1, arg_count-1))
+ if (agg_arg_charsets(collation, args+1, arg_count-1, MY_COLL_ALLOW_CONV))
return;
for (uint i= 1 ; i < arg_count ; i++)
@@ -1755,7 +1761,7 @@ void Item_func_make_set::fix_length_and_dec()
{
max_length=arg_count-1;
- if (agg_arg_collations(collation, args, arg_count))
+ if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV))
return;
for (uint i=0 ; i < arg_count ; i++)
@@ -1963,12 +1969,13 @@ err:
void Item_func_rpad::fix_length_and_dec()
{
- if (collation.set(args[0]->collation, args[2]->collation))
- {
- my_coll_agg_error(args[0]->collation, args[2]->collation, func_name());
+ Item *cargs[2];
+ cargs[0]= args[0];
+ cargs[1]= args[2];
+ if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV))
return;
- }
-
+ args[0]= cargs[0];
+ args[2]= cargs[1];
if (args[1]->const_item())
{
uint32 length= (uint32) args[1]->val_int() * collation.collation->mbmaxlen;
@@ -2047,11 +2054,13 @@ String *Item_func_rpad::val_str(String *str)
void Item_func_lpad::fix_length_and_dec()
{
- if (collation.set(args[0]->collation, args[2]->collation))
- {
- my_coll_agg_error(args[0]->collation, args[2]->collation, func_name());
+ Item *cargs[2];
+ cargs[0]= args[0];
+ cargs[1]= args[2];
+ if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV))
return;
- }
+ args[0]= cargs[0];
+ args[2]= cargs[1];
if (args[1]->const_item())
{
@@ -2495,7 +2504,8 @@ void Item_func_export_set::fix_length_and_dec()
uint sep_length=(arg_count > 3 ? args[3]->max_length : 1);
max_length=length*64+sep_length*63;
- if (agg_arg_collations(collation, args+1, min(4,arg_count)-1))
+ if (agg_arg_charsets(collation, args+1, min(4,arg_count)-1),
+ MY_COLL_ALLOW_CONV)
return;
}