diff options
author | unknown <bar@mysql.com> | 2004-11-02 16:02:12 +0400 |
---|---|---|
committer | unknown <bar@mysql.com> | 2004-11-02 16:02:12 +0400 |
commit | fc04692c8b5fb192b9be52d4672423aa41ebc6b1 (patch) | |
tree | bfda701b872e8feb1aace99aa6e20993e2150d6b /sql | |
parent | 0d92f0c777e170b4de4eff45e3f1ed08f3ccc836 (diff) | |
download | mariadb-git-fc04692c8b5fb192b9be52d4672423aa41ebc6b1.tar.gz |
Many files:
Allow mixing of different character sets for more SQL functions.
item_func.h:
Allow mixing of different character sets for more SQL functions..
sql/item_cmpfunc.cc:
Allow mixing of different character sets for more SQL functions.
sql/item_func.cc:
Allow mixing of different character sets for more SQL functions.
sql/item_func.h:
Allow mixing of different character sets for more SQL functions..
sql/item_strfunc.cc:
Allow mixing of different character sets for more SQL functions.
sql/item.cc:
Allow mixing of different character sets for more SQL functions.
sql/item.h:
Allow mixing of different character sets for more SQL functions.
mysql-test/t/ctype_recoding.test:
Allow mixing of different character sets for more SQL functions.
mysql-test/r/ctype_recoding.result:
Allow mixing of different character sets for more SQL functions.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item.cc | 41 | ||||
-rw-r--r-- | sql/item.h | 18 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 96 | ||||
-rw-r--r-- | sql/item_func.cc | 105 | ||||
-rw-r--r-- | sql/item_func.h | 3 | ||||
-rw-r--r-- | sql/item_strfunc.cc | 58 |
6 files changed, 194 insertions, 127 deletions
diff --git a/sql/item.cc b/sql/item.cc index 46215fd78ed..7dc7e9e542c 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -205,6 +205,41 @@ bool Item::eq(const Item *item, bool binary_cmp) const } +Item *Item::safe_charset_converter(CHARSET_INFO *tocs) +{ + /* + Don't allow automatic conversion to non-Unicode charsets, + as it potentially loses data. + */ + if (!(tocs->state & MY_CS_UNICODE)) + return NULL; // safe conversion is not possible + return new Item_func_conv_charset(this, tocs); +} + + +Item *Item_string::safe_charset_converter(CHARSET_INFO *tocs) +{ + Item_string *conv; + uint conv_errors; + String tmp, cstr, *ostr= val_str(&tmp); + cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors); + if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(), + cstr.charset(), + collation.derivation))) + { + /* + Safe conversion is not possible (or EOM). + We could not convert a string into the requested character set + without data loss. The target charset does not cover all the + characters from the string. Operation cannot be done correctly. + */ + return NULL; + } + conv->str_value.copy(); + return conv; +} + + bool Item_string::eq(const Item *item, bool binary_cmp) const { if (type() == item->type()) @@ -723,6 +758,12 @@ String *Item_null::val_str(String *str) } +Item *Item_null::safe_charset_converter(CHARSET_INFO *tocs) +{ + collation.set(tocs); + return this; +} + /*********************** Item_param related ******************************/ /* diff --git a/sql/item.h b/sql/item.h index 2c0c3306c44..fea3aa010a8 100644 --- a/sql/item.h +++ b/sql/item.h @@ -39,13 +39,22 @@ enum Derivation /* Flags for collation aggregation modes: - allow conversion to a superset - allow conversion of a coercible value (i.e. constant). + MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset + MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value + (i.e. constant). + MY_COLL_ALLOW_CONV - allow any kind of conversion + (combintion of the above two) + MY_COLL_DISALLOW_NONE - don't allow return DERIVATION_NONE + (e.g. when aggregating for comparison) + MY_COLL_CMP_CONV - combination of MY_COLL_ALLOW_CONV + and MY_COLL_DISALLOW_NONE */ #define MY_COLL_ALLOW_SUPERSET_CONV 1 #define MY_COLL_ALLOW_COERCIBLE_CONV 2 - +#define MY_COLL_ALLOW_CONV 3 +#define MY_COLL_DISALLOW_NONE 4 +#define MY_COLL_CMP_CONV 7 class DTCollation { public: @@ -302,6 +311,7 @@ public: Field *tmp_table_field_from_field_type(TABLE *table); virtual Item *neg_transformer(THD *thd) { return NULL; } + virtual Item *safe_charset_converter(CHARSET_INFO *tocs); void delete_self() { cleanup(); @@ -447,6 +457,7 @@ public: Item *new_item() { return new Item_null(name); } bool is_null() { return 1; } void print(String *str) { str->append("NULL", 4); } + Item *safe_charset_converter(CHARSET_INFO *tocs); }; @@ -717,6 +728,7 @@ public: return new Item_string(name, str_value.ptr(), str_value.length(), &my_charset_bin); } + Item *safe_charset_converter(CHARSET_INFO *tocs); String *const_string() { return &str_value; } inline void append(char *str, uint length) { str_value.append(str, length); } void print(String *str); diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index d9db07e2289..701894cacb5 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -173,89 +173,11 @@ void Item_bool_func2::fix_length_and_dec() if (!args[0] || !args[1]) return; - /* - We allow to apply automatic character set conversion in some cases. - The conditions when conversion is possible are: - - arguments A and B have different charsets - - A wins according to coercibility rules - (i.e. a column is stronger than a string constant, - an explicit COLLATE clause is stronger than a column) - - character set of A is either superset for character set of B, - or B is a string constant which can be converted into the - character set of A without data loss. - - If all of the above is true, then it's possible to convert - B into the character set of A, and then compare according - to the collation of A. - */ - - uint32 dummy_offset; DTCollation coll; - if (args[0]->result_type() == STRING_RESULT && args[1]->result_type() == STRING_RESULT && - String::needs_conversion(0, args[0]->collation.collation, - args[1]->collation.collation, - &dummy_offset) && - !coll.set(args[0]->collation, args[1]->collation, - MY_COLL_ALLOW_SUPERSET_CONV | - MY_COLL_ALLOW_COERCIBLE_CONV)) - { - Item* conv= 0; - Item_arena *arena= thd->current_arena, backup; - uint strong= coll.strong; - uint weak= strong ? 0 : 1; - /* - In case we're in statement prepare, create conversion item - in its memory: it will be reused on each execute. - */ - if (arena->is_stmt_prepare()) - thd->set_n_backup_item_arena(arena, &backup); - if (args[weak]->type() == STRING_ITEM) - { - uint conv_errors; - String tmp, cstr, *ostr= args[weak]->val_str(&tmp); - cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), - args[strong]->collation.collation, &conv_errors); - if (conv_errors) - { - /* - We could not convert a string into the character set - of the stronger side of the operation without data loss. - It can happen if we tried to combine a column with a string - constant, and the column charset does not cover all the - characters from the string. Operation cannot be done - correctly. Return an error. - */ - my_coll_agg_error(args[0]->collation, args[1]->collation, - func_name()); - return; - } - conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(), - args[weak]->collation.derivation); - ((Item_string*)conv)->str_value.copy(); - } - else - { - if (!(coll.collation->state & MY_CS_UNICODE)) - { - /* - Don't allow automatic conversion to non-Unicode charsets, - as it potentially loses data. - */ - my_coll_agg_error(args[0]->collation, args[1]->collation, - func_name()); - return; - } - conv= new Item_func_conv_charset(args[weak], - args[strong]->collation.collation); - conv->collation.set(args[weak]->collation.derivation); - conv->fix_fields(thd, 0, &conv); - } - if (arena->is_stmt_prepare()) - thd->restore_backup_item_arena(arena, &backup); - args[weak]= conv ? conv : args[weak]; - } + agg_arg_charsets(coll, args, 2, MY_COLL_CMP_CONV)) + return; // Make a special case of compare with fields to get nicer DATE comparisons @@ -871,7 +793,7 @@ void Item_func_between::fix_length_and_dec() return; agg_cmp_type(&cmp_type, args, 3); if (cmp_type == STRING_RESULT && - agg_arg_collations_for_comparison(cmp_collation, args, 3)) + agg_arg_charsets(cmp_collation, args, 3, MY_COLL_CMP_CONV)) return; /* @@ -987,7 +909,7 @@ Item_func_ifnull::fix_length_and_dec() decimals=max(args[0]->decimals,args[1]->decimals); agg_result_type(&cached_result_type, args, 2); if (cached_result_type == STRING_RESULT) - agg_arg_collations(collation, args, arg_count); + agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV); else if (cached_result_type != REAL_RESULT) decimals= 0; @@ -1083,7 +1005,7 @@ Item_func_if::fix_length_and_dec() agg_result_type(&cached_result_type, args+1, 2); if (cached_result_type == STRING_RESULT) { - if (agg_arg_collations(collation, args+1, 2)) + if (agg_arg_charsets(collation, args+1, 2, MY_COLL_ALLOW_CONV)) return; } else @@ -1354,7 +1276,7 @@ void Item_func_case::fix_length_and_dec() agg_result_type(&cached_result_type, agg, nagg); if ((cached_result_type == STRING_RESULT) && - agg_arg_collations(collation, agg, nagg)) + agg_arg_charsets(collation, agg, nagg, MY_COLL_ALLOW_CONV)) return; @@ -1370,7 +1292,7 @@ void Item_func_case::fix_length_and_dec() nagg++; agg_cmp_type(&cmp_type, agg, nagg); if ((cmp_type == STRING_RESULT) && - agg_arg_collations_for_comparison(cmp_collation, agg, nagg)) + agg_arg_charsets(cmp_collation, agg, nagg, MY_COLL_CMP_CONV)) return; } @@ -1477,7 +1399,7 @@ void Item_func_coalesce::fix_length_and_dec() set_if_bigger(decimals,args[i]->decimals); } if (cached_result_type == STRING_RESULT) - agg_arg_collations(collation, args, arg_count); + agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV); else if (cached_result_type != REAL_RESULT) decimals= 0; } @@ -2423,7 +2345,7 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref) max_length= 1; decimals= 0; - if (agg_arg_collations(cmp_collation, args, 2)) + if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV)) return 1; used_tables_cache=args[0]->used_tables() | args[1]->used_tables(); diff --git a/sql/item_func.cc b/sql/item_func.cc index 50843d3bf76..09d7e50eaa3 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -90,6 +90,12 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count, return TRUE; } } + if ((flags & MY_COLL_DISALLOW_NONE) && + c.derivation == DERIVATION_NONE) + { + my_coll_agg_error(av, count, func_name()); + return TRUE; + } return FALSE; } @@ -98,15 +104,7 @@ bool Item_func::agg_arg_collations_for_comparison(DTCollation &c, Item **av, uint count, uint flags) { - if (agg_arg_collations(c, av, count, flags)) - return TRUE; - - if (c.derivation == DERIVATION_NONE) - { - my_coll_agg_error(av, count, func_name()); - return TRUE; - } - return FALSE; + return (agg_arg_collations(c, av, count, flags | MY_COLL_DISALLOW_NONE)); } @@ -119,6 +117,89 @@ eval_const_cond(COND *cond) } + +/* + Collect arguments' character sets together. + We allow to apply automatic character set conversion in some cases. + The conditions when conversion is possible are: + - arguments A and B have different charsets + - A wins according to coercibility rules + (i.e. a column is stronger than a string constant, + an explicit COLLATE clause is stronger than a column) + - character set of A is either superset for character set of B, + or B is a string constant which can be converted into the + character set of A without data loss. + + If all of the above is true, then it's possible to convert + B into the character set of A, and then compare according + to the collation of A. + + For functions with more than two arguments: + + collect(A,B,C) ::= collect(collect(A,B),C) +*/ + +bool Item_func::agg_arg_charsets(DTCollation &coll, + Item **args, uint nargs, uint flags) +{ + Item **arg, **last, *safe_args[2]; + if (agg_arg_collations(coll, args, nargs, flags)) + return TRUE; + + /* + For better error reporting: save the first and the second argument. + We need this only if the the number of args is 3 or 2: + - for a longer argument list, "Illegal mix of collations" + doesn't display each argument's characteristics. + - if nargs is 1, then this error cannot happen. + */ + if (nargs >=2 && nargs <= 3) + { + safe_args[0]= args[0]; + safe_args[1]= args[1]; + } + + THD *thd= current_thd; + Item_arena *arena= thd->current_arena, backup; + bool res= FALSE; + /* + In case we're in statement prepare, create conversion item + in its memory: it will be reused on each execute. + */ + if (arena->is_stmt_prepare()) + thd->set_n_backup_item_arena(arena, &backup); + + for (arg= args, last= args + nargs; arg < last; arg++) + { + Item* conv; + uint dummy_offset; + if (!String::needs_conversion(0, coll.collation, + (*arg)->collation.collation, + &dummy_offset)) + continue; + + if (!(conv= (*arg)->safe_charset_converter(coll.collation))) + { + if (nargs >=2 && nargs <= 3) + { + /* restore the original arguments for better error message */ + args[0]= safe_args[0]; + args[1]= safe_args[1]; + } + my_coll_agg_error(args, nargs, func_name()); + res= TRUE; + break; // we cannot return here, we need to restore "arena". + } + conv->fix_fields(thd, 0, &conv); + *arg= conv; + } + if (arena->is_stmt_prepare()) + thd->restore_backup_item_arena(arena, &backup); + return res; +} + + + void Item_func::set_arguments(List<Item> &list) { allowed_arg_cols= 1; @@ -1105,7 +1186,7 @@ void Item_func_min_max::fix_length_and_dec() cmp_type=item_cmp_type(cmp_type,args[i]->result_type()); } if (cmp_type == STRING_RESULT) - agg_arg_collations_for_comparison(collation, args, arg_count); + agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV); } @@ -1259,7 +1340,7 @@ longlong Item_func_coercibility::val_int() void Item_func_locate::fix_length_and_dec() { maybe_null=0; max_length=11; - agg_arg_collations_for_comparison(cmp_collation, args, 2); + agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV); } @@ -1358,7 +1439,7 @@ void Item_func_field::fix_length_and_dec() for (uint i=1; i < arg_count ; i++) cmp_type= item_cmp_type(cmp_type, args[i]->result_type()); if (cmp_type == STRING_RESULT) - agg_arg_collations_for_comparison(cmp_collation, args, arg_count); + agg_arg_charsets(cmp_collation, args, arg_count, MY_COLL_CMP_CONV); } diff --git a/sql/item_func.h b/sql/item_func.h index 963038227a2..ce2b34499d6 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -145,7 +145,8 @@ public: bool agg_arg_collations_for_comparison(DTCollation &c, Item **items, uint nitems, uint flags= 0); - + bool agg_arg_charsets(DTCollation &c, Item **items, uint nitems, + uint flags= 0); bool walk(Item_processor processor, byte *arg); }; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 5eda89ef21e..81fff899ec7 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -346,7 +346,7 @@ void Item_func_concat::fix_length_and_dec() { max_length=0; - if (agg_arg_collations(collation, args, arg_count)) + if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV)) return; for (uint i=0 ; i < arg_count ; i++) @@ -640,7 +640,7 @@ void Item_func_concat_ws::fix_length_and_dec() { max_length=0; - if (agg_arg_collations(collation, args, arg_count)) + if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV)) return; /* @@ -848,7 +848,7 @@ void Item_func_replace::fix_length_and_dec() maybe_null=1; } - if (agg_arg_collations_for_comparison(collation, args, 3)) + if (agg_arg_charsets(collation, args, 3, MY_COLL_CMP_CONV)) return; } @@ -893,11 +893,13 @@ null: void Item_func_insert::fix_length_and_dec() { - if (collation.set(args[0]->collation, args[3]->collation)) - { - my_coll_agg_error(args[0]->collation, args[3]->collation, func_name()); - return; - } + Item *cargs[2]; + cargs[0]= args[0]; + cargs[1]= args[3]; + if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV)) + return; + args[0]= cargs[0]; + args[3]= cargs[1]; max_length=args[0]->max_length+args[3]->max_length; if (max_length > MAX_BLOB_WIDTH) { @@ -1063,7 +1065,7 @@ void Item_func_substr_index::fix_length_and_dec() { max_length= args[0]->max_length; - if (agg_arg_collations_for_comparison(collation, args, 2)) + if (agg_arg_charsets(collation, args, 2, MY_COLL_CMP_CONV)) return; } @@ -1355,10 +1357,14 @@ void Item_func_trim::fix_length_and_dec() remove.set_ascii(" ",1); } else - if (collation.set(args[1]->collation, args[0]->collation) || - collation.derivation == DERIVATION_NONE) { - my_coll_agg_error(args[1]->collation, args[0]->collation, func_name()); + Item *cargs[2]; + cargs[0]= args[1]; + cargs[1]= args[0]; + if (agg_arg_charsets(collation, cargs, 2, MY_COLL_CMP_CONV)) + return; + args[0]= cargs[1]; + args[1]= cargs[0]; } } @@ -1679,7 +1685,7 @@ void Item_func_elt::fix_length_and_dec() max_length=0; decimals=0; - if (agg_arg_collations(collation, args+1, arg_count-1)) + if (agg_arg_charsets(collation, args+1, arg_count-1, MY_COLL_ALLOW_CONV)) return; for (uint i= 1 ; i < arg_count ; i++) @@ -1755,7 +1761,7 @@ void Item_func_make_set::fix_length_and_dec() { max_length=arg_count-1; - if (agg_arg_collations(collation, args, arg_count)) + if (agg_arg_charsets(collation, args, arg_count, MY_COLL_ALLOW_CONV)) return; for (uint i=0 ; i < arg_count ; i++) @@ -1963,12 +1969,13 @@ err: void Item_func_rpad::fix_length_and_dec() { - if (collation.set(args[0]->collation, args[2]->collation)) - { - my_coll_agg_error(args[0]->collation, args[2]->collation, func_name()); + Item *cargs[2]; + cargs[0]= args[0]; + cargs[1]= args[2]; + if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV)) return; - } - + args[0]= cargs[0]; + args[2]= cargs[1]; if (args[1]->const_item()) { uint32 length= (uint32) args[1]->val_int() * collation.collation->mbmaxlen; @@ -2047,11 +2054,13 @@ String *Item_func_rpad::val_str(String *str) void Item_func_lpad::fix_length_and_dec() { - if (collation.set(args[0]->collation, args[2]->collation)) - { - my_coll_agg_error(args[0]->collation, args[2]->collation, func_name()); + Item *cargs[2]; + cargs[0]= args[0]; + cargs[1]= args[2]; + if (agg_arg_charsets(collation, cargs, 2, MY_COLL_ALLOW_CONV)) return; - } + args[0]= cargs[0]; + args[2]= cargs[1]; if (args[1]->const_item()) { @@ -2495,7 +2504,8 @@ void Item_func_export_set::fix_length_and_dec() uint sep_length=(arg_count > 3 ? args[3]->max_length : 1); max_length=length*64+sep_length*63; - if (agg_arg_collations(collation, args+1, min(4,arg_count)-1)) + if (agg_arg_charsets(collation, args+1, min(4,arg_count)-1), + MY_COLL_ALLOW_CONV) return; } |