diff options
author | Alexander Barkov <bar@mysql.com> | 2010-02-11 08:17:25 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2010-02-11 08:17:25 +0400 |
commit | 702166bcdec5705dd90d8567a88056893433c719 (patch) | |
tree | d6306efc9e4d7e6bc6bf4ae5a58bd6aec40420e9 /sql/item.cc | |
parent | 6dd93757263fd219fcbd3d1ed100a681ecbc7c92 (diff) | |
download | mariadb-git-702166bcdec5705dd90d8567a88056893433c719.tar.gz |
WL#2649 Number-to-string conversions
added:
include/ctype_numconv.inc
mysql-test/include/ctype_numconv.inc
mysql-test/r/ctype_binary.result
mysql-test/t/ctype_binary.test
Adding tests
modified:
mysql-test/r/bigint.result
mysql-test/r/case.result
mysql-test/r/create.result
mysql-test/r/ctype_cp1251.result
mysql-test/r/ctype_latin1.result
mysql-test/r/ctype_ucs.result
mysql-test/r/func_gconcat.result
mysql-test/r/func_str.result
mysql-test/r/metadata.result
mysql-test/r/ps_1general.result
mysql-test/r/ps_2myisam.result
mysql-test/r/ps_3innodb.result
mysql-test/r/ps_4heap.result
mysql-test/r/ps_5merge.result
mysql-test/r/show_check.result
mysql-test/r/type_datetime.result
mysql-test/r/type_ranges.result
mysql-test/r/union.result
mysql-test/suite/ndb/r/ps_7ndb.result
mysql-test/t/ctype_cp1251.test
mysql-test/t/ctype_latin1.test
mysql-test/t/ctype_ucs.test
mysql-test/t/func_str.test
Fixing tests
@ sql/field.cc
- Return str result using my_charset_numeric.
- Using real multi-byte aware str_to_XXX functions
to handle tricky charset values propely (e.g. UCS2)
@ sql/field.h
- Changing derivation of non-string field types to DERIVATION_NUMERIC.
- Changing binary() for numeric/datetime fields to always
return TRUE even if charset is not my_charset_bin. We need
this to keep ha_base_keytype() return HA_KEYTYPE_BINARY.
- Adding BINARY_FLAG into some fields, because it's not
being set automatically anymore with
"my_charset_bin to my_charset_numeric" change.
- Changing derivation for numeric/datetime datatypes to a weaker
value, to make "SELECT concat('string', field)" use character
set of the string literal for the result of the function.
@ sql/item.cc
- Implementing generic val_str_ascii().
- Using max_char_length() instead of direct read of max_length
to make "tricky" charsets like UCS2 work.
NOTE: in the future we'll possibly remove all direct reads of max_length
- Fixing Item_num::safe_charset_converter().
Previously it alligned binary string to
character string (for example by adding leading 0x00
when doing binary->UCS2 conversion). Now it just
converts from my_charset_numbner to "tocs".
- Using val_str_ascii() in Item::get_time() to make UCS2 arguments work.
- Other misc changes
@ sql/item.h
- Changing MY_COLL_CMP_CONV and MY_COLL_ALLOW_CONV to
bit operations instead of hard-coded bit masks.
- Addding new method DTCollation.set_numeric().
- Adding new methods to Item.
- Adding helper functions to make code look nicer:
agg_item_charsets_for_string_result()
agg_item_charsets_for_comparison()
- Changing charset for Item_num-derived items
from my_charset_bin to my_charset_numeric
(which is an alias for latin1).
@ sql/item_cmpfunc.cc
- Using new helper functions
- Other misc changes
@ sql/item_cmpfunc.h
- Fixing strcmp() to return max_length=2.
Previously it returned 1, which was wrong,
because it did not fit '-1'.
@ sql/item_func.cc
- Using new helper functions
- Other minor changes
@ sql/item_func.h
- Removing unused functions
- Adding helper functions
agg_arg_charsets_for_string_result()
agg_arg_charsets_for_comparison()
- Adding set_numeric() into constructors of numeric items.
- Using fix_length_and_charset() and fix_char_length()
instead of direct write to max_length.
@ sql/item_geofunc.cc
- Changing class for Item_func_geometry_type and
Item_func_as_wkt from Item_str_func to
Item_str_ascii_func, to make them return UCS2 result
properly (when character_set_connection=ucs2).
@ sql/item_geofunc.h
- Changing class for Item_func_geometry_type and
Item_func_as_wkt from Item_str_func to
Item_str_ascii_func, to make them return UCS2 result
properly (when @@character_set_connection=ucs2).
@ sql/item_strfunc.cc
- Implementing Item_str_func::val_str().
- Renaming val_str to val_str_ascii for some items,
to make them work with UCS2 properly.
- Using new helper functions
- All single-argument functions that expect string
result now call this method:
agg_arg_charsets_for_string_result(collation, args, 1);
This enables character set conversion to @@character_set_connection
in case of pure numeric input.
@ sql/item_strfunc.h
- Introducing Item_str_ascii_func - for functions
which return pure ASCII data, for performance purposes,
as well as for the cases when the old implementation
of val_str() was heavily 8-bit oriented and implementing
a UCS2-aware version is tricky.
@ sql/item_sum.cc
- Using new helper functions.
@ sql/item_timefunc.cc
- Using my_charset_numeric instead of my_charset_bin.
- Using fix_char_length(), fix_length_and_charset()
and fix_length_and_charset_datetime()
instead of direct write to max_length.
- Using tricky-charset aware function str_to_time_with_warn()
@ sql/item_timefunc.h
- Using new helper functions for charset and length initialization.
- Changing base class for Item_func_get_format() to make
it return UCS2 properly (when character_set_connection=ucs2).
@ sql/item_xmlfunc.cc
- Using new helper function
@ sql/my_decimal.cc
- Adding a new DECIMAL to CHAR converter
with real multibyte support (e.g. UCS2)
@ sql/mysql_priv.h
- Introducing a new derivation level for numeric/datetime data types.
- Adding macros for my_charset_numeric and MY_REPERTOIRE_NUMERIC.
- Adding prototypes for str_set_decimal()
- Adding prototypes for character-set aware str_to_xxx() functions.
@ sql/protocol.cc
- Changing charsetnr to "binary" client-side metadata for
numeric/datetime data types.
@ sql/time.cc
- Adding to_ascii() helper function, to convert a string
in any character set to ascii representation. In the
future can be extended to understand digits written
in various non-Latin word scripts.
- Adding real multy-byte character set aware versions for str_to_XXXX,
to make these these type of queries work correct:
INSERT INTO t1 SET datetime_column=ucs2_expression;
@ strings/ctype-ucs2.c
- endptr was not calculated correctly. INSERTing of UCS2
values into numeric columns returned warnings about
truncated wrong data.
Diffstat (limited to 'sql/item.cc')
-rw-r--r-- | sql/item.cc | 97 |
1 files changed, 80 insertions, 17 deletions
diff --git a/sql/item.cc b/sql/item.cc index e785f0addde..f7643db57cd 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -201,6 +201,37 @@ bool Item::val_bool() } +/* + For the items which don't have its own fast val_str_ascii() + implementation we provide a generic slower version, + which converts from the Item character set to ASCII. + For better performance conversion happens only in + case of a "tricky" Item character set (e.g. UCS2). + Normally conversion does not happen. +*/ +String *Item::val_str_ascii(String *str) +{ + DBUG_ASSERT(fixed == 1); + + if (!(collation.collation->state & MY_CS_NONASCII)) + return val_str(str); + + DBUG_ASSERT(str != &str_value); + + uint errors; + String *res= val_str(&str_value); + if (!res) + return 0; + + if ((null_value= str->copy(res->ptr(), res->length(), + collation.collation, &my_charset_latin1, + &errors))) + return 0; + + return str; +} + + String *Item::val_string_from_real(String *str) { double nr= val_real(); @@ -443,10 +474,11 @@ uint Item::decimal_precision() const if ((restype == DECIMAL_RESULT) || (restype == INT_RESULT)) { uint prec= - my_decimal_length_to_precision(max_length, decimals, unsigned_flag); + my_decimal_length_to_precision(max_char_length(), decimals, + unsigned_flag); return min(prec, DECIMAL_MAX_PRECISION); } - return min(max_length, DECIMAL_MAX_PRECISION); + return min(max_char_length(), DECIMAL_MAX_PRECISION); } @@ -783,15 +815,40 @@ Item *Item::safe_charset_converter(CHARSET_INFO *tocs) */ Item *Item_num::safe_charset_converter(CHARSET_INFO *tocs) { + /* + Item_num returns pure ASCII result, + so conversion is needed only in case of "tricky" character + sets like UCS2. If tocs is not "tricky", return the item itself. + */ + if (!(tocs->state & MY_CS_NONASCII)) + return this; + Item_string *conv; - char buf[64]; - String *s, tmp(buf, sizeof(buf), &my_charset_bin); - s= val_str(&tmp); - if ((conv= new Item_string(s->ptr(), s->length(), s->charset()))) + uint conv_errors; + char buf[64], buf2[64]; + String tmp(buf, sizeof(buf), &my_charset_bin); + String cstr(buf2, sizeof(buf2), &my_charset_bin); + String *ostr= val_str(&tmp); + char *ptr; + cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors); + if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(), + cstr.charset(), + collation.derivation))) { - conv->str_value.copy(); - conv->str_value.mark_as_const(); + /* + Safe conversion is not possible (or EOM). + We could not convert a string into the requested character set + without data loss. The target charset does not cover all the + characters from the string. Operation cannot be done correctly. + */ + return NULL; } + if (!(ptr= current_thd->strmake(cstr.ptr(), cstr.length()))) + return NULL; + conv->str_value.set(ptr, cstr.length(), cstr.charset()); + /* Ensure that no one is going to change the result string */ + conv->str_value.mark_as_const(); + conv->fix_char_length(max_char_length()); return conv; } @@ -910,7 +967,7 @@ bool Item::get_date(MYSQL_TIME *ltime,uint fuzzydate) char buff[40]; String tmp(buff,sizeof(buff), &my_charset_bin),*res; if (!(res=val_str(&tmp)) || - str_to_datetime_with_warn(res->ptr(), res->length(), + str_to_datetime_with_warn(res->charset(), res->ptr(), res->length(), ltime, fuzzydate) <= MYSQL_TIMESTAMP_ERROR) goto err; } @@ -945,8 +1002,8 @@ bool Item::get_time(MYSQL_TIME *ltime) { char buff[40]; String tmp(buff,sizeof(buff),&my_charset_bin),*res; - if (!(res=val_str(&tmp)) || - str_to_time_with_warn(res->ptr(), res->length(), ltime)) + if (!(res=val_str_ascii(&tmp)) || + str_to_time_with_warn(res->charset(), res->ptr(), res->length(), ltime)) { bzero((char*) ltime,sizeof(*ltime)); return 1; @@ -1650,6 +1707,11 @@ bool agg_item_collations(DTCollation &c, const char *fname, my_coll_agg_error(av, count, fname, item_sep); return TRUE; } + + /* If all arguments where numbers, reset to @@collation_connection */ + if (c.derivation == DERIVATION_NUMERIC) + c.set(Item::default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_NUMERIC); + return FALSE; } @@ -1895,13 +1957,14 @@ void Item_field::set_field(Field *field_par) field=result_field=field_par; // for easy coding with fields maybe_null=field->maybe_null(); decimals= field->decimals(); - max_length= field_par->max_display_length(); table_name= *field_par->table_name; field_name= field_par->field_name; db_name= field_par->table->s->db.str; alias_name_used= field_par->table->alias_name_used; unsigned_flag=test(field_par->flags & UNSIGNED_FLAG); - collation.set(field_par->charset(), field_par->derivation()); + collation.set(field_par->charset(), field_par->derivation(), + field_par->repertoire()); + fix_char_length(field_par->char_length()); fixed= 1; if (field->table->s->tmp_table == SYSTEM_TMP_TABLE) any_privileges= 0; @@ -2210,7 +2273,7 @@ String *Item_int::val_str(String *str) { // following assert is redundant, because fixed=1 assigned in constructor DBUG_ASSERT(fixed == 1); - str->set(value, &my_charset_bin); + str->set(value, collation.collation); return str; } @@ -2240,7 +2303,7 @@ String *Item_uint::val_str(String *str) { // following assert is redundant, because fixed=1 assigned in constructor DBUG_ASSERT(fixed == 1); - str->set((ulonglong) value, &my_charset_bin); + str->set((ulonglong) value, collation.collation); return str; } @@ -2340,7 +2403,7 @@ double Item_decimal::val_real() String *Item_decimal::val_str(String *result) { - result->set_charset(&my_charset_bin); + result->set_charset(&my_charset_numeric); my_decimal2string(E_DEC_FATAL_ERROR, &decimal_value, 0, 0, 0, result); return result; } @@ -4866,7 +4929,7 @@ void Item::init_make_field(Send_field *tmp_field, tmp_field->col_name= name; tmp_field->charsetnr= collation.collation->number; tmp_field->flags= (maybe_null ? 0 : NOT_NULL_FLAG) | - (my_binary_compare(collation.collation) ? + (my_binary_compare(charset_for_protocol()) ? BINARY_FLAG : 0); tmp_field->type= field_type_arg; tmp_field->length=max_length; |