summaryrefslogtreecommitdiff
path: root/sql/item.cc
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mysql.com>2010-02-11 08:17:25 +0400
committerAlexander Barkov <bar@mysql.com>2010-02-11 08:17:25 +0400
commit702166bcdec5705dd90d8567a88056893433c719 (patch)
treed6306efc9e4d7e6bc6bf4ae5a58bd6aec40420e9 /sql/item.cc
parent6dd93757263fd219fcbd3d1ed100a681ecbc7c92 (diff)
downloadmariadb-git-702166bcdec5705dd90d8567a88056893433c719.tar.gz
WL#2649 Number-to-string conversions
added: include/ctype_numconv.inc mysql-test/include/ctype_numconv.inc mysql-test/r/ctype_binary.result mysql-test/t/ctype_binary.test Adding tests modified: mysql-test/r/bigint.result mysql-test/r/case.result mysql-test/r/create.result mysql-test/r/ctype_cp1251.result mysql-test/r/ctype_latin1.result mysql-test/r/ctype_ucs.result mysql-test/r/func_gconcat.result mysql-test/r/func_str.result mysql-test/r/metadata.result mysql-test/r/ps_1general.result mysql-test/r/ps_2myisam.result mysql-test/r/ps_3innodb.result mysql-test/r/ps_4heap.result mysql-test/r/ps_5merge.result mysql-test/r/show_check.result mysql-test/r/type_datetime.result mysql-test/r/type_ranges.result mysql-test/r/union.result mysql-test/suite/ndb/r/ps_7ndb.result mysql-test/t/ctype_cp1251.test mysql-test/t/ctype_latin1.test mysql-test/t/ctype_ucs.test mysql-test/t/func_str.test Fixing tests @ sql/field.cc - Return str result using my_charset_numeric. - Using real multi-byte aware str_to_XXX functions to handle tricky charset values properly (e.g. UCS2) @ sql/field.h - Changing derivation of non-string field types to DERIVATION_NUMERIC. - Changing binary() for numeric/datetime fields to always return TRUE even if charset is not my_charset_bin. We need this to keep ha_base_keytype() return HA_KEYTYPE_BINARY. - Adding BINARY_FLAG into some fields, because it's not being set automatically anymore with "my_charset_bin to my_charset_numeric" change. - Changing derivation for numeric/datetime datatypes to a weaker value, to make "SELECT concat('string', field)" use character set of the string literal for the result of the function. @ sql/item.cc - Implementing generic val_str_ascii(). - Using max_char_length() instead of direct read of max_length to make "tricky" charsets like UCS2 work. NOTE: in the future we'll possibly remove all direct reads of max_length - Fixing Item_num::safe_charset_converter(). 
Previously it aligned binary string to character string (for example by adding leading 0x00 when doing binary->UCS2 conversion). Now it just converts from my_charset_numeric to "tocs". - Using val_str_ascii() in Item::get_time() to make UCS2 arguments work. - Other misc changes @ sql/item.h - Changing MY_COLL_CMP_CONV and MY_COLL_ALLOW_CONV to bit operations instead of hard-coded bit masks. - Adding new method DTCollation.set_numeric(). - Adding new methods to Item. - Adding helper functions to make code look nicer: agg_item_charsets_for_string_result() agg_item_charsets_for_comparison() - Changing charset for Item_num-derived items from my_charset_bin to my_charset_numeric (which is an alias for latin1). @ sql/item_cmpfunc.cc - Using new helper functions - Other misc changes @ sql/item_cmpfunc.h - Fixing strcmp() to return max_length=2. Previously it returned 1, which was wrong, because it did not fit '-1'. @ sql/item_func.cc - Using new helper functions - Other minor changes @ sql/item_func.h - Removing unused functions - Adding helper functions agg_arg_charsets_for_string_result() agg_arg_charsets_for_comparison() - Adding set_numeric() into constructors of numeric items. - Using fix_length_and_charset() and fix_char_length() instead of direct write to max_length. @ sql/item_geofunc.cc - Changing class for Item_func_geometry_type and Item_func_as_wkt from Item_str_func to Item_str_ascii_func, to make them return UCS2 result properly (when character_set_connection=ucs2). @ sql/item_geofunc.h - Changing class for Item_func_geometry_type and Item_func_as_wkt from Item_str_func to Item_str_ascii_func, to make them return UCS2 result properly (when @@character_set_connection=ucs2). @ sql/item_strfunc.cc - Implementing Item_str_func::val_str(). - Renaming val_str to val_str_ascii for some items, to make them work with UCS2 properly. 
- Using new helper functions - All single-argument functions that expect string result now call this method: agg_arg_charsets_for_string_result(collation, args, 1); This enables character set conversion to @@character_set_connection in case of pure numeric input. @ sql/item_strfunc.h - Introducing Item_str_ascii_func - for functions which return pure ASCII data, for performance purposes, as well as for the cases when the old implementation of val_str() was heavily 8-bit oriented and implementing a UCS2-aware version is tricky. @ sql/item_sum.cc - Using new helper functions. @ sql/item_timefunc.cc - Using my_charset_numeric instead of my_charset_bin. - Using fix_char_length(), fix_length_and_charset() and fix_length_and_charset_datetime() instead of direct write to max_length. - Using tricky-charset aware function str_to_time_with_warn() @ sql/item_timefunc.h - Using new helper functions for charset and length initialization. - Changing base class for Item_func_get_format() to make it return UCS2 properly (when character_set_connection=ucs2). @ sql/item_xmlfunc.cc - Using new helper function @ sql/my_decimal.cc - Adding a new DECIMAL to CHAR converter with real multibyte support (e.g. UCS2) @ sql/mysql_priv.h - Introducing a new derivation level for numeric/datetime data types. - Adding macros for my_charset_numeric and MY_REPERTOIRE_NUMERIC. - Adding prototypes for str_set_decimal() - Adding prototypes for character-set aware str_to_xxx() functions. @ sql/protocol.cc - Changing charsetnr to "binary" client-side metadata for numeric/datetime data types. @ sql/time.cc - Adding to_ascii() helper function, to convert a string in any character set to ascii representation. In the future can be extended to understand digits written in various non-Latin word scripts. 
- Adding real multi-byte character set aware versions for str_to_XXXX, to make these types of queries work correctly: INSERT INTO t1 SET datetime_column=ucs2_expression; @ strings/ctype-ucs2.c - endptr was not calculated correctly. INSERTing of UCS2 values into numeric columns returned warnings about truncated wrong data.
Diffstat (limited to 'sql/item.cc')
-rw-r--r--sql/item.cc97
1 files changed, 80 insertions, 17 deletions
diff --git a/sql/item.cc b/sql/item.cc
index e785f0addde..f7643db57cd 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -201,6 +201,37 @@ bool Item::val_bool()
}
+/*
+ For the items which don't have its own fast val_str_ascii()
+ implementation we provide a generic slower version,
+ which converts from the Item character set to ASCII.
+ For better performance conversion happens only in
+ case of a "tricky" Item character set (e.g. UCS2).
+ Normally conversion does not happen.
+*/
+String *Item::val_str_ascii(String *str)
+{
+ DBUG_ASSERT(fixed == 1);
+
+ if (!(collation.collation->state & MY_CS_NONASCII))
+ return val_str(str);
+
+ DBUG_ASSERT(str != &str_value);
+
+ uint errors;
+ String *res= val_str(&str_value);
+ if (!res)
+ return 0;
+
+ if ((null_value= str->copy(res->ptr(), res->length(),
+ collation.collation, &my_charset_latin1,
+ &errors)))
+ return 0;
+
+ return str;
+}
+
+
String *Item::val_string_from_real(String *str)
{
double nr= val_real();
@@ -443,10 +474,11 @@ uint Item::decimal_precision() const
if ((restype == DECIMAL_RESULT) || (restype == INT_RESULT))
{
uint prec=
- my_decimal_length_to_precision(max_length, decimals, unsigned_flag);
+ my_decimal_length_to_precision(max_char_length(), decimals,
+ unsigned_flag);
return min(prec, DECIMAL_MAX_PRECISION);
}
- return min(max_length, DECIMAL_MAX_PRECISION);
+ return min(max_char_length(), DECIMAL_MAX_PRECISION);
}
@@ -783,15 +815,40 @@ Item *Item::safe_charset_converter(CHARSET_INFO *tocs)
*/
Item *Item_num::safe_charset_converter(CHARSET_INFO *tocs)
{
+ /*
+ Item_num returns pure ASCII result,
+ so conversion is needed only in case of "tricky" character
+ sets like UCS2. If tocs is not "tricky", return the item itself.
+ */
+ if (!(tocs->state & MY_CS_NONASCII))
+ return this;
+
Item_string *conv;
- char buf[64];
- String *s, tmp(buf, sizeof(buf), &my_charset_bin);
- s= val_str(&tmp);
- if ((conv= new Item_string(s->ptr(), s->length(), s->charset())))
+ uint conv_errors;
+ char buf[64], buf2[64];
+ String tmp(buf, sizeof(buf), &my_charset_bin);
+ String cstr(buf2, sizeof(buf2), &my_charset_bin);
+ String *ostr= val_str(&tmp);
+ char *ptr;
+ cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
+ if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(),
+ cstr.charset(),
+ collation.derivation)))
{
- conv->str_value.copy();
- conv->str_value.mark_as_const();
+ /*
+ Safe conversion is not possible (or EOM).
+ We could not convert a string into the requested character set
+ without data loss. The target charset does not cover all the
+ characters from the string. Operation cannot be done correctly.
+ */
+ return NULL;
}
+ if (!(ptr= current_thd->strmake(cstr.ptr(), cstr.length())))
+ return NULL;
+ conv->str_value.set(ptr, cstr.length(), cstr.charset());
+ /* Ensure that no one is going to change the result string */
+ conv->str_value.mark_as_const();
+ conv->fix_char_length(max_char_length());
return conv;
}
@@ -910,7 +967,7 @@ bool Item::get_date(MYSQL_TIME *ltime,uint fuzzydate)
char buff[40];
String tmp(buff,sizeof(buff), &my_charset_bin),*res;
if (!(res=val_str(&tmp)) ||
- str_to_datetime_with_warn(res->ptr(), res->length(),
+ str_to_datetime_with_warn(res->charset(), res->ptr(), res->length(),
ltime, fuzzydate) <= MYSQL_TIMESTAMP_ERROR)
goto err;
}
@@ -945,8 +1002,8 @@ bool Item::get_time(MYSQL_TIME *ltime)
{
char buff[40];
String tmp(buff,sizeof(buff),&my_charset_bin),*res;
- if (!(res=val_str(&tmp)) ||
- str_to_time_with_warn(res->ptr(), res->length(), ltime))
+ if (!(res=val_str_ascii(&tmp)) ||
+ str_to_time_with_warn(res->charset(), res->ptr(), res->length(), ltime))
{
bzero((char*) ltime,sizeof(*ltime));
return 1;
@@ -1650,6 +1707,11 @@ bool agg_item_collations(DTCollation &c, const char *fname,
my_coll_agg_error(av, count, fname, item_sep);
return TRUE;
}
+
+ /* If all arguments where numbers, reset to @@collation_connection */
+ if (c.derivation == DERIVATION_NUMERIC)
+ c.set(Item::default_charset(), DERIVATION_COERCIBLE, MY_REPERTOIRE_NUMERIC);
+
return FALSE;
}
@@ -1895,13 +1957,14 @@ void Item_field::set_field(Field *field_par)
field=result_field=field_par; // for easy coding with fields
maybe_null=field->maybe_null();
decimals= field->decimals();
- max_length= field_par->max_display_length();
table_name= *field_par->table_name;
field_name= field_par->field_name;
db_name= field_par->table->s->db.str;
alias_name_used= field_par->table->alias_name_used;
unsigned_flag=test(field_par->flags & UNSIGNED_FLAG);
- collation.set(field_par->charset(), field_par->derivation());
+ collation.set(field_par->charset(), field_par->derivation(),
+ field_par->repertoire());
+ fix_char_length(field_par->char_length());
fixed= 1;
if (field->table->s->tmp_table == SYSTEM_TMP_TABLE)
any_privileges= 0;
@@ -2210,7 +2273,7 @@ String *Item_int::val_str(String *str)
{
// following assert is redundant, because fixed=1 assigned in constructor
DBUG_ASSERT(fixed == 1);
- str->set(value, &my_charset_bin);
+ str->set(value, collation.collation);
return str;
}
@@ -2240,7 +2303,7 @@ String *Item_uint::val_str(String *str)
{
// following assert is redundant, because fixed=1 assigned in constructor
DBUG_ASSERT(fixed == 1);
- str->set((ulonglong) value, &my_charset_bin);
+ str->set((ulonglong) value, collation.collation);
return str;
}
@@ -2340,7 +2403,7 @@ double Item_decimal::val_real()
String *Item_decimal::val_str(String *result)
{
- result->set_charset(&my_charset_bin);
+ result->set_charset(&my_charset_numeric);
my_decimal2string(E_DEC_FATAL_ERROR, &decimal_value, 0, 0, 0, result);
return result;
}
@@ -4866,7 +4929,7 @@ void Item::init_make_field(Send_field *tmp_field,
tmp_field->col_name= name;
tmp_field->charsetnr= collation.collation->number;
tmp_field->flags= (maybe_null ? 0 : NOT_NULL_FLAG) |
- (my_binary_compare(collation.collation) ?
+ (my_binary_compare(charset_for_protocol()) ?
BINARY_FLAG : 0);
tmp_field->type= field_type_arg;
tmp_field->length=max_length;