summary | refs | log | tree | commit | diff
path: root/sql
diff options
context:
space:
mode:
author: unknown <bar@bar.myoffice.izhnet.ru> 2007-08-03 15:30:31 +0500
committer: unknown <bar@bar.myoffice.izhnet.ru> 2007-08-03 15:30:31 +0500
commit: 0d1972aa72dd98a229a2a8c8faca46383bbcc572 (patch)
tree: c657f898b9581b0e2a19433212ad42eca9646b98 /sql
parent: 607ab14cf767ed0187e0c050ed61cb4ebaf34bb7 (diff)
parent: 53df09a9a6a99b82e2a8869eb16737a78772b29e (diff)
download: mariadb-git-0d1972aa72dd98a229a2a8c8faca46383bbcc572.tar.gz
Merge mysql.com:/home/bar/mysql-work/mysql-5.0.b28875
into mysql.com:/home/bar/mysql-work/mysql-5.0-rpl

mysql-test/r/ctype_utf8.result: Auto merged
mysql-test/t/ctype_utf8.test: Auto merged
sql/item.cc: Auto merged
sql/item.h: Auto merged
sql/item_func.cc: Auto merged
sql/item_strfunc.cc: Auto merged
sql/item_timefunc.cc: Auto merged
sql/sql_lex.cc: Auto merged
sql/sql_lex.h: Auto merged
sql/sql_yacc.yy: Auto merged
strings/conf_to_src.c: Auto merged
strings/ctype-extra.c: Auto merged
mysql-test/r/ctype_ucs.result: After merge fix
mysql-test/t/ctype_ucs.test: After merge fix
Diffstat (limited to 'sql')
-rw-r--r-- sql/item.cc 46
-rw-r--r-- sql/item.h 43
-rw-r--r-- sql/item_func.cc 2
-rw-r--r-- sql/item_strfunc.cc 3
-rw-r--r-- sql/item_timefunc.cc 6
-rw-r--r-- sql/sql_lex.cc 6
-rw-r--r-- sql/sql_lex.h 4
-rw-r--r-- sql/sql_string.cc 2
-rw-r--r-- sql/sql_yacc.yy 88
9 files changed, 150 insertions, 50 deletions
diff --git a/sql/item.cc b/sql/item.cc
index 2fc58eebe75..3be32aa829c 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -1296,6 +1296,25 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
}
+static bool
+left_is_superset(DTCollation *left, DTCollation *right)
+{
+ /* Allow convert to Unicode */
+ if (left->collation->state & MY_CS_UNICODE &&
+ (left->derivation < right->derivation ||
+ (left->derivation == right->derivation &&
+ !(right->collation->state & MY_CS_UNICODE))))
+ return TRUE;
+ /* Allow convert from ASCII */
+ if (right->repertoire == MY_REPERTOIRE_ASCII &&
+ (left->derivation < right->derivation ||
+ (left->derivation == right->derivation &&
+ !(left->repertoire == MY_REPERTOIRE_ASCII))))
+ return TRUE;
+ /* Disallow conversion otherwise */
+ return FALSE;
+}
+
/*
Aggregate two collations together taking
into account their coercibility (aka derivation):
@@ -1360,18 +1379,12 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
; // Do nothing
}
else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
- collation->state & MY_CS_UNICODE &&
- (derivation < dt.derivation ||
- (derivation == dt.derivation &&
- !(dt.collation->state & MY_CS_UNICODE))))
+ left_is_superset(this, &dt))
{
// Do nothing
}
else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
- dt.collation->state & MY_CS_UNICODE &&
- (dt.derivation < derivation ||
- (dt.derivation == derivation &&
- !(collation->state & MY_CS_UNICODE))))
+ left_is_superset(&dt, this))
{
set(dt);
}
@@ -1390,7 +1403,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
else
{
// Cannot apply conversion
- set(0, DERIVATION_NONE);
+ set(0, DERIVATION_NONE, 0);
return 1;
}
}
@@ -1412,8 +1425,8 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
{
if (derivation == DERIVATION_EXPLICIT)
{
- set(0, DERIVATION_NONE);
- return 1;
+ set(0, DERIVATION_NONE, 0);
+ return 1;
}
if (collation->state & MY_CS_BINSORT)
return 0;
@@ -1427,6 +1440,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
set(bin, DERIVATION_NONE);
}
}
+ repertoire|= dt.repertoire;
return 0;
}
@@ -1566,12 +1580,16 @@ bool agg_item_charsets(DTCollation &coll, const char *fname,
{
Item* conv;
uint32 dummy_offset;
- if (!String::needs_conversion(0, coll.collation,
- (*arg)->collation.collation,
+ if (!String::needs_conversion(0, (*arg)->collation.collation,
+ coll.collation,
&dummy_offset))
continue;
- if (!(conv= (*arg)->safe_charset_converter(coll.collation)))
+ if (!(conv= (*arg)->safe_charset_converter(coll.collation)) &&
+ ((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
+ conv= new Item_func_conv_charset(*arg, coll.collation, 1);
+
+ if (!conv)
{
if (nargs >=2 && nargs <= 3)
{
diff --git a/sql/item.h b/sql/item.h
index 5b1a80a5f03..7d23bfe90de 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -49,29 +49,50 @@ class DTCollation {
public:
CHARSET_INFO *collation;
enum Derivation derivation;
+ uint repertoire;
+ void set_repertoire_from_charset(CHARSET_INFO *cs)
+ {
+ repertoire= cs->state & MY_CS_PUREASCII ?
+ MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+ }
DTCollation()
{
collation= &my_charset_bin;
derivation= DERIVATION_NONE;
+ repertoire= MY_REPERTOIRE_UNICODE30;
}
DTCollation(CHARSET_INFO *collation_arg, Derivation derivation_arg)
{
collation= collation_arg;
derivation= derivation_arg;
+ set_repertoire_from_charset(collation_arg);
}
void set(DTCollation &dt)
{
collation= dt.collation;
derivation= dt.derivation;
+ repertoire= dt.repertoire;
}
void set(CHARSET_INFO *collation_arg, Derivation derivation_arg)
{
collation= collation_arg;
derivation= derivation_arg;
+ set_repertoire_from_charset(collation_arg);
+ }
+ void set(CHARSET_INFO *collation_arg,
+ Derivation derivation_arg,
+ uint repertoire_arg)
+ {
+ collation= collation_arg;
+ derivation= derivation_arg;
+ repertoire= repertoire_arg;
}
void set(CHARSET_INFO *collation_arg)
- { collation= collation_arg; }
+ {
+ collation= collation_arg;
+ set_repertoire_from_charset(collation_arg);
+ }
void set(Derivation derivation_arg)
{ derivation= derivation_arg; }
bool aggregate(DTCollation &dt, uint flags= 0);
@@ -1662,10 +1683,11 @@ class Item_string :public Item
{
public:
Item_string(const char *str,uint length,
- CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+ CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
+ uint repertoire= MY_REPERTOIRE_UNICODE30)
{
- collation.set(cs, dv);
- str_value.set_or_copy_aligned(str,length,cs);
+ str_value.set_or_copy_aligned(str, length, cs);
+ collation.set(cs, dv, repertoire);
/*
We have to have a different max_length than 'length' here to
ensure that we get the right length if we do use the item
@@ -1689,10 +1711,11 @@ public:
fixed= 1;
}
Item_string(const char *name_par, const char *str, uint length,
- CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+ CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
+ uint repertoire= MY_REPERTOIRE_UNICODE30)
{
- collation.set(cs, dv);
- str_value.set_or_copy_aligned(str,length,cs);
+ str_value.set_or_copy_aligned(str, length, cs);
+ collation.set(cs, dv, repertoire);
max_length= str_value.numchars()*cs->mbmaxlen;
set_name(name_par, 0, cs);
decimals=NOT_FIXED_DEC;
@@ -1708,6 +1731,12 @@ public:
str_value.copy(str_arg, length_arg, collation.collation);
max_length= str_value.numchars() * collation.collation->mbmaxlen;
}
+ void set_repertoire_from_value()
+ {
+ collation.repertoire= my_string_repertoire(str_value.charset(),
+ str_value.ptr(),
+ str_value.length());
+ }
enum Type type() const { return STRING_ITEM; }
double val_real();
longlong val_int();
diff --git a/sql/item_func.cc b/sql/item_func.cc
index b256ce4624a..fa1352052a2 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -3775,7 +3775,7 @@ static user_var_entry *get_variable(HASH *hash, LEX_STRING &name,
entry->value=0;
entry->length=0;
entry->update_query_id=0;
- entry->collation.set(NULL, DERIVATION_IMPLICIT);
+ entry->collation.set(NULL, DERIVATION_IMPLICIT, 0);
entry->unsigned_flag= 0;
/*
If we are here, we were called from a SET or a query which sets a
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 20a4b64640a..0c11c9eece8 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -2673,7 +2673,8 @@ void Item_func_set_collation::fix_length_and_dec()
colname, args[0]->collation.collation->csname);
return;
}
- collation.set(set_collation, DERIVATION_EXPLICIT);
+ collation.set(set_collation, DERIVATION_EXPLICIT,
+ args[0]->collation.repertoire);
max_length= args[0]->max_length;
}
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 873e2833a1e..ae18e4786d7 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -1718,7 +1718,11 @@ void Item_func_date_format::fix_length_and_dec()
Item *arg1= args[1]->this_item();
decimals=0;
- collation.set(thd->variables.collation_connection);
+ CHARSET_INFO *cs= thd->variables.collation_connection;
+ uint32 repertoire= arg1->collation.repertoire;
+ if (!thd->variables.lc_time_names->is_ascii)
+ repertoire|= MY_REPERTOIRE_EXTENDED;
+ collation.set(cs, arg1->collation.derivation, repertoire);
if (arg1->type() == STRING_ITEM)
{ // Optimize the normal case
fixed_length=1;
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index c37d77345b6..4268120c54d 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -311,10 +311,12 @@ static char *get_text(Lex_input_stream *lip)
uint found_escape=0;
CHARSET_INFO *cs= lip->m_thd->charset();
+ lip->tok_bitmap= 0;
sep= yyGetLast(); // String should end with this
while (lip->ptr != lip->end_of_query)
{
- c = yyGet();
+ c= yyGet();
+ lip->tok_bitmap|= c;
#ifdef USE_MB
{
int l;
@@ -605,6 +607,7 @@ int MYSQLlex(void *arg, void *yythd)
break;
}
yylval->lex_str.length= lip->yytoklen;
+ lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(NCHAR_STRING);
case MY_LEX_IDENT_OR_HEX:
@@ -926,6 +929,7 @@ int MYSQLlex(void *arg, void *yythd)
break;
}
yylval->lex_str.length=lip->yytoklen;
+ lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(TEXT_STRING);
case MY_LEX_COMMENT: // Comment
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index bfa6c05974f..b9c6abd2b06 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -957,6 +957,9 @@ public:
/** Position of ';' in the stream, to delimit multiple queries. */
const char* found_semicolon;
+
+ /** Token character bitmaps, to detect 7bit strings. */
+ uchar tok_bitmap;
/** SQL_MODE = IGNORE_SPACE. */
bool ignore_space;
@@ -994,6 +997,7 @@ typedef struct st_lex : public Query_tables_list
gptr yacc_yyss,yacc_yyvs;
THD *thd;
CHARSET_INFO *charset, *underscore_charset;
+ bool text_string_is_7bit;
/* store original leaf_tables for INSERT SELECT and PS/SP */
TABLE_LIST *leaf_tables_insert;
/* Position (first character index) of SELECT of CREATE VIEW statement */
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 9d7df73cd7a..a87074c3359 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -263,6 +263,8 @@ bool String::needs_conversion(uint32 arg_length,
(to_cs == &my_charset_bin) ||
(to_cs == from_cs) ||
my_charset_same(from_cs, to_cs) ||
+ (my_charset_is_ascii_based(to_cs) &&
+ my_charset_is_8bit_pure_ascii(from_cs)) ||
((from_cs == &my_charset_bin) &&
(!(*offset=(arg_length % to_cs->mbminlen)))))
return FALSE;
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 6fbd521e302..c4a2e69ad9b 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -7523,18 +7523,54 @@ opt_load_data_set_spec:
/* Common definitions */
text_literal:
- TEXT_STRING_literal
- {
- THD *thd= YYTHD;
- $$ = new Item_string($1.str,$1.length,thd->variables.collation_connection);
- }
- | NCHAR_STRING
- { $$= new Item_string($1.str,$1.length,national_charset_info); }
- | UNDERSCORE_CHARSET TEXT_STRING
- { $$ = new Item_string($2.str,$2.length,Lex->underscore_charset); }
- | text_literal TEXT_STRING_literal
- { ((Item_string*) $1)->append($2.str,$2.length); }
- ;
+ TEXT_STRING
+ {
+ LEX_STRING tmp;
+ THD *thd= YYTHD;
+ CHARSET_INFO *cs_con= thd->variables.collation_connection;
+ CHARSET_INFO *cs_cli= thd->variables.character_set_client;
+ uint repertoire= thd->lex->text_string_is_7bit &&
+ my_charset_is_ascii_based(cs_cli) ?
+ MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+ if (thd->charset_is_collation_connection ||
+ (repertoire == MY_REPERTOIRE_ASCII &&
+ my_charset_is_ascii_based(cs_con)))
+ tmp= $1;
+ else
+ thd->convert_string(&tmp, cs_con, $1.str, $1.length, cs_cli);
+ $$= new Item_string(tmp.str, tmp.length, cs_con,
+ DERIVATION_COERCIBLE, repertoire);
+ }
+ | NCHAR_STRING
+ {
+ uint repertoire= Lex->text_string_is_7bit ?
+ MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+ DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
+ $$= new Item_string($1.str, $1.length, national_charset_info,
+ DERIVATION_COERCIBLE, repertoire);
+ }
+ | UNDERSCORE_CHARSET TEXT_STRING
+ {
+ $$= new Item_string($2.str, $2.length, Lex->underscore_charset);
+ ((Item_string*) $$)->set_repertoire_from_value();
+ }
+ | text_literal TEXT_STRING_literal
+ {
+ Item_string* item= (Item_string*) $1;
+ item->append($2.str, $2.length);
+ if (!(item->collation.repertoire & MY_REPERTOIRE_EXTENDED))
+ {
+ /*
+ If the string has been pure ASCII so far,
+ check the new part.
+ */
+ CHARSET_INFO *cs= YYTHD->variables.collation_connection;
+ item->collation.repertoire|= my_string_repertoire(cs,
+ $2.str,
+ $2.length);
+ }
+ }
+ ;
text_string:
TEXT_STRING_literal
@@ -7606,20 +7642,22 @@ literal:
| TRUE_SYM { $$= new Item_int((char*) "TRUE",1,1); }
| HEX_NUM { $$ = new Item_hex_string($1.str, $1.length);}
| BIN_NUM { $$= new Item_bin_string($1.str, $1.length); }
- | UNDERSCORE_CHARSET HEX_NUM
- {
- Item *tmp= new Item_hex_string($2.str, $2.length);
- /*
- it is OK only emulate fix_fieds, because we need only
+ | UNDERSCORE_CHARSET HEX_NUM
+ {
+ Item *tmp= new Item_hex_string($2.str, $2.length);
+ /*
+ it is OK only emulate fix_fieds, because we need only
value of constant
- */
- String *str= tmp ?
- tmp->quick_fix_field(), tmp->val_str((String*) 0) :
- (String*) 0;
- $$= new Item_string(str ? str->ptr() : "",
- str ? str->length() : 0,
- Lex->underscore_charset);
- }
+ */
+ String *str= tmp ?
+ tmp->quick_fix_field(), tmp->val_str((String*) 0) :
+ (String*) 0;
+ $$= new Item_string(str ? str->ptr() : "",
+ str ? str->length() : 0,
+ Lex->underscore_charset);
+ if ($$)
+ ((Item_string *) $$)->set_repertoire_from_value();
+ }
| UNDERSCORE_CHARSET BIN_NUM
{
Item *tmp= new Item_bin_string($2.str, $2.length);