diff options
author | unknown <bar@bar.myoffice.izhnet.ru> | 2007-08-03 15:30:31 +0500 |
---|---|---|
committer | unknown <bar@bar.myoffice.izhnet.ru> | 2007-08-03 15:30:31 +0500 |
commit | 0d1972aa72dd98a229a2a8c8faca46383bbcc572 (patch) | |
tree | c657f898b9581b0e2a19433212ad42eca9646b98 /sql | |
parent | 607ab14cf767ed0187e0c050ed61cb4ebaf34bb7 (diff) | |
parent | 53df09a9a6a99b82e2a8869eb16737a78772b29e (diff) | |
download | mariadb-git-0d1972aa72dd98a229a2a8c8faca46383bbcc572.tar.gz |
Merge mysql.com:/home/bar/mysql-work/mysql-5.0.b28875
into mysql.com:/home/bar/mysql-work/mysql-5.0-rpl
mysql-test/r/ctype_utf8.result:
Auto merged
mysql-test/t/ctype_utf8.test:
Auto merged
sql/item.cc:
Auto merged
sql/item.h:
Auto merged
sql/item_func.cc:
Auto merged
sql/item_strfunc.cc:
Auto merged
sql/item_timefunc.cc:
Auto merged
sql/sql_lex.cc:
Auto merged
sql/sql_lex.h:
Auto merged
sql/sql_yacc.yy:
Auto merged
strings/conf_to_src.c:
Auto merged
strings/ctype-extra.c:
Auto merged
mysql-test/r/ctype_ucs.result:
After merge fix
mysql-test/t/ctype_ucs.test:
After merge fix
Diffstat (limited to 'sql')
-rw-r--r-- | sql/item.cc | 46 | ||||
-rw-r--r-- | sql/item.h | 43 | ||||
-rw-r--r-- | sql/item_func.cc | 2 | ||||
-rw-r--r-- | sql/item_strfunc.cc | 3 | ||||
-rw-r--r-- | sql/item_timefunc.cc | 6 | ||||
-rw-r--r-- | sql/sql_lex.cc | 6 | ||||
-rw-r--r-- | sql/sql_lex.h | 4 | ||||
-rw-r--r-- | sql/sql_string.cc | 2 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 88 |
9 files changed, 150 insertions, 50 deletions
diff --git a/sql/item.cc b/sql/item.cc index 2fc58eebe75..3be32aa829c 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -1296,6 +1296,25 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array, } +static bool +left_is_superset(DTCollation *left, DTCollation *right) +{ + /* Allow convert to Unicode */ + if (left->collation->state & MY_CS_UNICODE && + (left->derivation < right->derivation || + (left->derivation == right->derivation && + !(right->collation->state & MY_CS_UNICODE)))) + return TRUE; + /* Allow convert from ASCII */ + if (right->repertoire == MY_REPERTOIRE_ASCII && + (left->derivation < right->derivation || + (left->derivation == right->derivation && + !(left->repertoire == MY_REPERTOIRE_ASCII)))) + return TRUE; + /* Disallow conversion otherwise */ + return FALSE; +} + /* Aggregate two collations together taking into account their coercibility (aka derivation): @@ -1360,18 +1379,12 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags) ; // Do nothing } else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) && - collation->state & MY_CS_UNICODE && - (derivation < dt.derivation || - (derivation == dt.derivation && - !(dt.collation->state & MY_CS_UNICODE)))) + left_is_superset(this, &dt)) { // Do nothing } else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) && - dt.collation->state & MY_CS_UNICODE && - (dt.derivation < derivation || - (dt.derivation == derivation && - !(collation->state & MY_CS_UNICODE)))) + left_is_superset(&dt, this)) { set(dt); } @@ -1390,7 +1403,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags) else { // Cannot apply conversion - set(0, DERIVATION_NONE); + set(0, DERIVATION_NONE, 0); return 1; } } @@ -1412,8 +1425,8 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags) { if (derivation == DERIVATION_EXPLICIT) { - set(0, DERIVATION_NONE); - return 1; + set(0, DERIVATION_NONE, 0); + return 1; } if (collation->state & MY_CS_BINSORT) return 0; @@ -1427,6 +1440,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags) set(bin, DERIVATION_NONE); } } + repertoire|= dt.repertoire; return 0; } @@ -1566,12 +1580,16 @@ bool agg_item_charsets(DTCollation &coll, const char *fname, { Item* conv; uint32 dummy_offset; - if (!String::needs_conversion(0, coll.collation, - (*arg)->collation.collation, + if (!String::needs_conversion(0, (*arg)->collation.collation, + coll.collation, &dummy_offset)) continue; - if (!(conv= (*arg)->safe_charset_converter(coll.collation))) + if (!(conv= (*arg)->safe_charset_converter(coll.collation)) && + ((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII)) + conv= new Item_func_conv_charset(*arg, coll.collation, 1); + + if (!conv) { if (nargs >=2 && nargs <= 3) { diff --git a/sql/item.h b/sql/item.h index 5b1a80a5f03..7d23bfe90de 100644 --- a/sql/item.h +++ b/sql/item.h @@ -49,29 +49,50 @@ class DTCollation { public: CHARSET_INFO *collation; enum Derivation derivation; + uint repertoire; + void set_repertoire_from_charset(CHARSET_INFO *cs) + { + repertoire= cs->state & MY_CS_PUREASCII ? + MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + } DTCollation() { collation= &my_charset_bin; derivation= DERIVATION_NONE; + repertoire= MY_REPERTOIRE_UNICODE30; } DTCollation(CHARSET_INFO *collation_arg, Derivation derivation_arg) { collation= collation_arg; derivation= derivation_arg; + set_repertoire_from_charset(collation_arg); } void set(DTCollation &dt) { collation= dt.collation; derivation= dt.derivation; + repertoire= dt.repertoire; } void set(CHARSET_INFO *collation_arg, Derivation derivation_arg) { collation= collation_arg; derivation= derivation_arg; + set_repertoire_from_charset(collation_arg); + } + void set(CHARSET_INFO *collation_arg, + Derivation derivation_arg, + uint repertoire_arg) + { + collation= collation_arg; + derivation= derivation_arg; + repertoire= repertoire_arg; } void set(CHARSET_INFO *collation_arg) - { collation= collation_arg; } + { + collation= collation_arg; + set_repertoire_from_charset(collation_arg); + } void set(Derivation derivation_arg) { derivation= derivation_arg; } bool aggregate(DTCollation &dt, uint flags= 0); @@ -1662,10 +1683,11 @@ class Item_string :public Item { public: Item_string(const char *str,uint length, - CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) + CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE, + uint repertoire= MY_REPERTOIRE_UNICODE30) { - collation.set(cs, dv); - str_value.set_or_copy_aligned(str,length,cs); + str_value.set_or_copy_aligned(str, length, cs); + collation.set(cs, dv, repertoire); /* We have to have a different max_length than 'length' here to ensure that we get the right length if we do use the item @@ -1689,10 +1711,11 @@ public: fixed= 1; } Item_string(const char *name_par, const char *str, uint length, - CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) + CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE, + uint repertoire= MY_REPERTOIRE_UNICODE30) { - collation.set(cs, dv); - str_value.set_or_copy_aligned(str,length,cs); + str_value.set_or_copy_aligned(str, length, cs); + collation.set(cs, dv, repertoire); max_length= str_value.numchars()*cs->mbmaxlen; set_name(name_par, 0, cs); decimals=NOT_FIXED_DEC; @@ -1708,6 +1731,12 @@ public: str_value.copy(str_arg, length_arg, collation.collation); max_length= str_value.numchars() * collation.collation->mbmaxlen; } + void set_repertoire_from_value() + { + collation.repertoire= my_string_repertoire(str_value.charset(), + str_value.ptr(), + str_value.length()); + } enum Type type() const { return STRING_ITEM; } double val_real(); longlong val_int(); diff --git a/sql/item_func.cc b/sql/item_func.cc index b256ce4624a..fa1352052a2 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -3775,7 +3775,7 @@ static user_var_entry *get_variable(HASH *hash, LEX_STRING &name, entry->value=0; entry->length=0; entry->update_query_id=0; - entry->collation.set(NULL, DERIVATION_IMPLICIT); + entry->collation.set(NULL, DERIVATION_IMPLICIT, 0); entry->unsigned_flag= 0; /* If we are here, we were called from a SET or a query which sets a diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 20a4b64640a..0c11c9eece8 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -2673,7 +2673,8 @@ void Item_func_set_collation::fix_length_and_dec() colname, args[0]->collation.collation->csname); return; } - collation.set(set_collation, DERIVATION_EXPLICIT); + collation.set(set_collation, DERIVATION_EXPLICIT, + args[0]->collation.repertoire); max_length= args[0]->max_length; } diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 873e2833a1e..ae18e4786d7 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -1718,7 +1718,11 @@ void Item_func_date_format::fix_length_and_dec() Item *arg1= args[1]->this_item(); decimals=0; - collation.set(thd->variables.collation_connection); + CHARSET_INFO *cs= thd->variables.collation_connection; + uint32 repertoire= arg1->collation.repertoire; + if (!thd->variables.lc_time_names->is_ascii) + repertoire|= MY_REPERTOIRE_EXTENDED; + collation.set(cs, arg1->collation.derivation, repertoire); if (arg1->type() == STRING_ITEM) { // Optimize the normal case fixed_length=1; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index c37d77345b6..4268120c54d 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -311,10 +311,12 @@ static char *get_text(Lex_input_stream *lip) uint found_escape=0; CHARSET_INFO *cs= lip->m_thd->charset(); + lip->tok_bitmap= 0; sep= yyGetLast(); // String should end with this while (lip->ptr != lip->end_of_query) { - c = yyGet(); + c= yyGet(); + lip->tok_bitmap|= c; #ifdef USE_MB { int l; @@ -605,6 +607,7 @@ int MYSQLlex(void *arg, void *yythd) break; } yylval->lex_str.length= lip->yytoklen; + lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1; return(NCHAR_STRING); case MY_LEX_IDENT_OR_HEX: @@ -926,6 +929,7 @@ int MYSQLlex(void *arg, void *yythd) break; } yylval->lex_str.length=lip->yytoklen; + lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1; return(TEXT_STRING); case MY_LEX_COMMENT: // Comment diff --git a/sql/sql_lex.h b/sql/sql_lex.h index bfa6c05974f..b9c6abd2b06 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -957,6 +957,9 @@ public: /** Position of ';' in the stream, to delimit multiple queries. */ const char* found_semicolon; + + /** Token character bitmaps, to detect 7bit strings. */ + uchar tok_bitmap; /** SQL_MODE = IGNORE_SPACE. */ bool ignore_space; @@ -994,6 +997,7 @@ typedef struct st_lex : public Query_tables_list gptr yacc_yyss,yacc_yyvs; THD *thd; CHARSET_INFO *charset, *underscore_charset; + bool text_string_is_7bit; /* store original leaf_tables for INSERT SELECT and PS/SP */ TABLE_LIST *leaf_tables_insert; /* Position (first character index) of SELECT of CREATE VIEW statement */ diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 9d7df73cd7a..a87074c3359 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -263,6 +263,8 @@ bool String::needs_conversion(uint32 arg_length, (to_cs == &my_charset_bin) || (to_cs == from_cs) || my_charset_same(from_cs, to_cs) || + (my_charset_is_ascii_based(to_cs) && + my_charset_is_8bit_pure_ascii(from_cs)) || ((from_cs == &my_charset_bin) && (!(*offset=(arg_length % to_cs->mbminlen))))) return FALSE; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 6fbd521e302..c4a2e69ad9b 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -7523,18 +7523,54 @@ opt_load_data_set_spec: /* Common definitions */ text_literal: - TEXT_STRING_literal - { - THD *thd= YYTHD; - $$ = new Item_string($1.str,$1.length,thd->variables.collation_connection); - } - | NCHAR_STRING - { $$= new Item_string($1.str,$1.length,national_charset_info); } - | UNDERSCORE_CHARSET TEXT_STRING - { $$ = new Item_string($2.str,$2.length,Lex->underscore_charset); } - | text_literal TEXT_STRING_literal - { ((Item_string*) $1)->append($2.str,$2.length); } - ; + TEXT_STRING + { + LEX_STRING tmp; + THD *thd= YYTHD; + CHARSET_INFO *cs_con= thd->variables.collation_connection; + CHARSET_INFO *cs_cli= thd->variables.character_set_client; + uint repertoire= thd->lex->text_string_is_7bit && + my_charset_is_ascii_based(cs_cli) ? + MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + if (thd->charset_is_collation_connection || + (repertoire == MY_REPERTOIRE_ASCII && + my_charset_is_ascii_based(cs_con))) + tmp= $1; + else + thd->convert_string(&tmp, cs_con, $1.str, $1.length, cs_cli); + $$= new Item_string(tmp.str, tmp.length, cs_con, + DERIVATION_COERCIBLE, repertoire); + } + | NCHAR_STRING + { + uint repertoire= Lex->text_string_is_7bit ? + MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info)); + $$= new Item_string($1.str, $1.length, national_charset_info, + DERIVATION_COERCIBLE, repertoire); + } + | UNDERSCORE_CHARSET TEXT_STRING + { + $$= new Item_string($2.str, $2.length, Lex->underscore_charset); + ((Item_string*) $$)->set_repertoire_from_value(); + } + | text_literal TEXT_STRING_literal + { + Item_string* item= (Item_string*) $1; + item->append($2.str, $2.length); + if (!(item->collation.repertoire & MY_REPERTOIRE_EXTENDED)) + { + /* + If the string has been pure ASCII so far, + check the new part. + */ + CHARSET_INFO *cs= YYTHD->variables.collation_connection; + item->collation.repertoire|= my_string_repertoire(cs, + $2.str, + $2.length); + } + } + ; text_string: TEXT_STRING_literal @@ -7606,20 +7642,22 @@ literal: | TRUE_SYM { $$= new Item_int((char*) "TRUE",1,1); } | HEX_NUM { $$ = new Item_hex_string($1.str, $1.length);} | BIN_NUM { $$= new Item_bin_string($1.str, $1.length); } - | UNDERSCORE_CHARSET HEX_NUM - { - Item *tmp= new Item_hex_string($2.str, $2.length); - /* - it is OK only emulate fix_fieds, because we need only + | UNDERSCORE_CHARSET HEX_NUM + { + Item *tmp= new Item_hex_string($2.str, $2.length); + /* + it is OK only emulate fix_fieds, because we need only value of constant - */ - String *str= tmp ? - tmp->quick_fix_field(), tmp->val_str((String*) 0) : - (String*) 0; - $$= new Item_string(str ? str->ptr() : "", - str ? str->length() : 0, - Lex->underscore_charset); - } + */ + String *str= tmp ? + tmp->quick_fix_field(), tmp->val_str((String*) 0) : + (String*) 0; + $$= new Item_string(str ? str->ptr() : "", + str ? str->length() : 0, + Lex->underscore_charset); + if ($$) + ((Item_string *) $$)->set_repertoire_from_value(); + } | UNDERSCORE_CHARSET BIN_NUM { Item *tmp= new Item_bin_string($2.str, $2.length); |