Merge mysql.com:/home/bar/mysql-work/mysql-5.0.b28875

into mysql.com:/home/bar/mysql-work/mysql-5.0-rpl

mysql-test/r/ctype_utf8.result: Auto merged
mysql-test/t/ctype_utf8.test: Auto merged
sql/item.cc: Auto merged
sql/item.h: Auto merged
sql/item_func.cc: Auto merged
sql/item_strfunc.cc: Auto merged
sql/item_timefunc.cc: Auto merged
sql/sql_lex.cc: Auto merged
sql/sql_lex.h: Auto merged
sql/sql_yacc.yy: Auto merged
strings/conf_to_src.c: Auto merged
strings/ctype-extra.c: Auto merged
mysql-test/r/ctype_ucs.result: After merge fix
mysql-test/t/ctype_ucs.test: After merge fix
author: unknown <bar@bar.myoffice.izhnet.ru> 2007-08-03 15:30:31 +0500
committer: unknown <bar@bar.myoffice.izhnet.ru> 2007-08-03 15:30:31 +0500
commit: 0d1972aa72dd98a229a2a8c8faca46383bbcc572 (patch)
tree: c657f898b9581b0e2a19433212ad42eca9646b98 /sql
parent: 607ab14cf767ed0187e0c050ed61cb4ebaf34bb7 (diff)
parent: 53df09a9a6a99b82e2a8869eb16737a78772b29e (diff)
download: mariadb-git-0d1972aa72dd98a229a2a8c8faca46383bbcc572.tar.gz
9 files changed, 150 insertions, 50 deletions
diff --git a/sql/item.cc b/sql/item.cc
index 2fc58eebe75..3be32aa829c 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -1296,6 +1296,25 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
 }
 
 
+static bool
+left_is_superset(DTCollation *left, DTCollation *right)
+{
+  /* Allow convert to Unicode */
+  if (left->collation->state & MY_CS_UNICODE &&
+      (left->derivation < right->derivation ||
+       (left->derivation == right->derivation &&
+        !(right->collation->state & MY_CS_UNICODE))))
+    return TRUE;
+  /* Allow convert from ASCII */
+  if (right->repertoire == MY_REPERTOIRE_ASCII &&
+      (left->derivation < right->derivation ||
+       (left->derivation == right->derivation &&
+        !(left->repertoire == MY_REPERTOIRE_ASCII))))
+    return TRUE;
+  /* Disallow conversion otherwise */
+  return FALSE;
+}
+
 /*
    Aggregate two collations together taking
    into account their coercibility (aka derivation):
@@ -1360,18 +1379,12 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
        ; // Do nothing
     }
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
-             collation->state & MY_CS_UNICODE &&
-             (derivation < dt.derivation ||
-             (derivation == dt.derivation &&
-             !(dt.collation->state & MY_CS_UNICODE))))
+             left_is_superset(this, &dt))
     {
       // Do nothing
     }
     else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
-             dt.collation->state & MY_CS_UNICODE &&
-             (dt.derivation < derivation ||
-              (dt.derivation == derivation &&
-             !(collation->state & MY_CS_UNICODE))))
+             left_is_superset(&dt, this))
     {
       set(dt);
     }
@@ -1390,7 +1403,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
     else
     {
       // Cannot apply conversion
-      set(0, DERIVATION_NONE);
+      set(0, DERIVATION_NONE, 0);
       return 1;
     }
   }
@@ -1412,8 +1425,8 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
     {
       if (derivation == DERIVATION_EXPLICIT)
       {
-	set(0, DERIVATION_NONE);
-	return 1;
+        set(0, DERIVATION_NONE, 0);
+        return 1;
       }
       if (collation->state & MY_CS_BINSORT)
         return 0;
@@ -1427,6 +1440,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
       set(bin, DERIVATION_NONE);
     }
   }
+  repertoire|= dt.repertoire;
   return 0;
 }
 
@@ -1566,12 +1580,16 @@ bool agg_item_charsets(DTCollation &coll, const char *fname,
   {
     Item* conv;
     uint32 dummy_offset;
-    if (!String::needs_conversion(0, coll.collation,
-                                  (*arg)->collation.collation,
+    if (!String::needs_conversion(0, (*arg)->collation.collation,
+                                  coll.collation,
                                   &dummy_offset))
       continue;
 
-    if (!(conv= (*arg)->safe_charset_converter(coll.collation)))
+    if (!(conv= (*arg)->safe_charset_converter(coll.collation)) &&
+        ((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
+      conv= new Item_func_conv_charset(*arg, coll.collation, 1);
+
+    if (!conv)
     {
       if (nargs >=2 && nargs <= 3)
       {
diff --git a/sql/item.h b/sql/item.h
index 5b1a80a5f03..7d23bfe90de 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -49,29 +49,50 @@ class DTCollation {
 public:
   CHARSET_INFO     *collation;
   enum Derivation derivation;
+  uint repertoire;
   
+  void set_repertoire_from_charset(CHARSET_INFO *cs)
+  {
+    repertoire= cs->state & MY_CS_PUREASCII ?
+                MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+  }
   DTCollation()
   {
     collation= &my_charset_bin;
     derivation= DERIVATION_NONE;
+    repertoire= MY_REPERTOIRE_UNICODE30;
   }
   DTCollation(CHARSET_INFO *collation_arg, Derivation derivation_arg)
   {
     collation= collation_arg;
     derivation= derivation_arg;
+    set_repertoire_from_charset(collation_arg);
   }
   void set(DTCollation &dt)
   { 
     collation= dt.collation;
     derivation= dt.derivation;
+    repertoire= dt.repertoire;
   }
   void set(CHARSET_INFO *collation_arg, Derivation derivation_arg)
   {
     collation= collation_arg;
     derivation= derivation_arg;
+    set_repertoire_from_charset(collation_arg);
+  }
+  void set(CHARSET_INFO *collation_arg,
+           Derivation derivation_arg,
+           uint repertoire_arg)
+  {
+    collation= collation_arg;
+    derivation= derivation_arg;
+    repertoire= repertoire_arg;
   }
   void set(CHARSET_INFO *collation_arg)
-  { collation= collation_arg; }
+  {
+    collation= collation_arg;
+    set_repertoire_from_charset(collation_arg);
+  }
   void set(Derivation derivation_arg)
   { derivation= derivation_arg; }
   bool aggregate(DTCollation &dt, uint flags= 0);
@@ -1662,10 +1683,11 @@ class Item_string :public Item
 {
 public:
   Item_string(const char *str,uint length,
-  	      CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+              CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
+              uint repertoire= MY_REPERTOIRE_UNICODE30)
   {
-    collation.set(cs, dv);
-    str_value.set_or_copy_aligned(str,length,cs);
+    str_value.set_or_copy_aligned(str, length, cs);
+    collation.set(cs, dv, repertoire);
     /*
       We have to have a different max_length than 'length' here to
       ensure that we get the right length if we do use the item
@@ -1689,10 +1711,11 @@ public:
     fixed= 1;
   }
   Item_string(const char *name_par, const char *str, uint length,
-	      CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+              CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
+              uint repertoire= MY_REPERTOIRE_UNICODE30)
   {
-    collation.set(cs, dv);
-    str_value.set_or_copy_aligned(str,length,cs);
+    str_value.set_or_copy_aligned(str, length, cs);
+    collation.set(cs, dv, repertoire);
     max_length= str_value.numchars()*cs->mbmaxlen;
     set_name(name_par, 0, cs);
     decimals=NOT_FIXED_DEC;
@@ -1708,6 +1731,12 @@ public:
     str_value.copy(str_arg, length_arg, collation.collation);
     max_length= str_value.numchars() * collation.collation->mbmaxlen;
   }
+  void set_repertoire_from_value()
+  {
+    collation.repertoire= my_string_repertoire(str_value.charset(),
+                                               str_value.ptr(),
+                                               str_value.length());
+  }
   enum Type type() const { return STRING_ITEM; }
   double val_real();
   longlong val_int();
diff --git a/sql/item_func.cc b/sql/item_func.cc
index b256ce4624a..fa1352052a2 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -3775,7 +3775,7 @@ static user_var_entry *get_variable(HASH *hash, LEX_STRING &name,
     entry->value=0;
     entry->length=0;
     entry->update_query_id=0;
-    entry->collation.set(NULL, DERIVATION_IMPLICIT);
+    entry->collation.set(NULL, DERIVATION_IMPLICIT, 0);
     entry->unsigned_flag= 0;
     /*
       If we are here, we were called from a SET or a query which sets a
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 20a4b64640a..0c11c9eece8 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -2673,7 +2673,8 @@ void Item_func_set_collation::fix_length_and_dec()
              colname, args[0]->collation.collation->csname);
     return;
   }
-  collation.set(set_collation, DERIVATION_EXPLICIT);
+  collation.set(set_collation, DERIVATION_EXPLICIT,
+                args[0]->collation.repertoire);
   max_length= args[0]->max_length;
 }
 
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 873e2833a1e..ae18e4786d7 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -1718,7 +1718,11 @@ void Item_func_date_format::fix_length_and_dec()
   Item *arg1= args[1]->this_item();
 
   decimals=0;
-  collation.set(thd->variables.collation_connection);
+  CHARSET_INFO *cs= thd->variables.collation_connection;
+  uint32 repertoire= arg1->collation.repertoire;
+  if (!thd->variables.lc_time_names->is_ascii)
+    repertoire|= MY_REPERTOIRE_EXTENDED;
+  collation.set(cs, arg1->collation.derivation, repertoire);
   if (arg1->type() == STRING_ITEM)
   {						// Optimize the normal case
     fixed_length=1;
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index c37d77345b6..4268120c54d 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -311,10 +311,12 @@ static char *get_text(Lex_input_stream *lip)
   uint found_escape=0;
   CHARSET_INFO *cs= lip->m_thd->charset();
 
+  lip->tok_bitmap= 0;
   sep= yyGetLast();			// String should end with this
   while (lip->ptr != lip->end_of_query)
   {
-    c = yyGet();
+    c= yyGet();
+    lip->tok_bitmap|= c;
 #ifdef USE_MB
     {
       int l;
@@ -605,6 +607,7 @@ int MYSQLlex(void *arg, void *yythd)
 	break;
       }
       yylval->lex_str.length= lip->yytoklen;
+      lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
       return(NCHAR_STRING);
 
     case MY_LEX_IDENT_OR_HEX:
@@ -926,6 +929,7 @@ int MYSQLlex(void *arg, void *yythd)
 	break;
       }
       yylval->lex_str.length=lip->yytoklen;
+      lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
       return(TEXT_STRING);
 
     case MY_LEX_COMMENT:			//  Comment
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index bfa6c05974f..b9c6abd2b06 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -957,6 +957,9 @@ public:
 
   /** Position of ';' in the stream, to delimit multiple queries. */
   const char* found_semicolon;
+  
+  /** Token character bitmaps, to detect 7bit strings. */
+  uchar tok_bitmap;
 
   /** SQL_MODE = IGNORE_SPACE. */
   bool ignore_space;
@@ -994,6 +997,7 @@ typedef struct st_lex : public Query_tables_list
   gptr yacc_yyss,yacc_yyvs;
   THD *thd;
   CHARSET_INFO *charset, *underscore_charset;
+  bool text_string_is_7bit;
   /* store original leaf_tables for INSERT SELECT and PS/SP */
   TABLE_LIST *leaf_tables_insert;
   /* Position (first character index) of SELECT of CREATE VIEW statement */
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 9d7df73cd7a..a87074c3359 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -263,6 +263,8 @@ bool String::needs_conversion(uint32 arg_length,
       (to_cs == &my_charset_bin) || 
       (to_cs == from_cs) ||
       my_charset_same(from_cs, to_cs) ||
+      (my_charset_is_ascii_based(to_cs) &&
+       my_charset_is_8bit_pure_ascii(from_cs)) ||
       ((from_cs == &my_charset_bin) &&
        (!(*offset=(arg_length % to_cs->mbminlen)))))
     return FALSE;
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 6fbd521e302..c4a2e69ad9b 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -7523,18 +7523,54 @@ opt_load_data_set_spec:
 /* Common definitions */
 
 text_literal:
-	TEXT_STRING_literal
-	{
-	  THD *thd= YYTHD;
-	  $$ = new Item_string($1.str,$1.length,thd->variables.collation_connection);
-	}
-	| NCHAR_STRING
-	{ $$=  new Item_string($1.str,$1.length,national_charset_info); }
-	| UNDERSCORE_CHARSET TEXT_STRING
-	  { $$ = new Item_string($2.str,$2.length,Lex->underscore_charset); }
-	| text_literal TEXT_STRING_literal
-	  { ((Item_string*) $1)->append($2.str,$2.length); }
-	;
+        TEXT_STRING
+        {
+          LEX_STRING tmp;
+          THD *thd= YYTHD;
+          CHARSET_INFO *cs_con= thd->variables.collation_connection;
+          CHARSET_INFO *cs_cli= thd->variables.character_set_client;
+          uint repertoire= thd->lex->text_string_is_7bit &&
+                             my_charset_is_ascii_based(cs_cli) ?
+                           MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+          if (thd->charset_is_collation_connection ||
+              (repertoire == MY_REPERTOIRE_ASCII &&
+               my_charset_is_ascii_based(cs_con)))
+            tmp= $1;
+          else
+            thd->convert_string(&tmp, cs_con, $1.str, $1.length, cs_cli);
+          $$= new Item_string(tmp.str, tmp.length, cs_con,
+                              DERIVATION_COERCIBLE, repertoire);
+        }
+        | NCHAR_STRING
+        {
+          uint repertoire= Lex->text_string_is_7bit ?
+                           MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+          DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
+          $$= new Item_string($1.str, $1.length, national_charset_info,
+                              DERIVATION_COERCIBLE, repertoire);
+        }
+        | UNDERSCORE_CHARSET TEXT_STRING
+          {
+            $$= new Item_string($2.str, $2.length, Lex->underscore_charset);
+            ((Item_string*) $$)->set_repertoire_from_value();
+          }
+        | text_literal TEXT_STRING_literal
+          {
+            Item_string* item= (Item_string*) $1;
+            item->append($2.str, $2.length);
+            if (!(item->collation.repertoire & MY_REPERTOIRE_EXTENDED))
+            {
+              /*
+                 If the string has been pure ASCII so far,
+                 check the new part.
+              */
+              CHARSET_INFO *cs= YYTHD->variables.collation_connection;
+              item->collation.repertoire|= my_string_repertoire(cs,
+                                                                $2.str,
+                                                                $2.length);
+            }
+          }
+        ;
 
 text_string:
 	TEXT_STRING_literal
@@ -7606,20 +7642,22 @@ literal:
 	| TRUE_SYM	{ $$= new Item_int((char*) "TRUE",1,1); }
 	| HEX_NUM	{ $$ =	new Item_hex_string($1.str, $1.length);}
 	| BIN_NUM	{ $$= new Item_bin_string($1.str, $1.length); }
-	| UNDERSCORE_CHARSET HEX_NUM
-	  {
-	    Item *tmp= new Item_hex_string($2.str, $2.length);
-	    /*
-	      it is OK only emulate fix_fieds, because we need only
+        | UNDERSCORE_CHARSET HEX_NUM
+          {
+            Item *tmp= new Item_hex_string($2.str, $2.length);
+            /*
+              it is OK only emulate fix_fieds, because we need only
               value of constant
-	    */
-	    String *str= tmp ?
-	      tmp->quick_fix_field(), tmp->val_str((String*) 0) :
-	      (String*) 0;
-	    $$= new Item_string(str ? str->ptr() : "",
-				str ? str->length() : 0,
-				Lex->underscore_charset);
-	  }
+            */
+            String *str= tmp ?
+              tmp->quick_fix_field(), tmp->val_str((String*) 0) :
+              (String*) 0;
+            $$= new Item_string(str ? str->ptr() : "",
+                                str ? str->length() : 0,
+                                Lex->underscore_charset);
+            if ($$)
+              ((Item_string *) $$)->set_repertoire_from_value();
+          }
 	| UNDERSCORE_CHARSET BIN_NUM
           {
 	    Item *tmp= new Item_bin_string($2.str, $2.length);
author	unknown <bar@bar.myoffice.izhnet.ru>	2007-08-03 15:30:31 +0500
committer	unknown <bar@bar.myoffice.izhnet.ru>	2007-08-03 15:30:31 +0500
commit	0d1972aa72dd98a229a2a8c8faca46383bbcc572 (patch)
tree	c657f898b9581b0e2a19433212ad42eca9646b98 /sql
parent	607ab14cf767ed0187e0c050ed61cb4ebaf34bb7 (diff)
parent	53df09a9a6a99b82e2a8869eb16737a78772b29e (diff)
download	mariadb-git-0d1972aa72dd98a229a2a8c8faca46383bbcc572.tar.gz