Bug #6737: REGEXP gives wrong result with case sensitive collation:

- A new flag, MY_CS_CSSORT, was introduced to mark collations with a case-sensitive sort order.
- Item_func_regex now omits REG_ICASE not only for binary collations but for case-sensitive collations as well.
author: unknown <bar@mysql.com> 2004-11-22 11:58:40 +0400
committer: unknown <bar@mysql.com> 2004-11-22 11:58:40 +0400
commit: 6d6b38c27fcac1da19a11fa07a5f02c2c22635fe (patch)
tree: 31ad3a81538d6c1313dce3f959bcd8df3a3a2998
parent: 5a7198f54d1a11ac7ef70a55c6a9b9044147df54 (diff)
download: mariadb-git-6d6b38c27fcac1da19a11fa07a5f02c2c22635fe.tar.gz
7 files changed, 51 insertions, 22 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index ddc21070547..26e285b9683 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -63,7 +63,7 @@ typedef struct unicase_info_st
 #define MY_CS_UNICODE	128    /* is a charset is full unicode   */
 #define MY_CS_READY	256    /* if a charset is initialized    */
 #define MY_CS_AVAILABLE	512    /* If either compiled-in or loaded*/
-
+#define MY_CS_CSSORT	1024   /* if case sensitive sort order   */	
 #define MY_CHARSET_UNDEFINED 0
 
 
diff --git a/mysql-test/r/ctype_latin1.result b/mysql-test/r/ctype_latin1.result
index a8182438ac4..355f53b63a5 100644
--- a/mysql-test/r/ctype_latin1.result
+++ b/mysql-test/r/ctype_latin1.result
@@ -296,3 +296,12 @@ FD	C3BD	FD	1
 FE	C3BE	FE	1
 FF	C3BF	FF	1
 DROP TABLE t1;
+select 'a' regexp 'A' collate latin1_general_ci;
+'a' regexp 'A' collate latin1_general_ci
+1
+select 'a' regexp 'A' collate latin1_general_cs;
+'a' regexp 'A' collate latin1_general_cs
+0
+select 'a' regexp 'A' collate latin1_bin;
+'a' regexp 'A' collate latin1_bin
+0
diff --git a/mysql-test/t/ctype_latin1.test b/mysql-test/t/ctype_latin1.test
index 14062437428..677acd9faa9 100644
--- a/mysql-test/t/ctype_latin1.test
+++ b/mysql-test/t/ctype_latin1.test
@@ -53,3 +53,10 @@ SELECT
   hex(@l:=convert(@u using latin1)),
   a=@l FROM t1;
 DROP TABLE t1;
+
+#
+# Bug #6737: REGEXP gives wrong result with case sensitive collation
+#
+select 'a' regexp 'A' collate latin1_general_ci;
+select 'a' regexp 'A' collate latin1_general_cs;
+select 'a' regexp 'A' collate latin1_bin;
diff --git a/mysys/charset.c b/mysys/charset.c
index 1388fc40c6d..cb2379f8723 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs)
       }
       else
       {
+        uchar *sort_order= all_charsets[cs->number]->sort_order;
         simple_cs_init_functions(all_charsets[cs->number]);
         new->mbminlen= 1;
         new->mbmaxlen= 1;
@@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs)
           all_charsets[cs->number]->state |= MY_CS_LOADED;
         }
         all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
+        
+        /*
+          Check if case sensitive sort order: A < a < B.
+          We need MY_CS_FLAG for regex library, and for
+          case sensitivity flag for 5.0 client protocol,
+          to support isCaseSensitive() method in JDBC driver 
+        */
+        if (sort_order && sort_order['A'] < sort_order['a'] &&
+                          sort_order['a'] < sort_order['B'])
+          all_charsets[cs->number]->state|= MY_CS_CSSORT; 
       }
     }
     else
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index c36f2d191c7..4970517de87 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -2364,11 +2364,12 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
       return 0;
     }
     int error;
-    if ((error=regcomp(&preg,res->c_ptr(),
-		       (cmp_collation.collation->state & MY_CS_BINSORT) ?
-		       REG_EXTENDED | REG_NOSUB :
-		       REG_EXTENDED | REG_NOSUB | REG_ICASE,
-		       cmp_collation.collation)))
+    if ((error= regcomp(&preg,res->c_ptr(),
+                        ((cmp_collation.collation->state & MY_CS_BINSORT) ||
+                         (cmp_collation.collation->state & MY_CS_CSSORT)) ?
+                         REG_EXTENDED | REG_NOSUB :
+                         REG_EXTENDED | REG_NOSUB | REG_ICASE,
+                        cmp_collation.collation)))
     {
       (void) regerror(error,&preg,buff,sizeof(buff));
       my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
@@ -2416,10 +2417,11 @@ longlong Item_func_regex::val_int()
 	regex_compiled=0;
       }
       if (regcomp(&preg,res2->c_ptr(),
-		  (cmp_collation.collation->state & MY_CS_BINSORT) ?
-		  REG_EXTENDED | REG_NOSUB :
-		  REG_EXTENDED | REG_NOSUB | REG_ICASE,
-		  cmp_collation.collation))
+                  ((cmp_collation.collation->state & MY_CS_BINSORT) ||
+                   (cmp_collation.collation->state & MY_CS_CSSORT)) ?
+                   REG_EXTENDED | REG_NOSUB :
+                   REG_EXTENDED | REG_NOSUB | REG_ICASE,
+                   cmp_collation.collation))
       {
 	null_value=1;
 	return 0;
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 6f9e9f74d35..2177a18504e 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -589,12 +589,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
 
 CHARSET_INFO my_charset_latin2_czech_ci =
 {
-    2,0,0,				/* number */
-    MY_CS_COMPILED|MY_CS_STRNXFRM,	/* state      */
-    "latin2",				/* cs name    */
-    "latin2_czech_cs",			/* name */
-    "",					/* comment    */
-    NULL,				/* tailoring */
+    2,0,0,                                      /* number    */
+    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state     */
+    "latin2",                                   /* cs name   */
+    "latin2_czech_cs",                          /* name      */
+    "",                                         /* comment   */
+    NULL,                                       /* tailoring */
     ctype_czech,
     to_lower_czech,
     to_upper_czech,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index b4dbda3e8ed..4ada3d47bf5 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -624,12 +624,12 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
 
 CHARSET_INFO my_charset_cp1250_czech_ci =
 {
-  34,0,0,			/* number    */
-  MY_CS_COMPILED|MY_CS_STRNXFRM,		/* state     */
-  "cp1250",			/* cs name    */
-  "cp1250_czech_cs",		/* name      */
-  "",				/* comment   */
-  NULL,				/* tailoring */
+  34,0,0,                                     /* number    */
+  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state     */
+  "cp1250",                                   /* cs name   */
+  "cp1250_czech_cs",                          /* name      */
+  "",                                         /* comment   */
+  NULL,                                       /* tailoring */
   ctype_win1250ch,
   to_lower_win1250ch,
   to_upper_win1250ch,
author	unknown <bar@mysql.com>	2004-11-22 11:58:40 +0400
committer	unknown <bar@mysql.com>	2004-11-22 11:58:40 +0400
commit	6d6b38c27fcac1da19a11fa07a5f02c2c22635fe (patch)
tree	31ad3a81538d6c1313dce3f959bcd8df3a3a2998
parent	5a7198f54d1a11ac7ef70a55c6a9b9044147df54 (diff)
download	mariadb-git-6d6b38c27fcac1da19a11fa07a5f02c2c22635fe.tar.gz