summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <bar@mysql.com>2004-11-22 11:58:40 +0400
committerunknown <bar@mysql.com>2004-11-22 11:58:40 +0400
commit6d6b38c27fcac1da19a11fa07a5f02c2c22635fe (patch)
tree31ad3a81538d6c1313dce3f959bcd8df3a3a2998
parent5a7198f54d1a11ac7ef70a55c6a9b9044147df54 (diff)
downloadmariadb-git-6d6b38c27fcac1da19a11fa07a5f02c2c22635fe.tar.gz
Bug #6737: REGEXP gives wrong result with case sensitive collation:
- A new flag MY_CS_CSSORT was introduced for case sensitivity. - Item_func_regexp doesn't substiture ICASE not only for binary collations but for case sensitive collations as well.
-rw-r--r--include/m_ctype.h2
-rw-r--r--mysql-test/r/ctype_latin1.result9
-rw-r--r--mysql-test/t/ctype_latin1.test7
-rw-r--r--mysys/charset.c11
-rw-r--r--sql/item_cmpfunc.cc20
-rw-r--r--strings/ctype-czech.c12
-rw-r--r--strings/ctype-win1250ch.c12
7 files changed, 51 insertions, 22 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index ddc21070547..26e285b9683 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -63,7 +63,7 @@ typedef struct unicase_info_st
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
#define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
-
+#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
#define MY_CHARSET_UNDEFINED 0
diff --git a/mysql-test/r/ctype_latin1.result b/mysql-test/r/ctype_latin1.result
index a8182438ac4..355f53b63a5 100644
--- a/mysql-test/r/ctype_latin1.result
+++ b/mysql-test/r/ctype_latin1.result
@@ -296,3 +296,12 @@ FD C3BD FD 1
FE C3BE FE 1
FF C3BF FF 1
DROP TABLE t1;
+select 'a' regexp 'A' collate latin1_general_ci;
+'a' regexp 'A' collate latin1_general_ci
+1
+select 'a' regexp 'A' collate latin1_general_cs;
+'a' regexp 'A' collate latin1_general_cs
+0
+select 'a' regexp 'A' collate latin1_bin;
+'a' regexp 'A' collate latin1_bin
+0
diff --git a/mysql-test/t/ctype_latin1.test b/mysql-test/t/ctype_latin1.test
index 14062437428..677acd9faa9 100644
--- a/mysql-test/t/ctype_latin1.test
+++ b/mysql-test/t/ctype_latin1.test
@@ -53,3 +53,10 @@ SELECT
hex(@l:=convert(@u using latin1)),
a=@l FROM t1;
DROP TABLE t1;
+
+#
+# Bug #6737: REGEXP gives wrong result with case sensitive collation
+#
+select 'a' regexp 'A' collate latin1_general_ci;
+select 'a' regexp 'A' collate latin1_general_cs;
+select 'a' regexp 'A' collate latin1_bin;
diff --git a/mysys/charset.c b/mysys/charset.c
index 1388fc40c6d..cb2379f8723 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs)
}
else
{
+ uchar *sort_order= all_charsets[cs->number]->sort_order;
simple_cs_init_functions(all_charsets[cs->number]);
new->mbminlen= 1;
new->mbmaxlen= 1;
@@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs)
all_charsets[cs->number]->state |= MY_CS_LOADED;
}
all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
+
+ /*
+ Check if case sensitive sort order: A < a < B.
+ We need MY_CS_FLAG for regex library, and for
+ case sensitivity flag for 5.0 client protocol,
+ to support isCaseSensitive() method in JDBC driver
+ */
+ if (sort_order && sort_order['A'] < sort_order['a'] &&
+ sort_order['a'] < sort_order['B'])
+ all_charsets[cs->number]->state|= MY_CS_CSSORT;
}
}
else
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index c36f2d191c7..4970517de87 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -2364,11 +2364,12 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
return 0;
}
int error;
- if ((error=regcomp(&preg,res->c_ptr(),
- (cmp_collation.collation->state & MY_CS_BINSORT) ?
- REG_EXTENDED | REG_NOSUB :
- REG_EXTENDED | REG_NOSUB | REG_ICASE,
- cmp_collation.collation)))
+ if ((error= regcomp(&preg,res->c_ptr(),
+ ((cmp_collation.collation->state & MY_CS_BINSORT) ||
+ (cmp_collation.collation->state & MY_CS_CSSORT)) ?
+ REG_EXTENDED | REG_NOSUB :
+ REG_EXTENDED | REG_NOSUB | REG_ICASE,
+ cmp_collation.collation)))
{
(void) regerror(error,&preg,buff,sizeof(buff));
my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
@@ -2416,10 +2417,11 @@ longlong Item_func_regex::val_int()
regex_compiled=0;
}
if (regcomp(&preg,res2->c_ptr(),
- (cmp_collation.collation->state & MY_CS_BINSORT) ?
- REG_EXTENDED | REG_NOSUB :
- REG_EXTENDED | REG_NOSUB | REG_ICASE,
- cmp_collation.collation))
+ ((cmp_collation.collation->state & MY_CS_BINSORT) ||
+ (cmp_collation.collation->state & MY_CS_CSSORT)) ?
+ REG_EXTENDED | REG_NOSUB :
+ REG_EXTENDED | REG_NOSUB | REG_ICASE,
+ cmp_collation.collation))
{
null_value=1;
return 0;
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 6f9e9f74d35..2177a18504e 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -589,12 +589,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
CHARSET_INFO my_charset_latin2_czech_ci =
{
- 2,0,0, /* number */
- MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
- "latin2", /* cs name */
- "latin2_czech_cs", /* name */
- "", /* comment */
- NULL, /* tailoring */
+ 2,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
+ "latin2", /* cs name */
+ "latin2_czech_cs", /* name */
+ "", /* comment */
+ NULL, /* tailoring */
ctype_czech,
to_lower_czech,
to_upper_czech,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index b4dbda3e8ed..4ada3d47bf5 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -624,12 +624,12 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
CHARSET_INFO my_charset_cp1250_czech_ci =
{
- 34,0,0, /* number */
- MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
- "cp1250", /* cs name */
- "cp1250_czech_cs", /* name */
- "", /* comment */
- NULL, /* tailoring */
+ 34,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
+ "cp1250", /* cs name */
+ "cp1250_czech_cs", /* name */
+ "", /* comment */
+ NULL, /* tailoring */
ctype_win1250ch,
to_lower_win1250ch,
to_upper_win1250ch,