diff options
author | unknown <bar@mysql.com> | 2004-11-22 11:58:40 +0400 |
---|---|---|
committer | unknown <bar@mysql.com> | 2004-11-22 11:58:40 +0400 |
commit | 6d6b38c27fcac1da19a11fa07a5f02c2c22635fe (patch) | |
tree | 31ad3a81538d6c1313dce3f959bcd8df3a3a2998 | |
parent | 5a7198f54d1a11ac7ef70a55c6a9b9044147df54 (diff) | |
download | mariadb-git-6d6b38c27fcac1da19a11fa07a5f02c2c22635fe.tar.gz |
Bug #6737: REGEXP gives wrong result with case sensitive collation:
- A new flag MY_CS_CSSORT was introduced for case sensitivity.
- Item_func_regexp doesn't substiture ICASE not only
for binary collations but for case sensitive collations as well.
-rw-r--r-- | include/m_ctype.h | 2 | ||||
-rw-r--r-- | mysql-test/r/ctype_latin1.result | 9 | ||||
-rw-r--r-- | mysql-test/t/ctype_latin1.test | 7 | ||||
-rw-r--r-- | mysys/charset.c | 11 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 20 | ||||
-rw-r--r-- | strings/ctype-czech.c | 12 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 12 |
7 files changed, 51 insertions, 22 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index ddc21070547..26e285b9683 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -63,7 +63,7 @@ typedef struct unicase_info_st #define MY_CS_UNICODE 128 /* is a charset is full unicode */ #define MY_CS_READY 256 /* if a charset is initialized */ #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/ - +#define MY_CS_CSSORT 1024 /* if case sensitive sort order */ #define MY_CHARSET_UNDEFINED 0 diff --git a/mysql-test/r/ctype_latin1.result b/mysql-test/r/ctype_latin1.result index a8182438ac4..355f53b63a5 100644 --- a/mysql-test/r/ctype_latin1.result +++ b/mysql-test/r/ctype_latin1.result @@ -296,3 +296,12 @@ FD C3BD FD 1 FE C3BE FE 1 FF C3BF FF 1 DROP TABLE t1; +select 'a' regexp 'A' collate latin1_general_ci; +'a' regexp 'A' collate latin1_general_ci +1 +select 'a' regexp 'A' collate latin1_general_cs; +'a' regexp 'A' collate latin1_general_cs +0 +select 'a' regexp 'A' collate latin1_bin; +'a' regexp 'A' collate latin1_bin +0 diff --git a/mysql-test/t/ctype_latin1.test b/mysql-test/t/ctype_latin1.test index 14062437428..677acd9faa9 100644 --- a/mysql-test/t/ctype_latin1.test +++ b/mysql-test/t/ctype_latin1.test @@ -53,3 +53,10 @@ SELECT hex(@l:=convert(@u using latin1)), a=@l FROM t1; DROP TABLE t1; + +# +# Bug #6737: REGEXP gives wrong result with case sensitive collation +# +select 'a' regexp 'A' collate latin1_general_ci; +select 'a' regexp 'A' collate latin1_general_cs; +select 'a' regexp 'A' collate latin1_bin; diff --git a/mysys/charset.c b/mysys/charset.c index 1388fc40c6d..cb2379f8723 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs) } else { + uchar *sort_order= all_charsets[cs->number]->sort_order; simple_cs_init_functions(all_charsets[cs->number]); new->mbminlen= 1; new->mbmaxlen= 1; @@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs) all_charsets[cs->number]->state |= MY_CS_LOADED; } all_charsets[cs->number]->state|= MY_CS_AVAILABLE; + + /* + Check if case sensitive sort order: A < a < B. + We need MY_CS_FLAG for regex library, and for + case sensitivity flag for 5.0 client protocol, + to support isCaseSensitive() method in JDBC driver + */ + if (sort_order && sort_order['A'] < sort_order['a'] && + sort_order['a'] < sort_order['B']) + all_charsets[cs->number]->state|= MY_CS_CSSORT; } } else diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index c36f2d191c7..4970517de87 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -2364,11 +2364,12 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref) return 0; } int error; - if ((error=regcomp(&preg,res->c_ptr(), - (cmp_collation.collation->state & MY_CS_BINSORT) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE, - cmp_collation.collation))) + if ((error= regcomp(&preg,res->c_ptr(), + ((cmp_collation.collation->state & MY_CS_BINSORT) || + (cmp_collation.collation->state & MY_CS_CSSORT)) ? + REG_EXTENDED | REG_NOSUB : + REG_EXTENDED | REG_NOSUB | REG_ICASE, + cmp_collation.collation))) { (void) regerror(error,&preg,buff,sizeof(buff)); my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff); @@ -2416,10 +2417,11 @@ longlong Item_func_regex::val_int() regex_compiled=0; } if (regcomp(&preg,res2->c_ptr(), - (cmp_collation.collation->state & MY_CS_BINSORT) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE, - cmp_collation.collation)) + ((cmp_collation.collation->state & MY_CS_BINSORT) || + (cmp_collation.collation->state & MY_CS_CSSORT)) ? + REG_EXTENDED | REG_NOSUB : + REG_EXTENDED | REG_NOSUB | REG_ICASE, + cmp_collation.collation)) { null_value=1; return 0; diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index 6f9e9f74d35..2177a18504e 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -589,12 +589,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = CHARSET_INFO my_charset_latin2_czech_ci = { - 2,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */ - "latin2", /* cs name */ - "latin2_czech_cs", /* name */ - "", /* comment */ - NULL, /* tailoring */ + 2,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */ + "latin2", /* cs name */ + "latin2_czech_cs", /* name */ + "", /* comment */ + NULL, /* tailoring */ ctype_czech, to_lower_czech, to_upper_czech, diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index b4dbda3e8ed..4ada3d47bf5 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -624,12 +624,12 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler = CHARSET_INFO my_charset_cp1250_czech_ci = { - 34,0,0, /* number */ - MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */ - "cp1250", /* cs name */ - "cp1250_czech_cs", /* name */ - "", /* comment */ - NULL, /* tailoring */ + 34,0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */ + "cp1250", /* cs name */ + "cp1250_czech_cs", /* name */ + "", /* comment */ + NULL, /* tailoring */ ctype_win1250ch, to_lower_win1250ch, to_upper_win1250ch, |