diff options
-rw-r--r-- | mysql-test/include/ctype_regex.inc | 42 | ||||
-rw-r--r-- | mysql-test/r/ctype_uca.result | 45 | ||||
-rw-r--r-- | mysql-test/r/ctype_ucs.result | 45 | ||||
-rw-r--r-- | mysql-test/r/ctype_utf8.result | 45 | ||||
-rw-r--r-- | mysql-test/r/func_regexp.result | 14 | ||||
-rw-r--r-- | mysql-test/t/ctype_uca.test | 4 | ||||
-rw-r--r-- | mysql-test/t/ctype_ucs.test | 4 | ||||
-rw-r--r-- | mysql-test/t/ctype_utf8.test | 7 | ||||
-rw-r--r-- | mysql-test/t/func_regexp.test | 23 | ||||
-rw-r--r-- | sql/item_cmpfunc.cc | 122 | ||||
-rw-r--r-- | sql/item_cmpfunc.h | 4 |
11 files changed, 283 insertions, 72 deletions
diff --git a/mysql-test/include/ctype_regex.inc b/mysql-test/include/ctype_regex.inc new file mode 100644 index 00000000000..0e6b4c41607 --- /dev/null +++ b/mysql-test/include/ctype_regex.inc @@ -0,0 +1,42 @@ +# +# To test a desired collation, set session.collation_connection to +# this collation before including this file +# + +--disable_warnings +drop table if exists t1; +--enable_warnings + +# +# Create a table with two varchar(64) null-able column, +# using current values of +# @@character_set_connection and @@collation_connection. +# + +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +delete from t1; + +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); + +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); + +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); + +select HIGH_PRIORITY s1 regexp s2 from t1; + +drop table t1; diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result index 889702e380c..ada7ace9696 100644 --- a/mysql-test/r/ctype_uca.result +++ b/mysql-test/r/ctype_uca.result @@ -2754,4 +2754,49 @@ a c ch drop table t1; +set collation_connection=ucs2_unicode_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL, + `s2` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names utf8; End for 5.0 tests diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index 023267c227c..954fdab7699 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -922,4 +922,49 @@ ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_gen select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0); ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '=' drop table t1; +set collation_connection=ucs2_general_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) character set ucs2 default NULL, + `s2` varchar(64) character set ucs2 default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names latin1; End of 5.0 tests diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 710cac388a5..7cdc5c265d7 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -267,6 +267,51 @@ b select * from t1 where a = 'b' and a != 'b'; a drop table t1; +set collation_connection=utf8_general_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) character set utf8 default NULL, + `s2` varchar(64) character set utf8 default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names utf8; set names utf8; select 'вася' rlike '[[:<:]]вася[[:>:]]'; 'вася' rlike '[[:<:]]вася[[:>:]]' diff --git a/mysql-test/r/func_regexp.result b/mysql-test/r/func_regexp.result index 584c8a9b820..752ec3a5810 100644 --- a/mysql-test/r/func_regexp.result +++ b/mysql-test/r/func_regexp.result @@ -1,5 +1,17 @@ drop table if exists t1; -create table t1 (s1 char(64),s2 char(64)); +set names latin1; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) default NULL, + `s2` varchar(64) default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; insert into t1 values('aaa','aaa'); insert into t1 values('aaa|qqq','qqq'); insert into t1 values('gheis','^[^a-dXYZ]+$'); diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test index 17632e7c8fc..bac3ef7d63c 100644 --- a/mysql-test/t/ctype_uca.test +++ b/mysql-test/t/ctype_uca.test @@ -538,4 +538,8 @@ alter table t1 convert to character set ucs2 collate ucs2_czech_ci; select * from t1 where a like 'c%'; drop table t1; +set collation_connection=ucs2_unicode_ci; +--source include/ctype_regex.inc +set names utf8; + -- echo End for 5.0 tests diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index bca3a9c3a96..3779dd50260 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -651,4 +651,8 @@ select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062); select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0); drop table t1; +set collation_connection=ucs2_general_ci; +--source include/ctype_regex.inc +set names latin1; + --echo End of 5.0 tests diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index f8eed0bae9a..927794f2d5a 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -186,6 +186,13 @@ select * from t1 where a = 'b' and a != 'b'; drop table t1; # +# Testing regexp +# +set collation_connection=utf8_general_ci; +--source include/ctype_regex.inc +set names utf8; + +# # Bug #3928 regexp [[:>:]] and UTF-8 # set names utf8; diff --git a/mysql-test/t/func_regexp.test b/mysql-test/t/func_regexp.test index 23070c71fe9..f34830f6100 100644 --- a/mysql-test/t/func_regexp.test +++ b/mysql-test/t/func_regexp.test @@ -6,28 +6,9 @@ drop table if exists t1; --enable_warnings -create table t1 (s1 char(64),s2 char(64)); +set names latin1; +--source include/ctype_regex.inc -insert into t1 values('aaa','aaa'); -insert into t1 values('aaa|qqq','qqq'); -insert into t1 values('gheis','^[^a-dXYZ]+$'); -insert into t1 values('aab','^aa?b'); -insert into t1 values('Baaan','^Ba*n'); -insert into t1 values('aaa','qqq|aaa'); -insert into t1 values('qqq','qqq|aaa'); - -insert into t1 values('bbb','qqq|aaa'); -insert into t1 values('bbb','qqq'); -insert into t1 values('aaa','aba'); - -insert into t1 values(null,'abc'); -insert into t1 values('def',null); -insert into t1 values(null,null); -insert into t1 values('ghi','ghi['); - -select HIGH_PRIORITY s1 regexp s2 from t1; - -drop table t1; # # This test a bug in regexp on Alpha diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 86eb10d50b0..51b3e8cda6b 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4226,6 +4226,51 @@ void Item_func_like::cleanup() #ifdef USE_REGEX bool +Item_func_regex::regcomp(bool send_error) +{ + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= args[1]->val_str(&tmp); + int error; + + if (args[1]->null_value) + return TRUE; + + if (regex_compiled) + { + if (!stringcmp(res, &prev_regexp)) + return FALSE; + prev_regexp.copy(*res); + my_regfree(&preg); + regex_compiled= 0; + } + + if (cmp_collation.collation != regex_lib_charset) + { + /* Convert UCS2 strings to UTF8 */ + uint dummy_errors; + if (conv.copy(res->ptr(), res->length(), res->charset(), + regex_lib_charset, &dummy_errors)) + return TRUE; + res= &conv; + } + + if ((error= my_regcomp(&preg, res->c_ptr(), + regex_lib_flags, regex_lib_charset))) + { + if (send_error) + { + (void) my_regerror(error, &preg, buff, sizeof(buff)); + my_error(ER_REGEXP_ERROR, MYF(0), buff); + } + return TRUE; + } + regex_compiled= 1; + return FALSE; +} + + +bool Item_func_regex::fix_fields(THD *thd, Item **ref) { DBUG_ASSERT(fixed == 0); @@ -4241,34 +4286,33 @@ Item_func_regex::fix_fields(THD *thd, Item **ref) if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1)) return TRUE; + regex_lib_flags= (cmp_collation.collation->state & + (MY_CS_BINSORT | MY_CS_CSSORT)) ? + REG_EXTENDED | REG_NOSUB : + REG_EXTENDED | REG_NOSUB | REG_ICASE; + /* + If the case of UCS2 and other non-ASCII character sets, + we will convert patterns and strings to UTF8. + */ + regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ? + &my_charset_utf8_general_ci : + cmp_collation.collation; + used_tables_cache=args[0]->used_tables() | args[1]->used_tables(); not_null_tables_cache= (args[0]->not_null_tables() | args[1]->not_null_tables()); const_item_cache=args[0]->const_item() && args[1]->const_item(); if (!regex_compiled && args[1]->const_item()) { - char buff[MAX_FIELD_WIDTH]; - String tmp(buff,sizeof(buff),&my_charset_bin); - String *res=args[1]->val_str(&tmp); if (args[1]->null_value) { // Will always return NULL maybe_null=1; return FALSE; } - int error; - if ((error= my_regcomp(&preg,res->c_ptr(), - ((cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE), - cmp_collation.collation))) - { - (void) my_regerror(error,&preg,buff,sizeof(buff)); - my_error(ER_REGEXP_ERROR, MYF(0), buff); + if (regcomp(TRUE)) return TRUE; - } - regex_compiled=regex_is_const=1; - maybe_null=args[0]->maybe_null; + regex_is_const= 1; + maybe_null= args[0]->maybe_null; } else maybe_null=1; @@ -4281,47 +4325,25 @@ longlong Item_func_regex::val_int() { DBUG_ASSERT(fixed == 1); char buff[MAX_FIELD_WIDTH]; - String *res, tmp(buff,sizeof(buff),&my_charset_bin); + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= args[0]->val_str(&tmp); - res=args[0]->val_str(&tmp); - if (args[0]->null_value) - { - null_value=1; + if ((null_value= (args[0]->null_value || + (!regex_is_const && regcomp(FALSE))))) return 0; - } - if (!regex_is_const) - { - char buff2[MAX_FIELD_WIDTH]; - String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin); - res2= args[1]->val_str(&tmp2); - if (args[1]->null_value) + if (cmp_collation.collation != regex_lib_charset) + { + /* Convert UCS2 strings to UTF8 */ + uint dummy_errors; + if (conv.copy(res->ptr(), res->length(), res->charset(), + regex_lib_charset, &dummy_errors)) { - null_value=1; + null_value= 1; return 0; } - if (!regex_compiled || stringcmp(res2,&prev_regexp)) - { - prev_regexp.copy(*res2); - if (regex_compiled) - { - my_regfree(&preg); - regex_compiled=0; - } - if (my_regcomp(&preg,res2->c_ptr_safe(), - ((cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE), - cmp_collation.collation)) - { - null_value=1; - return 0; - } - regex_compiled=1; - } + res= &conv; } - null_value=0; return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1; } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 8410c66b034..11851e9a4e4 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1313,6 +1313,10 @@ class Item_func_regex :public Item_bool_func bool regex_is_const; String prev_regexp; DTCollation cmp_collation; + CHARSET_INFO *regex_lib_charset; + int regex_lib_flags; + String conv; + bool regcomp(bool send_error); public: Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b), regex_compiled(0),regex_is_const(0) {} |