diff options
author | unknown <bar@mysql.com> | 2004-12-09 15:56:19 +0400 |
---|---|---|
committer | unknown <bar@mysql.com> | 2004-12-09 15:56:19 +0400 |
commit | 809ea73208ab3edd36cad744918a1d4d3785411a (patch) | |
tree | 8b8b980eeaba8605d61c99b0939a5ff52c9a39f5 | |
parent | e539f5bd3f4e94ea27f6ea35c3502b67c22597f2 (diff) | |
download | mariadb-git-809ea73208ab3edd36cad744918a1d4d3785411a.tar.gz |
Bugs: #7111: server crashes when regexp is used
-rw-r--r-- | mysql-test/r/ctype_uca.result | 11 |
-rw-r--r-- | mysql-test/t/ctype_uca.test | 8 |
-rw-r--r-- | regex/regcomp.c | 21 |
3 files changed, 38 insertions, 2 deletions
diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result
index cb060ad7ee4..fbe876d3b66 100644
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@@ -2375,3 +2375,14 @@ DROP DATABASE d1;
 USE test;
 SET character_set_server= @safe_character_set_server;
 SET collation_server= @safe_collation_server;
+create table t1 (a varchar(1)) character set utf8 collate utf8_estonian_ci;
+insert into t1 values ('A'),('B'),('C'),('a'),('b'),('c');
+select a, a regexp '[a]' from t1 order by binary a;
+a	a regexp '[a]'
+A	1
+B	0
+C	0
+a	1
+b	0
+c	0
+drop table t1;
diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test
index 11833ba9bc7..8bca2a4b3c2 100644
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@@ -444,3 +444,11 @@ DROP TABLE t1;
 SET @test_character_set= 'utf8';
 SET @test_collation= 'utf8_swedish_ci';
 -- source include/ctype_common.inc
+
+#
+# Bug 7111 server crashes when regexp is used
+#
+create table t1 (a varchar(1)) character set utf8 collate utf8_estonian_ci;
+insert into t1 values ('A'),('B'),('C'),('a'),('b'),('c');
+select a, a regexp '[a]' from t1 order by binary a;
+drop table t1;
diff --git a/regex/regcomp.c b/regex/regcomp.c
index 5f0351c32aa..998b39379aa 100644
--- a/regex/regcomp.c
+++ b/regex/regcomp.c
@@ -860,11 +860,28 @@ othercase(charset,ch)
 CHARSET_INFO *charset;
 int ch;
 {
+	/*
+	  In MySQL some multi-byte character sets
+	  have 'ctype' array but don't have 'to_lower'
+	  and 'to_upper' arrays. In this case we handle
+	  only basic latin letters a..z and A..Z.
+
+	  If 'to_lower' and 'to_upper' arrays are empty in a character set,
+	  then my_isalpha(cs, ch) should never return TRUE for characters
+	  other than basic latin letters. Otherwise it should be
+	  considered as a mistake in character set definition.
+	*/
 	assert(my_isalpha(charset,ch));
 	if (my_isupper(charset,ch))
-		return(my_tolower(charset,ch));
+	{
+		return(charset->to_lower ? my_tolower(charset,ch) :
+					   ch - 'A' + 'a');
+	}
 	else if (my_islower(charset,ch))
-		return(my_toupper(charset,ch));
+	{
+		return(charset->to_upper ? my_toupper(charset,ch) :
+					   ch - 'a' + 'A');
+	}
 	else			/* peculiar, but could happen */
 		return(ch);
 }