diff options
| author | Alexander Barkov <bar@mnogosearch.org> | 2013-09-26 18:02:17 +0400 |
|---|---|---|
| committer | Alexander Barkov <bar@mnogosearch.org> | 2013-09-26 18:02:17 +0400 |
| commit | 285e7aa179a6081531be3274772b89e8989fd107 (patch) | |
| tree | 71836ea6e49f48fff6e957bcef38628e7b979001 /mysql-test/t/func_regexp_pcre.test | |
| parent | 9d83468e78ba23f024ce3c11443913ad75cf1ea5 (diff) | |
| download | mariadb-git-285e7aa179a6081531be3274772b89e8989fd107.tar.gz | |
MDEV-4425 REGEXP enhancements
Diffstat (limited to 'mysql-test/t/func_regexp_pcre.test')
| -rw-r--r-- | mysql-test/t/func_regexp_pcre.test | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/mysql-test/t/func_regexp_pcre.test b/mysql-test/t/func_regexp_pcre.test
new file mode 100644
index 00000000000..6da050e1824
--- /dev/null
+++ b/mysql-test/t/func_regexp_pcre.test
@@ -0,0 +1,331 @@
+
+SET NAMES utf8;
+
+--echo #
+--echo # MDEV-4425 REGEXP enhancements
+--echo #
+
+--echo #
+--echo # Checking RLIKE
+--echo #
+
+# The letter à must count as exactly one character for '.'
+SELECT 'à' RLIKE '^.$';
+
+# \x{HHHH} code point syntax; case sensitivity with and without COLLATE utf8_bin
+SELECT 'à' RLIKE '\\x{00E0}';
+SELECT 'À' RLIKE '\\x{00E0}';
+SELECT 'à' RLIKE '\\x{00C0}';
+SELECT 'À' RLIKE '\\x{00C0}';
+SELECT 'à' RLIKE '\\x{00E0}' COLLATE utf8_bin;
+SELECT 'À' RLIKE '\\x{00E0}' COLLATE utf8_bin;
+SELECT 'à' RLIKE '\\x{00C0}' COLLATE utf8_bin;
+SELECT 'À' RLIKE '\\x{00C0}' COLLATE utf8_bin;
+
+# Unicode properties (\p{...}) and POSIX classes: every class in t2 is tried against every character in t1
+CREATE TABLE t1 (ch VARCHAR(22)) CHARACTER SET utf8;
+CREATE TABLE t2 (class VARCHAR(32)) CHARACTER SET utf8;
+INSERT INTO t1 VALUES ('Я'),('Σ'),('A'),('À');
+INSERT INTO t1 VALUES ('я'),('σ'),('a'),('à');
+INSERT INTO t1 VALUES ('㐗'),('갷'),('ප');
+INSERT INTO t1 VALUES ('1'),('௨');
+INSERT INTO t2 VALUES ('\\p{Cyrillic}'),('\\p{Greek}'),('\\p{Latin}');
+INSERT INTO t2 VALUES ('\\p{Han}'),('\\p{Hangul}');
+INSERT INTO t2 VALUES ('\\p{Sinhala}'), ('\\p{Tamil}');
+INSERT INTO t2 VALUES ('\\p{L}'),('\\p{Ll}'),('\\p{Lu}'),('\\p{L&}');
+INSERT INTO t2 VALUES ('[[:alpha:]]'),('[[:digit:]]');
+SELECT class, ch, ch RLIKE class FROM t1, t2 ORDER BY class, BINARY ch;
+DROP TABLE t1, t2;
+
+# Newline convention modifiers: (*CR), (*LF), (*CRLF), (*ANYCRLF), (*ANY)
+SELECT '\n' RLIKE '(*CR)';
+SELECT '\n' RLIKE '(*LF)';
+SELECT '\n' RLIKE '(*CRLF)';
+SELECT '\n' RLIKE '(*ANYCRLF)';
+SELECT '\n' RLIKE '(*ANY)';
+
+SELECT 'a\nb' RLIKE '(*LF)(?m)^a$';
+SELECT 'a\nb' RLIKE '(*CR)(?m)^a$';
+SELECT 'a\nb' RLIKE '(*CRLF)(?m)^a$';
+SELECT 'a\nb' RLIKE '(*ANYCRLF)(?m)^a$';
+
+SELECT 'a\rb' RLIKE '(*LF)(?m)^a$';
+SELECT 'a\rb' RLIKE '(*CR)(?m)^a$';
+SELECT 'a\rb' RLIKE '(*CRLF)(?m)^a$';
+SELECT 'a\rb' RLIKE '(*ANYCRLF)(?m)^a$';
+
+SELECT 'a\r\nb' RLIKE '(*LF)(?m)^a$';
+SELECT 'a\r\nb' RLIKE '(*CR)(?m)^a$';
+SELECT 'a\r\nb' RLIKE '(*CRLF)(?m)^a$';
+SELECT 'a\r\nb' RLIKE '(*ANYCRLF)(?m)^a$';
+
+# Back-references: \g1, \g2
+SELECT 'aa' RLIKE '(a)\\g1';
+SELECT 'aa bb' RLIKE '(a)\\g1 (b)\\g2';
+
+# Repetition quantifiers: {min,max}
+SELECT 'aaaaa' RLIKE 'a{0,5}';
+SELECT 'aaaaa' RLIKE 'a{1,3}';
+SELECT 'aaaaa' RLIKE 'a{0,}';
+SELECT 'aaaaa' RLIKE 'a{10,20}';
+
+# Recursion of the whole pattern: (?R)
+SELECT 'aabb' RLIKE 'a(?R)?b';
+SELECT 'aabb' RLIKE 'aa(?R)?bb';
+
+# Subroutine calls: (?1) - the two statements below are disabled
+#SELECT 'abbbc' RLIKE '(a(b|(?1))*c)';
+#SELECT 'abca' RLIKE '([abc])(?1){3}';
+
+# Atomic grouping: (?>...)
+SELECT 'abcc' RLIKE 'a(?>bc|b)c';
+SELECT 'abc' RLIKE 'a(?>bc|b)c';
+
+# Negative lookahead: (?!...)
+SELECT 'ab' RLIKE 'a(?!b)';
+SELECT 'ac' RLIKE 'a(?!b)';
+
+# Positive lookahead: (?=...)
+SELECT 'ab' RLIKE 'a(?=b)';
+SELECT 'ac' RLIKE 'a(?=b)';
+
+# Negative lookbehind: (?<!...)
+SELECT 'ab' RLIKE '(?<!a)b';
+SELECT 'cb' RLIKE '(?<!a)b';
+
+# Positive lookbehind: (?<=...)
+SELECT 'ab' RLIKE '(?<=a)b';
+SELECT 'cb' RLIKE '(?<=a)b';
+
+# Named subpatterns: (?P<name>...) and (?P=name)
+SELECT 'aa' RLIKE '(?P<pattern>a)(?P=pattern)';
+SELECT 'aba' RLIKE '(?P<pattern>a)b(?P=pattern)';
+
+# Inline pattern comments: (?#...)
+SELECT 'a' RLIKE 'a(?#comment)';
+SELECT 'aa' RLIKE 'a(?#comment)a';
+SELECT 'aba' RLIKE 'a(?#b)a';
+
+# Ungreedy matching: .+? - the statement below is disabled
+#SELECT 'ddd <ab>cc</ab> eee' RLIKE '<.+?>';
+
+# Shorthand character classes: \W, \d, \D, \s, \S
+SELECT 'aaa' RLIKE '\\W\\W\\W';
+SELECT '%' RLIKE '\\W';
+SELECT '%a$' RLIKE '\\W.\\W';
+
+SELECT '123' RLIKE '\\d\\d\\d';
+SELECT 'aaa' RLIKE '\\d\\d\\d';
+SELECT '1a3' RLIKE '\\d.\\d';
+SELECT 'a1b' RLIKE '\\d.\\d';
+
+SELECT '8' RLIKE '\\D';
+SELECT 'a' RLIKE '\\D';
+SELECT '%' RLIKE '\\D';
+SELECT 'a1' RLIKE '\\D\\d';
+SELECT 'a1' RLIKE '\\d\\D';
+
+SELECT '\t' RLIKE '\\s';
+SELECT '\r' RLIKE '\\s';
+SELECT '\n' RLIKE '\\s';
+SELECT '\v' RLIKE '\\s';
+
+SELECT 'a' RLIKE '\\S';
+SELECT '1' RLIKE '\\S';
+SELECT '!' RLIKE '\\S';
+SELECT '.' RLIKE '\\S';
+
+--echo #
+--echo # Checking REGEXP_REPLACE
+--echo #
+
+# Result data type, as shown by SHOW CREATE TABLE
+CREATE TABLE t1 AS SELECT REGEXP_REPLACE('abc','b','x');
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+# Check print() via EXPLAIN EXTENDED
+EXPLAIN EXTENDED SELECT REGEXP_REPLACE('abc','b','x');
+
+# Check decimals when the result is used in a numeric expression (+0)
+CREATE TABLE t1 AS SELECT REGEXP_REPLACE('abc','b','x')+0;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+# A NULL in any argument is expected to give NULL
+SELECT REGEXP_REPLACE(NULL,'b','c');
+SELECT REGEXP_REPLACE('a',NULL,'c');
+SELECT REGEXP_REPLACE('a','b',NULL);
+
+# No match: the original string is expected back
+SELECT REGEXP_REPLACE('a','x','b');
+
+# Empty pattern: the original string is expected back
+SELECT REGEXP_REPLACE('a','','b');
+
+# Replacement must stop at the first empty match:
+# in 'a5b' the pattern matches '5', which is replaced with 'x';
+# in 'ab' the pattern matches as well, but '5*' matches the empty string,
+# so replacing stops there.
+SELECT REGEXP_REPLACE('a5b ab a5b','(?<=a)5*(?=b)','x');
+
+# Variation of the previous example: with no empty match on the way,
+# every match must be replaced.
+SELECT REGEXP_REPLACE('a5b a5b a5b','(?<=a)5*(?=b)','x');
+
+
+# Case sensitivity must follow the collation of the subject
+SELECT REGEXP_REPLACE('A','a','b');
+SELECT REGEXP_REPLACE('a','A','b');
+SELECT REGEXP_REPLACE('A' COLLATE utf8_bin,'a','b');
+SELECT REGEXP_REPLACE('a' COLLATE utf8_bin,'A','b');
+
+# Subpattern references (\1, \2) inside the replacement string
+SELECT REGEXP_REPLACE('James Bond', '(.*) (.*)', '\\2, \\1 \\2');
+
+# Multi-byte UTF-8 subject, pattern and replacement
+SELECT REGEXP_REPLACE('абвгд','в','ц');
+
+# Binary (hex literal) pattern and replacement must not be treated as UTF-8
+SELECT REGEXP_REPLACE('г',0xB3,0xB4);
+
+# By default every match is expected to be replaced
+SELECT REGEXP_REPLACE('aaaa','a','b');
+
+# Lookbehind (?<=.) excludes the first letter from replacement
+SELECT REGEXP_REPLACE('aaaa','(?<=.)a','b');
+
+# Lookahead (?=.) excludes the last letter from replacement
+SELECT REGEXP_REPLACE('aaaa','a(?=.)','b');
+
+# Both together exclude the first and the last letter from replacement
+SELECT REGEXP_REPLACE('aaaa','(?<=.)a(?=.)','b');
+
+# Newline convention modifiers: (*LF), (*CR), (*CRLF), (*ANYCRLF)
+SELECT REGEXP_REPLACE('a\nb','(*LF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\nb','(*CR)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\nb','(*CRLF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\nb','(*ANYCRLF)(?m)^a$','c');
+
+SELECT REGEXP_REPLACE('a\rb','(*LF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\rb','(*CR)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\rb','(*CRLF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\rb','(*ANYCRLF)(?m)^a$','c');
+
+SELECT REGEXP_REPLACE('a\r\nb','(*LF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\r\nb','(*CR)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\r\nb','(*CRLF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\r\nb','(*ANYCRLF)(?m)^a$','c');
+
+# Back-references: \g1, \g2
+SELECT REGEXP_REPLACE('aa','(a)\\g1','b');
+SELECT REGEXP_REPLACE('aa bb','(a)\\g1 (b)\\g2','c');
+
+# Repetition quantifiers: {min,max}
+SELECT REGEXP_REPLACE('aaaaa','a{1,3}','b');
+SELECT REGEXP_REPLACE('aaaaa','a{10,20}','b');
+
+# Recursion of the whole pattern: (?R)
+SELECT REGEXP_REPLACE('daabbd','a(?R)?b','c');
+SELECT REGEXP_REPLACE('daabbd','aa(?R)?bb','c');
+
+# Atomic grouping: (?>...)
+SELECT REGEXP_REPLACE('dabccd','a(?>bc|b)c','e');
+SELECT REGEXP_REPLACE('dabcd','a(?>bc|b)c','e');
+
+# Negative lookahead: (?!...)
+SELECT REGEXP_REPLACE('ab','a(?!b)','e');
+SELECT REGEXP_REPLACE('ac','a(?!b)','e');
+
+# Positive lookahead: (?=...)
+SELECT REGEXP_REPLACE('ab','a(?=b)','e');
+SELECT REGEXP_REPLACE('ac','a(?=b)','e');
+
+# Negative lookbehind: (?<!...)
+SELECT REGEXP_REPLACE('ab','(?<!a)b','e');
+SELECT REGEXP_REPLACE('cb','(?<!a)b','e');
+
+# Positive lookbehind: (?<=...)
+SELECT REGEXP_REPLACE('ab','(?<=a)b','e');
+SELECT REGEXP_REPLACE('cb','(?<=a)b','e');
+
+# Named subpatterns: (?P<name>...) and (?P=name)
+SELECT REGEXP_REPLACE('aa','(?P<pattern>a)(?P=pattern)','b');
+SELECT REGEXP_REPLACE('aba','(?P<pattern>a)b(?P=pattern)','c');
+
+# Inline pattern comments: (?#...)
+SELECT REGEXP_REPLACE('a','a(?#comment)','e');
+SELECT REGEXP_REPLACE('aa','a(?#comment)a','e');
+SELECT REGEXP_REPLACE('aba','a(?#b)a','e');
+
+# Ungreedy matching: .+?
+SELECT REGEXP_REPLACE('ddd<ab>cc</ab>eee','<.+?>','*');
+
+# Shorthand character classes: \W, \w, \d, \D, \s, \S
+SELECT REGEXP_REPLACE('aaa','\\W\\W\\W','e');
+SELECT REGEXP_REPLACE('aaa','\\w\\w\\w','e');
+SELECT REGEXP_REPLACE('%','\\W','e');
+SELECT REGEXP_REPLACE('%a$','\\W.\\W','e');
+SELECT REGEXP_REPLACE('%a$','\\W\\w\\W','e');
+
+SELECT REGEXP_REPLACE('123','\\d\\d\\d\\d\\d\\d','e');
+SELECT REGEXP_REPLACE('123','\\d\\d\\d','e');
+SELECT REGEXP_REPLACE('aaa','\\d\\d\\d','e');
+SELECT REGEXP_REPLACE('1a3','\\d.\\d\\d.\\d','e');
+SELECT REGEXP_REPLACE('1a3','\\d.\\d','e');
+SELECT REGEXP_REPLACE('a1b','\\d.\\d','e');
+
+SELECT REGEXP_REPLACE('8','\\D','e');
+SELECT REGEXP_REPLACE('a','\\D','e');
+SELECT REGEXP_REPLACE('%','\\D','e');
+SELECT REGEXP_REPLACE('a1','\\D\\d','e');
+SELECT REGEXP_REPLACE('a1','\\d\\D','e');
+
+SELECT REGEXP_REPLACE('\t','\\s','e');
+SELECT REGEXP_REPLACE('\r','\\s','e');
+SELECT REGEXP_REPLACE('\n','\\s','e');
+
+SELECT REGEXP_REPLACE('a','\\S','e');
+SELECT REGEXP_REPLACE('1','\\S','e');
+SELECT REGEXP_REPLACE('!','\\S','e');
+SELECT REGEXP_REPLACE('.','\\S','e');
+
+--echo #
+--echo # Checking REGEXP_INSTR
+--echo #
+SELECT REGEXP_INSTR('abcd','X');
+SELECT REGEXP_INSTR('abcd','a');
+SELECT REGEXP_INSTR('abcd','b');
+SELECT REGEXP_INSTR('abcd','c');
+SELECT REGEXP_INSTR('abcd','d');
+SELECT REGEXP_INSTR('aaaa','(?<=a)a');
+
+SELECT REGEXP_INSTR('вася','в');
+SELECT REGEXP_INSTR('вася','а');
+SELECT REGEXP_INSTR('вася','с');
+SELECT REGEXP_INSTR('вася','я');
+SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('в' USING koi8r));
+SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('а' USING koi8r));
+SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('с' USING koi8r));
+SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('я' USING koi8r));
+
+
+--echo #
+--echo # Checking REGEXP_SUBSTR
+--echo #
+
+# Result data type, as shown by SHOW CREATE TABLE
+CREATE TABLE t1 AS SELECT REGEXP_SUBSTR('abc','b');
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+# Check print() via EXPLAIN EXTENDED
+EXPLAIN EXTENDED SELECT REGEXP_SUBSTR('abc','b');
+
+# Check decimals when the result is used in a numeric expression (+0)
+CREATE TABLE t1 AS SELECT REGEXP_SUBSTR('abc','b')+0;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+
+SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');
