summaryrefslogtreecommitdiff
path: root/mysql-test/t/func_regexp_pcre.test
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mnogosearch.org>2013-09-26 18:02:17 +0400
committerAlexander Barkov <bar@mnogosearch.org>2013-09-26 18:02:17 +0400
commit285e7aa179a6081531be3274772b89e8989fd107 (patch)
tree71836ea6e49f48fff6e957bcef38628e7b979001 /mysql-test/t/func_regexp_pcre.test
parent9d83468e78ba23f024ce3c11443913ad75cf1ea5 (diff)
downloadmariadb-git-285e7aa179a6081531be3274772b89e8989fd107.tar.gz
MDEV-4425 REGEXP enhancements
Diffstat (limited to 'mysql-test/t/func_regexp_pcre.test')
-rw-r--r--mysql-test/t/func_regexp_pcre.test331
1 files changed, 331 insertions, 0 deletions
diff --git a/mysql-test/t/func_regexp_pcre.test b/mysql-test/t/func_regexp_pcre.test
new file mode 100644
index 00000000000..6da050e1824
--- /dev/null
+++ b/mysql-test/t/func_regexp_pcre.test
@@ -0,0 +1,331 @@
+
+SET NAMES utf8;
+
+--echo #
+--echo # MDEV-4425 REGEXP enhancements
+--echo #
+
+--echo #
+--echo # Checking RLIKE
+--echo #
+
+# Checking that à is a single character
+SELECT 'à' RLIKE '^.$';
+
+# Checking \x{FFFF} syntax and case sensitivity
+SELECT 'à' RLIKE '\\x{00E0}';
+SELECT 'À' RLIKE '\\x{00E0}';
+SELECT 'à' RLIKE '\\x{00C0}';
+SELECT 'À' RLIKE '\\x{00C0}';
+SELECT 'à' RLIKE '\\x{00E0}' COLLATE utf8_bin;
+SELECT 'À' RLIKE '\\x{00E0}' COLLATE utf8_bin;
+SELECT 'à' RLIKE '\\x{00C0}' COLLATE utf8_bin;
+SELECT 'À' RLIKE '\\x{00C0}' COLLATE utf8_bin;
+
+# Checking Unicode character classes
+CREATE TABLE t1 (ch VARCHAR(22)) CHARACTER SET utf8;
+CREATE TABLE t2 (class VARCHAR(32)) CHARACTER SET utf8;
+INSERT INTO t1 VALUES ('Я'),('Σ'),('A'),('À');
+INSERT INTO t1 VALUES ('я'),('σ'),('a'),('à');
+INSERT INTO t1 VALUES ('㐗'),('갷'),('ප');
+INSERT INTO t1 VALUES ('1'),('௨');
+INSERT INTO t2 VALUES ('\\p{Cyrillic}'),('\\p{Greek}'),('\\p{Latin}');
+INSERT INTO t2 VALUES ('\\p{Han}'),('\\p{Hangul}');
+INSERT INTO t2 VALUES ('\\p{Sinhala}'), ('\\p{Tamil}');
+INSERT INTO t2 VALUES ('\\p{L}'),('\\p{Ll}'),('\\p{Lu}'),('\\p{L&}');
+INSERT INTO t2 VALUES ('[[:alpha:]]'),('[[:digit:]]');
+SELECT class, ch, ch RLIKE class FROM t1, t2 ORDER BY class, BINARY ch;
+DROP TABLE t1, t2;
+
+# newline character
+SELECT '\n' RLIKE '(*CR)';
+SELECT '\n' RLIKE '(*LF)';
+SELECT '\n' RLIKE '(*CRLF)';
+SELECT '\n' RLIKE '(*ANYCRLF)';
+SELECT '\n' RLIKE '(*ANY)';
+
+SELECT 'a\nb' RLIKE '(*LF)(?m)^a$';
+SELECT 'a\nb' RLIKE '(*CR)(?m)^a$';
+SELECT 'a\nb' RLIKE '(*CRLF)(?m)^a$';
+SELECT 'a\nb' RLIKE '(*ANYCRLF)(?m)^a$';
+
+SELECT 'a\rb' RLIKE '(*LF)(?m)^a$';
+SELECT 'a\rb' RLIKE '(*CR)(?m)^a$';
+SELECT 'a\rb' RLIKE '(*CRLF)(?m)^a$';
+SELECT 'a\rb' RLIKE '(*ANYCRLF)(?m)^a$';
+
+SELECT 'a\r\nb' RLIKE '(*LF)(?m)^a$';
+SELECT 'a\r\nb' RLIKE '(*CR)(?m)^a$';
+SELECT 'a\r\nb' RLIKE '(*CRLF)(?m)^a$';
+SELECT 'a\r\nb' RLIKE '(*ANYCRLF)(?m)^a$';
+
+#backreference
+SELECT 'aa' RLIKE '(a)\\g1';
+SELECT 'aa bb' RLIKE '(a)\\g1 (b)\\g2';
+
+#repitition
+SELECT 'aaaaa' RLIKE 'a{0,5}';
+SELECT 'aaaaa' RLIKE 'a{1,3}';
+SELECT 'aaaaa' RLIKE 'a{0,}';
+SELECT 'aaaaa' RLIKE 'a{10,20}';
+
+#Recursion
+SELECT 'aabb' RLIKE 'a(?R)?b';
+SELECT 'aabb' RLIKE 'aa(?R)?bb';
+
+#subroutine
+#SELECT 'abbbc' RLIKE '(a(b|(?1))*c)';
+#SELECT 'abca' RLIKE '([abc])(?1){3}';
+
+#Atomic grouping
+SELECT 'abcc' RLIKE 'a(?>bc|b)c';
+SELECT 'abc' RLIKE 'a(?>bc|b)c';
+
+#lookahead - negative
+SELECT 'ab' RLIKE 'a(?!b)';
+SELECT 'ac' RLIKE 'a(?!b)';
+
+#lookahead - positive
+SELECT 'ab' RLIKE 'a(?=b)';
+SELECT 'ac' RLIKE 'a(?=b)';
+
+#lookbehind - negative
+SELECT 'ab' RLIKE '(?<!a)b';
+SELECT 'cb' RLIKE '(?<!a)b';
+
+#lookbehind - positive
+SELECT 'ab' RLIKE '(?<=a)b';
+SELECT 'cb' RLIKE '(?<=a)b';
+
+# named subpatterns
+SELECT 'aa' RLIKE '(?P<pattern>a)(?P=pattern)';
+SELECT 'aba' RLIKE '(?P<pattern>a)b(?P=pattern)';
+
+#comments
+SELECT 'a' RLIKE 'a(?#comment)';
+SELECT 'aa' RLIKE 'a(?#comment)a';
+SELECT 'aba' RLIKE 'a(?#b)a';
+
+#ungreedy maching
+#SELECT 'ddd <ab>cc</ab> eee' RLIKE '<.+?>';
+
+#Extended character classes
+SELECT 'aaa' RLIKE '\\W\\W\\W';
+SELECT '%' RLIKE '\\W';
+SELECT '%a$' RLIKE '\\W.\\W';
+
+SELECT '123' RLIKE '\\d\\d\\d';
+SELECT 'aaa' RLIKE '\\d\\d\\d';
+SELECT '1a3' RLIKE '\\d.\\d';
+SELECT 'a1b' RLIKE '\\d.\\d';
+
+SELECT '8' RLIKE '\\D';
+SELECT 'a' RLIKE '\\D';
+SELECT '%' RLIKE '\\D';
+SELECT 'a1' RLIKE '\\D\\d';
+SELECT 'a1' RLIKE '\\d\\D';
+
+SELECT '\t' RLIKE '\\s';
+SELECT '\r' RLIKE '\\s';
+SELECT '\n' RLIKE '\\s';
+SELECT '\v' RLIKE '\\s';
+
+SELECT 'a' RLIKE '\\S';
+SELECT '1' RLIKE '\\S';
+SELECT '!' RLIKE '\\S';
+SELECT '.' RLIKE '\\S';
+
+--echo #
+--echo # Checking REGEXP_REPLACE
+--echo #
+
+# Check data type
+CREATE TABLE t1 AS SELECT REGEXP_REPLACE('abc','b','x');
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+# Check print()
+EXPLAIN EXTENDED SELECT REGEXP_REPLACE('abc','b','x');
+
+# Check decimals
+CREATE TABLE t1 AS SELECT REGEXP_REPLACE('abc','b','x')+0;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+# Return NULL if any of the arguments are NULL
+SELECT REGEXP_REPLACE(NULL,'b','c');
+SELECT REGEXP_REPLACE('a',NULL,'c');
+SELECT REGEXP_REPLACE('a','b',NULL);
+
+# Return the original string if no match
+SELECT REGEXP_REPLACE('a','x','b');
+
+# Return the original string for an empty pattern
+SELECT REGEXP_REPLACE('a','','b');
+
+# Check that replace stops on the first empty match
+# 'a5b' matches the pattern and '5' is replaced to 'x'
+# then 'ab' matches the pattern, but the match '5*' is empty,
+# so replacing stops here.
+SELECT REGEXP_REPLACE('a5b ab a5b','(?<=a)5*(?=b)','x');
+
+# A modified version of the previous example,
+# to check that all matches are replaced if no empty match is met.
+SELECT REGEXP_REPLACE('a5b a5b a5b','(?<=a)5*(?=b)','x');
+
+
+# Check that case sensitiviry respects the collation
+SELECT REGEXP_REPLACE('A','a','b');
+SELECT REGEXP_REPLACE('a','A','b');
+SELECT REGEXP_REPLACE('A' COLLATE utf8_bin,'a','b');
+SELECT REGEXP_REPLACE('a' COLLATE utf8_bin,'A','b');
+
+# Pattern references in the "replace" string
+SELECT REGEXP_REPLACE('James Bond', '(.*) (.*)', '\\2, \\1 \\2');
+
+# Checking with UTF8
+SELECT REGEXP_REPLACE('абвгд','в','ц');
+
+# Check that it does not treat binary strings as UTF8
+SELECT REGEXP_REPLACE('г',0xB3,0xB4);
+
+# Check that it replaces all matches by default
+SELECT REGEXP_REPLACE('aaaa','a','b');
+
+# Replace all matches except the first letter
+SELECT REGEXP_REPLACE('aaaa','(?<=.)a','b');
+
+# Replace all matches except the last letter
+SELECT REGEXP_REPLACE('aaaa','a(?=.)','b');
+
+# Replace all matches except the first and the last letter
+SELECT REGEXP_REPLACE('aaaa','(?<=.)a(?=.)','b');
+
+# newline character
+SELECT REGEXP_REPLACE('a\nb','(*LF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\nb','(*CR)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\nb','(*CRLF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\nb','(*ANYCRLF)(?m)^a$','c');
+
+SELECT REGEXP_REPLACE('a\rb','(*LF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\rb','(*CR)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\rb','(*CRLF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\rb','(*ANYCRLF)(?m)^a$','c');
+
+SELECT REGEXP_REPLACE('a\r\nb','(*LF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\r\nb','(*CR)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\r\nb','(*CRLF)(?m)^a$','c');
+SELECT REGEXP_REPLACE('a\r\nb','(*ANYCRLF)(?m)^a$','c');
+
+#backreference
+SELECT REGEXP_REPLACE('aa','(a)\\g1','b');
+SELECT REGEXP_REPLACE('aa bb','(a)\\g1 (b)\\g2','c');
+
+#repitition
+SELECT REGEXP_REPLACE('aaaaa','a{1,3}','b');
+SELECT REGEXP_REPLACE('aaaaa','a{10,20}','b');
+
+#Recursion
+SELECT REGEXP_REPLACE('daabbd','a(?R)?b','c');
+SELECT REGEXP_REPLACE('daabbd','aa(?R)?bb','c');
+
+#Atomic grouping
+SELECT REGEXP_REPLACE('dabccd','a(?>bc|b)c','e');
+SELECT REGEXP_REPLACE('dabcd','a(?>bc|b)c','e');
+
+#lookahead - negative
+SELECT REGEXP_REPLACE('ab','a(?!b)','e');
+SELECT REGEXP_REPLACE('ac','a(?!b)','e');
+
+#lookahead - positive
+SELECT REGEXP_REPLACE('ab','a(?=b)','e');
+SELECT REGEXP_REPLACE('ac','a(?=b)','e');
+
+#lookbehind - negative
+SELECT REGEXP_REPLACE('ab','(?<!a)b','e');
+SELECT REGEXP_REPLACE('cb','(?<!a)b','e');
+
+#lookbehind - positive
+SELECT REGEXP_REPLACE('ab','(?<=a)b','e');
+SELECT REGEXP_REPLACE('cb','(?<=a)b','e');
+
+# named subpatterns
+SELECT REGEXP_REPLACE('aa','(?P<pattern>a)(?P=pattern)','b');
+SELECT REGEXP_REPLACE('aba','(?P<pattern>a)b(?P=pattern)','c');
+
+#comments
+SELECT REGEXP_REPLACE('a','a(?#comment)','e');
+SELECT REGEXP_REPLACE('aa','a(?#comment)a','e');
+SELECT REGEXP_REPLACE('aba','a(?#b)a','e');
+
+#ungreedy maching
+SELECT REGEXP_REPLACE('ddd<ab>cc</ab>eee','<.+?>','*');
+
+#Extended character classes
+SELECT REGEXP_REPLACE('aaa','\\W\\W\\W','e');
+SELECT REGEXP_REPLACE('aaa','\\w\\w\\w','e');
+SELECT REGEXP_REPLACE('%','\\W','e');
+SELECT REGEXP_REPLACE('%a$','\\W.\\W','e');
+SELECT REGEXP_REPLACE('%a$','\\W\\w\\W','e');
+
+SELECT REGEXP_REPLACE('123','\\d\\d\\d\\d\\d\\d','e');
+SELECT REGEXP_REPLACE('123','\\d\\d\\d','e');
+SELECT REGEXP_REPLACE('aaa','\\d\\d\\d','e');
+SELECT REGEXP_REPLACE('1a3','\\d.\\d\\d.\\d','e');
+SELECT REGEXP_REPLACE('1a3','\\d.\\d','e');
+SELECT REGEXP_REPLACE('a1b','\\d.\\d','e');
+
+SELECT REGEXP_REPLACE('8','\\D','e');
+SELECT REGEXP_REPLACE('a','\\D','e');
+SELECT REGEXP_REPLACE('%','\\D','e');
+SELECT REGEXP_REPLACE('a1','\\D\\d','e');
+SELECT REGEXP_REPLACE('a1','\\d\\D','e');
+
+SELECT REGEXP_REPLACE('\t','\\s','e');
+SELECT REGEXP_REPLACE('\r','\\s','e');
+SELECT REGEXP_REPLACE('\n','\\s','e');
+
+SELECT REGEXP_REPLACE('a','\\S','e');
+SELECT REGEXP_REPLACE('1','\\S','e');
+SELECT REGEXP_REPLACE('!','\\S','e');
+SELECT REGEXP_REPLACE('.','\\S','e');
+
+--echo #
+--echo # Checking REGEXP_INSTR
+--echo #
+SELECT REGEXP_INSTR('abcd','X');
+SELECT REGEXP_INSTR('abcd','a');
+SELECT REGEXP_INSTR('abcd','b');
+SELECT REGEXP_INSTR('abcd','c');
+SELECT REGEXP_INSTR('abcd','d');
+SELECT REGEXP_INSTR('aaaa','(?<=a)a');
+
+SELECT REGEXP_INSTR('вася','в');
+SELECT REGEXP_INSTR('вася','а');
+SELECT REGEXP_INSTR('вася','с');
+SELECT REGEXP_INSTR('вася','я');
+SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('в' USING koi8r));
+SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('а' USING koi8r));
+SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('с' USING koi8r));
+SELECT REGEXP_INSTR(CONVERT('вася' USING koi8r), CONVERT('я' USING koi8r));
+
+
+--echo #
+--echo # Checking REGEXP_SUBSTR
+--echo #
+
+# Check data type
+CREATE TABLE t1 AS SELECT REGEXP_SUBSTR('abc','b');
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+# Check print()
+EXPLAIN EXTENDED SELECT REGEXP_SUBSTR('abc','b');
+
+# Check decimals
+CREATE TABLE t1 AS SELECT REGEXP_SUBSTR('abc','b')+0;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+
+SELECT REGEXP_SUBSTR('See https://mariadb.org/en/foundation/ for details', 'https?://[^/]*');