diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-08-01 16:32:40 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-08-01 16:32:40 +0000 |
commit | 31c980d344bf14c20556d7949c82f7d522fb39cf (patch) | |
tree | 4a9191e08745b25d9663c81169c1a14980276180 | |
parent | 98367bb5b2578f0134a4082da196a1f9adc816ee (diff) | |
download | pcre-31c980d344bf14c20556d7949c82f7d522fb39cf.tar.gz |
Test updates.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@651 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rwxr-xr-x | RunTest | 34 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | testdata/testinput11 | 6 | ||||
-rw-r--r-- | testdata/testinput12 | 568 | ||||
-rw-r--r-- | testdata/testinput13 | 569 | ||||
-rw-r--r-- | testdata/testinput2 | 12 | ||||
-rw-r--r-- | testdata/testinput5 | 12 | ||||
-rw-r--r-- | testdata/testoutput11 | 10 | ||||
-rw-r--r-- | testdata/testoutput12 | 1276 | ||||
-rw-r--r-- | testdata/testoutput13 | 1275 | ||||
-rw-r--r-- | testdata/testoutput2 | 16 | ||||
-rw-r--r-- | testdata/testoutput5 | 16 |
12 files changed, 1910 insertions, 1886 deletions
@@ -54,6 +54,7 @@ do9=no do10=no do11=no do12=no +do13=no while [ $# -gt 0 ] ; do case $1 in @@ -69,6 +70,7 @@ while [ $# -gt 0 ] ; do 10) do10=yes;; 11) do11=yes;; 12) do12=yes;; + 13) do13=yes;; valgrind) valgrind="valgrind -q";; *) echo "Unknown test number $1"; exit 1;; esac @@ -88,6 +90,10 @@ if [ $utf8 -eq 0 ] ; then echo "Can't run test 8 because UTF-8 support is not configured" exit 1 fi + if [ $do12 = yes ] ; then + echo "Can't run test 12 because UTF-8 support is not configured" + exit 1 + fi fi if [ $ucp -eq 0 ] ; then @@ -103,7 +109,7 @@ if [ $ucp -eq 0 ] ; then echo "Can't run test 10 because Unicode property support is not configured" exit 1 fi - if [ $do12 = yes ] ; then - echo "Can't run test 12 because Unicode property support is not configured" + if [ $do13 = yes ] ; then + echo "Can't run test 13 because Unicode property support is not configured" exit 1 fi @@ -132,7 +138,8 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \ if [ $utf8 -ne 0 -a $ucp -ne 0 ] ; then do9=yes; fi if [ $link_size -eq 2 -a $ucp -ne 0 ] ; then do10=yes; fi do11=yes - if [ $utf8 -ne 0 -a $ucp -ne 0 ] ; then do12=yes; fi + if [ $utf8 -ne 0 ] ; then do12=yes; fi + if [ $utf8 -ne 0 -a $ucp -ne 0 ] ; then do13=yes; fi fi # Show which release @@ -328,10 +335,10 @@ if [ $do10 = yes ] ; then done fi -# Test of Perl >= 5.10 features +# Test of Perl >= 5.10 features without UTF8 support if [ $do11 = yes ] ; then - echo "Test 11: Features from Perl >= 5.10" + echo "Test 11: Features from Perl >= 5.10 without UTF8 support" for opt in "" "-s"; do $valgrind ./pcretest -q $opt $testdata/testinput11 testtry if [ $? = 0 ] ; then @@ -343,10 +350,10 @@ if [ $do11 = yes ] ; then done fi -# Test non-Perl-compatible Unicode property support +# Test of Perl >= 5.10 features with UTF8 support if [ $do12 = yes ] ; then - echo "Test 12: API, internals, and non-Perl stuff for Unicode property support" + echo "Test 12: Features from Perl >= 5.10 with UTF8 support" for opt in "" "-s"; do $valgrind ./pcretest -q $opt $testdata/testinput12 testtry if [ $?
= 0 ] ; then @@ -358,4 +365,19 @@ if [ $do12 = yes ] ; then done fi +# Test non-Perl-compatible Unicode property support + +if [ $do13 = yes ] ; then + echo "Test 13: API, internals, and non-Perl stuff for Unicode property support" + for opt in "" "-s"; do + $valgrind ./pcretest -q $opt $testdata/testinput13 testtry + if [ $? = 0 ] ; then + $cf $testdata/testoutput13 testtry + if [ $? != 0 ] ; then exit 1; fi + else exit 1 + fi + if [ "$opt" = "-s" ] ; then echo "OK with study" ; else echo "OK"; fi + done +fi + # End diff --git a/configure.ac b/configure.ac index 9c7537c..1b50ed6 100644 --- a/configure.ac +++ b/configure.ac @@ -11,7 +11,7 @@ dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre_major, [8]) m4_define(pcre_minor, [13]) m4_define(pcre_prerelease, [-RC1]) -m4_define(pcre_date, [2011-04-30]) +m4_define(pcre_date, [2011-08-02]) # Libtool shared library interface versions (current:revision:age) m4_define(libpcre_version, [0:1:0]) diff --git a/testdata/testinput11 b/testdata/testinput11 index 120594e..c151406 100644 --- a/testdata/testinput11 +++ b/testdata/testinput11 @@ -670,10 +670,4 @@ however, we need the complication for Perl. ---/ name)/K abc -/a(*:a\x{1234}b)/8K - abc - -/a(*:a£b)/8K - abc - /-- End of testinput11 --/ diff --git a/testdata/testinput12 b/testdata/testinput12 index d9f6daf..3fb57a4 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -1,569 +1,7 @@ -/-- These tests for Unicode property support test PCRE's API and show some of - the compiled code. They are not Perl-compatible. 
--/ - -/[\p{L}]/DZ - -/[\p{^L}]/DZ - -/[\P{L}]/DZ - -/[\P{^L}]/DZ - -/[abc\p{L}\x{0660}]/8DZ - -/[\p{Nd}]/8DZ - 1234 - -/[\p{Nd}+-]+/8DZ - 1234 - 12-34 - 12+\x{661}-34 - ** Failers - abcd - -/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ - -/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ - -/AB\x{1fb0}/8DZ - -/AB\x{1fb0}/8DZi - -/[\x{105}-\x{109}]/8iDZ - \x{104} - \x{105} - \x{109} - ** Failers - \x{100} - \x{10a} - -/[z-\x{100}]/8iDZ - Z - z - \x{39c} - \x{178} - | - \x{80} - \x{ff} - \x{100} - \x{101} - ** Failers - \x{102} - Y - y - -/[z-\x{100}]/8DZi - -/(?:[\PPa*]*){8,}/ - -/[\P{Any}]/BZ - -/[\P{Any}\E]/BZ - -/(\P{Yi}+\277)/ - -/(\P{Yi}+\277)?/ - -/(?<=\P{Yi}{3}A)X/ - -/\p{Yi}+(\P{Yi}+)(?1)/ - -/(\P{Yi}{2}\277)?/ - -/[\P{Yi}A]/ - -/[\P{Yi}\P{Yi}\P{Yi}A]/ - -/[^\P{Yi}A]/ - -/[^\P{Yi}\P{Yi}\P{Yi}A]/ - -/(\P{Yi}*\277)*/ - -/(\P{Yi}*?\277)*/ - -/(\p{Yi}*+\277)*/ - -/(\P{Yi}?\277)*/ - -/(\P{Yi}??\277)*/ - -/(\p{Yi}?+\277)*/ - -/(\P{Yi}{0,3}\277)*/ - -/(\P{Yi}{0,3}?\277)*/ - -/(\p{Yi}{0,3}+\277)*/ - -/\p{Zl}{2,3}+/8BZ - \xe2\x80\xa8\xe2\x80\xa8 - \x{2028}\x{2028}\x{2028} - -/\p{Zl}/8BZ - -/\p{Lu}{3}+/8BZ - -/\pL{2}+/8BZ - -/\p{Cc}{2}+/8BZ - -/^\p{Cs}/8 - \?\x{dfff} - ** Failers - \x{09f} - -/^\p{Sc}+/8 - $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} - \x{9f2} - ** Failers - X - \x{2c2} - -/^\p{Zs}/8 - \ \ - \x{a0} - \x{1680} - \x{180e} - \x{2000} - \x{2001} - ** Failers - \x{2028} - \x{200d} - -/-- These four are here rather than in test 6 because Perl has problems with - the negative versions of the properties. --/ - -/\p{^Lu}/8i - 1234 - ** Failers - ABC - -/\P{Lu}/8i - 1234 - ** Failers - ABC - -/\p{Ll}/8i - a - Az - ** Failers - ABC - -/\p{Lu}/8i - A - a\x{10a0}B - ** Failers - a - \x{1d00} - -/[\x{c0}\x{391}]/8i - \x{c0} - \x{e0} - -/-- The next two are special cases where the lengths of the different cases of -the same character differ. The first went wrong with heap frame storage; the -second was broken in all cases. 
--/ - -/^\x{023a}+?(\x{0130}+)/8i - \x{023a}\x{2c65}\x{0130} - -/^\x{023a}+([^X])/8i - \x{023a}\x{2c65}X - -/\x{c0}+\x{116}+/8i - \x{c0}\x{e0}\x{116}\x{117} - -/[\x{c0}\x{116}]+/8i - \x{c0}\x{e0}\x{116}\x{117} - -/(\x{de})\1/8i - \x{de}\x{de} - \x{de}\x{fe} - \x{fe}\x{fe} - \x{fe}\x{de} - -/^\x{c0}$/8i - \x{c0} - \x{e0} - -/^\x{e0}$/8i - \x{c0} - \x{e0} - -/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE -will match it only with UCP support, because without that it has no notion -of case for anything other than the ASCII letters. --/ - -/((?i)[\x{c0}])/8 - \x{c0} - \x{e0} - -/(?i:[\x{c0}])/8 - \x{c0} - \x{e0} - -/-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8 - -/^\X/8 - A - A\x{300}BC - A\x{300}\x{301}\x{302}BC - *** Failers - \x{300} - -/-- These are PCRE's extra properties to help with Unicodizing \d etc. --/ - -/^\p{Xan}/8 - ABCD - 1234 - \x{6ca} - \x{a6c} - \x{10a7} - ** Failers - _ABC - -/^\p{Xan}+/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - ** Failers - _ABC - -/^\p{Xan}+?/8 - \x{6ca}\x{a6c}\x{10a7}_ - -/^\p{Xan}*/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - -/^\p{Xan}{2,9}/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - -/^\p{Xan}{2,9}?/8 - \x{6ca}\x{a6c}\x{10a7}_ - -/^[\p{Xan}]/8 - ABCD1234_ - 1234abcd_ - \x{6ca} - \x{a6c} - \x{10a7} - ** Failers - _ABC - -/^[\p{Xan}]+/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - ** Failers - _ABC - -/^>\p{Xsp}/8 - >\x{1680}\x{2028}\x{0b} - >\x{a0} - ** Failers - \x{0b} - -/^>\p{Xsp}+/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xsp}+?/8 - >\x{1680}\x{2028}\x{0b} - -/^>\p{Xsp}*/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xsp}{2,9}/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xsp}{2,9}?/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>[\p{Xsp}]/8 - >\x{2028}\x{0b} - -/^>[\p{Xsp}]+/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xps}/8 - >\x{1680}\x{2028}\x{0b} - >\x{a0} - ** Failers - \x{0b} - 
-/^>\p{Xps}+/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xps}+?/8 - >\x{1680}\x{2028}\x{0b} - -/^>\p{Xps}*/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xps}{2,9}/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xps}{2,9}?/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>[\p{Xps}]/8 - >\x{2028}\x{0b} - -/^>[\p{Xps}]+/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^\p{Xwd}/8 - ABCD - 1234 - \x{6ca} - \x{a6c} - \x{10a7} - _ABC - ** Failers - [] - -/^\p{Xwd}+/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - -/^\p{Xwd}+?/8 - \x{6ca}\x{a6c}\x{10a7}_ - -/^\p{Xwd}*/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - -/^\p{Xwd}{2,9}/8 - A_B12\x{6ca}\x{a6c}\x{10a7} - -/^\p{Xwd}{2,9}?/8 - \x{6ca}\x{a6c}\x{10a7}_ - -/^[\p{Xwd}]/8 - ABCD1234_ - 1234abcd_ - \x{6ca} - \x{a6c} - \x{10a7} - _ABC - ** Failers - [] - -/^[\p{Xwd}]+/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - -/-- A check not in UTF-8 mode --/ - -/^[\p{Xwd}]+/ - ABCD1234_ - -/-- Some negative checks --/ - -/^[\P{Xwd}]+/8 - !.+\x{019}\x{35a}AB - -/^[\p{^Xwd}]+/8 - !.+\x{019}\x{35a}AB - -/[\D]/WBZ8 - 1\x{3c8}2 - -/[\d]/WBZ8 - >\x{6f4}< - -/[\S]/WBZ8 - \x{1680}\x{6f4}\x{1680} - -/[\s]/WBZ8 - >\x{1680}< - -/[\W]/WBZ8 - A\x{1712}B - -/[\w]/WBZ8 - >\x{1723}< - -/\D/WBZ8 - 1\x{3c8}2 - -/\d/WBZ8 - >\x{6f4}< - -/\S/WBZ8 - \x{1680}\x{6f4}\x{1680} - -/\s/WBZ8 - >\x{1680}> - -/\W/WBZ8 - A\x{1712}B - -/\w/WBZ8 - >\x{1723}< - -/[[:alpha:]]/WBZ - -/[[:lower:]]/WBZ - -/[[:upper:]]/WBZ - -/[[:alnum:]]/WBZ - -/[[:ascii:]]/WBZ - -/[[:blank:]]/WBZ - -/[[:cntrl:]]/WBZ - -/[[:digit:]]/WBZ - -/[[:graph:]]/WBZ - -/[[:print:]]/WBZ - -/[[:punct:]]/WBZ - -/[[:space:]]/WBZ - -/[[:word:]]/WBZ - -/[[:xdigit:]]/WBZ - -/-- Unicode properties for \b abd \B --/ - -/\b...\B/8W - abc_ - \x{37e}abc\x{376} - \x{37e}\x{376}\x{371}\x{393}\x{394} - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ - -/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ - -/\b...\B/8 - abc_ - ** Failers - 
\x{37e}abc\x{376} - \x{37e}\x{376}\x{371}\x{393}\x{394} - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ - -/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ - -/\b...\B/W - abc_ - !\x{c0}++\x{c1}\x{c2} - !\x{c0}+++++ - -/-- POSIX interface --/ - -/\w/P - +++\x{c2} - -/\w/WP - +++\x{c2} - -/-- Some of these are silly, but they check various combinations --/ - -/[[:^alpha:][:^cntrl:]]+/8WBZ - 123 - abc - -/[[:^cntrl:][:^alpha:]]+/8WBZ - 123 - abc - -/[[:alpha:]]+/8WBZ +/a(*:a\x{1234}b)/8K abc -/[[:^alpha:]\S]+/8WBZ - 123 - abc - -/[^\d]+/8WBZ - abc123 - abc\x{123} - \x{660}abc - -/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI - \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} - \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} - -/\p{Xps}*/SI - -/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ - -/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ - -/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ - -/\p{Han}+X\p{Greek}+\x{370}/BZ8 - -/\p{Xan}+!\p{Xan}+A/BZ - -/\p{Xsp}+!\p{Xsp}\t/BZ - -/\p{Xps}+!\p{Xps}\t/BZ - -/\p{Xwd}+!\p{Xwd}_/BZ - -/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ - -/-- These behaved oddly in Perl, so they are kept in this test --/ - -/(\x{23a}\x{23a}\x{23a})?\1/8i - \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} - -/(ȺȺȺ)?\1/8i - ȺȺȺⱥⱥ - -/(\x{23a}\x{23a}\x{23a})?\1/8i - \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} - -/(ȺȺȺ)?\1/8i - ȺȺȺⱥⱥⱥ - -/(\x{23a}\x{23a}\x{23a})\1/8i - \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} - -/(ȺȺȺ)\1/8i - ȺȺȺⱥⱥ - -/(\x{23a}\x{23a}\x{23a})\1/8i - \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} - -/(ȺȺȺ)\1/8i - ȺȺȺⱥⱥⱥ - -/(\x{2c65}\x{2c65})\1/8i - \x{2c65}\x{2c65}\x{23a}\x{23a} - -/(ⱥⱥ)\1/8i - ⱥⱥȺȺ - -/(\x{23a}\x{23a}\x{23a})\1Y/8i - X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ - -/(\x{2c65}\x{2c65})\1Y/8i - X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ - -/-- --/ - -/-- These scripts 
weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/ - -/^[\p{Batak}]/8 - \x{1bc0} - \x{1bff} - ** Failers - \x{1bf4} - -/^[\p{Brahmi}]/8 - \x{11000} - \x{1106f} - ** Failers - \x{1104e} - -/^[\p{Mandaic}]/8 - \x{840} - \x{85e} - ** Failers - \x{85c} - \x{85d} - -/-- --/ +/a(*:a£b)/8K + abc /-- End of testinput12 --/ diff --git a/testdata/testinput13 b/testdata/testinput13 new file mode 100644 index 0000000..6b78eeb --- /dev/null +++ b/testdata/testinput13 @@ -0,0 +1,569 @@ +/-- These tests for Unicode property support test PCRE's API and show some of + the compiled code. They are not Perl-compatible. --/ + +/[\p{L}]/DZ + +/[\p{^L}]/DZ + +/[\P{L}]/DZ + +/[\P{^L}]/DZ + +/[abc\p{L}\x{0660}]/8DZ + +/[\p{Nd}]/8DZ + 1234 + +/[\p{Nd}+-]+/8DZ + 1234 + 12-34 + 12+\x{661}-34 + ** Failers + abcd + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ + +/AB\x{1fb0}/8DZ + +/AB\x{1fb0}/8DZi + +/[\x{105}-\x{109}]/8iDZ + \x{104} + \x{105} + \x{109} + ** Failers + \x{100} + \x{10a} + +/[z-\x{100}]/8iDZ + Z + z + \x{39c} + \x{178} + | + \x{80} + \x{ff} + \x{100} + \x{101} + ** Failers + \x{102} + Y + y + +/[z-\x{100}]/8DZi + +/(?:[\PPa*]*){8,}/ + +/[\P{Any}]/BZ + +/[\P{Any}\E]/BZ + +/(\P{Yi}+\277)/ + +/(\P{Yi}+\277)?/ + +/(?<=\P{Yi}{3}A)X/ + +/\p{Yi}+(\P{Yi}+)(?1)/ + +/(\P{Yi}{2}\277)?/ + +/[\P{Yi}A]/ + +/[\P{Yi}\P{Yi}\P{Yi}A]/ + +/[^\P{Yi}A]/ + +/[^\P{Yi}\P{Yi}\P{Yi}A]/ + +/(\P{Yi}*\277)*/ + +/(\P{Yi}*?\277)*/ + +/(\p{Yi}*+\277)*/ + +/(\P{Yi}?\277)*/ + +/(\P{Yi}??\277)*/ + +/(\p{Yi}?+\277)*/ + +/(\P{Yi}{0,3}\277)*/ + +/(\P{Yi}{0,3}?\277)*/ + +/(\p{Yi}{0,3}+\277)*/ + +/\p{Zl}{2,3}+/8BZ + \xe2\x80\xa8\xe2\x80\xa8 + \x{2028}\x{2028}\x{2028} + +/\p{Zl}/8BZ + +/\p{Lu}{3}+/8BZ + +/\pL{2}+/8BZ + +/\p{Cc}{2}+/8BZ + +/^\p{Cs}/8 + \?\x{dfff} + ** Failers + \x{09f} + +/^\p{Sc}+/8 + $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} + \x{9f2} + ** Failers + X + \x{2c2} + +/^\p{Zs}/8 + \ \ + \x{a0} + \x{1680} + \x{180e} + \x{2000} + \x{2001} + ** Failers + \x{2028} + \x{200d} 
+ +/-- These four are here rather than in test 6 because Perl has problems with + the negative versions of the properties. --/ + +/\p{^Lu}/8i + 1234 + ** Failers + ABC + +/\P{Lu}/8i + 1234 + ** Failers + ABC + +/\p{Ll}/8i + a + Az + ** Failers + ABC + +/\p{Lu}/8i + A + a\x{10a0}B + ** Failers + a + \x{1d00} + +/[\x{c0}\x{391}]/8i + \x{c0} + \x{e0} + +/-- The next two are special cases where the lengths of the different cases of +the same character differ. The first went wrong with heap frame storage; the +second was broken in all cases. --/ + +/^\x{023a}+?(\x{0130}+)/8i + \x{023a}\x{2c65}\x{0130} + +/^\x{023a}+([^X])/8i + \x{023a}\x{2c65}X + +/\x{c0}+\x{116}+/8i + \x{c0}\x{e0}\x{116}\x{117} + +/[\x{c0}\x{116}]+/8i + \x{c0}\x{e0}\x{116}\x{117} + +/(\x{de})\1/8i + \x{de}\x{de} + \x{de}\x{fe} + \x{fe}\x{fe} + \x{fe}\x{de} + +/^\x{c0}$/8i + \x{c0} + \x{e0} + +/^\x{e0}$/8i + \x{c0} + \x{e0} + +/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE +will match it only with UCP support, because without that it has no notion +of case for anything other than the ASCII letters. --/ + +/((?i)[\x{c0}])/8 + \x{c0} + \x{e0} + +/(?i:[\x{c0}])/8 + \x{c0} + \x{e0} + +/-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8 + +/^\X/8 + A + A\x{300}BC + A\x{300}\x{301}\x{302}BC + *** Failers + \x{300} + +/-- These are PCRE's extra properties to help with Unicodizing \d etc. 
--/ + +/^\p{Xan}/8 + ABCD + 1234 + \x{6ca} + \x{a6c} + \x{10a7} + ** Failers + _ABC + +/^\p{Xan}+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + ** Failers + _ABC + +/^\p{Xan}+?/8 + \x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xan}*/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xan}{2,9}/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xan}{2,9}?/8 + \x{6ca}\x{a6c}\x{10a7}_ + +/^[\p{Xan}]/8 + ABCD1234_ + 1234abcd_ + \x{6ca} + \x{a6c} + \x{10a7} + ** Failers + _ABC + +/^[\p{Xan}]+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + ** Failers + _ABC + +/^>\p{Xsp}/8 + >\x{1680}\x{2028}\x{0b} + >\x{a0} + ** Failers + \x{0b} + +/^>\p{Xsp}+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}+?/8 + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}*/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}?/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xsp}]/8 + >\x{2028}\x{0b} + +/^>[\p{Xsp}]+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}/8 + >\x{1680}\x{2028}\x{0b} + >\x{a0} + ** Failers + \x{0b} + +/^>\p{Xps}+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}+?/8 + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}*/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xps}]/8 + >\x{2028}\x{0b} + +/^>[\p{Xps}]+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/8 + ABCD + 1234 + \x{6ca} + \x{a6c} + \x{10a7} + _ABC + ** Failers + [] + +/^\p{Xwd}+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}+?/8 + \x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}*/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/8 + A_B12\x{6ca}\x{a6c}\x{10a7} + +/^\p{Xwd}{2,9}?/8 + \x{6ca}\x{a6c}\x{10a7}_ + +/^[\p{Xwd}]/8 + ABCD1234_ + 1234abcd_ + \x{6ca} + \x{a6c} + \x{10a7} + _ABC + ** Failers 
+ [] + +/^[\p{Xwd}]+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/-- A check not in UTF-8 mode --/ + +/^[\p{Xwd}]+/ + ABCD1234_ + +/-- Some negative checks --/ + +/^[\P{Xwd}]+/8 + !.+\x{019}\x{35a}AB + +/^[\p{^Xwd}]+/8 + !.+\x{019}\x{35a}AB + +/[\D]/WBZ8 + 1\x{3c8}2 + +/[\d]/WBZ8 + >\x{6f4}< + +/[\S]/WBZ8 + \x{1680}\x{6f4}\x{1680} + +/[\s]/WBZ8 + >\x{1680}< + +/[\W]/WBZ8 + A\x{1712}B + +/[\w]/WBZ8 + >\x{1723}< + +/\D/WBZ8 + 1\x{3c8}2 + +/\d/WBZ8 + >\x{6f4}< + +/\S/WBZ8 + \x{1680}\x{6f4}\x{1680} + +/\s/WBZ8 + >\x{1680}> + +/\W/WBZ8 + A\x{1712}B + +/\w/WBZ8 + >\x{1723}< + +/[[:alpha:]]/WBZ + +/[[:lower:]]/WBZ + +/[[:upper:]]/WBZ + +/[[:alnum:]]/WBZ + +/[[:ascii:]]/WBZ + +/[[:blank:]]/WBZ + +/[[:cntrl:]]/WBZ + +/[[:digit:]]/WBZ + +/[[:graph:]]/WBZ + +/[[:print:]]/WBZ + +/[[:punct:]]/WBZ + +/[[:space:]]/WBZ + +/[[:word:]]/WBZ + +/[[:xdigit:]]/WBZ + +/-- Unicode properties for \b abd \B --/ + +/\b...\B/8W + abc_ + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ + +/\b...\B/8 + abc_ + ** Failers + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ + +/\b...\B/W + abc_ + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- POSIX interface --/ + +/\w/P + +++\x{c2} + +/\w/WP + +++\x{c2} + +/-- Some of these are silly, but they check various combinations --/ + +/[[:^alpha:][:^cntrl:]]+/8WBZ + 123 + abc + +/[[:^cntrl:][:^alpha:]]+/8WBZ + 123 + abc + +/[[:alpha:]]+/8WBZ + abc + +/[[:^alpha:]\S]+/8WBZ + 123 + abc + +/[^\d]+/8WBZ + abc123 + abc\x{123} + \x{660}abc + +/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI + \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + 
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + +/\p{Xps}*/SI + +/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ + +/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ + +/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ + +/\p{Han}+X\p{Greek}+\x{370}/BZ8 + +/\p{Xan}+!\p{Xan}+A/BZ + +/\p{Xsp}+!\p{Xsp}\t/BZ + +/\p{Xps}+!\p{Xps}\t/BZ + +/\p{Xwd}+!\p{Xwd}_/BZ + +/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ + +/-- These behaved oddly in Perl, so they are kept in this test --/ + +/(\x{23a}\x{23a}\x{23a})?\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} + +/(ȺȺȺ)?\1/8i + ȺȺȺⱥⱥ + +/(\x{23a}\x{23a}\x{23a})?\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + +/(ȺȺȺ)?\1/8i + ȺȺȺⱥⱥⱥ + +/(\x{23a}\x{23a}\x{23a})\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} + +/(ȺȺȺ)\1/8i + ȺȺȺⱥⱥ + +/(\x{23a}\x{23a}\x{23a})\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + +/(ȺȺȺ)\1/8i + ȺȺȺⱥⱥⱥ + +/(\x{2c65}\x{2c65})\1/8i + \x{2c65}\x{2c65}\x{23a}\x{23a} + +/(ⱥⱥ)\1/8i + ⱥⱥȺȺ + +/(\x{23a}\x{23a}\x{23a})\1Y/8i + X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ + +/(\x{2c65}\x{2c65})\1Y/8i + X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ + +/-- --/ + +/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/ + +/^[\p{Batak}]/8 + \x{1bc0} + \x{1bff} + ** Failers + \x{1bf4} + +/^[\p{Brahmi}]/8 + \x{11000} + \x{1106f} + ** Failers + \x{1104e} + +/^[\p{Mandaic}]/8 + \x{840} + \x{85e} + ** Failers + \x{85c} + \x{85d} + +/-- --/ + +/-- End of testinput13 --/ diff --git a/testdata/testinput2 b/testdata/testinput2 index 0778c5f..3636e8c 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -3837,16 +3837,4 @@ with \Y. ---/ /.(*F)/ \P\Pabc -/f.*/ - \P\Pfor - -/f.*/s - \P\Pfor - -/f.*/8 - \P\Pfor - -/f.*/8s - \P\Pfor - /-- End of testinput2 --/ diff --git a/testdata/testinput5 b/testdata/testinput5 index 28d54a2..bb1d222 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -879,4 +879,16 @@ correctly, but that messes up comparisons). 
--/ //<bsr_anycrlf><bsr_unicode> +/f.*/ + \P\Pfor + +/f.*/s + \P\Pfor + +/f.*/8 + \P\Pfor + +/f.*/8s + \P\Pfor + /-- End of testinput5 --/ diff --git a/testdata/testoutput11 b/testdata/testoutput11 index 3bcd67f..71bcbc6 100644 --- a/testdata/testoutput11 +++ b/testdata/testoutput11 @@ -1259,14 +1259,4 @@ name)/K MK: any name -/a(*:a\x{1234}b)/8K - abc - 0: a -MK: a\x{1234}b - -/a(*:a£b)/8K - abc - 0: a -MK: a£b - /-- End of testinput11 --/ diff --git a/testdata/testoutput12 b/testdata/testoutput12 index efcd5d3..e526838 100644 --- a/testdata/testoutput12 +++ b/testdata/testoutput12 @@ -1,1275 +1,11 @@ -/-- These tests for Unicode property support test PCRE's API and show some of - the compiled code. They are not Perl-compatible. --/ - -/[\p{L}]/DZ ------------------------------------------------------------------- - Bra - [\p{L}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -No options -No first char -No need char - -/[\p{^L}]/DZ ------------------------------------------------------------------- - Bra - [\P{L}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -No options -No first char -No need char - -/[\P{L}]/DZ ------------------------------------------------------------------- - Bra - [\P{L}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -No options -No first char -No need char - -/[\P{^L}]/DZ ------------------------------------------------------------------- - Bra - [\p{L}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -No options -No first char -No need char - -/[abc\p{L}\x{0660}]/8DZ ------------------------------------------------------------------- - Bra - [a-c\p{L}\x{660}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: 
utf8 -No first char -No need char - -/[\p{Nd}]/8DZ ------------------------------------------------------------------- - Bra - [\p{Nd}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char - 1234 - 0: 1 - -/[\p{Nd}+-]+/8DZ ------------------------------------------------------------------- - Bra - [+\-\p{Nd}]+ - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -No first char -No need char - 1234 - 0: 1234 - 12-34 - 0: 12-34 - 12+\x{661}-34 - 0: 12+\x{661}-34 - ** Failers -No match - abcd -No match - -/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ ------------------------------------------------------------------- - Bra - /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: caseless utf8 -First char = 'A' (caseless) -No need char - -/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ ------------------------------------------------------------------- - Bra - A\x{391}\x{10427}\x{ff3a}\x{1fb0} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 'A' -Need char = 176 - -/AB\x{1fb0}/8DZ ------------------------------------------------------------------- - Bra - AB\x{1fb0} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: utf8 -First char = 'A' -Need char = 176 - -/AB\x{1fb0}/8DZi ------------------------------------------------------------------- - Bra - /i AB\x{1fb0} - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: caseless utf8 -First char = 'A' (caseless) -Need char = 'B' (caseless) - -/[\x{105}-\x{109}]/8iDZ ------------------------------------------------------------------- - 
Bra - [\x{104}-\x{109}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: caseless utf8 -No first char -No need char - \x{104} - 0: \x{104} - \x{105} - 0: \x{105} - \x{109} - 0: \x{109} - ** Failers -No match - \x{100} -No match - \x{10a} -No match - -/[z-\x{100}]/8iDZ ------------------------------------------------------------------- - Bra - [Z\x{39c}\x{178}z-\x{101}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: caseless utf8 -No first char -No need char - Z - 0: Z - z - 0: z - \x{39c} - 0: \x{39c} - \x{178} - 0: \x{178} - | - 0: | - \x{80} - 0: \x{80} - \x{ff} - 0: \x{ff} - \x{100} - 0: \x{100} - \x{101} - 0: \x{101} - ** Failers -No match - \x{102} -No match - Y -No match - y -No match - -/[z-\x{100}]/8DZi ------------------------------------------------------------------- - Bra - [Z\x{39c}\x{178}z-\x{101}] - Ket - End ------------------------------------------------------------------- -Capturing subpattern count = 0 -Options: caseless utf8 -No first char -No need char - -/(?:[\PPa*]*){8,}/ - -/[\P{Any}]/BZ ------------------------------------------------------------------- - Bra - [\P{Any}] - Ket - End ------------------------------------------------------------------- - -/[\P{Any}\E]/BZ ------------------------------------------------------------------- - Bra - [\P{Any}] - Ket - End ------------------------------------------------------------------- - -/(\P{Yi}+\277)/ - -/(\P{Yi}+\277)?/ - -/(?<=\P{Yi}{3}A)X/ - -/\p{Yi}+(\P{Yi}+)(?1)/ - -/(\P{Yi}{2}\277)?/ - -/[\P{Yi}A]/ - -/[\P{Yi}\P{Yi}\P{Yi}A]/ - -/[^\P{Yi}A]/ - -/[^\P{Yi}\P{Yi}\P{Yi}A]/ - -/(\P{Yi}*\277)*/ - -/(\P{Yi}*?\277)*/ - -/(\p{Yi}*+\277)*/ - -/(\P{Yi}?\277)*/ - -/(\P{Yi}??\277)*/ - -/(\p{Yi}?+\277)*/ - -/(\P{Yi}{0,3}\277)*/ - -/(\P{Yi}{0,3}?\277)*/ - -/(\p{Yi}{0,3}+\277)*/ - -/\p{Zl}{2,3}+/8BZ 
------------------------------------------------------------------- - Bra - prop Zl {2} - prop Zl ?+ - Ket - End ------------------------------------------------------------------- - \xe2\x80\xa8\xe2\x80\xa8 - 0: \x{2028}\x{2028} - \x{2028}\x{2028}\x{2028} - 0: \x{2028}\x{2028}\x{2028} - -/\p{Zl}/8BZ ------------------------------------------------------------------- - Bra - prop Zl - Ket - End ------------------------------------------------------------------- - -/\p{Lu}{3}+/8BZ ------------------------------------------------------------------- - Bra - prop Lu {3} - Ket - End ------------------------------------------------------------------- - -/\pL{2}+/8BZ ------------------------------------------------------------------- - Bra - prop L {2} - Ket - End ------------------------------------------------------------------- - -/\p{Cc}{2}+/8BZ ------------------------------------------------------------------- - Bra - prop Cc {2} - Ket - End ------------------------------------------------------------------- - -/^\p{Cs}/8 - \?\x{dfff} - 0: \x{dfff} - ** Failers -No match - \x{09f} -No match - -/^\p{Sc}+/8 - $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} - 0: $\x{a2}\x{a3}\x{a4}\x{a5} - \x{9f2} - 0: \x{9f2} - ** Failers -No match - X -No match - \x{2c2} -No match - -/^\p{Zs}/8 - \ \ - 0: - \x{a0} - 0: \x{a0} - \x{1680} - 0: \x{1680} - \x{180e} - 0: \x{180e} - \x{2000} - 0: \x{2000} - \x{2001} - 0: \x{2001} - ** Failers -No match - \x{2028} -No match - \x{200d} -No match - -/-- These four are here rather than in test 6 because Perl has problems with - the negative versions of the properties. 
--/ - -/\p{^Lu}/8i - 1234 - 0: 1 - ** Failers - 0: * - ABC -No match - -/\P{Lu}/8i - 1234 - 0: 1 - ** Failers - 0: * - ABC -No match - -/\p{Ll}/8i - a - 0: a - Az - 0: z - ** Failers +/a(*:a\x{1234}b)/8K + abc 0: a - ABC -No match - -/\p{Lu}/8i - A - 0: A - a\x{10a0}B - 0: \x{10a0} - ** Failers - 0: F - a -No match - \x{1d00} -No match - -/[\x{c0}\x{391}]/8i - \x{c0} - 0: \x{c0} - \x{e0} - 0: \x{e0} - -/-- The next two are special cases where the lengths of the different cases of -the same character differ. The first went wrong with heap frame storage; the -second was broken in all cases. --/ - -/^\x{023a}+?(\x{0130}+)/8i - \x{023a}\x{2c65}\x{0130} - 0: \x{23a}\x{2c65}\x{130} - 1: \x{130} - -/^\x{023a}+([^X])/8i - \x{023a}\x{2c65}X - 0: \x{23a}\x{2c65} - 1: \x{2c65} - -/\x{c0}+\x{116}+/8i - \x{c0}\x{e0}\x{116}\x{117} - 0: \x{c0}\x{e0}\x{116}\x{117} - -/[\x{c0}\x{116}]+/8i - \x{c0}\x{e0}\x{116}\x{117} - 0: \x{c0}\x{e0}\x{116}\x{117} - -/(\x{de})\1/8i - \x{de}\x{de} - 0: \x{de}\x{de} - 1: \x{de} - \x{de}\x{fe} - 0: \x{de}\x{fe} - 1: \x{de} - \x{fe}\x{fe} - 0: \x{fe}\x{fe} - 1: \x{fe} - \x{fe}\x{de} - 0: \x{fe}\x{de} - 1: \x{fe} - -/^\x{c0}$/8i - \x{c0} - 0: \x{c0} - \x{e0} - 0: \x{e0} - -/^\x{e0}$/8i - \x{c0} - 0: \x{c0} - \x{e0} - 0: \x{e0} - -/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE -will match it only with UCP support, because without that it has no notion -of case for anything other than the ASCII letters. --/ - -/((?i)[\x{c0}])/8 - \x{c0} - 0: \x{c0} - 1: \x{c0} - \x{e0} - 0: \x{e0} - 1: \x{e0} - -/(?i:[\x{c0}])/8 - \x{c0} - 0: \x{c0} - \x{e0} - 0: \x{e0} - -/-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8 - -/^\X/8 - A - 0: A - A\x{300}BC - 0: A\x{300} - A\x{300}\x{301}\x{302}BC - 0: A\x{300}\x{301}\x{302} - *** Failers - 0: * - \x{300} -No match - -/-- These are PCRE's extra properties to help with Unicodizing \d etc. 
--/ - -/^\p{Xan}/8 - ABCD - 0: A - 1234 - 0: 1 - \x{6ca} - 0: \x{6ca} - \x{a6c} - 0: \x{a6c} - \x{10a7} - 0: \x{10a7} - ** Failers -No match - _ABC -No match - -/^\p{Xan}+/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} - ** Failers -No match - _ABC -No match - -/^\p{Xan}+?/8 - \x{6ca}\x{a6c}\x{10a7}_ - 0: \x{6ca} - -/^\p{Xan}*/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} - -/^\p{Xan}{2,9}/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - 0: ABCD1234\x{6ca} - -/^\p{Xan}{2,9}?/8 - \x{6ca}\x{a6c}\x{10a7}_ - 0: \x{6ca}\x{a6c} - -/^[\p{Xan}]/8 - ABCD1234_ - 0: A - 1234abcd_ - 0: 1 - \x{6ca} - 0: \x{6ca} - \x{a6c} - 0: \x{a6c} - \x{10a7} - 0: \x{10a7} - ** Failers -No match - _ABC -No match - -/^[\p{Xan}]+/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} - ** Failers -No match - _ABC -No match - -/^>\p{Xsp}/8 - >\x{1680}\x{2028}\x{0b} - 0: >\x{1680} - >\x{a0} - 0: >\x{a0} - ** Failers -No match - \x{0b} -No match - -/^>\p{Xsp}+/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} - -/^>\p{Xsp}+?/8 - >\x{1680}\x{2028}\x{0b} - 0: >\x{1680} - -/^>\p{Xsp}*/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} - -/^>\p{Xsp}{2,9}/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} - -/^>\p{Xsp}{2,9}?/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09} - -/^>[\p{Xsp}]/8 - >\x{2028}\x{0b} - 0: >\x{2028} - -/^>[\p{Xsp}]+/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} - -/^>\p{Xps}/8 - >\x{1680}\x{2028}\x{0b} - 0: >\x{1680} - >\x{a0} - 0: >\x{a0} - ** Failers -No match - \x{0b} -No match - -/^>\p{Xps}+/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xps}+?/8 - 
>\x{1680}\x{2028}\x{0b} - 0: >\x{1680} - -/^>\p{Xps}*/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xps}{2,9}/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^>\p{Xps}{2,9}?/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09} - -/^>[\p{Xps}]/8 - >\x{2028}\x{0b} - 0: >\x{2028} - -/^>[\p{Xps}]+/8 - > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} - -/^\p{Xwd}/8 - ABCD - 0: A - 1234 - 0: 1 - \x{6ca} - 0: \x{6ca} - \x{a6c} - 0: \x{a6c} - \x{10a7} - 0: \x{10a7} - _ABC - 0: _ - ** Failers -No match - [] -No match - -/^\p{Xwd}+/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - -/^\p{Xwd}+?/8 - \x{6ca}\x{a6c}\x{10a7}_ - 0: \x{6ca} +MK: a\x{1234}b -/^\p{Xwd}*/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - -/^\p{Xwd}{2,9}/8 - A_B12\x{6ca}\x{a6c}\x{10a7} - 0: A_B12\x{6ca}\x{a6c}\x{10a7} - -/^\p{Xwd}{2,9}?/8 - \x{6ca}\x{a6c}\x{10a7}_ - 0: \x{6ca}\x{a6c} - -/^[\p{Xwd}]/8 - ABCD1234_ - 0: A - 1234abcd_ - 0: 1 - \x{6ca} - 0: \x{6ca} - \x{a6c} - 0: \x{a6c} - \x{10a7} - 0: \x{10a7} - _ABC - 0: _ - ** Failers -No match - [] -No match - -/^[\p{Xwd}]+/8 - ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ - -/-- A check not in UTF-8 mode --/ - -/^[\p{Xwd}]+/ - ABCD1234_ - 0: ABCD1234_ - -/-- Some negative checks --/ - -/^[\P{Xwd}]+/8 - !.+\x{019}\x{35a}AB - 0: !.+\x{19}\x{35a} - -/^[\p{^Xwd}]+/8 - !.+\x{019}\x{35a}AB - 0: !.+\x{19}\x{35a} - -/[\D]/WBZ8 ------------------------------------------------------------------- - Bra - [\P{Nd}] - Ket - End ------------------------------------------------------------------- - 1\x{3c8}2 - 0: \x{3c8} - -/[\d]/WBZ8 ------------------------------------------------------------------- - Bra - [\p{Nd}] - Ket - End 
------------------------------------------------------------------- - >\x{6f4}< - 0: \x{6f4} - -/[\S]/WBZ8 ------------------------------------------------------------------- - Bra - [\P{Xsp}] - Ket - End ------------------------------------------------------------------- - \x{1680}\x{6f4}\x{1680} - 0: \x{6f4} - -/[\s]/WBZ8 ------------------------------------------------------------------- - Bra - [\p{Xsp}] - Ket - End ------------------------------------------------------------------- - >\x{1680}< - 0: \x{1680} - -/[\W]/WBZ8 ------------------------------------------------------------------- - Bra - [\P{Xwd}] - Ket - End ------------------------------------------------------------------- - A\x{1712}B - 0: \x{1712} - -/[\w]/WBZ8 ------------------------------------------------------------------- - Bra - [\p{Xwd}] - Ket - End ------------------------------------------------------------------- - >\x{1723}< - 0: \x{1723} - -/\D/WBZ8 ------------------------------------------------------------------- - Bra - notprop Nd - Ket - End ------------------------------------------------------------------- - 1\x{3c8}2 - 0: \x{3c8} - -/\d/WBZ8 ------------------------------------------------------------------- - Bra - prop Nd - Ket - End ------------------------------------------------------------------- - >\x{6f4}< - 0: \x{6f4} - -/\S/WBZ8 ------------------------------------------------------------------- - Bra - notprop Xsp - Ket - End ------------------------------------------------------------------- - \x{1680}\x{6f4}\x{1680} - 0: \x{6f4} - -/\s/WBZ8 ------------------------------------------------------------------- - Bra - prop Xsp - Ket - End ------------------------------------------------------------------- - >\x{1680}> - 0: \x{1680} - -/\W/WBZ8 ------------------------------------------------------------------- - Bra - notprop Xwd - Ket - End ------------------------------------------------------------------- - A\x{1712}B - 0: \x{1712} - -/\w/WBZ8 
------------------------------------------------------------------- - Bra - prop Xwd - Ket - End ------------------------------------------------------------------- - >\x{1723}< - 0: \x{1723} - -/[[:alpha:]]/WBZ ------------------------------------------------------------------- - Bra - [\p{L}] - Ket - End ------------------------------------------------------------------- - -/[[:lower:]]/WBZ ------------------------------------------------------------------- - Bra - [\p{Ll}] - Ket - End ------------------------------------------------------------------- - -/[[:upper:]]/WBZ ------------------------------------------------------------------- - Bra - [\p{Lu}] - Ket - End ------------------------------------------------------------------- - -/[[:alnum:]]/WBZ ------------------------------------------------------------------- - Bra - [\p{Xan}] - Ket - End ------------------------------------------------------------------- - -/[[:ascii:]]/WBZ ------------------------------------------------------------------- - Bra - [\x00-\x7f] - Ket - End ------------------------------------------------------------------- - -/[[:blank:]]/WBZ ------------------------------------------------------------------- - Bra - [\x09 \xa0] - Ket - End ------------------------------------------------------------------- - -/[[:cntrl:]]/WBZ ------------------------------------------------------------------- - Bra - [\x00-\x1f\x7f] - Ket - End ------------------------------------------------------------------- - -/[[:digit:]]/WBZ ------------------------------------------------------------------- - Bra - [\p{Nd}] - Ket - End ------------------------------------------------------------------- - -/[[:graph:]]/WBZ ------------------------------------------------------------------- - Bra - [!-~] - Ket - End ------------------------------------------------------------------- - -/[[:print:]]/WBZ ------------------------------------------------------------------- - Bra - [ -~] - Ket - End 
------------------------------------------------------------------- - -/[[:punct:]]/WBZ ------------------------------------------------------------------- - Bra - [!-/:-@[-`{-~] - Ket - End ------------------------------------------------------------------- - -/[[:space:]]/WBZ ------------------------------------------------------------------- - Bra - [\p{Xps}] - Ket - End ------------------------------------------------------------------- - -/[[:word:]]/WBZ ------------------------------------------------------------------- - Bra - [\p{Xwd}] - Ket - End ------------------------------------------------------------------- - -/[[:xdigit:]]/WBZ ------------------------------------------------------------------- - Bra - [0-9A-Fa-f] - Ket - End ------------------------------------------------------------------- - -/-- Unicode properties for \b abd \B --/ - -/\b...\B/8W - abc_ - 0: abc - \x{37e}abc\x{376} - 0: abc - \x{37e}\x{376}\x{371}\x{393}\x{394} - 0: \x{376}\x{371}\x{393} - !\x{c0}++\x{c1}\x{c2} - 0: ++\x{c1} - !\x{c0}+++++ - 0: \x{c0}++ - -/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ - -/\b...\B/8 - abc_ - 0: abc - ** Failers - 0: Fai - \x{37e}abc\x{376} -No match - \x{37e}\x{376}\x{371}\x{393}\x{394} -No match - !\x{c0}++\x{c1}\x{c2} -No match - !\x{c0}+++++ -No match - -/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ - -/\b...\B/W - abc_ - 0: abc - !\x{c0}++\x{c1}\x{c2} - 0: ++\xc1 - !\x{c0}+++++ - 0: \xc0++ - -/-- POSIX interface --/ - -/\w/P - +++\x{c2} -No match: POSIX code 17: match failed - -/\w/WP - +++\x{c2} - 0: \xc2 - -/-- Some of these are silly, but they check various combinations --/ - -/[[:^alpha:][:^cntrl:]]+/8WBZ ------------------------------------------------------------------- - Bra - [ -~\x80-\xff\P{L}]+ - Ket - End ------------------------------------------------------------------- - 123 - 0: 123 - abc - 0: abc - -/[[:^cntrl:][:^alpha:]]+/8WBZ 
------------------------------------------------------------------- - Bra - [ -~\x80-\xff\P{L}]+ - Ket - End ------------------------------------------------------------------- - 123 - 0: 123 - abc - 0: abc - -/[[:alpha:]]+/8WBZ ------------------------------------------------------------------- - Bra - [\p{L}]+ - Ket - End ------------------------------------------------------------------- +/a(*:a£b)/8K abc - 0: abc - -/[[:^alpha:]\S]+/8WBZ ------------------------------------------------------------------- - Bra - [\P{L}\P{Xsp}]+ - Ket - End ------------------------------------------------------------------- - 123 - 0: 123 - abc - 0: abc - -/[^\d]+/8WBZ ------------------------------------------------------------------- - Bra - [^\p{Nd}]+ - Ket - End ------------------------------------------------------------------- - abc123 - 0: abc - abc\x{123} - 0: abc\x{123} - \x{660}abc - 0: abc - -/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI -Capturing subpattern count = 0 -Options: caseless utf8 -No first char -No need char -Subject length lower bound = 17 -Starting byte set: \xd0 \xd1 - \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} - 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} - \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} - 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} - -/\p{Xps}*/SI -Capturing subpattern count = 0 -No options -No first char -No need char -Subject length lower bound = 0 -No set of starting bytes - -/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ ------------------------------------------------------------------- - Bra - prop Lu ++ - 9 - prop Lu + - B - prop Lu ++ - b - Ket - End 
------------------------------------------------------------------- - -/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ ------------------------------------------------------------------- - Bra - notprop Lu + - 9 - notprop Lu ++ - B - notprop Lu + - b - Ket - End ------------------------------------------------------------------- - -/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ ------------------------------------------------------------------- - Bra - notprop Lu + - 9 - notprop Lu ++ - B - notprop Lu + - b - Ket - End ------------------------------------------------------------------- - -/\p{Han}+X\p{Greek}+\x{370}/BZ8 ------------------------------------------------------------------- - Bra - prop Han ++ - X - prop Greek + - \x{370} - Ket - End ------------------------------------------------------------------- - -/\p{Xan}+!\p{Xan}+A/BZ ------------------------------------------------------------------- - Bra - prop Xan ++ - ! - prop Xan + - A - Ket - End ------------------------------------------------------------------- - -/\p{Xsp}+!\p{Xsp}\t/BZ ------------------------------------------------------------------- - Bra - prop Xsp ++ - ! - prop Xsp - \x09 - Ket - End ------------------------------------------------------------------- - -/\p{Xps}+!\p{Xps}\t/BZ ------------------------------------------------------------------- - Bra - prop Xps ++ - ! - prop Xps - \x09 - Ket - End ------------------------------------------------------------------- - -/\p{Xwd}+!\p{Xwd}_/BZ ------------------------------------------------------------------- - Bra - prop Xwd ++ - ! 
- prop Xwd - _ - Ket - End ------------------------------------------------------------------- - -/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ ------------------------------------------------------------------- - Bra - A++ - prop N - A++ - prop Nd - B+ - prop N *+ - B+ - prop Nd * - Ket - End ------------------------------------------------------------------- - -/-- These behaved oddly in Perl, so they are kept in this test --/ - -/(\x{23a}\x{23a}\x{23a})?\1/8i - \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} -No match - -/(ȺȺȺ)?\1/8i - ȺȺȺⱥⱥ -No match - -/(\x{23a}\x{23a}\x{23a})?\1/8i - \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} - 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} - 1: \x{23a}\x{23a}\x{23a} - -/(ȺȺȺ)?\1/8i - ȺȺȺⱥⱥⱥ - 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} - 1: \x{23a}\x{23a}\x{23a} - -/(\x{23a}\x{23a}\x{23a})\1/8i - \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} -No match - -/(ȺȺȺ)\1/8i - ȺȺȺⱥⱥ -No match - -/(\x{23a}\x{23a}\x{23a})\1/8i - \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} - 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} - 1: \x{23a}\x{23a}\x{23a} - -/(ȺȺȺ)\1/8i - ȺȺȺⱥⱥⱥ - 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} - 1: \x{23a}\x{23a}\x{23a} - -/(\x{2c65}\x{2c65})\1/8i - \x{2c65}\x{2c65}\x{23a}\x{23a} - 0: \x{2c65}\x{2c65}\x{23a}\x{23a} - 1: \x{2c65}\x{2c65} - -/(ⱥⱥ)\1/8i - ⱥⱥȺȺ - 0: \x{2c65}\x{2c65}\x{23a}\x{23a} - 1: \x{2c65}\x{2c65} - -/(\x{23a}\x{23a}\x{23a})\1Y/8i - X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ - 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y - 1: \x{23a}\x{23a}\x{23a} - -/(\x{2c65}\x{2c65})\1Y/8i - X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ - 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y - 1: \x{2c65}\x{2c65} - -/-- --/ - -/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/ - -/^[\p{Batak}]/8 - \x{1bc0} - 0: \x{1bc0} - \x{1bff} - 0: \x{1bff} - ** Failers -No match - \x{1bf4} -No match - -/^[\p{Brahmi}]/8 - \x{11000} - 0: \x{11000} - \x{1106f} - 0: \x{1106f} - ** Failers -No match - \x{1104e} -No match - 
-/^[\p{Mandaic}]/8 - \x{840} - 0: \x{840} - \x{85e} - 0: \x{85e} - ** Failers -No match - \x{85c} -No match - \x{85d} -No match - -/-- --/ + 0: a +MK: a£b /-- End of testinput12 --/ diff --git a/testdata/testoutput13 b/testdata/testoutput13 new file mode 100644 index 0000000..3cc2fbe --- /dev/null +++ b/testdata/testoutput13 @@ -0,0 +1,1275 @@ +/-- These tests for Unicode property support test PCRE's API and show some of + the compiled code. They are not Perl-compatible. --/ + +/[\p{L}]/DZ +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +No options +No first char +No need char + +/[\p{^L}]/DZ +------------------------------------------------------------------ + Bra + [\P{L}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +No options +No first char +No need char + +/[\P{L}]/DZ +------------------------------------------------------------------ + Bra + [\P{L}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +No options +No first char +No need char + +/[\P{^L}]/DZ +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +No options +No first char +No need char + +/[abc\p{L}\x{0660}]/8DZ +------------------------------------------------------------------ + Bra + [a-c\p{L}\x{660}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +No first char +No need char + +/[\p{Nd}]/8DZ +------------------------------------------------------------------ + Bra + [\p{Nd}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +No 
first char +No need char + 1234 + 0: 1 + +/[\p{Nd}+-]+/8DZ +------------------------------------------------------------------ + Bra + [+\-\p{Nd}]+ + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +No first char +No need char + 1234 + 0: 1234 + 12-34 + 0: 12-34 + 12+\x{661}-34 + 0: 12+\x{661}-34 + ** Failers +No match + abcd +No match + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ +------------------------------------------------------------------ + Bra + /i A\x{391}\x{10427}\x{ff3a}\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf8 +First char = 'A' (caseless) +No need char + +/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ +------------------------------------------------------------------ + Bra + A\x{391}\x{10427}\x{ff3a}\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 'A' +Need char = 176 + +/AB\x{1fb0}/8DZ +------------------------------------------------------------------ + Bra + AB\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf8 +First char = 'A' +Need char = 176 + +/AB\x{1fb0}/8DZi +------------------------------------------------------------------ + Bra + /i AB\x{1fb0} + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf8 +First char = 'A' (caseless) +Need char = 'B' (caseless) + +/[\x{105}-\x{109}]/8iDZ +------------------------------------------------------------------ + Bra + [\x{104}-\x{109}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf8 +No first char +No need char + \x{104} + 0: \x{104} + \x{105} + 0: \x{105} + \x{109} + 0: \x{109} + ** 
Failers +No match + \x{100} +No match + \x{10a} +No match + +/[z-\x{100}]/8iDZ +------------------------------------------------------------------ + Bra + [Z\x{39c}\x{178}z-\x{101}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf8 +No first char +No need char + Z + 0: Z + z + 0: z + \x{39c} + 0: \x{39c} + \x{178} + 0: \x{178} + | + 0: | + \x{80} + 0: \x{80} + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} + \x{101} + 0: \x{101} + ** Failers +No match + \x{102} +No match + Y +No match + y +No match + +/[z-\x{100}]/8DZi +------------------------------------------------------------------ + Bra + [Z\x{39c}\x{178}z-\x{101}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf8 +No first char +No need char + +/(?:[\PPa*]*){8,}/ + +/[\P{Any}]/BZ +------------------------------------------------------------------ + Bra + [\P{Any}] + Ket + End +------------------------------------------------------------------ + +/[\P{Any}\E]/BZ +------------------------------------------------------------------ + Bra + [\P{Any}] + Ket + End +------------------------------------------------------------------ + +/(\P{Yi}+\277)/ + +/(\P{Yi}+\277)?/ + +/(?<=\P{Yi}{3}A)X/ + +/\p{Yi}+(\P{Yi}+)(?1)/ + +/(\P{Yi}{2}\277)?/ + +/[\P{Yi}A]/ + +/[\P{Yi}\P{Yi}\P{Yi}A]/ + +/[^\P{Yi}A]/ + +/[^\P{Yi}\P{Yi}\P{Yi}A]/ + +/(\P{Yi}*\277)*/ + +/(\P{Yi}*?\277)*/ + +/(\p{Yi}*+\277)*/ + +/(\P{Yi}?\277)*/ + +/(\P{Yi}??\277)*/ + +/(\p{Yi}?+\277)*/ + +/(\P{Yi}{0,3}\277)*/ + +/(\P{Yi}{0,3}?\277)*/ + +/(\p{Yi}{0,3}+\277)*/ + +/\p{Zl}{2,3}+/8BZ +------------------------------------------------------------------ + Bra + prop Zl {2} + prop Zl ?+ + Ket + End +------------------------------------------------------------------ + \xe2\x80\xa8\xe2\x80\xa8 + 0: \x{2028}\x{2028} + \x{2028}\x{2028}\x{2028} + 0: \x{2028}\x{2028}\x{2028} + +/\p{Zl}/8BZ 
+------------------------------------------------------------------ + Bra + prop Zl + Ket + End +------------------------------------------------------------------ + +/\p{Lu}{3}+/8BZ +------------------------------------------------------------------ + Bra + prop Lu {3} + Ket + End +------------------------------------------------------------------ + +/\pL{2}+/8BZ +------------------------------------------------------------------ + Bra + prop L {2} + Ket + End +------------------------------------------------------------------ + +/\p{Cc}{2}+/8BZ +------------------------------------------------------------------ + Bra + prop Cc {2} + Ket + End +------------------------------------------------------------------ + +/^\p{Cs}/8 + \?\x{dfff} + 0: \x{dfff} + ** Failers +No match + \x{09f} +No match + +/^\p{Sc}+/8 + $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} + 0: $\x{a2}\x{a3}\x{a4}\x{a5} + \x{9f2} + 0: \x{9f2} + ** Failers +No match + X +No match + \x{2c2} +No match + +/^\p{Zs}/8 + \ \ + 0: + \x{a0} + 0: \x{a0} + \x{1680} + 0: \x{1680} + \x{180e} + 0: \x{180e} + \x{2000} + 0: \x{2000} + \x{2001} + 0: \x{2001} + ** Failers +No match + \x{2028} +No match + \x{200d} +No match + +/-- These four are here rather than in test 6 because Perl has problems with + the negative versions of the properties. --/ + +/\p{^Lu}/8i + 1234 + 0: 1 + ** Failers + 0: * + ABC +No match + +/\P{Lu}/8i + 1234 + 0: 1 + ** Failers + 0: * + ABC +No match + +/\p{Ll}/8i + a + 0: a + Az + 0: z + ** Failers + 0: a + ABC +No match + +/\p{Lu}/8i + A + 0: A + a\x{10a0}B + 0: \x{10a0} + ** Failers + 0: F + a +No match + \x{1d00} +No match + +/[\x{c0}\x{391}]/8i + \x{c0} + 0: \x{c0} + \x{e0} + 0: \x{e0} + +/-- The next two are special cases where the lengths of the different cases of +the same character differ. The first went wrong with heap frame storage; the +second was broken in all cases. 
--/ + +/^\x{023a}+?(\x{0130}+)/8i + \x{023a}\x{2c65}\x{0130} + 0: \x{23a}\x{2c65}\x{130} + 1: \x{130} + +/^\x{023a}+([^X])/8i + \x{023a}\x{2c65}X + 0: \x{23a}\x{2c65} + 1: \x{2c65} + +/\x{c0}+\x{116}+/8i + \x{c0}\x{e0}\x{116}\x{117} + 0: \x{c0}\x{e0}\x{116}\x{117} + +/[\x{c0}\x{116}]+/8i + \x{c0}\x{e0}\x{116}\x{117} + 0: \x{c0}\x{e0}\x{116}\x{117} + +/(\x{de})\1/8i + \x{de}\x{de} + 0: \x{de}\x{de} + 1: \x{de} + \x{de}\x{fe} + 0: \x{de}\x{fe} + 1: \x{de} + \x{fe}\x{fe} + 0: \x{fe}\x{fe} + 1: \x{fe} + \x{fe}\x{de} + 0: \x{fe}\x{de} + 1: \x{fe} + +/^\x{c0}$/8i + \x{c0} + 0: \x{c0} + \x{e0} + 0: \x{e0} + +/^\x{e0}$/8i + \x{c0} + 0: \x{c0} + \x{e0} + 0: \x{e0} + +/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE +will match it only with UCP support, because without that it has no notion +of case for anything other than the ASCII letters. --/ + +/((?i)[\x{c0}])/8 + \x{c0} + 0: \x{c0} + 1: \x{c0} + \x{e0} + 0: \x{e0} + 1: \x{e0} + +/(?i:[\x{c0}])/8 + \x{c0} + 0: \x{c0} + \x{e0} + 0: \x{e0} + +/-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8 + +/^\X/8 + A + 0: A + A\x{300}BC + 0: A\x{300} + A\x{300}\x{301}\x{302}BC + 0: A\x{300}\x{301}\x{302} + *** Failers + 0: * + \x{300} +No match + +/-- These are PCRE's extra properties to help with Unicodizing \d etc. 
--/ + +/^\p{Xan}/8 + ABCD + 0: A + 1234 + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} + ** Failers +No match + _ABC +No match + +/^\p{Xan}+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} + ** Failers +No match + _ABC +No match + +/^\p{Xan}+?/8 + \x{6ca}\x{a6c}\x{10a7}_ + 0: \x{6ca} + +/^\p{Xan}*/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} + +/^\p{Xan}{2,9}/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca} + +/^\p{Xan}{2,9}?/8 + \x{6ca}\x{a6c}\x{10a7}_ + 0: \x{6ca}\x{a6c} + +/^[\p{Xan}]/8 + ABCD1234_ + 0: A + 1234abcd_ + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} + ** Failers +No match + _ABC +No match + +/^[\p{Xan}]+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} + ** Failers +No match + _ABC +No match + +/^>\p{Xsp}/8 + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + >\x{a0} + 0: >\x{a0} + ** Failers +No match + \x{0b} +No match + +/^>\p{Xsp}+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + +/^>\p{Xsp}+?/8 + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + +/^>\p{Xsp}*/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + +/^>\p{Xsp}{2,9}/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + +/^>\p{Xsp}{2,9}?/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09} + +/^>[\p{Xsp}]/8 + >\x{2028}\x{0b} + 0: >\x{2028} + +/^>[\p{Xsp}]+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028} + +/^>\p{Xps}/8 + >\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + >\x{a0} + 0: >\x{a0} + ** Failers +No match + \x{0b} +No match + +/^>\p{Xps}+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}+?/8 + 
>\x{1680}\x{2028}\x{0b} + 0: >\x{1680} + +/^>\p{Xps}*/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09} + +/^>[\p{Xps}]/8 + >\x{2028}\x{0b} + 0: >\x{2028} + +/^>[\p{Xps}]+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/8 + ABCD + 0: A + 1234 + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} + _ABC + 0: _ + ** Failers +No match + [] +No match + +/^\p{Xwd}+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}+?/8 + \x{6ca}\x{a6c}\x{10a7}_ + 0: \x{6ca} + +/^\p{Xwd}*/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/8 + A_B12\x{6ca}\x{a6c}\x{10a7} + 0: A_B12\x{6ca}\x{a6c}\x{10a7} + +/^\p{Xwd}{2,9}?/8 + \x{6ca}\x{a6c}\x{10a7}_ + 0: \x{6ca}\x{a6c} + +/^[\p{Xwd}]/8 + ABCD1234_ + 0: A + 1234abcd_ + 0: 1 + \x{6ca} + 0: \x{6ca} + \x{a6c} + 0: \x{a6c} + \x{10a7} + 0: \x{10a7} + _ABC + 0: _ + ** Failers +No match + [] +No match + +/^[\p{Xwd}]+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/-- A check not in UTF-8 mode --/ + +/^[\p{Xwd}]+/ + ABCD1234_ + 0: ABCD1234_ + +/-- Some negative checks --/ + +/^[\P{Xwd}]+/8 + !.+\x{019}\x{35a}AB + 0: !.+\x{19}\x{35a} + +/^[\p{^Xwd}]+/8 + !.+\x{019}\x{35a}AB + 0: !.+\x{19}\x{35a} + +/[\D]/WBZ8 +------------------------------------------------------------------ + Bra + [\P{Nd}] + Ket + End +------------------------------------------------------------------ + 1\x{3c8}2 + 0: \x{3c8} + +/[\d]/WBZ8 +------------------------------------------------------------------ + Bra + [\p{Nd}] + Ket + End 
+------------------------------------------------------------------ + >\x{6f4}< + 0: \x{6f4} + +/[\S]/WBZ8 +------------------------------------------------------------------ + Bra + [\P{Xsp}] + Ket + End +------------------------------------------------------------------ + \x{1680}\x{6f4}\x{1680} + 0: \x{6f4} + +/[\s]/WBZ8 +------------------------------------------------------------------ + Bra + [\p{Xsp}] + Ket + End +------------------------------------------------------------------ + >\x{1680}< + 0: \x{1680} + +/[\W]/WBZ8 +------------------------------------------------------------------ + Bra + [\P{Xwd}] + Ket + End +------------------------------------------------------------------ + A\x{1712}B + 0: \x{1712} + +/[\w]/WBZ8 +------------------------------------------------------------------ + Bra + [\p{Xwd}] + Ket + End +------------------------------------------------------------------ + >\x{1723}< + 0: \x{1723} + +/\D/WBZ8 +------------------------------------------------------------------ + Bra + notprop Nd + Ket + End +------------------------------------------------------------------ + 1\x{3c8}2 + 0: \x{3c8} + +/\d/WBZ8 +------------------------------------------------------------------ + Bra + prop Nd + Ket + End +------------------------------------------------------------------ + >\x{6f4}< + 0: \x{6f4} + +/\S/WBZ8 +------------------------------------------------------------------ + Bra + notprop Xsp + Ket + End +------------------------------------------------------------------ + \x{1680}\x{6f4}\x{1680} + 0: \x{6f4} + +/\s/WBZ8 +------------------------------------------------------------------ + Bra + prop Xsp + Ket + End +------------------------------------------------------------------ + >\x{1680}> + 0: \x{1680} + +/\W/WBZ8 +------------------------------------------------------------------ + Bra + notprop Xwd + Ket + End +------------------------------------------------------------------ + A\x{1712}B + 0: \x{1712} + +/\w/WBZ8 
+------------------------------------------------------------------ + Bra + prop Xwd + Ket + End +------------------------------------------------------------------ + >\x{1723}< + 0: \x{1723} + +/[[:alpha:]]/WBZ +------------------------------------------------------------------ + Bra + [\p{L}] + Ket + End +------------------------------------------------------------------ + +/[[:lower:]]/WBZ +------------------------------------------------------------------ + Bra + [\p{Ll}] + Ket + End +------------------------------------------------------------------ + +/[[:upper:]]/WBZ +------------------------------------------------------------------ + Bra + [\p{Lu}] + Ket + End +------------------------------------------------------------------ + +/[[:alnum:]]/WBZ +------------------------------------------------------------------ + Bra + [\p{Xan}] + Ket + End +------------------------------------------------------------------ + +/[[:ascii:]]/WBZ +------------------------------------------------------------------ + Bra + [\x00-\x7f] + Ket + End +------------------------------------------------------------------ + +/[[:blank:]]/WBZ +------------------------------------------------------------------ + Bra + [\x09 \xa0] + Ket + End +------------------------------------------------------------------ + +/[[:cntrl:]]/WBZ +------------------------------------------------------------------ + Bra + [\x00-\x1f\x7f] + Ket + End +------------------------------------------------------------------ + +/[[:digit:]]/WBZ +------------------------------------------------------------------ + Bra + [\p{Nd}] + Ket + End +------------------------------------------------------------------ + +/[[:graph:]]/WBZ +------------------------------------------------------------------ + Bra + [!-~] + Ket + End +------------------------------------------------------------------ + +/[[:print:]]/WBZ +------------------------------------------------------------------ + Bra + [ -~] + Ket + End 
+------------------------------------------------------------------ + +/[[:punct:]]/WBZ +------------------------------------------------------------------ + Bra + [!-/:-@[-`{-~] + Ket + End +------------------------------------------------------------------ + +/[[:space:]]/WBZ +------------------------------------------------------------------ + Bra + [\p{Xps}] + Ket + End +------------------------------------------------------------------ + +/[[:word:]]/WBZ +------------------------------------------------------------------ + Bra + [\p{Xwd}] + Ket + End +------------------------------------------------------------------ + +/[[:xdigit:]]/WBZ +------------------------------------------------------------------ + Bra + [0-9A-Fa-f] + Ket + End +------------------------------------------------------------------ + +/-- Unicode properties for \b abd \B --/ + +/\b...\B/8W + abc_ + 0: abc + \x{37e}abc\x{376} + 0: abc + \x{37e}\x{376}\x{371}\x{393}\x{394} + 0: \x{376}\x{371}\x{393} + !\x{c0}++\x{c1}\x{c2} + 0: ++\x{c1} + !\x{c0}+++++ + 0: \x{c0}++ + +/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ + +/\b...\B/8 + abc_ + 0: abc + ** Failers + 0: Fai + \x{37e}abc\x{376} +No match + \x{37e}\x{376}\x{371}\x{393}\x{394} +No match + !\x{c0}++\x{c1}\x{c2} +No match + !\x{c0}+++++ +No match + +/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ + +/\b...\B/W + abc_ + 0: abc + !\x{c0}++\x{c1}\x{c2} + 0: ++\xc1 + !\x{c0}+++++ + 0: \xc0++ + +/-- POSIX interface --/ + +/\w/P + +++\x{c2} +No match: POSIX code 17: match failed + +/\w/WP + +++\x{c2} + 0: \xc2 + +/-- Some of these are silly, but they check various combinations --/ + +/[[:^alpha:][:^cntrl:]]+/8WBZ +------------------------------------------------------------------ + Bra + [ -~\x80-\xff\P{L}]+ + Ket + End +------------------------------------------------------------------ + 123 + 0: 123 + abc + 0: abc + +/[[:^cntrl:][:^alpha:]]+/8WBZ 
+------------------------------------------------------------------ + Bra + [ -~\x80-\xff\P{L}]+ + Ket + End +------------------------------------------------------------------ + 123 + 0: 123 + abc + 0: abc + +/[[:alpha:]]+/8WBZ +------------------------------------------------------------------ + Bra + [\p{L}]+ + Ket + End +------------------------------------------------------------------ + abc + 0: abc + +/[[:^alpha:]\S]+/8WBZ +------------------------------------------------------------------ + Bra + [\P{L}\P{Xsp}]+ + Ket + End +------------------------------------------------------------------ + 123 + 0: 123 + abc + 0: abc + +/[^\d]+/8WBZ +------------------------------------------------------------------ + Bra + [^\p{Nd}]+ + Ket + End +------------------------------------------------------------------ + abc123 + 0: abc + abc\x{123} + 0: abc\x{123} + \x{660}abc + 0: abc + +/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI +Capturing subpattern count = 0 +Options: caseless utf8 +No first char +No need char +Subject length lower bound = 17 +Starting byte set: \xd0 \xd1 + \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} + \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} + +/\p{Xps}*/SI +Capturing subpattern count = 0 +No options +No first char +No need char +Subject length lower bound = 0 +No set of starting bytes + +/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ +------------------------------------------------------------------ + Bra + prop Lu ++ + 9 + prop Lu + + B + prop Lu ++ + b + Ket + End 
+------------------------------------------------------------------ + +/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ +------------------------------------------------------------------ + Bra + notprop Lu + + 9 + notprop Lu ++ + B + notprop Lu + + b + Ket + End +------------------------------------------------------------------ + +/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ +------------------------------------------------------------------ + Bra + notprop Lu + + 9 + notprop Lu ++ + B + notprop Lu + + b + Ket + End +------------------------------------------------------------------ + +/\p{Han}+X\p{Greek}+\x{370}/BZ8 +------------------------------------------------------------------ + Bra + prop Han ++ + X + prop Greek + + \x{370} + Ket + End +------------------------------------------------------------------ + +/\p{Xan}+!\p{Xan}+A/BZ +------------------------------------------------------------------ + Bra + prop Xan ++ + ! + prop Xan + + A + Ket + End +------------------------------------------------------------------ + +/\p{Xsp}+!\p{Xsp}\t/BZ +------------------------------------------------------------------ + Bra + prop Xsp ++ + ! + prop Xsp + \x09 + Ket + End +------------------------------------------------------------------ + +/\p{Xps}+!\p{Xps}\t/BZ +------------------------------------------------------------------ + Bra + prop Xps ++ + ! + prop Xps + \x09 + Ket + End +------------------------------------------------------------------ + +/\p{Xwd}+!\p{Xwd}_/BZ +------------------------------------------------------------------ + Bra + prop Xwd ++ + ! 
+ prop Xwd + _ + Ket + End +------------------------------------------------------------------ + +/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ +------------------------------------------------------------------ + Bra + A++ + prop N + A++ + prop Nd + B+ + prop N *+ + B+ + prop Nd * + Ket + End +------------------------------------------------------------------ + +/-- These behaved oddly in Perl, so they are kept in this test --/ + +/(\x{23a}\x{23a}\x{23a})?\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} +No match + +/(ȺȺȺ)?\1/8i + ȺȺȺⱥⱥ +No match + +/(\x{23a}\x{23a}\x{23a})?\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a}\x{23a}\x{23a} + +/(ȺȺȺ)?\1/8i + ȺȺȺⱥⱥⱥ + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a}\x{23a}\x{23a} + +/(\x{23a}\x{23a}\x{23a})\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} +No match + +/(ȺȺȺ)\1/8i + ȺȺȺⱥⱥ +No match + +/(\x{23a}\x{23a}\x{23a})\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a}\x{23a}\x{23a} + +/(ȺȺȺ)\1/8i + ȺȺȺⱥⱥⱥ + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + 1: \x{23a}\x{23a}\x{23a} + +/(\x{2c65}\x{2c65})\1/8i + \x{2c65}\x{2c65}\x{23a}\x{23a} + 0: \x{2c65}\x{2c65}\x{23a}\x{23a} + 1: \x{2c65}\x{2c65} + +/(ⱥⱥ)\1/8i + ⱥⱥȺȺ + 0: \x{2c65}\x{2c65}\x{23a}\x{23a} + 1: \x{2c65}\x{2c65} + +/(\x{23a}\x{23a}\x{23a})\1Y/8i + X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ + 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y + 1: \x{23a}\x{23a}\x{23a} + +/(\x{2c65}\x{2c65})\1Y/8i + X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ + 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y + 1: \x{2c65}\x{2c65} + +/-- --/ + +/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/ + +/^[\p{Batak}]/8 + \x{1bc0} + 0: \x{1bc0} + \x{1bff} + 0: \x{1bff} + ** Failers +No match + \x{1bf4} +No match + +/^[\p{Brahmi}]/8 + \x{11000} + 0: \x{11000} + \x{1106f} + 0: \x{1106f} + ** Failers +No match + \x{1104e} +No match + 
+/^[\p{Mandaic}]/8 + \x{840} + 0: \x{840} + \x{85e} + 0: \x{85e} + ** Failers +No match + \x{85c} +No match + \x{85d} +No match + +/-- --/ + +/-- End of testinput13 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 220a627..c2622ce 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -12217,20 +12217,4 @@ Latest Mark: <unset> \P\Pabc No match -/f.*/ - \P\Pfor -Partial match: for - -/f.*/s - \P\Pfor -Partial match: for - -/f.*/8 - \P\Pfor -Partial match: for - -/f.*/8s - \P\Pfor -Partial match: for - /-- End of testinput2 --/ diff --git a/testdata/testoutput5 b/testdata/testoutput5 index c5148bf..ab77378 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -2430,4 +2430,20 @@ No match //<bsr_anycrlf><bsr_unicode> Failed: inconsistent NEWLINE options at offset 0 +/f.*/ + \P\Pfor +Partial match: for + +/f.*/s + \P\Pfor +Partial match: for + +/f.*/8 + \P\Pfor +Partial match: for + +/f.*/8s + \P\Pfor +Partial match: for + /-- End of testinput5 --/ |