diff options
Diffstat (limited to 'pcre/testdata/testinput7')
-rw-r--r-- | pcre/testdata/testinput7 | 738 |
1 files changed, 738 insertions, 0 deletions
diff --git a/pcre/testdata/testinput7 b/pcre/testdata/testinput7 new file mode 100644 index 00000000000..24c00d278b4 --- /dev/null +++ b/pcre/testdata/testinput7 @@ -0,0 +1,738 @@ +/-- These tests for Unicode property support test PCRE's API and show some of + the compiled code. They are not Perl-compatible. --/ + +/[\p{L}]/DZ + +/[\p{^L}]/DZ + +/[\P{L}]/DZ + +/[\P{^L}]/DZ + +/[abc\p{L}\x{0660}]/8DZ + +/[\p{Nd}]/8DZ + 1234 + +/[\p{Nd}+-]+/8DZ + 1234 + 12-34 + 12+\x{661}-34 + ** Failers + abcd + +/[\x{105}-\x{109}]/8iDZ + \x{104} + \x{105} + \x{109} + ** Failers + \x{100} + \x{10a} + +/[z-\x{100}]/8iDZ + Z + z + \x{39c} + \x{178} + | + \x{80} + \x{ff} + \x{100} + \x{101} + ** Failers + \x{102} + Y + y + +/[z-\x{100}]/8DZi + +/(?:[\PPa*]*){8,}/ + +/[\P{Any}]/BZ + +/[\P{Any}\E]/BZ + +/(\P{Yi}+\277)/ + +/(\P{Yi}+\277)?/ + +/(?<=\P{Yi}{3}A)X/ + +/\p{Yi}+(\P{Yi}+)(?1)/ + +/(\P{Yi}{2}\277)?/ + +/[\P{Yi}A]/ + +/[\P{Yi}\P{Yi}\P{Yi}A]/ + +/[^\P{Yi}A]/ + +/[^\P{Yi}\P{Yi}\P{Yi}A]/ + +/(\P{Yi}*\277)*/ + +/(\P{Yi}*?\277)*/ + +/(\p{Yi}*+\277)*/ + +/(\P{Yi}?\277)*/ + +/(\P{Yi}??\277)*/ + +/(\p{Yi}?+\277)*/ + +/(\P{Yi}{0,3}\277)*/ + +/(\P{Yi}{0,3}?\277)*/ + +/(\p{Yi}{0,3}+\277)*/ + +/\p{Zl}{2,3}+/8BZ +
+ \x{2028}\x{2028}\x{2028} + +/\p{Zl}/8BZ + +/\p{Lu}{3}+/8BZ + +/\pL{2}+/8BZ + +/\p{Cc}{2}+/8BZ + +/^\p{Cs}/8 + \?\x{dfff} + ** Failers + \x{09f} + +/^\p{Sc}+/8 + $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} + \x{9f2} + ** Failers + X + \x{2c2} + +/^\p{Zs}/8 + \ \ + \x{a0} + \x{1680} + \x{180e} + \x{2000} + \x{2001} + ** Failers + \x{2028} + \x{200d} + +/-- These four are here rather than in test 6 because Perl has problems with + the negative versions of the properties. --/ + +/\p{^Lu}/8i + 1234 + ** Failers + ABC + +/\P{Lu}/8i + 1234 + ** Failers + ABC + +/\p{Ll}/8i + a + Az + ** Failers + ABC + +/\p{Lu}/8i + A + a\x{10a0}B + ** Failers + a + \x{1d00} + +/[\x{c0}\x{391}]/8i + \x{c0} + \x{e0} + +/-- The next two are special cases where the lengths of the different cases of +the same character differ. The first went wrong with heap frame storage; the +second was broken in all cases. --/ + +/^\x{023a}+?(\x{0130}+)/8i + \x{023a}\x{2c65}\x{0130} + +/^\x{023a}+([^X])/8i + \x{023a}\x{2c65}X + +/\x{c0}+\x{116}+/8i + \x{c0}\x{e0}\x{116}\x{117} + +/[\x{c0}\x{116}]+/8i + \x{c0}\x{e0}\x{116}\x{117} + +/(\x{de})\1/8i + \x{de}\x{de} + \x{de}\x{fe} + \x{fe}\x{fe} + \x{fe}\x{de} + +/^\x{c0}$/8i + \x{c0} + \x{e0} + +/^\x{e0}$/8i + \x{c0} + \x{e0} + +/-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE +will match it only with UCP support, because without that it has no notion +of case for anything other than the ASCII letters. --/ + +/((?i)[\x{c0}])/8 + \x{c0} + \x{e0} + +/(?i:[\x{c0}])/8 + \x{c0} + \x{e0} + +/-- These are PCRE's extra properties to help with Unicodizing \d etc. --/ + +/^\p{Xan}/8 + ABCD + 1234 + \x{6ca} + \x{a6c} + \x{10a7} + ** Failers + _ABC + +/^\p{Xan}+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + ** Failers + _ABC + +/^\p{Xan}+?/8 + \x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xan}*/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xan}{2,9}/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xan}{2,9}?/8 + \x{6ca}\x{a6c}\x{10a7}_ + +/^[\p{Xan}]/8 + ABCD1234_ + 1234abcd_ + \x{6ca} + \x{a6c} + \x{10a7} + ** Failers + _ABC + +/^[\p{Xan}]+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + ** Failers + _ABC + +/^>\p{Xsp}/8 + >\x{1680}\x{2028}\x{0b} + >\x{a0} + ** Failers + \x{0b} + +/^>\p{Xsp}+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}+?/8 + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}*/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}?/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xsp}]/8 + >\x{2028}\x{0b} + +/^>[\p{Xsp}]+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}/8 + >\x{1680}\x{2028}\x{0b} + >\x{a0} + ** Failers + \x{0b} + +/^>\p{Xps}+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}+?/8 + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}*/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xps}]/8 + >\x{2028}\x{0b} + +/^>[\p{Xps}]+/8 + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/8 + ABCD + 1234 + \x{6ca} + \x{a6c} + \x{10a7} + _ABC + ** Failers + [] + +/^\p{Xwd}+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}+?/8 + \x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}*/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/8 + A_B12\x{6ca}\x{a6c}\x{10a7} + +/^\p{Xwd}{2,9}?/8 + \x{6ca}\x{a6c}\x{10a7}_ + +/^[\p{Xwd}]/8 + ABCD1234_ + 1234abcd_ + \x{6ca} + \x{a6c} + \x{10a7} + _ABC + ** Failers + [] + +/^[\p{Xwd}]+/8 + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/-- A check not in UTF-8 mode --/ + +/^[\p{Xwd}]+/ + ABCD1234_ + +/-- Some negative checks --/ + +/^[\P{Xwd}]+/8 + !.+\x{019}\x{35a}AB + +/^[\p{^Xwd}]+/8 + !.+\x{019}\x{35a}AB + +/[\D]/WBZ8 + 1\x{3c8}2 + +/[\d]/WBZ8 + >\x{6f4}< + +/[\S]/WBZ8 + \x{1680}\x{6f4}\x{1680} + +/[\s]/WBZ8 + >\x{1680}< + +/[\W]/WBZ8 + A\x{1712}B + +/[\w]/WBZ8 + >\x{1723}< + +/\D/WBZ8 + 1\x{3c8}2 + +/\d/WBZ8 + >\x{6f4}< + +/\S/WBZ8 + \x{1680}\x{6f4}\x{1680} + +/\s/WBZ8 + >\x{1680}> + +/\W/WBZ8 + A\x{1712}B + +/\w/WBZ8 + >\x{1723}< + +/[[:alpha:]]/WBZ + +/[[:lower:]]/WBZ + +/[[:upper:]]/WBZ + +/[[:alnum:]]/WBZ + +/[[:ascii:]]/WBZ + +/[[:cntrl:]]/WBZ + +/[[:digit:]]/WBZ + +/[[:graph:]]/WBZ + +/[[:print:]]/WBZ + +/[[:punct:]]/WBZ + +/[[:space:]]/WBZ + +/[[:word:]]/WBZ + +/[[:xdigit:]]/WBZ + +/-- Unicode properties for \b abd \B --/ + +/\b...\B/8W + abc_ + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ + +/\b...\B/8 + abc_ + ** Failers + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ + +/\b...\B/W + abc_ + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +/-- Some of these are silly, but they check various combinations --/ + +/[[:^alpha:][:^cntrl:]]+/8WBZ + 123 + abc + +/[[:^cntrl:][:^alpha:]]+/8WBZ + 123 + abc + +/[[:alpha:]]+/8WBZ + abc + +/[[:^alpha:]\S]+/8WBZ + 123 + abc + +/[^\d]+/8WBZ + abc123 + abc\x{123} + \x{660}abc + +/\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ + +/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ + +/\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ + +/\p{Han}+X\p{Greek}+\x{370}/BZ8 + +/\p{Xan}+!\p{Xan}+A/BZ + +/\p{Xsp}+!\p{Xsp}\t/BZ + +/\p{Xps}+!\p{Xps}\t/BZ + +/\p{Xwd}+!\p{Xwd}_/BZ + +/A+\p{N}A+\dB+\p{N}*B+\d*/WBZ + +/-- These behaved oddly in Perl, so they are kept in this test --/ + +/(\x{23a}\x{23a}\x{23a})?\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} + +/(ȺȺȺ)?\1/8i + ȺȺȺⱥⱥ + +/(\x{23a}\x{23a}\x{23a})?\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + +/(ȺȺȺ)?\1/8i + ȺȺȺⱥⱥⱥ + +/(\x{23a}\x{23a}\x{23a})\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} + +/(ȺȺȺ)\1/8i + ȺȺȺⱥⱥ + +/(\x{23a}\x{23a}\x{23a})\1/8i + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + +/(ȺȺȺ)\1/8i + ȺȺȺⱥⱥⱥ + +/(\x{2c65}\x{2c65})\1/8i + \x{2c65}\x{2c65}\x{23a}\x{23a} + +/(ⱥⱥ)\1/8i + ⱥⱥȺȺ + +/(\x{23a}\x{23a}\x{23a})\1Y/8i + X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ + +/(\x{2c65}\x{2c65})\1Y/8i + X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ + +/-- --/ + +/-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/ + +/^[\p{Batak}]/8 + \x{1bc0} + \x{1bff} + ** Failers + \x{1bf4} + +/^[\p{Brahmi}]/8 + \x{11000} + \x{1106f} + ** Failers + \x{1104e} + +/^[\p{Mandaic}]/8 + \x{840} + \x{85e} + ** Failers + \x{85c} + \x{85d} + +/-- --/ + +/(\X*)(.)/s8 + A\x{300} + +/^S(\X*)e(\X*)$/8 + Stéréo + +/^\X/8 + ́réo + +/^a\X41z/<JS> + aX41z + *** Failers + aAz + +/(?<=ab\Cde)X/8 + +/\X/ + a\P + a\P\P + +/\Xa/ + aa\P + aa\P\P + +/\X{2}/ + aa\P + aa\P\P + +/\X+a/ + a\P + aa\P + aa\P\P + +/\X+?a/ + a\P + ab\P + aa\P + aa\P\P + aba\P + +/-- These Unicode 6.1.0 scripts are not known to Perl. --/ + +/\p{Chakma}\d/8W + \x{11100}\x{1113c} + +/\p{Takri}\d/8W + \x{11680}\x{116c0} + +/^\X/8 + A\P + A\P\P + A\x{300}\x{301}\P + A\x{300}\x{301}\P\P + A\x{301}\P + A\x{301}\P\P + +/^\X{2,3}/8 + A\P + A\P\P + AA\P + AA\P\P + A\x{300}\x{301}\P + A\x{300}\x{301}\P\P + A\x{300}\x{301}A\x{300}\x{301}\P + A\x{300}\x{301}A\x{300}\x{301}\P\P + +/^\X{2}/8 + AA\P + AA\P\P + A\x{300}\x{301}A\x{300}\x{301}\P + A\x{300}\x{301}A\x{300}\x{301}\P\P + +/^\X+/8 + AA\P + AA\P\P + +/^\X+?Z/8 + AA\P + AA\P\P + +/A\x{3a3}B/8iDZ + +/\x{3a3}B/8iDZ + +/[\x{3a3}]/8iBZ + +/[^\x{3a3}]/8iBZ + +/[\x{3a3}]+/8iBZ + +/[^\x{3a3}]+/8iBZ + +/a*\x{3a3}/8iBZ + +/\x{3a3}+a/8iBZ + +/\x{3a3}*\x{3c2}/8iBZ + +/\x{3a3}{3}/8i+ + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}{2,4}/8i+ + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}{2,4}?/8i+ + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}+./8i+ + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}++./8i+ + ** Failers + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}*\x{3c2}/8iBZ + +/[^\x{3a3}]*\x{3c2}/8iBZ + +/[^a]*\x{3c2}/8iBZ + +/ist/8iBZ + ikt + +/is+t/8i + iSs\x{17f}t + ikt + +/is+?t/8i + ikt + +/is?t/8i + ikt + +/is{2}t/8i + iskt + +/-- This property is a PCRE special --/ + +/^\p{Xuc}/8 + $abc + @abc + `abc + \x{1234}abc + ** Failers + abc + +/^\p{Xuc}+/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}+?/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}+?\*/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}++/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}{3,5}/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\p{Xuc}{3,5}?/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^[\p{Xuc}]/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^[\p{Xuc}]+/8 + $@`\x{a0}\x{1234}\x{e000}** + ** Failers + \x{9f} + +/^\P{Xuc}/8 + abc + ** Failers + $abc + @abc + `abc + \x{1234}abc + +/^[\P{Xuc}]/8 + abc + ** Failers + $abc + @abc + `abc + \x{1234}abc + +/-- End of testinput7 --/ |