diff options
Diffstat (limited to 'testdata')
-rw-r--r-- | testdata/testinput10 | 29 | ||||
-rw-r--r-- | testdata/testinput12 | 67 | ||||
-rw-r--r-- | testdata/testinput14 | 50 | ||||
-rw-r--r-- | testdata/testoutput10 | 64 | ||||
-rw-r--r-- | testdata/testoutput12-16 | 132 | ||||
-rw-r--r-- | testdata/testoutput12-32 | 132 | ||||
-rw-r--r-- | testdata/testoutput14-16 | 70 | ||||
-rw-r--r-- | testdata/testoutput14-32 | 70 | ||||
-rw-r--r-- | testdata/testoutput14-8 | 70 |
9 files changed, 669 insertions, 15 deletions
diff --git a/testdata/testinput10 b/testdata/testinput10 index 3813709..be6d426 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -570,8 +570,10 @@ /[\xff\x{ffff}]/I,utf /[\xff\x{ff}]/I,utf + abc\x{ff}def /[\xff\x{ff}]/I + abc\x{ff}def /[Ss]/I @@ -585,4 +587,31 @@ abc\x80\=startchar abc\x80\=startchar,offset=3 +#subject no_jit + +/\x{c1}+\x{e1}/iIB,ucp + \x{c1}\x{c1}\x{c1} + \x{e1}\x{e1}\x{e1} + +/a|\x{c1}/iI,ucp + \x{e1}xxx + +/a|\x{c1}/iI,utf + \x{e1}xxx + +/\x{c1}|\x{e1}/iI,ucp + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + +/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended + X\x{c1}Y + +# Without UTF or UCP characters > 127 have only one case in the default locale. + +/X(\x{e1})Y/replace=>\U$1<,substitute_extended + X\x{e1}Y + +#subject + # End of testinput10 diff --git a/testdata/testinput12 b/testdata/testinput12 index bed00a5..32e97b5 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -463,4 +463,71 @@ /(?:\x{ff}|\x{3000})/I,utf +# ---------------------------------------------------- +# UCP and casing tests + +/\x{120}/i,I + +/\x{c1}/i,I,ucp + +/[\x{120}\x{121}]/iB,ucp + +/[ab\x{120}]+/iB,ucp + aABb\x{121}\x{120} + +#subject no_jit + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + +/[^\x{120}]/i,no_start_optimize + \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} + +/[^\x{120}]/i + \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} + +/\x{120}{2}/i,ucp + \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} + +/\x{c1}+\x{e1}/iB,ucp + \x{c1}\x{c1}\x{c1} + +/\x{c1}+\x{e1}/iIB,ucp + \x{c1}\x{c1}\x{c1} + \x{e1}\x{e1}\x{e1} + +/a|\x{c1}/iI,ucp + \x{e1}xxx + +/\x{c1}|\x{e1}/iI,ucp + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + +/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended + X\x{121}Y + +#subject + +# ---------------------------------------------------- + # End of testinput12 diff --git a/testdata/testinput14 b/testdata/testinput14 index f97f3ec..8a17ae7 100644 --- a/testdata/testinput14 +++ b/testdata/testinput14 @@ -1,9 +1,12 @@ -# These test special (mostly error) UTF features of DFA matching. They are a -# selection of the more comprehensive tests that are run for non-DFA matching. -# The output is different for the different widths. +# These test special UTF and UCP features of DFA matching. The output is +# different for the different widths. #subject dfa +# ---------------------------------------------------- +# These are a selection of the more comprehensive tests that are run for +# non-DFA matching. + /X/utf XX\x{d800} XX\x{d800}\=offset=3 @@ -33,5 +36,46 @@ XX\xef\x80\=ph \xf7\=ph \xf7\x80\=ph + +# ---------------------------------------------------- +# UCP and casing tests - except for the first two, these will all fail in 8-bit +# mode because they are testing UCP without UTF and use characters > 255. + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} + +/\x{c1}+\x{e1}/iB,ucp + \x{c1}\x{c1}\x{c1} + \x{e1}\x{e1}\x{e1} + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + +/[^\x{120}]/i,no_start_optimize + \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} + +/[^\x{120}]/i + \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} + +/\x{120}{2}/i,ucp + \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} + +# ---------------------------------------------------- # End of testinput14 diff --git a/testdata/testoutput10 b/testdata/testoutput10 index 775c2ab..9fe5ef6 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1780,11 +1780,15 @@ Capture group count = 0 Options: utf Starting code units: \xc3 Subject length lower bound = 1 + abc\x{ff}def + 0: \x{ff} /[\xff\x{ff}]/I Capture group count = 0 -Starting code units: \xff +First code unit = \xff Subject length lower bound = 1 + abc\x{ff}def + 0: \xff /[Ss]/I Capture group count = 0 @@ -1813,4 +1817,62 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3 abc\x80\=startchar,offset=3 Error -36 (bad UTF-8 offset) +#subject no_jit + +/\x{c1}+\x{e1}/iIB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Last code unit = \xe1 (caseless) +Subject length lower bound = 2 + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + +/a|\x{c1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +Starting code units: A a \xc1 \xe1 +Subject length lower bound = 1 + \x{e1}xxx + 0: \xe1 + +/a|\x{c1}/iI,utf +Capture group count = 0 +Options: caseless utf +Starting code units: A a \xc3 +Subject length lower bound = 1 + \x{e1}xxx + 0: \x{e1} + +/\x{c1}|\x{e1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + 1: >\xc1< + +/X(\x{e1})Y/i,ucp,replace=>\L$1<,substitute_extended + X\x{c1}Y + 1: >\xe1< + +# Without UTF or UCP characters > 127 have only one case in the default locale. + +/X(\x{e1})Y/replace=>\U$1<,substitute_extended + X\x{e1}Y + 1: >\xe1< + +#subject + # End of testinput10 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 3006bc1..b944311 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -1613,7 +1613,7 @@ Subject length lower bound = 1 /[Ss]/I Capture group count = 0 -Starting code units: S s +First code unit = 'S' (caseless) Subject length lower bound = 1 /[Ss]/I,utf @@ -1628,4 +1628,134 @@ Options: utf Starting code units: \xff Subject length lower bound = 1 +# ---------------------------------------------------- +# UCP and casing tests + +/\x{120}/i,I +Capture group count = 0 +Options: caseless +First code unit = \x{120} +Subject length lower bound = 1 + +/\x{c1}/i,I,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/[\x{120}\x{121}]/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{120} + Ket + End +------------------------------------------------------------------ + +/[ab\x{120}]+/iB,ucp +------------------------------------------------------------------ + Bra + [ABab\x{120}-\x{121}]++ + Ket + End +------------------------------------------------------------------ + aABb\x{121}\x{120} + 0: aABb\x{121}\x{120} + +#subject no_jit + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + 0: \x{121}\xe1 + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + 0: \x{121}\xe1 + +/[^\x{120}]/i,no_start_optimize + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} +No match + +/[^\x{120}]/i + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} +No match + +/\x{120}{2}/i,ucp + \x{121}\x{121} + 0: \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + +/\x{c1}+\x{e1}/iIB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Last code unit = \xe1 (caseless) +Subject length lower bound = 2 + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + +/a|\x{c1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +Starting code units: A a \xc1 \xe1 +Subject length lower bound = 1 + \x{e1}xxx + 0: \xe1 + +/\x{c1}|\x{e1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + 1: >\xc1< + +/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended + X\x{121}Y + 1: >\x{120}< + +#subject + +# ---------------------------------------------------- + # End of testinput12 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index ad240e2..74ccac8 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -1611,7 +1611,7 @@ Subject length lower bound = 1 /[Ss]/I Capture group count = 0 -Starting code units: S s +First code unit = 'S' (caseless) Subject length lower bound = 1 /[Ss]/I,utf @@ -1626,4 +1626,134 @@ Options: utf Starting code units: \xff Subject length lower bound = 1 +# ---------------------------------------------------- +# UCP and casing tests + +/\x{120}/i,I +Capture group count = 0 +Options: caseless +First code unit = \x{120} +Subject length lower bound = 1 + +/\x{c1}/i,I,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/[\x{120}\x{121}]/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{120} + Ket + End +------------------------------------------------------------------ + +/[ab\x{120}]+/iB,ucp +------------------------------------------------------------------ + Bra + [ABab\x{120}-\x{121}]++ + Ket + End +------------------------------------------------------------------ + aABb\x{121}\x{120} + 0: aABb\x{121}\x{120} + +#subject no_jit + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + 0: \x{121}\xe1 + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + 0: \x{121}\xe1 + +/[^\x{120}]/i,no_start_optimize + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} +No match + +/[^\x{120}]/i + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} +No match + +/\x{120}{2}/i,ucp + \x{121}\x{121} + 0: \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + +/\x{c1}+\x{e1}/iIB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Last code unit = \xe1 (caseless) +Subject length lower bound = 2 + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + +/a|\x{c1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +Starting code units: A a \xc1 \xe1 +Subject length lower bound = 1 + \x{e1}xxx + 0: \xe1 + +/\x{c1}|\x{e1}/iI,ucp +Capture group count = 0 +Options: caseless ucp +First code unit = \xc1 (caseless) +Subject length lower bound = 1 + +/X(\x{e1})Y/ucp,replace=>\U$1<,substitute_extended + X\x{e1}Y + 1: >\xc1< + +/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended + X\x{121}Y + 1: >\x{120}< + +#subject + +# ---------------------------------------------------- + # End of testinput12 diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16 index 05b7d48..2d58f1c 100644 --- a/testdata/testoutput14-16 +++ b/testdata/testoutput14-16 @@ -1,9 +1,12 @@ -# These test special (mostly error) UTF features of DFA matching. They are a -# selection of the more comprehensive tests that are run for non-DFA matching. -# The output is different for the different widths. +# These test special UTF and UCP features of DFA matching. The output is +# different for the different widths. #subject dfa +# ---------------------------------------------------- +# These are a selection of the more comprehensive tests that are run for +# non-DFA matching. + /X/utf XX\x{d800} Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2 @@ -57,5 +60,66 @@ No match No match \xf7\x80\=ph No match + +# ---------------------------------------------------- +# UCP and casing tests - except for the first two, these will all fail in 8-bit +# mode because they are testing UCP without UTF and use characters > 255. + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + 1: \xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + 1: \xe1\xe1 + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + 0: \x{121}\xe1 + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + 0: \x{121}\xe1 + +/[^\x{120}]/i,no_start_optimize + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} +No match + +/[^\x{120}]/i + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} +No match + +/\x{120}{2}/i,ucp + \x{121}\x{121} + 0: \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} +No match + +# ---------------------------------------------------- # End of testinput14 diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32 index 30d7fa6..f1f65b7 100644 --- a/testdata/testoutput14-32 +++ b/testdata/testoutput14-32 @@ -1,9 +1,12 @@ -# These test special (mostly error) UTF features of DFA matching. They are a -# selection of the more comprehensive tests that are run for non-DFA matching. -# The output is different for the different widths. +# These test special UTF and UCP features of DFA matching. The output is +# different for the different widths. #subject dfa +# ---------------------------------------------------- +# These are a selection of the more comprehensive tests that are run for +# non-DFA matching. + /X/utf XX\x{d800} Failed: error -27: UTF-32 error: code points 0xd800-0xdfff are not defined at offset 2 @@ -57,5 +60,66 @@ No match No match \xf7\x80\=ph No match + +# ---------------------------------------------------- +# UCP and casing tests - except for the first two, these will all fail in 8-bit +# mode because they are testing UCP without UTF and use characters > 255. + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + 1: \xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + 1: \xe1\xe1 + +/\x{120}\x{c1}/i,ucp,no_start_optimize + \x{121}\x{e1} + 0: \x{121}\xe1 + +/\x{120}\x{c1}/i,ucp + \x{121}\x{e1} + 0: \x{121}\xe1 + +/[^\x{120}]/i,no_start_optimize + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +\= Expect no match + \x{121} +No match + +/[^\x{120}]/i + \x{121} + 0: \x{121} + +/[^\x{120}]/i,ucp +\= Expect no match + \x{121} +No match + +/\x{120}{2}/i,ucp + \x{121}\x{121} + 0: \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +\= Expect no match + \x{121}\x{121} +No match + +# ---------------------------------------------------- # End of testinput14 diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8 index 1fb0dc1..aa62414 100644 --- a/testdata/testoutput14-8 +++ b/testdata/testoutput14-8 @@ -1,9 +1,12 @@ -# These test special (mostly error) UTF features of DFA matching. They are a -# selection of the more comprehensive tests that are run for non-DFA matching. -# The output is different for the different widths. +# These test special UTF and UCP features of DFA matching. The output is +# different for the different widths. #subject dfa +# ---------------------------------------------------- +# These are a selection of the more comprehensive tests that are run for +# non-DFA matching. + /X/utf XX\x{d800} Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 @@ -57,5 +60,66 @@ Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 \xf7\x80\=ph Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 + +# ---------------------------------------------------- +# UCP and casing tests - except for the first two, these will all fail in 8-bit +# mode because they are testing UCP without UTF and use characters > 255. + +/\x{c1}/i,no_start_optimize +\= Expect no match + \x{e1} +No match + +/\x{c1}+\x{e1}/iB,ucp +------------------------------------------------------------------ + Bra + /i \x{c1}+ + /i \x{e1} + Ket + End +------------------------------------------------------------------ + \x{c1}\x{c1}\x{c1} + 0: \xc1\xc1\xc1 + 1: \xc1\xc1 + \x{e1}\x{e1}\x{e1} + 0: \xe1\xe1\xe1 + 1: \xe1\xe1 + +/\x{120}\x{c1}/i,ucp,no_start_optimize +Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large + \x{121}\x{e1} + +/\x{120}\x{c1}/i,ucp +Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large + \x{121}\x{e1} + +/[^\x{120}]/i,no_start_optimize +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large + \x{121} + +/[^\x{120}]/i,ucp,no_start_optimize +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{121} + +/[^\x{120}]/i +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large + \x{121} + +/[^\x{120}]/i,ucp +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{121} + +/\x{120}{2}/i,ucp +Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large + \x{121}\x{121} + +/[^\x{120}]{2}/i,ucp +Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large +\= Expect no match + \x{121}\x{121} + +# ---------------------------------------------------- # End of testinput14 |