diff options
Diffstat (limited to 't/re')
-rw-r--r-- | t/re/pat.t | 4389 | ||||
-rw-r--r-- | t/re/pat_thr.t | 7 | ||||
-rw-r--r-- | t/re/qr.t | 13 | ||||
-rw-r--r-- | t/re/qr_gc.t | 35 | ||||
-rw-r--r-- | t/re/qrstack.t | 11 | ||||
-rw-r--r-- | t/re/re.t | 46 | ||||
-rw-r--r-- | t/re/re_tests | 1395 | ||||
-rw-r--r-- | t/re/reg_60508.t | 40 | ||||
-rw-r--r-- | t/re/reg_email.t | 103 | ||||
-rw-r--r-- | t/re/reg_email_thr.t | 7 | ||||
-rw-r--r-- | t/re/reg_fold.t | 42 | ||||
-rw-r--r-- | t/re/reg_mesg.t | 194 | ||||
-rw-r--r-- | t/re/reg_namedcapture.t | 26 | ||||
-rw-r--r-- | t/re/reg_nc_tie.t | 53 | ||||
-rw-r--r-- | t/re/reg_pmod.t | 49 | ||||
-rw-r--r-- | t/re/reg_posixcc.t | 160 | ||||
-rw-r--r-- | t/re/reg_unsafe.t | 19 | ||||
-rw-r--r-- | t/re/regexp.t | 207 | ||||
-rw-r--r-- | t/re/regexp_noamp.t | 10 | ||||
-rw-r--r-- | t/re/regexp_notrie.t | 15 | ||||
-rw-r--r-- | t/re/regexp_qr.t | 10 | ||||
-rw-r--r-- | t/re/regexp_qr_embed.t | 11 | ||||
-rw-r--r-- | t/re/regexp_qr_embed_thr.t | 11 | ||||
-rw-r--r-- | t/re/regexp_trielist.t | 15 | ||||
-rw-r--r-- | t/re/regexp_unicode_prop.t | 303 | ||||
-rw-r--r-- | t/re/regexp_unicode_prop_thr.t | 7 | ||||
-rw-r--r-- | t/re/rxcode.t | 86 | ||||
-rw-r--r-- | t/re/subst.t | 600 | ||||
-rw-r--r-- | t/re/substT.t | 9 | ||||
-rw-r--r-- | t/re/subst_amp.t | 104 | ||||
-rw-r--r-- | t/re/subst_wamp.t | 11 | ||||
-rw-r--r-- | t/re/substr.t | 685 | ||||
-rw-r--r-- | t/re/substr_thr.t | 7 |
33 files changed, 8680 insertions, 0 deletions
diff --git a/t/re/pat.t b/t/re/pat.t new file mode 100644 index 0000000000..c610a6a9b5 --- /dev/null +++ b/t/re/pat.t @@ -0,0 +1,4389 @@ +#!./perl +# +# This is a home for regular expression tests that don't fit into +# the format supported by op/regexp.t. If you want to add a test +# that does fit that format, add it to op/re_tests, not here. + +use strict; +use warnings; +use 5.010; + + +sub run_tests; + +$| = 1; + +my $EXPECTED_TESTS = 4066; # Update this when adding/deleting tests. + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} +our $TODO; +our $Message = "Noname test"; +our $Error; +our $DiePattern; +our $WarnPattern; +our $BugId; +our $PatchId; +our $running_as_thread; + +my $ordA = ord ('A'); # This defines ASCII/UTF-8 vs EBCDIC/UTF-EBCDIC +# This defined the platform. +my $IS_ASCII = $ordA == 65; +my $IS_EBCDIC = $ordA == 193; + +use vars '%Config'; +eval 'use Config'; # Defaults assumed if this fails + +my $test = 0; + +print "1..$EXPECTED_TESTS\n"; + +run_tests unless caller (); + +END { +} + +sub pretty { + my ($mess) = @_; + $mess =~ s/\n/\\n/g; + $mess =~ s/\r/\\r/g; + $mess =~ s/\t/\\t/g; + $mess =~ s/([\00-\37\177])/sprintf '\%03o', ord $1/eg; + $mess =~ s/#/\\#/g; + $mess; +} + +sub safe_globals { + defined($_) and s/#/\\#/g for $BugId, $PatchId, $TODO; +} + +sub _ok { + my ($ok, $mess, $error) = @_; + safe_globals(); + $mess = pretty ($mess // $Message); + $mess .= "; Bug $BugId" if defined $BugId; + $mess .= "; Patch $PatchId" if defined $PatchId; + $mess .= " # TODO $TODO" if defined $TODO; + + my $line_nr = (caller(1)) [2]; + + printf "%sok %d - %s\n", + ($ok ? "" : "not "), + ++ $test, + "$mess\tLine $line_nr"; + + unless ($ok) { + print "# Failed test at line $line_nr\n" unless defined $TODO; + if ($error //= $Error) { + no warnings 'utf8'; + chomp $error; + $error = join "\n#", map {pretty $_} split /\n\h*#/ => $error; + $error = "# $error" unless $error =~ /^\h*#/; + print $error, "\n"; + } + } + + return $ok; +} + +# Force scalar context on the pattern match +sub ok ($;$$) {_ok $_ [0], $_ [1], $_ [2]} +sub nok ($;$$) {_ok !$_ [0], "Failed: " . ($_ [1] // $Message), $_ [2]} + + +sub skip { + my $why = shift; + safe_globals(); + $why =~ s/\n.*//s; + $why .= "; Bug $BugId" if defined $BugId; + # seems like the new harness code doesnt like todo and skip to be mixed. + # which seems like a bug in the harness to me. -- dmq + #$why .= " # TODO $TODO" if defined $TODO; + + my $n = shift // 1; + my $line_nr = (caller(0)) [2]; + for (1 .. $n) { + ++ $test; + #print "not " if defined $TODO; + print "ok $test # skip $why\tLine $line_nr\n"; + } + no warnings "exiting"; + last SKIP; +} + +sub iseq ($$;$) { + my ($got, $expect, $name) = @_; + + $_ = defined ($_) ? "'$_'" : "undef" for $got, $expect; + + my $ok = $got eq $expect; + my $error = "# expected: $expect\n" . + "# result: $got"; + + _ok $ok, $name, $error; +} + +sub isneq ($$;$) { + my ($got, $expect, $name) = @_; + my $todo = $TODO ? " # TODO $TODO" : ''; + + $_ = defined ($_) ? "'$_'" : "undef" for $got, $expect; + + my $ok = $got ne $expect; + my $error = "# results are equal ($got)"; + + _ok $ok, $name, $error; +} + + +sub eval_ok ($;$) { + my ($code, $name) = @_; + local $@; + if (ref $code) { + _ok eval {&$code} && !$@, $name; + } + else { + _ok eval ($code) && !$@, $name; + } +} + +sub must_die { + my ($code, $pattern, $name) = @_; + $pattern //= $DiePattern; + undef $@; + ref $code ? &$code : eval $code; + my $r = $@ && $@ =~ /$pattern/; + _ok $r, $name // $Message // "\$\@ =~ /$pattern/"; +} + +sub must_warn { + my ($code, $pattern, $name) = @_; + $pattern //= $WarnPattern; + my $w; + local $SIG {__WARN__} = sub {$w .= join "" => @_}; + use warnings 'all'; + ref $code ? &$code : eval $code; + my $r = $w && $w =~ /$pattern/; + $w //= "UNDEF"; + _ok $r, $name // $Message // "Got warning /$pattern/", + "# expected: /$pattern/\n" . + "# result: $w"; +} + +sub may_not_warn { + my ($code, $name) = @_; + my $w; + local $SIG {__WARN__} = sub {$w .= join "" => @_}; + use warnings 'all'; + ref $code ? &$code : eval $code; + _ok !$w, $name // ($Message ? "$Message (did not warn)" + : "Did not warn"), + "Got warning '$w'"; +} + + +# +# Tests start here. +# +sub run_tests { + + { + + my $x = "abc\ndef\n"; + + ok $x =~ /^abc/, qq ["$x" =~ /^abc/]; + ok $x !~ /^def/, qq ["$x" !~ /^def/]; + + # used to be a test for $* + ok $x =~ /^def/m, qq ["$x" =~ /^def/m]; + + nok $x =~ /^xxx/, qq ["$x" =~ /^xxx/]; + nok $x !~ /^abc/, qq ["$x" !~ /^abc/]; + + ok $x =~ /def/, qq ["$x" =~ /def/]; + nok $x !~ /def/, qq ["$x" !~ /def/]; + + ok $x !~ /.def/, qq ["$x" !~ /.def/]; + nok $x =~ /.def/, qq ["$x" =~ /.def/]; + + ok $x =~ /\ndef/, qq ["$x" =~ /\ndef/]; + nok $x !~ /\ndef/, qq ["$x" !~ /\ndef/]; + } + + { + $_ = '123'; + ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; + } + + { + $_ = 'aaabbbccc'; + ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', + qq [\$_ = '$_'; /(a*b*)(c*)/]; + ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; + nok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; + + $_ = 'aaabccc'; + ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; + ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; + + $_ = 'aaaccc'; + ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; + nok /a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]; + + $_ = 'abcdef'; + ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; + ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; + ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; + ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; + } + + { + # used to be a test for $* + ok "ab\ncd\n" =~ /^cd/m, qq ["ab\ncd\n" =~ /^cd/m]; + } + + { + our %XXX = map {($_ => $_)} 123, 234, 345; + + our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); + while ($_ = shift(@XXX)) { + my $f = index ($_, 'not') >= 0 ? \&nok : \&ok; + my $r = ?(.*)?; + &$f ($r, "?(.*)?"); + /not/ && reset; + if (/not ok 2/) { + if ($^O eq 'VMS') { + $_ = shift(@XXX); + } + else { + reset 'X'; + } + } + } + + SKIP: { + if ($^O eq 'VMS') { + skip "Reset 'X'", 1; + } + ok !keys %XXX, "%XXX is empty"; + } + + } + + { + local $Message = "Test empty pattern"; + my $xyz = 'xyz'; + my $cde = 'cde'; + + $cde =~ /[^ab]*/; + $xyz =~ //; + iseq $&, $xyz; + + my $foo = '[^ab]*'; + $cde =~ /$foo/; + $xyz =~ //; + iseq $&, $xyz; + + $cde =~ /$foo/; + my $null; + no warnings 'uninitialized'; + $xyz =~ /$null/; + iseq $&, $xyz; + + $null = ""; + $xyz =~ /$null/; + iseq $&, $xyz; + } + + { + local $Message = q !Check $`, $&, $'!; + $_ = 'abcdefghi'; + /def/; # optimized up to cmd + iseq "$`:$&:$'", 'abc:def:ghi'; + + no warnings 'void'; + /cde/ + 0; # optimized only to spat + iseq "$`:$&:$'", 'ab:cde:fghi'; + + /[d][e][f]/; # not optimized + iseq "$`:$&:$'", 'abc:def:ghi'; + } + + { + $_ = 'now is the {time for all} good men to come to.'; + / {([^}]*)}/; + iseq $1, 'time for all', "Match braces"; + } + + { + local $Message = "{N,M} quantifier"; + $_ = 'xxx {3,4} yyy zzz'; + ok /( {3,4})/; + iseq $1, ' '; + ok !/( {4,})/; + ok /( {2,3}.)/; + iseq $1, ' y'; + ok /(y{2,3}.)/; + iseq $1, 'yyy '; + ok !/x {3,4}/; + ok !/^xxx {3,4}/; + } + + { + local $Message = "Test /g"; + local $" = ":"; + $_ = "now is the time for all good men to come to."; + my @words = /(\w+)/g; + my $exp = "now:is:the:time:for:all:good:men:to:come:to"; + + iseq "@words", $exp; + + @words = (); + while (/\w+/g) { + push (@words, $&); + } + iseq "@words", $exp; + + @words = (); + pos = 0; + while (/to/g) { + push(@words, $&); + } + iseq "@words", "to:to"; + + pos $_ = 0; + @words = /to/g; + iseq "@words", "to:to"; + } + + { + $_ = "abcdefghi"; + + my $pat1 = 'def'; + my $pat2 = '^def'; + my $pat3 = '.def.'; + my $pat4 = 'abc'; + my $pat5 = '^abc'; + my $pat6 = 'abc$'; + my $pat7 = 'ghi'; + my $pat8 = '\w*ghi'; + my $pat9 = 'ghi$'; + + my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = + my $t6 = my $t7 = my $t8 = my $t9 = 0; + + for my $iter (1 .. 5) { + $t1++ if /$pat1/o; + $t2++ if /$pat2/o; + $t3++ if /$pat3/o; + $t4++ if /$pat4/o; + $t5++ if /$pat5/o; + $t6++ if /$pat6/o; + $t7++ if /$pat7/o; + $t8++ if /$pat8/o; + $t9++ if /$pat9/o; + } + my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; + iseq $x, '505550555', "Test /o"; + } + + + SKIP: { + my $xyz = 'xyz'; + ok "abc" =~ /^abc$|$xyz/, "| after \$"; + + # perl 4.009 says "unmatched ()" + local $Message = '$ inside ()'; + + my $result; + eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; + iseq $@, "" or skip "eval failed", 1; + iseq $result, "abc:bc"; + } + + + { + local $Message = "Scalar /g"; + $_ = "abcfooabcbar"; + + ok /abc/g && $` eq ""; + ok /abc/g && $` eq "abcfoo"; + ok !/abc/g; + + local $Message = "Scalar /gi"; + pos = 0; + ok /ABC/gi && $` eq ""; + ok /ABC/gi && $` eq "abcfoo"; + ok !/ABC/gi; + + local $Message = "Scalar /g"; + pos = 0; + ok /abc/g && $' eq "fooabcbar"; + ok /abc/g && $' eq "bar"; + + $_ .= ''; + my @x = /abc/g; + iseq @x, 2, "/g reset after assignment"; + } + + { + local $Message = '/g, \G and pos'; + $_ = "abdc"; + pos $_ = 2; + /\Gc/gc; + iseq pos $_, 2; + /\Gc/g; + ok !defined pos $_; + } + + { + local $Message = '(?{ })'; + our $out = 1; + 'abc' =~ m'a(?{ $out = 2 })b'; + iseq $out, 2; + + $out = 1; + 'abc' =~ m'a(?{ $out = 3 })c'; + iseq $out, 1; + } + + + { + $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; + my @out = /(?<!foo)bar./g; + iseq "@out", 'bar2 barf', "Negative lookbehind"; + } + + { + local $Message = "REG_INFTY tests"; + # Tests which depend on REG_INFTY + $::reg_infty = $Config {reg_infty} // 32767; + $::reg_infty_m = $::reg_infty - 1; + $::reg_infty_p = $::reg_infty + 1; + $::reg_infty_m = $::reg_infty_m; # Surpress warning. + + # As well as failing if the pattern matches do unexpected things, the + # next three tests will fail if you should have picked up a lower-than- + # default value for $reg_infty from Config.pm, but have not. + + eval_ok q (('aaa' =~ /(a{1,$::reg_infty_m})/)[0] eq 'aaa'); + eval_ok q (('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/); + eval_ok q (('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/); + eval "'aaa' =~ /a{1,$::reg_infty}/"; + ok $@ =~ /^\QQuantifier in {,} bigger than/; + eval "'aaa' =~ /a{1,$::reg_infty_p}/"; + ok $@ =~ /^\QQuantifier in {,} bigger than/; + } + + { + # Poke a couple more parse failures + my $context = 'x' x 256; + eval qq("${context}y" =~ /(?<=$context)y/); + ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; + } + + { + # Long Monsters + local $Message = "Long monster"; + for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory + my $a = 'a' x $l; + local $Error = "length = $l"; + ok "ba$a=" =~ /a$a=/; + nok "b$a=" =~ /a$a=/; + ok "b$a=" =~ /ba+=/; + + ok "ba$a=" =~ /b(?:a|b)+=/; + } + } + + + { + # 20000 nodes, each taking 3 words per string, and 1 per branch + my $long_constant_len = join '|', 12120 .. 32645; + my $long_var_len = join '|', 8120 .. 28645; + my %ans = ( 'ax13876y25677lbc' => 1, + 'ax13876y25677mcb' => 0, # not b. + 'ax13876y35677nbc' => 0, # Num too big + 'ax13876y25677y21378obc' => 1, + 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] + 'ax13876y25677y21378y21378kbc' => 1, + 'ax13876y25677y21378y21378kcb' => 0, # Not b. + 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs + ); + + local $Message = "20000 nodes"; + for (keys %ans) { + local $Error = "const-len '$_'"; + ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o); + + local $Error = "var-len '$_'"; + ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o); + } + } + + { + local $Message = "Complicated backtracking"; + $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; + my $expect = "(bla()) ((l)u((e))) (l(e)e)"; + + use vars '$c'; + sub matchit { + m/ + ( + \( + (?{ $c = 1 }) # Initialize + (?: + (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop + (?! + ) # Fail: will unwind one iteration back + ) + (?: + [^()]+ # Match a big chunk + (?= + [()] + ) # Do not try to match subchunks + | + \( + (?{ ++$c }) + | + \) + (?{ --$c }) + ) + )+ # This may not match with different subblocks + ) + (?(?{ $c != 0 }) + (?! + ) # Fail + ) # Otherwise the chunk 1 may succeed with $c>0 + /xg; + } + + my @ans = (); + my $res; + push @ans, $res while $res = matchit; + iseq "@ans", "1 1 1"; + + @ans = matchit; + iseq "@ans", $expect; + + local $Message = "Recursion with (??{ })"; + our $matched; + $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; + + @ans = my @ans1 = (); + push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; + + iseq "@ans", "1 1 1"; + iseq "@ans1", $expect; + + @ans = m/$matched/g; + iseq "@ans", $expect; + + } + + { + ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; + } + + { + my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad + iseq "@ans", 'a/ b', "Stack may be bad"; + } + + { + local $Message = "Eval-group not allowed at runtime"; + my $code = '{$blah = 45}'; + our $blah = 12; + eval { /(?$code)/ }; + ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12; + + for $code ('{$blah = 45}','=xx') { + $blah = 12; + my $res = eval { "xx" =~ /(?$code)/o }; + no warnings 'uninitialized'; + local $Error = "'$@', '$res', '$blah'"; + if ($code eq '=xx') { + ok !$@ && $res; + } + else { + ok $@ && $@ =~ /not allowed at runtime/ && $blah == 12; + } + } + + $code = '{$blah = 45}'; + $blah = 12; + eval "/(?$code)/"; + iseq $blah, 45; + + $blah = 12; + /(?{$blah = 45})/; + iseq $blah, 45; + } + + { + local $Message = "Pos checks"; + my $x = 'banana'; + $x =~ /.a/g; + iseq pos ($x), 2; + + $x =~ /.z/gc; + iseq pos ($x), 2; + + sub f { + my $p = $_[0]; + return $p; + } + + $x =~ /.a/g; + iseq f (pos ($x)), 4; + } + + { + local $Message = 'Checking $^R'; + our $x = $^R = 67; + 'foot' =~ /foo(?{$x = 12; 75})[t]/; + iseq $^R, 75; + + $x = $^R = 67; + 'foot' =~ /foo(?{$x = 12; 75})[xy]/; + ok $^R eq '67' && $x eq '12'; + + $x = $^R = 67; + 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; + ok $^R eq '79' && $x eq '12'; + } + + { + iseq qr/\b\v$/i, '(?i-xsm:\b\v$)', 'qr/\b\v$/i'; + iseq qr/\b\v$/s, '(?s-xim:\b\v$)', 'qr/\b\v$/s'; + iseq qr/\b\v$/m, '(?m-xis:\b\v$)', 'qr/\b\v$/m'; + iseq qr/\b\v$/x, '(?x-ism:\b\v$)', 'qr/\b\v$/x'; + iseq qr/\b\v$/xism, '(?msix:\b\v$)', 'qr/\b\v$/xism'; + iseq qr/\b\v$/, '(?-xism:\b\v$)', 'qr/\b\v$/'; + } + + + { + local $Message = "Look around"; + $_ = 'xabcx'; + SKIP: + foreach my $ans ('', 'c') { + ok /(?<=(?=a)..)((?=c)|.)/g or skip "Match failed", 1; + iseq $1, $ans; + } + } + + { + local $Message = "Empty clause"; + $_ = 'a'; + foreach my $ans ('', 'a', '') { + ok /^|a|$/g or skip "Match failed", 1; + iseq $&, $ans; + } + } + + { + local $Message = "Prefixify"; + sub prefixify { + SKIP: { + my ($v, $a, $b, $res) = @_; + ok $v =~ s/\Q$a\E/$b/ or skip "Match failed", 1; + iseq $v, $res; + } + } + + prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); + prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); + } + + { + $_ = 'var="foo"'; + /(\")/; + ok $1 && /$1/, "Capture a quote"; + } + + { + local $Message = "Call code from qr //"; + $a = qr/(?{++$b})/; + $b = 7; + ok /$a$a/ && $b eq '9'; + + $c="$a"; + ok /$a$a/ && $b eq '11'; + + undef $@; + eval {/$c/}; + ok $@ && $@ =~ /not allowed at runtime/; + + use re "eval"; + /$a$c$a/; + iseq $b, '14'; + + our $lex_a = 43; + our $lex_b = 17; + our $lex_c = 27; + my $lex_res = ($lex_b =~ qr/$lex_b(?{ $lex_c = $lex_a++ })/); + + iseq $lex_res, 1; + iseq $lex_a, 44; + iseq $lex_c, 43; + + no re "eval"; + undef $@; + my $match = eval { /$a$c$a/ }; + ok $@ && $@ =~ /Eval-group not allowed/ && !$match; + iseq $b, '14'; + + $lex_a = 2; + $lex_a = 43; + $lex_b = 17; + $lex_c = 27; + $lex_res = ($lex_b =~ qr/17(?{ $lex_c = $lex_a++ })/); + + iseq $lex_res, 1; + iseq $lex_a, 44; + iseq $lex_c, 43; + + } + + + { + no warnings 'closure'; + local $Message = '(?{ $var } refers to package vars'; + package aa; + our $c = 2; + $::c = 3; + '' =~ /(?{ $c = 4 })/; + main::iseq $c, 4; + main::iseq $::c, 3; + } + + + { + must_die 'q(a:[b]:) =~ /[x[:foo:]]/', + 'POSIX class \[:[^:]+:\] unknown in regex', + 'POSIX class [: :] must have valid name'; + + for my $d (qw [= .]) { + must_die "/[[${d}foo${d}]]/", + "\QPOSIX syntax [$d $d] is reserved for future extensions", + "POSIX syntax [[$d $d]] is an error"; + } + } + + + { + # test if failure of patterns returns empty list + local $Message = "Failed pattern returns empty list"; + $_ = 'aaa'; + @_ = /bbb/; + iseq "@_", ""; + + @_ = /bbb/g; + iseq "@_", ""; + + @_ = /(bbb)/; + iseq "@_", ""; + + @_ = /(bbb)/g; + iseq "@_", ""; + } + + + { + local $Message = '@- and @+ tests'; + + /a(?=.$)/; + iseq $#+, 0; + iseq $#-, 0; + iseq $+ [0], 2; + iseq $- [0], 1; + ok !defined $+ [1] && !defined $- [1] && + !defined $+ [2] && !defined $- [2]; + + /a(a)(a)/; + iseq $#+, 2; + iseq $#-, 2; + iseq $+ [0], 3; + iseq $- [0], 0; + iseq $+ [1], 2; + iseq $- [1], 1; + iseq $+ [2], 3; + iseq $- [2], 2; + ok !defined $+ [3] && !defined $- [3] && + !defined $+ [4] && !defined $- [4]; + + + /.(a)(b)?(a)/; + iseq $#+, 3; + iseq $#-, 3; + iseq $+ [1], 2; + iseq $- [1], 1; + iseq $+ [3], 3; + iseq $- [3], 2; + ok !defined $+ [2] && !defined $- [2] && + !defined $+ [4] && !defined $- [4]; + + + /.(a)/; + iseq $#+, 1; + iseq $#-, 1; + iseq $+ [0], 2; + iseq $- [0], 0; + iseq $+ [1], 2; + iseq $- [1], 1; + ok !defined $+ [2] && !defined $- [2] && + !defined $+ [3] && !defined $- [3]; + + /.(a)(ba*)?/; + iseq $#+, 2; + iseq $#-, 1; + } + + + { + local $DiePattern = '^Modification of a read-only value attempted'; + local $Message = 'Elements of @- and @+ are read-only'; + must_die '$+[0] = 13'; + must_die '$-[0] = 13'; + must_die '@+ = (7, 6, 5)'; + must_die '@- = qw (foo bar)'; + } + + + { + local $Message = '\G testing'; + $_ = 'aaa'; + pos = 1; + my @a = /\Ga/g; + iseq "@a", "a a"; + + my $str = 'abcde'; + pos $str = 2; + ok $str !~ /^\G/; + ok $str !~ /^.\G/; + ok $str =~ /^..\G/; + ok $str !~ /^...\G/; + ok $str =~ /\G../ && $& eq 'cd'; + + local $TODO = $running_as_thread; + ok $str =~ /.\G./ && $& eq 'bc'; + } + + + { + local $Message = 'pos inside (?{ })'; + my $str = 'abcde'; + our ($foo, $bar); + ok $str =~ /b(?{$foo = $_; $bar = pos})c/; + iseq $foo, $str; + iseq $bar, 2; + ok !defined pos ($str); + + undef $foo; + undef $bar; + pos $str = undef; + ok $str =~ /b(?{$foo = $_; $bar = pos})c/g; + iseq $foo, $str; + iseq $bar, 2; + iseq pos ($str), 3; + + $_ = $str; + undef $foo; + undef $bar; + ok /b(?{$foo = $_; $bar = pos})c/; + iseq $foo, $str; + iseq $bar, 2; + + undef $foo; + undef $bar; + ok /b(?{$foo = $_; $bar = pos})c/g; + iseq $foo, $str; + iseq $bar, 2; + iseq pos, 3; + + undef $foo; + undef $bar; + pos = undef; + 1 while /b(?{$foo = $_; $bar = pos})c/g; + iseq $foo, $str; + iseq $bar, 2; + ok !defined pos; + + undef $foo; + undef $bar; + $_ = 'abcde|abcde'; + ok s/b(?{$foo = $_; $bar = pos})c/x/g; + iseq $foo, 'abcde|abcde'; + iseq $bar, 8; + iseq $_, 'axde|axde'; + + # List context: + $_ = 'abcde|abcde'; + our @res; + () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; + @res = map {defined $_ ? "'$_'" : 'undef'} @res; + iseq "@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'"; + + @res = (); + () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; + @res = map {defined $_ ? "'$_'" : 'undef'} @res; + iseq "@res", "'' 'ab' 'cde|abcde' " . + "'' 'abc' 'de|abcde' " . + "'abcd' 'e|' 'abcde' " . + "'abcde|' 'ab' 'cde' " . + "'abcde|' 'abc' 'de'" ; + } + + + { + local $Message = '\G anchor checks'; + my $foo = 'aabbccddeeffgg'; + pos ($foo) = 1; + { + local $TODO = $running_as_thread; + no warnings 'uninitialized'; + ok $foo =~ /.\G(..)/g; + iseq $1, 'ab'; + + pos ($foo) += 1; + ok $foo =~ /.\G(..)/g; + iseq $1, 'cc'; + + pos ($foo) += 1; + ok $foo =~ /.\G(..)/g; + iseq $1, 'de'; + + ok $foo =~ /\Gef/g; + } + + undef pos $foo; + ok $foo =~ /\G(..)/g; + iseq $1, 'aa'; + + ok $foo =~ /\G(..)/g; + iseq $1, 'bb'; + + pos ($foo) = 5; + ok $foo =~ /\G(..)/g; + iseq $1, 'cd'; + } + + + { + $_ = '123x123'; + my @res = /(\d*|x)/g; + local $" = '|'; + iseq "@res", "123||x|123|", "0 match in alternation"; + } + + + { + local $Message = "Match against temporaries (created via pp_helem())" . + " is safe"; + ok {foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g; + iseq $1, "bar"; + } + + + { + local $Message = 'package $i inside (?{ }), ' . + 'saved substrings and changing $_'; + our @a = qw [foo bar]; + our @b = (); + s/(\w)(?{push @b, $1})/,$1,/g for @a; + iseq "@b", "f o o b a r"; + iseq "@a", ",f,,o,,o, ,b,,a,,r,"; + + local $Message = 'lexical $i inside (?{ }), ' . + 'saved substrings and changing $_'; + no warnings 'closure'; + my @c = qw [foo bar]; + my @d = (); + s/(\w)(?{push @d, $1})/,$1,/g for @c; + iseq "@d", "f o o b a r"; + iseq "@c", ",f,,o,,o, ,b,,a,,r,"; + } + + + { + local $Message = 'Brackets'; + our $brackets; + $brackets = qr { + { (?> [^{}]+ | (??{ $brackets }) )* } + }x; + + ok "{{}" =~ $brackets; + iseq $&, "{}"; + ok "something { long { and } hairy" =~ $brackets; + iseq $&, "{ and }"; + ok "something { long { and } hairy" =~ m/((??{ $brackets }))/; + iseq $&, "{ and }"; + } + + + { + $_ = "a-a\nxbb"; + pos = 1; + nok m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'; + } + + + { + local $Message = '\G anchor checks'; + my $text = "aaXbXcc"; + pos ($text) = 0; + ok $text !~ /\GXb*X/g; + } + + + { + $_ = "xA\n" x 500; + nok /^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'; + + my $text = "abc dbf"; + my @res = ($text =~ /.*?(b).*?\b/g); + iseq "@res", "b b", '\b is not special'; + } + + + { + local $Message = '\S, [\S], \s, [\s]'; + my @a = map chr, 0 .. 255; + my @b = grep /\S/, @a; + my @c = grep /[^\s]/, @a; + iseq "@b", "@c"; + + @b = grep /\S/, @a; + @c = grep /[\S]/, @a; + iseq "@b", "@c"; + + @b = grep /\s/, @a; + @c = grep /[^\S]/, @a; + iseq "@b", "@c"; + + @b = grep /\s/, @a; + @c = grep /[\s]/, @a; + iseq "@b", "@c"; + } + { + local $Message = '\D, [\D], \d, [\d]'; + my @a = map chr, 0 .. 255; + my @b = grep /\D/, @a; + my @c = grep /[^\d]/, @a; + iseq "@b", "@c"; + + @b = grep /\D/, @a; + @c = grep /[\D]/, @a; + iseq "@b", "@c"; + + @b = grep /\d/, @a; + @c = grep /[^\D]/, @a; + iseq "@b", "@c"; + + @b = grep /\d/, @a; + @c = grep /[\d]/, @a; + iseq "@b", "@c"; + } + { + local $Message = '\W, [\W], \w, [\w]'; + my @a = map chr, 0 .. 255; + my @b = grep /\W/, @a; + my @c = grep /[^\w]/, @a; + iseq "@b", "@c"; + + @b = grep /\W/, @a; + @c = grep /[\W]/, @a; + iseq "@b", "@c"; + + @b = grep /\w/, @a; + @c = grep /[^\W]/, @a; + iseq "@b", "@c"; + + @b = grep /\w/, @a; + @c = grep /[\w]/, @a; + iseq "@b", "@c"; + } + + + { + # see if backtracking optimization works correctly + local $Message = 'Backtrack optimization'; + ok "\n\n" =~ /\n $ \n/x; + ok "\n\n" =~ /\n* $ \n/x; + ok "\n\n" =~ /\n+ $ \n/x; + ok "\n\n" =~ /\n? $ \n/x; + ok "\n\n" =~ /\n*? $ \n/x; + ok "\n\n" =~ /\n+? $ \n/x; + ok "\n\n" =~ /\n?? $ \n/x; + ok "\n\n" !~ /\n*+ $ \n/x; + ok "\n\n" !~ /\n++ $ \n/x; + ok "\n\n" =~ /\n?+ $ \n/x; + } + + + { + package S; + use overload '""' => sub {'Object S'}; + sub new {bless []} + + local $Message = "Ref stringification"; + ::ok do { \my $v} =~ /^SCALAR/, "Scalar ref stringification"; + ::ok do {\\my $v} =~ /^REF/, "Ref ref stringification"; + ::ok [] =~ /^ARRAY/, "Array ref stringification"; + ::ok {} =~ /^HASH/, "Hash ref stringification"; + ::ok 'S' -> new =~ /^Object S/, "Object stringification"; + } + + + { + local $Message = "Test result of match used as match"; + ok 'a1b' =~ ('xyz' =~ /y/); + iseq $`, 'a'; + ok 'a1b' =~ ('xyz' =~ /t/); + iseq $`, 'a'; + } + + + { + local $Message = '"1" is not \s'; + may_not_warn sub {ok ("1\n" x 102) !~ /^\s*\n/m}; + } + + + { + local $Message = '\s, [[:space:]] and [[:blank:]]'; + my %space = (spc => " ", + tab => "\t", + cr => "\r", + lf => "\n", + ff => "\f", + # There's no \v but the vertical tabulator seems miraculously + # be 11 both in ASCII and EBCDIC. + vt => chr(11), + false => "space"); + + my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; + my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; + my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; + + iseq "@space0", "cr ff lf spc tab"; + iseq "@space1", "cr ff lf spc tab vt"; + iseq "@space2", "spc tab"; + } + + + { + local $BugId = '20000731.001'; + ok "A \x{263a} B z C" =~ /A . B (??{ "z" }) C/, + "Match UTF-8 char in presense of (??{ })"; + } + + + { + local $BugId = '20001021.005'; + no warnings 'uninitialized'; + ok undef =~ /^([^\/]*)(.*)$/, "Used to cause a SEGV"; + } + + + SKIP: + { + local $Message = '\C matches octet'; + $_ = "a\x{100}b"; + ok /(.)(\C)(\C)(.)/ or skip q [\C doesn't match], 4; + iseq $1, "a"; + if ($IS_ASCII) { # ASCII (or equivalent), should be UTF-8 + iseq $2, "\xC4"; + iseq $3, "\x80"; + } + elsif ($IS_EBCDIC) { # EBCDIC (or equivalent), should be UTF-EBCDIC + iseq $2, "\x8C"; + iseq $3, "\x41"; + } + else { + SKIP: { + ok 0, "Unexpected platform", "ord ('A') = $ordA"; + skip "Unexpected platform"; + } + } + iseq $4, "b"; + } + + + SKIP: + { + local $Message = '\C matches octet'; + $_ = "\x{100}"; + ok /(\C)/g or skip q [\C doesn't match], 2; + if ($IS_ASCII) { + iseq $1, "\xC4"; + } + elsif ($IS_EBCDIC) { + iseq $1, "\x8C"; + } + else { + ok 0, "Unexpected platform", "ord ('A') = $ordA"; + } + ok /(\C)/g or skip q [\C doesn't match]; + if ($IS_ASCII) { + iseq $1, "\x80"; + } + elsif ($IS_EBCDIC) { + iseq $1, "\x41"; + } + else { + ok 0, "Unexpected platform", "ord ('A') = $ordA"; + } + } + + + { + # Japhy -- added 03/03/2001 + () = (my $str = "abc") =~ /(...)/; + $str = "def"; + iseq $1, "abc", 'Changing subject does not modify $1'; + } + + + SKIP: + { + # The trick is that in EBCDIC the explicit numeric range should + # match (as also in non-EBCDIC) but the explicit alphabetic range + # should not match. + ok "\x8e" =~ /[\x89-\x91]/, '"\x8e" =~ /[\x89-\x91]/'; + ok "\xce" =~ /[\xc9-\xd1]/, '"\xce" =~ /[\xc9-\xd1]/'; + + skip "Not an EBCDIC platform", 2 unless ord ('i') == 0x89 && + ord ('J') == 0xd1; + + # In most places these tests would succeed since \x8e does not + # in most character sets match 'i' or 'j' nor would \xce match + # 'I' or 'J', but strictly speaking these tests are here for + # the good of EBCDIC, so let's test these only there. + nok "\x8e" !~ /[i-j]/, '"\x8e" !~ /[i-j]/'; + nok "\xce" !~ /[I-J]/, '"\xce" !~ /[I-J]/'; + } + + + { + ok "\x{ab}" =~ /\x{ab}/, '"\x{ab}" =~ /\x{ab}/ '; + ok "\x{abcd}" =~ /\x{abcd}/, '"\x{abcd}" =~ /\x{abcd}/'; + } + + + { + local $Message = 'bug id 20001008.001'; + + my @x = ("stra\337e 138", "stra\337e 138"); + for (@x) { + ok s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; + ok my ($latin) = /^(.+)(?:\s+\d)/; + iseq $latin, "stra\337e"; + ok $latin =~ s/stra\337e/straße/; + # + # Previous code follows, but outcommented - there were no tests. + # + # $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a + # use utf8; # needed for the raw UTF-8 + # $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a + } + } + + + { + local $Message = 'Test \x escapes'; + ok "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + ok "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + ok "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + ok "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + ok "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + ok "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + ok "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + ok "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + } + + + { + local $BugId = '20001028.003'; + + # Fist half of the bug. + local $Message = 'HEBREW ACCENT QADMA matched by .*'; + my $X = chr (1448); + ok my ($Y) = $X =~ /(.*)/; + iseq $Y, v1448; + iseq length ($Y), 1; + + # Second half of the bug. + $Message = 'HEBREW ACCENT QADMA in replacement'; + $X = ''; + $X =~ s/^/chr(1488)/e; + iseq length $X, 1; + iseq ord ($X), 1488; + } + + + { + local $BugId = '20001108.001'; + local $Message = 'Repeated s///'; + my $X = "Szab\x{f3},Bal\x{e1}zs"; + my $Y = $X; + $Y =~ s/(B)/$1/ for 0 .. 3; + iseq $Y, $X; + iseq $X, "Szab\x{f3},Bal\x{e1}zs"; + } + + + { + local $BugId = '20000517.001'; + local $Message = 's/// on UTF-8 string'; + my $x = "\x{100}A"; + $x =~ s/A/B/; + iseq $x, "\x{100}B"; + iseq length $x, 2; + } + + + { + local $BugId = '20001230.002'; + local $Message = '\C and É'; + ok "École" =~ /^\C\C(.)/ && $1 eq 'c'; + ok "École" =~ /^\C\C(c)/; + } + + + SKIP: + { + local $Message = 'Match code points > 255'; + $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; + ok /(.\x{300})./ or skip "No match", 4; + ok $` eq "abc\x{100}" && length ($`) == 4; + ok $& eq "\x{200}\x{300}\x{380}" && length ($&) == 3; + ok $' eq "\x{400}defg" && length ($') == 5; + ok $1 eq "\x{200}\x{300}" && length ($1) == 2; + } + + + { + # The original bug report had 'no utf8' here but that was irrelevant. + local $BugId = '20010306.008'; + local $Message = "Don't dump core"; + my $a = "a\x{1234}"; + ok $a =~ m/\w/; # used to core dump. + } + + + { + local $BugId = '20010410.006'; + local $Message = '/g in scalar context'; + for my $rx ('/(.*?)\{(.*?)\}/csg', + '/(.*?)\{(.*?)\}/cg', + '/(.*?)\{(.*?)\}/sg', + '/(.*?)\{(.*?)\}/g', + '/(.+?)\{(.+?)\}/csg',) { + my $i = 0; + my $input = "a{b}c{d}"; + eval <<" --"; + while (eval \$input =~ $rx) { + \$i ++; + } + -- + iseq $i, 2; + } + } + + + { + my $x = "\x{10FFFD}"; + $x =~ s/(.)/$1/g; + ok ord($x) == 0x10FFFD && length($x) == 1, "From Robin Houston"; + } + + + { + my %d = ( + "7f" => [0, 0, 0], + "80" => [1, 1, 0], + "ff" => [1, 1, 0], + "100" => [0, 1, 1], + ); + SKIP: + while (my ($code, $match) = each %d) { + local $Message = "Properties of \\x$code"; + my $char = eval qq ["\\x{$code}"]; + my $i = 0; + ok (($char =~ /[\x80-\xff]/) xor !$$match [$i ++]); + ok (($char =~ /[\x80-\x{100}]/) xor !$$match [$i ++]); + ok (($char =~ /[\x{100}]/) xor !$$match [$i ++]); + } + } + + + { + # From Japhy + local $Message; + must_warn 'qr/(?c)/', '^Useless \(\?c\)'; + must_warn 'qr/(?-c)/', '^Useless \(\?-c\)'; + must_warn 'qr/(?g)/', '^Useless \(\?g\)'; + must_warn 'qr/(?-g)/', '^Useless \(\?-g\)'; + must_warn 'qr/(?o)/', '^Useless \(\?o\)'; + must_warn 'qr/(?-o)/', '^Useless \(\?-o\)'; + + # Now test multi-error regexes + must_warn 'qr/(?g-o)/', '^Useless \(\?g\).*\nUseless \(\?-o\)'; + must_warn 'qr/(?g-c)/', '^Useless \(\?g\).*\nUseless \(\?-c\)'; + # (?c) means (?g) error won't be thrown + must_warn 'qr/(?o-cg)/', '^Useless \(\?o\).*\nUseless \(\?-c\)'; + must_warn 'qr/(?ogc)/', '^Useless \(\?o\).*\nUseless \(\?g\).*\n' . + 'Useless \(\?c\)'; + } + + + { + local $Message = "/x tests"; + $_ = "foo"; + eval_ok <<" --"; + /f + o\r + o + \$ + /x + -- + eval_ok <<" --"; + /f + o + o + \$\r + /x + -- + } + + + { + local $Message = "/o feature"; + sub test_o {$_ [0] =~ /$_[1]/o; return $1} + iseq test_o ('abc', '(.)..'), 'a'; + iseq test_o ('abc', '..(.)'), 'a'; + } + + + { + local $BugId = "20010619.003"; + # Amazingly vertical tabulator is the same in ASCII and EBCDIC. + for ("\n", "\t", "\014", "\r") { + ok !/[[:print:]]/, "'$_' not in [[:print:]]"; + } + for (" ") { + ok /[[:print:]]/, "'$_' in [[:print:]]"; + } + } + + + { + # Test basic $^N usage outside of a regex + local $Message = '$^N usage outside of a regex'; + my $x = "abcdef"; + ok ($x =~ /cde/ and !defined $^N); + ok ($x =~ /(cde)/ and $^N eq "cde"); + ok ($x =~ /(c)(d)(e)/ and $^N eq "e"); + ok ($x =~ /(c(d)e)/ and $^N eq "cde"); + ok ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde"); + ok ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde"); + ok ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc"); + ok ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde"); + ok ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde"); + ok ($x =~ /(?:c(d)e)/ and $^N eq "d"); + ok ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d"); + ok ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f"); + ok ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f"); + ok ($x =~ /(([ace])|([bd]))*/ and $^N eq "e"); + {ok ($x =~ /(([ace])|([bdf]))*/ and $^N eq "f");} + ## Test to see if $^N is automatically localized -- it should now + ## have the value set in the previous test. + iseq $^N, "e", '$^N is automatically localized'; + + # Now test inside (?{ ... }) + local $Message = '$^N usage inside (?{ ... })'; + our ($y, $z); + ok ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b"); + ok ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"); + ok ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"); + ok ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" + and $z eq "abcd"); + ok ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" + and $z eq "abcde"); + + } + + + SKIP: + { + ## Should probably put in tests for all the POSIX stuff, + ## but not sure how to guarantee a specific locale...... + + skip "Not an ASCII platform", 2 unless $IS_ASCII; + local $Message = 'Test [[:cntrl:]]'; + my $AllBytes = join "" => map {chr} 0 .. 255; + (my $x = $AllBytes) =~ s/[[:cntrl:]]//g; + iseq $x, join "", map {chr} 0x20 .. 0x7E, 0x80 .. 0xFF; + + ($x = $AllBytes) =~ s/[^[:cntrl:]]//g; + iseq $x, join "", map {chr} 0x00 .. 0x1F, 0x7F; + } + + + { + # With /s modifier UTF8 chars were interpreted as bytes + local $Message = "UTF-8 chars aren't bytes"; + my $a = "Hello \x{263A} World"; + my @a = ($a =~ /./gs); + iseq $#a, 12; + } + + + { + local $Message = '. matches \n with /s'; + my $str1 = "foo\nbar"; + my $str2 = "foo\n\x{100}bar"; + my ($a, $b) = map {chr} $IS_ASCII ? (0xc4, 0x80) : (0x8c, 0x41); + my @a; + @a = $str1 =~ /./g; iseq @a, 6; iseq "@a", "f o o b a r"; + @a = $str1 =~ /./gs; iseq @a, 7; iseq "@a", "f o o \n b a r"; + @a = $str1 =~ /\C/g; iseq @a, 7; iseq "@a", "f o o \n b a r"; + @a = $str1 =~ /\C/gs; iseq @a, 7; iseq "@a", "f o o \n b a r"; + @a = $str2 =~ /./g; iseq @a, 7; iseq "@a", "f o o \x{100} b a r"; + @a = $str2 =~ /./gs; iseq @a, 8; iseq "@a", "f o o \n \x{100} b a r"; + @a = $str2 =~ /\C/g; iseq @a, 9; iseq "@a", "f o o \n $a $b b a r"; + @a = $str2 =~ /\C/gs; iseq @a, 9; iseq "@a", "f o o \n $a $b b a r"; + } + + + { + # [ID 20010814.004] pos() doesn't work when using =~m// in list context + local $BugId = '20010814.004'; + $_ = "ababacadaea"; + my $a = join ":", /b./gc; + my $b = join ":", /a./gc; + my $c = pos; + iseq "$a $b $c", 'ba:ba ad:ae 10', "pos() works with () = m//"; + } + + + { + # [ID 20010407.006] matching utf8 return values from + # functions does not work + local $BugId = '20010407.006'; + local $Message = 'UTF-8 return values from functions'; + package ID_20010407_006; + sub x {"a\x{1234}"} + my $x = x; + my $y; + ::ok $x =~ /(..)/; + $y = $1; + ::ok length ($y) == 2 && $y eq $x; + ::ok x =~ /(..)/; + $y = $1; + ::ok length ($y) == 2 && $y eq $x; + } + + + { + no warnings 'digit'; + # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. + my $x; + $x = "\x4e" . "E"; + ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); + + $x = "\x4e" . "i"; + ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); + + $x = "\x4" . "j"; + ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); + + $x = "\x0" . "k"; + ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); + + $x = "\x0" . "x"; + ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); + + $x = "\x0" . "xa"; + ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); + + $x = "\x9" . "_b"; + ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); + + # and now again in [] ranges + + $x = "\x4e" . "E"; + ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); + + $x = "\x4e" . "i"; + ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); + + $x = "\x4" . "j"; + ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); + + $x = "\x0" . "k"; + ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); + + $x = "\x0" . "x"; + ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); + + $x = "\x0" . "xa"; + ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); + + $x = "\x9" . "_b"; + ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); + + # Check that \x{##} works. 5.6.1 fails quite a few of these. + + $x = "\x9b"; + ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); + + $x = "\x0" . "y"; + ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); + + $x = "\x0" . "y"; + ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); + + $x = "\x9b" . "y"; + ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); + + $x = "\x9b"; + ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9_b}y]{2}$/, + "\\x{9_b} is to be treated as \\x9b (again)"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); + + $x = "\x0" . "y"; + ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); + + $x = "\x0" . "y"; + ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); + + $x = "\x9b" . "y"; + ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); + + } + + + { + # High bit bug -- japhy + my $x = "ab\200d"; + ok $x =~ /.*?\200/, "High bit fine"; + } + + + { + # The basic character classes and Unicode + ok "\x{0100}" =~ /\w/, 'LATIN CAPITAL LETTER A WITH MACRON in /\w/'; + ok "\x{0660}" =~ /\d/, 'ARABIC-INDIC DIGIT ZERO in /\d/'; + ok "\x{1680}" =~ /\s/, 'OGHAM SPACE MARK in /\s/'; + } + + + { + local $Message = "Folding matches and Unicode"; + ok "a\x{100}" =~ /A/i; + ok "A\x{100}" =~ /a/i; + ok "a\x{100}" =~ /a/i; + ok "A\x{100}" =~ /A/i; + ok "\x{101}a" =~ /\x{100}/i; + ok "\x{100}a" =~ /\x{100}/i; + ok "\x{101}a" =~ /\x{101}/i; + ok "\x{100}a" =~ /\x{101}/i; + ok "a\x{100}" =~ /A\x{100}/i; + ok "A\x{100}" =~ /a\x{100}/i; + ok "a\x{100}" =~ /a\x{100}/i; + ok "A\x{100}" =~ /A\x{100}/i; + ok "a\x{100}" =~ /[A]/i; + ok "A\x{100}" =~ /[a]/i; + ok "a\x{100}" =~ /[a]/i; + ok "A\x{100}" =~ /[A]/i; + ok "\x{101}a" =~ /[\x{100}]/i; + ok "\x{100}a" =~ /[\x{100}]/i; + ok "\x{101}a" =~ /[\x{101}]/i; + ok "\x{100}a" =~ /[\x{101}]/i; + } + + + { + use charnames ':full'; + local $Message = "Folding 'LATIN LETTER A WITH GRAVE'"; + + my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; + my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; + + ok $lower =~ m/$UPPER/i; + ok $UPPER =~ m/$lower/i; + ok $lower =~ m/[$UPPER]/i; + ok $UPPER =~ m/[$lower]/i; + + local $Message = "Folding 'GREEK LETTER ALPHA WITH VRACHY'"; + + $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; + $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; + + ok $lower =~ m/$UPPER/i; + ok $UPPER =~ m/$lower/i; + ok $lower =~ m/[$UPPER]/i; + ok $UPPER =~ m/[$lower]/i; + + local $Message = "Folding 'LATIN LETTER Y WITH DIAERESIS'"; + + $lower = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; + $UPPER = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; + + ok $lower =~ m/$UPPER/i; + ok $UPPER =~ m/$lower/i; + ok $lower =~ m/[$UPPER]/i; + ok $UPPER =~ m/[$lower]/i; + } + + + { + use charnames ':full'; + local $PatchId = "13843"; + local $Message = "GREEK CAPITAL LETTER SIGMA vs " . + "COMBINING GREEK PERISPOMENI"; + + my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; + my $char = "\N{COMBINING GREEK PERISPOMENI}"; + + may_not_warn sub {ok "_:$char:_" !~ m/_:$SIGMA:_/i}; + } + + + { + local $Message = '\X'; + use charnames ':full'; + + ok "a!" =~ /^(\X)!/ && $1 eq "a"; + ok "\xDF!" =~ /^(\X)!/ && $1 eq "\xDF"; + ok "\x{100}!" =~ /^(\X)!/ && $1 eq "\x{100}"; + ok "\x{100}\x{300}!" =~ /^(\X)!/ && $1 eq "\x{100}\x{300}"; + ok "\N{LATIN CAPITAL LETTER E}!" =~ /^(\X)!/ && + $1 eq "\N{LATIN CAPITAL LETTER E}"; + ok "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}!" + =~ /^(\X)!/ && + $1 eq "\N{LATIN CAPITAL LETTER E}\N{COMBINING GRAVE ACCENT}"; + + local $Message = '\C and \X'; + ok "!abc!" =~ /a\Cc/; + ok "!abc!" =~ /a\Xc/; + } + + + { + local $Message = "Final Sigma"; + + my $SIGMA = "\x{03A3}"; # CAPITAL + my $Sigma = "\x{03C2}"; # SMALL FINAL + my $sigma = "\x{03C3}"; # SMALL + + ok $SIGMA =~ /$SIGMA/i; + ok $SIGMA =~ /$Sigma/i; + ok $SIGMA =~ /$sigma/i; + + ok $Sigma =~ /$SIGMA/i; + ok $Sigma =~ /$Sigma/i; + ok $Sigma =~ /$sigma/i; + + ok $sigma =~ /$SIGMA/i; + ok $sigma =~ /$Sigma/i; + ok $sigma =~ /$sigma/i; + + ok $SIGMA =~ /[$SIGMA]/i; + ok $SIGMA =~ /[$Sigma]/i; + ok $SIGMA =~ /[$sigma]/i; + + ok $Sigma =~ /[$SIGMA]/i; + ok $Sigma =~ /[$Sigma]/i; + ok $Sigma =~ /[$sigma]/i; + + ok $sigma =~ /[$SIGMA]/i; + ok $sigma =~ /[$Sigma]/i; + ok $sigma =~ /[$sigma]/i; + + local $Message = "More final Sigma"; + + my $S3 = "$SIGMA$Sigma$sigma"; + + ok ":$S3:" =~ /:(($SIGMA)+):/i && $1 eq $S3 && $2 eq $sigma; + ok ":$S3:" =~ /:(($Sigma)+):/i && $1 eq $S3 && $2 eq $sigma; + ok ":$S3:" =~ /:(($sigma)+):/i && $1 eq $S3 && $2 eq $sigma; + + ok ":$S3:" =~ /:(([$SIGMA])+):/i && $1 eq $S3 && $2 eq $sigma; + ok ":$S3:" =~ /:(([$Sigma])+):/i && $1 eq $S3 && $2 eq $sigma; + ok ":$S3:" =~ /:(([$sigma])+):/i && $1 eq $S3 && $2 eq $sigma; + } + + + { + use charnames ':full'; + local $Message = "Parlez-Vous " . + "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais?"; + + ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran.ais/ && + $& eq "Francais"; + ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran.ais/ && + $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; + ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Cais/ && + $& eq "Francais"; + # COMBINING CEDILLA is two bytes when encoded + ok "Franc\N{COMBINING CEDILLA}ais" =~ /Franc\C\Cais/; + ok "Fran\N{LATIN SMALL LETTER C}ais" =~ /Fran\Xais/ && + $& eq "Francais"; + ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ /Fran\Xais/ && + $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; + ok "Franc\N{COMBINING CEDILLA}ais" =~ /Fran\Xais/ && + $& eq "Franc\N{COMBINING CEDILLA}ais"; + ok "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais" =~ + /Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais/ && + $& eq "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"; + ok "Franc\N{COMBINING CEDILLA}ais" =~ /Franc\N{COMBINING CEDILLA}ais/ && + $& eq "Franc\N{COMBINING CEDILLA}ais"; + + my @f = ( + ["Fran\N{LATIN SMALL LETTER C}ais", "Francais"], + ["Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais", + "Fran\N{LATIN SMALL LETTER C WITH CEDILLA}ais"], + ["Franc\N{COMBINING CEDILLA}ais", "Franc\N{COMBINING CEDILLA}ais"], + ); + foreach my $entry (@f) { + my ($subject, $match) = @$entry; + ok $subject =~ /Fran(?:c\N{COMBINING CEDILLA}?| + \N{LATIN SMALL LETTER C WITH CEDILLA})ais/x && + $& eq $match; + } + } + + + { + local $Message = "Lingering (and useless) UTF8 flag doesn't mess up /i"; + my $pat = "ABcde"; + my $str = "abcDE\x{100}"; + chop $str; + ok $str =~ /$pat/i; + + $pat = "ABcde\x{100}"; + $str = "abcDE"; + chop $pat; + ok $str =~ /$pat/i; + + $pat = "ABcde\x{100}"; + $str = "abcDE\x{100}"; + chop $pat; + chop $str; + ok $str =~ /$pat/i; + } + + + { + use charnames ':full'; + local $Message = "LATIN SMALL LETTER SHARP S " . + "(\N{LATIN SMALL LETTER SHARP S})"; + + ok "\N{LATIN SMALL LETTER SHARP S}" =~ + /\N{LATIN SMALL LETTER SHARP S}/; + ok "\N{LATIN SMALL LETTER SHARP S}" =~ + /\N{LATIN SMALL LETTER SHARP S}/i; + ok "\N{LATIN SMALL LETTER SHARP S}" =~ + /[\N{LATIN SMALL LETTER SHARP S}]/; + ok "\N{LATIN SMALL LETTER SHARP S}" =~ + /[\N{LATIN SMALL LETTER SHARP S}]/i; + + ok "ss" =~ /\N{LATIN SMALL LETTER SHARP S}/i; + ok "SS" =~ /\N{LATIN SMALL LETTER SHARP S}/i; + ok "ss" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i; + ok "SS" =~ /[\N{LATIN SMALL LETTER SHARP S}]/i; + + ok "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i; + ok "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i; + + local $Message = "Unoptimized named sequence in class"; + ok "ss" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i; + ok "SS" =~ /[\N{LATIN SMALL LETTER SHARP S}x]/i; + ok "\N{LATIN SMALL LETTER SHARP S}" =~ + /[\N{LATIN SMALL LETTER SHARP S}x]/; + ok "\N{LATIN SMALL LETTER SHARP S}" =~ + /[\N{LATIN SMALL LETTER SHARP S}x]/i; + } + + + { + # More whitespace: U+0085, U+2028, U+2029\n"; + + # U+0085, U+00A0 need to be forced to be Unicode, the \x{100} does that. + SKIP: { + skip "EBCDIC platform", 4 if $IS_EBCDIC; + # Do \x{0015} and \x{0041} match \s in EBCDIC? + ok "<\x{100}\x{0085}>" =~ /<\x{100}\s>/, '\x{0085} in \s'; + ok "<\x{0085}>" =~ /<\v>/, '\x{0085} in \v'; + ok "<\x{100}\x{00A0}>" =~ /<\x{100}\s>/, '\x{00A0} in \s'; + ok "<\x{00A0}>" =~ /<\h>/, '\x{00A0} in \h'; + } + my @h = map {sprintf "%05x" => $_} 0x01680, 0x0180E, 0x02000 .. 0x0200A, + 0x0202F, 0x0205F, 0x03000; + my @v = map {sprintf "%05x" => $_} 0x02028, 0x02029; + + my @H = map {sprintf "%05x" => $_} 0x01361, 0x0200B, 0x02408, 0x02420, + 0x0303F, 0xE0020; + my @V = map {sprintf "%05x" => $_} 0x0008A .. 0x0008D, 0x00348, 0x10100, + 0xE005F, 0xE007C; + + for my $hex (@h) { + my $str = eval qq ["<\\x{$hex}>"]; + ok $str =~ /<\s>/, "\\x{$hex} in \\s"; + ok $str =~ /<\h>/, "\\x{$hex} in \\h"; + ok $str !~ /<\v>/, "\\x{$hex} not in \\v"; + } + + for my $hex (@v) { + my $str = eval qq ["<\\x{$hex}>"]; + ok $str =~ /<\s>/, "\\x{$hex} in \\s"; + ok $str =~ /<\v>/, "\\x{$hex} in \\v"; + ok $str !~ /<\h>/, "\\x{$hex} not in \\h"; + } + + for my $hex (@H) { + my $str = eval qq ["<\\x{$hex}>"]; + ok $str =~ /<\S>/, "\\x{$hex} in \\S"; + ok $str =~ /<\H>/, "\\x{$hex} in \\H"; + } + + for my $hex (@V) { + my $str = eval qq ["<\\x{$hex}>"]; + ok $str =~ /<\S>/, "\\x{$hex} in \\S"; + ok $str =~ /<\V>/, "\\x{$hex} in \\V"; + } + } + + + { + # . with /s should work on characters, as opposed to bytes + local $Message = ". with /s works on characters, not bytes"; + + my $s = "\x{e4}\x{100}"; + # This is not expected to match: the point is that + # neither should we get "Malformed UTF-8" warnings. + may_not_warn sub {$s =~ /\G(.+?)\n/gcs}, "No 'Malformed UTF-8' warning"; + + my @c; + push @c => $1 while $s =~ /\G(.)/gs; + + local $" = ""; + iseq "@c", $s; + + # Test only chars < 256 + my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; + my $r1 = ""; + while ($t1 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { + $r1 .= $1 . $2; + } + + my $t2 = $t1 . "\x{100}"; # Repeat with a larger char + my $r2 = ""; + while ($t2 =~ / \G ( .+? ) \n\s+ ( .+? ) ( $ | \n\s+ ) /xgcs) { + $r2 .= $1 . $2; + } + $r2 =~ s/\x{100}//; + + iseq $r1, $r2; + } + + + { + local $Message = "Unicode lookbehind"; + ok "A\x{100}B" =~ /(?<=A.)B/; + ok "A\x{200}\x{300}B" =~ /(?<=A..)B/; + ok "\x{400}AB" =~ /(?<=\x{400}.)B/; + ok "\x{500}\x{600}B" =~ /(?<=\x{500}.)B/; + + # Original code also contained: + # ok "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/; + # but that looks like a typo. + } + + + { + local $Message = 'UTF-8 hash keys and /$/'; + # http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters + # /2002-01/msg01327.html + + my $u = "a\x{100}"; + my $v = substr ($u, 0, 1); + my $w = substr ($u, 1, 1); + my %u = ($u => $u, $v => $v, $w => $w); + for (keys %u) { + my $m1 = /^\w*$/ ? 1 : 0; + my $m2 = $u {$_} =~ /^\w*$/ ? 1 : 0; + iseq $m1, $m2; + } + } + + + { + local $BugId = "20020124.005"; + local $PatchId = "14795"; + local $Message = "s///eg"; + + for my $char ("a", "\x{df}", "\x{100}") { + my $x = "$char b $char"; + $x =~ s{($char)}{ + "c" =~ /c/; + "x"; + }ge; + iseq substr ($x, 0, 1), substr ($x, -1, 1); + } + } + + + { + local $Message = "No SEGV in s/// and UTF-8"; + my $s = "s#\x{100}" x 4; + ok $s =~ s/[^\w]/ /g; + if ( $ENV{PERL_TEST_LEGACY_POSIX_CC} ) { + iseq $s, "s \x{100}" x 4; + } + else { + iseq $s, "s " x 4; + } + } + + + { + local $Message = "UTF-8 bug (maybe already known?)"; + my $u = "foo"; + $u =~ s/./\x{100}/g; + iseq $u, "\x{100}\x{100}\x{100}"; + + $u = "foobar"; + $u =~ s/[ao]/\x{100}/g; + iseq $u, "f\x{100}\x{100}b\x{100}r"; + + $u =~ s/\x{100}/e/g; + iseq $u, "feeber"; + } + + + { + local $Message = "UTF-8 bug with s///"; + # check utf8/non-utf8 mixtures + # try to force all float/anchored check combinations + + my $c = "\x{100}"; + my $subst; + for my $re ("xx.*$c", "x.*$c$c", "$c.*xx", "$c$c.*x", + "xx.*(?=$c)", "(?=$c).*xx",) { + ok "xxx" !~ /$re/; + ok +($subst = "xxx") !~ s/$re//; + } + for my $re ("xx.*$c*", "$c*.*xx") { + ok "xxx" =~ /$re/; + ok +($subst = "xxx") =~ s/$re//; + iseq $subst, ""; + } + for my $re ("xxy*", "y*xx") { + ok "xx$c" =~ /$re/; + ok +($subst = "xx$c") =~ s/$re//; + iseq $subst, $c; + ok "xy$c" !~ /$re/; + ok +($subst = "xy$c") !~ s/$re//; + } + for my $re ("xy$c*z", "x$c*yz") { + ok "xyz" =~ /$re/; + ok +($subst = "xyz") =~ s/$re//; + iseq $subst, ""; + } + } + + + { + local $Message = "qr /.../x"; + my $R = qr / A B C # D E/x; + ok "ABCDE" =~ $R && $& eq "ABC"; + ok "ABCDE" =~ /$R/ && $& eq "ABC"; + ok "ABCDE" =~ m/$R/ && $& eq "ABC"; + ok "ABCDE" =~ /($R)/ && $1 eq "ABC"; + ok "ABCDE" =~ m/($R)/ && $1 eq "ABC"; + } + + + { + local $BugId = "20020412.005"; + local $Message = "Correct pmop flags checked when empty pattern"; + + # Requires reuse of last successful pattern. + my $num = 123; + $num =~ /\d/; + for (0 .. 1) { + my $match = ?? + 0; + ok $match != $_, $Message, + sprintf "'match one' %s on %s iteration" => + $match ? 'succeeded' : 'failed', + $_ ? 'second' : 'first'; + } + $num =~ /(\d)/; + my $result = join "" => $num =~ //g; + iseq $result, $num; + } + + + { + local $BugId = '20020630.002'; + local $Message = 'UTF-8 regex matches above 32k'; + for (['byte', "\x{ff}"], ['utf8', "\x{1ff}"]) { + my ($type, $char) = @$_; + for my $len (32000, 32768, 33000) { + my $s = $char . "f" x $len; + my $r = $s =~ /$char([f]*)/gc; + ok $r, $Message, "<$type x $len>"; + ok !$r || pos ($s) == $len + 1, $Message, + "<$type x $len>; pos = @{[pos $s]}"; + } + } + } + + + { + our $a = bless qr /foo/ => 'Foo'; + ok 'goodfood' =~ $a, "Reblessed qr // matches"; + iseq $a, '(?-xism:foo)', "Reblessed qr // stringifies"; + my $x = "\x{3fe}"; + my $z = my $y = "\317\276"; # Byte representation of $x + $a = qr /$x/; + ok $x =~ $a, "UTF-8 interpolation in qr //"; + ok "a$a" =~ $x, "Stringified qr // preserves UTF-8"; + ok "a$x" =~ /^a$a\z/, "Interpolated qr // preserves UTF-8"; + ok "a$x" =~ /^a(??{$a})\z/, + "Postponed interpolation of qr // preserves UTF-8"; + { + local $BugId = '17776'; + iseq length qr /##/x, 12, "## in qr // doesn't corrupt memory"; + } + { + use re 'eval'; + ok "$x$x" =~ /^$x(??{$x})\z/, + "Postponed UTF-8 string in UTF-8 re matches UTF-8"; + ok "$y$x" =~ /^$y(??{$x})\z/, + "Postponed UTF-8 string in non-UTF-8 re matches UTF-8"; + ok "$y$x" !~ /^$y(??{$y})\z/, + "Postponed non-UTF-8 string in non-UTF-8 re doesn't match UTF-8"; + ok "$x$x" !~ /^$x(??{$y})\z/, + "Postponed non-UTF-8 string in UTF-8 re doesn't match UTF-8"; + ok "$y$y" =~ /^$y(??{$y})\z/, + "Postponed non-UTF-8 string in non-UTF-8 re matches non-UTF8"; + ok "$x$y" =~ /^$x(??{$y})\z/, + "Postponed non-UTF-8 string in UTF-8 re matches non-UTF8"; + + $y = $z; # Reset $y after upgrade. + ok "$x$y" !~ /^$x(??{$x})\z/, + "Postponed UTF-8 string in UTF-8 re doesn't match non-UTF-8"; + ok "$y$y" !~ /^$y(??{$x})\z/, + "Postponed UTF-8 string in non-UTF-8 re doesn't match non-UTF-8"; + } + } + + + { + local $PatchId = '18179'; + my $s = "\x{100}" x 5; + my $ok = $s =~ /(\x{100}{4})/; + my ($ord, $len) = (ord $1, length $1); + ok $ok && $ord == 0x100 && $len == 4, "No panic: end_shift"; + } + + + { + local $BugId = '15763'; + our $a = "x\x{100}"; + chop $a; # Leaves the UTF-8 flag + $a .= "y"; # 1 byte before 'y'. + + ok $a =~ /^\C/, 'match one \C on 1-byte UTF-8'; + ok $a =~ /^\C{1}/, 'match \C{1}'; + + ok $a =~ /^\Cy/, 'match \Cy'; + ok $a =~ /^\C{1}y/, 'match \C{1}y'; + + ok $a !~ /^\C\Cy/, q {don't match two \Cy}; + ok $a !~ /^\C{2}y/, q {don't match \C{2}y}; + + $a = "\x{100}y"; # 2 bytes before "y" + + ok $a =~ /^\C/, 'match one \C on 2-byte UTF-8'; + ok $a =~ /^\C{1}/, 'match \C{1}'; + ok $a =~ /^\C\C/, 'match two \C'; + ok $a =~ /^\C{2}/, 'match \C{2}'; + + ok $a =~ /^\C\C\C/, 'match three \C on 2-byte UTF-8 and a byte'; + ok $a =~ /^\C{3}/, 'match \C{3}'; + + ok $a =~ /^\C\Cy/, 'match two \C'; + ok $a =~ /^\C{2}y/, 'match \C{2}'; + + ok $a !~ /^\C\C\Cy/, q {don't match three \Cy}; + ok $a !~ /^\C{2}\Cy/, q {don't match \C{2}\Cy}; + ok $a !~ /^\C{3}y/, q {don't match \C{3}y}; + + $a = "\x{1000}y"; # 3 bytes before "y" + + ok $a =~ /^\C/, 'match one \C on three-byte UTF-8'; + ok $a =~ /^\C{1}/, 'match \C{1}'; + ok $a =~ /^\C\C/, 'match two \C'; + ok $a =~ /^\C{2}/, 'match \C{2}'; + ok $a =~ /^\C\C\C/, 'match three \C'; + ok $a =~ /^\C{3}/, 'match \C{3}'; + + ok $a =~ /^\C\C\C\C/, 'match four \C on three-byte UTF-8 and a byte'; + ok $a =~ /^\C{4}/, 'match \C{4}'; + + ok $a =~ /^\C\C\Cy/, 'match three \Cy'; + ok $a =~ /^\C{3}y/, 'match \C{3}y'; + + ok $a !~ /^\C\C\C\Cy/, q {don't match four \Cy}; + ok $a !~ /^\C{4}y/, q {don't match \C{4}y}; + } + + + { + local $\; + $_ = 'aaaaaaaaaa'; + utf8::upgrade($_); chop $_; $\="\n"; + ok /[^\s]+/, 'm/[^\s]/ utf8'; + ok /[^\d]+/, 'm/[^\d]/ utf8'; + ok +($a = $_, $_ =~ s/[^\s]+/./g), 's/[^\s]/ utf8'; + ok +($a = $_, $a =~ s/[^\d]+/./g), 's/[^\s]/ utf8'; + } + + + { + local $BugId = '15397'; + local $Message = 'UTF-8 matching'; + ok "\x{100}" =~ /\x{100}/; + ok "\x{100}" =~ /(\x{100})/; + ok "\x{100}" =~ /(\x{100}){1}/; + ok "\x{100}\x{100}" =~ /(\x{100}){2}/; + ok "\x{100}\x{100}" =~ /(\x{100})(\x{100})/; + } + + + { + local $BugId = '7471'; + local $Message = 'Neither ()* nor ()*? sets $1 when matched 0 times'; + local $_ = 'CD'; + ok /(AB)*?CD/ && !defined $1; + ok /(AB)*CD/ && !defined $1; + } + + + { + local $BugId = '3547'; + local $Message = "Caching shouldn't prevent match"; + my $pattern = "^(b+?|a){1,2}c"; + ok "bac" =~ /$pattern/ && $1 eq 'a'; + ok "bbac" =~ /$pattern/ && $1 eq 'a'; + ok "bbbac" =~ /$pattern/ && $1 eq 'a'; + ok "bbbbac" =~ /$pattern/ && $1 eq 'a'; + } + + + + { + local $BugId = '18232'; + local $Message = '$1 should keep UTF-8 ness'; + ok "\x{100}" =~ /(.)/; + iseq $1, "\x{100}", '$1 is UTF-8'; + { 'a' =~ /./; } + iseq $1, "\x{100}", '$1 is still UTF-8'; + isneq $1, "\xC4\x80", '$1 is not non-UTF-8'; + } + + + { + local $BugId = '19767'; + local $Message = "Optimizer doesn't prematurely reject match"; + use utf8; + + my $attr = 'Name-1'; + my $NormalChar = qr /[\p{IsDigit}\p{IsLower}\p{IsUpper}]/; + my $NormalWord = qr /${NormalChar}+?/; + my $PredNameHyphen = qr /^${NormalWord}(\-${NormalWord})*?$/; + + $attr =~ /^$/; + ok $attr =~ $PredNameHyphen; # Original test. + + "a" =~ m/[b]/; + ok "0" =~ /\p{N}+\z/; # Variant. + } + + + { + local $BugId = '20683'; + local $Message = "(??{ }) doesn't return stale values"; + our $p = 1; + foreach (1, 2, 3, 4) { + $p ++ if /(??{ $p })/ + } + iseq $p, 5; + + { + package P; + $a = 1; + sub TIESCALAR {bless []} + sub FETCH {$a ++} + } + tie $p, "P"; + foreach (1, 2, 3, 4) { + /(??{ $p })/ + } + iseq $p, 5; + } + + + { + # Subject: Odd regexp behavior + # From: Markus Kuhn <Markus.Kuhn@cl.cam.ac.uk> + # Date: Wed, 26 Feb 2003 16:53:12 +0000 + # Message-Id: <E18o4nw-0008Ly-00@wisbech.cl.cam.ac.uk> + # To: perl-unicode@perl.org + + local $Message = 'Markus Kuhn 2003-02-26'; + + my $x = "\x{2019}\nk"; + ok $x =~ s/(\S)\n(\S)/$1 $2/sg; + ok $x eq "\x{2019} k"; + + $x = "b\nk"; + ok $x =~ s/(\S)\n(\S)/$1 $2/sg; + ok $x eq "b k"; + + ok "\x{2019}" =~ /\S/; + } + + + { + local $BugId = '21411'; + local $Message = "(??{ .. }) in split doesn't corrupt its stack"; + our $i; + ok '-1-3-5-' eq join '', split /((??{$i++}))/, '-1-3-5-'; + no warnings 'deprecated', 'syntax'; + split /(?{'WOW'})/, 'abc'; + local $" = "|"; + iseq "@_", "a|b|c"; + } + + + { + # XXX DAPM 13-Apr-06. Recursive split is still broken. It's only luck it + # hasn't been crashing. Disable this test until it is fixed properly. + # XXX also check what it returns rather than just doing ok(1,...) + # split /(?{ split "" })/, "abc"; + local $TODO = "Recursive split is still broken"; + ok 0, 'cache_re & "(?{": it dumps core in 5.6.1 & 5.8.0'; + } + + + { + ok "\x{100}\n" =~ /\x{100}\n$/, "UTF-8 length cache and fbm_compile"; + } + + + { + package Str; + use overload q /""/ => sub {${$_ [0]};}; + sub new {my ($c, $v) = @_; bless \$v, $c;} + + package main; + $_ = Str -> new ("a\x{100}/\x{100}b"); + ok join (":", /\b(.)\x{100}/g) eq "a:/", "re_intuit_start and PL_bostr"; + } + + + { + local $BugId = '17757'; + $_ = "code: 'x' { '...' }\n"; study; + my @x; push @x, $& while m/'[^\']*'/gx; + local $" = ":"; + iseq "@x", "'x':'...'", "Parse::RecDescent triggered infinite loop"; + } + + + { + my $re = qq /^([^X]*)X/; + utf8::upgrade ($re); + ok "\x{100}X" =~ /$re/, "S_cl_and ANYOF_UNICODE & ANYOF_INVERTED"; + } + + + { + local $BugId = '22354'; + sub func ($) { + ok "a\nb" !~ /^b/, "Propagated modifier; $_[0]"; + ok "a\nb" =~ /^b/m, "Propagated modifier; $_[0] - with /m"; + } + func "standalone"; + $_ = "x"; s/x/func "in subst"/e; + $_ = "x"; s/x/func "in multiline subst"/em; + + # + # Next two give 'panic: malloc'. + # Outcommented, using two TODOs. + # + local $TODO = 'panic: malloc'; + local $Message = 'Postponed regexp and propaged modifier'; + # ok 0 for 1 .. 2; + SKIP: { + skip "panic: malloc", 2; + $_ = "x"; /x(?{func "in regexp"})/; + $_ = "x"; /x(?{func "in multiline regexp"})/m; + } + } + + + { + local $BugId = '19049'; + $_ = "abcdef\n"; + my @x = m/./g; + iseq "abcde", $`, 'Global match sets $`'; + } + + + { + ok "123\x{100}" =~ /^.*1.*23\x{100}$/, + 'UTF-8 + multiple floating substr'; + } + + + { + local $Message = '<20030808193656.5109.1@llama.ni-s.u-net.com>'; + + # LATIN SMALL/CAPITAL LETTER A WITH MACRON + ok " \x{101}" =~ qr/\x{100}/i; + + # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + ok " \x{1E01}" =~ qr/\x{1E00}/i; + + # DESERET SMALL/CAPITAL LETTER LONG I + ok " \x{10428}" =~ qr/\x{10400}/i; + + # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' + ok " \x{1E01}x" =~ qr/\x{1E00}X/i; + } + + + { + # [perl #23769] Unicode regex broken on simple example + # regrepeat() didn't handle UTF-8 EXACT case right. + local $BugId = '23769'; + my $Mess = 'regrepeat() handles UTF-8 EXACT case right'; + local $Message = $Mess; + + my $s = "\x{a0}\x{a0}\x{a0}\x{100}"; chop $s; + + ok $s =~ /\x{a0}/; + ok $s =~ /\x{a0}+/; + ok $s =~ /\x{a0}\x{a0}/; + + $Message = "$Mess (easy variant)"; + ok "aaa\x{100}" =~ /(a+)/; + iseq $1, "aaa"; + + $Message = "$Mess (easy invariant)"; + ok "aaa\x{100} " =~ /(a+?)/; + iseq $1, "a"; + + $Message = "$Mess (regrepeat variant)"; + ok "\xa0\xa0\xa0\x{100} " =~ /(\xa0+?)/; + iseq $1, "\xa0"; + + $Message = "$Mess (regrepeat invariant)"; + ok "\xa0\xa0\xa0\x{100}" =~ /(\xa0+)/; + iseq $1, "\xa0\xa0\xa0"; + + $Message = "$Mess (hard variant)"; + ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+?)/; + iseq $1, "\xa0\xa1"; + + $Message = "$Mess (hard invariant)"; + ok "ababab\x{100} " =~ /((?:ab)+)/; + iseq $1, 'ababab'; + + ok "\xa0\xa1\xa0\xa1\xa0\xa1\x{100}" =~ /((?:\xa0\xa1)+)/; + iseq $1, "\xa0\xa1\xa0\xa1\xa0\xa1"; + + ok "ababab\x{100} " =~ /((?:ab)+?)/; + iseq $1, "ab"; + + $Message = "Don't match first byte of UTF-8 representation"; + ok "\xc4\xc4\xc4" !~ /(\x{100}+)/; + ok "\xc4\xc4\xc4" !~ /(\x{100}+?)/; + ok "\xc4\xc4\xc4" !~ /(\x{100}++)/; + } + + + { + for (120 .. 130) { + my $head = 'x' x $_; + local $Message = q [Don't misparse \x{...} in regexp ] . + q [near 127 char EXACT limit]; + for my $tail ('\x{0061}', '\x{1234}', '\x61') { + eval_ok qq ["$head$tail" =~ /$head$tail/]; + } + local $Message = q [Don't misparse \N{...} in regexp ] . + q [near 127 char EXACT limit]; + for my $tail ('\N{SNOWFLAKE}') { + eval_ok qq [use charnames ':full'; + "$head$tail" =~ /$head$tail/]; + } + } + } + + + { + # perl panic: pp_match start/end pointers + local $BugId = '25269'; + iseq "a-bc", eval {my ($x, $y) = "bca" =~ /^(?=.*(a)).*(bc)/; "$x-$y"}, + 'Captures can move backwards in string'; + } + + + { + local $BugId = '27940'; # \cA not recognized in character classes + ok "a\cAb" =~ /\cA/, '\cA in pattern'; + ok "a\cAb" =~ /[\cA]/, '\cA in character class'; + ok "a\cAb" =~ /[\cA-\cB]/, '\cA in character class range'; + ok "abc" =~ /[^\cA-\cB]/, '\cA in negated character class range'; + ok "a\cBb" =~ /[\cA-\cC]/, '\cB in character class range'; + ok "a\cCbc" =~ /[^\cA-\cB]/, '\cC in negated character class range'; + ok "a\cAb" =~ /(??{"\cA"})/, '\cA in ??{} pattern'; + ok "ab" !~ /a\cIb/x, '\cI in pattern'; + } + + + { + # perl #28532: optional zero-width match at end of string is ignored + local $BugId = '28532'; + ok "abc" =~ /^abc(\z)?/ && defined($1), + 'Optional zero-width match at end of string'; + ok "abc" =~ /^abc(\z)??/ && !defined($1), + 'Optional zero-width match at end of string'; + } + + + + { # TRIE related + our @got = (); + "words" =~ /(word|word|word)(?{push @got, $1})s$/; + iseq @got, 1, "TRIE optimation"; + + @got = (); + "words" =~ /(word|word|word)(?{push @got,$1})s$/i; + iseq @got, 1,"TRIEF optimisation"; + + my @nums = map {int rand 1000} 1 .. 100; + my $re = "(" . (join "|", @nums) . ")"; + $re = qr/\b$re\b/; + + foreach (@nums) { + ok $_ =~ /$re/, "Trie nums"; + } + + $_ = join " ", @nums; + @got = (); + push @got, $1 while /$re/g; + + my %count; + $count {$_} ++ for @got; + my $ok = 1; + for (@nums) { + $ok = 0 if --$count {$_} < 0; + } + ok $ok, "Trie min count matches"; + } + + + { + # TRIE related + # LATIN SMALL/CAPITAL LETTER A WITH MACRON + ok "foba \x{101}foo" =~ qr/(foo|\x{100}foo|bar)/i && + $1 eq "\x{101}foo", + "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH MACRON"; + + # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + ok "foba \x{1E01}foo" =~ qr/(foo|\x{1E00}foo|bar)/i && + $1 eq "\x{1E01}foo", + "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW"; + + # DESERET SMALL/CAPITAL LETTER LONG I + ok "foba \x{10428}foo" =~ qr/(foo|\x{10400}foo|bar)/i && + $1 eq "\x{10428}foo", + "TRIEF + DESERET SMALL/CAPITAL LETTER LONG I"; + + # LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X' + ok "foba \x{1E01}xfoo" =~ qr/(foo|\x{1E00}Xfoo|bar)/i && + $1 eq "\x{1E01}xfoo", + "TRIEF + LATIN SMALL/CAPITAL LETTER A WITH RING BELOW + 'X'"; + + use charnames ':full'; + + my $s = "\N{LATIN SMALL LETTER SHARP S}"; + ok "foba ba$s" =~ qr/(foo|Ba$s|bar)/i && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; + ok "foba ba$s" =~ qr/(Ba$s|foo|bar)/i && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; + ok "foba ba$s" =~ qr/(foo|bar|Ba$s)/i && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; + + ok "foba ba$s" =~ qr/(foo|Bass|bar)/i && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ ss"; + + ok "foba ba$s" =~ qr/(foo|BaSS|bar)/i && $1 eq "ba$s", + "TRIEF + LATIN SMALL LETTER SHARP S =~ SS"; + + ok "foba ba${s}pxySS$s$s" =~ qr/(b(?:a${s}t|a${s}f|a${s}p)[xy]+$s*)/i + && $1 eq "ba${s}pxySS$s$s", + "COMMON PREFIX TRIEF + LATIN SMALL LETTER SHARP S"; + } + + + SKIP: + { + print "# Set PERL_SKIP_PSYCHO_TEST to skip this test\n"; + my @normal = qw [the are some normal words]; + + skip "Skipped Psycho", 2 * @normal if $ENV {PERL_SKIP_PSYCHO_TEST}; + + local $" = "|"; + + my @psycho = (@normal, map chr $_, 255 .. 20000); + my $psycho1 = "@psycho"; + for (my $i = @psycho; -- $i;) { + my $j = int rand (1 + $i); + @psycho [$i, $j] = @psycho [$j, $i]; + } + my $psycho2 = "@psycho"; + + foreach my $word (@normal) { + ok $word =~ /($psycho1)/ && $1 eq $word, 'Psycho'; + ok $word =~ /($psycho2)/ && $1 eq $word, 'Psycho'; + } + } + + + { + local $BugId = '36207'; + my $utf8 = "\xe9\x{100}"; chop $utf8; + my $latin1 = "\xe9"; + + ok $utf8 =~ /\xe9/i, "utf8/latin"; + ok $utf8 =~ /$latin1/i, "utf8/latin runtime"; + ok $utf8 =~ /(abc|\xe9)/i, "utf8/latin trie"; + ok $utf8 =~ /(abc|$latin1)/i, "utf8/latin trie runtime"; + + ok "\xe9" =~ /$utf8/i, "latin/utf8"; + ok "\xe9" =~ /(abc|$utf8)/i, "latin/utf8 trie"; + ok $latin1 =~ /$utf8/i, "latin/utf8 runtime"; + ok $latin1 =~ /(abc|$utf8)/i, "latin/utf8 trie runtime"; + } + + + { + local $BugId = '37038'; + my $s = "abcd"; + $s =~ /(..)(..)/g; + $s = $1; + $s = $2; + iseq $2, 'cd', + "Assigning to original string does not corrupt match vars"; + } + + + { + { + package wooosh; + sub gloople {"!"} + } + my $aeek = bless {} => 'wooosh'; + eval_ok sub {$aeek -> gloople () =~ /(.)/g}, + "//g match against return value of sub"; + + sub gloople {"!"} + eval_ok sub {gloople () =~ /(.)/g}, + "26410 didn't affect sub calls for some reason"; + } + + + { + local $TODO = "See changes 26925-26928, which reverted change 26410"; + { + package lv; + our $var = "abc"; + sub variable : lvalue {$var} + } + my $o = bless [] => 'lv'; + my $f = ""; + my $r = eval { + for (1 .. 2) { + $f .= $1 if $o -> variable =~ /(.)/g; + } + 1; + }; + if ($r) { + iseq $f, "ab", "pos() retained between calls"; + } + else { + local $TODO; + ok 0, "Code failed: $@"; + } + + our $var = "abc"; + sub variable : lvalue {$var} + my $g = ""; + my $s = eval { + for (1 .. 2) { + $g .= $1 if variable =~ /(.)/g; + } + 1; + }; + if ($s) { + iseq $g, "ab", "pos() retained between calls"; + } + else { + local $TODO; + ok 0, "Code failed: $@"; + } + } + + + SKIP: + { + local $BugId = '37836'; + skip "In EBCDIC" if $IS_EBCDIC; + no warnings 'utf8'; + $_ = pack 'U0C2', 0xa2, 0xf8; # Ill-formed UTF-8 + my $ret = 0; + eval_ok sub {!($ret = s/[\0]+//g)}, + "Ill-formed UTF-8 doesn't match NUL in class"; + } + + + { + # chr(65535) should be allowed in regexes + local $BugId = '38293'; + no warnings 'utf8'; # To allow non-characters + my ($c, $r, $s); + + $c = chr 0xffff; + $c =~ s/$c//g; + ok $c eq "", "U+FFFF, parsed as atom"; + + $c = chr 0xffff; + $r = "\\$c"; + $c =~ s/$r//g; + ok $c eq "", "U+FFFF backslashed, parsed as atom"; + + $c = chr 0xffff; + $c =~ s/[$c]//g; + ok $c eq "", "U+FFFF, parsed in class"; + + $c = chr 0xffff; + $r = "[\\$c]"; + $c =~ s/$r//g; + ok $c eq "", "U+FFFF backslashed, parsed in class"; + + $s = "A\x{ffff}B"; + $s =~ s/\x{ffff}//i; + ok $s eq "AB", "U+FFFF, EXACTF"; + + $s = "\x{ffff}A"; + $s =~ s/\bA//; + ok $s eq "\x{ffff}", "U+FFFF, BOUND"; + + $s = "\x{ffff}!"; + $s =~ s/\B!//; + ok $s eq "\x{ffff}", "U+FFFF, NBOUND"; + } + + + { + local $BugId = '39583'; + + # The printing characters + my @chars = ("A" .. "Z"); + my $delim = ","; + my $size = 32771 - 4; + my $str = ''; + + # Create some random junk. Inefficient, but it works. + for (my $i = 0; $i < $size; $ i++) { + $str .= $chars [rand @chars]; + } + + $str .= ($delim x 4); + my $res; + my $matched; + ok $str =~ s/^(.*?)${delim}{4}//s, "Pattern matches"; + iseq $str, "", "Empty string"; + ok defined $1 && length ($1) == $size, '$1 is correct size'; + } + + + { + local $BugId = '27940'; + ok "\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'; + ok "\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'; + ok "X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'; + ok "X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'; + + ok "X\0A" =~ /X\c@?A/, '\c@?'; + ok "X\0A" =~ /X\c@*A/, '\c@*'; + ok "X\0A" =~ /X\c@(A)/, '\c@('; + ok "X\0A" =~ /X(\c@)A/, '\c@)'; + ok "X\0A" =~ /X\c@|ZA/, '\c@|'; + + ok "X\@A" =~ /X@?A/, '@?'; + ok "X\@A" =~ /X@*A/, '@*'; + ok "X\@A" =~ /X@(A)/, '@('; + ok "X\@A" =~ /X(@)A/, '@)'; + ok "X\@A" =~ /X@|ZA/, '@|'; + + local $" = ','; # non-whitespace and non-RE-specific + ok 'abc' =~ /(.)(.)(.)/, 'The last successful match is bogus'; + ok "A@+B" =~ /A@{+}B/, 'Interpolation of @+ in /@{+}/'; + ok "A@-B" =~ /A@{-}B/, 'Interpolation of @- in /@{-}/'; + ok "A@+B" =~ /A@{+}B/x, 'Interpolation of @+ in /@{+}/x'; + ok "A@-B" =~ /A@{-}B/x, 'Interpolation of @- in /@{-}/x'; + } + + + { + use lib 'lib'; + use Cname; + + ok 'fooB' =~ /\N{foo}[\N{B}\N{b}]/, "Passthrough charname"; + my $test = 1233; + # + # Why doesn't must_warn work here? + # + my $w; + local $SIG {__WARN__} = sub {$w .= "@_"}; + eval 'q(xxWxx) =~ /[\N{WARN}]/'; + ok $w && $w =~ /^Ignoring excess chars from/, + "Ignoring excess chars warning"; + + undef $w; + eval q [ok "\0" !~ /[\N{EMPTY-STR}XY]/, + "Zerolength charname in charclass doesn't match \\0"]; + ok $w && $w =~ /^Ignoring zero length/, + 'Ignoring zero length \N{%} in character class warning'; + + ok 'AB' =~ /(\N{EVIL})/ && $1 eq 'A', 'Charname caching $1'; + ok 'ABC' =~ /(\N{EVIL})/, 'Charname caching $1'; + ok 'xy' =~ /x\N{EMPTY-STR}y/, + 'Empty string charname produces NOTHING node'; + ok '' =~ /\N{EMPTY-STR}/, + 'Empty string charname produces NOTHING node'; + + } + + + { + use charnames ':full'; + + ok 'aabc' !~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against aabc'; + ok 'a+bc' =~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against a+bc'; + + ok ' A B' =~ /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, + 'Intermixed named and unicode escapes'; + ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ + /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, + 'Intermixed named and unicode escapes'; + ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ + /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, + 'Intermixed named and unicode escapes'; + } + + + { + our $brackets; + $brackets = qr{ + { (?> [^{}]+ | (??{ $brackets }) )* } + }x; + + ok "{b{c}d" !~ m/^((??{ $brackets }))/, "Bracket mismatch"; + + SKIP: { + our @stack = (); + my @expect = qw( + stuff1 + stuff2 + <stuff1>and<stuff2> + right + <right> + <<right>> + <<<right>>> + <<stuff1>and<stuff2>><<<<right>>>> + ); + + local $_ = '<<<stuff1>and<stuff2>><<<<right>>>>>'; + ok /^(<((?:(?>[^<>]+)|(?1))*)>(?{push @stack, $2 }))$/, + "Recursion matches"; + iseq @stack, @expect, "Right amount of matches" + or skip "Won't test individual results as count isn't equal", + 0 + @expect; + my $idx = 0; + foreach my $expect (@expect) { + iseq $stack [$idx], $expect, + "Expecting '$expect' at stack pos #$idx"; + $idx ++; + } + } + } + + + { + my $s = '123453456'; + $s =~ s/(?<digits>\d+)\k<digits>/$+{digits}/; + ok $s eq '123456', 'Named capture (angle brackets) s///'; + $s = '123453456'; + $s =~ s/(?'digits'\d+)\k'digits'/$+{digits}/; + ok $s eq '123456', 'Named capture (single quotes) s///'; + } + + + { + my @ary = ( + pack('U', 0x00F1), # n-tilde + '_'.pack('U', 0x00F1), # _ + n-tilde + 'c'.pack('U', 0x0327), # c + cedilla + pack('U*', 0x00F1, 0x0327), # n-tilde + cedilla + 'a'.pack('U', 0x00B2), # a + superscript two + pack('U', 0x0391), # ALPHA + pack('U', 0x0391).'2', # ALPHA + 2 + pack('U', 0x0391).'_', # ALPHA + _ + ); + + for my $uni (@ary) { + my ($r1, $c1, $r2, $c2) = eval qq { + use utf8; + scalar ("..foo foo.." =~ /(?'${uni}'foo) \\k'${uni}'/), + \$+{${uni}}, + scalar ("..bar bar.." =~ /(?<${uni}>bar) \\k<${uni}>/), + \$+{${uni}}; + }; + ok $r1, "Named capture UTF (?'')"; + ok defined $c1 && $c1 eq 'foo', "Named capture UTF \%+"; + ok $r2, "Named capture UTF (?<>)"; + ok defined $c2 && $c2 eq 'bar', "Named capture UTF \%+"; + } + } + + + { + my $s = 'foo bar baz'; + my (@k, @v, @fetch, $res); + my $count = 0; + my @names = qw ($+{A} $+{B} $+{C}); + if ($s =~ /(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz)/) { + while (my ($k, $v) = each (%+)) { + $count++; + } + @k = sort keys (%+); + @v = sort values (%+); + $res = 1; + push @fetch, + ["$+{A}", "$1"], + ["$+{B}", "$2"], + ["$+{C}", "$3"], + ; + } + foreach (0 .. 2) { + if ($fetch [$_]) { + iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; + } else { + ok 0, $names[$_]; + } + } + iseq $res, 1, "'$s' =~ /(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz)/"; + iseq $count, 3, "Got 3 keys in %+ via each"; + iseq 0 + @k, 3, 'Got 3 keys in %+ via keys'; + iseq "@k", "A B C", "Got expected keys"; + iseq "@v", "bar baz foo", "Got expected values"; + eval ' + no warnings "uninitialized"; + print for $+ {this_key_doesnt_exist}; + '; + ok !$@, 'lvalue $+ {...} should not throw an exception'; + } + + + { + # + # Almost the same as the block above, except that the capture is nested. + # + local $BugId = '50496'; + my $s = 'foo bar baz'; + my (@k, @v, @fetch, $res); + my $count = 0; + my @names = qw ($+{A} $+{B} $+{C} $+{D}); + if ($s =~ /(?<D>(?<A>foo)\s+(?<B>bar)?\s+(?<C>baz))/) { + while (my ($k,$v) = each(%+)) { + $count++; + } + @k = sort keys (%+); + @v = sort values (%+); + $res = 1; + push @fetch, + ["$+{A}", "$2"], + ["$+{B}", "$3"], + ["$+{C}", "$4"], + ["$+{D}", "$1"], + ; + } + foreach (0 .. 3) { + if ($fetch [$_]) { + iseq $fetch [$_] [0], $fetch [$_] [1], $names [$_]; + } else { + ok 0, $names [$_]; + } + } + iseq $res, 1, "'$s' =~ /(?<D>(?<A>foo)\\s+(?<B>bar)?\\s+(?<C>baz))/"; + iseq $count, 4, "Got 4 keys in %+ via each"; + iseq @k, 4, 'Got 4 keys in %+ via keys'; + iseq "@k", "A B C D", "Got expected keys"; + iseq "@v", "bar baz foo foo bar baz", "Got expected values"; + eval ' + no warnings "uninitialized"; + print for $+ {this_key_doesnt_exist}; + '; + ok !$@,'lvalue $+ {...} should not throw an exception'; + } + + + { + my $s = 'foo bar baz'; + my @res; + if ('1234' =~ /(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { + foreach my $name (sort keys(%-)) { + my $ary = $- {$name}; + foreach my $idx (0 .. $#$ary) { + push @res, "$name:$idx:$ary->[$idx]"; + } + } + } + my @expect = qw (A:0:1 A:1:3 B:0:2 B:1:4); + iseq "@res", "@expect", "Check %-"; + eval' + no warnings "uninitialized"; + print for $- {this_key_doesnt_exist}; + '; + ok !$@,'lvalue $- {...} should not throw an exception'; + } + + + SKIP: + { + # stress test CURLYX/WHILEM. + # + # This test includes varying levels of nesting, and according to + # profiling done against build 28905, exercises every code line in the + # CURLYX and WHILEM blocks, except those related to LONGJMP, the + # super-linear cache and warnings. It executes about 0.5M regexes + + skip "No psycho tests" if $ENV {PERL_SKIP_PSYCHO_TEST}; + print "# Set PERL_SKIP_PSYCHO_TEST to skip this test\n"; + my $r = qr/^ + (?: + ( (?:a|z+)+ ) + (?: + ( (?:b|z+){3,}? ) + ( + (?: + (?: + (?:c|z+){1,1}?z + )? + (?:c|z+){1,1} + )* + ) + (?:z*){2,} + ( (?:z+|d)+ ) + (?: + ( (?:e|z+)+ ) + )* + ( (?:f|z+)+ ) + )* + ( (?:z+|g)+ ) + (?: + ( (?:h|z+)+ ) + )* + ( (?:i|z+)+ ) + )+ + ( (?:j|z+)+ ) + (?: + ( (?:k|z+)+ ) + )* + ( (?:l|z+)+ ) + $/x; + + my $ok = 1; + my $msg = "CURLYX stress test"; + OUTER: + for my $a ("x","a","aa") { + for my $b ("x","bbb","bbbb") { + my $bs = $a.$b; + for my $c ("x","c","cc") { + my $cs = $bs.$c; + for my $d ("x","d","dd") { + my $ds = $cs.$d; + for my $e ("x","e","ee") { + my $es = $ds.$e; + for my $f ("x","f","ff") { + my $fs = $es.$f; + for my $g ("x","g","gg") { + my $gs = $fs.$g; + for my $h ("x","h","hh") { + my $hs = $gs.$h; + for my $i ("x","i","ii") { + my $is = $hs.$i; + for my $j ("x","j","jj") { + my $js = $is.$j; + for my $k ("x","k","kk") { + my $ks = $js.$k; + for my $l ("x","l","ll") { + my $ls = $ks.$l; + if ($ls =~ $r) { + if ($ls =~ /x/) { + $msg .= ": unexpected match for [$ls]"; + $ok = 0; + last OUTER; + } + my $cap = "$1$2$3$4$5$6$7$8$9$10$11$12"; + unless ($ls eq $cap) { + $msg .= ": capture: [$ls], got [$cap]"; + $ok = 0; + last OUTER; + } + } + else { + unless ($ls =~ /x/) { + $msg = ": failed for [$ls]"; + $ok = 0; + last OUTER; + } + } + } + } + } + } + } + } + } + } + } + } + } + } + ok($ok, $msg); + } + + + { + # \, breaks {3,4} + ok "xaaay" !~ /xa{3\,4}y/, '\, in a pattern'; + ok "xa{3,4}y" =~ /xa{3\,4}y/, '\, in a pattern'; + + # \c\ followed by _ + ok "x\c_y" !~ /x\c\_y/, '\_ in a pattern'; + ok "x\c\_y" =~ /x\c\_y/, '\_ in a pattern'; + + # \c\ followed by other characters + for my $c ("z", "\0", "!", chr(254), chr(256)) { + my $targ = "a\034$c"; + my $reg = "a\\c\\$c"; + ok eval ("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern"; + } + } + + + { + local $BugId = '36046'; + my $str = 'abc'; + my $count = 0; + my $mval = 0; + my $pval = 0; + while ($str =~ /b/g) {$mval = $#-; $pval = $#+; $count ++} + iseq $mval, 0, '@- should be empty'; + iseq $pval, 0, '@+ should be empty'; + iseq $count, 1, 'Should have matched once only'; + } + + + { # Test the (*PRUNE) pattern + our $count = 0; + 'aaab' =~ /a+b?(?{$count++})(*FAIL)/; + iseq $count, 9, "Expect 9 for no (*PRUNE)"; + $count = 0; + 'aaab' =~ /a+b?(*PRUNE)(?{$count++})(*FAIL)/; + iseq $count, 3, "Expect 3 with (*PRUNE)"; + local $_ = 'aaab'; + $count = 0; + 1 while /.(*PRUNE)(?{$count++})(*FAIL)/g; + iseq $count, 4, "/.(*PRUNE)/"; + $count = 0; + 'aaab' =~ /a+b?(??{'(*PRUNE)'})(?{$count++})(*FAIL)/; + iseq $count, 3, "Expect 3 with (*PRUNE)"; + local $_ = 'aaab'; + $count = 0; + 1 while /.(??{'(*PRUNE)'})(?{$count++})(*FAIL)/g; + iseq $count, 4, "/.(*PRUNE)/"; + } + + + { # Test the (*SKIP) pattern + our $count = 0; + 'aaab' =~ /a+b?(*SKIP)(?{$count++})(*FAIL)/; + iseq $count, 1, "Expect 1 with (*SKIP)"; + local $_ = 'aaab'; + $count = 0; + 1 while /.(*SKIP)(?{$count++})(*FAIL)/g; + iseq $count, 4, "/.(*SKIP)/"; + $_ = 'aaabaaab'; + $count = 0; + our @res = (); + 1 while /(a+b?)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; + iseq $count, 2, "Expect 2 with (*SKIP)"; + iseq "@res", "aaab aaab", "Adjacent (*SKIP) works as expected"; + } + + + { # Test the (*SKIP) pattern + our $count = 0; + 'aaab' =~ /a+b?(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/; + iseq $count, 1, "Expect 1 with (*SKIP)"; + local $_ = 'aaab'; + $count = 0; + 1 while /.(*MARK:foo)(*SKIP)(?{$count++})(*FAIL)/g; + iseq $count, 4, "/.(*SKIP)/"; + $_ = 'aaabaaab'; + $count = 0; + our @res = (); + 1 while /(a+b?)(*MARK:foo)(*SKIP)(?{$count++; push @res,$1})(*FAIL)/g; + iseq $count, 2, "Expect 2 with (*SKIP)"; + iseq "@res", "aaab aaab", "Adjacent (*SKIP) works as expected"; + } + + + { # Test the (*SKIP) pattern + our $count = 0; + 'aaab' =~ /a*(*MARK:a)b?(*MARK:b)(*SKIP:a)(?{$count++})(*FAIL)/; + iseq $count, 3, "Expect 3 with *MARK:a)b?(*MARK:b)(*SKIP:a)"; + local $_ = 'aaabaaab'; + $count = 0; + our @res = (); + 1 while + /(a*(*MARK:a)b?)(*MARK:x)(*SKIP:a)(?{$count++; push @res,$1})(*FAIL)/g; + iseq $count, 5, "Expect 5 with (*MARK:a)b?)(*MARK:x)(*SKIP:a)"; + iseq "@res", "aaab b aaab b ", + "Adjacent (*MARK:a)b?)(*MARK:x)(*SKIP:a) works as expected"; + } + + + { # Test the (*COMMIT) pattern + our $count = 0; + 'aaabaaab' =~ /a+b?(*COMMIT)(?{$count++})(*FAIL)/; + iseq $count, 1, "Expect 1 with (*COMMIT)"; + local $_ = 'aaab'; + $count = 0; + 1 while /.(*COMMIT)(?{$count++})(*FAIL)/g; + iseq $count, 1, "/.(*COMMIT)/"; + $_ = 'aaabaaab'; + $count = 0; + our @res = (); + 1 while /(a+b?)(*COMMIT)(?{$count++; push @res,$1})(*FAIL)/g; + iseq $count, 1, "Expect 1 with (*COMMIT)"; + iseq "@res", "aaab", "Adjacent (*COMMIT) works as expected"; + } + + + { + # Test named commits and the $REGERROR var + our $REGERROR; + for my $name ('', ':foo') { + for my $pat ("(*PRUNE$name)", + ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", + "(*COMMIT$name)") { + for my $suffix ('(*FAIL)', '') { + 'aaaab' =~ /a+b$pat$suffix/; + iseq $REGERROR, + ($suffix ? ($name ? 'foo' : "1") : ""), + "Test $pat and \$REGERROR $suffix"; + } + } + } + } + + + { + # Test named commits and the $REGERROR var + package Fnorble; + our $REGERROR; + for my $name ('', ':foo') { + for my $pat ("(*PRUNE$name)", + ($name ? "(*MARK$name)" : "") . "(*SKIP$name)", + "(*COMMIT$name)") { + for my $suffix ('(*FAIL)','') { + 'aaaab' =~ /a+b$pat$suffix/; + ::iseq $REGERROR, + ($suffix ? ($name ? 'foo' : "1") : ""), + "Test $pat and \$REGERROR $suffix"; + } + } + } + } + + + { + # Test named commits and the $REGERROR var + local $Message = '$REGERROR'; + our $REGERROR; + for my $word (qw (bar baz bop)) { + $REGERROR = ""; + "aaaaa$word" =~ + /a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/; + iseq $REGERROR, $word; + } + } + + + { + local $BugId = '40684'; + local $Message = '/m in precompiled regexp'; + my $s = "abc\ndef"; + my $rex = qr'^abc$'m; + ok $s =~ m/$rex/; + ok $s =~ m/^abc$/m; + } + + + { + #Mindnumbingly simple test of (*THEN) + for ("ABC","BAX") { + ok /A (*THEN) X | B (*THEN) C/x, "Simple (*THEN) test"; + } + } + + + { + local $Message = "Relative Recursion"; + my $parens = qr/(\((?:[^()]++|(?-1))*+\))/; + local $_ = 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; + my ($all, $one, $two) = ('', '', ''); + ok /foo $parens \s* \+ \s* bar $parens/x; + iseq $1, '((2*3)+4-3)'; + iseq $2, '(2*(3+4)-1*(2-3))'; + iseq $&, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))'; + iseq $&, $_; + } + + { + my $spaces=" "; + local $_ = join 'bar', $spaces, $spaces; + our $count = 0; + s/(?>\s+bar)(?{$count++})//g; + iseq $_, $spaces, "SUSPEND final string"; + iseq $count, 1, "Optimiser should have prevented more than one match"; + } + + { + local $BugId = '36909'; + local $Message = '(?: ... )? should not lose $^R'; + $^R = 'Nothing'; + { + local $^R = "Bad"; + ok 'x foofoo y' =~ m { + (foo) # $^R correctly set + (?{ "last regexp code result" }) + }x; + iseq $^R, 'last regexp code result'; + } + iseq $^R, 'Nothing'; + + { + local $^R = "Bad"; + + ok 'x foofoo y' =~ m { + (?:foo|bar)+ # $^R correctly set + (?{ "last regexp code result" }) + }x; + iseq $^R, 'last regexp code result'; + } + iseq $^R, 'Nothing'; + + { + local $^R = "Bad"; + ok 'x foofoo y' =~ m { + (foo|bar)\1+ # $^R undefined + (?{ "last regexp code result" }) + }x; + iseq $^R, 'last regexp code result'; + } + iseq $^R, 'Nothing'; + + { + local $^R = "Bad"; + ok 'x foofoo y' =~ m { + (foo|bar)\1 # This time without the + + (?{"last regexp code result"}) + }x; + iseq $^R, 'last regexp code result'; + } + iseq $^R, 'Nothing'; + } + + + { + local $BugId = '22395'; + local $Message = 'Match is linear, not quadratic'; + our $count; + for my $l (10, 100, 1000) { + $count = 0; + ('a' x $l) =~ /(.*)(?{$count++})[bc]/; + local $TODO = "Should be L+1 not L*(L+3)/2 (L=$l)"; + iseq $count, $l + 1; + } + } + + + { + local $BugId = '22614'; + local $Message = '@-/@+ should not have undefined values'; + local $_ = 'ab'; + our @len = (); + /(.){1,}(?{push @len,0+@-})(.){1,}(?{})^/; + iseq "@len", "2 2 2"; + } + + + { + local $BugId = '18209'; + local $Message = '$& set on s///'; + my $text = ' word1 word2 word3 word4 word5 word6 '; + + my @words = ('word1', 'word3', 'word5'); + my $count; + foreach my $word (@words) { + $text =~ s/$word\s//gi; # Leave a space to seperate words + # in the resultant str. + # The following block is not working. + if ($&) { + $count ++; + } + # End bad block + } + iseq $count, 3; + iseq $text, ' word2 word4 word6 '; + } + + + { + # RT#6893 + local $BugId = '6893'; + local $_ = qq (A\nB\nC\n); + my @res; + while (m#(\G|\n)([^\n]*)\n#gsx) { + push @res, "$2"; + last if @res > 3; + } + iseq "@res", "A B C", "/g pattern shouldn't infinite loop"; + } + + + { + # From Message-ID: <877ixs6oa6.fsf@k75.linux.bogus> + my $dow_name = "nada"; + my $parser = "(\$dow_name) = \$time_string =~ /(D\x{e9}\\ " . + "C\x{e9}adaoin|D\x{e9}\\ Sathairn|\\w+|\x{100})/"; + my $time_string = "D\x{e9} C\x{e9}adaoin"; + eval $parser; + ok !$@, "Test Eval worked"; + iseq $dow_name, $time_string, "UTF-8 trie common prefix extraction"; + } + + + { + my $v; + ($v = 'bar') =~ /(\w+)/g; + $v = 'foo'; + iseq "$1", 'bar', '$1 is safe after /g - may fail due ' . + 'to specialized config in pp_hot.c' + } + + + { + local $Message = "http://nntp.perl.org/group/perl.perl5.porters/118663"; + my $qr_barR1 = qr/(bar)\g-1/; + ok "foobarbarxyz" =~ $qr_barR1; + ok "foobarbarxyz" =~ qr/foo${qr_barR1}xyz/; + ok "foobarbarxyz" =~ qr/(foo)${qr_barR1}xyz/; + ok "foobarbarxyz" =~ qr/(foo)(bar)\g{-1}xyz/; + ok "foobarbarxyz" =~ qr/(foo${qr_barR1})xyz/; + ok "foobarbarxyz" =~ qr/(foo(bar)\g{-1})xyz/; + } + + + { + local $BugId = '41010'; + local $Message = 'No optimizer bug'; + my @tails = ('', '(?(1))', '(|)', '()?'); + my @quants = ('*','+'); + my $doit = sub { + my $pats = shift; + for (@_) { + for my $pat (@$pats) { + for my $quant (@quants) { + for my $tail (@tails) { + my $re = "($pat$quant\$)$tail"; + ok /$re/ && $1 eq $_, "'$_' =~ /$re/"; + ok /$re/m && $1 eq $_, "'$_' =~ /$re/m"; + } + } + } + } + }; + + my @dpats = ('\d', + '[1234567890]', + '(1|[23]|4|[56]|[78]|[90])', + '(?:1|[23]|4|[56]|[78]|[90])', + '(1|2|3|4|5|6|7|8|9|0)', + '(?:1|2|3|4|5|6|7|8|9|0)'); + my @spats = ('[ ]', ' ', '( |\t)', '(?: |\t)', '[ \t]', '\s'); + my @sstrs = (' '); + my @dstrs = ('12345'); + $doit -> (\@spats, @sstrs); + $doit -> (\@dpats, @dstrs); + } + + + { + local $Message = '$REGMARK'; + our @r = (); + our ($REGMARK, $REGERROR); + ok 'foofoo' =~ /foo (*MARK:foo) (?{push @r,$REGMARK}) /x; + iseq "@r","foo"; + iseq $REGMARK, "foo"; + ok 'foofoo' !~ /foo (*MARK:foo) (*FAIL) /x; + ok !$REGMARK; + iseq $REGERROR, 'foo'; + } + + + { + local $Message = '\K test'; + my $x; + $x = "abc.def.ghi.jkl"; + $x =~ s/.*\K\..*//; + iseq $x, "abc.def.ghi"; + + $x = "one two three four"; + $x =~ s/o+ \Kthree//g; + iseq $x, "one two four"; + + $x = "abcde"; + $x =~ s/(.)\K/$1/g; + iseq $x, "aabbccddee"; + } + + + { + sub kt { + return '4' if $_[0] eq '09028623'; + } + # Nested EVAL using PL_curpm (via $1 or friends) + my $re; + our $grabit = qr/ ([0-6][0-9]{7}) (??{ kt $1 }) [890] /x; + $re = qr/^ ( (??{ $grabit }) ) $ /x; + my @res = '0902862349' =~ $re; + iseq join ("-", @res), "0902862349", + 'PL_curpm is set properly on nested eval'; + + our $qr = qr/ (o) (??{ $1 }) /x; + ok 'boob'=~/( b (??{ $qr }) b )/x && 1, "PL_curpm, nested eval"; + } + + + { + use charnames ":full"; + ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "I =~ Alphabetic"; + ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/, "I =~ Uppercase"; + ok "\N{ROMAN NUMERAL ONE}" !~ /\p{Lowercase}/, "I !~ Lowercase"; + ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "I =~ ID_Start"; + ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "I =~ ID_Continue"; + ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "i =~ Alphabetic"; + ok "\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Uppercase}/, "i !~ Uppercase"; + ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/, "i =~ Lowercase"; + ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "i =~ ID_Start"; + ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "i =~ ID_Continue" + } + + + { + # requirement of Unicode Technical Standard #18, 1.7 Code Points + # cf. http://www.unicode.org/reports/tr18/#Supplementary_Characters + for my $u (0x7FF, 0x800, 0xFFFF, 0x10000) { + no warnings 'utf8'; # oops + my $c = chr $u; + my $x = sprintf '%04X', $u; + ok "A${c}B" =~ /A[\0-\x{10000}]B/, "Unicode range - $x"; + } + } + + + { + my $res=""; + + if ('1' =~ /(?|(?<digit>1)|(?<digit>2))/) { + $res = "@{$- {digit}}"; + } + iseq $res, "1", + "Check that (?|...) doesnt cause dupe entries in the names array"; + + $res = ""; + if ('11' =~ /(?|(?<digit>1)|(?<digit>2))(?&digit)/) { + $res = "@{$- {digit}}"; + } + iseq $res, "1", "Check that (?&..) to a buffer inside " . + "a (?|...) goes to the leftmost"; + } + + + { + use warnings; + local $Message = "ASCII pattern that really is UTF-8"; + my @w; + local $SIG {__WARN__} = sub {push @w, "@_"}; + my $c = qq (\x{DF}); + ok $c =~ /${c}|\x{100}/; + ok @w == 0; + } + + + { + local $Message = "Corruption of match results of qr// across scopes"; + my $qr = qr/(fo+)(ba+r)/; + 'foobar' =~ /$qr/; + iseq "$1$2", "foobar"; + { + 'foooooobaaaaar' =~ /$qr/; + iseq "$1$2", 'foooooobaaaaar'; + } + iseq "$1$2", "foobar"; + } + + + { + local $Message = "HORIZWS"; + local $_ = "\t \r\n \n \t".chr(11)."\n"; + s/\H/H/g; + s/\h/h/g; + iseq $_, "hhHHhHhhHH"; + $_ = "\t \r\n \n \t" . chr (11) . "\n"; + utf8::upgrade ($_); + s/\H/H/g; + s/\h/h/g; + iseq $_, "hhHHhHhhHH"; + } + + + { + local $Message = "Various whitespace special patterns"; + my @h = map {chr $_} 0x09, 0x20, 0xa0, 0x1680, 0x180e, 0x2000, + 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, + 0x2007, 0x2008, 0x2009, 0x200a, 0x202f, 0x205f, + 0x3000; + my @v = map {chr $_} 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, + 0x2029; + my @lb = ("\x0D\x0A", map {chr $_} 0x0A .. 0x0D, 0x85, 0x2028, 0x2029); + foreach my $t ([\@h, qr/\h/, qr/\h+/], + [\@v, qr/\v/, qr/\v+/], + [\@lb, qr/\R/, qr/\R+/],) { + my $ary = shift @$t; + foreach my $pat (@$t) { + foreach my $str (@$ary) { + ok $str =~ /($pat)/, $pat; + iseq $1, $str, $pat; + utf8::upgrade ($str); + ok $str =~ /($pat)/, "Upgraded string - $pat"; + iseq $1, $str, "Upgraded string - $pat"; + } + } + } + } + + + { + local $Message = "Check that \\xDF match properly in its various forms"; + # Test that \xDF matches properly. this is pretty hacky stuff, + # but its actually needed. The malarky with '-' is to prevent + # compilation caching from playing any role in the test. + my @df = (chr (0xDF), '-', chr (0xDF)); + utf8::upgrade ($df [2]); + my @strs = ('ss', 'sS', 'Ss', 'SS', chr (0xDF)); + my @ss = map {("$_", "$_")} @strs; + utf8::upgrade ($ss [$_ * 2 + 1]) for 0 .. $#strs; + + for my $ssi (0 .. $#ss) { + for my $dfi (0 .. $#df) { + my $pat = $df [$dfi]; + my $str = $ss [$ssi]; + my $utf_df = ($dfi > 1) ? 'utf8' : ''; + my $utf_ss = ($ssi % 2) ? 'utf8' : ''; + (my $sstr = $str) =~ s/\xDF/\\xDF/; + + if ($utf_df || $utf_ss || length ($ss [$ssi]) == 1) { + my $ret = $str =~ /$pat/i; + next if $pat eq '-'; + ok $ret, "\"$sstr\" =~ /\\xDF/i " . + "(str is @{[$utf_ss||'latin']}, pat is " . + "@{[$utf_df||'latin']})"; + } + else { + my $ret = $str !~ /$pat/i; + next if $pat eq '-'; + ok $ret, "\"$sstr\" !~ /\\xDF/i " . + "(str is @{[$utf_ss||'latin']}, pat is " . + "@{[$utf_df||'latin']})"; + } + } + } + } + + + { + local $Message = "BBC(Bleadperl Breaks CPAN) Today: String::Multibyte"; + my $re = qr/(?:[\x00-\xFF]{4})/; + my $hyp = "\0\0\0-"; + my $esc = "\0\0\0\\"; + + my $str = "$esc$hyp$hyp$esc$esc"; + my @a = ($str =~ /\G(?:\Q$esc$esc\E|\Q$esc$hyp\E|$re)/g); + + iseq @a,3; + local $" = "="; + iseq "@a","$esc$hyp=$hyp=$esc$esc"; + } + + + { + # Test for keys in %+ and %- + local $Message = 'Test keys in %+ and %-'; + no warnings 'uninitialized'; + my $_ = "abcdef"; + /(?<foo>a)|(?<foo>b)/; + iseq ((join ",", sort keys %+), "foo"); + iseq ((join ",", sort keys %-), "foo"); + iseq ((join ",", sort values %+), "a"); + iseq ((join ",", sort map "@$_", values %-), "a "); + /(?<bar>a)(?<bar>b)(?<quux>.)/; + iseq ((join ",", sort keys %+), "bar,quux"); + iseq ((join ",", sort keys %-), "bar,quux"); + iseq ((join ",", sort values %+), "a,c"); # leftmost + iseq ((join ",", sort map "@$_", values %-), "a b,c"); + /(?<un>a)(?<deux>c)?/; # second buffer won't capture + iseq ((join ",", sort keys %+), "un"); + iseq ((join ",", sort keys %-), "deux,un"); + iseq ((join ",", sort values %+), "a"); + iseq ((join ",", sort map "@$_", values %-), ",a"); + } + + + { + # length() on captures, the numbered ones end up in Perl_magic_len + my $_ = "aoeu \xe6var ook"; + /^ \w+ \s (?<eek>\S+)/x; + + iseq length ($`), 0, q[length $`]; + iseq length ($'), 4, q[length $']; + iseq length ($&), 9, q[length $&]; + iseq length ($1), 4, q[length $1]; + iseq length ($+{eek}), 4, q[length $+{eek} == length $1]; + } + + + { + my $ok = -1; + + $ok = exists ($-{x}) ? 1 : 0 if 'bar' =~ /(?<x>foo)|bar/; + iseq $ok, 1, '$-{x} exists after "bar"=~/(?<x>foo)|bar/'; + iseq scalar (%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/'; + iseq scalar (%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/'; + + $ok = -1; + $ok = exists ($+{x}) ? 1 : 0 if 'bar' =~ /(?<x>foo)|bar/; + iseq $ok, 0, '$+{x} not exists after "bar"=~/(?<x>foo)|bar/'; + iseq scalar (%+), 0, 'scalar %+ == 0 after "bar"=~/(?<x>foo)|bar/'; + iseq scalar (%-), 1, 'scalar %- == 1 after "bar"=~/(?<x>foo)|bar/'; + + $ok = -1; + $ok = exists ($-{x}) ? 1 : 0 if 'foo' =~ /(?<x>foo)|bar/; + iseq $ok, 1, '$-{x} exists after "foo"=~/(?<x>foo)|bar/'; + iseq scalar (%+), 1, 'scalar %+ == 1 after "foo"=~/(?<x>foo)|bar/'; + iseq scalar (%-), 1, 'scalar %- == 1 after "foo"=~/(?<x>foo)|bar/'; + + $ok = -1; + $ok = exists ($+{x}) ? 1 : 0 if 'foo'=~/(?<x>foo)|bar/; + iseq $ok, 1, '$+{x} exists after "foo"=~/(?<x>foo)|bar/'; + } + + + { + local $_; + ($_ = 'abc') =~ /(abc)/g; + $_ = '123'; + iseq "$1", 'abc', "/g leads to unsafe match vars: $1"; + } + + + { + local $Message = 'Message-ID: <20070818091501.7eff4831@r2d2>'; + my $str = ""; + for (0 .. 5) { + my @x; + $str .= "@x"; # this should ALWAYS be the empty string + 'a' =~ /(a|)/; + push @x, 1; + } + iseq length ($str), 0, "Trie scope error, string should be empty"; + $str = ""; + my @foo = ('a') x 5; + for (@foo) { + my @bar; + $str .= "@bar"; + s/a|/push @bar, 1/e; + } + iseq length ($str), 0, "Trie scope error, string should be empty"; + } + + + { + local $BugId = '45605'; + # [perl #45605] Regexp failure with utf8-flagged and byte-flagged string + + my $utf_8 = "\xd6schel"; + utf8::upgrade ($utf_8); + $utf_8 =~ m {(\xd6|Ö)schel}; + iseq $1, "\xd6", "Upgrade error"; + } + + { +# more TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding + for my $chr (160 .. 255) { + my $chr_byte = chr($chr); + my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8); + my $rx = qr{$chr_byte|X}i; + ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr"); + } + } + + { + # Regardless of utf8ness any character matches itself when + # doing a case insensitive match. See also [perl #36207] + local $BugId = '36207'; + for my $o (0 .. 255) { + my @ch = (chr ($o), chr ($o)); + utf8::upgrade ($ch [1]); + for my $u_str (0, 1) { + for my $u_pat (0, 1) { + ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E/i, + "\$c =~ /\$c/i : chr ($o) : u_str = $u_str u_pat = $u_pat"; + ok $ch [$u_str] =~ /\Q$ch[$u_pat]\E|xyz/i, + "\$c=~/\$c|xyz/i : chr($o) : u_str = $u_str u_pat = $u_pat"; + } + } + } + } + + + { + our $a = 3; "" =~ /(??{ $a })/; + our $b = $a; + iseq $b, $a, "Copy of scalar used for postponed subexpression"; + } + + + { + local $BugId = '49190'; + local $Message = '$REGMARK in replacement'; + our $REGMARK; + my $_ = "A"; + ok s/(*:B)A/$REGMARK/; + iseq $_, "B"; + $_ = "CCCCBAA"; + ok s/(*:X)A+|(*:Y)B+|(*:Z)C+/$REGMARK/g; + iseq $_, "ZYX"; + } + + + { + our @ctl_n = (); + our @plus = (); + our $nested_tags; + $nested_tags = qr{ + < + (\w+) + (?{ + push @ctl_n,$^N; + push @plus,$+; + }) + > + (??{$nested_tags})* + </\s* \w+ \s*> + }x; + + my $match = '<bla><blubb></blubb></bla>' =~ m/^$nested_tags$/; + ok $match, 'nested construct matches'; + iseq "@ctl_n", "bla blubb", '$^N inside of (?{}) works as expected'; + iseq "@plus", "bla blubb", '$+ inside of (?{}) works as expected'; + } + + + { + local $BugId = '52658'; + local $Message = 'Substitution evaluation in list context'; + my $reg = '../xxx/'; + my @te = ($reg =~ m{^(/?(?:\.\./)*)}, + $reg =~ s/(x)/'b'/eg > 1 ? '##' : '++'); + iseq $reg, '../bbb/'; + iseq $te [0], '../'; + } + + # This currently has to come before any "use encoding" in this file. + { + local $Message; + local $BugId = '59342'; + must_warn 'qr/\400/', '^Use of octal value above 377'; + } + + + SKIP: { + # XXX: This set of tests is essentially broken, POSIX character classes + # should not have differing definitions under Unicode. + # There are property names for that. + skip "Tests assume ASCII", 4 unless $IS_ASCII; + + my @notIsPunct = grep {/[[:punct:]]/ and not /\p{IsPunct}/} + map {chr} 0x20 .. 0x7f; + iseq join ('', @notIsPunct), '$+<=>^`|~', + '[:punct:] disagress with IsPunct on Symbols'; + + my @isPrint = grep {not /[[:print:]]/ and /\p{IsPrint}/} + map {chr} 0 .. 0x1f, 0x7f .. 0x9f; + iseq join ('', @isPrint), "\x09\x0a\x0b\x0c\x0d\x85", + 'IsPrint disagrees with [:print:] on control characters'; + + my @isPunct = grep {/[[:punct:]]/ != /\p{IsPunct}/} + map {chr} 0x80 .. 0xff; + iseq join ('', @isPunct), "\xa1\xab\xb7\xbb\xbf", # ¡ « · » ¿ + 'IsPunct disagrees with [:punct:] outside ASCII'; + + my @isPunctLatin1 = eval q { + use encoding 'latin1'; + grep {/[[:punct:]]/ != /\p{IsPunct}/} map {chr} 0x80 .. 0xff; + }; + skip "Eval failed ($@)", 1 if $@; + skip "PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS set to 0", 1 + if !$ENV{PERL_TEST_LEGACY_POSIX_CC}; + iseq join ('', @isPunctLatin1), '', + 'IsPunct agrees with [:punct:] with explicit Latin1'; + } + + + { + local $BugId = '60034'; + my $a = "xyzt" x 8192; + ok $a =~ /\A(?>[a-z])*\z/, + '(?>) does not cause wrongness on long string'; + my $b = $a . chr 256; + chop $b; + { + iseq $a, $b; + } + ok $b =~ /\A(?>[a-z])*\z/, + '(?>) does not cause wrongness on long string with UTF-8'; + } + + + # + # Keep the following tests last -- they may crash perl + # + print "# Tests that follow may crash perl\n"; + { + local $BugId = '19049/38869'; + local $Message = 'Pattern in a loop, failure should not ' . + 'affect previous success'; + my @list = ( + 'ab cdef', # Matches regex + ('e' x 40000 ) .'ab c' # Matches not, but 'ab c' matches part of it + ); + my $y; + my $x; + foreach (@list) { + m/ab(.+)cd/i; # The ignore-case seems to be important + $y = $1; # Use $1, which might not be from the last match! + $x = substr ($list [0], $- [0], $+ [0] - $- [0]); + } + iseq $y, ' '; + iseq $x, 'ab cd'; + } + + + { + local $BugId = '24274'; + + ok (("a" x (2 ** 15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker"); + ok ((q(a)x 100) =~ /^(??{'(.)'x 100})/, + "Regexp /^(??{'(.)'x 100})/ crashes older perls"); + } + + + { + eval '/\k/'; + ok $@ =~ /\QSequence \k... not terminated in regex;\E/, + 'Lone \k not allowed'; + } + + + { + local $Message = "Substitution with lookahead (possible segv)"; + $_ = "ns1ns1ns1"; + s/ns(?=\d)/ns_/g; + iseq $_, "ns_1ns_1ns_1"; + $_ = "ns1"; + s/ns(?=\d)/ns_/; + iseq $_, "ns_1"; + $_ = "123"; + s/(?=\d+)|(?<=\d)/!Bang!/g; + iseq $_, "!Bang!1!Bang!2!Bang!3!Bang!"; + } + + + { + # [perl #45337] utf8 + "[a]a{2}" + /$.../ = panic: sv_len_utf8 cache + local $BugId = '45337'; + local ${^UTF8CACHE} = -1; + local $Message = "Shouldn't panic"; + my $s = "[a]a{2}"; + utf8::upgrade $s; + ok "aaa" =~ /$s/; + } + { + local $BugId = '57042'; + local $Message = "Check if tree logic breaks \$^R"; + my $cond_re = qr/\s* + \s* (?: + \( \s* A (?{1}) + | \( \s* B (?{2}) + ) + /x; + my @res; + for my $line ("(A)","(B)") { + if ($line =~ m/$cond_re/) { + push @res, $^R ? "#$^R" : "UNDEF"; + } + } + iseq "@res","#1 #2"; + } + { + no warnings 'closure'; + my $re = qr/A(??{"1"})/; + ok "A1B" =~ m/^((??{ $re }))((??{"B"}))$/; + ok $1 eq "A1"; + ok $2 eq "B"; + } + + + { + use re 'eval'; + local $Message = 'Test if $^N and $+ work in (?{{})'; + our @ctl_n = (); + our @plus = (); + our $nested_tags; + $nested_tags = qr{ + < + ((\w)+) + (?{ + push @ctl_n, (defined $^N ? $^N : "undef"); + push @plus, (defined $+ ? $+ : "undef"); + }) + > + (??{$nested_tags})* + </\s* \w+ \s*> + }x; + + + my $c = 0; + for my $test ( + # Test structure: + # [ Expected result, Regex, Expected value(s) of $^N, Expected value(s) of $+ ] + [ 1, qr#^$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^($nested_tags)$#, "bla blubb <bla><blubb></blubb></bla>", "a b a" ], + [ 1, qr#^(|)$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^(?:|)$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^<(bl|bla)>$nested_tags<(/\1)>$#, "blubb /bla", "b /bla" ], + [ 1, qr#(??{"(|)"})$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^(??{"(bla|)"})$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^(??{"(|)"})(??{$nested_tags})$#, "bla blubb undef", "a b undef" ], + [ 1, qr#^(??{"(?:|)"})$nested_tags$#, "bla blubb bla", "a b a" ], + [ 1, qr#^((??{"(?:bla|)"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], + [ 1, qr#^((??{"(?!)?"}))((??{$nested_tags}))$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], + [ 1, qr#^((??{"(?:|<(/?bla)>)"}))((??{$nested_tags}))\1$#, "bla blubb <bla><blubb></blubb></bla>", "a b <bla><blubb></blubb></bla>" ], + [ 0, qr#^((??{"(?!)"}))?((??{$nested_tags}))(?!)$#, "bla blubb undef", "a b undef" ], + + ) { #"#silence vim highlighting + $c++; + @ctl_n = (); + @plus = (); + my $match = (("<bla><blubb></blubb></bla>" =~ $test->[1]) ? 1 : 0); + push @ctl_n, (defined $^N ? $^N : "undef"); + push @plus, (defined $+ ? $+ : "undef"); + ok($test->[0] == $match, "match $c"); + if ($test->[0] != $match) { + # unset @ctl_n and @plus + @ctl_n = @plus = (); + } + iseq("@ctl_n", $test->[2], "ctl_n $c"); + iseq("@plus", $test->[3], "plus $c"); + } + } + + { + use re 'eval'; + local $BugId = '56194'; + + our $f; + local $f; + $f = sub { + defined $_[0] ? $_[0] : "undef"; + }; + + ok("123" =~ m/^(\d)(((??{1 + $^N})))+$/); + + our @ctl_n; + our @plus; + + my $re = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})#; + my $re2 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))*(?{$^N})(|a(b)c|def)(??{"$^R"})#; + my $re3 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})(|a(b)c|def)(??{"$^R"})#; + our $re5; + local $re5 = qr#(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})){2}(?{$^N})#; + my $re6 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; + my $re7 = qr#(??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1})#; + my $re8 = qr/(\d+)/; + my $c = 0; + for my $test ( + # Test structure: + # [ + # String to match + # Regex too match + # Expected values of $^N + # Expected values of $+ + # Expected values of $1, $2, $3, $4 and $5 + # ] + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(??{$^N})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$+})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$+})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(abc|def|)?(??{$^N})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1233", + qr#^(1)((??{ push @ctl_n, $f->($^N); push @plus, $f->($+); $^N + 1}))+(|abc|def)?(??{$^N})$#, + "1 2 3 3", + "1 2 3 3", + "\$1 = 1, \$2 = 3, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "123abc3", + qr#^($re)(|a(b)c|def)(??{$^R})$#, + "1 2 3 abc", + "1 2 3 b", + "\$1 = 123, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", + ], + [ + "123abc3", + qr#^($re2)$#, + "1 2 3 123abc3", + "1 2 3 b", + "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", + ], + [ + "123abc3", + qr#^($re3)$#, + "1 2 123abc3", + "1 2 b", + "\$1 = 123abc3, \$2 = 1, \$3 = 3, \$4 = abc, \$5 = b", + ], + [ + "123abc3", + qr#^(??{$re5})(|abc|def)(??{"$^R"})$#, + "1 2 abc", + "1 2 abc", + "\$1 = abc, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "123abc3", + qr#^(??{$re5})(|a(b)c|def)(??{"$^R"})$#, + "1 2 abc", + "1 2 b", + "\$1 = abc, \$2 = b, \$3 = undef, \$4 = undef, \$5 = undef", + ], + [ + "1234", + qr#^((\d+)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1}))((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})))$#, + "1234 123 12 1 2 3 1234", + "1234 123 12 1 2 3 4", + "\$1 = 1234, \$2 = 1, \$3 = 2, \$4 = 3, \$5 = 4", + ], + [ + "1234556", + qr#^(\d+)($re6)($re6)($re6)$re6(($re6)$re6)$#, + "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 56", + "1234556 123455 12345 1234 123 12 1 2 3 4 4 5 5", + "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 56", + ], + [ + "12345562", + qr#^((??{$re8}))($re7)($re7)($re7)$re7($re7)($re7(\2))$#, + "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 62", + "12345562 1234556 123455 12345 1234 123 12 1 2 3 4 4 5 2", + "\$1 = 1, \$2 = 2, \$3 = 3, \$4 = 4, \$5 = 5", + ], + ) { + $c++; + @ctl_n = (); + @plus = (); + undef $^R; + my $match = $test->[0] =~ $test->[1]; + my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5)); + push @ctl_n, $f->($^N); + push @plus, $f->($+); + ok($match, "match $c"); + if (not $match) { + # unset $str, @ctl_n and @plus + $str = ""; + @ctl_n = @plus = (); + } + iseq("@ctl_n", $test->[2], "ctl_n $c"); + iseq("@plus", $test->[3], "plus $c"); + iseq($str, $test->[4], "str $c"); + } + SKIP: { + if ($] le '5.010') { + skip "test segfaults on perl < 5.10", 4; + } + + @ctl_n = (); + @plus = (); + + our $re4; + local $re4 = qr#(1)((??{push @ctl_n, $f->($^N); push @plus, $f->($+);$^N + 1})){2}(?{$^N})(|abc|def)(??{"$^R"})#; + undef $^R; + my $match = "123abc3" =~ m/^(??{$re4})$/; + my $str = join(", ", '$1 = '.$f->($1), '$2 = '.$f->($2), '$3 = '.$f->($3), '$4 = '.$f->($4),'$5 = '.$f->($5),'$^R = '.$f->($^R)); + push @ctl_n, $f->($^N); + push @plus, $f->($+); + ok($match); + if (not $match) { + # unset $str + @ctl_n = (); + @plus = (); + $str = ""; + } + iseq("@ctl_n", "1 2 undef"); + iseq("@plus", "1 2 undef"); + iseq($str, "\$1 = undef, \$2 = undef, \$3 = undef, \$4 = undef, \$5 = undef, \$^R = undef"); + } + } + + # This only works under -DEBUGGING because it relies on an assert(). + { + local $BugId = '60508'; + local $Message = "Check capture offset re-entrancy of utf8 code."; + + sub fswash { $_[0] =~ s/([>X])//g; } + + my $k1 = "." x 4 . ">>"; + fswash($k1); + + my $k2 = "\x{f1}\x{2022}"; + $k2 =~ s/([\360-\362])/>/g; + fswash($k2); + + iseq($k2, "\x{2022}", "utf8::SWASHNEW doesn't cause capture leaks"); + } + + + { + local $BugId = 65372; # minimal CURLYM limited to 32767 matches + my @pat = ( + qr{a(x|y)*b}, # CURLYM + qr{a(x|y)*?b}, # .. with minmod + qr{a([wx]|[yz])*b}, # .. and without tries + qr{a([wx]|[yz])*?b}, + ); + my $len = 32768; + my $s = join '', 'a', 'x' x $len, 'b'; + for my $pat (@pat) { + ok($s =~ $pat, $pat); + } + } + # + # This should be the last test. + # + iseq $test + 1, $EXPECTED_TESTS, "Got the right number of tests!"; + +} # End of sub run_tests + +1; diff --git a/t/re/pat_thr.t b/t/re/pat_thr.t new file mode 100644 index 0000000000..3228b33a65 --- /dev/null +++ b/t/re/pat_thr.t @@ -0,0 +1,7 @@ +#!./perl + +chdir 't' if -d 't'; +@INC = ('../lib', '.'); + +require 'thread_it.pl'; +thread_it(qw(op pat.t)); diff --git a/t/re/qr.t b/t/re/qr.t new file mode 100644 index 0000000000..ff9449e759 --- /dev/null +++ b/t/re/qr.t @@ -0,0 +1,13 @@ +#!./perl -w + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +plan tests => 1; + +my $rx = qr//; + +is(ref $rx, "Regexp", "qr// blessed into `Regexp' by default"); diff --git a/t/re/qr_gc.t b/t/re/qr_gc.t new file mode 100644 index 0000000000..db2e96ed2c --- /dev/null +++ b/t/re/qr_gc.t @@ -0,0 +1,35 @@ +#!./perl -w + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; + undef &Regexp::DESTROY; +} + +plan tests => 2; + +if ($] >= 5.011) { # doesn't leak on 5.10.x + $TODO = "leaking since 32751"; +} + +my $destroyed; +{ + sub Regexp::DESTROY { $destroyed++ } +} + +{ + my $rx = qr//; +} + +is( $destroyed, 1, "destroyed regexp" ); + +undef $destroyed; + +{ + my $var = bless {}, "Foo"; + my $rx = qr/(?{ $var })/; +} + +is( $destroyed, 1, "destroyed regexp with closure capture" ); + diff --git a/t/re/qrstack.t b/t/re/qrstack.t new file mode 100644 index 0000000000..47d190d055 --- /dev/null +++ b/t/re/qrstack.t @@ -0,0 +1,11 @@ +#!./perl -w + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +plan tests => 1; + +ok(defined [(1)x127,qr//,1]->[127], "qr// should extend the stack properly"); diff --git a/t/re/re.t b/t/re/re.t new file mode 100644 index 0000000000..8c1c1f8db0 --- /dev/null +++ b/t/re/re.t @@ -0,0 +1,46 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +use strict; +use warnings; + +use re qw(is_regexp regexp_pattern + regname regnames regnames_count); +{ + my $qr=qr/foo/pi; + ok(is_regexp($qr),'is_regexp($qr)'); + ok(!is_regexp(''),'is_regexp("")'); + is((regexp_pattern($qr))[0],'foo','regexp_pattern[0]'); + is((regexp_pattern($qr))[1],'ip','regexp_pattern[1]'); + is(regexp_pattern($qr),'(?pi-xsm:foo)','scalar regexp_pattern'); + ok(!regexp_pattern(''),'!regexp_pattern("")'); +} + +if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){ + my @names = sort +regnames(); + is("@names","A B","regnames"); + @names = sort +regnames(0); + is("@names","A B","regnames"); + my $names = regnames(); + is($names, "B", "regnames in scalar context"); + @names = sort +regnames(1); + is("@names","A B C","regnames"); + is(join("", @{regname("A",1)}),"13"); + is(join("", @{regname("B",1)}),"24"); + { + if ('foobar'=~/(?<foo>foo)(?<bar>bar)/) { + is(regnames_count(),2); + } else { + ok(0); ok(0); + } + } + is(regnames_count(),3); +} +# New tests above this line, don't forget to update the test count below! +BEGIN { plan tests => 14 } +# No tests here! diff --git a/t/re/re_tests b/t/re/re_tests new file mode 100644 index 0000000000..b9177e92ca --- /dev/null +++ b/t/re/re_tests @@ -0,0 +1,1395 @@ +# This stops me getting screenfulls of syntax errors every time I accidentally +# run this file via a shell glob +__END__ +abc abc y $& abc +abc abc y $-[0] 0 +abc abc y $+[0] 3 +abc xbc n - - +abc axc n - - +abc abx n - - +abc xabcy y $& abc +abc xabcy y $-[0] 1 +abc xabcy y $+[0] 4 +abc ababc y $& abc +abc ababc y $-[0] 2 +abc ababc y $+[0] 5 +ab*c abc y $& abc +ab*c abc y $-[0] 0 +ab*c abc y $+[0] 3 +ab*bc abc y $& abc +ab*bc abc y $-[0] 0 +ab*bc abc y $+[0] 3 +ab*bc abbc y $& abbc +ab*bc abbc y $-[0] 0 +ab*bc abbc y $+[0] 4 +ab*bc abbbbc y $& abbbbc +ab*bc abbbbc y $-[0] 0 +ab*bc abbbbc y $+[0] 6 +.{1} abbbbc y $& a +.{1} abbbbc y $-[0] 0 +.{1} abbbbc y $+[0] 1 +.{3,4} abbbbc y $& abbb +.{3,4} abbbbc y $-[0] 0 +.{3,4} abbbbc y $+[0] 4 +\N{1} abbbbc y $& a +\N{1} abbbbc y $-[0] 0 +\N{1} abbbbc y $+[0] 1 +\N{3,4} abbbbc y $& abbb +\N{3,4} abbbbc y $-[0] 0 +\N{3,4} abbbbc y $+[0] 4 +ab{0,}bc abbbbc y $& abbbbc +ab{0,}bc abbbbc y $-[0] 0 +ab{0,}bc abbbbc y $+[0] 6 +ab+bc abbc y $& abbc +ab+bc abbc y $-[0] 0 +ab+bc abbc y $+[0] 4 +ab+bc abc n - - +ab+bc abq n - - +ab{1,}bc abq n - - +ab+bc abbbbc y $& abbbbc +ab+bc abbbbc y $-[0] 0 +ab+bc abbbbc y $+[0] 6 +ab{1,}bc abbbbc y $& abbbbc +ab{1,}bc abbbbc y $-[0] 0 +ab{1,}bc abbbbc y $+[0] 6 +ab{1,3}bc abbbbc y $& abbbbc +ab{1,3}bc abbbbc y $-[0] 0 +ab{1,3}bc abbbbc y $+[0] 6 +ab{3,4}bc abbbbc y $& abbbbc +ab{3,4}bc abbbbc y $-[0] 0 +ab{3,4}bc abbbbc y $+[0] 6 +ab{4,5}bc abbbbc n - - +ab?bc abbc y $& abbc +ab?bc abc y $& abc +ab{0,1}bc abc y $& abc +ab?bc abbbbc n - - +ab?c abc y $& abc +ab{0,1}c abc y $& abc +^abc$ abc y $& abc +^abc$ abcc n - - +^abc abcc y $& abc +^abc$ aabc n - - +abc$ aabc y $& abc +abc$ aabcd n - - +^ abc y $& +$ abc y $& +a.c abc y $& abc +a.c axc y $& axc +a\Nc abc y $& abc +a.*c axyzc y $& axyzc +a\N*c axyzc y $& axyzc +a.*c axyzd n - - +a\N*c axyzd n - - +a[bc]d abc n - - +a[bc]d abd y $& abd +a[b]d abd y $& abd +[a][b][d] abd y $& abd +.[b]. abd y $& abd +.[b]. aBd n - - +(?i:.[b].) abd y $& abd +(?i:\N[b]\N) abd y $& abd +a[b-d]e abd n - - +a[b-d]e ace y $& ace +a[b-d] aac y $& ac +a[-b] a- y $& a- +a[b-] a- y $& a- +a[b-a] - c - Invalid [] range \"b-a\" +a[]b - c - Unmatched [ +a[ - c - Unmatched [ +a] a] y $& a] +a[]]b a]b y $& a]b +a[^bc]d aed y $& aed +a[^bc]d abd n - - +a[^-b]c adc y $& adc +a[^-b]c a-c n - - +a[^]b]c a]c n - - +a[^]b]c adc y $& adc +\ba\b a- y - - +\ba\b -a y - - +\ba\b -a- y - - +\by\b xy n - - +\by\b yz n - - +\by\b xyz n - - +\Ba\B a- n - - +\Ba\B -a n - - +\Ba\B -a- n - - +\By\b xy y - - +\By\b xy y $-[0] 1 +\By\b xy y $+[0] 2 +\By\b xy y - - +\by\B yz y - - +\By\B xyz y - - +\w a y - - +\w - n - - +\W a n - - +\W - y - - +a\sb a b y - - +a\sb a-b n - - +a\Sb a b n - - +a\Sb a-b y - - +\d 1 y - - +\d - n - - +\D 1 n - - +\D - y - - +[\w] a y - - +[\w] - n - - +[\W] a n - - +[\W] - y - - +a[\s]b a b y - - +a[\s]b a-b n - - +a[\S]b a b n - - +a[\S]b a-b y - - +[\d] 1 y - - +[\d] - n - - +[\D] 1 n - - +[\D] - y - - +ab|cd abc y $& ab +ab|cd abcd y $& ab +()ef def y $&-$1 ef- +()ef def y $-[0] 1 +()ef def y $+[0] 3 +()ef def y $-[1] 1 +()ef def y $+[1] 1 +*a - c - Quantifier follows nothing +(|*)b - c - Quantifier follows nothing +(*)b - c - Unknown verb +$b b n - - +a\ - c - Search pattern not terminated +a\(b a(b y $&-$1 a(b- +a\(*b ab y $& ab +a\(*b a((b y $& a((b +a\\b a\\b y $& a\\b +abc) - c - Unmatched ) +(abc - c - Unmatched ( +((a)) abc y $&-$1-$2 a-a-a +((a)) abc y $-[0]-$-[1]-$-[2] 0-0-0 +((a)) abc y $+[0]-$+[1]-$+[2] 1-1-1 +((a)) abc b @- 0 0 0 +((a)) abc b @+ 1 1 1 +(a)b(c) abc y $&-$1-$2 abc-a-c +(a)b(c) abc y $-[0]-$-[1]-$-[2] 0-0-2 +(a)b(c) abc y $+[0]-$+[1]-$+[2] 3-1-3 +a+b+c aabbabc y $& abc +a{1,}b{1,}c aabbabc y $& abc +a** - c - Nested quantifiers +a.+?c abcabc y $& abc +(a+|b)* ab y $&-$1 ab-b +(a+|b)* ab y $-[0] 0 +(a+|b)* ab y $+[0] 2 +(a+|b)* ab y $-[1] 1 +(a+|b)* ab y $+[1] 2 +(a+|b){0,} ab y $&-$1 ab-b +(a+|b)+ ab y $&-$1 ab-b +(a+|b){1,} ab y $&-$1 ab-b +(a+|b)? ab y $&-$1 a-a +(a+|b){0,1} ab y $&-$1 a-a +)( - c - Unmatched ) +[^ab]* cde y $& cde +abc n - - +a* y $& +([abc])*d abbbcd y $&-$1 abbbcd-c +([abc])*bcd abcd y $&-$1 abcd-a +a|b|c|d|e e y $& e +(a|b|c|d|e)f ef y $&-$1 ef-e +(a|b|c|d|e)f ef y $-[0] 0 +(a|b|c|d|e)f ef y $+[0] 2 +(a|b|c|d|e)f ef y $-[1] 0 +(a|b|c|d|e)f ef y $+[1] 1 +abcd*efg abcdefg y $& abcdefg +ab* xabyabbbz y $& ab +ab* xayabbbz y $& a +(ab|cd)e abcde y $&-$1 cde-cd +[abhgefdc]ij hij y $& hij +^(ab|cd)e abcde n x$1y xy +(abc|)ef abcdef y $&-$1 ef- +(a|b)c*d abcd y $&-$1 bcd-b +(ab|ab*)bc abc y $&-$1 abc-a +a([bc]*)c* abc y $&-$1 abc-bc +a([bc]*)(c*d) abcd y $&-$1-$2 abcd-bc-d +a([bc]*)(c*d) abcd y $-[0] 0 +a([bc]*)(c*d) abcd y $+[0] 4 +a([bc]*)(c*d) abcd y $-[1] 1 +a([bc]*)(c*d) abcd y $+[1] 3 +a([bc]*)(c*d) abcd y $-[2] 3 +a([bc]*)(c*d) abcd y $+[2] 4 +a([bc]+)(c*d) abcd y $&-$1-$2 abcd-bc-d +a([bc]*)(c+d) abcd y $&-$1-$2 abcd-b-cd +a([bc]*)(c+d) abcd y $-[0] 0 +a([bc]*)(c+d) abcd y $+[0] 4 +a([bc]*)(c+d) abcd y $-[1] 1 +a([bc]*)(c+d) abcd y $+[1] 2 +a([bc]*)(c+d) abcd y $-[2] 2 +a([bc]*)(c+d) abcd y $+[2] 4 +a[bcd]*dcdcde adcdcde y $& adcdcde +a[bcd]+dcdcde adcdcde n - - +(ab|a)b*c abc y $&-$1 abc-ab +(ab|a)b*c abc y $-[0] 0 +(ab|a)b*c abc y $+[0] 3 +(ab|a)b*c abc y $-[1] 0 +(ab|a)b*c abc y $+[1] 2 +((a)(b)c)(d) abcd y $1-$2-$3-$4 abc-a-b-d +((a)(b)c)(d) abcd y $-[0] 0 +((a)(b)c)(d) abcd y $+[0] 4 +((a)(b)c)(d) abcd y $-[1] 0 +((a)(b)c)(d) abcd y $+[1] 3 +((a)(b)c)(d) abcd y $-[2] 0 +((a)(b)c)(d) abcd y $+[2] 1 +((a)(b)c)(d) abcd y $-[3] 1 +((a)(b)c)(d) abcd y $+[3] 2 +((a)(b)c)(d) abcd y $-[4] 3 +((a)(b)c)(d) abcd y $+[4] 4 +[a-zA-Z_][a-zA-Z0-9_]* alpha y $& alpha +^a(bc+|b[eh])g|.h$ abh y $&-$1 bh- +(bc+d$|ef*g.|h?i(j|k)) effgz y $&-$1-$2 effgz-effgz- +(bc+d$|ef*g.|h?i(j|k)) ij y $&-$1-$2 ij-ij-j +(bc+d$|ef*g.|h?i(j|k)) effg n - - +(bc+d$|ef*g.|h?i(j|k)) bcdd n - - +(bc+d$|ef*g.|h?i(j|k)) reffgz y $&-$1-$2 effgz-effgz- +((((((((((a)))))))))) a y $10 a +((((((((((a)))))))))) a y $-[0] 0 +((((((((((a)))))))))) a y $+[0] 1 +((((((((((a)))))))))) a y $-[10] 0 +((((((((((a)))))))))) a y $+[10] 1 +((((((((((a))))))))))\10 aa y $& aa +((((((((((a))))))))))${bang} aa n - - +((((((((((a))))))))))${bang} a! y $& a! +(((((((((a))))))))) a y $& a +multiple words of text uh-uh n - - +multiple words multiple words, yeah y $& multiple words +(.*)c(.*) abcde y $&-$1-$2 abcde-ab-de +\((.*), (.*)\) (a, b) y ($2, $1) (b, a) +[k] ab n - - +abcd abcd y $&-\$&-\\$& abcd-\$&-\\abcd +a(bc)d abcd y $1-\$1-\\$1 bc-\$1-\\bc +a[-]?c ac y $& ac +(abc)\1 abcabc y $1 abc +([a-c]*)\1 abcabc y $1 abc +\1 - c - Reference to nonexistent group +\2 - c - Reference to nonexistent group +\g1 - c - Reference to nonexistent group +\g-1 - c - Reference to nonexistent or unclosed group +\g{1} - c - Reference to nonexistent group +\g{-1} - c - Reference to nonexistent or unclosed group +\g0 - c - Reference to invalid group 0 +\g-0 - c - Reference to invalid group 0 +\g{0} - c - Reference to invalid group 0 +\g{-0} - c - Reference to invalid group 0 +(a)|\1 a y - - +(a)|\1 x n - - +(a)|\2 - c - Reference to nonexistent group +(([a-c])b*?\2)* ababbbcbc y $&-$1-$2 ababb-bb-b +(([a-c])b*?\2){3} ababbbcbc y $&-$1-$2 ababbbcbc-cbc-c +((\3|b)\2(a)x)+ aaxabxbaxbbx n - - +((\3|b)\2(a)x)+ aaaxabaxbaaxbbax y $&-$1-$2-$3 bbax-bbax-b-a +((\3|b)\2(a)){2,} bbaababbabaaaaabbaaaabba y $&-$1-$2-$3 bbaaaabba-bba-b-a +#Bug #3589 - up to perl-5.6.0 matches incorrectly, from 5.6.1 not anymore +^((.)?a\2)+$ babadad n - - +(a)|(b) b y $-[0] 0 +(a)|(b) b y $+[0] 1 +(a)|(b) b y x$-[1] x +(a)|(b) b y x$+[1] x +(a)|(b) b y $-[2] 0 +(a)|(b) b y $+[2] 1 +'abc'i ABC y $& ABC +'abc'i XBC n - - +'abc'i AXC n - - +'abc'i ABX n - - +'abc'i XABCY y $& ABC +'abc'i ABABC y $& ABC +'ab*c'i ABC y $& ABC +'ab*bc'i ABC y $& ABC +'ab*bc'i ABBC y $& ABBC +'ab*?bc'i ABBBBC y $& ABBBBC +'ab{0,}?bc'i ABBBBC y $& ABBBBC +'ab+?bc'i ABBC y $& ABBC +'ab+bc'i ABC n - - +'ab+bc'i ABQ n - - +'ab{1,}bc'i ABQ n - - +'ab+bc'i ABBBBC y $& ABBBBC +'ab{1,}?bc'i ABBBBC y $& ABBBBC +'ab{1,3}?bc'i ABBBBC y $& ABBBBC +'ab{3,4}?bc'i ABBBBC y $& ABBBBC +'ab{4,5}?bc'i ABBBBC n - - +'ab??bc'i ABBC y $& ABBC +'ab??bc'i ABC y $& ABC +'ab{0,1}?bc'i ABC y $& ABC +'ab??bc'i ABBBBC n - - +'ab??c'i ABC y $& ABC +'ab{0,1}?c'i ABC y $& ABC +'^abc$'i ABC y $& ABC +'^abc$'i ABCC n - - +'^abc'i ABCC y $& ABC +'^abc$'i AABC n - - +'abc$'i AABC y $& ABC +'^'i ABC y $& +'$'i ABC y $& +'a.c'i ABC y $& ABC +'a.c'i AXC y $& AXC +'a\Nc'i ABC y $& ABC +'a.*?c'i AXYZC y $& AXYZC +'a.*c'i AXYZD n - - +'a[bc]d'i ABC n - - +'a[bc]d'i ABD y $& ABD +'a[b-d]e'i ABD n - - +'a[b-d]e'i ACE y $& ACE +'a[b-d]'i AAC y $& AC +'a[-b]'i A- y $& A- +'a[b-]'i A- y $& A- +'a[b-a]'i - c - Invalid [] range \"b-a\" +'a[]b'i - c - Unmatched [ +'a['i - c - Unmatched [ +'a]'i A] y $& A] +'a[]]b'i A]B y $& A]B +'a[^bc]d'i AED y $& AED +'a[^bc]d'i ABD n - - +'a[^-b]c'i ADC y $& ADC +'a[^-b]c'i A-C n - - +'a[^]b]c'i A]C n - - +'a[^]b]c'i ADC y $& ADC +'ab|cd'i ABC y $& AB +'ab|cd'i ABCD y $& AB +'()ef'i DEF y $&-$1 EF- +'*a'i - c - Quantifier follows nothing +'(|*)b'i - c - Quantifier follows nothing +'(*)b'i - c - Unknown verb +'$b'i B n - - +'a\'i - c - Search pattern not terminated +'a\(b'i A(B y $&-$1 A(B- +'a\(*b'i AB y $& AB +'a\(*b'i A((B y $& A((B +'a\\b'i A\\B y $& A\\B +'abc)'i - c - Unmatched ) +'(abc'i - c - Unmatched ( +'((a))'i ABC y $&-$1-$2 A-A-A +'(a)b(c)'i ABC y $&-$1-$2 ABC-A-C +'a+b+c'i AABBABC y $& ABC +'a{1,}b{1,}c'i AABBABC y $& ABC +'a**'i - c - Nested quantifiers +'a.+?c'i ABCABC y $& ABC +'a.*?c'i ABCABC y $& ABC +'a.{0,5}?c'i ABCABC y $& ABC +'(a+|b)*'i AB y $&-$1 AB-B +'(a+|b){0,}'i AB y $&-$1 AB-B +'(a+|b)+'i AB y $&-$1 AB-B +'(a+|b){1,}'i AB y $&-$1 AB-B +'(a+|b)?'i AB y $&-$1 A-A +'(a+|b){0,1}'i AB y $&-$1 A-A +'(a+|b){0,1}?'i AB y $&-$1 - +')('i - c - Unmatched ) +'[^ab]*'i CDE y $& CDE +'abc'i n - - +'a*'i y $& +'([abc])*d'i ABBBCD y $&-$1 ABBBCD-C +'([abc])*bcd'i ABCD y $&-$1 ABCD-A +'a|b|c|d|e'i E y $& E +'(a|b|c|d|e)f'i EF y $&-$1 EF-E +'abcd*efg'i ABCDEFG y $& ABCDEFG +'ab*'i XABYABBBZ y $& AB +'ab*'i XAYABBBZ y $& A +'(ab|cd)e'i ABCDE y $&-$1 CDE-CD +'[abhgefdc]ij'i HIJ y $& HIJ +'^(ab|cd)e'i ABCDE n x$1y XY +'(abc|)ef'i ABCDEF y $&-$1 EF- +'(a|b)c*d'i ABCD y $&-$1 BCD-B +'(ab|ab*)bc'i ABC y $&-$1 ABC-A +'a([bc]*)c*'i ABC y $&-$1 ABC-BC +'a([bc]*)(c*d)'i ABCD y $&-$1-$2 ABCD-BC-D +'a([bc]+)(c*d)'i ABCD y $&-$1-$2 ABCD-BC-D +'a([bc]*)(c+d)'i ABCD y $&-$1-$2 ABCD-B-CD +'a[bcd]*dcdcde'i ADCDCDE y $& ADCDCDE +'a[bcd]+dcdcde'i ADCDCDE n - - +'(ab|a)b*c'i ABC y $&-$1 ABC-AB +'((a)(b)c)(d)'i ABCD y $1-$2-$3-$4 ABC-A-B-D +'[a-zA-Z_][a-zA-Z0-9_]*'i ALPHA y $& ALPHA +'^a(bc+|b[eh])g|.h$'i ABH y $&-$1 BH- +'(bc+d$|ef*g.|h?i(j|k))'i EFFGZ y $&-$1-$2 EFFGZ-EFFGZ- +'(bc+d$|ef*g.|h?i(j|k))'i IJ y $&-$1-$2 IJ-IJ-J +'(bc+d$|ef*g.|h?i(j|k))'i EFFG n - - +'(bc+d$|ef*g.|h?i(j|k))'i BCDD n - - +'(bc+d$|ef*g.|h?i(j|k))'i REFFGZ y $&-$1-$2 EFFGZ-EFFGZ- +'((((((((((a))))))))))'i A y $10 A +'((((((((((a))))))))))\10'i AA y $& AA +'((((((((((a))))))))))${bang}'i AA n - - +'((((((((((a))))))))))${bang}'i A! y $& A! +'(((((((((a)))))))))'i A y $& A +'(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))'i A y $1 A +'(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))'i C y $1 C +'multiple words of text'i UH-UH n - - +'multiple words'i MULTIPLE WORDS, YEAH y $& MULTIPLE WORDS +'(.*)c(.*)'i ABCDE y $&-$1-$2 ABCDE-AB-DE +'\((.*), (.*)\)'i (A, B) y ($2, $1) (B, A) +'[k]'i AB n - - +'abcd'i ABCD y $&-\$&-\\$& ABCD-\$&-\\ABCD +'a(bc)d'i ABCD y $1-\$1-\\$1 BC-\$1-\\BC +'a[-]?c'i AC y $& AC +'(abc)\1'i ABCABC y $1 ABC +'([a-c]*)\1'i ABCABC y $1 ABC +a(?!b). abad y $& ad +(?=)a a y $& a +a(?=d). abad y $& ad +a(?=c|d). abad y $& ad +a(?:b|c|d)(.) ace y $1 e +a(?:b|c|d)*(.) ace y $1 e +a(?:b|c|d)+?(.) ace y $1 e +a(?:b|c|d)+?(.) acdbcdbe y $1 d +a(?:b|c|d)+(.) acdbcdbe y $1 e +a(?:b|c|d){2}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}?(.) acdbcdbe y $1 d +((foo)|(bar))* foobar y $1-$2-$3 bar-foo-bar +:(?: - c - Sequence (? incomplete +a(?:b|c|d){6,7}(.) acdbcdbe y $1 e +a(?:b|c|d){6,7}?(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}?(.) acdbcdbe y $1 b +a(?:b|c|d){5,7}(.) acdbcdbe y $1 e +a(?:b|c|d){5,7}?(.) acdbcdbe y $1 b +a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce +^(.+)?B AB y $1 A +^([^a-z])|(\^)$ . y $1 . +^[<>]& <&OUT y $& <& +^(a\1?){4}$ aaaaaaaaaa y $1 aaaa +^(a\1?){4}$ aaaaaaaaa n - - +^(a\1?){4}$ aaaaaaaaaaa n - - +^(a(?(1)\1)){4}$ aaaaaaaaaa y $1 aaaa +^(a(?(1)\1)){4}$ aaaaaaaaa n - - +^(a(?(1)\1)){4}$ aaaaaaaaaaa n - - +((a{4})+) aaaaaaaaa y $1 aaaaaaaa +(((aa){2})+) aaaaaaaaaa y $1 aaaaaaaa +(((a{2}){2})+) aaaaaaaaaa y $1 aaaaaaaa +(?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 f:o:o:b:a:r +(?<=a)b ab y $& b +(?<=a)b cb n - - +(?<=a)b b n - - +(?<!c)b ab y $& b +(?<!c)b cb n - - +(?<!c)b b y - - +(?<!c)b b y $& b +(?<%)b - c - Sequence (?<%...) not recognized +(?:..)*a aba y $& aba +(?:..)*?a aba y $& a +^(?:b|a(?=(.)))*\1 abc y $& ab +^(){3,5} abc y a$1 a +^(a+)*ax aax y $1 a +^((a|b)+)*ax aax y $1 a +^((a|bc)+)*ax aax y $1 a +(a|x)*ab cab y y$1 y +(a)*ab cab y y$1 y +(?:(?i)a)b ab y $& ab +((?i)a)b ab y $&:$1 ab:a +(?:(?i)a)b Ab y $& Ab +((?i)a)b Ab y $&:$1 Ab:A +(?:(?i)a)b aB n - - +((?i)a)b aB n - - +(?i:a)b ab y $& ab +((?i:a))b ab y $&:$1 ab:a +(?i:a)b Ab y $& Ab +((?i:a))b Ab y $&:$1 Ab:A +(?i:a)b aB n - - +((?i:a))b aB n - - +'(?:(?-i)a)b'i ab y $& ab +'((?-i)a)b'i ab y $&:$1 ab:a +'(?:(?-i)a)b'i aB y $& aB +'((?-i)a)b'i aB y $&:$1 aB:a +'(?:(?-i)a)b'i Ab n - - +'((?-i)a)b'i Ab n - - +'(?:(?-i)a)b'i aB y $& aB +'((?-i)a)b'i aB y $1 a +'(?:(?-i)a)b'i AB n - - +'((?-i)a)b'i AB n - - +'(?-i:a)b'i ab y $& ab +'((?-i:a))b'i ab y $&:$1 ab:a +'(?-i:a)b'i aB y $& aB +'((?-i:a))b'i aB y $&:$1 aB:a +'(?-i:a)b'i Ab n - - +'((?-i:a))b'i Ab n - - +'(?-i:a)b'i aB y $& aB +'((?-i:a))b'i aB y $1 a +'(?-i:a)b'i AB n - - +'((?-i:a))b'i AB n - - +'((?-i:a.))b'i a\nB n - - +'((?-i:a\N))b'i a\nB n - - +'((?s-i:a.))b'i a\nB y $1 a\n +'((?s-i:a\N))b'i a\nB n - - +'((?s-i:a.))b'i B\nB n - - +'((?s-i:a\N))b'i B\nB n - - +(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b))) cabbbb y $& cabbbb +(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb))) caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb y $& caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb +'(ab)\d\1'i Ab4ab y $1 Ab +'(ab)\d\1'i ab4Ab y $1 ab +foo\w*\d{4}baz foobar1234baz y $& foobar1234baz +a(?{})b cabd y $& ab +a(?{)b - c - Sequence (?{...}) not terminated or not {}-balanced +a(?{{})b - c - Sequence (?{...}) not terminated or not {}-balanced +a(?{}})b - c - +a(?{"{"})b - c - Sequence (?{...}) not terminated or not {}-balanced +a(?{"\{"})b cabd y $& ab +a(?{"{"}})b - c - Unmatched right curly bracket +a(?{$::bl="\{"}).b caxbd y $::bl { +x(~~)*(?:(?:F)?)? x~~ y - - +^a(?#xxx){3}c aaac y $& aaac +'^a (?#xxx) (?#yyy) {3}c'x aaac y $& aaac +(?<![cd])b dbcb n - - +(?<![cd])[ab] dbaacb y $& a +(?<!(c|d))b dbcb n - - +(?<!(c|d))[ab] dbaacb y $& a +(?<!cd)[ab] cdaccb y $& b +^(?:a?b?)*$ a-- n - - +((?s)^a(.))((?m)^b$) a\nb\nc\n y $1;$2;$3 a\n;\n;b +((?m)^b$) a\nb\nc\n y $1 b +(?m)^b a\nb\n y $& b +(?m)^(b) a\nb\n y $1 b +((?m)^b) a\nb\n y $1 b +\n((?m)^b) a\nb\n y $1 b +((?s).)c(?!.) a\nb\nc\n y $1 \n +((?s).)c(?!.) a\nb\nc\n y $1:$& \n:\nc +((?s)b.)c(?!.) a\nb\nc\n y $1 b\n +((?s)b.)c(?!.) a\nb\nc\n y $1:$& b\n:b\nc +((?s)b.)c(?!\N) a\nb\nc\n y $1:$& b\n:b\nc +'(b.)c(?!\N)'s a\nb\nc\n y $1:$& b\n:b\nc +^b a\nb\nc\n n - - +()^b a\nb\nc\n n - - +((?m)^b) a\nb\nc\n y $1 b +(?(1)a|b) a n - - +(?(1)b|a) a y $& a +(x)?(?(1)a|b) a n - - +(x)?(?(1)b|a) a y $& a +()?(?(1)b|a) a y $& a +()(?(1)b|a) a n - - +()?(?(1)a|b) a y $& a +^(\()?blah(?(1)(\)))$ (blah) y $2 ) +^(\()?blah(?(1)(\)))$ blah y ($2) () +^(\()?blah(?(1)(\)))$ blah) n - - +^(\()?blah(?(1)(\)))$ (blah n - - +^(\(+)?blah(?(1)(\)))$ (blah) y $2 ) +^(\(+)?blah(?(1)(\)))$ blah y ($2) () +^(\(+)?blah(?(1)(\)))$ blah) n - - +^(\(+)?blah(?(1)(\)))$ (blah n - - +(?(1?)a|b) a c - Switch condition not recognized +(?(1)a|b|c) a c - Switch (?(condition)... contains too many branches +(?(?{0})a|b) a n - - +(?(?{0})b|a) a y $& a +(?(?{1})b|a) a n - - +(?(?{1})a|b) a y $& a +(?(?!a)a|b) a n - - +(?(?!a)b|a) a y $& a +(?(?=a)b|a) a n - - +(?(?=a)a|b) a y $& a +(?=(a+?))(\1ab) aaab y $2 aab +^(?=(a+?))\1ab aaab n - - +(\w+:)+ one: y $1 one: +$(?<=^(a)) a y $1 a +(?=(a+?))(\1ab) aaab y $2 aab +^(?=(a+?))\1ab aaab n - - +([\w:]+::)?(\w+)$ abcd: n - - +([\w:]+::)?(\w+)$ abcd y $1-$2 -abcd +([\w:]+::)?(\w+)$ xy:z:::abcd y $1-$2 xy:z:::-abcd +^[^bcd]*(c+) aexycd y $1 c +(a*)b+ caab y $1 aa +([\w:]+::)?(\w+)$ abcd: n - - +([\w:]+::)?(\w+)$ abcd y $1-$2 -abcd +([\w:]+::)?(\w+)$ xy:z:::abcd y $1-$2 xy:z:::-abcd +^[^bcd]*(c+) aexycd y $1 c +(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a}) yaaxxaaaacd y $b 3 +(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a}) yaaxxaaaacd y $b 4 +(>a+)ab aaab n - - +(?>a+)b aaab y - - +([[:]+) a:[b]: y $1 :[ +([[=]+) a=[b]= y $1 =[ +([[.]+) a.[b]. y $1 .[ +[a[:xyz: - c - Unmatched [ +[a[:xyz:] - c - POSIX class [:xyz:] unknown +[a[:]b[:c] abc y $& abc +([a[:xyz:]b]+) pbaq c - POSIX class [:xyz:] unknown +[a[:]b[:c] abc y $& abc +([[:alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd +([[:alnum:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy +([[:ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- ${nulnul} +([[:cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${nulnul} +([[:digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01 +([[:graph:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 cd +([[:print:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:punct:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 __-- +([[:space:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 +([[:word:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__ +([[:upper:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB +([[:xdigit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01 +([[:^alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01 +([[:^alnum:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 __-- ${nulnul}${ffff} +([[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff} +([[:^cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:^digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd +([[:^lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB +([[:^print:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${nulnul}${ffff} +([[:^punct:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy +([[:^space:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:^word:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 -- ${nulnul}${ffff} +([[:^upper:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 cd01 +([[:^xdigit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 Xy__-- ${nulnul}${ffff} +[[:foo:]] - c - POSIX class [:foo:] unknown +[[:^foo:]] - c - POSIX class [:^foo:] unknown +((?>a+)b) aaab y $1 aaab +(?>(a+))b aaab y $1 aaa +((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x +(?<=x+)y - c - Variable length lookbehind not implemented +a{37,17} - c - Can't do {n,m} with n > m +a{37,0} - c - Can't do {n,m} with n > m +\Z a\nb\n y $-[0] 3 +\z a\nb\n y $-[0] 4 +$ a\nb\n y $-[0] 3 +\Z b\na\n y $-[0] 3 +\z b\na\n y $-[0] 4 +$ b\na\n y $-[0] 3 +\Z b\na y $-[0] 3 +\z b\na y $-[0] 3 +$ b\na y $-[0] 3 +'\Z'm a\nb\n y $-[0] 3 +'\z'm a\nb\n y $-[0] 4 +'$'m a\nb\n y $-[0] 1 +'\Z'm b\na\n y $-[0] 3 +'\z'm b\na\n y $-[0] 4 +'$'m b\na\n y $-[0] 1 +'\Z'm b\na y $-[0] 3 +'\z'm b\na y $-[0] 3 +'$'m b\na y $-[0] 1 +a\Z a\nb\n n - - +a\z a\nb\n n - - +a$ a\nb\n n - - +a\Z b\na\n y $-[0] 2 +a\z b\na\n n - - +a$ b\na\n y $-[0] 2 +a\Z b\na y $-[0] 2 +a\z b\na y $-[0] 2 +a$ b\na y $-[0] 2 +'a\Z'm a\nb\n n - - +'a\z'm a\nb\n n - - +'a$'m a\nb\n y $-[0] 0 +'a\Z'm b\na\n y $-[0] 2 +'a\z'm b\na\n n - - +'a$'m b\na\n y $-[0] 2 +'a\Z'm b\na y $-[0] 2 +'a\z'm b\na y $-[0] 2 +'a$'m b\na y $-[0] 2 +aa\Z aa\nb\n n - - +aa\z aa\nb\n n - - +aa$ aa\nb\n n - - +aa\Z b\naa\n y $-[0] 2 +aa\z b\naa\n n - - +aa$ b\naa\n y $-[0] 2 +aa\Z b\naa y $-[0] 2 +aa\z b\naa y $-[0] 2 +aa$ b\naa y $-[0] 2 +'aa\Z'm aa\nb\n n - - +'aa\z'm aa\nb\n n - - +'aa$'m aa\nb\n y $-[0] 0 +'aa\Z'm b\naa\n y $-[0] 2 +'aa\z'm b\naa\n n - - +'aa$'m b\naa\n y $-[0] 2 +'aa\Z'm b\naa y $-[0] 2 +'aa\z'm b\naa y $-[0] 2 +'aa$'m b\naa y $-[0] 2 +aa\Z ac\nb\n n - - +aa\z ac\nb\n n - - +aa$ ac\nb\n n - - +aa\Z b\nac\n n - - +aa\z b\nac\n n - - +aa$ b\nac\n n - - +aa\Z b\nac n - - +aa\z b\nac n - - +aa$ b\nac n - - +'aa\Z'm ac\nb\n n - - +'aa\z'm ac\nb\n n - - +'aa$'m ac\nb\n n - - +'aa\Z'm b\nac\n n - - +'aa\z'm b\nac\n n - - +'aa$'m b\nac\n n - - +'aa\Z'm b\nac n - - +'aa\z'm b\nac n - - +'aa$'m b\nac n - - +aa\Z ca\nb\n n - - +aa\z ca\nb\n n - - +aa$ ca\nb\n n - - +aa\Z b\nca\n n - - +aa\z b\nca\n n - - +aa$ b\nca\n n - - +aa\Z b\nca n - - +aa\z b\nca n - - +aa$ b\nca n - - +'aa\Z'm ca\nb\n n - - +'aa\z'm ca\nb\n n - - +'aa$'m ca\nb\n n - - +'aa\Z'm b\nca\n n - - +'aa\z'm b\nca\n n - - +'aa$'m b\nca\n n - - +'aa\Z'm b\nca n - - +'aa\z'm b\nca n - - +'aa$'m b\nca n - - +ab\Z ab\nb\n n - - +ab\z ab\nb\n n - - +ab$ ab\nb\n n - - +ab\Z b\nab\n y $-[0] 2 +ab\z b\nab\n n - - +ab$ b\nab\n y $-[0] 2 +ab\Z b\nab y $-[0] 2 +ab\z b\nab y $-[0] 2 +ab$ b\nab y $-[0] 2 +'ab\Z'm ab\nb\n n - - +'ab\z'm ab\nb\n n - - +'ab$'m ab\nb\n y $-[0] 0 +'ab\Z'm b\nab\n y $-[0] 2 +'ab\z'm b\nab\n n - - +'ab$'m b\nab\n y $-[0] 2 +'ab\Z'm b\nab y $-[0] 2 +'ab\z'm b\nab y $-[0] 2 +'ab$'m b\nab y $-[0] 2 +ab\Z ac\nb\n n - - +ab\z ac\nb\n n - - +ab$ ac\nb\n n - - +ab\Z b\nac\n n - - +ab\z b\nac\n n - - +ab$ b\nac\n n - - +ab\Z b\nac n - - +ab\z b\nac n - - +ab$ b\nac n - - +'ab\Z'm ac\nb\n n - - +'ab\z'm ac\nb\n n - - +'ab$'m ac\nb\n n - - +'ab\Z'm b\nac\n n - - +'ab\z'm b\nac\n n - - +'ab$'m b\nac\n n - - +'ab\Z'm b\nac n - - +'ab\z'm b\nac n - - +'ab$'m b\nac n - - +ab\Z ca\nb\n n - - +ab\z ca\nb\n n - - +ab$ ca\nb\n n - - +ab\Z b\nca\n n - - +ab\z b\nca\n n - - +ab$ b\nca\n n - - +ab\Z b\nca n - - +ab\z b\nca n - - +ab$ b\nca n - - +'ab\Z'm ca\nb\n n - - +'ab\z'm ca\nb\n n - - +'ab$'m ca\nb\n n - - +'ab\Z'm b\nca\n n - - +'ab\z'm b\nca\n n - - +'ab$'m b\nca\n n - - +'ab\Z'm b\nca n - - +'ab\z'm b\nca n - - +'ab$'m b\nca n - - +abb\Z abb\nb\n n - - +abb\z abb\nb\n n - - +abb$ abb\nb\n n - - +abb\Z b\nabb\n y $-[0] 2 +abb\z b\nabb\n n - - +abb$ b\nabb\n y $-[0] 2 +abb\Z b\nabb y $-[0] 2 +abb\z b\nabb y $-[0] 2 +abb$ b\nabb y $-[0] 2 +'abb\Z'm abb\nb\n n - - +'abb\z'm abb\nb\n n - - +'abb$'m abb\nb\n y $-[0] 0 +'abb\Z'm b\nabb\n y $-[0] 2 +'abb\z'm b\nabb\n n - - +'abb$'m b\nabb\n y $-[0] 2 +'abb\Z'm b\nabb y $-[0] 2 +'abb\z'm b\nabb y $-[0] 2 +'abb$'m b\nabb y $-[0] 2 +abb\Z ac\nb\n n - - +abb\z ac\nb\n n - - +abb$ ac\nb\n n - - +abb\Z b\nac\n n - - +abb\z b\nac\n n - - +abb$ b\nac\n n - - +abb\Z b\nac n - - +abb\z b\nac n - - +abb$ b\nac n - - +'abb\Z'm ac\nb\n n - - +'abb\z'm ac\nb\n n - - +'abb$'m ac\nb\n n - - +'abb\Z'm b\nac\n n - - +'abb\z'm b\nac\n n - - +'abb$'m b\nac\n n - - +'abb\Z'm b\nac n - - +'abb\z'm b\nac n - - +'abb$'m b\nac n - - +abb\Z ca\nb\n n - - +abb\z ca\nb\n n - - +abb$ ca\nb\n n - - +abb\Z b\nca\n n - - +abb\z b\nca\n n - - +abb$ b\nca\n n - - +abb\Z b\nca n - - +abb\z b\nca n - - +abb$ b\nca n - - +'abb\Z'm ca\nb\n n - - +'abb\z'm ca\nb\n n - - +'abb$'m ca\nb\n n - - +'abb\Z'm b\nca\n n - - +'abb\z'm b\nca\n n - - +'abb$'m b\nca\n n - - +'abb\Z'm b\nca n - - +'abb\z'm b\nca n - - +'abb$'m b\nca n - - +(^|x)(c) ca y $2 c +a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz x n - - +a(?{$a=2;$b=3;($b)=$a})b yabz y $b 2 +round\(((?>[^()]+))\) _I(round(xs * sz),1) y $1 xs * sz +'((?x:.) )' x y $1- x - +'((?-x:.) )'x x y $1- x- +foo.bart foo.bart y - - +'^d[x][x][x]'m abcd\ndxxx y - - +.X(.+)+X bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.X(.+)+XX bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.XX(.+)+X bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.X(.+)+X bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +.X(.+)+XX bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +.XX(.+)+X bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +.X(.+)+[X] bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.X(.+)+[X][X] bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.XX(.+)+[X] bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.X(.+)+[X] bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +.X(.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +.XX(.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +.[X](.+)+[X] bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.[X](.+)+[X][X] bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.[X][X](.+)+[X] bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y - - +.[X](.+)+[X] bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +.[X](.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +.[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa n - - +tt+$ xxxtt y - - +([a-\d]+) za-9z y $1 a-9 +([\d-z]+) a0-za y $1 0-z +([\d-\s]+) a0- z y $1 0- +([a-[:digit:]]+) za-9z y $1 a-9 +([[:digit:]-z]+) =0-z= y $1 0-z +([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z +\GX.*X aaaXbX n - - +(\d+\.\d+) 3.1415926 y $1 3.1415926 +(\ba.{0,10}br) have a web browser y $1 a web br +'\.c(pp|xx|c)?$'i Changes n - - +'\.c(pp|xx|c)?$'i IO.c y - - +'(\.c(pp|xx|c)?$)'i IO.c y $1 .c +^([a-z]:) C:/ n - - +'^\S\s+aa$'m \nx aa y - - +(^|a)b ab y - - +^([ab]*?)(b)?(c)$ abac y -$2- -- +(\w)?(abc)\1b abcab n - - +^(?:.,){2}c a,b,c y - - +^(.,){2}c a,b,c y $1 b, +^(?:[^,]*,){2}c a,b,c y - - +^([^,]*,){2}c a,b,c y $1 b, +^([^,]*,){3}d aaa,b,c,d y $1 c, +^([^,]*,){3,}d aaa,b,c,d y $1 c, +^([^,]*,){0,3}d aaa,b,c,d y $1 c, +^([^,]{1,3},){3}d aaa,b,c,d y $1 c, +^([^,]{1,3},){3,}d aaa,b,c,d y $1 c, +^([^,]{1,3},){0,3}d aaa,b,c,d y $1 c, +^([^,]{1,},){3}d aaa,b,c,d y $1 c, +^([^,]{1,},){3,}d aaa,b,c,d y $1 c, +^([^,]{1,},){0,3}d aaa,b,c,d y $1 c, +^([^,]{0,3},){3}d aaa,b,c,d y $1 c, +^([^,]{0,3},){3,}d aaa,b,c,d y $1 c, +^([^,]{0,3},){0,3}d aaa,b,c,d y $1 c, +(?i) y - - +'(?!\A)x'm a\nxb\n y - - +^(a(b)?)+$ aba y -$1-$2- -a-- +^(aa(bb)?)+$ aabbaa y -$1-$2- -aa-- +'^.{9}abc.*\n'm 123\nabcabcabcabc\n y - - +^(a)?a$ a y -$1- -- +^(a)?(?(1)a|b)+$ a n - - +^(a\1?)(a\1?)(a\2?)(a\3?)$ aaaaaa y $1,$2,$3,$4 a,aa,a,aa +^(a\1?){4}$ aaaaaa y $1 aa +^(0+)?(?:x(1))? x1 y - - +^([0-9a-fA-F]+)(?:x([0-9a-fA-F]+)?)(?:x([0-9a-fA-F]+))? 012cxx0190 y - - +^(b+?|a){1,2}c bbbac y $1 a +^(b+?|a){1,2}c bbbbac y $1 a +\((\w\. \w+)\) cd. (A. Tw) y -$1- -A. Tw- +((?:aaaa|bbbb)cccc)? aaaacccc y - - +((?:aaaa|bbbb)cccc)? bbbbcccc y - - +(a)?(a)+ a y $1:$2 :a - +(ab)?(ab)+ ab y $1:$2 :ab - +(abc)?(abc)+ abc y $1:$2 :abc - +'b\s^'m a\nb\n n - - +\ba a y - - +^(a(??{"(?!)"})|(a)(?{1}))b ab y $2 a # [ID 20010811.006] +ab(?i)cd AbCd n - - # [ID 20010809.023] +ab(?i)cd abCd y - - +(A|B)*(?(1)(CD)|(CD)) CD y $2-$3 -CD +(A|B)*(?(1)(CD)|(CD)) ABCD y $2-$3 CD- +(A|B)*?(?(1)(CD)|(CD)) CD y $2-$3 -CD # [ID 20010803.016] +(A|B)*?(?(1)(CD)|(CD)) ABCD y $2-$3 CD- +'^(o)(?!.*\1)'i Oo n - - +(.*)\d+\1 abc12bc y $1 bc +(?m:(foo\s*$)) foo\n bar y $1 foo +(.*)c abcd y $1 ab +(.*)(?=c) abcd y $1 ab +(.*)(?=c)c abcd yB $1 ab +(.*)(?=b|c) abcd y $1 ab +(.*)(?=b|c)c abcd y $1 ab +(.*)(?=c|b) abcd y $1 ab +(.*)(?=c|b)c abcd y $1 ab +(.*)(?=[bc]) abcd y $1 ab +(.*)(?=[bc])c abcd yB $1 ab +(.*)(?<=b) abcd y $1 ab +(.*)(?<=b)c abcd y $1 ab +(.*)(?<=b|c) abcd y $1 abc +(.*)(?<=b|c)c abcd y $1 ab +(.*)(?<=c|b) abcd y $1 abc +(.*)(?<=c|b)c abcd y $1 ab +(.*)(?<=[bc]) abcd y $1 abc +(.*)(?<=[bc])c abcd y $1 ab +(.*?)c abcd y $1 ab +(.*?)(?=c) abcd y $1 ab +(.*?)(?=c)c abcd yB $1 ab +(.*?)(?=b|c) abcd y $1 a +(.*?)(?=b|c)c abcd y $1 ab +(.*?)(?=c|b) abcd y $1 a +(.*?)(?=c|b)c abcd y $1 ab +(.*?)(?=[bc]) abcd y $1 a +(.*?)(?=[bc])c abcd yB $1 ab +(.*?)(?<=b) abcd y $1 ab +(.*?)(?<=b)c abcd y $1 ab +(.*?)(?<=b|c) abcd y $1 ab +(.*?)(?<=b|c)c abcd y $1 ab +(.*?)(?<=c|b) abcd y $1 ab +(.*?)(?<=c|b)c abcd y $1 ab +(.*?)(?<=[bc]) abcd y $1 ab +(.*?)(?<=[bc])c abcd y $1 ab +2(]*)?$\1 2 y $& 2 +(??{}) x y - - +a(b)?? abc y <$1> <> # undef [perl #16773] +(\d{1,3}\.){3,} 128.134.142.8 y <$1> <142.> # [perl #18019] +^.{3,4}(.+)\1\z foobarbar y $1 bar # 16 tests for [perl #23171] +^(?:f|o|b){3,4}(.+)\1\z foobarbar y $1 bar +^.{3,4}((?:b|a|r)+)\1\z foobarbar y $1 bar +^(?:f|o|b){3,4}((?:b|a|r)+)\1\z foobarbar y $1 bar +^.{3,4}(.+?)\1\z foobarbar y $1 bar +^(?:f|o|b){3,4}(.+?)\1\z foobarbar y $1 bar +^.{3,4}((?:b|a|r)+?)\1\z foobarbar y $1 bar +^(?:f|o|b){3,4}((?:b|a|r)+?)\1\z foobarbar y $1 bar +^.{2,3}?(.+)\1\z foobarbar y $1 bar +^(?:f|o|b){2,3}?(.+)\1\z foobarbar y $1 bar +^.{2,3}?((?:b|a|r)+)\1\z foobarbar y $1 bar +^(?:f|o|b){2,3}?((?:b|a|r)+)\1\z foobarbar y $1 bar +^.{2,3}?(.+?)\1\z foobarbar y $1 bar +^(?:f|o|b){2,3}?(.+?)\1\z foobarbar y $1 bar +^.{2,3}?((?:b|a|r)+?)\1\z foobarbar y $1 bar +^(?:f|o|b){2,3}?((?:b|a|r)+?)\1\z foobarbar y $1 bar +.*a(?!(b|cd)*e).*f ......abef n - - # [perl #23030] +x(?# x c - Sequence (?#... not terminated +:x(?#: x c - Sequence (?#... not terminated +(WORDS|WORD)S WORDS y $1 WORD +(X.|WORDS|X.|WORD)S WORDS y $1 WORD +(WORDS|WORLD|WORD)S WORDS y $1 WORD +(X.|WORDS|WORD|Y.)S WORDS y $1 WORD +(foo|fool|x.|money|parted)$ fool y $1 fool +(x.|foo|fool|x.|money|parted|y.)$ fool y $1 fool +(foo|fool|money|parted)$ fool y $1 fool +(foo|fool|x.|money|parted)$ fools n - - +(x.|foo|fool|x.|money|parted|y.)$ fools n - - +(foo|fool|money|parted)$ fools n - - +(a|aa|aaa||aaaa|aaaaa|aaaaaa)(b|c) aaaaaaaaaaaaaaab y $1$2 aaaaaab +(a|aa|aaa||aaaa|aaaaa|aaaaaa)(??{$1&&""})(b|c) aaaaaaaaaaaaaaab y $1$2 aaaaaab +(a|aa|aaa|aaaa|aaaaa|aaaaaa)(??{$1&&"foo"})(b|c) aaaaaaaaaaaaaaab n - - +^(a*?)(?!(aa|aaaa)*$) aaaaaaaaaaaaaaaaaaaa y $1 a # [perl #34195] +^(a*?)(?!(aa|aaaa)*$)(?=a\z) aaaaaaaa y $1 aaaaaaa +^(.)\s+.$(?(1)) A B y $1 A # [perl #37688] +(?:r?)*?r|(.{2,4}) abcde y $1 abcd +(?!)+?|(.{2,4}) abcde y $1 abcd +^(a*?)(?!(a{6}|a{5})*$) aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa y $+[1] 12 # super-linear cache bug may return 18 +^((?>(?:aa)?b)?) aab y $1 aab +^((?:aa)*)(?:X+((?:\d+|-)(?:X+(.+))?))?$ aaaaX5 y $1 aaaa +X(A|B||C|D)Y XXXYYY y $& XY # Trie w/ NOTHING +(?i:X([A]|[B]|y[Y]y|[D]|)Y) XXXYYYB y $& XY # Trie w/ NOTHING +^([a]{1})*$ aa y $1 a +a(?!b(?!c))(..) abababc y $1 bc # test nested negatives +a(?!b(?=a))(..) abababc y $1 bc # test nested lookaheads +a(?!b(?!c(?!d(?!e))))...(.) abxabcdxabcde y $1 e +X(?!b+(?!(c+)*(?!(c+)*d))).*X aXbbbbbbbcccccccccccccaaaX y - - +^(XXXXXXXXXX|YYYYYYYYYY|Z.Q*X|Z[TE]Q*P): ZEQQQQQQQQQQQQQQQQQQP: y $1 ZEQQQQQQQQQQQQQQQQQQP +^(XXXXXXXXXX|YYYYYYYYYY|Z.Q*X|Z[TE]Q*P): ZEQQQX: y $1 ZEQQQX +^([TUV]+|XXXXXXXXXX|YYYYYYYYYY|Z.Q*X|Z[TE]Q*P): ZEQQQQQQQQQQQQQQQQQQP: y $1 ZEQQQQQQQQQQQQQQQQQQP +^([TUV]+|XXXXXXXXXX|YYYYYYYYYY|Z.Q*X|Z[TE]Q*P): ZEQQQX: y $1 ZEQQQX +^([TUV]+|XXXXXXXXXX|YYYYYYYYYY|Z.Q*X|Z[TE]Q*P|[MKJ]): ZEQQQQQQQQQQQQQQQQQQP: y $1 ZEQQQQQQQQQQQQQQQQQQP +^([TUV]+|XXXXXXXXXX|YYYYYYYYYY|Z.Q*X|Z[TE]Q*P|[MKJ]): ZEQQQX: y $1 ZEQQQX +^(XXX|YYY|Z.Q*X|Z[TE]Q*P): ZEQQQQQQQQQQQQQQQQQQP: y $1 ZEQQQQQQQQQQQQQQQQQQP +^(XXX|YYY|Z.Q*X|Z[TE]Q*P): ZEQQQX: y $1 ZEQQQX +^([TUV]+|XXX|YYY|Z.Q*X|Z[TE]Q*P): ZEQQQQQQQQQQQQQQQQQQP: y $1 ZEQQQQQQQQQQQQQQQQQQP +^([TUV]+|XXX|YYY|Z.Q*X|Z[TE]Q*P): ZEQQQX: y $1 ZEQQQX +^([TUV]+|XXX|YYY|Z.Q*X|Z[TE]Q*P|[MKJ]): ZEQQQQQQQQQQQQQQQQQQP: y $1 ZEQQQQQQQQQQQQQQQQQQP +^([TUV]+|XXX|YYY|Z.Q*X|Z[TE]Q*P|[MKJ]): ZEQQQX: y $1 ZEQQQX +X(?:ABCF[cC]x*|ABCD|ABCF):(?:DIT|DID|DIM) XABCFCxxxxxxxxxx:DIM y $& XABCFCxxxxxxxxxx:DIM +(((ABCD|ABCE|ABCF)))(A|B|C[xy]*): ABCFCxxxxxxxxxx:DIM y $& ABCFCxxxxxxxxxx: +(?=foo) foo y pos 0 +(?=foo) XfooY y pos 1 +.*(?=foo) XfooY y pos 1 +(?<=foo) foo y pos 3 +(?<=foo) XfooY y pos 4 +.*(?<=foo) foo y pos 3 +.*(?<=foo) XfooY y pos 4 +(?<=foo)Y XfooY y pos 5 +o(?<=foo)Y ..XfooY.. y pos 7 +X(?=foo)f ..XfooY.. y pos 4 +X(?=foo) ..XfooY.. y pos 3 +X(?<=foo.)[YZ] ..XfooXY.. y pos 8 +(?=XY*foo) Xfoo y pos 0 +^(?=XY*foo) Xfoo y pos 0 +^(??{"a+"})a aa y $& aa +^(?:(??{"a+"})|b)a aa y $& aa +^(??{chr 0x100}).$ \x{100}\x{100} y $& \x{100}\x{100} +^(??{q(\x{100})}). \x{100}\x{100} y $& \x{100}\x{100} +^(??{q(.+)})\x{100} \x{100}\x{100} y $& \x{100}\x{100} +^(??{q(.)})\x{100} \x{100}\x{100} y $& \x{100}\x{100} +^(??{chr 0x100})\xbb \x{100}\x{bb} y $& \x{100}\x{bb} +^(.)(??{"(.)(.)"})(.)$ abcd y $1-$2 a-d +^(.)(??{"(bz+|.)(.)"})(.)$ abcd y $1-$2 a-d +^(.)((??{"(.)(cz+)"})|.) abcd y $1-$2 a-b +^a(?>(??{q(b)}))(??{q(c)})d abcd y - - +^x(??{""})+$ x y $& x +^(<(?:[^<>]+|(?3)|(?1))*>)()(!>!>!>)$ <<!>!>!>><>>!>!>!> y $1 <<!>!>!>><>> +^(<(?:[^<>]+|(?1))*>)$ <<><<<><>>>> y $1 <<><<<><>>>> +((?2)*)([fF]o+) fooFoFoo y $1-$2 fooFo-Foo +(<(?:[^<>]+|(?R))*>) <<><<<><>>>> y $1 <<><<<><>>>> +(?<n>foo|bar|baz) snofooewa y $1 foo +(?<n>foo|bar|baz) snofooewa y $+{n} foo +(?<n>foo|bar|baz)(?<m>[ew]+) snofooewa y $+{n} foo +(?<n>foo|bar|baz)(?<m>[ew]+) snofooewa y $+{m} ew +(?<n>foo)|(?<n>bar)|(?<n>baz) snofooewa y $+{n} foo +(?<n>foo)(??{ $+{n} }) snofooefoofoowaa y $+{n} foo +(?P<n>foo|bar|baz) snofooewa y $1 foo +(?P<n>foo|bar|baz) snofooewa y $+{n} foo +(?P<n>foo|bar|baz)(?P<m>[ew]+) snofooewa y $+{n} foo +(?P<n>foo|bar|baz)(?P<m>[ew]+) snofooewa y $+{m} ew +(?P<n>foo)|(?P<n>bar)|(?P<n>baz) snofooewa y $+{n} foo +(?P<n>foo)(??{ $+{n} }) snofooefoofoowaa y $+{n} foo +(?P<=n>foo|bar|baz) snofooewa c - Sequence (?P<=...) not recognized +(?P<!n>foo|bar|baz) snofooewa c - Sequence (?P<!...) not recognized +(?PX<n>foo|bar|baz) snofooewa c - Sequence (?PX<...) not recognized +/(?'n'foo|bar|baz)/ snofooewa y $1 foo +/(?'n'foo|bar|baz)/ snofooewa y $+{n} foo +/(?'n'foo|bar|baz)(?'m'[ew]+)/ snofooewa y $+{n} foo +/(?'n'foo|bar|baz)(?'m'[ew]+)/ snofooewa y $+{m} ew +/(?'n'foo)|(?'n'bar)|(?<n>baz)/ snobazewa y $+{n} baz +/(?'n'foo)(??{ $+{n} })/ snofooefoofoowaa y $+{n} foo +/(?'n'foo)\k<n>/ ..foofoo.. y $1 foo +/(?'n'foo)\k<n>/ ..foofoo.. y $+{n} foo +/(?<n>foo)\k'n'/ ..foofoo.. y $1 foo +/(?<n>foo)\k'n'/ ..foofoo.. y $+{n} foo +/(?:(?<n>foo)|(?<n>bar))\k<n>/ ..barbar.. y $+{n} bar +/^(?'main'<(?:[^<>]+|(?&crap)|(?&main))*>)(?'empty')(?'crap'!>!>!>)$/ <<!>!>!>><>>!>!>!> y $+{main} <<!>!>!>><>> +/^(?'main'<(?:[^<>]+|(?&main))*>)$/ <<><<<><>>>> y $1 <<><<<><>>>> +/(?'first'(?&second)*)(?'second'[fF]o+)/ fooFoFoo y $+{first}-$+{second} fooFo-Foo +(?<A>foo)?(?(<A>)bar|nada) foobar y $+{A} foo +(?<A>foo)?(?(<A>)bar|nada) foo-barnada y $& nada +(?<A>foo)?(?(1)bar|nada) foo-barnada y $& nada +(?<A>foo(?(R)bar))?(?1) foofoobar y $1 foo +(?<A>foo(?(R)bar))?(?1) foofoobar y $& foofoobar +(x)(?<A>foo(?(R&A)bar))?(?&A) xfoofoobar y $2 foo +(x)(?<A>foo(?(R&A)bar))?(?&A) xfoofoobar y $& xfoofoobar +(x)(?<A>foo(?(R2)bar))?(?&A) xfoofoobar y $2 foo +(x)(?<A>foo(?(R2)bar))?(?&A) xfoofoobar y $& xfoofoobar +(?1)(?(DEFINE)(blah)) blah y $& blah +/^(?<PAL>(?<CHAR>.)((?&PAL)|.?)\k<CHAR>)$/ madamimadam y $& madamimadam +/^(?<PAL>(?<CHAR>.)((?&PAL)|.?)\k<CHAR>)$/ madamiamadam n - - +/(a)?((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a)*((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a)+((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a){1,100}((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(a){0,100}((?1))(fox)/ aafox y $1-$2-$3 a-a-fox +/(ab)?((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox +/(ab)*((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox +/(ab)+((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox +/(ab){1,100}((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox +/(ab){0,100}((?1))(fox)/ ababfox y $1-$2-$3 ab-ab-fox +# possessive captures +a++a aaaaa n - - +a*+a aaaaa n - - +a{1,5}+a aaaaa n - - +a?+a ab n - - +a++b aaaaab y $& aaaaab +a*+b aaaaab y $& aaaaab +a{1,5}+b aaaaab y $& aaaaab +a?+b ab y $& ab +fooa++a fooaaaaa n - - +fooa*+a fooaaaaa n - - +fooa{1,5}+a fooaaaaa n - - +fooa?+a fooab n - - +fooa++b fooaaaaab y $& fooaaaaab +fooa*+b fooaaaaab y $& fooaaaaab +fooa{1,5}+b fooaaaaab y $& fooaaaaab +fooa?+b fooab y $& fooab +(?:aA)++(?:aA) aAaAaAaAaA n - aAaAaAaAaA +(aA)++(aA) aAaAaAaAaA n - aAaAaAaAaA +(aA|bB)++(aA|bB) aAaAbBaAbB n - aAaAbBaAbB +(?:aA|bB)++(?:aA|bB) aAbBbBbBaA n - aAbBbBbBaA +(?:aA)*+(?:aA) aAaAaAaAaA n - aAaAaAaAaA +(aA)*+(aA) aAaAaAaAaA n - aAaAaAaAaA +(aA|bB)*+(aA|bB) aAaAbBaAaA n - aAaAbBaAaA +(?:aA|bB)*+(?:aA|bB) aAaAaAbBaA n - aAaAaAbBaA +(?:aA){1,5}+(?:aA) aAaAaAaAaA n - aAaAaAaAaA +(aA){1,5}+(aA) aAaAaAaAaA n - aAaAaAaAaA +(aA|bB){1,5}+(aA|bB) aAaAbBaAaA n - aAaAbBaAaA +(?:aA|bB){1,5}+(?:aA|bB) bBbBbBbBbB n - bBbBbBbBbB +(?:aA)?+(?:aA) aAb n - aAb +(aA)?+(aA) aAb n - aAb +(aA|bB)?+(aA|bB) bBb n - bBb +(?:aA|bB)?+(?:aA|bB) aAb n - aAb +(?:aA)++b aAaAaAaAaAb y $& aAaAaAaAaAb +(aA)++b aAaAaAaAaAb y $& aAaAaAaAaAb +(aA|bB)++b aAbBaAaAbBb y $& aAbBaAaAbBb +(?:aA|bB)++b aAbBbBaAaAb y $& aAbBbBaAaAb +(?:aA)*+b aAaAaAaAaAb y $& aAaAaAaAaAb +(aA)*+b aAaAaAaAaAb y $& aAaAaAaAaAb +(aA|bB)*+b bBbBbBbBbBb y $& bBbBbBbBbBb +(?:aA|bB)*+b bBaAbBbBaAb y $& bBaAbBbBaAb +(?:aA){1,5}+b aAaAaAaAaAb y $& aAaAaAaAaAb +(aA){1,5}+b aAaAaAaAaAb y $& aAaAaAaAaAb +(aA|bB){1,5}+b bBaAbBaAbBb y $& bBaAbBaAbBb +(?:aA|bB){1,5}+b aAbBaAbBbBb y $& aAbBaAbBbBb +(?:aA)?+b aAb y $& aAb +(aA)?+b aAb y $& aAb +(aA|bB)?+b bBb y $& bBb +(?:aA|bB)?+b bBb y $& bBb +foo(?:aA)++(?:aA) fooaAaAaAaAaA n - fooaAaAaAaAaA +foo(aA)++(aA) fooaAaAaAaAaA n - fooaAaAaAaAaA +foo(aA|bB)++(aA|bB) foobBbBbBaAaA n - foobBbBbBaAaA +foo(?:aA|bB)++(?:aA|bB) fooaAaAaAaAaA n - fooaAaAaAaAaA +foo(?:aA)*+(?:aA) fooaAaAaAaAaA n - fooaAaAaAaAaA +foo(aA)*+(aA) fooaAaAaAaAaA n - fooaAaAaAaAaA +foo(aA|bB)*+(aA|bB) foobBaAbBaAaA n - foobBaAbBaAaA +foo(?:aA|bB)*+(?:aA|bB) fooaAaAbBbBaA n - fooaAaAbBbBaA +foo(?:aA){1,5}+(?:aA) fooaAaAaAaAaA n - fooaAaAaAaAaA +foo(aA){1,5}+(aA) fooaAaAaAaAaA n - fooaAaAaAaAaA +foo(aA|bB){1,5}+(aA|bB) fooaAbBbBaAaA n - fooaAbBbBaAaA +foo(?:aA|bB){1,5}+(?:aA|bB) fooaAbBbBaAbB n - fooaAbBbBaAbB +foo(?:aA)?+(?:aA) fooaAb n - fooaAb +foo(aA)?+(aA) fooaAb n - fooaAb +foo(aA|bB)?+(aA|bB) foobBb n - foobBb +foo(?:aA|bB)?+(?:aA|bB) fooaAb n - fooaAb +foo(?:aA)++b fooaAaAaAaAaAb y $& fooaAaAaAaAaAb +foo(aA)++b fooaAaAaAaAaAb y $& fooaAaAaAaAaAb +foo(aA|bB)++b foobBaAbBaAbBb y $& foobBaAbBaAbBb +foo(?:aA|bB)++b fooaAaAbBaAaAb y $& fooaAaAbBaAaAb +foo(?:aA)*+b fooaAaAaAaAaAb y $& fooaAaAaAaAaAb +foo(aA)*+b fooaAaAaAaAaAb y $& fooaAaAaAaAaAb +foo(aA|bB)*+b foobBbBaAaAaAb y $& foobBbBaAaAaAb +foo(?:aA|bB)*+b foobBaAaAbBaAb y $& foobBaAaAbBaAb +foo(?:aA){1,5}+b fooaAaAaAaAaAb y $& fooaAaAaAaAaAb +foo(aA){1,5}+b fooaAaAaAaAaAb y $& fooaAaAaAaAaAb +foo(aA|bB){1,5}+b foobBaAaAaAaAb y $& foobBaAaAaAaAb +foo(?:aA|bB){1,5}+b fooaAbBaAbBbBb y $& fooaAbBaAbBbBb +foo(?:aA)?+b fooaAb y $& fooaAb +foo(aA)?+b fooaAb y $& fooaAb +foo(aA|bB)?+b foobBb y $& foobBb +foo(?:aA|bB)?+b foobBb y $& foobBb + +([^()]++|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x +round\(([^()]++)\) _I(round(xs * sz),1) y $1 xs * sz + +(foo[1x]|bar[2x]|baz[3x])+y foo1bar2baz3y y $1 baz3 +(foo[1x]|bar[2x]|baz[3x])+y foo1bar2baz3y y $& foo1bar2baz3y +(foo[1x]|bar[2x]|baz[3x])*y foo1bar2baz3y y $1 baz3 +(foo[1x]|bar[2x]|baz[3x])*y foo1bar2baz3y y $& foo1bar2baz3y + +([yX].|WORDS|[yX].|WORD)S WORDS y $1 WORD +(WORDS|WORLD|WORD)S WORDS y $1 WORD +([yX].|WORDS|WORD|[xY].)S WORDS y $1 WORD +(foo|fool|[zx].|money|parted)$ fool y $1 fool +([zx].|foo|fool|[zq].|money|parted|[yx].)$ fool y $1 fool +(foo|fool|[zx].|money|parted)$ fools n - - +([zx].|foo|fool|[qx].|money|parted|[py].)$ fools n - - + +([yX].|WORDS|[yX].|WORD)+S WORDS y $1 WORD +(WORDS|WORLD|WORD)+S WORDS y $1 WORD +([yX].|WORDS|WORD|[xY].)+S WORDS y $1 WORD +(foo|fool|[zx].|money|parted)+$ fool y $1 fool +([zx].|foo|fool|[zq].|money|parted|[yx].)+$ fool y $1 fool +(foo|fool|[zx].|money|parted)+$ fools n - - +([zx].|foo|fool|[qx].|money|parted|[py].)+$ fools n - - + +(x|y|z[QW])+(longish|loquatious|excessive|overblown[QW])+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW +(x|y|z[QW])*(longish|loquatious|excessive|overblown[QW])* xyzQzWlongishoverblownW y $1-$2 zW-overblownW +(x|y|z[QW]){1,5}(longish|loquatious|excessive|overblown[QW]){1,5} xyzQzWlongishoverblownW y $1-$2 zW-overblownW + +(x|y|z[QW])++(longish|loquatious|excessive|overblown[QW])++ xyzQzWlongishoverblownW y $1-$2 zW-overblownW +(x|y|z[QW])*+(longish|loquatious|excessive|overblown[QW])*+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW +(x|y|z[QW]){1,5}+(longish|loquatious|excessive|overblown[QW]){1,5}+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW + +a*(?!) aaaab n - - +a*(*FAIL) aaaab n - - +a*(*F) aaaab n - - + +(A(A|B(*ACCEPT)|C)D)(E) AB y $1 AB +(A(A|B(*ACCEPT)|C)D)(E) ACDE y $1$2$3 ACDCE + +(a)(?:(?-1)|(?+1))(b) aab y $&-$1-$2 aab-a-b +(a)(?:(?-1)|(?+1))(b) abb y $1-$2 a-b +(a)(?:(?-1)|(?+1))(b) acb n - - + +(foo)(\g-2) foofoo y $1-$2 foo-foo +(foo)(\g-2)(foo)(\g-2) foofoofoofoo y $1-$2-$3-$4 foo-foo-foo-foo +(([abc]+) \g-1)(([abc]+) \g{-1}) abc abccba cba y $2-$4 abc-cba +(a)(b)(c)\g1\g2\g3 abcabc y $1$2$3 abc + +# \k<n> preceded by a literal +/(?'n'foo) \k<n>/ ..foo foo.. y $1 foo +/(?'n'foo) \k<n>/ ..foo foo.. y $+{n} foo +/(?<n>foo) \k'n'/ ..foo foo.. y $1 foo +/(?<n>foo) \k'n'/ ..foo foo.. y $+{n} foo +/(?'a1'foo) \k'a1'/ ..foo foo.. y $+{a1} foo +/(?<a1>foo) \k<a1>/ ..foo foo.. y $+{a1} foo +/(?'_'foo) \k'_'/ ..foo foo.. y $+{_} foo +/(?<_>foo) \k<_>/ ..foo foo.. y $+{_} foo +/(?'_0_'foo) \k'_0_'/ ..foo foo.. y $+{_0_} foo +/(?<_0_>foo) \k<_0_>/ ..foo foo.. y $+{_0_} foo +/(?'0'foo) bar/ ..foo bar.. c - Sequence (?' +/(?<0>foo) bar/ ..foo bar.. c - Sequence (?< +/(?'12'foo) bar/ ..foo bar.. c - Sequence (?' +/(?<12>foo) bar/ ..foo bar.. c - Sequence (?< +/(?'1a'foo) bar/ ..foo bar.. c - Sequence (?' +/(?<1a>foo) bar/ ..foo bar.. c - Sequence (?< +/(?''foo) bar/ ..foo bar.. c - Sequence (?'' +/(?<>foo) bar/ ..foo bar.. c - Sequence (?<> +/foo \k'n'/ foo foo c - Reference to nonexistent named group +/foo \k<n>/ foo foo c - Reference to nonexistent named group +/foo \k'a1'/ foo foo c - Reference to nonexistent named group +/foo \k<a1>/ foo foo c - Reference to nonexistent named group +/foo \k'_'/ foo foo c - Reference to nonexistent named group +/foo \k<_>/ foo foo c - Reference to nonexistent named group +/foo \k'_0_'/ foo foo c - Reference to nonexistent named group +/foo \k<_0_>/ foo foo c - Reference to nonexistent named group +/foo \k'0'/ foo foo c - Sequence \\k' +/foo \k<0>/ foo foo c - Sequence \\k< +/foo \k'12'/ foo foo c - Sequence \\k' +/foo \k<12>/ foo foo c - Sequence \\k< +/foo \k'1a'/ foo foo c - Sequence \\k' +/foo \k<1a>/ foo foo c - Sequence \\k< +/foo \k''/ foo foo c - Sequence \\k' +/foo \k<>/ foo foo c - Sequence \\k< +/(?<as>as) (\w+) \k<as> (\w+)/ as easy as pie y $1-$2-$3 as-easy-pie + +# \g{...} with a name as the argument +/(?'n'foo) \g{n}/ ..foo foo.. y $1 foo +/(?'n'foo) \g{n}/ ..foo foo.. y $+{n} foo +/(?<n>foo) \g{n}/ ..foo foo.. y $1 foo +/(?<n>foo) \g{n}/ ..foo foo.. y $+{n} foo +/(?<as>as) (\w+) \g{as} (\w+)/ as easy as pie y $1-$2-$3 as-easy-pie + +# Python style named capture buffer stuff +/(?P<n>foo)(?P=n)/ ..foofoo.. y $1 foo +/(?P<n>foo)(?P=n)/ ..foofoo.. y $+{n} foo +/(?:(?P<n>foo)|(?P<n>bar))(?P=n)/ ..barbar.. y $+{n} bar +/^(?P<PAL>(?P<CHAR>.)((?P>PAL)|.?)(?P=CHAR))$/ madamimadam y $& madamimadam +/^(?P<PAL>(?P<CHAR>.)((?P>PAL)|.?)(?P=CHAR))$/ madamiamadam n - - +/(?P<n>foo) (?P=n)/ ..foo foo.. y $1 foo +/(?P<n>foo) (?P=n)/ ..foo foo.. y $+{n} foo +/(?P<as>as) (\w+) (?P=as) (\w+)/ as easy as pie y $1-$2-$3 as-easy-pie + +#check that non identifiers as names are treated as the appropriate lookaround +(?<=bar>)foo bar>foo y $& foo +(?<!bar>)foo bar>foo n - - +(?<=bar>ABC)foo bar>ABCfoo y $& foo +(?<!bar>ABC)foo bar>ABCfoo n - - +(?<bar>)foo bar>ABCfoo y $& foo +(?<bar>ABC)foo bar>ABCfoo y $& ABCfoo + +(?<=abcd(?<=(aaaabcd))) ..aaaabcd.. y $1 aaaabcd +(?=xy(?<=(aaxy))) ..aaxy.. y $1 aaxy + +X(\w+)(?=\s)|X(\w+) Xab y [$1-$2] [-ab] + +#check that branch reset works ok. +(?|(a)) a y $1-$+-$^N a-a-a +(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) d!o!da y $1-$2-$3 !o!-o-a +(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) aabc y $1-$2-$3 a--c +(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) ixyjp y $1-$2-$3 x-y-p +(?|(?|(a)|(b))|(?|(c)|(d))) a y $1 a +(?|(?|(a)|(b))|(?|(c)|(d))) b y $1 b +(?|(?|(a)|(b))|(?|(c)|(d))) c y $1 c +(?|(?|(a)|(b))|(?|(c)|(d))) d y $1 d +(.)(?|(.)(.)x|(.)d)(.) abcde y $1-$2-$3-$4-$5- b-c--e-- +(\N)(?|(\N)(\N)x|(\N)d)(\N) abcde y $1-$2-$3-$4-$5- b-c--e-- +(?|(?<foo>x)) x y $+{foo} x +(?|(?<foo>x)|(?<bar>y)) x y $+{foo} x +(?|(?<bar>y)|(?<foo>x)) x y $+{foo} x +(?<bar>)(?|(?<foo>x)) x y $+{foo} x + +#Bug #41492 +(?(DEFINE)(?<A>(?&B)+)(?<B>a))(?&A) a y $& a +(?(DEFINE)(?<A>(?&B)+)(?<B>a))(?&A) aa y $& aa +\x{100}?(??{""})xxx xxx y $& xxx + +foo(\R)bar foo\r\nbar y $1 \r\n +foo(\R)bar foo\nbar y $1 \n +foo(\R)bar foo\rbar y $1 \r + +foo(\R+)bar foo\r\n\x{85}\r\n\nbar y $1 \r\n\x{85}\r\n\n +(\V+)(\R) foo\r\n\x{85}\r\n\nbar y $1-$2 foo-\r\n +(\R+)(\V) foo\r\n\x{85}\r\n\nbar y $1-$2 \r\n\x{85}\r\n\n-b +foo(\R)bar foo\x{85}bar y $1 \x{85} +(\V)(\R) foo\x{85}bar y $1-$2 o-\x{85} +(\R)(\V) foo\x{85}bar y $1-$2 \x{85}-b +foo(\R)bar foo\r\nbar y $1 \r\n +(\V)(\R) foo\r\nbar y $1-$2 o-\r\n +(\R)(\V) foo\r\nbar y $1-$2 \r\n-b +foo(\R)bar foo\r\nbar y $1 \r\n +(\V)(\R) foo\r\nbar y $1-$2 o-\r\n +(\R)(\V) foo\r\nbar y $1-$2 \r\n-b +foo(\R)bar foo\rbar y $1 \r +(\V)(\R) foo\rbar y $1-$2 o-\r +(\R)(\V) foo\rbar y $1-$2 \r-b + +foo(\v+)bar foo\r\n\x{85}\r\n\nbar y $1 \r\n\x{85}\r\n\n +(\V+)(\v) foo\r\n\x{85}\r\n\nbar y $1-$2 foo-\r +(\v+)(\V) foo\r\n\x{85}\r\n\nbar y $1-$2 \r\n\x{85}\r\n\n-b +foo(\v)bar foo\x{85}bar y $1 \x{85} +(\V)(\v) foo\x{85}bar y $1-$2 o-\x{85} +(\v)(\V) foo\x{85}bar y $1-$2 \x{85}-b +foo(\v)bar foo\rbar y $1 \r +(\V)(\v) foo\rbar y $1-$2 o-\r +(\v)(\V) foo\rbar y $1-$2 \r-b + + +foo(\h+)bar foo\t\x{A0}bar y $1 \t\x{A0} +(\H+)(\h) foo\t\x{A0}bar y $1-$2 foo-\t +(\h+)(\H) foo\t\x{A0}bar y $1-$2 \t\x{A0}-b +foo(\h)bar foo\x{A0}bar y $1 \x{A0} +(\H)(\h) foo\x{A0}bar y $1-$2 o-\x{A0} +(\h)(\H) foo\x{A0}bar y $1-$2 \x{A0}-b +foo(\h)bar foo\tbar y $1 \t +(\H)(\h) foo\tbar y $1-$2 o-\t +(\h)(\H) foo\tbar y $1-$2 \t-b + +.*\z foo\n y -$&- -- +\N*\z foo\n y -$&- -- +.*\Z foo\n y -$&- -foo- +\N*\Z foo\n y -$&- -foo- +^(?:(\d)x)?\d$ 1 y ${\(defined($1)?1:0)} 0 +.*?(?:(\w)|(\w))x abx y $1-$2 b- + +0{50} 000000000000000000000000000000000000000000000000000 y - - +^a?(?=b)b ab y $& ab # Bug #56690 +^a*(?=b)b ab y $& ab # Bug #56690 +/>\d+$ \n/ix >10\n y $& >10 +/>\d+$ \n/ix >1\n y $& >1 +/\d+$ \n/ix >10\n y $& 10 +/>\d\d$ \n/ix >10\n y $& >10 +/>\d+$ \n/x >10\n y $& >10 + +# Two regressions in 5.8.x (only) introduced by change 30638 +# Simplification of the test failure in XML::LibXML::Simple: +/^\s*i.*?o\s*$/s io\n io y - - +# As reported in #59168 by Father Chrysostomos: +/(.*?)a(?!(a+)b\2c)/ baaabaac y $&-$1 baa-ba +# [perl #60344] Regex lookbehind failure after an (if)then|else in perl 5.10 +/\A(?(?=db2)db2|\D+)(?<!processed)\.csv\z/xms sql_processed.csv n - - +/\N{U+0100}/ \x{100} y $& \x{100} # Bug #59328 +[\s][\S] \x{a0}\x{a0} n - - # Unicode complements should not match same character + +# was generating malformed utf8 +'[\x{100}\xff]'i \x{ff} y $& \x{ff} + +((??{ "(?:|)" }))\s C\x20 y - - diff --git a/t/re/reg_60508.t b/t/re/reg_60508.t new file mode 100644 index 0000000000..e370ca0071 --- /dev/null +++ b/t/re/reg_60508.t @@ -0,0 +1,40 @@ +#!./perl + +# This is a test for [perl #60508] which I can't figure out where else +# to put it or what the underlying problem is, but it has to go somewhere. +# --Schwern + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +use utf8; +plan tests => 1; + +{ + my $expect = <<"EXPECT"; +k1 = .... +k2.1 = >\x{2022} +k2.2 = \x{2022} +EXPECT + utf8::encode($expect); + + #local $TODO = "[perl #60508]"; + + fresh_perl_is(<<'CODE', $expect, {}); +binmode STDOUT, ":utf8"; +sub f { $_[0] =~ s/([>X])//g; } + +$k1 = "." x 4 . ">>"; +f($k1); +print "k1 = $k1\n"; + +$k2 = "\x{f1}\x{2022}"; +$k2 =~ s/([\360-\362])/>/g; +print "k2.1 = $k2\n"; +f($k2); +print "k2.2 = $k2\n"; +CODE +} diff --git a/t/re/reg_email.t b/t/re/reg_email.t new file mode 100644 index 0000000000..6255ee3d72 --- /dev/null +++ b/t/re/reg_email.t @@ -0,0 +1,103 @@ +#!./perl +# +# Tests to make sure the regexp engine doesn't run into limits too soon. +# + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +print "1..13\n"; + +my $email = qr { + (?(DEFINE) + (?<address> (?&mailbox) | (?&group)) + (?<mailbox> (?&name_addr) | (?&addr_spec)) + (?<name_addr> (?&display_name)? (?&angle_addr)) + (?<angle_addr> (?&CFWS)? < (?&addr_spec) > (?&CFWS)?) + (?<group> (?&display_name) : (?:(?&mailbox_list) | (?&CFWS))? ; + (?&CFWS)?) + (?<display_name> (?&phrase)) + (?<mailbox_list> (?&mailbox) (?: , (?&mailbox))*) + + (?<addr_spec> (?&local_part) \@ (?&domain)) + (?<local_part> (?&dot_atom) | (?"ed_string)) + (?<domain> (?&dot_atom) | (?&domain_literal)) + (?<domain_literal> (?&CFWS)? \[ (?: (?&FWS)? (?&dcontent))* (?&FWS)? + \] (?&CFWS)?) + (?<dcontent> (?&dtext) | (?"ed_pair)) + (?<dtext> (?&NO_WS_CTL) | [\x21-\x5a\x5e-\x7e]) + + (?<atext> (?&ALPHA) | (?&DIGIT) | [!#\$%&'*+-/=?^_`{|}~]) + (?<atom> (?&CFWS)? (?&atext)+ (?&CFWS)?) + (?<dot_atom> (?&CFWS)? (?&dot_atom_text) (?&CFWS)?) + (?<dot_atom_text> (?&atext)+ (?: \. (?&atext)+)*) + + (?<text> [\x01-\x09\x0b\x0c\x0e-\x7f]) + (?<quoted_pair> \\ (?&text)) + + (?<qtext> (?&NO_WS_CTL) | [\x21\x23-\x5b\x5d-\x7e]) + (?<qcontent> (?&qtext) | (?"ed_pair)) + (?<quoted_string> (?&CFWS)? (?&DQUOTE) (?:(?&FWS)? (?&qcontent))* + (?&FWS)? (?&DQUOTE) (?&CFWS)?) + + (?<word> (?&atom) | (?"ed_string)) + (?<phrase> (?&word)+) + + # Folding white space + (?<FWS> (?: (?&WSP)* (?&CRLF))? (?&WSP)+) + (?<ctext> (?&NO_WS_CTL) | [\x21-\x27\x2a-\x5b\x5d-\x7e]) + (?<ccontent> (?&ctext) | (?"ed_pair) | (?&comment)) + (?<comment> \( (?: (?&FWS)? (?&ccontent))* (?&FWS)? \) ) + (?<CFWS> (?: (?&FWS)? (?&comment))* + (?: (?:(?&FWS)? (?&comment)) | (?&FWS))) + + # No whitespace control + (?<NO_WS_CTL> [\x01-\x08\x0b\x0c\x0e-\x1f\x7f]) + + (?<ALPHA> [A-Za-z]) + (?<DIGIT> [0-9]) + (?<CRLF> \x0d \x0a) + (?<DQUOTE> ") + (?<WSP> [\x20\x09]) + ) + + (?&address) +}x; + + +run_tests() unless caller; + +sub run_tests { + my $count = 0; + + $| = 1; + # rewinding DATA is necessary with PERLIO=stdio when this + # test is run from another thread + seek *DATA, 0, 0; + while (<DATA>) { last if /^__DATA__/ } + while (<DATA>) { + chomp; + next if /^#/; + print /^$email$/ ? "ok " : "not ok ", ++ $count, "\n"; + } +} + +# +# Acme::MetaSyntactic ++ +# +__DATA__ +Jeff_Tracy@thunderbirds.org +"Lady Penelope"@thunderbirds.org +"The\ Hood"@thunderbirds.org +fred @ flintstones.net +barney (rubble) @ flintstones.org +bammbamm (bam! bam! (bam! bam! (bam!)) bam!) @ flintstones.org +Michelangelo@[127.0.0.1] +Donatello @ [127.0.0.1] +Raphael (He as well) @ [127.0.0.1] +"Leonardo" @ [127.0.0.1] +Barbapapa <barbapapa @ barbapapa.net> +"Barba Mama" <barbamama @ [127.0.0.1]> +Barbalala (lalalalalalalala) <barbalala (Yes, her!) @ (barba) barbapapa.net> diff --git a/t/re/reg_email_thr.t b/t/re/reg_email_thr.t new file mode 100644 index 0000000000..8eafc0588d --- /dev/null +++ b/t/re/reg_email_thr.t @@ -0,0 +1,7 @@ +#!./perl + +chdir 't' if -d 't'; +@INC = ('../lib', '.'); + +require 'thread_it.pl'; +thread_it(qw(op reg_email.t)); diff --git a/t/re/reg_fold.t b/t/re/reg_fold.t new file mode 100644 index 0000000000..25144521a5 --- /dev/null +++ b/t/re/reg_fold.t @@ -0,0 +1,42 @@ +#!perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +use strict; +use warnings; +my $count=1; +my @tests; + +my $file="../lib/unicore/CaseFolding.txt"; +open my $fh,"<",$file or die "Failed to read '$file': $!"; +while (<$fh>) { + chomp; + my ($line,$comment)= split/\s+#\s+/, $_; + my ($cp,$type,@fc)=split/[\s;]+/,$line||''; + next unless $type and ($type eq 'F' or $type eq 'C'); + $_="\\x{$_}" for @fc; + my $cpv=hex("0x$cp"); + my $chr="chr(0x$cp)"; + my @str; + push @str,$chr if $cpv<128 or $cpv>256; + if ($cpv<256) { + push @str,"do{my \$c=$chr; utf8::upgrade(\$c); \$c}" + } + + foreach my $str ( @str ) { + my $expr="$str=~/@fc/ix"; + my $t=($cpv > 256 || $str=~/^do/) ? "unicode" : "latin"; + push @tests, + qq[ok($expr,'$chr=~/@fc/ix - $comment ($t string)')]; + $tests[-1]="TODO: { local \$::TODO='[13:41] <BinGOs> cue *It is all Greek to me* joke.';\n$tests[-1] }" + if $cp eq '0390' or $cp eq '03B0'; + $count++; + } +} +eval join ";\n","plan tests=>".($count-1),@tests,"1" + or die $@; +__DATA__ diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t new file mode 100644 index 0000000000..4e8f3c4924 --- /dev/null +++ b/t/re/reg_mesg.t @@ -0,0 +1,194 @@ +#!./perl -w + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +my $debug = 1; + +## +## If the markers used are changed (search for "MARKER1" in regcomp.c), +## update only these two variables, and leave the {#} in the @death/@warning +## arrays below. The {#} is a meta-marker -- it marks where the marker should +## go. + +my $marker1 = "<-- HERE"; +my $marker2 = " <-- HERE "; + +## +## Key-value pairs of code/error of code that should have fatal errors. +## + +eval 'use Config'; # assume defaults if fail +our %Config; +my $inf_m1 = ($Config{reg_infty} || 32767) - 1; +my $inf_p1 = $inf_m1 + 2; +my @death = +( + '/[[=foo=]]/' => 'POSIX syntax [= =] is reserved for future extensions in regex; marked by {#} in m/[[=foo=]{#}]/', + + '/(?<= .*)/' => 'Variable length lookbehind not implemented in regex m/(?<= .*)/', + + '/(?<= x{1000})/' => 'Lookbehind longer than 255 not implemented in regex m/(?<= x{1000})/', + + '/(?@)/' => 'Sequence (?@...) not implemented in regex; marked by {#} in m/(?@{#})/', + + '/(?{ 1/' => 'Sequence (?{...}) not terminated or not {}-balanced in regex; marked by {#} in m/(?{{#} 1/', + + '/(?(1x))/' => 'Switch condition not recognized in regex; marked by {#} in m/(?(1x{#}))/', + + '/(?(1)x|y|z)/' => 'Switch (?(condition)... contains too many branches in regex; marked by {#} in m/(?(1)x|y|{#}z)/', + + '/(?(x)y|x)/' => 'Unknown switch condition (?(x) in regex; marked by {#} in m/(?({#}x)y|x)/', + + '/(?/' => 'Sequence (? incomplete in regex; marked by {#} in m/(?{#}/', + + '/(?;x/' => 'Sequence (?;...) not recognized in regex; marked by {#} in m/(?;{#}x/', + '/(?<;x/' => 'Sequence (?<;...) not recognized in regex; marked by {#} in m/(?<;{#}x/', + + '/(?\ix/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}ix/', + '/(?\mx/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}mx/', + '/(?\:x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}:x/', + '/(?\=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}=x/', + '/(?\!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}!x/', + '/(?\<=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<=x/', + '/(?\<!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<!x/', + '/(?\>x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}>x/', + + '/((x)/' => 'Unmatched ( in regex; marked by {#} in m/({#}(x)/', + + "/x{$inf_p1}/" => "Quantifier in {,} bigger than $inf_m1 in regex; marked by {#} in m/x{{#}$inf_p1}/", + + '/x{3,1}/' => 'Can\'t do {n,m} with n > m in regex; marked by {#} in m/x{3,1}{#}/', + + '/x**/' => 'Nested quantifiers in regex; marked by {#} in m/x**{#}/', + + '/x[/' => 'Unmatched [ in regex; marked by {#} in m/x[{#}/', + + '/*/', => 'Quantifier follows nothing in regex; marked by {#} in m/*{#}/', + + '/\p{x/' => 'Missing right brace on \p{} in regex; marked by {#} in m/\p{{#}x/', + + '/[\p{x]/' => 'Missing right brace on \p{} in regex; marked by {#} in m/[\p{{#}x]/', + + '/(x)\2/' => 'Reference to nonexistent group in regex; marked by {#} in m/(x)\2{#}/', + + 'my $m = "\\\"; $m =~ $m', => 'Trailing \ in regex m/\/', + + '/\x{1/' => 'Missing right brace on \x{} in regex; marked by {#} in m/\x{{#}1/', + + '/[\x{X]/' => 'Missing right brace on \x{} in regex; marked by {#} in m/[\x{{#}X]/', + + '/[[:barf:]]/' => 'POSIX class [:barf:] unknown in regex; marked by {#} in m/[[:barf:]{#}]/', + + '/[[=barf=]]/' => 'POSIX syntax [= =] is reserved for future extensions in regex; marked by {#} in m/[[=barf=]{#}]/', + + '/[[.barf.]]/' => 'POSIX syntax [. .] is reserved for future extensions in regex; marked by {#} in m/[[.barf.]{#}]/', + + '/[z-a]/' => 'Invalid [] range "z-a" in regex; marked by {#} in m/[z-a{#}]/', + + '/\p/' => 'Empty \p{} in regex; marked by {#} in m/\p{#}/', + + '/\P{}/' => 'Empty \P{} in regex; marked by {#} in m/\P{{#}}/', +); + +## +## Key-value pairs of code/error of code that should have non-fatal warnings. +## +@warning = ( + 'm/\b*/' => '\b* matches null string many times in regex; marked by {#} in m/\b*{#}/', + + 'm/[:blank:]/' => 'POSIX syntax [: :] belongs inside character classes in regex; marked by {#} in m/[:blank:]{#}/', + + "m'[\\y]'" => 'Unrecognized escape \y in character class passed through in regex; marked by {#} in m/[\y{#}]/', + + 'm/[a-\d]/' => 'False [] range "a-\d" in regex; marked by {#} in m/[a-\d{#}]/', + 'm/[\w-x]/' => 'False [] range "\w-" in regex; marked by {#} in m/[\w-{#}x]/', + 'm/[a-\pM]/' => 'False [] range "a-\pM" in regex; marked by {#} in m/[a-\pM{#}]/', + 'm/[\pM-x]/' => 'False [] range "\pM-" in regex; marked by {#} in m/[\pM-{#}x]/', + "m'\\y'" => 'Unrecognized escape \y passed through in regex; marked by {#} in m/\y{#}/', +); + +my $total = (@death + @warning)/2; + +# utf8 is a noop on EBCDIC platforms, it is not fatal +my $Is_EBCDIC = (ord('A') == 193); +if ($Is_EBCDIC) { + my @utf8_death = grep(/utf8/, @death); + $total = $total - @utf8_death; +} + +print "1..$total\n"; + +my $count = 0; + +while (@death) +{ + my $regex = shift @death; + my $result = shift @death; + # skip the utf8 test on EBCDIC since they do not die + next if ($Is_EBCDIC && $regex =~ /utf8/); + $count++; + + $_ = "x"; + eval $regex; + if (not $@) { + print "# oops, $regex didn't die\nnot ok $count\n"; + next; + } + chomp $@; + $result =~ s/{\#}/$marker1/; + $result =~ s/{\#}/$marker2/; + $result .= " at "; + if ($@ !~ /^\Q$result/) { + print "# For $regex, expected:\n# $result\n# Got:\n# $@\n#\nnot "; + } + print "ok $count - $regex\n"; +} + + +our $warning; +$SIG{__WARN__} = sub { $warning = shift }; + +while (@warning) +{ + $count++; + my $regex = shift @warning; + my $result = shift @warning; + + undef $warning; + $_ = "x"; + eval $regex; + + if ($@) + { + print "# oops, $regex died with:\n#\t$@#\nnot ok $count\n"; + next; + } + + if (not $warning) + { + print "# oops, $regex didn't generate a warning\nnot ok $count\n"; + next; + } + $result =~ s/{\#}/$marker1/; + $result =~ s/{\#}/$marker2/; + $result .= " at "; + if ($warning !~ /^\Q$result/) + { + print <<"EOM"; +# For $regex, expected: +# $result +# Got: +# $warning +# +not ok $count +EOM + next; + } + print "ok $count - $regex\n"; +} + + + diff --git a/t/re/reg_namedcapture.t b/t/re/reg_namedcapture.t new file mode 100644 index 0000000000..e3a837459d --- /dev/null +++ b/t/re/reg_namedcapture.t @@ -0,0 +1,26 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + unless ( -r "$INC[0]/Errno.pm") { + print "1..0 # Skip: Errno.pm not yet available\n"; + exit 0; + } +} + +# WARNING: Do not directly use any modules as part of this test code. +# We could get action at a distance that would invalidate the tests. + +print "1..2\n"; + +# This tests whether glob assignment fails to load the tie. +*X = *-; +'X'=~/(?<X>X)/; +print eval '*X{HASH}{X} || 1' ? "" :"not ","ok ",++$test,"\n"; + +# And since it's a similar case we check %! as well. Note that +# this can't be done until ../lib/Errno.pm is in place, as the +# glob hits $!, which needs that module. +*Y = *!; +print 0<keys(%Y) ? "" :"not ","ok ",++$test,"\n"; diff --git a/t/re/reg_nc_tie.t b/t/re/reg_nc_tie.t new file mode 100644 index 0000000000..7a79a8e6da --- /dev/null +++ b/t/re/reg_nc_tie.t @@ -0,0 +1,53 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +# Do a basic test on all the tied methods of Tie::Hash::NamedCapture + +print "1..13\n"; + +# PL_curpm->paren_names can be a null pointer. See that this succeeds anyway. +'x' =~ /(.)/; +() = %+; +pass( 'still alive' ); + +"hlagh" =~ / + (?<a>.) + (?<b>.) + (?<a>.) + .* + (?<e>$) +/x; + +# FETCH +is($+{a}, "h", "FETCH"); +is($+{b}, "l", "FETCH"); +is($-{a}[0], "h", "FETCH"); +is($-{a}[1], "a", "FETCH"); + +# STORE +eval { $+{a} = "yon" }; +ok(index($@, "read-only") != -1, "STORE"); + +# DELETE +eval { delete $+{a} }; +ok(index($@, "read-only") != -1, "DELETE"); + +# CLEAR +eval { %+ = () }; +ok(index($@, "read-only") != -1, "CLEAR"); + +# EXISTS +ok(exists $+{e}, "EXISTS"); +ok(!exists $+{d}, "EXISTS"); + +# FIRSTKEY/NEXTKEY +is(join('|', sort keys %+), "a|b|e", "FIRSTKEY/NEXTKEY"); + +# SCALAR +is(scalar(%+), 3, "SCALAR"); +is(scalar(%-), 3, "SCALAR"); diff --git a/t/re/reg_pmod.t b/t/re/reg_pmod.t new file mode 100644 index 0000000000..301aeefc6d --- /dev/null +++ b/t/re/reg_pmod.t @@ -0,0 +1,49 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +use strict; +use warnings; + +our @tests = ( + # /p Pattern PRE MATCH POST + [ '/p', "456", "123-", "456", "-789"], + [ '(?p)', "456", "123-", "456", "-789"], + [ '', "(456)", "123-", "456", "-789"], + [ '', "456", undef, undef, undef ], +); + +plan tests => 4 * @tests + 2; +my $W = ""; + +$SIG{__WARN__} = sub { $W.=join("",@_); }; +sub _u($$) { "$_[0] is ".(defined $_[1] ? "'$_[1]'" : "undef") } + +$_ = '123-456-789'; +foreach my $test (@tests) { + my ($p, $pat,$l,$m,$r) = @$test; + my $test_name = $p eq '/p' ? "/$pat/p" + : $p eq '(?p)' ? "/(?p)$pat/" + : "/$pat/"; + + # + # Cannot use if/else due to the scope invalidating ${^MATCH} and friends. + # + my $ok = ok $p eq '/p' ? /$pat/p + : $p eq '(?p)' ? /(?p)$pat/ + : /$pat/ + => $test_name; + SKIP: { + skip "/$pat/$p failed to match", 3 + unless $ok; + is(${^PREMATCH}, $l,_u "$test_name: ^PREMATCH",$l); + is(${^MATCH}, $m,_u "$test_name: ^MATCH",$m ); + is(${^POSTMATCH}, $r,_u "$test_name: ^POSTMATCH",$r ); + } +} +is($W,"","No warnings should be produced"); +ok(!defined ${^MATCH}, "No /p in scope so ^MATCH is undef"); diff --git a/t/re/reg_posixcc.t b/t/re/reg_posixcc.t new file mode 100644 index 0000000000..8b25d7de52 --- /dev/null +++ b/t/re/reg_posixcc.t @@ -0,0 +1,160 @@ +#!perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +use strict; +use warnings; +plan "no_plan"; + +my @pats=( + "\\w", + "\\W", + "\\s", + "\\S", + "\\d", + "\\D", + "[:alnum:]", + "[:^alnum:]", + "[:alpha:]", + "[:^alpha:]", + "[:ascii:]", + "[:^ascii:]", + "[:cntrl:]", + "[:^cntrl:]", + "[:graph:]", + "[:^graph:]", + "[:lower:]", + "[:^lower:]", + "[:print:]", + "[:^print:]", + "[:punct:]", + "[:^punct:]", + "[:upper:]", + "[:^upper:]", + "[:xdigit:]", + "[:^xdigit:]", + "[:space:]", + "[:^space:]", + "[:blank:]", + "[:^blank:]" ); +if ($ENV{PERL_TEST_LEGACY_POSIX_CC}) { + $::TODO = "Only works under PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS = 0"; +} + +sub rangify { + my $ary= shift; + my $fmt= shift || '%d'; + my $sep= shift || ' '; + my $rng= shift || '..'; + + + my $first= $ary->[0]; + my $last= $ary->[0]; + my $ret= sprintf $fmt, $first; + for my $idx (1..$#$ary) { + if ( $ary->[$idx] != $last + 1) { + if ($last!=$first) { + $ret.=sprintf "%s$fmt",$rng, $last; + } + $first= $last= $ary->[$idx]; + $ret.=sprintf "%s$fmt",$sep,$first; + } else { + $last= $ary->[$idx]; + } + } + if ( $last != $first) { + $ret.=sprintf "%s$fmt",$rng, $last; + } + return $ret; +} + +my $description = ""; +while (@pats) { + my ($yes,$no)= splice @pats,0,2; + + my %err_by_type; + my %singles; + my %complements; + foreach my $b (0..255) { + my %got; + for my $type ('unicode','not-unicode') { + my $str=chr($b).chr($b); + if ($type eq 'unicode') { + $str.=chr(256); + chop $str; + } + if ($str=~/[$yes][$no]/){ + TODO: { + unlike($str,qr/[$yes][$no]/, + "chr($b)=~/[$yes][$no]/ should not match under $type"); + } + push @{$err_by_type{$type}},$b; + } + $got{"[$yes]"}{$type} = $str=~/[$yes]/ ? 1 : 0; + $got{"[$no]"}{$type} = $str=~/[$no]/ ? 1 : 0; + $got{"[^$yes]"}{$type} = $str=~/[^$yes]/ ? 1 : 0; + $got{"[^$no]"}{$type} = $str=~/[^$no]/ ? 1 : 0; + } + foreach my $which ("[$yes]","[$no]","[^$yes]","[^$no]") { + if ($got{$which}{'unicode'} != $got{$which}{'not-unicode'}){ + TODO: { + is($got{$which}{'unicode'},$got{$which}{'not-unicode'}, + "chr($b)=~/$which/ should have the same results regardless of internal string encoding"); + } + push @{$singles{$which}},$b; + } + } + foreach my $which ($yes,$no) { + foreach my $strtype ('unicode','not-unicode') { + if ($got{"[$which]"}{$strtype} == $got{"[^$which]"}{$strtype}) { + TODO: { + isnt($got{"[$which]"}{$strtype},$got{"[^$which]"}{$strtype}, + "chr($b)=~/[$which]/ should not have the same result as chr($b)=~/[^$which]/"); + } + push @{$complements{$which}{$strtype}},$b; + } + } + } + } + + + if (%err_by_type || %singles || %complements) { + $description||=" Error:\n"; + $description .= "/[$yes][$no]/\n"; + if (%err_by_type) { + foreach my $type (sort keys %err_by_type) { + $description .= "\tmatches $type codepoints:\t"; + $description .= rangify($err_by_type{$type}); + $description .= "\n"; + } + $description .= "\n"; + } + if (%singles) { + $description .= "Unicode/Nonunicode mismatches:\n"; + foreach my $type (sort keys %singles) { + $description .= "\t$type:\t"; + $description .= rangify($singles{$type}); + $description .= "\n"; + } + $description .= "\n"; + } + if (%complements) { + foreach my $class (sort keys %complements) { + foreach my $strtype (sort keys %{$complements{$class}}) { + $description .= "\t$class has complement failures under $strtype for:\t"; + $description .= rangify($complements{$class}{$strtype}); + $description .= "\n"; + } + } + } + } +} +TODO: { + is( $description, "", "POSIX and perl charclasses should not depend on string type"); +} + +__DATA__ diff --git a/t/re/reg_unsafe.t b/t/re/reg_unsafe.t new file mode 100644 index 0000000000..6b19108bdc --- /dev/null +++ b/t/re/reg_unsafe.t @@ -0,0 +1,19 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + +} +print "1..1\n"; + +# there is an equivelent test in t/op/pat.t which does NOT fail +# its not clear why it doesnt fail, so this todo gets its own test +# file until we can work it out. + +my $x; +($x='abc')=~/(abc)/g; +$x='123'; + +print "not " if $1 ne 'abc'; +print "ok 1 # TODO safe match vars make /g slow\n"; diff --git a/t/re/regexp.t b/t/re/regexp.t new file mode 100644 index 0000000000..e970ba9f84 --- /dev/null +++ b/t/re/regexp.t @@ -0,0 +1,207 @@ +#!./perl + +# The tests are in a separate file 't/op/re_tests'. +# Each line in that file is a separate test. +# There are five columns, separated by tabs. +# +# Column 1 contains the pattern, optionally enclosed in C<''>. +# Modifiers can be put after the closing C<'>. +# +# Column 2 contains the string to be matched. +# +# Column 3 contains the expected result: +# y expect a match +# n expect no match +# c expect an error +# T the test is a TODO (can be combined with y/n/c) +# B test exposes a known bug in Perl, should be skipped +# b test exposes a known bug in Perl, should be skipped if noamp +# t test exposes a bug with threading, TODO if qr_embed_thr +# +# Columns 4 and 5 are used only if column 3 contains C<y> or C<c>. +# +# Column 4 contains a string, usually C<$&>. +# +# Column 5 contains the expected result of double-quote +# interpolating that string after the match, or start of error message. +# +# Column 6, if present, contains a reason why the test is skipped. +# This is printed with "skipped", for harness to pick up. +# +# \n in the tests are interpolated, as are variables of the form ${\w+}. +# +# Blanks lines are treated as PASSING tests to keep the line numbers +# linked to the test number. +# +# If you want to add a regular expression test that can't be expressed +# in this format, don't add it here: put it in op/pat.t instead. +# +# Note that columns 2,3 and 5 are all enclosed in double quotes and then +# evalled; so something like a\"\x{100}$1 has length 3+length($1). + +my $file; +BEGIN { + $iters = shift || 1; # Poor man performance suite, 10000 is OK. + + # Do this open before any chdir + $file = shift; + if (defined $file) { + open TESTS, $file or die "Can't open $file"; + } + + chdir 't' if -d 't'; + @INC = '../lib'; + + if ($qr_embed_thr) { + require Config; + if (!$Config::Config{useithreads}) { + print "1..0 # Skip: no ithreads\n"; + exit 0; + } + if ($ENV{PERL_CORE_MINITEST}) { + print "1..0 # Skip: no dynamic loading on miniperl, no threads\n"; + exit 0; + } + require threads; + } +} + +use strict; +use warnings FATAL=>"all"; +use vars qw($iters $numtests $bang $ffff $nulnul $OP); +use vars qw($qr $skip_amp $qr_embed $qr_embed_thr); # set by our callers + + +if (!defined $file) { + open(TESTS,'re/re_tests') || open(TESTS,'t/re/re_tests') + || open(TESTS,':re:re_tests') || die "Can't open re_tests"; +} + +my @tests = <TESTS>; + +close TESTS; + +$bang = sprintf "\\%03o", ord "!"; # \41 would not be portable. +$ffff = chr(0xff) x 2; +$nulnul = "\0" x 2; +$OP = $qr ? 'qr' : 'm'; + +$| = 1; +printf "1..%d\n# $iters iterations\n", scalar @tests; + +my $test; +TEST: +foreach (@tests) { + $test++; + if (!/\S/ || /^\s*#/ || /^__END__$/) { + print "ok $test # (Blank line or comment)\n"; + if (/#/) { print $_ }; + next; + } + chomp; + s/\\n/\n/g; + my ($pat, $subject, $result, $repl, $expect, $reason) = split(/\t/,$_,6); + $reason = '' unless defined $reason; + my $input = join(':',$pat,$subject,$result,$repl,$expect); + # the double '' below keeps simple syntax highlighters from going crazy + $pat = "'$pat'" unless $pat =~ /^[:''\/]/; + $pat =~ s/(\$\{\w+\})/$1/eeg; + $pat =~ s/\\n/\n/g; + $subject = eval qq("$subject"); die $@ if $@; + $expect = eval qq("$expect"); die $@ if $@; + $expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/; + my $todo_qr = $qr_embed_thr && ($result =~ s/t//); + my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//)); + $reason = 'skipping $&' if $reason eq '' && $skip_amp; + $result =~ s/B//i unless $skip; + my $todo= $result =~ s/T// ? " # TODO" : ""; + + + for my $study ('', 'study $subject', 'utf8::upgrade($subject)', + 'utf8::upgrade($subject); study $subject') { + # Need to make a copy, else the utf8::upgrade of an alreay studied + # scalar confuses things. + my $subject = $subject; + my $c = $iters; + my ($code, $match, $got); + if ($repl eq 'pos') { + $code= <<EOFCODE; + $study; + pos(\$subject)=0; + \$match = ( \$subject =~ m${pat}g ); + \$got = pos(\$subject); +EOFCODE + } + elsif ($qr_embed) { + $code= <<EOFCODE; + my \$RE = qr$pat; + $study; + \$match = (\$subject =~ /(?:)\$RE(?:)/) while \$c--; + \$got = "$repl"; +EOFCODE + } + elsif ($qr_embed_thr) { + $code= <<EOFCODE; + # Can't run the match in a subthread, but can do this and + # clone the pattern the other way. + my \$RE = threads->new(sub {qr$pat})->join(); + $study; + \$match = (\$subject =~ /(?:)\$RE(?:)/) while \$c--; + \$got = "$repl"; +EOFCODE + } + else { + $code= <<EOFCODE; + $study; + \$match = (\$subject =~ $OP$pat) while \$c--; + \$got = "$repl"; +EOFCODE + } + #$code.=qq[\n\$expect="$expect";\n]; + #use Devel::Peek; + #die Dump($code) if $pat=~/\\h/ and $subject=~/\x{A0}/; + { + # Probably we should annotate specific tests with which warnings + # categories they're known to trigger, and hence should be + # disabled just for that test + no warnings qw(uninitialized regexp); + eval $code; + } + chomp( my $err = $@ ); + if ($result eq 'c') { + if ($err !~ m!^\Q$expect!) { print "not ok $test$todo (compile) $input => `$err'\n"; next TEST } + last; # no need to study a syntax error + } + elsif ( $skip ) { + print "ok $test # skipped", length($reason) ? " $reason" : '', "\n"; + next TEST; + } + elsif ( $todo_qr ) { + print "not ok $test # TODO", length($reason) ? " - $reason" : '', "\n"; + next TEST; + } + elsif ($@) { + print "not ok $test$todo $input => error `$err'\n$code\n$@\n"; next TEST; + } + elsif ($result =~ /^n/) { + if ($match) { print "not ok $test$todo ($study) $input => false positive\n"; next TEST } + } + else { + if (!$match || $got ne $expect) { + eval { require Data::Dumper }; + if ($@) { + print "not ok $test$todo ($study) $input => `$got', match=$match\n$code\n"; + } + else { # better diagnostics + my $s = Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump; + my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump; + print "not ok $test$todo ($study) $input => `$got', match=$match\n$s\n$g\n$code\n"; + } + next TEST; + } + } + } + print "ok $test$todo\n"; +} + +1; diff --git a/t/re/regexp_noamp.t b/t/re/regexp_noamp.t new file mode 100644 index 0000000000..6b2952d3f5 --- /dev/null +++ b/t/re/regexp_noamp.t @@ -0,0 +1,10 @@ +#!./perl + +$skip_amp = 1; +for $file ('./re/regexp.t', './t/re/regexp.t', ':re:regexp.t') { + if (-r $file) { + do $file or die $@; + exit; + } +} +die "Cannot find ./re/regexp.t or ./t/re/regexp.t\n"; diff --git a/t/re/regexp_notrie.t b/t/re/regexp_notrie.t new file mode 100644 index 0000000000..9ace6ed13d --- /dev/null +++ b/t/re/regexp_notrie.t @@ -0,0 +1,15 @@ +#!./perl +#use re 'debug'; +BEGIN { + ${^RE_TRIE_MAXBUF}=-1; + #${^RE_DEBUG_FLAGS}=0; +} + +$qr = 1; +for $file ('./re/regexp.t', './t/re/regexp.t', ':re:regexp.t') { + if (-r $file) { + do $file or die $@; + exit; + } +} +die "Cannot find ./re/regexp.t or ./t/re/regexp.t\n"; diff --git a/t/re/regexp_qr.t b/t/re/regexp_qr.t new file mode 100644 index 0000000000..18b1cf802a --- /dev/null +++ b/t/re/regexp_qr.t @@ -0,0 +1,10 @@ +#!./perl + +$qr = 1; +for $file ('./re/regexp.t', './t/re/regexp.t', ':re:regexp.t') { + if (-r $file) { + do $file or die $@; + exit; + } +} +die "Cannot find ./re/regexp.t or ./t/re/regexp.t\n"; diff --git a/t/re/regexp_qr_embed.t b/t/re/regexp_qr_embed.t new file mode 100644 index 0000000000..f0366ca645 --- /dev/null +++ b/t/re/regexp_qr_embed.t @@ -0,0 +1,11 @@ +#!./perl + +$qr = 1; +$qr_embed = 1; +for $file ('./re/regexp.t', './t/re/regexp.t', ':re:regexp.t') { + if (-r $file) { + do $file or die $@; + exit; + } +} +die "Cannot find ./re/regexp.t or ./t/re/regexp.t\n"; diff --git a/t/re/regexp_qr_embed_thr.t b/t/re/regexp_qr_embed_thr.t new file mode 100644 index 0000000000..db89c1a461 --- /dev/null +++ b/t/re/regexp_qr_embed_thr.t @@ -0,0 +1,11 @@ +#!./perl + +$qr = 1; +$qr_embed_thr = 1; +for $file ('./re/regexp.t', './t/re/regexp.t', ':re:regexp.t') { + if (-r $file) { + do $file or die $@; + exit; + } +} +die "Cannot find ./re/regexp.t or ./t/re/regexp.t\n"; diff --git a/t/re/regexp_trielist.t b/t/re/regexp_trielist.t new file mode 100644 index 0000000000..55379e86d8 --- /dev/null +++ b/t/re/regexp_trielist.t @@ -0,0 +1,15 @@ +#!./perl +#use re 'debug'; +BEGIN { + ${^RE_TRIE_MAXBUFF}=0; + #${^RE_DEBUG_FLAGS}=0; + } + +$qr = 1; +for $file ('./re/regexp.t', './t/re/regexp.t', ':re:regexp.t') { + if (-r $file) { + do $file or die $@; + exit; + } +} +die "Cannot find ./re/regexp.t or ./t/re/regexp.t\n"; diff --git a/t/re/regexp_unicode_prop.t b/t/re/regexp_unicode_prop.t new file mode 100644 index 0000000000..8cc09f1ade --- /dev/null +++ b/t/re/regexp_unicode_prop.t @@ -0,0 +1,303 @@ +#!./perl +# +# Tests that have to do with checking whether characters have (or not have) +# certain Unicode properties; belong (or not belong) to blocks, scripts, etc. +# + +use strict; +use warnings; +use 5.010; + +my $IS_EBCDIC = ord ('A') == 193; + +sub run_tests; + +# +# This is the data to test. +# +# This is a hash; keys are the property to test. +# Values are arrays containing characters to test. The characters can +# have the following formats: +# '\N{CHARACTER NAME}' - Use character with that name +# '\x{1234}' - Use character with that hex escape +# '0x1234' - Use chr() to get that character +# "a" - Character to use +# +# If a character entry starts with ! the character does not belong to the class +# +# If the class is just single letter, we use both \pL and \p{L} +# + +use charnames ':full'; + +my @CLASSES = ( + L => ["a", "A"], + Ll => ["b", "!B"], + Lu => ["!c", "C"], + IsLl => ["d", "!D"], + IsLu => ["!e", "E"], + LC => ["f", "!1"], + 'L&' => ["g", "!2"], + 'Lowercase Letter' => ["h", "!H"], + + Common => ["!i", "3"], + Inherited => ["!j", '\x{300}'], + + InBasicLatin => ['\N{LATIN CAPITAL LETTER A}'], + InLatin1Supplement => ['\N{LATIN CAPITAL LETTER A WITH GRAVE}'], + InLatinExtendedA => ['\N{LATIN CAPITAL LETTER A WITH MACRON}'], + InLatinExtendedB => ['\N{LATIN SMALL LETTER B WITH STROKE}'], + InKatakana => ['\N{KATAKANA LETTER SMALL A}'], + IsLatin => ["0x100", "0x212b"], + IsHebrew => ["0x5d0", "0xfb4f"], + IsGreek => ["0x37a", "0x386", "!0x387", "0x388", + "0x38a", "!0x38b", "0x38c"], + HangulSyllables => ['\x{AC00}'], + 'Script=Latin' => ['\x{0100}'], + 'Block=LatinExtendedA' => ['\x{0100}'], + 'Category=UppercaseLetter' => ['\x{0100}'], + + # + # It's ok to repeat class names. + # + InLatin1Supplement => + $IS_EBCDIC ? ['!\x{7f}', '\x{80}', '!\x{100}'] + : ['!\x{7f}', '\x{80}', '\x{ff}', '!\x{100}'], + InLatinExtendedA => + ['!\x{7f}', '!\x{80}', '!\x{ff}', '\x{100}'], + + # + # Properties are case-insensitive, and may have whitespace, + # dashes and underscores. + # + 'in-latin1_SUPPLEMENT' => ['\x{80}', + '\N{LATIN SMALL LETTER Y WITH DIAERESIS}'], + ' ^ In Latin 1 Supplement ' + => ['!\x{80}', '\N{COFFIN}'], + 'latin-1 supplement' => ['\x{80}', "0xDF"], + +); + +my @USER_DEFINED_PROPERTIES = ( + # + # User defined properties + # + InKana1 => ['\x{3040}', '!\x{303F}'], + InKana2 => ['\x{3040}', '!\x{303F}'], + InKana3 => ['\x{3041}', '!\x{3040}'], + InNotKana => ['\x{3040}', '!\x{3041}'], + InConsonant => ['d', '!e'], + IsSyriac1 => ['\x{0712}', '!\x{072F}'], + Syriac1 => ['\x{0712}', '!\x{072F}'], + '# User-defined character properties my lack \n at the end', + InGreekSmall => ['\N{GREEK SMALL LETTER PI}', + '\N{GREEK SMALL LETTER FINAL SIGMA}'], + InGreekCapital => ['\N{GREEK CAPITAL LETTER PI}', '!\x{03A2}'], + Dash => ['-'], + ASCII_Hex_Digit => ['!-', 'A'], + AsciiHexAndDash => ['-', 'A'], +); + + +# +# From the short properties we populate POSIX-like classes. +# +my %SHORT_PROPERTIES = ( + 'Ll' => ['m', '\N{CYRILLIC SMALL LETTER A}'], + 'Lu' => ['M', '\N{GREEK CAPITAL LETTER ALPHA}'], + 'Lo' => ['\N{HIRAGANA LETTER SMALL A}'], + 'Mn' => ['\N{COMBINING GRAVE ACCENT}'], + 'Nd' => ["0", '\N{ARABIC-INDIC DIGIT ZERO}'], + 'Pc' => ["_"], + 'Po' => ["!"], + 'Zs' => [" "], + 'Cc' => ['\x{00}'], +); + +# +# Illegal properties +# +my @ILLEGAL_PROPERTIES = qw [q qrst]; + +my %d; + +while (my ($class, $chars) = each %SHORT_PROPERTIES) { + push @{$d {IsAlpha}} => map {$class =~ /^[LM]/ ? $_ : "!$_"} @$chars; + push @{$d {IsAlnum}} => map {$class =~ /^[LMN]./ ? $_ : "!$_"} @$chars; + push @{$d {IsASCII}} => map {length ($_) == 1 || $_ eq '\x{00}' + ? $_ : "!$_"} @$chars; + push @{$d {IsCntrl}} => map {$class =~ /^C/ ? $_ : "!$_"} @$chars; + push @{$d {IsBlank}} => map {$class =~ /^Z[lps]/ ? $_ : "!$_"} @$chars; + push @{$d {IsDigit}} => map {$class =~ /^Nd$/ ? $_ : "!$_"} @$chars; + push @{$d {IsGraph}} => map {$class =~ /^([LMNPS]|Co)/ + ? $_ : "!$_"} @$chars; + push @{$d {IsPrint}} => map {$class =~ /^([LMNPS]|Co|Zs)/ + ? $_ : "!$_"} @$chars; + push @{$d {IsLower}} => map {$class =~ /^Ll$/ ? $_ : "!$_"} @$chars; + push @{$d {IsUpper}} => map {$class =~ /^L[ut]/ ? $_ : "!$_"} @$chars; + push @{$d {IsPunct}} => map {$class =~ /^P/ ? $_ : "!$_"} @$chars; + push @{$d {IsWord}} => map {$class =~ /^[LMN]/ || $_ eq "_" + ? $_ : "!$_"} @$chars; + push @{$d {IsSpace}} => map {$class =~ /^Z/ || + length ($_) == 1 && ord ($_) >= 0x09 + && ord ($_) <= 0x0D + ? $_ : "!$_"} @$chars; +} + +delete $d {IsASCII} if $IS_EBCDIC; + +push @CLASSES => "# Short properties" => %SHORT_PROPERTIES, + "# POSIX like properties" => %d, + "# User defined properties" => @USER_DEFINED_PROPERTIES; + + +# +# Calculate the number of tests. +# +my $count = 0; +for (my $i = 0; $i < @CLASSES; $i += 2) { + $i ++, redo if $CLASSES [$i] =~ /^\h*#\h*(.*)/; + $count += (length $CLASSES [$i] == 1 ? 4 : 2) * @{$CLASSES [$i + 1]}; +} +$count += 2 * @ILLEGAL_PROPERTIES; +$count += 2 * grep {length $_ == 1} @ILLEGAL_PROPERTIES; + +my $tests = 0; + +say "1..$count"; + +run_tests unless caller (); + +sub match { + my ($char, $match, $nomatch) = @_; + + my ($str, $name); + + given ($char) { + when (/^\\/) { + $str = eval qq ["$char"]; + $name = qq ["$char"]; + } + when (/^0x([0-9A-Fa-f]+)$/) { + $str = chr hex $1; + $name = "chr ($char)"; + } + default { + $str = $char; + $name = qq ["$char"]; + } + } + + print "not " unless $str =~ /$match/; + print "ok ", ++ $tests, " - $name =~ /$match/\n"; + print "not " unless $str !~ /$nomatch/; + print "ok ", ++ $tests, " - $name !~ /$nomatch/\n"; +} + +sub run_tests { + + while (@CLASSES) { + my $class = shift @CLASSES; + if ($class =~ /^\h*#\h*(.*)/) { + print "# $1\n"; + next; + } + last unless @CLASSES; + my $chars = shift @CLASSES; + my @in = grep {!/^!./} @$chars; + my @out = map {s/^!(?=.)//; $_} grep { /^!./} @$chars; + my $in_pat = eval qq ['\\p{$class}']; + my $out_pat = eval qq ['\\P{$class}']; + + match $_, $in_pat, $out_pat for @in; + match $_, $out_pat, $in_pat for @out; + + if (1 == length $class) { + my $in_pat = eval qq ['\\p$class']; + my $out_pat = eval qq ['\\P$class']; + + match $_, $in_pat, $out_pat for @in; + match $_, $out_pat, $in_pat for @out; + } + } + + + my $pat = qr /^Can't find Unicode property definition/; + print "# Illegal properties\n"; + foreach my $p (@ILLEGAL_PROPERTIES) { + undef $@; + my $r = eval "'a' =~ /\\p{$p}/; 1"; + print "not " unless !$r && $@ && $@ =~ $pat; + print "ok ", ++ $tests, " - Unknown Unicode property \\p{$p}\n"; + undef $@; + my $s = eval "'a' =~ /\\P{$p}/; 1"; + print "not " unless !$s && $@ && $@ =~ $pat; + print "ok ", ++ $tests, " - Unknown Unicode property \\P{$p}\n"; + if (length $p == 1) { + undef $@; + my $r = eval "'a' =~ /\\p$p/; 1"; + print "not " unless !$r && $@ && $@ =~ $pat; + print "ok ", ++ $tests, " - Unknown Unicode property \\p$p\n"; + undef $@; + my $s = eval "'a' =~ /\\P$p/; 1"; + print "not " unless !$s && $@ && $@ =~ $pat; + print "ok ", ++ $tests, " - Unknown Unicode property \\P$p\n"; + } + } +} + + +# +# User defined properties +# + +sub InKana1 {<<'--'} +3040 309F +30A0 30FF +-- + +sub InKana2 {<<'--'} ++utf8::InHiragana ++utf8::InKatakana +-- + +sub InKana3 {<<'--'} ++utf8::InHiragana ++utf8::InKatakana +-utf8::IsCn +-- + +sub InNotKana {<<'--'} +!utf8::InHiragana +-utf8::InKatakana ++utf8::IsCn +-- + +sub InConsonant {<<'--'} # Not EBCDIC-aware. +0061 007f +-0061 +-0065 +-0069 +-006f +-0075 +-- + +sub IsSyriac1 {<<'--'} +0712 072C +0730 074A +-- + +sub Syriac1 {<<'--'} +0712 072C +0730 074A +-- + +sub InGreekSmall {return "03B1\t03C9"} +sub InGreekCapital {return "0391\t03A9\n-03A2"} + +sub AsciiHexAndDash {<<'--'} ++utf8::ASCII_Hex_Digit ++utf8::Dash +-- + +__END__ diff --git a/t/re/regexp_unicode_prop_thr.t b/t/re/regexp_unicode_prop_thr.t new file mode 100644 index 0000000000..ae40d43af1 --- /dev/null +++ b/t/re/regexp_unicode_prop_thr.t @@ -0,0 +1,7 @@ +#!./perl + +chdir 't' if -d 't'; +@INC = ('../lib', '.'); + +require 'thread_it.pl'; +thread_it(qw(op regexp_unicode_prop.t)); diff --git a/t/re/rxcode.t b/t/re/rxcode.t new file mode 100644 index 0000000000..eb144f9807 --- /dev/null +++ b/t/re/rxcode.t @@ -0,0 +1,86 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require './test.pl'; +} + +plan tests => 38; + +$^R = undef; +like( 'a', qr/^a(?{1})(?:b(?{2}))?/, 'a =~ ab?' ); +cmp_ok( $^R, '==', 1, '..$^R after a =~ ab?' ); + +$^R = undef; +unlike( 'abc', qr/^a(?{3})(?:b(?{4}))$/, 'abc !~ a(?:b)$' ); +ok( !defined $^R, '..$^R after abc !~ a(?:b)$' ); + +$^R = undef; +like( 'ab', qr/^a(?{5})b(?{6})/, 'ab =~ ab' ); +cmp_ok( $^R, '==', 6, '..$^R after ab =~ ab' ); + +$^R = undef; +like( 'ab', qr/^a(?{7})(?:b(?{8}))?/, 'ab =~ ab?' ); + +cmp_ok( $^R, '==', 8, '..$^R after ab =~ ab?' ); + +$^R = undef; +like( 'ab', qr/^a(?{9})b?(?{10})/, 'ab =~ ab? (2)' ); +cmp_ok( $^R, '==', 10, '..$^R after ab =~ ab? (2)' ); + +$^R = undef; +like( 'ab', qr/^(a(?{11})(?:b(?{12})))?/, 'ab =~ (ab)? (3)' ); +cmp_ok( $^R, '==', 12, '..$^R after ab =~ ab? (3)' ); + +$^R = undef; +unlike( 'ac', qr/^a(?{13})b(?{14})/, 'ac !~ ab' ); +ok( !defined $^R, '..$^R after ac !~ ab' ); + +$^R = undef; +like( 'ac', qr/^a(?{15})(?:b(?{16}))?/, 'ac =~ ab?' ); +cmp_ok( $^R, '==', 15, '..$^R after ac =~ ab?' ); + +my @ar; +like( 'ab', qr/^a(?{push @ar,101})(?:b(?{push @ar,102}))?/, 'ab =~ ab? with code push' ); +cmp_ok( scalar(@ar), '==', 2, '..@ar pushed' ); +cmp_ok( $ar[0], '==', 101, '..first element pushed' ); +cmp_ok( $ar[1], '==', 102, '..second element pushed' ); + +$^R = undef; +unlike( 'a', qr/^a(?{103})b(?{104})/, 'a !~ ab with code push' ); +ok( !defined $^R, '..$^R after a !~ ab with code push' ); + +@ar = (); +unlike( 'a', qr/^a(?{push @ar,105})b(?{push @ar,106})/, 'a !~ ab (push)' ); +cmp_ok( scalar(@ar), '==', 0, '..nothing pushed' ); + +@ar = (); +unlike( 'abc', qr/^a(?{push @ar,107})b(?{push @ar,108})$/, 'abc !~ ab$ (push)' ); +cmp_ok( scalar(@ar), '==', 0, '..still nothing pushed' ); + +use vars '@var'; + +like( 'ab', qr/^a(?{push @var,109})(?:b(?{push @var,110}))?/, 'ab =~ ab? push to package var' ); +cmp_ok( scalar(@var), '==', 2, '..@var pushed' ); +cmp_ok( $var[0], '==', 109, '..first element pushed (package)' ); +cmp_ok( $var[1], '==', 110, '..second element pushed (package)' ); + +@var = (); +unlike( 'a', qr/^a(?{push @var,111})b(?{push @var,112})/, 'a !~ ab (push package var)' ); +cmp_ok( scalar(@var), '==', 0, '..nothing pushed (package)' ); + +@var = (); +unlike( 'abc', qr/^a(?{push @var,113})b(?{push @var,114})$/, 'abc !~ ab$ (push package var)' ); +cmp_ok( scalar(@var), '==', 0, '..still nothing pushed (package)' ); + +{ + local $^R = undef; + ok( 'ac' =~ /^a(?{30})(?:b(?{31})|c(?{32}))?/, 'ac =~ a(?:b|c)?' ); + ok( $^R == 32, '$^R == 32' ); +} +{ + local $^R = undef; + ok( 'abbb' =~ /^a(?{36})(?:b(?{37})|c(?{38}))+/, 'abbbb =~ a(?:b|c)+' ); + ok( $^R == 37, '$^R == 37' ) or print "# \$^R=$^R\n"; +} diff --git a/t/re/subst.t b/t/re/subst.t new file mode 100644 index 0000000000..2f6e759287 --- /dev/null +++ b/t/re/subst.t @@ -0,0 +1,600 @@ +#!./perl -w + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; import Config; +} + +require './test.pl'; +plan( tests => 142 ); + +$x = 'foo'; +$_ = "x"; +s/x/\$x/; +ok( $_ eq '$x', ":$_: eq :\$x:" ); + +$_ = "x"; +s/x/$x/; +ok( $_ eq 'foo', ":$_: eq :foo:" ); + +$_ = "x"; +s/x/\$x $x/; +ok( $_ eq '$x foo', ":$_: eq :\$x foo:" ); + +$b = 'cd'; +($a = 'abcdef') =~ s<(b${b}e)>'\n$1'; +ok( $1 eq 'bcde' && $a eq 'a\n$1f', ":$1: eq :bcde: ; :$a: eq :a\\n\$1f:" ); + +$a = 'abacada'; +ok( ($a =~ s/a/x/g) == 4 && $a eq 'xbxcxdx' ); + +ok( ($a =~ s/a/y/g) == 0 && $a eq 'xbxcxdx' ); + +ok( ($a =~ s/b/y/g) == 1 && $a eq 'xyxcxdx' ); + +$_ = 'ABACADA'; +ok( /a/i && s///gi && $_ eq 'BCD' ); + +$_ = '\\' x 4; +ok( length($_) == 4 ); +$snum = s/\\/\\\\/g; +ok( $_ eq '\\' x 8 && $snum == 4 ); + +$_ = '\/' x 4; +ok( length($_) == 8 ); +$snum = s/\//\/\//g; +ok( $_ eq '\\//' x 4 && $snum == 4 ); +ok( length($_) == 12 ); + +$_ = 'aaaXXXXbbb'; +s/^a//; +ok( $_ eq 'aaXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +s/a//; +ok( $_ eq 'aaXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +s/^a/b/; +ok( $_ eq 'baaXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +s/a/b/; +ok( $_ eq 'baaXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +s/aa//; +ok( $_ eq 'aXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +s/aa/b/; +ok( $_ eq 'baXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +s/b$//; +ok( $_ eq 'aaaXXXXbb' ); + +$_ = 'aaaXXXXbbb'; +s/b//; +ok( $_ eq 'aaaXXXXbb' ); + +$_ = 'aaaXXXXbbb'; +s/bb//; +ok( $_ eq 'aaaXXXXb' ); + +$_ = 'aaaXXXXbbb'; +s/aX/y/; +ok( $_ eq 'aayXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +s/Xb/z/; +ok( $_ eq 'aaaXXXzbb' ); + +$_ = 'aaaXXXXbbb'; +s/aaX.*Xbb//; +ok( $_ eq 'ab' ); + +$_ = 'aaaXXXXbbb'; +s/bb/x/; +ok( $_ eq 'aaaXXXXxb' ); + +# now for some unoptimized versions of the same. + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/^a//; +ok( $_ eq 'aaXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/a//; +ok( $_ eq 'aaXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/^a/b/; +ok( $_ eq 'baaXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/a/b/; +ok( $_ eq 'baaXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/aa//; +ok( $_ eq 'aXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/aa/b/; +ok( $_ eq 'baXXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/b$//; +ok( $_ eq 'aaaXXXXbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/b//; +ok( $_ eq 'aaaXXXXbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/bb//; +ok( $_ eq 'aaaXXXXb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/aX/y/; +ok( $_ eq 'aayXXXbbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/Xb/z/; +ok( $_ eq 'aaaXXXzbb' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/aaX.*Xbb//; +ok( $_ eq 'ab' ); + +$_ = 'aaaXXXXbbb'; +$x ne $x || s/bb/x/; +ok( $_ eq 'aaaXXXXxb' ); + +$_ = 'abc123xyz'; +s/(\d+)/$1*2/e; # yields 'abc246xyz' +ok( $_ eq 'abc246xyz' ); +s/(\d+)/sprintf("%5d",$1)/e; # yields 'abc 246xyz' +ok( $_ eq 'abc 246xyz' ); +s/(\w)/$1 x 2/eg; # yields 'aabbcc 224466xxyyzz' +ok( $_ eq 'aabbcc 224466xxyyzz' ); + +$_ = "aaaaa"; +ok( y/a/b/ == 5 ); +ok( y/a/b/ == 0 ); +ok( y/b// == 5 ); +ok( y/b/c/s == 5 ); +ok( y/c// == 1 ); +ok( y/c//d == 1 ); +ok( $_ eq "" ); + +$_ = "Now is the %#*! time for all good men..."; +ok( ($x=(y/a-zA-Z //cd)) == 7 ); +ok( y/ / /s == 8 ); + +$_ = 'abcdefghijklmnopqrstuvwxyz0123456789'; +tr/a-z/A-Z/; + +ok( $_ eq 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' ); + +# same as tr/A-Z/a-z/; +if (defined $Config{ebcdic} && $Config{ebcdic} eq 'define') { # EBCDIC. + no utf8; + y[\301-\351][\201-\251]; +} else { # Ye Olde ASCII. Or something like it. + y[\101-\132][\141-\172]; +} + +ok( $_ eq 'abcdefghijklmnopqrstuvwxyz0123456789' ); + +SKIP: { + skip("not ASCII",1) unless (ord("+") == ord(",") - 1 + && ord(",") == ord("-") - 1 + && ord("a") == ord("b") - 1 + && ord("b") == ord("c") - 1); + $_ = '+,-'; + tr/+--/a-c/; + ok( $_ eq 'abc' ); +} + +$_ = '+,-'; +tr/+\--/a\/c/; +ok( $_ eq 'a,/' ); + +$_ = '+,-'; +tr/-+,/ab\-/; +ok( $_ eq 'b-a' ); + + +# test recursive substitutions +# code based on the recursive expansion of makefile variables + +my %MK = ( + AAAAA => '$(B)', B=>'$(C)', C => 'D', # long->short + E => '$(F)', F=>'p $(G) q', G => 'HHHHH', # short->long + DIR => '$(UNDEFINEDNAME)/xxx', +); +sub var { + my($var,$level) = @_; + return "\$($var)" unless exists $MK{$var}; + return exp_vars($MK{$var}, $level+1); # can recurse +} +sub exp_vars { + my($str,$level) = @_; + $str =~ s/\$\((\w+)\)/var($1, $level+1)/ge; # can recurse + #warn "exp_vars $level = '$str'\n"; + $str; +} + +ok( exp_vars('$(AAAAA)',0) eq 'D' ); +ok( exp_vars('$(E)',0) eq 'p HHHHH q' ); +ok( exp_vars('$(DIR)',0) eq '$(UNDEFINEDNAME)/xxx' ); +ok( exp_vars('foo $(DIR)/yyy bar',0) eq 'foo $(UNDEFINEDNAME)/xxx/yyy bar' ); + +$_ = "abcd"; +s/(..)/$x = $1, m#.#/eg; +ok( $x eq "cd", 'a match nested in the RHS of a substitution' ); + +# Subst and lookbehind + +$_="ccccc"; +$snum = s/(?<!x)c/x/g; +ok( $_ eq "xxxxx" && $snum == 5 ); + +$_="ccccc"; +$snum = s/(?<!x)(c)/x/g; +ok( $_ eq "xxxxx" && $snum == 5 ); + +$_="foobbarfoobbar"; +$snum = s/(?<!r)foobbar/foobar/g; +ok( $_ eq "foobarfoobbar" && $snum == 1 ); + +$_="foobbarfoobbar"; +$snum = s/(?<!ar)(foobbar)/foobar/g; +ok( $_ eq "foobarfoobbar" && $snum == 1 ); + +$_="foobbarfoobbar"; +$snum = s/(?<!ar)foobbar/foobar/g; +ok( $_ eq "foobarfoobbar" && $snum == 1 ); + +eval 's{foo} # this is a comment, not a delimiter + {bar};'; +ok( ! @?, 'parsing of split subst with comment' ); + +$snum = eval '$_="exactly"; s sxsys;m 3(yactl)3;$1'; +is( $snum, 'yactl', 'alpha delimiters are allowed' ); + +$_="baacbaa"; +$snum = tr/a/b/s; +ok( $_ eq "bbcbb" && $snum == 4, + 'check if squashing works at the end of string' ); + +$_ = "ab"; +ok( s/a/b/ == 1 ); + +$_ = <<'EOL'; + $url = new URI::URL "http://www/"; die if $url eq "xXx"; +EOL +$^R = 'junk'; + +$foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' . + ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' . + ' lowercase $@%#MiXeD$@%# '; + +$snum = +s{ \d+ \b [,.;]? (?{ 'digits' }) + | + [a-z]+ \b [,.;]? (?{ 'lowercase' }) + | + [A-Z]+ \b [,.;]? (?{ 'UPPERCASE' }) + | + [A-Z] [a-z]+ \b [,.;]? (?{ 'Capitalized' }) + | + [A-Za-z]+ \b [,.;]? (?{ 'MiXeD' }) + | + [A-Za-z0-9]+ \b [,.;]? (?{ 'alphanumeric' }) + | + \s+ (?{ ' ' }) + | + [^A-Za-z0-9\s]+ (?{ '$@%#' }) +}{$^R}xg; +ok( $_ eq $foo ); +ok( $snum == 31 ); + +$_ = 'a' x 6; +$snum = s/a(?{})//g; +ok( $_ eq '' && $snum == 6 ); + +$_ = 'x' x 20; +$snum = s/(\d*|x)/<$1>/g; +$foo = '<>' . ('<x><>' x 20) ; +ok( $_ eq $foo && $snum == 41 ); + +$t = 'aaaaaaaaa'; + +$_ = $t; +pos = 6; +$snum = s/\Ga/xx/g; +ok( $_ eq 'aaaaaaxxxxxx' && $snum == 3 ); + +$_ = $t; +pos = 6; +$snum = s/\Ga/x/g; +ok( $_ eq 'aaaaaaxxx' && $snum == 3 ); + +$_ = $t; +pos = 6; +s/\Ga/xx/; +ok( $_ eq 'aaaaaaxxaa' ); + +$_ = $t; +pos = 6; +s/\Ga/x/; +ok( $_ eq 'aaaaaaxaa' ); + +$_ = $t; +$snum = s/\Ga/xx/g; +ok( $_ eq 'xxxxxxxxxxxxxxxxxx' && $snum == 9 ); + +$_ = $t; +$snum = s/\Ga/x/g; +ok( $_ eq 'xxxxxxxxx' && $snum == 9 ); + +$_ = $t; +s/\Ga/xx/; +ok( $_ eq 'xxaaaaaaaa' ); + +$_ = $t; +s/\Ga/x/; +ok( $_ eq 'xaaaaaaaa' ); + +$_ = 'aaaa'; +$snum = s/\ba/./g; +ok( $_ eq '.aaa' && $snum == 1 ); + +eval q% s/a/"b"}/e %; +ok( $@ =~ /Bad evalled substitution/ ); +eval q% ($_ = "x") =~ s/(.)/"$1 "/e %; +ok( $_ eq "x " and !length $@ ); +$x = $x = 'interp'; +eval q% ($_ = "x") =~ s/x(($x)*)/"$1"/e %; +ok( $_ eq '' and !length $@ ); + +$_ = "C:/"; +ok( !s/^([a-z]:)/\u$1/ ); + +$_ = "Charles Bronson"; +$snum = s/\B\w//g; +ok( $_ eq "C B" && $snum == 12 ); + +{ + use utf8; + my $s = "H\303\266he"; + my $l = my $r = $s; + $l =~ s/[^\w]//g; + $r =~ s/[^\w\.]//g; + is($l, $r, "use utf8 \\w"); +} + +my $pv1 = my $pv2 = "Andreas J. K\303\266nig"; +$pv1 =~ s/A/\x{100}/; +substr($pv2,0,1) = "\x{100}"; +is($pv1, $pv2); + +SKIP: { + skip("EBCDIC", 3) if ord("A") == 193; + + { + # Gregor Chrupala <gregor.chrupala@star-group.net> + use utf8; + $a = 'España'; + $a =~ s/ñ/ñ/; + like($a, qr/ñ/, "use utf8 RHS"); + } + + { + use utf8; + $a = 'España España'; + $a =~ s/ñ/ñ/; + like($a, qr/ñ/, "use utf8 LHS"); + } + + { + use utf8; + $a = 'España'; + $a =~ s/ñ/ñ/; + like($a, qr/ñ/, "use utf8 LHS and RHS"); + } +} + +{ + # SADAHIRO Tomoyuki <bqw10602@nifty.com> + + $a = "\x{100}\x{101}"; + $a =~ s/\x{101}/\xFF/; + like($a, qr/\xFF/); + is(length($a), 2, "SADAHIRO utf8 s///"); + + $a = "\x{100}\x{101}"; + $a =~ s/\x{101}/"\xFF"/e; + like($a, qr/\xFF/); + is(length($a), 2); + + $a = "\x{100}\x{101}"; + $a =~ s/\x{101}/\xFF\xFF\xFF/; + like($a, qr/\xFF\xFF\xFF/); + is(length($a), 4); + + $a = "\x{100}\x{101}"; + $a =~ s/\x{101}/"\xFF\xFF\xFF"/e; + like($a, qr/\xFF\xFF\xFF/); + is(length($a), 4); + + $a = "\xFF\x{101}"; + $a =~ s/\xFF/\x{100}/; + like($a, qr/\x{100}/); + is(length($a), 2); + + $a = "\xFF\x{101}"; + $a =~ s/\xFF/"\x{100}"/e; + like($a, qr/\x{100}/); + is(length($a), 2); + + $a = "\xFF"; + $a =~ s/\xFF/\x{100}/; + like($a, qr/\x{100}/); + is(length($a), 1); + + $a = "\xFF"; + $a =~ s/\xFF/"\x{100}"/e; + like($a, qr/\x{100}/); + is(length($a), 1); +} + +{ + # subst with mixed utf8/non-utf8 type + my($ua, $ub, $uc, $ud) = ("\x{101}", "\x{102}", "\x{103}", "\x{104}"); + my($na, $nb) = ("\x{ff}", "\x{fe}"); + my $a = "$ua--$ub"; + my $b; + ($b = $a) =~ s/--/$na/; + is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8"); + ($b = $a) =~ s/--/--$na--/; + is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8"); + ($b = $a) =~ s/--/$uc/; + is($b, "$ua$uc$ub", "s///: replace utf8 into utf8"); + ($b = $a) =~ s/--/--$uc--/; + is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8"); + $a = "$na--$nb"; + ($b = $a) =~ s/--/$ua/; + is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8"); + ($b = $a) =~ s/--/--$ua--/; + is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8"); + + # now with utf8 pattern + $a = "$ua--$ub"; + ($b = $a) =~ s/-($ud)?-/$na/; + is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8 (utf8 pattern)"); + ($b = $a) =~ s/-($ud)?-/--$na--/; + is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8 (utf8 pattern)"); + ($b = $a) =~ s/-($ud)?-/$uc/; + is($b, "$ua$uc$ub", "s///: replace utf8 into utf8 (utf8 pattern)"); + ($b = $a) =~ s/-($ud)?-/--$uc--/; + is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8 (utf8 pattern)"); + $a = "$na--$nb"; + ($b = $a) =~ s/-($ud)?-/$ua/; + is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8 (utf8 pattern)"); + ($b = $a) =~ s/-($ud)?-/--$ua--/; + is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8 (utf8 pattern)"); + ($b = $a) =~ s/-($ud)?-/$na/; + is($b, "$na$na$nb", "s///: replace non-utf8 into non-utf8 (utf8 pattern)"); + ($b = $a) =~ s/-($ud)?-/--$na--/; + is($b, "$na--$na--$nb", "s///: replace long non-utf8 into non-utf8 (utf8 pattern)"); +} + +$_ = 'aaaa'; +$r = 'x'; +$s = s/a(?{})/$r/g; +is("<$_> <$s>", "<xxxx> <4>", "[perl #7806]"); + +$_ = 'aaaa'; +$s = s/a(?{})//g; +is("<$_> <$s>", "<> <4>", "[perl #7806]"); + +# [perl #19048] Coredump in silly replacement +{ + local $^W = 0; + $_="abcdef\n"; + s!.!!eg; + is($_, "\n", "[perl #19048]"); +} + +# [perl #17757] interaction between saw_ampersand and study +{ + my $f = eval q{ $& }; + $f = "xx"; + study $f; + $f =~ s/x/y/g; + is($f, "yy", "[perl #17757]"); +} + +# [perl #20684] returned a zero count +$_ = "1111"; +is(s/(??{1})/2/eg, 4, '#20684 s/// with (??{..}) inside'); + +# [perl #20682] @- not visible in replacement +$_ = "123"; +/(2)/; # seed @- with something else +s/(1)(2)(3)/$#- (@-)/; +is($_, "3 (0 0 1 2)", '#20682 @- not visible in replacement'); + +# [perl #20682] $^N not visible in replacement +$_ = "abc"; +/(a)/; s/(b)|(c)/-$^N/g; +is($_,'a-b-c','#20682 $^N not visible in replacement'); + +# [perl #22351] perl bug with 'e' substitution modifier +my $name = "chris"; +{ + no warnings 'uninitialized'; + $name =~ s/hr//e; +} +is($name, "cis", q[#22351 bug with 'e' substitution modifier]); + + +# [perl #34171] $1 didn't honour 'use bytes' in s//e +{ + my $s="\x{100}"; + my $x; + { + use bytes; + $s=~ s/(..)/$x=$1/e + } + is(length($x), 2, '[perl #34171]'); +} + + +{ # [perl #27940] perlbug: [\x00-\x1f] works, [\c@-\c_] does not + my $c; + + ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ s/[\c@-\c_]//g; + is($c, "\x20\x30\x40\x50\x60", "s/[\\c\@-\\c_]//g"); + + ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ s/[\x00-\x1f]//g; + is($c, "\x20\x30\x40\x50\x60", "s/[\\x00-\\x1f]//g"); +} +{ + $_ = "xy"; + no warnings 'uninitialized'; + /(((((((((x)))))))))(z)/; # clear $10 + s/(((((((((x)))))))))(y)/${10}/; + is($_,"y","RT#6006: \$_ eq '$_'"); + $_ = "xr"; + s/(((((((((x)))))))))(r)/fooba${10}/; + is($_,"foobar","RT#6006: \$_ eq '$_'"); +} +{ + my $want=("\n" x 11).("B\n" x 11)."B"; + $_="B"; + our $i; + for $i(1..11){ + s/^.*$/$&/gm; + $_="\n$_\n$&"; + } + is($want,$_,"RT#17542"); +} + +{ + my @tests = ('ABC', "\xA3\xA4\xA5", "\x{410}\x{411}\x{412}"); + foreach (@tests) { + my $id = ord $_; + s/./pos/ge; + is($_, "012", "RT#52104: $id"); + } +} + +fresh_perl_is( '$_=q(foo);s/(.)\G//g;print' => 'foo', '[perl #69056] positive GPOS regex segfault' ); +fresh_perl_is( '$_="abcef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'aXX[c-e][e-f]f', 'positive GPOS regex substitution failure' ); + diff --git a/t/re/substT.t b/t/re/substT.t new file mode 100644 index 0000000000..22c3c504ef --- /dev/null +++ b/t/re/substT.t @@ -0,0 +1,9 @@ +#!perl -wT + +for $file ('re/subst.t', 't/re/subst.t', ':re:subst.t') { + if (-r $file) { + do "./$file"; + exit; + } +} +die "Cannot find re/subst.t or t/re/subst.t\n"; diff --git a/t/re/subst_amp.t b/t/re/subst_amp.t new file mode 100644 index 0000000000..71895720f7 --- /dev/null +++ b/t/re/subst_amp.t @@ -0,0 +1,104 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; import Config; +} + +print "1..13\n"; + +$_ = 'x' x 20; +s/\d*|x/<$&>/g; +$foo = '<>' . ('<x><>' x 20) ; +print ($_ eq $foo ? "ok 1\n" : "not ok 1\n#'$_'\n#'$foo'\n"); + +$t = 'aaa'; + +$_ = $t; +@res = (); +pos = 1; +s/\Ga(?{push @res, $_, $`})/xx/g; +print "not " unless "$_ @res" eq 'axxxx aaa a aaa aa'; +print "ok 2\n"; + +$_ = $t; +@res = (); +pos = 1; +s/\Ga(?{push @res, $_, $`})/x/g; +print "not " unless "$_ @res" eq 'axx aaa a aaa aa'; +print "ok 3\n"; + +$_ = $t; +@res = (); +pos = 1; +s/\Ga(?{push @res, $_, $`})/xx/; +print "not " unless "$_ @res" eq 'axxa aaa a'; +print "ok 4\n"; + +$_ = $t; +@res = (); +pos = 1; +s/\Ga(?{push @res, $_, $`})/x/; +print "not " unless "$_ @res" eq 'axa aaa a'; +print "ok 5\n"; + +$a = $t; +@res = (); +pos ($a) = 1; +$a =~ s/\Ga(?{push @res, $_, $`})/xx/g; +print "#'$a' '@res'\nnot " unless "$a @res" eq 'axxxx aaa a aaa aa'; +print "ok 6\n"; + +$a = $t; +@res = (); +pos ($a) = 1; +$a =~ s/\Ga(?{push @res, $_, $`})/x/g; +print "#'$a' '@res'\nnot " unless "$a @res" eq 'axx aaa a aaa aa'; +print "ok 7\n"; + +$a = $t; +@res = (); +pos ($a) = 1; +$a =~ s/\Ga(?{push @res, $_, $`})/xx/; +print "#'$a' '@res'\nnot " unless "$a @res" eq 'axxa aaa a'; +print "ok 8\n"; + +$a = $t; +@res = (); +pos ($a) = 1; +$a =~ s/\Ga(?{push @res, $_, $`})/x/; +print "#'$a' '@res'\nnot " unless "$a @res" eq 'axa aaa a'; +print "ok 9\n"; + +sub x2 {'xx'} +sub x1 {'x'} + +$a = $t; +@res = (); +pos ($a) = 1; +$a =~ s/\Ga(?{push @res, $_, $`})/x2/ge; +print "#'$a' '@res'\nnot " unless "$a @res" eq 'axxxx aaa a aaa aa'; +print "ok 10\n"; + +$a = $t; +@res = (); +pos ($a) = 1; +$a =~ s/\Ga(?{push @res, $_, $`})/x1/ge; +print "#'$a' '@res'\nnot " unless "$a @res" eq 'axx aaa a aaa aa'; +print "ok 11\n"; + +$a = $t; +@res = (); +pos ($a) = 1; +$a =~ s/\Ga(?{push @res, $_, $`})/x2/e; +print "#'$a' '@res'\nnot " unless "$a @res" eq 'axxa aaa a'; +print "ok 12\n"; + +$a = $t; +@res = (); +pos ($a) = 1; +$a =~ s/\Ga(?{push @res, $_, $`})/x1/e; +print "#'$a' '@res'\nnot " unless "$a @res" eq 'axa aaa a'; +print "ok 13\n"; + diff --git a/t/re/subst_wamp.t b/t/re/subst_wamp.t new file mode 100644 index 0000000000..286141cfcd --- /dev/null +++ b/t/re/subst_wamp.t @@ -0,0 +1,11 @@ +#!./perl + +$dummy = defined $&; # Now we have it... +for $file ('re/subst.t', 't/re/subst.t', ':re:subst.t') { + if (-r $file) { + do "./$file"; + exit; + } +} +die "Cannot find re/subst.t or t/re/subst.t\n"; + diff --git a/t/re/substr.t b/t/re/substr.t new file mode 100644 index 0000000000..c3fa6e10e7 --- /dev/null +++ b/t/re/substr.t @@ -0,0 +1,685 @@ +#!./perl + +#P = start of string Q = start of substr R = end of substr S = end of string + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} +use warnings ; +no warnings 'deprecated'; + +$a = 'abcdefxyz'; +$SIG{__WARN__} = sub { + if ($_[0] =~ /^substr outside of string/) { + $w++; + } elsif ($_[0] =~ /^Attempt to use reference as lvalue in substr/) { + $w += 2; + } elsif ($_[0] =~ /^Use of uninitialized value/) { + $w += 3; + } else { + warn $_[0]; + } +}; + +require './test.pl'; + +plan(334); + +run_tests() unless caller; + +my $krunch = "a"; + +sub run_tests { + +$FATAL_MSG = qr/^substr outside of string/; + +is(substr($a,0,3), 'abc'); # P=Q R S +is(substr($a,3,3), 'def'); # P Q R S +is(substr($a,6,999), 'xyz'); # P Q S R +$b = substr($a,999,999) ; # warn # P R Q S +is ($w--, 1); +eval{substr($a,999,999) = "" ; };# P R Q S +like ($@, $FATAL_MSG); +is(substr($a,0,-6), 'abc'); # P=Q R S +is(substr($a,-3,1), 'x'); # P Q R S + +$[ = 1; + +is(substr($a,1,3), 'abc' ); # P=Q R S +is(substr($a,4,3), 'def' ); # P Q R S +is(substr($a,7,999), 'xyz');# P Q S R +$b = substr($a,999,999) ; # warn # P R Q S +is($w--, 1); +eval{substr($a,999,999) = "" ; } ; # P R Q S +like ($@, $FATAL_MSG); +is(substr($a,1,-6), 'abc' );# P=Q R S +is(substr($a,-3,1), 'x' ); # P Q R S + +$[ = 0; + +substr($a,3,3) = 'XYZ'; +is($a, 'abcXYZxyz' ); +substr($a,0,2) = ''; +is($a, 'cXYZxyz' ); +substr($a,0,0) = 'ab'; +is($a, 'abcXYZxyz' ); +substr($a,0,0) = '12345678'; +is($a, '12345678abcXYZxyz' ); +substr($a,-3,3) = 'def'; +is($a, '12345678abcXYZdef'); +substr($a,-3,3) = '<'; +is($a, '12345678abcXYZ<' ); +substr($a,-1,1) = '12345678'; +is($a, '12345678abcXYZ12345678' ); + +$a = 'abcdefxyz'; + +is(substr($a,6), 'xyz' ); # P Q R=S +is(substr($a,-3), 'xyz' ); # P Q R=S +$b = substr($a,999,999) ; # warning # P R=S Q +is($w--, 1); +eval{substr($a,999,999) = "" ; } ; # P R=S Q +like($@, $FATAL_MSG); +is(substr($a,0), 'abcdefxyz'); # P=Q R=S +is(substr($a,9), ''); # P Q=R=S +is(substr($a,-11), 'abcdefxyz'); # Q P R=S +is(substr($a,-9), 'abcdefxyz'); # P=Q R=S + +$a = '54321'; + +$b = substr($a,-7, 1) ; # warn # Q R P S +is($w--, 1); +eval{substr($a,-7, 1) = "" ; }; # Q R P S +like($@, $FATAL_MSG); +$b = substr($a,-7,-6) ; # warn # Q R P S +is($w--, 1); +eval{substr($a,-7,-6) = "" ; }; # Q R P S +like($@, $FATAL_MSG); +is(substr($a,-5,-7), ''); # R P=Q S +is(substr($a, 2,-7), ''); # R P Q S +is(substr($a,-3,-7), ''); # R P Q S +is(substr($a, 2,-5), ''); # P=R Q S +is(substr($a,-3,-5), ''); # P=R Q S +is(substr($a, 2,-4), ''); # P R Q S +is(substr($a,-3,-4), ''); # P R Q S +is(substr($a, 5,-6), ''); # R P Q=S +is(substr($a, 5,-5), ''); # P=R Q S +is(substr($a, 5,-3), ''); # P R Q=S +$b = substr($a, 7,-7) ; # warn # R P S Q +is($w--, 1); +eval{substr($a, 7,-7) = "" ; }; # R P S Q +like($@, $FATAL_MSG); +$b = substr($a, 7,-5) ; # warn # P=R S Q +is($w--, 1); +eval{substr($a, 7,-5) = "" ; }; # P=R S Q +like($@, $FATAL_MSG); +$b = substr($a, 7,-3) ; # warn # P Q S Q +is($w--, 1); +eval{substr($a, 7,-3) = "" ; }; # P Q S Q +like($@, $FATAL_MSG); +$b = substr($a, 7, 0) ; # warn # P S Q=R +is($w--, 1); +eval{substr($a, 7, 0) = "" ; }; # P S Q=R +like($@, $FATAL_MSG); + +is(substr($a,-7,2), ''); # Q P=R S +is(substr($a,-7,4), '54'); # Q P R S +is(substr($a,-7,7), '54321');# Q P R=S +is(substr($a,-7,9), '54321');# Q P S R +is(substr($a,-5,0), ''); # P=Q=R S +is(substr($a,-5,3), '543');# P=Q R S +is(substr($a,-5,5), '54321');# P=Q R=S +is(substr($a,-5,7), '54321');# P=Q S R +is(substr($a,-3,0), ''); # P Q=R S +is(substr($a,-3,3), '321');# P Q R=S +is(substr($a,-2,3), '21'); # P Q S R +is(substr($a,0,-5), ''); # P=Q=R S +is(substr($a,2,-3), ''); # P Q=R S +is(substr($a,0,0), ''); # P=Q=R S +is(substr($a,0,5), '54321');# P=Q R=S +is(substr($a,0,7), '54321');# P=Q S R +is(substr($a,2,0), ''); # P Q=R S +is(substr($a,2,3), '321'); # P Q R=S +is(substr($a,5,0), ''); # P Q=R=S +is(substr($a,5,2), ''); # P Q=S R +is(substr($a,-7,-5), ''); # Q P=R S +is(substr($a,-7,-2), '543');# Q P R S +is(substr($a,-5,-5), ''); # P=Q=R S +is(substr($a,-5,-2), '543');# P=Q R S +is(substr($a,-3,-3), ''); # P Q=R S +is(substr($a,-3,-1), '32');# P Q R S + +$a = ''; + +is(substr($a,-2,2), ''); # Q P=R=S +is(substr($a,0,0), ''); # P=Q=R=S +is(substr($a,0,1), ''); # P=Q=S R +is(substr($a,-2,3), ''); # Q P=S R +is(substr($a,-2), ''); # Q P=R=S +is(substr($a,0), ''); # P=Q=R=S + + +is(substr($a,0,-1), ''); # R P=Q=S +$b = substr($a,-2, 0) ; # warn # Q=R P=S +is($w--, 1); +eval{substr($a,-2, 0) = "" ; }; # Q=R P=S +like($@, $FATAL_MSG); + +$b = substr($a,-2, 1) ; # warn # Q R P=S +is($w--, 1); +eval{substr($a,-2, 1) = "" ; }; # Q R P=S +like($@, $FATAL_MSG); + +$b = substr($a,-2,-1) ; # warn # Q R P=S +is($w--, 1); +eval{substr($a,-2,-1) = "" ; }; # Q R P=S +like($@, $FATAL_MSG); + +$b = substr($a,-2,-2) ; # warn # Q=R P=S +is($w--, 1); +eval{substr($a,-2,-2) = "" ; }; # Q=R P=S +like($@, $FATAL_MSG); + +$b = substr($a, 1,-2) ; # warn # R P=S Q +is($w--, 1); +eval{substr($a, 1,-2) = "" ; }; # R P=S Q +like($@, $FATAL_MSG); + +$b = substr($a, 1, 1) ; # warn # P=S Q R +is($w--, 1); +eval{substr($a, 1, 1) = "" ; }; # P=S Q R +like($@, $FATAL_MSG); + +$b = substr($a, 1, 0) ;# warn # P=S Q=R +is($w--, 1); +eval{substr($a, 1, 0) = "" ; }; # P=S Q=R +like($@, $FATAL_MSG); + +$b = substr($a,1) ; # warning # P=R=S Q +is($w--, 1); +eval{substr($a,1) = "" ; }; # P=R=S Q +like($@, $FATAL_MSG); + +my $a = 'zxcvbnm'; +substr($a,2,0) = ''; +is($a, 'zxcvbnm'); +substr($a,7,0) = ''; +is($a, 'zxcvbnm'); +substr($a,5,0) = ''; +is($a, 'zxcvbnm'); +substr($a,0,2) = 'pq'; +is($a, 'pqcvbnm'); +substr($a,2,0) = 'r'; +is($a, 'pqrcvbnm'); +substr($a,8,0) = 'asd'; +is($a, 'pqrcvbnmasd'); +substr($a,0,2) = 'iop'; +is($a, 'ioprcvbnmasd'); +substr($a,0,5) = 'fgh'; +is($a, 'fghvbnmasd'); +substr($a,3,5) = 'jkl'; +is($a, 'fghjklsd'); +substr($a,3,2) = '1234'; +is($a, 'fgh1234lsd'); + + +# with lexicals (and in re-entered scopes) +for (0,1) { + my $txt; + unless ($_) { + $txt = "Foo"; + substr($txt, -1) = "X"; + is($txt, "FoX"); + } + else { + substr($txt, 0, 1) = "X"; + is($txt, "X"); + } +} + +$w = 0 ; +# coercion of references +{ + my $s = []; + substr($s, 0, 1) = 'Foo'; + is (substr($s,0,7), "FooRRAY"); + is ($w,2); + $w = 0; +} + +# check no spurious warnings +is($w, 0); + +# check new 4 arg replacement syntax +$a = "abcxyz"; +$w = 0; +is(substr($a, 0, 3, ""), "abc"); +is($a, "xyz"); +is(substr($a, 0, 0, "abc"), ""); +is($a, "abcxyz"); +is(substr($a, 3, -1, ""), "xy"); +is($a, "abcz"); + +is(substr($a, 3, undef, "xy"), ""); +is($a, "abcxyz"); +is($w, 3); + +$w = 0; + +is(substr($a, 3, 9999999, ""), "xyz"); +is($a, "abc"); +eval{substr($a, -99, 0, "") }; +like($@, $FATAL_MSG); +eval{substr($a, 99, 3, "") }; +like($@, $FATAL_MSG); + +substr($a, 0, length($a), "foo"); +is ($a, "foo"); +is ($w, 0); + +# using 4 arg substr as lvalue is a compile time error +eval 'substr($a,0,0,"") = "abc"'; +like ($@, qr/Can't modify substr/); +is ($a, "foo"); + +$a = "abcdefgh"; +is(sub { shift }->(substr($a, 0, 4, "xxxx")), 'abcd'); +is($a, 'xxxxefgh'); + +{ + my $y = 10; + $y = "2" . $y; + is ($y, 210); +} + +# utf8 sanity +{ + my $x = substr("a\x{263a}b",0); + is(length($x), 3); + $x = substr($x,1,1); + is($x, "\x{263a}"); + $x = $x x 2; + is(length($x), 2); + substr($x,0,1) = "abcd"; + is($x, "abcd\x{263a}"); + is(length($x), 5); + $x = reverse $x; + is(length($x), 5); + is($x, "\x{263a}dcba"); + + my $z = 10; + $z = "21\x{263a}" . $z; + is(length($z), 5); + is($z, "21\x{263a}10"); +} + +# replacement should work on magical values +require Tie::Scalar; +my %data; +tie $data{'a'}, 'Tie::StdScalar'; # makes $data{'a'} magical +$data{a} = "firstlast"; +is(substr($data{'a'}, 0, 5, ""), "first"); +is($data{'a'}, "last"); + +# more utf8 + +# The following two originally from Ignasi Roca. + +$x = "\xF1\xF2\xF3"; +substr($x, 0, 1) = "\x{100}"; # Ignasi had \x{FF} +is(length($x), 3); +is($x, "\x{100}\xF2\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{F3}"); + +$x = "\xF1\xF2\xF3"; +substr($x, 0, 1) = "\x{100}\x{FF}"; # Ignasi had \x{FF} +is(length($x), 4); +is($x, "\x{100}\x{FF}\xF2\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F2}"); +is(substr($x, 3, 1), "\x{F3}"); + +# more utf8 lval exercise + +$x = "\xF1\xF2\xF3"; +substr($x, 0, 2) = "\x{100}\xFF"; +is(length($x), 3); +is($x, "\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F3}"); + +$x = "\xF1\xF2\xF3"; +substr($x, 1, 1) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\xF1\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{F1}"); +is(substr($x, 1, 1), "\x{100}"); +is(substr($x, 2, 1), "\x{FF}"); +is(substr($x, 3, 1), "\x{F3}"); + +$x = "\xF1\xF2\xF3"; +substr($x, 2, 1) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\xF1\xF2\x{100}\xFF"); +is(substr($x, 0, 1), "\x{F1}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{100}"); +is(substr($x, 3, 1), "\x{FF}"); + +$x = "\xF1\xF2\xF3"; +substr($x, 3, 1) = "\x{100}\xFF"; +is(length($x), 5); +is($x, "\xF1\xF2\xF3\x{100}\xFF"); +is(substr($x, 0, 1), "\x{F1}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{F3}"); +is(substr($x, 3, 1), "\x{100}"); +is(substr($x, 4, 1), "\x{FF}"); + +$x = "\xF1\xF2\xF3"; +substr($x, -1, 1) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\xF1\xF2\x{100}\xFF"); +is(substr($x, 0, 1), "\x{F1}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{100}"); +is(substr($x, 3, 1), "\x{FF}"); + +$x = "\xF1\xF2\xF3"; +substr($x, -1, 0) = "\x{100}\xFF"; +is(length($x), 5); +is($x, "\xF1\xF2\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{F1}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{100}"); +is(substr($x, 3, 1), "\x{FF}"); +is(substr($x, 4, 1), "\x{F3}"); + +$x = "\xF1\xF2\xF3"; +substr($x, 0, -1) = "\x{100}\xFF"; +is(length($x), 3); +is($x, "\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F3}"); + +$x = "\xF1\xF2\xF3"; +substr($x, 0, -2) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\x{100}\xFF\xF2\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F2}"); +is(substr($x, 3, 1), "\x{F3}"); + +$x = "\xF1\xF2\xF3"; +substr($x, 0, -3) = "\x{100}\xFF"; +is(length($x), 5); +is($x, "\x{100}\xFF\xF1\xF2\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F1}"); +is(substr($x, 3, 1), "\x{F2}"); +is(substr($x, 4, 1), "\x{F3}"); + +$x = "\xF1\xF2\xF3"; +substr($x, 1, -1) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\xF1\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{F1}"); +is(substr($x, 1, 1), "\x{100}"); +is(substr($x, 2, 1), "\x{FF}"); +is(substr($x, 3, 1), "\x{F3}"); + +$x = "\xF1\xF2\xF3"; +substr($x, -1, -1) = "\x{100}\xFF"; +is(length($x), 5); +is($x, "\xF1\xF2\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{F1}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{100}"); +is(substr($x, 3, 1), "\x{FF}"); +is(substr($x, 4, 1), "\x{F3}"); + +# And tests for already-UTF8 one + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 0, 1) = "\x{100}"; +is(length($x), 3); +is($x, "\x{100}\xF2\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 0, 1) = "\x{100}\x{FF}"; +is(length($x), 4); +is($x, "\x{100}\x{FF}\xF2\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F2}"); +is(substr($x, 3, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 0, 2) = "\x{100}\xFF"; +is(length($x), 3); +is($x, "\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 1, 1) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\x{101}\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{101}"); +is(substr($x, 1, 1), "\x{100}"); +is(substr($x, 2, 1), "\x{FF}"); +is(substr($x, 3, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 2, 1) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\x{101}\xF2\x{100}\xFF"); +is(substr($x, 0, 1), "\x{101}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{100}"); +is(substr($x, 3, 1), "\x{FF}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 3, 1) = "\x{100}\xFF"; +is(length($x), 5); +is($x, "\x{101}\x{F2}\x{F3}\x{100}\xFF"); +is(substr($x, 0, 1), "\x{101}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{F3}"); +is(substr($x, 3, 1), "\x{100}"); +is(substr($x, 4, 1), "\x{FF}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, -1, 1) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\x{101}\xF2\x{100}\xFF"); +is(substr($x, 0, 1), "\x{101}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{100}"); +is(substr($x, 3, 1), "\x{FF}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, -1, 0) = "\x{100}\xFF"; +is(length($x), 5); +is($x, "\x{101}\xF2\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{101}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{100}"); +is(substr($x, 3, 1), "\x{FF}"); +is(substr($x, 4, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 0, -1) = "\x{100}\xFF"; +is(length($x), 3); +is($x, "\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 0, -2) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\x{100}\xFF\xF2\xF3"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{F2}"); +is(substr($x, 3, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 0, -3) = "\x{100}\xFF"; +is(length($x), 5); +is($x, "\x{100}\xFF\x{101}\x{F2}\x{F3}"); +is(substr($x, 0, 1), "\x{100}"); +is(substr($x, 1, 1), "\x{FF}"); +is(substr($x, 2, 1), "\x{101}"); +is(substr($x, 3, 1), "\x{F2}"); +is(substr($x, 4, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, 1, -1) = "\x{100}\xFF"; +is(length($x), 4); +is($x, "\x{101}\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{101}"); +is(substr($x, 1, 1), "\x{100}"); +is(substr($x, 2, 1), "\x{FF}"); +is(substr($x, 3, 1), "\x{F3}"); + +$x = "\x{101}\x{F2}\x{F3}"; +substr($x, -1, -1) = "\x{100}\xFF"; +is(length($x), 5); +is($x, "\x{101}\xF2\x{100}\xFF\xF3"); +is(substr($x, 0, 1), "\x{101}"); +is(substr($x, 1, 1), "\x{F2}"); +is(substr($x, 2, 1), "\x{100}"); +is(substr($x, 3, 1), "\x{FF}"); +is(substr($x, 4, 1), "\x{F3}"); + +substr($x = "ab", 0, 0, "\x{100}\x{200}"); +is($x, "\x{100}\x{200}ab"); + +substr($x = "\x{100}\x{200}", 0, 0, "ab"); +is($x, "ab\x{100}\x{200}"); + +substr($x = "ab", 1, 0, "\x{100}\x{200}"); +is($x, "a\x{100}\x{200}b"); + +substr($x = "\x{100}\x{200}", 1, 0, "ab"); +is($x, "\x{100}ab\x{200}"); + +substr($x = "ab", 2, 0, "\x{100}\x{200}"); +is($x, "ab\x{100}\x{200}"); + +substr($x = "\x{100}\x{200}", 2, 0, "ab"); +is($x, "\x{100}\x{200}ab"); + +substr($x = "\xFFb", 0, 0, "\x{100}\x{200}"); +is($x, "\x{100}\x{200}\xFFb"); + +substr($x = "\x{100}\x{200}", 0, 0, "\xFFb"); +is($x, "\xFFb\x{100}\x{200}"); + +substr($x = "\xFFb", 1, 0, "\x{100}\x{200}"); +is($x, "\xFF\x{100}\x{200}b"); + +substr($x = "\x{100}\x{200}", 1, 0, "\xFFb"); +is($x, "\x{100}\xFFb\x{200}"); + +substr($x = "\xFFb", 2, 0, "\x{100}\x{200}"); +is($x, "\xFFb\x{100}\x{200}"); + +substr($x = "\x{100}\x{200}", 2, 0, "\xFFb"); +is($x, "\x{100}\x{200}\xFFb"); + +# [perl #20933] +{ + my $s = "ab"; + my @r; + $r[$_] = \ substr $s, $_, 1 for (0, 1); + is(join("", map { $$_ } @r), "ab"); +} + +# [perl #23207] +{ + sub ss { + substr($_[0],0,1) ^= substr($_[0],1,1) ^= + substr($_[0],0,1) ^= substr($_[0],1,1); + } + my $x = my $y = 'AB'; ss $x; ss $y; + is($x, $y); +} + +# [perl #24605] +{ + my $x = "0123456789\x{500}"; + my $y = substr $x, 4; + is(substr($x, 7, 1), "7"); +} + +# multiple assignments to lvalue [perl #24346] +{ + my $x = "abcdef"; + for (substr($x,1,3)) { + is($_, 'bcd'); + $_ = 'XX'; + is($_, 'XX'); + is($x, 'aXXef'); + $_ = "\xFF"; + is($_, "\xFF"); + is($x, "a\xFFef"); + $_ = "\xF1\xF2\xF3\xF4\xF5\xF6"; + is($_, "\xF1\xF2\xF3\xF4\xF5\xF6"); + is($x, "a\xF1\xF2\xF3\xF4\xF5\xF6ef"); + $_ = 'YYYY'; + is($_, 'YYYY'); + is($x, 'aYYYYef'); + } +} + +# [perl #24200] string corruption with lvalue sub + +{ + sub bar: lvalue { substr $krunch, 0 } + bar = "XXX"; + is(bar, 'XXX'); + $krunch = '123456789'; + is(bar, '123456789'); +} + +# [perl #29149] +{ + my $text = "0123456789\xED "; + utf8::upgrade($text); + my $pos = 5; + pos($text) = $pos; + my $a = substr($text, $pos, $pos); + is(substr($text,$pos,1), $pos); + +} + +# [perl #23765] +{ + my $a = pack("C", 0xbf); + substr($a, -1) &= chr(0xfeff); + is($a, "\xbf"); +} + +# [perl #34976] incorrect caching of utf8 substr length +{ + my $a = "abcd\x{100}"; + is(substr($a,1,2), 'bc'); + is(substr($a,1,1), 'b'); +} + +} diff --git a/t/re/substr_thr.t b/t/re/substr_thr.t new file mode 100644 index 0000000000..babb48d8ec --- /dev/null +++ b/t/re/substr_thr.t @@ -0,0 +1,7 @@ +#!./perl + +chdir 't' if -d 't'; +@INC = ('../lib', '.'); + +require 'thread_it.pl'; +thread_it(qw(op substr.t)); |