diff options
author | Ilya Zakharevich <ilya@math.berkeley.edu> | 1997-11-15 19:29:39 -0500 |
---|---|---|
committer | Malcolm Beattie <mbeattie@sable.ox.ac.uk> | 1997-11-19 11:04:15 +0000 |
commit | c277df42229d99fecbc76f5da53793a409ac66e1 (patch) | |
tree | de3cf73b51d3455f54655dc5b9fdaa68e3da9a7a /t | |
parent | 5d5aaa5e70a8a8ab4803cdb506e2096b6e190e80 (diff) | |
download | perl-c277df42229d99fecbc76f5da53793a409ac66e1.tar.gz |
Jumbo regexp patch applied (with minor fix-up tweaks):
Subject: Version 7 of Jumbo RE patch available
p4raw-id: //depot/perl@267
Diffstat (limited to 't')
-rwxr-xr-x | t/op/misc.t | 10 | ||||
-rwxr-xr-x | t/op/pat.t | 113 | ||||
-rw-r--r-- | t/op/re_tests | 133 | ||||
-rwxr-xr-x | t/op/regexp.t | 25 | ||||
-rwxr-xr-x | t/op/split.t | 23 | ||||
-rwxr-xr-x | t/op/subst.t | 34 |
6 files changed, 320 insertions, 18 deletions
diff --git a/t/op/misc.t b/t/op/misc.t index 5a61acd55d..c529830123 100755 --- a/t/op/misc.t +++ b/t/op/misc.t @@ -335,3 +335,13 @@ print "eat flaming death\n" unless ($s == 7); sub foo { local $_ = shift; split; @_ } @x = foo(' x y z '); print "you die joe!\n" unless "@x" eq 'x y z'; +######## +/(?{"{"})/ # Check it outside of eval too +EXPECT +/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 1. +######## +/(?{"{"}})/ # Check it outside of eval too +EXPECT +Unmatched right bracket at (re_eval 1) line 1, at end of line +syntax error at (re_eval 1) line 1, near ""{"}" +Compilation failed in regexp at - line 1. diff --git a/t/op/pat.t b/t/op/pat.t index 0478911db3..03af1227ca 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -2,7 +2,7 @@ # $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $ -print "1..62\n"; +print "1..97\n"; $x = "abc\ndef\n"; @@ -217,3 +217,114 @@ print "ok 61\n"; /\Gc/g; print "not " if defined pos $_; print "ok 62\n"; + +$out = 1; +'abc' =~ m'a(?{ $out = 2 })b'; +print "not " if $out != 2; +print "ok 63\n"; + +$out = 1; +'abc' =~ m'a(?{ $out = 3 })c'; +print "not " if $out != 1; +print "ok 64\n"; + +$_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; +@out = /(?<!foo)bar./g; +print "not " if "@out" ne 'bar2 barf'; +print "ok 65\n"; + +# Long Monsters +$test = 66; +for $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory + $a = 'a' x $l; + print "# length=$l\nnot " unless "ba$a=" =~ /a$a=/; + print "ok $test\n"; + $test++; + + print "not " if "b$a=" =~ /a$a=/; + print "ok $test\n"; + $test++; +} + +# 20000 nodes, each taking 3 words per string, and 1 per branch +$long_constant_len = join '|', 12120 .. 32645; +$long_var_len = join '|', 8120 .. 28645; +%ans = ( 'ax13876y25677lbc' => 1, + 'ax13876y25677mcb' => 0, # not b. + 'ax13876y35677nbc' => 0, # Num too big + 'ax13876y25677y21378obc' => 1, + 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] + 'ax13876y25677y21378y21378kbc' => 1, + 'ax13876y25677y21378y21378kcb' => 0, # Not b. + 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs + ); + +for ( keys %ans ) { + print "# const-len `$_' not => $ans{$_}\nnot " + if $ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o; + print "ok $test\n"; + $test++; + print "# var-len `$_' not => $ans{$_}\nnot " + if $ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o; + print "ok $test\n"; + $test++; +} + +$_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; +$expect = "(bla()) ((l)u((e))) (l(e)e)"; + +sub matchit { + m' + ( + \( + (?{ $c = 1 }) # Initialize + (?: + (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop + (?! + ) # Fail: will unwind one iteration back + ) + (?: + [^()]+ # Match a big chunk + (?= + [()] + ) # Do not try to match subchunks + | + \( + (?{ ++$c }) + | + \) + (?{ --$c }) + ) + )+ # This may not match with different subblocks + ) + (?(?{ $c != 0 }) + (?! + ) # Fail + ) # Otherwise the chunk 1 may succeed with $c>0 + 'xg; +} + +push @ans, $res while $res = matchit; + +print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1"; +print "ok $test\n"; +$test++; + +@ans = matchit; + +print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect; +print "ok $test\n"; +$test++; + +@ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad +print "not " if "@ans" ne 'a/ b'; +print "ok $test\n"; +$test++; + +$code = '$blah = 45'; +$blah = 12; +/(?{$code})/; +print "not " if $blah != 45; +print "ok $test\n"; +$test++; + diff --git a/t/op/re_tests b/t/op/re_tests index ce4c5a51a2..29a6518cd9 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -8,6 +8,8 @@ ab*c abc y $& abc ab*bc abc y $& abc ab*bc abbc y $& abbc ab*bc abbbbc y $& abbbbc +.{1} abbbbc y $& a +.{3,4} abbbbc y $& abbb ab{0,}bc abbbbc y $& abbbbc ab+bc abbc y $& abbc ab+bc abc n - - @@ -29,6 +31,7 @@ ab{0,1}c abc y $& abc ^abc abcc y $& abc ^abc$ aabc n - - abc$ aabc y $& abc +abc$ aabcd n - - ^ abc y $& $ abc y $& a.c abc y $& abc @@ -299,10 +302,132 @@ a(?=c|d). abad y $& ad a(?:b|c|d)(.) ace y $1 e a(?:b|c|d)*(.) ace y $1 e a(?:b|c|d)+?(.) ace y $1 e +a(?:b|c|d)+?(.) acdbcdbe y $1 d +a(?:b|c|d)+(.) acdbcdbe y $1 e +a(?:b|c|d){2}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}?(.) acdbcdbe y $1 d +((foo)|(bar))* foobar y $1-$2-$3 bar-foo-bar +:(?: - c - /(?/: Sequence (? incomplete +a(?:b|c|d){6,7}(.) acdbcdbe y $1 e +a(?:b|c|d){6,7}?(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}?(.) acdbcdbe y $1 b +a(?:b|c|d){5,7}(.) acdbcdbe y $1 e +a(?:b|c|d){5,7}?(.) acdbcdbe y $1 b a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce ^(.+)?B AB y $1 A -'([a-z]+)\s\1'i Aa aa y $&-$1 Aa aa-Aa -'([a-z]+)\s\1'i Ab ab y $&-$1 Ab ab-Ab +^([^a-z])|(\^)$ . y $1 . +^[<>]& <&OUT y $& <& +^(a\1?){4}$ aaaaaaaaaa y $1 aaaa +^(a\1?){4}$ aaaaaaaaa n - - +^(a\1?){4}$ aaaaaaaaaaa n - - +^(a\1){4}$ aaaaaaaaaa y $1 aaaa +^(a\1){4}$ aaaaaaaaa n - - +^(a\1){4}$ aaaaaaaaaaa n - - +(?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 f:o:o:b:a:r +(?<=a)b ab y $& b +(?<=a)b cb n - - +(?<=a)b b n - - +(?<!c)b ab y $& b +(?<!c)b cb n - - +(?<!c)b b y - - +(?<!c)b b y $& b +(?<%)b - c - /(?<%)b/: Sequence (?%...) not recognized +(?:..)*a aba y $& aba +(?:..)*?a aba y $& a +^(?:b|a(?=(.)))*\1 abc y $& ab +^(){3,5} abc y a$1 a +^(a+)*ax aax y $1 a +^((a|b)+)*ax aax y $1 a +^((a|bc)+)*ax aax y $1 a +(a|x)*ab cab y y$1 y +(a)*ab cab y y$1 y +(?:(?i)a)b ab y $& ab +((?i)a)b ab y $&:$1 ab:a +(?:(?i)a)b Ab y $& Ab +((?i)a)b Ab y $&:$1 Ab:A +(?:(?i)a)b aB n - - +((?i)a)b aB n - - +(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b))) cabbbb y $& cabbbb +(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb))) caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb y $& caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb +'(ab)\d\1'i Ab4ab y $1 Ab +'(ab)\d\1'i ab4Ab y $1 ab foo\w*\d{4}baz foobar1234baz y $& foobar1234baz -((foo)|(bar))* foobar y $1-$2-$3 bar-foo-bar -:(?: - c - Sequence (? incomplete +a{1,32766} aaa y $& aaa +a{1,32767} - c - /a{1,32767}/: Quantifier in {,} bigger than +a{1,32768} - c - /a{1,32768}/: Quantifier in {,} bigger than +a(?{})b cabd y $& ab +a(?{)b - c - /a(?{)b/: Sequence (?{...}) not terminated or not {}-balanced +a(?{{})b - c - /a(?{{})b/: Sequence (?{...}) not terminated or not {}-balanced +a(?{}})b - c - /a(?{}})b/: Sequence (?{...}) not terminated or not {}-balanced +a(?{"{"})b - c - /a(?{"{"})b/: Sequence (?{...}) not terminated or not {}-balanced +a(?{"\{"})b cabd y $& ab +a(?{"{"}})b - c - Unmatched right bracket +a(?{$bl="\{"}).b caxbd y $bl { +x(~~)*(?:(?:F)?)? x~~ y - - +^a(?#xxx){3}c aaac y $& aaac +'^a (?#xxx) (?#yyy) {3}c'x aaac y $& aaac +(?<![cd])b dbcb n - - +(?<![cd])[ab] dbaacb y $& a +(?<!(c|d))b dbcb n - - +(?<!(c|d))[ab] dbaacb y $& a +(?<!cd)[ab] cdaccb y $& b +^(?:a?b?)*$ a-- n - - +((?s)^a(.))((?m)^b$) a\nb\nc\n y $1;$2;$3 a\n;\n;b +((?m)^b$) a\nb\nc\n y $1 b +(?m)^b a\nb\n y $& b +(?m)^(b) a\nb\n y $1 b +((?m)^b) a\nb\n y $1 b +\n((?m)^b) a\nb\n y $1 b +((?s).)c(?!.) a\nb\nc\n y $1 \n +((?s).)c(?!.) a\nb\nc\n y $1:$& \n:\nc +((?s)b.)c(?!.) a\nb\nc\n y $1 b\n +((?s)b.)c(?!.) a\nb\nc\n y $1:$& b\n:b\nc +^b a\nb\nc\n n - - +()^b a\nb\nc\n n - - +((?m)^b) a\nb\nc\n y $1 b +(?(1)a|b) a n - - +(?(1)b|a) a y $& a +(x)?(?(1)a|b) a n - - +(x)?(?(1)b|a) a y $& a +()?(?(1)b|a) a y $& a +()(?(1)b|a) a n - - +()?(?(1)a|b) a y $& a +^(\()?blah(?(1)(\)))$ (blah) y $2 ) +^(\()?blah(?(1)(\)))$ blah y ($2) () +^(\()?blah(?(1)(\)))$ blah) n - - +^(\()?blah(?(1)(\)))$ (blah n - - +^(\(+)?blah(?(1)(\)))$ (blah) y $2 ) +^(\(+)?blah(?(1)(\)))$ blah y ($2) () +^(\(+)?blah(?(1)(\)))$ blah) n - - +^(\(+)?blah(?(1)(\)))$ (blah n - - +(?(1?)a|b) a c - /(?(1?)a|b)/: Switch (?(number? not recognized +(?(1)a|b|c) a c - /(?(1)a|b|c)/: Switch (?(condition)... contains too many branches +(?(?{0})a|b) a n - - +(?(?{0})b|a) a y $& a +(?(?{1})b|a) a n - - +(?(?{1})a|b) a y $& a +(?(?!a)a|b) a n - - +(?(?!a)b|a) a y $& a +(?(?=a)b|a) a n - - +(?(?=a)a|b) a y $& a +(?=(a+?))(\1ab) aaab y $2 aab +^(?=(a+?))\1ab aaab n - - +(\w+:)+ one: y $1 one: +$(?<=^(a)) a y $1 a +(?=(a+?))(\1ab) aaab y $2 aab +^(?=(a+?))\1ab aaab n - - +([\w:]+::)?(\w+)$ abcd: n - - +([\w:]+::)?(\w+)$ abcd y $1-$2 -abcd +([\w:]+::)?(\w+)$ xy:z:::abcd y $1-$2 xy:z:::-abcd +^[^bcd]*(c+) aexycd y $1 c +(a*)b+ caab y $1 aa +([\w:]+::)?(\w+)$ abcd: n - - +([\w:]+::)?(\w+)$ abcd y $1-$2 -abcd +([\w:]+::)?(\w+)$ xy:z:::abcd y $1-$2 xy:z:::-abcd +^[^bcd]*(c+) aexycd y $1 c +(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a}) yaaxxaaaacd y $b 3 +(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a}) yaaxxaaaacd y $b 4 +(>a+)ab aaab n - - +((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x diff --git a/t/op/regexp.t b/t/op/regexp.t index 803f1d0dab..273608433e 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -19,7 +19,11 @@ # Column 4 contains a string, usually C<$&>. # # Column 5 contains the expected result of double-quote -# interpolating that string after the match. +# interpolating that string after the match, or start of error message. +# +# Columns 1, 2 and 5 are \n-interpolated. + +$iters = shift || 1; # Poor man performance suite, 10000 is OK. open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') || die "Can't open re_tests"; @@ -30,24 +34,33 @@ seek(TESTS,0,0); $. = 0; $| = 1; -print "1..$numtests\n"; +print "1..$numtests\n# $iters iterations\n"; TEST: while (<TESTS>) { ($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_); $input = join(':',$pat,$subject,$result,$repl,$expect); $pat = "'$pat'" unless $pat =~ /^[:']/; + $pat =~ s/\\n/\n/g; + $subject =~ s/\\n/\n/g; + $expect =~ s/\\n/\n/g; + $expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/; for $study ("", "study \$subject") { - eval "$study; \$match = (\$subject =~ m$pat); \$got = \"$repl\";"; + $c = $iters; + eval "$study; \$match = (\$subject =~ m$pat) while \$c--; \$got = \"$repl\";"; + chomp( $err = $@ ); if ($result eq 'c') { - if ($@ !~ m!^\Q$expect!) { print "not ok $.\n"; next TEST } + if ($err !~ m!^\Q$expect!) { print "not ok $. (compile) $input => `$err'\n"; next TEST } last; # no need to study a syntax error } + elsif ($@) { + print "not ok $. $input => error `$err'\n"; next TEST; + } elsif ($result eq 'n') { - if ($match) { print "not ok $. $input => $got\n"; next TEST } + if ($match) { print "not ok $. ($study) $input => false positive\n"; next TEST } } else { if (!$match || $got ne $expect) { - print "not ok $. $input => $got\n"; + print "not ok $. ($study) $input => `$got', match=$match\n"; next TEST; } } diff --git a/t/op/split.t b/t/op/split.t index 07246522ee..7f0accea5e 100755 --- a/t/op/split.t +++ b/t/op/split.t @@ -2,7 +2,7 @@ # $RCSfile: split.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:26 $ -print "1..20\n"; +print "1..25\n"; $FS = ':'; @@ -90,3 +90,24 @@ print $_ eq "Z" ? "ok 19\n" : "#$_\nnot ok 19\n"; $_ = join('|', split(/.?/, '',-1), 'Z'); print $_ eq "Z" ? "ok 20\n" : "#$_\nnot ok 20\n"; + +# Are /^/m patterns scanned? +$_ = join '|', split(/^a/m, "a b a\na d a", 20); +print $_ eq "| b a\n| d a" ? "ok 21\n" : "not ok 21\n# `$_'\n"; + +# Are /$/m patterns scanned? +$_ = join '|', split(/a$/m, "a b a\na d a", 20); +print $_ eq "a b |\na d |" ? "ok 22\n" : "not ok 22\n# `$_'\n"; + +# Are /^/m patterns scanned? +$_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20); +print $_ eq "| b aa\n| d aa" ? "ok 23\n" : "not ok 23\n# `$_'\n"; + +# Are /$/m patterns scanned? +$_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20); +print $_ eq "aa b |\naa d |" ? "ok 24\n" : "not ok 24\n# `$_'\n"; + +# Greedyness: +$_ = "a : b :c: d"; +@ary = split(/\s*:\s*/); +if (($res = join(".",@ary)) eq "a.b.c.d") {print "ok 25\n";} else {print "not ok 25\n# res=`$res' != `a.b.c.d'\n";} diff --git a/t/op/subst.t b/t/op/subst.t index efea970dfc..c6cfb8c96d 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -2,7 +2,7 @@ # $RCSfile: s.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:22 $ -print "1..62\n"; +print "1..67\n"; $x = 'foo'; $_ = "x"; @@ -157,11 +157,11 @@ $x ne $x || s/bb/x/; print $_ eq 'aaaXXXXxb' ? "ok 39\n" : "not ok 39\n"; $_ = 'abc123xyz'; -s/\d+/$&*2/e; # yields 'abc246xyz' +s/(\d+)/$1*2/e; # yields 'abc246xyz' print $_ eq 'abc246xyz' ? "ok 40\n" : "not ok 40\n"; -s/\d+/sprintf("%5d",$&)/e; # yields 'abc 246xyz' +s/(\d+)/sprintf("%5d",$1)/e; # yields 'abc 246xyz' print $_ eq 'abc 246xyz' ? "ok 41\n" : "not ok 41\n"; -s/\w/$& x 2/eg; # yields 'aabbcc 224466xxyyzz' +s/(\w)/$1 x 2/eg; # yields 'aabbcc 224466xxyyzz' print $_ eq 'aabbcc 224466xxyyzz' ? "ok 42\n" : "not ok 42\n"; $_ = "aaaaa"; @@ -232,10 +232,32 @@ print exp_vars('foo $(DIR)/yyy bar',0) eq 'foo $(UNDEFINEDNAME)/xxx/yyy bar' # a match nested in the RHS of a substitution: $_ = "abcd"; -s/../$x = $&, m#.#/eg; +s/(..)/$x = $1, m#.#/eg; print $x eq "cd" ? "ok 61\n" : "not ok 61\n"; +# Subst and lookbehind + +$_="ccccc"; +s/(?<!x)c/x/g; +print $_ eq "xxxxx" ? "ok 62\n" : "not ok 62 # `$_' ne `xxxxx'\n"; + +$_="ccccc"; +s/(?<!x)(c)/x/g; +print $_ eq "xxxxx" ? "ok 63\n" : "not ok 63 # `$_' ne `xxxxx'\n"; + +$_="foobbarfoobbar"; +s/(?<!r)foobbar/foobar/g; +print $_ eq "foobarfoobbar" ? "ok 64\n" : "not ok 64 # `$_' ne `foobarfoobbar'\n"; + +$_="foobbarfoobbar"; +s/(?<!ar)(foobbar)/foobar/g; +print $_ eq "foobarfoobbar" ? "ok 65\n" : "not ok 65 # `$_' ne `foobarfoobbar'\n"; + +$_="foobbarfoobbar"; +s/(?<!ar)foobbar/foobar/g; +print $_ eq "foobarfoobbar" ? "ok 66\n" : "not ok 66 # `$_' ne `foobarfoobbar'\n"; + # check parsing of split subst with comment eval 's{foo} # this is a comment, not a delimiter {bar};'; -print @? ? "not ok 62\n" : "ok 62\n"; +print @? ? "not ok 67\n" : "ok 67\n"; |