summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorIlya Zakharevich <ilya@math.berkeley.edu>1997-11-15 19:29:39 -0500
committerMalcolm Beattie <mbeattie@sable.ox.ac.uk>1997-11-19 11:04:15 +0000
commitc277df42229d99fecbc76f5da53793a409ac66e1 (patch)
treede3cf73b51d3455f54655dc5b9fdaa68e3da9a7a /t
parent5d5aaa5e70a8a8ab4803cdb506e2096b6e190e80 (diff)
downloadperl-c277df42229d99fecbc76f5da53793a409ac66e1.tar.gz
Jumbo regexp patch applied (with minor fix-up tweaks):
Subject: Version 7 of Jumbo RE patch available p4raw-id: //depot/perl@267
Diffstat (limited to 't')
-rwxr-xr-xt/op/misc.t10
-rwxr-xr-xt/op/pat.t113
-rw-r--r--t/op/re_tests133
-rwxr-xr-xt/op/regexp.t25
-rwxr-xr-xt/op/split.t23
-rwxr-xr-xt/op/subst.t34
6 files changed, 320 insertions, 18 deletions
diff --git a/t/op/misc.t b/t/op/misc.t
index 5a61acd55d..c529830123 100755
--- a/t/op/misc.t
+++ b/t/op/misc.t
@@ -335,3 +335,13 @@ print "eat flaming death\n" unless ($s == 7);
sub foo { local $_ = shift; split; @_ }
@x = foo(' x y z ');
print "you die joe!\n" unless "@x" eq 'x y z';
+########
+/(?{"{"})/ # Check it outside of eval too
+EXPECT
+/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 1.
+########
+/(?{"{"}})/ # Check it outside of eval too
+EXPECT
+Unmatched right bracket at (re_eval 1) line 1, at end of line
+syntax error at (re_eval 1) line 1, near ""{"}"
+Compilation failed in regexp at - line 1.
diff --git a/t/op/pat.t b/t/op/pat.t
index 0478911db3..03af1227ca 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -2,7 +2,7 @@
# $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $
-print "1..62\n";
+print "1..97\n";
$x = "abc\ndef\n";
@@ -217,3 +217,114 @@ print "ok 61\n";
/\Gc/g;
print "not " if defined pos $_;
print "ok 62\n";
+
+$out = 1;
+'abc' =~ m'a(?{ $out = 2 })b';
+print "not " if $out != 2;
+print "ok 63\n";
+
+$out = 1;
+'abc' =~ m'a(?{ $out = 3 })c';
+print "not " if $out != 1;
+print "ok 64\n";
+
+$_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6';
+@out = /(?<!foo)bar./g;
+print "not " if "@out" ne 'bar2 barf';
+print "ok 65\n";
+
+# Long Monsters
+$test = 66;
+for $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory
+ $a = 'a' x $l;
+ print "# length=$l\nnot " unless "ba$a=" =~ /a$a=/;
+ print "ok $test\n";
+ $test++;
+
+ print "not " if "b$a=" =~ /a$a=/;
+ print "ok $test\n";
+ $test++;
+}
+
+# 20000 nodes, each taking 3 words per string, and 1 per branch
+$long_constant_len = join '|', 12120 .. 32645;
+$long_var_len = join '|', 8120 .. 28645;
+%ans = ( 'ax13876y25677lbc' => 1,
+ 'ax13876y25677mcb' => 0, # not b.
+ 'ax13876y35677nbc' => 0, # Num too big
+ 'ax13876y25677y21378obc' => 1,
+ 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o]
+ 'ax13876y25677y21378y21378kbc' => 1,
+ 'ax13876y25677y21378y21378kcb' => 0, # Not b.
+ 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs
+ );
+
+for ( keys %ans ) {
+ print "# const-len `$_' not => $ans{$_}\nnot "
+ if $ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o;
+ print "ok $test\n";
+ $test++;
+ print "# var-len `$_' not => $ans{$_}\nnot "
+ if $ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o;
+ print "ok $test\n";
+ $test++;
+}
+
+$_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e";
+$expect = "(bla()) ((l)u((e))) (l(e)e)";
+
+sub matchit {
+ m'
+ (
+ \(
+ (?{ $c = 1 }) # Initialize
+ (?:
+ (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop
+ (?!
+ ) # Fail: will unwind one iteration back
+ )
+ (?:
+ [^()]+ # Match a big chunk
+ (?=
+ [()]
+ ) # Do not try to match subchunks
+ |
+ \(
+ (?{ ++$c })
+ |
+ \)
+ (?{ --$c })
+ )
+ )+ # This may not match with different subblocks
+ )
+ (?(?{ $c != 0 })
+ (?!
+ ) # Fail
+ ) # Otherwise the chunk 1 may succeed with $c>0
+ 'xg;
+}
+
+push @ans, $res while $res = matchit;
+
+print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne "1 1 1";
+print "ok $test\n";
+$test++;
+
+@ans = matchit;
+
+print "# ans='@ans'\n# expect='$expect'\nnot " if "@ans" ne $expect;
+print "ok $test\n";
+$test++;
+
+@ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad
+print "not " if "@ans" ne 'a/ b';
+print "ok $test\n";
+$test++;
+
+$code = '$blah = 45';
+$blah = 12;
+/(?{$code})/;
+print "not " if $blah != 45;
+print "ok $test\n";
+$test++;
+
diff --git a/t/op/re_tests b/t/op/re_tests
index ce4c5a51a2..29a6518cd9 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -8,6 +8,8 @@ ab*c abc y $& abc
ab*bc abc y $& abc
ab*bc abbc y $& abbc
ab*bc abbbbc y $& abbbbc
+.{1} abbbbc y $& a
+.{3,4} abbbbc y $& abbb
ab{0,}bc abbbbc y $& abbbbc
ab+bc abbc y $& abbc
ab+bc abc n - -
@@ -29,6 +31,7 @@ ab{0,1}c abc y $& abc
^abc abcc y $& abc
^abc$ aabc n - -
abc$ aabc y $& abc
+abc$ aabcd n - -
^ abc y $&
$ abc y $&
a.c abc y $& abc
@@ -299,10 +302,132 @@ a(?=c|d). abad y $& ad
a(?:b|c|d)(.) ace y $1 e
a(?:b|c|d)*(.) ace y $1 e
a(?:b|c|d)+?(.) ace y $1 e
+a(?:b|c|d)+?(.) acdbcdbe y $1 d
+a(?:b|c|d)+(.) acdbcdbe y $1 e
+a(?:b|c|d){2}(.) acdbcdbe y $1 b
+a(?:b|c|d){4,5}(.) acdbcdbe y $1 b
+a(?:b|c|d){4,5}?(.) acdbcdbe y $1 d
+((foo)|(bar))* foobar y $1-$2-$3 bar-foo-bar
+:(?: - c - /(?/: Sequence (? incomplete
+a(?:b|c|d){6,7}(.) acdbcdbe y $1 e
+a(?:b|c|d){6,7}?(.) acdbcdbe y $1 e
+a(?:b|c|d){5,6}(.) acdbcdbe y $1 e
+a(?:b|c|d){5,6}?(.) acdbcdbe y $1 b
+a(?:b|c|d){5,7}(.) acdbcdbe y $1 e
+a(?:b|c|d){5,7}?(.) acdbcdbe y $1 b
a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce
^(.+)?B AB y $1 A
-'([a-z]+)\s\1'i Aa aa y $&-$1 Aa aa-Aa
-'([a-z]+)\s\1'i Ab ab y $&-$1 Ab ab-Ab
+^([^a-z])|(\^)$ . y $1 .
+^[<>]& <&OUT y $& <&
+^(a\1?){4}$ aaaaaaaaaa y $1 aaaa
+^(a\1?){4}$ aaaaaaaaa n - -
+^(a\1?){4}$ aaaaaaaaaaa n - -
+^(a\1){4}$ aaaaaaaaaa y $1 aaaa
+^(a\1){4}$ aaaaaaaaa n - -
+^(a\1){4}$ aaaaaaaaaaa n - -
+(?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 f:o:o:b:a:r
+(?<=a)b ab y $& b
+(?<=a)b cb n - -
+(?<=a)b b n - -
+(?<!c)b ab y $& b
+(?<!c)b cb n - -
+(?<!c)b b y - -
+(?<!c)b b y $& b
+(?<%)b - c - /(?<%)b/: Sequence (?%...) not recognized
+(?:..)*a aba y $& aba
+(?:..)*?a aba y $& a
+^(?:b|a(?=(.)))*\1 abc y $& ab
+^(){3,5} abc y a$1 a
+^(a+)*ax aax y $1 a
+^((a|b)+)*ax aax y $1 a
+^((a|bc)+)*ax aax y $1 a
+(a|x)*ab cab y y$1 y
+(a)*ab cab y y$1 y
+(?:(?i)a)b ab y $& ab
+((?i)a)b ab y $&:$1 ab:a
+(?:(?i)a)b Ab y $& Ab
+((?i)a)b Ab y $&:$1 Ab:A
+(?:(?i)a)b aB n - -
+((?i)a)b aB n - -
+(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b))) cabbbb y $& cabbbb
+(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb))) caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb y $& caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
+'(ab)\d\1'i Ab4ab y $1 Ab
+'(ab)\d\1'i ab4Ab y $1 ab
foo\w*\d{4}baz foobar1234baz y $& foobar1234baz
-((foo)|(bar))* foobar y $1-$2-$3 bar-foo-bar
-:(?: - c - Sequence (? incomplete
+a{1,32766} aaa y $& aaa
+a{1,32767} - c - /a{1,32767}/: Quantifier in {,} bigger than
+a{1,32768} - c - /a{1,32768}/: Quantifier in {,} bigger than
+a(?{})b cabd y $& ab
+a(?{)b - c - /a(?{)b/: Sequence (?{...}) not terminated or not {}-balanced
+a(?{{})b - c - /a(?{{})b/: Sequence (?{...}) not terminated or not {}-balanced
+a(?{}})b - c - /a(?{}})b/: Sequence (?{...}) not terminated or not {}-balanced
+a(?{"{"})b - c - /a(?{"{"})b/: Sequence (?{...}) not terminated or not {}-balanced
+a(?{"\{"})b cabd y $& ab
+a(?{"{"}})b - c - Unmatched right bracket
+a(?{$bl="\{"}).b caxbd y $bl {
+x(~~)*(?:(?:F)?)? x~~ y - -
+^a(?#xxx){3}c aaac y $& aaac
+'^a (?#xxx) (?#yyy) {3}c'x aaac y $& aaac
+(?<![cd])b dbcb n - -
+(?<![cd])[ab] dbaacb y $& a
+(?<!(c|d))b dbcb n - -
+(?<!(c|d))[ab] dbaacb y $& a
+(?<!cd)[ab] cdaccb y $& b
+^(?:a?b?)*$ a-- n - -
+((?s)^a(.))((?m)^b$) a\nb\nc\n y $1;$2;$3 a\n;\n;b
+((?m)^b$) a\nb\nc\n y $1 b
+(?m)^b a\nb\n y $& b
+(?m)^(b) a\nb\n y $1 b
+((?m)^b) a\nb\n y $1 b
+\n((?m)^b) a\nb\n y $1 b
+((?s).)c(?!.) a\nb\nc\n y $1 \n
+((?s).)c(?!.) a\nb\nc\n y $1:$& \n:\nc
+((?s)b.)c(?!.) a\nb\nc\n y $1 b\n
+((?s)b.)c(?!.) a\nb\nc\n y $1:$& b\n:b\nc
+^b a\nb\nc\n n - -
+()^b a\nb\nc\n n - -
+((?m)^b) a\nb\nc\n y $1 b
+(?(1)a|b) a n - -
+(?(1)b|a) a y $& a
+(x)?(?(1)a|b) a n - -
+(x)?(?(1)b|a) a y $& a
+()?(?(1)b|a) a y $& a
+()(?(1)b|a) a n - -
+()?(?(1)a|b) a y $& a
+^(\()?blah(?(1)(\)))$ (blah) y $2 )
+^(\()?blah(?(1)(\)))$ blah y ($2) ()
+^(\()?blah(?(1)(\)))$ blah) n - -
+^(\()?blah(?(1)(\)))$ (blah n - -
+^(\(+)?blah(?(1)(\)))$ (blah) y $2 )
+^(\(+)?blah(?(1)(\)))$ blah y ($2) ()
+^(\(+)?blah(?(1)(\)))$ blah) n - -
+^(\(+)?blah(?(1)(\)))$ (blah n - -
+(?(1?)a|b) a c - /(?(1?)a|b)/: Switch (?(number? not recognized
+(?(1)a|b|c) a c - /(?(1)a|b|c)/: Switch (?(condition)... contains too many branches
+(?(?{0})a|b) a n - -
+(?(?{0})b|a) a y $& a
+(?(?{1})b|a) a n - -
+(?(?{1})a|b) a y $& a
+(?(?!a)a|b) a n - -
+(?(?!a)b|a) a y $& a
+(?(?=a)b|a) a n - -
+(?(?=a)a|b) a y $& a
+(?=(a+?))(\1ab) aaab y $2 aab
+^(?=(a+?))\1ab aaab n - -
+(\w+:)+ one: y $1 one:
+$(?<=^(a)) a y $1 a
+(?=(a+?))(\1ab) aaab y $2 aab
+^(?=(a+?))\1ab aaab n - -
+([\w:]+::)?(\w+)$ abcd: n - -
+([\w:]+::)?(\w+)$ abcd y $1-$2 -abcd
+([\w:]+::)?(\w+)$ xy:z:::abcd y $1-$2 xy:z:::-abcd
+^[^bcd]*(c+) aexycd y $1 c
+(a*)b+ caab y $1 aa
+([\w:]+::)?(\w+)$ abcd: n - -
+([\w:]+::)?(\w+)$ abcd y $1-$2 -abcd
+([\w:]+::)?(\w+)$ xy:z:::abcd y $1-$2 xy:z:::-abcd
+^[^bcd]*(c+) aexycd y $1 c
+(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a}) yaaxxaaaacd y $b 3
+(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a}) yaaxxaaaacd y $b 4
+(>a+)ab aaab n - -
+((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x
diff --git a/t/op/regexp.t b/t/op/regexp.t
index 803f1d0dab..273608433e 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -19,7 +19,11 @@
# Column 4 contains a string, usually C<$&>.
#
# Column 5 contains the expected result of double-quote
-# interpolating that string after the match.
+# interpolating that string after the match, or start of error message.
+#
+# Columns 1, 2 and 5 are \n-interpolated.
+
+$iters = shift || 1; # Poor man performance suite, 10000 is OK.
open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests')
|| die "Can't open re_tests";
@@ -30,24 +34,33 @@ seek(TESTS,0,0);
$. = 0;
$| = 1;
-print "1..$numtests\n";
+print "1..$numtests\n# $iters iterations\n";
TEST:
while (<TESTS>) {
($pat, $subject, $result, $repl, $expect) = split(/[\t\n]/,$_);
$input = join(':',$pat,$subject,$result,$repl,$expect);
$pat = "'$pat'" unless $pat =~ /^[:']/;
+ $pat =~ s/\\n/\n/g;
+ $subject =~ s/\\n/\n/g;
+ $expect =~ s/\\n/\n/g;
+ $expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/;
for $study ("", "study \$subject") {
- eval "$study; \$match = (\$subject =~ m$pat); \$got = \"$repl\";";
+ $c = $iters;
+ eval "$study; \$match = (\$subject =~ m$pat) while \$c--; \$got = \"$repl\";";
+ chomp( $err = $@ );
if ($result eq 'c') {
- if ($@ !~ m!^\Q$expect!) { print "not ok $.\n"; next TEST }
+ if ($err !~ m!^\Q$expect!) { print "not ok $. (compile) $input => `$err'\n"; next TEST }
last; # no need to study a syntax error
}
+ elsif ($@) {
+ print "not ok $. $input => error `$err'\n"; next TEST;
+ }
elsif ($result eq 'n') {
- if ($match) { print "not ok $. $input => $got\n"; next TEST }
+ if ($match) { print "not ok $. ($study) $input => false positive\n"; next TEST }
}
else {
if (!$match || $got ne $expect) {
- print "not ok $. $input => $got\n";
+ print "not ok $. ($study) $input => `$got', match=$match\n";
next TEST;
}
}
diff --git a/t/op/split.t b/t/op/split.t
index 07246522ee..7f0accea5e 100755
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -2,7 +2,7 @@
# $RCSfile: split.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:26 $
-print "1..20\n";
+print "1..25\n";
$FS = ':';
@@ -90,3 +90,24 @@ print $_ eq "Z" ? "ok 19\n" : "#$_\nnot ok 19\n";
$_ = join('|', split(/.?/, '',-1), 'Z');
print $_ eq "Z" ? "ok 20\n" : "#$_\nnot ok 20\n";
+
+# Are /^/m patterns scanned?
+$_ = join '|', split(/^a/m, "a b a\na d a", 20);
+print $_ eq "| b a\n| d a" ? "ok 21\n" : "not ok 21\n# `$_'\n";
+
+# Are /$/m patterns scanned?
+$_ = join '|', split(/a$/m, "a b a\na d a", 20);
+print $_ eq "a b |\na d |" ? "ok 22\n" : "not ok 22\n# `$_'\n";
+
+# Are /^/m patterns scanned?
+$_ = join '|', split(/^aa/m, "aa b aa\naa d aa", 20);
+print $_ eq "| b aa\n| d aa" ? "ok 23\n" : "not ok 23\n# `$_'\n";
+
+# Are /$/m patterns scanned?
+$_ = join '|', split(/aa$/m, "aa b aa\naa d aa", 20);
+print $_ eq "aa b |\naa d |" ? "ok 24\n" : "not ok 24\n# `$_'\n";
+
+# Greedyness:
+$_ = "a : b :c: d";
+@ary = split(/\s*:\s*/);
+if (($res = join(".",@ary)) eq "a.b.c.d") {print "ok 25\n";} else {print "not ok 25\n# res=`$res' != `a.b.c.d'\n";}
diff --git a/t/op/subst.t b/t/op/subst.t
index efea970dfc..c6cfb8c96d 100755
--- a/t/op/subst.t
+++ b/t/op/subst.t
@@ -2,7 +2,7 @@
# $RCSfile: s.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:22 $
-print "1..62\n";
+print "1..67\n";
$x = 'foo';
$_ = "x";
@@ -157,11 +157,11 @@ $x ne $x || s/bb/x/;
print $_ eq 'aaaXXXXxb' ? "ok 39\n" : "not ok 39\n";
$_ = 'abc123xyz';
-s/\d+/$&*2/e; # yields 'abc246xyz'
+s/(\d+)/$1*2/e; # yields 'abc246xyz'
print $_ eq 'abc246xyz' ? "ok 40\n" : "not ok 40\n";
-s/\d+/sprintf("%5d",$&)/e; # yields 'abc 246xyz'
+s/(\d+)/sprintf("%5d",$1)/e; # yields 'abc 246xyz'
print $_ eq 'abc 246xyz' ? "ok 41\n" : "not ok 41\n";
-s/\w/$& x 2/eg; # yields 'aabbcc 224466xxyyzz'
+s/(\w)/$1 x 2/eg; # yields 'aabbcc 224466xxyyzz'
print $_ eq 'aabbcc 224466xxyyzz' ? "ok 42\n" : "not ok 42\n";
$_ = "aaaaa";
@@ -232,10 +232,32 @@ print exp_vars('foo $(DIR)/yyy bar',0) eq 'foo $(UNDEFINEDNAME)/xxx/yyy bar'
# a match nested in the RHS of a substitution:
$_ = "abcd";
-s/../$x = $&, m#.#/eg;
+s/(..)/$x = $1, m#.#/eg;
print $x eq "cd" ? "ok 61\n" : "not ok 61\n";
+# Subst and lookbehind
+
+$_="ccccc";
+s/(?<!x)c/x/g;
+print $_ eq "xxxxx" ? "ok 62\n" : "not ok 62 # `$_' ne `xxxxx'\n";
+
+$_="ccccc";
+s/(?<!x)(c)/x/g;
+print $_ eq "xxxxx" ? "ok 63\n" : "not ok 63 # `$_' ne `xxxxx'\n";
+
+$_="foobbarfoobbar";
+s/(?<!r)foobbar/foobar/g;
+print $_ eq "foobarfoobbar" ? "ok 64\n" : "not ok 64 # `$_' ne `foobarfoobbar'\n";
+
+$_="foobbarfoobbar";
+s/(?<!ar)(foobbar)/foobar/g;
+print $_ eq "foobarfoobbar" ? "ok 65\n" : "not ok 65 # `$_' ne `foobarfoobbar'\n";
+
+$_="foobbarfoobbar";
+s/(?<!ar)foobbar/foobar/g;
+print $_ eq "foobarfoobbar" ? "ok 66\n" : "not ok 66 # `$_' ne `foobarfoobbar'\n";
+
# check parsing of split subst with comment
eval 's{foo} # this is a comment, not a delimiter
{bar};';
-print @? ? "not ok 62\n" : "ok 62\n";
+print @? ? "not ok 67\n" : "ok 67\n";