summaryrefslogtreecommitdiff
path: root/ext/re
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2006-06-09 02:56:37 +0200
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2006-06-09 14:06:24 +0000
commit07be1b83a6b2d24b492356181ddf70e1c7917ae3 (patch)
treec0050e3a1ae2933d9871008a8bd127ba0b97b33f /ext/re
parentb23a565decf7acb33d46fc5bb7bed5ad79774efe (diff)
downloadperl-07be1b83a6b2d24b492356181ddf70e1c7917ae3.tar.gz
Re: [PATCH] Better version of the Aho-Corasick patch and lots of benchmarks.
Message-ID: <9b18b3110606081556t779de698r82f361d82a05fbc8@mail.gmail.com> (with tweaks) p4raw-id: //depot/perl@28373
Diffstat (limited to 'ext/re')
-rw-r--r--ext/re/t/re.t9
-rw-r--r--ext/re/t/regop.pl8
-rw-r--r--ext/re/t/regop.t113
3 files changed, 111 insertions, 19 deletions
diff --git a/ext/re/t/re.t b/ext/re/t/re.t
index 2a1923ea79..5f09966d81 100644
--- a/ext/re/t/re.t
+++ b/ext/re/t/re.t
@@ -12,7 +12,7 @@ BEGIN {
use strict;
-use Test::More tests => 13;
+use Test::More tests => 14;
require_ok( 're' );
# setcolor
@@ -58,6 +58,13 @@ re->unimport('taint');
ok( !( $^H & 0x00100000 ), 'unimport should clear bits in $^H when requested' );
re->unimport('eval');
ok( !( $^H & 0x00200000 ), '... and again' );
+my $reg=qr/(foo|bar|baz|blah)/;
+close STDERR;
+eval"use re Debug=>'ALL'";
+my $ok='foo'=~/$reg/;
+eval"no re Debug=>'ALL'";
+ok( $ok, 'No segv!' );
+
package Term::Cap;
diff --git a/ext/re/t/regop.pl b/ext/re/t/regop.pl
index 05a34ad154..a548fe4410 100644
--- a/ext/re/t/regop.pl
+++ b/ext/re/t/regop.pl
@@ -1,16 +1,20 @@
-use re Debug=>qw(COMPILE EXECUTE);
+use re Debug=>qw(COMPILE EXECUTE OFFSETS);
my @tests=(
XY => 'X(A|[B]Q||C|D)Y' ,
foobar => '[f][o][o][b][a][r]',
x => '.[XY].',
'ABCD' => '(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)',
+ 'D:\\dev/perl/ver/28321_/perl.exe'=>
+ '/(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\.WSF|\\.WSH|\\.pyo|\\.pyc|\\.pyw|\\.py)$/i',
+ 'q'=>'[q]',
);
while (@tests) {
my ($str,$pat)=splice @tests,0,2;
warn "\n";
+ $pat="/$pat/" if substr($pat,0,1) ne '/';
# string eval to get the free regex message in the right place.
eval qq[
- warn "$str"=~/$pat/ ? "%MATCHED%" : "%FAILED%","\n";
+ warn "$str"=~$pat ? "%MATCHED%" : "%FAILED%","\n";
];
die $@ if $@;
}
diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t
index 8bfacda26a..be82dc925d 100644
--- a/ext/re/t/regop.t
+++ b/ext/re/t/regop.t
@@ -12,29 +12,45 @@ BEGIN {
use strict;
require "./test.pl";
+our $NUM_SECTS;
+chomp(my @strs= grep { !/^\s*\#/ } <DATA>);
+my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1 );
+my @tests = grep { /\S/ } split /(?=Compiling REx)/, $out;
+# on debug builds we get an EXECUTING... message in there at the top
+shift @tests
+ if $tests[0] =~ /EXECUTING.../;
-chomp(my @strs=grep { !/^\s*\#/ } <DATA>);
-my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1);
-my @tests = grep { /\S/ && !/EXECUTING/ } split /(?=Compiling REx)/,$out;
+plan( @tests + 2 + ( @strs - grep { !$_ or /^---/ } @strs ));
-plan(2 + (@strs - grep { !$_ or /^---/ } @strs) + @tests);
+is( scalar @tests, $NUM_SECTS,
+ "Expecting output for $NUM_SECTS patterns" );
+ok( defined $out, 'regop.pl returned something defined' );
-my $numtests=4;
-is(scalar @tests, $numtests, "Expecting output for $numtests patterns");
-ok(defined $out,'regop.pl');
-$out||="";
-my $test=1;
-foreach my $testout (@tests) {
- my ($pattern)=$testout=~/Compiling REx "([^"]+)"/;
- ok($pattern, "Pattern found for test ".($test++));
+$out ||= "";
+my $test= 1;
+foreach my $testout ( @tests ) {
+ my ( $pattern )= $testout=~/Compiling REx "([^"]+)"/;
+ ok( $pattern, "Pattern for test " . ($test++) );
while (@strs) {
- my $str=shift @strs;
- last if !$str or $str=~/^---/;
- next if $str=~/^\s*#/;
- ok($testout=~/\Q$str\E/,"$str: /$pattern/");
+ local $_= shift @strs;
+ last if !$_
+ or /^---/;
+ next if /^\s*#/;
+ s/^\s+//;
+ s/\s+$//;
+ ok( $testout=~/\Q$_\E/, "$_: /$pattern/" );
}
}
+# The format below is simple. Each line is an exact
+# string that must be found in the output.
+# Lines starting the # are comments.
+# Lines starting with --- are seperators indicating
+# that the tests for this result set are finished.
+# If you add a test make sure you update $NUM_SECTS
+# the commented output is just for legacy/debugging purposes
+BEGIN{ $NUM_SECTS= 6 }
+
__END__
#Compiling REx "X(A|[B]Q||C|D)Y"
#size 34
@@ -146,3 +162,68 @@ Start-Class:A-EGP
only one match : #6 <ABCD>
Start:4
minlen 4
+---
+#Compiling REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$"
+#size 48 nodes first at 3
+#first at 3
+#rarest char
+# at 0
+# 1: OPEN1(3)
+# 3: EXACTF <.>(5)
+# 5: TRIE-EXACTF(45)
+# [Start:2 Words:14 Chars:54 Unique:18 States:29 Minlen:2 Maxlen:3 Start-Class:BCEJPVWbcejpvw]
+# <.COM>
+# ... yada yada ... (dmq)
+# <.py>
+# 45: CLOSE1(47)
+# 47: EOL(48)
+# 48: END(0)
+#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3
+#Offsets: [48]
+# 1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
+#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"...
+#Found floating substr ""$ at offset 30...
+#Starting position does not contradict /^/m...
+#Does not contradict STCLASS...
+#Guessed: match at offset 26
+#Matching REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$..." against ".exe"
+#Matching stclass "EXACTF <.>" against ".exe"
+# Setting an EVAL scope, savestack=140
+# 26 <21_/perl> <.exe> | 1: OPEN1
+# 26 <21_/perl> <.exe> | 3: EXACTF <.>
+# 27 <21_/perl.> <exe> | 5: TRIE-EXACTF
+# only one match : #2 <.EXE>
+# 30 <21_/perl.exe> <> | 45: CLOSE1
+# 30 <21_/perl.exe> <> | 47: EOL
+# 30 <21_/perl.exe> <> | 48: END
+#Match successful!
+#POP STATE(1)
+#%MATCHED%
+#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
+%MATCHED%
+floating ""$ at 3..4 (checking floating)
+1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
+stclass "EXACTF <.>" minlen 3
+Found floating substr ""$ at offset 30...
+Does not contradict STCLASS...
+Guessed: match at offset 26
+Matching stclass "EXACTF <.>" against ".exe"
+---
+#Compiling REx "[q]"
+#size 12 nodes Got 100 bytes for offset annotations.
+#first at 1
+#Final program:
+# 1: EXACT <q>(3)
+# 3: END(0)
+#anchored "q" at 0 (checking anchored isall) minlen 1
+#Offsets: [12]
+# 1:1[3] 3:4[0]
+#Guessing start of match, REx "[q]" against "q"...
+#Found anchored substr "q" at offset 0...
+#Guessed: match at offset 0
+#%MATCHED%
+#Freeing REx: "[q]"
+Got 100 bytes for offset annotations.
+Offsets: [12]
+1:1[3] 3:4[0]
+%MATCHED%