Re: [PATCH] Better version of the Aho-Corasick patch and lots of benchmarks.

Message-ID: <9b18b3110606081556t779de698r82f361d82a05fbc8@mail.gmail.com> (with tweaks) p4raw-id: //depot/perl@28373
author: Yves Orton <demerphq@gmail.com> 2006-06-09 02:56:37 +0200
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2006-06-09 14:06:24 +0000
commit: 07be1b83a6b2d24b492356181ddf70e1c7917ae3 (patch)
tree: c0050e3a1ae2933d9871008a8bd127ba0b97b33f /ext/re
parent: b23a565decf7acb33d46fc5bb7bed5ad79774efe (diff)
download: perl-07be1b83a6b2d24b492356181ddf70e1c7917ae3.tar.gz
3 files changed, 111 insertions, 19 deletions
diff --git a/ext/re/t/re.t b/ext/re/t/re.t
index 2a1923ea79..5f09966d81 100644
--- a/ext/re/t/re.t
+++ b/ext/re/t/re.t
@@ -12,7 +12,7 @@ BEGIN {
 
 use strict;
 
-use Test::More tests => 13;
+use Test::More tests => 14;
 require_ok( 're' );
 
 # setcolor
@@ -58,6 +58,13 @@ re->unimport('taint');
 ok( !( $^H & 0x00100000 ), 'unimport should clear bits in $^H when requested' );
 re->unimport('eval');
 ok( !( $^H & 0x00200000 ), '... and again' );
+my $reg=qr/(foo|bar|baz|blah)/;
+close STDERR;
+eval"use re Debug=>'ALL'";
+my $ok='foo'=~/$reg/;
+eval"no re Debug=>'ALL'";
+ok( $ok, 'No segv!' );
+
 
 package Term::Cap;
 
diff --git a/ext/re/t/regop.pl b/ext/re/t/regop.pl
index 05a34ad154..a548fe4410 100644
--- a/ext/re/t/regop.pl
+++ b/ext/re/t/regop.pl
@@ -1,16 +1,20 @@
-use re Debug=>qw(COMPILE EXECUTE);
+use re Debug=>qw(COMPILE EXECUTE OFFSETS);
 my @tests=(
   XY     =>  'X(A|[B]Q||C|D)Y' ,
   foobar =>  '[f][o][o][b][a][r]',
   x  =>  '.[XY].',
   'ABCD' => '(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)',
+  'D:\\dev/perl/ver/28321_/perl.exe'=>
+  '/(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\.WSF|\\.WSH|\\.pyo|\\.pyc|\\.pyw|\\.py)$/i',
+  'q'=>'[q]',
 );
 while (@tests) {
     my ($str,$pat)=splice @tests,0,2;
     warn "\n";
+    $pat="/$pat/" if substr($pat,0,1) ne '/';
     # string eval to get the free regex message in the right place.
     eval qq[
-        warn "$str"=~/$pat/ ? "%MATCHED%" : "%FAILED%","\n";
+        warn "$str"=~$pat ? "%MATCHED%" : "%FAILED%","\n";
     ];
     die $@ if $@;
 }
diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t
index 8bfacda26a..be82dc925d 100644
--- a/ext/re/t/regop.t
+++ b/ext/re/t/regop.t
@@ -12,29 +12,45 @@ BEGIN {
 
 use strict;
 require "./test.pl";
+our $NUM_SECTS;
+chomp(my @strs= grep { !/^\s*\#/ } <DATA>);
+my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1 );
+my @tests = grep { /\S/ } split /(?=Compiling REx)/, $out;
+# on debug builds we get an EXECUTING... message in there at the top
+shift @tests
+    if $tests[0] =~ /EXECUTING.../;
 
-chomp(my @strs=grep { !/^\s*\#/ } <DATA>);
-my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1);
-my @tests = grep { /\S/ && !/EXECUTING/ } split /(?=Compiling REx)/,$out;
+plan( @tests + 2 + ( @strs - grep { !$_ or /^---/ } @strs ));
 
-plan(2 + (@strs - grep { !$_ or /^---/ } @strs) + @tests);
+is( scalar @tests, $NUM_SECTS,
+    "Expecting output for $NUM_SECTS patterns" );
+ok( defined $out, 'regop.pl returned something defined' );
 
-my $numtests=4;
-is(scalar @tests, $numtests, "Expecting output for $numtests patterns");
-ok(defined $out,'regop.pl');
-$out||="";
-my $test=1;
-foreach my $testout (@tests) {
-    my ($pattern)=$testout=~/Compiling REx "([^"]+)"/;
-    ok($pattern, "Pattern found for test ".($test++));
+$out ||= "";
+my $test= 1;
+foreach my $testout ( @tests ) {
+    my ( $pattern )= $testout=~/Compiling REx "([^"]+)"/;
+    ok( $pattern, "Pattern for test " . ($test++) );
     while (@strs) {
-        my $str=shift @strs;
-        last if !$str or $str=~/^---/;
-        next if $str=~/^\s*#/;
-        ok($testout=~/\Q$str\E/,"$str: /$pattern/");
+        local $_= shift @strs;
+        last if !$_
+             or /^---/;
+        next if /^\s*#/;
+        s/^\s+//;
+        s/\s+$//;
+        ok( $testout=~/\Q$_\E/, "$_: /$pattern/" );
     }
 }
 
+# The format below is simple. Each line is an exact
+# string that must be found in the output.
+# Lines starting the # are comments.
+# Lines starting with --- are seperators indicating
+# that the tests for this result set are finished.
+# If you add a test make sure you update $NUM_SECTS
+# the commented output is just for legacy/debugging purposes
+BEGIN{ $NUM_SECTS= 6 }
+
 __END__
 #Compiling REx "X(A|[B]Q||C|D)Y"
 #size 34
@@ -146,3 +162,68 @@ Start-Class:A-EGP
 only one match : #6 <ABCD>
 Start:4
 minlen 4
+---
+#Compiling REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$"
+#size 48 nodes first at 3
+#first at 3
+#rarest char
+# at 0
+#   1: OPEN1(3)
+#   3:   EXACTF <.>(5)
+#   5:   TRIE-EXACTF(45)
+#        [Start:2 Words:14 Chars:54 Unique:18 States:29 Minlen:2 Maxlen:3 Start-Class:BCEJPVWbcejpvw]
+#          <.COM>
+#          ...  yada yada ... (dmq)
+#          <.py>
+#  45: CLOSE1(47)
+#  47: EOL(48)
+#  48: END(0)
+#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3
+#Offsets: [48]
+#        1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
+#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"...
+#Found floating substr ""$ at offset 30...
+#Starting position does not contradict /^/m...
+#Does not contradict STCLASS...
+#Guessed: match at offset 26
+#Matching REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$..." against ".exe"
+#Matching stclass "EXACTF <.>" against ".exe"
+#  Setting an EVAL scope, savestack=140
+#  26 <21_/perl> <.exe>    |  1:  OPEN1
+#  26 <21_/perl> <.exe>    |  3:  EXACTF <.>
+#  27 <21_/perl.> <exe>    |  5:  TRIE-EXACTF
+#                                 only one match : #2 <.EXE>
+#  30 <21_/perl.exe> <>    | 45:    CLOSE1
+#  30 <21_/perl.exe> <>    | 47:    EOL
+#  30 <21_/perl.exe> <>    | 48:    END
+#Match successful!
+#POP STATE(1)
+#%MATCHED%
+#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
+%MATCHED%
+floating ""$ at 3..4 (checking floating)
+1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
+stclass "EXACTF <.>" minlen 3
+Found floating substr ""$ at offset 30...
+Does not contradict STCLASS...
+Guessed: match at offset 26
+Matching stclass "EXACTF <.>" against ".exe"
+---
+#Compiling REx "[q]"
+#size 12 nodes Got 100 bytes for offset annotations.
+#first at 1
+#Final program:
+#   1: EXACT <q>(3)
+#   3: END(0)
+#anchored "q" at 0 (checking anchored isall) minlen 1
+#Offsets: [12]
+#        1:1[3] 3:4[0]
+#Guessing start of match, REx "[q]" against "q"...
+#Found anchored substr "q" at offset 0...
+#Guessed: match at offset 0
+#%MATCHED%
+#Freeing REx: "[q]"
+Got 100 bytes for offset annotations.
+Offsets: [12]
+1:1[3] 3:4[0]
+%MATCHED%
author	Yves Orton <demerphq@gmail.com>	2006-06-09 02:56:37 +0200
committer	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>	2006-06-09 14:06:24 +0000
commit	07be1b83a6b2d24b492356181ddf70e1c7917ae3 (patch)
tree	c0050e3a1ae2933d9871008a8bd127ba0b97b33f /ext/re
parent	b23a565decf7acb33d46fc5bb7bed5ad79774efe (diff)
download	perl-07be1b83a6b2d24b492356181ddf70e1c7917ae3.tar.gz