diff options
author | Yves Orton <demerphq@gmail.com> | 2006-06-09 02:56:37 +0200 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2006-06-09 14:06:24 +0000 |
commit | 07be1b83a6b2d24b492356181ddf70e1c7917ae3 (patch) | |
tree | c0050e3a1ae2933d9871008a8bd127ba0b97b33f /ext/re | |
parent | b23a565decf7acb33d46fc5bb7bed5ad79774efe (diff) | |
download | perl-07be1b83a6b2d24b492356181ddf70e1c7917ae3.tar.gz |
Re: [PATCH] Better version of the Aho-Corasick patch and lots of benchmarks.
Message-ID: <9b18b3110606081556t779de698r82f361d82a05fbc8@mail.gmail.com>
(with tweaks)
p4raw-id: //depot/perl@28373
Diffstat (limited to 'ext/re')
-rw-r--r-- | ext/re/t/re.t | 9 | ||||
-rw-r--r-- | ext/re/t/regop.pl | 8 | ||||
-rw-r--r-- | ext/re/t/regop.t | 113 |
3 files changed, 111 insertions, 19 deletions
diff --git a/ext/re/t/re.t b/ext/re/t/re.t index 2a1923ea79..5f09966d81 100644 --- a/ext/re/t/re.t +++ b/ext/re/t/re.t @@ -12,7 +12,7 @@ BEGIN { use strict; -use Test::More tests => 13; +use Test::More tests => 14; require_ok( 're' ); # setcolor @@ -58,6 +58,13 @@ re->unimport('taint'); ok( !( $^H & 0x00100000 ), 'unimport should clear bits in $^H when requested' ); re->unimport('eval'); ok( !( $^H & 0x00200000 ), '... and again' ); +my $reg=qr/(foo|bar|baz|blah)/; +close STDERR; +eval"use re Debug=>'ALL'"; +my $ok='foo'=~/$reg/; +eval"no re Debug=>'ALL'"; +ok( $ok, 'No segv!' ); + package Term::Cap; diff --git a/ext/re/t/regop.pl b/ext/re/t/regop.pl index 05a34ad154..a548fe4410 100644 --- a/ext/re/t/regop.pl +++ b/ext/re/t/regop.pl @@ -1,16 +1,20 @@ -use re Debug=>qw(COMPILE EXECUTE); +use re Debug=>qw(COMPILE EXECUTE OFFSETS); my @tests=( XY => 'X(A|[B]Q||C|D)Y' , foobar => '[f][o][o][b][a][r]', x => '.[XY].', 'ABCD' => '(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)', + 'D:\\dev/perl/ver/28321_/perl.exe'=> + '/(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\.WSF|\\.WSH|\\.pyo|\\.pyc|\\.pyw|\\.py)$/i', + 'q'=>'[q]', ); while (@tests) { my ($str,$pat)=splice @tests,0,2; warn "\n"; + $pat="/$pat/" if substr($pat,0,1) ne '/'; # string eval to get the free regex message in the right place. eval qq[ - warn "$str"=~/$pat/ ? "%MATCHED%" : "%FAILED%","\n"; + warn "$str"=~$pat ? "%MATCHED%" : "%FAILED%","\n"; ]; die $@ if $@; } diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t index 8bfacda26a..be82dc925d 100644 --- a/ext/re/t/regop.t +++ b/ext/re/t/regop.t @@ -12,29 +12,45 @@ BEGIN { use strict; require "./test.pl"; +our $NUM_SECTS; +chomp(my @strs= grep { !/^\s*\#/ } <DATA>); +my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1 ); +my @tests = grep { /\S/ } split /(?=Compiling REx)/, $out; +# on debug builds we get an EXECUTING... message in there at the top +shift @tests + if $tests[0] =~ /EXECUTING.../; -chomp(my @strs=grep { !/^\s*\#/ } <DATA>); -my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1); -my @tests = grep { /\S/ && !/EXECUTING/ } split /(?=Compiling REx)/,$out; +plan( @tests + 2 + ( @strs - grep { !$_ or /^---/ } @strs )); -plan(2 + (@strs - grep { !$_ or /^---/ } @strs) + @tests); +is( scalar @tests, $NUM_SECTS, + "Expecting output for $NUM_SECTS patterns" ); +ok( defined $out, 'regop.pl returned something defined' ); -my $numtests=4; -is(scalar @tests, $numtests, "Expecting output for $numtests patterns"); -ok(defined $out,'regop.pl'); -$out||=""; -my $test=1; -foreach my $testout (@tests) { - my ($pattern)=$testout=~/Compiling REx "([^"]+)"/; - ok($pattern, "Pattern found for test ".($test++)); +$out ||= ""; +my $test= 1; +foreach my $testout ( @tests ) { + my ( $pattern )= $testout=~/Compiling REx "([^"]+)"/; + ok( $pattern, "Pattern for test " . ($test++) ); while (@strs) { - my $str=shift @strs; - last if !$str or $str=~/^---/; - next if $str=~/^\s*#/; - ok($testout=~/\Q$str\E/,"$str: /$pattern/"); + local $_= shift @strs; + last if !$_ + or /^---/; + next if /^\s*#/; + s/^\s+//; + s/\s+$//; + ok( $testout=~/\Q$_\E/, "$_: /$pattern/" ); } } +# The format below is simple. Each line is an exact +# string that must be found in the output. +# Lines starting the # are comments. +# Lines starting with --- are seperators indicating +# that the tests for this result set are finished. +# If you add a test make sure you update $NUM_SECTS +# the commented output is just for legacy/debugging purposes +BEGIN{ $NUM_SECTS= 6 } + __END__ #Compiling REx "X(A|[B]Q||C|D)Y" #size 34 @@ -146,3 +162,68 @@ Start-Class:A-EGP only one match : #6 <ABCD> Start:4 minlen 4 +--- +#Compiling REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$" +#size 48 nodes first at 3 +#first at 3 +#rarest char +# at 0 +# 1: OPEN1(3) +# 3: EXACTF <.>(5) +# 5: TRIE-EXACTF(45) +# [Start:2 Words:14 Chars:54 Unique:18 States:29 Minlen:2 Maxlen:3 Start-Class:BCEJPVWbcejpvw] +# <.COM> +# ... yada yada ... (dmq) +# <.py> +# 45: CLOSE1(47) +# 47: EOL(48) +# 48: END(0) +#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3 +#Offsets: [48] +# 1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0] +#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"... +#Found floating substr ""$ at offset 30... +#Starting position does not contradict /^/m... +#Does not contradict STCLASS... +#Guessed: match at offset 26 +#Matching REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$..." against ".exe" +#Matching stclass "EXACTF <.>" against ".exe" +# Setting an EVAL scope, savestack=140 +# 26 <21_/perl> <.exe> | 1: OPEN1 +# 26 <21_/perl> <.exe> | 3: EXACTF <.> +# 27 <21_/perl.> <exe> | 5: TRIE-EXACTF +# only one match : #2 <.EXE> +# 30 <21_/perl.exe> <> | 45: CLOSE1 +# 30 <21_/perl.exe> <> | 47: EOL +# 30 <21_/perl.exe> <> | 48: END +#Match successful! +#POP STATE(1) +#%MATCHED% +#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."...... +%MATCHED% +floating ""$ at 3..4 (checking floating) +1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0] +stclass "EXACTF <.>" minlen 3 +Found floating substr ""$ at offset 30... +Does not contradict STCLASS... +Guessed: match at offset 26 +Matching stclass "EXACTF <.>" against ".exe" +--- +#Compiling REx "[q]" +#size 12 nodes Got 100 bytes for offset annotations. +#first at 1 +#Final program: +# 1: EXACT <q>(3) +# 3: END(0) +#anchored "q" at 0 (checking anchored isall) minlen 1 +#Offsets: [12] +# 1:1[3] 3:4[0] +#Guessing start of match, REx "[q]" against "q"... +#Found anchored substr "q" at offset 0... +#Guessed: match at offset 0 +#%MATCHED% +#Freeing REx: "[q]" +Got 100 bytes for offset annotations. +Offsets: [12] +1:1[3] 3:4[0] +%MATCHED% |