summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/re/re.pm18
-rw-r--r--ext/re/t/lexical_debug.pl4
-rw-r--r--ext/re/t/lexical_debug.t25
-rw-r--r--ext/re/t/regop.pl2
-rw-r--r--ext/re/t/regop.t101
-rw-r--r--pp_ctl.c15
-rw-r--r--regcomp.c60
-rw-r--r--regcomp.h3
8 files changed, 148 insertions, 80 deletions
diff --git a/ext/re/re.pm b/ext/re/re.pm
index c9ea5804e5..fe64d4ab50 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -23,16 +23,16 @@ re - Perl pragma to alter regular expression behaviour
/foo${pat}bar/; # disallowed (with or without -T switch)
}
- use re 'debug'; # NOT lexically scoped (as others are)
- /^(.*)$/s; # output debugging info during
- # compile and run time
+ use re 'debug'; # output debugging info during
+ /^(.*)$/s; # compile and run time
+
use re 'debugcolor'; # same as 'debug', but with colored output
...
use re qw(Debug All); # Finer tuned debugging options.
- use re qw(Debug More); # Similarly not lexically scoped.
- no re qw(Debug ALL); # Turn of all re dugging and unload the module.
+ use re qw(Debug More);
+ no re qw(Debug ALL); # Turn of all re dugging in this scope
(We use $^X in these examples because it's tainted by default.)
@@ -188,9 +188,9 @@ Enable TRIE_MORE and all execute compile and execute options.
=back
-The directive C<use re 'debug'> and its equivalents are I<not> lexically
-scoped, as the other directives are. They have both compile-time and run-time
-effects.
+As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
+lexically scoped, as the other directives are. However they have both
+compile-time and run-time effects.
See L<perlmodlib/Pragmatic Modules>.
@@ -297,7 +297,7 @@ sub bits {
} else {
require Carp;
Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
- join(", ",sort { $flags{$a} <=> $flags{$b} } keys %flags ) );
+ join(", ",sort keys %flags ) );
}
}
_load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
diff --git a/ext/re/t/lexical_debug.pl b/ext/re/t/lexical_debug.pl
index c8b7c5bc67..6cdfa49b66 100644
--- a/ext/re/t/lexical_debug.pl
+++ b/ext/re/t/lexical_debug.pl
@@ -20,6 +20,10 @@ no re 'debug';
/fop/ and $count++;
+use re 'debug';
+my $var='zoo|zil|zap';
+/($var)/ or $count++;
+
print "Count=$count\n";
diff --git a/ext/re/t/lexical_debug.t b/ext/re/t/lexical_debug.t
index affa7c50fc..b6a3dcb8ab 100644
--- a/ext/re/t/lexical_debug.t
+++ b/ext/re/t/lexical_debug.t
@@ -11,20 +11,27 @@ BEGIN {
}
use strict;
-require "./test.pl";
+
+# must use a BEGIN or the prototypes wont be respected meaning
+ # tests could pass that shouldn't
+BEGIN { require "./test.pl"; }
my $out = runperl(progfile => "../ext/re/t/lexical_debug.pl", stderr => 1 );
-print "1..7\n";
+print "1..10\n";
# Each pattern will produce an EXACT node with a specific string in
# it, so we will look for that. We can't just look for the string
# alone as the string being matched against contains all of them.
-ok( $out =~ /EXACT <foo>/, "Expect 'foo'");
-ok( $out !~ /EXACT <bar>/, "No 'bar'");
-ok( $out =~ /EXACT <baz>/, "Expect 'baz'");
-ok( $out !~ /EXACT <bop>/, "No 'bop'");
-ok( $out =~ /EXACT <fip>/, "Expect 'fip'");
-ok( $out !~ /EXACT <fop>/, "No 'baz'");
-ok( $out =~ /Count=6\n/,"Count is 6");
+ok( $out =~ /EXACT <foo>/, "Expect 'foo'" );
+ok( $out !~ /EXACT <bar>/, "No 'bar'" );
+ok( $out =~ /EXACT <baz>/, "Expect 'baz'" );
+ok( $out !~ /EXACT <bop>/, "No 'bop'" );
+ok( $out =~ /EXACT <fip>/, "Expect 'fip'" );
+ok( $out !~ /EXACT <fop>/, "No 'baz'" );
+ok( $out =~ /<zil>/, "Got 'zil'" ); # in a TRIE so no EXACT
+ok( $out =~ /<zoo>/, "Got 'zoo'" ); # in a TRIE so no EXACT
+ok( $out =~ /<zap>/, "Got 'zap'" ); # in a TRIE so no EXACT
+ok( $out =~ /Count=7\n/, "Count is 7")
+ or diag($out);
diff --git a/ext/re/t/regop.pl b/ext/re/t/regop.pl
index 88f9f28cdb..8969335220 100644
--- a/ext/re/t/regop.pl
+++ b/ext/re/t/regop.pl
@@ -1,4 +1,4 @@
-use re Debug=>qw(DUMP EXECUTE OFFSETS);
+use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC);
my @tests=(
XY => 'X(A|[B]Q||C|D)Y' ,
foobar => '[f][o][o][b][a][r]',
diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t
index be82dc925d..1ccf8b3eca 100644
--- a/ext/re/t/regop.t
+++ b/ext/re/t/regop.t
@@ -11,7 +11,7 @@ BEGIN {
}
use strict;
-require "./test.pl";
+BEGIN { require "./test.pl"; }
our $NUM_SECTS;
chomp(my @strs= grep { !/^\s*\#/ } <DATA>);
my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1 );
@@ -31,6 +31,7 @@ my $test= 1;
foreach my $testout ( @tests ) {
my ( $pattern )= $testout=~/Compiling REx "([^"]+)"/;
ok( $pattern, "Pattern for test " . ($test++) );
+ my $diaged;
while (@strs) {
local $_= shift @strs;
last if !$_
@@ -38,7 +39,10 @@ foreach my $testout ( @tests ) {
next if /^\s*#/;
s/^\s+//;
s/\s+$//;
- ok( $testout=~/\Q$_\E/, "$_: /$pattern/" );
+ ok( $testout=~/\Q$_\E/, "$_: /$pattern/" )
+ or do {
+ !$diaged++ and diag("$_: /$pattern/\n$testout");
+ };
}
}
@@ -85,7 +89,7 @@ __END__
#%MATCHED%
#Freeing REx: "X(A|[B]Q||C|D)Y"
Compiling REx "X(A|[B]Q||C|D)Y"
-Start-Class:A-D]
+[A-D]
TRIE-EXACT
<BQ>
matched empty string
@@ -95,9 +99,10 @@ Found anchored substr "X" at offset 0...
Guessed: match at offset 0
checking floating
minlen 2
-Words:5
-Unique:5
-States:6
+S:1/6
+W:5
+L:0/2
+C:5/5
%MATCHED%
---
#Compiling REx "[f][o][o][b][a][r]"
@@ -132,36 +137,60 @@ Freeing REx: "[f][o][o][b][a][r]"
%FAILED%
minlen 3
---
-#Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
-#size 20 nodes
-# 1: EXACT <ABC>(3)
-# 3: TRIE-EXACT(20)
-# [Start:4 Words:6 Chars:24 Unique:7 States:10 Minlen:1 Maxlen:1 Start-Class:A-EGP]
-# <ABCP>
-# <ABCG>
-# <ABCE>
-# <ABCB>
-# <ABCA>
-# <ABCD>
-# 19: TAIL(20)
-# 20: END(0)
-#minlen 4
-#Matching REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
-# Setting an EVAL scope, savestack=140
-# 0 <> <ABCD> | 1: EXACT <ABC>
-# 3 <ABC> <D> | 3: TRIE-EXACT
-# only one match : #6 <ABCD>
-# 4 <ABCD> <> | 20: END
-#Match successful!
-#POP STATE(1)
-#%MATCHED%
-#Freeing REx: "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
+# Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
+# Got 164 bytes for offset annotations.
+# TRIE(NATIVE): W:6 C:24 Uq:7 Min:4 Max:4
+# Char : Match Base Ofs A B C P G E D
+# State|---------------------------------------------------
+# # 1| @ 7 + 0[ 2 . . . . . .]
+# # 2| @ 7 + 1[ . 3 . . . . .]
+# # 3| @ 7 + 2[ . . 4 . . . .]
+# # 4| @ A + 0[ 9 8 0 5 6 7 A]
+# # 5| W 1 @ 0
+# # 6| W 2 @ 0
+# # 7| W 3 @ 0
+# # 8| W 4 @ 0
+# # 9| W 5 @ 0
+# # A| W 6 @ 0
+# Final program:
+# 1: EXACT <ABC>(3)
+# 3: TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)
+# <ABCP>
+# <ABCG>
+# <ABCE>
+# <ABCB>
+# <ABCA>
+# <ABCD>
+# 20: END(0)
+# anchored "ABC" at 0 (checking anchored) minlen 4
+# Offsets: [20]
+# 1:4[3] 3:4[15] 19:32[0] 20:34[0]
+# Guessing start of match in sv for REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
+# Found anchored substr "ABC" at offset 0...
+# Guessed: match at offset 0
+# Matching REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
+# 0 <> <ABCD> | 1:EXACT <ABC>(3)
+# 3 <ABC> <D> | 3:TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)
+# 3 <ABC> <D> | State: 4 Accepted: 0 Charid: 7 CP: 44 After State: a
+# 4 <ABCD> <> | State: a Accepted: 1 Charid: 6 CP: 0 After State: 0
+# got 1 possible matches
+# only one match left: #6 <ABCD>
+# 4 <ABCD> <> | 20:END(0)
+# Match successful!
+# %MATCHED%
+# Freeing REx: "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
%MATCHED%
EXACT <ABC>
-Start-Class:A-EGP
-only one match : #6 <ABCD>
-Start:4
+TRIEC-EXACT
+[A-EGP]
+only one match left: #6 <ABCD>
+S:4/10
+W:6
+L:1/1
+C:24/7
minlen 4
+(checking anchored)
+anchored "ABC" at 0
---
#Compiling REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$"
#size 48 nodes first at 3
@@ -202,12 +231,12 @@ minlen 4
#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
%MATCHED%
floating ""$ at 3..4 (checking floating)
-1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
-stclass "EXACTF <.>" minlen 3
+1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
+stclass EXACTF <.> minlen 3
Found floating substr ""$ at offset 30...
Does not contradict STCLASS...
Guessed: match at offset 26
-Matching stclass "EXACTF <.>" against ".exe"
+Matching stclass EXACTF <.> against ".exe"
---
#Compiling REx "[q]"
#size 12 nodes Got 100 bytes for offset annotations.
diff --git a/pp_ctl.c b/pp_ctl.c
index 8b1159e099..0a59e6234b 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -131,10 +131,19 @@ PP(pp_regcomp)
if (!re || !re->precomp || re->prelen != (I32)len ||
memNE(re->precomp, t, len))
{
+ regexp_engine * eng = NULL;
+
if (re) {
+ eng = re->engine;
ReREFCNT_dec(re);
PM_SETRE(pm, NULL); /* crucial if regcomp aborts */
+ } else if (PL_curcop->cop_hints_hash) {
+ SV *ptr = Perl_refcounted_he_fetch(aTHX_ PL_curcop->cop_hints_hash, 0,
+ "regcomp", 7, 0, 0);
+ if (ptr && SvIOK(ptr) && SvIV(ptr))
+ eng = INT2PTR(regexp_engine*,SvIV(ptr));
}
+
if (PL_op->op_flags & OPf_SPECIAL)
PL_reginterp_cnt = I32_MAX; /* Mark as safe. */
@@ -146,7 +155,11 @@ PP(pp_regcomp)
if (pm->op_pmdynflags & PMdf_UTF8)
t = (char*)bytes_to_utf8((U8*)t, &len);
}
- PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm));
+ if (eng)
+ PM_SETRE(pm, CALLREGCOMP_ENG(eng,(char *)t, (char *)t + len, pm));
+ else
+ PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm));
+
if (!DO_UTF8(tmpstr) && (pm->op_pmdynflags & PMdf_UTF8))
Safefree(t);
PL_reginterp_cnt = 0; /* XXXX Be extra paranoid - needed
diff --git a/regcomp.c b/regcomp.c
index 86e6865a5f..0a5f2fd963 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -820,7 +820,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth)
PerlIO_printf( Perl_debug_log, "%.*s", colwidth, "--------");
PerlIO_printf( Perl_debug_log, "\n");
- for( state = 1 ; state < trie->laststate ; state++ ) {
+ for( state = 1 ; state < trie->statecount ; state++ ) {
const U32 base = trie->states[ state ].trans.base;
PerlIO_printf( Perl_debug_log, "%*s#%4"UVXf"|", (int)depth * 2 + 2,"", (UV)state);
@@ -903,10 +903,13 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc
(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
PERL_PV_ESCAPE_FIRSTCHAR
) ,
- TRIE_LIST_ITEM(state,charid).forid,
- (UV)TRIE_LIST_ITEM(state,charid).newstate
- );
- }
+ TRIE_LIST_ITEM(state,charid).forid,
+ (UV)TRIE_LIST_ITEM(state,charid).newstate
+ );
+ if (!(charid % 10))
+ PerlIO_printf( Perl_debug_log, "\n%*s| ",
+ (depth * 2) + 14,"");
+ }
}
PerlIO_printf( Perl_debug_log, "\n");
}
@@ -1098,10 +1101,11 @@ is the recommended Unicode-aware way of saying
*(d++) = uv;
*/
-#define TRIE_STORE_REVCHAR \
+#define TRIE_STORE_REVCHAR \
STMT_START { \
- SV *tmp = Perl_newSVpvf_nocontext( "%c", (int)uvc ); \
+ SV *tmp = newSVpvs(""); \
if (UTF) SvUTF8_on(tmp); \
+ Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc ); \
av_push( TRIE_REVCHARMAP(trie), tmp ); \
} STMT_END
@@ -1393,6 +1397,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
STRLEN transcount = 1;
+ DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
+ "%*sCompiling trie using list compiler\n",
+ (int)depth * 2 + 2, ""));
+
Newxz( trie->states, TRIE_CHARCOUNT(trie) + 2, reg_trie_state );
TRIE_LIST_NEW(1);
next_alloc = 2;
@@ -1455,13 +1463,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
} /* end second pass */
- trie->laststate = next_alloc;
+ /* next alloc is the NEXT state to be allocated */
+ trie->statecount = next_alloc;
Renew( trie->states, next_alloc, reg_trie_state );
/* and now dump it out before we compress it */
DEBUG_TRIE_COMPILE_MORE_r(
dump_trie_interim_list(trie,next_alloc,depth+1)
- );
+ );
Newxz( trie->trans, transcount ,reg_trie_trans );
{
@@ -1570,7 +1579,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
use TRIE_NODENUM() to convert.
*/
-
+ DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log,
+ "%*sCompiling trie using table compiler\n",
+ (int)depth * 2 + 2, ""));
Newxz( trie->trans, ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1,
reg_trie_trans );
@@ -1694,7 +1705,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
const U32 laststate = TRIE_NODENUM( next_alloc );
U32 state, charid;
U32 pos = 0, zp=0;
- trie->laststate = laststate;
+ trie->statecount = laststate;
for ( state = 1 ; state < laststate ; state++ ) {
U8 flag = 0;
@@ -1731,7 +1742,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
}
}
trie->lasttrans = pos + 1;
- Renew( trie->states, laststate + 1, reg_trie_state);
+ Renew( trie->states, laststate, reg_trie_state);
DEBUG_TRIE_COMPILE_MORE_r(
PerlIO_printf( Perl_debug_log,
"%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
@@ -1744,6 +1755,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
} /* end table compress */
}
+ DEBUG_TRIE_COMPILE_MORE_r(
+ PerlIO_printf(Perl_debug_log, "%*sStatecount:%"UVxf" Lasttrans:%"UVxf"\n",
+ (int)depth * 2 + 2, "",
+ (UV)trie->statecount,
+ (UV)trie->lasttrans)
+ );
/* resize the trans array to remove unused space */
Renew( trie->trans, trie->lasttrans, reg_trie_trans);
@@ -1799,12 +1816,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
trie->startstate= 1;
if ( trie->bitmap && !trie->widecharmap && !trie->jump ) {
U32 state;
- DEBUG_OPTIMISE_r(
- PerlIO_printf(Perl_debug_log, "%*sLaststate:%"UVuf"\n",
- (int)depth * 2 + 2, "",
- (UV)trie->laststate)
- );
- for ( state = 1 ; state < trie->laststate-1 ; state++ ) {
+ for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
U32 ofs = 0;
I32 idx = -1;
U32 count = 0;
@@ -1981,7 +1993,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode
reg_trie_data *trie=(reg_trie_data *)RExC_rx->data->data[ARG(source)];
U32 *q;
const U32 ucharcount = trie->uniquecharcount;
- const U32 numstates = trie->laststate;
+ const U32 numstates = trie->statecount;
const U32 ubound = trie->lasttrans + ucharcount;
U32 q_read = 0;
U32 q_write = 0;
@@ -2001,7 +2013,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode
RExC_rx->data->data[ data_slot ] = (void*)aho;
aho->trie=trie;
aho->states=(reg_trie_state *)savepvn((const char*)trie->states,
- (trie->laststate+1)*sizeof(reg_trie_state));
+ numstates * sizeof(reg_trie_state));
Newxz( q, numstates, U32);
Newxz( aho->fail, numstates, U32 );
aho->refcount = 1;
@@ -2050,7 +2062,9 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode
*/
fail[ 0 ] = fail[ 1 ] = 0;
DEBUG_TRIE_COMPILE_r({
- PerlIO_printf(Perl_debug_log, "%*sStclass Failtable: 0", (int)(depth * 2), "");
+ PerlIO_printf(Perl_debug_log, "%*sStclass Failtable (%"UVuf" states): 0",
+ (int)(depth * 2), "", numstates
+ );
for( q_read=1; q_read<numstates; q_read++ ) {
PerlIO_printf(Perl_debug_log, ", %"UVuf, (UV)fail[q_read]);
}
@@ -3725,7 +3739,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
HV * const table = GvHV(PL_hintgv);
if (table) {
SV **ptr= hv_fetchs(table, "regcomp", FALSE);
- if (ptr && SvIOK(*ptr)) {
+ if (ptr && SvIOK(*ptr) && SvIV(*ptr)) {
const regexp_engine *eng=INT2PTR(regexp_engine*,SvIV(*ptr));
DEBUG_COMPILE_r({
PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
@@ -7703,7 +7717,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
Perl_sv_catpvf(aTHX_ sv,
"<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
(UV)trie->startstate,
- (IV)trie->laststate-1,
+ (IV)trie->statecount-1, /* -1 because of the unused 0 element */
(UV)trie->wordcount,
(UV)trie->minlen,
(UV)trie->maxlen,
diff --git a/regcomp.h b/regcomp.h
index 5fb6b14ae6..f7082bf5bb 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -519,7 +519,8 @@ struct _reg_trie_data {
for the node following a given word. */
U16 *nextword; /* optional 1 indexed array to support linked list
of duplicate wordnums */
- U32 laststate; /* Build only */
+ U32 statecount; /* Build only - number of states in the states array
+ (including the unused zero state) */
U32 wordcount; /* Build only */
#ifdef DEBUGGING
STRLEN charcount; /* Build only */