8 files changed, 148 insertions, 80 deletions
diff --git a/ext/re/re.pm b/ext/re/re.pm
index c9ea5804e5..fe64d4ab50 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -23,16 +23,16 @@ re - Perl pragma to alter regular expression behaviour
 	/foo${pat}bar/;		   # disallowed (with or without -T switch)
     }
 
-    use re 'debug';		   # NOT lexically scoped (as others are)
-    /^(.*)$/s;			   # output debugging info during
-    				   #     compile and run time
+    use re 'debug';		   # output debugging info during
+    /^(.*)$/s;			   #     compile and run time
+
 
     use re 'debugcolor';	   # same as 'debug', but with colored output
     ...
 
     use re qw(Debug All);          # Finer tuned debugging options.
-    use re qw(Debug More);         # Similarly not lexically scoped.
-    no re qw(Debug ALL);           # Turn of all re dugging and unload the module.
+    use re qw(Debug More);         
+    no re qw(Debug ALL);           # Turn of all re dugging in this scope
 
 (We use $^X in these examples because it's tainted by default.)
 
@@ -188,9 +188,9 @@ Enable TRIE_MORE and all execute compile and execute options.
 
 =back
 
-The directive C<use re 'debug'> and its equivalents are I<not> lexically
-scoped, as the other directives are.  They have both compile-time and run-time
-effects.
+As of 5.9.5 the directive C<use re 'debug'> and its equivalents are
+lexically scoped, as the other directives are.  However they have both 
+compile-time and run-time effects.
 
 See L<perlmodlib/Pragmatic Modules>.
 
@@ -297,7 +297,7 @@ sub bits {
                 } else {
                     require Carp;
                     Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ",
-                               join(", ",sort { $flags{$a} <=> $flags{$b} } keys %flags ) );
+                               join(", ",sort keys %flags ) );
                 }
             }
             _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS});
diff --git a/ext/re/t/lexical_debug.pl b/ext/re/t/lexical_debug.pl
index c8b7c5bc67..6cdfa49b66 100644
--- a/ext/re/t/lexical_debug.pl
+++ b/ext/re/t/lexical_debug.pl
@@ -20,6 +20,10 @@ no re 'debug';
 
 /fop/ and $count++;
 
+use re 'debug';
+my $var='zoo|zil|zap';
+/($var)/ or $count++;
+
 print "Count=$count\n";
 
 
diff --git a/ext/re/t/lexical_debug.t b/ext/re/t/lexical_debug.t
index affa7c50fc..b6a3dcb8ab 100644
--- a/ext/re/t/lexical_debug.t
+++ b/ext/re/t/lexical_debug.t
@@ -11,20 +11,27 @@ BEGIN {
 }
 
 use strict;
-require "./test.pl";
+
+# must use a BEGIN or the prototypes wont be respected meaning 
+    # tests could pass that shouldn't
+BEGIN { require "./test.pl"; }
 my $out = runperl(progfile => "../ext/re/t/lexical_debug.pl", stderr => 1 );
 
-print "1..7\n";
+print "1..10\n";
 
 # Each pattern will produce an EXACT node with a specific string in 
 # it, so we will look for that. We can't just look for the string
 # alone as the string being matched against contains all of them.
 
-ok( $out =~ /EXACT <foo>/, "Expect 'foo'");
-ok( $out !~ /EXACT <bar>/, "No 'bar'");
-ok( $out =~ /EXACT <baz>/, "Expect 'baz'");
-ok( $out !~ /EXACT <bop>/, "No 'bop'");
-ok( $out =~ /EXACT <fip>/, "Expect 'fip'");
-ok( $out !~ /EXACT <fop>/, "No 'baz'");
-ok( $out =~ /Count=6\n/,"Count is 6");
+ok( $out =~ /EXACT <foo>/, "Expect 'foo'"    );
+ok( $out !~ /EXACT <bar>/, "No 'bar'"        );
+ok( $out =~ /EXACT <baz>/, "Expect 'baz'"    );
+ok( $out !~ /EXACT <bop>/, "No 'bop'"        );
+ok( $out =~ /EXACT <fip>/, "Expect 'fip'"    );
+ok( $out !~ /EXACT <fop>/, "No 'baz'"        );
+ok( $out =~ /<zil>/,       "Got 'zil'"       ); # in a TRIE so no EXACT
+ok( $out =~ /<zoo>/,       "Got 'zoo'"       ); # in a TRIE so no EXACT
+ok( $out =~ /<zap>/,       "Got 'zap'"       ); # in a TRIE so no EXACT
+ok( $out =~ /Count=7\n/,   "Count is 7") 
+    or diag($out);
 
diff --git a/ext/re/t/regop.pl b/ext/re/t/regop.pl
index 88f9f28cdb..8969335220 100644
--- a/ext/re/t/regop.pl
+++ b/ext/re/t/regop.pl
@@ -1,4 +1,4 @@
-use re Debug=>qw(DUMP EXECUTE OFFSETS);
+use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC);
 my @tests=(
   XY     =>  'X(A|[B]Q||C|D)Y' ,
   foobar =>  '[f][o][o][b][a][r]',
diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t
index be82dc925d..1ccf8b3eca 100644
--- a/ext/re/t/regop.t
+++ b/ext/re/t/regop.t
@@ -11,7 +11,7 @@ BEGIN {
 }
 
 use strict;
-require "./test.pl";
+BEGIN { require "./test.pl"; }
 our $NUM_SECTS;
 chomp(my @strs= grep { !/^\s*\#/ } <DATA>);
 my $out = runperl(progfile => "../ext/re/t/regop.pl", stderr => 1 );
@@ -31,6 +31,7 @@ my $test= 1;
 foreach my $testout ( @tests ) {
     my ( $pattern )= $testout=~/Compiling REx "([^"]+)"/;
     ok( $pattern, "Pattern for test " . ($test++) );
+    my $diaged;
     while (@strs) {
         local $_= shift @strs;
         last if !$_
@@ -38,7 +39,10 @@ foreach my $testout ( @tests ) {
         next if /^\s*#/;
         s/^\s+//;
         s/\s+$//;
-        ok( $testout=~/\Q$_\E/, "$_: /$pattern/" );
+        ok( $testout=~/\Q$_\E/, "$_: /$pattern/" )
+            or do {
+                !$diaged++ and diag("$_: /$pattern/\n$testout");
+            };
     }
 }
 
@@ -85,7 +89,7 @@ __END__
 #%MATCHED%
 #Freeing REx: "X(A|[B]Q||C|D)Y"
 Compiling REx "X(A|[B]Q||C|D)Y"
-Start-Class:A-D]
+[A-D]
 TRIE-EXACT
 <BQ>
 matched empty string
@@ -95,9 +99,10 @@ Found anchored substr "X" at offset 0...
 Guessed: match at offset 0
 checking floating
 minlen 2
-Words:5
-Unique:5
-States:6
+S:1/6   
+W:5
+L:0/2
+C:5/5
 %MATCHED%
 ---
 #Compiling REx "[f][o][o][b][a][r]"
@@ -132,36 +137,60 @@ Freeing REx: "[f][o][o][b][a][r]"
 %FAILED%
 minlen 3
 ---
-#Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
-#size 20 nodes
-#   1: EXACT <ABC>(3)
-#   3: TRIE-EXACT(20)
-#      [Start:4 Words:6 Chars:24 Unique:7 States:10 Minlen:1 Maxlen:1 Start-Class:A-EGP]
-#        <ABCP>
-#        <ABCG>
-#        <ABCE>
-#        <ABCB>
-#        <ABCA>
-#        <ABCD>
-#  19: TAIL(20)
-#  20: END(0)
-#minlen 4
-#Matching REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
-#  Setting an EVAL scope, savestack=140
-#   0 <> <ABCD>            |  1:  EXACT <ABC>
-#   3 <ABC> <D>            |  3:  TRIE-EXACT
-#                                 only one match : #6 <ABCD>
-#   4 <ABCD> <>            | 20:    END
-#Match successful!
-#POP STATE(1)
-#%MATCHED%
-#Freeing REx: "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
+# Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
+# Got 164 bytes for offset annotations.
+#     TRIE(NATIVE): W:6 C:24 Uq:7 Min:4 Max:4
+#       Char : Match Base  Ofs     A   B   C   P   G   E   D
+#       State|---------------------------------------------------
+#       #   1|       @   7 + 0[    2   .   .   .   .   .   .]
+#       #   2|       @   7 + 1[    .   3   .   .   .   .   .]
+#       #   3|       @   7 + 2[    .   .   4   .   .   .   .]
+#       #   4|       @   A + 0[    9   8   0   5   6   7   A]
+#       #   5| W   1 @   0 
+#       #   6| W   2 @   0 
+#       #   7| W   3 @   0 
+#       #   8| W   4 @   0 
+#       #   9| W   5 @   0 
+#       #   A| W   6 @   0 
+# Final program:
+#    1: EXACT <ABC>(3)
+#    3: TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)
+#       <ABCP> 
+#       <ABCG> 
+#       <ABCE> 
+#       <ABCB> 
+#       <ABCA> 
+#       <ABCD> 
+#   20: END(0)
+# anchored "ABC" at 0 (checking anchored) minlen 4 
+# Offsets: [20]
+# 	1:4[3] 3:4[15] 19:32[0] 20:34[0] 
+# Guessing start of match in sv for REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
+# Found anchored substr "ABC" at offset 0...
+# Guessed: match at offset 0
+# Matching REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
+#    0 <> <ABCD>               |  1:EXACT <ABC>(3)
+#    3 <ABC> <D>               |  3:TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)
+#    3 <ABC> <D>               |    State:    4 Accepted:    0 Charid:  7 CP:  44 After State:    a
+#    4 <ABCD> <>               |    State:    a Accepted:    1 Charid:  6 CP:   0 After State:    0
+#                                   got 1 possible matches
+#                                   only one match left: #6 <ABCD>
+#    4 <ABCD> <>               | 20:END(0)
+# Match successful!
+# %MATCHED%
+# Freeing REx: "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
 %MATCHED%
 EXACT <ABC>
-Start-Class:A-EGP
-only one match : #6 <ABCD>
-Start:4
+TRIEC-EXACT
+[A-EGP]
+only one match left: #6 <ABCD>
+S:4/10
+W:6
+L:1/1
+C:24/7
 minlen 4
+(checking anchored)
+anchored "ABC" at 0
 ---
 #Compiling REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|\.pyo|\.pyc|\.pyw|\.py)$"
 #size 48 nodes first at 3
@@ -202,12 +231,12 @@ minlen 4
 #Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
 %MATCHED%
 floating ""$ at 3..4 (checking floating)
-1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
-stclass "EXACTF <.>" minlen 3
+1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
+stclass EXACTF <.> minlen 3
 Found floating substr ""$ at offset 30...
 Does not contradict STCLASS...
 Guessed: match at offset 26
-Matching stclass "EXACTF <.>" against ".exe"
+Matching stclass EXACTF <.> against ".exe"
 ---
 #Compiling REx "[q]"
 #size 12 nodes Got 100 bytes for offset annotations.
diff --git a/pp_ctl.c b/pp_ctl.c
index 8b1159e099..0a59e6234b 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -131,10 +131,19 @@ PP(pp_regcomp)
 	if (!re || !re->precomp || re->prelen != (I32)len ||
 	    memNE(re->precomp, t, len))
 	{
+	    regexp_engine * eng = NULL;
+	    
 	    if (re) {
+	        eng = re->engine;
 	        ReREFCNT_dec(re);
 		PM_SETRE(pm, NULL);	/* crucial if regcomp aborts */
+	    } else if (PL_curcop->cop_hints_hash) {
+	        SV *ptr = Perl_refcounted_he_fetch(aTHX_ PL_curcop->cop_hints_hash, 0,
+				       "regcomp", 7, 0, 0);
+                if (ptr && SvIOK(ptr) && SvIV(ptr))
+                    eng = INT2PTR(regexp_engine*,SvIV(ptr));
 	    }
+	        
 	    if (PL_op->op_flags & OPf_SPECIAL)
 		PL_reginterp_cnt = I32_MAX; /* Mark as safe.  */
 
@@ -146,7 +155,11 @@ PP(pp_regcomp)
 		if (pm->op_pmdynflags & PMdf_UTF8)
 		    t = (char*)bytes_to_utf8((U8*)t, &len);
 	    }
-	    PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm));
+	    if (eng) 
+	        PM_SETRE(pm, CALLREGCOMP_ENG(eng,(char *)t, (char *)t + len, pm));
+            else
+                PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm));
+                
 	    if (!DO_UTF8(tmpstr) && (pm->op_pmdynflags & PMdf_UTF8))
 		Safefree(t);
 	    PL_reginterp_cnt = 0;	/* XXXX Be extra paranoid - needed
diff --git a/regcomp.c b/regcomp.c
index 86e6865a5f..0a5f2fd963 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -820,7 +820,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth)
         PerlIO_printf( Perl_debug_log, "%.*s", colwidth, "--------");
     PerlIO_printf( Perl_debug_log, "\n");
 
-    for( state = 1 ; state < trie->laststate ; state++ ) {
+    for( state = 1 ; state < trie->statecount ; state++ ) {
 	const U32 base = trie->states[ state ].trans.base;
 
         PerlIO_printf( Perl_debug_log, "%*s#%4"UVXf"|", (int)depth * 2 + 2,"", (UV)state);
@@ -903,10 +903,13 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc
 	                    (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
 	                    PERL_PV_ESCAPE_FIRSTCHAR 
                     ) ,
-                TRIE_LIST_ITEM(state,charid).forid,
-                (UV)TRIE_LIST_ITEM(state,charid).newstate
-            );
-        }
+                    TRIE_LIST_ITEM(state,charid).forid,
+                    (UV)TRIE_LIST_ITEM(state,charid).newstate
+                );
+                if (!(charid % 10)) 
+                    PerlIO_printf( Perl_debug_log, "\n%*s| ",
+                        (depth * 2) + 14,"");
+            }
         }
         PerlIO_printf( Perl_debug_log, "\n");
     }
@@ -1098,10 +1101,11 @@ is the recommended Unicode-aware way of saying
     *(d++) = uv;
 */
 
-#define TRIE_STORE_REVCHAR                                                    \
+#define TRIE_STORE_REVCHAR                                                 \
     STMT_START {                                                           \
-	SV *tmp = Perl_newSVpvf_nocontext( "%c", (int)uvc );               \
+	SV *tmp = newSVpvs("");                                            \
 	if (UTF) SvUTF8_on(tmp);                                           \
+	Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc );                       \
 	av_push( TRIE_REVCHARMAP(trie), tmp );                             \
     } STMT_END
 
@@ -1393,6 +1397,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
         STRLEN transcount = 1;
 
+        DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, 
+            "%*sCompiling trie using list compiler\n",
+            (int)depth * 2 + 2, ""));
+
         Newxz( trie->states, TRIE_CHARCOUNT(trie) + 2, reg_trie_state );
         TRIE_LIST_NEW(1);
         next_alloc = 2;
@@ -1455,13 +1463,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
         } /* end second pass */
 
-        trie->laststate = next_alloc;
+        /* next alloc is the NEXT state to be allocated */
+        trie->statecount = next_alloc; 
         Renew( trie->states, next_alloc, reg_trie_state );
 
         /* and now dump it out before we compress it */
         DEBUG_TRIE_COMPILE_MORE_r(
             dump_trie_interim_list(trie,next_alloc,depth+1)
-                    );
+        );
 
         Newxz( trie->trans, transcount ,reg_trie_trans );
         {
@@ -1570,7 +1579,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
            use TRIE_NODENUM() to convert.
 
         */
-
+        DEBUG_TRIE_COMPILE_MORE_r( PerlIO_printf( Perl_debug_log, 
+            "%*sCompiling trie using table compiler\n",
+            (int)depth * 2 + 2, ""));
 
         Newxz( trie->trans, ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1,
               reg_trie_trans );
@@ -1694,7 +1705,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         const U32 laststate = TRIE_NODENUM( next_alloc );
 	U32 state, charid;
         U32 pos = 0, zp=0;
-        trie->laststate = laststate;
+        trie->statecount = laststate;
 
         for ( state = 1 ; state < laststate ; state++ ) {
             U8 flag = 0;
@@ -1731,7 +1742,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
             }
         }
         trie->lasttrans = pos + 1;
-        Renew( trie->states, laststate + 1, reg_trie_state);
+        Renew( trie->states, laststate, reg_trie_state);
         DEBUG_TRIE_COMPILE_MORE_r(
                 PerlIO_printf( Perl_debug_log,
 		    "%*sAlloc: %d Orig: %"IVdf" elements, Final:%"IVdf". Savings of %%%5.2f\n",
@@ -1744,6 +1755,12 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 
         } /* end table compress */
     }
+    DEBUG_TRIE_COMPILE_MORE_r(
+            PerlIO_printf(Perl_debug_log, "%*sStatecount:%"UVxf" Lasttrans:%"UVxf"\n",
+                (int)depth * 2 + 2, "",
+                (UV)trie->statecount,
+                (UV)trie->lasttrans)
+    );
     /* resize the trans array to remove unused space */
     Renew( trie->trans, trie->lasttrans, reg_trie_trans);
 
@@ -1799,12 +1816,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
         trie->startstate= 1;
         if ( trie->bitmap && !trie->widecharmap && !trie->jump  ) {
             U32 state;
-            DEBUG_OPTIMISE_r(
-                PerlIO_printf(Perl_debug_log, "%*sLaststate:%"UVuf"\n",
-                    (int)depth * 2 + 2, "",
-                    (UV)trie->laststate)
-	    );
-            for ( state = 1 ; state < trie->laststate-1 ; state++ ) {
+            for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
                 U32 ofs = 0;
                 I32 idx = -1;
                 U32 count = 0;
@@ -1981,7 +1993,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
     reg_trie_data *trie=(reg_trie_data *)RExC_rx->data->data[ARG(source)];
     U32 *q;
     const U32 ucharcount = trie->uniquecharcount;
-    const U32 numstates = trie->laststate;
+    const U32 numstates = trie->statecount;
     const U32 ubound = trie->lasttrans + ucharcount;
     U32 q_read = 0;
     U32 q_write = 0;
@@ -2001,7 +2013,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
     RExC_rx->data->data[ data_slot ] = (void*)aho;
     aho->trie=trie;
     aho->states=(reg_trie_state *)savepvn((const char*)trie->states,
-        (trie->laststate+1)*sizeof(reg_trie_state));
+        numstates * sizeof(reg_trie_state));
     Newxz( q, numstates, U32);
     Newxz( aho->fail, numstates, U32 );
     aho->refcount = 1;
@@ -2050,7 +2062,9 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source,  regnode
      */
     fail[ 0 ] = fail[ 1 ] = 0;
     DEBUG_TRIE_COMPILE_r({
-        PerlIO_printf(Perl_debug_log, "%*sStclass Failtable: 0", (int)(depth * 2), "");
+        PerlIO_printf(Perl_debug_log, "%*sStclass Failtable (%"UVuf" states): 0", 
+            (int)(depth * 2), "", numstates
+        );
         for( q_read=1; q_read<numstates; q_read++ ) {
             PerlIO_printf(Perl_debug_log, ", %"UVuf, (UV)fail[q_read]);
         }
@@ -3725,7 +3739,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
     HV * const table = GvHV(PL_hintgv);
     if (table) {
         SV **ptr= hv_fetchs(table, "regcomp", FALSE);
-        if (ptr && SvIOK(*ptr)) {
+        if (ptr && SvIOK(*ptr) && SvIV(*ptr)) {
             const regexp_engine *eng=INT2PTR(regexp_engine*,SvIV(*ptr));
             DEBUG_COMPILE_r({
                 PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
@@ -7703,7 +7717,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
             Perl_sv_catpvf(aTHX_ sv,
                 "<S:%"UVuf"/%"IVdf" W:%"UVuf" L:%"UVuf"/%"UVuf" C:%"UVuf"/%"UVuf">",
                 (UV)trie->startstate,
-                (IV)trie->laststate-1,
+                (IV)trie->statecount-1, /* -1 because of the unused 0 element */
                 (UV)trie->wordcount,
                 (UV)trie->minlen,
                 (UV)trie->maxlen,
diff --git a/regcomp.h b/regcomp.h
index 5fb6b14ae6..f7082bf5bb 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -519,7 +519,8 @@ struct _reg_trie_data {
                                         for the node following a given word. */
     U16	            *nextword;       /* optional 1 indexed array to support linked list
                                         of duplicate wordnums */
-    U32             laststate;       /* Build only */
+    U32             statecount;      /* Build only - number of states in the states array 
+                                        (including the unused zero state) */
     U32             wordcount;       /* Build only */
 #ifdef DEBUGGING
     STRLEN          charcount;       /* Build only */