summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2013-04-10 16:10:28 +0100
committerDavid Mitchell <davem@iabyn.com>2013-04-12 11:29:55 +0100
commit4f3e2518850e12605980071a25c189c30710bcfd (patch)
treec6456af41e20ed20741f24701229cb38bb4582d1
parent3a54fd60e3d777bf86f4eec331b79a61c23d8393 (diff)
downloadperl-4f3e2518850e12605980071a25c189c30710bcfd.tar.gz
fix runtime /(?{})/ with overload::constant qr
There are two issues fixed here. First, when a pattern has a run-time code-block included, such as $code = '(?{...})' /foo$code/ the mechanism used to parse those run-time blocks: of feeding the resultant pattern into a call to eval_sv() with the string qr'foo(?{...})' and then extracting out any resulting opcode trees from the returned qr object -- suffered from the re-parsed qr'..' also being subject to overload:constant qr processing, which could result in Bad Things happening. Since we now have the PL_parser->lex_re_reparsing flag in scope throughout the parsing of the pattern, this is easy to detect and avoid. The second issue is a mechanism to avoid recursion when getting false positives in S_has_runtime_code() for code like '[(?{})]'. For patterns like this, we would suspect that the pattern may have code (even though it doesn't), so feed it into qr'...' and reparse, and again it looks like runtime code, so feed it in, rinse and repeat. The thing to stop recursion was when we saw a qr with a single OP_CONST string, we assumed it couldn't have any run-time component, and thus no run-time code blocks. However, this broke qr/foo/ in the presence of overload::constant qr overloading, which could convert foo into a string containing code blocks. The fix for this is to change the recursion-avoidance mechanism (in a way which also turns out to be simpler too). Basically, when we fake up a qr'...' and eval it, we turn off any 'use re eval' in scope: its not needed, since we know the .... will be a constant string without any overloading. Then we use the lack of 'use re eval' in scope to skip calling S_has_runtime_code() and just assume that the code has no run-time patterns (if it has, then eventually the regex parser will rightly complain about 'Eval-group not allowed at runtime'). This commit also adds some fairly comprehensive tests for this.
-rw-r--r--pp_ctl.c9
-rw-r--r--regcomp.c37
-rw-r--r--t/re/overload.t75
-rw-r--r--toke.c4
4 files changed, 100 insertions, 25 deletions
diff --git a/pp_ctl.c b/pp_ctl.c
index 721a8a941f..bdbd75a819 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -3422,6 +3422,15 @@ S_doeval(pTHX_ int gimme, CV* outside, U32 seq, HV *hh)
else {
PL_hints = saveop->op_private & OPpEVAL_COPHH
? oldcurcop->cop_hints : saveop->op_targ;
+
+ /* making 'use re eval' not be in scope when compiling the
+ * qr/mabye_has_runtime_code_block/ ensures that we don't get
+ * infinite recursion when S_has_runtime_code() gives a false
+ * positive: the second time round, HINT_RE_EVAL isn't set so we
+ * don't bother calling S_has_runtime_code() */
+ if (PL_in_eval & EVAL_RE_REPARSING)
+ PL_hints &= ~HINT_RE_EVAL;
+
if (hh) {
/* SAVEHINTS created a new HV in PL_hintgv, which we need to GC */
SvREFCNT_dec(GvHV(PL_hintgv));
diff --git a/regcomp.c b/regcomp.c
index 0853815a58..ee843e3d12 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4877,19 +4877,12 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
* False positives are allowed */
static bool
-S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr,
- U32 pm_flags, char *pat, STRLEN plen)
+S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
+ char *pat, STRLEN plen)
{
int n = 0;
STRLEN s;
- /* avoid infinitely recursing when we recompile the pattern parcelled up
- * as qr'...'. A single constant qr// string can't have have any
- * run-time component in it, and thus, no runtime code. (A non-qr
- * string, however, can, e.g. $x =~ '(?{})') */
- if ((pm_flags & PMf_IS_QR) && expr && expr->op_type == OP_CONST)
- return 0;
-
for (s = 0; s < plen; s++) {
if (n < pRExC_state->num_code_blocks
&& s == pRExC_state->code_blocks[n].start)
@@ -5626,6 +5619,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
}
+ if ((pm_flags & PMf_USE_RE_EVAL)
+ /* this second condition covers the non-regex literal case,
+ * i.e. $foo =~ '(?{})'. */
+ || (IN_PERL_COMPILETIME && (PL_hints & HINT_RE_EVAL))
+ )
+ runtime_code = S_has_runtime_code(aTHX_ pRExC_state, exp, plen);
+
/* return old regex if pattern hasn't changed */
/* XXX: note in the below we have to check the flags as well as the pattern.
*
@@ -5639,23 +5639,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
&& ( RX_COMPFLAGS(old_re) == ( orig_rx_flags & RXf_PMf_FLAGCOPYMASK ) )
&& RX_PRECOMP(old_re)
&& RX_PRELEN(old_re) == plen
- && memEQ(RX_PRECOMP(old_re), exp, plen))
+ && memEQ(RX_PRECOMP(old_re), exp, plen)
+ && !runtime_code /* with runtime code, always recompile */ )
{
- /* with runtime code, always recompile */
- runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
- exp, plen);
- if (!runtime_code) {
- Safefree(pRExC_state->code_blocks);
- return old_re;
- }
+ Safefree(pRExC_state->code_blocks);
+ return old_re;
}
- else if ((pm_flags & PMf_USE_RE_EVAL)
- /* this second condition covers the non-regex literal case,
- * i.e. $foo =~ '(?{})'. */
- || (IN_PERL_COMPILETIME && (PL_hints & HINT_RE_EVAL))
- )
- runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
- exp, plen);
rx_flags = orig_rx_flags;
diff --git a/t/re/overload.t b/t/re/overload.t
index 34b65b8c9e..38d5140e0e 100644
--- a/t/re/overload.t
+++ b/t/re/overload.t
@@ -85,6 +85,19 @@ no warnings 'syntax';
}
+ {
+ # returns a string
+
+ package OL_STR;
+ use overload q{""} => sub {
+ my $re = shift;
+ return qq/(?{ \$OL_STR::count++ })$$re/;
+ },
+ fallback => 1;
+
+ }
+
+
my $qr;
$::CONST_QR_CLASS = 'OL_QR';
@@ -99,6 +112,68 @@ no warnings 'syntax';
ok("foo" =~ $qr, "compile-time, OL_QR, multiple constant segments");
is($OL_QR::count, 2, "qr2 flag");
+
+ # test /foo.../ when foo is given string overloading,
+ # for various permutations of '...'
+
+ $::CONST_QR_CLASS = 'OL_STR';
+
+ for my $has_re_eval (0, 1) {
+ for my $has_qr (0, 1) {
+ for my $has_code (0, 1) {
+ for my $has_runtime (0, 1) {
+ for my $has_runtime_code (0, 1) {
+ if ($has_runtime_code) {
+ next unless $has_runtime;
+ }
+ note( "re_eval=$has_re_eval "
+ . "qr=$has_qr "
+ . "code=$has_code "
+ . "runtime=$has_runtime "
+ . "runtime_code=$has_runtime_code");
+ my $eval = '';
+ $eval .= q{use re 'eval'; } if $has_re_eval;
+ $eval .= q{$match = $str =~ };
+ $eval .= q{qr} if $has_qr;
+ $eval .= q{/^abc};
+ $eval .= q{(?{$blocks++})} if $has_code;
+ $eval .= q{$runtime} if $has_runtime;
+ $eval .= q{/; 1;};
+
+ my $runtime = q{def};
+ $runtime .= q{(?{$run_blocks++})} if $has_runtime_code;
+
+ my $blocks = 0;
+ my $run_blocks = 0;
+ my $match;
+ my $str = "abc";
+ $str .= "def" if $runtime;
+
+ my $result = eval $eval;
+ my $err = $@;
+ $result = $result ? 1 : 0;
+
+ if (!$has_re_eval) {
+ is($result, 0, "EVAL: $eval");
+ like($err, qr/Eval-group not allowed at runtime/,
+ "\$\@: $eval");
+ next;
+ }
+
+ is($result, 1, "EVAL: $eval");
+ diag("\$@=[$err]") unless $result;
+
+ is($match, 1, "MATCH: $eval");
+ is($blocks, $has_code, "blocks");
+ is($run_blocks, $has_runtime_code, "run_blocks");
+
+ }
+ }
+ }
+ }
+ }
+
+
undef $::CONST_QR_CLASS;
}
diff --git a/toke.c b/toke.c
index e97f3fa774..43adb3e4b8 100644
--- a/toke.c
+++ b/toke.c
@@ -3755,7 +3755,9 @@ S_scan_const(pTHX_ char *start)
/* return the substring (via pl_yylval) only if we parsed anything */
if (s > PL_bufptr) {
SvREFCNT_inc_simple_void_NN(sv);
- if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
+ if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
+ && ! PL_parser->lex_re_reparsing)
+ {
const char *const key = PL_lex_inpat ? "qr" : "q";
const STRLEN keylen = PL_lex_inpat ? 2 : 1;
const char *type;