summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2013-04-12 11:30:25 +0100
committerDavid Mitchell <davem@iabyn.com>2013-04-12 11:30:25 +0100
commite501306eca0fea1cc9fc53e2eb024ad37e85ce59 (patch)
treec6456af41e20ed20741f24701229cb38bb4582d1
parent335e2ee52f38eaea7888c33d9c4f0d703130625e (diff)
parent4f3e2518850e12605980071a25c189c30710bcfd (diff)
downloadperl-e501306eca0fea1cc9fc53e2eb024ad37e85ce59.tar.gz
[MERGE] handle /(?{})/ with overload::constant qr
The reworking of the re_eval implementation for 5.17.1 made the assumption that constant strings within literal patterns were, um, constant. It turns out this this is an invalid assumption, because overload::constant qr => { sub return bless [], 'Foo' } can cause the constant bits of a pattern, like foo, bar in /foo(?{...})bar/ to get replaced with (for example) blessed objects: so the 'constant' SV attached to an OP_CONST is actually a blessed object, that could itself be overloaded with string or concat methods say, or could be a qr// object etc. The commits in this merge (hopefully) fix the various problems this assumption caused: chiefly with qr// objects containing compiled (?{}) code that were getting re-stringified and thus failing unless in the presence of use re 'eval' (and sometimes failing even in its presence). Also, runtime patterns could trigger a recursive call to the overload method, and eventually stack overflow and SEGV. See [perl #116823].
-rw-r--r--cop.h2
-rw-r--r--op.h1
-rw-r--r--parser.h2
-rw-r--r--perl.c5
-rw-r--r--pp_ctl.c13
-rw-r--r--regcomp.c398
-rw-r--r--regexec.c2
-rw-r--r--regexp.h1
-rw-r--r--t/re/overload.t145
-rw-r--r--toke.c21
10 files changed, 359 insertions, 231 deletions
diff --git a/cop.h b/cop.h
index 086cd22dd0..b20eddbde0 100644
--- a/cop.h
+++ b/cop.h
@@ -1048,6 +1048,7 @@ L<perlcall>.
Perl_magic_methcall(). */
#define G_WRITING_TO_STDERR 1024 /* Perl_write_to_stderr() is calling
Perl_magic_methcall(). */
+#define G_RE_REPARSING 0x800 /* compiling a run-time /(?{..})/ */
/* flag bits for PL_in_eval */
#define EVAL_NULL 0 /* not in an eval */
@@ -1055,6 +1056,7 @@ L<perlcall>.
#define EVAL_WARNONLY 2 /* used by yywarn() when calling yyerror() */
#define EVAL_KEEPERR 4 /* set by Perl_call_sv if G_KEEPERR */
#define EVAL_INREQUIRE 8 /* The code is being required. */
+#define EVAL_RE_REPARSING 0x10 /* eval_sv() called with G_RE_REPARSING */
/* Support for switching (stack and block) contexts.
* This ensures magic doesn't invalidate local stack and cx pointers.
diff --git a/op.h b/op.h
index 8b87a9c14d..7c5030dde7 100644
--- a/op.h
+++ b/op.h
@@ -308,6 +308,7 @@ Deprecated. Use C<GIMME_V> instead.
#define OPpEVAL_UNICODE 4
#define OPpEVAL_BYTES 8
#define OPpEVAL_COPHH 16 /* Construct %^H from cop hints */
+#define OPpEVAL_RE_REPARSING 32 /* eval_sv(..., G_RE_REPARSING) */
/* Private for OP_CALLER, OP_WANTARRAY and OP_RUNCV */
#define OPpOFFBYONE 128 /* Treat caller(1) as caller(2) */
diff --git a/parser.h b/parser.h
index 05735bfe76..e7b887ec3b 100644
--- a/parser.h
+++ b/parser.h
@@ -71,7 +71,7 @@ typedef struct yy_parser {
char multi_open; /* delimiter of said string */
char multi_close; /* delimiter of said string */
bool preambled;
- /*** 8-bit hole ***/
+ bool lex_re_reparsing; /* we're doing G_RE_REPARSING */
I32 lex_allbrackets;/* (), [], {}, ?: bracket count */
SUBLEXINFO sublex_info;
LEXSHARED *lex_shared;
diff --git a/perl.c b/perl.c
index 87d98dca38..a39d66fb6b 100644
--- a/perl.c
+++ b/perl.c
@@ -2808,8 +2808,9 @@ Perl_eval_sv(pTHX_ SV *sv, I32 flags)
myop.op_flags |= OP_GIMME_REVERSE(flags);
if (flags & G_KEEPERR)
myop.op_flags |= OPf_SPECIAL;
- if (PL_reg_state.re_reparsing)
- myop.op_private = OPpEVAL_COPHH;
+
+ if (flags & G_RE_REPARSING)
+ myop.op_private = (OPpEVAL_COPHH | OPpEVAL_RE_REPARSING);
/* fail now; otherwise we could fail after the JMPENV_PUSH but
* before a PUSHEVAL, which corrupts the stack after a croak */
diff --git a/pp_ctl.c b/pp_ctl.c
index f518bc2c95..bdbd75a819 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -3358,7 +3358,9 @@ S_doeval(pTHX_ int gimme, CV* outside, U32 seq, HV *hh)
PL_in_eval = (in_require
? (EVAL_INREQUIRE | (PL_in_eval & EVAL_INEVAL))
- : EVAL_INEVAL);
+ : (EVAL_INEVAL |
+ ((PL_op->op_private & OPpEVAL_RE_REPARSING)
+ ? EVAL_RE_REPARSING : 0)));
PUSHMARK(SP);
@@ -3420,6 +3422,15 @@ S_doeval(pTHX_ int gimme, CV* outside, U32 seq, HV *hh)
else {
PL_hints = saveop->op_private & OPpEVAL_COPHH
? oldcurcop->cop_hints : saveop->op_targ;
+
+ /* making 'use re eval' not be in scope when compiling the
+ * qr/mabye_has_runtime_code_block/ ensures that we don't get
+ * infinite recursion when S_has_runtime_code() gives a false
+ * positive: the second time round, HINT_RE_EVAL isn't set so we
+ * don't bother calling S_has_runtime_code() */
+ if (PL_in_eval & EVAL_RE_REPARSING)
+ PL_hints &= ~HINT_RE_EVAL;
+
if (hh) {
/* SAVEHINTS created a new HV in PL_hintgv, which we need to GC */
SvREFCNT_dec(GvHV(PL_hintgv));
diff --git a/regcomp.c b/regcomp.c
index 34a4e9f9a6..ee843e3d12 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4877,19 +4877,12 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags)
* False positives are allowed */
static bool
-S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr,
- U32 pm_flags, char *pat, STRLEN plen)
+S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
+ char *pat, STRLEN plen)
{
int n = 0;
STRLEN s;
- /* avoid infinitely recursing when we recompile the pattern parcelled up
- * as qr'...'. A single constant qr// string can't have have any
- * run-time component in it, and thus, no runtime code. (A non-qr
- * string, however, can, e.g. $x =~ '(?{})') */
- if ((pm_flags & PMf_IS_QR) && expr && expr->op_type == OP_CONST)
- return 0;
-
for (s = 0; s < plen; s++) {
if (n < pRExC_state->num_code_blocks
&& s == pRExC_state->code_blocks[n].start)
@@ -5003,11 +4996,10 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
SAVETMPS;
save_re_context();
PUSHSTACKi(PERLSI_REQUIRE);
- /* this causes the toker to collapse \\ into \ when parsing
- * qr''; normally only q'' does this. It also alters hints
- * handling */
- PL_reg_state.re_reparsing = TRUE;
- eval_sv(sv, G_SCALAR);
+ /* G_RE_REPARSING causes the toker to collapse \\ into \ when
+ * parsing qr''; normally only q'' does this. It also alters
+ * hints handling */
+ eval_sv(sv, G_SCALAR|G_RE_REPARSING);
SvREFCNT_dec_NN(sv);
SPAGAIN;
qr_ref = POPs;
@@ -5212,8 +5204,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
I32 flags;
I32 minlen = 0;
U32 rx_flags;
- SV *pat;
+ SV *pat = NULL;
SV *code_blocksv = NULL;
+ SV** new_patternp = patternp;
/* these are all flags - maybe they should be turned
* into a single int with different bit masks */
@@ -5221,7 +5214,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
I32 sawplus = 0;
I32 sawopen = 0;
regex_charset initial_charset = get_regex_charset(orig_rx_flags);
- bool code_is_utf8 = 0;
bool recompile = 0;
bool runtime_code = 0;
scan_data_t data;
@@ -5308,40 +5300,68 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
if (expr && (expr->op_type == OP_LIST ||
(expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) {
-
- /* is the source UTF8, and how many code blocks are there? */
+ /* allocate code_blocks if needed */
OP *o;
int ncode = 0;
- for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
- if (o->op_type == OP_CONST) {
- /* skip if we have SVs as well as OPs. In this case,
- * a) we decide utf8 based on SVs not OPs;
- * b) the current pad may not match that which the ops
- * were compiled in, so, so on threaded builds,
- * cSVOPo_sv would look in the wrong pad */
- if (!pat_count && SvUTF8(cSVOPo_sv))
- code_is_utf8 = 1;
- }
- else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
- /* count of DO blocks */
- ncode++;
- }
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling)
+ if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
+ ncode++; /* count of DO blocks */
if (ncode) {
pRExC_state->num_code_blocks = ncode;
Newx(pRExC_state->code_blocks, ncode, struct reg_code_block);
}
}
- if (pat_count) {
- /* handle a list of SVs */
+ if (!pat_count) {
+ /* compile-time pattern with just OP_CONSTs and DO blocks */
+
+ int n;
+ OP *o;
+
+ /* find how many CONSTs there are */
+ assert(expr);
+ n = 0;
+ if (expr->op_type == OP_CONST)
+ n = 1;
+ else
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+ if (o->op_type == OP_CONST)
+ n++;
+ }
+
+ /* fake up an SV array */
+
+ assert(!new_patternp);
+ Newx(new_patternp, n, SV*);
+ SAVEFREEPV(new_patternp);
+ pat_count = n;
+
+ n = 0;
+ if (expr->op_type == OP_CONST)
+ new_patternp[n] = cSVOPx_sv(expr);
+ else
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+ if (o->op_type == OP_CONST)
+ new_patternp[n++] = cSVOPo_sv;
+ }
+
+ }
+
+ {
+ /* concat args, handling magic, overloading etc */
SV **svp;
+ OP *o = NULL;
+ int n = 0;
+ STRLEN orig_patlen = 0;
DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
- "Compiling List of SVs %d elements%s\n",pat_count, orig_rx_flags & RXf_SPLIT ? " for split" : ""));
+ "Assembling pattern from %d elements%s\n", pat_count,
+ orig_rx_flags & RXf_SPLIT ? " for split" : ""));
+
/* apply magic and RE overloading to each arg */
- for (svp = patternp; svp < patternp + pat_count; svp++) {
+ for (svp = new_patternp; svp < new_patternp + pat_count; svp++) {
SV *rx = *svp;
SvGETMAGIC(rx);
if (SvROK(rx) && SvAMAGIC(rx)) {
@@ -5356,21 +5376,19 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
}
- if (pat_count > 1) {
- /* concat multiple args and find any code block indexes */
-
- OP *o = NULL;
- int n = 0;
- bool utf8 = 0;
- STRLEN orig_patlen = 0;
-
- if (pRExC_state->num_code_blocks) {
- o = cLISTOPx(expr)->op_first;
- assert( o->op_type == OP_PUSHMARK
+ if (pRExC_state->num_code_blocks) {
+ if (expr->op_type == OP_CONST)
+ o = expr;
+ else {
+ o = cLISTOPx(expr)->op_first;
+ assert( o->op_type == OP_PUSHMARK
|| (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK)
|| o->op_type == OP_PADRANGE);
- o = o->op_sibling;
- }
+ o = o->op_sibling;
+ }
+ }
+
+ if (pat_count > 1) {
pat = newSVpvn("", 0);
SAVEFREESV(pat);
@@ -5381,124 +5399,120 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
* overloading but not concat overloading; but the main effect
* in this obscure case is to need a 'use re eval' for a
* literal code block */
- for (svp = patternp; svp < patternp + pat_count; svp++) {
+ for (svp = new_patternp; svp < new_patternp + pat_count; svp++) {
if (SvUTF8(*svp))
- utf8 = 1;
+ SvUTF8_on(pat);
}
- if (utf8)
- SvUTF8_on(pat);
-
- for (svp = patternp; svp < patternp + pat_count; svp++) {
- SV *sv, *msv = *svp;
- SV *rx;
- bool code = 0;
- /* we make the assumption here that each op in the list of
- * op_siblings maps to one SV pushed onto the stack,
- * except for code blocks, with have both an OP_NULL and
- * and OP_CONST.
- * This allows us to match up the list of SVs against the
- * list of OPs to find the next code block.
- *
- * Note that PUSHMARK PADSV PADSV ..
- * is optimised to
- * PADRANGE NULL NULL ..
- * so the alignment still works. */
- if (o) {
- if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
- assert(n < pRExC_state->num_code_blocks);
- pRExC_state->code_blocks[n].start = SvCUR(pat);
- pRExC_state->code_blocks[n].block = o;
- pRExC_state->code_blocks[n].src_regex = NULL;
- n++;
- code = 1;
- o = o->op_sibling; /* skip CONST */
- assert(o);
- }
- o = o->op_sibling;;
- }
+ }
- if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
- (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
- {
- sv_setsv(pat, sv);
- /* overloading involved: all bets are off over literal
- * code. Pretend we haven't seen it */
- pRExC_state->num_code_blocks -= n;
- n = 0;
- rx = NULL;
+ /* process args, concat them if there are multiple ones,
+ * and find any code block indexes */
+
+
+ for (svp = new_patternp; svp < new_patternp + pat_count; svp++) {
+ SV *sv, *msv = *svp;
+ SV *rx = NULL;
+ bool code = 0;
+ /* we make the assumption here that each op in the list of
+ * op_siblings maps to one SV pushed onto the stack,
+ * except for code blocks, with have both an OP_NULL and
+ * and OP_CONST.
+ * This allows us to match up the list of SVs against the
+ * list of OPs to find the next code block.
+ *
+ * Note that PUSHMARK PADSV PADSV ..
+ * is optimised to
+ * PADRANGE NULL NULL ..
+ * so the alignment still works. */
+ if (o) {
+ if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+ assert(n < pRExC_state->num_code_blocks);
+ pRExC_state->code_blocks[n].start = pat ? SvCUR(pat) : 0;
+ pRExC_state->code_blocks[n].block = o;
+ pRExC_state->code_blocks[n].src_regex = NULL;
+ n++;
+ code = 1;
+ o = o->op_sibling; /* skip CONST */
+ assert(o);
+ }
+ o = o->op_sibling;;
+ }
- }
- else {
- while (SvAMAGIC(msv)
- && (sv = AMG_CALLunary(msv, string_amg))
- && sv != msv
- && !( SvROK(msv)
- && SvROK(sv)
- && SvRV(msv) == SvRV(sv))
- ) {
- msv = sv;
- SvGETMAGIC(msv);
- }
- if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
- msv = SvRV(msv);
+ /* try concatenation overload ... */
+ if (pat && (SvAMAGIC(pat) || SvAMAGIC(msv)) &&
+ (sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+ {
+ sv_setsv(pat, sv);
+ /* overloading involved: all bets are off over literal
+ * code. Pretend we haven't seen it */
+ pRExC_state->num_code_blocks -= n;
+ n = 0;
+ }
+ else {
+ /* ... or failing that, try "" overload */
+ while (SvAMAGIC(msv)
+ && (sv = AMG_CALLunary(msv, string_amg))
+ && sv != msv
+ && !( SvROK(msv)
+ && SvROK(sv)
+ && SvRV(msv) == SvRV(sv))
+ ) {
+ msv = sv;
+ SvGETMAGIC(msv);
+ }
+ if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
+ msv = SvRV(msv);
+ if (pat) {
orig_patlen = SvCUR(pat);
sv_catsv_nomg(pat, msv);
rx = msv;
- if (code)
- pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
}
+ else
+ pat = msv;
+ if (code)
+ pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
+ }
- /* extract any code blocks within any embedded qr//'s */
- if (rx && SvTYPE(rx) == SVt_REGEXP
- && RX_ENGINE((REGEXP*)rx)->op_comp)
- {
-
- RXi_GET_DECL(ReANY((REGEXP *)rx), ri);
- if (ri->num_code_blocks) {
- int i;
- /* the presence of an embedded qr// with code means
- * we should always recompile: the text of the
- * qr// may not have changed, but it may be a
- * different closure than last time */
- recompile = 1;
- Renew(pRExC_state->code_blocks,
- pRExC_state->num_code_blocks + ri->num_code_blocks,
- struct reg_code_block);
- pRExC_state->num_code_blocks += ri->num_code_blocks;
- for (i=0; i < ri->num_code_blocks; i++) {
- struct reg_code_block *src, *dst;
- STRLEN offset = orig_patlen
- + ReANY((REGEXP *)rx)->pre_prefix;
- assert(n < pRExC_state->num_code_blocks);
- src = &ri->code_blocks[i];
- dst = &pRExC_state->code_blocks[n];
- dst->start = src->start + offset;
- dst->end = src->end + offset;
- dst->block = src->block;
- dst->src_regex = (REGEXP*) SvREFCNT_inc( (SV*)
- src->src_regex
- ? src->src_regex
- : (REGEXP*)rx);
- n++;
- }
- }
- }
- }
- SvSETMAGIC(pat);
- }
- else {
- SV *sv;
- pat = *patternp;
- while (SvAMAGIC(pat)
- && (sv = AMG_CALLunary(pat, string_amg))
- && sv != pat)
+ /* extract any code blocks within any embedded qr//'s */
+ if (rx && SvTYPE(rx) == SVt_REGEXP
+ && RX_ENGINE((REGEXP*)rx)->op_comp)
{
- pat = sv;
- SvGETMAGIC(pat);
+
+ RXi_GET_DECL(ReANY((REGEXP *)rx), ri);
+ if (ri->num_code_blocks) {
+ int i;
+ /* the presence of an embedded qr// with code means
+ * we should always recompile: the text of the
+ * qr// may not have changed, but it may be a
+ * different closure than last time */
+ recompile = 1;
+ Renew(pRExC_state->code_blocks,
+ pRExC_state->num_code_blocks + ri->num_code_blocks,
+ struct reg_code_block);
+ pRExC_state->num_code_blocks += ri->num_code_blocks;
+ for (i=0; i < ri->num_code_blocks; i++) {
+ struct reg_code_block *src, *dst;
+ STRLEN offset = orig_patlen
+ + ReANY((REGEXP *)rx)->pre_prefix;
+ assert(n < pRExC_state->num_code_blocks);
+ src = &ri->code_blocks[i];
+ dst = &pRExC_state->code_blocks[n];
+ dst->start = src->start + offset;
+ dst->end = src->end + offset;
+ dst->block = src->block;
+ dst->src_regex = (REGEXP*) SvREFCNT_inc( (SV*)
+ src->src_regex
+ ? src->src_regex
+ : (REGEXP*)rx);
+ n++;
+ }
+ }
}
}
+ if (pat_count > 1)
+ SvSETMAGIC(pat);
- /* handle bare regex: foo =~ $re */
+ /* handle bare (possibly after overloading) regex: foo =~ $re */
{
SV *re = pat;
if (SvROK(re))
@@ -5509,58 +5523,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
SvREFCNT_inc(re);
Safefree(pRExC_state->code_blocks);
DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
- "Precompiled pattern%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : ""));
+ "Precompiled pattern%s\n",
+ orig_rx_flags & RXf_SPLIT ? " for split" : ""));
return (REGEXP*)re;
}
}
}
- else {
- /* not a list of SVs, so must be a list of OPs */
- assert(expr);
- if (expr->op_type == OP_LIST) {
- int i = -1;
- bool is_code = 0;
- OP *o;
-
- DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
- "Compiling OP_LIST%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : ""));
-
- pat = newSVpvn("", 0);
- SAVEFREESV(pat);
- if (code_is_utf8)
- SvUTF8_on(pat);
-
- /* given a list of CONSTs and DO blocks in expr, append all
- * the CONSTs to pat, and record the start and end of each
- * code block in code_blocks[] (each DO{} op is followed by an
- * OP_CONST containing the corresponding literal '(?{...})
- * text)
- */
- for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
- if (o->op_type == OP_CONST) {
- sv_catsv(pat, cSVOPo_sv);
- if (is_code) {
- pRExC_state->code_blocks[i].end = SvCUR(pat)-1;
- is_code = 0;
- }
- }
- else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
- assert(i+1 < pRExC_state->num_code_blocks);
- pRExC_state->code_blocks[++i].start = SvCUR(pat);
- pRExC_state->code_blocks[i].block = o;
- pRExC_state->code_blocks[i].src_regex = NULL;
- is_code = 1;
- }
- }
- }
- else {
- assert(expr->op_type == OP_CONST);
- DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
- "Compiling OP_CONST%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : ""));
- pat = cSVOPx_sv(expr);
- }
- }
exp = SvPV_nomg(pat, plen);
xend = exp + plen;
@@ -5650,6 +5619,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
}
+ if ((pm_flags & PMf_USE_RE_EVAL)
+ /* this second condition covers the non-regex literal case,
+ * i.e. $foo =~ '(?{})'. */
+ || (IN_PERL_COMPILETIME && (PL_hints & HINT_RE_EVAL))
+ )
+ runtime_code = S_has_runtime_code(aTHX_ pRExC_state, exp, plen);
+
/* return old regex if pattern hasn't changed */
/* XXX: note in the below we have to check the flags as well as the pattern.
*
@@ -5663,24 +5639,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
&& ( RX_COMPFLAGS(old_re) == ( orig_rx_flags & RXf_PMf_FLAGCOPYMASK ) )
&& RX_PRECOMP(old_re)
&& RX_PRELEN(old_re) == plen
- && memEQ(RX_PRECOMP(old_re), exp, plen))
+ && memEQ(RX_PRECOMP(old_re), exp, plen)
+ && !runtime_code /* with runtime code, always recompile */ )
{
- /* with runtime code, always recompile */
- runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
- exp, plen);
- if (!runtime_code) {
- Safefree(pRExC_state->code_blocks);
- return old_re;
- }
+ Safefree(pRExC_state->code_blocks);
+ return old_re;
}
- else if ((pm_flags & PMf_USE_RE_EVAL)
- /* this second condition covers the non-regex literal case,
- * i.e. $foo =~ '(?{})'. */
- || ( !PL_reg_state.re_reparsing && IN_PERL_COMPILETIME
- && (PL_hints & HINT_RE_EVAL))
- )
- runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
- exp, plen);
rx_flags = orig_rx_flags;
diff --git a/regexec.c b/regexec.c
index d376e26bd4..45bd09ede3 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4878,8 +4878,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
*/
Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
- PL_reg_state.re_reparsing = FALSE;
-
if (!caller_cv)
caller_cv = find_runcv(NULL);
diff --git a/regexp.h b/regexp.h
index 6b16d14593..31fb879c66 100644
--- a/regexp.h
+++ b/regexp.h
@@ -776,7 +776,6 @@ typedef struct regmatch_slab {
struct re_save_state {
bool re_state_eval_setup_done; /* from regexec.c */
bool re_state_reg_match_utf8; /* from regexec.c */
- bool re_reparsing; /* runtime (?{}) fed back into parser */
/* Space for U8 */
I32 re_state_reg_oldpos; /* from regexec.c */
I32 re_state_reg_maxiter; /* max wait until caching pos */
diff --git a/t/re/overload.t b/t/re/overload.t
index 4e99bd3ec6..38d5140e0e 100644
--- a/t/re/overload.t
+++ b/t/re/overload.t
@@ -33,4 +33,149 @@ no warnings 'syntax';
is $1, $TAG, "void context //g against overloaded object";
}
+{
+ # an overloaded stringify returning itself shouldn't loop indefinitely
+
+
+ {
+ package Self;
+ use overload q{""} => sub {
+ return shift;
+ },
+ fallback => 1;
+ }
+
+ my $obj = bless [], 'Self';
+ my $r = qr/$obj/;
+ pass("self object, 1 arg");
+ $r = qr/foo$obj/;
+ pass("self object, 2 args");
+}
+
+{
+ # [perl #116823]
+ # when overloading regex string constants, a different code path
+ # was taken if the regex was compile-time, leading to overloaded
+ # regex constant string segments not being handled correctly.
+ # They were just treated as OP_CONST strings to be concatted together.
+ # In particular, if the overload returned a regex object, it would
+ # just be stringified rather than having any code blocks processed.
+
+ BEGIN {
+ overload::constant qr => sub {
+ my ($raw, $cooked, $type) = @_;
+ return $cooked unless defined $::CONST_QR_CLASS;
+ if ($type =~ /qq?/) {
+ return bless \$cooked, $::CONST_QR_CLASS;
+ } else {
+ return $cooked;
+ }
+ };
+ }
+
+ {
+ # returns a qr// object
+
+ package OL_QR;
+ use overload q{""} => sub {
+ my $re = shift;
+ return qr/(?{ $OL_QR::count++ })$$re/;
+ },
+ fallback => 1;
+
+ }
+
+ {
+ # returns a string
+
+ package OL_STR;
+ use overload q{""} => sub {
+ my $re = shift;
+ return qq/(?{ \$OL_STR::count++ })$$re/;
+ },
+ fallback => 1;
+
+ }
+
+
+ my $qr;
+
+ $::CONST_QR_CLASS = 'OL_QR';
+
+ $OL_QR::count = 0;
+ $qr = eval q{ qr/^foo$/; };
+ ok("foo" =~ $qr, "compile-time, OL_QR, single constant segment");
+ is($OL_QR::count, 1, "flag");
+
+ $OL_QR::count = 0;
+ $qr = eval q{ qr/^foo$(?{ $OL_QR::count++ })/; };
+ ok("foo" =~ $qr, "compile-time, OL_QR, multiple constant segments");
+ is($OL_QR::count, 2, "qr2 flag");
+
+
+ # test /foo.../ when foo is given string overloading,
+ # for various permutations of '...'
+
+ $::CONST_QR_CLASS = 'OL_STR';
+
+ for my $has_re_eval (0, 1) {
+ for my $has_qr (0, 1) {
+ for my $has_code (0, 1) {
+ for my $has_runtime (0, 1) {
+ for my $has_runtime_code (0, 1) {
+ if ($has_runtime_code) {
+ next unless $has_runtime;
+ }
+ note( "re_eval=$has_re_eval "
+ . "qr=$has_qr "
+ . "code=$has_code "
+ . "runtime=$has_runtime "
+ . "runtime_code=$has_runtime_code");
+ my $eval = '';
+ $eval .= q{use re 'eval'; } if $has_re_eval;
+ $eval .= q{$match = $str =~ };
+ $eval .= q{qr} if $has_qr;
+ $eval .= q{/^abc};
+ $eval .= q{(?{$blocks++})} if $has_code;
+ $eval .= q{$runtime} if $has_runtime;
+ $eval .= q{/; 1;};
+
+ my $runtime = q{def};
+ $runtime .= q{(?{$run_blocks++})} if $has_runtime_code;
+
+ my $blocks = 0;
+ my $run_blocks = 0;
+ my $match;
+ my $str = "abc";
+ $str .= "def" if $runtime;
+
+ my $result = eval $eval;
+ my $err = $@;
+ $result = $result ? 1 : 0;
+
+ if (!$has_re_eval) {
+ is($result, 0, "EVAL: $eval");
+ like($err, qr/Eval-group not allowed at runtime/,
+ "\$\@: $eval");
+ next;
+ }
+
+ is($result, 1, "EVAL: $eval");
+ diag("\$@=[$err]") unless $result;
+
+ is($match, 1, "MATCH: $eval");
+ is($blocks, $has_code, "blocks");
+ is($run_blocks, $has_runtime_code, "run_blocks");
+
+ }
+ }
+ }
+ }
+ }
+
+
+ undef $::CONST_QR_CLASS;
+}
+
+
done_testing();
diff --git a/toke.c b/toke.c
index 275c95755b..43adb3e4b8 100644
--- a/toke.c
+++ b/toke.c
@@ -2525,6 +2525,7 @@ S_sublex_push(pTHX)
SAVEGENERICPV(PL_lex_brackstack);
SAVEGENERICPV(PL_lex_casestack);
SAVEGENERICPV(PL_parser->lex_shared);
+ SAVEBOOL(PL_parser->lex_re_reparsing);
/* The here-doc parser needs to be able to peek into outer lexing
scopes to find the body of the here-doc. So we put PL_linestr and
@@ -2568,6 +2569,9 @@ S_sublex_push(pTHX)
else
PL_lex_inpat = NULL;
+ PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
+ PL_in_eval &= ~EVAL_RE_REPARSING;
+
return '(';
}
@@ -3751,7 +3755,9 @@ S_scan_const(pTHX_ char *start)
/* return the substring (via pl_yylval) only if we parsed anything */
if (s > PL_bufptr) {
SvREFCNT_inc_simple_void_NN(sv);
- if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
+ if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
+ && ! PL_parser->lex_re_reparsing)
+ {
const char *const key = PL_lex_inpat ? "qr" : "q";
const STRLEN keylen = PL_lex_inpat ? 2 : 1;
const char *type;
@@ -9047,7 +9053,9 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
}
}
-/* Either returns sv, or mortalizes/frees sv and returns a new SV*.
+/* S_new_constant(): do any overload::constant lookup.
+
+ Either returns sv, or mortalizes/frees sv and returns a new SV*.
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
and <type> is used with error messages only.
@@ -9502,8 +9510,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
{
dVAR;
PMOP *pm;
- char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing,
- TRUE /* look for escaped bracketed metas */ );
+ char *s;
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
@@ -9513,9 +9520,9 @@ S_scan_pat(pTHX_ char *start, I32 type)
PERL_ARGS_ASSERT_SCAN_PAT;
- /* this was only needed for the initial scan_str; set it to false
- * so that any (?{}) code blocks etc are parsed normally */
- PL_reg_state.re_reparsing = FALSE;
+ s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
+ TRUE /* look for escaped bracketed metas */ );
+
if (!s) {
const char * const delimiter = skipspace(start);
Perl_croak(aTHX_