diff options
author | David Mitchell <davem@iabyn.com> | 2013-04-12 11:30:25 +0100 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2013-04-12 11:30:25 +0100 |
commit | e501306eca0fea1cc9fc53e2eb024ad37e85ce59 (patch) | |
tree | c6456af41e20ed20741f24701229cb38bb4582d1 | |
parent | 335e2ee52f38eaea7888c33d9c4f0d703130625e (diff) | |
parent | 4f3e2518850e12605980071a25c189c30710bcfd (diff) | |
download | perl-e501306eca0fea1cc9fc53e2eb024ad37e85ce59.tar.gz |
[MERGE] handle /(?{})/ with overload::constant qr
The reworking of the re_eval implementation for 5.17.1 made the assumption
that constant strings within literal patterns were, um, constant.
It turns out this this is an invalid assumption, because
overload::constant qr => { sub return bless [], 'Foo' }
can cause the constant bits of a pattern, like foo, bar in
/foo(?{...})bar/
to get replaced with (for example) blessed objects: so the 'constant' SV
attached to an OP_CONST is actually a blessed object, that could itself be
overloaded with string or concat methods say, or could be a qr// object
etc.
The commits in this merge (hopefully) fix the various problems this
assumption caused: chiefly with qr// objects containing compiled (?{})
code that were getting re-stringified and thus failing unless in the
presence of use re 'eval' (and sometimes failing even in its presence).
Also, runtime patterns could trigger a recursive call to the overload
method, and eventually stack overflow and SEGV.
See [perl #116823].
-rw-r--r-- | cop.h | 2 | ||||
-rw-r--r-- | op.h | 1 | ||||
-rw-r--r-- | parser.h | 2 | ||||
-rw-r--r-- | perl.c | 5 | ||||
-rw-r--r-- | pp_ctl.c | 13 | ||||
-rw-r--r-- | regcomp.c | 398 | ||||
-rw-r--r-- | regexec.c | 2 | ||||
-rw-r--r-- | regexp.h | 1 | ||||
-rw-r--r-- | t/re/overload.t | 145 | ||||
-rw-r--r-- | toke.c | 21 |
10 files changed, 359 insertions, 231 deletions
@@ -1048,6 +1048,7 @@ L<perlcall>. Perl_magic_methcall(). */ #define G_WRITING_TO_STDERR 1024 /* Perl_write_to_stderr() is calling Perl_magic_methcall(). */ +#define G_RE_REPARSING 0x800 /* compiling a run-time /(?{..})/ */ /* flag bits for PL_in_eval */ #define EVAL_NULL 0 /* not in an eval */ @@ -1055,6 +1056,7 @@ L<perlcall>. #define EVAL_WARNONLY 2 /* used by yywarn() when calling yyerror() */ #define EVAL_KEEPERR 4 /* set by Perl_call_sv if G_KEEPERR */ #define EVAL_INREQUIRE 8 /* The code is being required. */ +#define EVAL_RE_REPARSING 0x10 /* eval_sv() called with G_RE_REPARSING */ /* Support for switching (stack and block) contexts. * This ensures magic doesn't invalidate local stack and cx pointers. @@ -308,6 +308,7 @@ Deprecated. Use C<GIMME_V> instead. #define OPpEVAL_UNICODE 4 #define OPpEVAL_BYTES 8 #define OPpEVAL_COPHH 16 /* Construct %^H from cop hints */ +#define OPpEVAL_RE_REPARSING 32 /* eval_sv(..., G_RE_REPARSING) */ /* Private for OP_CALLER, OP_WANTARRAY and OP_RUNCV */ #define OPpOFFBYONE 128 /* Treat caller(1) as caller(2) */ @@ -71,7 +71,7 @@ typedef struct yy_parser { char multi_open; /* delimiter of said string */ char multi_close; /* delimiter of said string */ bool preambled; - /*** 8-bit hole ***/ + bool lex_re_reparsing; /* we're doing G_RE_REPARSING */ I32 lex_allbrackets;/* (), [], {}, ?: bracket count */ SUBLEXINFO sublex_info; LEXSHARED *lex_shared; @@ -2808,8 +2808,9 @@ Perl_eval_sv(pTHX_ SV *sv, I32 flags) myop.op_flags |= OP_GIMME_REVERSE(flags); if (flags & G_KEEPERR) myop.op_flags |= OPf_SPECIAL; - if (PL_reg_state.re_reparsing) - myop.op_private = OPpEVAL_COPHH; + + if (flags & G_RE_REPARSING) + myop.op_private = (OPpEVAL_COPHH | OPpEVAL_RE_REPARSING); /* fail now; otherwise we could fail after the JMPENV_PUSH but * before a PUSHEVAL, which corrupts the stack after a croak */ @@ -3358,7 +3358,9 @@ S_doeval(pTHX_ int gimme, CV* outside, U32 seq, HV *hh) PL_in_eval = (in_require ? (EVAL_INREQUIRE | (PL_in_eval & EVAL_INEVAL)) - : EVAL_INEVAL); + : (EVAL_INEVAL | + ((PL_op->op_private & OPpEVAL_RE_REPARSING) + ? EVAL_RE_REPARSING : 0))); PUSHMARK(SP); @@ -3420,6 +3422,15 @@ S_doeval(pTHX_ int gimme, CV* outside, U32 seq, HV *hh) else { PL_hints = saveop->op_private & OPpEVAL_COPHH ? oldcurcop->cop_hints : saveop->op_targ; + + /* making 'use re eval' not be in scope when compiling the + * qr/mabye_has_runtime_code_block/ ensures that we don't get + * infinite recursion when S_has_runtime_code() gives a false + * positive: the second time round, HINT_RE_EVAL isn't set so we + * don't bother calling S_has_runtime_code() */ + if (PL_in_eval & EVAL_RE_REPARSING) + PL_hints &= ~HINT_RE_EVAL; + if (hh) { /* SAVEHINTS created a new HV in PL_hintgv, which we need to GC */ SvREFCNT_dec(GvHV(PL_hintgv)); @@ -4877,19 +4877,12 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 rx_flags) * False positives are allowed */ static bool -S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr, - U32 pm_flags, char *pat, STRLEN plen) +S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, + char *pat, STRLEN plen) { int n = 0; STRLEN s; - /* avoid infinitely recursing when we recompile the pattern parcelled up - * as qr'...'. A single constant qr// string can't have have any - * run-time component in it, and thus, no runtime code. (A non-qr - * string, however, can, e.g. $x =~ '(?{})') */ - if ((pm_flags & PMf_IS_QR) && expr && expr->op_type == OP_CONST) - return 0; - for (s = 0; s < plen; s++) { if (n < pRExC_state->num_code_blocks && s == pRExC_state->code_blocks[n].start) @@ -5003,11 +4996,10 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, SAVETMPS; save_re_context(); PUSHSTACKi(PERLSI_REQUIRE); - /* this causes the toker to collapse \\ into \ when parsing - * qr''; normally only q'' does this. It also alters hints - * handling */ - PL_reg_state.re_reparsing = TRUE; - eval_sv(sv, G_SCALAR); + /* G_RE_REPARSING causes the toker to collapse \\ into \ when + * parsing qr''; normally only q'' does this. It also alters + * hints handling */ + eval_sv(sv, G_SCALAR|G_RE_REPARSING); SvREFCNT_dec_NN(sv); SPAGAIN; qr_ref = POPs; @@ -5212,8 +5204,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, I32 flags; I32 minlen = 0; U32 rx_flags; - SV *pat; + SV *pat = NULL; SV *code_blocksv = NULL; + SV** new_patternp = patternp; /* these are all flags - maybe they should be turned * into a single int with different bit masks */ @@ -5221,7 +5214,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, I32 sawplus = 0; I32 sawopen = 0; regex_charset initial_charset = get_regex_charset(orig_rx_flags); - bool code_is_utf8 = 0; bool recompile = 0; bool runtime_code = 0; scan_data_t data; @@ -5308,40 +5300,68 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, if (expr && (expr->op_type == OP_LIST || (expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) { - - /* is the source UTF8, and how many code blocks are there? */ + /* allocate code_blocks if needed */ OP *o; int ncode = 0; - for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) { - if (o->op_type == OP_CONST) { - /* skip if we have SVs as well as OPs. In this case, - * a) we decide utf8 based on SVs not OPs; - * b) the current pad may not match that which the ops - * were compiled in, so, so on threaded builds, - * cSVOPo_sv would look in the wrong pad */ - if (!pat_count && SvUTF8(cSVOPo_sv)) - code_is_utf8 = 1; - } - else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) - /* count of DO blocks */ - ncode++; - } + for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) + if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) + ncode++; /* count of DO blocks */ if (ncode) { pRExC_state->num_code_blocks = ncode; Newx(pRExC_state->code_blocks, ncode, struct reg_code_block); } } - if (pat_count) { - /* handle a list of SVs */ + if (!pat_count) { + /* compile-time pattern with just OP_CONSTs and DO blocks */ + + int n; + OP *o; + + /* find how many CONSTs there are */ + assert(expr); + n = 0; + if (expr->op_type == OP_CONST) + n = 1; + else + for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) { + if (o->op_type == OP_CONST) + n++; + } + + /* fake up an SV array */ + + assert(!new_patternp); + Newx(new_patternp, n, SV*); + SAVEFREEPV(new_patternp); + pat_count = n; + + n = 0; + if (expr->op_type == OP_CONST) + new_patternp[n] = cSVOPx_sv(expr); + else + for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) { + if (o->op_type == OP_CONST) + new_patternp[n++] = cSVOPo_sv; + } + + } + + { + /* concat args, handling magic, overloading etc */ SV **svp; + OP *o = NULL; + int n = 0; + STRLEN orig_patlen = 0; DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, - "Compiling List of SVs %d elements%s\n",pat_count, orig_rx_flags & RXf_SPLIT ? " for split" : "")); + "Assembling pattern from %d elements%s\n", pat_count, + orig_rx_flags & RXf_SPLIT ? " for split" : "")); + /* apply magic and RE overloading to each arg */ - for (svp = patternp; svp < patternp + pat_count; svp++) { + for (svp = new_patternp; svp < new_patternp + pat_count; svp++) { SV *rx = *svp; SvGETMAGIC(rx); if (SvROK(rx) && SvAMAGIC(rx)) { @@ -5356,21 +5376,19 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } } - if (pat_count > 1) { - /* concat multiple args and find any code block indexes */ - - OP *o = NULL; - int n = 0; - bool utf8 = 0; - STRLEN orig_patlen = 0; - - if (pRExC_state->num_code_blocks) { - o = cLISTOPx(expr)->op_first; - assert( o->op_type == OP_PUSHMARK + if (pRExC_state->num_code_blocks) { + if (expr->op_type == OP_CONST) + o = expr; + else { + o = cLISTOPx(expr)->op_first; + assert( o->op_type == OP_PUSHMARK || (o->op_type == OP_NULL && o->op_targ == OP_PUSHMARK) || o->op_type == OP_PADRANGE); - o = o->op_sibling; - } + o = o->op_sibling; + } + } + + if (pat_count > 1) { pat = newSVpvn("", 0); SAVEFREESV(pat); @@ -5381,124 +5399,120 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, * overloading but not concat overloading; but the main effect * in this obscure case is to need a 'use re eval' for a * literal code block */ - for (svp = patternp; svp < patternp + pat_count; svp++) { + for (svp = new_patternp; svp < new_patternp + pat_count; svp++) { if (SvUTF8(*svp)) - utf8 = 1; + SvUTF8_on(pat); } - if (utf8) - SvUTF8_on(pat); - - for (svp = patternp; svp < patternp + pat_count; svp++) { - SV *sv, *msv = *svp; - SV *rx; - bool code = 0; - /* we make the assumption here that each op in the list of - * op_siblings maps to one SV pushed onto the stack, - * except for code blocks, with have both an OP_NULL and - * and OP_CONST. - * This allows us to match up the list of SVs against the - * list of OPs to find the next code block. - * - * Note that PUSHMARK PADSV PADSV .. - * is optimised to - * PADRANGE NULL NULL .. - * so the alignment still works. */ - if (o) { - if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) { - assert(n < pRExC_state->num_code_blocks); - pRExC_state->code_blocks[n].start = SvCUR(pat); - pRExC_state->code_blocks[n].block = o; - pRExC_state->code_blocks[n].src_regex = NULL; - n++; - code = 1; - o = o->op_sibling; /* skip CONST */ - assert(o); - } - o = o->op_sibling;; - } + } - if ((SvAMAGIC(pat) || SvAMAGIC(msv)) && - (sv = amagic_call(pat, msv, concat_amg, AMGf_assign))) - { - sv_setsv(pat, sv); - /* overloading involved: all bets are off over literal - * code. Pretend we haven't seen it */ - pRExC_state->num_code_blocks -= n; - n = 0; - rx = NULL; + /* process args, concat them if there are multiple ones, + * and find any code block indexes */ + + + for (svp = new_patternp; svp < new_patternp + pat_count; svp++) { + SV *sv, *msv = *svp; + SV *rx = NULL; + bool code = 0; + /* we make the assumption here that each op in the list of + * op_siblings maps to one SV pushed onto the stack, + * except for code blocks, with have both an OP_NULL and + * and OP_CONST. + * This allows us to match up the list of SVs against the + * list of OPs to find the next code block. + * + * Note that PUSHMARK PADSV PADSV .. + * is optimised to + * PADRANGE NULL NULL .. + * so the alignment still works. */ + if (o) { + if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) { + assert(n < pRExC_state->num_code_blocks); + pRExC_state->code_blocks[n].start = pat ? SvCUR(pat) : 0; + pRExC_state->code_blocks[n].block = o; + pRExC_state->code_blocks[n].src_regex = NULL; + n++; + code = 1; + o = o->op_sibling; /* skip CONST */ + assert(o); + } + o = o->op_sibling;; + } - } - else { - while (SvAMAGIC(msv) - && (sv = AMG_CALLunary(msv, string_amg)) - && sv != msv - && !( SvROK(msv) - && SvROK(sv) - && SvRV(msv) == SvRV(sv)) - ) { - msv = sv; - SvGETMAGIC(msv); - } - if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP) - msv = SvRV(msv); + /* try concatenation overload ... */ + if (pat && (SvAMAGIC(pat) || SvAMAGIC(msv)) && + (sv = amagic_call(pat, msv, concat_amg, AMGf_assign))) + { + sv_setsv(pat, sv); + /* overloading involved: all bets are off over literal + * code. Pretend we haven't seen it */ + pRExC_state->num_code_blocks -= n; + n = 0; + } + else { + /* ... or failing that, try "" overload */ + while (SvAMAGIC(msv) + && (sv = AMG_CALLunary(msv, string_amg)) + && sv != msv + && !( SvROK(msv) + && SvROK(sv) + && SvRV(msv) == SvRV(sv)) + ) { + msv = sv; + SvGETMAGIC(msv); + } + if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP) + msv = SvRV(msv); + if (pat) { orig_patlen = SvCUR(pat); sv_catsv_nomg(pat, msv); rx = msv; - if (code) - pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1; } + else + pat = msv; + if (code) + pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1; + } - /* extract any code blocks within any embedded qr//'s */ - if (rx && SvTYPE(rx) == SVt_REGEXP - && RX_ENGINE((REGEXP*)rx)->op_comp) - { - - RXi_GET_DECL(ReANY((REGEXP *)rx), ri); - if (ri->num_code_blocks) { - int i; - /* the presence of an embedded qr// with code means - * we should always recompile: the text of the - * qr// may not have changed, but it may be a - * different closure than last time */ - recompile = 1; - Renew(pRExC_state->code_blocks, - pRExC_state->num_code_blocks + ri->num_code_blocks, - struct reg_code_block); - pRExC_state->num_code_blocks += ri->num_code_blocks; - for (i=0; i < ri->num_code_blocks; i++) { - struct reg_code_block *src, *dst; - STRLEN offset = orig_patlen - + ReANY((REGEXP *)rx)->pre_prefix; - assert(n < pRExC_state->num_code_blocks); - src = &ri->code_blocks[i]; - dst = &pRExC_state->code_blocks[n]; - dst->start = src->start + offset; - dst->end = src->end + offset; - dst->block = src->block; - dst->src_regex = (REGEXP*) SvREFCNT_inc( (SV*) - src->src_regex - ? src->src_regex - : (REGEXP*)rx); - n++; - } - } - } - } - SvSETMAGIC(pat); - } - else { - SV *sv; - pat = *patternp; - while (SvAMAGIC(pat) - && (sv = AMG_CALLunary(pat, string_amg)) - && sv != pat) + /* extract any code blocks within any embedded qr//'s */ + if (rx && SvTYPE(rx) == SVt_REGEXP + && RX_ENGINE((REGEXP*)rx)->op_comp) { - pat = sv; - SvGETMAGIC(pat); + + RXi_GET_DECL(ReANY((REGEXP *)rx), ri); + if (ri->num_code_blocks) { + int i; + /* the presence of an embedded qr// with code means + * we should always recompile: the text of the + * qr// may not have changed, but it may be a + * different closure than last time */ + recompile = 1; + Renew(pRExC_state->code_blocks, + pRExC_state->num_code_blocks + ri->num_code_blocks, + struct reg_code_block); + pRExC_state->num_code_blocks += ri->num_code_blocks; + for (i=0; i < ri->num_code_blocks; i++) { + struct reg_code_block *src, *dst; + STRLEN offset = orig_patlen + + ReANY((REGEXP *)rx)->pre_prefix; + assert(n < pRExC_state->num_code_blocks); + src = &ri->code_blocks[i]; + dst = &pRExC_state->code_blocks[n]; + dst->start = src->start + offset; + dst->end = src->end + offset; + dst->block = src->block; + dst->src_regex = (REGEXP*) SvREFCNT_inc( (SV*) + src->src_regex + ? src->src_regex + : (REGEXP*)rx); + n++; + } + } } } + if (pat_count > 1) + SvSETMAGIC(pat); - /* handle bare regex: foo =~ $re */ + /* handle bare (possibly after overloading) regex: foo =~ $re */ { SV *re = pat; if (SvROK(re)) @@ -5509,58 +5523,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, SvREFCNT_inc(re); Safefree(pRExC_state->code_blocks); DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, - "Precompiled pattern%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : "")); + "Precompiled pattern%s\n", + orig_rx_flags & RXf_SPLIT ? " for split" : "")); return (REGEXP*)re; } } } - else { - /* not a list of SVs, so must be a list of OPs */ - assert(expr); - if (expr->op_type == OP_LIST) { - int i = -1; - bool is_code = 0; - OP *o; - - DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, - "Compiling OP_LIST%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : "")); - - pat = newSVpvn("", 0); - SAVEFREESV(pat); - if (code_is_utf8) - SvUTF8_on(pat); - - /* given a list of CONSTs and DO blocks in expr, append all - * the CONSTs to pat, and record the start and end of each - * code block in code_blocks[] (each DO{} op is followed by an - * OP_CONST containing the corresponding literal '(?{...}) - * text) - */ - for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) { - if (o->op_type == OP_CONST) { - sv_catsv(pat, cSVOPo_sv); - if (is_code) { - pRExC_state->code_blocks[i].end = SvCUR(pat)-1; - is_code = 0; - } - } - else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) { - assert(i+1 < pRExC_state->num_code_blocks); - pRExC_state->code_blocks[++i].start = SvCUR(pat); - pRExC_state->code_blocks[i].block = o; - pRExC_state->code_blocks[i].src_regex = NULL; - is_code = 1; - } - } - } - else { - assert(expr->op_type == OP_CONST); - DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, - "Compiling OP_CONST%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : "")); - pat = cSVOPx_sv(expr); - } - } exp = SvPV_nomg(pat, plen); xend = exp + plen; @@ -5650,6 +5619,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } } + if ((pm_flags & PMf_USE_RE_EVAL) + /* this second condition covers the non-regex literal case, + * i.e. $foo =~ '(?{})'. */ + || (IN_PERL_COMPILETIME && (PL_hints & HINT_RE_EVAL)) + ) + runtime_code = S_has_runtime_code(aTHX_ pRExC_state, exp, plen); + /* return old regex if pattern hasn't changed */ /* XXX: note in the below we have to check the flags as well as the pattern. * @@ -5663,24 +5639,12 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, && ( RX_COMPFLAGS(old_re) == ( orig_rx_flags & RXf_PMf_FLAGCOPYMASK ) ) && RX_PRECOMP(old_re) && RX_PRELEN(old_re) == plen - && memEQ(RX_PRECOMP(old_re), exp, plen)) + && memEQ(RX_PRECOMP(old_re), exp, plen) + && !runtime_code /* with runtime code, always recompile */ ) { - /* with runtime code, always recompile */ - runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags, - exp, plen); - if (!runtime_code) { - Safefree(pRExC_state->code_blocks); - return old_re; - } + Safefree(pRExC_state->code_blocks); + return old_re; } - else if ((pm_flags & PMf_USE_RE_EVAL) - /* this second condition covers the non-regex literal case, - * i.e. $foo =~ '(?{})'. */ - || ( !PL_reg_state.re_reparsing && IN_PERL_COMPILETIME - && (PL_hints & HINT_RE_EVAL)) - ) - runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags, - exp, plen); rx_flags = orig_rx_flags; @@ -4878,8 +4878,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) */ Copy(&PL_reg_state, &saved_state, 1, struct re_save_state); - PL_reg_state.re_reparsing = FALSE; - if (!caller_cv) caller_cv = find_runcv(NULL); @@ -776,7 +776,6 @@ typedef struct regmatch_slab { struct re_save_state { bool re_state_eval_setup_done; /* from regexec.c */ bool re_state_reg_match_utf8; /* from regexec.c */ - bool re_reparsing; /* runtime (?{}) fed back into parser */ /* Space for U8 */ I32 re_state_reg_oldpos; /* from regexec.c */ I32 re_state_reg_maxiter; /* max wait until caching pos */ diff --git a/t/re/overload.t b/t/re/overload.t index 4e99bd3ec6..38d5140e0e 100644 --- a/t/re/overload.t +++ b/t/re/overload.t @@ -33,4 +33,149 @@ no warnings 'syntax'; is $1, $TAG, "void context //g against overloaded object"; } +{ + # an overloaded stringify returning itself shouldn't loop indefinitely + + + { + package Self; + use overload q{""} => sub { + return shift; + }, + fallback => 1; + } + + my $obj = bless [], 'Self'; + my $r = qr/$obj/; + pass("self object, 1 arg"); + $r = qr/foo$obj/; + pass("self object, 2 args"); +} + +{ + # [perl #116823] + # when overloading regex string constants, a different code path + # was taken if the regex was compile-time, leading to overloaded + # regex constant string segments not being handled correctly. + # They were just treated as OP_CONST strings to be concatted together. + # In particular, if the overload returned a regex object, it would + # just be stringified rather than having any code blocks processed. + + BEGIN { + overload::constant qr => sub { + my ($raw, $cooked, $type) = @_; + return $cooked unless defined $::CONST_QR_CLASS; + if ($type =~ /qq?/) { + return bless \$cooked, $::CONST_QR_CLASS; + } else { + return $cooked; + } + }; + } + + { + # returns a qr// object + + package OL_QR; + use overload q{""} => sub { + my $re = shift; + return qr/(?{ $OL_QR::count++ })$$re/; + }, + fallback => 1; + + } + + { + # returns a string + + package OL_STR; + use overload q{""} => sub { + my $re = shift; + return qq/(?{ \$OL_STR::count++ })$$re/; + }, + fallback => 1; + + } + + + my $qr; + + $::CONST_QR_CLASS = 'OL_QR'; + + $OL_QR::count = 0; + $qr = eval q{ qr/^foo$/; }; + ok("foo" =~ $qr, "compile-time, OL_QR, single constant segment"); + is($OL_QR::count, 1, "flag"); + + $OL_QR::count = 0; + $qr = eval q{ qr/^foo$(?{ $OL_QR::count++ })/; }; + ok("foo" =~ $qr, "compile-time, OL_QR, multiple constant segments"); + is($OL_QR::count, 2, "qr2 flag"); + + + # test /foo.../ when foo is given string overloading, + # for various permutations of '...' + + $::CONST_QR_CLASS = 'OL_STR'; + + for my $has_re_eval (0, 1) { + for my $has_qr (0, 1) { + for my $has_code (0, 1) { + for my $has_runtime (0, 1) { + for my $has_runtime_code (0, 1) { + if ($has_runtime_code) { + next unless $has_runtime; + } + note( "re_eval=$has_re_eval " + . "qr=$has_qr " + . "code=$has_code " + . "runtime=$has_runtime " + . "runtime_code=$has_runtime_code"); + my $eval = ''; + $eval .= q{use re 'eval'; } if $has_re_eval; + $eval .= q{$match = $str =~ }; + $eval .= q{qr} if $has_qr; + $eval .= q{/^abc}; + $eval .= q{(?{$blocks++})} if $has_code; + $eval .= q{$runtime} if $has_runtime; + $eval .= q{/; 1;}; + + my $runtime = q{def}; + $runtime .= q{(?{$run_blocks++})} if $has_runtime_code; + + my $blocks = 0; + my $run_blocks = 0; + my $match; + my $str = "abc"; + $str .= "def" if $runtime; + + my $result = eval $eval; + my $err = $@; + $result = $result ? 1 : 0; + + if (!$has_re_eval) { + is($result, 0, "EVAL: $eval"); + like($err, qr/Eval-group not allowed at runtime/, + "\$\@: $eval"); + next; + } + + is($result, 1, "EVAL: $eval"); + diag("\$@=[$err]") unless $result; + + is($match, 1, "MATCH: $eval"); + is($blocks, $has_code, "blocks"); + is($run_blocks, $has_runtime_code, "run_blocks"); + + } + } + } + } + } + + + undef $::CONST_QR_CLASS; +} + + done_testing(); @@ -2525,6 +2525,7 @@ S_sublex_push(pTHX) SAVEGENERICPV(PL_lex_brackstack); SAVEGENERICPV(PL_lex_casestack); SAVEGENERICPV(PL_parser->lex_shared); + SAVEBOOL(PL_parser->lex_re_reparsing); /* The here-doc parser needs to be able to peek into outer lexing scopes to find the body of the here-doc. So we put PL_linestr and @@ -2568,6 +2569,9 @@ S_sublex_push(pTHX) else PL_lex_inpat = NULL; + PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING); + PL_in_eval &= ~EVAL_RE_REPARSING; + return '('; } @@ -3751,7 +3755,9 @@ S_scan_const(pTHX_ char *start) /* return the substring (via pl_yylval) only if we parsed anything */ if (s > PL_bufptr) { SvREFCNT_inc_simple_void_NN(sv); - if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) { + if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING )) + && ! PL_parser->lex_re_reparsing) + { const char *const key = PL_lex_inpat ? "qr" : "q"; const STRLEN keylen = PL_lex_inpat ? 2 : 1; const char *type; @@ -9047,7 +9053,9 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what) } } -/* Either returns sv, or mortalizes/frees sv and returns a new SV*. +/* S_new_constant(): do any overload::constant lookup. + + Either returns sv, or mortalizes/frees sv and returns a new SV*. Best used as sv=new_constant(..., sv, ...). If s, pv are NULL, calls subroutine with one argument, and <type> is used with error messages only. @@ -9502,8 +9510,7 @@ S_scan_pat(pTHX_ char *start, I32 type) { dVAR; PMOP *pm; - char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing, - TRUE /* look for escaped bracketed metas */ ); + char *s; const char * const valid_flags = (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS); char charset = '\0'; /* character set modifier */ @@ -9513,9 +9520,9 @@ S_scan_pat(pTHX_ char *start, I32 type) PERL_ARGS_ASSERT_SCAN_PAT; - /* this was only needed for the initial scan_str; set it to false - * so that any (?{}) code blocks etc are parsed normally */ - PL_reg_state.re_reparsing = FALSE; + s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING), + TRUE /* look for escaped bracketed metas */ ); + if (!s) { const char * const delimiter = skipspace(start); Perl_croak(aTHX_ |