diff options
author | Father Chrysostomos <sprout@cpan.org> | 2012-10-11 02:03:35 -0700 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2012-10-11 23:07:35 -0700 |
commit | 1754320d025df69f39aefec8568947369f4b13cb (patch) | |
tree | 197fa62f4712b007e1310175f9b39fa4e0957bc6 /pp_hot.c | |
parent | 2ed8c61f1921fabddfeb2a099733b977cb66ae42 (diff) | |
download | perl-1754320d025df69f39aefec8568947369f4b13cb.tar.gz |
[perl #49190] Stringify repl repeatedly in s///g
pm_runtime in op.c examines the rhs of s/// to see whether it is safe
to execute that set of ops just once. If it sees a match var or an
expression with side effects, it creates a pp_substcont op, which
results in the rhs being executed multiple times.
If the rhs seems constant enough, pp_subst does the substitution in a
tight loop.
This unfortunately causes s/a/$a/ to fail if *a has been aliased to
*1. Furthermore, $REGMARK and $REGERROR did not count as match vars.
pp_subst actually has two separate loops. One of them modifies the
target in place. The other appends to a new scalar and then copies it
back to the target. The first loop is used if it seems safe.
This commit makes $REGMARK, $REGERROR and aliases to match vars work=
when the replacement consists solely of the variable.
It does this by setting PL_curpm before stringifying the replacement,
so that $1 et al. see the right pattern. It also stringifies the
variable for each iteration of the second loop, so that $1 and
$REGMARK update.
The first loop, which requires the rhs to be constant, is skipped if
the regexp contains the special backtracking control verbs that mod-
ify $REGMARK and $REGERROR.
Diffstat (limited to 'pp_hot.c')
-rw-r--r-- | pp_hot.c | 27 |
1 files changed, 23 insertions, 4 deletions
@@ -2168,6 +2168,8 @@ PP(pp_subst) RETURN; } + PL_curpm = pm; + /* known replacement string? */ if (dstr) { if (SvTAINTED(dstr)) @@ -2201,7 +2203,7 @@ PP(pp_subst) #endif && (I32)clen <= RX_MINLENRET(rx) && (once || !(r_flags & REXEC_COPY_STR)) - && !(RX_EXTFLAGS(rx) & RXf_LOOKBEHIND_SEEN) + && !(RX_EXTFLAGS(rx) & (RXf_LOOKBEHIND_SEEN|RXf_MODIFIES_VARS)) && (!doutf8 || SvUTF8(TARG)) && !(rpm->op_pmflags & PMf_NONDESTRUCT)) { @@ -2218,7 +2220,6 @@ PP(pp_subst) goto force_it; } d = s; - PL_curpm = pm; if (once) { if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ rxtainted |= SUBST_TAINT_PAT; @@ -2288,6 +2289,8 @@ PP(pp_subst) } } else { + bool first; + SV *repl; if (force_on_match) { force_on_match = 0; if (rpm->op_pmflags & PMf_NONDESTRUCT) { @@ -2306,8 +2309,8 @@ PP(pp_subst) #endif if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ rxtainted |= SUBST_TAINT_PAT; + repl = dstr; dstr = newSVpvn_flags(m, s-m, SVs_TEMP | (DO_UTF8(TARG) ? SVf_UTF8 : 0)); - PL_curpm = pm; if (!c) { PERL_CONTEXT *cx; SPAGAIN; @@ -2320,6 +2323,7 @@ PP(pp_subst) RETURNOP(cPMOP->op_pmreplrootu.op_pmreplroot); } r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST; + first = TRUE; do { if (iters++ > maxiters) DIE(aTHX_ "Substitution loop"); @@ -2336,8 +2340,23 @@ PP(pp_subst) m = RX_OFFS(rx)[0].start + orig; sv_catpvn_nomg_maybeutf8(dstr, s, m - s, DO_UTF8(TARG)); s = RX_OFFS(rx)[0].end + orig; - if (clen) + if (first) { + /* replacement already stringified */ + if (clen) sv_catpvn_nomg_maybeutf8(dstr, c, clen, doutf8); + first = FALSE; + } + else { + if (SvTAINTED(dstr)) + rxtainted |= SUBST_TAINT_REPL; + if (PL_encoding) { + if (!nsv) nsv = sv_newmortal(); + sv_copypv(nsv, repl); + if (!DO_UTF8(nsv)) sv_recode_to_utf8(nsv, PL_encoding); + sv_catsv(dstr, nsv); + } + else sv_catsv(dstr, repl); + } if (once) break; } while (CALLREGEXEC(rx, s, strend, orig, s == m, |