summaryrefslogtreecommitdiff
path: root/pp_hot.c
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2012-10-11 02:03:35 -0700
committerFather Chrysostomos <sprout@cpan.org>2012-10-11 23:07:35 -0700
commit1754320d025df69f39aefec8568947369f4b13cb (patch)
tree197fa62f4712b007e1310175f9b39fa4e0957bc6 /pp_hot.c
parent2ed8c61f1921fabddfeb2a099733b977cb66ae42 (diff)
downloadperl-1754320d025df69f39aefec8568947369f4b13cb.tar.gz
[perl #49190] Stringify repl repeatedly in s///g
pm_runtime in op.c examines the rhs of s/// to see whether it is safe to execute that set of ops just once. If it sees a match var or an expression with side effects, it creates a pp_substcont op, which results in the rhs being executed multiple times. If the rhs seems constant enough, pp_subst does the substitution in a tight loop. This unfortunately causes s/a/$a/ to fail if *a has been aliased to *1. Furthermore, $REGMARK and $REGERROR did not count as match vars. pp_subst actually has two separate loops. One of them modifies the target in place. The other appends to a new scalar and then copies it back to the target. The first loop is used if it seems safe. This commit makes $REGMARK, $REGERROR and aliases to match vars work= when the replacement consists solely of the variable. It does this by setting PL_curpm before stringifying the replacement, so that $1 et al. see the right pattern. It also stringifies the variable for each iteration of the second loop, so that $1 and $REGMARK update. The first loop, which requires the rhs to be constant, is skipped if the regexp contains the special backtracking control verbs that mod- ify $REGMARK and $REGERROR.
Diffstat (limited to 'pp_hot.c')
-rw-r--r--pp_hot.c27
1 files changed, 23 insertions, 4 deletions
diff --git a/pp_hot.c b/pp_hot.c
index 9c66684f63..ec0b3b8232 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2168,6 +2168,8 @@ PP(pp_subst)
RETURN;
}
+ PL_curpm = pm;
+
/* known replacement string? */
if (dstr) {
if (SvTAINTED(dstr))
@@ -2201,7 +2203,7 @@ PP(pp_subst)
#endif
&& (I32)clen <= RX_MINLENRET(rx)
&& (once || !(r_flags & REXEC_COPY_STR))
- && !(RX_EXTFLAGS(rx) & RXf_LOOKBEHIND_SEEN)
+ && !(RX_EXTFLAGS(rx) & (RXf_LOOKBEHIND_SEEN|RXf_MODIFIES_VARS))
&& (!doutf8 || SvUTF8(TARG))
&& !(rpm->op_pmflags & PMf_NONDESTRUCT))
{
@@ -2218,7 +2220,6 @@ PP(pp_subst)
goto force_it;
}
d = s;
- PL_curpm = pm;
if (once) {
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
@@ -2288,6 +2289,8 @@ PP(pp_subst)
}
}
else {
+ bool first;
+ SV *repl;
if (force_on_match) {
force_on_match = 0;
if (rpm->op_pmflags & PMf_NONDESTRUCT) {
@@ -2306,8 +2309,8 @@ PP(pp_subst)
#endif
if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */
rxtainted |= SUBST_TAINT_PAT;
+ repl = dstr;
dstr = newSVpvn_flags(m, s-m, SVs_TEMP | (DO_UTF8(TARG) ? SVf_UTF8 : 0));
- PL_curpm = pm;
if (!c) {
PERL_CONTEXT *cx;
SPAGAIN;
@@ -2320,6 +2323,7 @@ PP(pp_subst)
RETURNOP(cPMOP->op_pmreplrootu.op_pmreplroot);
}
r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
+ first = TRUE;
do {
if (iters++ > maxiters)
DIE(aTHX_ "Substitution loop");
@@ -2336,8 +2340,23 @@ PP(pp_subst)
m = RX_OFFS(rx)[0].start + orig;
sv_catpvn_nomg_maybeutf8(dstr, s, m - s, DO_UTF8(TARG));
s = RX_OFFS(rx)[0].end + orig;
- if (clen)
+ if (first) {
+ /* replacement already stringified */
+ if (clen)
sv_catpvn_nomg_maybeutf8(dstr, c, clen, doutf8);
+ first = FALSE;
+ }
+ else {
+ if (SvTAINTED(dstr))
+ rxtainted |= SUBST_TAINT_REPL;
+ if (PL_encoding) {
+ if (!nsv) nsv = sv_newmortal();
+ sv_copypv(nsv, repl);
+ if (!DO_UTF8(nsv)) sv_recode_to_utf8(nsv, PL_encoding);
+ sv_catsv(dstr, nsv);
+ }
+ else sv_catsv(dstr, repl);
+ }
if (once)
break;
} while (CALLREGEXEC(rx, s, strend, orig, s == m,