summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pp_hot.c38
-rw-r--r--sv.h2
-rw-r--r--t/re/subst.t8
3 files changed, 15 insertions, 33 deletions
diff --git a/pp_hot.c b/pp_hot.c
index 0ea4c66848..6d56d66edf 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2081,6 +2081,7 @@ PP(pp_subst)
EXTEND(SP,1);
}
+ SvGETMAGIC(TARG); /* must come before cow check */
#ifdef PERL_OLD_COPY_ON_WRITE
/* Awooga. Awooga. "bool" types that are actually char are dangerous,
because they make integers such as 256 "false". */
@@ -2100,8 +2101,7 @@ PP(pp_subst)
Perl_croak_no_modify(aTHX);
PUTBACK;
- s = SvPV_mutable(TARG, len);
- setup_match:
+ s = SvPV_nomg(TARG, len);
if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV || SvVOK(TARG))
force_on_match = 1;
@@ -2173,24 +2173,6 @@ PP(pp_subst)
if (SvTAINTED(dstr))
rxtainted |= SUBST_TAINT_REPL;
- /* Upgrade the source if the replacement is utf8 but the source is not,
- * but only if it matched; see
- * http://www.nntp.perl.org/group/perl.perl5.porters/2010/04/msg158809.html
- */
- if (DO_UTF8(dstr) && ! DO_UTF8(TARG)) {
- char * const orig_pvx = SvPOKp(TARG) ? SvPVX(TARG) : NULL;
- const STRLEN new_len = sv_utf8_upgrade_nomg(TARG);
-
- /* If the lengths are the same, the pattern contains only
- * invariants, can keep going; otherwise, various internal markers
- * could be off, so redo */
- if (new_len != len || orig_pvx != SvPVX(TARG)) {
- /* Do this here, to avoid multiple FETCHes. */
- s = SvPV_nomg(TARG, len);
- goto setup_match;
- }
- }
-
/* replacement needing upgrading? */
if (DO_UTF8(TARG) && !doutf8) {
nsv = sv_newmortal();
@@ -2352,21 +2334,15 @@ PP(pp_subst)
strend = s + (strend - m);
}
m = RX_OFFS(rx)[0].start + orig;
- if (doutf8 && !SvUTF8(dstr))
- sv_catpvn_nomg_utf8_upgrade(dstr, s, m - s, nsv);
- else
- sv_catpvn_nomg(dstr, s, m-s);
+ sv_catpvn_nomg_maybeutf8(dstr, s, m - s, DO_UTF8(TARG));
s = RX_OFFS(rx)[0].end + orig;
if (clen)
- sv_catpvn_nomg(dstr, c, clen);
+ sv_catpvn_nomg_maybeutf8(dstr, c, clen, doutf8);
if (once)
break;
} while (CALLREGEXEC(rx, s, strend, orig, s == m,
TARG, NULL, r_flags));
- if (doutf8 && !DO_UTF8(TARG))
- sv_catpvn_nomg_utf8_upgrade(dstr, s, strend - s, nsv);
- else
- sv_catpvn_nomg(dstr, s, strend - s);
+ sv_catpvn_nomg_maybeutf8(dstr, s, strend - s, DO_UTF8(TARG));
if (rpm->op_pmflags & PMf_NONDESTRUCT) {
/* From here on down we're using the copy, and leaving the original
@@ -2391,7 +2367,7 @@ PP(pp_subst)
SvPV_set(TARG, SvPVX(dstr));
SvCUR_set(TARG, SvCUR(dstr));
SvLEN_set(TARG, SvLEN(dstr));
- doutf8 |= DO_UTF8(dstr);
+ SvFLAGS(TARG) |= SvUTF8(dstr);
SvPV_set(dstr, NULL);
SPAGAIN;
@@ -2401,8 +2377,6 @@ PP(pp_subst)
if (!(rpm->op_pmflags & PMf_NONDESTRUCT)) {
(void)SvPOK_only_UTF8(TARG);
- if (doutf8)
- SvUTF8_on(TARG);
}
/* See "how taint works" above */
diff --git a/sv.h b/sv.h
index 69a73801b0..4cac64a6ea 100644
--- a/sv.h
+++ b/sv.h
@@ -1819,6 +1819,8 @@ mg.c:1024: warning: left-hand operand of comma expression has no effect
sv_utf8_upgrade(nsv); \
sv_catsv_nomg(dsv, nsv); \
} STMT_END
+#define sv_catpvn_nomg_maybeutf8(dsv, sstr, slen, is_utf8) \
+ sv_catpvn_flags(dsv, sstr, slen, (is_utf8)?SV_CATUTF8:SV_CATBYTES)
#ifdef PERL_CORE
# define sv_or_pv_len_utf8(sv, pv, bytelen) \
diff --git a/t/re/subst.t b/t/re/subst.t
index 0016843c8a..b70053778c 100644
--- a/t/re/subst.t
+++ b/t/re/subst.t
@@ -7,7 +7,7 @@ BEGIN {
require './test.pl';
}
-plan( tests => 201 );
+plan( tests => 202 );
$_ = 'david';
$a = s/david/rules/r;
@@ -848,3 +848,9 @@ $_ = "hello";
{ s/(.)/$l{my $a||$1}/g }
is $_, "HELLO",
'logop in s/// repl does not result in "constant" repl optimisation';
+
+$_ = "\xc4\x80";
+$a = "";
+utf8::upgrade $a;
+$_ =~ s/$/$a/;
+is $_, "\xc4\x80", "empty utf8 repl does not result in mangled utf8";