summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pp_hot.c17
-rw-r--r--t/re/subst.t22
2 files changed, 38 insertions, 1 deletions
diff --git a/pp_hot.c b/pp_hot.c
index ea949b828e..ab3659306e 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2126,6 +2126,7 @@ PP(pp_subst)
DIE(aTHX_ "%s", PL_no_modify);
PUTBACK;
+ setup_match:
s = SvPV_mutable(TARG, len);
if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV)
force_on_match = 1;
@@ -2181,6 +2182,22 @@ PP(pp_subst)
r_flags | REXEC_CHECKED);
/* known replacement string? */
if (dstr) {
+
+ /* Upgrade the source if the replacement is utf8 but the source is not,
+ * but only if it matched; see
+ * http://www.nntp.perl.org/group/perl.perl5.porters/2010/04/msg158809.html
+ */
+ if (matched && DO_UTF8(dstr) && ! DO_UTF8(TARG)) {
+ const STRLEN new_len = sv_utf8_upgrade(TARG);
+
+ /* If the lengths are the same, the pattern contains only
+ * invariants, can keep going; otherwise, various internal markers
+ * could be off, so redo */
+ if (new_len != len) {
+ goto setup_match;
+ }
+ }
+
/* replacement needing upgrading? */
if (DO_UTF8(TARG) && !doutf8) {
nsv = sv_newmortal();
diff --git a/t/re/subst.t b/t/re/subst.t
index 042f67acc7..82c4a6ff8b 100644
--- a/t/re/subst.t
+++ b/t/re/subst.t
@@ -7,7 +7,7 @@ BEGIN {
}
require './test.pl';
-plan( tests => 143 );
+plan( tests => 149 );
$x = 'foo';
$_ = "x";
@@ -614,3 +614,23 @@ fresh_perl_is( '$_="abcef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'a
'bug: $var =~ s/$qr//e calling get-magic on $_ as well as $var',
);
}
+
+{ # Bug #41530; replacing non-utf8 with a utf8 causes problems
+ my $string = "a\x{a0}a";
+ my $sub_string = $string;
+ ok(! utf8::is_utf8($sub_string), "Verify that string isn't initially utf8");
+ $sub_string =~ s/a/\x{100}/g;
+ ok(utf8::is_utf8($sub_string),
+ 'Verify replace of non-utf8 with utf8 upgrades to utf8');
+ is($sub_string, "\x{100}\x{A0}\x{100}",
+ 'Verify #41530 fixed: replace of non-utf8 with utf8');
+
+ my $non_sub_string = $string;
+ ok(! utf8::is_utf8($non_sub_string),
+ "Verify that string isn't initially utf8");
+ $non_sub_string =~ s/b/\x{100}/g;
+ ok(! utf8::is_utf8($non_sub_string),
+ "Verify that failed substitute doesn't change string's utf8ness");
+ is($non_sub_string, $string,
+ "Verify that failed substitute doesn't change string");
+}