diff options
-rw-r--r-- | pp_hot.c | 17 | ||||
-rw-r--r-- | t/re/subst.t | 22 |
2 files changed, 38 insertions, 1 deletions
@@ -2126,6 +2126,7 @@ PP(pp_subst) DIE(aTHX_ "%s", PL_no_modify); PUTBACK; + setup_match: s = SvPV_mutable(TARG, len); if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV) force_on_match = 1; @@ -2181,6 +2182,22 @@ PP(pp_subst) r_flags | REXEC_CHECKED); /* known replacement string? */ if (dstr) { + + /* Upgrade the source if the replacement is utf8 but the source is not, + * but only if it matched; see + * http://www.nntp.perl.org/group/perl.perl5.porters/2010/04/msg158809.html + */ + if (matched && DO_UTF8(dstr) && ! DO_UTF8(TARG)) { + const STRLEN new_len = sv_utf8_upgrade(TARG); + + /* If the lengths are the same, the pattern contains only + * invariants, can keep going; otherwise, various internal markers + * could be off, so redo */ + if (new_len != len) { + goto setup_match; + } + } + /* replacement needing upgrading? */ if (DO_UTF8(TARG) && !doutf8) { nsv = sv_newmortal(); diff --git a/t/re/subst.t b/t/re/subst.t index 042f67acc7..82c4a6ff8b 100644 --- a/t/re/subst.t +++ b/t/re/subst.t @@ -7,7 +7,7 @@ BEGIN { } require './test.pl'; -plan( tests => 143 ); +plan( tests => 149 ); $x = 'foo'; $_ = "x"; @@ -614,3 +614,23 @@ fresh_perl_is( '$_="abcef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'a 'bug: $var =~ s/$qr//e calling get-magic on $_ as well as $var', ); } + +{ # Bug #41530; replacing non-utf8 with a utf8 causes problems + my $string = "a\x{a0}a"; + my $sub_string = $string; + ok(! utf8::is_utf8($sub_string), "Verify that string isn't initially utf8"); + $sub_string =~ s/a/\x{100}/g; + ok(utf8::is_utf8($sub_string), + 'Verify replace of non-utf8 with utf8 upgrades to utf8'); + is($sub_string, "\x{100}\x{A0}\x{100}", + 'Verify #41530 fixed: replace of non-utf8 with utf8'); + + my $non_sub_string = $string; + ok(! utf8::is_utf8($non_sub_string), + "Verify that string isn't initially utf8"); + $non_sub_string =~ s/b/\x{100}/g; + ok(! utf8::is_utf8($non_sub_string), + "Verify that failed substitute doesn't change string's utf8ness"); + is($non_sub_string, $string, + "Verify that failed substitute doesn't change string"); +} |