summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@khw-desktop.(none)>2010-05-11 10:57:41 -0600
committerRafael Garcia-Suarez <rgs@consttype.org>2010-05-17 09:51:56 +0200
commit3e462cdc2087ddf90984010fabd80c30db92bfa0 (patch)
treeab2329f812aa22c7ea9553d4d2d8299aadc7327b
parent618c9ef5ca707d1f047f20c323241c7349ab59c9 (diff)
downloadperl-3e462cdc2087ddf90984010fabd80c30db92bfa0.tar.gz
[perl #41530] s/non-utf8/is-utf8/ fails.
When the replacement is in utf8, there was failure to upgrade the result when the source and the pattern weren't in utf8. This simply checks that when there is a match that will lead to the replacement being done. It then does the upgrade. If this led to changes in the source, we redo the match because pointers to saved buffers could have changed. There may be other cases where we don't need to redo the match, but I don't know the code well-enough to easily figure it out.
-rw-r--r--pp_hot.c17
-rw-r--r--t/re/subst.t22
2 files changed, 38 insertions, 1 deletions
diff --git a/pp_hot.c b/pp_hot.c
index ea949b828e..ab3659306e 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2126,6 +2126,7 @@ PP(pp_subst)
DIE(aTHX_ "%s", PL_no_modify);
PUTBACK;
+ setup_match:
s = SvPV_mutable(TARG, len);
if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV)
force_on_match = 1;
@@ -2181,6 +2182,22 @@ PP(pp_subst)
r_flags | REXEC_CHECKED);
/* known replacement string? */
if (dstr) {
+
+ /* Upgrade the source if the replacement is utf8 but the source is not,
+ * but only if it matched; see
+ * http://www.nntp.perl.org/group/perl.perl5.porters/2010/04/msg158809.html
+ */
+ if (matched && DO_UTF8(dstr) && ! DO_UTF8(TARG)) {
+ const STRLEN new_len = sv_utf8_upgrade(TARG);
+
+ /* If the lengths are the same, the pattern contains only
+ * invariants, can keep going; otherwise, various internal markers
+ * could be off, so redo */
+ if (new_len != len) {
+ goto setup_match;
+ }
+ }
+
/* replacement needing upgrading? */
if (DO_UTF8(TARG) && !doutf8) {
nsv = sv_newmortal();
diff --git a/t/re/subst.t b/t/re/subst.t
index 042f67acc7..82c4a6ff8b 100644
--- a/t/re/subst.t
+++ b/t/re/subst.t
@@ -7,7 +7,7 @@ BEGIN {
}
require './test.pl';
-plan( tests => 143 );
+plan( tests => 149 );
$x = 'foo';
$_ = "x";
@@ -614,3 +614,23 @@ fresh_perl_is( '$_="abcef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'a
'bug: $var =~ s/$qr//e calling get-magic on $_ as well as $var',
);
}
+
+{ # Bug #41530; replacing non-utf8 with a utf8 causes problems
+ my $string = "a\x{a0}a";
+ my $sub_string = $string;
+ ok(! utf8::is_utf8($sub_string), "Verify that string isn't initially utf8");
+ $sub_string =~ s/a/\x{100}/g;
+ ok(utf8::is_utf8($sub_string),
+ 'Verify replace of non-utf8 with utf8 upgrades to utf8');
+ is($sub_string, "\x{100}\x{A0}\x{100}",
+ 'Verify #41530 fixed: replace of non-utf8 with utf8');
+
+ my $non_sub_string = $string;
+ ok(! utf8::is_utf8($non_sub_string),
+ "Verify that string isn't initially utf8");
+ $non_sub_string =~ s/b/\x{100}/g;
+ ok(! utf8::is_utf8($non_sub_string),
+ "Verify that failed substitute doesn't change string's utf8ness");
+ is($non_sub_string, $string,
+ "Verify that failed substitute doesn't change string");
+}