diff options
author | Karl Williamson <khw@khw-desktop.(none)> | 2010-05-11 10:57:41 -0600 |
---|---|---|
committer | Rafael Garcia-Suarez <rgs@consttype.org> | 2010-05-17 09:51:56 +0200 |
commit | 3e462cdc2087ddf90984010fabd80c30db92bfa0 (patch) | |
tree | ab2329f812aa22c7ea9553d4d2d8299aadc7327b /pp_hot.c | |
parent | 618c9ef5ca707d1f047f20c323241c7349ab59c9 (diff) | |
download | perl-3e462cdc2087ddf90984010fabd80c30db92bfa0.tar.gz |
[perl #41530] s/non-utf8/is-utf8/ fails.
When the replacement is in utf8, there was a failure to upgrade the result
when the source and the pattern weren't in utf8. This simply checks
whether there is a match that will lead to the replacement being done;
if so, it does the upgrade. If this led to changes in the source, we redo
the match because pointers to saved buffers could have changed. There
may be other cases where we don't need to redo the match, but I don't
know the code well enough to easily figure it out.
Diffstat (limited to 'pp_hot.c')
-rw-r--r-- | pp_hot.c | 17 |
1 files changed, 17 insertions, 0 deletions
@@ -2126,6 +2126,7 @@ PP(pp_subst) DIE(aTHX_ "%s", PL_no_modify); PUTBACK; + setup_match: s = SvPV_mutable(TARG, len); if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV) force_on_match = 1; @@ -2181,6 +2182,22 @@ PP(pp_subst) r_flags | REXEC_CHECKED); /* known replacement string? */ if (dstr) { + + /* Upgrade the source if the replacement is utf8 but the source is not, + * but only if it matched; see + * http://www.nntp.perl.org/group/perl.perl5.porters/2010/04/msg158809.html + */ + if (matched && DO_UTF8(dstr) && ! DO_UTF8(TARG)) { + const STRLEN new_len = sv_utf8_upgrade(TARG); + + /* If the lengths are the same, the pattern contains only + * invariants, can keep going; otherwise, various internal markers + * could be off, so redo */ + if (new_len != len) { + goto setup_match; + } + } + /* replacement needing upgrading? */ if (DO_UTF8(TARG) && !doutf8) { nsv = sv_newmortal(); |