diff options
author | Father Chrysostomos <sprout@cpan.org> | 2012-09-29 11:27:35 -0700 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2012-10-01 12:51:56 -0700 |
commit | 6582db62b4fd96c49eac4d2a93589be8f4c17da3 (patch) | |
tree | 189426bc0a0ca1f27640df1f5fc83a32e2807015 /pp.c | |
parent | 9bb29b6866a80dfaa3765b219ca04942676a2fae (diff) | |
download | perl-6582db62b4fd96c49eac4d2a93589be8f4c17da3.tar.gz |
Make 4-arg substr check SvUTF8(target) after stringfying
If it checks the UTF8 flag first, it might be looking at a stale flag,
resulting in malformed UTF8. Both tests added produced malformed utf8
strings before this commit.
Simply moving this:
if (!DO_UTF8(sv))
sv_utf8_upgrade(sv);
after the stringification is not enough to fix this, as the string
retrieved will be out of date after we do an upgrade. To avoid
stringifying twice, we use SvPV_force if there is a replacement. This
means rearranging if() blocks a little.
The use of SvPV_force also means that string overloading is no longer
called twice on the target scalar. This rearrangement also means
that targets upgraded to utf8 are no longer exempt from the refer-
ence warning. (Oh, and the test for that warning was not testing any-
thing in its no warnings test, because the target was no longer a ref-
erence; so I corrected the test.)
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 39 |
1 files changed, 19 insertions, 20 deletions
@@ -3017,7 +3017,6 @@ PP(pp_substr) STRLEN repl_len; int num_args = PL_op->op_private & 7; bool repl_need_utf8_upgrade = FALSE; - bool repl_is_utf8 = FALSE; if (num_args > 2) { if (num_args > 3) { @@ -3038,17 +3037,7 @@ PP(pp_substr) repl_sv = POPs; } PUTBACK; - if (repl_sv) { - repl = SvPV_const(repl_sv, repl_len); - repl_is_utf8 = DO_UTF8(repl_sv) && repl_len; - if (repl_is_utf8) { - if (!DO_UTF8(sv)) - sv_utf8_upgrade(sv); - } - else if (DO_UTF8(sv)) - repl_need_utf8_upgrade = TRUE; - } - else if (lvalue) { + if (lvalue && !repl_sv) { SV * ret; ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); @@ -3067,7 +3056,24 @@ PP(pp_substr) PUSHs(ret); /* avoid SvSETMAGIC here */ RETURN; } - tmps = SvPV_const(sv, curlen); + if (repl_sv) { + repl = SvPV_const(repl_sv, repl_len); + SvGETMAGIC(sv); + if (SvROK(sv)) + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), + "Attempt to use reference as lvalue in substr" + ); + tmps = SvPV_force_nomg(sv, curlen); + if (DO_UTF8(repl_sv) && repl_len) { + if (!DO_UTF8(sv)) { + sv_utf8_upgrade(sv); + curlen = SvCUR(sv); + } + } + else if (DO_UTF8(sv)) + repl_need_utf8_upgrade = TRUE; + } + else tmps = SvPV_const(sv, curlen); if (DO_UTF8(sv)) { utf8_curlen = sv_len_utf8_nomg(sv); if (utf8_curlen == curlen) @@ -3109,17 +3115,10 @@ PP(pp_substr) repl_sv_copy = newSVsv(repl_sv); sv_utf8_upgrade(repl_sv_copy); repl = SvPV_const(repl_sv_copy, repl_len); - repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len; } - if (SvROK(sv)) - Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), - "Attempt to use reference as lvalue in substr" - ); if (!SvOK(sv)) sv_setpvs(sv, ""); sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0); - if (repl_is_utf8) - SvUTF8_on(sv); SvREFCNT_dec(repl_sv_copy); } } |