summaryrefslogtreecommitdiff
path: root/pp.c
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2012-09-29 11:27:35 -0700
committerFather Chrysostomos <sprout@cpan.org>2012-10-01 12:51:56 -0700
commit6582db62b4fd96c49eac4d2a93589be8f4c17da3 (patch)
tree189426bc0a0ca1f27640df1f5fc83a32e2807015 /pp.c
parent9bb29b6866a80dfaa3765b219ca04942676a2fae (diff)
downloadperl-6582db62b4fd96c49eac4d2a93589be8f4c17da3.tar.gz
Make 4-arg substr check SvUTF8(target) after stringfying
If it checks the UTF8 flag first, it might be looking at a stale flag, resulting in malformed UTF8. Both tests added produced malformed utf8 strings before this commit. Simply moving this: if (!DO_UTF8(sv)) sv_utf8_upgrade(sv); after the stringification is not enough to fix this, as the string retrieved will be out of date after we do an upgrade. To avoid stringifying twice, we use SvPV_force if there is a replacement. This means rearranging if() blocks a little. The use of SvPV_force also means that string overloading is no longer called twice on the target scalar. This rearrangement also means that targets upgraded to utf8 are no longer exempt from the refer- ence warning. (Oh, and the test for that warning was not testing any- thing in its no warnings test, because the target was no longer a ref- erence; so I corrected the test.)
Diffstat (limited to 'pp.c')
-rw-r--r--pp.c39
1 files changed, 19 insertions, 20 deletions
diff --git a/pp.c b/pp.c
index eef948352e..171cb87358 100644
--- a/pp.c
+++ b/pp.c
@@ -3017,7 +3017,6 @@ PP(pp_substr)
STRLEN repl_len;
int num_args = PL_op->op_private & 7;
bool repl_need_utf8_upgrade = FALSE;
- bool repl_is_utf8 = FALSE;
if (num_args > 2) {
if (num_args > 3) {
@@ -3038,17 +3037,7 @@ PP(pp_substr)
repl_sv = POPs;
}
PUTBACK;
- if (repl_sv) {
- repl = SvPV_const(repl_sv, repl_len);
- repl_is_utf8 = DO_UTF8(repl_sv) && repl_len;
- if (repl_is_utf8) {
- if (!DO_UTF8(sv))
- sv_utf8_upgrade(sv);
- }
- else if (DO_UTF8(sv))
- repl_need_utf8_upgrade = TRUE;
- }
- else if (lvalue) {
+ if (lvalue && !repl_sv) {
SV * ret;
ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
@@ -3067,7 +3056,24 @@ PP(pp_substr)
PUSHs(ret); /* avoid SvSETMAGIC here */
RETURN;
}
- tmps = SvPV_const(sv, curlen);
+ if (repl_sv) {
+ repl = SvPV_const(repl_sv, repl_len);
+ SvGETMAGIC(sv);
+ if (SvROK(sv))
+ Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
+ "Attempt to use reference as lvalue in substr"
+ );
+ tmps = SvPV_force_nomg(sv, curlen);
+ if (DO_UTF8(repl_sv) && repl_len) {
+ if (!DO_UTF8(sv)) {
+ sv_utf8_upgrade(sv);
+ curlen = SvCUR(sv);
+ }
+ }
+ else if (DO_UTF8(sv))
+ repl_need_utf8_upgrade = TRUE;
+ }
+ else tmps = SvPV_const(sv, curlen);
if (DO_UTF8(sv)) {
utf8_curlen = sv_len_utf8_nomg(sv);
if (utf8_curlen == curlen)
@@ -3109,17 +3115,10 @@ PP(pp_substr)
repl_sv_copy = newSVsv(repl_sv);
sv_utf8_upgrade(repl_sv_copy);
repl = SvPV_const(repl_sv_copy, repl_len);
- repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len;
}
- if (SvROK(sv))
- Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
- "Attempt to use reference as lvalue in substr"
- );
if (!SvOK(sv))
sv_setpvs(sv, "");
sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
- if (repl_is_utf8)
- SvUTF8_on(sv);
SvREFCNT_dec(repl_sv_copy);
}
}