diff options
author | Father Chrysostomos <sprout@cpan.org> | 2011-12-04 10:47:40 -0800 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2011-12-04 10:50:48 -0800 |
commit | 83f78d1a27d5727dabfc8bcc2b961cb405b831e9 (patch) | |
tree | 77674fd3efedbd14bec2f2ffa6176678ff28fa67 /mg.c | |
parent | 7ba26d48121ff365601a73eefc7798693a3a9118 (diff) | |
download | perl-83f78d1a27d5727dabfc8bcc2b961cb405b831e9.tar.gz |
Adjust substr offsets when using, not when creating, lvalue
When substr() occurs in potential lvalue context, the offsets are
adjusted to the current string (negative being converted to positive,
lengths reaching beyond the end of the string being shortened, etc.)
as soon as the special lvalue to be returned is created.
When that lvalue is assigned to, the original scalar is stringified
once more.
That implementation results in two bugs:
1) Fetch is called twice in a simple substr() assignment (except in
void context, due to the special optimisation of commit 24fcb59fc).
2) These two calls are not equivalent:
$SIG{__WARN__} = sub { warn "w ",shift};
sub myprint { print @_; $_[0] = 1 }
print substr("", 2);
myprint substr("", 2);
The second one dies. The first one only warns. That’s mean. The
error is also wrong, sometimes, if the original string is going to get
longer before the substr lvalue is actually used.
The behaviour of \substr($str, -1) if $str changes length is com-
pletely undocumented. Before 5.10, it was documented as being unreli-
able and subject to change.
What this commit does is make the lvalue returned by substr remember
the original arguments and only adjust the offsets when the assign-
ment happens.
This means that the following now prints z, instead of xyz (which is
actually what I would expect):
$str = "a";
$substr = \substr($str,-1);
$str = "xyz";
print $substr;
Diffstat (limited to 'mg.c')
-rw-r--r-- | mg.c | 37 |
1 files changed, 29 insertions, 8 deletions
@@ -2163,16 +2163,24 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg) const char * const tmps = SvPV_const(lsv,len); STRLEN offs = LvTARGOFF(sv); STRLEN rem = LvTARGLEN(sv); + const bool negoff = LvFLAGS(sv) & 1; + const bool negrem = LvFLAGS(sv) & 2; PERL_ARGS_ASSERT_MAGIC_GETSUBSTR; PERL_UNUSED_ARG(mg); + if (!translate_substr_offsets( + SvUTF8(lsv) ? sv_len_utf8(lsv) : len, + negoff ? -(IV)offs : (IV)offs, !negoff, + negrem ? -(IV)rem : (IV)rem, !negrem, &offs, &rem + )) { + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string"); + sv_setsv_nomg(sv, &PL_sv_undef); + return 0; + } + if (SvUTF8(lsv)) offs = sv_pos_u2b_flags(lsv, offs, &rem, SV_CONST_RETURN); - if (offs > len) - offs = len; - if (rem > len - offs) - rem = len - offs; sv_setpvn(sv, tmps + offs, rem); if (SvUTF8(lsv)) SvUTF8_on(sv); @@ -2183,11 +2191,13 @@ int Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) { dVAR; - STRLEN len; + STRLEN len, lsv_len, oldtarglen, newtarglen; const char * const tmps = SvPV_const(sv, len); SV * const lsv = LvTARG(sv); STRLEN lvoff = LvTARGOFF(sv); STRLEN lvlen = LvTARGLEN(sv); + const bool negoff = LvFLAGS(sv) & 1; + const bool neglen = LvFLAGS(sv) & 2; PERL_ARGS_ASSERT_MAGIC_SETSUBSTR; PERL_UNUSED_ARG(mg); @@ -2197,25 +2207,36 @@ Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "Attempt to use reference as lvalue in substr" ); + if (SvUTF8(lsv)) lsv_len = sv_len_utf8(lsv); + else (void)SvPV_nomg(lsv,lsv_len); + if (!translate_substr_offsets( + lsv_len, + negoff ? -(IV)lvoff : (IV)lvoff, !negoff, + neglen ? -(IV)lvlen : (IV)lvlen, !neglen, &lvoff, &lvlen + )) + Perl_croak(aTHX_ "substr outside of string"); + oldtarglen = lvlen; if (DO_UTF8(sv)) { sv_utf8_upgrade(lsv); lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN); sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0); - LvTARGLEN(sv) = sv_len_utf8(sv); + newtarglen = sv_len_utf8(sv); SvUTF8_on(lsv); } else if (lsv && SvUTF8(lsv)) { const char *utf8; lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN); - LvTARGLEN(sv) = len; + newtarglen = len; utf8 = (char*)bytes_to_utf8((U8*)tmps, &len); sv_insert_flags(lsv, lvoff, lvlen, utf8, len, 0); Safefree(utf8); } else { sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0); - LvTARGLEN(sv) = len; + newtarglen = len; } + if (!neglen) LvTARGLEN(sv) = newtarglen; + if (negoff) LvTARGOFF(sv) += newtarglen - oldtarglen; return 0; } |