summaryrefslogtreecommitdiff
path: root/pp.c
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2011-12-04 10:47:40 -0800
committerFather Chrysostomos <sprout@cpan.org>2011-12-04 10:50:48 -0800
commit83f78d1a27d5727dabfc8bcc2b961cb405b831e9 (patch)
tree77674fd3efedbd14bec2f2ffa6176678ff28fa67 /pp.c
parent7ba26d48121ff365601a73eefc7798693a3a9118 (diff)
downloadperl-83f78d1a27d5727dabfc8bcc2b961cb405b831e9.tar.gz
Adjust substr offsets when using, not when creating, lvalue
When substr() occurs in potential lvalue context, the offsets are adjusted to the current string (negative being converted to positive, lengths reaching beyond the end of the string being shortened, etc.) as soon as the special lvalue to be returned is created. When that lvalue is assigned to, the original scalar is stringified once more. That implementation results in two bugs: 1) Fetch is called twice in a simple substr() assignment (except in void context, due to the special optimisation of commit 24fcb59fc). 2) These two calls are not equivalent: $SIG{__WARN__} = sub { warn "w ",shift}; sub myprint { print @_; $_[0] = 1 } print substr("", 2); myprint substr("", 2); The second one dies. The first one only warns. That’s mean. The error is also wrong, sometimes, if the original string is going to get longer before the substr lvalue is actually used. The behaviour of \substr($str, -1) if $str changes length is com- pletely undocumented. Before 5.10, it was documented as being unreli- able and subject to change. What this commit does is make the lvalue returned by substr remember the original arguments and only adjust the offsets when the assign- ment happens. This means that the following now prints z, instead of xyz (which is actually what I would expect): $str = "a"; $substr = \substr($str,-1); $str = "xyz"; print $substr;
Diffstat (limited to 'pp.c')
-rw-r--r--pp.c171
1 files changed, 96 insertions, 75 deletions
diff --git a/pp.c b/pp.c
index 0ecd144c99..d55c7fdf95 100644
--- a/pp.c
+++ b/pp.c
@@ -2954,6 +2954,73 @@ PP(pp_length)
RETURN;
}
+/* Returns false if substring is completely outside original string.
+ No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
+ always be true for an explicit 0.
+*/
+bool
+Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv,
+ bool pos1_is_uv, IV len_iv,
+ bool len_is_uv, STRLEN *posp,
+ STRLEN *lenp)
+{
+ IV pos2_iv;
+ int pos2_is_uv;
+
+ PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
+
+ if (!pos1_is_uv && pos1_iv < 0 && curlen) {
+ pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
+ pos1_iv += curlen;
+ }
+ if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
+ return FALSE;
+
+ if (len_iv || len_is_uv) {
+ if (!len_is_uv && len_iv < 0) {
+ pos2_iv = curlen + len_iv;
+ if (curlen)
+ pos2_is_uv = curlen-1 > ~(UV)len_iv;
+ else
+ pos2_is_uv = 0;
+ } else { /* len_iv >= 0 */
+ if (!pos1_is_uv && pos1_iv < 0) {
+ pos2_iv = pos1_iv + len_iv;
+ pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
+ } else {
+ if ((UV)len_iv > curlen-(UV)pos1_iv)
+ pos2_iv = curlen;
+ else
+ pos2_iv = pos1_iv+len_iv;
+ pos2_is_uv = 1;
+ }
+ }
+ }
+ else {
+ pos2_iv = curlen;
+ pos2_is_uv = 1;
+ }
+
+ if (!pos2_is_uv && pos2_iv < 0) {
+ if (!pos1_is_uv && pos1_iv < 0)
+ return FALSE;
+ pos2_iv = 0;
+ }
+ else if (!pos1_is_uv && pos1_iv < 0)
+ pos1_iv = 0;
+
+ if ((UV)pos2_iv < (UV)pos1_iv)
+ pos2_iv = pos1_iv;
+ if ((UV)pos2_iv > curlen)
+ pos2_iv = curlen;
+
+ /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
+ *posp = (STRLEN)( (UV)pos1_iv );
+ *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
+
+ return TRUE;
+}
+
PP(pp_substr)
{
dVAR; dSP; dTARGET;
@@ -2963,11 +3030,9 @@ PP(pp_substr)
SV * pos_sv;
IV pos1_iv;
int pos1_is_uv;
- IV pos2_iv;
- int pos2_is_uv;
SV * len_sv;
IV len_iv = 0;
- int len_is_uv = 1;
+ int len_is_uv = 0;
I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
const bool rvalue = (GIMME_V != G_VOID);
const char *tmps;
@@ -2984,7 +3049,7 @@ PP(pp_substr)
}
if ((len_sv = POPs)) {
len_iv = SvIV(len_sv);
- len_is_uv = SvIOK_UV(len_sv);
+ len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
}
else num_args--;
}
@@ -3006,15 +3071,27 @@ PP(pp_substr)
}
else if (DO_UTF8(sv))
repl_need_utf8_upgrade = TRUE;
- lvalue = 0;
}
- if (lvalue) {
- tmps = NULL; /* unused */
- SvGETMAGIC(sv);
- if (SvOK(sv)) (void)SvPV_nomg_const(sv, curlen);
- else curlen = 0;
+ else if (lvalue) {
+ SV * ret;
+ ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
+ sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
+ LvTYPE(ret) = 'x';
+ LvTARG(ret) = SvREFCNT_inc_simple(sv);
+ LvTARGOFF(ret) =
+ pos1_is_uv || pos1_iv >= 0
+ ? (STRLEN)(UV)pos1_iv
+ : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
+ LvTARGLEN(ret) =
+ len_is_uv || len_iv > 0
+ ? (STRLEN)(UV)len_iv
+ : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
+
+ SPAGAIN;
+ PUSHs(ret); /* avoid SvSETMAGIC here */
+ RETURN;
}
- else tmps = SvPV_const(sv, curlen);
+ tmps = SvPV_const(sv, curlen);
if (DO_UTF8(sv)) {
utf8_curlen = sv_len_utf8(sv);
if (utf8_curlen == curlen)
@@ -3025,72 +3102,16 @@ PP(pp_substr)
else
utf8_curlen = 0;
- if (!pos1_is_uv && pos1_iv < 0 && curlen) {
- pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
- pos1_iv += curlen;
- }
- if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
- goto bound_fail;
-
- if (num_args > 2) {
- if (!len_is_uv && len_iv < 0) {
- pos2_iv = curlen + len_iv;
- if (curlen)
- pos2_is_uv = curlen-1 > ~(UV)len_iv;
- else
- pos2_is_uv = 0;
- } else { /* len_iv >= 0 */
- if (!pos1_is_uv && pos1_iv < 0) {
- pos2_iv = pos1_iv + len_iv;
- pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
- } else {
- if ((UV)len_iv > curlen-(UV)pos1_iv)
- pos2_iv = curlen;
- else
- pos2_iv = pos1_iv+len_iv;
- pos2_is_uv = 1;
- }
- }
- }
- else {
- pos2_iv = curlen;
- pos2_is_uv = 1;
- }
-
- if (!pos2_is_uv && pos2_iv < 0) {
- if (!pos1_is_uv && pos1_iv < 0)
- goto bound_fail;
- pos2_iv = 0;
- }
- else if (!pos1_is_uv && pos1_iv < 0)
- pos1_iv = 0;
-
- if ((UV)pos2_iv < (UV)pos1_iv)
- pos2_iv = pos1_iv;
- if ((UV)pos2_iv > curlen)
- pos2_iv = curlen;
-
{
- /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
- const STRLEN pos = (STRLEN)( (UV)pos1_iv );
- const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
- STRLEN byte_len = len;
- STRLEN byte_pos = utf8_curlen
- ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
+ STRLEN pos, len, byte_len, byte_pos;
- if (lvalue) {
- SV * ret;
- ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
- sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
- LvTYPE(ret) = 'x';
- LvTARG(ret) = SvREFCNT_inc_simple(sv);
- LvTARGOFF(ret) = pos;
- LvTARGLEN(ret) = len;
+ if (!translate_substr_offsets(
+ curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
+ )) goto bound_fail;
- SPAGAIN;
- PUSHs(ret); /* avoid SvSETMAGIC here */
- RETURN;
- }
+ byte_len = len;
+ byte_pos = utf8_curlen
+ ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
tmps += byte_pos;
@@ -3134,7 +3155,7 @@ PP(pp_substr)
RETURN;
bound_fail:
- if (lvalue || repl)
+ if (repl)
Perl_croak(aTHX_ "substr outside of string");
Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
RETPUSHUNDEF;