summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dump.c1
-rw-r--r--embed.fnc6
-rw-r--r--embed.h3
-rw-r--r--mg.c37
-rw-r--r--pp.c171
-rw-r--r--proto.h8
-rw-r--r--sv.h2
-rw-r--r--t/op/substr.t47
8 files changed, 191 insertions, 84 deletions
diff --git a/dump.c b/dump.c
index 3cb7167bb4..2c635deec9 100644
--- a/dump.c
+++ b/dump.c
@@ -1941,6 +1941,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
Perl_dump_indent(aTHX_ level, file, " TARGOFF = %"IVdf"\n", (IV)LvTARGOFF(sv));
Perl_dump_indent(aTHX_ level, file, " TARGLEN = %"IVdf"\n", (IV)LvTARGLEN(sv));
Perl_dump_indent(aTHX_ level, file, " TARG = 0x%"UVxf"\n", PTR2UV(LvTARG(sv)));
+ Perl_dump_indent(aTHX_ level, file, " FLAGS = %"IVdf"\n", (IV)LvFLAGS(sv));
if (LvTYPE(sv) != 't' && LvTYPE(sv) != 'T')
do_sv_dump(level+1, file, LvTARG(sv), nest+1, maxnest,
dumpops, pvlim);
diff --git a/embed.fnc b/embed.fnc
index 4d2b6668dc..470b11d1e1 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1391,6 +1391,12 @@ Apd |UV |to_utf8_upper |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
Apd |UV |to_utf8_title |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
Ampd |UV |to_utf8_fold |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
AMp |UV |_to_utf8_fold_flags|NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp|U8 flags
+#if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C)
+p |bool |translate_substr_offsets|STRLEN curlen|IV pos1_iv \
+ |bool pos1_is_uv|IV len_iv \
+ |bool len_is_uv|NN STRLEN *posp \
+ |NN STRLEN *lenp
+#endif
#if defined(UNLINK_ALL_VERSIONS)
Ap |I32 |unlnk |NN const char* f
#endif
diff --git a/embed.h b/embed.h
index 03aefc1fc8..d05dd8a178 100644
--- a/embed.h
+++ b/embed.h
@@ -1346,6 +1346,9 @@
#define save_magic(a,b) S_save_magic(aTHX_ a,b)
#define unwind_handler_stack(a) S_unwind_handler_stack(aTHX_ a)
# endif
+# if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C)
+#define translate_substr_offsets(a,b,c,d,e,f,g) Perl_translate_substr_offsets(aTHX_ a,b,c,d,e,f,g)
+# endif
# if defined(PERL_IN_MRO_C)
#define mro_clean_isarev(a,b,c,d,e) S_mro_clean_isarev(aTHX_ a,b,c,d,e)
#define mro_gather_and_rename(a,b,c,d,e) S_mro_gather_and_rename(aTHX_ a,b,c,d,e)
diff --git a/mg.c b/mg.c
index fa4b4468ec..c55ca63a58 100644
--- a/mg.c
+++ b/mg.c
@@ -2163,16 +2163,24 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg)
const char * const tmps = SvPV_const(lsv,len);
STRLEN offs = LvTARGOFF(sv);
STRLEN rem = LvTARGLEN(sv);
+ const bool negoff = LvFLAGS(sv) & 1;
+ const bool negrem = LvFLAGS(sv) & 2;
PERL_ARGS_ASSERT_MAGIC_GETSUBSTR;
PERL_UNUSED_ARG(mg);
+ if (!translate_substr_offsets(
+ SvUTF8(lsv) ? sv_len_utf8(lsv) : len,
+ negoff ? -(IV)offs : (IV)offs, !negoff,
+ negrem ? -(IV)rem : (IV)rem, !negrem, &offs, &rem
+ )) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
+ sv_setsv_nomg(sv, &PL_sv_undef);
+ return 0;
+ }
+
if (SvUTF8(lsv))
offs = sv_pos_u2b_flags(lsv, offs, &rem, SV_CONST_RETURN);
- if (offs > len)
- offs = len;
- if (rem > len - offs)
- rem = len - offs;
sv_setpvn(sv, tmps + offs, rem);
if (SvUTF8(lsv))
SvUTF8_on(sv);
@@ -2183,11 +2191,13 @@ int
Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg)
{
dVAR;
- STRLEN len;
+ STRLEN len, lsv_len, oldtarglen, newtarglen;
const char * const tmps = SvPV_const(sv, len);
SV * const lsv = LvTARG(sv);
STRLEN lvoff = LvTARGOFF(sv);
STRLEN lvlen = LvTARGLEN(sv);
+ const bool negoff = LvFLAGS(sv) & 1;
+ const bool neglen = LvFLAGS(sv) & 2;
PERL_ARGS_ASSERT_MAGIC_SETSUBSTR;
PERL_UNUSED_ARG(mg);
@@ -2197,25 +2207,36 @@ Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg)
Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
"Attempt to use reference as lvalue in substr"
);
+ if (SvUTF8(lsv)) lsv_len = sv_len_utf8(lsv);
+ else (void)SvPV_nomg(lsv,lsv_len);
+ if (!translate_substr_offsets(
+ lsv_len,
+ negoff ? -(IV)lvoff : (IV)lvoff, !negoff,
+ neglen ? -(IV)lvlen : (IV)lvlen, !neglen, &lvoff, &lvlen
+ ))
+ Perl_croak(aTHX_ "substr outside of string");
+ oldtarglen = lvlen;
if (DO_UTF8(sv)) {
sv_utf8_upgrade(lsv);
lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN);
sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0);
- LvTARGLEN(sv) = sv_len_utf8(sv);
+ newtarglen = sv_len_utf8(sv);
SvUTF8_on(lsv);
}
else if (lsv && SvUTF8(lsv)) {
const char *utf8;
lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN);
- LvTARGLEN(sv) = len;
+ newtarglen = len;
utf8 = (char*)bytes_to_utf8((U8*)tmps, &len);
sv_insert_flags(lsv, lvoff, lvlen, utf8, len, 0);
Safefree(utf8);
}
else {
sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0);
- LvTARGLEN(sv) = len;
+ newtarglen = len;
}
+ if (!neglen) LvTARGLEN(sv) = newtarglen;
+ if (negoff) LvTARGOFF(sv) += newtarglen - oldtarglen;
return 0;
}
diff --git a/pp.c b/pp.c
index 0ecd144c99..d55c7fdf95 100644
--- a/pp.c
+++ b/pp.c
@@ -2954,6 +2954,73 @@ PP(pp_length)
RETURN;
}
+/* Returns false if substring is completely outside original string.
+ No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
+ always be true for an explicit 0.
+*/
+bool
+Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv,
+ bool pos1_is_uv, IV len_iv,
+ bool len_is_uv, STRLEN *posp,
+ STRLEN *lenp)
+{
+ IV pos2_iv;
+ int pos2_is_uv;
+
+ PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
+
+ if (!pos1_is_uv && pos1_iv < 0 && curlen) {
+ pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
+ pos1_iv += curlen;
+ }
+ if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
+ return FALSE;
+
+ if (len_iv || len_is_uv) {
+ if (!len_is_uv && len_iv < 0) {
+ pos2_iv = curlen + len_iv;
+ if (curlen)
+ pos2_is_uv = curlen-1 > ~(UV)len_iv;
+ else
+ pos2_is_uv = 0;
+ } else { /* len_iv >= 0 */
+ if (!pos1_is_uv && pos1_iv < 0) {
+ pos2_iv = pos1_iv + len_iv;
+ pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
+ } else {
+ if ((UV)len_iv > curlen-(UV)pos1_iv)
+ pos2_iv = curlen;
+ else
+ pos2_iv = pos1_iv+len_iv;
+ pos2_is_uv = 1;
+ }
+ }
+ }
+ else {
+ pos2_iv = curlen;
+ pos2_is_uv = 1;
+ }
+
+ if (!pos2_is_uv && pos2_iv < 0) {
+ if (!pos1_is_uv && pos1_iv < 0)
+ return FALSE;
+ pos2_iv = 0;
+ }
+ else if (!pos1_is_uv && pos1_iv < 0)
+ pos1_iv = 0;
+
+ if ((UV)pos2_iv < (UV)pos1_iv)
+ pos2_iv = pos1_iv;
+ if ((UV)pos2_iv > curlen)
+ pos2_iv = curlen;
+
+ /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
+ *posp = (STRLEN)( (UV)pos1_iv );
+ *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
+
+ return TRUE;
+}
+
PP(pp_substr)
{
dVAR; dSP; dTARGET;
@@ -2963,11 +3030,9 @@ PP(pp_substr)
SV * pos_sv;
IV pos1_iv;
int pos1_is_uv;
- IV pos2_iv;
- int pos2_is_uv;
SV * len_sv;
IV len_iv = 0;
- int len_is_uv = 1;
+ int len_is_uv = 0;
I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
const bool rvalue = (GIMME_V != G_VOID);
const char *tmps;
@@ -2984,7 +3049,7 @@ PP(pp_substr)
}
if ((len_sv = POPs)) {
len_iv = SvIV(len_sv);
- len_is_uv = SvIOK_UV(len_sv);
+ len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
}
else num_args--;
}
@@ -3006,15 +3071,27 @@ PP(pp_substr)
}
else if (DO_UTF8(sv))
repl_need_utf8_upgrade = TRUE;
- lvalue = 0;
}
- if (lvalue) {
- tmps = NULL; /* unused */
- SvGETMAGIC(sv);
- if (SvOK(sv)) (void)SvPV_nomg_const(sv, curlen);
- else curlen = 0;
+ else if (lvalue) {
+ SV * ret;
+ ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
+ sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
+ LvTYPE(ret) = 'x';
+ LvTARG(ret) = SvREFCNT_inc_simple(sv);
+ LvTARGOFF(ret) =
+ pos1_is_uv || pos1_iv >= 0
+ ? (STRLEN)(UV)pos1_iv
+ : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv);
+ LvTARGLEN(ret) =
+ len_is_uv || len_iv > 0
+ ? (STRLEN)(UV)len_iv
+ : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv);
+
+ SPAGAIN;
+ PUSHs(ret); /* avoid SvSETMAGIC here */
+ RETURN;
}
- else tmps = SvPV_const(sv, curlen);
+ tmps = SvPV_const(sv, curlen);
if (DO_UTF8(sv)) {
utf8_curlen = sv_len_utf8(sv);
if (utf8_curlen == curlen)
@@ -3025,72 +3102,16 @@ PP(pp_substr)
else
utf8_curlen = 0;
- if (!pos1_is_uv && pos1_iv < 0 && curlen) {
- pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
- pos1_iv += curlen;
- }
- if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
- goto bound_fail;
-
- if (num_args > 2) {
- if (!len_is_uv && len_iv < 0) {
- pos2_iv = curlen + len_iv;
- if (curlen)
- pos2_is_uv = curlen-1 > ~(UV)len_iv;
- else
- pos2_is_uv = 0;
- } else { /* len_iv >= 0 */
- if (!pos1_is_uv && pos1_iv < 0) {
- pos2_iv = pos1_iv + len_iv;
- pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
- } else {
- if ((UV)len_iv > curlen-(UV)pos1_iv)
- pos2_iv = curlen;
- else
- pos2_iv = pos1_iv+len_iv;
- pos2_is_uv = 1;
- }
- }
- }
- else {
- pos2_iv = curlen;
- pos2_is_uv = 1;
- }
-
- if (!pos2_is_uv && pos2_iv < 0) {
- if (!pos1_is_uv && pos1_iv < 0)
- goto bound_fail;
- pos2_iv = 0;
- }
- else if (!pos1_is_uv && pos1_iv < 0)
- pos1_iv = 0;
-
- if ((UV)pos2_iv < (UV)pos1_iv)
- pos2_iv = pos1_iv;
- if ((UV)pos2_iv > curlen)
- pos2_iv = curlen;
-
{
- /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
- const STRLEN pos = (STRLEN)( (UV)pos1_iv );
- const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
- STRLEN byte_len = len;
- STRLEN byte_pos = utf8_curlen
- ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
+ STRLEN pos, len, byte_len, byte_pos;
- if (lvalue) {
- SV * ret;
- ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
- sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
- LvTYPE(ret) = 'x';
- LvTARG(ret) = SvREFCNT_inc_simple(sv);
- LvTARGOFF(ret) = pos;
- LvTARGLEN(ret) = len;
+ if (!translate_substr_offsets(
+ curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
+ )) goto bound_fail;
- SPAGAIN;
- PUSHs(ret); /* avoid SvSETMAGIC here */
- RETURN;
- }
+ byte_len = len;
+ byte_pos = utf8_curlen
+ ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos;
tmps += byte_pos;
@@ -3134,7 +3155,7 @@ PP(pp_substr)
RETURN;
bound_fail:
- if (lvalue || repl)
+ if (repl)
Perl_croak(aTHX_ "substr outside of string");
Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
RETPUSHUNDEF;
diff --git a/proto.h b/proto.h
index d32431406f..ff84a2265d 100644
--- a/proto.h
+++ b/proto.h
@@ -5601,6 +5601,14 @@ STATIC void S_save_magic(pTHX_ I32 mgs_ix, SV *sv)
STATIC void S_unwind_handler_stack(pTHX_ const void *p);
#endif
+#if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C)
+PERL_CALLCONV bool Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv, bool pos1_is_uv, IV len_iv, bool len_is_uv, STRLEN *posp, STRLEN *lenp)
+ __attribute__nonnull__(pTHX_6)
+ __attribute__nonnull__(pTHX_7);
+#define PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS \
+ assert(posp); assert(lenp)
+
+#endif
#if defined(PERL_IN_MRO_C)
STATIC void S_mro_clean_isarev(pTHX_ HV * const isa, const char * const name, const STRLEN len, HV * const exceptions, U32 flags)
__attribute__nonnull__(pTHX_1)
diff --git a/sv.h b/sv.h
index 33a61be109..42821444b4 100644
--- a/sv.h
+++ b/sv.h
@@ -482,6 +482,7 @@ struct xpvlv {
SV* xlv_targ;
char xlv_type; /* k=keys .=pos x=substr v=vec /=join/re
* y=alem/helem/iter t=tie T=tied HE */
+ char xlv_flags; /* 1 = negative offset 2 = negative len */
};
/* This structure works in 3 ways - regular scalar, GV with GP, or fast
@@ -1325,6 +1326,7 @@ the scalar's value cannot change unless written to.
#define LvTARG(sv) ((XPVLV*) SvANY(sv))->xlv_targ
#define LvTARGOFF(sv) ((XPVLV*) SvANY(sv))->xlv_targoff
#define LvTARGLEN(sv) ((XPVLV*) SvANY(sv))->xlv_targlen
+#define LvFLAGS(sv) ((XPVLV*) SvANY(sv))->xlv_flags
#define IoIFP(sv) (sv)->sv_u.svu_fp
#define IoOFP(sv) ((XPVIO*) SvANY(sv))->xio_ofp
diff --git a/t/op/substr.t b/t/op/substr.t
index 2673fc7782..f93b64cf5e 100644
--- a/t/op/substr.t
+++ b/t/op/substr.t
@@ -23,7 +23,7 @@ $SIG{__WARN__} = sub {
BEGIN { require './test.pl'; }
-plan(360);
+plan(380);
run_tests() unless caller;
@@ -635,6 +635,51 @@ is($x, "\x{100}\x{200}\xFFb");
is($_, 'YYYY');
is($x, 'aYYYYef');
}
+ $x = "abcdef";
+ for (substr($x,1)) {
+ is($_, 'bcdef');
+ $_ = 'XX';
+ is($_, 'XX');
+ is($x, 'aXX');
+ $x .= "frompswiggle";
+ is $_, "XXfrompswiggle";
+ }
+ $x = "abcdef";
+ for (substr($x,1,-1)) {
+ is($_, 'bcde');
+ $_ = 'XX';
+ is($_, 'XX');
+ is($x, 'aXXf');
+ $x .= "frompswiggle";
+ is $_, "XXffrompswiggl";
+ }
+ $x = "abcdef";
+ for (substr($x,-5,3)) {
+ is($_, 'bcd');
+ $_ = 'XX'; # now $_ is substr($x, -4, 2)
+ is($_, 'XX');
+ is($x, 'aXXef');
+ $x .= "frompswiggle";
+ is $_, "gg";
+ }
+ $x = "abcdef";
+ for (substr($x,-5)) {
+ is($_, 'bcdef');
+ $_ = 'XX'; # now substr($x, -2)
+ is($_, 'XX');
+ is($x, 'aXX');
+ $x .= "frompswiggle";
+ is $_, "le";
+ }
+ $x = "abcdef";
+ for (substr($x,-5,-1)) {
+ is($_, 'bcde');
+ $_ = 'XX'; # now substr($x, -3, -1)
+ is($_, 'XX');
+ is($x, 'aXXf');
+ $x .= "frompswiggle";
+ is $_, "gl";
+ }
}
# [perl #24200] string corruption with lvalue sub