diff options
-rw-r--r-- | dump.c | 1 | ||||
-rw-r--r-- | embed.fnc | 6 | ||||
-rw-r--r-- | embed.h | 3 | ||||
-rw-r--r-- | mg.c | 37 | ||||
-rw-r--r-- | pp.c | 171 | ||||
-rw-r--r-- | proto.h | 8 | ||||
-rw-r--r-- | sv.h | 2 | ||||
-rw-r--r-- | t/op/substr.t | 47 |
8 files changed, 191 insertions, 84 deletions
@@ -1941,6 +1941,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo Perl_dump_indent(aTHX_ level, file, " TARGOFF = %"IVdf"\n", (IV)LvTARGOFF(sv)); Perl_dump_indent(aTHX_ level, file, " TARGLEN = %"IVdf"\n", (IV)LvTARGLEN(sv)); Perl_dump_indent(aTHX_ level, file, " TARG = 0x%"UVxf"\n", PTR2UV(LvTARG(sv))); + Perl_dump_indent(aTHX_ level, file, " FLAGS = %"IVdf"\n", (IV)LvFLAGS(sv)); if (LvTYPE(sv) != 't' && LvTYPE(sv) != 'T') do_sv_dump(level+1, file, LvTARG(sv), nest+1, maxnest, dumpops, pvlim); @@ -1391,6 +1391,12 @@ Apd |UV |to_utf8_upper |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp Apd |UV |to_utf8_title |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp Ampd |UV |to_utf8_fold |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp AMp |UV |_to_utf8_fold_flags|NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp|U8 flags +#if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C) +p |bool |translate_substr_offsets|STRLEN curlen|IV pos1_iv \ + |bool pos1_is_uv|IV len_iv \ + |bool len_is_uv|NN STRLEN *posp \ + |NN STRLEN *lenp +#endif #if defined(UNLINK_ALL_VERSIONS) Ap |I32 |unlnk |NN const char* f #endif @@ -1346,6 +1346,9 @@ #define save_magic(a,b) S_save_magic(aTHX_ a,b) #define unwind_handler_stack(a) S_unwind_handler_stack(aTHX_ a) # endif +# if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C) +#define translate_substr_offsets(a,b,c,d,e,f,g) Perl_translate_substr_offsets(aTHX_ a,b,c,d,e,f,g) +# endif # if defined(PERL_IN_MRO_C) #define mro_clean_isarev(a,b,c,d,e) S_mro_clean_isarev(aTHX_ a,b,c,d,e) #define mro_gather_and_rename(a,b,c,d,e) S_mro_gather_and_rename(aTHX_ a,b,c,d,e) @@ -2163,16 +2163,24 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg) const char * const tmps = SvPV_const(lsv,len); STRLEN offs = LvTARGOFF(sv); STRLEN rem = LvTARGLEN(sv); + const bool negoff = LvFLAGS(sv) & 1; + const bool negrem = LvFLAGS(sv) & 2; PERL_ARGS_ASSERT_MAGIC_GETSUBSTR; PERL_UNUSED_ARG(mg); + if (!translate_substr_offsets( + SvUTF8(lsv) ? sv_len_utf8(lsv) : len, + negoff ? -(IV)offs : (IV)offs, !negoff, + negrem ? -(IV)rem : (IV)rem, !negrem, &offs, &rem + )) { + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string"); + sv_setsv_nomg(sv, &PL_sv_undef); + return 0; + } + if (SvUTF8(lsv)) offs = sv_pos_u2b_flags(lsv, offs, &rem, SV_CONST_RETURN); - if (offs > len) - offs = len; - if (rem > len - offs) - rem = len - offs; sv_setpvn(sv, tmps + offs, rem); if (SvUTF8(lsv)) SvUTF8_on(sv); @@ -2183,11 +2191,13 @@ int Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) { dVAR; - STRLEN len; + STRLEN len, lsv_len, oldtarglen, newtarglen; const char * const tmps = SvPV_const(sv, len); SV * const lsv = LvTARG(sv); STRLEN lvoff = LvTARGOFF(sv); STRLEN lvlen = LvTARGLEN(sv); + const bool negoff = LvFLAGS(sv) & 1; + const bool neglen = LvFLAGS(sv) & 2; PERL_ARGS_ASSERT_MAGIC_SETSUBSTR; PERL_UNUSED_ARG(mg); @@ -2197,25 +2207,36 @@ Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "Attempt to use reference as lvalue in substr" ); + if (SvUTF8(lsv)) lsv_len = sv_len_utf8(lsv); + else (void)SvPV_nomg(lsv,lsv_len); + if (!translate_substr_offsets( + lsv_len, + negoff ? -(IV)lvoff : (IV)lvoff, !negoff, + neglen ? -(IV)lvlen : (IV)lvlen, !neglen, &lvoff, &lvlen + )) + Perl_croak(aTHX_ "substr outside of string"); + oldtarglen = lvlen; if (DO_UTF8(sv)) { sv_utf8_upgrade(lsv); lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN); sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0); - LvTARGLEN(sv) = sv_len_utf8(sv); + newtarglen = sv_len_utf8(sv); SvUTF8_on(lsv); } else if (lsv && SvUTF8(lsv)) { const char *utf8; lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN); - LvTARGLEN(sv) = len; + newtarglen = len; utf8 = (char*)bytes_to_utf8((U8*)tmps, &len); sv_insert_flags(lsv, lvoff, lvlen, utf8, len, 0); Safefree(utf8); } else { sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0); - LvTARGLEN(sv) = len; + newtarglen = len; } + if (!neglen) LvTARGLEN(sv) = newtarglen; + if (negoff) LvTARGOFF(sv) += newtarglen - oldtarglen; return 0; } @@ -2954,6 +2954,73 @@ PP(pp_length) RETURN; } +/* Returns false if substring is completely outside original string. + No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must + always be true for an explicit 0. +*/ +bool +Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv, + bool pos1_is_uv, IV len_iv, + bool len_is_uv, STRLEN *posp, + STRLEN *lenp) +{ + IV pos2_iv; + int pos2_is_uv; + + PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS; + + if (!pos1_is_uv && pos1_iv < 0 && curlen) { + pos1_is_uv = curlen-1 > ~(UV)pos1_iv; + pos1_iv += curlen; + } + if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen) + return FALSE; + + if (len_iv || len_is_uv) { + if (!len_is_uv && len_iv < 0) { + pos2_iv = curlen + len_iv; + if (curlen) + pos2_is_uv = curlen-1 > ~(UV)len_iv; + else + pos2_is_uv = 0; + } else { /* len_iv >= 0 */ + if (!pos1_is_uv && pos1_iv < 0) { + pos2_iv = pos1_iv + len_iv; + pos2_is_uv = (UV)len_iv > (UV)IV_MAX; + } else { + if ((UV)len_iv > curlen-(UV)pos1_iv) + pos2_iv = curlen; + else + pos2_iv = pos1_iv+len_iv; + pos2_is_uv = 1; + } + } + } + else { + pos2_iv = curlen; + pos2_is_uv = 1; + } + + if (!pos2_is_uv && pos2_iv < 0) { + if (!pos1_is_uv && pos1_iv < 0) + return FALSE; + pos2_iv = 0; + } + else if (!pos1_is_uv && pos1_iv < 0) + pos1_iv = 0; + + if ((UV)pos2_iv < (UV)pos1_iv) + pos2_iv = pos1_iv; + if ((UV)pos2_iv > curlen) + pos2_iv = curlen; + + /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */ + *posp = (STRLEN)( (UV)pos1_iv ); + *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv ); + + return TRUE; +} + PP(pp_substr) { dVAR; dSP; dTARGET; @@ -2963,11 +3030,9 @@ PP(pp_substr) SV * pos_sv; IV pos1_iv; int pos1_is_uv; - IV pos2_iv; - int pos2_is_uv; SV * len_sv; IV len_iv = 0; - int len_is_uv = 1; + int len_is_uv = 0; I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; const bool rvalue = (GIMME_V != G_VOID); const char *tmps; @@ -2984,7 +3049,7 @@ PP(pp_substr) } if ((len_sv = POPs)) { len_iv = SvIV(len_sv); - len_is_uv = SvIOK_UV(len_sv); + len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1; } else num_args--; } @@ -3006,15 +3071,27 @@ PP(pp_substr) } else if (DO_UTF8(sv)) repl_need_utf8_upgrade = TRUE; - lvalue = 0; } - if (lvalue) { - tmps = NULL; /* unused */ - SvGETMAGIC(sv); - if (SvOK(sv)) (void)SvPV_nomg_const(sv, curlen); - else curlen = 0; + else if (lvalue) { + SV * ret; + ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ + sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); + LvTYPE(ret) = 'x'; + LvTARG(ret) = SvREFCNT_inc_simple(sv); + LvTARGOFF(ret) = + pos1_is_uv || pos1_iv >= 0 + ? (STRLEN)(UV)pos1_iv + : (LvFLAGS(ret) |= 1, (STRLEN)(UV)-pos1_iv); + LvTARGLEN(ret) = + len_is_uv || len_iv > 0 + ? (STRLEN)(UV)len_iv + : (LvFLAGS(ret) |= 2, (STRLEN)(UV)-len_iv); + + SPAGAIN; + PUSHs(ret); /* avoid SvSETMAGIC here */ + RETURN; } - else tmps = SvPV_const(sv, curlen); + tmps = SvPV_const(sv, curlen); if (DO_UTF8(sv)) { utf8_curlen = sv_len_utf8(sv); if (utf8_curlen == curlen) @@ -3025,72 +3102,16 @@ PP(pp_substr) else utf8_curlen = 0; - if (!pos1_is_uv && pos1_iv < 0 && curlen) { - pos1_is_uv = curlen-1 > ~(UV)pos1_iv; - pos1_iv += curlen; - } - if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen) - goto bound_fail; - - if (num_args > 2) { - if (!len_is_uv && len_iv < 0) { - pos2_iv = curlen + len_iv; - if (curlen) - pos2_is_uv = curlen-1 > ~(UV)len_iv; - else - pos2_is_uv = 0; - } else { /* len_iv >= 0 */ - if (!pos1_is_uv && pos1_iv < 0) { - pos2_iv = pos1_iv + len_iv; - pos2_is_uv = (UV)len_iv > (UV)IV_MAX; - } else { - if ((UV)len_iv > curlen-(UV)pos1_iv) - pos2_iv = curlen; - else - pos2_iv = pos1_iv+len_iv; - pos2_is_uv = 1; - } - } - } - else { - pos2_iv = curlen; - pos2_is_uv = 1; - } - - if (!pos2_is_uv && pos2_iv < 0) { - if (!pos1_is_uv && pos1_iv < 0) - goto bound_fail; - pos2_iv = 0; - } - else if (!pos1_is_uv && pos1_iv < 0) - pos1_iv = 0; - - if ((UV)pos2_iv < (UV)pos1_iv) - pos2_iv = pos1_iv; - if ((UV)pos2_iv > curlen) - pos2_iv = curlen; - { - /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */ - const STRLEN pos = (STRLEN)( (UV)pos1_iv ); - const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv ); - STRLEN byte_len = len; - STRLEN byte_pos = utf8_curlen - ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos; + STRLEN pos, len, byte_len, byte_pos; - if (lvalue) { - SV * ret; - ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ - sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); - LvTYPE(ret) = 'x'; - LvTARG(ret) = SvREFCNT_inc_simple(sv); - LvTARGOFF(ret) = pos; - LvTARGLEN(ret) = len; + if (!translate_substr_offsets( + curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len + )) goto bound_fail; - SPAGAIN; - PUSHs(ret); /* avoid SvSETMAGIC here */ - RETURN; - } + byte_len = len; + byte_pos = utf8_curlen + ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos; tmps += byte_pos; @@ -3134,7 +3155,7 @@ PP(pp_substr) RETURN; bound_fail: - if (lvalue || repl) + if (repl) Perl_croak(aTHX_ "substr outside of string"); Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string"); RETPUSHUNDEF; @@ -5601,6 +5601,14 @@ STATIC void S_save_magic(pTHX_ I32 mgs_ix, SV *sv) STATIC void S_unwind_handler_stack(pTHX_ const void *p); #endif +#if defined(PERL_IN_MG_C) || defined(PERL_IN_PP_C) +PERL_CALLCONV bool Perl_translate_substr_offsets(pTHX_ STRLEN curlen, IV pos1_iv, bool pos1_is_uv, IV len_iv, bool len_is_uv, STRLEN *posp, STRLEN *lenp) + __attribute__nonnull__(pTHX_6) + __attribute__nonnull__(pTHX_7); +#define PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS \ + assert(posp); assert(lenp) + +#endif #if defined(PERL_IN_MRO_C) STATIC void S_mro_clean_isarev(pTHX_ HV * const isa, const char * const name, const STRLEN len, HV * const exceptions, U32 flags) __attribute__nonnull__(pTHX_1) @@ -482,6 +482,7 @@ struct xpvlv { SV* xlv_targ; char xlv_type; /* k=keys .=pos x=substr v=vec /=join/re * y=alem/helem/iter t=tie T=tied HE */ + char xlv_flags; /* 1 = negative offset 2 = negative len */ }; /* This structure works in 3 ways - regular scalar, GV with GP, or fast @@ -1325,6 +1326,7 @@ the scalar's value cannot change unless written to. #define LvTARG(sv) ((XPVLV*) SvANY(sv))->xlv_targ #define LvTARGOFF(sv) ((XPVLV*) SvANY(sv))->xlv_targoff #define LvTARGLEN(sv) ((XPVLV*) SvANY(sv))->xlv_targlen +#define LvFLAGS(sv) ((XPVLV*) SvANY(sv))->xlv_flags #define IoIFP(sv) (sv)->sv_u.svu_fp #define IoOFP(sv) ((XPVIO*) SvANY(sv))->xio_ofp diff --git a/t/op/substr.t b/t/op/substr.t index 2673fc7782..f93b64cf5e 100644 --- a/t/op/substr.t +++ b/t/op/substr.t @@ -23,7 +23,7 @@ $SIG{__WARN__} = sub { BEGIN { require './test.pl'; } -plan(360); +plan(380); run_tests() unless caller; @@ -635,6 +635,51 @@ is($x, "\x{100}\x{200}\xFFb"); is($_, 'YYYY'); is($x, 'aYYYYef'); } + $x = "abcdef"; + for (substr($x,1)) { + is($_, 'bcdef'); + $_ = 'XX'; + is($_, 'XX'); + is($x, 'aXX'); + $x .= "frompswiggle"; + is $_, "XXfrompswiggle"; + } + $x = "abcdef"; + for (substr($x,1,-1)) { + is($_, 'bcde'); + $_ = 'XX'; + is($_, 'XX'); + is($x, 'aXXf'); + $x .= "frompswiggle"; + is $_, "XXffrompswiggl"; + } + $x = "abcdef"; + for (substr($x,-5,3)) { + is($_, 'bcd'); + $_ = 'XX'; # now $_ is substr($x, -4, 2) + is($_, 'XX'); + is($x, 'aXXef'); + $x .= "frompswiggle"; + is $_, "gg"; + } + $x = "abcdef"; + for (substr($x,-5)) { + is($_, 'bcdef'); + $_ = 'XX'; # now substr($x, -2) + is($_, 'XX'); + is($x, 'aXX'); + $x .= "frompswiggle"; + is $_, "le"; + } + $x = "abcdef"; + for (substr($x,-5,-1)) { + is($_, 'bcde'); + $_ = 'XX'; # now substr($x, -3, -1) + is($_, 'XX'); + is($x, 'aXXf'); + $x .= "frompswiggle"; + is $_, "gl"; + } } # [perl #24200] string corruption with lvalue sub |