diff options
author | Eric Brine <ikegami@adaelis.com> | 2010-02-11 20:28:29 -0500 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2010-02-14 16:32:01 +0000 |
commit | 777f7c561610dee641c77666e5a4a0d9ac1d4230 (patch) | |
tree | a5af4c59239052b2538c566a2b9dfecf437e9b08 /pp.c | |
parent | 6e3b7bfa2b063f4ce0c55f84474edb7d2c652387 (diff) | |
download | perl-777f7c561610dee641c77666e5a4a0d9ac1d4230.tar.gz |
Removes 32-bit limit on substr arguments. The full range of IV and UV is available for the pos and len arguments, with safe conversion to STRLEN where it's smaller than an IV.
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 143 |
1 files changed, 93 insertions, 50 deletions
@@ -3079,15 +3079,19 @@ PP(pp_substr) { dVAR; dSP; dTARGET; SV *sv; - I32 len = 0; STRLEN curlen; STRLEN utf8_curlen; - I32 pos; - I32 rem; - I32 fail; + SV * pos_sv; + IV pos1_iv; + int pos1_is_uv; + IV pos2_iv; + int pos2_is_uv; + SV * len_sv; + IV len_iv = 0; + int len_is_uv = 1; const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; const char *tmps; - const I32 arybase = CopARYBASE_get(PL_curcop); + const IV arybase = CopARYBASE_get(PL_curcop); SV *repl_sv = NULL; const char *repl = NULL; STRLEN repl_len; @@ -3103,9 +3107,13 @@ PP(pp_substr) repl = SvPV_const(repl_sv, repl_len); repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv); } - len = POPi; + len_sv = POPs; + len_iv = SvIV(len_sv); + len_is_uv = SvIOK_UV(len_sv); } - pos = POPi; + pos_sv = POPs; + pos1_iv = SvIV(pos_sv); + pos1_is_uv = SvIOK_UV(pos_sv); sv = POPs; PUTBACK; if (repl_sv) { @@ -3127,51 +3135,80 @@ PP(pp_substr) else utf8_curlen = 0; - if (pos >= arybase) { - pos -= arybase; - rem = curlen-pos; - fail = rem; - if (num_args > 2) { - if (len < 0) { - rem += len; - if (rem < 0) - rem = 0; - } - else if (rem > len) - rem = len; + if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */ + UV pos1_uv = pos1_iv-arybase; + /* Overflow can occur when $[ < 0 */ + if (arybase < 0 && pos1_uv < (UV)pos1_iv) + goto BOUND_FAIL; + pos1_iv = pos1_uv; + pos1_is_uv = 1; + } + else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) { + goto BOUND_FAIL; /* $[=3; substr($_,2,...) */ + } + else { /* pos < $[ */ + if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */ + pos1_iv = curlen; + pos1_is_uv = 1; + } else { + if (curlen) { + pos1_is_uv = curlen-1 > ~(UV)pos1_iv; + pos1_iv += curlen; + } } } - else { - pos += curlen; - if (num_args < 3) - rem = curlen; - else if (len >= 0) { - rem = pos+len; - if (rem > (I32)curlen) - rem = curlen; + if (pos1_is_uv || pos1_iv > 0) { + if ((UV)pos1_iv > curlen) + goto BOUND_FAIL; + } + + if (num_args > 2) { + if (!len_is_uv && len_iv < 0) { + pos2_iv = curlen + len_iv; + if (curlen) + pos2_is_uv = curlen-1 > ~(UV)len_iv; + else + pos2_is_uv = 0; + } else { /* len_iv >= 0 */ + if (!pos1_is_uv && pos1_iv < 0) { + pos2_iv = pos1_iv + len_iv; + pos2_is_uv = (UV)len_iv > (UV)IV_MAX; + } else { + if ((UV)len_iv > curlen-(UV)pos1_iv) + pos2_iv = curlen; + else + pos2_iv = pos1_iv+len_iv; + pos2_is_uv = 1; + } } - else { - rem = curlen+len; - if (rem < pos) - rem = pos; - } - if (pos < 0) - pos = 0; - fail = rem; - rem -= pos; - } - if (fail < 0) { - if (lvalue || repl) - Perl_croak(aTHX_ "substr outside of string"); - Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string"); - RETPUSHUNDEF; } else { - const I32 upos = pos; - const I32 urem = rem; + pos2_iv = curlen; + pos2_is_uv = 1; + } + + if (!pos2_is_uv && pos2_iv < 0) { + if (!pos1_is_uv && pos1_iv < 0) + goto BOUND_FAIL; + pos2_iv = 0; + } + else if (!pos1_is_uv && pos1_iv < 0) + pos1_iv = 0; + + if ((UV)pos2_iv < (UV)pos1_iv) + pos2_iv = pos1_iv; + if ((UV)pos2_iv > curlen) + pos2_iv = curlen; + + { + /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */ + const STRLEN pos = (STRLEN)( (UV)pos1_iv ); + const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv ); + STRLEN byte_pos = pos; + STRLEN byte_len = len; if (utf8_curlen) - sv_pos_u2b(sv, (I32 *)&pos, (I32 *)&rem); - tmps += pos; + sv_pos_u2b_proper(sv, &byte_pos, &byte_len); + tmps += byte_pos; /* we either return a PV or an LV. If the TARG hasn't been used * before, or is of that type, reuse it; otherwise use a mortal * instead. Note that LVs can have an extended lifetime, so also @@ -3185,7 +3222,7 @@ PP(pp_substr) } } - sv_setpvn(TARG, tmps, rem); + sv_setpvn(TARG, tmps, byte_len); #ifdef USE_LOCALE_COLLATE sv_unmagic(TARG, PERL_MAGIC_collxfrm); #endif @@ -3202,7 +3239,7 @@ PP(pp_substr) } if (!SvOK(sv)) sv_setpvs(sv, ""); - sv_insert_flags(sv, pos, rem, repl, repl_len, 0); + sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0); if (repl_is_utf8) SvUTF8_on(sv); SvREFCNT_dec(repl_sv_copy); @@ -3232,13 +3269,19 @@ PP(pp_substr) SvREFCNT_dec(LvTARG(TARG)); LvTARG(TARG) = SvREFCNT_inc_simple(sv); } - LvTARGOFF(TARG) = upos; - LvTARGLEN(TARG) = urem; + LvTARGOFF(TARG) = pos; + LvTARGLEN(TARG) = len; } } SPAGAIN; PUSHs(TARG); /* avoid SvSETMAGIC here */ RETURN; + +BOUND_FAIL: + if (lvalue || repl) + Perl_croak(aTHX_ "substr outside of string"); + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string"); + RETPUSHUNDEF; } PP(pp_vec) |