summaryrefslogtreecommitdiff
path: root/pp.c
diff options
context:
space:
mode:
authorEric Brine <ikegami@adaelis.com>2010-02-11 20:28:29 -0500
committerNicholas Clark <nick@ccl4.org>2010-02-14 16:32:01 +0000
commit777f7c561610dee641c77666e5a4a0d9ac1d4230 (patch)
treea5af4c59239052b2538c566a2b9dfecf437e9b08 /pp.c
parent6e3b7bfa2b063f4ce0c55f84474edb7d2c652387 (diff)
downloadperl-777f7c561610dee641c77666e5a4a0d9ac1d4230.tar.gz
Removes 32-bit limit on substr arguments. The full range of IV and UV is available for the pos and len arguments, with safe conversion to STRLEN where it's smaller than an IV.
Diffstat (limited to 'pp.c')
-rw-r--r--pp.c143
1 files changed, 93 insertions, 50 deletions
diff --git a/pp.c b/pp.c
index 2f4703bd20..95dc5fd15e 100644
--- a/pp.c
+++ b/pp.c
@@ -3079,15 +3079,19 @@ PP(pp_substr)
{
dVAR; dSP; dTARGET;
SV *sv;
- I32 len = 0;
STRLEN curlen;
STRLEN utf8_curlen;
- I32 pos;
- I32 rem;
- I32 fail;
+ SV * pos_sv;
+ IV pos1_iv;
+ int pos1_is_uv;
+ IV pos2_iv;
+ int pos2_is_uv;
+ SV * len_sv;
+ IV len_iv = 0;
+ int len_is_uv = 1;
const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
const char *tmps;
- const I32 arybase = CopARYBASE_get(PL_curcop);
+ const IV arybase = CopARYBASE_get(PL_curcop);
SV *repl_sv = NULL;
const char *repl = NULL;
STRLEN repl_len;
@@ -3103,9 +3107,13 @@ PP(pp_substr)
repl = SvPV_const(repl_sv, repl_len);
repl_is_utf8 = DO_UTF8(repl_sv) && SvCUR(repl_sv);
}
- len = POPi;
+ len_sv = POPs;
+ len_iv = SvIV(len_sv);
+ len_is_uv = SvIOK_UV(len_sv);
}
- pos = POPi;
+ pos_sv = POPs;
+ pos1_iv = SvIV(pos_sv);
+ pos1_is_uv = SvIOK_UV(pos_sv);
sv = POPs;
PUTBACK;
if (repl_sv) {
@@ -3127,51 +3135,80 @@ PP(pp_substr)
else
utf8_curlen = 0;
- if (pos >= arybase) {
- pos -= arybase;
- rem = curlen-pos;
- fail = rem;
- if (num_args > 2) {
- if (len < 0) {
- rem += len;
- if (rem < 0)
- rem = 0;
- }
- else if (rem > len)
- rem = len;
+ if ( (pos1_is_uv && arybase < 0) || (pos1_iv >= arybase) ) { /* pos >= $[ */
+ UV pos1_uv = pos1_iv-arybase;
+ /* Overflow can occur when $[ < 0 */
+ if (arybase < 0 && pos1_uv < (UV)pos1_iv)
+ goto BOUND_FAIL;
+ pos1_iv = pos1_uv;
+ pos1_is_uv = 1;
+ }
+ else if (pos1_is_uv ? (UV)pos1_iv > 0 : pos1_iv > 0) {
+ goto BOUND_FAIL; /* $[=3; substr($_,2,...) */
+ }
+ else { /* pos < $[ */
+ if (pos1_iv == 0) { /* $[=1; substr($_,0,...) */
+ pos1_iv = curlen;
+ pos1_is_uv = 1;
+ } else {
+ if (curlen) {
+ pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
+ pos1_iv += curlen;
+ }
}
}
- else {
- pos += curlen;
- if (num_args < 3)
- rem = curlen;
- else if (len >= 0) {
- rem = pos+len;
- if (rem > (I32)curlen)
- rem = curlen;
+ if (pos1_is_uv || pos1_iv > 0) {
+ if ((UV)pos1_iv > curlen)
+ goto BOUND_FAIL;
+ }
+
+ if (num_args > 2) {
+ if (!len_is_uv && len_iv < 0) {
+ pos2_iv = curlen + len_iv;
+ if (curlen)
+ pos2_is_uv = curlen-1 > ~(UV)len_iv;
+ else
+ pos2_is_uv = 0;
+ } else { /* len_iv >= 0 */
+ if (!pos1_is_uv && pos1_iv < 0) {
+ pos2_iv = pos1_iv + len_iv;
+ pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
+ } else {
+ if ((UV)len_iv > curlen-(UV)pos1_iv)
+ pos2_iv = curlen;
+ else
+ pos2_iv = pos1_iv+len_iv;
+ pos2_is_uv = 1;
+ }
}
- else {
- rem = curlen+len;
- if (rem < pos)
- rem = pos;
- }
- if (pos < 0)
- pos = 0;
- fail = rem;
- rem -= pos;
- }
- if (fail < 0) {
- if (lvalue || repl)
- Perl_croak(aTHX_ "substr outside of string");
- Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
- RETPUSHUNDEF;
}
else {
- const I32 upos = pos;
- const I32 urem = rem;
+ pos2_iv = curlen;
+ pos2_is_uv = 1;
+ }
+
+ if (!pos2_is_uv && pos2_iv < 0) {
+ if (!pos1_is_uv && pos1_iv < 0)
+ goto BOUND_FAIL;
+ pos2_iv = 0;
+ }
+ else if (!pos1_is_uv && pos1_iv < 0)
+ pos1_iv = 0;
+
+ if ((UV)pos2_iv < (UV)pos1_iv)
+ pos2_iv = pos1_iv;
+ if ((UV)pos2_iv > curlen)
+ pos2_iv = curlen;
+
+ {
+ /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
+ const STRLEN pos = (STRLEN)( (UV)pos1_iv );
+ const STRLEN len = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
+ STRLEN byte_pos = pos;
+ STRLEN byte_len = len;
if (utf8_curlen)
- sv_pos_u2b(sv, (I32 *)&pos, (I32 *)&rem);
- tmps += pos;
+ sv_pos_u2b_proper(sv, &byte_pos, &byte_len);
+ tmps += byte_pos;
/* we either return a PV or an LV. If the TARG hasn't been used
* before, or is of that type, reuse it; otherwise use a mortal
* instead. Note that LVs can have an extended lifetime, so also
@@ -3185,7 +3222,7 @@ PP(pp_substr)
}
}
- sv_setpvn(TARG, tmps, rem);
+ sv_setpvn(TARG, tmps, byte_len);
#ifdef USE_LOCALE_COLLATE
sv_unmagic(TARG, PERL_MAGIC_collxfrm);
#endif
@@ -3202,7 +3239,7 @@ PP(pp_substr)
}
if (!SvOK(sv))
sv_setpvs(sv, "");
- sv_insert_flags(sv, pos, rem, repl, repl_len, 0);
+ sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
if (repl_is_utf8)
SvUTF8_on(sv);
SvREFCNT_dec(repl_sv_copy);
@@ -3232,13 +3269,19 @@ PP(pp_substr)
SvREFCNT_dec(LvTARG(TARG));
LvTARG(TARG) = SvREFCNT_inc_simple(sv);
}
- LvTARGOFF(TARG) = upos;
- LvTARGLEN(TARG) = urem;
+ LvTARGOFF(TARG) = pos;
+ LvTARGLEN(TARG) = len;
}
}
SPAGAIN;
PUSHs(TARG); /* avoid SvSETMAGIC here */
RETURN;
+
+BOUND_FAIL:
+ if (lvalue || repl)
+ Perl_croak(aTHX_ "substr outside of string");
+ Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
+ RETPUSHUNDEF;
}
PP(pp_vec)