diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-10-24 02:55:33 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-10-24 02:55:33 +0000 |
commit | ba210ebec161cde003bc967e8e460c72f71fb70c (patch) | |
tree | 7eefd78e8e365cbf64ddf49314681d17b83c3025 /pp.c | |
parent | 177b92d2814bfc842f28f277e0a2f353c652a5e3 (diff) | |
download | perl-ba210ebec161cde003bc967e8e460c72f71fb70c.tar.gz |
Make the UTF-8 decoding stricter and more verbose when
malformation happens. This involved adding an argument
to utf8_to_uv_chk(), which involved changing its prototype,
and prefer STRLEN over I32 for the UTF-8 length, which as
a domino effect necessitated changing the prototypes of
scan_bin(), scan_oct(), scan_hex(), and reg_uni().
The stricter UTF-8 decoding checking uses Markus Kuhn's
UTF-8 Decode Stress Tester from
http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
p4raw-id: //depot/perl@7416
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 36 |
1 files changed, 18 insertions, 18 deletions
@@ -1480,7 +1480,7 @@ PP(pp_complement) STRLEN targlen = 0; U8 *result; U8 *send; - I32 l; + STRLEN l; send = tmps + len; while (tmps < send) { @@ -1944,7 +1944,7 @@ PP(pp_hex) { djSP; dTARGET; char *tmps; - I32 argtype; + STRLEN argtype; STRLEN n_a; tmps = POPpx; @@ -1957,7 +1957,7 @@ PP(pp_oct) { djSP; dTARGET; NV value; - I32 argtype; + STRLEN argtype; char *tmps; STRLEN n_a; @@ -2234,13 +2234,13 @@ PP(pp_ord) { djSP; dTARGET; UV value; - STRLEN n_a; SV *tmpsv = POPs; - U8 *tmps = (U8*)SvPVx(tmpsv,n_a); - I32 retlen; + STRLEN len; + U8 *tmps = (U8*)SvPVx(tmpsv, len); + STRLEN retlen; if ((*tmps & 0x80) && DO_UTF8(tmpsv)) - value = utf8_to_uv_chk(tmps, &retlen, 0); + value = utf8_to_uv_chk(tmps, len, &retlen, 0); else value = (UV)(*tmps & 255); XPUSHu(value); @@ -2304,10 +2304,10 @@ PP(pp_ucfirst) STRLEN slen; if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { - I32 ulen; + STRLEN ulen; U8 tmpbuf[UTF8_MAXLEN]; U8 *tend; - UV uv = utf8_to_uv_chk(s, &ulen, 0); + UV uv = utf8_to_uv_chk(s, slen, &ulen, 0); if (PL_op->op_private & OPpLOCALE) { TAINT; @@ -2363,10 +2363,10 @@ PP(pp_lcfirst) STRLEN slen; if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { - I32 ulen; + STRLEN ulen; U8 tmpbuf[UTF8_MAXLEN]; U8 *tend; - UV uv = utf8_to_uv_chk(s, &ulen, 0); + UV uv = utf8_to_uv_chk(s, slen, &ulen, 0); if (PL_op->op_private & OPpLOCALE) { TAINT; @@ -2423,7 +2423,7 @@ PP(pp_uc) if (DO_UTF8(sv)) { dTARGET; - I32 ulen; + STRLEN ulen; register U8 *d; U8 *send; @@ -2443,7 +2443,7 @@ PP(pp_uc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, &ulen, 0))); + d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, len, &ulen, 0))); s += ulen; } } @@ -2497,7 +2497,7 @@ PP(pp_lc) if (DO_UTF8(sv)) { dTARGET; - I32 ulen; + STRLEN ulen; register U8 *d; U8 *send; @@ -2517,7 +2517,7 @@ PP(pp_lc) TAINT; SvTAINTED_on(TARG); while (s < send) { - d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, &ulen, 0))); + d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, len, &ulen, 0))); s += ulen; } } @@ -3363,7 +3363,7 @@ PP(pp_unpack) /* These must not be in registers: */ I16 ashort; int aint; - I32 along; + STRLEN along; #ifdef HAS_QUAD Quad_t aquad; #endif @@ -3659,7 +3659,7 @@ PP(pp_unpack) len = strend - s; if (checksum) { while (len-- > 0 && s < strend) { - auint = utf8_to_uv_chk((U8*)s, &along, 0); + auint = utf8_to_uv_chk((U8*)s, strend - s, &along, 0); s += along; if (checksum > 32) cdouble += (NV)auint; @@ -3671,7 +3671,7 @@ PP(pp_unpack) EXTEND(SP, len); EXTEND_MORTAL(len); while (len-- > 0 && s < strend) { - auint = utf8_to_uv_chk((U8*)s, &along, 0); + auint = utf8_to_uv_chk((U8*)s, strend - s, &along, 0); s += along; sv = NEWSV(37, 0); sv_setuv(sv, (UV)auint); |