summary | refs | log | tree | commit | diff
path: root/pp.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2000-10-24 02:55:33 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-10-24 02:55:33 +0000
commit: ba210ebec161cde003bc967e8e460c72f71fb70c (patch)
tree: 7eefd78e8e365cbf64ddf49314681d17b83c3025 /pp.c
parent: 177b92d2814bfc842f28f277e0a2f353c652a5e3 (diff)
download: perl-ba210ebec161cde003bc967e8e460c72f71fb70c.tar.gz
Make the UTF-8 decoding stricter and more verbose when
malformed input is encountered. This involved adding an argument to utf8_to_uv_chk(), which meant changing its prototype, and preferring STRLEN over I32 for the UTF-8 length, which as a domino effect necessitated changing the prototypes of scan_bin(), scan_oct(), scan_hex(), and reg_uni(). The stricter UTF-8 decoding checks use Markus Kuhn's UTF-8 Decode Stress Tester from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
p4raw-id: //depot/perl@7416
Diffstat (limited to 'pp.c')
-rw-r--r-- pp.c 36
1 files changed, 18 insertions, 18 deletions
diff --git a/pp.c b/pp.c
index 98d31cb05e..35f5956829 100644
--- a/pp.c
+++ b/pp.c
@@ -1480,7 +1480,7 @@ PP(pp_complement)
STRLEN targlen = 0;
U8 *result;
U8 *send;
- I32 l;
+ STRLEN l;
send = tmps + len;
while (tmps < send) {
@@ -1944,7 +1944,7 @@ PP(pp_hex)
{
djSP; dTARGET;
char *tmps;
- I32 argtype;
+ STRLEN argtype;
STRLEN n_a;
tmps = POPpx;
@@ -1957,7 +1957,7 @@ PP(pp_oct)
{
djSP; dTARGET;
NV value;
- I32 argtype;
+ STRLEN argtype;
char *tmps;
STRLEN n_a;
@@ -2234,13 +2234,13 @@ PP(pp_ord)
{
djSP; dTARGET;
UV value;
- STRLEN n_a;
SV *tmpsv = POPs;
- U8 *tmps = (U8*)SvPVx(tmpsv,n_a);
- I32 retlen;
+ STRLEN len;
+ U8 *tmps = (U8*)SvPVx(tmpsv, len);
+ STRLEN retlen;
if ((*tmps & 0x80) && DO_UTF8(tmpsv))
- value = utf8_to_uv_chk(tmps, &retlen, 0);
+ value = utf8_to_uv_chk(tmps, len, &retlen, 0);
else
value = (UV)(*tmps & 255);
XPUSHu(value);
@@ -2304,10 +2304,10 @@ PP(pp_ucfirst)
STRLEN slen;
if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
- I32 ulen;
+ STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN];
U8 *tend;
- UV uv = utf8_to_uv_chk(s, &ulen, 0);
+ UV uv = utf8_to_uv_chk(s, slen, &ulen, 0);
if (PL_op->op_private & OPpLOCALE) {
TAINT;
@@ -2363,10 +2363,10 @@ PP(pp_lcfirst)
STRLEN slen;
if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
- I32 ulen;
+ STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN];
U8 *tend;
- UV uv = utf8_to_uv_chk(s, &ulen, 0);
+ UV uv = utf8_to_uv_chk(s, slen, &ulen, 0);
if (PL_op->op_private & OPpLOCALE) {
TAINT;
@@ -2423,7 +2423,7 @@ PP(pp_uc)
if (DO_UTF8(sv)) {
dTARGET;
- I32 ulen;
+ STRLEN ulen;
register U8 *d;
U8 *send;
@@ -2443,7 +2443,7 @@ PP(pp_uc)
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, &ulen, 0)));
+ d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv_chk(s, len, &ulen, 0)));
s += ulen;
}
}
@@ -2497,7 +2497,7 @@ PP(pp_lc)
if (DO_UTF8(sv)) {
dTARGET;
- I32 ulen;
+ STRLEN ulen;
register U8 *d;
U8 *send;
@@ -2517,7 +2517,7 @@ PP(pp_lc)
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, &ulen, 0)));
+ d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv_chk(s, len, &ulen, 0)));
s += ulen;
}
}
@@ -3363,7 +3363,7 @@ PP(pp_unpack)
/* These must not be in registers: */
I16 ashort;
int aint;
- I32 along;
+ STRLEN along;
#ifdef HAS_QUAD
Quad_t aquad;
#endif
@@ -3659,7 +3659,7 @@ PP(pp_unpack)
len = strend - s;
if (checksum) {
while (len-- > 0 && s < strend) {
- auint = utf8_to_uv_chk((U8*)s, &along, 0);
+ auint = utf8_to_uv_chk((U8*)s, strend - s, &along, 0);
s += along;
if (checksum > 32)
cdouble += (NV)auint;
@@ -3671,7 +3671,7 @@ PP(pp_unpack)
EXTEND(SP, len);
EXTEND_MORTAL(len);
while (len-- > 0 && s < strend) {
- auint = utf8_to_uv_chk((U8*)s, &along, 0);
+ auint = utf8_to_uv_chk((U8*)s, strend - s, &along, 0);
s += along;
sv = NEWSV(37, 0);
sv_setuv(sv, (UV)auint);