summaryrefslogtreecommitdiff
path: root/op.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2000-10-24 02:55:33 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2000-10-24 02:55:33 +0000
commitba210ebec161cde003bc967e8e460c72f71fb70c (patch)
tree7eefd78e8e365cbf64ddf49314681d17b83c3025 /op.c
parent177b92d2814bfc842f28f277e0a2f353c652a5e3 (diff)
downloadperl-ba210ebec161cde003bc967e8e460c72f71fb70c.tar.gz
Make the UTF-8 decoding stricter and more verbose when
malformation happens. This involved adding an argument to utf8_to_uv_chk(), which involved changing its prototype, and prefer STRLEN over I32 for the UTF-8 length, which as a domino effect necessitated changing the prototypes of scan_bin(), scan_oct(), scan_hex(), and reg_uni(). The stricter UTF-8 decoding checking uses Markus Kuhn's UTF-8 Decode Stress Tester from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt p4raw-id: //depot/perl@7416
Diffstat (limited to 'op.c')
-rw-r--r--op.c18
1 files changed, 11 insertions, 7 deletions
diff --git a/op.c b/op.c
index 6ef4bfe777..9e256a3537 100644
--- a/op.c
+++ b/op.c
@@ -2621,7 +2621,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
SV* transv = 0;
U8* tend = t + tlen;
U8* rend = r + rlen;
- I32 ulen;
+ STRLEN ulen;
U32 tfirst = 1;
U32 tlast = 0;
I32 tdiff;
@@ -2641,6 +2641,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
if (complement) {
U8 tmpbuf[UTF8_MAXLEN];
U8** cp;
+ I32* cl;
UV nextmin = 0;
New(1109, cp, tlen, U8*);
i = 0;
@@ -2656,7 +2657,8 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
qsort(cp, i, sizeof(U8*), utf8compare);
for (j = 0; j < i; j++) {
U8 *s = cp[j];
- UV val = utf8_to_uv_chk(s, &ulen, 0);
+ I32 cur = j < i ? cp[j+1] - s : tend - s;
+ UV val = utf8_to_uv_chk(s, cur, &ulen, 0);
s += ulen;
diff = val - nextmin;
if (diff > 0) {
@@ -2669,7 +2671,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
}
}
if (*s == 0xff)
- val = utf8_to_uv_chk(s+1, &ulen, 0);
+ val = utf8_to_uv_chk(s+1, cur - 1, &ulen, 0);
if (val >= nextmin)
nextmin = val + 1;
}
@@ -2696,10 +2698,11 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
while (t < tend || tfirst <= tlast) {
/* see if we need more "t" chars */
if (tfirst > tlast) {
- tfirst = (I32)utf8_to_uv_chk(t, &ulen, 0);
+ tfirst = (I32)utf8_to_uv_chk(t, tend - t, &ulen, 0);
t += ulen;
if (t < tend && *t == 0xff) { /* illegal utf8 val indicates range */
- tlast = (I32)utf8_to_uv_chk(++t, &ulen, 0);
+ t++;
+ tlast = (I32)utf8_to_uv_chk(t, tend - t, &ulen, 0);
t += ulen;
}
else
@@ -2709,10 +2712,11 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
/* now see if we need more "r" chars */
if (rfirst > rlast) {
if (r < rend) {
- rfirst = (I32)utf8_to_uv_chk(r, &ulen, 0);
+ rfirst = (I32)utf8_to_uv_chk(r, rend - r, &ulen, 0);
r += ulen;
if (r < rend && *r == 0xff) { /* illegal utf8 val indicates range */
- rlast = (I32)utf8_to_uv_chk(++r, &ulen, 0);
+ r++;
+ rlast = (I32)utf8_to_uv_chk(r, rend - r, &ulen, 0);
r += ulen;
}
else