diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-10-24 02:55:33 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-10-24 02:55:33 +0000 |
commit | ba210ebec161cde003bc967e8e460c72f71fb70c (patch) | |
tree | 7eefd78e8e365cbf64ddf49314681d17b83c3025 /op.c | |
parent | 177b92d2814bfc842f28f277e0a2f353c652a5e3 (diff) | |
download | perl-ba210ebec161cde003bc967e8e460c72f71fb70c.tar.gz |
Make the UTF-8 decoding stricter and more verbose when
malformations occur. This involved adding an argument
to utf8_to_uv_chk(), which meant changing its prototype,
and preferring STRLEN over I32 for the UTF-8 length, which, as
a domino effect, necessitated changing the prototypes of
scan_bin(), scan_oct(), scan_hex(), and reg_uni().
The stricter UTF-8 decoding checks use Markus Kuhn's
UTF-8 Decode Stress Tester from
http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
p4raw-id: //depot/perl@7416
Diffstat (limited to 'op.c')
-rw-r--r-- | op.c | 18 |
1 file changed, 11 insertions, 7 deletions
@@ -2621,7 +2621,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) SV* transv = 0; U8* tend = t + tlen; U8* rend = r + rlen; - I32 ulen; + STRLEN ulen; U32 tfirst = 1; U32 tlast = 0; I32 tdiff; @@ -2641,6 +2641,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) if (complement) { U8 tmpbuf[UTF8_MAXLEN]; U8** cp; + I32* cl; UV nextmin = 0; New(1109, cp, tlen, U8*); i = 0; @@ -2656,7 +2657,8 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) qsort(cp, i, sizeof(U8*), utf8compare); for (j = 0; j < i; j++) { U8 *s = cp[j]; - UV val = utf8_to_uv_chk(s, &ulen, 0); + I32 cur = j < i ? cp[j+1] - s : tend - s; + UV val = utf8_to_uv_chk(s, cur, &ulen, 0); s += ulen; diff = val - nextmin; if (diff > 0) { @@ -2669,7 +2671,7 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) } } if (*s == 0xff) - val = utf8_to_uv_chk(s+1, &ulen, 0); + val = utf8_to_uv_chk(s+1, cur - 1, &ulen, 0); if (val >= nextmin) nextmin = val + 1; } @@ -2696,10 +2698,11 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) while (t < tend || tfirst <= tlast) { /* see if we need more "t" chars */ if (tfirst > tlast) { - tfirst = (I32)utf8_to_uv_chk(t, &ulen, 0); + tfirst = (I32)utf8_to_uv_chk(t, tend - t, &ulen, 0); t += ulen; if (t < tend && *t == 0xff) { /* illegal utf8 val indicates range */ - tlast = (I32)utf8_to_uv_chk(++t, &ulen, 0); + t++; + tlast = (I32)utf8_to_uv_chk(t, tend - t, &ulen, 0); t += ulen; } else @@ -2709,10 +2712,11 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) /* now see if we need more "r" chars */ if (rfirst > rlast) { if (r < rend) { - rfirst = (I32)utf8_to_uv_chk(r, &ulen, 0); + rfirst = (I32)utf8_to_uv_chk(r, rend - r, &ulen, 0); r += ulen; if (r < rend && *r == 0xff) { /* illegal utf8 val indicates range */ - rlast = (I32)utf8_to_uv_chk(++r, &ulen, 0); + r++; + rlast = (I32)utf8_to_uv_chk(r, rend - r, &ulen, 0); r += ulen; } else |