summaryrefslogtreecommitdiff
path: root/doop.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2000-10-24 02:55:33 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-10-24 02:55:33 +0000
commit: ba210ebec161cde003bc967e8e460c72f71fb70c (patch)
tree: 7eefd78e8e365cbf64ddf49314681d17b83c3025 /doop.c
parent: 177b92d2814bfc842f28f277e0a2f353c652a5e3 (diff)
download: perl-ba210ebec161cde003bc967e8e460c72f71fb70c.tar.gz
Make the UTF-8 decoding stricter and more verbose when
malformations occur. This involved adding an argument to utf8_to_uv_chk() (which changed its prototype) and preferring STRLEN over I32 for the UTF-8 length, which as a domino effect necessitated changing the prototypes of scan_bin(), scan_oct(), scan_hex(), and reg_uni(). The stricter UTF-8 decoding checking uses Markus Kuhn's UTF-8 Decode Stress Tester from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
p4raw-id: //depot/perl@7416
Diffstat (limited to 'doop.c')
-rw-r--r--doop.c30
1 files changed, 15 insertions, 15 deletions
diff --git a/doop.c b/doop.c
index b75ffaa69f..3cd8f07b20 100644
--- a/doop.c
+++ b/doop.c
@@ -72,12 +72,12 @@ S_do_trans_simple(pTHX_ SV *sv)
Newz(0, d, len*2+1, U8);
dstart = d;
while (s < send) {
- I32 ulen;
+ STRLEN ulen;
short c;
ulen = 1;
/* Need to check this, otherwise 128..255 won't match */
- c = utf8_to_uv_chk(s, &ulen, 0);
+ c = utf8_to_uv_chk(s, send - s, &ulen, 0);
if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
matches++;
if (ch < 0x80)
@@ -122,10 +122,10 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
s += UTF8SKIP(s);
else {
UV c;
- I32 ulen;
+ STRLEN ulen;
ulen = 1;
if (hasutf)
- c = utf8_to_uv_chk(s,&ulen, 0);
+ c = utf8_to_uv_chk(s, send - s, &ulen, 0);
else
c = *s;
if (c < 0x100 && tbl[c] >= 0)
@@ -363,8 +363,8 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
continue;
}
else if (uv == none) { /* "none" is unmapped character */
- I32 ulen;
- *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
+ STRLEN ulen;
+ *d++ = (U8)utf8_to_uv_chk(s, send - s, &ulen, 0);
s += ulen;
puv = 0xfeedface;
continue;
@@ -404,8 +404,8 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
continue;
}
else if (uv == none) { /* "none" is unmapped character */
- I32 ulen;
- *d++ = (U8)utf8_to_uv_chk(s, &ulen, 0);
+ STRLEN ulen;
+ *d++ = (U8)utf8_to_uv_chk(s, send - s, &ulen, 0);
s += ulen;
continue;
}
@@ -964,15 +964,15 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
char *dcsave = dc;
STRLEN lulen = leftlen;
STRLEN rulen = rightlen;
- I32 ulen;
+ STRLEN ulen;
switch (optype) {
case OP_BIT_AND:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc & ruc;
@@ -984,10 +984,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
break;
case OP_BIT_XOR:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc ^ ruc;
@@ -996,10 +996,10 @@ Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right)
goto mop_up_utf;
case OP_BIT_OR:
while (lulen && rulen) {
- luc = utf8_to_uv_chk((U8*)lc, &ulen, 0);
+ luc = utf8_to_uv_chk((U8*)lc, lulen, &ulen, 0);
lc += ulen;
lulen -= ulen;
- ruc = utf8_to_uv_chk((U8*)rc, &ulen, 0);
+ ruc = utf8_to_uv_chk((U8*)rc, rulen, &ulen, 0);
rc += ulen;
rulen -= ulen;
duc = luc | ruc;