Make the UTF-8 decoding stricter and more verbose when

malformation happens. This involved adding an argument to utf8_to_uv_chk(), which involved changing its prototype, and prefer STRLEN over I32 for the UTF-8 length, which as a domino effect necessitated changing the prototypes of scan_bin(), scan_oct(), scan_hex(), and reg_uni(). The stricter UTF-8 decoding checking uses Markus Kuhn's UTF-8 Decode Stress Tester from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt p4raw-id: //depot/perl@7416
author: Jarkko Hietaniemi <jhi@iki.fi> 2000-10-24 02:55:33 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-10-24 02:55:33 +0000
commit: ba210ebec161cde003bc967e8e460c72f71fb70c (patch)
tree: 7eefd78e8e365cbf64ddf49314681d17b83c3025 /handy.h
parent: 177b92d2814bfc842f28f277e0a2f353c652a5e3 (diff)
download: perl-ba210ebec161cde003bc967e8e460c72f71fb70c.tar.gz
1 files changed, 17 insertions, 15 deletions
diff --git a/handy.h b/handy.h
index f0e39af51d..73410123fb 100644
--- a/handy.h
+++ b/handy.h
@@ -448,21 +448,23 @@ Converts the specified character to lowercase.
 #define isPSXSPC_utf8(c)	(isSPACE_utf8(c) ||(c) == '\f')
 #define isBLANK_utf8(c)		isBLANK(c) /* could be wrong */
 
-#define isALNUM_LC_utf8(p)	isALNUM_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isIDFIRST_LC_utf8(p)	isIDFIRST_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isALPHA_LC_utf8(p)	isALPHA_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isSPACE_LC_utf8(p)	isSPACE_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isDIGIT_LC_utf8(p)	isDIGIT_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isUPPER_LC_utf8(p)	isUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isLOWER_LC_utf8(p)	isLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isALNUMC_LC_utf8(p)	isALNUMC_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isCNTRL_LC_utf8(p)	isCNTRL_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isGRAPH_LC_utf8(p)	isGRAPH_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isPRINT_LC_utf8(p)	isPRINT_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define isPUNCT_LC_utf8(p)	isPUNCT_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define toUPPER_LC_utf8(p)	toUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define toTITLE_LC_utf8(p)	toTITLE_LC_uni(utf8_to_uv_chk(p, 0, 0))
-#define toLOWER_LC_utf8(p)	toLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0))
+#define STRLEN_MAX	((STRLEN)-1)
+
+#define isALNUM_LC_utf8(p)	isALNUM_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isIDFIRST_LC_utf8(p)	isIDFIRST_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isALPHA_LC_utf8(p)	isALPHA_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isSPACE_LC_utf8(p)	isSPACE_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isDIGIT_LC_utf8(p)	isDIGIT_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isUPPER_LC_utf8(p)	isUPPER_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isLOWER_LC_utf8(p)	isLOWER_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isALNUMC_LC_utf8(p)	isALNUMC_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isCNTRL_LC_utf8(p)	isCNTRL_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isGRAPH_LC_utf8(p)	isGRAPH_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isPRINT_LC_utf8(p)	isPRINT_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define isPUNCT_LC_utf8(p)	isPUNCT_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define toUPPER_LC_utf8(p)	toUPPER_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define toTITLE_LC_utf8(p)	toTITLE_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
+#define toLOWER_LC_utf8(p)	toLOWER_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0))
 
 #define isPSXSPC_LC_utf8(c)	(isSPACE_LC_utf8(c) ||(c) == '\f')
 #define isBLANK_LC_utf8(c)	isBLANK(c) /* could be wrong */
author	Jarkko Hietaniemi <jhi@iki.fi>	2000-10-24 02:55:33 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2000-10-24 02:55:33 +0000
commit	ba210ebec161cde003bc967e8e460c72f71fb70c (patch)
tree	7eefd78e8e365cbf64ddf49314681d17b83c3025 /handy.h
parent	177b92d2814bfc842f28f277e0a2f353c652a5e3 (diff)
download	perl-ba210ebec161cde003bc967e8e460c72f71fb70c.tar.gz