diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-10-24 02:55:33 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-10-24 02:55:33 +0000 |
commit | ba210ebec161cde003bc967e8e460c72f71fb70c (patch) | |
tree | 7eefd78e8e365cbf64ddf49314681d17b83c3025 /handy.h | |
parent | 177b92d2814bfc842f28f277e0a2f353c652a5e3 (diff) | |
download | perl-ba210ebec161cde003bc967e8e460c72f71fb70c.tar.gz |
Make the UTF-8 decoding stricter and more verbose when
malformation happens. This involved adding an argument
to utf8_to_uv_chk(), which involved changing its prototype,
and prefer STRLEN over I32 for the UTF-8 length, which as
a domino effect necessitated changing the prototypes of
scan_bin(), scan_oct(), scan_hex(), and reg_uni().
The stricter UTF-8 decoding checking uses Markus Kuhn's
UTF-8 Decode Stress Tester from
http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
p4raw-id: //depot/perl@7416
Diffstat (limited to 'handy.h')
-rw-r--r-- | handy.h | 32 |
1 files changed, 17 insertions, 15 deletions
@@ -448,21 +448,23 @@ Converts the specified character to lowercase. #define isPSXSPC_utf8(c) (isSPACE_utf8(c) ||(c) == '\f') #define isBLANK_utf8(c) isBLANK(c) /* could be wrong */ -#define isALNUM_LC_utf8(p) isALNUM_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isALPHA_LC_utf8(p) isALPHA_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isSPACE_LC_utf8(p) isSPACE_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isDIGIT_LC_utf8(p) isDIGIT_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isUPPER_LC_utf8(p) isUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isLOWER_LC_utf8(p) isLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isALNUMC_LC_utf8(p) isALNUMC_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isCNTRL_LC_utf8(p) isCNTRL_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isGRAPH_LC_utf8(p) isGRAPH_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isPRINT_LC_utf8(p) isPRINT_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define isPUNCT_LC_utf8(p) isPUNCT_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define toUPPER_LC_utf8(p) toUPPER_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define toTITLE_LC_utf8(p) toTITLE_LC_uni(utf8_to_uv_chk(p, 0, 0)) -#define toLOWER_LC_utf8(p) toLOWER_LC_uni(utf8_to_uv_chk(p, 0, 0)) +#define STRLEN_MAX ((STRLEN)-1) + +#define isALNUM_LC_utf8(p) isALNUM_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isALPHA_LC_utf8(p) isALPHA_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isSPACE_LC_utf8(p) isSPACE_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isDIGIT_LC_utf8(p) isDIGIT_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isUPPER_LC_utf8(p) isUPPER_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isLOWER_LC_utf8(p) isLOWER_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isALNUMC_LC_utf8(p) isALNUMC_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isCNTRL_LC_utf8(p) isCNTRL_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isGRAPH_LC_utf8(p) isGRAPH_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isPRINT_LC_utf8(p) isPRINT_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define isPUNCT_LC_utf8(p) isPUNCT_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define toUPPER_LC_utf8(p) toUPPER_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define toTITLE_LC_utf8(p) toTITLE_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) +#define toLOWER_LC_utf8(p) toLOWER_LC_uni(utf8_to_uv_chk(p, STRLEN_MAX, 0, 0)) #define isPSXSPC_LC_utf8(c) (isSPACE_LC_utf8(c) ||(c) == '\f') #define isBLANK_LC_utf8(c) isBLANK(c) /* could be wrong */ |