diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-11-19 13:55:13 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-11-19 17:13:02 -0700 |
commit | 4ac6419dea3d3b14ab477d0cd4d87f251b709e28 (patch) | |
tree | c64e3c7e180ecdf77741675b518d43e4c74ea0fc | |
parent | 2cafb56b1b34a7d194edbc8deedcd3e3242a2994 (diff) | |
download | perl-4ac6419dea3d3b14ab477d0cd4d87f251b709e28.tar.gz |
Refactor is_XDIGIT_uni(), is_XDIGIT_utf8() and macros
This adds entries to regen/regcharclass.pl that generate macros (in
regcharclass.h) usable as part of the isXDIGIT_foo() macros in handy.h,
so that no function call is needed to handle above-Latin1 input. These
macros are quite small, and unlikely to grow over time. The functions
that implement these in utf8.c are also changed to use the macros
instead of generating a swash. This should speed things up slightly, and
use less memory over time, since these functions no longer fill a swash.
-rw-r--r-- | handy.h | 4 | ||||
-rw-r--r-- | regcharclass.h | 27 | ||||
-rwxr-xr-x | regen/regcharclass.pl | 4 | ||||
-rw-r--r-- | utf8.c | 6 |
4 files changed, 35 insertions, 6 deletions
@@ -942,7 +942,7 @@ EXTCONST U32 PL_charclass[];
 #define isGRAPH_uni(c) _generic_uni(_CC_GRAPH, is_uni_graph, c)
 #define isPRINT_uni(c) _generic_uni(_CC_PRINT, is_uni_print, c)
 #define isPUNCT_uni(c) _generic_uni(_CC_PUNCT, is_uni_punct, c)
-#define isXDIGIT_uni(c) _generic_uni(_CC_XDIGIT, is_uni_xdigit, c)
+#define isXDIGIT_uni(c) _generic_uni(_CC_XDIGIT, is_XDIGIT_cp_high, c)
 
 /* Posix and regular space differ only in U+000B, which is in Latin1 */
 #define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, is_uni_space, c)
@@ -1014,7 +1014,7 @@ EXTCONST U32 PL_charclass[];
 #define isGRAPH_utf8(p) _generic_utf8(_CC_GRAPH, is_utf8_graph, p)
 #define isPRINT_utf8(p) _generic_utf8(_CC_PRINT, is_utf8_print, p)
 #define isPUNCT_utf8(p) _generic_utf8(_CC_PUNCT, is_utf8_punct, p)
-#define isXDIGIT_utf8(p) _generic_utf8(_CC_XDIGIT, is_utf8_xdigit, p)
+#define isXDIGIT_utf8(p) _generic_utf8(_CC_XDIGIT, is_XDIGIT_high, p)
 #define toUPPER_utf8(p,s,l) to_utf8_upper(p,s,l)
 #define toTITLE_utf8(p,s,l) to_utf8_title(p,s,l)
 #define toLOWER_utf8(p,s,l) to_utf8_lower(p,s,l)
diff --git a/regcharclass.h b/regcharclass.h
index b3a24eb8f5..3bdaffa1ca 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -331,6 +331,33 @@
 ( 0x2028 == cp || 0x2029 == cp )
 
 /*
+ XDIGIT: Hexadecimal digits
+
+ \p{XDigit}
+*/
+/*** GENERATED CODE ***/
+#define is_XDIGIT_utf8(s) \
+( ( ( 0x30 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x39 ) || ( 0x41 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x46 ) || ( 0x61 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x66 ) ) ? 1\
+: ( 0xEF == ((U8*)s)[0] ) ? \
+ ( ( 0xBC == ((U8*)s)[1] ) ? \
+ ( ( ( 0x90 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x99 ) || ( 0xA1 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA6 ) ) ? 3 : 0 )\
+ : ( ( 0xBD == ((U8*)s)[1] ) && ( 0x81 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) ) ? 3 : 0 )\
+: 0 )
+
+/*** GENERATED CODE ***/
+#define is_XDIGIT_high(s) \
+( ( 0xEF == ((U8*)s)[0] ) ? \
+ ( ( 0xBC == ((U8*)s)[1] ) ? \
+ ( ( ( 0x90 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x99 ) || ( 0xA1 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA6 ) ) ? 3 : 0 )\
+ : ( ( 0xBD == ((U8*)s)[1] ) && ( 0x81 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) ) ? 3 : 0 )\
+: 0 )
+
+/*** GENERATED CODE ***/
+#define is_XDIGIT_cp_high(cp) \
+( ( 0xFF10 <= cp && cp <= 0xFF19 ) || ( 0xFF19 < cp && \
+( ( 0xFF21 <= cp && cp <= 0xFF26 ) || ( 0xFF41 <= cp && cp <= 0xFF46 ) ) ) )
+
+/*
  REPLACEMENT: Unicode REPLACEMENT CHARACTER
 
  0xFFFD
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index ee0fe5d87b..46425e4965 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1396,6 +1396,10 @@ VERTWS: Vertical Whitespace: \v \V
 => generic UTF8 high LATIN1 cp cp_high :fast safe
 \p{VertSpace}
 
+XDIGIT: Hexadecimal digits
+=> UTF8 high cp_high :fast
+\p{XDigit}
+
 REPLACEMENT: Unicode REPLACEMENT CHARACTER
 => UTF8 :safe
 0xFFFD
@@ -1583,9 +1583,7 @@ Perl_is_uni_punct(pTHX_ UV c)
 bool
 Perl_is_uni_xdigit(pTHX_ UV c)
 {
-    U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
-    uvchr_to_utf8(tmpbuf, c);
-    return is_utf8_xdigit(tmpbuf);
+    return isXDIGIT_uni(c);
 }
 
 UV
@@ -2185,7 +2183,7 @@ Perl_is_utf8_xdigit(pTHX_ const U8 *p)
 
     PERL_ARGS_ASSERT_IS_UTF8_XDIGIT;
 
-    return is_utf8_common(p, &PL_utf8_xdigit, "IsXDigit");
+    return is_XDIGIT_utf8(p);
 }
 
 bool