Refactor is_XDIGIT_uni(), is_XDIGIT_utf8() and macros

This adds an XDIGIT entry to regen/regcharclass.pl, which generates macros (in regcharclass.h) that are usable as part of the isXDIGIT_foo() macros in handy.h, so that no function call is needed to handle above-Latin1 input. These macros are quite small and unlikely to grow over time. The functions that implement these in utf8.c are also changed to use the macros instead of generating a swash. This should speed things up slightly and use less memory, since a swash would otherwise consume more memory over time as it fills.
author: Karl Williamson <public@khwilliamson.com> 2012-11-19 13:55:13 -0700
committer: Karl Williamson <public@khwilliamson.com> 2012-11-19 17:13:02 -0700
commit: 4ac6419dea3d3b14ab477d0cd4d87f251b709e28 (patch)
tree: c64e3c7e180ecdf77741675b518d43e4c74ea0fc
parent: 2cafb56b1b34a7d194edbc8deedcd3e3242a2994 (diff)
download: perl-4ac6419dea3d3b14ab477d0cd4d87f251b709e28.tar.gz
4 files changed, 35 insertions, 6 deletions
diff --git a/handy.h b/handy.h
index 161e936913..80792e3c11 100644
--- a/handy.h
+++ b/handy.h
@@ -942,7 +942,7 @@ EXTCONST U32 PL_charclass[];
 #define isGRAPH_uni(c)          _generic_uni(_CC_GRAPH, is_uni_graph, c)
 #define isPRINT_uni(c)          _generic_uni(_CC_PRINT, is_uni_print, c)
 #define isPUNCT_uni(c)          _generic_uni(_CC_PUNCT, is_uni_punct, c)
-#define isXDIGIT_uni(c)         _generic_uni(_CC_XDIGIT, is_uni_xdigit, c)
+#define isXDIGIT_uni(c)         _generic_uni(_CC_XDIGIT, is_XDIGIT_cp_high, c)
 
 /* Posix and regular space differ only in U+000B, which is in Latin1 */
 #define isPSXSPC_uni(c)         _generic_uni(_CC_PSXSPC, is_uni_space, c)
@@ -1014,7 +1014,7 @@ EXTCONST U32 PL_charclass[];
 #define isGRAPH_utf8(p)         _generic_utf8(_CC_GRAPH, is_utf8_graph, p)
 #define isPRINT_utf8(p)         _generic_utf8(_CC_PRINT, is_utf8_print, p)
 #define isPUNCT_utf8(p)         _generic_utf8(_CC_PUNCT, is_utf8_punct, p)
-#define isXDIGIT_utf8(p)        _generic_utf8(_CC_XDIGIT, is_utf8_xdigit, p)
+#define isXDIGIT_utf8(p)        _generic_utf8(_CC_XDIGIT, is_XDIGIT_high, p)
 #define toUPPER_utf8(p,s,l)	to_utf8_upper(p,s,l)
 #define toTITLE_utf8(p,s,l)	to_utf8_title(p,s,l)
 #define toLOWER_utf8(p,s,l)	to_utf8_lower(p,s,l)
diff --git a/regcharclass.h b/regcharclass.h
index b3a24eb8f5..3bdaffa1ca 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -331,6 +331,33 @@
 ( 0x2028 == cp || 0x2029 == cp )
 
 /*
+	XDIGIT: Hexadecimal digits
+
+	\p{XDigit}
+*/
+/*** GENERATED CODE ***/
+#define is_XDIGIT_utf8(s)                                                   \
+( ( ( 0x30 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x39 ) || ( 0x41 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x46 ) || ( 0x61 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x66 ) ) ? 1\
+: ( 0xEF == ((U8*)s)[0] ) ?                                                 \
+    ( ( 0xBC == ((U8*)s)[1] ) ?                                             \
+	( ( ( 0x90 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x99 ) || ( 0xA1 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA6 ) ) ? 3 : 0 )\
+    : ( ( 0xBD == ((U8*)s)[1] ) && ( 0x81 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) ) ? 3 : 0 )\
+: 0 )
+
+/*** GENERATED CODE ***/
+#define is_XDIGIT_high(s)                                                   \
+( ( 0xEF == ((U8*)s)[0] ) ?                                                 \
+    ( ( 0xBC == ((U8*)s)[1] ) ?                                             \
+	( ( ( 0x90 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x99 ) || ( 0xA1 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA6 ) ) ? 3 : 0 )\
+    : ( ( 0xBD == ((U8*)s)[1] ) && ( 0x81 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x86 ) ) ? 3 : 0 )\
+: 0 )
+
+/*** GENERATED CODE ***/
+#define is_XDIGIT_cp_high(cp)                                               \
+( ( 0xFF10 <= cp && cp <= 0xFF19 ) || ( 0xFF19 < cp &&                      \
+( ( 0xFF21 <= cp && cp <= 0xFF26 ) || ( 0xFF41 <= cp && cp <= 0xFF46 ) ) ) )
+
+/*
 	REPLACEMENT: Unicode REPLACEMENT CHARACTER
 
 	0xFFFD
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index ee0fe5d87b..46425e4965 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1396,6 +1396,10 @@ VERTWS: Vertical Whitespace: \v \V
 => generic UTF8 high LATIN1 cp cp_high :fast safe
 \p{VertSpace}
 
+XDIGIT: Hexadecimal digits
+=> UTF8 high cp_high :fast
+\p{XDigit}
+
 REPLACEMENT: Unicode REPLACEMENT CHARACTER
 => UTF8 :safe
 0xFFFD
diff --git a/utf8.c b/utf8.c
index 2d7eb08270..7092d0648e 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1583,9 +1583,7 @@ Perl_is_uni_punct(pTHX_ UV c)
 bool
 Perl_is_uni_xdigit(pTHX_ UV c)
 {
-    U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
-    uvchr_to_utf8(tmpbuf, c);
-    return is_utf8_xdigit(tmpbuf);
+    return isXDIGIT_uni(c);
 }
 
 UV
@@ -2185,7 +2183,7 @@ Perl_is_utf8_xdigit(pTHX_ const U8 *p)
 
     PERL_ARGS_ASSERT_IS_UTF8_XDIGIT;
 
-    return is_utf8_common(p, &PL_utf8_xdigit, "IsXDigit");
+    return is_XDIGIT_utf8(p);
 }
 
 bool
author	Karl Williamson <public@khwilliamson.com>	2012-11-19 13:55:13 -0700
committer	Karl Williamson <public@khwilliamson.com>	2012-11-19 17:13:02 -0700
commit	4ac6419dea3d3b14ab477d0cd4d87f251b709e28 (patch)
tree	c64e3c7e180ecdf77741675b518d43e4c74ea0fc
parent	2cafb56b1b34a7d194edbc8deedcd3e3242a2994 (diff)
download	perl-4ac6419dea3d3b14ab477d0cd4d87f251b709e28.tar.gz