summaryrefslogtreecommitdiff
path: root/regcharclass.h
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-11-19 14:24:29 -0700
committerKarl Williamson <public@khwilliamson.com>2012-11-19 17:13:02 -0700
commitbedac28b0a4795ca4264d391bfdd7dd3190f80d3 (patch)
tree37f911d99ef79040bc0bd8837e60ca89d84a7156 /regcharclass.h
parent4ac6419dea3d3b14ab477d0cd4d87f251b709e28 (diff)
downloadperl-bedac28b0a4795ca4264d391bfdd7dd3190f80d3.tar.gz
regexec.c: Use SPACE macros instead of swash
This avoids loading a swash when a code point above Latin-1 is tested to see if it matches \s. The SPACE macro is quite small and unlikely to grow over time, as Unicode has mostly finished adding white-space equivalents to the Standard. The CCC_TRY_U macro in regexec.c could not be used for this, so I expanded out what it would generate, modified to use the macro instead of a swash.
Diffstat (limited to 'regcharclass.h')
-rw-r--r--regcharclass.h59
1 files changed, 59 insertions, 0 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 3bdaffa1ca..64e4453e58 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -358,6 +358,65 @@
( ( 0xFF21 <= cp && cp <= 0xFF26 ) || ( 0xFF41 <= cp && cp <= 0xFF46 ) ) ) )
/*
+ XPERLSPACE: \p{XPerlSpace}
+
+ \p{XPerlSpace}
+*/
+/*** GENERATED CODE ***/
+/* is_XPERLSPACE(s,is_utf8): does the character starting at s match
+ * \p{XPerlSpace}?
+ *
+ *   s        pointer to the first byte of the character; it is cast to
+ *            const U8* and only read.  The argument is expanded many
+ *            times, so it must not have side effects.
+ *   is_utf8  non-zero if the bytes at s are UTF-8 encoded.
+ *
+ * Evaluates to the number of bytes matched, or 0 for no match:
+ *   - 1 for 0x09..0x0D and 0x20, whatever is_utf8 says;
+ *   - with is_utf8:    2 for C2 85 / C2 A0, 3 for the three-byte
+ *                      sequences starting E1, E2 or E3 tested below;
+ *   - without is_utf8: 1 for the single bytes 0x85 and 0xA0.
+ *
+ * No length is passed in: s[1] and s[2] are read as soon as the
+ * preceding byte(s) match, so the caller must make sure those bytes are
+ * readable.
+ *
+ * NOTE(review): the argument is parenthesized by hand here; if this file
+ * is regenerated, the generator needs the same change.
+ */
+#define is_XPERLSPACE(s,is_utf8) \
+( ( ( 0x09 <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) || 0x20 == ((const U8*)(s))[0] ) ? 1\
+: ( is_utf8 ) ? \
+ ( ( 0xC2 == ((const U8*)(s))[0] ) ? \
+ ( ( 0x85 == ((const U8*)(s))[1] || 0xA0 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
+ : ( 0xE1 == ((const U8*)(s))[0] ) ? \
+ ( ( 0x9A == ((const U8*)(s))[1] ) ? \
+ ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 ) \
+ : ( ( 0xA0 == ((const U8*)(s))[1] ) && ( 0x8E == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
+ : ( 0xE2 == ((const U8*)(s))[0] ) ? \
+ ( ( 0x80 == ((const U8*)(s))[1] ) ? \
+ ( ( ( 0x80 <= ((const U8*)(s))[2] && ((const U8*)(s))[2] <= 0x8A ) || ( ((const U8*)(s))[2] & 0xFE ) == 0xA8 || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 )\
+ : ( ( 0x81 == ((const U8*)(s))[1] ) && ( 0x9F == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
+ : ( ( ( 0xE3 == ((const U8*)(s))[0] ) && ( 0x80 == ((const U8*)(s))[1] ) ) && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 )\
+: ( 0x85 == ((const U8*)(s))[0] || 0xA0 == ((const U8*)(s))[0] ) )
+
+/*** GENERATED CODE ***/
+/* is_XPERLSPACE_utf8(s): does the UTF-8 encoded character starting at s
+ * match \p{XPerlSpace}?
+ *
+ *   s   pointer to the first byte of the character; it is cast to
+ *       const U8* and only read.  The argument is expanded many times,
+ *       so it must not have side effects.
+ *
+ * Evaluates to the length in bytes of the matched character (1, 2 or 3),
+ * or 0 for no match.  No length is passed in: s[1] and s[2] are read as
+ * soon as the preceding byte(s) match, so the caller must make sure
+ * those bytes are readable.
+ *
+ * The E2 80 xx test checks both ends of the 0x80..0x8A range, as
+ * is_XPERLSPACE() does, so a malformed sequence such as E2 80 41 is not
+ * reported as a space.
+ *
+ * NOTE(review): the lower bound and the parenthesized argument were added
+ * by hand; if this file is regenerated, the generator needs the same
+ * change.
+ */
+#define is_XPERLSPACE_utf8(s) \
+( ( ( 0x09 <= ((const U8*)(s))[0] && ((const U8*)(s))[0] <= 0x0D ) || 0x20 == ((const U8*)(s))[0] ) ? 1\
+: ( 0xC2 == ((const U8*)(s))[0] ) ? \
+ ( ( 0x85 == ((const U8*)(s))[1] || 0xA0 == ((const U8*)(s))[1] ) ? 2 : 0 ) \
+: ( 0xE1 == ((const U8*)(s))[0] ) ? \
+ ( ( 0x9A == ((const U8*)(s))[1] ) ? \
+ ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 ) \
+ : ( ( 0xA0 == ((const U8*)(s))[1] ) && ( 0x8E == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
+: ( 0xE2 == ((const U8*)(s))[0] ) ? \
+ ( ( 0x80 == ((const U8*)(s))[1] ) ? \
+ ( ( ( 0x80 <= ((const U8*)(s))[2] && ((const U8*)(s))[2] <= 0x8A ) || ( ((const U8*)(s))[2] & 0xFE ) == 0xA8 || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 )\
+ : ( ( 0x81 == ((const U8*)(s))[1] ) && ( 0x9F == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
+: ( ( ( 0xE3 == ((const U8*)(s))[0] ) && ( 0x80 == ((const U8*)(s))[1] ) ) && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 )
+
+/*** GENERATED CODE ***/
+/* is_XPERLSPACE_high(s): like is_XPERLSPACE_utf8(), but tests only the
+ * three-byte UTF-8 sequences (lead bytes E1, E2 and E3); the one- and
+ * two-byte forms are not looked at here.
+ *
+ *   s   pointer to the first byte of the character; it is cast to
+ *       const U8* and only read.  The argument is expanded many times,
+ *       so it must not have side effects.
+ *
+ * Evaluates to 3 on a match, 0 otherwise.  No length is passed in: s[1]
+ * and s[2] are read as soon as the preceding byte(s) match, so the
+ * caller must make sure those bytes are readable.
+ *
+ * The E2 80 xx test checks both ends of the 0x80..0x8A range, as
+ * is_XPERLSPACE() does, so a malformed sequence such as E2 80 41 is not
+ * reported as a space.
+ *
+ * NOTE(review): the lower bound and the parenthesized argument were added
+ * by hand; if this file is regenerated, the generator needs the same
+ * change.
+ */
+#define is_XPERLSPACE_high(s) \
+( ( 0xE1 == ((const U8*)(s))[0] ) ? \
+ ( ( 0x9A == ((const U8*)(s))[1] ) ? \
+ ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 ) \
+ : ( ( 0xA0 == ((const U8*)(s))[1] ) && ( 0x8E == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
+: ( 0xE2 == ((const U8*)(s))[0] ) ? \
+ ( ( 0x80 == ((const U8*)(s))[1] ) ? \
+ ( ( ( 0x80 <= ((const U8*)(s))[2] && ((const U8*)(s))[2] <= 0x8A ) || ( ((const U8*)(s))[2] & 0xFE ) == 0xA8 || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 )\
+ : ( ( 0x81 == ((const U8*)(s))[1] ) && ( 0x9F == ((const U8*)(s))[2] ) ) ? 3 : 0 ) \
+: ( ( ( 0xE3 == ((const U8*)(s))[0] ) && ( 0x80 == ((const U8*)(s))[1] ) ) && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 )
+
+/*** GENERATED CODE ***/
+/* is_XPERLSPACE_cp_high(cp): is the code point cp one of
+ * 0x1680, 0x180E, 0x2000..0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000?
+ *
+ *   cp   an integer code point.  The argument is expanded many times, so
+ *        it must not have side effects; every use is parenthesized so
+ *        that an expression such as (a | b) is compared as a whole.
+ *
+ * Evaluates to 1 on a match, 0 otherwise.  The tests are nested in
+ * ascending order, so evaluation stops at the first value that is not
+ * below cp.
+ *
+ * NOTE(review): the argument is parenthesized by hand here; if this file
+ * is regenerated, the generator needs the same change.
+ */
+#define is_XPERLSPACE_cp_high(cp) \
+( 0x1680 == (cp) || ( 0x1680 < (cp) && \
+( 0x180E == (cp) || ( 0x180E < (cp) && \
+( ( 0x2000 <= (cp) && (cp) <= 0x200A ) || ( 0x200A < (cp) && \
+( 0x2028 == (cp) || ( 0x2028 < (cp) && \
+( 0x2029 == (cp) || ( 0x2029 < (cp) && \
+( 0x202F == (cp) || ( 0x202F < (cp) && \
+( 0x205F == (cp) || 0x3000 == (cp) ) ) ) ) ) ) ) ) ) ) ) ) )
+
+/*
REPLACEMENT: Unicode REPLACEMENT CHARACTER
0xFFFD