summary | refs | log | tree | commit | diff
path: root/regcharclass.h
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2012-11-19 13:36:56 -0700
committer: Karl Williamson <public@khwilliamson.com> 2012-11-19 17:13:02 -0700
commit: 2cafb56b1b34a7d194edbc8deedcd3e3242a2994 (patch)
tree: b7811ab1d344b368a3043c52dfd55663e8d58b1e /regcharclass.h
parent: 840f8e9207353b89e0c2790e03fe16f44071615b (diff)
download: perl-2cafb56b1b34a7d194edbc8deedcd3e3242a2994.tar.gz
Refactor is_BLANK_uni() and is_BLANK_utf8() macros
This adds macros to regen/regcharclass.pl that are usable as part of the is_BLANK_foo() macros in handy.h, so that no function call is needed to handle input above the Latin-1 range. These macros are quite small and are unlikely to grow over time, as Unicode has mostly finished adding white-space equivalents to the Standard. The functions that implement these in utf8.c are also changed to use the macros instead of generating a swash. This should speed things up slightly and use less memory over time, because there is no longer a swash to fill.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- regcharclass.h 34
1 files changed, 34 insertions, 0 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 497d0914cd..b3a24eb8f5 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -202,6 +202,32 @@
: 0 )
/*** GENERATED CODE ***
 * is_HORIZWS_high(s)
 *
 * Evaluates to 3 when the bytes s[0], s[1], s[2] form one of the
 * three-byte sequences below, and to 0 otherwise:
 *
 *     E1 9A 80                (UTF-8 for U+1680)
 *     E1 A0 8E                (UTF-8 for U+180E)
 *     E2 80 xx, xx <= 0x8A    (UTF-8 for U+2000..U+200A when xx >= 0x80)
 *     E2 80 AF                (UTF-8 for U+202F)
 *     E2 81 9F                (UTF-8 for U+205F)
 *     E3 80 80                (UTF-8 for U+3000)
 *
 * No length check is made: up to three bytes starting at s are read,
 * each one only after the preceding byte has matched.
 *
 * NOTE(review): the "xx <= 0x8A" test has no lower bound, so the bytes
 * E2 80 00..7F also yield 3.  Presumably callers guarantee well-formed
 * UTF-8 (third byte >= 0x80); is_HORIZWS_high_safe() tests 0x80 <= xx
 * explicitly.  Confirm before using this on unvalidated input.
 *
 * NOTE(review): s is expanded several times and is not parenthesized
 * inside the (U8*) cast, so pass a simple, side-effect-free pointer
 * expression.
 */
+#define is_HORIZWS_high(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x9A == ((U8*)s)[1] ) ? \
+ ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+: ( 0xE2 == ((U8*)s)[0] ) ? \
+ ( ( 0x80 == ((U8*)s)[1] ) ? \
+ ( ( ( ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+: ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )
+
+/*** GENERATED CODE ***
+ * is_HORIZWS_high_safe(s,e)
+ *
+ * Length-checked matcher for a fixed set of three-byte sequences.
+ * Evaluates to 0 unless (e)-(s) > 2, i.e. unless at least three bytes
+ * lie between s and e.  When that holds, evaluates to 3 if the bytes
+ * s[0], s[1], s[2] form one of the sequences below, and to 0 otherwise:
+ *
+ *     E1 9A 80                (UTF-8 for U+1680)
+ *     E1 A0 8E                (UTF-8 for U+180E)
+ *     E2 80 80 .. E2 80 8A    (UTF-8 for U+2000..U+200A)
+ *     E2 80 AF                (UTF-8 for U+202F)
+ *     E2 81 9F                (UTF-8 for U+205F)
+ *     E3 80 80                (UTF-8 for U+3000)
+ *
+ * Presumably e points one past the last readable byte of the buffer
+ * that s points into -- confirm against callers.
+ *
+ * NOTE(review): s is expanded several times, and although the length
+ * test parenthesizes (s) and (e), the (U8*) casts apply to a bare s.
+ * Pass a simple, side-effect-free pointer expression.
+ */
+#define is_HORIZWS_high_safe(s,e) \
+( ((e)-(s) > 2) ? \
+ ( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x9A == ((U8*)s)[1] ) ? \
+ ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( 0xE2 == ((U8*)s)[0] ) ? \
+ ( ( 0x80 == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
+ : ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )\
+: 0 )
+
+/*** GENERATED CODE ***/
#define is_HORIZWS_cp(cp) \
( 0x09 == cp || ( 0x09 < cp && \
( 0x20 == cp || ( 0x20 < cp && \
@@ -212,6 +238,14 @@
( 0x202F == cp || ( 0x202F < cp && \
( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+/*** GENERATED CODE ***
+ * is_HORIZWS_cp_high(cp)
+ *
+ * Code-point membership test.  Evaluates to 1 when cp is one of
+ *
+ *     0x1680, 0x180E, 0x2000..0x200A, 0x202F, 0x205F, 0x3000
+ *
+ * and to 0 otherwise.  The candidates are tried in ascending order using
+ * the pattern "X == cp || ( X < cp && ...)", so evaluation stops as soon
+ * as cp is found to be below the next candidate.
+ *
+ * Unlike is_HORIZWS_cp(), whose tests begin at 0x09, the smallest value
+ * accepted here is 0x1680.
+ *
+ * NOTE(review): cp is expanded many times and is not parenthesized, so
+ * an argument such as "a | b" or one with side effects will not behave
+ * as intended.  Pass a plain variable or constant.
+ */
+#define is_HORIZWS_cp_high(cp) \
+( 0x1680 == cp || ( 0x1680 < cp && \
+( 0x180E == cp || ( 0x180E < cp && \
+( ( 0x2000 <= cp && cp <= 0x200A ) || ( 0x200A < cp && \
+( 0x202F == cp || ( 0x202F < cp && \
+( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )
+
/*
VERTWS: Vertical Whitespace: \v \V