diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-11-19 13:36:56 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-11-19 17:13:02 -0700 |
commit | 2cafb56b1b34a7d194edbc8deedcd3e3242a2994 (patch) | |
tree | b7811ab1d344b368a3043c52dfd55663e8d58b1e /regcharclass.h | |
parent | 840f8e9207353b89e0c2790e03fe16f44071615b (diff) | |
download | perl-2cafb56b1b34a7d194edbc8deedcd3e3242a2994.tar.gz |
Refactor is_BLANK_uni() and is_BLANK_utf8() macros
This adds macros to regen/regcharclass.pl that are usable as part of the
is_BLANK_foo() macros in handy.h, so that no function call is needed
to handle above-Latin1 input. These macros are quite small, and are
unlikely to grow over time, as Unicode has mostly finished adding white
space equivalents to the Standard. The functions that implement these
in utf8.c are also changed to use the macros instead of generating a
swash. This should speed things up slightly and use less memory, because
there is no longer a swash that grows over time as it fills.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- | regcharclass.h | 34 |
1 files changed, 34 insertions, 0 deletions
/*
 * diff --git a/regcharclass.h b/regcharclass.h
 * index 497d0914cd..b3a24eb8f5 100644
 * --- a/regcharclass.h
 * +++ b/regcharclass.h
 *
 * Hunk "@@ -202,6 +202,32 @@" opens with the closing ": 0 )" of a macro
 * whose beginning lies outside this excerpt, and then adds the two
 * is_HORIZWS_high* macros defined below.
 *
 * NOTE(review): these blocks are marked as generated code, presumably
 * emitted by regen/regcharclass.pl -- any hand edit made here should be
 * mirrored in the generator so that it survives regeneration.
 */

/*** GENERATED CODE ***/
/*
 * is_HORIZWS_high(s)
 *
 * Tests whether the bytes at s begin with one of these three-byte
 * sequences (the UTF-8 forms of the code points accepted by
 * is_HORIZWS_cp_high):
 *
 *     E1 9A 80          (U+1680)
 *     E1 A0 8E          (U+180E)
 *     E2 80 80 .. 8A    (U+2000 .. U+200A)
 *     E2 80 AF          (U+202F)
 *     E2 81 9F          (U+205F)
 *     E3 80 80          (U+3000)
 *
 * Evaluates to 3 (the length of the matched sequence) on a match and to 0
 * otherwise.
 *
 * s is viewed as a pointer to U8.  No length check is made: byte [1] is
 * read only after byte [0] matched, and byte [2] only after byte [1]
 * matched.  s is expanded several times, so it must be free of side
 * effects.
 *
 * The third byte of the E2 80 family is checked against both ends of the
 * range 0x80..0x8A, exactly as is_HORIZWS_high_safe does.  Without the
 * lower bound a malformed sequence such as E2 80 41 would be reported as
 * a match, and the two macros would disagree on the same bytes.
 */
#define is_HORIZWS_high(s)                                                  \
( ( 0xE1 == ((const U8*)(s))[0] ) ?                                         \
    ( ( 0x9A == ((const U8*)(s))[1] ) ?                                     \
        ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 )                         \
    : ( ( 0xA0 == ((const U8*)(s))[1] )                                     \
        && ( 0x8E == ((const U8*)(s))[2] ) ) ? 3 : 0 )                      \
: ( 0xE2 == ((const U8*)(s))[0] ) ?                                         \
    ( ( 0x80 == ((const U8*)(s))[1] ) ?                                     \
        ( ( ( 0x80 <= ((const U8*)(s))[2]                                   \
              && ((const U8*)(s))[2] <= 0x8A )                              \
            || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 )                      \
    : ( ( 0x81 == ((const U8*)(s))[1] )                                     \
        && ( 0x9F == ((const U8*)(s))[2] ) ) ? 3 : 0 )                      \
: ( ( ( 0xE3 == ((const U8*)(s))[0] )                                       \
      && ( 0x80 == ((const U8*)(s))[1] ) )                                  \
    && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 )

/*** GENERATED CODE ***/
/*
 * is_HORIZWS_high_safe(s,e)
 *
 * Length-checked form of is_HORIZWS_high: the bytes at s are examined only
 * when more than two bytes lie between s and e (e is one past the last
 * readable byte); otherwise the result is 0.  Accepts the same byte
 * sequences and yields the same values (3 or 0) as is_HORIZWS_high.
 * Both arguments are expanded more than once.
 */
#define is_HORIZWS_high_safe(s,e)                                           \
( ((e)-(s) > 2) ?                                                           \
    ( ( 0xE1 == ((const U8*)(s))[0] ) ?                                     \
        ( ( 0x9A == ((const U8*)(s))[1] ) ?                                 \
            ( ( 0x80 == ((const U8*)(s))[2] ) ? 3 : 0 )                     \
        : ( ( 0xA0 == ((const U8*)(s))[1] )                                 \
            && ( 0x8E == ((const U8*)(s))[2] ) ) ? 3 : 0 )                  \
    : ( 0xE2 == ((const U8*)(s))[0] ) ?                                     \
        ( ( 0x80 == ((const U8*)(s))[1] ) ?                                 \
            ( ( ( 0x80 <= ((const U8*)(s))[2]                               \
                  && ((const U8*)(s))[2] <= 0x8A )                          \
                || 0xAF == ((const U8*)(s))[2] ) ? 3 : 0 )                  \
        : ( ( 0x81 == ((const U8*)(s))[1] )                                 \
            && ( 0x9F == ((const U8*)(s))[2] ) ) ? 3 : 0 )                  \
    : ( ( ( 0xE3 == ((const U8*)(s))[0] )                                   \
          && ( 0x80 == ((const U8*)(s))[1] ) )                              \
        && ( 0x80 == ((const U8*)(s))[2] ) ) ? 3 : 0 )                      \
: 0 )

/*** GENERATED CODE ***/
/*
 * Unchanged context: is_HORIZWS_cp(cp).  Only the first and last lines of
 * its expansion are visible in this excerpt, because the hunk boundary
 * "@@ -212,6 +238,14 @@" falls inside the macro.  It is therefore quoted
 * here rather than defined:
 *
 *     #define is_HORIZWS_cp(cp)
 *     ( 0x09 == cp || ( 0x09 < cp &&
 *     ( 0x20 == cp || ( 0x20 < cp &&
 *       ... lines not shown in the diff ...
 *     ( 0x202F == cp || ( 0x202F < cp &&
 *     ( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 */

/*** GENERATED CODE ***/
/*
 * is_HORIZWS_cp_high(cp)
 *
 * Code-point counterpart of is_HORIZWS_high: non-zero when cp is one of
 * 0x1680, 0x180E, 0x2000..0x200A, 0x202F, 0x205F or 0x3000, and zero
 * otherwise.  The comparisons are nested in ascending order, so a small cp
 * is rejected after the first two tests.  cp is expanded more than once
 * and must be free of side effects.
 */
#define is_HORIZWS_cp_high(cp)                                              \
( 0x1680 == (cp) || ( 0x1680 < (cp) &&                                      \
( 0x180E == (cp) || ( 0x180E < (cp) &&                                      \
( ( 0x2000 <= (cp) && (cp) <= 0x200A ) || ( 0x200A < (cp) &&                \
( 0x202F == (cp) || ( 0x202F < (cp) &&                                      \
( 0x205F == (cp) || 0x3000 == (cp) ) ) ) ) ) ) ) ) )

/* VERTWS: Vertical Whitespace: \v \V   (the excerpt ends inside this comment) */