diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-11-19 14:24:29 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-11-19 17:13:02 -0700 |
commit | bedac28b0a4795ca4264d391bfdd7dd3190f80d3 (patch) | |
tree | 37f911d99ef79040bc0bd8837e60ca89d84a7156 /regcharclass.h | |
parent | 4ac6419dea3d3b14ab477d0cd4d87f251b709e28 (diff) | |
download | perl-bedac28b0a4795ca4264d391bfdd7dd3190f80d3.tar.gz |
regexec.c: Use SPACE macros instead of swash
This will avoid loading a swash when an above Latin1 code point is
tested to see if it matches \s. The SPACE macro is quite small, and
unlikely to grow over time, as Unicode has mostly finished adding white
space equivalents to the Standard.
The CCC_TRY_U macro in regexec.c could not be used for this, and I just
expanded out what it would generate, modified to use the macro instead
of a swash.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- | regcharclass.h | 59 |
1 files changed, 59 insertions, 0 deletions
/*
 * Excerpt of regcharclass.h, reconstructed from the hunk
 *   diff --git a/regcharclass.h b/regcharclass.h
 *   index 3bdaffa1ca..64e4453e58 100644
 *   @@ -358,6 +358,65 @@
 *
 * Leading context in the hunk (tail of the preceding macro, whose start lies
 * outside this excerpt):
 *   ( ( 0xFF21 <= cp && cp <= 0xFF26 ) || ( 0xFF41 <= cp && cp <= 0xFF46 ) ) ) )
 *
 * NOTE(review): every macro below is marked GENERATED CODE; the corrections
 * made here presumably also belong in whatever generates this header --
 * confirm before relying on a hand edit.
 */

/*
	XPERLSPACE: \p{XPerlSpace}

	\p{XPerlSpace}
*/

/*
 * is_XPERLSPACE(s, is_utf8)
 *
 * Tests whether the bytes at s begin with an XPerlSpace character.
 *
 *   s        pointer to the first byte to examine; it is cast to (U8*).
 *   is_utf8  non-zero selects the multi-byte (UTF-8) branch, zero selects the
 *            single-byte branch.
 *
 * Evaluates to:
 *   1      for a first byte of 0x09..0x0D or 0x20 (either mode);
 *   2 / 3  in UTF-8 mode, the length in bytes of the matched sequence:
 *            C2 85, C2 A0, E1 9A 80, E1 A0 8E, E2 80 80..8A, E2 80 A8,
 *            E2 80 A9, E2 80 AF, E2 81 9F, E3 80 80;
 *   1 / 0  in single-byte mode, whether the byte is 0x85 or 0xA0;
 *   0      otherwise.
 *
 * Byte s[1] is read only after s[0] matched a lead byte, and s[2] only after
 * s[1] was examined, because of ?: and && short-circuiting.  Both arguments
 * are expanded several times, so they must be free of side effects.
 */
/*** GENERATED CODE ***/
#define is_XPERLSPACE(s,is_utf8)                                            \
( ( ( 0x09 <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || 0x20 == ((U8*)(s))[0] ) ? 1\
: ( is_utf8 ) ?                                                             \
    ( ( 0xC2 == ((U8*)(s))[0] ) ?                                           \
        ( ( 0x85 == ((U8*)(s))[1] || 0xA0 == ((U8*)(s))[1] ) ? 2 : 0 )      \
    : ( 0xE1 == ((U8*)(s))[0] ) ?                                           \
        ( ( 0x9A == ((U8*)(s))[1] ) ?                                       \
            ( ( 0x80 == ((U8*)(s))[2] ) ? 3 : 0 )                           \
        : ( ( 0xA0 == ((U8*)(s))[1] ) && ( 0x8E == ((U8*)(s))[2] ) ) ? 3 : 0 )\
    : ( 0xE2 == ((U8*)(s))[0] ) ?                                           \
        ( ( 0x80 == ((U8*)(s))[1] ) ?                                       \
            ( ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || ( ((U8*)(s))[2] & 0xFE ) == 0xA8 || 0xAF == ((U8*)(s))[2] ) ? 3 : 0 )\
        : ( ( 0x81 == ((U8*)(s))[1] ) && ( 0x9F == ((U8*)(s))[2] ) ) ? 3 : 0 )\
    : ( ( ( 0xE3 == ((U8*)(s))[0] ) && ( 0x80 == ((U8*)(s))[1] ) ) && ( 0x80 == ((U8*)(s))[2] ) ) ? 3 : 0 )\
: ( 0x85 == ((U8*)(s))[0] || 0xA0 == ((U8*)(s))[0] ) )

/*
 * is_XPERLSPACE_utf8(s)
 *
 * Same byte patterns as the UTF-8 branch of is_XPERLSPACE: evaluates to the
 * length in bytes (1, 2 or 3) of the XPerlSpace character starting at s, or
 * 0 when there is none.
 *
 * The third byte of the E2 80 xx family is range-checked on both sides
 * (0x80..0x8A).  Without the lower bound a third byte of 0x00..0x7F, such as
 * in the ill-formed sequence E2 80 20 or a sequence cut short by a NUL, would
 * be reported as a three-byte match.
 *
 * s is expanded several times and must be free of side effects.
 */
/*** GENERATED CODE ***/
#define is_XPERLSPACE_utf8(s)                                               \
( ( ( 0x09 <= ((U8*)(s))[0] && ((U8*)(s))[0] <= 0x0D ) || 0x20 == ((U8*)(s))[0] ) ? 1\
: ( 0xC2 == ((U8*)(s))[0] ) ?                                               \
    ( ( 0x85 == ((U8*)(s))[1] || 0xA0 == ((U8*)(s))[1] ) ? 2 : 0 )          \
: ( 0xE1 == ((U8*)(s))[0] ) ?                                               \
    ( ( 0x9A == ((U8*)(s))[1] ) ?                                           \
        ( ( 0x80 == ((U8*)(s))[2] ) ? 3 : 0 )                               \
    : ( ( 0xA0 == ((U8*)(s))[1] ) && ( 0x8E == ((U8*)(s))[2] ) ) ? 3 : 0 )  \
: ( 0xE2 == ((U8*)(s))[0] ) ?                                               \
    ( ( 0x80 == ((U8*)(s))[1] ) ?                                           \
        ( ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || ( ((U8*)(s))[2] & 0xFE ) == 0xA8 || 0xAF == ((U8*)(s))[2] ) ? 3 : 0 )\
    : ( ( 0x81 == ((U8*)(s))[1] ) && ( 0x9F == ((U8*)(s))[2] ) ) ? 3 : 0 )  \
: ( ( ( 0xE3 == ((U8*)(s))[0] ) && ( 0x80 == ((U8*)(s))[1] ) ) && ( 0x80 == ((U8*)(s))[2] ) ) ? 3 : 0 )

/*
 * is_XPERLSPACE_high(s)
 *
 * Recognizes only the three-byte sequences (lead bytes E1, E2, E3) from the
 * list above; evaluates to 3 on a match and 0 otherwise.  One- and two-byte
 * forms are not tested here.
 *
 * As in is_XPERLSPACE_utf8, the E2 80 xx range test carries the 0x80 lower
 * bound so that a third byte below 0x80 is rejected.
 *
 * s is expanded several times and must be free of side effects.
 */
/*** GENERATED CODE ***/
#define is_XPERLSPACE_high(s)                                               \
( ( 0xE1 == ((U8*)(s))[0] ) ?                                               \
    ( ( 0x9A == ((U8*)(s))[1] ) ?                                           \
        ( ( 0x80 == ((U8*)(s))[2] ) ? 3 : 0 )                               \
    : ( ( 0xA0 == ((U8*)(s))[1] ) && ( 0x8E == ((U8*)(s))[2] ) ) ? 3 : 0 )  \
: ( 0xE2 == ((U8*)(s))[0] ) ?                                               \
    ( ( 0x80 == ((U8*)(s))[1] ) ?                                           \
        ( ( ( 0x80 <= ((U8*)(s))[2] && ((U8*)(s))[2] <= 0x8A ) || ( ((U8*)(s))[2] & 0xFE ) == 0xA8 || 0xAF == ((U8*)(s))[2] ) ? 3 : 0 )\
    : ( ( 0x81 == ((U8*)(s))[1] ) && ( 0x9F == ((U8*)(s))[2] ) ) ? 3 : 0 )  \
: ( ( ( 0xE3 == ((U8*)(s))[0] ) && ( 0x80 == ((U8*)(s))[1] ) ) && ( 0x80 == ((U8*)(s))[2] ) ) ? 3 : 0 )

/*
 * is_XPERLSPACE_cp_high(cp)
 *
 * Code-point form: non-zero when cp is one of 0x1680, 0x180E, 0x2000..0x200A,
 * 0x2028, 0x2029, 0x202F, 0x205F or 0x3000, and 0 otherwise.  The comparisons
 * are nested in ascending order, so a value below the current threshold
 * leaves the chain at once.
 *
 * cp is parenthesized at every use so that an operator expression may be
 * passed; it is expanded several times and must be free of side effects.
 */
/*** GENERATED CODE ***/
#define is_XPERLSPACE_cp_high(cp)                                           \
( 0x1680 == (cp) || ( 0x1680 < (cp) &&                                      \
( 0x180E == (cp) || ( 0x180E < (cp) &&                                      \
( ( 0x2000 <= (cp) && (cp) <= 0x200A ) || ( 0x200A < (cp) &&                \
( 0x2028 == (cp) || ( 0x2028 < (cp) &&                                      \
( 0x2029 == (cp) || ( 0x2029 < (cp) &&                                      \
( 0x202F == (cp) || ( 0x202F < (cp) &&                                      \
( 0x205F == (cp) || 0x3000 == (cp) ) ) ) ) ) ) ) ) ) ) ) ) )

/* REPLACEMENT: Unicode REPLACEMENT CHARACTER 0xFFFD
   (this section continues beyond the end of the excerpt) */