diff options
-rw-r--r-- | handy.h | 4 | ||||
-rw-r--r-- | regcharclass.h | 34 | ||||
-rwxr-xr-x | regen/regcharclass.pl | 2 | ||||
-rw-r--r-- | utf8.c | 6 |
4 files changed, 39 insertions, 7 deletions
diff --git a/handy.h b/handy.h
--- a/handy.h
+++ b/handy.h
@@ -926,7 +926,7 @@ EXTCONST U32 PL_charclass[];
 
 #define isWORDCHAR_uni(c) _generic_uni(_CC_WORDCHAR, is_uni_alnum, c)
 #define isALNUM_uni(c) isWORDCHAR_uni(c)
-#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_uni_blank, c)
+#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
 #define isIDFIRST_uni(c) _generic_uni(_CC_IDFIRST, is_uni_idfirst, c)
 #define isALPHA_uni(c) _generic_uni(_CC_ALPHA, is_uni_alpha, c)
 #define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_uni_space, c)
@@ -1000,7 +1000,7 @@ EXTCONST U32 PL_charclass[];
 
 #define isIDCONT_utf8(p) _generic_utf8(_CC_WORDCHAR, is_utf8_xidcont, p)
 #define isALPHA_utf8(p) _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
-#define isBLANK_utf8(p) _generic_utf8(_CC_BLANK, is_utf8_blank, p)
+#define isBLANK_utf8(p) _generic_utf8(_CC_BLANK, is_HORIZWS_high, p)
 #define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_utf8_space, p)
 #define isVERTWS_utf8(p) _generic_utf8(_CC_VERTSPACE, is_VERTWS_high, p)
 #define isDIGIT_utf8(p) _generic_utf8(_CC_DIGIT, is_utf8_digit, p)
diff --git a/regcharclass.h b/regcharclass.h
index 497d0914cd..b3a24eb8f5 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -202,6 +202,32 @@
 : 0 )
 
 /*** GENERATED CODE ***/
+#define is_HORIZWS_high(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+    ( ( 0x9A == ((U8*)s)[1] ) ? \
+        ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
+    : ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+: ( 0xE2 == ((U8*)s)[0] ) ? \
+    ( ( 0x80 == ((U8*)s)[1] ) ? \
+        ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 ) \
+    : ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+: ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )
+
+/*** GENERATED CODE ***/
+#define is_HORIZWS_high_safe(s,e) \
+( ((e)-(s) > 2) ? \
+    ( ( 0xE1 == ((U8*)s)[0] ) ? \
+        ( ( 0x9A == ((U8*)s)[1] ) ? \
+            ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
+        : ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+    : ( 0xE2 == ((U8*)s)[0] ) ? \
+        ( ( 0x80 == ((U8*)s)[1] ) ? \
+            ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
+        : ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+    : ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )\
+: 0 )
+
+/*** GENERATED CODE ***/
 #define is_HORIZWS_cp(cp) \
 ( 0x09 == cp || ( 0x09 < cp && \
 ( 0x20 == cp || ( 0x20 < cp && \
@@ -212,6 +238,14 @@
 ( 0x202F == cp || ( 0x202F < cp && \
 ( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
 
+/*** GENERATED CODE ***/
+#define is_HORIZWS_cp_high(cp) \
+( 0x1680 == cp || ( 0x1680 < cp && \
+( 0x180E == cp || ( 0x180E < cp && \
+( ( 0x2000 <= cp && cp <= 0x200A ) || ( 0x200A < cp && \
+( 0x202F == cp || ( 0x202F < cp && \
+( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )
+
 /*
  VERTWS: Vertical Whitespace: \v \V
 
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 830a014fed..ee0fe5d87b 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1389,7 +1389,7 @@ LNBREAK: Line Break: \R
 \p{VertSpace}
 
 HORIZWS: Horizontal Whitespace: \h \H
-=> generic UTF8 LATIN1 cp :fast safe
+=> generic UTF8 LATIN1 high cp cp_high :fast safe
 \p{HorizSpace}
 
 VERTWS: Vertical Whitespace: \v \V
diff --git a/utf8.c b/utf8.c
--- a/utf8.c
+++ b/utf8.c
@@ -1515,9 +1515,7 @@ Perl_is_uni_ascii(pTHX_ UV c)
 bool
 Perl_is_uni_blank(pTHX_ UV c)
 {
-    U8 tmpbuf[UTF8_MAXBYTES+1];
-    uvchr_to_utf8(tmpbuf, c);
-    return is_utf8_blank(tmpbuf);
+    return isBLANK_uni(c);
 }
 
 bool
@@ -2061,7 +2059,7 @@ Perl_is_utf8_blank(pTHX_ const U8 *p)
 
     PERL_ARGS_ASSERT_IS_UTF8_BLANK;
 
-    return is_utf8_common(p, &PL_utf8_blank, "XPosixBlank");
+    return isBLANK_utf8(p);
 }
 
 bool