summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- handy.h 4
-rw-r--r-- regcharclass.h 34
-rwxr-xr-x regen/regcharclass.pl 2
-rw-r--r-- utf8.c 6
4 files changed, 39 insertions, 7 deletions
diff --git a/handy.h b/handy.h
index e657919e44..161e936913 100644
--- a/handy.h
+++ b/handy.h
@@ -926,7 +926,7 @@ EXTCONST U32 PL_charclass[];
#define isWORDCHAR_uni(c) _generic_uni(_CC_WORDCHAR, is_uni_alnum, c)
#define isALNUM_uni(c) isWORDCHAR_uni(c)
-#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_uni_blank, c)
+#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
#define isIDFIRST_uni(c) _generic_uni(_CC_IDFIRST, is_uni_idfirst, c)
#define isALPHA_uni(c) _generic_uni(_CC_ALPHA, is_uni_alpha, c)
#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_uni_space, c)
@@ -1000,7 +1000,7 @@ EXTCONST U32 PL_charclass[];
#define isIDCONT_utf8(p) _generic_utf8(_CC_WORDCHAR, is_utf8_xidcont, p)
#define isALPHA_utf8(p) _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
-#define isBLANK_utf8(p) _generic_utf8(_CC_BLANK, is_utf8_blank, p)
+#define isBLANK_utf8(p) _generic_utf8(_CC_BLANK, is_HORIZWS_high, p)
#define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_utf8_space, p)
#define isVERTWS_utf8(p) _generic_utf8(_CC_VERTSPACE, is_VERTWS_high, p)
#define isDIGIT_utf8(p) _generic_utf8(_CC_DIGIT, is_utf8_digit, p)
diff --git a/regcharclass.h b/regcharclass.h
index 497d0914cd..b3a24eb8f5 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -202,6 +202,32 @@
: 0 )
/*** GENERATED CODE ***/
+#define is_HORIZWS_high(s) \
+( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x9A == ((U8*)s)[1] ) ? \
+ ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+: ( 0xE2 == ((U8*)s)[0] ) ? \
+ ( ( 0x80 == ((U8*)s)[1] ) ? \
+ ( ( ( ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+: ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )
+
+/*** GENERATED CODE ***/
+#define is_HORIZWS_high_safe(s,e) \
+( ((e)-(s) > 2) ? \
+ ( ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0x9A == ((U8*)s)[1] ) ? \
+ ( ( 0x80 == ((U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0xA0 == ((U8*)s)[1] ) && ( 0x8E == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( 0xE2 == ((U8*)s)[0] ) ? \
+ ( ( 0x80 == ((U8*)s)[1] ) ? \
+ ( ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x8A ) || 0xAF == ((U8*)s)[2] ) ? 3 : 0 )\
+ : ( ( 0x81 == ((U8*)s)[1] ) && ( 0x9F == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( ( ( 0xE3 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( 0x80 == ((U8*)s)[2] ) ) ? 3 : 0 )\
+: 0 )
+
+/*** GENERATED CODE ***/
#define is_HORIZWS_cp(cp) \
( 0x09 == cp || ( 0x09 < cp && \
( 0x20 == cp || ( 0x20 < cp && \
@@ -212,6 +238,14 @@
( 0x202F == cp || ( 0x202F < cp && \
( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
+/*** GENERATED CODE ***/
+#define is_HORIZWS_cp_high(cp) \
+( 0x1680 == cp || ( 0x1680 < cp && \
+( 0x180E == cp || ( 0x180E < cp && \
+( ( 0x2000 <= cp && cp <= 0x200A ) || ( 0x200A < cp && \
+( 0x202F == cp || ( 0x202F < cp && \
+( 0x205F == cp || 0x3000 == cp ) ) ) ) ) ) ) ) )
+
/*
VERTWS: Vertical Whitespace: \v \V
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 830a014fed..ee0fe5d87b 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1389,7 +1389,7 @@ LNBREAK: Line Break: \R
\p{VertSpace}
HORIZWS: Horizontal Whitespace: \h \H
-=> generic UTF8 LATIN1 cp :fast safe
+=> generic UTF8 LATIN1 high cp cp_high :fast safe
\p{HorizSpace}
VERTWS: Vertical Whitespace: \v \V
diff --git a/utf8.c b/utf8.c
index cc38b0ff05..2d7eb08270 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1515,9 +1515,7 @@ Perl_is_uni_ascii(pTHX_ UV c)
bool
Perl_is_uni_blank(pTHX_ UV c)
{
- U8 tmpbuf[UTF8_MAXBYTES+1];
- uvchr_to_utf8(tmpbuf, c);
- return is_utf8_blank(tmpbuf);
+ return isBLANK_uni(c);
}
bool
@@ -2061,7 +2059,7 @@ Perl_is_utf8_blank(pTHX_ const U8 *p)
PERL_ARGS_ASSERT_IS_UTF8_BLANK;
- return is_utf8_common(p, &PL_utf8_blank, "XPosixBlank");
+ return isBLANK_utf8(p);
}
bool