summaryrefslogtreecommitdiff
path: root/handy.h
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-11-19 14:36:12 -0700
committerKarl Williamson <public@khwilliamson.com>2012-11-19 17:13:02 -0700
commitadd4123adc9db7056121c97112791dbf273707c4 (patch)
treee87b87ee0b61cd7910dc346eb25c9128812b016c /handy.h
parentbedac28b0a4795ca4264d391bfdd7dd3190f80d3 (diff)
downloadperl-add4123adc9db7056121c97112791dbf273707c4.tar.gz
Refactor is(SPACE|PSXSPC)_(uni|utf8) macros and utf8.c
This refactors the isSPACE_uni, isSPACE_utf8, isPSXSPC_uni, and isPSXSPC_utf8 macros in handy.h, so that no function call is needed to handle above-Latin1 input. These macros are quite small, and unlikely to grow over time, as Unicode has mostly finished adding white space equivalents to the Standard. The functions that implement these in utf8.c are also changed to use the macros instead of generating a swash. This should speed things up slightly and use less memory, since there is no longer a swash that fills up over time.
Diffstat (limited to 'handy.h')
-rw-r--r--handy.h9
1 files changed, 5 insertions, 4 deletions
diff --git a/handy.h b/handy.h
index 80792e3c11..178d975a50 100644
--- a/handy.h
+++ b/handy.h
@@ -929,7 +929,7 @@ EXTCONST U32 PL_charclass[];
#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
#define isIDFIRST_uni(c) _generic_uni(_CC_IDFIRST, is_uni_idfirst, c)
#define isALPHA_uni(c) _generic_uni(_CC_ALPHA, is_uni_alpha, c)
-#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_uni_space, c)
+#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_XPERLSPACE_cp_high, c)
#define isVERTWS_uni(c) _generic_uni(_CC_VERTSPACE, is_VERTWS_cp_high, c)
#define isDIGIT_uni(c) _generic_uni(_CC_DIGIT, is_uni_digit, c)
#define isUPPER_uni(c) _generic_uni(_CC_UPPER, is_uni_upper, c)
@@ -945,7 +945,8 @@ EXTCONST U32 PL_charclass[];
#define isXDIGIT_uni(c) _generic_uni(_CC_XDIGIT, is_XDIGIT_cp_high, c)
/* Posix and regular space differ only in U+000B, which is in Latin1 */
-#define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, is_uni_space, c)
+#define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, \
+ is_XPERLSPACE_cp_high, c)
#define toUPPER_uni(c,s,l) to_uni_upper(c,s,l)
#define toTITLE_uni(c,s,l) to_uni_title(c,s,l)
@@ -1001,7 +1002,7 @@ EXTCONST U32 PL_charclass[];
#define isIDCONT_utf8(p) _generic_utf8(_CC_WORDCHAR, is_utf8_xidcont, p)
#define isALPHA_utf8(p) _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
#define isBLANK_utf8(p) _generic_utf8(_CC_BLANK, is_HORIZWS_high, p)
-#define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_utf8_space, p)
+#define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_XPERLSPACE_high, p)
#define isVERTWS_utf8(p) _generic_utf8(_CC_VERTSPACE, is_VERTWS_high, p)
#define isDIGIT_utf8(p) _generic_utf8(_CC_DIGIT, is_utf8_digit, p)
#define isUPPER_utf8(p) _generic_utf8(_CC_UPPER, is_utf8_upper, p)
@@ -1021,7 +1022,7 @@ EXTCONST U32 PL_charclass[];
/* Posix and regular space differ only in U+000B, which is in ASCII (and hence
* Latin1 */
-#define isPSXSPC_utf8(p) _generic_utf8(_CC_PSXSPC, is_utf8_space, p)
+#define isPSXSPC_utf8(p) _generic_utf8(_CC_PSXSPC, is_XPERLSPACE_high, p)
#define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(valid_utf8_to_uvchr(p, 0))
#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p, 0))