summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-11-19 14:36:12 -0700
committerKarl Williamson <public@khwilliamson.com>2012-11-19 17:13:02 -0700
commitadd4123adc9db7056121c97112791dbf273707c4 (patch)
treee87b87ee0b61cd7910dc346eb25c9128812b016c
parentbedac28b0a4795ca4264d391bfdd7dd3190f80d3 (diff)
downloadperl-add4123adc9db7056121c97112791dbf273707c4.tar.gz
Refactor is(SPACE|PSXSPC)_(uni|utf8) macros and utf8.c
This refactors the isSPACE_uni, isSPACE_utf8, isPSXSPC_uni, and isPSXSPC_utf8 macros in handy.h, so that no function call need be done to handle above-Latin1 input. These macros are quite small, and unlikely to grow over time, as Unicode has mostly finished adding white space equivalents to the Standard. The functions that implement these in utf8.c are also changed to use the macros instead of generating a swash. This should speed things up slightly, and use less memory, since there is no longer a swash that grows as it fills over time.
-rw-r--r--handy.h9
-rw-r--r--utf8.c6
2 files changed, 7 insertions, 8 deletions
diff --git a/handy.h b/handy.h
index 80792e3c11..178d975a50 100644
--- a/handy.h
+++ b/handy.h
@@ -929,7 +929,7 @@ EXTCONST U32 PL_charclass[];
#define isBLANK_uni(c) _generic_uni(_CC_BLANK, is_HORIZWS_cp_high, c)
#define isIDFIRST_uni(c) _generic_uni(_CC_IDFIRST, is_uni_idfirst, c)
#define isALPHA_uni(c) _generic_uni(_CC_ALPHA, is_uni_alpha, c)
-#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_uni_space, c)
+#define isSPACE_uni(c) _generic_uni(_CC_SPACE, is_XPERLSPACE_cp_high, c)
#define isVERTWS_uni(c) _generic_uni(_CC_VERTSPACE, is_VERTWS_cp_high, c)
#define isDIGIT_uni(c) _generic_uni(_CC_DIGIT, is_uni_digit, c)
#define isUPPER_uni(c) _generic_uni(_CC_UPPER, is_uni_upper, c)
@@ -945,7 +945,8 @@ EXTCONST U32 PL_charclass[];
#define isXDIGIT_uni(c) _generic_uni(_CC_XDIGIT, is_XDIGIT_cp_high, c)
/* Posix and regular space differ only in U+000B, which is in Latin1 */
-#define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, is_uni_space, c)
+#define isPSXSPC_uni(c) _generic_uni(_CC_PSXSPC, \
+ is_XPERLSPACE_cp_high, c)
#define toUPPER_uni(c,s,l) to_uni_upper(c,s,l)
#define toTITLE_uni(c,s,l) to_uni_title(c,s,l)
@@ -1001,7 +1002,7 @@ EXTCONST U32 PL_charclass[];
#define isIDCONT_utf8(p) _generic_utf8(_CC_WORDCHAR, is_utf8_xidcont, p)
#define isALPHA_utf8(p) _generic_utf8(_CC_ALPHA, is_utf8_alpha, p)
#define isBLANK_utf8(p) _generic_utf8(_CC_BLANK, is_HORIZWS_high, p)
-#define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_utf8_space, p)
+#define isSPACE_utf8(p) _generic_utf8(_CC_SPACE, is_XPERLSPACE_high, p)
#define isVERTWS_utf8(p) _generic_utf8(_CC_VERTSPACE, is_VERTWS_high, p)
#define isDIGIT_utf8(p) _generic_utf8(_CC_DIGIT, is_utf8_digit, p)
#define isUPPER_utf8(p) _generic_utf8(_CC_UPPER, is_utf8_upper, p)
@@ -1021,7 +1022,7 @@ EXTCONST U32 PL_charclass[];
/* Posix and regular space differ only in U+000B, which is in ASCII (and hence
* Latin1 */
-#define isPSXSPC_utf8(p) _generic_utf8(_CC_PSXSPC, is_utf8_space, p)
+#define isPSXSPC_utf8(p) _generic_utf8(_CC_PSXSPC, is_XPERLSPACE_high, p)
#define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(valid_utf8_to_uvchr(p, 0))
#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p, 0))
diff --git a/utf8.c b/utf8.c
index 7092d0648e..56213176f0 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1521,9 +1521,7 @@ Perl_is_uni_blank(pTHX_ UV c)
bool
Perl_is_uni_space(pTHX_ UV c)
{
- U8 tmpbuf[UTF8_MAXBYTES+1];
- uvchr_to_utf8(tmpbuf, c);
- return is_utf8_space(tmpbuf);
+ return isSPACE_uni(c);
}
bool
@@ -2067,7 +2065,7 @@ Perl_is_utf8_space(pTHX_ const U8 *p)
PERL_ARGS_ASSERT_IS_UTF8_SPACE;
- return is_utf8_common(p, &PL_utf8_space, "IsXPerlSpace");
+ return isSPACE_utf8(p);
}
bool