diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-09-26 13:30:40 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-10-01 09:58:09 -0600 |
commit | c8362b00a2c72add5b4b3004cbde7ea473a3623d (patch) | |
tree | c76b42be72e3f372a5cdb74f93dc4c024b0a2e1a /handy.h | |
parent | c0249a20a804e6fc6fcddc544485daf1e2dffac8 (diff) | |
download | perl-c8362b00a2c72add5b4b3004cbde7ea473a3623d.tar.gz |
handy.h: Don't call _utf8 fcns if ASCII
This patch avoids the overhead of calling, e.g., is_utf8_alpha() on ASCII
inputs. The result for such inputs is already known to Perl's core, so this
can also avoid a swash load.
Diffstat (limited to 'handy.h')
-rw-r--r-- | handy.h | 48 |
1 files changed, 31 insertions, 17 deletions
@@ -907,31 +907,45 @@ EXTCONST U32 PL_charclass[]; #define isPSXSPC_LC_uni(c) (isSPACE_LC_uni(c) ||(c) == '\f') #define isBLANK_LC_uni(c) isBLANK(c) /* could be wrong */ -#define isALNUM_utf8(p) is_utf8_alnum(p) +/* For use in the macros just below. If the input is ASCII, use the ASCII (_A) + * version of the macro; otherwise use the function. This relies on the fact + * that ASCII characters have the same representation whether utf8 or not */ +#define generic_utf8(macro, function, p) (isASCII(*(p)) \ + ? CAT2(macro, _A)(*(p)) \ + : function(p)) + +#define isALNUM_utf8(p) generic_utf8(isWORDCHAR, is_utf8_alnum, p) /* To prevent S_scan_word in toke.c from hanging, we have to make sure that * IDFIRST is an alnum. See - * http://rt.perl.org/rt3/Ticket/Display.html?id=74022 - * for more detail than you ever wanted to know about. This used to be not the - * XID version, but we decided to go with the more modern Unicode definition */ -#define isIDFIRST_utf8(p) (is_utf8_xidfirst(p) && is_utf8_alnum(p)) -#define isIDCONT_utf8(p) is_utf8_xidcont(p) -#define isALPHA_utf8(p) is_utf8_alpha(p) -#define isSPACE_utf8(p) is_utf8_space(p) -#define isDIGIT_utf8(p) is_utf8_digit(p) -#define isUPPER_utf8(p) is_utf8_upper(p) -#define isLOWER_utf8(p) is_utf8_lower(p) + * http://rt.perl.org/rt3/Ticket/Display.html?id=74022 for more detail than you + * ever wanted to know about. (In the ASCII range, there isn't a difference.) + * This used to be not the XID version, but we decided to go with the more + * modern Unicode definition */ +#define isIDFIRST_utf8(p) (isASCII(*(p)) \ + ? 
isIDFIRST_A(*(p)) \ + : (is_utf8_xidfirst(p) && is_utf8_alnum(p))) +#define isIDCONT_utf8(p) generic_utf8(isWORDCHAR, is_utf8_xidcont, p) +#define isALPHA_utf8(p) generic_utf8(isALPHA, is_utf8_alpha, p) +#define isSPACE_utf8(p) generic_utf8(isSPACE, is_utf8_space, p) +#define isDIGIT_utf8(p) generic_utf8(isDIGIT, is_utf8_digit, p) +#define isUPPER_utf8(p) generic_utf8(isUPPER, is_utf8_upper, p) +#define isLOWER_utf8(p) generic_utf8(isLOWER, is_utf8_lower, p) /* Because ASCII is invariant under utf8, the non-utf8 macro works */ #define isASCII_utf8(p) isASCII(p) -#define isCNTRL_utf8(p) is_utf8_cntrl(p) -#define isGRAPH_utf8(p) is_utf8_graph(p) -#define isPRINT_utf8(p) is_utf8_print(p) -#define isPUNCT_utf8(p) is_utf8_punct(p) -#define isXDIGIT_utf8(p) is_utf8_xdigit(p) +#define isCNTRL_utf8(p) generic_utf8(isCNTRL, is_utf8_cntrl, p) +#define isGRAPH_utf8(p) generic_utf8(isGRAPH, is_utf8_graph, p) +#define isPRINT_utf8(p) generic_utf8(isPRINT, is_utf8_print, p) +#define isPUNCT_utf8(p) generic_utf8(isPUNCT, is_utf8_punct, p) +#define isXDIGIT_utf8(p) generic_utf8(isXDIGIT, is_utf8_xdigit, p) #define toUPPER_utf8(p,s,l) to_utf8_upper(p,s,l) #define toTITLE_utf8(p,s,l) to_utf8_title(p,s,l) #define toLOWER_utf8(p,s,l) to_utf8_lower(p,s,l) -#define isPSXSPC_utf8(c) (isSPACE_utf8(c) ||(c) == '\f') +/* Posix and regular space differ only in U+000B, which is in ASCII (and hence + * Latin1 */ +#define isPSXSPC_utf8(p) ((isASCII(*(p))) \ + ? isPSXSPC_A(*(p)) \ + : isSPACE_utf8(p)) #define isBLANK_utf8(c) isBLANK(c) /* could be wrong */ #define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(utf8_to_uvchr(p, 0)) |