summaryrefslogtreecommitdiff
path: root/handy.h
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-09-24 15:31:33 -0600
committer Karl Williamson <public@khwilliamson.com> 2011-10-01 09:58:09 -0600
commit 7d7a6efc88f38442c73da47646216496c17de4db (patch)
tree 9369abd7a337749e1bd528e40d47d68a76eb8514 /handy.h
parent c8362b00a2c72add5b4b3004cbde7ea473a3623d (diff)
download perl-7d7a6efc88f38442c73da47646216496c17de4db.tar.gz
handy.h: Don't call _utf8 fcns if Latin1
This patch avoids the overhead of calling, e.g., is_utf8_alpha() on Latin1 inputs. The result for such inputs is already known to Perl's core, so skipping the function call can also avoid a swash load.
Diffstat (limited to 'handy.h')
-rw-r--r-- handy.h 26
1 files changed, 19 insertions, 7 deletions
diff --git a/handy.h b/handy.h
index d7b6d04e88..374926f97c 100644
--- a/handy.h
+++ b/handy.h
@@ -908,11 +908,17 @@ EXTCONST U32 PL_charclass[];
#define isBLANK_LC_uni(c) isBLANK(c) /* could be wrong */
/* For use in the macros just below. If the input is ASCII, use the ASCII (_A)
- * version of the macro; otherwise use the function. This relies on the fact
- * that ASCII characters have the same representation whether utf8 or not */
-#define generic_utf8(macro, function, p) (isASCII(*(p)) \
- ? CAT2(macro, _A)(*(p)) \
- : function(p))
+ * version of the macro; if the input is in the upper Latin1 range, use the
+ * Latin1 (_L1) version of the macro, after converting from utf8; otherwise use
+ * the function. This relies on the fact that ASCII characters have the same
+ * representation whether utf8 or not */
+#define generic_utf8(macro, function, p) (isASCII(*(p)) \
+ ? CAT2(macro, _A)(*(p)) \
+ : (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
+ ? CAT2(macro, _L1) \
+ (TWO_BYTE_UTF8_TO_UNI(*(p), \
+ *((p)+1))) \
+ : function(p))
#define isALNUM_utf8(p) generic_utf8(isWORDCHAR, is_utf8_alnum, p)
/* To prevent S_scan_word in toke.c from hanging, we have to make sure that
@@ -923,7 +929,10 @@ EXTCONST U32 PL_charclass[];
* modern Unicode definition */
#define isIDFIRST_utf8(p) (isASCII(*(p)) \
? isIDFIRST_A(*(p)) \
- : (is_utf8_xidfirst(p) && is_utf8_alnum(p)))
+ : (UTF8_IS_DOWNGRADEABLE_START(*(p))) \
+ ? isIDFIRST_L1(TWO_BYTE_UTF8_TO_UNI(*(p), \
+ *((p)+1)))\
+ : (is_utf8_xidfirst(p) && is_utf8_alnum(p)))
#define isIDCONT_utf8(p) generic_utf8(isWORDCHAR, is_utf8_xidcont, p)
#define isALPHA_utf8(p) generic_utf8(isALPHA, is_utf8_alpha, p)
#define isSPACE_utf8(p) generic_utf8(isSPACE, is_utf8_space, p)
@@ -945,7 +954,10 @@ EXTCONST U32 PL_charclass[];
* Latin1 */
#define isPSXSPC_utf8(p) ((isASCII(*(p))) \
? isPSXSPC_A(*(p)) \
- : isSPACE_utf8(p))
+ : (UTF8_IS_DOWNGRADEABLE_START(*(p)) \
+ ? isPSXSPC_L1(TWO_BYTE_UTF8_TO_UNI(*(p), \
+ *((p)+1)))\
+ : isSPACE_utf8(p)))
#define isBLANK_utf8(c) isBLANK(c) /* could be wrong */
#define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(utf8_to_uvchr(p, 0))