summaryrefslogtreecommitdiff
path: root/handy.h
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-02-17 14:43:10 -0700
committer Karl Williamson <public@khwilliamson.com> 2011-02-17 15:41:54 -0700
commit c11ff9433950cda8448b773418d1cb2592eea29d (patch)
tree f0284cd5865f0db8c7484123153ab8fb860aa129 /handy.h
parent 0167186c6da6afb0eb6708879a543c70c612fc45 (diff)
download perl-c11ff9433950cda8448b773418d1cb2592eea29d.tar.gz
handy.h: isIDFIRST_utf8() changed to use XIDStart
Previously this used a home-grown definition of an identifier start, stemming from a bug in some early Unicode versions. This led to some problems, fixed by #74022. But the home-grown solution did not track Unicode, and allowed characters such as marks to begin words when they shouldn't. This change brings this macro into compliance with Unicode going forward.
Diffstat (limited to 'handy.h')
-rw-r--r-- handy.h 17
1 files changed, 7 insertions, 10 deletions
diff --git a/handy.h b/handy.h
index ad2e4b68b2..6541c95c35 100644
--- a/handy.h
+++ b/handy.h
@@ -883,16 +883,13 @@ EXTCONST U32 PL_charclass[];
#define isBLANK_LC_uni(c) isBLANK(c) /* could be wrong */
#define isALNUM_utf8(p) is_utf8_alnum(p)
-/* The ID_Start of Unicode was originally quite limiting: it assumed an
- * L-class character (meaning that you could not have, say, a CJK charac-
- * ter). So, instead, perl has for a long time allowed ID_Continue but
- * not digits.
- * We still preserve that for backward compatibility. But we also make sure
- * that it is alphanumeric, so S_scan_word in toke.c will not hang. See
- * http://rt.perl.org/rt3/Ticket/Display.html?id=74022
- * for more detail than you ever wanted to know about. */
-#define isIDFIRST_utf8(p) \
- (is_utf8_idcont(p) && !is_utf8_digit(p) && is_utf8_alnum(p))
+/* To prevent S_scan_word in toke.c from hanging, we have to make sure that
+ * IDFIRST is an alnum. See
+ * http://rt.perl.org/rt3/Ticket/Display.html?id=74022
+ * for more detail than you ever wanted to know about. This used to be not the
+ * XID version, but we decided to go with the more modern Unicode definition */
+#define isIDFIRST_utf8(p) (is_utf8_xidfirst(p) && is_utf8_alnum(p))
+#define isIDCONT_utf8(p) is_utf8_xidcont(p)
#define isALPHA_utf8(p) is_utf8_alpha(p)
#define isSPACE_utf8(p) is_utf8_space(p)
#define isDIGIT_utf8(p) is_utf8_digit(p)