handy.h: isIDFIRST_utf8() changed to use XIDStart

Previously this macro used a home-grown definition of an identifier start (ID_Continue minus digits), stemming from a bug in some early Unicode versions. This led to some problems, fixed by #74022. But the home-grown solution did not track Unicode, and allowed characters such as marks to begin words when they shouldn't. This change brings the macro into compliance with Unicode going forward.
author: Karl Williamson <public@khwilliamson.com> 2011-02-17 14:43:10 -0700
committer: Karl Williamson <public@khwilliamson.com> 2011-02-17 15:41:54 -0700
commit: c11ff9433950cda8448b773418d1cb2592eea29d (patch)
tree: f0284cd5865f0db8c7484123153ab8fb860aa129 /handy.h
parent: 0167186c6da6afb0eb6708879a543c70c612fc45 (diff)
download: perl-c11ff9433950cda8448b773418d1cb2592eea29d.tar.gz
1 files changed, 7 insertions, 10 deletions
diff --git a/handy.h b/handy.h
index ad2e4b68b2..6541c95c35 100644
--- a/handy.h
+++ b/handy.h
@@ -883,16 +883,13 @@ EXTCONST U32 PL_charclass[];
 #define isBLANK_LC_uni(c)	isBLANK(c) /* could be wrong */
 
 #define isALNUM_utf8(p)		is_utf8_alnum(p)
-/* The ID_Start of Unicode was originally quite limiting: it assumed an
- * L-class character (meaning that you could not have, say, a CJK charac-
- * ter). So, instead, perl has for a long time allowed ID_Continue but
- * not digits.
- * We still preserve that for backward compatibility. But we also make sure
- * that it is alphanumeric, so S_scan_word in toke.c will not hang. See
- *    http://rt.perl.org/rt3/Ticket/Display.html?id=74022
- * for more detail than you ever wanted to know about. */
-#define isIDFIRST_utf8(p) \
-    (is_utf8_idcont(p) && !is_utf8_digit(p) && is_utf8_alnum(p))
+/* To prevent S_scan_word in toke.c from hanging, we have to make sure that
+ * IDFIRST is an alnum.  See
+ * http://rt.perl.org/rt3/Ticket/Display.html?id=74022
+ * for more detail than you ever wanted to know about.  This used to be not the
+ * XID version, but we decided to go with the more modern Unicode definition */
+#define isIDFIRST_utf8(p)	(is_utf8_xidfirst(p) && is_utf8_alnum(p))
+#define isIDCONT_utf8(p)	is_utf8_xidcont(p)
 #define isALPHA_utf8(p)		is_utf8_alpha(p)
 #define isSPACE_utf8(p)		is_utf8_space(p)
 #define isDIGIT_utf8(p)		is_utf8_digit(p)
author	Karl Williamson <public@khwilliamson.com>	2011-02-17 14:43:10 -0700
committer	Karl Williamson <public@khwilliamson.com>	2011-02-17 15:41:54 -0700
commit	c11ff9433950cda8448b773418d1cb2592eea29d (patch)
tree	f0284cd5865f0db8c7484123153ab8fb860aa129 /handy.h
parent	0167186c6da6afb0eb6708879a543c70c612fc45 (diff)
download	perl-c11ff9433950cda8448b773418d1cb2592eea29d.tar.gz