summary refs log tree commit diff
path: root/utf8.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-02-17 14:43:10 -0700
committer Karl Williamson <public@khwilliamson.com> 2011-02-17 15:41:54 -0700
commit c11ff9433950cda8448b773418d1cb2592eea29d (patch)
tree f0284cd5865f0db8c7484123153ab8fb860aa129 /utf8.c
parent 0167186c6da6afb0eb6708879a543c70c612fc45 (diff)
download perl-c11ff9433950cda8448b773418d1cb2592eea29d.tar.gz
handy.h: isIDFIRST_utf8() changed to use XIDStart
Previously this used a home-grown definition of an identifier start, stemming from a bug in some early Unicode versions. This led to some problems, fixed by #74022. But the home-grown solution did not track Unicode, and allowed characters, such as marks, to begin words when they shouldn't. This change brings this macro into compliance with Unicode going forward.
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 25
1 files changed, 25 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index b5d853188b..808d9a80a7 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1501,6 +1501,19 @@ Perl_is_utf8_idfirst(pTHX_ const U8 *p) /* The naming is historical. */
}
bool
+Perl_is_utf8_xidfirst(pTHX_ const U8 *p) /* The naming is historical. */
+{
+ dVAR;
+ /* TRUE if *p is '_', else the is_utf8_common() result for "XIdStart". */
+ PERL_ARGS_ASSERT_IS_UTF8_XIDFIRST;
+ /* An underscore is accepted outright, without calling is_utf8_common(). */
+ if (*p == '_')
+ return TRUE;
+ /* is_utf8_xidstart would be more logical. */
+ return is_utf8_common(p, &PL_utf8_xidstart, "XIdStart");
+}
+
+bool
Perl_is_utf8_idcont(pTHX_ const U8 *p)
{
dVAR;
@@ -1513,6 +1526,18 @@ Perl_is_utf8_idcont(pTHX_ const U8 *p)
}
bool
+Perl_is_utf8_xidcont(pTHX_ const U8 *p)
+{
+ dVAR;
+ /* TRUE if *p is '_', else the is_utf8_common() result for "XIdContinue". */
+ PERL_ARGS_ASSERT_IS_UTF8_XIDCONT;
+ /* Pass this property's own variable, as Perl_is_utf8_xidfirst does with PL_utf8_xidstart, not the plain PL_utf8_idcont. */
+ if (*p == '_')
+ return TRUE;
+ return is_utf8_common(p, &PL_utf8_xidcont, "XIdContinue"); /* NOTE(review): confirm PL_utf8_xidcont is declared */
+}
+
+bool
Perl_is_utf8_alpha(pTHX_ const U8 *p)
{
dVAR;