diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-02-17 14:43:10 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-02-17 15:41:54 -0700 |
commit | c11ff9433950cda8448b773418d1cb2592eea29d (patch) | |
tree | f0284cd5865f0db8c7484123153ab8fb860aa129 /utf8.c | |
parent | 0167186c6da6afb0eb6708879a543c70c612fc45 (diff) | |
download | perl-c11ff9433950cda8448b773418d1cb2592eea29d.tar.gz |
handy.h: isIDFIRST_utf8() changed to use XIDStart
Previously this used a home-grown definition of an identifier start,
stemming from a bug in some early Unicode versions. This led to some
problems, fixed by #74022.
But the home-grown solution did not track Unicode, and allowed
characters such as marks to begin words when they shouldn't. This change
brings this macro into compliance with Unicode going forward.
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 25 |
1 files changed, 25 insertions, 0 deletions
@@ -1501,6 +1501,19 @@ Perl_is_utf8_idfirst(pTHX_ const U8 *p) /* The naming is historical. */
 }
 
 bool
+Perl_is_utf8_xidfirst(pTHX_ const U8 *p) /* The naming is historical. */
+{
+    dVAR;
+
+    PERL_ARGS_ASSERT_IS_UTF8_XIDFIRST;
+
+    if (*p == '_')
+        return TRUE;
+    /* is_utf8_idstart would be more logical. */
+    return is_utf8_common(p, &PL_utf8_xidstart, "XIdStart");
+}
+
+bool
 Perl_is_utf8_idcont(pTHX_ const U8 *p)
 {
     dVAR;
@@ -1513,6 +1526,18 @@ Perl_is_utf8_idcont(pTHX_ const U8 *p)
 }
 
 bool
+Perl_is_utf8_xidcont(pTHX_ const U8 *p) /* TRUE for '_' or an XIdContinue match */
+{
+    dVAR;
+
+    PERL_ARGS_ASSERT_IS_UTF8_XIDCONT;
+
+    if (*p == '_')
+        return TRUE;
+    return is_utf8_common(p, &PL_utf8_xidcont, "XIdContinue");
+}
+
+bool
 Perl_is_utf8_alpha(pTHX_ const U8 *p)
 {
     dVAR;