summary | refs | log | tree | commit | diff
path: root/utf8.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2002-04-16 03:59:00 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2002-04-16 03:59:00 +0000
commit: 82686b017bb20f55e16f84c47f7ac0bf8d0c714b (patch)
tree: e7ad28a90ea768b323c2fb37103841ceb7b8dd93 /utf8.c
parent: 58858581d2d18dc2bff021fb2c755408c36929c4 (diff)
download: perl-82686b017bb20f55e16f84c47f7ac0bf8d0c714b.tar.gz
my $utf8here, our $utf8here, and package variable $utf8here.
The actual minimal fix is in utf8.c and comes from NI-S; the rest are the tests (in fresh_perl, since I couldn't easily get them to work elsewhere) and a slight behaviour change: previously UTF-8 identifiers had to start with an alphabetic character. That is no longer so; now they can start with a (Unicode) ID_Continue character (which, however, is not a (Unicode) digit). (Limiting the first character to ID_Start would be rather restrictive, since ID_Start allows only alphabetic letters.) TODO: use vars qw($utf8here). I don't find this to be a showstopper. p4raw-id: //depot/perl@15943
Diffstat (limited to 'utf8.c')
-rw-r--r-- utf8.c 35
1 files changed, 27 insertions, 8 deletions
diff --git a/utf8.c b/utf8.c
index 1b13809dcd..3ad3a9573e 100644
--- a/utf8.c
+++ b/utf8.c
@@ -170,12 +170,11 @@ Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
=for apidoc A|STRLEN|is_utf8_char|U8 *s
Tests if some arbitrary number of bytes begins in a valid UTF-8
-character. Note that an INVARIANT (i.e. ASCII) character is a valid UTF-8 character.
-The actual number of bytes in the UTF-8 character will be returned if
-it is valid, otherwise 0.
+character. Note that an INVARIANT (i.e. ASCII) character is a valid
+UTF-8 character. The actual number of bytes in the UTF-8 character
+will be returned if it is valid, otherwise 0.
-=cut
-*/
+=cut */
STRLEN
Perl_is_utf8_char(pTHX_ U8 *s)
{
@@ -1156,9 +1155,27 @@ Perl_is_utf8_alnumc(pTHX_ U8 *p)
}
bool
-Perl_is_utf8_idfirst(pTHX_ U8 *p)
+Perl_is_utf8_idfirst(pTHX_ U8 *p) /* The naming is historical. */
{
- return *p == '_' || is_utf8_alpha(p);
+ if (*p == '_')
+ return TRUE;
+ if (!is_utf8_char(p))
+ return FALSE;
+ if (!PL_utf8_idstart) /* is_utf8_idstart would be more logical. */
+ PL_utf8_idstart = swash_init("utf8", "IdStart", &PL_sv_undef, 0, 0);
+ return swash_fetch(PL_utf8_idstart, p, TRUE);
+}
+
+bool
+Perl_is_utf8_idcont(pTHX_ U8 *p)
+{
+ if (*p == '_')
+ return TRUE;
+ if (!is_utf8_char(p))
+ return FALSE;
+ if (!PL_utf8_idcont)
+ PL_utf8_idcont = swash_init("utf8", "IdContinue", &PL_sv_undef, 0, 0);
+ return swash_fetch(PL_utf8_idcont, p, TRUE);
}
bool
@@ -1514,9 +1531,11 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none)
SAVEI32(PL_hints);
PL_hints = 0;
save_re_context();
- if (PL_curcop == &PL_compiling)
+ if (PL_curcop == &PL_compiling) {
/* XXX ought to be handled by lex_start */
+ SAVEI32(PL_in_my);
sv_setpv(tokenbufsv, PL_tokenbuf);
+ }
errsv_save = newSVsv(ERRSV);
if (call_method("SWASHNEW", G_SCALAR))
retval = newSVsv(*PL_stack_sp--);