my $utf8here, our $utf8here, and package variable $utf8here.

The actual minimal fix is in utf8.c and comes from NI-S; the rest are the tests (in fresh_perl, since I couldn't easily get them to work elsewhere) and a slight behaviour change: previously UTF-8 identifiers had to start with an alphabetic character. This is no longer so: now they can start with a (Unicode) ID_Continue character (which, however, is not a (Unicode) digit). (Limiting the first character to ID_Start would be rather restrictive, since ID_Start allows only alphabetic letters.) TODO: use vars qw($utf8here). I don't find this to be a showstopper. p4raw-id: //depot/perl@15943
author: Jarkko Hietaniemi <jhi@iki.fi> 2002-04-16 03:59:00 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2002-04-16 03:59:00 +0000
commit: 82686b017bb20f55e16f84c47f7ac0bf8d0c714b (patch)
tree: e7ad28a90ea768b323c2fb37103841ceb7b8dd93 /utf8.c
parent: 58858581d2d18dc2bff021fb2c755408c36929c4 (diff)
download: perl-82686b017bb20f55e16f84c47f7ac0bf8d0c714b.tar.gz
1 files changed, 27 insertions, 8 deletions
diff --git a/utf8.c b/utf8.c
index 1b13809dcd..3ad3a9573e 100644
--- a/utf8.c
+++ b/utf8.c
@@ -170,12 +170,11 @@ Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
 =for apidoc A|STRLEN|is_utf8_char|U8 *s
 
 Tests if some arbitrary number of bytes begins in a valid UTF-8
-character.  Note that an INVARIANT (i.e. ASCII) character is a valid UTF-8 character.
-The actual number of bytes in the UTF-8 character will be returned if
-it is valid, otherwise 0.
+character.  Note that an INVARIANT (i.e. ASCII) character is a valid
+UTF-8 character.  The actual number of bytes in the UTF-8 character
+will be returned if it is valid, otherwise 0.
 
-=cut
-*/
+=cut */
 STRLEN
 Perl_is_utf8_char(pTHX_ U8 *s)
 {
@@ -1156,9 +1155,27 @@ Perl_is_utf8_alnumc(pTHX_ U8 *p)
 }
 
 bool
-Perl_is_utf8_idfirst(pTHX_ U8 *p)
+Perl_is_utf8_idfirst(pTHX_ U8 *p) /* The naming is historical. */
 {
-    return *p == '_' || is_utf8_alpha(p);
+    if (*p == '_')
+	return TRUE;
+    if (!is_utf8_char(p))
+	return FALSE;
+    if (!PL_utf8_idstart) /* is_utf8_idstart would be more logical. */
+	PL_utf8_idstart = swash_init("utf8", "IdStart", &PL_sv_undef, 0, 0);
+    return swash_fetch(PL_utf8_idstart, p, TRUE);
+}
+
+bool
+Perl_is_utf8_idcont(pTHX_ U8 *p)
+{
+    if (*p == '_')
+	return TRUE;
+    if (!is_utf8_char(p))
+	return FALSE;
+    if (!PL_utf8_idcont)
+	PL_utf8_idcont = swash_init("utf8", "IdContinue", &PL_sv_undef, 0, 0);
+    return swash_fetch(PL_utf8_idcont, p, TRUE);
 }
 
 bool
@@ -1514,9 +1531,11 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none)
     SAVEI32(PL_hints);
     PL_hints = 0;
     save_re_context();
-    if (PL_curcop == &PL_compiling)
+    if (PL_curcop == &PL_compiling) {
 	/* XXX ought to be handled by lex_start */
+	SAVEI32(PL_in_my);
 	sv_setpv(tokenbufsv, PL_tokenbuf);
+    }
     errsv_save = newSVsv(ERRSV);
     if (call_method("SWASHNEW", G_SCALAR))
 	retval = newSVsv(*PL_stack_sp--);
author	Jarkko Hietaniemi <jhi@iki.fi>	2002-04-16 03:59:00 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2002-04-16 03:59:00 +0000
commit	82686b017bb20f55e16f84c47f7ac0bf8d0c714b (patch)
tree	e7ad28a90ea768b323c2fb37103841ceb7b8dd93 /utf8.c
parent	58858581d2d18dc2bff021fb2c755408c36929c4 (diff)
download	perl-82686b017bb20f55e16f84c47f7ac0bf8d0c714b.tar.gz