diff options
author | Marc Lehmann <pcg@goof.com> | 2000-09-03 11:44:29 +0200 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-09-07 19:12:28 +0000 |
commit | 289d4f092c382a985bdc9f6c69b13b9b784f67f9 (patch) | |
tree | 34cfb66cb3f8667f7f699a06f1802f46cd16193d | |
parent | 50577dabfdb49e7c306a4e348cf2b16310eb61aa (diff) | |
download | perl-289d4f092c382a985bdc9f6c69b13b9b784f67f9.tar.gz |
Fix for
Subject: [ID 20000903.001] \w in utf8-strings
Message-Id: <E13VUS5-0000cv-00.pgcc-forever-2000-09-03-09-44-29@fuji>
and various related nits.
p4raw-id: //depot/perl@7030
-rwxr-xr-x | lib/unicode/mktables.PL | 6 | ||||
-rw-r--r-- | regcomp.h | 8 | ||||
-rw-r--r-- | regexec.c | 1 | ||||
-rw-r--r-- | utf8.c | 5 |
4 files changed, 11 insertions, 9 deletions
diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL
index 608a3259d3..37b6e84874 100755
--- a/lib/unicode/mktables.PL
+++ b/lib/unicode/mktables.PL
@@ -9,9 +9,9 @@ $PropData = "PropList.txt";
 # Note: we try to keep filenames unique within first 8 chars. Using
 # subdirectories for the following helps.
 
-mkdir "In", 0777;
-mkdir "Is", 0777;
-mkdir "To", 0777;
+mkdir "In", 0755;
+mkdir "Is", 0755;
+mkdir "To", 0755;
 
 @todo = (
 # typical
@@ -192,13 +192,13 @@ struct regnode_charclass_class {
 /* Should be synchronized with a table in regprop() */
 /* 2n should pair with 2n+1 */
 
-#define ANYOF_ALNUM 0 /* \w, utf8::IsWord, isALNUM() */
+#define ANYOF_ALNUM 0 /* \w, PL_utf8_alnum, utf8::IsWord, ALNUM */
 #define ANYOF_NALNUM 1
 #define ANYOF_SPACE 2 /* \s */
 #define ANYOF_NSPACE 3
 #define ANYOF_DIGIT 4
 #define ANYOF_NDIGIT 5
-#define ANYOF_ALNUMC 6 /* isalnum(3), utf8::IsAlnum, isALNUMC() */
+#define ANYOF_ALNUMC 6 /* isalnum(3), utf8::IsAlnum, ALNUMC */
 #define ANYOF_NALNUMC 7
 #define ANYOF_ALPHA 8
 #define ANYOF_NALPHA 9
@@ -220,8 +220,8 @@ struct regnode_charclass_class {
 #define ANYOF_NXDIGIT 25
 #define ANYOF_PSXSPC 26 /* POSIX space: \s plus the vertical tab */
 #define ANYOF_NPSXSPC 27
-#define ANYOF_BLANK 28
-#define ANYOF_NBLANK 29 /* GNU extension: space and tab */
+#define ANYOF_BLANK 28 /* GNU extension: space and tab */
+#define ANYOF_NBLANK 29
 
 #define ANYOF_MAX 32
 
@@ -3735,4 +3735,3 @@ restore_pos(pTHXo_ void *arg)
         PL_curpm = PL_reg_oldcurpm;
     }
 }
-
@@ -645,7 +645,10 @@ Perl_is_utf8_alnum(pTHX_ U8 *p)
     if (!is_utf8_char(p))
         return FALSE;
     if (!PL_utf8_alnum)
-        PL_utf8_alnum = swash_init("utf8", "IsAlnum", &PL_sv_undef, 0, 0);
+        /* NOTE: "IsWord", not "IsAlnum", since Alnum is a true
+         * descendant of isalnum(3), in other words, it doesn't
+         * contain the '_'. --jhi */
+        PL_utf8_alnum = swash_init("utf8", "IsWord", &PL_sv_undef, 0, 0);
     return swash_fetch(PL_utf8_alnum, p);
 /* return *p == '_' || is_utf8_alpha(p) || is_utf8_digit(p); */
 #ifdef SURPRISINGLY_SLOWER /* probably because alpha is usually true */