summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2003-04-16 09:21:25 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2003-04-16 09:21:25 +0000
commit c65e4d19dcce98b8a7e895060b6fe141cfaef8a6 (patch)
tree 581563a8247f0242f08f728ca4c88b3032c28606
parent 03476e8e225023314ac8982933b2825edd7b8b8c (diff)
download perl-c65e4d19dcce98b8a7e895060b6fe141cfaef8a6.tar.gz
Synchronize the specifications of the POSIX character classes
alnum, graph, and print closer to the planned Unicode proposal.

p4raw-id: //depot/perl@19231
-rw-r--r-- lib/unicore/lib/Alnum.pl 35
-rw-r--r-- lib/unicore/lib/Graph.pl 6
-rw-r--r-- lib/unicore/lib/Print.pl 6
-rw-r--r-- lib/unicore/mktables 12
-rw-r--r-- utf8.c 4
5 files changed, 24 insertions, 39 deletions
diff --git a/lib/unicore/lib/Alnum.pl b/lib/unicore/lib/Alnum.pl
index 0ec13c55c4..5f2b1f0f65 100644
--- a/lib/unicore/lib/Alnum.pl
+++ b/lib/unicore/lib/Alnum.pl
@@ -13,10 +13,8 @@ return <<'END';
0041 005A
0061 007A
00AA
-00B2 00B3
00B5
-00B9 00BA
-00BC 00BE
+00BA
00C0 00D6
00D8 00F6
00F8 0220
@@ -85,7 +83,6 @@ return <<'END';
09DC 09DD
09DF 09E3
09E6 09F1
-09F4 09F9
0A02
0A05 0A0A
0A0F 0A10
@@ -144,7 +141,7 @@ return <<'END';
0BC6 0BC8
0BCA 0BCD
0BD7
-0BE7 0BF2
+0BE7 0BEF
0C01 0C03
0C05 0C0C
0C0E 0C10
@@ -215,7 +212,7 @@ return <<'END';
0EDC 0EDD
0F00
0F18 0F19
-0F20 0F33
+0F20 0F29
0F35
0F37
0F39
@@ -263,13 +260,12 @@ return <<'END';
1318 131E
1320 1346
1348 135A
-1369 137C
+1369 1371
13A0 13F4
1401 166C
166F 1676
1681 169A
16A0 16EA
-16EE 16F0
1700 170C
170E 1714
1720 1734
@@ -306,9 +302,8 @@ return <<'END';
1FE0 1FEC
1FF2 1FF4
1FF6 1FFC
-2070 2071
-2074 2079
-207F 2089
+2071
+207F
20D0 20EA
2102
2107
@@ -323,14 +318,10 @@ return <<'END';
2133 2139
213D 213F
2145 2149
-2153 2183
-2460 249B
-24EA 24FE
-2776 2793
-3005 3007
-3021 302F
+3005 3006
+302A 302F
3031 3035
-3038 303C
+303B 303C
3041 3096
3099 309A
309D 309F
@@ -338,13 +329,8 @@ return <<'END';
30FC 30FF
3105 312C
3131 318E
-3192 3195
31A0 31B7
31F0 31FF
-3220 3229
-3251 325F
-3280 3289
-32B1 32BF
3400 4DB5
4E00 9FA5
A000 A48C
@@ -377,8 +363,7 @@ FFCA FFCF
FFD2 FFD7
FFDA FFDC
10300 1031E
-10320 10323
-10330 1034A
+10330 10349
10400 10425
10428 1044D
1D165 1D169
diff --git a/lib/unicore/lib/Graph.pl b/lib/unicore/lib/Graph.pl
index 212c4c637f..1eff89d911 100644
--- a/lib/unicore/lib/Graph.pl
+++ b/lib/unicore/lib/Graph.pl
@@ -277,7 +277,7 @@ return <<'END';
1FDD 1FEF
1FF2 1FF4
1FF6 1FFE
-2010 2027
+2010 2029
2030 2052
2057
2070 2071
@@ -331,7 +331,7 @@ return <<'END';
A000 A48C
A490 A4C6
AC00 D7A3
-E000 FA2D
+F900 FA2D
FA30 FA6A
FB00 FB06
FB13 FB17
@@ -394,6 +394,4 @@ FFFC FFFD
1D7CE 1D7FF
20000 2A6D6
2F800 2FA1D
-F0000 FFFFD
-100000 10FFFD
END
diff --git a/lib/unicore/lib/Print.pl b/lib/unicore/lib/Print.pl
index d1eb1e23c2..ea9c5538f6 100644
--- a/lib/unicore/lib/Print.pl
+++ b/lib/unicore/lib/Print.pl
@@ -278,7 +278,7 @@ return <<'END';
1FF2 1FF4
1FF6 1FFE
2000 200B
-2010 2027
+2010 2029
202F 2052
2057
205F
@@ -333,7 +333,7 @@ return <<'END';
A000 A48C
A490 A4C6
AC00 D7A3
-E000 FA2D
+F900 FA2D
FA30 FA6A
FB00 FB06
FB13 FB17
@@ -396,6 +396,4 @@ FFFC FFFD
1D7CE 1D7FF
20000 2A6D6
2F800 2FA1D
-F0000 FFFFD
-100000 10FFFD
END
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 654301eeb4..fa07346b21 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -2,6 +2,9 @@
use strict;
use Carp;
+die "$0: Please run me as ./mktables to avoid unnecessary differences\n"
+ unless $0 eq "./mktables";
+
##
## mktables -- create the runtime Perl Unicode files (lib/unicore/**/*.pl)
## from the Unicode database files (lib/unicore/*.txt).
@@ -625,7 +628,7 @@ sub UnicodeData_Txt()
# 005F: SPACING UNDERSCORE
$Cat{Word}->$op($code) if $cat =~ /^[LMN]/ || $code == 0x005F;
- $Cat{Alnum}->$op($code) if $cat =~ /^[LMN]/;
+ $Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/;
$Cat{Alpha}->$op($code) if $cat =~ /^[LM]/;
@@ -647,7 +650,7 @@ sub UnicodeData_Txt()
|| $code == 0x2028 # 2028: LINE SEPARATOR
|| $code == 0x2029;# 2029: PARAGRAPH SEP.
- $Cat{Blank}->$op($code) if $cat =~ /^Z[^lp]$/
+ $Cat{Blank}->$op($code) if $cat eq "Zs"
|| $code == 0x0009 # 0009: HORIZONTAL TAB
|| $code == 0x0020; # 0020: SPACE
@@ -657,8 +660,9 @@ sub UnicodeData_Txt()
$Cat{Title}->$op($code) if $cat eq "Lt";
$Cat{ASCII}->$op($code) if $code <= 0x007F;
$Cat{Cntrl}->$op($code) if $cat =~ /^C/;
- $Cat{Graph}->$op($code) if $cat =~ /^([LMNPS]|Co)/;
- $Cat{Print}->$op($code) if $cat =~ /^([LMNPS]|Co|Zs)/;
+ $Cat{Graph}->$op($code) if $cat =~ /^[^C]/
+ && $cat ne "Zs";
+ $Cat{Print}->$op($code) if $cat =~ /^[^C]/;
$Cat{Punct}->$op($code) if $cat =~ /^P/;
$Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39) ## 0..9
diff --git a/utf8.c b/utf8.c
index b112d4c9a3..0dd9ad8083 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1235,7 +1235,7 @@ Perl_is_utf8_upper(pTHX_ U8 *p)
if (!is_utf8_char(p))
return FALSE;
if (!PL_utf8_upper)
- PL_utf8_upper = swash_init("utf8", "IsUpper", &PL_sv_undef, 0, 0);
+ PL_utf8_upper = swash_init("utf8", "IsUppercase", &PL_sv_undef, 0, 0);
return swash_fetch(PL_utf8_upper, p, TRUE) != 0;
}
@@ -1245,7 +1245,7 @@ Perl_is_utf8_lower(pTHX_ U8 *p)
if (!is_utf8_char(p))
return FALSE;
if (!PL_utf8_lower)
- PL_utf8_lower = swash_init("utf8", "IsLower", &PL_sv_undef, 0, 0);
+ PL_utf8_lower = swash_init("utf8", "IsLowercase", &PL_sv_undef, 0, 0);
return swash_fetch(PL_utf8_lower, p, TRUE) != 0;
}