summary | refs | log | tree | commit | diff
path: root/lib/unicore
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2003-06-04 11:02:54 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2003-06-04 11:02:54 +0000
commit: d75d706fd8fb3b6eda1b75fcf6c854883a424bcd (patch)
tree: 7e3be54aaa1a0b7d4d3a89b1e85f4886ac176077 /lib/unicore
parent: 2ad57d45e67703bb0b4bf345bf850bd2212eed8a (diff)
download: perl-d75d706fd8fb3b6eda1b75fcf6c854883a424bcd.tar.gz
On closer reading the proposed UTS#18 update required
even more changes.

p4raw-id: //depot/perl@19686
Diffstat (limited to 'lib/unicore')
-rw-r--r-- lib/unicore/lib/Blank.pl | 2
-rw-r--r-- lib/unicore/lib/Graph.pl | 3
-rw-r--r-- lib/unicore/lib/Print.pl | 34
-rw-r--r-- lib/unicore/lib/Space.pl | 2
-rw-r--r-- lib/unicore/lib/SpacePer.pl | 2
-rw-r--r-- lib/unicore/lib/Word.pl | 10
-rw-r--r-- lib/unicore/mktables | 39
7 files changed, 51 insertions, 41 deletions
diff --git a/lib/unicore/lib/Blank.pl b/lib/unicore/lib/Blank.pl
index 5de7d7d1fd..639ccf4c16 100644
--- a/lib/unicore/lib/Blank.pl
+++ b/lib/unicore/lib/Blank.pl
@@ -14,7 +14,7 @@ return <<'END';
00A0
1680
180E
-2000 200B
+2000 200A
202F
205F
3000
diff --git a/lib/unicore/lib/Graph.pl b/lib/unicore/lib/Graph.pl
index 943056c0c3..79ef3dbb0b 100644
--- a/lib/unicore/lib/Graph.pl
+++ b/lib/unicore/lib/Graph.pl
@@ -278,7 +278,8 @@ return <<'END';
1FDD 1FEF
1FF2 1FF4
1FF6 1FFE
-200C 202E
+200B 2027
+202A 202E
2030 2054
2057
2060 2063
diff --git a/lib/unicore/lib/Print.pl b/lib/unicore/lib/Print.pl
index 54954e7e25..a5bae46845 100644
--- a/lib/unicore/lib/Print.pl
+++ b/lib/unicore/lib/Print.pl
@@ -9,9 +9,10 @@
# Meaning: [[:Print:]]
#
return <<'END';
+0009 000D
0020 007E
-00A0 00AC
-00AE 0236
+0085
+00A0 0236
0250 0357
035D 036F
0374 0375
@@ -36,14 +37,14 @@ return <<'END';
05BB 05C4
05D0 05EA
05F0 05F4
+0600 0603
060C 0615
061B
061F
0621 063A
0640 0658
-0660 06DC
-06DE 070D
-0710 074A
+0660 070D
+070F 074A
074D 074F
0780 07B1
0901 0939
@@ -246,8 +247,7 @@ return <<'END';
1760 176C
176E 1770
1772 1773
-1780 17B3
-17B6 17DD
+1780 17DD
17E0 17E9
17F0 17F9
1800 180E
@@ -280,12 +280,10 @@ return <<'END';
1FDD 1FEF
1FF2 1FF4
1FF6 1FFE
-2000 200B
-2010 2027
-202F 2054
+2000 2054
2057
-205F
-2070 2071
+205F 2063
+206A 2071
2074 208E
20A0 20B1
20D0 20EA
@@ -331,7 +329,7 @@ return <<'END';
A000 A48C
A490 A4C6
AC00 D7A3
-F900 FA2D
+E000 FA2D
FA30 FA6A
FB00 FB06
FB13 FB17
@@ -352,6 +350,7 @@ FE54 FE66
FE68 FE6B
FE70 FE74
FE76 FEFC
+FEFF
FF01 FFBE
FFC2 FFC7
FFCA FFCF
@@ -359,7 +358,7 @@ FFD2 FFD7
FFDA FFDC
FFE0 FFE6
FFE8 FFEE
-FFFC FFFD
+FFF9 FFFD
10000 1000B
1000D 10026
10028 1003A
@@ -385,8 +384,7 @@ FFFC FFFD
1083F
1D000 1D0F5
1D100 1D126
-1D12A 1D172
-1D17B 1D1DD
+1D12A 1D1DD
1D300 1D356
1D400 1D454
1D456 1D49C
@@ -411,5 +409,9 @@ FFFC FFFD
1D7CE 1D7FF
20000 2A6D6
2F800 2FA1D
+E0001
+E0020 E007F
E0100 E01EF
+F0000 FFFFD
+100000 10FFFD
END
diff --git a/lib/unicore/lib/Space.pl b/lib/unicore/lib/Space.pl
index 9aa12c8600..6c1cc2e1b2 100644
--- a/lib/unicore/lib/Space.pl
+++ b/lib/unicore/lib/Space.pl
@@ -15,7 +15,7 @@ return <<'END';
00A0
1680
180E
-2000 200B
+2000 200A
2028 2029
202F
205F
diff --git a/lib/unicore/lib/SpacePer.pl b/lib/unicore/lib/SpacePer.pl
index 18911cfdcc..cc12e07919 100644
--- a/lib/unicore/lib/SpacePer.pl
+++ b/lib/unicore/lib/SpacePer.pl
@@ -16,7 +16,7 @@ return <<'END';
00A0
1680
180E
-2000 200B
+2000 200A
2028 2029
202F
205F
diff --git a/lib/unicore/lib/Word.pl b/lib/unicore/lib/Word.pl
index 0bf2ea4408..20936b1d62 100644
--- a/lib/unicore/lib/Word.pl
+++ b/lib/unicore/lib/Word.pl
@@ -312,6 +312,8 @@ return <<'END';
1FE0 1FEC
1FF2 1FF4
1FF6 1FFC
+203F 2040
+2054
2070 2071
2074 2079
207F 2089
@@ -340,8 +342,7 @@ return <<'END';
3041 3096
3099 309A
309D 309F
-30A1 30FA
-30FC 30FF
+30A1 30FF
3105 312C
3131 318E
3192 3195
@@ -372,12 +373,15 @@ FD92 FDC7
FDF0 FDFB
FE00 FE0F
FE20 FE23
+FE33 FE34
+FE4D FE4F
FE70 FE74
FE76 FEFC
FF10 FF19
FF21 FF3A
+FF3F
FF41 FF5A
-FF66 FFBE
+FF65 FFBE
FFC2 FFC7
FFCA FFCF
FFD2 FFD7
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 083bc581a0..5fdac52dc6 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -627,32 +627,35 @@ sub UnicodeData_Txt()
($General{$name} ||= Table->New)->$op($code, $name);
# 005F: SPACING UNDERSCORE
- $Cat{Word}->$op($code) if $cat =~ /^[LMN]/ || $code == 0x005F;
+ $Cat{Word}->$op($code) if $cat =~ /^[LMN]|Pc/;
$Cat{Alnum}->$op($code) if $cat =~ /^[LM]|Nd/;
$Cat{Alpha}->$op($code) if $cat =~ /^[LM]/;
-
-
- $Cat{Space}->$op($code) if $cat =~ /^Z/
+ my $isspace =
+ ($cat =~ /Zs|Zl|Zp/ &&
+ $code != 0x200B) # 200B is ZWSP which is for line break control
+ # and therefore it is not part of "space" even while it is "Zs".
|| $code == 0x0009 # 0009: HORIZONTAL TAB
|| $code == 0x000A # 000A: LINE FEED
|| $code == 0x000B # 000B: VERTICAL TAB
|| $code == 0x000C # 000C: FORM FEED
|| $code == 0x000D # 000D: CARRIAGE RETURN
- || $code == 0x0085; # 0085: NEL
+ || $code == 0x0085 # 0085: NEL
+
+ ;
+ $Cat{Space}->$op($code) if $isspace;
- $Cat{SpacePerl}->$op($code) if $cat =~ /^Z/
- || $code == 0x0009 # 0009: HORIZONTAL TAB
- || $code == 0x000A # 000A: LINE FEED
- || $code == 0x000C # 000C: FORM FEED
- || $code == 0x000D # 000D: CARRIAGE RETURN
- || $code == 0x0085 # 0085: <NEXT LINE>
- || $code == 0x2028 # 2028: LINE SEPARATOR
- || $code == 0x2029;# 2029: PARAGRAPH SEP.
+ $Cat{SpacePerl}->$op($code) if $isspace
+ && $code != 0x000B; # Backward compat.
- $Cat{Blank}->$op($code) if $cat eq "Zs"
- || $code == 0x0009; # 0009: HORIZONTAL TAB
+ $Cat{Blank}->$op($code) if $isspace
+ && !($code == 0x000A ||
+ $code == 0x000B ||
+ $code == 0x000C ||
+ $code == 0x000D ||
+ $code == 0x0085 ||
+ $cat =~ /^Z[lp]/);
$Cat{Digit}->$op($code) if $cat eq "Nd";
$Cat{Upper}->$op($code) if $cat eq "Lu";
@@ -660,9 +663,9 @@ sub UnicodeData_Txt()
$Cat{Title}->$op($code) if $cat eq "Lt";
$Cat{ASCII}->$op($code) if $code <= 0x007F;
$Cat{Cntrl}->$op($code) if $cat =~ /^C/;
- $Cat{Graph}->$op($code) if $cat !~ /Zs|Cc|Cs|Cn/;
- $Cat{Print}->$op($code) if $cat =~ /^[LMNPS]/
- || $cat eq "Zs";
+ my $isgraph = !$isspace && $cat !~ /Cc|Cs|Cn/;
+ $Cat{Graph}->$op($code) if $isgraph;
+ $Cat{Print}->$op($code) if $isgraph || $isspace;
$Cat{Punct}->$op($code) if $cat =~ /^P/;
$Cat{XDigit}->$op($code) if ($code >= 0x30 && $code <= 0x39) ## 0..9