summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2001-10-17 00:54:28 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-10-17 00:54:28 +0000
commit d2d499f5a831730fa4ee7eedade0afc419d869bc (patch)
tree 1df16b40c338b4b81bdfdf5dcb28871ad34ea4b5
parent 7e3b091dcbe93a1c94db1d9abc2570cf54821a92 (diff)
download perl-d2d499f5a831730fa4ee7eedade0afc419d869bc.tar.gz
Add the special casing mappings (from SpecCase.txt)
(except for the hyper-special case mappings that have a condition list); the special casing mappings are unused for now. Small tidying up of mktables.
p4raw-id: //depot/perl@12465
-rw-r--r-- lib/unicore/To/SpecLower.pl 107
-rw-r--r-- lib/unicore/To/SpecTitle.pl 106
-rw-r--r-- lib/unicore/To/SpecUpper.pl 106
-rw-r--r-- lib/unicore/mktables 85
4 files changed, 376 insertions, 28 deletions
diff --git a/lib/unicore/To/SpecLower.pl b/lib/unicore/To/SpecLower.pl
new file mode 100644
index 0000000000..18c073b98b
--- /dev/null
+++ b/lib/unicore/To/SpecLower.pl
@@ -0,0 +1,107 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+00DF 00DF
+0149 0149
+01F0 01F0
+0390 0390
+03B0 03B0
+0587 0587
+1E96 1E96
+1E97 1E97
+1E98 1E98
+1E99 1E99
+1E9A 1E9A
+1F50 1F50
+1F52 1F52
+1F54 1F54
+1F56 1F56
+1F80 1F80
+1F81 1F81
+1F82 1F82
+1F83 1F83
+1F84 1F84
+1F85 1F85
+1F86 1F86
+1F87 1F87
+1F88 1F80
+1F89 1F81
+1F8A 1F82
+1F8B 1F83
+1F8C 1F84
+1F8D 1F85
+1F8E 1F86
+1F8F 1F87
+1F90 1F90
+1F91 1F91
+1F92 1F92
+1F93 1F93
+1F94 1F94
+1F95 1F95
+1F96 1F96
+1F97 1F97
+1F98 1F90
+1F99 1F91
+1F9A 1F92
+1F9B 1F93
+1F9C 1F94
+1F9D 1F95
+1F9E 1F96
+1F9F 1F97
+1FA0 1FA0
+1FA1 1FA1
+1FA2 1FA2
+1FA3 1FA3
+1FA4 1FA4
+1FA5 1FA5
+1FA6 1FA6
+1FA7 1FA7
+1FA8 1FA0
+1FA9 1FA1
+1FAA 1FA2
+1FAB 1FA3
+1FAC 1FA4
+1FAD 1FA5
+1FAE 1FA6
+1FAF 1FA7
+1FB2 1FB2
+1FB3 1FB3
+1FB4 1FB4
+1FB6 1FB6
+1FB7 1FB7
+1FBC 1FB3
+1FC2 1FC2
+1FC3 1FC3
+1FC4 1FC4
+1FC6 1FC6
+1FC7 1FC7
+1FCC 1FC3
+1FD2 1FD2
+1FD3 1FD3
+1FD6 1FD6
+1FD7 1FD7
+1FE2 1FE2
+1FE3 1FE3
+1FE4 1FE4
+1FE6 1FE6
+1FE7 1FE7
+1FF2 1FF2
+1FF3 1FF3
+1FF4 1FF4
+1FF6 1FF6
+1FF7 1FF7
+1FFC 1FF3
+FB00 FB00
+FB01 FB01
+FB02 FB02
+FB03 FB03
+FB04 FB04
+FB05 FB05
+FB06 FB06
+FB13 FB13
+FB14 FB14
+FB15 FB15
+FB16 FB16
+FB17 FB17
+END
diff --git a/lib/unicore/To/SpecTitle.pl b/lib/unicore/To/SpecTitle.pl
new file mode 100644
index 0000000000..c3e1911408
--- /dev/null
+++ b/lib/unicore/To/SpecTitle.pl
@@ -0,0 +1,106 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+00DF 0053 0073
+0149 02BC 004E
+01F0 004A 030C
+0390 0399 0308 0301
+03B0 03A5 0308 0301
+0587 0535 0582
+1E96 0048 0331
+1E97 0054 0308
+1E98 0057 030A
+1E99 0059 030A
+1E9A 0041 02BE
+1F50 03A5 0313
+1F52 03A5 0313 0300
+1F54 03A5 0313 0301
+1F56 03A5 0313 0342
+1F80 1F88
+1F81 1F89
+1F82 1F8A
+1F83 1F8B
+1F84 1F8C
+1F85 1F8D
+1F86 1F8E
+1F87 1F8F
+1F88 1F88
+1F89 1F89
+1F8A 1F8A
+1F8B 1F8B
+1F8C 1F8C
+1F8D 1F8D
+1F8E 1F8E
+1F8F 1F8F
+1F90 1F98
+1F91 1F99
+1F92 1F9A
+1F93 1F9B
+1F94 1F9C
+1F95 1F9D
+1F96 1F9E
+1F97 1F9F
+1F98 1F98
+1F99 1F99
+1F9A 1F9A
+1F9B 1F9B
+1F9C 1F9C
+1F9D 1F9D
+1F9E 1F9E
+1F9F 1F9F
+1FA0 1FA8
+1FA1 1FA9
+1FA2 1FAA
+1FA3 1FAB
+1FA4 1FAC
+1FA5 1FAD
+1FA6 1FAE
+1FA7 1FAF
+1FA8 1FA8
+1FA9 1FA9
+1FAA 1FAA
+1FAB 1FAB
+1FAC 1FAC
+1FAD 1FAD
+1FAE 1FAE
+1FAF 1FAF
+1FB2 1FBA 0345
+1FB3 1FBC
+1FB4 0386 0345
+1FB6 0391 0342
+1FB7 0391 0342 0345
+1FBC 1FBC
+1FC2 1FCA 0345
+1FC3 1FCC
+1FC4 0389 0345
+1FC6 0397 0342
+1FC7 0397 0342 0345
+1FCC 1FCC
+1FD2 0399 0308 0300
+1FD3 0399 0308 0301
+1FD6 0399 0342
+1FD7 0399 0308 0342
+1FE2 03A5 0308 0300
+1FE3 03A5 0308 0301
+1FE4 03A1 0313
+1FE6 03A5 0342
+1FE7 03A5 0308 0342
+1FF2 1FFA 0345
+1FF3 1FFC
+1FF4 038F 0345
+1FF6 03A9 0342
+1FF7 03A9 0342 0345
+1FFC 1FFC
+FB00 0046 0066
+FB01 0046 0069
+FB02 0046 006C
+FB03 0046 0066 0069
+FB04 0046 0066 006C
+FB05 FB06 0053 0074
+FB13 0544 0576
+FB14 0544 0565
+FB15 0544 056B
+FB16 054E 0576
+FB17 0544 056D
+END
diff --git a/lib/unicore/To/SpecUpper.pl b/lib/unicore/To/SpecUpper.pl
new file mode 100644
index 0000000000..e5af4b1089
--- /dev/null
+++ b/lib/unicore/To/SpecUpper.pl
@@ -0,0 +1,106 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables from e.g. Unicode.txt.
+# Any changes made here will be lost!
+return <<'END';
+00DF 0053 0053
+0149 02BC 004E
+01F0 004A 030C
+0390 0399 0308 0301
+03B0 03A5 0308 0301
+0587 0535 0552
+1E96 0048 0331
+1E97 0054 0308
+1E98 0057 030A
+1E99 0059 030A
+1E9A 0041 02BE
+1F50 03A5 0313
+1F52 03A5 0313 0300
+1F54 03A5 0313 0301
+1F56 03A5 0313 0342
+1F80 1F08 0399
+1F81 1F09 0399
+1F82 1F0A 0399
+1F83 1F0B 0399
+1F84 1F0C 0399
+1F85 1F0D 0399
+1F86 1F0E 0399
+1F87 1F0F 0399
+1F88 1F08 0399
+1F89 1F09 0399
+1F8A 1F0A 0399
+1F8B 1F0B 0399
+1F8C 1F0C 0399
+1F8D 1F0D 0399
+1F8E 1F0E 0399
+1F8F 1F0F 0399
+1F90 1F28 0399
+1F91 1F29 0399
+1F92 1F2A 0399
+1F93 1F2B 0399
+1F94 1F2C 0399
+1F95 1F2D 0399
+1F96 1F2E 0399
+1F97 1F2F 0399
+1F98 1F28 0399
+1F99 1F29 0399
+1F9A 1F2A 0399
+1F9B 1F2B 0399
+1F9C 1F2C 0399
+1F9D 1F2D 0399
+1F9E 1F2E 0399
+1F9F 1F2F 0399
+1FA0 1F68 0399
+1FA1 1F69 0399
+1FA2 1F6A 0399
+1FA3 1F6B 0399
+1FA4 1F6C 0399
+1FA5 1F6D 0399
+1FA6 1F6E 0399
+1FA7 1F6F 0399
+1FA8 1F68 0399
+1FA9 1F69 0399
+1FAA 1F6A 0399
+1FAB 1F6B 0399
+1FAC 1F6C 0399
+1FAD 1F6D 0399
+1FAE 1F6E 0399
+1FAF 1F6F 0399
+1FB2 1FBA 0399
+1FB3 0391 0399
+1FB4 0386 0399
+1FB6 0391 0342
+1FB7 0391 0342 0399
+1FBC 0391 0399
+1FC2 1FCA 0399
+1FC3 0397 0399
+1FC4 0389 0399
+1FC6 0397 0342
+1FC7 0397 0342 0399
+1FCC 0397 0399
+1FD2 0399 0308 0300
+1FD3 0399 0308 0301
+1FD6 0399 0342
+1FD7 0399 0308 0342
+1FE2 03A5 0308 0300
+1FE3 03A5 0308 0301
+1FE4 03A1 0313
+1FE6 03A5 0342
+1FE7 03A5 0308 0342
+1FF2 1FFA 0399
+1FF3 03A9 0399
+1FF4 038F 0399
+1FF6 03A9 0342
+1FF7 03A9 0342 0399
+1FFC 03A9 0399
+FB00 0046 0046
+FB01 0046 0049
+FB02 0046 004C
+FB03 0046 0046 0049
+FB04 0046 0046 004C
+FB05 FB06 0053 0054
+FB13 0544 0546
+FB14 0544 0535
+FB15 0544 053B
+FB16 054E 0546
+FB17 0544 053D
+END
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 5b2d7869f5..f851302d34 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -5,10 +5,10 @@
# from the Unicode database files (lib/unicore/*.txt).
#
-my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
-
use strict;
+my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1.
+
mkdir("In", 0755);
mkdir("Is", 0755);
mkdir("To", 0755);
@@ -123,9 +123,9 @@ if (open(my $Unicode, "Unicode.txt")) {
my @Mirrored;
my %To;
while (<$Unicode>) {
- next if /^\#/ || /^\s*$/;
- next unless /^[0-9a-f]+\s*;/i;
+ next unless /^[0-9A-Fa-f]+;/;
s/\s+$//;
+
my ($code, $name, $cat, $comb, $bidi, $deco,
$decimal, $digit, $number,
$mirrored, $unicode10, $comment,
@@ -259,10 +259,7 @@ if (open(my $LineBrk, "LineBrk.txt")) {
my %Lbrk;
while (<$LineBrk>) {
- next if /^\#/ || /^\s*$/;
- s/\s+$//;
- s/\s*\#.*//;
- next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i;
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(\w+)/;
my ($first, $last, $lbrk) = ($1, $2, $3);
@@ -291,9 +288,9 @@ if (open(my $ArabShap, "ArabShap.txt")) {
my @ArabLinkGroup;
while (<$ArabShap>) {
- next if /^\#/ || /^\s*$/;
- next unless /^[0-9a-f]+\s*;/i;
+ next unless /^[0-9A-Fa-f]+;/;
s/\s+$//;
+
my ($code, $name, $link, $linkgroup) = split(/\s*;\s*/);
append(\@ArabLink, $code, $link);
@@ -314,11 +311,9 @@ if (open(my $Jamo, "Jamo.txt")) {
my @Short;
while (<$Jamo>) {
- next if /^\#/ || /^\s*$/;
- next unless /^[0-9a-f]+\s*;/i;
- s/\s*\#.*//;
- s/\s+$//;
- my ($code, $short) = split(/\s*;\s*/);
+ next unless /^([0-9A-Fa-f]+)\s*;\s*(\w*)/;
+
+ my ($code, $short) = ($1, $2);
append(\@Short, $code, $short);
}
@@ -336,10 +331,7 @@ my @Scripts;
if (open(my $Scripts, "Scripts.txt")) {
while (<$Scripts>) {
- next if /^\#/ || /^\s*$/;
- s/\s*\#.*//;
- s/\s+$//;
- next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(.+)$/i;
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
# Wait until all the scripts have been read since
# they are not listed in numeric order.
@@ -387,11 +379,8 @@ my %Blocks;
if (open(my $Blocks, "Blocks.txt")) {
while (<$Blocks>) {
- next if /^\#/ || /^\s*$/;
- s/\s*\#.*//;
- s/\s+$//;
- next unless /^([0-9a-f]+)\.\.([0-9a-f]+)\s*;\s*(.+)$/i;
-
+ next unless /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.+?)\s*$/;
+
my ($first, $last, $name) = ($1, $2, $3);
# If there's a naming conflict (the script names are
@@ -430,10 +419,7 @@ my @Props;
if (open(my $Props, "PropList.txt")) {
while (<$Props>) {
- next if /^\#/ || /^\s*$/;
- s/\s*\#.*//;
- s/\s+$//;
- next unless /^([0-9a-f]+)(?:\.\.([0-9a-f]+))?\s*;\s*(\w+)/i;
+ next unless /^([0-9A-Fa-f]+)(?:\.\.([0-9A-Fa-f]+))?\s*;\s*(.+?)\s*\#/;
# Wait until all the extended properties have been read since
# they are not listed in numeric order.
@@ -682,5 +668,48 @@ my %Is = (
mapping(\%Is, "Is");
+#
+# Read in the special cases.
+#
+
+my %Case;
+
+if (open(my $SpecCase, "SpecCase.txt")) { # parse SpecCase.txt into %Case: one [ix, code, mapping] entry per line for each of Lower/Title/Upper
+ while (<$SpecCase>) {
+ next unless /^[0-9A-Fa-f]+;/; # only data lines: a hex code point immediately followed by ';'
+ s/\#.*//;
+ s/\s+$//; # at this point $_ has lost its '#' comment and its trailing whitespace/newline
+
+ my ($code, $lower, $title, $upper, $condition) = split(/\s*;\s*/); # split drops trailing empty fields, so $condition is set only when a fifth field is present
+
+ if ($condition) { # conditional mappings are not implemented yet
+ print "# SKIPPING $_\n"; # reports the already-stripped line on standard output
+ next;
+ }
+
+ # Collect every entry first and sort later: the special cases
+ # are not listed in numeric order in the input.
+ my $ix = hex($code); # numeric code point, used as the sort key when the tables are written
+ push @{$Case{Lower}}, [ $ix, $code, $lower ];
+ push @{$Case{Title}}, [ $ix, $code, $title ];
+ push @{$Case{Upper}}, [ $ix, $code, $upper ];
+ }
+} else {
+ die "$0: SpecCase.txt: $!\n"; # a missing or unreadable SpecCase.txt is fatal
+}
+
+# Now write out the special case mappings in code point order.
+# The To/Spec{Lower,Title,Upper}.pl files are unused for now since the
+# swash routines do not support returning multiple characters.
+
+for my $case (qw(Lower Title Upper)) { # one output table, To/Spec<case>.pl, per case kind
+ my @case;
+ for my $prop (sort { $a->[0] <=> $b->[0] } @{$Case{$case}}) { # numeric sort on element 0, the hex()-converted code point
+ my ($ix, $code, $to) = @$prop;
+ append(\@case, $code, $to); # append() is defined elsewhere in mktables; presumably adds (or extends a range for) the $code => $to entry -- confirm
+ }
+ flush(\@case, "To/Spec$case.pl"); # flush() is defined elsewhere in mktables; presumably writes the collected entries to the named file -- confirm
+}
+
# That's all, folks!