summary | refs | log | tree | commit | diff
path: root/lib/unicore
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2020-02-03 14:49:44 -0700
committer: Karl Williamson <khw@cpan.org> 2020-02-03 16:34:55 -0700
commit: 7e6fafba8de0dae89c5a2a65b1f974f1122fa4f2 (patch)
tree: 06dc338f558cedb379f3dc8577481762001f3e0c /lib/unicore
parent: 9664dd62ebbb75178efd21930107a01d78ba255c (diff)
download: perl-7e6fafba8de0dae89c5a2a65b1f974f1122fa4f2.tar.gz
mktables: Generalize the scx property handling
Until now, the Script_Extensions (scx) property was unique in that it specifies a set of scripts that a character can be in, rather than a single script. That multiplicity has been handled specially. But the next couple of commits will introduce another property that has similar characteristics. This commit makes the scx handling more general, so that it is also usable for the new property.
Diffstat (limited to 'lib/unicore')
-rw-r--r-- lib/unicore/mktables 82
-rw-r--r-- lib/unicore/uni_keywords.pl 2
2 files changed, 50 insertions, 34 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index dc0d7db10b..f11e466823 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -13989,6 +13989,51 @@ END
);
}
} # End of loop through aliases for this property
+
+
+ # Properties that have sets of values for some characters are now
+ # converted. For example, the Script_Extensions property started out
+ # as a clone of the Script property. But processing its data file
+ # caused some elements to be replaced with different data. (These
+ # elements were for the Common and Inherited properties.) This data
+ # is a qw() list of all the scripts that the code points in the given
+ # range are in. An example line is:
+ #
+ # 060C ; Arab Syrc Thaa # Po ARABIC COMMA
+ #
+ # Code executed earlier has created a new match table named "Arab Syrc
+ # Thaa" which contains 060C. (The cloned table started out with this
+ # code point mapping to "Common".) Now we add 060C to each of the
+ # Arab, Syrc, and Thaa match tables. Then we delete the now spurious
+ # "Arab Syrc Thaa" match table. This is repeated for all these tables
+ # and ranges. The map data is retained in the map table for
+ # reference, but the spurious match tables are deleted.
+ my $format = $property->format;
+ if (defined $format && $format eq $STRING_WHITE_SPACE_LIST) {
+ foreach my $table ($property->tables) {
+
+ # Space separates the entries which should go in multiple
+ # tables
+ next unless $table->name =~ /\s/;
+
+ # The list of the entries, hence the names of the tables that
+ # everything in this combo table should be added to.
+ my @list = split /\s+/, $table->name;
+
+ # Add the entries from the combo table to each individual
+ # table
+ foreach my $individual (@list) {
+ my $existing_table = $property->table($individual);
+
+ # This should only be necessary if this particular entry
+ # occurs only in combo with others.
+ $existing_table = $property->add_match_table($individual)
+ unless defined $existing_table;
+ $existing_table += $table;
+ }
+ $property->delete_match_table($table);
+ }
+ }
} # End of loop through all Unicode properties.
# Fill in the mappings that Unicode doesn't completely furnish. First the
@@ -14077,40 +14122,11 @@ END
Initialize => property_ref('cf'),
);
- # The Script_Extensions property started out as a clone of the Script
- # property. But processing its data file caused some elements to be
- # replaced with different data. (These elements were for the Common and
- # Inherited properties.) This data is a qw() list of all the scripts that
- # the code points in the given range are in. An example line is:
- # 060C ; Arab Syrc Thaa # Po ARABIC COMMA
- #
- # The code above has created a new match table named "Arab Syrc Thaa"
- # which contains 060C. (The cloned table started out with this code point
- # mapping to "Common".) Now we add 060C to each of the Arab, Syrc, and
- # Thaa match tables. Then we delete the now spurious "Arab Syrc Thaa"
- # match table. This is repeated for all these tables and ranges. The map
- # data is retained in the map table for reference, but the spurious match
- # tables are deleted.
-
+ # Mark the scx table as the parent of the corresponding sc table for those
+ # which are identical. This causes the pod for the script table to refer
+ # to the corresponding scx one. This is done after everything, so as to
+ # wait until the tables are stabilized before checking for equivalency.
if (defined $scx) {
- foreach my $table ($scx->tables) {
- next unless $table->name =~ /\s/; # All the new and only the new
- # tables have a space in their
- # names
- my @scripts = split /\s+/, $table->name;
- foreach my $script (@scripts) {
- my $script_table = $scx->table($script);
- $script_table += $table;
- }
- $scx->delete_match_table($table);
- }
-
- # Mark the scx table as the parent of the corresponding sc table for
- # those which are identical. This causes the pod for the script table
- # to refer to the corresponding scx one.
- #
- # This has to be in a separate loop from above, so as to wait until
- # the tables are stabilized before checking for equivalency.
if (defined $pod_directory) {
foreach my $table ($scx->tables) {
my $plain_sc_equiv = $script->table($table->name);
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
index 63c3dd9b11..894ede5df4 100644
--- a/lib/unicore/uni_keywords.pl
+++ b/lib/unicore/uni_keywords.pl
@@ -1280,7 +1280,7 @@
# 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt
# 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
# 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
-# e3cbd908e180d39554e2c7351cebfd939a567c796c79e2e4a71ec2af874adfa3 lib/unicore/mktables
+# eefd34f5f03191cb297d62f66273ca68efc972703f5323543427a9b285f28657 lib/unicore/mktables
# 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
# 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
# 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl