diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-01-21 20:04:51 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-02-04 16:29:30 -0700 |
commit | ae1bcb1f4fc588e844f2ebe20bb0812b72d6e531 (patch) | |
tree | b9fc7ea7c52b53238551d5cdbe05c2d863825d1b /lib | |
parent | cdc18eb6b4561d68a21651783cc62a89fd936ae2 (diff) | |
download | perl-ae1bcb1f4fc588e844f2ebe20bb0812b72d6e531.tar.gz |
mktables: Include simple mappings in full tables
This changes the case change mapping tables to include the simple
mappings. This was done in 5.14 for the case folding table. The full
mappings are contained, as before, in a hash. Now the simple mappings
that they override (when doing multi-char case changing) are also added to
the main body of the table, alongside the already existing simple mappings
that aren't overridden.
If the caller wants to do full mapping, it should look first in the
hash, and only if not found, look in the main body. If the caller wants
only simple mapping, it ignores the hash.
This is already how the code in utf8.c that reads these tables is
constructed.
The test file, lib/Unicode/UCD.t, is modified to take into account that
these code points are now in the main table body.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Unicode/UCD.t | 60 | ||||
-rw-r--r-- | lib/unicore/mktables | 28 |
2 files changed, 32 insertions, 56 deletions
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 0178eba3af..6171619e3e 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -1387,9 +1387,7 @@ foreach my $prop (keys %props) { # Certain of the proxy properties have to be adjusted to match the # real ones. - if (($proxy_prop ne $name && $full_name =~ 'Mapping') - || $full_name eq 'Case_Folding') - { + if ($full_name =~ /^(Case_Folding|(Lower|Title|Upper)case_Mapping)/) { # Here we have either # 1) Case_Folding; or @@ -1414,38 +1412,15 @@ foreach my $prop (keys %props) { push @list, [ hex $start, $value ]; } - # For Case_Folding, the file contains all the simple mappings, + # For these mappings, the file contains all the simple mappings, # including the ones that are overridden by the specials. These - # need to be removed as the list is for just the full ones. For - # the other files, the proxy is missing the simple mappings that - # are overridden by the specials, so we need to add them. - - # For the missing simples, we get the correct values by calling - # charinfo(). Set up which element of the hash returned by - # charinfo to look at - my $charinfo_element; - if ($full_name =~ / ^ Simple_ (Lower | Upper | Title) case_Mapping/x) - { - $charinfo_element = lc $1; # e.g. Upper is referred to by the - # key 'upper' in the charinfo() - # returned hash - } + # need to be removed as the list is for just the full ones. # Go through any special mappings one by one. They are packed. my $i = 0; foreach my $utf8_cp (sort keys %$specials_ref) { my $cp = unpack("C0U", $utf8_cp); - # Get what the simple value for this should be; either nothing - # for Case_Folding, or what charinfo returns for the others. - my $simple = ($full_name eq "Case_Folding") - ? 
"" - : charinfo($cp)->{$charinfo_element}; - - # And create an entry to add to the list, if appropriate - my $replacement; - $replacement = [ $cp, $simple ] if $simple ne ""; - # Find the spot in the @list of simple mappings that this # special applies to; uses a linear search. while ($i < @list -1 ) { @@ -1457,25 +1432,9 @@ foreach my $prop (keys %props) { #note $i-0 . ": " . join " => ", @{$list[$i-0]}; #note $i+1 . ": " . join " => ", @{$list[$i+1]}; - if (! defined $replacement) { - - # Here, are to remove any existing entry for this code - # point. + # Then, remove any existing entry for this code point. next if $cp != $list[$i][0]; splice @list, $i, 1; - } - elsif ($cp == $list[$i][0]) { - - # Here, are to add something, but there is an existing - # entry, so this just replaces it. - $list[$i] = $replacement; - } - else { - - # Here, are to add something, and there isn't an existing - # entry. - splice @list, $i, 0, $replacement; - } #note __LINE__ . ": $cp"; #note $i-1 . ": " . join " => ", @{$list[$i-1]}; @@ -1486,16 +1445,13 @@ foreach my $prop (keys %props) { # Here, have gone through all the specials, modifying @list as # needed. Turn it back into what the file should look like. $official = join "\n", map { sprintf "%04X\t\t%s", @$_ } @list; - - # And, no longer need the specials for the simple mappings, as are - # all incorporated into $official - undef $specials_ref if $full_name ne 'Case_Folding'; } - elsif ($full_name eq 'Simple_Case_Folding') { + elsif ($full_name =~ /Simple_(Case_Folding|(Lower|Title|Upper)case_Mapping)/) + { - # This property has everything in the regular array, and the + # These properties have everything in the regular array, and the # specials are superfluous. 
- undef $specials_ref if $full_name ne 'Case_Folding'; + undef $specials_ref; } # Here, in $official, we have what the file looks like, or should like diff --git a/lib/unicore/mktables b/lib/unicore/mktables index ccfb1778c5..bdd8ce1185 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -10720,7 +10720,15 @@ sub filter_arabic_shaping_line { # Create a table in the old-style format and with the original # file name for backwards compatibility with applications that - # read it directly. + # read it directly. The new tables contain both the simple and + # full maps, and the old are missing simple maps when there is a + # conflicting full one. Probably it would have been ok to add + # those to the legacy version, as was already done in 5.14 to the + # case folding one, but this was not done, out of an abundance of + # caution. The tables are set up here before we deal with the + # full maps so that as we handle those, we can override the simple + # maps for them in the legacy table, and merely add them in the + # new-style one. my $legacy = Property->new("Legacy_" . $full_table->full_name, File => $full_table->full_name =~ s/case_Mapping//r, @@ -10731,6 +10739,13 @@ sub filter_arabic_shaping_line { Initialize => $full_table, ); + $full_table->add_comment(join_lines( <<END +This file includes both the simple and full case changing maps. The simple +ones are in the main body of the table below, and the full ones adding to or +overriding them are in the hash. +END + )); + # The simple version's name in each mapping merely has an 's' in # front of the full one's my $simple_name = 's' . $full_name; @@ -10752,7 +10767,7 @@ END )); unless ($simple->to_output_map()) { - $simple_only->set_proxy_for($simple_name); + $full_table->set_proxy_for($simple_name); } } @@ -10845,15 +10860,20 @@ END } else { - # The mapping goes into both the legacy table ... + # The mapping goes into both the legacy table, in which it + # replaces the simple one... 
$file->insert_adjusted_lines("$fields[0]; Legacy_" . $object->full_name . "; $fields[$i]"); - # ... and, the The regular table + # ... and, the The regular table, in which it is additional, + # beyond the simple mapping. $file->insert_adjusted_lines("$fields[0]; " . $object->name . "; " + . $CMD_DELIM + . "$REPLACE_CMD=$MULTIPLE_BEFORE" + . $CMD_DELIM . $fields[$i]); # Copy any simple case change to the special tables |