diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-01-21 15:27:00 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-02-04 16:29:29 -0700 |
commit | cdc18eb6b4561d68a21651783cc62a89fd936ae2 (patch) | |
tree | 09aa307813903059043a2c959d6f3bfcc2c5126e /lib | |
parent | 66474459308655bbcd923ce8d6872641e6595b4d (diff) | |
download | perl-cdc18eb6b4561d68a21651783cc62a89fd936ae2.tar.gz |
mktables: Add duplicate tables
This is for backwards compatibility. Future commits will change these
tables that are generated by mktables to be more efficient. But their
existence was advertised in v5.12 and v5.14 as something a Perl program
could use, because the Perl core did not provide access to their
contents. We can't change the format of those files without some notice.
The solution adopted is to have two versions of the tables: one, kept
under the original file name, retains the original format; the other is
free to change formats at will.
This commit just creates copies of the original, with the same format.
Later commits will change the format to be more efficient.
We state in v5.16 that using these files is now deprecated, as the
information is now available through Unicode::UCD in a stable API. But
we don't test for whether someone is opening and reading these files, so
the deprecation cycle should be somewhat long. They will be unused, and
the only drawbacks to having them are some extra disk space and the time
spent in having to generate them at Perl build time.
This commit also changes the Perl core to use the original tables, so
that the new format can be gradually developed in a series of patches
without having to cut over the whole thing at once.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/unicore/mktables | 66 |
1 files changed, 55 insertions, 11 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 1fe3696ec2..ccfb1778c5 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -8631,15 +8631,6 @@ sub finish_property_setup { # Perl adds this alias. $gc->add_alias('Category'); - # For backwards compatibility, these property files have particular names. - property_ref('Uppercase_Mapping')->set_file('Upper'); # This is what - # utf8.c calls it - property_ref('Lowercase_Mapping')->set_file('Lower'); - property_ref('Titlecase_Mapping')->set_file('Title'); - - my $fold = property_ref('Case_Folding'); - $fold->set_file('Fold') if defined $fold; - # Unicode::Normalize expects this file with this name and directory. my $ccc = property_ref('Canonical_Combining_Class'); if (defined $ccc) { @@ -10000,7 +9991,6 @@ END my $Decimal_Digit = Property->new("Perl_Decimal_Digit", Default_Map => "", Perl_Extension => 1, - File => 'Digit', # Trad. location Directory => $map_directory, Type => $STRING, Range_Size_1 => 1, @@ -10728,6 +10718,19 @@ sub filter_arabic_shaping_line { Carp::my_carp_bug("Need to process UnicodeData before SpecialCasing. Only special casing will be generated."); } + # Create a table in the old-style format and with the original + # file name for backwards compatibility with applications that + # read it directly. + my $legacy = Property->new("Legacy_" . $full_table->full_name, + File => $full_table->full_name =~ + s/case_Mapping//r, + Range_Size_1 => 1, + Format => $HEX_FORMAT, + Default_Map => $CODE_POINT, + UCD => 0, + Initialize => $full_table, + ); + # The simple version's name in each mapping merely has an 's' in # front of the full one's my $simple_name = 's' . 
$full_name; @@ -10738,6 +10741,7 @@ sub filter_arabic_shaping_line { Type => $STRING, Default_Map => $CODE_POINT, Perl_Extension => 1, + Range_Size_1 => 1, Fate => $INTERNAL_ONLY, Description => "This contains the simple mappings for $full_name for just the code points that have different full mappings"); $simple_only->set_to_output_map($INTERNAL_MAP); @@ -10840,10 +10844,18 @@ END } } else { - $file->insert_adjusted_lines("$fields[0]; " + + # The mapping goes into both the legacy table ... + $file->insert_adjusted_lines("$fields[0]; Legacy_" . $object->full_name . "; $fields[$i]"); + # ... and, the The regular table + $file->insert_adjusted_lines("$fields[0]; " + . $object->name + . "; " + . $fields[$i]); + # Copy any simple case change to the special tables # constructed if being overridden by a multi-character case # change. @@ -11751,6 +11763,38 @@ END } } + # Create digit and case fold tables with the original file names for + # backwards compatibility with applications that read them directly. + my $Digit = Property->new("Legacy_Perl_Decimal_Digit", + Default_Map => "", + Perl_Extension => 1, + File => 'Digit', # Trad. location + Directory => $map_directory, + UCD => 0, + Type => $STRING, + Range_Size_1 => 1, + Initialize => property_ref('Perl_Decimal_Digit'), + ); + $Digit->add_comment(join_lines(<<END +This file gives the mapping of all code points which represent a single +decimal digit [0-9] to their respective digits. For example, the code point +U+0031 (an ASCII '1') is mapped to a numeric 1. These code points are those +that have Numeric_Type=Decimal; not special things, like subscripts nor Roman +numerals. +END + )); + + Property->new('Legacy_Case_Folding', + File => "Fold", + Directory => $map_directory, + Default_Map => $CODE_POINT, + UCD => 0, + Range_Size_1 => 1, + Type => $STRING, + Format => $HEX_FORMAT, + Initialize => property_ref('cf'), + ); + # The Script_Extensions property started out as a clone of the Script # property. 
But processing its data file caused some elements to be # replaced with different data. (These elements were for the Common and |