summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- charclass_invlists.h 2
-rw-r--r-- lib/Unicode/UCD.t 143
-rw-r--r-- lib/unicore/mktables 98
-rw-r--r-- lib/unicore/uni_keywords.pl 2
-rw-r--r-- regcharclass.h 2
-rw-r--r-- uni_keywords.h 2
6 files changed, 25 insertions, 224 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 6c4fd15e9d..6b96778939 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -419900,7 +419900,7 @@ static const U8 WB_table[23][23] = {
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * e1bf3b84f01cb4fec63c65f71c1a1ea1af644d7c142810d17497ff8a23b781c7 lib/unicore/mktables
+ * 497b589915a64625b274215e8b8ca02b7051ecbd4dd85a488807e76a41bc707d lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index eb7fbd8f35..5e2aa8b86a 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -1596,45 +1596,9 @@ is(@list, 0, "prop_invmap('Perl_Charnames') returns <undef> since internal-Perl-
@list = prop_invmap("Is_Is_Any");
is(@list, 0, "prop_invmap('Is_Is_Any') returns <undef> since two is's");
-# The files for these properties are not used by Perl, but are retained for
-# backwards compatibility with applications that read them directly, with
-# comments in them that their use is deprecated. Until such time as we remove
-# them completely, we test that they exist, are correct, and that their
-# formats haven't changed. This hash contains the info needed to test them as
-# if they were regular properties. 'replaced_by' gives the equivalent
-# property now used by Perl.
-my %legacy_props = (
- Legacy_Case_Folding => { replaced_by => 'cf',
- file => 'To/Fold',
- swash_name => 'ToFold'
- },
- Legacy_Lowercase_Mapping => { replaced_by => 'lc',
- file => 'To/Lower',
- swash_name => 'ToLower'
- },
- Legacy_Titlecase_Mapping => { replaced_by => 'tc',
- file => 'To/Title',
- swash_name => 'ToTitle'
- },
- Legacy_Uppercase_Mapping => { replaced_by => 'uc',
- file => 'To/Upper',
- swash_name => 'ToUpper'
- },
- Legacy_Perl_Decimal_Digit => { replaced_by => 'Perl_Decimal_Digit',
- file => 'To/Digit',
- swash_name => 'ToDigit'
- },
- );
-
-foreach my $legacy_prop (keys %legacy_props) {
- @list = prop_invmap($legacy_prop);
- is(@list, 0, "'$legacy_prop' is unknown to prop_invmap");
-}
-
# The files for these properties shouldn't have their formats changed in case
# applications use them (though such use is deprecated).
-my @legacy_file_format = (keys %legacy_props,
- qw( Bidi_Mirroring_Glyph
+my @legacy_file_format = (qw( Bidi_Mirroring_Glyph
NFKC_Casefold
)
);
@@ -1658,8 +1622,7 @@ my %tested_invmaps;
# lists returned by prop_invlist(), which has already been tested.
PROPERTY:
-foreach my $prop (sort(keys %props), sort keys %legacy_props) {
- my $is_legacy = 0;
+foreach my $prop (sort(keys %props)) {
my $loose_prop = &Unicode::UCD::loose_name(lc $prop);
my $suppressed = grep { $_ eq $loose_prop }
@Unicode::UCD::suppressed_properties;
@@ -1673,39 +1636,12 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
if (! $name) {
# Here, Perl doesn't know about this property. It could be a
- # suppressed one, or a legacy one.
- if (grep { $prop eq $_ } keys %legacy_props) {
-
- # For legacy properties, we look up the modern equivalent
- # property instead; later massaging the results to look like the
- # known format of the legacy property. We add info about the
- # legacy property to the data structures for the rest of the
- # properties; this is to avoid more special cases for the legacies
- # in the code below
- $full_name = $name = $prop;
- $actual_lookup_prop = $legacy_props{$prop}->{'replaced_by'};
- my $base_file = $legacy_props{$prop}->{'file'};
-
- # This legacy property is otherwise unknown to Perl; so shouldn't
- # have any information about it already.
- ok(! exists $Unicode::UCD::loose_property_to_file_of{$loose_prop},
- "There isn't a hash entry for file lookup of $prop");
- $Unicode::UCD::loose_property_to_file_of{$loose_prop} = $base_file;
-
- ok(! exists $Unicode::UCD::file_to_swash_name{$loose_prop},
- "There isn't a hash entry for swash lookup of $prop");
- $Unicode::UCD::file_to_swash_name{$base_file}
- = $legacy_props{$prop}->{'swash_name'};
- $display_prop = $prop;
- $is_legacy = 1;
- }
- else {
+ # suppressed one
if (! $suppressed) {
fail("prop_invmap('$prop')");
diag("is unknown to prop_aliases(), and we need it in order to test prop_invmap");
}
next PROPERTY;
- }
}
# Normalize the short name, as it is stored in the hashes under the
@@ -1728,49 +1664,6 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
my ($invlist_ref, $invmap_ref, $format, $missing) = prop_invmap($actual_lookup_prop);
my $return_ref = [ $invlist_ref, $invmap_ref, $format, $missing ];
-
- # The legacy property files all are expanded out so that each range is 1
- # element long. That isn't true of the modern equivalent we use to check
- # those files for correctness against. So take the output of the proxy
- # and expand it to match the legacy file.
- if ($is_legacy) {
- my @expanded_list;
- my @expanded_map;
- for my $i (0 .. @$invlist_ref - 1 - 1) {
- if (ref $invmap_ref->[$i] || $invmap_ref->[$i] eq $missing) {
-
- # No adjustments should be done for the default mapping and
- # the multi-char ones.
- push @expanded_list, $invlist_ref->[$i];
- push @expanded_map, $invmap_ref->[$i];
- }
- else {
-
- # Expand the range into separate elements for each item.
- my $offset = 0;
- for my $j ($invlist_ref->[$i] .. $invlist_ref->[$i+1] -1) {
- push @expanded_list, $j;
- push @expanded_map, $invmap_ref->[$i] + $offset;
-
- # The 'ae' format is for Legacy_Perl_Decimal_Digit; the
- # other 4 are kept with leading zeros in the file, so
- # convert to that.
- $expanded_map[-1] = sprintf("%04X", $expanded_map[-1])
- if $format ne 'ae';
- $offset++;
- }
- }
- }
-
- # Final element is taken as is. The map should always be to the
- # default value, so don't do a sprintf like we did above.
- push @expanded_list, $invlist_ref->[-1];
- push @expanded_map, $invmap_ref->[-1];
-
- $invlist_ref = \@expanded_list;
- $invmap_ref = \@expanded_map;
- }
-
# If have already tested this property under a different name, merely
# compare the return from now with the saved one from before.
if (exists $tested_invmaps{$name}) {
@@ -1861,11 +1754,6 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
next PROPERTY;
}
}
- elsif ($missing ne "0" && ! grep { $prop eq $_ } keys %legacy_props) {
- fail("prop_invmap('$display_prop')");
- diag("The missings should be '0'; got '$missing'");
- next PROPERTY;
- }
}
elsif ($missing =~ /[<>]/) {
fail("prop_invmap('$display_prop')");
@@ -2060,21 +1948,18 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
? "%04X"
: $file_range_format;
- # Certain of the proxy properties have to be adjusted to match the
- # real ones.
- if ($full_name
- =~ /^(Legacy_)?(Case_Folding|(Lower|Title|Upper)case_Mapping)/)
+ # Combination properties, where the same file contains mappings to both
+ # the simple and full versions, have to be adjusted when looking at
+ # the full versions.
+ if ($full_name =~ /^ ( Case_Folding
+ | (Lower|Title|Upper) case_Mapping )
+ $ /x)
{
-
- # Here we have either
- # 1) Case_Folding; or
- # 2) a proxy that is a full mapping, which means that what the
- # real property is is the equivalent simple mapping.
- # In both cases, the file will have a standard list containing
- # simple mappings (to a single code point), and a specials hash
- # which contains all the mappings that are to multiple code
- # points. First, extract a list containing all the file's simple
- # mappings.
+ # The file will have a standard list containing simple mappings
+ # (to a single code point), and a specials hash which contains all
+ # the mappings that are to multiple code points.
+ #
+ # First, extract a list containing all the file's simple mappings.
my @list;
for (split "\n", $official) {
my ($start, $end, $value) = / ^ (.+?) \t (.*?) \t (.+?)
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 3c1da94b53..5732c616df 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -6717,23 +6717,13 @@ sub trace { return main::trace(@_); }
main::set_access('anomalous_entries', # Append singular, read plural
\%anomalous_entries,
'readable_array');
-
- my %replacement_property;
- # Certain files are unused by Perl itself, and are kept only for backwards
- # compatibility for programs that used them before Unicode::UCD existed.
- # These are termed legacy properties. At some point they may be removed,
- # but for now mark them as legacy. If non empty, this is the name of the
- # property to use instead (i.e., the modern equivalent).
- main::set_access('replacement_property', \%replacement_property, 'r');
-
my %to_output_map;
# Enum as to whether or not to write out this map table, and how:
# 0 don't output
# $EXTERNAL_MAP means its existence is noted in the documentation, and
# it should not be removed nor its format changed. This
# is done for those files that have traditionally been
- # output. Maps of legacy-only properties default to
- # this.
+ # output.
# $INTERNAL_MAP means Perl reserves the right to do anything it wants
# with this file
# $OUTPUT_ADJUSTED means that it is an $INTERNAL_MAP, and instead of
@@ -6758,17 +6748,9 @@ sub trace { return main::trace(@_); }
my $default_map = delete $args{'Default_Map'};
my $property = delete $args{'_Property'};
my $full_name = delete $args{'Full_Name'};
- my $replacement_property = delete $args{'Replacement_Property'} // "";
my $to_output_map = delete $args{'To_Output_Map'};
- # Rest of parameters passed on; legacy properties have several common
- # other attributes
- if ($replacement_property) {
- $args{"Fate"} = $LEGACY_ONLY;
- $args{"Range_Size_1"} = 1;
- $args{"Perl_Extension"} = 1;
- $args{"UCD"} = 0;
- }
+ # Rest of parameters passed on
my $range_list = Range_Map->new(Owner => $property);
@@ -6785,9 +6767,6 @@ sub trace { return main::trace(@_); }
$anomalous_entries{$addr} = [];
$default_map{$addr} = $default_map;
- $replacement_property{$addr} = $replacement_property;
- $to_output_map = $EXTERNAL_MAP if ! defined $to_output_map
- && $replacement_property;
$to_output_map{$addr} = $to_output_map;
$self->initialize($initialize) if defined $initialize;
@@ -6993,12 +6972,8 @@ sub trace { return main::trace(@_); }
$return .= $INTERNAL_ONLY_HEADER;
}
else {
- my $property_name = $self->property->replacement_property;
-
- # The legacy-only properties were gotten above; but there are some
- # other properties whose files are in current use that have fixed
- # formats.
- $property_name = $self->property->full_name unless $property_name;
+ # Other properties have fixed formats.
+ my $property_name = $self->property->full_name;
$return .= <<END;
@@ -9106,10 +9081,8 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
my $addr = do { no overloading; pack 'J', $self; };
# Swash names are used only on either
- # 1) legacy-only properties, because the formats for these are
- # unchangeable, and they have had these lines in them; or
- # 2) regular or internal-only map tables
- # 3) otherwise there should be no access to the
+ # 1) regular or internal-only map tables
+ # 2) otherwise there should be no access to the
# property map table from other parts of Perl.
return if $map{$addr}->fate != $ORDINARY
&& $map{$addr}->fate != $LEGACY_ONLY
@@ -9379,7 +9352,6 @@ sub trace { return main::trace(@_) if main::DEBUG && $to_trace }
initialize
inverse_list
is_empty
- replacement_property
name
note
perl_extension
@@ -12274,26 +12246,6 @@ sub filter_old_style_arabic_shaping {
Carp::my_carp_bug("Need to process UnicodeData before SpecialCasing. Only special casing will be generated.");
}
- # Create a table in the old-style format and with the original
- # file name for backwards compatibility with applications that
- # read it directly. The new tables contain both the simple and
- # full maps, and the old are missing simple maps when there is a
- # conflicting full one. Probably it would have been ok to add
- # those to the legacy version, as was already done in 5.14 to the
- # case folding one, but this was not done, out of an abundance of
- # caution. The tables are set up here before we deal with the
- # full maps so that as we handle those, we can override the simple
- # maps for them in the legacy table, and merely add them in the
- # new-style one.
- my $legacy = Property->new("Legacy_" . $full_casing_full_name,
- File => $full_casing_full_name
- =~ s/case_Mapping//r,
- Format => $HEX_FORMAT,
- Default_Map => $CODE_POINT,
- Initialize => $full_casing_table,
- Replacement_Property => $full_casing_full_name,
- );
-
$full_casing_table->add_comment(join_lines( <<END
This file includes both the simple and full case changing maps. The simple
ones are in the main body of the table below, and the full ones adding to or
@@ -12409,14 +12361,7 @@ END
}
else {
- # The mapping goes into both the legacy table, in which it
- # replaces the simple one...
- $file->insert_adjusted_lines("$fields[0]; Legacy_"
- . $object->full_name
- . "; $fields[$i]");
-
- # ... and the regular table, in which it is additional,
- # beyond the simple mapping.
+ # The mapping is additional, beyond the simple mapping.
$file->insert_adjusted_lines("$fields[0]; "
. $object->name
. "; "
@@ -13705,25 +13650,6 @@ END
$gc->table('Ll')->set_caseless_equivalent($LC);
$gc->table('Lu')->set_caseless_equivalent($LC);
- # Create digit and case fold tables with the original file names for
- # backwards compatibility with applications that read them directly.
- my $Digit = Property->new("Legacy_Perl_Decimal_Digit",
- Default_Map => "",
- File => 'Digit', # Trad. location
- Directory => $map_directory,
- Type => $STRING,
- Replacement_Property => "Perl_Decimal_Digit",
- Initialize => property_ref('Perl_Decimal_Digit'),
- );
- $Digit->add_comment(join_lines(<<END
-This file gives the mapping of all code points which represent a single
-decimal digit [0-9] to their respective digits. For example, the code point
-U+0031 (an ASCII '1') is mapped to a numeric 1. These code points are those
-that have Numeric_Type=Decimal; not special things, like subscripts nor Roman
-numerals.
-END
- ));
-
# Make sure this assumption in perl core code is valid in this Unicode
# release, with known exceptions
foreach my $range (property_ref('Numeric-Type')->table('Decimal')->ranges) {
@@ -13736,16 +13662,6 @@ END
. " and will have to be fixed. Proceeding anyway.");
}
- Property->new('Legacy_Case_Folding',
- File => "Fold",
- Directory => $map_directory,
- Default_Map => $CODE_POINT,
- Type => $STRING,
- Replacement_Property => "Case_Folding",
- Format => $HEX_FORMAT,
- Initialize => property_ref('cf'),
- );
-
# Mark the scx table as the parent of the corresponding sc table for those
# which are identical. This causes the pod for the script table to refer
# to the corresponding scx one. This is done after everything, so as to
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
index 4e6246445f..70d486c7b2 100644
--- a/lib/unicore/uni_keywords.pl
+++ b/lib/unicore/uni_keywords.pl
@@ -1295,7 +1295,7 @@
# baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
# 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
# 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
-# e1bf3b84f01cb4fec63c65f71c1a1ea1af644d7c142810d17497ff8a23b781c7 lib/unicore/mktables
+# 497b589915a64625b274215e8b8ca02b7051ecbd4dd85a488807e76a41bc707d lib/unicore/mktables
# 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
# 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
# 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl
diff --git a/regcharclass.h b/regcharclass.h
index f967d54c5a..27b5845b33 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -3762,7 +3762,7 @@
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * e1bf3b84f01cb4fec63c65f71c1a1ea1af644d7c142810d17497ff8a23b781c7 lib/unicore/mktables
+ * 497b589915a64625b274215e8b8ca02b7051ecbd4dd85a488807e76a41bc707d lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* 1aa94679c695efd507b7e4491629dba1021b74c21a5324dfd3a582a5d654bd32 regen/regcharclass.pl
diff --git a/uni_keywords.h b/uni_keywords.h
index 6f09587f07..97bb7b3f72 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -7542,7 +7542,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * e1bf3b84f01cb4fec63c65f71c1a1ea1af644d7c142810d17497ff8a23b781c7 lib/unicore/mktables
+ * 497b589915a64625b274215e8b8ca02b7051ecbd4dd85a488807e76a41bc707d lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl