summaryrefslogtreecommitdiff
path: root/lib/Unicode/UCD.t
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Unicode/UCD.t')
-rw-r--r--lib/Unicode/UCD.t117
1 files changed, 42 insertions, 75 deletions
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index c4b5a85098..b2caf8934c 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -1058,25 +1058,12 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of
# If we are to test against an inverted file, it is easier to invert
# our array than the file.
- # The file only is valid for Unicode code points, while the inversion
- # list is valid for all possible code points. Therefore, we must test
- # just the Unicode part against the file. Later we will test for
- # the non-Unicode part.
-
- my $before_invert; # Saves the pre-inverted table.
if ($invert) {
- $before_invert = dclone \@tested;
if (@tested && $tested[0] == 0) {
shift @tested;
} else {
unshift @tested, 0;
}
- if (@tested && $tested[-1] == 0x110000) {
- pop @tested;
- }
- else {
- push @tested, 0x110000;
- }
}
# Now construct a string from the list that should match the file.
@@ -1091,9 +1078,11 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of
# otherwise don't get reflected in the file.
my $tested = "";
my $i = 0;
- for (; $i < @tested - 1; $i += 2) {
+ for (; $i < @tested; $i += 2) {
my $start = $tested[$i];
- my $end = $tested[$i+1] - 1;
+ my $end = ($i + 1 < @tested)
+ ? $tested[$i+1] - 1
+ : $Unicode::UCD::MAX_CP;
if ($start == $end) {
$tested .= sprintf("%X\n", $start);
}
@@ -1102,12 +1091,6 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of
}
}
- # As mentioned earlier, the disk files only go up through Unicode,
- # whereas the prop_invlist() ones go as high as necessary. The
- # comparison is only valid through max Unicode.
- if ($i == @tested - 1 && $tested[$i] <= 0x10FFFF) {
- $tested .= sprintf("%X\t10FFFF\n", $tested[$i]);
- }
local $/ = "\n";
chomp $tested;
$/ = $input_record_separator;
@@ -1116,50 +1099,6 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of
next;
}
- # Here, it matched the table. Now need to check for if it is correct
- # for beyond Unicode. First, calculate if is the default table or
- # not. This is the same algorithm as used internally in
- # prop_invlist(), so if it is wrong there, this test won't catch it.
- my $prop = lc $table;
- ($prop_only, $table) = split /\s*[:=]\s*/, $prop;
- if (defined $table) {
-
- # May have optional prefixed 'is'
- $prop = &utf8::_loose_name($prop_only) =~ s/^is//r;
- $prop = $utf8::loose_property_name_of{$prop};
- $prop .= "=" . &utf8::_loose_name($table);
- }
- else {
- $prop = &utf8::_loose_name($prop);
- }
- my $is_default = exists $Unicode::UCD::loose_defaults{$prop};
-
- @tested = @$before_invert if $invert; # Use the original
- if (@tested % 2 == 0) {
-
- # If there are an even number of elements, the final one starts a
- # range (going to infinity) of code points that are not in the
- # list.
- if ($is_default) {
- fail("prop_invlist('$mod_table')");
- diag("default table doesn't goto infinity");
- use Data::Dumper;
- diag Dumper \@tested;
- next;
- }
- }
- else {
- # An odd number of elements means the final one starts a range
- # (going to infinity of code points that are in the list.
- if (! $is_default) {
- fail("prop_invlist('$mod_table')");
- diag("non-default table needs to stop in the Unicode range");
- use Data::Dumper;
- diag Dumper \@tested;
- next;
- }
- }
-
pass("prop_invlist('$mod_table')");
}
}
@@ -1391,7 +1330,35 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
diag("The last inversion list element is not 0x110000");
next PROPERTY;
}
- if ($invmap_ref->[-1] ne $missing) {
+
+ my $upper_limit_subtract;
+
+ # prop_invmap() adds an extra element not present in the disk files for
+ # the above-Unicode code points. For almost all properties, that will be
+ # to $missing. In that case we don't look further at it when comparing
+ # with the disk files.
+ if ($invmap_ref->[-1] eq $missing) {
+ $upper_limit_subtract = 1;
+ }
+ elsif ($invmap_ref->[-1] eq 'Y' && ! grep { $_ !~ /[YN]/ } @$invmap_ref) {
+
+ # But that's not true for a few binary properties like 'Unassigned'
+ # that are Perl extensions (in this case for Gc=Unassigned) which
+ # match above-Unicode code points (hence the 'Y' in the test above).
+ # For properties where it isn't $missing, we're going to want to look
+ # at the whole thing when comparing with the disk file.
+ $upper_limit_subtract = 0;
+
+ # In those properties like 'Unassigned, the final element should be
+ # just a repetition of the next-to-last element, and won't be in the
+ # disk file, so remove it for the comparison. Otherwise, we will
+ # compare the whole of the array with the whole of the disk file.
+ if ($invlist_ref->[-2] <= 0x10FFFF && $invmap_ref->[-2] eq 'Y') {
+ pop @$invlist_ref;
+ pop @$invmap_ref;
+ }
+ }
+ else {
fail("prop_invmap('$display_prop')");
diag("The last inversion list element is '$invmap_ref->[-1]', and should be '$missing'");
next PROPERTY;
@@ -1705,9 +1672,10 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
# it's an error
my %specials = %$specials_ref if $specials_ref;
- # The extra -1 is because the final element has been tested above to
- # be for anything above Unicode. The file doesn't go that high.
- for (my $i = 0; $i < @$invlist_ref - 1; $i++) {
+ # The extra -$upper_limit_subtract is because the final element may
+ # have been tested above to be for anything above Unicode, in which
+ # case the file may not go that high.
+ for (my $i = 0; $i < @$invlist_ref - $upper_limit_subtract; $i++) {
# If the map element is a reference, have to stringify it (but
# don't do so if the format doesn't allow references, so that an
@@ -1899,7 +1867,9 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
# Finally have figured out what the map column in the file should
# be. Append the line to the running string.
my $start = $invlist_ref->[$i];
- my $end = $invlist_ref->[$i+1] - 1;
+ my $end = (defined $invlist_ref->[$i+1])
+ ? $invlist_ref->[$i+1] - 1
+ : $Unicode::UCD::MAX_CP;
$end = ($start == $end) ? "" : sprintf($file_range_format, $end);
if ($invmap_ref->[$i] ne "") {
$tested_map .= sprintf "$file_range_format\t%s\t%s\n",
@@ -1999,7 +1969,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
my @code_point_in_names =
@Unicode::UCD::code_points_ending_in_code_point;
- for my $i (0 .. @$invlist_ref - 1 - 1) {
+ for my $i (0 .. @$invlist_ref - 1 - $upper_limit_subtract) {
my $start = $invlist_ref->[$i];
my $end = $invlist_ref->[$i+1] - 1;
if ($invmap_ref->[$i] eq $missing) {
@@ -2105,10 +2075,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
my %maps;
my $previous_map;
- # (The extra -1 is to not look at the final element in the loop, which
- # we know is the one that starts just beyond Unicode and goes to
- # infinity.)
- for my $i (0 .. @$invlist_ref - 1 - 1) {
+ for my $i (0 .. @$invlist_ref - 1 - $upper_limit_subtract) {
my $range_start = $invlist_ref->[$i];
# Because we are sorting into buckets, things could be