diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-12-23 20:35:54 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-12-31 08:27:23 -0700 |
commit | 2d88a86a5910c97496b47b7b7c223f2c9a14b57c (patch) | |
tree | c0125ea6a9b6175c93245c4048773ae82e0f4efc /lib | |
parent | f215ab38f4d9ea2dca08fc71b38db0eb650d5107 (diff) | |
download | perl-2d88a86a5910c97496b47b7b7c223f2c9a14b57c.tar.gz |
Change \p{} matching for above-Unicode code points
http://markmail.org/message/eod7ukhbbh5tnll4 is the beginning of the
thread that led to this commit.
This commit revises the handling of \p{} and \P{} to treat above-Unicode
code points as typical Unicode unassigned ones, and only output a
warning during matching when the answer is arguable under strict Unicode
rules (that is, "matched" for \p{}, and "didn't match" for \P{}). The
exception is when the warning category has been made fatal, in which case it
tries hard to always output the warning. The definition of \p{All} is changed
to be qr/./s, and no warning is issued at all for matching it against
above-Unicode code points.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Unicode/UCD.pm | 92 | ||||
-rw-r--r-- | lib/Unicode/UCD.t | 117 | ||||
-rw-r--r-- | lib/unicore/mktables | 118 |
3 files changed, 157 insertions, 170 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 14752ae2b1..e4ae34e270 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -5,7 +5,7 @@ use warnings; no warnings 'surrogate'; # surrogates can be inputs to this use charnames (); -our $VERSION = '0.55'; +our $VERSION = '0.56'; require Exporter; @@ -2138,21 +2138,10 @@ too high for some operations to work; you may wish to use a smaller number for your purposes.) Note that the inversion lists returned by this function can possibly include -non-Unicode code points, that is anything above 0x10FFFF. This is in -contrast to Perl regular expression matches on those code points, in which a -non-Unicode code point always fails to match. For example, both of these have -the same result: - - chr(0x110000) =~ \p{ASCII_Hex_Digit=True} # Fails. - chr(0x110000) =~ \p{ASCII_Hex_Digit=False} # Fails! - -And both raise a warning that a Unicode property is being used on a -non-Unicode code point. It is arguable as to which is the correct thing to do -here. This function has chosen the way opposite to the Perl regular -expression behavior. This allows you to easily flip to the Perl regular -expression way (for you to go in the other direction would be far harder). -Simply add 0x110000 at the end of the non-empty returned list if it isn't -already that value; and pop that value if it is; like: +non-Unicode code points, that is anything above 0x10FFFF. Unicode properties +are not defined on such code points. You might wish to change the output to +not include these. Simply add 0x110000 at the end of the non-empty returned +list if it isn't already that value; and pop that value if it is; like: my @list = prop_invlist("foo"); if (@list) { @@ -2261,19 +2250,18 @@ sub prop_invlist ($;$) { if (defined $hex_end) { # The next item starts with the code point 1 # beyond the end of the range. 
- push @invlist, hex($hex_end) + 1; + no warnings 'portable'; + my $end = hex $hex_end; + last if $end == $Unicode::UCD::MAX_CP; + push @invlist, $end + 1; } else { # No end of range, is a single code point. push @invlist, $begin + 1; } } - require "unicore/UCD.pl"; - my $FIRST_NON_UNICODE = $MAX_UNICODE_CODEPOINT + 1; - # Could need to be inverted: add or subtract a 0 at the beginning of the - # list. And to keep it from matching non-Unicode, add or subtract the - # first non-unicode code point. + # list. if ($swash->{'INVERT_IT'}) { if (@invlist && $invlist[0] == 0) { shift @invlist; @@ -2281,46 +2269,6 @@ sub prop_invlist ($;$) { else { unshift @invlist, 0; } - if (@invlist && $invlist[-1] == $FIRST_NON_UNICODE) { - pop @invlist; - } - else { - push @invlist, $FIRST_NON_UNICODE; - } - } - - # Here, the list is set up to include only Unicode code points. But, if - # the table is the default one for the property, it should contain all - # non-Unicode code points. First calculate the loose name for the - # property. This is done even for strict-name properties, as the data - # structure that mktables generates for us is set up so that we don't have - # to worry about that. The property-value needs to be split if compound, - # as the loose rules need to be independently calculated on each part. We - # know that it is syntactically valid, or SWASHNEW would have failed. - - $prop = lc $prop; - my ($prop_only, $table) = split /\s*[:=]\s*/, $prop; - if ($table) { - - # May have optional prefixed 'is' - $prop = utf8::_loose_name($prop_only) =~ s/^is//r; - $prop = $utf8::loose_property_name_of{$prop}; - $prop .= "=" . utf8::_loose_name($table); - } - else { - $prop = utf8::_loose_name($prop); - } - if (exists $loose_defaults{$prop}) { - - # Here, is the default table. If a range ended with 10ffff, instead - # continue that range to infinity, by popping the 110000; otherwise, - # add the range from 11000 to infinity - if (! 
@invlist || $invlist[-1] != $FIRST_NON_UNICODE) { - push @invlist, $FIRST_NON_UNICODE; - } - else { - pop @invlist; - } } return @invlist; @@ -2349,8 +2297,8 @@ or even better, C<"Gc=LC">). Many Unicode properties have more than one name (or alias). C<prop_invmap> understands all of these, including Perl extensions to them. Ambiguities are resolved as described above for L</prop_aliases()>. The Perl internal -property "Perl_Decimal_Digit, described below, is also accepted. C<undef> is -returned if the property name is unknown. +property "Perl_Decimal_Digit, described below, is also accepted. An empty +list is returned if the property name is unknown. See L<perluniprops/Properties accessible through Unicode::UCD> for the properties acceptable as inputs to this function. @@ -3252,6 +3200,7 @@ RETRY: # Find the beginning and end of the range on the line my ($hex_begin, $hex_end, $map) = split "\t", $range; my $begin = hex $hex_begin; + no warnings 'portable'; my $end = (defined $hex_end && $hex_end ne "") ? hex $hex_end : $begin; @@ -3375,7 +3324,7 @@ RETRY: # to the default value. If there is no gap, the next iteration will # pop this, unless there is no next iteration, and we have filled all # of the Unicode code space, so check for that and skip. - if ($end < $MAX_UNICODE_CODEPOINT) { + if ($end < $Unicode::UCD::MAX_CP) { push @invlist, $end + 1; push @invmap, $missing; } @@ -3388,10 +3337,15 @@ RETRY: push @invmap, $missing; } - # And add in standard element that all non-Unicode code points map to: - # $missing - push @invlist, $MAX_UNICODE_CODEPOINT + 1; - push @invmap, $missing; + # The final element is always for just the above-Unicode code points. If + # not already there, add it. It merely splits the current final range + # that extends to infinity into two elements, each with the same map. + # (This is to conform with the API that says the final element is for + # $MAX_UNICODE_CODEPOINT + 1 .. INFINITY.) 
+ if ($invlist[-1] != $MAX_UNICODE_CODEPOINT + 1) { + push @invmap, $invmap[-1]; + push @invlist, $MAX_UNICODE_CODEPOINT + 1; + } # The second component of the map are those values that require # non-standard specification, stored in SPECIALS. These override any diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index c4b5a85098..b2caf8934c 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -1058,25 +1058,12 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of # If we are to test against an inverted file, it is easier to invert # our array than the file. - # The file only is valid for Unicode code points, while the inversion - # list is valid for all possible code points. Therefore, we must test - # just the Unicode part against the file. Later we will test for - # the non-Unicode part. - - my $before_invert; # Saves the pre-inverted table. if ($invert) { - $before_invert = dclone \@tested; if (@tested && $tested[0] == 0) { shift @tested; } else { unshift @tested, 0; } - if (@tested && $tested[-1] == 0x110000) { - pop @tested; - } - else { - push @tested, 0x110000; - } } # Now construct a string from the list that should match the file. @@ -1091,9 +1078,11 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of # otherwise don't get reflected in the file. my $tested = ""; my $i = 0; - for (; $i < @tested - 1; $i += 2) { + for (; $i < @tested; $i += 2) { my $start = $tested[$i]; - my $end = $tested[$i+1] - 1; + my $end = ($i + 1 < @tested) + ? $tested[$i+1] - 1 + : $Unicode::UCD::MAX_CP; if ($start == $end) { $tested .= sprintf("%X\n", $start); } @@ -1102,12 +1091,6 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of } } - # As mentioned earlier, the disk files only go up through Unicode, - # whereas the prop_invlist() ones go as high as necessary. The - # comparison is only valid through max Unicode. 
- if ($i == @tested - 1 && $tested[$i] <= 0x10FFFF) { - $tested .= sprintf("%X\t10FFFF\n", $tested[$i]); - } local $/ = "\n"; chomp $tested; $/ = $input_record_separator; @@ -1116,50 +1099,6 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of next; } - # Here, it matched the table. Now need to check for if it is correct - # for beyond Unicode. First, calculate if is the default table or - # not. This is the same algorithm as used internally in - # prop_invlist(), so if it is wrong there, this test won't catch it. - my $prop = lc $table; - ($prop_only, $table) = split /\s*[:=]\s*/, $prop; - if (defined $table) { - - # May have optional prefixed 'is' - $prop = &utf8::_loose_name($prop_only) =~ s/^is//r; - $prop = $utf8::loose_property_name_of{$prop}; - $prop .= "=" . &utf8::_loose_name($table); - } - else { - $prop = &utf8::_loose_name($prop); - } - my $is_default = exists $Unicode::UCD::loose_defaults{$prop}; - - @tested = @$before_invert if $invert; # Use the original - if (@tested % 2 == 0) { - - # If there are an even number of elements, the final one starts a - # range (going to infinity) of code points that are not in the - # list. - if ($is_default) { - fail("prop_invlist('$mod_table')"); - diag("default table doesn't goto infinity"); - use Data::Dumper; - diag Dumper \@tested; - next; - } - } - else { - # An odd number of elements means the final one starts a range - # (going to infinity of code points that are in the list. - if (! 
$is_default) { - fail("prop_invlist('$mod_table')"); - diag("non-default table needs to stop in the Unicode range"); - use Data::Dumper; - diag Dumper \@tested; - next; - } - } - pass("prop_invlist('$mod_table')"); } } @@ -1391,7 +1330,35 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { diag("The last inversion list element is not 0x110000"); next PROPERTY; } - if ($invmap_ref->[-1] ne $missing) { + + my $upper_limit_subtract; + + # prop_invmap() adds an extra element not present in the disk files for + # the above-Unicode code points. For almost all properties, that will be + # to $missing. In that case we don't look further at it when comparing + # with the disk files. + if ($invmap_ref->[-1] eq $missing) { + $upper_limit_subtract = 1; + } + elsif ($invmap_ref->[-1] eq 'Y' && ! grep { $_ !~ /[YN]/ } @$invmap_ref) { + + # But that's not true for a few binary properties like 'Unassigned' + # that are Perl extensions (in this case for Gc=Unassigned) which + # match above-Unicode code points (hence the 'Y' in the test above). + # For properties where it isn't $missing, we're going to want to look + # at the whole thing when comparing with the disk file. + $upper_limit_subtract = 0; + + # In those properties like 'Unassigned, the final element should be + # just a repetition of the next-to-last element, and won't be in the + # disk file, so remove it for the comparison. Otherwise, we will + # compare the whole of the array with the whole of the disk file. 
+ if ($invlist_ref->[-2] <= 0x10FFFF && $invmap_ref->[-2] eq 'Y') { + pop @$invlist_ref; + pop @$invmap_ref; + } + } + else { fail("prop_invmap('$display_prop')"); diag("The last inversion list element is '$invmap_ref->[-1]', and should be '$missing'"); next PROPERTY; @@ -1705,9 +1672,10 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { # it's an error my %specials = %$specials_ref if $specials_ref; - # The extra -1 is because the final element has been tested above to - # be for anything above Unicode. The file doesn't go that high. - for (my $i = 0; $i < @$invlist_ref - 1; $i++) { + # The extra -$upper_limit_subtract is because the final element may + # have been tested above to be for anything above Unicode, in which + # case the file may not go that high. + for (my $i = 0; $i < @$invlist_ref - $upper_limit_subtract; $i++) { # If the map element is a reference, have to stringify it (but # don't do so if the format doesn't allow references, so that an @@ -1899,7 +1867,9 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { # Finally have figured out what the map column in the file should # be. Append the line to the running string. my $start = $invlist_ref->[$i]; - my $end = $invlist_ref->[$i+1] - 1; + my $end = (defined $invlist_ref->[$i+1]) + ? $invlist_ref->[$i+1] - 1 + : $Unicode::UCD::MAX_CP; $end = ($start == $end) ? "" : sprintf($file_range_format, $end); if ($invmap_ref->[$i] ne "") { $tested_map .= sprintf "$file_range_format\t%s\t%s\n", @@ -1999,7 +1969,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { my @code_point_in_names = @Unicode::UCD::code_points_ending_in_code_point; - for my $i (0 .. @$invlist_ref - 1 - 1) { + for my $i (0 .. 
@$invlist_ref - 1 - $upper_limit_subtract) { my $start = $invlist_ref->[$i]; my $end = $invlist_ref->[$i+1] - 1; if ($invmap_ref->[$i] eq $missing) { @@ -2105,10 +2075,7 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) { my %maps; my $previous_map; - # (The extra -1 is to not look at the final element in the loop, which - # we know is the one that starts just beyond Unicode and goes to - # infinity.) - for my $i (0 .. @$invlist_ref - 1 - 1) { + for my $i (0 .. @$invlist_ref - 1 - $upper_limit_subtract) { my $range_start = $invlist_ref->[$i]; # Because we are sorting into buckets, things could be diff --git a/lib/unicore/mktables b/lib/unicore/mktables index f3d5c83d58..b94433fe2c 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -1200,6 +1200,18 @@ my $MAX_UNICODE_CODEPOINT_STRING = "10FFFF"; my $MAX_UNICODE_CODEPOINT = hex $MAX_UNICODE_CODEPOINT_STRING; my $MAX_UNICODE_CODEPOINTS = $MAX_UNICODE_CODEPOINT + 1; +# We work with above-Unicode code points, up to UV_MAX. But when you get +# that high, above IV_MAX, some operations don't work, and you can easily get +# overflow. Therefore for internal use, we use a much smaller number, +# translating it to UV_MAX only for output. The exact number is immaterial +# (all Unicode code points are treated exactly the same), but the algorithm +# requires it to be at least 2 * $MAX_UNICODE_CODEPOINTS + 1; +my $MAX_WORKING_CODEPOINTS= $MAX_UNICODE_CODEPOINT * 8; +my $MAX_WORKING_CODEPOINT = $MAX_WORKING_CODEPOINTS - 1; +my $MAX_WORKING_CODEPOINT_STRING = sprintf("%X", $MAX_WORKING_CODEPOINT); + +my $MAX_PLATFORM_CODEPOINT = ~0; + # Matches legal code point. 4-6 hex numbers, If there are 6, the first # two must be 10; if there are 5, the first must not be a 0. Written this way # to decrease backtracking. 
The first regex allows the code point to be at @@ -1531,7 +1543,8 @@ my $UNASSIGNED_TYPE = -2; my $PRIVATE_USE_TYPE = -3; my $NONCHARACTER_TYPE = -4; my $CONTROL_TYPE = -5; -my $UNKNOWN_TYPE = -6; # Used only if there is a bug in this program +my $ABOVE_UNICODE_TYPE = -6; +my $UNKNOWN_TYPE = -7; # Used only if there is a bug in this program sub populate_char_info ($) { # Used only with the $annotate option. Populates the arrays with the @@ -1562,7 +1575,13 @@ sub populate_char_info ($) { my $end; if (! $viacode[$i]) { my $nonchar; - if ($gc-> table('Private_use')->contains($i)) { + if ($i > $MAX_UNICODE_CODEPOINT) { + $viacode[$i] = 'Above-Unicode'; + $annotate_char_type[$i] = $ABOVE_UNICODE_TYPE; + $printable[$i] = 0; + $end = $MAX_WORKING_CODEPOINT; + } + elsif ($gc-> table('Private_use')->contains($i)) { $viacode[$i] = 'Private Use'; $annotate_char_type[$i] = $PRIVATE_USE_TYPE; $printable[$i] = 0; @@ -1715,7 +1734,15 @@ sub clarify_code_point_count ($) { # This is like clarify_number(), but the input is assumed to be a count of # code points, rather than a generic number. - return clarify_number(shift); + my $append = ""; + + my $number = shift; + if ($number > $MAX_UNICODE_CODEPOINTS) { + $number -= ($MAX_WORKING_CODEPOINTS - $MAX_UNICODE_CODEPOINTS); + return "All above-Unicode code points" if $number == 0; + $append = " + all above-Unicode code points"; + } + return clarify_number($number) . $append; } package Carp; @@ -3450,7 +3477,7 @@ sub trace { return main::trace(@_); } # If the range list is empty, return a large value that isn't adjacent # to any that could be in the range list, for simpler tests - return $MAX_UNICODE_CODEPOINT + 2 unless scalar @{$ranges{$addr}}; + return $MAX_WORKING_CODEPOINT + 2 unless scalar @{$ranges{$addr}}; return $ranges{$addr}->[0]->start; } @@ -3729,9 +3756,6 @@ sub trace { return main::trace(@_); } Carp::my_carp_bug("$owner_name_of{$addr}End of range (" . sprintf("%04X", $end) . ") must not be before start (" . 
sprintf("%04X", $start) . "). No action taken."); return; } - if ($end > $MAX_UNICODE_CODEPOINT && $operation eq '+') { - Carp::my_carp("$owner_name_of{$addr}Warning: Range '" . sprintf("%04X..%04X", $start, $end) . ") is above the Unicode maximum of " . sprintf("%04X", $MAX_UNICODE_CODEPOINT) . ". Adding it anyway"); - } #local $to_trace = 1 if main::DEBUG; if ($operation eq '-') { @@ -4529,8 +4553,8 @@ sub trace { return main::trace(@_); } # And finally, add the gap from the end of the table to the max # possible code point - if ($max < $MAX_UNICODE_CODEPOINT) { - $new->add_range($max + 1, $MAX_UNICODE_CODEPOINT); + if ($max < $MAX_WORKING_CODEPOINT) { + $new->add_range($max + 1, $MAX_WORKING_CODEPOINT); } return $new; } @@ -4819,6 +4843,7 @@ sub trace { return main::trace(@_); } # range. my $end = $set->end; return $end if is_code_point_usable($end, $try_hard); + $end = $MAX_UNICODE_CODEPOINT + 1 if $end > $MAX_UNICODE_CODEPOINT; # End point didn't, work. Start at the beginning and try # every one until find one that does work. 
@@ -5722,6 +5747,7 @@ END my $next_end; my $next_value; my $offset = 0; + my $invlist_count = 0; my $output_value_in_hex = $self->isa('Map_Table') && ($self->format eq $HEX_ADJUST_FORMAT @@ -5855,9 +5881,16 @@ END # If there is a range if ($start != $end) { - push @OUT, sprintf "$hex_format\t$hex_format", - $start, $end; - if ($value ne "") { + if ($end == $MAX_WORKING_CODEPOINT) { + push @OUT, sprintf "$hex_format\t$hex_format", + $start, + $MAX_PLATFORM_CODEPOINT; + } + else { + push @OUT, sprintf "$hex_format\t$hex_format", + $start, $end; + } + if (length $value) { if ($convert_map_to_from_hex) { $OUT[-1] .= sprintf "\t$hex_format\n", $value; } @@ -5958,8 +5991,15 @@ END } if ($i != $start || $range_end < $end) { - $annotation = sprintf "%04X..%04X", - $i, $range_end; + if ($range_end < $MAX_WORKING_CODEPOINT) + { + $annotation = sprintf "%04X..%04X", + $i, $range_end; + } + else { + $annotation = sprintf "%04X..INFINITY", + $i; + } } else { # Indent if not displaying code points $annotation = " " x 4; @@ -7696,7 +7736,18 @@ END # Get the number of code points matched by each of the tables in this # file, and add underscores for clarity. my $count = $leader->count; - my $string_count = main::clarify_code_point_count($count); + my $unicode_count; + my $non_unicode_string; + if ($count > $MAX_UNICODE_CODEPOINTS) { + $unicode_count = $count - ($MAX_WORKING_CODEPOINT + - $MAX_UNICODE_CODEPOINT); + $non_unicode_string = "All above-Unicode code points match as well, and are also returned"; + } + else { + $unicode_count = $count; + $non_unicode_string = ""; + } + my $string_count = main::clarify_code_point_count($unicode_count); my $loose_count = 0; # how many aliases loosely matched my $compound_name = ""; # ? 
Are any names compound?, and if so, an @@ -7894,11 +7945,13 @@ END } } # End of looping through all tables + $matches_comment .= "\n$non_unicode_string\n" if $non_unicode_string; + my $code_points; my $match; my $any_of_these; - if ($count == 1) { + if ($unicode_count == 1) { $match = 'matches'; $code_points = 'single code point'; } @@ -12999,7 +13052,7 @@ END # This fills in any missing values with the default. It's not # necessary to do this with binary properties, as the default # is defined completely in terms of the Y table. - $property->add_map(0, $MAX_UNICODE_CODEPOINT, + $property->add_map(0, $MAX_WORKING_CODEPOINT, $default_map, Replace => $NO); } } @@ -13211,8 +13264,9 @@ sub compile_perl() { # 'All' is all code points. As an error check, instead of just setting it # to be that, construct it to be the union of all the major categories $All = $perl->add_match_table('All', - Description => "[\\x{0000}-\\x{$MAX_UNICODE_CODEPOINT_STRING}]", - Matches_All => 1); + Description + => "All code points, including those above Unicode. Same as qr/./s", + Matches_All => 1); foreach my $major_table ($gc->tables) { @@ -13222,10 +13276,10 @@ sub compile_perl() { $All += $major_table; } - if ($All->max != $MAX_UNICODE_CODEPOINT) { + if ($All->max != $MAX_WORKING_CODEPOINT) { Carp::my_carp_bug("Generated highest code point (" . sprintf("%X", $All->max) - . ") doesn't match expected value $MAX_UNICODE_CODEPOINT_STRING.") + . 
") doesn't match expected value $MAX_WORKING_CODEPOINT_STRING.") } if ($All->range_count != 1 || $All->min != 0) { Carp::my_carp_bug("Generated table 'All' doesn't match all code points.") @@ -14284,8 +14338,9 @@ END $unassigned_sans_noncharacters &= $nonchars->table('N'); } - for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT; $i++ ) { + for (my $i = 0; $i <= $MAX_UNICODE_CODEPOINT + 1; $i++ ) { $i = populate_char_info($i); # Note sets $i so may cause skips + } } @@ -14937,7 +14992,18 @@ sub make_re_pod_entries($) { my $full_name = $property->full_name; my $count = $input_table->count; - my $string_count = clarify_number($count); + my $unicode_count; + my $non_unicode_string; + if ($count > $MAX_UNICODE_CODEPOINTS) { + $unicode_count = $count - ($MAX_WORKING_CODEPOINT + - $MAX_UNICODE_CODEPOINT); + $non_unicode_string = " plus all above-Unicode code points"; + } + else { + $unicode_count = $count; + $non_unicode_string = ""; + } + my $string_count = clarify_number($unicode_count) . $non_unicode_string; my $status = $input_table->status; my $status_info = $input_table->status_info; my $caseless_equivalent = $input_table->caseless_equivalent; @@ -16840,13 +16906,13 @@ sub write_all_tables() { my $count = $table->count; if ($expected_full) { - if ($count != $MAX_UNICODE_CODEPOINTS) { + if ($count != $MAX_WORKING_CODEPOINTS) { Carp::my_carp("$table matches only " . clarify_number($count) . " Unicode code points but should match " - . clarify_number($MAX_UNICODE_CODEPOINTS) + . clarify_number($MAX_WORKING_CODEPOINTS) . " (off by " - . clarify_number(abs($MAX_UNICODE_CODEPOINTS - $count)) + . clarify_number(abs($MAX_WORKING_CODEPOINTS - $count)) . "). Proceeding anyway."); } |