diff options
-rw-r--r-- | charclass_invlists.h | 2 | ||||
-rw-r--r-- | lib/unicore/mktables | 87 | ||||
-rw-r--r-- | lib/unicore/uni_keywords.pl | 2 | ||||
-rw-r--r-- | regcharclass.h | 2 | ||||
-rw-r--r-- | uni_keywords.h | 2 |
5 files changed, 81 insertions, 14 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index dde3391a7f..38799d0e28 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -430752,7 +430752,7 @@ static const U8 WB_table[23][23] = { * 43f6df50e4878f501b417e366b0ee097ae5ccb2d4ce942026bed3d62d78e7887 lib/unicore/extracted/DLineBreak.txt * a04502ebb36a45d83cbe48a7d8132ea8143edb7b3d34d0aa6afe4a9685049741 lib/unicore/extracted/DNumType.txt * 11075771b112e8e7ccf6ffa637c4c91eadc3ef3db0517b24e605df8fd3624239 lib/unicore/extracted/DNumValues.txt - * d97aeb4312c8fdc0f44654834108596ecdf5d03c8fca231d6def4338687a89c9 lib/unicore/mktables + * 3f7a81c6f40611d1e68f5e42699368ce95d36ca8a852ca5a252b41cee055391a lib/unicore/mktables * c72bbdeda99714db1c8024d3311da4aef3c0db3b9b9f11455a7cfe10d5e9aba3 lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl * c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 4c9dc23bc0..af4ed02869 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -10945,12 +10945,19 @@ sub output_perl_charnames_line ($code_point, $name) { # stored by the Input_file class until we access it here. # It's possible that there is more than one such line # waiting for us; collect them all, and parse - my @missings_list = $file->get_missings + my @missings_list; + @missings_list = $file->get_missings if $file->has_missings_defaults; foreach my $default_ref (@missings_list) { - my $default = $default_ref->{default}; - my $addr = pack 'J', refaddr property_ref($default_ref->{property}); + + # For now, we are only interested in the fallback + # default for the entire property. i.e., an @missing + # line that is for the whole Unicode range. + next if $default_ref->{start} != 0 + || $default_ref->{end} != $MAX_UNICODE_CODEPOINT; + + $default_map = $default_ref->{default}; # For string properties, the default is just what the # file says, but non-string properties should already @@ -10960,16 +10967,76 @@ sub output_perl_charnames_line ($code_point, $name) { if ($property_type == $STRING || $property_type == $UNKNOWN) { - $this_property_info->{$MISSINGS} = $default; + $this_property_info->{$MISSINGS} = $default_map; } else { - $this_property_info->{$MISSINGS} - = $property_object->table($default); + $default_map = + $property_object->table($default_map)->full_name; + $this_property_info->{$MISSINGS} = $default_map; + $this_property_info->{$DEFAULT_MAP} = $default_map; + if (! defined $property_object->default_map) { + $property_object->set_default_map($default_map); + } } } - # Here, we have $default_map defined, possibly in terms of - # $missings, but maybe not, and possibly is a dummy one. + # For later Unicode versions, multiple @missing lines for + # a single property can appear in the files. The first + # always applies to the entire Unicode range, and was + # handled above. The subsequent ones are for smaller + # ranges, and can be read as "But for this range, the + # default is ...". So each overrides all the preceding + # ones for the range it applies to. Typically they apply + # to disjoint ranges, but don't have to. What we do is to + # set them up to work in reverse order, so that after the + # rest of the table is filled, the highest priority + # default range fills in any code points that haven't been + # specified; then the next highest priority one is + # applied, and so forth. + if (@missings_list > 1 && $v_version ge v15.0.0) { + if ($property_type != $ENUM) { + Carp::my_carp_bug("Multiple \@missings lines only" + . " make sense for ENUM-type" + . " properties. Changing type to" + . " that"); + $property_type = $this_property_info->{$TYPE} + = $ENUM; + $property_object->set_type($ENUM); + } + + my $multi = Multi_Default->new(); + + # The overall default should be first on this list, + # and is handled differently than the rest. + $default_map = shift @missings_list; + Carp::my_carp_bug("\@missings needs to be entire range") + if $default_map->{start} != 0 + || $default_map->{end} != $MAX_UNICODE_CODEPOINT; + + # We already have looked at this line above. Use that + # result + $multi->set_final_default($this_property_info-> + {$MISSINGS}); + + # Now get the individual range elements, and add them + # to Multi_Default object + while (@missings_list) { + my $this_entry = pop @missings_list; + my $subrange_default = $this_entry->{default}; + + # Use the short name as a standard + $subrange_default = $property_object-> + table($subrange_default)->short_name; + $multi->append_default($subrange_default, + "Range_List->new(Initialize => Range->new(" + . "$this_entry->{start}, $this_entry->{end}))"); + } + + # Override the property's simple default with this. + $property_object->set_default_map($multi); + } + + if (! $default_map || $property_type != $ENUM) { # Finished storing all the @missings defaults in the # input file so far. Get the one for the current @@ -11066,7 +11133,7 @@ END = $this_property_info->{$DEFAULT_TABLE} = $property_object->table($default_map); } - + } } # End of is first time for this property } # End of switching properties. @@ -13422,7 +13489,7 @@ END # Add mappings to the property for each code point in the list foreach my $range ($list->ranges) { $property->add_map($range->start, $range->end, $default, - Replace => $CROAK); + Replace => $NO); } } diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index bda496f96a..8c82cccf1c 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1320,7 +1320,7 @@ # 43f6df50e4878f501b417e366b0ee097ae5ccb2d4ce942026bed3d62d78e7887 lib/unicore/extracted/DLineBreak.txt # a04502ebb36a45d83cbe48a7d8132ea8143edb7b3d34d0aa6afe4a9685049741 lib/unicore/extracted/DNumType.txt # 11075771b112e8e7ccf6ffa637c4c91eadc3ef3db0517b24e605df8fd3624239 lib/unicore/extracted/DNumValues.txt -# d97aeb4312c8fdc0f44654834108596ecdf5d03c8fca231d6def4338687a89c9 lib/unicore/mktables +# 3f7a81c6f40611d1e68f5e42699368ce95d36ca8a852ca5a252b41cee055391a lib/unicore/mktables # c72bbdeda99714db1c8024d3311da4aef3c0db3b9b9f11455a7cfe10d5e9aba3 lib/unicore/version # 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl # c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl diff --git a/regcharclass.h b/regcharclass.h index 812baaf73e..685427e32e 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -3850,7 +3850,7 @@ * 43f6df50e4878f501b417e366b0ee097ae5ccb2d4ce942026bed3d62d78e7887 lib/unicore/extracted/DLineBreak.txt * a04502ebb36a45d83cbe48a7d8132ea8143edb7b3d34d0aa6afe4a9685049741 lib/unicore/extracted/DNumType.txt * 11075771b112e8e7ccf6ffa637c4c91eadc3ef3db0517b24e605df8fd3624239 lib/unicore/extracted/DNumValues.txt - * d97aeb4312c8fdc0f44654834108596ecdf5d03c8fca231d6def4338687a89c9 lib/unicore/mktables + * 3f7a81c6f40611d1e68f5e42699368ce95d36ca8a852ca5a252b41cee055391a lib/unicore/mktables * c72bbdeda99714db1c8024d3311da4aef3c0db3b9b9f11455a7cfe10d5e9aba3 lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl * acc94e4afc339fe2cf2ae74d6e1cbcf2c396328d78e56236ad314eadbfc84125 regen/regcharclass.pl diff --git a/uni_keywords.h b/uni_keywords.h index 9c905f5d6b..8baec415bb 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7677,7 +7677,7 @@ match_uniprop( const unsigned char * const key, const U16 key_len ) { * 43f6df50e4878f501b417e366b0ee097ae5ccb2d4ce942026bed3d62d78e7887 lib/unicore/extracted/DLineBreak.txt * a04502ebb36a45d83cbe48a7d8132ea8143edb7b3d34d0aa6afe4a9685049741 lib/unicore/extracted/DNumType.txt * 11075771b112e8e7ccf6ffa637c4c91eadc3ef3db0517b24e605df8fd3624239 lib/unicore/extracted/DNumValues.txt - * d97aeb4312c8fdc0f44654834108596ecdf5d03c8fca231d6def4338687a89c9 lib/unicore/mktables + * 3f7a81c6f40611d1e68f5e42699368ce95d36ca8a852ca5a252b41cee055391a lib/unicore/mktables * c72bbdeda99714db1c8024d3311da4aef3c0db3b9b9f11455a7cfe10d5e9aba3 lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl * c7ff8e0d207d3538c7feb4a1a152b159e5e902d20293b303569ea8323e84633e regen/mk_PL_charclass.pl |