diff options
-rw-r--r-- | charclass_invlists.h | 2 | ||||
-rw-r--r-- | lib/unicore/mktables | 77 | ||||
-rw-r--r-- | lib/unicore/uni_keywords.pl | 2 | ||||
-rw-r--r-- | regcharclass.h | 2 | ||||
-rw-r--r-- | uni_keywords.h | 2 |
5 files changed, 65 insertions, 20 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index b9a17d3b6c..f010188578 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -419812,7 +419812,7 @@ static const U8 WB_table[24][24] = { * 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * 45e23c57b8ddcfed895b1b7b8869e79f2336b9c3b2432b55f051b426ab5a15c6 lib/unicore/mktables + * 93f508a690aa8949f213d50b573710f0b4a4e843c17283938035ecf19e0220e2 lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 52c680f445..1820ad3a30 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -2375,6 +2375,11 @@ sub trace { return main::trace(@_); } # giving the first release without this file. main::set_access('withdrawn', \%withdrawn, 'c'); + my %ucd; + # Some files are not actually part of the Unicode Character Database. + # These typically have a different way of indicating their version + main::set_access('ucd', \%ucd, 'c'); + my %in_this_release; # Calculated value from %first_released and %withdrawn. Are we compiling # a Unicode release which includes this file? @@ -2404,6 +2409,7 @@ sub trace { return main::trace(@_); } $missings{$addr} = [ ]; $early{$addr} = [ ]; $optional{$addr} = [ ]; + $ucd{$addr} = 1; # Two positional parameters. return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2; @@ -2839,6 +2845,8 @@ END && ! $early{$addr}[0] && lc($file) ne 'unicodedata.txt') { + my $this_version; + if ($file !~ /^Unihan/i) { # The non-Unihan files started getting version numbers in @@ -2849,17 +2857,39 @@ END # numbers are correct. if ($v_version ge v4.0.1) { $_ = <$file_handle>; # The version number is in the - # very first line - if ($_ !~ / - $string_version \. /x) { - chomp; + # very first line if it is a + # UCD file; otherwise, it + # might be + goto valid_version if $_ =~ / - $string_version \. /x; + chomp; + if ($ucd{$addr}) { $_ =~ s/^#\s*//; # 4.0.1 had some valid files that weren't updated. - if (! ($v_version eq v4.0.1 && $_ =~ /4\.0\.0/)) { - die Carp::my_carp("File '$file' is version " - . "'$_'. It should be " - . "version $string_version"); + goto valid_version + if $v_version eq v4.0.1 && $_ =~ /4\.0\.0/; + $this_version = $_; + goto wrong_version; + } + else { + my $BOM = "\x{FEFF}"; + utf8::encode($BOM); + my $BOM_re = qr/ ^ (?:$BOM)? /x; + + while ($_ =~ s/$BOM_re//) { # BOM; seems to be on + # many lines in some files!! + $_ = <$file_handle>; + chomp; + if ($_ =~ /^# Version: (.*)/) { + $this_version = $1; + goto valid_version + if $this_version eq $string_version; + goto valid_version + if "$this_version.0" eq $string_version; + goto wrong_version; + } } + goto no_version; } } } @@ -2869,23 +2899,30 @@ END # 6.0. The version is somewhere in the first comment # block while (<$file_handle>) { - if ($_ !~ /^#/) { - Carp::my_carp_bug("Could not find the expected " - . "version info in file '$file'"); - last; - } + goto no_version if $_ !~ /^#/; chomp; $_ =~ s/^#\s*//; next if $_ !~ / version: /x; - last if $_ =~ /$string_version/; - die Carp::my_carp("File '$file' is version " - . "'$_'. It should be " - . "version $string_version"); + goto valid_version if $_ =~ /$string_version/; + goto wrong_version; } + goto no_version; + } + else { # Old Unihan; have to assume is valid + goto valid_version; } + + wrong_version: + die Carp::my_carp("File '$file' is version " + . "'$this_version'. It should be " + . "version $string_version"); + no_version: + Carp::my_carp_bug("Could not find the expected " + . "version info in file '$file'"); } } + valid_version: print "$progress_message{$addr}\n" if $verbosity >= $PROGRESS; # Call any special handler for before the file. @@ -20167,18 +20204,26 @@ my @input_file_objects = ( Pre_Handler => \&setup_emojidata, Has_Missings_Defaults => $NOT_IGNORED, Each_Line_Handler => \&filter_emojidata_line, + UCD => 0, ), Input_file->new("$EMOJI/emoji.txt", v13.0.0, Has_Missings_Defaults => $NOT_IGNORED, + UCD => 0, + ), + Input_file->new("$EMOJI/ReadMe.txt", v13.0.0, + Skip => $Documentation, + UCD => 0, ), Input_file->new('IdStatus.txt', v13.0.0, Pre_Handler => \&setup_IdStatus, Property => 'Identifier_Status', + UCD => 0, ), Input_file->new('IdType.txt', v13.0.0, Pre_Handler => \&setup_IdType, Each_Line_Handler => \&filter_IdType_line, Property => 'Identifier_Type', + UCD => 0, ), ); diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index e222178691..7fd25b241c 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1295,7 +1295,7 @@ # 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt # 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt # 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt -# 45e23c57b8ddcfed895b1b7b8869e79f2336b9c3b2432b55f051b426ab5a15c6 lib/unicore/mktables +# 93f508a690aa8949f213d50b573710f0b4a4e843c17283938035ecf19e0220e2 lib/unicore/mktables # 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl # 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl diff --git a/regcharclass.h b/regcharclass.h index f315cb464d..f8e9f0ab68 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -2247,7 +2247,7 @@ * 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * 45e23c57b8ddcfed895b1b7b8869e79f2336b9c3b2432b55f051b426ab5a15c6 lib/unicore/mktables + * 93f508a690aa8949f213d50b573710f0b4a4e843c17283938035ecf19e0220e2 lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * f9a393e7add8c7c2728356473ce5b52246d51295b2da0c48fb6f0aa21799e2bb regen/regcharclass.pl diff --git a/uni_keywords.h b/uni_keywords.h index f754c9dda5..be271a14fc 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7540,7 +7540,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * 45e23c57b8ddcfed895b1b7b8869e79f2336b9c3b2432b55f051b426ab5a15c6 lib/unicore/mktables + * 93f508a690aa8949f213d50b573710f0b4a4e843c17283938035ecf19e0220e2 lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl |