summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--charclass_invlists.h2
-rw-r--r--lib/unicore/mktables77
-rw-r--r--lib/unicore/uni_keywords.pl2
-rw-r--r--regcharclass.h2
-rw-r--r--uni_keywords.h2
5 files changed, 65 insertions, 20 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index b9a17d3b6c..f010188578 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -419812,7 +419812,7 @@ static const U8 WB_table[24][24] = {
* 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 45e23c57b8ddcfed895b1b7b8869e79f2336b9c3b2432b55f051b426ab5a15c6 lib/unicore/mktables
+ * 93f508a690aa8949f213d50b573710f0b4a4e843c17283938035ecf19e0220e2 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 52c680f445..1820ad3a30 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -2375,6 +2375,11 @@ sub trace { return main::trace(@_); }
# giving the first release without this file.
main::set_access('withdrawn', \%withdrawn, 'c');
+ my %ucd;
+ # Some files are not actually part of the Unicode Character Database.
+ # These typically have a different way of indicating their version.
+ main::set_access('ucd', \%ucd, 'c');
+
my %in_this_release;
# Calculated value from %first_released and %withdrawn. Are we compiling
# a Unicode release which includes this file?
@@ -2404,6 +2409,7 @@ sub trace { return main::trace(@_); }
$missings{$addr} = [ ];
$early{$addr} = [ ];
$optional{$addr} = [ ];
+ $ucd{$addr} = 1;
# Two positional parameters.
return Carp::carp_too_few_args(\@_, 2) if main::DEBUG && @_ < 2;
@@ -2839,6 +2845,8 @@ END
&& ! $early{$addr}[0]
&& lc($file) ne 'unicodedata.txt')
{
+ my $this_version;
+
if ($file !~ /^Unihan/i) {
# The non-Unihan files started getting version numbers in
@@ -2849,17 +2857,39 @@ END
# numbers are correct.
if ($v_version ge v4.0.1) {
$_ = <$file_handle>; # The version number is in the
- # very first line
- if ($_ !~ / - $string_version \. /x) {
- chomp;
+ # very first line if it is a
+ # UCD file; otherwise, it
+ # might be on a later line
+ goto valid_version if $_ =~ / - $string_version \. /x;
+ chomp;
+ if ($ucd{$addr}) {
$_ =~ s/^#\s*//;
# 4.0.1 had some valid files that weren't updated.
- if (! ($v_version eq v4.0.1 && $_ =~ /4\.0\.0/)) {
- die Carp::my_carp("File '$file' is version "
- . "'$_'. It should be "
- . "version $string_version");
+ goto valid_version
+ if $v_version eq v4.0.1 && $_ =~ /4\.0\.0/;
+ $this_version = $_;
+ goto wrong_version;
+ }
+ else {
+ my $BOM = "\x{FEFF}";
+ utf8::encode($BOM);
+ my $BOM_re = qr/ ^ (?:$BOM)? /x;
+
+ while ($_ =~ s/$BOM_re//) { # BOM; seems to be on
+ # many lines in some files!!
+ $_ = <$file_handle>;
+ chomp;
+ if ($_ =~ /^# Version: (.*)/) {
+ $this_version = $1;
+ goto valid_version
+ if $this_version eq $string_version;
+ goto valid_version
+ if "$this_version.0" eq $string_version;
+ goto wrong_version;
+ }
}
+ goto no_version;
}
}
}
@@ -2869,23 +2899,30 @@ END
# 6.0. The version is somewhere in the first comment
# block
while (<$file_handle>) {
- if ($_ !~ /^#/) {
- Carp::my_carp_bug("Could not find the expected "
- . "version info in file '$file'");
- last;
- }
+ goto no_version if $_ !~ /^#/;
chomp;
$_ =~ s/^#\s*//;
next if $_ !~ / version: /x;
- last if $_ =~ /$string_version/;
- die Carp::my_carp("File '$file' is version "
- . "'$_'. It should be "
- . "version $string_version");
+ goto valid_version if $_ =~ /$string_version/;
+ goto wrong_version;
}
+ goto no_version;
+ }
+ else { # Old Unihan; have to assume is valid
+ goto valid_version;
}
+
+ wrong_version:
+ die Carp::my_carp("File '$file' is version "
+ . "'$this_version'. It should be "
+ . "version $string_version");
+ no_version:
+ Carp::my_carp_bug("Could not find the expected "
+ . "version info in file '$file'");
}
}
+ valid_version:
print "$progress_message{$addr}\n" if $verbosity >= $PROGRESS;
# Call any special handler for before the file.
@@ -20167,18 +20204,26 @@ my @input_file_objects = (
Pre_Handler => \&setup_emojidata,
Has_Missings_Defaults => $NOT_IGNORED,
Each_Line_Handler => \&filter_emojidata_line,
+ UCD => 0,
),
Input_file->new("$EMOJI/emoji.txt", v13.0.0,
Has_Missings_Defaults => $NOT_IGNORED,
+ UCD => 0,
+ ),
+ Input_file->new("$EMOJI/ReadMe.txt", v13.0.0,
+ Skip => $Documentation,
+ UCD => 0,
),
Input_file->new('IdStatus.txt', v13.0.0,
Pre_Handler => \&setup_IdStatus,
Property => 'Identifier_Status',
+ UCD => 0,
),
Input_file->new('IdType.txt', v13.0.0,
Pre_Handler => \&setup_IdType,
Each_Line_Handler => \&filter_IdType_line,
Property => 'Identifier_Type',
+ UCD => 0,
),
);
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
index e222178691..7fd25b241c 100644
--- a/lib/unicore/uni_keywords.pl
+++ b/lib/unicore/uni_keywords.pl
@@ -1295,7 +1295,7 @@
# 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt
# 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
# 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
-# 45e23c57b8ddcfed895b1b7b8869e79f2336b9c3b2432b55f051b426ab5a15c6 lib/unicore/mktables
+# 93f508a690aa8949f213d50b573710f0b4a4e843c17283938035ecf19e0220e2 lib/unicore/mktables
# 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
# 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
# 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
diff --git a/regcharclass.h b/regcharclass.h
index f315cb464d..f8e9f0ab68 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -2247,7 +2247,7 @@
* 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 45e23c57b8ddcfed895b1b7b8869e79f2336b9c3b2432b55f051b426ab5a15c6 lib/unicore/mktables
+ * 93f508a690aa8949f213d50b573710f0b4a4e843c17283938035ecf19e0220e2 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* f9a393e7add8c7c2728356473ce5b52246d51295b2da0c48fb6f0aa21799e2bb regen/regcharclass.pl
diff --git a/uni_keywords.h b/uni_keywords.h
index f754c9dda5..be271a14fc 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -7540,7 +7540,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
* 0fea35394151afefbb4121b6380db1b480be6f9bafb4eba3382dc292dcf68526 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 45e23c57b8ddcfed895b1b7b8869e79f2336b9c3b2432b55f051b426ab5a15c6 lib/unicore/mktables
+ * 93f508a690aa8949f213d50b573710f0b4a4e843c17283938035ecf19e0220e2 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl