diff options
author | Karl Williamson <khw@cpan.org> | 2020-03-19 22:02:38 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2020-03-20 07:44:30 -0600 |
commit | c08e9114637298ea7e24716942136ff527afb319 (patch) | |
tree | 6d94d07da6b8ab0dd5aab1d6d3420df15cf79785 | |
parent | fc1663a752718c68d3f49774da8d030b2dbc6ff8 (diff) | |
download | perl-c08e9114637298ea7e24716942136ff527afb319.tar.gz |
mktables: Change named sequences to 5 digits
This makes the code points in named sequences use the same 5-hex-digit format
as the entries for single-character names, and will make parsing easier in the
next commits.
-rw-r--r-- | charclass_invlists.h | 2 | ||||
-rw-r--r-- | lib/unicore/mktables | 15 | ||||
-rw-r--r-- | lib/unicore/uni_keywords.pl | 2 | ||||
-rw-r--r-- | regcharclass.h | 2 | ||||
-rw-r--r-- | uni_keywords.h | 2 |
5 files changed, 10 insertions, 13 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index 1e4ecd5d49..7acc01f931 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -419864,7 +419864,7 @@ static const U8 WB_table[23][23] = { * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables + * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl diff --git a/lib/unicore/mktables b/lib/unicore/mktables index b601d06ba6..456e6d0964 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -12164,15 +12164,12 @@ sub process_NamedSequences($file) { next; } - # Code points below 0x0100 need to be converted to native - $sequence =~ s{ \b 00 ( [0-9A-F]{2} ) \b } - { sprintf("%04X", utf8::unicode_to_native(hex $1)) }gxe - if NON_ASCII_PLATFORM; - - # Note single \t in keeping with special output format of - # Perl_charnames. But it turns out that the code points don't have to - # be 5 digits long, like the rest, based on the internal workings of - # charnames.pm. This could be easily changed for consistency. 
+ # Code points need to be 5 digits long like the other entries in + # Name.pl, for regcomp.c parsing; and the ones below 0x0100 need to be + # converted to native + $sequence = join " ", map { sprintf("%05X", + utf8::unicode_to_native(hex $_)) + } split / /, $sequence; push @named_sequences, "$sequence\n$name\n"; } return; diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index 4b42de28ea..0d3291e15c 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1295,7 +1295,7 @@ # baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt # 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt # 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt -# 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables +# d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables # 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version # 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl # 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl diff --git a/regcharclass.h b/regcharclass.h index ce81ea586c..40211ccc86 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -2247,7 +2247,7 @@ * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables + * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef 
lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * f9a393e7add8c7c2728356473ce5b52246d51295b2da0c48fb6f0aa21799e2bb regen/regcharclass.pl diff --git a/uni_keywords.h b/uni_keywords.h index c5744ae83c..ea20c3686d 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7537,7 +7537,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt * 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt * 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt - * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables + * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl |