summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2020-03-19 22:02:38 -0600
committer Karl Williamson <khw@cpan.org> 2020-03-20 07:44:30 -0600
commit c08e9114637298ea7e24716942136ff527afb319 (patch)
tree 6d94d07da6b8ab0dd5aab1d6d3420df15cf79785
parent fc1663a752718c68d3f49774da8d030b2dbc6ff8 (diff)
download perl-c08e9114637298ea7e24716942136ff527afb319.tar.gz
mktables: Change named sequences to 5 digits
This makes them correspond to names for single characters, and will make parsing easier in the next commits.
-rw-r--r-- charclass_invlists.h 2
-rw-r--r-- lib/unicore/mktables 15
-rw-r--r-- lib/unicore/uni_keywords.pl 2
-rw-r--r-- regcharclass.h 2
-rw-r--r-- uni_keywords.h 2
5 files changed, 10 insertions, 13 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 1e4ecd5d49..7acc01f931 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -419864,7 +419864,7 @@ static const U8 WB_table[23][23] = {
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables
+ * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index b601d06ba6..456e6d0964 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -12164,15 +12164,12 @@ sub process_NamedSequences($file) {
next;
}
- # Code points below 0x0100 need to be converted to native
- $sequence =~ s{ \b 00 ( [0-9A-F]{2} ) \b }
- { sprintf("%04X", utf8::unicode_to_native(hex $1)) }gxe
- if NON_ASCII_PLATFORM;
-
- # Note single \t in keeping with special output format of
- # Perl_charnames. But it turns out that the code points don't have to
- # be 5 digits long, like the rest, based on the internal workings of
- # charnames.pm. This could be easily changed for consistency.
+ # Code points need to be 5 digits long like the other entries in
+ # Name.pl, for regcomp.c parsing; and the ones below 0x0100 need to be
+ # converted to native
+ $sequence = join " ", map { sprintf("%05X",
+ utf8::unicode_to_native(hex $_))
+ } split / /, $sequence;
push @named_sequences, "$sequence\n$name\n";
}
return;
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
index 4b42de28ea..0d3291e15c 100644
--- a/lib/unicore/uni_keywords.pl
+++ b/lib/unicore/uni_keywords.pl
@@ -1295,7 +1295,7 @@
# baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
# 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
# 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
-# 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables
+# d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables
# 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
# 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
# 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl
diff --git a/regcharclass.h b/regcharclass.h
index ce81ea586c..40211ccc86 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -2247,7 +2247,7 @@
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables
+ * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* f9a393e7add8c7c2728356473ce5b52246d51295b2da0c48fb6f0aa21799e2bb regen/regcharclass.pl
diff --git a/uni_keywords.h b/uni_keywords.h
index c5744ae83c..ea20c3686d 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -7537,7 +7537,7 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
* baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
* 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
* 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
- * 4e4bddb6119eca345d54f3c334f1c1472e9e74bfaefd2c02df3ecb6f0f0667b8 lib/unicore/mktables
+ * d595d6b96967567fa57cf477c8cf4a72b456347a8ea054c05847b1fdb3072723 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 6bbad21de0848e0236b02f34f5fa0edd3cdae9ba8173cc9469a5513936b9e728 regen/mk_PL_charclass.pl