summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2021-09-15 07:36:41 -0600
committerKarl Williamson <khw@cpan.org>2021-09-15 08:48:04 -0600
commitde524f25f5a90dad63fa962cb8585acd86c62a88 (patch)
treea739b7d284acb2f41fe23247925301a45cf68dba /lib
parentaf56221a729795569e62d31e32db88f53f56572c (diff)
downloadperl-de524f25f5a90dad63fa962cb8585acd86c62a88.tar.gz
mktables: Split a Line Break equivalence class
This class is used for \b{lb}, and the rule is changing in Unicode 14.0.
Diffstat (limited to 'lib')
-rw-r--r--lib/unicore/mktables17
-rw-r--r--lib/unicore/uni_keywords.pl4
2 files changed, 19 insertions, 2 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 04e9cf6c24..89a2e15ef5 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -15175,6 +15175,8 @@ END
$perl_lb->set_default_map('Alphabetic', 'full_name'); # XX -> AL
my $ea = property_ref('East_Asian_Width');
+ my $Cn_EP;
+ $Cn_EP = $ep & $gc->table('Unassigned') if defined $ep;
for my $range ($perl_lb->ranges) {
my $value = standardize($range->value);
@@ -15202,6 +15204,21 @@ END
}
}
}
+ elsif (defined $ep && $value eq standardize('Ideographic')) {
+
+ # Unicode 14 adds a rule to not break lines before any potential
+ # EBase.  They say that any unassigned code point that is ExtPict
+ # is potentially an EBase.  In 14.0, all such ones are in the
+ # ExtPict=ID category.  We must split that category for the
+ # pairwise rule table to work.
+ for my $i ($range->start .. $range->end) {
+ if ($Cn_EP->contains($i)) {
+ $perl_lb->add_map($i, $i,
+ 'Unassigned_Extended_Pictographic_Ideographic',
+ Replace => $UNCONDITIONALLY);
+ }
+ }
+ }
elsif ( defined $ea
&& ( $value eq standardize('Close_Parenthesis')
|| $value eq standardize('Open_Punctuation')))
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
index 0ea5d98fc6..51c63044f4 100644
--- a/lib/unicore/uni_keywords.pl
+++ b/lib/unicore/uni_keywords.pl
@@ -1295,9 +1295,9 @@
# baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt
# 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt
# 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt
-# b74c07296be6f14f3b99c92090a9a190188ca6f6afcc46e5d000f1529922120a lib/unicore/mktables
+# 4e169849b96b76987a8fc443ef421b44d2dcebbd981bb457f6a13e8af77cdbe2 lib/unicore/mktables
# 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
# 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
# 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl
-# 5eb9e6c825496cc9aa705e3cd33bc6d5a9657dcca16d4c4acc4824ff30b34a26 regen/mk_invlists.pl
+# 4635ff74b13c8f059599be8d0b0e2aea19fefe6ddcbc4c7deef1a3096c91a0dd regen/mk_invlists.pl
# ex: set ro: