diff options
author | Karl Williamson <khw@cpan.org> | 2021-09-15 07:36:41 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-09-15 08:48:04 -0600 |
commit | de524f25f5a90dad63fa962cb8585acd86c62a88 (patch) | |
tree | a739b7d284acb2f41fe23247925301a45cf68dba /lib | |
parent | af56221a729795569e62d31e32db88f53f56572c (diff) | |
download | perl-de524f25f5a90dad63fa962cb8585acd86c62a88.tar.gz |
mktables: Split a Line Break equivalence class
This class is used for \b{lb}, whose rules are changing in Unicode 14.0.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/unicore/mktables | 17 | ||||
-rw-r--r-- | lib/unicore/uni_keywords.pl | 4 |
2 files changed, 19 insertions, 2 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 04e9cf6c24..89a2e15ef5 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -15175,6 +15175,8 @@ END $perl_lb->set_default_map('Alphabetic', 'full_name'); # XX -> AL my $ea = property_ref('East_Asian_Width'); + my $Cn_EP; + $Cn_EP = $ep & $gc->table('Unassigned') if defined $ep; for my $range ($perl_lb->ranges) { my $value = standardize($range->value); @@ -15202,6 +15204,21 @@ END } } } + elsif (defined $ep && $value eq standardize('Ideographic')) { + + # Unicode 14 adds a rule to not break lines before any potential + # EBase, They say that any unassigned code point that is ExtPict, + # is potentially an EBase. In 14.0, all such ones are in the + # ExtPict=ID category. We must split that category for the + # pairwise rule table to work. + for my $i ($range->start .. $range->end) { + if ($Cn_EP->contains($i)) { + $perl_lb->add_map($i, $i, + 'Unassigned_Extended_Pictographic_Ideographic', + Replace => $UNCONDITIONALLY); + } + } + } elsif ( defined $ea && ( $value eq standardize('Close_Parenthesis') || $value eq standardize('Open_Punctuation'))) diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index 0ea5d98fc6..51c63044f4 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1295,9 +1295,9 @@ # baba9dfc133e3cb770a89aaf0973b1341fa61c2da6c176baf6428898b3b568d8 lib/unicore/extracted/DLineBreak.txt # 6d4a8c945dd7db83ed617cbb7d937de7f4ecf016ff22970d846e996a7c9a2a5d lib/unicore/extracted/DNumType.txt # 5b7c14380d5cceeaffcfbc18db1ed936391d2af2d51f5a41f1a17b692c77e59b lib/unicore/extracted/DNumValues.txt -# b74c07296be6f14f3b99c92090a9a190188ca6f6afcc46e5d000f1529922120a lib/unicore/mktables +# 4e169849b96b76987a8fc443ef421b44d2dcebbd981bb457f6a13e8af77cdbe2 lib/unicore/mktables # 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version # 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 
regen/charset_translations.pl # 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl -# 5eb9e6c825496cc9aa705e3cd33bc6d5a9657dcca16d4c4acc4824ff30b34a26 regen/mk_invlists.pl +# 4635ff74b13c8f059599be8d0b0e2aea19fefe6ddcbc4c7deef1a3096c91a0dd regen/mk_invlists.pl # ex: set ro: |