summaryrefslogtreecommitdiff
path: root/lib/unicore/mktables
diff options
context:
space:
mode:
Diffstat (limited to 'lib/unicore/mktables')
-rw-r--r--lib/unicore/mktables60
1 files changed, 60 insertions, 0 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 62c24885e1..4f05062b0f 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -15089,6 +15089,59 @@ END
}
}
+ # Create a version of the Line_Break property in which the values that the
+ # default line breaking algorithm does not use directly are remapped to
+ # what http://www.unicode.org/reports/tr14 (rule LB1) says they should
+ # resolve to.
+ #
+ # Original     Resolved    General_Category
+ # AI, SG, XX   AL          Any
+ # SA           CM          Only Mn or Mc
+ # SA           AL          Any except Mn and Mc
+ # CJ           NS          Any
+ my $perl_lb = property_ref('_Perl_LB');
+ if (! defined $perl_lb) {
+ $perl_lb = Property->new('_Perl_LB',
+ Fate => $INTERNAL_ONLY,
+ Perl_Extension => 1,
+ Directory => $map_directory,
+ Type => $STRING);
+ my $lb = property_ref('Line_Break');
+ $perl_lb->initialize($lb);
+ }
+ $perl_lb->set_default_map('AL');
+
+ # The comparisons below rely on Unicode not changing which property value
+ # synonyms it uses. That is a little iffy, but if they do change, tests
+ # should start failing and we can fix this up.
+ for my $range ($perl_lb->ranges) {
+ my $value = standardize($range->value);
+ if ( $value eq standardize('Unknown')
+ || $value eq standardize('XX')
+ || $value eq standardize('AI')
+ || $value eq standardize('SG'))
+ {
+ $perl_lb->add_map($range->start, $range->end, 'AL',
+ Replace => $UNCONDITIONALLY);
+ }
+ elsif ($value eq standardize('CJ')) {
+ $perl_lb->add_map($range->start, $range->end, 'NS',
+ Replace => $UNCONDITIONALLY);
+ }
+ elsif ($value eq standardize('SA')) {
+ for my $i ($range->start .. $range->end) {
+ my $gc_val = $gc->value_of($i);
+ if ($gc_val eq 'Mn' || $gc_val eq 'Mc') {
+ $perl_lb->add_map($i, $i, 'CM',
+ Replace => $UNCONDITIONALLY);
+ }
+ else {
+ $perl_lb->add_map($i, $i, 'AL',
+ Replace => $UNCONDITIONALLY);
+ }
+ }
+ }
+ }
+
# Here done with all the basic stuff. Ready to populate the information
# about each character if annotating them.
if ($annotate) {
@@ -18839,6 +18892,13 @@ my @input_file_objects = (
Each_Line_Handler => (($v_version lt v3.1.0)
? \&filter_early_ea_lb
: undef),
+ Early => [ "LBsubst.txt", '_Perl_LB', 'AL',
+ 'AL', # default
+
+ # Don't use _Perl_LB as a synonym for
+ # Line_Break in later perls, as it is tailored
+ # and isn't the same as Line_Break
+ 'ONLY_EARLY' ],
),
Input_file->new('EastAsianWidth.txt', v3.0.0,
Property => 'East_Asian_Width',