diff options
author | Karl Williamson <khw@cpan.org> | 2016-01-15 22:46:58 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-01-19 15:08:59 -0700 |
commit | ca8226cfa2cc0ddcc50f60505c42078df8e3b766 (patch) | |
tree | 1efe4bdcff33497f9c669cb2b7f87dd881e55480 /lib | |
parent | b83e64846b899f963162217c08dd5ff8cf40303d (diff) | |
download | perl-ca8226cfa2cc0ddcc50f60505c42078df8e3b766.tar.gz |
Make tables for Perl-tailored Unicode Line_Break property
This is in preparation for adding qr/\b{lb}/. This just generates the
tables, and is a separate commit because otherwise the diff listing is
confusing, as it doesn't realize there are only additions. So, even
though the difference listing for this commit for the generated header
file is wildly crazy, the only changes in reality are the addition of
some tables for Line Break.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/unicore/mktables | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 62c24885e1..4f05062b0f 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -15089,6 +15089,59 @@ END
         }
     }
 
+    # Create a version of the LineBreak property with the mappings that are
+    # omitted in the default algorithm remapped to what
+    # http://www.unicode.org/reports/tr14 says they should be.
+    #
+    # Original        Resolved  General_Category
+    # AI, SG, XX      AL        Any
+    # SA              CM        Only Mn or Mc
+    # SA              AL        Any except Mn and Mc
+    # CJ              NS        Any
+    my $perl_lb = property_ref('_Perl_LB');
+    if (! defined $perl_lb) {
+        $perl_lb = Property->new('_Perl_LB',
+                                 Fate => $INTERNAL_ONLY,
+                                 Perl_Extension => 1,
+                                 Directory => $map_directory,
+                                 Type => $STRING);
+        my $lb = property_ref('Line_Break');
+        $perl_lb->initialize($lb);
+    }
+    $perl_lb->set_default_map('AL');
+
+    # It's a little iffy relying on Unicode to not change which property value
+    # synonym they use, but if they do, tests should start failing and we can
+    # fix this up
+    for my $range ($perl_lb->ranges) {
+        my $value = standardize($range->value);
+        if (   $value eq standardize('Unknown')
+            || $value eq standardize('XX')
+            || $value eq standardize('AI')
+            || $value eq standardize('SG'))
+        {
+            $perl_lb->add_map($range->start, $range->end, 'AL',
+                              Replace => $UNCONDITIONALLY);
+        }
+        elsif ($value eq standardize('CJ')) {
+            $perl_lb->add_map($range->start, $range->end, 'NS',
+                              Replace => $UNCONDITIONALLY);
+        }
+        elsif ($value eq standardize('SA')) {
+            for my $i ($range->start .. $range->end) {
+                my $gc_val = $gc->value_of($i);
+                if ($gc_val eq 'Mn' || $gc_val eq 'Mc') {
+                    $perl_lb->add_map($i, $i, 'CM',
+                                      Replace => $UNCONDITIONALLY);
+                }
+                else {
+                    $perl_lb->add_map($i, $i, 'AL',
+                                      Replace => $UNCONDITIONALLY);
+                }
+            }
+        }
+    }
+
     # Here done with all the basic stuff. Ready to populate the information
     # about each character if annotating them.
     if ($annotate) {
@@ -18839,6 +18892,13 @@ my @input_file_objects = (
                     Each_Line_Handler => (($v_version lt v3.1.0) ?
                                         \&filter_early_ea_lb : undef),
+                    Early => [ "LBsubst.txt", '_Perl_LB', 'AL',
+                               'AL', # default
+
+                               # Don't use _Perl_LB as a synonym for
+                               # Line_Break in later perls, as it is tailored
+                               # and isn't the same as Line_Break
+                               'ONLY_EARLY' ],
                    ),
     Input_file->new('EastAsianWidth.txt', v3.0.0,
                     Property => 'East_Asian_Width',