summaryrefslogtreecommitdiff
path: root/lib/unicore/mktables
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2016-01-15 22:46:58 -0700
committerKarl Williamson <khw@cpan.org>2016-01-19 15:08:59 -0700
commitca8226cfa2cc0ddcc50f60505c42078df8e3b766 (patch)
tree1efe4bdcff33497f9c669cb2b7f87dd881e55480 /lib/unicore/mktables
parentb83e64846b899f963162217c08dd5ff8cf40303d (diff)
downloadperl-ca8226cfa2cc0ddcc50f60505c42078df8e3b766.tar.gz
Make tables for Perl-tailored Unicode Line_Break property
This is in preparation for adding qr/\b{lb}/. This just generates the tables, and is a separate commit because otherwise the diff listing is confusing, as it doesn't realize there are only additions. So, even though the difference listing for this commit for the generated header file is wildly crazy, the only changes in reality are the addition of some tables for Line Break.
Diffstat (limited to 'lib/unicore/mktables')
-rw-r--r--lib/unicore/mktables60
1 files changed, 60 insertions, 0 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 62c24885e1..4f05062b0f 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -15089,6 +15089,59 @@ END
}
}
+ # Create a version of the Line_Break property with the mappings that are
+ # omitted in the default algorithm remapped to what
+ # http://www.unicode.org/reports/tr14 says they should be.
+ #
+ #   Original      Resolved  General_Category
+ #   AI, SG, XX    AL        Any
+ #   SA            CM        Only Mn or Mc
+ #   SA            AL        Any except Mn and Mc
+ #   CJ            NS        Any
+ my $perl_lb = property_ref('_Perl_LB'); # reuse the property if it already exists
+ if (! defined $perl_lb) {
+ $perl_lb = Property->new('_Perl_LB',
+ Fate => $INTERNAL_ONLY,
+ Perl_Extension => 1,
+ Directory => $map_directory,
+ Type => $STRING);
+ my $lb = property_ref('Line_Break');
+ $perl_lb->initialize($lb); # presumably seeds it from Line_Break's mappings -- TODO confirm
+ }
+ $perl_lb->set_default_map('AL');
+
+ # The comparisons below match on specific property value names.  It's a
+ # little iffy relying on Unicode to not change which property value synonym
+ # they use, but if they do, tests should start failing and we can fix this up.
+ for my $range ($perl_lb->ranges) {
+ my $value = standardize($range->value);
+ if ( $value eq standardize('Unknown')
+ || $value eq standardize('XX')
+ || $value eq standardize('AI')
+ || $value eq standardize('SG'))
+ {
+ $perl_lb->add_map($range->start, $range->end, 'AL',
+ Replace => $UNCONDITIONALLY);
+ }
+ elsif ($value eq standardize('CJ')) {
+ $perl_lb->add_map($range->start, $range->end, 'NS',
+ Replace => $UNCONDITIONALLY);
+ }
+ elsif ($value eq standardize('SA')) {
+ for my $i ($range->start .. $range->end) { # SA resolves per code point, not per range
+ my $gc_val = $gc->value_of($i); # $gc comes from outside this hunk; presumably General_Category
+ if ($gc_val eq 'Mn' || $gc_val eq 'Mc') {
+ $perl_lb->add_map($i, $i, 'CM',
+ Replace => $UNCONDITIONALLY);
+ }
+ else {
+ $perl_lb->add_map($i, $i, 'AL',
+ Replace => $UNCONDITIONALLY);
+ }
+ }
+ }
+ }
+
# Here done with all the basic stuff. Ready to populate the information
# about each character if annotating them.
if ($annotate) {
@@ -18839,6 +18892,13 @@ my @input_file_objects = (
Each_Line_Handler => (($v_version lt v3.1.0)
? \&filter_early_ea_lb
: undef),
+ Early => [ "LBsubst.txt", '_Perl_LB', 'AL',
+ 'AL', # default
+
+ # Don't use _Perl_LB as a synonym for
+ # Line_Break in later perls, as it is tailored
+ # and isn't the same as Line_Break
+ 'ONLY_EARLY' ],
),
Input_file->new('EastAsianWidth.txt', v3.0.0,
Property => 'East_Asian_Width',