Make tables for Perl-tailored Unicode Line_Break property

This is in preparation for adding qr/\b{lb}/. This commit just generates the tables; it is kept separate because otherwise the diff listing is confusing, as the diff tool doesn't realize there are only additions. So, even though the diff listing for the generated header file in this commit looks wildly different, the only changes in reality are the addition of some tables for Line Break.
author: Karl Williamson <khw@cpan.org> 2016-01-15 22:46:58 -0700
committer: Karl Williamson <khw@cpan.org> 2016-01-19 15:08:59 -0700
commit: ca8226cfa2cc0ddcc50f60505c42078df8e3b766 (patch)
tree: 1efe4bdcff33497f9c669cb2b7f87dd881e55480 /lib/unicore/mktables
parent: b83e64846b899f963162217c08dd5ff8cf40303d (diff)
download: perl-ca8226cfa2cc0ddcc50f60505c42078df8e3b766.tar.gz
1 files changed, 60 insertions, 0 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 62c24885e1..4f05062b0f 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -15089,6 +15089,59 @@ END
         }
     }
 
+    # Create a version of the LineBreak property with the mappings that are
+    # omitted in the default algorithm remapped to what
+    # http://www.unicode.org/reports/tr14 says they should be.
+    #
+    # Original        Resolved  General_Category
+    # AI, SG, XX      AL      Any
+    # SA              CM      Only Mn or Mc
+    # SA              AL      Any except Mn and Mc
+    # CJ              NS      Any
+    my $perl_lb = property_ref('_Perl_LB');
+    if (! defined $perl_lb) {
+        $perl_lb = Property->new('_Perl_LB',
+                                 Fate => $INTERNAL_ONLY,
+                                 Perl_Extension => 1,
+                                 Directory => $map_directory,
+                                 Type => $STRING);
+        my $lb = property_ref('Line_Break');
+        $perl_lb->initialize($lb);
+    }
+    $perl_lb->set_default_map('AL');
+
+    # It's a little iffy relying on Unicode to not change which property value
+    # synonym they use, but if they do, tests should start failing and we can
+    # fix this up.
+    for my $range ($perl_lb->ranges) {
+        my $value = standardize($range->value);
+        if (   $value eq standardize('Unknown')
+            || $value eq standardize('XX')
+            || $value eq standardize('AI')
+            || $value eq standardize('SG'))
+        {
+            $perl_lb->add_map($range->start, $range->end, 'AL',
+                              Replace => $UNCONDITIONALLY);
+        }
+        elsif ($value eq standardize('CJ')) {
+            $perl_lb->add_map($range->start, $range->end, 'NS',
+                              Replace => $UNCONDITIONALLY);
+        }
+        elsif ($value eq standardize('SA')) {
+            for my $i ($range->start .. $range->end) {
+                my $gc_val = $gc->value_of($i);
+                if ($gc_val eq 'Mn' || $gc_val eq 'Mc') {
+                    $perl_lb->add_map($i, $i, 'CM',
+                                      Replace => $UNCONDITIONALLY);
+                }
+                else {
+                    $perl_lb->add_map($i, $i, 'AL',
+                                      Replace => $UNCONDITIONALLY);
+                }
+            }
+        }
+    }
+
     # Here done with all the basic stuff.  Ready to populate the information
     # about each character if annotating them.
     if ($annotate) {
@@ -18839,6 +18892,13 @@ my @input_file_objects = (
                     Each_Line_Handler => (($v_version lt v3.1.0)
                                         ? \&filter_early_ea_lb
                                         : undef),
+                    Early => [ "LBsubst.txt", '_Perl_LB', 'AL',
+                               'AL', # default
+
+                               # Don't use _Perl_LB as a synonym for
+                               # Line_Break in later perls, as it is tailored
+                               # and isn't the same as Line_Break
+                               'ONLY_EARLY' ],
                    ),
     Input_file->new('EastAsianWidth.txt', v3.0.0,
                     Property => 'East_Asian_Width',
author	Karl Williamson <khw@cpan.org>	2016-01-15 22:46:58 -0700
committer	Karl Williamson <khw@cpan.org>	2016-01-19 15:08:59 -0700
commit	ca8226cfa2cc0ddcc50f60505c42078df8e3b766 (patch)
tree	1efe4bdcff33497f9c669cb2b7f87dd881e55480 /lib/unicore/mktables
parent	b83e64846b899f963162217c08dd5ff8cf40303d (diff)
download	perl-ca8226cfa2cc0ddcc50f60505c42078df8e3b766.tar.gz