Change format of mktables output binary property tables

mktables now outputs the tables for binary properties as inversion lists, with a size as the first element. This means simpler handling of these tables in the core, including removal of an entire pass over them (it was done just to get the size). These tables are marked as for internal use by the Perl core only, so their format is changeable at will.
author: Karl Williamson <public@khwilliamson.com> 2013-12-24 20:11:23 -0700
committer: Karl Williamson <public@khwilliamson.com> 2013-12-31 08:27:23 -0700
commit: 31aa6e0befef7d9d5586b53de01cc20ca71f9a4b (patch)
tree: 57dc960b4f24bda930539eb512850b1cd7394f7d /lib/Unicode
parent: 2d88a86a5910c97496b47b7b7c223f2c9a14b57c (diff)
download: perl-31aa6e0befef7d9d5586b53de01cc20ca71f9a4b.tar.gz
2 files changed, 62 insertions, 24 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index e4ae34e270..106fe7e678 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -2225,6 +2225,15 @@ sub prop_invlist ($;$) {
 
     my @invlist;
 
+    if ($swash->{'LIST'} =~ /^V/) {
+
+        # A 'V' as the first character marks the input as already an inversion
+        # list, in which case, all we need to do is put the remaining lines
+        # into our array.
+        @invlist = split "\n", $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr;
+        shift @invlist;
+    }
+    else {
     # The input lines look like:
     # 0041\t005A   # [26]
     # 005F
@@ -2259,6 +2268,7 @@ sub prop_invlist ($;$) {
             push @invlist, $begin + 1;
         }
     }
+    }
 
     # Could need to be inverted: add or subtract a 0 at the beginning of the
     # list.
@@ -3173,6 +3183,21 @@ RETRY:
 
     my $requires_adjustment = $format =~ /^a/;
 
+    if ($swash->{'LIST'} =~ /^V/) {
+        @invlist = split "\n", $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr;
+        shift @invlist;
+        foreach my $i (0 .. @invlist - 1) {
+            $invmap[$i] = ($i % 2 == 0) ? 'Y' : 'N'
+        }
+
+        # The map includes lines for all code points; add one for the range
+        # from 0 to the first Y.
+        if ($invlist[0] != 0) {
+            unshift @invlist, 0;
+            unshift @invmap, 'N';
+        }
+    }
+    else {
     # The LIST input lines look like:
     # ...
     # 0374\t\tCommon
@@ -3329,6 +3354,7 @@ RETRY:
             push @invmap, $missing;
         }
     }
+    }
 
     # If the property is empty, make all code points use the value for missing
     # ones.
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index b2caf8934c..0d709b1c15 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -1067,30 +1067,19 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of
         }
 
         # Now construct a string from the list that should match the file.
-        # The file gives ranges of code points with starting and ending values
-        # in hex, like this:
-        # 41\t5A
-        # 61\t7A
-        # AA
-        # Our list has even numbered elements start ranges that are in the
-        # list, and odd ones that aren't in the list.  Therefore the odd
-        # numbered ones are one beyond the end of the previous range, but
-        # otherwise don't get reflected in the file.
-        my $tested = "";
-        my $i = 0;
-        for (; $i < @tested; $i += 2) {
-            my $start = $tested[$i];
-            my $end = ($i + 1 < @tested)
-                      ? $tested[$i+1] - 1
-                      : $Unicode::UCD::MAX_CP;
-            if ($start == $end) {
-                $tested .= sprintf("%X\n", $start);
-            }
-            else {
-                $tested .= sprintf "%X\t%X\n", $start, $end;
-            }
-        }
-
+        # The file is inversion list format code points, like this:
+        # V1216
+        # 65      # [26]
+        # 91
+        # 192     # [23]
+        # ...
+        # The V indicates it's an inversion list, and is followed immediately
+        # by the number of elements (lines) that follow giving its contents.
+        # The list has even numbered elements (0th, 2nd, ...) start ranges
+        # that are in the list, and odd ones that aren't in the list.
+        # Therefore the odd numbered ones are one beyond the end of the
+        # previous range.  Every element gets its own line in the file.
+        my $tested =  join "\n", ("V" . scalar @tested), @tested;
         local $/ = "\n";
         chomp $tested;
         $/ = $input_record_separator;
@@ -1665,6 +1654,11 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
         # appends the next line to the running string.
         my $tested_map = "";
 
+        # For use with files for binary properties only, which are stored in
+        # inversion list format.  This counts the number of data lines in the
+        # file.
+        my $binary_count = 0;
+
         # Create a copy of the file's specials hash.  (It has been undef'd if
         # we know it isn't relevant to this property, so if it exists, it's an
         # error or is relevant).  As we go along, we delete from that copy.
@@ -1870,6 +1864,20 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
             my $end = (defined $invlist_ref->[$i+1])
                       ? $invlist_ref->[$i+1] - 1
                       : $Unicode::UCD::MAX_CP;
+            if ($is_binary) {
+
+                # Files for binary properties are in inversion list format,
+                # without ranges.
+                $tested_map .= "$start\n";
+                $binary_count++;
+
+                # If the final value is infinity, no line for it exists.
+                if ($end < $Unicode::UCD::MAX_CP) {
+                    $tested_map .= ($end + 1) . "\n";
+                    $binary_count++;
+                }
+            }
+            else {
             $end = ($start == $end) ? "" : sprintf($file_range_format, $end);
             if ($invmap_ref->[$i] ne "") {
                 $tested_map .= sprintf "$file_range_format\t%s\t%s\n",
@@ -1881,8 +1889,12 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
             else {
                 $tested_map .= sprintf "$file_range_format\n", $start;
             }
+            }
         } # End of looping over all elements.
 
+        # Binary property files begin with a line count line.
+        $tested_map = "V$binary_count\n$tested_map" if $binary_count;
+
         # Here are done with generating what the file should look like
 
         local $/ = "\n";
author	Karl Williamson <public@khwilliamson.com>	2013-12-24 20:11:23 -0700
committer	Karl Williamson <public@khwilliamson.com>	2013-12-31 08:27:23 -0700
commit	31aa6e0befef7d9d5586b53de01cc20ca71f9a4b (patch)
tree	57dc960b4f24bda930539eb512850b1cd7394f7d /lib/Unicode
parent	2d88a86a5910c97496b47b7b7c223f2c9a14b57c (diff)
download	perl-31aa6e0befef7d9d5586b53de01cc20ca71f9a4b.tar.gz