8 files changed, 195 insertions, 33 deletions
diff --git a/embed.fnc b/embed.fnc
index 42ab356823..9c4607b5cd 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1480,6 +1480,7 @@ EXpM	|void	|_invlist_invert_prop|NN SV* const invlist
 EXMpR	|SV*	|_new_invlist	|IV initial_size
 EXMpR	|SV*	|_swash_to_invlist	|NN SV* const swash
 EXMpR	|SV*	|_add_range_to_invlist	|NULLOK SV* invlist|const UV start|const UV end
+EXMpR	|SV*	|_setup_canned_invlist|const STRLEN size|const UV element0|NN UV** other_elements_ptr
 EXMp	|void	|_invlist_populate_swatch   |NN SV* const invlist|const UV start|const UV end|NN U8* swatch
 #endif
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C)
diff --git a/embed.h b/embed.h
index 0f8e82c502..c7e9579c0d 100644
--- a/embed.h
+++ b/embed.h
@@ -990,6 +990,7 @@
 #define _invlist_populate_swatch(a,b,c,d)	Perl__invlist_populate_swatch(aTHX_ a,b,c,d)
 #define _invlist_union_maybe_complement_2nd(a,b,c,d)	Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d)
 #define _new_invlist(a)		Perl__new_invlist(aTHX_ a)
+#define _setup_canned_invlist(a,b,c)	Perl__setup_canned_invlist(aTHX_ a,b,c)
 #define _swash_to_invlist(a)	Perl__swash_to_invlist(aTHX_ a)
 #  endif
 #  if defined(PERL_IN_REGEXEC_C)
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index e4ae34e270..106fe7e678 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -2225,6 +2225,15 @@ sub prop_invlist ($;$) {
 
     my @invlist;
 
+    if ($swash->{'LIST'} =~ /^V/) {
+
+        # A 'V' as the first character marks the input as already an inversion
+        # list, in which case, all we need to do is put the remaining lines
+        # into our array.
+        @invlist = split "\n", $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr;
+        shift @invlist;
+    }
+    else {
     # The input lines look like:
     # 0041\t005A   # [26]
     # 005F
@@ -2259,6 +2268,7 @@ sub prop_invlist ($;$) {
             push @invlist, $begin + 1;
         }
     }
+    }
 
     # Could need to be inverted: add or subtract a 0 at the beginning of the
     # list.
@@ -3173,6 +3183,21 @@ RETRY:
 
     my $requires_adjustment = $format =~ /^a/;
 
+    if ($swash->{'LIST'} =~ /^V/) {
+        @invlist = split "\n", $swash->{'LIST'} =~ s/ \s* (?: \# .* )? $ //xmgr;
+        shift @invlist;
+        foreach my $i (0 .. @invlist - 1) {
+            $invmap[$i] = ($i % 2 == 0) ? 'Y' : 'N'
+        }
+
+        # The map includes lines for all code points; add one for the range
+        # from 0 to the first Y.
+        if ($invlist[0] != 0) {
+            unshift @invlist, 0;
+            unshift @invmap, 'N';
+        }
+    }
+    else {
     # The LIST input lines look like:
     # ...
     # 0374\t\tCommon
@@ -3329,6 +3354,7 @@ RETRY:
             push @invmap, $missing;
         }
     }
+    }
 
     # If the property is empty, make all code points use the value for missing
     # ones.
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index b2caf8934c..0d709b1c15 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -1067,30 +1067,19 @@ foreach my $set_of_tables (\%utf8::stricter_to_file_of, \%utf8::loose_to_file_of
         }
 
         # Now construct a string from the list that should match the file.
-        # The file gives ranges of code points with starting and ending values
-        # in hex, like this:
-        # 41\t5A
-        # 61\t7A
-        # AA
-        # Our list has even numbered elements start ranges that are in the
-        # list, and odd ones that aren't in the list.  Therefore the odd
-        # numbered ones are one beyond the end of the previous range, but
-        # otherwise don't get reflected in the file.
-        my $tested = "";
-        my $i = 0;
-        for (; $i < @tested; $i += 2) {
-            my $start = $tested[$i];
-            my $end = ($i + 1 < @tested)
-                      ? $tested[$i+1] - 1
-                      : $Unicode::UCD::MAX_CP;
-            if ($start == $end) {
-                $tested .= sprintf("%X\n", $start);
-            }
-            else {
-                $tested .= sprintf "%X\t%X\n", $start, $end;
-            }
-        }
-
+        # The file is inversion list format code points, like this:
+        # V1216
+        # 65      # [26]
+        # 91
+        # 192     # [23]
+        # ...
+        # The V indicates it's an inversion list, and is followed immediately
+        # by the number of elements (lines) that follow giving its contents.
+        # The list has even numbered elements (0th, 2nd, ...) start ranges
+        # that are in the list, and odd ones that aren't in the list.
+        # Therefore the odd numbered ones are one beyond the end of the
+        # previous range, but otherwise don't get reflected in the file.
+        my $tested =  join "\n", ("V" . scalar @tested), @tested;
         local $/ = "\n";
         chomp $tested;
         $/ = $input_record_separator;
@@ -1665,6 +1654,11 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
         # appends the next line to the running string.
         my $tested_map = "";
 
+        # For use with files for binary properties only, which are stored in
+        # inversion list format.  This counts the number of data lines in the
+        # file.
+        my $binary_count = 0;
+
         # Create a copy of the file's specials hash.  (It has been undef'd if
         # we know it isn't relevant to this property, so if it exists, it's an
         # error or is relevant).  As we go along, we delete from that copy.
@@ -1870,6 +1864,20 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
             my $end = (defined $invlist_ref->[$i+1])
                       ? $invlist_ref->[$i+1] - 1
                       : $Unicode::UCD::MAX_CP;
+            if ($is_binary) {
+
+                # Files for binary properties are in inversion list format,
+                # without ranges.
+                $tested_map .= "$start\n";
+                $binary_count++;
+
+                # If the final value is infinity, no line for it exists.
+                if ($end < $Unicode::UCD::MAX_CP) {
+                    $tested_map .= ($end + 1) . "\n";
+                    $binary_count++;
+                }
+            }
+            else {
             $end = ($start == $end) ? "" : sprintf($file_range_format, $end);
             if ($invmap_ref->[$i] ne "") {
                 $tested_map .= sprintf "$file_range_format\t%s\t%s\n",
@@ -1881,8 +1889,12 @@ foreach my $prop (sort(keys %props), sort keys %legacy_props) {
             else {
                 $tested_map .= sprintf "$file_range_format\n", $start;
             }
+            }
         } # End of looping over all elements.
 
+        # Binary property files begin with a line count line.
+        $tested_map = "V$binary_count\n$tested_map" if $binary_count;
+
         # Here are done with generating what the file should look like
 
         local $/ = "\n";
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index b94433fe2c..4a58886eee 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -5019,6 +5019,11 @@ sub trace { return main::trace(@_); }
     # The constructor can override the global flag of the same name.
     main::set_access('output_range_counts', \%output_range_counts, 'r');
 
+    my %write_as_invlist;
+    # A boolean set iff the output file for this table is to be in the form of
+    # an inversion list/map.
+    main::set_access('write_as_invlist', \%write_as_invlist, 'r');
+
     my %format;
     # The format of the entries of the table.  This is calculated from the
     # data in the table (or passed in the constructor).  This is an enum e.g.,
@@ -5055,6 +5060,7 @@ sub trace { return main::trace(@_); }
         $range_size_1{$addr} = delete $args{'Range_Size_1'} || 0;
         $caseless_equivalent{$addr} = delete $args{'Caseless_Equivalent'} || 0;
         $fate{$addr} = delete $args{'Fate'} || $ORDINARY;
+        $write_as_invlist{$addr} = delete $args{'Write_As_Invlist'};# No default
         my $ucd = delete $args{'UCD'};
 
         my $description = delete $args{'Description'};
@@ -5572,6 +5578,7 @@ END
         Carp::carp_extra_args(\@_) if main::DEBUG && @_;
 
         my $addr = do { no overloading; pack 'J', $self; };
+        my $write_as_invlist = $write_as_invlist{$addr};
 
         # Start with the header
         my @HEADER = $self->header;
@@ -5624,16 +5631,23 @@ END
         else {
             my $range_size_1 = $range_size_1{$addr};
 
+            # To make it more readable, use a minimum indentation
+            my $comment_indent;
+
             # These are used only in $annotate option
             my $format;         # e.g. $HEX_ADJUST_FORMAT
             my $include_name;   # ? Include the character's name in the
                                 # annotation?
             my $include_cp;     # ? Include its code point
 
-            # To make it more readable, use a minimum indentation
-            my $comment_indent = 16;
-
-            if ($annotate) {
+            if (! $annotate) {
+                $comment_indent = ($self->isa('Map_Table'))
+                                  ? 24
+                                  : ($write_as_invlist)
+                                    ? 8
+                                    : 16;
+            }
+            else {
                 $format = $self->format;
 
                 # The name of the character is output only for tables that
@@ -5651,7 +5665,11 @@ END
                 # the first column
                 $include_cp = ! $range_size_1;
 
-                if ($self->isa('Map_Table')) {
+                if (! $self->isa('Map_Table')) {
+                    $comment_indent = ($write_as_invlist) ? 8 : 16;
+                }
+                else {
+                    $comment_indent = 16;
 
                     # There are just a few short ranges in this table, so no
                     # need to include the code point in the annotation.
@@ -5879,8 +5897,26 @@ END
                         $previous_value = $value;
                     }
 
-                    # If there is a range
-                    if ($start != $end) {
+                    if ($write_as_invlist) {
+
+                        # Inversion list format has a single number per line,
+                        # the starting code point of a range that matches the
+                        # property
+                        push @OUT, $start, "\n";
+                        $invlist_count++;
+
+                        # Add a comment with the size of the range, if
+                        # requested.
+                        if ($output_range_counts{$addr}) {
+                            $OUT[-1] = merge_single_annotation_line(
+                                    $OUT[-1],
+                                    "# ["
+                                      . main::clarify_code_point_count($end - $start + 1)
+                                      . "]\n",
+                                    $comment_indent);
+                        }
+                    }
+                    elsif ($start != $end) { # If there is a range
                         if ($end == $MAX_WORKING_CODEPOINT) {
                             push @OUT, sprintf "$hex_format\t$hex_format",
                                                 $start,
@@ -6116,6 +6152,15 @@ END
                         }
                     }
 
+                    # Add the beginning of the range that doesn't match the
+                    # property, except if the just added match range extends
+                    # to infinity.  We do this after any annotations for the
+                    # match range.
+                    if ($write_as_invlist && $end < $MAX_WORKING_CODEPOINT) {
+                        push @OUT, $end + 1, "\n";
+                        $invlist_count++;
+                    }
+
                     # If we split the range, set up so the next time through
                     # we get the remainder, and redo.
                     if ($next_start) {
@@ -6129,6 +6174,8 @@ END
             } # End of loop through all the table's ranges
 
             push @OUT, @annotation; # Add orphaned annotation, if any
+
+            splice @OUT, 1, 0, "V$invlist_count\n" if $invlist_count;
         }
 
         # Add anything that goes after the main body, but within the here
@@ -6408,6 +6455,7 @@ sub trace { return main::trace(@_); }
                                     Full_Name => $full_name,
                                     _Property => $property,
                                     _Range_List => $range_list,
+                                    Write_As_Invlist => 0,
                                     %args);
 
         my $addr = do { no overloading; pack 'J', $self; };
@@ -7330,6 +7378,7 @@ sub trace { return main::trace(@_); }
                                       _Property => $property,
                                       _Range_List => $range_list,
                                       Format => $EMPTY_FORMAT,
+                                      Write_As_Invlist => 1,
                                       );
         my $addr = do { no overloading; pack 'J', $self; };
 
@@ -8051,14 +8100,25 @@ END
 
         if ($count) {   # The format differs if no code points, and needs no
                         # explanation in that case
+            if ($leader->write_as_invlist) {
                 $comment.= <<END;
 
-The format of the lines of this file is:
+The first data line of this file begins with the letter V to indicate it is in
+inversion list format.  The number following the V gives the number of lines
+remaining.  Each of those remaining lines is a single number representing the
+starting code point of a range which goes up to but not including the number
+on the next line; The 0th, 2nd, 4th... ranges are for code points that match
+the property; the 1st, 3rd, 5th... are ranges of code points that don't match
+the property.  The final line's range extends to the platform's infinity.
 END
-            $comment.= <<END;
+            }
+            else {
+                $comment.= <<END;
+The format of the lines of this file is:
 START\\tSTOP\\twhere START is the starting code point of the range, in hex;
 STOP is the ending point, or if omitted, the range has just one code point.
 END
+            }
             if ($leader->output_range_counts) {
                 $comment .= <<END;
 Numbers in comments in [brackets] indicate how many code points are in the
diff --git a/proto.h b/proto.h
index 32dfa1a7df..086642abd4 100644
--- a/proto.h
+++ b/proto.h
@@ -7093,6 +7093,12 @@ PERL_CALLCONV void	Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, S
 PERL_CALLCONV SV*	Perl__new_invlist(pTHX_ IV initial_size)
 			__attribute__warn_unused_result__;
 
+PERL_CALLCONV SV*	Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0, UV** other_elements_ptr)
+			__attribute__warn_unused_result__
+			__attribute__nonnull__(pTHX_3);
+#define PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST	\
+	assert(other_elements_ptr)
+
 PERL_CALLCONV SV*	Perl__swash_to_invlist(pTHX_ SV* const swash)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1);
diff --git a/regcomp.c b/regcomp.c
index 128ac01f34..0e86710dcd 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -8575,6 +8575,34 @@ Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
     return invlist;
 }
 
+SV*
+Perl__setup_canned_invlist(pTHX_ const STRLEN size, const UV element0, UV** other_elements_ptr)
+{
+    /* Create and return an inversion list whose contents are to be populated
+     * by the caller.  The caller gives the number of elements (in 'size') and
+     * the very first element ('element0').  This function will set
+     * '*other_elements_ptr' to an array of UVs, where the remaining elements
+     * are to be placed.
+     *
+     * Obviously there is some trust involved that the caller will properly
+     * fill in the other elements of the array.
+     *
+     * (The first element needs to be passed in, as the underlying code does
+     * things differently depending on whether it is zero or non-zero) */
+
+    SV* invlist = _new_invlist(size);
+    bool offset;
+
+    PERL_ARGS_ASSERT__SETUP_CANNED_INVLIST;
+
+    _append_range_to_invlist(invlist, element0, element0);
+    offset = *get_invlist_offset_addr(invlist);
+
+    invlist_set_len(invlist, size, offset);
+    *other_elements_ptr = invlist_array(invlist) + 1;
+    return invlist;
+}
+
 #endif
 
 PERL_STATIC_INLINE SV*
diff --git a/utf8.c b/utf8.c
index e584aaa537..818efb11e6 100644
--- a/utf8.c
+++ b/utf8.c
@@ -4140,6 +4140,33 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
     loc = (char *) l;
     lend = l + lcur;
 
+    if (*l == 'V') {    /*  Inversion list format */
+        char *after_strtol = (char *) lend;
+        UV element0;
+        UV* other_elements_ptr;
+
+        /* The first number is a count of the rest */
+        l++;
+        elements = Strtoul((char *)l, &after_strtol, 10);
+        l = (U8 *) after_strtol;
+
+        /* Get the 0th element, which is needed to setup the inversion list */
+        element0 = (UV) Strtoul((char *)l, &after_strtol, 10);
+        l = (U8 *) after_strtol;
+        invlist = _setup_canned_invlist(elements, element0, &other_elements_ptr);
+        elements--;
+
+        /* Then just populate the rest of the input */
+        while (elements-- > 0) {
+            if (l > lend) {
+                Perl_croak(aTHX_ "panic: Expecting %"UVuf" more elements than available", elements);
+            }
+            *other_elements_ptr++ = (UV) Strtoul((char *)l, &after_strtol, 10);
+            l = (U8 *) after_strtol;
+        }
+    }
+    else {
+
     /* Scan the input to count the number of lines to preallocate array size
      * based on worst possible case, which is each line in the input creates 2
      * elements in the inversion list: 1) the beginning of a range in the list;
@@ -4173,6 +4200,7 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
 
 	invlist = _add_range_to_invlist(invlist, start, end);
     }
+    }
 
     /* Invert if the data says it should be */
     if (invert_it_svp && SvUV(*invert_it_svp)) {