t/re/uniprops.t: Add more description for \b{} tests

mktables generates a file of tests used in t/re/uniprops.t. The tests furnished by Unicode for the boundaries like \b{gcb} have comments that indicate the rules each test is testing. These are useful in debugging. This commit changes things so the generated file that includes these Unicode-supplied tests also has the corresponding comments which are output as part of the test descriptions.
author: Karl Williamson <khw@cpan.org> 2016-06-09 15:35:15 -0600
committer: Karl Williamson <khw@cpan.org> 2016-06-21 18:10:38 -0600
commit: 4e058f448f14936d2650df689211ba416bb627ba (patch)
tree: 361c11998c2ae254f8687e0230eff36468f6ed60 /lib/unicore
parent: c92b26e114702cae63b26bfe6517283589a04296 (diff)
download: perl-4e058f448f14936d2650df689211ba416bb627ba.tar.gz
1 files changed, 42 insertions, 4 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index bea8739418..5d49fb7714 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -2198,6 +2198,15 @@ sub trace { return main::trace(@_); }
     # 'handler'
     main::set_access('each_line_handler', \%each_line_handler, 'c');
 
+    my %retain_trailing_comments;
+    # This is used to not discard the comments that end data lines.  This
+    # would be used only for files with non-typical syntax, and most code here
+    # assumes that comments have been stripped, so special handlers would have
+    # to be written.  It is assumed that the code will use these in
+    # single-quoted contexts, and so any "'" marks in the comment will be
+    # prefixed by a backslash.
+    main::set_access('retain_trailing_comments', \%retain_trailing_comments, 'c');
+
     my %properties; # Optional ordered list of the properties that occur in each
     # meaningful line of the input file.  If present, an appropriate
     # each_line_handler() is automatically generated and pushed onto the stack
@@ -2355,6 +2364,7 @@ sub trace { return main::trace(@_); }
 
         # Set defaults
         $handler{$addr} = \&main::process_generic_property_file;
+        $retain_trailing_comments{$addr} = 0;
         $non_skip{$addr} = 0;
         $skip{$addr} = undef;
         $has_missings_defaults{$addr} = $NO_DEFAULTS;
@@ -3020,9 +3030,21 @@ END
                 next;
             }
 
-            # Remove comments and trailing space, and skip this line if the
-            # result is empty
-            s/#.*//;
+            # Unless to keep, remove comments.  If to keep, ignore
+            # comment-only lines
+            if ($retain_trailing_comments{$addr}) {
+                next if / ^ \s* \# /x;
+
+                # But escape any single quotes (done in both the comment and
+                # non-comment portion; this could be a bug someday, but not
+                # likely)
+                s/'/\\'/g;
+            }
+            else {
+                s/#.*//;
+            }
+
+            # Remove trailing space, and skip this line if the result is empty
             s/\s+$//;
             next if /^$/;
 
@@ -19188,18 +19210,21 @@ my @input_file_objects = (
                    ),
     Input_file->new("$AUXILIARY/GCBTest.txt", v4.1.0,
                     Handler => \&process_GCB_test,
+                    retain_trailing_comments => 1,
                    ),
     Input_file->new("$AUXILIARY/GraphemeBreakTest.html", v4.1.0,
                     Skip => $Validation_Documentation,
                    ),
     Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0,
                     Handler => \&process_SB_test,
+                    retain_trailing_comments => 1,
                    ),
     Input_file->new("$AUXILIARY/SentenceBreakTest.html", v4.1.0,
                     Skip => $Validation_Documentation,
                    ),
     Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0,
                     Handler => \&process_WB_test,
+                    retain_trailing_comments => 1,
                    ),
     Input_file->new("$AUXILIARY/WordBreakTest.html", v4.1.0,
                     Skip => $Validation_Documentation,
@@ -19250,6 +19275,7 @@ my @input_file_objects = (
                    ),
     Input_file->new("$AUXILIARY/LBTest.txt", v5.1.0,
                     Handler => \&process_LB_test,
+                    retain_trailing_comments => 1,
                    ),
     Input_file->new("$AUXILIARY/LineBreakTest.html", v5.1.0,
                     Skip => $Validation_Documentation,
@@ -19842,6 +19868,15 @@ sub _test_break($$) {
     my $break_type = shift;
 
     my $line   = (caller 1)[2];   # Line number
+    my $comment = "";
+
+    if ($template =~ / ( .*? ) \s* \# (.*) /x) {
+        $template = $1;
+        $comment = $2;
+
+        # Replace leading spaces with a single one.
+        $comment =~ s/ ^ \s* / # /x;
+    }
 
     # The line contains characters above the ASCII range, but in Latin1.  It
     # may or may not be in utf8, and if it is, it may or may not know it.  So,
@@ -19985,7 +20020,10 @@ sub _test_break($$) {
 
             # Fancy display of test results
             $matched = ($matched) ? "matched" : "failed to match";
-            print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale\n";
+            print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale$comment\n";
+
+            # Only print the comment on the first use of this line
+            $comment = "";
 
             # Repeat with the first \B{} in the pattern.  This makes sure the
             # code in regexec.c:find_byclass() for \B gets executed
author	Karl Williamson <khw@cpan.org>	2016-06-09 15:35:15 -0600
committer	Karl Williamson <khw@cpan.org>	2016-06-21 18:10:38 -0600
commit	4e058f448f14936d2650df689211ba416bb627ba (patch)
tree	361c11998c2ae254f8687e0230eff36468f6ed60 /lib/unicore
parent	c92b26e114702cae63b26bfe6517283589a04296 (diff)
download	perl-4e058f448f14936d2650df689211ba416bb627ba.tar.gz