diff options
author | Karl Williamson <khw@cpan.org> | 2016-06-09 15:35:15 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-06-21 18:10:38 -0600 |
commit | 4e058f448f14936d2650df689211ba416bb627ba (patch) | |
tree | 361c11998c2ae254f8687e0230eff36468f6ed60 /lib/unicore | |
parent | c92b26e114702cae63b26bfe6517283589a04296 (diff) | |
download | perl-4e058f448f14936d2650df689211ba416bb627ba.tar.gz |
t/re/uniprops.t: Add more description for \b{} tests
mktables generates a file of tests used in t/re/uniprops.t.
The tests furnished by Unicode for the boundaries like \b{gcb} have
comments that indicate the rules each test is testing. These are useful
in debugging. This commit changes things so the generated file that
includes these Unicode-supplied tests also has the corresponding
comments which are output as part of the test descriptions.
Diffstat (limited to 'lib/unicore')
-rw-r--r-- | lib/unicore/mktables | 46 |
1 files changed, 42 insertions, 4 deletions
diff --git a/lib/unicore/mktables b/lib/unicore/mktables index bea8739418..5d49fb7714 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -2198,6 +2198,15 @@ sub trace { return main::trace(@_); } # 'handler' main::set_access('each_line_handler', \%each_line_handler, 'c'); + my %retain_trailing_comments; + # This is used to not discard the comments that end data lines. This + # would be used only for files with non-typical syntax, and most code here + # assumes that comments have been stripped, so special handlers would have + # to be written. It is assumed that the code will use these in + # single-quoted contexts, and so any "'" marks in the comment will be + # prefixed by a backslash. + main::set_access('retain_trailing_comments', \%retain_trailing_comments, 'c'); + my %properties; # Optional ordered list of the properties that occur in each # meaningful line of the input file. If present, an appropriate # each_line_handler() is automatically generated and pushed onto the stack @@ -2355,6 +2364,7 @@ sub trace { return main::trace(@_); } # Set defaults $handler{$addr} = \&main::process_generic_property_file; + $retain_trailing_comments{$addr} = 0; $non_skip{$addr} = 0; $skip{$addr} = undef; $has_missings_defaults{$addr} = $NO_DEFAULTS; @@ -3020,9 +3030,21 @@ END next; } - # Remove comments and trailing space, and skip this line if the - # result is empty - s/#.*//; + # Unless to keep, remove comments. If to keep, ignore + # comment-only lines + if ($retain_trailing_comments{$addr}) { + next if / ^ \s* \# /x; + + # But escape any single quotes (done in both the comment and + # non-comment portion; this could be a bug someday, but not + # likely) + s/'/\\'/g; + } + else { + s/#.*//; + } + + # Remove trailing space, and skip this line if the result is empty s/\s+$//; next if /^$/; @@ -19188,18 +19210,21 @@ my @input_file_objects = ( ), Input_file->new("$AUXILIARY/GCBTest.txt", v4.1.0, Handler => \&process_GCB_test, + retain_trailing_comments => 1, ), Input_file->new("$AUXILIARY/GraphemeBreakTest.html", v4.1.0, Skip => $Validation_Documentation, ), Input_file->new("$AUXILIARY/SBTest.txt", v4.1.0, Handler => \&process_SB_test, + retain_trailing_comments => 1, ), Input_file->new("$AUXILIARY/SentenceBreakTest.html", v4.1.0, Skip => $Validation_Documentation, ), Input_file->new("$AUXILIARY/WBTest.txt", v4.1.0, Handler => \&process_WB_test, + retain_trailing_comments => 1, ), Input_file->new("$AUXILIARY/WordBreakTest.html", v4.1.0, Skip => $Validation_Documentation, @@ -19250,6 +19275,7 @@ my @input_file_objects = ( ), Input_file->new("$AUXILIARY/LBTest.txt", v5.1.0, Handler => \&process_LB_test, + retain_trailing_comments => 1, ), Input_file->new("$AUXILIARY/LineBreakTest.html", v5.1.0, Skip => $Validation_Documentation, @@ -19842,6 +19868,15 @@ sub _test_break($$) { my $break_type = shift; my $line = (caller 1)[2]; # Line number + my $comment = ""; + + if ($template =~ / ( .*? ) \s* \# (.*) /x) { + $template = $1; + $comment = $2; + + # Replace leading spaces with a single one. + $comment =~ s/ ^ \s* / # /x; + } # The line contains characters above the ASCII range, but in Latin1. It # may or may not be in utf8, and if it is, it may or may not know it. So, @@ -19985,7 +20020,10 @@ sub _test_break($$) { # Fancy display of test results $matched = ($matched) ? "matched" : "failed to match"; - print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale\n"; + print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale$comment\n"; + + # Only print the comment on the first use of this line + $comment = ""; # Repeat with the first \B{} in the pattern. This makes sure the # code in regexec.c:find_byclass() for \B gets executed |