summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2016-06-16 11:48:28 -0600
committerKarl Williamson <khw@cpan.org>2016-06-21 18:10:38 -0600
commit6295dc14882a54531ce4542f1d80fa8ae7b4f8f0 (patch)
tree9767fce6f4ecaefbb11ab2b0b978f2fe3b07429f
parentb6c0faaf0a2924c76850099116895e5c29a739e1 (diff)
downloadperl-6295dc14882a54531ce4542f1d80fa8ae7b4f8f0.tar.gz
Tell mktables what Unicode version mk_invlist.pl handles
A downside of supporting the Unicode break properties like \b{gcb}, \b{lb} is that these aren't very mature in the Standard, and so code likely has to change when updating Perl to support a new version of the Standard. And the new rules may not be backwards compatible. This commit creates a mechanism to tell mktables the Unicode version that the rules are written for. If that is not the same version as being compiled, the test file marks any failing boundary tests as TODO, and outputs a warning if the compiled version is later than the code expects, to alert you to the fact that the code needs to be updated.
-rw-r--r--charclass_invlists.h2
-rw-r--r--lib/unicore/mktables56
-rw-r--r--regcharclass.h2
3 files changed, 50 insertions, 10 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 8a7ba0230f..cb545cbb19 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -87887,7 +87887,7 @@ static const U8 WB_table[19][19] = {
* 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
* 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
* a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 2e9c8c898fd78231c21ff0da9facb8d231bf419bde94dc63075dff904be4f5f7 lib/unicore/mktables
+ * 45321b549a605b65ead1e83cdb90fdd9c5a6c8731a537197f335bab251b4e778 lib/unicore/mktables
* 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
* 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
* 12bd58cb9d5a99f631ca95e269f7f9c90dacaf81020efa5d95a995f3cdc19200 regen/mk_invlists.pl
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 5d49fb7714..7b25ba7f36 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -36,6 +36,17 @@ my $debugging_build = $Config{"ccflags"} =~ /-DDEBUGGING/;
sub NON_ASCII_PLATFORM { ord("A") != 65 }
+# When a new version of Unicode is published, unfortunately the algorithms for
+# dealing with various bounds, like \b{gcb}, \b{lb} may have to be updated
+# manually. The changes may or may not be backward compatible with older
+# releases. The code is in regen/mk_invlist.pl and regexec.c. Make the
+# changes, then come back here and set the variable below to what version the
+# code is expecting. If a newer version of Unicode is being compiled than
+# expected, a warning will be generated. If an older version is being
+# compiled, any bounds tests that fail in the generated test file (-maketest
+# option) will be marked as TODO.
+my $version_of_mk_invlist_bounds = v8.0.0;
+
##########################################################################
#
# mktables -- create the runtime Perl Unicode files (lib/unicore/.../*.pl),
@@ -18788,9 +18799,16 @@ sub make_property_test_script() {
$property->DESTROY();
}
+ # Make any test of the boundary (break) properties TODO if the code
+ # doesn't match the version being compiled
+ my $TODO_FAILING_BREAKS = ($version_of_mk_invlist_bounds ne $v_version)
+ ? "\nsub TODO_FAILING_BREAKS { 1 }\n"
+ : "\nsub TODO_FAILING_BREAKS { 0 }\n";
+
&write($t_path,
0, # Not utf8;
[$HEADER,
+ $TODO_FAILING_BREAKS,
<DATA>,
@output,
(map {"Test_GCB('$_');\n"} @backslash_X_tests),
@@ -19721,6 +19739,13 @@ if ($verbosity >= $NORMAL_VERBOSITY && ! $debug_skip) {
}
print "\nAll done\n" if $verbosity >= $VERBOSE;
}
+
+if ($version_of_mk_invlist_bounds lt $v_version) {
+ Carp::my_carp("WARNING: \\b{} algorithms (regen/mk_invlist.pl) need"
+ . " to be checked and possibly updated to Unicode"
+ . " $string_version");
+}
+
exit(0);
# TRAILING CODE IS USED BY make_property_test_script()
@@ -20015,12 +20040,21 @@ sub _test_break($$) {
my $pattern = "(?$modifier:$break_pattern)";
# Actually do the test
+ my $matched_text;
my $matched = $string =~ qr/$pattern/;
- print "not " unless $matched;
+ if ($matched) {
+ $matched_text = "matched";
+ }
+ else {
+ $matched_text = "failed to match";
+ print "not ";
- # Fancy display of test results
- $matched = ($matched) ? "matched" : "failed to match";
- print "ok ", ++$Tests, " - \"$display_string\" $matched /$pattern/$display_upgrade; line $line $display_locale$comment\n";
+ if (TODO_FAILING_BREAKS) {
+ $comment = " # $comment" unless $comment =~ / ^ \s* \# /x;
+ $comment =~ s/#/# TODO/;
+ }
+ }
+ print "ok ", ++$Tests, " - \"$display_string\" $matched_text /$pattern/$display_upgrade; line $line $display_locale$comment\n";
# Only print the comment on the first use of this line
$comment = "";
@@ -20031,8 +20065,10 @@ sub _test_break($$) {
my $B_pattern = "$1$2";
$matched = $string =~ qr/$B_pattern/;
print "not " unless $matched;
- $matched = ($matched) ? "matched" : "failed to match";
- print "ok ", ++$Tests, " - \"$display_string\" $matched /$B_pattern/$display_upgrade; line $line $display_locale\n";
+ $matched_text = ($matched) ? "matched" : "failed to match";
+ print "ok ", ++$Tests, " - \"$display_string\" $matched_text /$B_pattern/$display_upgrade; line $line $display_locale";
+ print " # TODO" if TODO_FAILING_BREAKS && ! $matched;
+ print "\n";
}
}
@@ -20057,7 +20093,9 @@ sub _test_break($$) {
} else {
$matches[$i] = join("", map { sprintf "\\x{%04X}", ord $_ }
split "", $matches[$i]);
- print "not ok $Tests - In \"$display_string\" =~ /(\\X)/g, \\X #",
+ print "not ok $Tests -";
+ print " # TODO" if TODO_FAILING_BREAKS;
+ print " In \"$display_string\" =~ /(\\X)/g, \\X #",
$i + 1,
" should have matched $should_display[$i]",
" but instead matched $matches[$i]",
@@ -20071,7 +20109,9 @@ sub _test_break($$) {
if (@matches == @should_match) {
print "ok $Tests - Nothing was left over; line $line\n";
} else {
- print "not ok $Tests - There were ", scalar @should_match, " \\X matches expected, but got ", scalar @matches, " instead; line $line\n";
+ print "not ok $Tests - There were ", scalar @should_match, " \\X matches expected, but got ", scalar @matches, " instead; line $line";
+ print " # TODO" if TODO_FAILING_BREAKS;
+ print "\n";
}
}
diff --git a/regcharclass.h b/regcharclass.h
index d88850b422..d50a5c78dd 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -1895,7 +1895,7 @@
* 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
* 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
* a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 2e9c8c898fd78231c21ff0da9facb8d231bf419bde94dc63075dff904be4f5f7 lib/unicore/mktables
+ * 45321b549a605b65ead1e83cdb90fdd9c5a6c8731a537197f335bab251b4e778 lib/unicore/mktables
* 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
* 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl
* d9c04ac46bdd81bb3e26519f2b8eb6242cb12337205add3f7cf092b0c58dccc4 regen/regcharclass.pl