Add 'strict' subpragma to 'use re'

This subpragma is to allow p5p to add warnings/errors for regex patterns without having to worry about backwards compatibility. And it allows users who want to have the latest checks on their code to do so. An experimental warning is raised by default when it is used, not because the subpragma might go away, but because what it catches is subject to change from release-to-release, and so the user is acknowledging that they waive the right to backwards compatibility. I will be working in the near term to make some changes to what is detected by this. Note that there is no indication in the pattern stringification that it was compiled under this. This means I didn't have to figure out how to stringify it. It is fine because using this doesn't affect what the pattern gets compiled into, if successful. And interpolating the stringified pattern under either strict or non-strict should both just work.
author: Karl Williamson <khw@cpan.org> 2015-01-05 13:17:58 -0700
committer: Karl Williamson <khw@cpan.org> 2015-01-13 12:01:03 -0700
commit: 67cdf558540fcb50072632cb50aa953c0583f350 (patch)
tree: 82939f4dd1b0c3783defc60226f6b39dda11d87e
parent: af631a26a8f5a7d7136bf909c27dbba1a2d49690 (diff)
download: perl-67cdf558540fcb50072632cb50aa953c0583f350.tar.gz
10 files changed, 356 insertions, 61 deletions
diff --git a/MANIFEST b/MANIFEST
index fbca8ebe9f..6223b4c85c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3857,6 +3857,7 @@ ext/re/t/re_funcs_u.t		See if exportable 're' funcs in universal.c work
 ext/re/t/regop.pl		generate debug output for various patterns
 ext/re/t/regop.t		test RE optimizations by scraping debug output
 ext/re/t/re.t			see if re pragma works
+ext/re/t/strict.t		see if re 'strict' subpragma works
 ext/SDBM_File/biblio	SDBM kit
 ext/SDBM_File/CHANGES	SDBM kit
 ext/SDBM_File/COMPARE	SDBM kit
diff --git a/ext/re/re.pm b/ext/re/re.pm
index bee65d2ce4..5ddaa217d5 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -4,7 +4,7 @@ package re;
 use strict;
 use warnings;
 
-our $VERSION     = "0.29";
+our $VERSION     = "0.30";
 our @ISA         = qw(Exporter);
 our @EXPORT_OK   = ('regmust',
                     qw(is_regexp regexp_pattern
@@ -25,6 +25,7 @@ my %reflags = (
     x => 1 << ($PMMOD_SHIFT + 3),
     n => 1 << ($PMMOD_SHIFT + 5),
     p => 1 << ($PMMOD_SHIFT + 6),
+    strict => 1 << ($PMMOD_SHIFT + 10),
 # special cases:
     d => 0,
     l => 1,
@@ -141,6 +142,31 @@ sub bits {
 	} elsif ($EXPORT_OK{$s}) {
 	    require Exporter;
 	    re->export_to_level(2, 're', $s);
+        } elsif ($s eq 'strict') {
+            if ($on) {
+                $^H{reflags} |= $reflags{$s};
+                warnings::warnif('experimental::re_strict',
+                                 "\"use re 'strict'\" is experimental");
+
+                # Turn on warnings if not already done.
+                if (! warnings::enabled('regexp')) {
+                    require warnings;
+                    warnings->import('regexp');
+                    $^H{re_strict} = 1;
+                }
+            }
+            else {
+                $^H{reflags} &= ~$reflags{$s};
+
+                # Turn off warnings if we turned them on.
+                warnings->unimport('regexp') if $^H{re_strict};
+            }
+	    if ($^H{reflags}) {
+                $^H |= $flags_hint;
+            }
+            else {
+                $^H &= ~$flags_hint;
+            }
 	} elsif ($s =~ s/^\///) {
 	    my $reflags = $^H{reflags} || 0;
 	    my $seen_charset;
@@ -263,6 +289,8 @@ re - Perl pragma to alter regular expression behaviour
                                    # switch)
     }
 
+    use re 'strict';               # Raise warnings for more conditions
+
     use re '/ix';
     "FOO" =~ / foo /; # /ix implied
     no re '/x';
@@ -324,6 +352,50 @@ interpolation.  Thus:
 I<is> allowed if $pat is a precompiled regular expression, even
 if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions.
 
+=head2 'strict' mode
+
+When C<use re 'strict'> is in effect, stricter checks are applied than
+otherwise when compiling regular expressions patterns.  These may cause more
+warnings to be raised than otherwise, and more things to be fatal instead of
+just warnings.  The purpose of this is to find and report at compile time some
+things, which may be legal, but have a reasonable possibility of not being the
+programmer's actual intent.  This automatically turns on the C<"regexp">
+warnings category (if not already on) within its scope.
+
+As an example of something that is caught under C<"strict'> but not otherwise
+is the pattern
+
+ qr/\xABC/
+
+The C<"\x"> construct without curly braces should be followed by exactly two
+hex digits; this one is followed by three.  This currently evaluates as
+equivalent to
+
+ qr/\x{AB}C/
+
+that is, the character whose code point value is C<0xAB>, followed by the
+letter C<C>.  But since C<C> is a a hex digit, there is a reasonable chance
+that the intent was
+
+ qr/\x{ABC}/
+
+that is the single character at C<0xABC>.  Under C<'strict'> it is an error to
+not follow C<\x> with exactly two hex digits.  When not under C<'strict'> a
+warning is generated if there is only one hex digit, and no warning is raised
+if there are more than two.
+
+It is expected that what exactly C<'strict'> does will evolve over time as we
+gain experience with it.  This means that programs that compile under it in
+today's Perl may not compile, or may have more or fewer warnings, in future
+Perls.  There is no backwards compatibility promises with regards to it.  For
+this reason, using it will raise a C<experimental::re_strict> class warning,
+unless that category is turned off.
+
+Note that if a pattern compiled within C<'strict'> is recompiled, say by
+interpolating into another pattern, outside of C<'strict'>, it is not checked
+again for strictness.  This is because if it works under strict it must work
+under non-strict.
+
 =head2 '/flags' mode
 
 When C<use re '/flags'> is specified, the given flags are automatically
diff --git a/ext/re/t/strict.t b/ext/re/t/strict.t
new file mode 100644
index 0000000000..dd9c8110e4
--- /dev/null
+++ b/ext/re/t/strict.t
@@ -0,0 +1,66 @@
+#!./perl
+
+# Most of the strict effects are tested for in t/re/reg_mesgs.t
+
+BEGIN {
+        require Config;
+        if (($Config::Config{'extensions'} !~ /\bre\b/) ){
+                print "1..0 # Skip -- Perl configured without re module\n";
+                exit 0;
+        }
+}
+
+use strict;
+
+use Test::More tests => 9;
+BEGIN { require_ok( 're' ); }
+
+{
+    my @w;
+    no warnings;
+    local $SIG{__WARN__};
+    BEGIN { $SIG{__WARN__} = sub { push @w, @_ } };
+    qr/\b*/;
+    BEGIN { is(scalar @w, 0, 'No default-on warnings for qr/\b*/'); }
+    BEGIN {undef @w; }
+
+    {
+        use re 'strict';
+        qr/\b*/;
+
+        BEGIN { is(scalar @w, 1, 'use re "strict" turns on warnings'); }
+    }
+
+    BEGIN {undef @w; }
+    qr/\b*/;
+    BEGIN { is(scalar @w, 0, 'dropping out of "strict" scope reverts warnings default'); }
+
+    {
+        use re 'strict';
+        qr/\b*/;
+
+        BEGIN { is(scalar @w, 1, 'use re "strict" turns on warnings'); }
+
+        no re 'strict';
+        BEGIN {undef @w; }
+        qr/\b*/;
+        BEGIN { is(scalar @w, 0, 'turning off "strict" scope reverts warnings default'); }
+    }
+
+    {
+        use warnings 'regexp';
+        BEGIN {undef @w; }
+        qr/\b*/;
+        BEGIN { is(scalar @w, 1, 'use warnings "regexp" works'); }
+
+        use re 'strict';
+        BEGIN {undef @w; }
+        qr/\b*/;
+        BEGIN { is(scalar @w, 1, 'use re "strict" keeps warnings on'); }
+
+        no re 'strict';
+        BEGIN {undef @w; }
+        qr/\b*/;
+        BEGIN { is(scalar @w, 1, 'turning off "strict" scope doesn\'t affect warnings that were already on'); }
+    }
+}
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 4e44922f6b..73d808c1e9 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -57,6 +57,17 @@ See L<perlre/"n"> for more information.
 
 C<prototype()> with no arguments now infers C<$_>.  [perl #123514]
 
+=head2 C<use re 'strict'>
+
+This applies stricter syntax rules to regular expression patterns
+compiled within its scope, which hopefully will alert you to typos and
+other unintentional behavior that backwards-compatibility issues prevent
+us from doing in normal regular expression compilations.  Because the
+behavior of this is subject to change in future Perl releases as we gain
+experience, using this pragma will raise a category
+C<experimental:re_strict> warning.
+See L<'strict' in re|re/'strict' mode>.
+
 =head1 Security
 
 XXX Any security-related notices go here.  In particular, any security
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 84577ae1e6..650839c1fc 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -6733,6 +6733,14 @@ optimized into C<"that " . $foo>, and the warning will refer to the
 C<concatenation (.)> operator, even though there is no C<.> in
 your program.
 
+=item "use re 'strict'" is experimental
+
+(S experimental::re_strict) The things that are different when a regular
+expression pattern is compiled under C<'strict'> are subject to change
+in future Perl releases in incompatible ways.  This means that a pattern
+that compiles today may not in a future Perl release.  This warning is
+to alert you to that risk.
+
 =item Use \x{...} for more than two hex characters in regex; marked by
 S<<-- HERE> in m/%s/
 
diff --git a/pod/perlre.pod b/pod/perlre.pod
index dfd47cd55e..21e0f0406c 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -16,6 +16,9 @@ operations, plus various examples of the same, see discussions of
 C<m//>, C<s///>, C<qr//> and C<??> in L<perlop/"Regexp Quote-Like
 Operators">.
 
+New in v5.22, L<C<use re 'strict'>|re/'strict' mode> applies stricter
+rules than otherwise when compiling regular expression patterns.  It can
+find things that, while legal, may not be what you intended.
 
 =head2 Modifiers
 
diff --git a/pod/perlrequick.pod b/pod/perlrequick.pod
index 008ef339fe..30c3238a9f 100644
--- a/pod/perlrequick.pod
+++ b/pod/perlrequick.pod
@@ -495,6 +495,14 @@ the matched substrings from the groupings as well:
 Since the first character of $x matched the regex, C<split> prepended
 an empty initial element to the list.
 
+=head2 C<use re 'strict'>
+
+New in v5.22, this applies stricter rules than otherwise when compiling
+regular expression patterns.  It can find things that, while legal, may
+not be what you intended.
+
+See L<'strict' in re|re/'strict' mode>.
+
 =head1 BUGS
 
 None.
diff --git a/pod/perlretut.pod b/pod/perlretut.pod
index 957b29686d..c5d88910a2 100644
--- a/pod/perlretut.pod
+++ b/pod/perlretut.pod
@@ -49,6 +49,10 @@ is harder to pronounce.  The Perl pod documentation is evenly split on
 regexp vs regex; in Perl, there is more than one way to abbreviate it.
 We'll use regexp in this tutorial.
 
+New in v5.22, L<C<use re 'strict'>|re/'strict' mode> applies stricter
+rules than otherwise when compiling regular expression patterns.  It can
+find things that, while legal, may not be what you intended.
+
 =head1 Part 1: The basics
 
 =head2 Simple word matching
diff --git a/regcomp.c b/regcomp.c
index d48826765e..68bc1f4ce9 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -184,6 +184,7 @@ struct RExC_state_t {
     scan_frame *frame_head;
     scan_frame *frame_last;
     U32         frame_count;
+    U32         strict;
 #ifdef ADD_TO_REGEXEC
     char 	*starttry;		/* -Dr: where regtry was called. */
 #define RExC_starttry	(pRExC_state->starttry)
@@ -253,6 +254,7 @@ struct RExC_state_t {
 #define RExC_frame_head (pRExC_state->frame_head)
 #define RExC_frame_last (pRExC_state->frame_last)
 #define RExC_frame_count (pRExC_state->frame_count)
+#define RExC_strict (pRExC_state->strict)
 
 /* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
  * a flag to disable back-off on the fixed/floating substrings - if it's
@@ -6532,6 +6534,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     RExC_uni_semantics = 0;
     RExC_contains_locale = 0;
     RExC_contains_i = 0;
+    RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
     pRExC_state->runtime_code_qr = NULL;
     RExC_frame_head= NULL;
     RExC_frame_last= NULL;
@@ -11648,7 +11651,7 @@ tryagain:
                        FALSE, /* means parse the whole char class */
                        TRUE, /* allow multi-char folds */
                        FALSE, /* don't silence non-portable warnings. */
-                       FALSE, /* not strict */
+                       RExC_strict,
                        NULL);
 	if (*RExC_parse != ']') {
 	    RExC_parse = oregcomp_parse;
@@ -11884,7 +11887,7 @@ tryagain:
                                FALSE, /* don't silence non-portable warnings.
                                          It would be a bug if these returned
                                          non-portables */
-                               FALSE, /* not strict */
+                               RExC_strict,
                                NULL);
                 /* regclass() can only return RESTART_UTF8 if multi-char folds
                    are allowed.  */
@@ -12259,7 +12262,7 @@ tryagain:
 						       &result,
 						       &error_msg,
 						       PASS2, /* out warnings */
-                                                       FALSE, /* not strict */
+                                                       RExC_strict,
                                                        TRUE, /* Output warnings
                                                                 for non-
                                                                 portables */
@@ -12288,7 +12291,7 @@ tryagain:
 						       &result,
 						       &error_msg,
 						       PASS2, /* out warnings */
-                                                       FALSE, /* not strict */
+                                                       RExC_strict,
                                                        TRUE, /* Output warnings
                                                                 for non-
                                                                 portables */
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index e61e8ef531..b4ba410226 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -57,7 +57,9 @@ utf8::encode($utf8);
 
 sub mark_as_utf8 {
     my @ret;
-    while ( my ($pat, $msg) = splice(@_, 0, 2) ) {
+    for (my $i = 0; $i < @_; $i += 2) {
+        my $pat = $_[$i];
+        my $msg = $_[$i+1];
         my $l1_pat = $pat =~ s/$utf8/$l1/gr;
         my $l1_msg;
         $pat = "use utf8; $pat";
@@ -240,6 +242,90 @@ my @death =
  '/((?# This is a comment in the middle of a token)*FAIL)/' => 'In \'(*VERB...)\', the \'(\' and \'*\' must be adjacent {#} m/((?# This is a comment in the middle of a token)*{#}FAIL)/',
 );
 
+# These are messages that are warnings when not strict; death under 'use re
+# "strict".  See comment before @warnings as to why some have a \x{100} in
+# them.  This array has 3 elements per construct.  [0] is the regex to use;
+# [1] is the message under no strict, and [2] is under strict.
+my @death_only_under_strict = (
+    'm/\xABC/' => "",
+               => 'Use \x{...} for more than two hex characters {#} m/\xABC{#}/',
+    'm/[\xABC]/' => "",
+                 => 'Use \x{...} for more than two hex characters {#} m/[\xABC{#}]/',
+
+    # XXX This is a confusing error message.  The G isn't ignored; it just
+    # terminates the \x.  Also some messages below are missing the <-- HERE,
+    # aren't all category 'regexp'.  (Hence we have to turn off 'digit'
+    # messages as well below)
+    'm/\xAG/' => 'Illegal hexadecimal digit \'G\' ignored',
+              => 'Non-hex character {#} m/\xAG{#}/',
+    'm/[\xAG]/' => 'Illegal hexadecimal digit \'G\' ignored',
+                => 'Non-hex character {#} m/[\xAG{#}]/',
+    'm/\o{789}/' => 'Non-octal character \'8\'.  Resolved as "\o{7}"',
+                 => 'Non-octal character {#} m/\o{78{#}9}/',
+    'm/[\o{789}]/' => 'Non-octal character \'8\'.  Resolved as "\o{7}"',
+                   => 'Non-octal character {#} m/[\o{78{#}9}]/',
+    'm/\x{}/' => "",
+              => 'Number with no digits {#} m/\x{}{#}/',
+    'm/[\x{}]/' => "",
+                => 'Number with no digits {#} m/[\x{}{#}]/',
+    'm/\x{ABCDEFG}/' => 'Illegal hexadecimal digit \'G\' ignored',
+                     => 'Non-hex character {#} m/\x{ABCDEFG{#}}/',
+    'm/[\x{ABCDEFG}]/' => 'Illegal hexadecimal digit \'G\' ignored',
+                       => 'Non-hex character {#} m/[\x{ABCDEFG{#}}]/',
+    'm/[[:ascii]]/' => "",
+                    => 'Unmatched \':\' in POSIX class {#} m/[[:ascii{#}]]/',
+    'm/[\N{}]/' => 'Ignoring zero length \\N{} in character class {#} m/[\\N{}{#}]/',
+                => 'Zero length \\N{} {#} m/[\\N{}]{#}/',
+    "m'[\\y]\\x{100}'" => 'Unrecognized escape \y in character class passed through {#} m/[\y{#}]\x{100}/',
+                       => 'Unrecognized escape \y in character class {#} m/[\y{#}]\x{100}/',
+    'm/[a-\d]\x{100}/' => 'False [] range "a-\d" {#} m/[a-\d{#}]\x{100}/',
+                       => 'False [] range "a-\d" {#} m/[a-\d{#}]\x{100}/',
+    'm/[\w-x]\x{100}/' => 'False [] range "\w-" {#} m/[\w-{#}x]\x{100}/',
+                       => 'False [] range "\w-" {#} m/[\w-{#}x]\x{100}/',
+    'm/[a-\pM]\x{100}/' => 'False [] range "a-\pM" {#} m/[a-\pM{#}]\x{100}/',
+                        => 'False [] range "a-\pM" {#} m/[a-\pM{#}]\x{100}/',
+    'm/[\pM-x]\x{100}/' => 'False [] range "\pM-" {#} m/[\pM-{#}x]\x{100}/',
+                        => 'False [] range "\pM-" {#} m/[\pM-{#}x]\x{100}/',
+    'm/[^\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}]/' => 'Using just the first character returned by \N{} in character class {#} m/[^\N{U+100.300}{#}]/',
+                                       => '\N{} in inverted character class or as a range end-point is restricted to one character {#} m/[^\N{U+100.300{#}}]/',
+    'm/[\x03-\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}]/' => 'Using just the first character returned by \N{} in character class {#} m/[\x03-\N{U+100.300}{#}]/',
+                                            => '\N{} in inverted character class or as a range end-point is restricted to one character {#} m/[\x03-\N{U+100.300{#}}]/',
+    'm/[\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}-\x{10FFFF}]/' => 'Using just the first character returned by \N{} in character class {#} m/[\N{U+100.300}{#}-\x{10FFFF}]/',
+                                                  => '\N{} in inverted character class or as a range end-point is restricted to one character {#} m/[\N{U+100.300{#}}-\x{10FFFF}]/',
+    '/[\08]/'   => '\'\08\' resolved to \'\o{0}8\' {#} m/[\08{#}]/',
+                => 'Need exactly 3 octal digits {#} m/[\08{#}]/',
+    '/[\018]/'  => '\'\018\' resolved to \'\o{1}8\' {#} m/[\018{#}]/',
+                => 'Need exactly 3 octal digits {#} m/[\018{#}]/',
+    '/[\_\0]/'  => "",
+                => 'Need exactly 3 octal digits {#} m/[\_\0]{#}/',
+    '/[\07]/'   => "",
+                => 'Need exactly 3 octal digits {#} m/[\07]{#}/',
+    '/[\0005]/' => "",
+                => 'Need exactly 3 octal digits {#} m/[\0005]{#}/',
+    '/[\8\9]\x{100}/' => ['Unrecognized escape \8 in character class passed through {#} m/[\8{#}\9]\x{100}/',
+                          'Unrecognized escape \9 in character class passed through {#} m/[\8\9{#}]\x{100}/',
+                         ],
+                      => 'Unrecognized escape \8 in character class {#} m/[\8{#}\9]\x{100}/',
+    '/[a-\d]\x{100}/' => 'False [] range "a-\d" {#} m/[a-\d{#}]\x{100}/',
+                      => 'False [] range "a-\d" {#} m/[a-\d{#}]\x{100}/',
+    '/[\d-b]\x{100}/' => 'False [] range "\d-" {#} m/[\d-{#}b]\x{100}/',
+                      => 'False [] range "\d-" {#} m/[\d-{#}b]\x{100}/',
+    '/[\s-\d]\x{100}/' => 'False [] range "\s-" {#} m/[\s-{#}\d]\x{100}/',
+                       => 'False [] range "\s-" {#} m/[\s-{#}\d]\x{100}/',
+    '/[\d-\s]\x{100}/' => 'False [] range "\d-" {#} m/[\d-{#}\s]\x{100}/',
+                       => 'False [] range "\d-" {#} m/[\d-{#}\s]\x{100}/',
+    '/[a-[:digit:]]\x{100}/' => 'False [] range "a-[:digit:]" {#} m/[a-[:digit:]{#}]\x{100}/',
+                             => 'False [] range "a-[:digit:]" {#} m/[a-[:digit:]{#}]\x{100}/',
+    '/[[:digit:]-b]\x{100}/' => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}b]\x{100}/',
+                             => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}b]\x{100}/',
+    '/[[:alpha:]-[:digit:]]\x{100}/' => 'False [] range "[:alpha:]-" {#} m/[[:alpha:]-{#}[:digit:]]\x{100}/',
+                                     => 'False [] range "[:alpha:]-" {#} m/[[:alpha:]-{#}[:digit:]]\x{100}/',
+    '/[[:digit:]-[:alpha:]]\x{100}/' => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}[:alpha:]]\x{100}/',
+                                     => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}[:alpha:]]\x{100}/',
+    '/[a\zb]\x{100}/' => 'Unrecognized escape \z in character class passed through {#} m/[a\z{#}b]\x{100}/',
+                      => 'Unrecognized escape \z in character class {#} m/[a\z{#}b]\x{100}/',
+);
+
 # These need the character 'ネ' as a marker for mark_as_utf8()
 my @death_utf8 = mark_as_utf8(
  '/ネ[[=ネ=]]ネ/' => 'POSIX syntax [= =] is reserved for future extensions {#} m/ネ[[=ネ=]{#}]ネ/',
@@ -323,6 +409,22 @@ my @death_utf8 = mark_as_utf8(
 );
 push @death, @death_utf8;
 
+my @death_utf8_only_under_strict = (
+    "m'ネ[\\y]ネ'" => 'Unrecognized escape \y in character class passed through {#} m/ネ[\y{#}]ネ/',
+                   => 'Unrecognized escape \y in character class {#} m/ネ[\y{#}]ネ/',
+    'm/ネ[ネ-\d]ネ/' => 'False [] range "ネ-\d" {#} m/ネ[ネ-\d{#}]ネ/',
+                     => 'False [] range "ネ-\d" {#} m/ネ[ネ-\d{#}]ネ/',
+    'm/ネ[\w-ネ]ネ/' => 'False [] range "\w-" {#} m/ネ[\w-{#}ネ]ネ/',
+                     => 'False [] range "\w-" {#} m/ネ[\w-{#}ネ]ネ/',
+    'm/ネ[ネ-\pM]ネ/' => 'False [] range "ネ-\pM" {#} m/ネ[ネ-\pM{#}]ネ/',
+                      => 'False [] range "ネ-\pM" {#} m/ネ[ネ-\pM{#}]ネ/',
+    '/ネ[ネ-[:digit:]]ネ/' => 'False [] range "ネ-[:digit:]" {#} m/ネ[ネ-[:digit:]{#}]ネ/',
+                           => 'False [] range "ネ-[:digit:]" {#} m/ネ[ネ-[:digit:]{#}]ネ/',
+    '/ネ[\d-\s]ネ/' => 'False [] range "\d-" {#} m/ネ[\d-{#}\s]ネ/',
+                    => 'False [] range "\d-" {#} m/ネ[\d-{#}\s]ネ/',
+    '/ネ[a\zb]ネ/' => 'Unrecognized escape \z in character class passed through {#} m/ネ[a\z{#}b]ネ/',
+                   => 'Unrecognized escape \z in character class {#} m/ネ[a\z{#}b]ネ/',
+);
 # Tests involving a user-defined charnames translator are in pat_advanced.t
 
 # In the following arrays of warnings, the value can be an array of things to
@@ -338,20 +440,10 @@ push @death, @death_utf8;
 my @warning = (
     'm/\b*\x{100}/' => '\b* matches null string many times {#} m/\b*{#}\x{100}/',
     'm/[:blank:]\x{100}/' => 'POSIX syntax [: :] belongs inside character classes {#} m/[:blank:]{#}\x{100}/',
-    "m'[\\y]\\x{100}'"     => 'Unrecognized escape \y in character class passed through {#} m/[\y{#}]\x{100}/',
-    'm/[a-\d]\x{100}/' => 'False [] range "a-\d" {#} m/[a-\d{#}]\x{100}/',
-    'm/[\w-x]\x{100}/' => 'False [] range "\w-" {#} m/[\w-{#}x]\x{100}/',
-    'm/[a-\pM]\x{100}/' => 'False [] range "a-\pM" {#} m/[a-\pM{#}]\x{100}/',
-    'm/[\pM-x]\x{100}/' => 'False [] range "\pM-" {#} m/[\pM-{#}x]\x{100}/',
-    'm/[^\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}]/' => 'Using just the first character returned by \N{} in character class {#} m/[^\N{U+100.300}{#}]/',
-    'm/[\x03-\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}]/' => 'Using just the first character returned by \N{} in character class {#} m/[\x03-\N{U+100.300}{#}]/',
-    'm/[\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}-\x{10FFFF}]/' => 'Using just the first character returned by \N{} in character class {#} m/[\N{U+100.300}{#}-\x{10FFFF}]/',
     "m'\\y\\x{100}'"     => 'Unrecognized escape \y passed through {#} m/\y{#}\x{100}/',
     '/x{3,1}/'   => 'Quantifier {n,m} with n > m can\'t match {#} m/x{3,1}{#}/',
     '/\08/' => '\'\08\' resolved to \'\o{0}8\' {#} m/\08{#}/',
     '/\018/' => '\'\018\' resolved to \'\o{1}8\' {#} m/\018{#}/',
-    '/[\08]/' => '\'\08\' resolved to \'\o{0}8\' {#} m/[\08{#}]/',
-    '/[\018]/' => '\'\018\' resolved to \'\o{1}8\' {#} m/[\018{#}]/',
     '/(?=a)*/' => '(?=a)* matches null string many times {#} m/(?=a)*{#}/',
     'my $x = \'\m\'; qr/a$x/' => 'Unrecognized escape \m passed through {#} m/a\m{#}/',
     '/\q/' => 'Unrecognized escape \q passed through {#} m/\q{#}/',
@@ -364,26 +456,11 @@ my @warning = (
     '/(a|b)(?=a){3}\x{100}/' => 'Quantifier unexpected on zero-length expression in regex m/(a|b)(?=a){3}\x{100}/',
 
     '/\_/' => "",
-    '/[\_\0]/' => "",
-    '/[\07]/' => "",
     '/[\006]/' => "",
-    '/[\0005]/' => "",
-    '/[\8\9]\x{100}/' => ['Unrecognized escape \8 in character class passed through {#} m/[\8{#}\9]\x{100}/',
-                   'Unrecognized escape \9 in character class passed through {#} m/[\8\9{#}]\x{100}/',
-                  ],
     '/[:alpha:]\x{100}/' => 'POSIX syntax [: :] belongs inside character classes {#} m/[:alpha:]{#}\x{100}/',
     '/[:zog:]\x{100}/' => 'POSIX syntax [: :] belongs inside character classes {#} m/[:zog:]{#}\x{100}/',
     '/[.zog.]\x{100}/' => 'POSIX syntax [. .] belongs inside character classes {#} m/[.zog.]{#}\x{100}/',
     '/[a-b]/' => "",
-    '/[a-\d]\x{100}/' => 'False [] range "a-\d" {#} m/[a-\d{#}]\x{100}/',
-    '/[\d-b]\x{100}/' => 'False [] range "\d-" {#} m/[\d-{#}b]\x{100}/',
-    '/[\s-\d]\x{100}/' => 'False [] range "\s-" {#} m/[\s-{#}\d]\x{100}/',
-    '/[\d-\s]\x{100}/' => 'False [] range "\d-" {#} m/[\d-{#}\s]\x{100}/',
-    '/[a-[:digit:]]\x{100}/' => 'False [] range "a-[:digit:]" {#} m/[a-[:digit:]{#}]\x{100}/',
-    '/[[:digit:]-b]\x{100}/' => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}b]\x{100}/',
-    '/[[:alpha:]-[:digit:]]\x{100}/' => 'False [] range "[:alpha:]-" {#} m/[[:alpha:]-{#}[:digit:]]\x{100}/',
-    '/[[:digit:]-[:alpha:]]\x{100}/' => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}[:alpha:]]\x{100}/',
-    '/[a\zb]\x{100}/' => 'Unrecognized escape \z in character class passed through {#} m/[a\z{#}b]\x{100}/',
     '/(?c)\x{100}/' => 'Useless (?c) - use /gc modifier {#} m/(?c{#})\x{100}/',
     '/(?-c)\x{100}/' => 'Useless (?-c) - don\'t use /gc modifier {#} m/(?-c{#})\x{100}/',
     '/(?g)\x{100}/' => 'Useless (?g) - use /g modifier {#} m/(?g{#})\x{100}/',
@@ -413,13 +490,6 @@ my @warnings_utf8 = mark_as_utf8(
     'm/ネ\b*ネ/' => '\b* matches null string many times {#} m/ネ\b*{#}ネ/',
     '/(?=ネ)*/' => '(?=ネ)* matches null string many times {#} m/(?=ネ)*{#}/',
     'm/ネ[:foo:]ネ/' => 'POSIX syntax [: :] belongs inside character classes {#} m/ネ[:foo:]{#}ネ/',
-    "m'ネ[\\y]ネ'" => 'Unrecognized escape \y in character class passed through {#} m/ネ[\y{#}]ネ/',
-    'm/ネ[ネ-\d]ネ/' => 'False [] range "ネ-\d" {#} m/ネ[ネ-\d{#}]ネ/',
-    'm/ネ[\w-ネ]ネ/' => 'False [] range "\w-" {#} m/ネ[\w-{#}ネ]ネ/',
-    'm/ネ[ネ-\pM]ネ/' => 'False [] range "ネ-\pM" {#} m/ネ[ネ-\pM{#}]ネ/',
-    '/ネ[ネ-[:digit:]]ネ/' => 'False [] range "ネ-[:digit:]" {#} m/ネ[ネ-[:digit:]{#}]ネ/',
-    '/ネ[\d-\s]ネ/' => 'False [] range "\d-" {#} m/ネ[\d-{#}\s]ネ/',
-    '/ネ[a\zb]ネ/' => 'Unrecognized escape \z in character class passed through {#} m/ネ[a\z{#}b]ネ/',
     '/ネ(?c)ネ/' => 'Useless (?c) - use /gc modifier {#} m/ネ(?c{#})ネ/',
     '/utf8 ネ (?ogc) ネ/' => [
         'Useless (?o) - use /o modifier {#} m/utf8 ネ (?o{#}gc) ネ/',
@@ -450,36 +520,82 @@ my @deprecated = (
     '/(?xxxx:abc)/' => 'Having more than one /x regexp modifier is deprecated',
 );
 
-while (my ($regex, $expect) = splice @death, 0, 2) {
-    my $expect = fixup_expect($expect);
+for my $strict ("", "use re 'strict';") {
+
+    # First time just use @death; but under strict we add the things that fail
+    # there.  Doing it this way makes sure that 'strict' doesnt change the
+    # things that are already fatal when not under strict.
+    if ($strict) {
+        for (my $i = 0; $i < @death_only_under_strict; $i += 3) {
+            push @death, $death_only_under_strict[$i],    # The regex
+                         $death_only_under_strict[$i+2];  # The fatal msg
+        }
+        for (my $i = 0; $i < @death_utf8_only_under_strict; $i += 3) {
+
+            # Same with the utf8 versions
+            push @death, mark_as_utf8($death_utf8_only_under_strict[$i],
+                                      $death_utf8_only_under_strict[$i+2]);
+        }
+    }
+for (my $i = 0; $i < @death; $i += 2) {
+    my $regex = $death[$i];
+    my $expect = fixup_expect($death[$i+1]);
     no warnings 'experimental::regex_sets';
+    no warnings 'experimental::re_strict';
     # skip the utf8 test on EBCDIC since they do not die
     next if $::IS_EBCDIC && $regex =~ /utf8/;
 
     warning_is(sub {
+                   my $eval_string = "$strict $regex";
 		   $_ = "x";
-		   eval $regex;
-		   like($@, qr/\Q$expect/, $regex);
+		   eval $eval_string;
+		   like($@, qr/\Q$expect/, $eval_string);
 	       }, undef, "... and died without any other warnings");
 }
+}
 
+for my $strict ("no warnings 'experimental::re_strict'; use re 'strict';", "") {
+
+    # First time through we use strict to make sure that that doesn't change
+    # any of the warnings into fatal, and outputs them correctly.  The second
+    # time we don't use strict, and add the messages that are warnings when
+    # not under strict to the list of warnings.  This checks that non-strict
+    # works.
+    if (! $strict) {
+        for (my $i = 0; $i < @death_only_under_strict; $i += 3) {
+            push @warning, $death_only_under_strict[$i],    # The regex
+                           $death_only_under_strict[$i+1];  # The warning
+        }
+        for (my $i = 0; $i < @death_utf8_only_under_strict; $i += 3) {
+            push @warning, mark_as_utf8($death_utf8_only_under_strict[$i], $death_utf8_only_under_strict[$i+1]);
+        }
+    }
 foreach my $ref (\@warning, \@experimental_regex_sets, \@deprecated) {
-    my $warning_type = ($ref == \@warning)
-                       ? 'regexp'
-                       : ($ref == \@deprecated)
-                         ? 'regexp, deprecated'
-                         : 'experimental::regex_sets';
-    while (my ($regex, $expect) = splice @$ref, 0, 2) {
-        my @expect = fixup_expect($expect);
+    my $warning_type;
+    my $default_on;
+    if ($ref == \@warning) {
+        $warning_type = 'regexp, digit';
+        $default_on = $strict;
+    }
+    elsif ($ref == \@deprecated) {
+        $warning_type = 'regexp, deprecated';
+        $default_on = 1;
+    }
+    else {
+        $warning_type = 'experimental::regex_sets';
+        $default_on = 1;
+    }
+    for (my $i = 0; $i < @$ref; $i += 2) {
+        my $regex = $ref->[$i];
+        my @expect = fixup_expect($ref->[$i+1]);
         {
             $_ = "x";
-            no warnings;
-            eval $regex;
+            eval "$strict no warnings; $regex";
         }
-        if (is($@, "", "$regex did not die")) {
+        if (is($@, "", "$strict $regex did not die")) {
             my @got = capture_warnings(sub {
                                     $_ = "x";
-                                    eval $regex });
+                                    eval "$strict $regex" });
             my $count = @expect;
             if (! is(scalar @got, scalar @expect, "... and gave expected number ($count) of warnings")) {
                 if (@got < @expect) {
@@ -499,23 +615,26 @@ foreach my $ref (\@warning, \@experimental_regex_sets, \@deprecated) {
                 else {
                     ok (0 == capture_warnings(sub {
                                     $_ = "x";
-                                    eval "no warnings '$warning_type'; $regex;" }
+                                    eval "$strict no warnings '$warning_type'; $regex;" }
                                 ),
                     "... and turning off '$warning_type' warnings suppressed it");
+
                     # Test that whether the warning is on by default is
-                    # correct.  Experimental and deprecated warnings are;
-                    # others are not.  This test relies on the fact that we
+                    # correct.  This test relies on the fact that we
                     # are outside the scope of any ‘use warnings’.
                     local $^W;
-                    my $on = 'on' x ($warning_type ne 'regexp');
-                    ok !!$on ==
-                        capture_warnings(sub { $_ = "x"; eval $regex }),
-                      "... and the warning is " . ($on||'off')
-                       . " by default";
+                    my @warns = capture_warnings(sub { $_ = "x"; eval "$strict $regex" });
+                    if ($default_on) {
+                        ok @warns > 0, "... and the warning is on by default";
+                    }
+                    else {
+                        ok @warns == 0, "... and the warning is off by default";
+                    }
                 }
             }
         }
     }
 }
+}
 
 done_testing();
author	Karl Williamson <khw@cpan.org>	2015-01-05 13:17:58 -0700
committer	Karl Williamson <khw@cpan.org>	2015-01-13 12:01:03 -0700
commit	67cdf558540fcb50072632cb50aa953c0583f350 (patch)
tree	82939f4dd1b0c3783defc60226f6b39dda11d87e
parent	af631a26a8f5a7d7136bf909c27dbba1a2d49690 (diff)
download	perl-67cdf558540fcb50072632cb50aa953c0583f350.tar.gz