summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-05-24 21:23:49 -0600
committerKarl Williamson <public@khwilliamson.com>2012-05-24 22:48:43 -0600
commit2a53d3314d380af5ab5283758219417c6dfa36e9 (patch)
tree7c72ef5cd43b245d58ad7a4303d789a0f20aa8b4
parent9a54da5c9b834e6244f5695a8d2bf57e89d4fa90 (diff)
downloadperl-2a53d3314d380af5ab5283758219417c6dfa36e9.tar.gz
Deprecate literal unescaped "{" in regexes.
We are deprecating literal left braces in regular expressions. The 5.16 delta announced that this is coming. This commit causes a warning to be raised when a literal "{" is encountered. However, it does not do this if the left brace is at the beginning of a construct. Such a brace does not cause problems for us for our future use of it for other purposes, as, for example in things like \b{w}, and there were a large number of core tests that failed without this condition. I didn't mention this exception in the diagnostic. We may choose to forbid it everywhere, and we certainly want to discourage its use everywhere. But this commit gets the essential components in early in 5.17, and we can tighten it up later if we decide to.
-rw-r--r--lib/diagnostics.pm6
-rw-r--r--pod/perldiag.pod11
-rw-r--r--regcomp.c29
-rw-r--r--t/comp/parser.t6
-rw-r--r--t/io/open.t4
-rw-r--r--t/lib/warnings/regcomp10
-rw-r--r--t/op/taint.t12
-rw-r--r--t/re/pat.t2
-rw-r--r--t/re/pat_advanced.t3
9 files changed, 47 insertions, 36 deletions
diff --git a/lib/diagnostics.pm b/lib/diagnostics.pm
index 39bcb3d1dd..21cdf543b7 100644
--- a/lib/diagnostics.pm
+++ b/lib/diagnostics.pm
@@ -186,7 +186,7 @@ use 5.009001;
use Carp;
$Carp::Internal{__PACKAGE__.""}++;
-our $VERSION = '1.28';
+our $VERSION = '1.29';
our $DEBUG;
our $VERBOSE;
our $PRETTY;
@@ -435,11 +435,11 @@ my %msg;
}
my $lhs = join( '', @toks );
$transfmt{$header}{pat} =
- " s{^$lhs}\n {\Q$header\E}s\n\t&& return 1;\n";
+ " s^$lhs\Q$header\Es\n\t&& return 1;\n";
$transfmt{$header}{len} = $conlen;
} else {
$transfmt{$header}{pat} =
- " m{^\Q$header\E} && return 1;\n";
+ " m^\Q$header\E && return 1;\n";
$transfmt{$header}{len} = length( $header );
}
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 08d52c7ac6..31ce464f06 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -4928,6 +4928,17 @@ C<undef *foo>.
(A) You've accidentally run your script through B<csh> instead of Perl.
Check the #! line, or manually feed your script into Perl yourself.
+=item Unescaped left brace in regex is deprecated, passed through
+
+(D) You used a literal C<"{"> character in a regular expression pattern.
+You should change to use C<"\{"> instead, because a future version of
+Perl (tentatively v5.20) will consider this to be a syntax error. If
+the pattern delimiters are also braces, any matching right brace
+(C<"}">) should also be escaped to avoid confusing the parser, for
+example,
+
+ qr{abc\{def\}ghi}
+
=item unexec of %s into %s failed!
(F) The unexec() routine failed for some reason. See your local FSF
diff --git a/regcomp.c b/regcomp.c
index eefc2ccdbf..9f239ff0f4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9077,12 +9077,6 @@ tryagain:
vFAIL("Internal urp");
/* Supposed to be caught earlier. */
break;
- case '{':
- if (!regcurly(RExC_parse)) {
- RExC_parse++;
- goto defchar;
- }
- /* FALL THROUGH */
case '?':
case '+':
case '*':
@@ -9208,9 +9202,6 @@ tryagain:
ret = reg_node(pRExC_state, op);
FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= SIMPLE;
- if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
- ckWARNregdep(RExC_parse, "\"\\b{\" is deprecated; use \"\\b\\{\" instead");
- }
goto finish_meta_pat;
case 'B':
RExC_seen_zerolen++;
@@ -9235,9 +9226,6 @@ tryagain:
ret = reg_node(pRExC_state, op);
FLAGS(ret) = get_regex_charset(RExC_flags);
*flagp |= SIMPLE;
- if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') {
- ckWARNregdep(RExC_parse, "\"\\B{\" is deprecated; use \"\\B\\{\" instead");
- }
goto finish_meta_pat;
case 's':
switch (get_regex_charset(RExC_flags)) {
@@ -9744,15 +9732,22 @@ tryagain:
/* FALL THROUGH */
default:
if (!SIZE_ONLY&& isALPHA(*p)) {
- /* Include any { following the alpha to emphasize
- * that it could be part of an escape at some point
- * in the future */
- int len = (*(p + 1) == '{') ? 2 : 1;
- ckWARN3reg(p + len, "Unrecognized escape \\%.*s passed through", len, p);
+ ckWARN2reg(p + 1, "Unrecognized escape \\%.1s passed through", p);
}
goto normal_default;
}
break;
+ case '{':
+ /* Currently we don't warn when the lbrace is at the start
+ * of a construct. This catches it in the middle of a
+ * literal string, or when its the first thing after
+ * something like "\b" */
+ if (! SIZE_ONLY
+ && (len || (p > RExC_start && isALPHA_A(*(p -1)))))
+ {
+ ckWARNregdep(p + 1, "Unescaped left brace in regex is deprecated, passed through");
+ }
+ /*FALLTHROUGH*/
default:
normal_default:
if (UTF8_IS_START(*p) && UTF) {
diff --git a/t/comp/parser.t b/t/comp/parser.t
index 16b4a826d1..a369adb983 100644
--- a/t/comp/parser.t
+++ b/t/comp/parser.t
@@ -125,11 +125,11 @@ is( $@, '', 'PL_lex_brackstack' );
is("${a}[", "A[", "interpolation, qq//");
my @b=("B");
is("@{b}{", "B{", "interpolation, qq//");
- is(qr/${a}{/, '(?^:A{)', "interpolation, qr//");
+ is(qr/${a}\{/, '(?^:A\{)', "interpolation, qr//");
my $c = "A{";
- $c =~ /${a}{/;
+ $c =~ /${a}\{/;
is($&, 'A{', "interpolation, m//");
- $c =~ s/${a}{/foo/;
+ $c =~ s/${a}\{/foo/;
is($c, 'foo', "interpolation, s/...//");
$c =~ s/foo/${a}{/;
is($c, 'A{', "interpolation, s//.../");
diff --git a/t/io/open.t b/t/io/open.t
index 806639187f..6b1f1d7dc5 100644
--- a/t/io/open.t
+++ b/t/io/open.t
@@ -267,7 +267,7 @@ SKIP: {
open($fh1{k}, "TEST");
gimme($fh1{k});
- like($@, qr/<\$fh1{...}> line 1\./, "autoviv fh package helem");
+ like($@, qr/<\$fh1\{...}> line 1\./, "autoviv fh package helem");
my @fh2;
open($fh2[0], "TEST");
@@ -277,7 +277,7 @@ SKIP: {
my %fh3;
open($fh3{k}, "TEST");
gimme($fh3{k});
- like($@, qr/<\$fh3{...}> line 1\./, "autoviv fh lexical helem");
+ like($@, qr/<\$fh3\{...}> line 1\./, "autoviv fh lexical helem");
}
SKIP: {
diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp
index b435d2a1da..1a2fb99b84 100644
--- a/t/lib/warnings/regcomp
+++ b/t/lib/warnings/regcomp
@@ -57,20 +57,24 @@ Unrecognized escape \m passed through in regex; marked by <-- HERE in m/a\m <--
use warnings 'regexp';
"foo" =~ /\q/;
"foo" =~ /\q{/;
+"foo" =~ /\w{/;
"foo" =~ /a\b{cde/;
"foo" =~ /a\B{cde/;
"bar" =~ /\_/;
no warnings 'regexp';
"foo" =~ /\q/;
"foo" =~ /\q{/;
+"foo" =~ /\w{/;
"foo" =~ /a\b{cde/;
"foo" =~ /a\B{cde/;
"bar" =~ /\_/;
EXPECT
Unrecognized escape \q passed through in regex; marked by <-- HERE in m/\q <-- HERE / at - line 4.
-Unrecognized escape \q{ passed through in regex; marked by <-- HERE in m/\q{ <-- HERE / at - line 5.
-"\b{" is deprecated; use "\b\{" instead in regex; marked by <-- HERE in m/a\ <-- HERE b{cde/ at - line 6.
-"\B{" is deprecated; use "\B\{" instead in regex; marked by <-- HERE in m/a\ <-- HERE B{cde/ at - line 7.
+Unrecognized escape \q passed through in regex; marked by <-- HERE in m/\q <-- HERE {/ at - line 5.
+Unescaped left brace in regex is deprecated, passed through in regex; marked by <-- HERE in m/\q{ <-- HERE / at - line 5.
+Unescaped left brace in regex is deprecated, passed through in regex; marked by <-- HERE in m/\w{ <-- HERE / at - line 6.
+Unescaped left brace in regex is deprecated, passed through in regex; marked by <-- HERE in m/a\b{ <-- HERE cde/ at - line 7.
+Unescaped left brace in regex is deprecated, passed through in regex; marked by <-- HERE in m/a\B{ <-- HERE cde/ at - line 8.
########
# regcomp.c [S_regpposixcc S_checkposixcc]
#
diff --git a/t/op/taint.t b/t/op/taint.t
index 9cea74040c..a0949d33c2 100644
--- a/t/op/taint.t
+++ b/t/op/taint.t
@@ -152,7 +152,7 @@ my $TEST = 'TEST';
while (my $v = $vars[0]) {
local $ENV{$v} = $TAINT;
last if eval { `$echo 1` };
- last unless $@ =~ /^Insecure \$ENV{$v}/;
+ last unless $@ =~ /^Insecure \$ENV\{$v}/;
shift @vars;
}
is("@vars", "");
@@ -163,7 +163,7 @@ my $TEST = 'TEST';
is(eval { `$echo 1` }, "1\n");
$ENV{TERM} = 'e=mc2' . $TAINT;
is(eval { `$echo 1` }, undef);
- like($@, qr/^Insecure \$ENV{TERM}/);
+ like($@, qr/^Insecure \$ENV\{TERM}/);
}
my $tmp;
@@ -182,7 +182,7 @@ my $TEST = 'TEST';
local $ENV{PATH} = $tmp;
is(eval { `$echo 1` }, undef);
- like($@, qr/^Insecure directory in \$ENV{PATH}/);
+ like($@, qr/^Insecure directory in \$ENV\{PATH}/);
}
SKIP: {
@@ -190,14 +190,14 @@ my $TEST = 'TEST';
$ENV{'DCL$PATH'} = $TAINT;
is(eval { `$echo 1` }, undef);
- like($@, qr/^Insecure \$ENV{DCL\$PATH}/);
+ like($@, qr/^Insecure \$ENV\{DCL\$PATH}/);
SKIP: {
skip q[can't find world-writeable directory to test DCL$PATH], 2
unless $tmp;
$ENV{'DCL$PATH'} = $tmp;
is(eval { `$echo 1` }, undef);
- like($@, qr/^Insecure directory in \$ENV{DCL\$PATH}/);
+ like($@, qr/^Insecure directory in \$ENV\{DCL\$PATH}/);
}
$ENV{'DCL$PATH'} = '';
}
@@ -2112,7 +2112,7 @@ end
ok("A" =~ /\p{$prop}/, "user-defined property: non-tainted case");
$prop = "IsA$TAINT";
eval { "A" =~ /\p{$prop}/};
- like($@, qr/Insecure user-defined property \\p{main::IsA}/,
+ like($@, qr/Insecure user-defined property \\p\{main::IsA}/,
"user-defined property: tainted case");
}
diff --git a/t/re/pat.t b/t/re/pat.t
index b34e0930ab..9c29c9565b 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -152,7 +152,7 @@ sub run_tests {
{
$_ = 'now is the {time for all} good men to come to.';
- / {([^}]*)}/;
+ / \{([^}]*)}/;
is($1, 'time for all', "Match braces");
}
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index d82fcf1291..0d9fbbfae3 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -986,7 +986,7 @@ sub run_tests {
my $w;
local $SIG {__WARN__} = sub {$w .= "@_"};
eval 'q(xxWxx) =~ /[\N{WARN}]/';
- ok $w && $w =~ /Using just the first character returned by \\N{} in character class/,
+ ok $w && $w =~ /Using just the first character returned by \\N\{} in character class/,
"single character in [\\N{}] warning";
undef $w;
@@ -1137,6 +1137,7 @@ sub run_tests {
{
# \, breaks {3,4}
+ no warnings qw{deprecated regexp};
ok "xaaay" !~ /xa{3\,4}y/, '\, in a pattern';
ok "xa{3,4}y" =~ /xa{3\,4}y/, '\, in a pattern';