summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2016-06-09 21:25:46 -0600
committer: Karl Williamson <khw@cpan.org> 2016-06-17 12:45:20 -0600
commit: 8e84dec289732f088323c1bdc1e82f10841b362a (patch)
tree: cf8be93007ee95d578e0e4c1d0dd9bc857ea7557
parent: a1399808f7d0c25a44b5677fd6cafda57e658955 (diff)
download: perl-8e84dec289732f088323c1bdc1e82f10841b362a.tar.gz
Add missing deprecation message for unescaped '{' in regexes
The use of literal '{' without being escaped has been deprecated since 5.16, and warned on since 5.20. In 5.24, this has been made illegal, with a bunch of CPAN modules broken by it, in spite of the long deprecation period. See https://rt.perl.org/Ticket/Display.html?id=128139 Unfortunately, I overlooked a code path, and not all instances that should have warned did so in fact. This was spotted by Tom Wyant in https://rt.perl.org/Ticket/Display.html?id=128213 This commit adds that warning, and rewords the fatal one slightly, and clarifies the whole thing in perldiag.
-rw-r--r-- pod/perldelta.pod 13
-rw-r--r-- pod/perldiag.pod 117
-rw-r--r-- regcomp.c 11
-rw-r--r-- t/re/reg_mesg.t 19
4 files changed, 141 insertions, 19 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 1997cfa5f0..fe27320058 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -271,7 +271,12 @@ Perl yourself. The #! line at the top of your file could look like
=item *
-XXX L<message|perldiag/"message">
+L<Unescaped left brace in regex is deprecated here, passed through in regex; marked by S<<-- HERE> in mE<sol>%sE<sol>|perldiag/"Unescaped left brace in regex is deprecated here, passed through in regex; marked by S<<-- HERE> in m/%s/">
+
+Unescaped left braces are already illegal in some contexts in regular
+expression patterns, but, due to an oversight, no deprecation warning
+was raised in other contexts where they are intended to become illegal.
+This warning is now raised in these contexts.
=back
@@ -293,7 +298,11 @@ XXX Changes (i.e. rewording) of diagnostic messages go here
=item *
-XXX Describe change here
+L<Unescaped left brace in regex is illegal here in regex; marked by S<<-- HERE> in mE<sol>%sE<sol>|perldiag/"Unescaped left brace in regex is illegal here in regex; marked by S<<-- HERE> in m/%s/">
+
+The word "here" has been added to the message that was raised in
+v5.25.1. This is to indicate that there are contexts in which unescaped
+left braces are not (yet) illegal.
=back
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index c0899ff6de..4164baf833 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -6139,20 +6139,117 @@ C<undef *foo>.
(A) You've accidentally run your script through B<csh> instead of Perl.
Check the #! line, or manually feed your script into Perl yourself.
-=item Unescaped left brace in regex is illegal in regex;
+=item Unescaped left brace in regex is deprecated here, passed through in
+regex; marked by S<<-- HERE> in m/%s/
+
+(D deprecated, regexp) The simple rule to remember, if you want to
+match a literal C<"{"> character (U+007B C<LEFT CURLY BRACKET>) in a
+regular expression pattern, is to escape each literal instance of it in
+some way. Generally easiest is to precede it with a backslash, like
+C<"\{"> or enclose it in square brackets (C<"[{]">). If the pattern
+delimiters are also braces, any matching right brace (C<"}">) should
+also be escaped to avoid confusing the parser, for example,
+
+ qr{abc\{def\}ghi}
+
+Forcing literal C<"{"> characters to be escaped will enable the Perl
+language to be extended in various ways in future releases. To avoid
+needlessly breaking existing code, the restriction is not enforced in
+contexts where there are unlikely to ever be extensions that could
+conflict with the use there of C<"{"> as a literal.
+
+In this release of Perl, some literal uses of C<"{"> are fatal, and some
+still just deprecated. This is because of an oversight: some uses of a
+literal C<"{"> that should have raised a deprecation warning starting in
+v5.20 did not warn until v5.26. By making the already-warned uses fatal
+now, some of the planned extensions can be made to the language sooner.
+
+The contexts where no warnings or errors are raised are:
+
+=over 4
+
+=item *
+
+as the first character in a pattern, or following C<"^"> indicating to
+anchor the match to the beginning of a line.
+
+=item *
+
+as the first character following a C<"|"> indicating alternation.
+
+=item *
+
+as the first character in a parenthesized grouping like
+
+ /foo({bar)/
+ /foo(?:{bar)/
+
+=item *
+
+as the first character following a quantifier
+
+ /\s*{/
+
+=back
+
+=for comment
+The text of the message above is duplicated below to allow splain (and
+'use diagnostics') to work. Since one is fatal, and one not, they can't
+be combined as one message. And since the non-fatal one is temporary,
+there's no real need to enhance perldiag to handle this transient case.
+
+=item Unescaped left brace in regex is illegal here in regex;
marked by S<<-- HERE> in m/%s/
-(F) You used a literal C<"{"> character in a regular
-expression pattern. You should change to use C<"\{"> or C<[{]> instead.
-If the pattern delimiters are also braces, any matching
-right brace (C<"}">) should also be escaped to avoid confusing the parser,
-for example,
+(F) The simple rule to remember, if you want to
+match a literal C<"{"> character (U+007B C<LEFT CURLY BRACKET>) in a
+regular expression pattern, is to escape each literal instance of it in
+some way. Generally easiest is to precede it with a backslash, like
+C<"\{"> or enclose it in square brackets (C<"[{]">). If the pattern
+delimiters are also braces, any matching right brace (C<"}">) should
+also be escaped to avoid confusing the parser, for example,
+
+ qr{abc\{def\}ghi}
+
+Forcing literal C<"{"> characters to be escaped will enable the Perl
+language to be extended in various ways in future releases. To avoid
+needlessly breaking existing code, the restriction is not enforced in
+contexts where there are unlikely to ever be extensions that could
+conflict with the use there of C<"{"> as a literal.
+
+In this release of Perl, some literal uses of C<"{"> are fatal, and some
+still just deprecated. This is because of an oversight: some uses of a
+literal C<"{"> that should have raised a deprecation warning starting in
+v5.20 did not warn until v5.26. By making the already-warned uses fatal
+now, some of the planned extensions can be made to the language sooner.
+
+The contexts where no warnings or errors are raised are:
+
+=over 4
+
+=item *
- qr{abc\{def\}ghi}
+as the first character in a pattern, or following C<"^"> indicating to
+anchor the match to the beginning of a line.
-This restriction is not enforced if the C<"{"> is the first character in
-the pattern; nor is a warning generated for this case, as there are no
-current plans to forbid it.
+=item *
+
+as the first character following a C<"|"> indicating alternation.
+
+=item *
+
+as the first character in a parenthesized grouping like
+
+ /foo({bar)/
+ /foo(?:{bar)/
+
+=item *
+
+as the first character following a quantifier
+
+ /\s*{/
+
+=back
=item unexec of %s into %s failed!
diff --git a/regcomp.c b/regcomp.c
index 86173db3e0..2e7805768b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -13259,7 +13259,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* something like "\b" */
if (len || (p > RExC_start && isALPHA_A(*(p -1)))) {
RExC_parse = p + 1;
- vFAIL("Unescaped left brace in regex is illegal");
+ vFAIL("Unescaped left brace in regex is illegal here");
}
/*FALLTHROUGH*/
default: /* A literal character */
@@ -13664,8 +13664,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
RExC_parse = p - 1;
Set_Node_Cur_Length(ret, parse_start);
RExC_parse = p;
- skip_to_be_ignored_text(pRExC_state, &RExC_parse,
- FALSE /* Don't force to /x */ );
{
/* len is STRLEN which is unsigned, need to copy to signed */
IV iv = len;
@@ -13677,6 +13675,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
break;
} /* End of giant switch on input character */
+ /* Position parse to next real character */
+ skip_to_be_ignored_text(pRExC_state, &RExC_parse,
+ FALSE /* Don't force to /x */ );
+ if (PASS2 && *RExC_parse == '{' && OP(ret) != SBOL && ! regcurly(RExC_parse)) {
+ ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here, passed through");
+ }
+
return(ret);
}
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index ff200834a3..e3c11ba4cf 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -268,10 +268,11 @@ my @death =
'/(?[\ |!])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[\ |!{#}])/', # [perl #126180]
'/(?[()-!])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[()-!{#}])/', # [perl #126204]
'/(?[!()])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[!(){#}])/', # [perl #126404]
- '/\w{/' => 'Unescaped left brace in regex is illegal {#} m/\w{{#}/',
- '/\q{/' => 'Unescaped left brace in regex is illegal {#} m/\q{{#}/',
- '/:{4,a}/' => 'Unescaped left brace in regex is illegal {#} m/:{{#}4,a}/',
- '/xa{3\,4}y/' => 'Unescaped left brace in regex is illegal {#} m/xa{{#}3\,4}y/',
+ '/\w{/' => 'Unescaped left brace in regex is illegal here {#} m/\w{{#}/',
+ '/\q{/' => 'Unescaped left brace in regex is illegal here {#} m/\q{{#}/',
+ '/\A{/' => 'Unescaped left brace in regex is illegal here {#} m/\A{{#}/',
+ '/:{4,a}/' => 'Unescaped left brace in regex is illegal here {#} m/:{{#}4,a}/',
+ '/xa{3\,4}y/' => 'Unescaped left brace in regex is illegal here {#} m/xa{{#}3\,4}y/',
'/abc/xix' => 'Only one /x regex modifier is allowed',
'/(?xmsixp:abc)/' => 'Only one /x regex modifier is allowed {#} m/(?xmsixp{#}:abc)/',
'/(?xmsixp)abc/' => 'Only one /x regex modifier is allowed {#} m/(?xmsixp{#})abc/',
@@ -621,6 +622,16 @@ my @experimental_regex_sets = (
);
my @deprecated = (
+ '/^{/' => "",
+ '/foo|{/' => "",
+ '/foo|^{/' => "",
+ '/foo({bar)/' => "",
+ '/foo(?:{bar)/' => "",
+ '/\s*{/' => "",
+ '/a{3,4}{/' => "",
+ '/.{/' => 'Unescaped left brace in regex is deprecated here, passed through {#} m/.{{#}/',
+ '/[x]{/' => 'Unescaped left brace in regex is deprecated here, passed through {#} m/[x]{{#}/',
+ '/\p{Latin}{/' => 'Unescaped left brace in regex is deprecated here, passed through {#} m/\p{Latin}{{#}/',
);
for my $strict ("", "use re 'strict';") {