diff options
author | Karl Williamson <khw@cpan.org> | 2016-06-09 21:25:46 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-06-17 12:45:20 -0600 |
commit | 8e84dec289732f088323c1bdc1e82f10841b362a (patch) | |
tree | cf8be93007ee95d578e0e4c1d0dd9bc857ea7557 | |
parent | a1399808f7d0c25a44b5677fd6cafda57e658955 (diff) | |
download | perl-8e84dec289732f088323c1bdc1e82f10841b362a.tar.gz |
Add missing deprecation message for unescaped '{' in regexes
The use of literal '{' without being escaped has been deprecated since
5.16, and warned on since 5.20. In 5.24, this has been made illegal,
with a bunch of CPAN modules broken by it, in spite of the long
deprecation period. See
https://rt.perl.org/Ticket/Display.html?id=128139
Unfortunately, I overlooked a code path, and not all instances that
should have warned did so in fact. This was spotted by Tom Wyant in
https://rt.perl.org/Ticket/Display.html?id=128213
This commit adds that warning, and rewords the fatal one slightly, and
clarifies the whole thing in perldiag.
-rw-r--r-- | pod/perldelta.pod | 13 | ||||
-rw-r--r-- | pod/perldiag.pod | 117 | ||||
-rw-r--r-- | regcomp.c | 11 | ||||
-rw-r--r-- | t/re/reg_mesg.t | 19 |
4 files changed, 141 insertions, 19 deletions
diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 1997cfa5f0..fe27320058 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -271,7 +271,12 @@ Perl yourself. The #! line at the top of your file could look like =item * -XXX L<message|perldiag/"message"> +L<Unescaped left brace in regex is deprecated here, passed through in regex; marked by S<<-- HERE> in mE<sol>%sE<sol>|perldiag/"Unescaped left brace in regex is deprecated here, passed through in regex; marked by S<<-- HERE> in m/%s/"> + +Unescaped left braces are already illegal in some contexts in regular +expression patterns, but, due to an oversight, no deprecation warning +was raised in other contexts where they are intended to become illegal. +This warning is now raised in these contexts. =back @@ -293,7 +298,11 @@ XXX Changes (i.e. rewording) of diagnostic messages go here =item * -XXX Describe change here +L<Unescaped left brace in regex is illegal here in regex; marked by S<<-- HERE> in mE<sol>%sE<sol>|perldiag/"Unescaped left brace in regex is illegal here in regex; marked by S<<-- HERE> in m/%s/"> + +The word "here" has been added to the message that was raised in +v5.25.1. This is to indicate that there are contexts in which unescaped +left braces are not (yet) illegal. =back diff --git a/pod/perldiag.pod b/pod/perldiag.pod index c0899ff6de..4164baf833 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -6139,20 +6139,117 @@ C<undef *foo>. (A) You've accidentally run your script through B<csh> instead of Perl. Check the #! line, or manually feed your script into Perl yourself. -=item Unescaped left brace in regex is illegal in regex; +=item Unescaped left brace in regex is deprecated here, passed through in +regex; marked by S<<-- HERE> in m/%s/ + +(D deprecated, regexp) The simple rule to remember, if you want to +match a literal C<"{"> character (U+007B C<LEFT CURLY BRACKET>) in a +regular expression pattern, is to escape each literal instance of it in +some way. 
Generally easiest is to precede it with a backslash, like +C<"\{"> or enclose it in square brackets (C<"[{]">). If the pattern +delimiters are also braces, any matching right brace (C<"}">) should +also be escaped to avoid confusing the parser, for example, + + qr{abc\{def\}ghi} + +Forcing literal C<"{"> characters to be escaped will enable the Perl +language to be extended in various ways in future releases. To avoid +needlessly breaking existing code, the restriction is not enforced in +contexts where there are unlikely to ever be extensions that could +conflict with the use there of C<"{"> as a literal. + +In this release of Perl, some literal uses of C<"{"> are fatal, and some +still just deprecated. This is because of an oversight: some uses of a +literal C<"{"> that should have raised a deprecation warning starting in +v5.20 did not warn until v5.26. By making the already-warned uses fatal +now, some of the planned extensions can be made to the language sooner. + +The contexts where no warnings or errors are raised are: + +=over 4 + +=item * + +as the first character in a pattern, or following C<"^"> indicating to +anchor the match to the beginning of a line. + +=item * + +as the first character following a C<"|"> indicating alternation. + +=item * + +as the first character in a parenthesized grouping like + + /foo({bar)/ + /foo(?:{bar)/ + +=item * + +as the first character following a quantifier + + /\s*{/ + +=back + +=for comment +The text of the message above is duplicated below to allow splain (and +'use diagnostics') to work. Since one is fatal, and one not, they can't +be combined as one message. And since the non-fatal one is temporary, +there's no real need to enhance perldiag to handle this transient case. + +=item Unescaped left brace in regex is illegal here in regex; marked by S<<-- HERE> in m/%s/ -(F) You used a literal C<"{"> character in a regular -expression pattern. You should change to use C<"\{"> or C<[{]> instead.
-If the pattern delimiters are also braces, any matching -right brace (C<"}">) should also be escaped to avoid confusing the parser, -for example, +(F) The simple rule to remember, if you want to +match a literal C<"{"> character (U+007B C<LEFT CURLY BRACKET>) in a +regular expression pattern, is to escape each literal instance of it in +some way. Generally easiest is to precede it with a backslash, like +C<"\{"> or enclose it in square brackets (C<"[{]">). If the pattern +delimiters are also braces, any matching right brace (C<"}">) should +also be escaped to avoid confusing the parser, for example, + + qr{abc\{def\}ghi} + +Forcing literal C<"{"> characters to be escaped will enable the Perl +language to be extended in various ways in future releases. To avoid +needlessly breaking existing code, the restriction is not enforced in +contexts where there are unlikely to ever be extensions that could +conflict with the use there of C<"{"> as a literal. + +In this release of Perl, some literal uses of C<"{"> are fatal, and some +still just deprecated. This is because of an oversight: some uses of a +literal C<"{"> that should have raised a deprecation warning starting in +v5.20 did not warn until v5.26. By making the already-warned uses fatal +now, some of the planned extensions can be made to the language sooner. + +The contexts where no warnings or errors are raised are: + +=over 4 + +=item * - qr{abc\{def\}ghi} +as the first character in a pattern, or following C<"^"> indicating to +anchor the match to the beginning of a line. -This restriction is not enforced if the C<"{"> is the first character in -the pattern; nor is a warning generated for this case, as there are no -current plans to forbid it. +=item * + +as the first character following a C<"|"> indicating alternation.
+ +=item * + +as the first character in a parenthesized grouping like + + /foo({bar)/ + /foo(?:{bar)/ + +=item * + +as the first character following a quantifier + + /\s*{/ + +=back =item unexec of %s into %s failed! @@ -13259,7 +13259,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) * something like "\b" */ if (len || (p > RExC_start && isALPHA_A(*(p -1)))) { RExC_parse = p + 1; - vFAIL("Unescaped left brace in regex is illegal"); + vFAIL("Unescaped left brace in regex is illegal here"); } /*FALLTHROUGH*/ default: /* A literal character */ @@ -13664,8 +13664,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) RExC_parse = p - 1; Set_Node_Cur_Length(ret, parse_start); RExC_parse = p; - skip_to_be_ignored_text(pRExC_state, &RExC_parse, - FALSE /* Don't force to /x */ ); { /* len is STRLEN which is unsigned, need to copy to signed */ IV iv = len; @@ -13677,6 +13675,13 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) break; } /* End of giant switch on input character */ + /* Position parse to next real character */ + skip_to_be_ignored_text(pRExC_state, &RExC_parse, + FALSE /* Don't force to /x */ ); + if (PASS2 && *RExC_parse == '{' && OP(ret) != SBOL && ! 
regcurly(RExC_parse)) { + ckWARNregdep(RExC_parse + 1, "Unescaped left brace in regex is deprecated here, passed through"); + } + return(ret); } diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index ff200834a3..e3c11ba4cf 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -268,10 +268,11 @@ my @death = '/(?[\ |!])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[\ |!{#}])/', # [perl #126180] '/(?[()-!])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[()-!{#}])/', # [perl #126204] '/(?[!()])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[!(){#}])/', # [perl #126404] - '/\w{/' => 'Unescaped left brace in regex is illegal {#} m/\w{{#}/', - '/\q{/' => 'Unescaped left brace in regex is illegal {#} m/\q{{#}/', - '/:{4,a}/' => 'Unescaped left brace in regex is illegal {#} m/:{{#}4,a}/', - '/xa{3\,4}y/' => 'Unescaped left brace in regex is illegal {#} m/xa{{#}3\,4}y/', + '/\w{/' => 'Unescaped left brace in regex is illegal here {#} m/\w{{#}/', + '/\q{/' => 'Unescaped left brace in regex is illegal here {#} m/\q{{#}/', + '/\A{/' => 'Unescaped left brace in regex is illegal here {#} m/\A{{#}/', + '/:{4,a}/' => 'Unescaped left brace in regex is illegal here {#} m/:{{#}4,a}/', + '/xa{3\,4}y/' => 'Unescaped left brace in regex is illegal here {#} m/xa{{#}3\,4}y/', '/abc/xix' => 'Only one /x regex modifier is allowed', '/(?xmsixp:abc)/' => 'Only one /x regex modifier is allowed {#} m/(?xmsixp{#}:abc)/', '/(?xmsixp)abc/' => 'Only one /x regex modifier is allowed {#} m/(?xmsixp{#})abc/', @@ -621,6 +622,16 @@ my @experimental_regex_sets = ( ); my @deprecated = ( + '/^{/' => "", + '/foo|{/' => "", + '/foo|^{/' => "", + '/foo({bar)/' => "", + '/foo(:?{bar)/' => "", + '/\s*{/' => "", + '/a{3,4}{/' => "", + '/.{/' => 'Unescaped left brace in regex is deprecated here, passed through {#} m/.{{#}/', + '/[x]{/' => 'Unescaped left brace in regex is deprecated here, passed through {#} m/[x]{{#}/', + '/\p{Latin}{/' => 'Unescaped left brace in regex is deprecated here, 
passed through {#} m/\p{Latin}{{#}/', ); for my $strict ("", "use re 'strict';") { |