diff options
author | Karl Williamson <khw@cpan.org> | 2014-06-12 21:50:29 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-06-12 22:32:32 -0600 |
commit | 412f55bbce575aecc79b1ca79fd2856893dd8738 (patch) | |
tree | a033fdcf6ed906fa1b4170fea5d491e635d32724 /regcomp.c | |
parent | 4a7e65afe24af2e709b485d8bb4a67fe3d047ada (diff) | |
download | perl-412f55bbce575aecc79b1ca79fd2856893dd8738.tar.gz |
Deprecate unescaped literal "{" in regex patterns
This commit also causes escaped (by a backslash) "(", "[", and "{" to be
considered literally. In the previous 2 Perl versions, the escaping was
ignored, and a (default-on) deprecation warning was raised. Now that we
have warned for 2 release cycles, we can change the meaning of escaping
to actually do something.
Warning when a literal left brace is not escaped by a backslash will
allow us to eventually use this character in more contexts as being
meta, allowing us to extend the language. For example, the lower limit
of a quantifier could be omitted, and better error checking instituted,
or things like \w could be followed by a {...} indicating some special
word character, like \w{Greek} to restrict to just Greek word
characters.
We tried to do this in v5.16, and many CPAN modules changed to backslash
their left braces at that time. However we had to back out that change
before 5.16 shipped because it turned out that escaping a left brace in
some contexts didn't work, namely when the brace would normally be a
metacharacter (for example surrounding a quantifier), and the pattern
delimiters were { }. Instead we raised the useless backslash warning
mentioned above, which has now been there for the requisite 2 cycles.
This patch partially reverts 2 patches. The first,
e62d0b1335a7959680be5f7e56910067d6f33c1f, partially reverted
the deprecation of unescaped literal left brace. The other,
4d68ffa0f7f345bc1ae6751744518ba4bc3859bd, instituted the deprecation of
the useless backslash escaping of the left-characters "(", "[", and "{".
Note that, as in the original attempt to deprecate, we don't raise a
warning if the left brace is the first character in the pattern. This
is because in that position it can't be a metacharacter, so we don't
require any disambiguation, and we found that if we did raise an error,
there were quite a few places where this occurred.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 24 |
1 files changed, 14 insertions, 10 deletions
@@ -225,7 +225,7 @@ struct RExC_state_t { #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?') #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \ - ((*s) == '{' && regcurly(s, FALSE))) + ((*s) == '{' && regcurly(s))) /* * Flags to be passed up and down. @@ -10484,7 +10484,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) op = *RExC_parse; - if (op == '{' && regcurly(RExC_parse, FALSE)) { + if (op == '{' && regcurly(RExC_parse)) { maxpos = NULL; #ifdef RE_TRACK_PATTERN_OFFSETS parse_start = RExC_parse; /* MJD */ @@ -10760,7 +10760,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, /* Disambiguate between \N meaning a named character versus \N meaning * [^\n]. The former is assumed when it can't be the latter. */ - if (*p != '{' || regcurly(p, FALSE)) { + if (*p != '{' || regcurly(p)) { RExC_parse = p; if (! node_p) { /* no bare \N allowed in a charclass */ @@ -11341,12 +11341,6 @@ tryagain: vFAIL("Internal urp"); /* Supposed to be caught earlier. */ break; - case '{': - if (!regcurly(RExC_parse, FALSE)) { - RExC_parse++; - goto defchar; - } - /* FALLTHROUGH */ case '?': case '+': case '*': @@ -12028,8 +12022,18 @@ tryagain: goto normal_default; } /* End of switch on '\' */ break; + case '{': + /* Currently we don't warn when the lbrace is at the start + * of a construct. This catches it in the middle of a + * literal string, or when its the first thing after + * something like "\b" */ + if (! SIZE_ONLY + && (len || (p > RExC_start && isALPHA_A(*(p -1))))) + { + ckWARNregdep(p + 1, "Unescaped left brace in regex is deprecated, passed through"); + } + /*FALLTHROUGH*/ default: /* A literal character */ - normal_default: if (UTF8_IS_START(*p) && UTF) { STRLEN numlen; |