summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2014-06-12 21:50:29 -0600
committerKarl Williamson <khw@cpan.org>2014-06-12 22:32:32 -0600
commit412f55bbce575aecc79b1ca79fd2856893dd8738 (patch)
treea033fdcf6ed906fa1b4170fea5d491e635d32724 /regcomp.c
parent4a7e65afe24af2e709b485d8bb4a67fe3d047ada (diff)
downloadperl-412f55bbce575aecc79b1ca79fd2856893dd8738.tar.gz
Deprecate unescaped literal "{" in regex patterns
This commit also causes escaped (by a backslash) "(", "[", and "{" to be considered literally. In the previous 2 Perl versions, the escaping was ignored, and a (default-on) deprecation warning was raised. Now that we have warned for 2 release cycles, we can change the meaning of escaping to actually do something. Warning when a literal left brace is not escaped by a backslash will allow us to eventually use this character in more contexts as being meta, allowing us to extend the language. For example, the lower limit of a quantifier could be omitted, and better error checking instituted, or things like \w could be followed by a {...} indicating some special word character, like \w{Greek} to restrict to just Greek word characters. We tried to do this in v5.16, and many CPAN modules changed to backslash their left braces at that time. However, we had to back out that change before 5.16 shipped because it turned out that escaping a left brace in some contexts didn't work, namely when the brace would normally be a metacharacter (for example surrounding a quantifier), and the pattern delimiters were { }. Instead we raised the useless backslash warning mentioned above, which has now been there for the requisite 2 cycles. This patch partially reverts 2 patches. The first, e62d0b1335a7959680be5f7e56910067d6f33c1f, partially reverted the deprecation of unescaped literal left brace. The other, 4d68ffa0f7f345bc1ae6751744518ba4bc3859bd, instituted the deprecation of the useless escaping of these left-characters. Note that, as in the original attempt to deprecate, we don't raise a warning if the left brace is the first character in the pattern. This is because in that position it can't be a metacharacter, so we don't require any disambiguation, and we found that if we did raise an error, there were quite a few places where this occurred.
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c24
1 files changed, 14 insertions, 10 deletions
diff --git a/regcomp.c b/regcomp.c
index b2c8ed4461..205c840b42 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -225,7 +225,7 @@ struct RExC_state_t {
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
- ((*s) == '{' && regcurly(s, FALSE)))
+ ((*s) == '{' && regcurly(s)))
/*
* Flags to be passed up and down.
@@ -10484,7 +10484,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
op = *RExC_parse;
- if (op == '{' && regcurly(RExC_parse, FALSE)) {
+ if (op == '{' && regcurly(RExC_parse)) {
maxpos = NULL;
#ifdef RE_TRACK_PATTERN_OFFSETS
parse_start = RExC_parse; /* MJD */
@@ -10760,7 +10760,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
/* Disambiguate between \N meaning a named character versus \N meaning
* [^\n]. The former is assumed when it can't be the latter. */
- if (*p != '{' || regcurly(p, FALSE)) {
+ if (*p != '{' || regcurly(p)) {
RExC_parse = p;
if (! node_p) {
/* no bare \N allowed in a charclass */
@@ -11341,12 +11341,6 @@ tryagain:
vFAIL("Internal urp");
/* Supposed to be caught earlier. */
break;
- case '{':
- if (!regcurly(RExC_parse, FALSE)) {
- RExC_parse++;
- goto defchar;
- }
- /* FALLTHROUGH */
case '?':
case '+':
case '*':
@@ -12028,8 +12022,18 @@ tryagain:
goto normal_default;
} /* End of switch on '\' */
break;
+ case '{':
+ /* Currently we don't warn when the lbrace is at the start
+ * of a construct. This catches it in the middle of a
+ * literal string, or when its the first thing after
+ * something like "\b" */
+ if (! SIZE_ONLY
+ && (len || (p > RExC_start && isALPHA_A(*(p -1)))))
+ {
+ ckWARNregdep(p + 1, "Unescaped left brace in regex is deprecated, passed through");
+ }
+ /*FALLTHROUGH*/
default: /* A literal character */
-
normal_default:
if (UTF8_IS_START(*p) && UTF) {
STRLEN numlen;