summary | refs | log | tree | commit | diff
path: root/toke.c
diff options
context:
space:
mode:
author: David Mitchell <davem@iabyn.com> 2012-03-12 20:50:02 +0000
committer: David Mitchell <davem@iabyn.com> 2012-06-13 13:32:49 +0100
commit: e4a2df84a6027f19dfb84f5057abfc4ff54618b6 (patch)
tree: 43bd1077ff5de9117a80eadb80d709e92f646185 /toke.c
parent: a5ae69f0baa6e5bcbc8ca09d9e0c16b89ca0fdae (diff)
download: perl-e4a2df84a6027f19dfb84f5057abfc4ff54618b6.tar.gz
improve skipping of regex [..] char class in toker
Recently S_scan_const() was enhanced to know when it was within a [...] of a regex, so that it could ignore anything that looked like a code block within it; i.e. [(?{...] wasn't misinterpreted as the start of a (?{...}).

However, the code was too simplistic, and didn't handle \[ and \] escapes well enough; e.g. are these char classes or not?:

    \\\[abc\\\]
    \\\\[abc\\\\]
Diffstat (limited to 'toke.c')
-rw-r--r-- toke.c 22
1 files changed, 17 insertions, 5 deletions
diff --git a/toke.c b/toke.c
index 0570a15732..6cde83cc0c 100644
--- a/toke.c
+++ b/toke.c
@@ -2862,11 +2862,23 @@ S_scan_const(pTHX_ char *start)
/* if we get here, we're not doing a transliteration */
- else if (in_charclass && *s == ']' && ! (s>start+1 && s[-1] == '\\'))
- in_charclass = FALSE;
-
- else if (PL_lex_inpat && *s == '[')
- in_charclass = TRUE;
+ else if (*s == '[' && PL_lex_inpat && !in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = TRUE;
+ }
+
+ else if (*s == ']' && PL_lex_inpat && in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = FALSE;
+ }
/* skip for regexp comments /(?#comment)/, except for the last
* char, which will be done separately.