summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2014-02-18 18:29:14 +0000
committerRicardo Signes <rjbs@cpan.org>2014-03-07 19:05:11 -0500
commit1f621a8560b56ce47c6d6f706f31b836437b3ea5 (patch)
tree1dd7c860ec4859d017b980de76968c639d4be417
parent43c6e0a7ba1950c4a64b59be5d0a9cd7b1807cca (diff)
downloadperl-1f621a8560b56ce47c6d6f706f31b836437b3ea5.tar.gz
RT #119125: fix two issues with /[#]/x
(This is a maint-specific patch, not a cherry-pick from blead) A hash within a character class in an expanded pattern is an odd beast. It is handled twice, first by the perl toker, which is looking for things like embedded variables that need interpolating, and second by the regex parser. The toker only has limited knowledge of regex syntax, and struggles to work out for things like /#$foo/x and /[#$foo]/x, whether that's a regex comment and so whether '$foo' is part of the comment string or a variable to be interpolated. Up until 5.18.0 inclusive it got very confused when the '#' was within a character class, and usually got it wrong. 5.18.0 also introduced the additional complication that (?{}) code-blocks were now normally handled by the perl toker rather than by the regex parser. A side-effect of this was that if for any reason the toker didn't spot a code block (because it erroneously thought it was part of a regex comment for example), then the literal code block text would be passed through uncompiled to the regex parser, which would then refuse to compile unless "use re eval" was in scope. All these problems have been fixed in blead. However, the fixes couldn't be fully back-ported to maint, since there was a fair bit of code on CPAN that would (erroneously) do things like /[#$^]/ which the author expected to match one of three special characters, and indeed does so on older perls. On blead, however, this (correctly) expands to /[#STDOUT_TOP]/ (based on what $^ is currently set to). So we decided to keep the old (broken) behaviour on maint. These fixes and half-fixes were included in 5.18.2. However, it turns out that 5.18.2 still has a couple of issues, one of which is a regression from 5.16.x. The table below shows the behaviours of certain regex constructs under various flavours of perl. "5.18.3" represents the changes included in this commit, and the entries marked "*******" represent changes in behaviour since 5.18.2 (i.e. they are what this commit fixes).
/[#$b]/x 5.16.3 - $b not expanded 5.18.0 - $b not expanded 5.18.2 - $b not expanded - keep bug for backwards compatibility 5.18.3 - $b not expanded - keep bug for backwards compatibility blead - $b expanded /[#]$c/x 5.16.3 - $c not expanded 5.18.0 - $c not expanded 5.18.2 - $c not expanded 5.18.3 - $c expanded ******* blead - $c expanded /[#] (?{})/x # i.e. this pattern includes a literal newline 5.16.3 - re eval not needed 5.18.0 - re eval needed 5.18.1 - re eval needed 5.18.2 - re eval needed 5.18.3 - re eval not needed ******* blead - re eval not needed
-rw-r--r--t/re/pat.t10
-rw-r--r--toke.c38
2 files changed, 33 insertions, 15 deletions
diff --git a/t/re/pat.t b/t/re/pat.t
index edb78ca19e..7ee66bf840 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -20,7 +20,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 672; # Update this when adding/deleting tests.
+plan tests => 674; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1399,6 +1399,14 @@ EOP
$s = 'abcd$%#&';
$s =~ s/[a#$b%]/X/gx;
is ($s, 'XXcdXXX&', 'RT #119125 with /x');
+
+ $s = 'abYcd$Y#Y&';
+ my $c = 'Y';
+ $s =~ s/[#$b]$c/X/gx;
+ is ($s, 'aXcdXX&', 'RT #119125 with /x and trailing var');
+
+ ok("a#b" =~ /a[#]
+ b(?{})/x, 'RT #119125 with newline and codeblock');
}
} # End of sub run_tests
diff --git a/toke.c b/toke.c
index fc53c7978e..533f67f7d2 100644
--- a/toke.c
+++ b/toke.c
@@ -3178,22 +3178,32 @@ S_scan_const(pTHX_ char *start)
/* likewise skip #-initiated comments in //x patterns */
else if (*s == '#' && PL_lex_inpat &&
- ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
- while (s+1 < send && *s != '\n') {
+ ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
+ {
+ if (in_charclass) {
/* for maint-5.18, half-fix #-in-charclass bug:
- * *do* recognise codeblocks: /[#](?{})/
- * *don't* recognise interpolated vars: /[#$x]/
- */
- if (in_charclass && !PL_lex_casemods && s+3 < send &&
- s[0] == '(' &&
- s[1] == '?' &&
- ( s[2] == '{'
- || (s[2] == '?' && s[3] == '{')))
- break;
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ * strictly speaking, #-in-charclass has no special
+ * meaning; however, for backwards compatibility,
+ * ignore $variables etc for the rest of the charclass
+ * scope */
+ while (in_charclass && s+1 < send && *s != '\n') {
+ if (*s == ']') {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = FALSE;
+ }
+ *d++ = *s++;
+ }
+ continue;
+ }
+ else {
+ /* normal /...#.../x; skipt to end of line */
+ while (s+1 < send && *s != '\n')
+ *d++ = *s++;
}
- if (s+ 1 < send && *s != '\n')
- break; /* we stopped on (?{}), not EOL */
}
/* no further processing of single-quoted regex */