diff options
-rw-r--r-- | pod/perlop.pod | 10 | ||||
-rwxr-xr-x | t/op/pat.t | 30 | ||||
-rwxr-xr-x | t/op/subst.t | 12 | ||||
-rwxr-xr-x | t/op/tr.t | 12 | ||||
-rw-r--r-- | toke.c | 11 |
5 files changed, 63 insertions, 12 deletions
diff --git a/pod/perlop.pod b/pod/perlop.pod index 8e57c39514..f69b8bbc97 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1983,7 +1983,7 @@ C<(>, C<)>, C<->, C<n>, C<r>, C<t>, C<f>, C<e>, C<a>, C<x>, C<c>, C<z>, digits (C<0> to C<9>), C<[>, C<{>, C<]>, C<}>, whitespaces (SPACE, TAB, LF, CR, FF, and VT in addition), and C<#>. As C<\c> is skipped at this step, C<@> of C<\c@> in RE is possibly -treated as an array symbol (for example one of C<@foo> or C<@->), +treated as an array symbol (for example C<@foo>), even though the same text in C<qq//> gives interpolation of C<\c@>. Note that C<\N{name}> is interpolated at this step. @@ -1992,10 +1992,10 @@ a C<#>-comment in a C<//x>-regular expression, no processing is performed whatsoever. This is the first step at which the presence of the C<//x> modifier is relevant. -Interpolation has several quirks: C<$|>, C<$(>, and C<$)> are not -interpolated, and constructs C<$var[SOMETHING]> are voted (by several -different estimators) to be either an array element or C<$var> -followed by an RE alternative. This is where the notation +Interpolation in patterns has several quirks: C<$|>, C<$(>, C<$)>, C<@+> +and C<@-> are not interpolated, and constructs C<$var[SOMETHING]> are +voted (by several different estimators) to be either an array element +or C<$var> followed by an RE alternative. This is where the notation C<${arr[$bar]}> comes handy: C</${arr[0-9]}/> is interpreted as array element C<-9>, not as a regular expression from the variable C<$arr> followed by a digit, which would be the interpretation of diff --git a/t/op/pat.t b/t/op/pat.t index 0de38e14df..f0f1b2bbb7 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -7,7 +7,7 @@ $| = 1; # please update note at bottom of file when you change this -print "1..1212\n"; +print "1..1231\n"; BEGIN { chdir 't' if -d 't'; @@ -3546,9 +3546,35 @@ if ($ordA == 193) { ok(defined($res) && length($res)==$size,"\$1 is correct size"); } +{ # related to [perl #27940] + ok("\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'); + ok("\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'); + ok("X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'); + ok("X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'); + + ok("X\0A" =~ /X\c@?A/, '\c@?'); + ok("X\0A" =~ /X\c@*A/, '\c@*'); + ok("X\0A" =~ /X\c@(A)/, '\c@('); + ok("X\0A" =~ /X(\c@)A/, '\c@)'); + ok("X\0A" =~ /X\c@|ZA/, '\c@|'); + + ok("X\@A" =~ /X@?A/, '@?'); + ok("X\@A" =~ /X@*A/, '@*'); + ok("X\@A" =~ /X@(A)/, '@('); + ok("X\@A" =~ /X(@)A/, '@)'); + ok("X\@A" =~ /X@|ZA/, '@|'); + + local $" = ','; # non-whitespace and non-RE-specific + ok('abc' =~ /(.)(.)(.)/, 'the last successful match is bogus'); + ok("A@+B" =~ /A@{+}B/, 'interpolation of @+ in /@{+}/'); + ok("A@-B" =~ /A@{-}B/, 'interpolation of @- in /@{-}/'); + ok("A@+B" =~ /A@{+}B/x, 'interpolation of @+ in /@{+}/x'); + ok("A@-B" =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x'); +} + # Keep the following test last -- it may crash perl ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274") or print "# Unexpected outcome: should pass or crash perl\n"; -# last test 1211 +# last test 1231 diff --git a/t/op/subst.t b/t/op/subst.t index bd481e47b3..0b02ff93f4 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -7,7 +7,7 @@ BEGIN { } require './test.pl'; -plan( tests => 131 ); +plan( tests => 133 ); $x = 'foo'; $_ = "x"; @@ -553,3 +553,13 @@ is($name, "cis", q[#22351 bug with 'e' substitution modifier]); } +{ # [perl #27940] perlbug: [\x00-\x1f] works, [\c@-\c_] does not + my $c; + + ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ s/[\c@-\c_]//g; + is($c, "\x20\x30\x40\x50\x60", "s/[\\c\@-\\c_]//g"); + + ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ s/[\x00-\x1f]//g; + is($c, "\x20\x30\x40\x50\x60", "s/[\\x00-\\x1f]//g"); +} + @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 116; +plan tests => 118; my $Is_EBCDIC = (ord('i') == 0x89 & ord('J') == 0xd1); @@ -455,3 +455,13 @@ is($s, "AxBC", "utf8, DELETE"); } # non-characters end +{ # related to [perl #27940] + my $c; + + ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ tr/\c@-\c_//d; + is($c, "\x20\x30\x40\x50\x60", "tr/\\c\@-\\c_//d"); + + ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ tr/\x00-\x1f//d; + is($c, "\x20\x30\x40\x50\x60", "tr/\\x00-\\x1f//d"); +} + @@ -1994,9 +1994,14 @@ S_scan_const(pTHX_ char *start) /* check for embedded arrays (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) */ - else if (*s == '@' && s[1] - && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1]))) - break; + else if (*s == '@' && s[1]) { + if (isALNUM_lazy_if(s+1,UTF)) + break; + if (strchr(":'{$", s[1])) + break; + if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) + break; /* in regexp, neither @+ nor @- are interpolated */ + } /* check for embedded scalars. only stop if we're sure it's a variable. |