summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSADAHIRO Tomoyuki <BQW10602@nifty.com>2006-10-16 07:23:00 +0900
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2006-10-16 12:55:28 +0000
commitcc74c5bd822c38bbfaf542cfef5f7219e05e467e (patch)
tree4f4159b49a0ac7d05fce0bfc8d7feb0c93411e54
parent9911cee9a9c011ce0c7f2203e6247489dafc24ae (diff)
downloadperl-cc74c5bd822c38bbfaf542cfef5f7219e05e467e.tar.gz
remove leaveit from toke.c:scan_const
Message-Id: <20061015222223.BC38.BQW10602@nifty.com> p4raw-id: //depot/perl@29026
-rw-r--r--pod/perlop.pod16
-rw-r--r--t/lib/warnings/regcomp11
-rw-r--r--t/lib/warnings/toke11
-rwxr-xr-xt/op/pat.t28
-rw-r--r--t/op/regmesg.t9
-rw-r--r--toke.c18
6 files changed, 52 insertions, 41 deletions
diff --git a/pod/perlop.pod b/pod/perlop.pod
index dcd537b529..205556cfd5 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -1972,16 +1972,12 @@ is emitted if the C<use warnings> pragma or the B<-w> command-line flag
=item C<RE> in C<?RE?>, C</RE/>, C<m/RE/>, C<s/RE/foo/>,
-Processing of C<\Q>, C<\U>, C<\u>, C<\L>, C<\l>, and interpolation
-happens (almost) as with C<qq//> constructs.
-
-However combinations of C<\> followed by RE-special chars are not
-substituted but only skipped. The full list of RE-special chars is
-C<\>, C<.>, C<^>, C<$>, C<@>, C<A>, C<G>, C<Z>, C<d>, C<D>, C<w>, C<W>,
-C<s>, C<S>, C<b>, C<B>, C<p>, C<P>, C<X>, C<C>, C<+>, C<*>, C<?>, C<|>,
-C<(>, C<)>, C<->, C<N>, C<n>, C<r>, C<t>, C<f>, C<e>, C<a>, C<x>, C<c>,
-C<z>, digits (C<0> to C<9>), C<[>, C<{>, C<]>, C<}>, whitespaces
-(SPACE, TAB, LF, CR, FF, and VT in addition), and C<#>.
+Processing of C<\Q>, C<\U>, C<\u>, C<\L>, C<\l>, C<\E>,
+and interpolation happens (almost) as with C<qq//> constructs.
+
+However any other combinations of C<\> followed by a character
+are not substituted but only skipped, in order to parse them
+as regular expressions at the following step.
As C<\c> is skipped at this step, C<@> of C<\c@> in RE is possibly
treated as an array symbol (for example C<@foo>),
even though the same text in C<qq//> gives interpolation of C<\c@>.
diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp
index 4982016594..6818c6233c 100644
--- a/t/lib/warnings/regcomp
+++ b/t/lib/warnings/regcomp
@@ -56,6 +56,17 @@ $a =~ /a$x/ ;
EXPECT
Unrecognized escape \m passed through in regex; marked by <-- HERE in m/a\m <-- HERE / at - line 4.
########
+# regcomp.c [S_regatom]
+# The \q should warn, the \_ should NOT warn.
+use warnings 'regexp';
+"foo" =~ /\q/;
+"bar" =~ /\_/;
+no warnings 'regexp';
+"foo" =~ /\q/;
+"bar" =~ /\_/;
+EXPECT
+Unrecognized escape \q passed through in regex; marked by <-- HERE in m/\q <-- HERE / at - line 4.
+########
# regcomp.c [S_regpposixcc S_checkposixcc]
#
use warnings 'regexp' ;
diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke
index 5c44df7496..e4fa82c1e0 100644
--- a/t/lib/warnings/toke
+++ b/t/lib/warnings/toke
@@ -737,17 +737,6 @@ EXPECT
Possible unintended interpolation of @mjd_previously_unused_array in string at - line 3.
########
# toke.c
-# The \q should warn, the \_ should NOT warn.
-use warnings 'misc';
-"foo" =~ /\q/;
-"bar" =~ /\_/;
-no warnings 'misc';
-"foo" =~ /\q/;
-"bar" =~ /\_/;
-EXPECT
-Unrecognized escape \q passed through at - line 4.
-########
-# toke.c
# 20020328 mjd-perl-patch+@plover.com at behest of jfriedl@yahoo.com
use warnings 'regexp';
"foo" =~ /foo/c;
diff --git a/t/op/pat.t b/t/op/pat.t
index a6ea46c8f5..9bd655312f 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -476,27 +476,27 @@ print "not " unless $^R eq '79' and $x eq '12';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/i eq '(?i-xsm:\bv$)';
+print "not " unless qr/\b\v$/i eq '(?i-xsm:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/s eq '(?s-xim:\bv$)';
+print "not " unless qr/\b\v$/s eq '(?s-xim:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/m eq '(?m-xis:\bv$)';
+print "not " unless qr/\b\v$/m eq '(?m-xis:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/x eq '(?x-ism:\bv$)';
+print "not " unless qr/\b\v$/x eq '(?x-ism:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/xism eq '(?msix:\bv$)';
+print "not " unless qr/\b\v$/xism eq '(?msix:\b\v$)';
print "ok $test\n";
$test++;
-print "not " unless qr/\b\v$/ eq '(?-xism:\bv$)';
+print "not " unless qr/\b\v$/ eq '(?-xism:\b\v$)';
print "ok $test\n";
$test++;
@@ -3824,6 +3824,20 @@ sub iseq($$;$) {
ok($ok, $msg);
}
+# \, breaks {3,4}
+ok("xaaay" !~ /xa{3\,4}y/, "\, in a pattern");
+ok("xa{3,4}y" =~ /xa{3\,4}y/, "\, in a pattern");
+
+# \c\ followed by _
+ok("x\c_y" !~ /x\c\_y/, "\_ in a pattern");
+ok("x\c\_y" =~ /x\c\_y/, "\_ in a pattern");
+
+# \c\ followed by other characters
+for my $c ("z", "\0", "!", chr(254), chr(256)) {
+ my $targ = "a\034$c";
+ my $reg = "a\\c\\$c";
+ ok(eval("qq/$targ/ =~ /$reg/"), "\\c\\ in pattern");
+}
# Keep the following tests last -- they may crash perl
@@ -3835,5 +3849,5 @@ ok((q(a)x 100) =~ /^(??{'(.)'x 100})/,
or print "# Unexpected outcome: should pass or crash perl\n";
# Don't forget to update this!
-BEGIN{print "1..1275\n"};
+BEGIN{print "1..1284\n"};
diff --git a/t/op/regmesg.t b/t/op/regmesg.t
index 1b613ed351..fbfb6b286e 100644
--- a/t/op/regmesg.t
+++ b/t/op/regmesg.t
@@ -47,6 +47,15 @@ my @death =
'/(?;x/' => 'Sequence (?;...) not recognized in regex; marked by {#} in m/(?;{#}x/',
'/(?<;x/' => 'Sequence (?<;...) not recognized in regex; marked by {#} in m/(?<;{#}x/',
+ '/(?\ix/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}ix/',
+ '/(?\mx/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}mx/',
+ '/(?\:x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}:x/',
+ '/(?\=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}=x/',
+ '/(?\!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}!x/',
+ '/(?\<=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<=x/',
+ '/(?\<!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<!x/',
+ '/(?\>x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}>x/',
+
'/((x)/' => 'Unmatched ( in regex; marked by {#} in m/({#}(x)/',
"/x{$inf_p1}/" => "Quantifier in {,} bigger than $inf_m1 in regex; marked by {#} in m/x{{#}$inf_p1}/",
diff --git a/toke.c b/toke.c
index 927f904cb6..4ab58ea4f1 100644
--- a/toke.c
+++ b/toke.c
@@ -1790,12 +1790,6 @@ S_scan_const(pTHX_ char *start)
bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
#endif
- const char * const leaveit = /* set of acceptably-backslashed characters */
- (const char *)
- (PL_lex_inpat
- ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-Nnrktfeaxcz0123456789[{]} \t\n\r\f\v#"
- : "");
-
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
/* If we are doing a trans and we know we want UTF8 set expectation */
has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
@@ -2020,13 +2014,6 @@ S_scan_const(pTHX_ char *start)
if (*s == '\\' && s+1 < send) {
s++;
- /* some backslashes we leave behind */
- if (*leaveit && *s && strchr(leaveit, *s)) {
- *d++ = NATIVE_TO_NEED(has_utf8,'\\');
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
- continue;
- }
-
/* deprecate \1 in strings and substitution replacements */
if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
@@ -2042,6 +2029,11 @@ S_scan_const(pTHX_ char *start)
--s;
break;
}
+ /* skip any other backslash escapes in a pattern */
+ else if (PL_lex_inpat) {
+ *d++ = NATIVE_TO_NEED(has_utf8,'\\');
+ goto default_action;
+ }
/* if we get here, it's either a quoted -, or a digit */
switch (*s) {