diff options
author | David Mitchell <davem@iabyn.com> | 2014-05-13 14:18:06 +0100 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2014-05-13 15:20:54 +0100 |
commit | 1738e041e86c4796d194727eae67369600abf920 (patch) | |
tree | f2d770365c5ae181fbdbe51f0e1008e43c8b2926 | |
parent | 98830e71b322ee2b78a218cf29c6e32d7f94ff62 (diff) | |
download | perl-1738e041e86c4796d194727eae67369600abf920.tar.gz |
[perl #121854] use re 'taint' regression
Commit v5.19.8-533-g63baef5 changed the handling of locale-dependent
regexes so that the pattern was considered tainted at compile-time, rather
than determining it each time at run-time whenever it executed a
locale-dependent node. Unfortunately due to the conflating of two flags,
RXf_TAINTED and RXf_TAINTED_SEEN, it had the side effect of permanently
marking a pattern as tainted once it had had a single tainted result.
E.g.
use re qw(taint);
use Scalar::Util qw(tainted);
for ($^X, "abc") {
/(.*)/ or die;
print "not " unless tainted("$1"); print "tainted\n";
};
which from 5.19.9 onwards output:
tainted
tainted
but with this commit (and with 5.19.8 and earlier), it now outputs:
tainted
not tainted
The RXf_TAINTED flag indicates that the pattern itself is tainted, e.g.
$r = qr/$tainted_value/
while the RXf_TAINTED_SEEN flag means that the results of the last match
are tainted, e.g.
use re 'tainted';
$tainted =~ /(.*)/;
# $1 is tainted
Pre 63baef5, the code used to look like:
at run-time:
turn off RXf_TAINTED_SEEN;
while (nodes to execute) {
switch(node) {
case
BOUNDL: /* and other locale-specific ops */
turn on RXf_TAINTED_SEEN;
...;
}
}
if (tainted || RXf_TAINTED)
turn on RXf_TAINTED_SEEN;
63baef5 changed it to:
at compile-time:
if (pattern has locale ops)
turn on RXf_TAINTED_SEEN;
at run-time:
while (nodes to execute) {
...
}
if (tainted || RXf_TAINTED)
turn on RXf_TAINTED_SEEN;
This commit changes it to:
at compile-time;
if (pattern has locale ops)
turn on RXf_TAINTED;
at run-time:
turn off RXf_TAINTED_SEEN;
while (nodes to execute) {
...
}
if (tainted || RXf_TAINTED)
turn on RXf_TAINTED_SEEN;
-rw-r--r-- | pp_hot.c | 4 | ||||
-rw-r--r-- | regcomp.c | 2 | ||||
-rw-r--r-- | regexec.c | 2 | ||||
-rw-r--r-- | regexp.h | 3 | ||||
-rw-r--r-- | t/op/taint.t | 14 |
5 files changed, 19 insertions, 6 deletions
@@ -1964,8 +1964,8 @@ While the pattern is being assembled/concatenated and then compiled, PL_tainted will get set (via TAINT_set) if any component of the pattern is tainted, e.g. /.*$tainted/. At the end of pattern compilation, the RXf_TAINTED flag is set on the pattern if PL_tainted is set (via -TAINT_get). Also, if any component of the pattern matches based on -locale-dependent behavior, the RXf_TAINTED_SEEN flag is set. +TAINT_get). It will also be set if any component of the pattern matches +based on locale-dependent behavior. When the pattern is copied, e.g. $r = qr/..../, the SV holding the ref to the pattern is marked as tainted. This means that subsequent usage, such @@ -7110,7 +7110,7 @@ reStudy: } if (RExC_contains_locale) { - RXp_EXTFLAGS(r) |= RXf_TAINTED_SEEN; + RXp_EXTFLAGS(r) |= RXf_TAINTED; } #ifdef DEBUGGING @@ -2584,6 +2584,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, Perl_croak(aTHX_ "corrupted regexp program"); } + RX_MATCH_TAINTED_off(rx); + reginfo->prog = rx; /* Yes, sorry that this is confusing. */ reginfo->intuit = 0; reginfo->is_utf8_target = cBOOL(utf8_target); @@ -415,8 +415,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) /* Copy and tainted info */ #define RXf_COPY_DONE (1<<(RXf_BASE_SHIFT+16)) -/* during execution: pattern temporarily tainted by executing locale ops; - * post-execution: $1 et al are tainted */ +/* post-execution: $1 et al are tainted */ #define RXf_TAINTED_SEEN (1<<(RXf_BASE_SHIFT+17)) /* this pattern was tainted during compilation */ #define RXf_TAINTED (1<<(RXf_BASE_SHIFT+18)) diff --git a/t/op/taint.t b/t/op/taint.t index 3f014b3ee2..aaf556af09 100644 --- a/t/op/taint.t +++ b/t/op/taint.t @@ -17,7 +17,7 @@ BEGIN { use strict; use Config; -plan tests => 798; +plan tests => 800; $| = 1; @@ -1057,6 +1057,18 @@ my $TEST = 'TEST'; is($s, 'abcd', "$desc: s value"); is($res, 'xyz', "$desc: res value"); is($one, 'abcd', "$desc: \$1 value"); + + # [perl #121854] match taintedness became sticky + # when one match has a taintess result, subseqent matches + # using the same pattern shouldn't necessarily be tainted + + { + my $f = sub { $_[0] =~ /(.*)/ or die; $1 }; + $res = $f->($TAINT); + is_tainted($res, "121854: res tainted"); + $res = $f->("abc"); + isnt_tainted($res, "121854: res not tainted"); + } } $foo = $1 if 'bar' =~ /(.+)$TAINT/; |