summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2014-05-13 14:18:06 +0100
committerDavid Mitchell <davem@iabyn.com>2014-05-13 15:20:54 +0100
commit1738e041e86c4796d194727eae67369600abf920 (patch)
treef2d770365c5ae181fbdbe51f0e1008e43c8b2926
parent98830e71b322ee2b78a218cf29c6e32d7f94ff62 (diff)
downloadperl-1738e041e86c4796d194727eae67369600abf920.tar.gz
[perl #121854] use re 'taint' regression
Commit v5.19.8-533-g63baef5 changed the handling of locale-dependent regexes so that the pattern was considered tainted at compile-time, rather than determining it each time at run-time whenever it executed a locale-dependent node. Unfortunately due to the conflating of two flags, RXf_TAINTED and RXf_TAINTED_SEEN, it had the side effect of permanently marking a pattern as tainted once it had had a single tainted result. E.g. use re qw(taint); use Scalar::Util qw(tainted); for ($^X, "abc") { /(.*)/ or die; print "not " unless tainted("$1"); print "tainted\n"; }; which from 5.19.9 onwards output: tainted tainted but with this commit (and with 5.19.8 and earlier), it now outputs: tainted not tainted The RXf_TAINTED flag indicates that the pattern itself is tainted, e.g. $r = qr/$tainted_value/ while the RXf_TAINTED_SEEN flag means that the results of the last match are tainted, e.g. use re 'tainted'; $tainted =~ /(.*)/; # $1 is tainted Pre 63baef5, the code used to look like: at run-time: turn off RXf_TAINTED_SEEN; while (nodes to execute) { switch(node) { case BOUNDL: /* and other locale-specific ops */ turn on RXf_TAINTED_SEEN; ...; } } if (tainted || RXf_TAINTED) turn on RXf_TAINTED_SEEN; 63baef5 changed it to: at compile-time: if (pattern has locale ops) turn on RXf_TAINTED_SEEN; at run-time: while (nodes to execute) { ... } if (tainted || RXf_TAINTED) turn on RXf_TAINTED_SEEN; This commit changes it to: at compile-time; if (pattern has locale ops) turn on RXf_TAINTED; at run-time: turn off RXf_TAINTED_SEEN; while (nodes to execute) { ... } if (tainted || RXf_TAINTED) turn on RXf_TAINTED_SEEN;
-rw-r--r--pp_hot.c4
-rw-r--r--regcomp.c2
-rw-r--r--regexec.c2
-rw-r--r--regexp.h3
-rw-r--r--t/op/taint.t14
5 files changed, 19 insertions, 6 deletions
diff --git a/pp_hot.c b/pp_hot.c
index ac69bc7208..2cccc48bed 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1964,8 +1964,8 @@ While the pattern is being assembled/concatenated and then compiled,
PL_tainted will get set (via TAINT_set) if any component of the pattern
is tainted, e.g. /.*$tainted/. At the end of pattern compilation,
the RXf_TAINTED flag is set on the pattern if PL_tainted is set (via
-TAINT_get). Also, if any component of the pattern matches based on
-locale-dependent behavior, the RXf_TAINTED_SEEN flag is set.
+TAINT_get). It will also be set if any component of the pattern matches
+based on locale-dependent behavior.
When the pattern is copied, e.g. $r = qr/..../, the SV holding the ref to
the pattern is marked as tainted. This means that subsequent usage, such
diff --git a/regcomp.c b/regcomp.c
index 33994348b7..eaee60479d 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7110,7 +7110,7 @@ reStudy:
}
if (RExC_contains_locale) {
- RXp_EXTFLAGS(r) |= RXf_TAINTED_SEEN;
+ RXp_EXTFLAGS(r) |= RXf_TAINTED;
}
#ifdef DEBUGGING
diff --git a/regexec.c b/regexec.c
index 4ed2ba9d42..362390bd66 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2584,6 +2584,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
Perl_croak(aTHX_ "corrupted regexp program");
}
+ RX_MATCH_TAINTED_off(rx);
+
reginfo->prog = rx; /* Yes, sorry that this is confusing. */
reginfo->intuit = 0;
reginfo->is_utf8_target = cBOOL(utf8_target);
diff --git a/regexp.h b/regexp.h
index d32e669a4c..db7ae8be01 100644
--- a/regexp.h
+++ b/regexp.h
@@ -415,8 +415,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
/* Copy and tainted info */
#define RXf_COPY_DONE (1<<(RXf_BASE_SHIFT+16))
-/* during execution: pattern temporarily tainted by executing locale ops;
- * post-execution: $1 et al are tainted */
+/* post-execution: $1 et al are tainted */
#define RXf_TAINTED_SEEN (1<<(RXf_BASE_SHIFT+17))
/* this pattern was tainted during compilation */
#define RXf_TAINTED (1<<(RXf_BASE_SHIFT+18))
diff --git a/t/op/taint.t b/t/op/taint.t
index 3f014b3ee2..aaf556af09 100644
--- a/t/op/taint.t
+++ b/t/op/taint.t
@@ -17,7 +17,7 @@ BEGIN {
use strict;
use Config;
-plan tests => 798;
+plan tests => 800;
$| = 1;
@@ -1057,6 +1057,18 @@ my $TEST = 'TEST';
is($s, 'abcd', "$desc: s value");
is($res, 'xyz', "$desc: res value");
is($one, 'abcd', "$desc: \$1 value");
+
+ # [perl #121854] match taintedness became sticky
+ # when one match has a taintess result, subseqent matches
+ # using the same pattern shouldn't necessarily be tainted
+
+ {
+ my $f = sub { $_[0] =~ /(.*)/ or die; $1 };
+ $res = $f->($TAINT);
+ is_tainted($res, "121854: res tainted");
+ $res = $f->("abc");
+ isnt_tainted($res, "121854: res not tainted");
+ }
}
$foo = $1 if 'bar' =~ /(.+)$TAINT/;