diff options
author | David Mitchell <davem@iabyn.com> | 2011-02-16 17:17:18 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2011-02-16 17:34:08 +0000 |
commit | ef07e810699a20ac03620e091b78c1c6ef971c32 (patch) | |
tree | a8a2b4336cee567c62d4edd14b95b9532df16cd9 | |
parent | 20be6587f85cec282e10810718c869dd958afe43 (diff) | |
download | perl-ef07e810699a20ac03620e091b78c1c6ef971c32.tar.gz |
document how tainting works with pattern matching
-rw-r--r-- | op.h | 2 | ||||
-rw-r--r-- | pp_ctl.c | 7 | ||||
-rw-r--r-- | pp_hot.c | 71 | ||||
-rw-r--r-- | regexec.c | 2 | ||||
-rw-r--r-- | regexp.h | 6 |
5 files changed, 82 insertions, 6 deletions
@@ -369,7 +369,7 @@ struct pmop { * unshared area without affecting binary compatibility */ #define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+6) -/* taint $1 etc. if target tainted */ +/* 'use re "taint"' in scope: taint $1 etc. if target tainted */ #define PMf_RETAINT (1<<(PMf_BASE_SHIFT+0)) /* match successfully only once per reset, with related flag RXf_USED in @@ -294,6 +294,7 @@ PP(pp_substcont) SvGETMAGIC(TOPs); /* possibly clear taint on $1 etc: #67962 */ + /* See "how taint works" above pp_subst() */ if (SvTAINTED(TOPs)) cx->sb_rxtainted |= SUBST_TAINT_REPL; sv_catsv_nomg(dstr, POPs); @@ -343,7 +344,8 @@ PP(pp_substcont) (void)SvPOK_only_UTF8(targ); /* update the taint state of various various variables in - * preparation for final exit */ + * preparation for final exit. + * See "how taint works" above pp_subst() */ if (PL_tainting) { if ((cx->sb_rxtainted & SUBST_TAINT_PAT) || ((cx->sb_rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) @@ -402,7 +404,8 @@ PP(pp_substcont) if (old != rx) (void)ReREFCNT_inc(rx); /* update the taint state of various various variables in preparation - * for calling the code block */ + * for calling the code block. + * See "how taint works" above pp_subst() */ if (PL_tainting) { if (RX_MATCH_TAINTED(rx)) /* run time pattern taint, eg locale */ cx->sb_rxtainted |= SUBST_TAINT_PAT; @@ -2056,6 +2056,72 @@ PP(pp_iter) RETPUSHYES; } +/* +A description of how taint works in pattern matching and substitution. + +While the pattern is being assembled and then compiled, PL_tainted will +get set if any part of the pattern is tainted, e.g. qr/.*$tainted/. +At the end of pattern compilation, the RXf_TAINTED flag is set on the +pattern if PL_tainted is set. + +When the pattern is copied, e.g. $r = qr/..../, the SV holding the ref to the +pattern is marked as tainted. This means that subsequent usage, such as +/x$r/, will set PL_tainted and thus RXf_TAINTED on the new pattern too. 
+ +During execution of a pattern, locale-variant ops such as ALNUML set the +local flag RF_tainted. At the end of execution, the engine sets the +RXf_TAINTED_SEEN flag on the pattern if RF_tainted got set. + +In addition, RXf_TAINTED_SEEN is used post-execution by the get magic code +of $1 et al to indicate whether the returned value should be tainted. +It is the responsibility of the caller of the pattern (i.e. pp_match, +pp_subst etc) to set this flag for any other circumstances where $1 needs +to be tainted. + +The taint behaviour of pp_subst (and pp_substcont) is quite complex. + +There are three possible sources of taint: + * the source string + * the pattern (both compile- and run-time, RXf_TAINTED / RXf_TAINTED_SEEN) + * the replacement string (or expression under /e) + +There are four destinations of taint and they are affected by the sources +according to the rules below: + + * the return value (not including /r): + tainted by the source string and pattern, but only for the + number-of-iterations case; boolean returns aren't tainted; + * the modified string (or modified copy under /r): + tainted by the source string, pattern, and replacement strings; + * $1 et al: + tainted by the pattern, and under 'use re "taint"', by the source + string too; + * PL_tainted - i.e. whether subsequent code (e.g. in a /e block) is tainted: + should always be unset before executing subsequent code. + +The overall action of pp_subst is: + + * at the start, set bits in rxtainted indicating the taint status of + the various sources. + + * After each pattern execution, update the SUBST_TAINT_PAT bit in + rxtainted if RXf_TAINTED_SEEN has been set, to indicate that the + pattern has subsequently become tainted via locale ops. + + * If control is being passed to pp_substcont to execute a /e block, + save rxtainted in the CXt_SUBST block, for future use by + pp_substcont. 
+ + * Whenever control is being returned to perl code (either by falling + off the "end" of pp_subst/pp_substcont, or by entering a /e block), + use the flag bits in rxtainted to make all the appropriate types of + destination taint visible; e.g. set RXf_TAINTED_SEEN so that $1 et + al will appear tainted. + +pp_match is just a simpler version of the above. + +*/ + PP(pp_subst) { dVAR; dSP; dTARG; @@ -2071,7 +2137,8 @@ PP(pp_subst) I32 maxiters; register I32 i; bool once; - U8 rxtainted = 0; /* holds various SUBST_TAINT_* flag bits */ + U8 rxtainted = 0; /* holds various SUBST_TAINT_* flag bits. + See "how taint works" above */ char *orig; U8 r_flags; register REGEXP *rx = PM_GETRE(pm); @@ -2131,6 +2198,7 @@ PP(pp_subst) /* only replace once? */ once = !(rpm->op_pmflags & PMf_GLOBAL); + /* See "how taint works" above */ if (PL_tainting) { rxtainted = ( (SvTAINTED(TARG) ? SUBST_TAINT_STR : 0) @@ -2412,6 +2480,7 @@ PP(pp_subst) if (doutf8) SvUTF8_on(TARG); + /* See "how taint works" above */ if (PL_tainting) { if ((rxtainted & SUBST_TAINT_PAT) || ((rxtainted & (SUBST_TAINT_STR|SUBST_TAINT_RETAINT)) == @@ -80,7 +80,7 @@ # include "regcomp.h" #endif -#define RF_tainted 1 /* tainted information used? */ +#define RF_tainted 1 /* tainted information used? e.g. locale */ #define RF_warned 2 /* warned about big count? */ #define RF_utf8 8 /* Pattern contains multibyte chars? 
*/ @@ -365,8 +365,12 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) /* Copy and tainted info */ #define RXf_COPY_DONE (1<<(RXf_BASE_SHIFT+16)) + +/* during execution: pattern temporarily tainted by executing locale ops; + * post-execution: $1 et al are tainted */ #define RXf_TAINTED_SEEN (1<<(RXf_BASE_SHIFT+17)) -#define RXf_TAINTED (1<<(RXf_BASE_SHIFT+18)) /* this pattern is tainted */ +/* this pattern was tainted during compilation */ +#define RXf_TAINTED (1<<(RXf_BASE_SHIFT+18)) /* Flags indicating special patterns */ #define RXf_START_ONLY (1<<(RXf_BASE_SHIFT+19)) /* Pattern is /^/ */ |