diff options
-rw-r--r-- | lib/re.pm | 30 | ||||
-rw-r--r-- | perl.h | 1 | ||||
-rw-r--r-- | pod/perldiag.pod | 21 | ||||
-rw-r--r-- | pod/perlre.pod | 9 | ||||
-rw-r--r-- | regcomp.c | 7 | ||||
-rwxr-xr-x | t/op/misc.t | 8 | ||||
-rwxr-xr-x | t/op/pat.t | 18 | ||||
-rwxr-xr-x | t/op/regexp.t | 7 | ||||
-rwxr-xr-x | t/op/subst.t | 7 |
9 files changed, 88 insertions, 20 deletions
@@ -6,26 +6,42 @@ re - Perl pragma to alter regular expression behaviour =head1 SYNOPSIS - ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here + use re 'taint'; + ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here - use re "taint"; - ($x) = ($^X =~ /^(.*)$/s); # $x _is_ tainted here + use re 'eval'; + /foo(?{ $foo = 1 })bar/; # won't fail (when not under -T switch) + + { + no re 'taint'; # the default + ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here + + no re 'eval'; # the default + /foo(?{ $foo = 1 })bar/; # disallowed (with or without -T switch) + } =head1 DESCRIPTION When C<use re 'taint'> is in effect, and a tainted string is the target of a regex, the regex memories (or values returned by the m// operator -in list context) are tainted. +in list context) are tainted. This feature is useful when regex operations +on tainted data aren't meant to extract safe substrings, but to perform +other transformations. -This feature is useful when regex operations on tainted data aren't -meant to extract safe substrings, but to perform other transformations. +When C<use re 'eval'> is in effect, a regex is allowed to contain +C<(?{ ... })> zero-width assertions (which may not be interpolated in +the regex). That is normally disallowed, since it is a potential security +risk. Note that this pragma is ignored when perl detects tainted data, +i.e. evaluation is always disallowed with tainted data. See +L<perlre/(?{ code })>. See L<perlmodlib/Pragmatic Modules>. =cut my %bitmask = ( -taint => 0x00100000 +taint => 0x00100000, +eval => 0x00200000, ); sub bits { @@ -1824,6 +1824,7 @@ typedef enum { #define HINT_LOCALIZE_HH 0x00020000 /* %^H needs to be copied */ #define HINT_RE_TAINT 0x00100000 +#define HINT_RE_EVAL 0x00200000 /* Various states of an input record separator SV (rs, nrs) */ #define RsSNARF(sv) (! SvOK(sv)) diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 7c8ab3d482..221cc35f68 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1063,6 +1063,27 @@ single form when it must operate on them directly. Either you've passed an invalid file specification to Perl, or you've found a case the conversion routines don't handle. Drat. +=item %s: Eval-group in insecure regular expression + +(F) Perl detected tainted data when trying to compile a regular expression +that contains the C<(?{ ... })> zero-width assertion, which is unsafe. +See L<perlre/(?{ code })>, and L<perlsec>. + +=item %s: Eval-group not allowed, use re 'eval' + +(F) A regular expression contained the C<(?{ ... })> zero-width assertion, +but that construct is only allowed when the C<use re 'eval'> pragma is +in effect. See L<perlre/(?{ code })>. + +=item %s: Eval-group not allowed at run time + +(F) Perl tried to compile a regular expression containing the C<(?{ ... })> +zero-width assertion at run time, at it would when the pattern contains +interpolated values. Since this is a risk to security, it is not allowed. +If you insist, you may still do this by explicitly building the pattern +from an interpolated string at run time and using that in an eval(). +See L<perlre/(?{ code })>. + =item Excessively long <> operator (F) The contents of a <> operator may not exceed the maximum size of a diff --git a/pod/perlre.pod b/pod/perlre.pod index 30608ced75..f6fdc29eea 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -330,6 +330,10 @@ Experimental "evaluate any Perl code" zero-width assertion. Always succeeds. C<code> is not interpolated. Currently the rules to determine where the C<code> ends are somewhat convoluted. +Owing to the risks to security, this is only available when the +C<use re 'eval'> pragma is used, and then only for patterns that don't +have any variables that must be interpolated at run time. + The C<code> is properly scoped in the following sense: if the assertion is backtracked (compare L<"Backtracking">), all the changes introduced after C<local>isation are undone, so @@ -360,11 +364,6 @@ other C<(?{ code })> assertions inside the same regular expression. The above assignment to $^R is properly localized, thus the old value of $^R is restored if the assertion is backtracked (compare L<"Backtracking">). -B<WARNING>: This is a grave security risk for arbitrarily interpolated -patterns. It introduces security holes in previously safe programs. -A fix to Perl, and to this documentation, will be forthcoming prior -to the actual 5.005 release. - =item C<(?E<gt>pattern)> An "independent" subexpression. Matches the substring that a @@ -1043,6 +1043,13 @@ reg(I32 paren, I32 *flagp) regcomp_rx->data->data[n+2] = (void*)sop; SvREFCNT_dec(sv); } else { /* First pass */ + if (curcop == &compiling) { + if (!(hints & HINT_RE_EVAL)) + FAIL("Eval-group not allowed, use re 'eval'"); + } + else { + FAIL("Eval-group not allowed at run time"); + } if (tainted) FAIL("Eval-group in insecure regular expression"); } diff --git a/t/op/misc.t b/t/op/misc.t index 9ab6831859..25f566e19b 100755 --- a/t/op/misc.t +++ b/t/op/misc.t @@ -336,16 +336,18 @@ sub foo { local $_ = shift; split; @_ } @x = foo(' x y z '); print "you die joe!\n" unless "@x" eq 'x y z'; ######## +use re 'eval'; /(?{"{"})/ # Check it outside of eval too EXPECT -Sequence (?{...}) not terminated or not {}-balanced at - line 1, within pattern -/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 1. +Sequence (?{...}) not terminated or not {}-balanced at - line 2, within pattern +/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 2. ######## +use re 'eval'; /(?{"{"}})/ # Check it outside of eval too EXPECT Unmatched right bracket at (re_eval 1) line 1, at end of line syntax error at (re_eval 1) line 1, near ""{"}" -Compilation failed in regexp at - line 1. +Compilation failed in regexp at - line 2. ######## BEGIN { @ARGV = qw(a b c) } BEGIN { print "argv <@ARGV>\nbegin <",shift,">\n" } diff --git a/t/op/pat.t b/t/op/pat.t index fecdf0c199..7ee1f0943c 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,11 +6,14 @@ # $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $ -print "1..123\n"; +print "1..124\n"; -chdir 't' if -d 't'; -@INC = "../lib"; +BEGIN { + chdir 't' if -d 't'; + @INC = "../lib" if -d "../lib"; +} eval 'use Config'; # Defaults assumed if this fails +use re 'eval'; $x = "abc\ndef\n"; @@ -379,7 +382,14 @@ $test++; $code = '{$blah = 45}'; $blah = 12; -/(?$code)/; +eval { /(?$code)/ }; +print "not " unless $@ and $@ =~ /not allowed at run time/ and $blah == 12; +print "ok $test\n"; +$test++; + +$code = '{$blah = 45}'; +$blah = 12; +eval "/(?$code)/"; print "not " if $blah != 45; print "ok $test\n"; $test++; diff --git a/t/op/regexp.t b/t/op/regexp.t index 7e43526f63..244ed4ab99 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -26,6 +26,13 @@ # If you want to add a regular expression test that can't be expressed # in this format, don't add it here: put it in op/pat.t instead. +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib' if -d '../lib'; +} + +use re 'eval'; + $iters = shift || 1; # Poor man performance suite, 10000 is OK. open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') || diff --git a/t/op/subst.t b/t/op/subst.t index 92a848fe80..1323b2d004 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -1,6 +1,10 @@ #!./perl -# $RCSfile: s.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:22 $ + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib' if -d '../lib'; +} print "1..70\n"; @@ -276,6 +280,7 @@ $_ = <<'EOL'; EOL $^R = 'junk'; +use re 'eval'; $foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' . ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' . ' lowercase $@%#MiXeD$@%# '; |