summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/re.pm30
-rw-r--r--perl.h1
-rw-r--r--pod/perldiag.pod21
-rw-r--r--pod/perlre.pod9
-rw-r--r--regcomp.c7
-rwxr-xr-xt/op/misc.t8
-rwxr-xr-xt/op/pat.t18
-rwxr-xr-xt/op/regexp.t7
-rwxr-xr-xt/op/subst.t7
9 files changed, 88 insertions, 20 deletions
diff --git a/lib/re.pm b/lib/re.pm
index ff38c41286..b7375e3e71 100644
--- a/lib/re.pm
+++ b/lib/re.pm
@@ -6,26 +6,42 @@ re - Perl pragma to alter regular expression behaviour
=head1 SYNOPSIS
- ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
+ use re 'taint';
+ ($x) = ($^X =~ /^(.*)$/s); # $x is tainted here
- use re "taint";
- ($x) = ($^X =~ /^(.*)$/s); # $x _is_ tainted here
+ use re 'eval';
+ /foo(?{ $foo = 1 })bar/; # won't fail (when not under -T switch)
+
+ {
+ no re 'taint'; # the default
+ ($x) = ($^X =~ /^(.*)$/s); # $x is not tainted here
+
+ no re 'eval'; # the default
+ /foo(?{ $foo = 1 })bar/; # disallowed (with or without -T switch)
+ }
=head1 DESCRIPTION
When C<use re 'taint'> is in effect, and a tainted string is the target
of a regex, the regex memories (or values returned by the m// operator
-in list context) are tainted.
+in list context) are tainted. This feature is useful when regex operations
+on tainted data aren't meant to extract safe substrings, but to perform
+other transformations.
-This feature is useful when regex operations on tainted data aren't
-meant to extract safe substrings, but to perform other transformations.
+When C<use re 'eval'> is in effect, a regex is allowed to contain
+C<(?{ ... })> zero-width assertions (which may not be interpolated in
+the regex). That is normally disallowed, since it is a potential security
+risk. Note that this pragma is ignored when perl detects tainted data,
+i.e. evaluation is always disallowed with tainted data. See
+L<perlre/(?{ code })>.
See L<perlmodlib/Pragmatic Modules>.
=cut
my %bitmask = (
-taint => 0x00100000
+taint => 0x00100000,
+eval => 0x00200000,
);
sub bits {
diff --git a/perl.h b/perl.h
index 7f3fd63c1e..9d982ec4de 100644
--- a/perl.h
+++ b/perl.h
@@ -1824,6 +1824,7 @@ typedef enum {
#define HINT_LOCALIZE_HH 0x00020000 /* %^H needs to be copied */
#define HINT_RE_TAINT 0x00100000
+#define HINT_RE_EVAL 0x00200000
/* Various states of an input record separator SV (rs, nrs) */
#define RsSNARF(sv) (! SvOK(sv))
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 7c8ab3d482..221cc35f68 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1063,6 +1063,27 @@ single form when it must operate on them directly. Either you've
passed an invalid file specification to Perl, or you've found a
case the conversion routines don't handle. Drat.
+=item %s: Eval-group in insecure regular expression
+
+(F) Perl detected tainted data when trying to compile a regular expression
+that contains the C<(?{ ... })> zero-width assertion, which is unsafe.
+See L<perlre/(?{ code })>, and L<perlsec>.
+
+=item %s: Eval-group not allowed, use re 'eval'
+
+(F) A regular expression contained the C<(?{ ... })> zero-width assertion,
+but that construct is only allowed when the C<use re 'eval'> pragma is
+in effect. See L<perlre/(?{ code })>.
+
+=item %s: Eval-group not allowed at run time
+
+(F) Perl tried to compile a regular expression containing the C<(?{ ... })>
+zero-width assertion at run time, at it would when the pattern contains
+interpolated values. Since this is a risk to security, it is not allowed.
+If you insist, you may still do this by explicitly building the pattern
+from an interpolated string at run time and using that in an eval().
+See L<perlre/(?{ code })>.
+
=item Excessively long <> operator
(F) The contents of a <> operator may not exceed the maximum size of a
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 30608ced75..f6fdc29eea 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -330,6 +330,10 @@ Experimental "evaluate any Perl code" zero-width assertion. Always
succeeds. C<code> is not interpolated. Currently the rules to
determine where the C<code> ends are somewhat convoluted.
+Owing to the risks to security, this is only available when the
+C<use re 'eval'> pragma is used, and then only for patterns that don't
+have any variables that must be interpolated at run time.
+
The C<code> is properly scoped in the following sense: if the assertion
is backtracked (compare L<"Backtracking">), all the changes introduced after
C<local>isation are undone, so
@@ -360,11 +364,6 @@ other C<(?{ code })> assertions inside the same regular expression.
The above assignment to $^R is properly localized, thus the old value of $^R
is restored if the assertion is backtracked (compare L<"Backtracking">).
-B<WARNING>: This is a grave security risk for arbitrarily interpolated
-patterns. It introduces security holes in previously safe programs.
-A fix to Perl, and to this documentation, will be forthcoming prior
-to the actual 5.005 release.
-
=item C<(?E<gt>pattern)>
An "independent" subexpression. Matches the substring that a
diff --git a/regcomp.c b/regcomp.c
index 0a36cbbb4d..b18740cbec 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1043,6 +1043,13 @@ reg(I32 paren, I32 *flagp)
regcomp_rx->data->data[n+2] = (void*)sop;
SvREFCNT_dec(sv);
} else { /* First pass */
+ if (curcop == &compiling) {
+ if (!(hints & HINT_RE_EVAL))
+ FAIL("Eval-group not allowed, use re 'eval'");
+ }
+ else {
+ FAIL("Eval-group not allowed at run time");
+ }
if (tainted)
FAIL("Eval-group in insecure regular expression");
}
diff --git a/t/op/misc.t b/t/op/misc.t
index 9ab6831859..25f566e19b 100755
--- a/t/op/misc.t
+++ b/t/op/misc.t
@@ -336,16 +336,18 @@ sub foo { local $_ = shift; split; @_ }
@x = foo(' x y z ');
print "you die joe!\n" unless "@x" eq 'x y z';
########
+use re 'eval';
/(?{"{"})/ # Check it outside of eval too
EXPECT
-Sequence (?{...}) not terminated or not {}-balanced at - line 1, within pattern
-/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 1.
+Sequence (?{...}) not terminated or not {}-balanced at - line 2, within pattern
+/(?{"{"})/: Sequence (?{...}) not terminated or not {}-balanced at - line 2.
########
+use re 'eval';
/(?{"{"}})/ # Check it outside of eval too
EXPECT
Unmatched right bracket at (re_eval 1) line 1, at end of line
syntax error at (re_eval 1) line 1, near ""{"}"
-Compilation failed in regexp at - line 1.
+Compilation failed in regexp at - line 2.
########
BEGIN { @ARGV = qw(a b c) }
BEGIN { print "argv <@ARGV>\nbegin <",shift,">\n" }
diff --git a/t/op/pat.t b/t/op/pat.t
index fecdf0c199..7ee1f0943c 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,11 +6,14 @@
# $RCSfile: pat.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:12 $
-print "1..123\n";
+print "1..124\n";
-chdir 't' if -d 't';
-@INC = "../lib";
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = "../lib" if -d "../lib";
+}
eval 'use Config'; # Defaults assumed if this fails
+use re 'eval';
$x = "abc\ndef\n";
@@ -379,7 +382,14 @@ $test++;
$code = '{$blah = 45}';
$blah = 12;
-/(?$code)/;
+eval { /(?$code)/ };
+print "not " unless $@ and $@ =~ /not allowed at run time/ and $blah == 12;
+print "ok $test\n";
+$test++;
+
+$code = '{$blah = 45}';
+$blah = 12;
+eval "/(?$code)/";
print "not " if $blah != 45;
print "ok $test\n";
$test++;
diff --git a/t/op/regexp.t b/t/op/regexp.t
index 7e43526f63..244ed4ab99 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -26,6 +26,13 @@
# If you want to add a regular expression test that can't be expressed
# in this format, don't add it here: put it in op/pat.t instead.
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib' if -d '../lib';
+}
+
+use re 'eval';
+
$iters = shift || 1; # Poor man performance suite, 10000 is OK.
open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') ||
diff --git a/t/op/subst.t b/t/op/subst.t
index 92a848fe80..1323b2d004 100755
--- a/t/op/subst.t
+++ b/t/op/subst.t
@@ -1,6 +1,10 @@
#!./perl
-# $RCSfile: s.t,v $$Revision: 4.1 $$Date: 92/08/07 18:28:22 $
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib' if -d '../lib';
+}
print "1..70\n";
@@ -276,6 +280,7 @@ $_ = <<'EOL';
EOL
$^R = 'junk';
+use re 'eval';
$foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' .
' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' .
' lowercase $@%#MiXeD$@%# ';