summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2011-12-19 12:27:48 +0000
committerDavid Mitchell <davem@iabyn.com>2011-12-19 15:06:08 +0000
commitb93ed387fefcbafe93612ae457574ac214f13f3e (patch)
treec4f150cab52f78f0b938dc7e8e0847464b8e4740
parentf3539662ce10aed6b074d859a24b8199fb038a70 (diff)
downloadperl-davem/re_eval.tar.gz
add tests for regex recompilationsmoke-me/re_evaldavem/re_eval
The run-time regexp compilation (invoked via pp_regcomp()) has a mechanism to skip the recompilation if the pattern text hasn't changed since the last recompile. Astonishingly this mechanism isn't actually tested, so here's a test file. All the tests now pass, but this is due to the various recent fixes in this branch. In particular, it never used to consider the UTF8ness of the pattern string, or whether the pattern contained code blocks. It works by checking the output of 'use re debug' (and -Dr if available) to detect how many times the pattern was compiled. This file then is also an indirect test of whether the correct debugging output is generated, i.e. whether the regcomp.c or ext/re/re_comp.c versions of functions are getting called.
-rw-r--r--MANIFEST1
-rw-r--r--t/re/recompile.t186
2 files changed, 187 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
index c71a1ab4d8..062bc83144 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5351,6 +5351,7 @@ t/re/qr-72922.t Test for bug #72922
t/re/qr_gc.t See if qr doesn't leak
t/re/qrstack.t See if qr expands the stack properly
t/re/qr.t See if qr works
+t/re/recompile.t See if pattern caching/recompilation works
t/re/reg_60508.t See if bug #60508 is fixed
t/re/reg_email.t See if regex recursion works by parsing email addresses
t/re/reg_email_thr.t See if regex recursion works by parsing email addresses in another thread
diff --git a/t/re/recompile.t b/t/re/recompile.t
new file mode 100644
index 0000000000..0fb80afbc9
--- /dev/null
+++ b/t/re/recompile.t
@@ -0,0 +1,186 @@
+#!./perl
+
+# Check that we don't recompile runtime patterns when the pattern hasn't
+# changed
+#
+# Works by checking the debugging output of 'use re debug' and, if
+# available, -Dr. We use both to check that the different code paths
+# with Perl_foo() verses the my_foo() under ext/re/ don't cause any
+# changes.
+
+use strict;
+use warnings;
+
+$| = 1;
+
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = ('../lib','.');
+ require './test.pl';
+ skip_all_if_miniperl("no dynamic loading on miniperl, no re");
+}
+
+
+plan tests => 36;
+
+my $results = runperl(
+ switches => [ '-Dr' ],
+ prog => '1',
+ stderr => 1,
+ );
+my $has_Dr = $results !~ /Recompile perl with -DDEBUGGING/;
+
+my $tmpfile = tempfile();
+
+
+# Check that a pattern triggers a regex compilation exactly N times,
+# using either -Dr or 'use re debug'
+# This is partially based on _fresh_perl() in test.pl
+
+sub _comp_n {
+ my ($use_Dr, $n, $prog, $desc) = @_;
+ open my $tf, ">$tmpfile" or die "Cannot open $tmpfile: $!";
+
+ my $switches = [];
+ if ($use_Dr) {
+ push @$switches, '-Dr';
+ }
+ else {
+ $prog = qq{use re qw(debug);\n$prog};
+ }
+
+ print $tf $prog;
+ close $tf or die "Cannot close $tmpfile: $!";
+ my $results = runperl(
+ switches => $switches,
+ progfile => $tmpfile,
+ stderr => 1,
+ );
+
+ my $status = $?;
+
+ my $count = () = $results =~ /Final program:/g;
+ if ($count == $n) {
+ pass($desc);
+ }
+ else {
+ fail($desc);
+ _diag "# COUNT: $count EXPECTED $n\n";
+ _diag "# STATUS: $status\n";
+ _diag "# SWITCHES: @$switches\n";
+ _diag "# PROG: \n$prog\n";
+ # this is verbose; uncomment for debugging
+ #_diag "# OUTPUT:\n------------------\n $results-------------------\n";
+ }
+}
+
+# Check that a pattern triggers a regex compilation exactly N times,
+
+sub comp_n {
+ my ($n, $prog, $desc) = @_;
+ if ($has_Dr) {
+ _comp_n(1, $n, $prog, "$desc -Dr");
+ }
+ else {
+ SKIP: {
+ skip("-Dr not compiled in");
+ }
+ }
+ _comp_n(0, @_);
+}
+
+# Check that a pattern triggers a regex compilation exactly once.
+
+sub comp_1 {
+ comp_n(1, @_);
+}
+
+
+comp_1(<<'CODE', 'simple');
+"a" =~ /$_/ for qw(a a a);
+CODE
+
+comp_1(<<'CODE', 'simple qr');
+"a" =~ qr/$_/ for qw(a a a);
+CODE
+
+comp_1(<<'CODE', 'literal utf8');
+"a" =~ /$_/ for "\x{100}", "\x{100}", "\x{100}";
+CODE
+
+comp_1(<<'CODE', 'literal utf8 qr');
+"a" =~ qr/$_/ for "\x{100}", "\x{100}", "\x{100}";
+CODE
+
+comp_1(<<'CODE', 'longjmp literal utf8');
+my $x = chr(0x80);
+"a" =~ /$x$_/ for "\x{100}", "\x{100}", "\x{100}";
+CODE
+
+comp_1(<<'CODE', 'longjmp literal utf8 qr');
+my $x = chr(0x80);
+"a" =~ qr/$x$_/ for "\x{100}", "\x{100}", "\x{100}";
+CODE
+
+comp_1(<<'CODE', 'utf8');
+"a" =~ /$_/ for '\x{100}', '\x{100}', '\x{100}';
+CODE
+
+comp_1(<<'CODE', 'utf8 qr');
+"a" =~ qr/$_/ for '\x{100}', '\x{100}', '\x{100}';
+CODE
+
+comp_1(<<'CODE', 'longjmp utf8');
+my $x = chr(0x80);
+"a" =~ /$x$_/ for '\x{100}', '\x{100}', '\x{100}';
+CODE
+
+comp_1(<<'CODE', 'longjmp utf8');
+my $x = chr(0x80);
+"a" =~ qr/$x$_/ for '\x{100}', '\x{100}', '\x{100}';
+CODE
+
+comp_n(3, <<'CODE', 'mixed utf8');
+"a" =~ /$_/ for "\x{c4}\x{80}", "\x{100}", "\x{c4}\x{80}";
+CODE
+
+comp_n(3, <<'CODE', 'mixed utf8 qr');
+"a" =~ qr/$_/ for "\x{c4}\x{80}", "\x{100}", "\x{c4}\x{80}";
+CODE
+
+comp_n(3, <<'CODE', 'runtime code');
+my $x = '(?{1})';
+BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
+"a" =~ /a$_/ for $x, $x, $x;
+CODE
+
+comp_n(3, <<'CODE', 'runtime code qr');
+my $x = '(?{1})';
+BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
+"a" =~ qr/a$_/ for $x, $x, $x;
+CODE
+
+comp_n(4, <<'CODE', 'embedded code');
+my $x = qr/(?{1})/;
+"a" =~ /a$_/ for $x, $x, $x;
+CODE
+
+comp_n(4, <<'CODE', 'embedded code qr');
+my $x = qr/(?{1})/;
+"a" =~ qr/a$_/ for $x, $x, $x;
+CODE
+
+comp_n(4, <<'CODE', 'mixed code');
+my $x = qr/(?{1})/;
+my $y = '(?{1})';
+BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
+"a" =~ /a$x$_/ for $y, $y, $y;
+CODE
+
+comp_n(4, <<'CODE', 'mixed code qr');
+my $x = qr/(?{1})/;
+my $y = '(?{1})';
+BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
+"a" =~ qr/a$x$_/ for $y, $y, $y;
+CODE