summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2011-12-19 12:27:48 +0000
committerDavid Mitchell <davem@iabyn.com>2012-06-13 13:32:48 +0100
commit23af30a1edf3b57b8b6950f73f724cce1266fdbd (patch)
tree8509e3888ff6176bea7dc5e133a74e61f53e0397
parent629cd4f36c79e2b9633bff622171e643c504eaf2 (diff)
downloadperl-23af30a1edf3b57b8b6950f73f724cce1266fdbd.tar.gz
add tests for regex recompilation
The run-time regexp compilation (invoked via pp_regcomp()) has a mechanism to skip the recompilation if the pattern text hasn't changed since the last recompile. Astonishingly this mechanism isn't actually tested, so here's a test file. All the tests now pass, but this is due to the various recent fixes in this branch. In particular, it never used to consider the UTF8ness of the pattern string, or whether the pattern contained code blocks. It works by checking the output of 'use re debug' (and -Dr if available) to detect how many times the pattern was compiled. This file then is also an indirect test of whether the correct debugging output is generated, i.e. whether the regcomp.c or ext/re/re_comp.c versions of functions are getting called.
-rw-r--r--MANIFEST1
-rw-r--r--t/re/recompile.t186
2 files changed, 187 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
index 1e0433807c..011e032df1 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5393,6 +5393,7 @@ t/re/qr-72922.t Test for bug #72922
t/re/qr_gc.t See if qr doesn't leak
t/re/qrstack.t See if qr expands the stack properly
t/re/qr.t See if qr works
+t/re/recompile.t See if pattern caching/recompilation works
t/re/reg_60508.t See if bug #60508 is fixed
t/re/reg_email.t See if regex recursion works by parsing email addresses
t/re/reg_email_thr.t See if regex recursion works by parsing email addresses in another thread
diff --git a/t/re/recompile.t b/t/re/recompile.t
new file mode 100644
index 0000000000..aa6f7e981f
--- /dev/null
+++ b/t/re/recompile.t
@@ -0,0 +1,186 @@
+#!./perl
+
+# Check that we don't recompile runtime patterns when the pattern hasn't
+# changed
+#
+# Works by checking the debugging output of 'use re debug' and, if
+# available, -Dr. We use both to check that the different code paths
+# with Perl_foo() verses the my_foo() under ext/re/ don't cause any
+# changes.
+
+use strict;
+use warnings;
+
+$| = 1;
+
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = ('../lib','.');
+ require './test.pl';
+ skip_all_if_miniperl("no dynamic loading on miniperl, no re");
+}
+
+
+plan tests => 36;
+
+my $results = runperl(
+ switches => [ '-Dr' ],
+ prog => '1',
+ stderr => 1,
+ );
+my $has_Dr = $results !~ /Recompile perl with -DDEBUGGING/;
+
+my $tmpfile = tempfile();
+
+
+# Check that a pattern triggers a regex compilation exactly N times,
+# using either -Dr or 'use re debug'
+# This is partially based on _fresh_perl() in test.pl
+
+sub _comp_n {
+ my ($use_Dr, $n, $prog, $desc) = @_;
+ open my $tf, ">$tmpfile" or die "Cannot open $tmpfile: $!";
+
+ my $switches = [];
+ if ($use_Dr) {
+ push @$switches, '-Dr';
+ }
+ else {
+ $prog = qq{use re qw(debug);\n$prog};
+ }
+
+ print $tf $prog;
+ close $tf or die "Cannot close $tmpfile: $!";
+ my $results = runperl(
+ switches => $switches,
+ progfile => $tmpfile,
+ stderr => 1,
+ );
+
+ my $status = $?;
+
+ my $count = () = $results =~ /Final program:/g;
+ if ($count == $n) {
+ pass($desc);
+ }
+ else {
+ fail($desc);
+ _diag "# COUNT: $count EXPECTED $n\n";
+ _diag "# STATUS: $status\n";
+ _diag "# SWITCHES: @$switches\n";
+ _diag "# PROG: \n$prog\n";
+ # this is verbose; uncomment for debugging
+ #_diag "# OUTPUT:\n------------------\n $results-------------------\n";
+ }
+}
+
+# Check that a pattern triggers a regex compilation exactly N times,
+
+sub comp_n {
+ my ($n, $prog, $desc) = @_;
+ if ($has_Dr) {
+ _comp_n(1, $n, $prog, "$desc -Dr");
+ }
+ else {
+ SKIP: {
+ skip("-Dr not compiled in");
+ }
+ }
+ _comp_n(0, @_);
+}
+
+# Check that a pattern triggers a regex compilation exactly once.
+
+sub comp_1 {
+ comp_n(1, @_);
+}
+
+
+comp_1(<<'CODE', 'simple');
+"a" =~ /$_/ for qw(a a a);
+CODE
+
+comp_1(<<'CODE', 'simple qr');
+"a" =~ qr/$_/ for qw(a a a);
+CODE
+
+comp_1(<<'CODE', 'literal utf8');
+"a" =~ /$_/ for "\x{100}", "\x{100}", "\x{100}";
+CODE
+
+comp_1(<<'CODE', 'literal utf8 qr');
+"a" =~ qr/$_/ for "\x{100}", "\x{100}", "\x{100}";
+CODE
+
+comp_1(<<'CODE', 'longjmp literal utf8');
+my $x = chr(0x80);
+"a" =~ /$x$_/ for "\x{100}", "\x{100}", "\x{100}";
+CODE
+
+comp_1(<<'CODE', 'longjmp literal utf8 qr');
+my $x = chr(0x80);
+"a" =~ qr/$x$_/ for "\x{100}", "\x{100}", "\x{100}";
+CODE
+
+comp_1(<<'CODE', 'utf8');
+"a" =~ /$_/ for '\x{100}', '\x{100}', '\x{100}';
+CODE
+
+comp_1(<<'CODE', 'utf8 qr');
+"a" =~ qr/$_/ for '\x{100}', '\x{100}', '\x{100}';
+CODE
+
+comp_1(<<'CODE', 'longjmp utf8');
+my $x = chr(0x80);
+"a" =~ /$x$_/ for '\x{100}', '\x{100}', '\x{100}';
+CODE
+
+comp_1(<<'CODE', 'longjmp utf8');
+my $x = chr(0x80);
+"a" =~ qr/$x$_/ for '\x{100}', '\x{100}', '\x{100}';
+CODE
+
+comp_n(3, <<'CODE', 'mixed utf8');
+"a" =~ /$_/ for "\x{c4}\x{80}", "\x{100}", "\x{c4}\x{80}";
+CODE
+
+comp_n(3, <<'CODE', 'mixed utf8 qr');
+"a" =~ qr/$_/ for "\x{c4}\x{80}", "\x{100}", "\x{c4}\x{80}";
+CODE
+
+comp_n(3, <<'CODE', 'runtime code');
+my $x = '(?{1})';
+BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
+"a" =~ /a$_/ for $x, $x, $x;
+CODE
+
+comp_n(3, <<'CODE', 'runtime code qr');
+my $x = '(?{1})';
+BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
+"a" =~ qr/a$_/ for $x, $x, $x;
+CODE
+
+comp_n(4, <<'CODE', 'embedded code');
+my $x = qr/(?{1})/;
+"a" =~ /a$_/ for $x, $x, $x;
+CODE
+
+comp_n(4, <<'CODE', 'embedded code qr');
+my $x = qr/(?{1})/;
+"a" =~ qr/a$_/ for $x, $x, $x;
+CODE
+
+comp_n(4, <<'CODE', 'mixed code');
+my $x = qr/(?{1})/;
+my $y = '(?{1})';
+BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
+"a" =~ /a$x$_/ for $y, $y, $y;
+CODE
+
+comp_n(4, <<'CODE', 'mixed code qr');
+my $x = qr/(?{1})/;
+my $y = '(?{1})';
+BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
+"a" =~ qr/a$x$_/ for $y, $y, $y;
+CODE