summaryrefslogtreecommitdiff
path: root/perltest.pl
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-03-05 12:36:47 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-03-05 12:36:47 +0000
commit8ae0b4ccbefb07f479ab5cd2c357c153c6b553ad (patch)
treed09d13da9c0a5c979df307673de7024769eb8cc7 /perltest.pl
parent5bdc3f16de2ec858899e83f62c1b631f03a64c36 (diff)
downloadpcre-8ae0b4ccbefb07f479ab5cd2c357c153c6b553ad.tar.gz
Applied Bob and Daniel's patches to convert the build system to automake. Added
the maintain directory, containing files that are used for maintenance, but are not distributed. This is an intermediate step. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@97 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'perltest.pl')
-rwxr-xr-xperltest.pl191
1 files changed, 191 insertions, 0 deletions
diff --git a/perltest.pl b/perltest.pl
new file mode 100755
index 0000000..0d290c1
--- /dev/null
+++ b/perltest.pl
@@ -0,0 +1,191 @@
+#! /usr/bin/env perl
+
+# Program for testing regular expressions with perl to check that PCRE handles
+# them the same. This is the version that supports /8 for UTF-8 testing. As it
+# stands, it requires at least Perl 5.8 for UTF-8 support. However, it needs to
+# have "use utf8" at the start for running the UTF-8 tests, but *not* for the
+# other tests. The only way I've found for doing this is to cat this line in
+# explicitly in the RunPerlTest script.
+
+# use locale; # With this included, \x0b matches \s!
+
+# Function for turning a string into a string of printing chars. There are
+# currently problems with UTF-8 strings; this fudges round them.
+
+sub pchars {
+my($t) = "";
+
+if ($utf8)
+ {
+ @p = unpack('U*', $_[0]);
+ foreach $c (@p)
+ {
+ if ($c >= 32 && $c < 127) { $t .= chr $c; }
+ else { $t .= sprintf("\\x{%02x}", $c); }
+ }
+ }
+
+else
+ {
+ foreach $c (split(//, $_[0]))
+ {
+ if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
+ else { $t .= sprintf("\\x%02x", ord $c); }
+ }
+ }
+
+$t;
+}
+
+
+# Read lines from named file or stdin and write to named file or stdout; lines
+# consist of a regular expression, in delimiters and optionally followed by
+# options, followed by a set of test data, terminated by an empty line.
+
+# Sort out the input and output files
+
+if (@ARGV > 0)
+ {
+ open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
+ $infile = "INFILE";
+ }
+else { $infile = "STDIN"; }
+
+if (@ARGV > 1)
+ {
+ open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
+ $outfile = "OUTFILE";
+ }
+else { $outfile = "STDOUT"; }
+
+printf($outfile "Perl $] Regular Expressions\n\n");
+
+# Main loop
+
+NEXT_RE:
+for (;;)
+ {
+ printf " re> " if $infile eq "STDIN";
+ last if ! ($_ = <$infile>);
+ printf $outfile "$_" if $infile ne "STDIN";
+ next if ($_ eq "");
+
+ $pattern = $_;
+
+ while ($pattern !~ /^\s*(.).*\1/s)
+ {
+ printf " > " if $infile eq "STDIN";
+ last if ! ($_ = <$infile>);
+ printf $outfile "$_" if $infile ne "STDIN";
+ $pattern .= $_;
+ }
+
+ chomp($pattern);
+ $pattern =~ s/\s+$//;
+
+ # The private /+ modifier means "print $' afterwards".
+
+ $showrest = ($pattern =~ s/\+(?=[a-z]*$)//);
+
+ # Remove /8 from a UTF-8 pattern.
+
+ $utf8 = $pattern =~ s/8(?=[a-z]*$)//;
+
+ # Check that the pattern is valid
+
+ eval "\$_ =~ ${pattern}";
+ if ($@)
+ {
+ printf $outfile "Error: $@";
+ next NEXT_RE;
+ }
+
+ # If the /g modifier is present, we want to put a loop round the matching;
+ # otherwise just a single "if".
+
+ $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
+
+ # If the pattern is actually the null string, Perl uses the most recently
+ # executed (and successfully compiled) regex is used instead. This is a
+ # nasty trap for the unwary! The PCRE test suite does contain null strings
+ # in places - if they are allowed through here all sorts of weird and
+ # unexpected effects happen. To avoid this, we replace such patterns with
+ # a non-null pattern that has the same effect.
+
+ $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
+
+ # Read data lines and test them
+
+ for (;;)
+ {
+ printf "data> " if $infile eq "STDIN";
+ last NEXT_RE if ! ($_ = <$infile>);
+ chomp;
+ printf $outfile "$_\n" if $infile ne "STDIN";
+
+ s/\s+$//;
+ s/^\s+//;
+
+ last if ($_ eq "");
+ $x = eval "\"$_\""; # To get escapes processed
+
+ # Empty array for holding results, then do the matching.
+
+ @subs = ();
+
+ $pushes = "push \@subs,\$&;" .
+ "push \@subs,\$1;" .
+ "push \@subs,\$2;" .
+ "push \@subs,\$3;" .
+ "push \@subs,\$4;" .
+ "push \@subs,\$5;" .
+ "push \@subs,\$6;" .
+ "push \@subs,\$7;" .
+ "push \@subs,\$8;" .
+ "push \@subs,\$9;" .
+ "push \@subs,\$10;" .
+ "push \@subs,\$11;" .
+ "push \@subs,\$12;" .
+ "push \@subs,\$13;" .
+ "push \@subs,\$14;" .
+ "push \@subs,\$15;" .
+ "push \@subs,\$16;" .
+ "push \@subs,\$'; }";
+
+ eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
+
+ if ($@)
+ {
+ printf $outfile "Error: $@\n";
+ next NEXT_RE;
+ }
+ elsif (scalar(@subs) == 0)
+ {
+ printf $outfile "No match\n";
+ }
+ else
+ {
+ while (scalar(@subs) != 0)
+ {
+ printf $outfile (" 0: %s\n", &pchars($subs[0]));
+ printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
+ $last_printed = 0;
+ for ($i = 1; $i <= 16; $i++)
+ {
+ if (defined $subs[$i])
+ {
+ while ($last_printed++ < $i-1)
+ { printf $outfile ("%2d: <unset>\n", $last_printed); }
+ printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
+ $last_printed = $i;
+ }
+ }
+ splice(@subs, 0, 18);
+ }
+ }
+ }
+ }
+
+# printf $outfile "\n";
+
+# End