summaryrefslogtreecommitdiff
path: root/perltest
diff options
context:
space:
mode:
Diffstat (limited to 'perltest')
-rwxr-xr-xperltest62
1 files changed, 52 insertions, 10 deletions
diff --git a/perltest b/perltest
index e6f7974..bb34cc8 100755
--- a/perltest
+++ b/perltest
@@ -1,19 +1,39 @@
#! /usr/bin/perl
# Program for testing regular expressions with perl to check that PCRE handles
-# them the same.
+# them the same. This is the version that supports /8 for UTF-8 testing. As it
+# stands, it requires at least Perl 5.8 for UTF-8 support. For Perl 5.6, it
+# can be used as is for non-UTF-8 testing, but you have to uncomment the
+# "use utf8" lines in order to to UTF-8 stuff (and you mustn't uncomment them
+# for non-UTF-8 use).
-# Function for turning a string into a string of printing chars
+# Function for turning a string into a string of printing chars. There are
+# currently problems with UTF-8 strings; this fudges round them.
sub pchars {
my($t) = "";
-foreach $c (split(//, $_[0]))
+if ($utf8)
{
- if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
- else { $t .= sprintf("\\x%02x", ord $c); }
+# use utf8; <=============== For UTF-8 in Perl 5.6
+ @p = unpack('U*', $_[0]);
+ foreach $c (@p)
+ {
+ if ($c >= 32 && $c < 127) { $t .= chr $c; }
+ else { $t .= sprintf("\\x{%02x}", $c); }
+ }
}
+
+else
+ {
+ foreach $c (split(//, $_[0]))
+ {
+ if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
+ else { $t .= sprintf("\\x%02x", ord $c); }
+ }
+ }
+
$t;
}
@@ -64,14 +84,27 @@ for (;;)
chomp($pattern);
$pattern =~ s/\s+$//;
- # The private /+ modifier means "print $' afterwards". We use it
- # only on the end of patterns to make it easy to chop off here.
+ # The private /+ modifier means "print $' afterwards".
$showrest = ($pattern =~ s/\+(?=[a-z]*$)//);
+ # The private /8 modifier means "operate in UTF-8". Currently, Perl
+ # has bugs that we try to work around using this flag.
+
+ $utf8 = ($pattern =~ s/8(?=[a-z]*$)//);
+
# Check that the pattern is valid
- eval "\$_ =~ ${pattern}";
+ if ($utf8)
+ {
+# use utf8; <=============== For UTF-8 in Perl 5.6
+ eval "\$_ =~ ${pattern}";
+ }
+ else
+ {
+ eval "\$_ =~ ${pattern}";
+ }
+
if ($@)
{
printf $outfile "Error: $@";
@@ -112,8 +145,7 @@ for (;;)
@subs = ();
- eval "${cmd} (\$x =~ ${pattern}) {" .
- "push \@subs,\$&;" .
+ $pushes = "push \@subs,\$&;" .
"push \@subs,\$1;" .
"push \@subs,\$2;" .
"push \@subs,\$3;" .
@@ -132,6 +164,16 @@ for (;;)
"push \@subs,\$16;" .
"push \@subs,\$'; }";
+ if ($utf8)
+ {
+# use utf8; <=============== For UTF-8 in Perl 5.6
+ eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
+ }
+ else
+ {
+ eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
+ }
+
if ($@)
{
printf $outfile "Error: $@\n";