summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-01-14 16:20:44 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-01-14 16:20:44 +0000
commit 3cbf1c2db892334e94f79fbed6f83ff33ba5297c (patch)
tree 943d276f29fdd6132f36f982ebe78f9af4b47f37
parent 32925a47941f651ab158479c977828875c478348 (diff)
download pcre-3cbf1c2db892334e94f79fbed6f83ff33ba5297c.tar.gz
Fix issues with UTF-8 in the Perl checking script.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@871 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- doc/perltest.txt 10
-rwxr-xr-x perltest.pl 33
2 files changed, 28 insertions, 15 deletions
diff --git a/doc/perltest.txt b/doc/perltest.txt
index 3785bdd..37e0012 100644
--- a/doc/perltest.txt
+++ b/doc/perltest.txt
@@ -28,13 +28,15 @@ the initial identifying banner.
The perltest.pl script can also test UTF-8 features. It recognizes the special
modifier /8 that pcretest uses to invoke UTF-8 functionality. The testinput4
and testinput6 files can be fed to perltest to run compatible UTF-8 tests.
-However, it is necessary to add "use utf8;" to the script to make this work
-correctly.
+However, it is necessary to add "use utf8; require Encode" to the script to
+make this work correctly. I have not managed to find a way to handle this
+automatically.
The other testinput files are not suitable for feeding to perltest.pl, since
they make use of the special upper case modifiers and escapes that pcretest
-uses to test some features of PCRE. Some of these files also contains malformed
-regular expressions, in order to check that PCRE diagnoses them correctly.
+uses to test certain features of PCRE. Some of these files also contain
+malformed regular expressions, in order to check that PCRE diagnoses them
+correctly.
Philip Hazel
January 2012
diff --git a/perltest.pl b/perltest.pl
index d81a6c4..d44e6c5 100755
--- a/perltest.pl
+++ b/perltest.pl
@@ -1,16 +1,18 @@
#! /usr/bin/env perl
# Program for testing regular expressions with perl to check that PCRE handles
-# them the same. This is the version that supports /8 for UTF-8 testing. As it
-# stands, it requires at least Perl 5.8 for UTF-8 support. However, it needs to
-# have "use utf8" at the start for running the UTF-8 tests, but *not* for the
-# other tests. The only way I've found for doing this is to cat this line in
-# explicitly in the RunPerlTest script.
+# them the same. This version supports /8 for UTF-8 testing. However, it needs
+# to have "use utf8" at the start for running the UTF-8 tests, but *not* for
+# the other tests. The only way I've found for doing this is to cat this line
+# in explicitly in the RunPerlTest script. I've also used this method to supply
+# "require Encode" for the UTF-8 tests, so that the main test will still run
+# where Encode is not installed.
# use locale; # With this included, \x0b matches \s!
-# Function for turning a string into a string of printing chars. There are
-# currently problems with UTF-8 strings; this fudges round them.
+# Function for turning a string into a string of printing chars.
+
+#require Encode;
sub pchars {
my($t) = "";
@@ -21,10 +23,10 @@ if ($utf8)
foreach $c (@p)
{
if ($c >= 32 && $c < 127) { $t .= chr $c; }
- else { $t .= sprintf("\\x{%02x}", $c); }
+ else { $t .= sprintf("\\x{%02x}", $c);
+ }
}
}
-
else
{
foreach $c (split(//, $_[0]))
@@ -192,7 +194,7 @@ for (;;)
{
printf $outfile "No match";
if (defined $REGERROR && $REGERROR != 1)
- { print $outfile (", mark = $REGERROR"); }
+ { printf $outfile (", mark = %s", &pchars($REGERROR)); }
printf $outfile "\n";
}
else
@@ -214,8 +216,17 @@ for (;;)
}
splice(@subs, 0, 18);
}
+
+ # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
+ # set and the input pattern was a UTF-8 string. We can, however, force
+ # it to be so marked.
+
if (defined $REGMARK && $REGMARK != 1)
- { print $outfile ("MK: $REGMARK\n"); }
+ {
+ $xx = $REGMARK;
+ $xx = Encode::decode_utf8($xx) if $utf8;
+ printf $outfile ("MK: %s\n", &pchars($xx));
+ }
}
}
}