summary | refs | log | tree | commit | diff
path: root/lib/utf8.t
diff options
context:
space:
mode:
author: Nicholas Clark <nick@ccl4.org> 2010-12-21 16:55:38 +0000
committer: Nicholas Clark <nick@ccl4.org> 2010-12-21 16:55:38 +0000
commit: a18d6e6e4cf998a0ba9067ceac2d75f71aedef15 (patch)
tree: 0b3b68d0cdb46f49e3bdd0d7a09c163c9480163f /lib/utf8.t
parent: e4dc48dc285e86e786d9f1ca22417ef481b6daff (diff)
download: perl-a18d6e6e4cf998a0ba9067ceac2d75f71aedef15.tar.gz
Fix IS_UTF8_CHAR() to recognise start bytes 0xF5, 0xF6, 0xF7.
The refactoring of 3b0fc154d4e77cfb inadvertently introduced a bug in Perl_is_utf8_char() and its callers, such as Perl_is_utf8_string(), whereby the beyond-Unicode characters 0x140000 to 0x1fffff were no longer recognised as valid.
Diffstat (limited to 'lib/utf8.t')
-rw-r--r-- lib/utf8.t 24
1 files changed, 22 insertions, 2 deletions
diff --git a/lib/utf8.t b/lib/utf8.t
index 722c51de37..715ca3e7e1 100644
--- a/lib/utf8.t
+++ b/lib/utf8.t
@@ -39,8 +39,6 @@ no utf8; # Ironic, no?
#
#
-plan tests => 157;
-
{
# bug id 20001009.001
@@ -465,3 +463,25 @@ SKIP: {
no strict 'subs';
ok( !utf8::is_utf8( asd ), "Wasteful format - bareword" );
}
+
+{
+ my @highest =
+ (undef, 0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF, 0x7FFFFFFF);
+ my @step =
+ (undef, undef, 0x40, 0x1000, 0x40000, 0x1000000, 0x40000000);
+
+ foreach my $length (6, 5, 4, 3, 2) {
+ my $high = $highest[$length];
+ while ($high > $highest[$length - 1]) {
+ my $low = $high - $step[$length] + 1;
+ $low = $highest[$length - 1] + 1 if $low <= $highest[$length - 1];
+ ok(utf8::valid(do {no warnings 'utf8'; chr $low}),
+ sprintf "chr %x, length $length is valid", $low);
+ ok(utf8::valid(do {no warnings 'utf8'; chr $high}),
+ sprintf "chr %x, length $length is valid", $high);
+ $high -= $step[$length];
+ }
+ }
+}
+
+done_testing();