diff options
author | Nicholas Clark <nick@ccl4.org> | 2010-12-21 16:55:38 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2010-12-21 16:55:38 +0000 |
commit | a18d6e6e4cf998a0ba9067ceac2d75f71aedef15 (patch) | |
tree | 0b3b68d0cdb46f49e3bdd0d7a09c163c9480163f /lib/utf8.t | |
parent | e4dc48dc285e86e786d9f1ca22417ef481b6daff (diff) | |
download | perl-a18d6e6e4cf998a0ba9067ceac2d75f71aedef15.tar.gz |
Fix IS_UTF8_CHAR() to recognise start bytes 0xF5, 0xF6, 0xF7.
The refactoring of 3b0fc154d4e77cfb inadvertently introduced a bug
in Perl_is_utf8_char() and its callers, such as Perl_is_utf8_string(),
whereby the beyond-Unicode characters 0x140000 to 0x1fffff were no longer
recognised as valid.
Diffstat (limited to 'lib/utf8.t')
-rw-r--r-- | lib/utf8.t | 24 |
1 files changed, 22 insertions, 2 deletions
```diff
diff --git a/lib/utf8.t b/lib/utf8.t
index 722c51de37..715ca3e7e1 100644
--- a/lib/utf8.t
+++ b/lib/utf8.t
@@ -39,8 +39,6 @@ no utf8; # Ironic, no?
 #
 #
 
-plan tests => 157;
-
 {
     # bug id 20001009.001
 
@@ -465,3 +463,25 @@ SKIP: {
     no strict 'subs';
     ok( !utf8::is_utf8( asd ), "Wasteful format - bareword" );
 }
+
+{
+    my @highest =
+        (undef, 0x7F, 0x7FF, 0xFFFF, 0x1FFFFF, 0x3FFFFFF, 0x7FFFFFFF);
+    my @step =
+        (undef, undef, 0x40, 0x1000, 0x40000, 0x1000000, 0x40000000);
+
+    foreach my $length (6, 5, 4, 3, 2) {
+        my $high = $highest[$length];
+        while ($high > $highest[$length - 1]) {
+            my $low = $high - $step[$length] + 1;
+            $low = $highest[$length - 1] + 1 if $low <= $highest[$length - 1];
+            ok(utf8::valid(do {no warnings 'utf8'; chr $low}),
+               sprintf "chr %x, length $length is valid", $low);
+            ok(utf8::valid(do {no warnings 'utf8'; chr $high}),
+               sprintf "chr %x, length $length is valid", $high);
+            $high -= $step[$length];
+        }
+    }
+}
+
+done_testing();
```