diff options
author | Nicholas Clark <nick@ccl4.org> | 2009-10-22 11:50:40 +0100 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2009-10-22 13:06:13 +0100 |
commit | c28d61051c446453c532f387d478df78d6f95c55 (patch) | |
tree | b5269841b136d4b6de17e091386aa9621b76c683 /t | |
parent | 9fb03e618192b6b5d49274cc64422acee51fe198 (diff) | |
download | perl-c28d61051c446453c532f387d478df78d6f95c55.tar.gz |
Re-write S_utf16_textfilter() to correctly handle partial reads of UTF-16.
Treat any (and all) octets after the BOM (or all, if there was no BOM) as
initial read data for the filter, and call it to convert them to the first
line, reading more if necessary. This correctly handles the "problem" that
UTF-16LE read as a line, on the assumption that it's ASCII/ISO-8859-*/UTF-8/etc,
will be truncated after the first octet of the "\n\0" pair that is "\n"
encoded as UTF-16LE. This fixes bug #69678.
Read from the upstream filter in block mode, rather than line mode.
Diffstat (limited to 't')
-rw-r--r-- | t/comp/utf.t | 16 |
1 files changed, 9 insertions, 7 deletions
diff --git a/t/comp/utf.t b/t/comp/utf.t index 6f79d27769..c1a3e82fdd 100644 --- a/t/comp/utf.t +++ b/t/comp/utf.t @@ -1,6 +1,6 @@ #!./perl -w -print "1..18\n"; +print "1..36\n"; my $test = 0; my %templates = ( @@ -17,26 +17,28 @@ sub bytes_to_utf { } sub test { - my ($enc, $tag, $bom) = @_; + my ($enc, $tag, $bom, $nl) = @_; open my $fh, ">", "utf$$.pl" or die "utf.pl: $!"; binmode $fh; - print $fh bytes_to_utf($enc, "$tag\n", $bom); + print $fh bytes_to_utf($enc, $tag . ($nl ? "\n" : ''), $bom); close $fh or die $!; my $got = do "./utf$$.pl"; $test = $test + 1; if (!defined $got) { - print "not ok $test # $enc $tag $bom; got undef\n"; + print "not ok $test # $enc $tag $bom $nl; got undef\n"; } elsif ($got ne $tag) { - print "not ok $test # $enc $tag $bom; got '$got'\n"; + print "not ok $test # $enc $tag $bom $nl; got '$got'\n"; } else { - print "ok $test\n"; + print "ok $test # $enc $tag $bom $nl\n"; } } for my $bom (0, 1) { for my $enc (qw(utf16le utf16be utf8)) { for my $value (123, 1234, 12345) { - test($enc, $value, $bom); + for my $nl (1, 0) { + test($enc, $value, $bom, $nl); + } } } } |