summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2009-10-22 11:50:40 +0100
committerNicholas Clark <nick@ccl4.org>2009-10-22 13:06:13 +0100
commitc28d61051c446453c532f387d478df78d6f95c55 (patch)
treeb5269841b136d4b6de17e091386aa9621b76c683 /t
parent9fb03e618192b6b5d49274cc64422acee51fe198 (diff)
downloadperl-c28d61051c446453c532f387d478df78d6f95c55.tar.gz
Re-write S_utf16_textfilter() to correctly handle partial reads of UTF-16.
Treat any (and all) octets after the BOM (or all octets, if there was no BOM) as initial read data for the filter, and call it to convert them to the first line, reading more if necessary. This correctly handles the "problem" that UTF-16LE read as a line, on the assumption that it's ASCII/ISO-8859-*/UTF-8/etc., will be truncated after the first octet of the "\n\0" pair that is "\n" encoded as UTF-16LE. This fixes bug #69678. Read from the upstream filter in block mode, rather than line mode.
Diffstat (limited to 't')
-rw-r--r-- t/comp/utf.t 16
1 files changed, 9 insertions, 7 deletions
diff --git a/t/comp/utf.t b/t/comp/utf.t
index 6f79d27769..c1a3e82fdd 100644
--- a/t/comp/utf.t
+++ b/t/comp/utf.t
@@ -1,6 +1,6 @@
#!./perl -w
-print "1..18\n";
+print "1..36\n";
my $test = 0;
my %templates = (
@@ -17,26 +17,28 @@ sub bytes_to_utf {
}
sub test {
- my ($enc, $tag, $bom) = @_;
+ my ($enc, $tag, $bom, $nl) = @_;
open my $fh, ">", "utf$$.pl" or die "utf.pl: $!";
binmode $fh;
- print $fh bytes_to_utf($enc, "$tag\n", $bom);
+ print $fh bytes_to_utf($enc, $tag . ($nl ? "\n" : ''), $bom);
close $fh or die $!;
my $got = do "./utf$$.pl";
$test = $test + 1;
if (!defined $got) {
- print "not ok $test # $enc $tag $bom; got undef\n";
+ print "not ok $test # $enc $tag $bom $nl; got undef\n";
} elsif ($got ne $tag) {
- print "not ok $test # $enc $tag $bom; got '$got'\n";
+ print "not ok $test # $enc $tag $bom $nl; got '$got'\n";
} else {
- print "ok $test\n";
+ print "ok $test # $enc $tag $bom $nl\n";
}
}
for my $bom (0, 1) {
for my $enc (qw(utf16le utf16be utf8)) {
for my $value (123, 1234, 12345) {
- test($enc, $value, $bom);
+ for my $nl (1, 0) {
+ test($enc, $value, $bom, $nl);
+ }
}
}
}