Re-write S_utf16_textfilter() to correctly handle partial reads of UTF-16.

Treat any (and all) octets after the BOM (or all octets, if there was no BOM) as initial read data for the filter, and call it to convert them to the first line, reading more if necessary. This correctly handles the "problem" that UTF-16LE read as a line, on the assumption that it's ASCII/ISO-8859-*/UTF-8/etc., will be truncated after the first octet of the "\n\0" pair that is "\n" encoded as UTF-16LE. This fixes bug #69678. Read from the upstream filter in block mode, rather than line mode.
author: Nicholas Clark <nick@ccl4.org> 2009-10-22 11:50:40 +0100
committer: Nicholas Clark <nick@ccl4.org> 2009-10-22 13:06:13 +0100
commit: c28d61051c446453c532f387d478df78d6f95c55 (patch)
tree: b5269841b136d4b6de17e091386aa9621b76c683 /t
parent: 9fb03e618192b6b5d49274cc64422acee51fe198 (diff)
download: perl-c28d61051c446453c532f387d478df78d6f95c55.tar.gz
1 files changed, 9 insertions, 7 deletions
diff --git a/t/comp/utf.t b/t/comp/utf.t
index 6f79d27769..c1a3e82fdd 100644
--- a/t/comp/utf.t
+++ b/t/comp/utf.t
@@ -1,6 +1,6 @@
 #!./perl -w
 
-print "1..18\n";
+print "1..36\n";
 my $test = 0;
 
 my %templates = (
@@ -17,26 +17,28 @@ sub bytes_to_utf {
 }
 
 sub test {
-    my ($enc, $tag, $bom) = @_;
+    my ($enc, $tag, $bom, $nl) = @_;
     open my $fh, ">", "utf$$.pl" or die "utf.pl: $!";
     binmode $fh;
-    print $fh bytes_to_utf($enc, "$tag\n", $bom);
+    print $fh bytes_to_utf($enc, $tag . ($nl ? "\n" : ''), $bom);
     close $fh or die $!;
     my $got = do "./utf$$.pl";
     $test = $test + 1;
     if (!defined $got) {
-	print "not ok $test # $enc $tag $bom; got undef\n";
+	print "not ok $test # $enc $tag $bom $nl; got undef\n";
     } elsif ($got ne $tag) {
-	print "not ok $test # $enc $tag $bom; got '$got'\n";
+	print "not ok $test # $enc $tag $bom $nl; got '$got'\n";
     } else {
-	print "ok $test\n";
+	print "ok $test # $enc $tag $bom $nl\n";
     }
 }
 
 for my $bom (0, 1) {
     for my $enc (qw(utf16le utf16be utf8)) {
 	for my $value (123, 1234, 12345) {
-	    test($enc, $value, $bom);
+	    for my $nl (1, 0) {
+		test($enc, $value, $bom, $nl);
+	    }
 	}
     }
 }
author	Nicholas Clark <nick@ccl4.org>	2009-10-22 11:50:40 +0100
committer	Nicholas Clark <nick@ccl4.org>	2009-10-22 13:06:13 +0100
commit	c28d61051c446453c532f387d478df78d6f95c55 (patch)
tree	b5269841b136d4b6de17e091386aa9621b76c683 /t
parent	9fb03e618192b6b5d49274cc64422acee51fe198 (diff)
download	perl-c28d61051c446453c532f387d478df78d6f95c55.tar.gz