summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Kogai <dankogai@dan.co.jp>2003-05-24 09:15:49 +0900
committerJarkko Hietaniemi <jhi@iki.fi>2003-05-23 14:19:53 +0000
commit47dd3999631d2fd5030069ca2ee4380b5f022d88 (patch)
tree8783f18d193e109a152f6bb85b4961bc85d39075
parent2530b651d288d7db98df4c0991f07a783a1cd32c (diff)
downloadperl-47dd3999631d2fd5030069ca2ee4380b5f022d88.tar.gz
Re: Stateful PerlIO implemented [Was: [perl #22261] Was: Unrecognised BOM...]
Message-Id: <6F7B29DA-8D31-11D7-9F95-000393AE4244@dan.co.jp> p4raw-id: //depot/perl@19595
-rw-r--r--ext/Encode/Changes13
-rw-r--r--ext/Encode/Unicode/Unicode.xs17
-rw-r--r--ext/Encode/t/perlio.t62
-rw-r--r--ext/PerlIO/encoding/encoding.pm2
4 files changed, 79 insertions, 15 deletions
diff --git a/ext/Encode/Changes b/ext/Encode/Changes
index ded94e1483..f729d18b1b 100644
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -3,6 +3,19 @@
# $Id: Changes,v 1.95 2003/05/21 08:41:11 dankogai Exp $
#
$Revision: 1.95 $ $Date: 2003/05/21 08:41:11 $
+! encoding.pm
+ Addressed [cpan #2629] Wrong assumption in numeric comparison
+ Message-Id: <rt-2629-7326.19.5700583232515@cpan.org>
+! Encode.pm Encode.xs Unicode/Unicode.pm Unicode/Unicode.xs
+ lib/Encode/Encoding.pm t/perlio.t
+ ! API Change: ->new_sequence() => ->renew()
+ + Encode::Unicode makes use of it so it can handle BOM on PerlIO
+ + Encode::XS and Encode::utf8 now supports ->renew()
+ + Encode::Encoding now documents this with examples
+ - Non-XS (en|de)code stripped out of Encode::Unicode
+ Message-Id: <146957DB-8C39-11D7-9C91-000393AE4244@dan.co.jp>
+
+1.95 2003/05/21 08:41:11
! ucm/8859-*.ucm
Since bogus entries were found in iso-8859-6, all entries are
re-generated once again out of
diff --git a/ext/Encode/Unicode/Unicode.xs b/ext/Encode/Unicode/Unicode.xs
index 8b02402d1e..cb27bb3c44 100644
--- a/ext/Encode/Unicode/Unicode.xs
+++ b/ext/Encode/Unicode/Unicode.xs
@@ -171,8 +171,11 @@ CODE:
SvCUR_set(result,d - (U8 *)SvPVX(result));
}
if (s < e) {
+ /* unlikely to happen because it's fixed-length -- dankogai */
+ if (check & ENCODE_WARN_ON_ERR){
Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
*hv_fetch((HV *)SvRV(obj),"Name",4,0));
+ }
}
if (check && !(check & ENCODE_LEAVE_SRC)){
if (s < e) {
@@ -242,8 +245,16 @@ CODE:
}
}
if (s < e) {
- Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
- *hv_fetch((HV *)SvRV(obj),"Name",4,0));
+ /* UTF-8 partial char happens often on PerlIO.
+ Since this is okay and normal, we do not warn.
+ But this is critical when you choose to LEAVE_SRC
+ in which case we die */
+ if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)){
+ Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
+ "when CHECK = 0x%" UVuf,
+ *hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
+ }
+
}
if (check && !(check & ENCODE_LEAVE_SRC)){
if (s < e) {
@@ -254,7 +265,7 @@ CODE:
SvCUR_set(utf8,0);
}
*SvEND(utf8) = '\0';
- }
+ }
XSRETURN(1);
}
diff --git a/ext/Encode/t/perlio.t b/ext/Encode/t/perlio.t
index 9966ef8afe..ed16796a52 100644
--- a/ext/Encode/t/perlio.t
+++ b/ext/Encode/t/perlio.t
@@ -27,7 +27,7 @@ use File::Copy;
use FileHandle;
#use Test::More qw(no_plan);
-use Test::More tests => 28;
+use Test::More tests => 38;
our $DEBUG = 0;
@@ -40,23 +40,19 @@ use Encode (":all");
#$Encode::JP::JIS7::DEBUG = $DEBUG;
}
-
-
my $seq = 0;
my $dir = dirname(__FILE__);
my %e =
(
jisx0208 => [ qw/euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1/],
- #ksc5601 => [ qw/euc-kr iso-2022-kr/],
ksc5601 => [ qw/euc-kr/],
- #gb2312 => [ qw/euc-cn hz/],
- gb2312 => [ qw/euc-cn/],
+ gb2312 => [ qw/euc-cn hz/],
);
$/ = "\x0a"; # may fix VMS problem for test #28 and #29
-for my $src(sort keys %e) {
+for my $src (sort keys %e) {
my $ufile = File::Spec->catfile($dir,"$src.utf");
open my $fh, "<:utf8", $ufile or die "$ufile : $!";
my @uline = <$fh>;
@@ -72,9 +68,8 @@ for my $src(sort keys %e) {
# then create a file via perlio without autoflush
- TODO:{
- #local $TODO = "$e: !perlio_ok" unless (perlio_ok($e) or $DEBUG);
- todo_skip "$e: !perlio_ok", 4 unless (perlio_ok($e) or $DEBUG);
+ SKIP:{
+ skip "$e: !perlio_ok", 4 unless (perlio_ok($e) or $DEBUG);
no warnings 'uninitialized';
open $fh, ">:encoding($e)", $pfile or die "$sfile : $!";
$fh->autoflush(0);
@@ -130,8 +125,53 @@ for my $src(sort keys %e) {
$DEBUG or unlink ($sfile, $pfile);
}
}
-
+# BOM Test
+
+SKIP:{
+ my $pev = PerlIO::encoding->VERSION;
+ skip "PerlIO::encoding->VERSION = $pev <= 0.07 ", 6
+ unless ($pev >= 0.07 or $DEBUG);
+
+ my $file = File::Spec->catfile($dir,"jisx0208.utf");
+ open my $fh, "<:utf8", $file or die "$file : $!";
+ my $str = join('' => <$fh>);
+ close $fh;
+ my %bom = (
+ 'UTF-16BE' => pack('n', 0xFeFF),
+ 'UTF-16LE' => pack('v', 0xFeFF),
+ 'UTF-32BE' => pack('N', 0xFeFF),
+ 'UTF-32LE' => pack('V', 0xFeFF),
+ );
+ # reading
+ for my $utf (sort keys %bom){
+ my $bomed = $bom{$utf} . encode($utf, $str);
+ my $sfile = File::Spec->catfile($dir,".$utf.$seq.$$");
+ dump2file($sfile, $bomed);
+ my $utf_nobom = $utf; $utf_nobom =~ s/(LE|BE)$//o;
+ # reading
+ open $fh, "<:encoding($utf_nobom)", $sfile or die "$sfile : $!";
+ my $cmp = join '' => <$fh>;
+ close $fh;
+ is($str, $cmp, "<:encoding($utf_nobom) eq $utf");
+ unlink $sfile; $seq++;
+ }
+ # writing
+ for my $utf_nobom (qw/UTF-16 UTF-32/){
+ my $utf = $utf_nobom . 'BE';
+ my $sfile = File::Spec->catfile($dir,".$utf_nobom.$seq.$$");
+ my $bomed = $bom{$utf} . encode($utf, $str);
+ open $fh, ">:encoding($utf_nobom)", $sfile or die "$sfile : $!";
+ print $fh $str;
+ close $fh;
+ open my $fh, "<:raw", $sfile or die "$sfile : $!";
+ read $fh, my $cmp, -s $sfile;
+ close $fh;
+ use bytes ();
+ ok($bomed eq $cmp, ">:encoding($utf_nobom) eq $utf");
+ unlink $sfile; $seq++;
+ }
+}
sub dump2file{
no warnings;
open my $fh, ">", $_[0] or die "$_[0]: $!";
diff --git a/ext/PerlIO/encoding/encoding.pm b/ext/PerlIO/encoding/encoding.pm
index 53c9d460c7..61a116f205 100644
--- a/ext/PerlIO/encoding/encoding.pm
+++ b/ext/PerlIO/encoding/encoding.pm
@@ -1,6 +1,6 @@
package PerlIO::encoding;
use strict;
-our $VERSION = '0.06';
+our $VERSION = '0.07';
our $DEBUG = 0;
$DEBUG and warn __PACKAGE__, " called by ", join(", ", caller), "\n";