summaryrefslogtreecommitdiff
path: root/ext/Encode
diff options
context:
space:
mode:
authorSADAHIRO Tomoyuki <BQW10602@nifty.com>2001-10-16 10:50:16 +0900
committerJarkko Hietaniemi <jhi@iki.fi>2001-10-19 19:01:46 +0000
commit96d6357cc349e1e67175ef1b324a18e782e390b7 (patch)
treee4f671e34ac03a9dce5f38b08f431dd05e48cc24 /ext/Encode
parent2cd1776c1779fda5b6df8baba77742b2f09eb9e2 (diff)
downloadperl-96d6357cc349e1e67175ef1b324a18e782e390b7.tar.gz
Re: PerlIO and Encode
Message-Id: <20011016014150.0C8E.BQW10602@nifty.com> p4raw-id: //depot/perl@12509
Diffstat (limited to 'ext/Encode')
-rw-r--r--ext/Encode/Encode.pm2
-rw-r--r--ext/Encode/Encode/Tcl.pm520
-rw-r--r--ext/Encode/Encode/Tcl.t67
3 files changed, 388 insertions, 201 deletions
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index 2035e20c15..6ddcb32132 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -225,7 +225,7 @@ sub decode
my $enc = find_encoding($name);
croak("Unknown encoding '$name'") unless defined $enc;
my $string = $enc->decode($octets,$check);
- return undef if ($check && length($octets));
+ $_[1] = $octets if $check;
return $string;
}
diff --git a/ext/Encode/Encode/Tcl.pm b/ext/Encode/Encode/Tcl.pm
index eb13c5f4fc..460a521bb9 100644
--- a/ext/Encode/Encode/Tcl.pm
+++ b/ext/Encode/Encode/Tcl.pm
@@ -40,6 +40,23 @@ sub import
INC_search();
}
+sub no_map_in_encode ($$)
+ # codepoint, enc-name;
+{
+ carp sprintf "\"\\N{U+%x}\" does not map to %s", @_;
+# /* FIXME: Skip over the character, copy in replacement and continue
+# * but that is messy so for now just fail.
+# */
+ return;
+}
+
+sub no_map_in_decode ($$)
+ # enc-name, string beginning the malform char;
+{
+# /* UTF-8 is supposed to be "Universal" so should not happen */
+ croak sprintf "%s '%s' does not map to UTF-8", @_;
+}
+
sub encode
{
my $obj = shift;
@@ -78,11 +95,11 @@ sub loadEncoding
$type = substr($line,0,1);
last unless $type eq '#';
}
- my $class = ref($obj).('::'.(
- ($type eq 'X') ? 'Extended' :
- ($type eq 'H') ? 'HanZi' :
- ($type eq 'E') ? 'Escape' : 'Table'
- ));
+ my $subclass =
+ ($type eq 'X') ? 'Extended' :
+ ($type eq 'H') ? 'HanZi' :
+ ($type eq 'E') ? 'Escape' : 'Table';
+ my $class = ref($obj) . '::' . $subclass;
# carp "Loading $file";
bless $obj,$class;
return $obj if $obj->read($fh,$obj->name,$type);
@@ -109,7 +126,8 @@ sub INC_find
package Encode::Tcl::Table;
use base 'Encode::Encoding';
-use Data::Dumper;
+use Carp;
+#use Data::Dumper;
sub read
{
@@ -150,8 +168,12 @@ sub read
}
$touni[$page] = \@page;
}
- $rep = $type ne 'M' ? $obj->can("rep_$type") :
- sub { ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C'};
+ $rep = $type ne 'M'
+ ? $obj->can("rep_$type")
+ : sub
+ {
+ ($_[0] > 255) || $leading[$_[0]] ? 'n' : 'C';
+ };
$obj->{'Rep'} = $rep;
$obj->{'ToUni'} = \@touni;
$obj->{'FmUni'} = \%fmuni;
@@ -175,13 +197,15 @@ sub representation
sub decode
{
- my ($obj,$str,$chk) = @_;
+ my($obj,$str,$chk) = @_;
+ my $name = $obj->{'Name'};
my $rep = $obj->{'Rep'};
my $touni = $obj->{'ToUni'};
my $uni;
while (length($str))
{
- my $ch = ord(substr($str,0,1,''));
+ my $cc = substr($str,0,1,'');
+ my $ch = ord($cc);
my $x;
if (&$rep($ch) eq 'C')
{
@@ -189,13 +213,18 @@ sub decode
}
else
{
- $x = $touni->[$ch][ord(substr($str,0,1,''))];
+ if(! length $str)
+ {
+ $str = pack('C',$ch); # split leading byte
+ last;
+ }
+ my $c2 = substr($str,0,1,'');
+ $cc .= $c2;
+ $x = $touni->[$ch][ord($c2)];
}
unless (defined $x)
{
- last if $chk;
- # What do we do here ?
- $x = '';
+ Encode::Tcl::no_map_in_decode($name, $cc.$str);
}
$uni .= $x;
}
@@ -209,16 +238,20 @@ sub encode
my ($obj,$uni,$chk) = @_;
my $fmuni = $obj->{'FmUni'};
my $def = $obj->{'Def'};
+ my $name = $obj->{'Name'};
my $rep = $obj->{'Rep'};
my $str;
while (length($uni))
{
my $ch = substr($uni,0,1,'');
- my $x = $fmuni->{chr(ord($ch))};
- unless (defined $x)
+ my $x = $fmuni->{$ch};
+ unless(defined $x)
{
- last if ($chk);
- $x = $def;
+ unless($chk)
+ {
+ Encode::Tcl::no_map_in_encode(ord($ch), $name)
+ }
+ return undef;
}
$str .= pack(&$rep($x),$x);
}
@@ -231,29 +264,41 @@ use base 'Encode::Encoding';
use Carp;
+use constant SI => "\cO";
+use constant SO => "\cN";
+use constant SS2 => "\eN";
+use constant SS3 => "\eO";
+
sub read
{
my ($obj,$fh,$name) = @_;
my(%tbl, @seq, $enc, @esc, %grp);
while (<$fh>)
{
- my ($key,$val) = /^(\S+)\s+(.*)$/;
+ next unless /^(\S+)\s+(.*)$/;
+ my ($key,$val) = ($1,$2);
$val =~ s/^\{(.*?)\}/$1/g;
$val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
- if($enc = Encode->getEncoding($key)){
+ if($enc = Encode->getEncoding($key))
+ {
$tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
push @seq, $val;
$grp{$val} =
- $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
- $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
- $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
- $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
- 0; # G0
- }else{
+ $val =~ m|[(]| ? 0 : # G0 : SI eq "\cO"
+ $val =~ m|[)-]| ? 1 : # G1 : SO eq "\cN"
+ $val =~ m|[*.]| ? 2 : # G2 : SS2 eq "\eN"
+ $val =~ m|[+/]| ? 3 : # G3 : SS3 eq "\eO"
+ 0; # G0
+ }
+ else
+ {
$obj->{$key} = $val;
- }
- if($val =~ /^\e(.*)/){ push(@esc, quotemeta $1) }
+ }
+ if($val =~ /^\e(.*)/)
+ {
+ push(@esc, quotemeta $1);
+ }
}
$obj->{'Grp'} = \%grp; # graphic chars
$obj->{'Seq'} = \@seq; # escape sequences
@@ -265,6 +310,7 @@ sub read
sub decode
{
my ($obj,$str,$chk) = @_;
+ my $name = $obj->{'Name'};
my $tbl = $obj->{'Tbl'};
my $seq = $obj->{'Seq'};
my $grp = $obj->{'Grp'};
@@ -277,45 +323,57 @@ sub decode
my $s = 0; # state of SO-SI. 0 (G0) or 1 (G1);
my $ss = 0; # state of SS2,SS3. 0 (G0), 2 (G2) or 3 (G3);
my $uni;
- while (length($str)){
- my $uch = substr($str,0,1,'');
- if($uch eq "\e"){
- if($str =~ s/^($esc)//)
- {
- my $e = "\e$1";
- $sta[ $grp->{$e} ] = $e if $tbl->{$e};
- }
+ while (length($str))
+ {
+ my $cc = substr($str,0,1,'');
+ if($cc eq "\e")
+ {
+ if($str =~ s/^($esc)//)
+ {
+ my $e = "\e$1";
+ $sta[ $grp->{$e} ] = $e if $tbl->{$e};
+ }
# appearance of "\eN\eO" or "\eO\eN" isn't supposed.
- elsif($str =~ s/^N//)
- {
- $ss = 2;
- }
- elsif($str =~ s/^O//)
- {
- $ss = 3;
- }
- else
- {
- $str =~ s/^([\x20-\x2F]*[\x30-\x7E])//;
- carp "unknown escape sequence: ESC $1";
- }
- next;
- }
- if($uch eq "\x0e"){
- $s = 1; next;
- }
- if($uch eq "\x0f"){
- $s = 0; next;
- }
+ # but in that case, the former will be ignored.
+ elsif($str =~ s/^N//)
+ {
+ $ss = 2;
+ }
+ elsif($str =~ s/^O//)
+ {
+ $ss = 3;
+ }
+ else
+ {
+ # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
+ $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
+ if($chk && ! length $str)
+ {
+ $str = "\e$1"; # split sequence
+ last;
+ }
+ croak "unknown escape sequence: ESC $1";
+ }
+ next;
+ }
+ if($cc eq SO)
+ {
+ $s = 1; next;
+ }
+ if($cc eq SI)
+ {
+ $s = 0; next;
+ }
$cur = $ss ? $sta[$ss] : $sta[$s];
- if(ref($tbl->{$cur}) eq 'Encode::XS'){
- $uni .= $tbl->{$cur}->decode($uch);
+ if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
+ {
+ $uni .= $tbl->{$cur}->decode($cc);
$ss = 0;
next;
- }
- my $ch = ord($uch);
+ }
+ my $ch = ord($cc);
my $rep = $tbl->{$cur}->{'Rep'};
my $touni = $tbl->{$cur}->{'ToUni'};
my $x;
@@ -325,24 +383,36 @@ sub decode
}
else
{
- $x = $touni->[$ch][ord(substr($str,0,1,''))];
+ if(! length $str)
+ {
+ $str = $cc; # split leading byte
+ last;
+ }
+ my $c2 = substr($str,0,1,'');
+ $cc .= $c2;
+ $x = $touni->[$ch][ord($c2)];
}
unless (defined $x)
{
- last if $chk;
- # What do we do here ?
- $x = '';
+ Encode::Tcl::no_map_in_decode($name, $cc.$str);
}
$uni .= $x;
$ss = 0;
}
- $_[1] = $str if $chk;
- return $uni;
+ if($chk)
+ {
+ my $back = join('', grep defined($_) && $_ ne $std, @sta);
+ $back .= SO if $s;
+ $back .= $ss == 2 ? SS2 : SS3 if $ss;
+ $_[1] = $back.$str;
+ }
+ return $uni;
}
sub encode
{
my ($obj,$uni,$chk) = @_;
+ my $name = $obj->{'Name'};
my $tbl = $obj->{'Tbl'};
my $seq = $obj->{'Seq'};
my $grp = $obj->{'Grp'};
@@ -357,39 +427,45 @@ sub encode
if($ini && defined $grp->{$ini})
{
- $sta[ $grp->{$ini} ] = $ini;
+ $sta[ $grp->{$ini} ] = $ini;
}
- while (length($uni)){
- my $ch = substr($uni,0,1,'');
- my $x;
- foreach my $e_seq (@$seq){
- $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
- ? $tbl->{$e_seq}->encode($ch,1)
- : $tbl->{$e_seq}->{FmUni}->{$ch};
- $cur = $e_seq, last if defined $x;
- }
- if(ref($tbl->{$cur}) ne 'Encode::XS')
- {
- my $def = $tbl->{$cur}->{'Def'};
- my $rep = $tbl->{$cur}->{'Rep'};
- unless (defined $x){
- last if ($chk);
- $x = $def;
+ while (length($uni))
+ {
+ my $ch = substr($uni,0,1,'');
+ my $x;
+ foreach my $e_seq (@$seq)
+ {
+ $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table'
+ ? $tbl->{$e_seq}->{FmUni}->{$ch}
+ : $tbl->{$e_seq}->encode($ch,1);
+ $cur = $e_seq, last if defined $x;
}
- $x = pack(&$rep($x),$x);
+ unless (defined $x)
+ {
+ unless($chk)
+ {
+ Encode::Tcl::no_map_in_encode(ord($ch), $name)
+ }
+ return undef;
}
- $cG = $grp->{$cur};
- $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
-
- $str .= $cG == 0 && $pG == 1 ? "\cO" :
- $cG == 1 && $pG == 0 ? "\cN" :
- $cG == 2 ? "\eN" :
- $cG == 3 ? "\eO" : "";
- $str .= $x;
- $pG = $cG if $cG < 2;
- }
- $str .= "\cO" if $pG == 1; # back to G0
+ if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
+ {
+ my $def = $tbl->{$cur}->{'Def'};
+ my $rep = $tbl->{$cur}->{'Rep'};
+ $x = pack(&$rep($x),$x);
+ }
+ $cG = $grp->{$cur};
+ $str .= $sta[$cG] = $cur unless $cG < 2 && $cur eq $sta[$cG];
+
+ $str .= $cG == 0 && $pG == 1 ? SI :
+ $cG == 1 && $pG == 0 ? SO :
+ $cG == 2 ? SS2 :
+ $cG == 3 ? SS3 : "";
+ $str .= $x;
+ $pG = $cG if $cG < 2;
+ }
+ $str .= SI if $pG == 1; # back to G0
$str .= $std unless $std eq $sta[0]; # GO to ASCII
$str .= $fin; # necessary?
$_[1] = $uni if $chk;
@@ -408,18 +484,21 @@ sub read
my(%tbl, $enc, %ssc, @key);
while (<$fh>)
{
- my ($key,$val) = /^(\S+)\s+(.*)$/;
+ next unless /^(\S+)\s+(.*)$/;
+ my ($key,$val) = ($1,$2);
$val =~ s/\{(.*?)\}/$1/;
$val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
- if($enc = Encode->getEncoding($key)){
+ if($enc = Encode->getEncoding($key))
+ {
push @key, $val;
- $tbl{$val} = ref($enc) eq 'Encode::Tcl'
- ? $enc->loadEncoding : $enc;
+ $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
$ssc{$val} = substr($val,1) if $val =~ /^>/;
- }else{
+ }
+ else
+ {
$obj->{$key} = $val;
- }
+ }
}
$obj->{'SSC'} = \%ssc; # single shift char
$obj->{'Tbl'} = \%tbl; # encoding tables
@@ -430,25 +509,28 @@ sub read
sub decode
{
my ($obj,$str,$chk) = @_;
- my $tbl = $obj->{'Tbl'};
- my $ssc = $obj->{'SSC'};
+ my $name = $obj->{'Name'};
+ my $tbl = $obj->{'Tbl'};
+ my $ssc = $obj->{'SSC'};
my $cur = ''; # current state
my $uni;
- while (length($str)){
- my $uch = substr($str,0,1,'');
- my $ch = ord($uch);
+ while (length($str))
+ {
+ my $cc = substr($str,0,1,'');
+ my $ch = ord($cc);
if(!$cur && $ch > 0x7F)
{
$cur = '>';
- $cur .= $uch, next if $ssc->{$cur.$uch};
+ $cur .= $cc, next if $ssc->{$cur.$cc};
}
$ch ^= 0x80 if $cur;
- if(ref($tbl->{$cur}) eq 'Encode::XS'){
- $uni .= $tbl->{$cur}->decode(chr($ch));
+ if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
+ {
+ $uni .= $tbl->{$cur}->decode($cc);
$cur = '';
next;
- }
+ }
my $rep = $tbl->{$cur}->{'Rep'};
my $touni = $tbl->{$cur}->{'ToUni'};
my $x;
@@ -458,59 +540,74 @@ sub decode
}
else
{
- $x = $touni->[$ch][0x80 ^ ord(substr($str,0,1,''))];
+ if(! length $str)
+ {
+ $str = $cc; # split leading byte
+ last;
+ }
+ my $c2 = substr($str,0,1,'');
+ $cc .= $c2;
+ $x = $touni->[$ch][0x80 ^ ord($c2)];
}
unless (defined $x)
{
- last if $chk;
- # What do we do here ?
- $x = '';
+ Encode::Tcl::no_map_in_decode($name, $cc.$str);
}
$uni .= $x;
$cur = '';
}
- $_[1] = $str if $chk;
+ if($chk)
+ {
+ $cur =~ s/>//;
+ $_[1] = $cur ne '' ? $cur.$str : $str;
+ }
return $uni;
}
sub encode
{
my ($obj,$uni,$chk) = @_;
+ my $name = $obj->{'Name'};
my $tbl = $obj->{'Tbl'};
my $ssc = $obj->{'SSC'};
my $key = $obj->{'Key'};
my $str;
my $cur;
- while (length($uni)){
- my $ch = substr($uni,0,1,'');
- my $x;
- foreach my $k (@$key){
- $x = ref($tbl->{$k}) eq 'Encode::XS'
- ? $k =~ /^>/
- ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1)
- : $tbl->{$k}->encode($ch,1)
- : $tbl->{$k}->{FmUni}->{$ch};
- $cur = $k, last if defined $x;
- }
- if(ref($tbl->{$cur}) ne 'Encode::XS')
- {
- my $def = $tbl->{$cur}->{'Def'};
- my $rep = $tbl->{$cur}->{'Rep'};
- unless (defined $x){
- last if ($chk);
- $x = $def;
- }
- my $r = &$rep($x);
- $x = pack($r,
+ while (length($uni))
+ {
+ my $ch = substr($uni,0,1,'');
+ my $x;
+ foreach my $k (@$key)
+ {
+ $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table'
+ ? $k =~ /^>/
+ ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1)
+ : $tbl->{$k}->encode($ch,1)
+ : $tbl->{$k}->{FmUni}->{$ch};
+ $cur = $k, last if defined $x;
+ }
+ unless (defined $x)
+ {
+ unless($chk)
+ {
+ Encode::Tcl::no_map_in_encode(ord($ch), $name)
+ }
+ return undef;
+ }
+ if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
+ {
+ my $def = $tbl->{$cur}->{'Def'};
+ my $rep = $tbl->{$cur}->{'Rep'};
+ my $r = &$rep($x);
+ $x = pack($r,
$cur =~ /^>/
? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x
: $x);
- }
-
- $str .= $ssc->{$cur} if defined $ssc->{$cur};
- $str .= $x;
- }
+ }
+ $str .= $ssc->{$cur} if defined $ssc->{$cur};
+ $str .= $x;
+ }
$_[1] = $uni if $chk;
return $str;
}
@@ -526,15 +623,19 @@ sub read
my(%tbl, @seq, $enc);
while (<$fh>)
{
- my ($key,$val) = /^(\S+)\s+(.*)$/;
+ next unless /^(\S+)\s+(.*)$/;
+ my ($key,$val) = ($1,$2);
$val =~ s/^\{(.*?)\}/$1/g;
$val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
- if($enc = Encode->getEncoding($key)){
+ if($enc = Encode->getEncoding($key))
+ {
$tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc;
push @seq, $val;
- }else{
+ }
+ else
+ {
$obj->{$key} = $val;
- }
+ }
}
$obj->{'Seq'} = \@seq; # escape sequences
$obj->{'Tbl'} = \%tbl; # encoding tables
@@ -544,39 +645,47 @@ sub read
sub decode
{
my ($obj,$str,$chk) = @_;
+ my $name = $obj->{'Name'};
my $tbl = $obj->{'Tbl'};
my $seq = $obj->{'Seq'};
my $std = $seq->[0];
my $cur = $std;
my $uni;
while (length($str)){
- my $uch = substr($str,0,1,'');
- if($uch eq "~"){
- if($str =~ s/^\cJ//)
- {
- next;
- }
- elsif($str =~ s/^\~//)
- {
- 1;
- }
- elsif($str =~ s/^([{}])//)
- {
- $cur = "~$1";
- next;
- }
- else
- {
- $str =~ s/^([^~])//;
- carp "unknown HanZi escape sequence: ~$1";
- next;
- }
- }
- if(ref($tbl->{$cur}) eq 'Encode::XS'){
- $uni .= $tbl->{$cur}->decode($uch);
+ my $cc = substr($str,0,1,'');
+ if($cc eq "~")
+ {
+ if($str =~ s/^\cJ//)
+ {
+ next;
+ }
+ elsif($str =~ s/^\~//)
+ {
+ 1; # no-op
+ }
+ elsif($str =~ s/^([{}])//)
+ {
+ $cur = "~$1";
+ next;
+ }
+ elsif(! length $str)
+ {
+ $str = '~';
+ last;
+ }
+ else
+ {
+ $str =~ s/^([^~])//;
+ croak "unknown HanZi escape sequence: ~$1";
+ next;
+ }
+ }
+ if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table')
+ {
+ $uni .= $tbl->{$cur}->decode($cc);
next;
- }
- my $ch = ord($uch);
+ }
+ my $ch = ord($cc);
my $rep = $tbl->{$cur}->{'Rep'};
my $touni = $tbl->{$cur}->{'ToUni'};
my $x;
@@ -586,23 +695,32 @@ sub decode
}
else
{
- $x = $touni->[$ch][ord(substr($str,0,1,''))];
+ if(! length $str)
+ {
+ $str = $cc; # split leading byte
+ last;
+ }
+ my $c2 = substr($str,0,1,'');
+ $cc .= $c2;
+ $x = $touni->[$ch][ord($c2)];
}
unless (defined $x)
{
- last if $chk;
- # What do we do here ?
- $x = '';
+ Encode::Tcl::no_map_in_decode($name, $cc.$str);
}
$uni .= $x;
}
- $_[1] = $str if $chk;
+ if($chk)
+ {
+ $_[1] = $cur eq $std ? $str : $cur.$str;
+ }
return $uni;
}
sub encode
{
my ($obj,$uni,$chk) = @_;
+ my $name = $obj->{'Name'};
my $tbl = $obj->{'Tbl'};
my $seq = $obj->{'Seq'};
my $std = $seq->[0];
@@ -610,28 +728,34 @@ sub encode
my $pre = $std;
my $cur = $pre;
- while (length($uni)){
- my $ch = chr(ord(substr($uni,0,1,'')));
- my $x;
- foreach my $e_seq (@$seq){
- $x = ref($tbl->{$e_seq}) eq 'Encode::XS'
- ? $tbl->{$e_seq}->encode($ch,1)
- : $tbl->{$e_seq}->{FmUni}->{$ch};
- $cur = $e_seq and last if defined $x;
- }
- if(ref($tbl->{$cur}) ne 'Encode::XS')
- {
- my $def = $tbl->{$cur}->{'Def'};
- my $rep = $tbl->{$cur}->{'Rep'};
- unless (defined $x){
- last if ($chk);
- $x = $def;
+ while (length($uni))
+ {
+ my $ch = substr($uni,0,1,'');
+ my $x;
+ foreach my $e_seq (@$seq)
+ {
+ $x = ref($tbl->{$e_seq}) eq 'Encode::Tcl::Table'
+ ? $tbl->{$e_seq}->{FmUni}->{$ch}
+ : $tbl->{$e_seq}->encode($ch,1);
+ $cur = $e_seq and last if defined $x;
}
- $x = pack(&$rep($x),$x);
- }
- $str .= $cur eq $pre ? $x : ($pre = $cur).$x;
- $str .= '~' if $x eq '~'; # to '~~'
- }
+ unless (defined $x)
+ {
+ unless($chk)
+ {
+ Encode::Tcl::no_map_in_encode(ord($ch), $name)
+ }
+ return undef;
+ }
+ if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table')
+ {
+ my $def = $tbl->{$cur}->{'Def'};
+ my $rep = $tbl->{$cur}->{'Rep'};
+ $x = pack(&$rep($x),$x);
+ }
+ $str .= $cur eq $pre ? $x : ($pre = $cur).$x;
+ $str .= '~' if $x eq '~'; # to '~~'
+ }
$str .= $std unless $cur eq $std;
$_[1] = $uni if $chk;
return $str;
diff --git a/ext/Encode/Encode/Tcl.t b/ext/Encode/Encode/Tcl.t
index 7e01ca6c13..950f658f90 100644
--- a/ext/Encode/Encode/Tcl.t
+++ b/ext/Encode/Encode/Tcl.t
@@ -1,6 +1,6 @@
BEGIN {
chdir 't' if -d 't';
- @INC = '../lib';
+# @INC = '../lib';
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
@@ -88,8 +88,41 @@ my @hz_txt = (
my $hz_exp = '007e0069006e002000470042002e5df162404e0d6b32'
. 'ff0c52ff65bd65bc4eba3002004200790065002e007e';
+use constant BUFSIZ => 64; # for test
+use constant hiragana => "\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}";
+use constant han_kana => "\x{FF71}\x{FF72}\x{FF73}\x{FF74}\x{FF75}";
+use constant macron => "\x{0100}\x{0112}\x{012a}\x{014c}\x{016a}";
+use constant TAIL => 'bbb';
+use constant YES => 1;
+
+my @ary_buff = ( # [ encoding, decoded, encoded ]
+# type-M
+ ["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
+ ["euc-jp", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
+ ["euc-jp", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
+ ["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
+ ["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
+ ["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
+# type-E
+ ["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
+ ["2022-jp", hiragana, "\e\$B".'$"$$$&$($*'."\e(B" ],
+ ["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
+ [ $jis, han_kana, "\e\(I".'12345'."\e(B" ],
+ ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
+ ["2022-jp2", "\x{C0}" . macron . "\x{C1}",
+ "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
+# type-X
+ ["euc-jp-0212", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
+ ["euc-jp-0212", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
+ ["euc-jp-0212", macron,
+ "\x8F\xAA\xA7\x8F\xAA\xB7\x8F\xAA\xC5\x8F\xAA\xD7\x8F\xAA\xE9" ],
+# type-H
+ [ $hz, hiragana, "~{". '$"$$$&$($*' . "~}" ],
+ [ $hz, hiragana, "~{". '$"$$' ."~\cJ". '$&$($*' . "~}" ],
+);
+
plan test => $n*@encodings + $n*@encodings*@greek
- + $n*@encodings*@ideodigit + $num_esc + $n + @hz_txt;
+ + $n*@encodings*@ideodigit + $num_esc + $n + @hz_txt + @ary_buff;
foreach my $enc (@encodings)
{
@@ -189,3 +222,33 @@ foreach my $enc (@encodings)
}
}
}
+
+for my $ary (@ary_buff) {
+ my $NG = 0;
+ my $enc = $ary->[0];
+ for my $n ( int(BUFSIZ/2) .. 2*BUFSIZ+4 ){
+ my $dst = "a"x$n. $ary->[1] . TAIL;
+ my $src = "a"x$n. $ary->[2] . TAIL;
+ my $utf = buff_decode($enc, $src);
+ $NG++ unless $dst eq $utf;
+ }
+ ok($NG, 0, "$enc mangled translating to Unicode");
+}
+
+sub buff_decode {
+ my($enc, $str) = @_;
+ my $utf8 = '';
+ my $inconv = '';
+ while(length $str){
+ my $buff = $inconv.substr($str,0,BUFSIZ - length $inconv,'');
+ my $decoded = decode($enc, $buff, YES);
+ if(length $decoded){
+ $utf8 .= $decoded;
+ $inconv = $buff;
+ } else {
+ last; # malformed?
+ }
+ }
+ return $utf8;
+}
+