diff options
-rw-r--r-- | MANIFEST | 4 | ||||
-rw-r--r-- | ext/Encode/Encode/euc-jp-0212.enc | 7 | ||||
-rw-r--r-- | ext/Encode/MANIFEST | 2 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/Tcl/Escape.pm | 8 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/Tcl/Extended.pm | 171 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/Tcl/Table.pm | 2 | ||||
-rw-r--r-- | ext/Encode/t/Tcl.t | 27 |
7 files changed, 7 insertions, 214 deletions
@@ -289,8 +289,7 @@ ext/Encode/Encode/cp950.enc Encode table ext/Encode/Encode/dingbats.enc Encode table ext/Encode/Encode/dingbats.ucm Encode table ext/Encode/Encode/euc-cn.enc Encode table -ext/Encode/Encode/euc-jp+0212.ucm Encode extension -ext/Encode/Encode/euc-jp-0212.enc Encode table +ext/Encode/Encode/euc-jp+0212.ucm Encode table ext/Encode/Encode/euc-jp.enc Encode table ext/Encode/Encode/euc-jp.ucm Encode table ext/Encode/Encode/euc-kr.enc Encode table @@ -349,7 +348,6 @@ ext/Encode/lib/Encode/JP/ISO_2022_JP.pm Encode extension ext/Encode/lib/Encode/JP/JIS.pm Encode extension ext/Encode/lib/Encode/Tcl.pm Encode extension ext/Encode/lib/Encode/Tcl/Escape.pm Encode extension -ext/Encode/lib/Encode/Tcl/Extended.pm Encode extension ext/Encode/lib/Encode/Tcl/Table.pm Encode extension ext/Encode/lib/Encode/ucs2_le.pm Encode extension ext/Encode/lib/Encode/Unicode.pm Encode extension diff --git a/ext/Encode/Encode/euc-jp-0212.enc b/ext/Encode/Encode/euc-jp-0212.enc deleted file mode 100644 index 23d7325e9b..0000000000 --- a/ext/Encode/Encode/euc-jp-0212.enc +++ /dev/null @@ -1,7 +0,0 @@ -# Encoding file: euc-jp-0212, extended -X -name euc-jp-0212 -ascii {} -jis0208 >{} -7bit-kana >\x8e -jis0212 >\x8f diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index 9c79175341..71753735e0 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -88,7 +88,6 @@ Encode/cp950.enc Encode/dingbats.enc Encode/dingbats.ucm Encode/euc-cn.enc -Encode/euc-jp-0212.enc Encode/euc-jp.enc Encode/euc-jp.ucm Encode/euc-jp+0212.ucm @@ -155,7 +154,6 @@ lib/Encode/JP/ISO_2022_JP.pm lib/Encode/JP/JIS.pm lib/Encode/Tcl.pm lib/Encode/Tcl/Escape.pm -lib/Encode/Tcl/Extended.pm lib/Encode/Tcl/Table.pm lib/Encode/Unicode.pm lib/Encode/XS.pm diff --git a/ext/Encode/lib/Encode/Tcl/Escape.pm b/ext/Encode/lib/Encode/Tcl/Escape.pm index b6908b6476..1571b8cfcc 100644 --- a/ext/Encode/lib/Encode/Tcl/Escape.pm +++ b/ext/Encode/lib/Encode/Tcl/Escape.pm @@ -32,7 +32,7 @@ sub read $val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported) $val =~ /[\x40-\x5F]$/ ? 2 : # double byte $val =~ /[\x60-\x6F]$/ ? 3 : # triple byte - $val =~ /[\x70-\x7F]$/ ? 4 : + $val =~ /[\x70-\x7E]$/ ? 4 : # 4 or more (only 4 is supported) croak("odd sequence is defined"); @@ -97,8 +97,8 @@ sub decode } else { - # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped. - $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//; + # strictly, ([\x21-\x2F]*[\x30-\x7E]). '?' for chopped. + $str =~ s/^([\x21-\x2F]*[\x30-\x7E]?)//; if ($chk && ! length $str) { $str = "\e$1"; # split sequence @@ -216,7 +216,7 @@ and the following: SINGLE SHIFT TWO (SS2) ESC 04/14 SINGLE SHIFT THREE (SS3) ESC 04/15 -Designation of control character sets are not supported. +Designation of control character sets is not supported. =head1 SEE ALSO diff --git a/ext/Encode/lib/Encode/Tcl/Extended.pm b/ext/Encode/lib/Encode/Tcl/Extended.pm deleted file mode 100644 index f4d669b336..0000000000 --- a/ext/Encode/lib/Encode/Tcl/Extended.pm +++ /dev/null @@ -1,171 +0,0 @@ -package Encode::Tcl::Extended; -use strict; -our $VERSION = do { my @r = (q$Revision: 0.90 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; - -use base 'Encode::Encoding'; - -use Carp; - -sub read -{ - my ($obj,$fh,$name) = @_; - my(%tbl, $enc, %ssc, @key); - while (<$fh>) - { - next unless /^(\S+)\s+(.*)$/; - my ($key,$val) = ($1,$2); - $val =~ s/\{(.*?)\}/$1/; - $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; - - if($enc = Encode->getEncoding($key)) - { - push @key, $val; - $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; - $ssc{$val} = substr($val,1) if $val =~ /^>/; - } - else - { - $obj->{$key} = $val; - } - } - $obj->{'SSC'} = \%ssc; # single shift char - $obj->{'Tbl'} = \%tbl; # encoding tables - $obj->{'Key'} = \@key; # keys of table hash - return $obj; -} - -sub decode -{ - my ($obj,$str,$chk) = @_; - my $name = $obj->{'Name'}; - my $tbl = $obj->{'Tbl'}; - my $ssc = $obj->{'SSC'}; - my $cur = ''; # current state - my $uni; - while (length($str)) - { - my $cc = substr($str,0,1,''); - my $ch = ord($cc); - if(!$cur && $ch > 0x7F) - { - $cur = '>'; - $cur .= $cc, next if $ssc->{$cur.$cc}; - } - $ch ^= 0x80 if $cur; - - if(ref($tbl->{$cur}) ne 'Encode::Tcl::Table') - { - $uni .= $tbl->{$cur}->decode($cc); - $cur = ''; - next; - } - my $rep = $tbl->{$cur}->{'Rep'}; - my $touni = $tbl->{$cur}->{'ToUni'}; - my $x; - if (&$rep($ch) eq 'C') - { - $x = $touni->[0][$ch]; - } - else - { - if(! length $str) - { - $str = $cc; # split leading byte - last; - } - my $c2 = substr($str,0,1,''); - $cc .= $c2; - $x = $touni->[$ch][0x80 ^ ord($c2)]; - } - unless (defined $x) - { - Encode::Tcl::no_map_in_decode($name, $cc.$str); - } - $uni .= $x; - $cur = ''; - } - if($chk) - { - $cur =~ s/>//; - $_[1] = $cur ne '' ? $cur.$str : $str; - } - return $uni; -} - -sub encode -{ - my ($obj,$uni,$chk) = @_; - my $name = $obj->{'Name'}; - my $tbl = $obj->{'Tbl'}; - my $ssc = $obj->{'SSC'}; - my $key = $obj->{'Key'}; - my $str; - my $cur; - - while (length($uni)) - { - my $ch = substr($uni,0,1,''); - my $x; - foreach my $k (@$key) - { - $x = ref($tbl->{$k}) ne 'Encode::Tcl::Table' - ? $k =~ /^>/ - ? $tbl->{$k}->encode(chr(0x80 ^ ord $ch),1) - : $tbl->{$k}->encode($ch,1) - : $tbl->{$k}->{FmUni}->{$ch}; - $cur = $k, last if defined $x; - } - unless (defined $x) - { - unless($chk) - { - Encode::Tcl::no_map_in_encode(ord($ch), $name) - } - return undef; - } - if(ref($tbl->{$cur}) eq 'Encode::Tcl::Table') - { - my $def = $tbl->{$cur}->{'Def'}; - my $rep = $tbl->{$cur}->{'Rep'}; - my $r = &$rep($x); - $x = pack($r, - $cur =~ /^>/ - ? $r eq 'C' ? 0x80 ^ $x : 0x8080 ^ $x - : $x); - } - $str .= $ssc->{$cur} if defined $ssc->{$cur}; - $str .= $x; - } - $_[1] = $uni if $chk; - return $str; -} -1; -__END__ - -=head1 NAME - -Encode::Tcl::Extended - Tcl EUC encodings - -=head1 SYNOPSIS - -none - -=head1 DESCRIPTION - -This module is used internally by Encode::Tcl -and handles type X of Tcl encodings (a Perl extenstion). - -Only F<euc-jp-0212.enc> belongs to type X. -This is a variant of EUC-JP with JIS X 0212 in G3. -If another Encode:: module would support the above encoding, -this module should be removed. - -=head1 SEE ALSO - -L<Encode> - -L<Encode::Tcl> - -L<Encode::JP> - -=cut diff --git a/ext/Encode/lib/Encode/Tcl/Table.pm b/ext/Encode/lib/Encode/Tcl/Table.pm index e849e288a4..26a7a10128 100644 --- a/ext/Encode/lib/Encode/Tcl/Table.pm +++ b/ext/Encode/lib/Encode/Tcl/Table.pm @@ -152,7 +152,7 @@ This module is used internally by Encode::Tcl and handles types S, D, and M of Tcl encodings. Implementation for type M is restricted to encodings -in which bytes per a character is up to 2. +in which the number of bytes per a character is up to 2. =head1 SEE ALSO diff --git a/ext/Encode/t/Tcl.t b/ext/Encode/t/Tcl.t index 294bedef5f..96dc2141f4 100644 --- a/ext/Encode/t/Tcl.t +++ b/ext/Encode/t/Tcl.t @@ -15,12 +15,11 @@ use Test; use Encode qw(encode decode); use Encode::Tcl; -my @encodings = qw(euc-cn euc-jp euc-kr big5 shiftjis); # CJK +my @encodings = qw(euc-cn euc-kr big5 shiftjis); # CJK my $n = 2; my %greek = ( 'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8], - 'euc-jp' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8], 'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8], 'big5' => [0xA344..0xA35B,0xA35C..0xA373], 'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6], @@ -37,7 +36,6 @@ my @greek = qw( my %ideodigit = ( # cjk ideograph 'one' to 'ten' 'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)], - 'euc-jp' => [qw(b0ec c6f3 bbb0 bbcd b8de cfbb bcb7 c8ac b6e5 bdbd)], 'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)], 'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)], 'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)], @@ -45,23 +43,9 @@ my %ideodigit = ( # cjk ideograph 'one' to 'ten' ); my @ideodigit = qw(one two three four five six seven eight nine ten); -my $jis = '7bit-jis'; my $kr = '2022-kr'; my %esc_str; -$esc_str{$jis} = {qw( - 1b24422422242424262428242a1b2842 - 3042304430463048304a - 1b284931323334355d1b2842 - ff71ff72ff73ff74ff75ff9d - 1b2442467c4b5c1b2842 - 65e5672c - 3132331b244234413b7a1b28425065726c - 0031003200336f225b57005000650072006c - 546573740a1b24422546253925481b28420a - 0054006500730074000a30c630b930c8000a -)}; - $esc_str{$kr} = {qw( 1b2429430e2a22213e0f410d0a 304200b10041000d000a @@ -84,24 +68,15 @@ use constant YES => 1; my @ary_buff = ( # [ encoding, decoded, encoded ] # type-M ["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ], - ["euc-jp", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ], - ["euc-jp", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ], ["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ], ["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ], ["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ], # type-E ["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ], - ["2022-jp", hiragana, "\e\$B".'$"$$$&$($*'."\e(B" ], ["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ], -# [ $jis, han_kana, "\e\(I".'12345'."\e(B" ], ["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"], ["2022-jp2", "\x{C0}" . macron . "\x{C1}", "\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"], -# type-X - ["euc-jp-0212", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ], - ["euc-jp-0212", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ], - ["euc-jp-0212", macron, - "\x8F\xAA\xA7\x8F\xAA\xB7\x8F\xAA\xC5\x8F\xAA\xD7\x8F\xAA\xE9" ], ); plan test => $n*@encodings + $n*@encodings*@greek |