summary refs log tree commit diff
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2002-04-30 15:46:38 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2002-04-30 15:46:38 +0000
commit e8c86ba6ca66f86dc4c8f4de0abf70f53c2484f4 (patch)
tree b54d2ad5fa967ab11cff4dfa4bab9361455530ea
parent 649c119fadcfc419baf62b96fb8285edabe5ffc6 (diff)
download perl-e8c86ba6ca66f86dc4c8f4de0abf70f53c2484f4.tar.gz
Upgrade to Encode 1.65.
p4raw-id: //depot/perl@16282
-rw-r--r-- ext/Encode/Changes 18
-rw-r--r-- ext/Encode/Encode.pm 11
-rw-r--r-- ext/Encode/bin/ucm2table 6
-rw-r--r-- ext/Encode/lib/Encode/Guess.pm 3
-rw-r--r-- ext/Encode/lib/Encode/Supported.pod 42
-rw-r--r-- ext/Encode/ucm/euc-jp.ucm 2
6 files changed, 66 insertions, 16 deletions
diff --git a/ext/Encode/Changes b/ext/Encode/Changes
index 75d9208550..595595e2eb 100644
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -1,9 +1,21 @@
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.64 2002/04/29 06:54:06 dankogai Exp $
+# $Id: Changes,v 1.65 2002/04/30 16:13:37 dankogai Exp dankogai $
#
-$Revision: 1.64 $ $Date: 2002/04/29 06:54:06 $
+$Revision: 1.65 $ $Date: 2002/04/30 16:13:37 $
+! Encode.pm
+ encode(undef) no longer warns for C<Use of uninitialized value in
+ subroutine entry>. Suggested by Paul.
+ Message-Id: <AIEAJICLCBDNAAOLLOKLMEEEEJAA.Paul.Marquess@ntlworld.com>
+! lib/Encode/Supported.pod
+ Encode::MIME::Header and Encode::Guess mentioned
+ Updated for Encode::HanExtra 0.05 and Encode::JIS2K
+! lib/Encode/Guess.pm
+ POD fix by Miyagawa-kun
+ Message-Id: <86k7qqx8p7.wl@mail.edge.co.jp>
+
+1.64 2002/04/29 06:54:06
! ucm/euc-jp.ucm
Now decodes euc-jisx0213 also. CAVEAT: encode("euc-jp"...) and
encocde("euc-jisx0213") are still DIFFERENT.
@@ -541,7 +553,7 @@ $Revision: 1.64 $ $Date: 2002/04/29 06:54:06 $
Typo fixes and improvements by jhi
Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
-1.11 $Date: 2002/04/29 06:54:06 $
+1.11 $Date: 2002/04/30 16:13:37 $
+ t/encoding.t
+ t/jperl.t
! MANIFEST
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index 45a66f65f9..4b0b1fe48d 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,9 +1,9 @@
#
-# $Id: Encode.pm,v 1.64 2002/04/29 06:54:06 dankogai Exp $
+# $Id: Encode.pm,v 1.65 2002/04/30 16:13:37 dankogai Exp dankogai $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.64 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.65 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
@@ -130,7 +130,8 @@ sub resolve_alias {
sub encode($$;$)
{
- my ($name,$string,$check) = @_;
+ my ($name, $string, $check) = @_;
+ defined $string or return;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
@@ -145,6 +146,7 @@ sub encode($$;$)
sub decode($$;$)
{
my ($name,$octets,$check) = @_;
+ defined $octets or return;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
@@ -159,6 +161,7 @@ sub decode($$;$)
sub from_to($$$;$)
{
my ($string,$from,$to,$check) = @_;
+ defined $string or return;
$check ||=0;
my $f = find_encoding($from);
unless (defined $f){
@@ -180,6 +183,7 @@ sub from_to($$$;$)
sub encode_utf8($)
{
my ($str) = @_;
+ defined $str or return;
utf8::encode($str);
return $str;
}
@@ -187,6 +191,7 @@ sub encode_utf8($)
sub decode_utf8($)
{
my ($str) = @_;
+ defined $str or return;
return undef unless utf8::decode($str);
return $str;
}
diff --git a/ext/Encode/bin/ucm2table b/ext/Encode/bin/ucm2table
index ab44573993..094ebe0583 100644
--- a/ext/Encode/bin/ucm2table
+++ b/ext/Encode/bin/ucm2table
@@ -1,5 +1,5 @@
#!/usr/bin/perl
-# $Id: ucm2table,v 1.1 2002/04/22 23:57:10 dankogai Exp $
+# $Id: ucm2table,v 1.2 2002/04/30 16:13:37 dankogai Exp dankogai $
#
use 5.006;
@@ -13,7 +13,7 @@ my $Hex = '[0-9A-Fa-f]';
while(<>){
chomp;
my ($uni, $enc, $fb) =
- /^<U($Hex+)>\s+(\S+)\s+\|(\d)$/o or next;
+ /^<U($Hex+)>\s+(\S+)\s+\|(\d)/o or next;
$fb eq '0' or next;
my @byte = ();
my $ord = 0;
@@ -32,7 +32,7 @@ while(<>){
my $start = $Opt{a} ? 0x20 : 0xa0;
-for (my $x = $start; $x <= 0xffff; $ x+= 32) {
+for (my $x = $start; $x <= 0xffff; $x += 32) {
my $line = '';
for my $i (0..31){
my $num = $x+$i; $num eq 0x7f and next; # skip delete
diff --git a/ext/Encode/lib/Encode/Guess.pm b/ext/Encode/lib/Encode/Guess.pm
index 1efa5cda22..35cc1e13f5 100644
--- a/ext/Encode/lib/Encode/Guess.pm
+++ b/ext/Encode/lib/Encode/Guess.pm
@@ -2,7 +2,7 @@ package Encode::Guess;
use strict;
use Encode qw(:fallbacks find_encoding);
-our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
my $Canon = 'Guess';
our $DEBUG = 0;
@@ -133,6 +133,7 @@ Encode::Guess -- Guesses encoding from data
# if you are sure $data won't contain anything bogus
+ use Encode;
use Encode::Guess qw/euc-jp shiftjis 7bit-jis/;
my $utf8 = decode("Guess", $data);
my $data = encode("Guess", $utf8); # this doesn't work!
diff --git a/ext/Encode/lib/Encode/Supported.pod b/ext/Encode/lib/Encode/Supported.pod
index 806f85b948..8cdcec1489 100644
--- a/ext/Encode/lib/Encode/Supported.pod
+++ b/ext/Encode/lib/Encode/Supported.pod
@@ -202,7 +202,6 @@ to 'CN', continental China, while traditional Chinese is mapped to
euc-jp
shiftjis cp932 macJapanese
7bit-jis
- euc-jp
iso-2022-jp [RFC1468]
iso-2022-jp-1 [RFC2237]
jis0201-raw { JIS X 0201 (roman + halfwidth kana) without CES }
@@ -239,9 +238,25 @@ distributed separately on CPAN, under the name Encode::HanExtra.
Standard DOS/Win Macintosh Comment/Reference
----------------------------------------------------------------
- gb18030
- euc-tw
- big5plus
+ big5ext CMEX's Big5e Extension
+ big5plus CMEX's Big5+ Extension
+ cccii Chinese Character Code for Information Interchange
+ euc-tw EUC (Extended Unix Character)
+ gb18030 GBK with Traditional Characters
+ ----------------------------------------------------------------
+
+=item Encode::JIS2K -- JIS X 0213 encodings via CPAN
+
+Due to size concerns, additional Japanese encodings below are
+distributed separately on CPAN, under the name Encode::JIS2K.
+
+ Standard DOS/Win Macintosh Comment/Reference
+ ----------------------------------------------------------------
+ euc-jisx0213
+ shiftjisx0213
+ iso-2022-jp-3
+ jis0213-1-raw
+ jis0213-2-raw
----------------------------------------------------------------
=back
@@ -275,6 +290,23 @@ For symbols and dingbats.
AdobeSymbol
----------------------------------------------------------------
+=item Encode::MIME::Header
+
+Strictly speaking, MIME header encoding documented in RFC 2047 is more
+of encapsulation than encoding. But included anyway.
+
+ ----------------------------------------------------------------
+ MIME-Header [RFC2047]
+ MIME-B [RFC2047]
+ MIME-Q [RFC2047]
+ ----------------------------------------------------------------
+
+=item Encode::Guess
+
+This one is not a name of encoding but a utility that lets you pick up
+the most appropriate encoding for a data out of given I<suspects>. See
+L<Encode::Guess> for details.
+
=back
=head1 Unsupported encodings
@@ -530,7 +562,6 @@ pages!
The rule of thumb is to use C<UTF-8> unless you know what
you're doing and unless you really benefit from using C<UTF-16>.
-
ISO-IR-165 [RFC1345]
VISCII
GB 12345
@@ -701,6 +732,7 @@ L<Encode>,
L<Encode::Byte>,
L<Encode::CN>, L<Encode::JP>, L<Encode::KR>, L<Encode::TW>,
L<Encode::EBCDIC>, L<Encode::Symbol>
+L<Encode::MIME::Header>, L<Encode::Guess>
=head1 References
diff --git a/ext/Encode/ucm/euc-jp.ucm b/ext/Encode/ucm/euc-jp.ucm
index 9479163024..cc1379b375 100644
--- a/ext/Encode/ucm/euc-jp.ucm
+++ b/ext/Encode/ucm/euc-jp.ucm
@@ -1,5 +1,5 @@
#
-# $Id: euc-jp.ucm,v 1.2 2002/04/29 07:01:58 dankogai Exp dankogai $
+# $Id: euc-jp.ucm,v 1.2 2002/04/29 07:01:58 dankogai Exp $
#
<code_set_name> "euc-jp"
<mb_cur_min> 1