Upgrade to Encode 1.94.

p4raw-id: //depot/perl@19477
author: Jarkko Hietaniemi <jhi@iki.fi> 2003-05-10 18:59:29 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2003-05-10 18:59:29 +0000
commit: bedba6814834d84c03e3c8711e154e5c1e84209c (patch)
tree: dc9f5676a52ebf21202212d26979d881509a7f07 /ext/Encode
parent: 11785058c0b08a3960f7342e133e44fbc54cea1e (diff)
download: perl-bedba6814834d84c03e3c8711e154e5c1e84209c.tar.gz
8 files changed, 155 insertions, 57 deletions
diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS
index f921fd5c08..b565a0f492 100644
--- a/ext/Encode/AUTHORS
+++ b/ext/Encode/AUTHORS
@@ -13,11 +13,11 @@ Andreas J. Koenig		<andreas.koenig@anima.de>
 Anton Tagunov			<tagunov@motor.ru>
 Autrijus Tang			<autrijus@autrijus.org>
 Benjamin Goldberg               <goldbb2@earthlink.net>
+Bjoern Jacke			<debianbugs@j3e.de>
 Chris Nandor                    <pudge@pobox.com>
 Craig A. Berry			<craigberry@mac.com>
 Dan Kogai			<dankogai@dan.co.jp>
 Elizabeth Mattijsen             <liz@dijkmat.nl>
-Enache Adrian                   <enache@rdslink.ro>
 Gerrit P. Haase			<gp@familiehaase.de>
 Graham Barr                     <gbarr@pobox.com>
 Gurusamy Sarathy		<gsar@activestate.com>
@@ -39,6 +39,7 @@ Robin Barker                    <rmb1@cise.npl.co.uk>
 SADAHIRO Tomoyuki		<SADAHIRO@cpan.org>
 SUGAWARA Hajime			<sugawara@hdt.co.jp>
 SUZUKI Norio                    <ZAP00217@nifty.com>
+Simon Cozens			<simon@netthink.co.uk>
 Spider Boardman			<spider@web.zk3.dec.com>
 Tatsuhiko Miyagawa		<miyagawa@edge.co.jp>
 Vadim Konovalov			<vkonovalov@peterstar.ru>
diff --git a/ext/Encode/Changes b/ext/Encode/Changes
index 8d7a054c9c..1e68f4374b 100644
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -1,8 +1,42 @@
 # Revision history for Perl extension Encode.
 #
-# $Id: Changes,v 1.93 2003/04/24 17:43:16 dankogai Exp $
+# $Id: Changes,v 1.94 2003/05/10 18:13:59 dankogai Exp $
 #
-$Revision: 1.93 $ $Date: 2003/04/24 17:43:16 $
+$Revision: 1.94 $ $Date: 2003/05/10 18:13:59 $
+! lib/Encode/MIME/Header.pm
+  A more sophisticated solution for double-encoding by dankogai
+! lib/Encode/MIME/Header.pm AUTHORS
+  Two bugs fixed by Bjoern Jacke
+  * "Double Encoding" was not possible
+    i.e. encode("MIME-B" => "=?UTF-8?B?w4RwZmVs?=")
+  * encode("MIME-Q") had UTF-8 flag on
+  Message-Id: <rt-22166-57077.2.12980078979811@bugs6.perl.org>
+! lib/Encode/MIME/Header.pm AUTHORS
+  Two occurances of "croak ()" fixed as "croak qq()".
+  Simon Cozens is added to AUTHORS as a result.
+  Message-Id: <20030509103708.GA30664@deep-dark-truthful-mirror.pad>
+! bin/piconv
+  POD fixes that reflect enhancements by jhi
+! bin/piconv
+  Two enhancements by jhi.
+  + Now uses Getopt::Long so it accepts long name options 
+   (--from for -f, for example)
+  + New option: -r,--resolve
+  Message-Id: <20030505114149.GA227075@kosh.hut.fi>
+! MANIFEST META.yml
+  META.yml added upon request of Schwern
+  Message-Id: <F3B0BD2C-7BCB-11D7-A488-000393AE4244@dan.co.jp>
+! AUTHORS
+  Enache Adrian removed upon request -- to live longer than Encode
+  and/or FreeBSD (toy-)?thread :)
+  Message-Id: <20030425015701.GA2069@ratsnest.hole>
+! t/enc_module.t
+  "close STDOUT unless $^O eq 'freebsd';" once again relocated
+  to keep VMS happy in which case "$^O eq 'freebsd'" is required
+  to keep FreeBSD+thread happy.  Sigh.
+  Message-Id: <3EA88ADC.3000300@mac.com>
+
+1.93 2003/04/24 17:43:16
 ! t/enc_eucjp.t
   added "no warnings 'pack'" in for loop to keep bleedperl from
   complaining "Character in 'C' format wrapped in pack".
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index 45d134b7e0..e9dead4a1d 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,9 +1,9 @@
 #
-# $Id: Encode.pm,v 1.93 2003/04/24 17:44:00 dankogai Exp $
+# $Id: Encode.pm,v 1.94 2003/05/10 18:14:36 dankogai Exp $
 #
 package Encode;
 use strict;
-our $VERSION = do { my @r = (q$Revision: 1.93 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 our $DEBUG = 0;
 use XSLoader ();
 XSLoader::load(__PACKAGE__, $VERSION);
diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST
index d46dea183b..86aaea7fca 100644
--- a/ext/Encode/MANIFEST
+++ b/ext/Encode/MANIFEST
@@ -20,6 +20,7 @@ JP/Makefile.PL	Encode extension
 KR/KR.pm		Encode extension
 KR/Makefile.PL		Encode extension
 MANIFEST		Encode extension
+META.yml                                Module meta-data in YAML
 Makefile.PL		Encode extension makefile writer
 README		Encode extension
 Symbol/Makefile.PL     Encode extension
diff --git a/ext/Encode/bin/piconv b/ext/Encode/bin/piconv
index fb1d7d63e9..b25b0b56df 100644
--- a/ext/Encode/bin/piconv
+++ b/ext/Encode/bin/piconv
@@ -1,5 +1,5 @@
 #!./perl
-# $Id: piconv,v 1.25 2002/06/01 18:07:49 dankogai Exp $
+# $Id: piconv,v 1.26 2003/05/10 18:13:59 dankogai Exp $
 #
 use 5.8.0;
 use strict;
@@ -7,21 +7,42 @@ use Encode ;
 use Encode::Alias;
 my %Scheme =  map {$_ => 1} qw(from_to decode_encode perlio);
 
-use Getopt::Std;
-
-my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt);
-$Opt{h} and help();
-$Opt{l} and list_encodings();
+use File::Basename;
+my $name = basename($0);
+
+use Getopt::Long;
+
+my %Opt;
+
+help()
+    unless
+      GetOptions(\%Opt,
+		 'from|f=s',
+		 'to|t=s',
+		 'list|l',
+		 'string|s=s',
+		 'check|C=i',
+		 'c',
+		 'perlqq|p',
+		 'debug|D',
+		 'scheme|S=s',
+		 'resolve|r=s',
+		 'help',
+		 );
+
+$Opt{help} and help();
+$Opt{list} and list_encodings();
 my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
-$Opt{f} || $Opt{t} || help();
-my $from = $Opt{f} || $locale or help("from_encoding unspecified");
-my $to   = $Opt{t} || $locale or help("to_encoding unspecified");
-$Opt{s} and Encode::from_to($Opt{s}, $from, $to) and print $Opt{s} and exit;
-my $scheme = exists $Scheme{$Opt{S}} ? $Opt{S} :  'from_to';
-$Opt{C} ||= $Opt{c};
-$Opt{p} and $Opt{C} = Encode::FB_PERLQQ;
-
-if ($Opt{D}){
+defined $Opt{resolve} and resolve_encoding($Opt{resolve});
+$Opt{from} || $Opt{to} || help();
+my $from = $Opt{from} || $locale or help("from_encoding unspecified");
+my $to   = $Opt{to}   || $locale or help("to_encoding unspecified");
+$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
+my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} :  'from_to';
+$Opt{check} ||= $Opt{c};
+$Opt{p} and $Opt{check} = Encode::FB_PERLQQ;
+
+if ($Opt{debug}){
     my $cfrom = Encode->getEncoding($from)->name;
     my $cto   = Encode->getEncoding($to)->name;
     print <<"EOT";
@@ -34,12 +55,12 @@ EOT
 # default
 if     ($scheme eq 'from_to'){ 
     while(<>){
-	Encode::from_to($_, $from, $to, $Opt{C}); print;
+	Encode::from_to($_, $from, $to, $Opt{check}); print;
     };
 # step-by-step
 }elsif ($scheme eq 'decode_encode'){
    while(<>){
-       my $decoded = decode($from, $_, $Opt{C});
+       my $decoded = decode($from, $_, $Opt{check});
        my $encoded = encode($to, $decoded);
        print $encoded;
     };
@@ -48,27 +69,46 @@ if     ($scheme eq 'from_to'){
     binmode(STDIN,  ":encoding($from)");
     binmode(STDOUT, ":encoding($to)");
     while(<>){ print; }
-}else{ # won't reach
-    die "unknown scheme: $scheme";
+} else { # won't reach
+    die "$name: unknown scheme: $scheme";
 }
 
 sub list_encodings{
     print join("\n", Encode->encodings(":all")), "\n";
-    exit;
+    exit 0;
+}
+
+sub resolve_encoding {
+    if (my $alias = Encode::resolve_alias($_[0])) {
+	print $alias, "\n";
+	exit 0;
+    } else {
+	warn "$name: $_[0] is not known to Encode\n";
+	exit 1;
+    }
 }
 
 sub help{
     my $message = shift;
-    use File::Basename;
-    my $name = basename($0);
     $message and print STDERR "$name error: $message\n";
     print STDERR <<"EOT";
 $name [-f from_encoding] [-t to_encoding] [-s string] [files...]
 $name -l
-  -l lists all available encodings (the canonical names, many aliases exist)
-  -f from_encoding  When omitted, the current locale will be used.
-  -t to_encoding    When omitted, the current locale will be used.
-  -s string         "string" will be converted instead of STDIN.
+$name -r encoding_alias
+  -l,--list
+     lists all available encodings
+  -r,--resolve encoding_alias
+    resolve encoding to its (Encode) canonical name
+  -f,--from from_encoding  
+     when omitted, the current locale will be used
+  -t,--to to_encoding    
+     when omitted, the current locale will be used
+  -s,--string string         
+     "string" will be the input instead of STDIN or files
+The following are mainly of interest to Encode hackers:
+  -D,--debug          show debug information
+  -C N | -c | -p      check the validity of the input
+  -S,--scheme scheme  use the scheme for conversion
 EOT
   exit;
 }
@@ -83,6 +123,11 @@ piconv -- iconv(1), reinvented in perl
 
   piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
   piconv -l
+  piconv [-C N|-c|-p]
+  piconv -S scheme ...
+  piconv -r encoding
+  piconv -D ...
+  piconv -h
 
 =head1 DESCRIPTION
 
@@ -94,16 +139,17 @@ place of iconv for virtually any case.
 piconv converts the character encoding of either STDIN or files
 specified in the argument and prints out to STDOUT.
 
-Here is the  list of options.
+Here is the list of options.  Each option can be in short format (-f)
+or long (--from).
 
 =over 4
 
-=item -f from_encoding
+=item -f,--from from_encoding
 
 Specifies the encoding you are converting from.  Unlike B<iconv>,
 this option can be omitted.  In such cases, the current locale is used.
 
-=item -t to_encoding
+=item -t,--to to_encoding
 
 Specifies the encoding you are converting to.  Unlike B<iconv>,
 this option can be omitted.  In such cases, the current locale is used.
@@ -111,11 +157,11 @@ this option can be omitted.  In such cases, the current locale is used.
 Therefore, when both -f and -t are omitted, B<piconv> just acts
 like B<cat>.
 
-=item -s I<string>
+=item -s,--string I<string>
 
-uses I<string> instead of file for the source of text.  Same as B<iconv>.
+uses I<string> instead of file for the source of text.
 
-=item -l
+=item -l,--list
 
 Lists all available encodings, one per line, in case-insensitive
 order.  Note that only the canonical names are listed; many aliases
@@ -124,7 +170,7 @@ and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
 instead of "cp850", or "winlatin1" for "cp1252".  See L<Encode::Supported>
 for a full discussion.
 
-=item -C I<N>
+=item -C,--check I<N>
 
 Check the validity of the stream if I<N> = 1.  When I<N> = -1, something
 interesting happens when it encounters an invalid character.
@@ -133,19 +179,19 @@ interesting happens when it encounters an invalid character.
 
 Same as C<-C 1>.
 
-=item -p
+=item -p,--perlqq
 
 Same as C<-C -1>.
 
-=item -h
+=item -h,--help
 
 Show usage.
 
-=item -D
+=item -D,--debug
 
 Invokes debugging mode.  Primarily for Encode hackers.
 
-=item -S scheme
+=item -S,--scheme scheme
 
 Selects which scheme is to be used for conversion.  Available schemes
 are as follows:
@@ -173,8 +219,8 @@ Like the I<-D> option, this is also for Encode hackers.
 
 =head1 SEE ALSO
 
-L<iconv(1)>
-L<locale(3)>
+L<iconv/1>
+L<locale/3>
 L<Encode>
 L<Encode::Supported>
 L<Encode::Alias>
diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm
index fb4fdd9585..447951b17e 100644
--- a/ext/Encode/lib/Encode/MIME/Header.pm
+++ b/ext/Encode/lib/Encode/MIME/Header.pm
@@ -1,9 +1,8 @@
 package Encode::MIME::Header;
 use strict;
 # use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-use Encode qw(find_encoding encode_utf8);
+our $VERSION = do { my @r = (q$Revision: 1.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+use Encode qw(find_encoding encode_utf8 decode_utf8);
 use MIME::Base64;
 use Carp;
 
@@ -72,7 +71,7 @@ sub decode($$;$){
 
 sub decode_b{
     my $enc = shift;
-    my $d = find_encoding($enc)	or croak(Unknown encoding "$enc");
+    my $d = find_encoding($enc)        or croak qq(Unknown encoding "$enc");
     my $db64 = decode_base64(shift);
     return $d->name eq 'utf8' ?
 	Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
@@ -80,7 +79,7 @@ sub decode_b{
 
 sub decode_q{
     my ($enc, $q) = @_;
-    my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
+    my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
     $q =~ s/_/ /go;
     $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
     return $d->name eq 'utf8' ? 
@@ -92,7 +91,18 @@ my $especials =
 	 map {quotemeta(chr($_))} 
 	 unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
 
-my $re_especials = qr/$especials/o;
+my $re_encoded_word =
+    qr{
+       (?:
+	=\?               # begin encoded word
+	(?:[0-9A-Za-z\-_]+) # charset (encoding)
+	\?(?:[QqBb])\?      # delimiter
+	(?:.*?)             # Base64-encodede contents
+	\?=                 # end encoded word
+       )
+      }xo;
+
+my $re_especials = qr{$re_encoded_word|$especials}xo;
 
 sub encode($$;$){
     my ($obj, $str, $chk) = @_;
@@ -100,7 +110,7 @@ sub encode($$;$){
     for my $line (split /\r|\n|\r\n/o, $str){
 	my (@word, @subline);
         for my $word (split /($re_especials)/o, $line){
-	    if ($word =~ /[^\x00-\x7f]/o){ 
+	    if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
 		push @word, $obj->_encode($word);
 	    }else{
 		push @word, $word;
@@ -158,7 +168,7 @@ sub _encode_q{
 	       }{
 		   join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
 	       }egox;
-    return HEAD . 'Q?' . $chunk . TAIL;
+    return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
 }
 
 1;
diff --git a/ext/Encode/t/enc_module.t b/ext/Encode/t/enc_module.t
index d444f40a15..d6d9e7e085 100644
--- a/ext/Encode/t/enc_module.t
+++ b/ext/Encode/t/enc_module.t
@@ -1,4 +1,4 @@
-# $Id: enc_module.t,v 1.5 2003/04/24 17:43:16 dankogai Exp $
+# $Id: enc_module.t,v 1.6 2003/05/10 18:13:59 dankogai Exp $
 # This file is in euc-jp
 BEGIN {
     require Config; import Config;
@@ -41,9 +41,9 @@ print $obj->str, "\n";
 $obj->set("�ƥ���ʸ����");
 print $obj->str, "\n";
 
-# I have tested and found "unless $^O eq 'freebsd'" is not
-# necessary but I will leave it for the sake of Enache -- dankogai
 # Please do not move this to a point after the comparison -- Craig Berry
+# and "unless $^O eq 'freebsd'" is needed for FreeBSD (toy-)?thread
+# -- dankogai
 close STDOUT unless $^O eq 'freebsd';
 
 my $cmp = compare_text($file0, $file1);
diff --git a/ext/Encode/t/mime-header.t b/ext/Encode/t/mime-header.t
index 4c84c4fac8..81d6ec8dc0 100644
--- a/ext/Encode/t/mime-header.t
+++ b/ext/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
 #
-# $Id: mime-header.t,v 1.6 2002/10/21 19:47:47 dankogai Exp $
+# $Id: mime-header.t,v 1.7 2003/05/10 18:13:59 dankogai Exp $
 # This script is written in utf8
 #
 BEGIN {
@@ -23,7 +23,7 @@ no utf8;
 
 use strict;
 #use Test::More qw(no_plan);
-use Test::More tests => 7;
+use Test::More tests => 9;
 use_ok("Encode::MIME::Header");
 
 my $eheader =<<'EOS';
@@ -91,4 +91,10 @@ is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B");
 is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q");
 is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B");
 is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q");
+
+$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?";
+$bheader = "What is =?UTF-8?B?PT9VVEYtOD9CP3c0UndabVZzPz0=?= ?";
+$qheader = "What is =?UTF-8?Q?=3D=3FUTF=2D8=3FB=3Fw4RwZmVs=3F=3D?= ?";
+is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B");
+is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q");
 __END__;
author	Jarkko Hietaniemi <jhi@iki.fi>	2003-05-10 18:59:29 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2003-05-10 18:59:29 +0000
commit	bedba6814834d84c03e3c8711e154e5c1e84209c (patch)
tree	dc9f5676a52ebf21202212d26979d881509a7f07 /ext/Encode
parent	11785058c0b08a3960f7342e133e44fbc54cea1e (diff)
download	perl-bedba6814834d84c03e3c8711e154e5c1e84209c.tar.gz