summaryrefslogtreecommitdiff
path: root/ext/Encode
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2003-05-10 18:59:29 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2003-05-10 18:59:29 +0000
commitbedba6814834d84c03e3c8711e154e5c1e84209c (patch)
treedc9f5676a52ebf21202212d26979d881509a7f07 /ext/Encode
parent11785058c0b08a3960f7342e133e44fbc54cea1e (diff)
downloadperl-bedba6814834d84c03e3c8711e154e5c1e84209c.tar.gz
Upgrade to Encode 1.94.
p4raw-id: //depot/perl@19477
Diffstat (limited to 'ext/Encode')
-rw-r--r--ext/Encode/AUTHORS3
-rw-r--r--ext/Encode/Changes38
-rw-r--r--ext/Encode/Encode.pm4
-rw-r--r--ext/Encode/MANIFEST1
-rw-r--r--ext/Encode/bin/piconv124
-rw-r--r--ext/Encode/lib/Encode/MIME/Header.pm26
-rw-r--r--ext/Encode/t/enc_module.t6
-rw-r--r--ext/Encode/t/mime-header.t10
8 files changed, 155 insertions, 57 deletions
diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS
index f921fd5c08..b565a0f492 100644
--- a/ext/Encode/AUTHORS
+++ b/ext/Encode/AUTHORS
@@ -13,11 +13,11 @@ Andreas J. Koenig <andreas.koenig@anima.de>
Anton Tagunov <tagunov@motor.ru>
Autrijus Tang <autrijus@autrijus.org>
Benjamin Goldberg <goldbb2@earthlink.net>
+Bjoern Jacke <debianbugs@j3e.de>
Chris Nandor <pudge@pobox.com>
Craig A. Berry <craigberry@mac.com>
Dan Kogai <dankogai@dan.co.jp>
Elizabeth Mattijsen <liz@dijkmat.nl>
-Enache Adrian <enache@rdslink.ro>
Gerrit P. Haase <gp@familiehaase.de>
Graham Barr <gbarr@pobox.com>
Gurusamy Sarathy <gsar@activestate.com>
@@ -39,6 +39,7 @@ Robin Barker <rmb1@cise.npl.co.uk>
SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
SUGAWARA Hajime <sugawara@hdt.co.jp>
SUZUKI Norio <ZAP00217@nifty.com>
+Simon Cozens <simon@netthink.co.uk>
Spider Boardman <spider@web.zk3.dec.com>
Tatsuhiko Miyagawa <miyagawa@edge.co.jp>
Vadim Konovalov <vkonovalov@peterstar.ru>
diff --git a/ext/Encode/Changes b/ext/Encode/Changes
index 8d7a054c9c..1e68f4374b 100644
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -1,8 +1,42 @@
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.93 2003/04/24 17:43:16 dankogai Exp $
+# $Id: Changes,v 1.94 2003/05/10 18:13:59 dankogai Exp $
#
-$Revision: 1.93 $ $Date: 2003/04/24 17:43:16 $
+$Revision: 1.94 $ $Date: 2003/05/10 18:13:59 $
+! lib/Encode/MIME/Header.pm
+ A more sophisticated solution for double-encoding by dankogai
+! lib/Encode/MIME/Header.pm AUTHORS
+ Two bugs fixed by Bjoern Jacke
+ * "Double Encoding" was not possible
+ i.e. encode("MIME-B" => "=?UTF-8?B?w4RwZmVs?=")
+ * encode("MIME-Q") had UTF-8 flag on
+ Message-Id: <rt-22166-57077.2.12980078979811@bugs6.perl.org>
+! lib/Encode/MIME/Header.pm AUTHORS
+ Two occurances of "croak ()" fixed as "croak qq()".
+ Simon Cozens is added to AUTHORS as a result.
+ Message-Id: <20030509103708.GA30664@deep-dark-truthful-mirror.pad>
+! bin/piconv
+ POD fixes that reflect enhancements by jhi
+! bin/piconv
+ Two enhancements by jhi.
+ + Now uses Getopt::Long so it accepts long name options
+ (--from for -f, for example)
+ + New option: -r,--resolve
+ Message-Id: <20030505114149.GA227075@kosh.hut.fi>
+! MANIFEST META.yml
+ META.yml added upon request of Schwern
+ Message-Id: <F3B0BD2C-7BCB-11D7-A488-000393AE4244@dan.co.jp>
+! AUTHORS
+ Enache Adrian removed upon request -- to live longer than Encode
+ and/or FreeBSD (toy-)?thread :)
+ Message-Id: <20030425015701.GA2069@ratsnest.hole>
+! t/enc_module.t
+ "close STDOUT unless $^O eq 'freebsd';" once again relocated
+ to keep VMS happy in which case "$^O eq 'freebsd'" is required
+ to keep FreeBSD+thread happy. Sigh.
+ Message-Id: <3EA88ADC.3000300@mac.com>
+
+1.93 2003/04/24 17:43:16
! t/enc_eucjp.t
added "no warnings 'pack'" in for loop to keep bleedperl from
complaining "Character in 'C' format wrapped in pack".
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index 45d134b7e0..e9dead4a1d 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,9 +1,9 @@
#
-# $Id: Encode.pm,v 1.93 2003/04/24 17:44:00 dankogai Exp $
+# $Id: Encode.pm,v 1.94 2003/05/10 18:14:36 dankogai Exp $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.93 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.94 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST
index d46dea183b..86aaea7fca 100644
--- a/ext/Encode/MANIFEST
+++ b/ext/Encode/MANIFEST
@@ -20,6 +20,7 @@ JP/Makefile.PL Encode extension
KR/KR.pm Encode extension
KR/Makefile.PL Encode extension
MANIFEST Encode extension
+META.yml Module meta-data in YAML
Makefile.PL Encode extension makefile writer
README Encode extension
Symbol/Makefile.PL Encode extension
diff --git a/ext/Encode/bin/piconv b/ext/Encode/bin/piconv
index fb1d7d63e9..b25b0b56df 100644
--- a/ext/Encode/bin/piconv
+++ b/ext/Encode/bin/piconv
@@ -1,5 +1,5 @@
#!./perl
-# $Id: piconv,v 1.25 2002/06/01 18:07:49 dankogai Exp $
+# $Id: piconv,v 1.26 2003/05/10 18:13:59 dankogai Exp $
#
use 5.8.0;
use strict;
@@ -7,21 +7,42 @@ use Encode ;
use Encode::Alias;
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
-use Getopt::Std;
-
-my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt);
-$Opt{h} and help();
-$Opt{l} and list_encodings();
+use File::Basename;
+my $name = basename($0);
+
+use Getopt::Long;
+
+my %Opt;
+
+help()
+ unless
+ GetOptions(\%Opt,
+ 'from|f=s',
+ 'to|t=s',
+ 'list|l',
+ 'string|s=s',
+ 'check|C=i',
+ 'c',
+ 'perlqq|p',
+ 'debug|D',
+ 'scheme|S=s',
+ 'resolve|r=s',
+ 'help',
+ );
+
+$Opt{help} and help();
+$Opt{list} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
-$Opt{f} || $Opt{t} || help();
-my $from = $Opt{f} || $locale or help("from_encoding unspecified");
-my $to = $Opt{t} || $locale or help("to_encoding unspecified");
-$Opt{s} and Encode::from_to($Opt{s}, $from, $to) and print $Opt{s} and exit;
-my $scheme = exists $Scheme{$Opt{S}} ? $Opt{S} : 'from_to';
-$Opt{C} ||= $Opt{c};
-$Opt{p} and $Opt{C} = Encode::FB_PERLQQ;
-
-if ($Opt{D}){
+defined $Opt{resolve} and resolve_encoding($Opt{resolve});
+$Opt{from} || $Opt{to} || help();
+my $from = $Opt{from} || $locale or help("from_encoding unspecified");
+my $to = $Opt{to} || $locale or help("to_encoding unspecified");
+$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
+my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
+$Opt{check} ||= $Opt{c};
+$Opt{p} and $Opt{check} = Encode::FB_PERLQQ;
+
+if ($Opt{debug}){
my $cfrom = Encode->getEncoding($from)->name;
my $cto = Encode->getEncoding($to)->name;
print <<"EOT";
@@ -34,12 +55,12 @@ EOT
# default
if ($scheme eq 'from_to'){
while(<>){
- Encode::from_to($_, $from, $to, $Opt{C}); print;
+ Encode::from_to($_, $from, $to, $Opt{check}); print;
};
# step-by-step
}elsif ($scheme eq 'decode_encode'){
while(<>){
- my $decoded = decode($from, $_, $Opt{C});
+ my $decoded = decode($from, $_, $Opt{check});
my $encoded = encode($to, $decoded);
print $encoded;
};
@@ -48,27 +69,46 @@ if ($scheme eq 'from_to'){
binmode(STDIN, ":encoding($from)");
binmode(STDOUT, ":encoding($to)");
while(<>){ print; }
-}else{ # won't reach
- die "unknown scheme: $scheme";
+} else { # won't reach
+ die "$name: unknown scheme: $scheme";
}
sub list_encodings{
print join("\n", Encode->encodings(":all")), "\n";
- exit;
+ exit 0;
+}
+
+sub resolve_encoding {
+ if (my $alias = Encode::resolve_alias($_[0])) {
+ print $alias, "\n";
+ exit 0;
+ } else {
+ warn "$name: $_[0] is not known to Encode\n";
+ exit 1;
+ }
}
sub help{
my $message = shift;
- use File::Basename;
- my $name = basename($0);
$message and print STDERR "$name error: $message\n";
print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
- -l lists all available encodings (the canonical names, many aliases exist)
- -f from_encoding When omitted, the current locale will be used.
- -t to_encoding When omitted, the current locale will be used.
- -s string "string" will be converted instead of STDIN.
+$name -r encoding_alias
+ -l,--list
+ lists all available encodings
+ -r,--resolve encoding_alias
+ resolve encoding to its (Encode) canonical name
+ -f,--from from_encoding
+ when omitted, the current locale will be used
+ -t,--to to_encoding
+ when omitted, the current locale will be used
+ -s,--string string
+ "string" will be the input instead of STDIN or files
+The following are mainly of interest to Encode hackers:
+ -D,--debug show debug information
+ -C N | -c | -p check the validity of the input
+ -S,--scheme scheme use the scheme for conversion
EOT
exit;
}
@@ -83,6 +123,11 @@ piconv -- iconv(1), reinvented in perl
piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
piconv -l
+ piconv [-C N|-c|-p]
+ piconv -S scheme ...
+ piconv -r encoding
+ piconv -D ...
+ piconv -h
=head1 DESCRIPTION
@@ -94,16 +139,17 @@ place of iconv for virtually any case.
piconv converts the character encoding of either STDIN or files
specified in the argument and prints out to STDOUT.
-Here is the list of options.
+Here is the list of options. Each option can be in short format (-f)
+or long (--from).
=over 4
-=item -f from_encoding
+=item -f,--from from_encoding
Specifies the encoding you are converting from. Unlike B<iconv>,
this option can be omitted. In such cases, the current locale is used.
-=item -t to_encoding
+=item -t,--to to_encoding
Specifies the encoding you are converting to. Unlike B<iconv>,
this option can be omitted. In such cases, the current locale is used.
@@ -111,11 +157,11 @@ this option can be omitted. In such cases, the current locale is used.
Therefore, when both -f and -t are omitted, B<piconv> just acts
like B<cat>.
-=item -s I<string>
+=item -s,--string I<string>
-uses I<string> instead of file for the source of text. Same as B<iconv>.
+uses I<string> instead of file for the source of text.
-=item -l
+=item -l,--list
Lists all available encodings, one per line, in case-insensitive
order. Note that only the canonical names are listed; many aliases
@@ -124,7 +170,7 @@ and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
for a full discussion.
-=item -C I<N>
+=item -C,--check I<N>
Check the validity of the stream if I<N> = 1. When I<N> = -1, something
interesting happens when it encounters an invalid character.
@@ -133,19 +179,19 @@ interesting happens when it encounters an invalid character.
Same as C<-C 1>.
-=item -p
+=item -p,--perlqq
Same as C<-C -1>.
-=item -h
+=item -h,--help
Show usage.
-=item -D
+=item -D,--debug
Invokes debugging mode. Primarily for Encode hackers.
-=item -S scheme
+=item -S,--scheme scheme
Selects which scheme is to be used for conversion. Available schemes
are as follows:
@@ -173,8 +219,8 @@ Like the I<-D> option, this is also for Encode hackers.
=head1 SEE ALSO
-L<iconv(1)>
-L<locale(3)>
+L<iconv/1>
+L<locale/3>
L<Encode>
L<Encode::Supported>
L<Encode::Alias>
diff --git a/ext/Encode/lib/Encode/MIME/Header.pm b/ext/Encode/lib/Encode/MIME/Header.pm
index fb4fdd9585..447951b17e 100644
--- a/ext/Encode/lib/Encode/MIME/Header.pm
+++ b/ext/Encode/lib/Encode/MIME/Header.pm
@@ -1,9 +1,8 @@
package Encode::MIME::Header;
use strict;
# use warnings;
-our $VERSION = do { my @r = (q$Revision: 1.7 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-use Encode qw(find_encoding encode_utf8);
+our $VERSION = do { my @r = (q$Revision: 1.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+use Encode qw(find_encoding encode_utf8 decode_utf8);
use MIME::Base64;
use Carp;
@@ -72,7 +71,7 @@ sub decode($$;$){
sub decode_b{
my $enc = shift;
- my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
+ my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
my $db64 = decode_base64(shift);
return $d->name eq 'utf8' ?
Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
@@ -80,7 +79,7 @@ sub decode_b{
sub decode_q{
my ($enc, $q) = @_;
- my $d = find_encoding($enc) or croak(Unknown encoding "$enc");
+ my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
$q =~ s/_/ /go;
$q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
return $d->name eq 'utf8' ?
@@ -92,7 +91,18 @@ my $especials =
map {quotemeta(chr($_))}
unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
-my $re_especials = qr/$especials/o;
+my $re_encoded_word =
+ qr{
+ (?:
+ =\? # begin encoded word
+ (?:[0-9A-Za-z\-_]+) # charset (encoding)
+ \?(?:[QqBb])\? # delimiter
+ (?:.*?) # Base64-encodede contents
+ \?= # end encoded word
+ )
+ }xo;
+
+my $re_especials = qr{$re_encoded_word|$especials}xo;
sub encode($$;$){
my ($obj, $str, $chk) = @_;
@@ -100,7 +110,7 @@ sub encode($$;$){
for my $line (split /\r|\n|\r\n/o, $str){
my (@word, @subline);
for my $word (split /($re_especials)/o, $line){
- if ($word =~ /[^\x00-\x7f]/o){
+ if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
push @word, $obj->_encode($word);
}else{
push @word, $word;
@@ -158,7 +168,7 @@ sub _encode_q{
}{
join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
}egox;
- return HEAD . 'Q?' . $chunk . TAIL;
+ return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
}
1;
diff --git a/ext/Encode/t/enc_module.t b/ext/Encode/t/enc_module.t
index d444f40a15..d6d9e7e085 100644
--- a/ext/Encode/t/enc_module.t
+++ b/ext/Encode/t/enc_module.t
@@ -1,4 +1,4 @@
-# $Id: enc_module.t,v 1.5 2003/04/24 17:43:16 dankogai Exp $
+# $Id: enc_module.t,v 1.6 2003/05/10 18:13:59 dankogai Exp $
# This file is in euc-jp
BEGIN {
require Config; import Config;
@@ -41,9 +41,9 @@ print $obj->str, "\n";
$obj->set("テスト文字列");
print $obj->str, "\n";
-# I have tested and found "unless $^O eq 'freebsd'" is not
-# necessary but I will leave it for the sake of Enache -- dankogai
# Please do not move this to a point after the comparison -- Craig Berry
+# and "unless $^O eq 'freebsd'" is needed for FreeBSD (toy-)?thread
+# -- dankogai
close STDOUT unless $^O eq 'freebsd';
my $cmp = compare_text($file0, $file1);
diff --git a/ext/Encode/t/mime-header.t b/ext/Encode/t/mime-header.t
index 4c84c4fac8..81d6ec8dc0 100644
--- a/ext/Encode/t/mime-header.t
+++ b/ext/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
#
-# $Id: mime-header.t,v 1.6 2002/10/21 19:47:47 dankogai Exp $
+# $Id: mime-header.t,v 1.7 2003/05/10 18:13:59 dankogai Exp $
# This script is written in utf8
#
BEGIN {
@@ -23,7 +23,7 @@ no utf8;
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 7;
+use Test::More tests => 9;
use_ok("Encode::MIME::Header");
my $eheader =<<'EOS';
@@ -91,4 +91,10 @@ is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B");
is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q");
is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B");
is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q");
+
+$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?";
+$bheader = "What is =?UTF-8?B?PT9VVEYtOD9CP3c0UndabVZzPz0=?= ?";
+$qheader = "What is =?UTF-8?Q?=3D=3FUTF=2D8=3FB=3Fw4RwZmVs=3F=3D?= ?";
+is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B");
+is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q");
__END__;