summaryrefslogtreecommitdiff
path: root/cpan/Encode
diff options
context:
space:
mode:
authorSteve Hay <steve.m.hay@googlemail.com>2016-06-28 08:26:38 +0100
committerSteve Hay <steve.m.hay@googlemail.com>2016-06-28 08:26:38 +0100
commit0dcb562a7727869dc36989700ab4a3e5da7cad3b (patch)
tree2f3b438b3d835b37be707bd16eb24d1795667ce4 /cpan/Encode
parent36efb5a67d73cc824ada76aaa2e3b03bda5cdc60 (diff)
downloadperl-0dcb562a7727869dc36989700ab4a3e5da7cad3b.tar.gz
Upgrade Encode from version 2.80 to 2.84
This retains the customizations to Byte/Makefile.PL (not yet assimilated) and encoding.pm (can't be removed without a $VERSION++, which would be a customization again!).
Diffstat (limited to 'cpan/Encode')
-rw-r--r--cpan/Encode/Encode.pm4
-rw-r--r--cpan/Encode/lib/Encode/MIME/Header.pm225
-rw-r--r--cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm4
-rw-r--r--cpan/Encode/lib/Encode/Supported.pod2
-rw-r--r--cpan/Encode/t/encoding-locale.t5
-rw-r--r--cpan/Encode/t/mime-header.t214
6 files changed, 253 insertions, 201 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index 3dade96bf3..ff868a5794 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -1,10 +1,10 @@
#
-# $Id: Encode.pm,v 2.80 2016/01/25 14:54:01 dankogai Exp $
+# $Id: Encode.pm,v 2.84 2016/04/11 07:16:52 dankogai Exp $
#
package Encode;
use strict;
use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.80 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.84 $ =~ /(\d+)/g;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use XSLoader ();
XSLoader::load( __PACKAGE__, $VERSION );
diff --git a/cpan/Encode/lib/Encode/MIME/Header.pm b/cpan/Encode/lib/Encode/MIME/Header.pm
index ba6adba475..d74d453b8b 100644
--- a/cpan/Encode/lib/Encode/MIME/Header.pm
+++ b/cpan/Encode/lib/Encode/MIME/Header.pm
@@ -3,7 +3,7 @@ use strict;
use warnings;
no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(find_encoding encode_utf8 decode_utf8);
use MIME::Base64;
use Carp;
@@ -26,7 +26,7 @@ $Encode::Encoding{'MIME-B'} = bless {
$Encode::Encoding{'MIME-Q'} = bless {
%seed,
- decode_q => 1,
+ decode_b => 0,
encode => 'Q',
Name => 'MIME-Q',
} => __PACKAGE__;
@@ -36,47 +36,74 @@ use parent qw(Encode::Encoding);
sub needs_lines { 1 }
sub perlio_ok { 0 }
+# RFC 2047 and RFC 2231 grammar
+my $re_charset = qr/[-0-9A-Za-z_]+/;
+my $re_language = qr/[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*/;
+my $re_encoding = qr/[QqBb]/;
+my $re_encoded_text = qr/[^\?\s]*/;
+my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
+my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
+
+our $STRICT_DECODE = 0;
+
sub decode($$;$) {
use utf8;
my ( $obj, $str, $chk ) = @_;
- # zap spaces between encoded words
- $str =~ s/\?=\s+=\?/\?==\?/gos;
-
+
# multi-line header to single line
- $str =~ s/(?:\r\n|[\r\n])[ \t]//gos;
-
- 1 while ( $str =~
- s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)([^?]*?)\?=\1([^?]*?\?=)/$1$2$3/ )
- ; # Concat consecutive QP encoded mime headers
- # Fixes breaking inside multi-byte characters
-
- $str =~ s{
- =\? # begin encoded word
- ([-0-9A-Za-z_]+) # charset (encoding)
- (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
- \?([QqBb])\? # delimiter
- (.*?) # Base64-encodede contents
- \?= # end encoded word
- }{
- if (uc($2) eq 'B'){
- $obj->{decode_b} or croak qq(MIME "B" unsupported);
- decode_b($1, $3, $chk);
- } elsif (uc($2) eq 'Q'){
- $obj->{decode_q} or croak qq(MIME "Q" unsupported);
- decode_q($1, $3, $chk);
- } else {
- croak qq(MIME "$2" encoding is nonexistent!);
- }
- }egox;
- $_[1] = $str if $chk;
- return $str;
+ $str =~ s/(?:\r\n|[\r\n])([ \t])/$1/gos;
+
+ # decode each line separately
+ my @input = split /(\r\n|\r|\n)/o, $str;
+ my $output = substr($str, 0, 0); # to propagate taintedness
+
+ while ( @input ) {
+
+ my $line = shift @input;
+ my $sep = shift @input;
+
+ # in strict mode encoded words must be always separated by spaces or tabs
+ # except in comments when separator between words and comment round brackets can be omitted
+ my $re_word_begin = $STRICT_DECODE ? qr/(?:[ \t\n]|\A)\(?/ : qr//;
+ my $re_word_sep = $STRICT_DECODE ? qr/[ \t]+/ : qr/\s*/;
+ my $re_word_end = $STRICT_DECODE ? qr/\)?(?:[ \t\n]|\z)/ : qr//;
+
+ # concat consecutive encoded mime words with same charset, language and encoding
+ # fixes breaking inside multi-byte characters
+ 1 while $line =~ s/($re_word_begin)$re_capture_encoded_word$re_word_sep=\?\2\3\?\4\?($re_encoded_text)\?=(?=$re_word_end)/$1=\?$2$3\?$4\?$5$6\?=/;
+
+ $line =~ s{($re_word_begin)((?:$re_encoded_word$re_word_sep)*$re_encoded_word)(?=$re_word_end)}{
+ my $begin = $1;
+ my $words = $2;
+ $words =~ s{$re_capture_encoded_word$re_word_sep?}{
+ if (uc($3) eq 'B') {
+ $obj->{decode_b} or croak qq(MIME "B" unsupported);
+ decode_b($1, $4, $chk);
+ } elsif (uc($3) eq 'Q') {
+ $obj->{decode_q} or croak qq(MIME "Q" unsupported);
+ decode_q($1, $4, $chk);
+ } else {
+ croak qq(MIME "$3" encoding is nonexistent!);
+ }
+ }eg;
+ $begin . $words;
+ }eg;
+
+ $output .= $line;
+ $output .= $sep if defined $sep;
+
+ }
+
+ $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok
+ return $output;
}
sub decode_b {
- my $enc = shift;
- my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
- my $db64 = decode_base64(shift);
- my $chk = shift;
+ my ( $enc, $b, $chk ) = @_;
+ my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
+ # MIME::Base64::decode_base64 ignores everything after a '=' padding character
+ # split string after each sequence of padding characters and decode each substring
+ my $db64 = join('', map { decode_base64($_) } split /(?<==)(?=[^=])/, $b);
return $d->name eq 'utf8'
? Encode::decode_utf8($db64)
: $d->decode( $db64, $chk || Encode::FB_PERLQQ );
@@ -92,102 +119,92 @@ sub decode_q {
: $d->decode( $q, $chk || Encode::FB_PERLQQ );
}
-my $especials =
- join( '|' => map { quotemeta( chr($_) ) }
- unpack( "C*", qq{()<>,;:"'/[]?=} ) );
-
-my $re_encoded_word = qr{
- =\? # begin encoded word
- (?:[-0-9A-Za-z_]+) # charset (encoding)
- (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
- \?(?:[QqBb])\? # delimiter
- (?:.*?) # Base64-encodede contents
- \?= # end encoded word
-}xo;
-
-my $re_especials = qr{$re_encoded_word|$especials}xo;
-
-# cf:
-# https://rt.cpan.org/Ticket/Display.html?id=88717
-# https://www.ietf.org/rfc/rfc0822.txt
-my $re_linear_white_space = qr{(?:[ \t]|\r\n?)};
-
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
- my @line = ();
- for my $line ( split /\r\n|[\r\n]/o, $str ) {
- my ( @word, @subline );
- if ($line =~ /\A([\w\-]+:\s+)(.*)\z/o) {
- push @word, $1, $obj->_encode($2); # "X-Header-Name: ..."
+ $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok
+ return $obj->_fold_line($obj->_encode_line($str));
+}
+
+sub _fold_line {
+ my ( $obj, $line ) = @_;
+ my $bpl = $obj->{bpl};
+ my $output = substr($line, 0, 0); # to propagate taintedness
+
+ while ( length $line ) {
+ if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
+ $output .= $1;
+ $output .= "\r\n" . $2 if length $line;
+ } elsif ( $line =~ s/(\s)(.*)$// ) {
+ $output .= $line;
+ $line = $2;
+ $output .= "\r\n" . $1 if length $line;
} else {
- push @word, $obj->_encode($line); # anything else
+ $output .= $line;
+ last;
}
- my $subline = '';
- for my $word (@word) {
- use bytes ();
- if ( bytes::length($subline) + bytes::length($word) >
- $obj->{bpl} - 1 )
- {
- push @subline, $subline;
- $subline = '';
- }
- $subline .= ' ' if ($subline =~ /\?=$/ and $word =~ /^=\?/);
- $subline .= $word;
- }
- length($subline) and push @subline, $subline;
- push @line, join( "\n " => grep !/^$/, @subline );
}
- $_[1] = '' if $chk;
- return (substr($str, 0, 0) . join( "\n", @line ));
+
+ return $output;
}
use constant HEAD => '=?UTF-8?';
use constant TAIL => '?=';
-use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
+use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, B_len => \&_encode_b_len, Q_len => \&_encode_q_len };
-sub _encode {
+sub _encode_line {
my ( $o, $str ) = @_;
my $enc = $o->{encode};
+ my $enc_len = $enc . '_len';
my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
- # to coerce a floating-point arithmetics, the following contains
- # .0 in numbers -- dankogai
- $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
my @result = ();
my $chunk = '';
while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
- use bytes ();
- if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
+ if ( SINGLE->{$enc_len}($chunk . $chr) > $llen ) {
push @result, SINGLE->{$enc}($chunk);
$chunk = '';
}
$chunk .= $chr;
}
length($chunk) and push @result, SINGLE->{$enc}($chunk);
- return @result;
+ return join(' ', @result);
}
sub _encode_b {
HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
}
+sub _encode_b_len {
+ my ( $chunk ) = @_;
+ use bytes ();
+ return bytes::length($chunk) * 4 / 3;
+}
+
+my $valid_q_chars = '0-9A-Za-z !*+\-/';
+
sub _encode_q {
- my $chunk = shift;
+ my ( $chunk ) = @_;
$chunk = encode_utf8($chunk);
- $chunk =~ s{
- ([^0-9A-Za-z])
- }{
- join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
- }egox;
+ $chunk =~ s{([^$valid_q_chars])}{
+ join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
+ }egox;
+ $chunk =~ s/ /_/go;
return HEAD . 'Q?' . $chunk . TAIL;
}
+sub _encode_q_len {
+ my ( $chunk ) = @_;
+ use bytes ();
+ my $valid_count =()= $chunk =~ /[$valid_q_chars]/sgo;
+ return ( bytes::length($chunk) - $valid_count ) * 3 + $valid_count;
+}
+
1;
__END__
=head1 NAME
-Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
+Encode::MIME::Header -- MIME 'B' and 'Q' encoding for unstructured header
=head1 SYNOPSIS
@@ -197,7 +214,8 @@ Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
=head1 ABSTRACT
-This module implements RFC 2047 Mime Header Encoding. There are 3
+This module implements RFC 2047 MIME encoding for unstructured header.
+It cannot be used for structured headers like From or To. There are 3
variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
difference is described below
@@ -222,6 +240,25 @@ line.
=head1 BUGS
+Before version 2.83 this module had broken both decoder and encoder.
+Encoder inserted additional spaces, incorrectly encoded input data
+and produced invalid MIME strings. Decoder lot of times discarded
+white space characters, incorrectly interpreted data or decoded
+Base64 string as Quoted-Printable.
+
+As of version 2.83 encoder should be fully compliant of RFC 2047.
+Due to bugs in previous versions of encoder, decoder is by default in
+less strict compatible mode. It should be able to decode strings
+encoded by pre 2.83 version of this module. But this default mode is
+not correct according to RFC 2047.
+
+In default mode decoder try to decode every substring which looks like
+MIME encoded data. So it means that MIME data does not need to be
+separated by white space. To enforce correct strict mode, set package
+variable $Encode::MIME::Header::STRICT_DECODE to 1, e.g. by localizing:
+
+C<require Encode::MIME::Header; local $Encode::MIME::Header::STRICT_DECODE = 1;>
+
It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
and =?ISO-8859-1?= but that makes the implementation too complicated.
These days major mail agents all support =?UTF-8? so I think it is
diff --git a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
index 86955c83f1..86e66c371c 100644
--- a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
+++ b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
@@ -6,7 +6,7 @@ use warnings;
use parent qw(Encode::MIME::Header);
$Encode::Encoding{'MIME-Header-ISO_2022_JP'} =
- bless { encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
+ bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
__PACKAGE__;
use constant HEAD => '=?ISO-2022-JP?B?';
@@ -14,7 +14,7 @@ use constant TAIL => '?=';
use Encode::CJKConstants qw(%RE);
-our $VERSION = do { my @r = ( q$Revision: 1.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
# I owe the below codes totally to
# Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
diff --git a/cpan/Encode/lib/Encode/Supported.pod b/cpan/Encode/lib/Encode/Supported.pod
index c731509a81..b23f6ca83e 100644
--- a/cpan/Encode/lib/Encode/Supported.pod
+++ b/cpan/Encode/lib/Encode/Supported.pod
@@ -367,7 +367,7 @@ Unicode character should belong).
Not very popular. Needs CNS 11643-1 and -2 which are not available in
this module. CNS 11643 is supported (via euc-tw) in Encode::HanExtra.
-Autrijus Tang may add support for this encoding in his module in future.
+Audrey Tang may add support for this encoding in her module in future.
=item Various HP-UX encodings
diff --git a/cpan/Encode/t/encoding-locale.t b/cpan/Encode/t/encoding-locale.t
index 1153b8ed0a..7a305a0dcf 100644
--- a/cpan/Encode/t/encoding-locale.t
+++ b/cpan/Encode/t/encoding-locale.t
@@ -14,8 +14,9 @@ use Encode qw<find_encoding>;
my $locale_encoding = encoding::_get_locale_encoding;
SKIP: {
- is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value')
- or skip 'no locale encoding found', 1;
+ defined $locale_encoding or skip 'no locale encoding found', 3;
+
+ is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value');
my $enc = find_encoding($locale_encoding);
ok(defined $enc, 'encoding returned is supported')
diff --git a/cpan/Encode/t/mime-header.t b/cpan/Encode/t/mime-header.t
index b031aa4010..a9e6086129 100644
--- a/cpan/Encode/t/mime-header.t
+++ b/cpan/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
#
-# $Id: mime-header.t,v 2.8 2016/01/25 14:54:13 dankogai Exp dankogai $
+# $Id: mime-header.t,v 2.12 2016/04/11 07:17:02 dankogai Exp dankogai $
# This script is written in utf8
#
BEGIN {
@@ -19,114 +19,128 @@ BEGIN {
$| = 1;
}
-no utf8;
-
use strict;
-#use Test::More qw(no_plan);
-use Test::More tests => 14;
-use_ok("Encode::MIME::Header");
-
-my $eheader =<<'EOS';
-From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-my $dheader=<<"EOS";
-From: Keith Moore <moore\@cs.utk.edu>
-To: Keld J\xF8rn Simonsen <keld\@dkuug.dk>
-CC: Andr\xE9 Pirard <PIRARD\@vm1.ulg.ac.be>
-Subject: If you can read this you understand the example.
-EOS
-
-is(Encode::decode('MIME-Header', $eheader), $dheader, "decode ASCII (RFC2047)");
use utf8;
+use charnames ":full";
-my $uheader =<<'EOS';
-From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-is(Encode::decode('MIME-Header', $uheader), $dheader, "decode UTF-8 (RFC2047)");
-
-my $lheader =<<'EOS';
-From: =?US-ASCII*en-US?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-is(Encode::decode('MIME-Header', $lheader), $dheader, "decode language tag (RFC2231)");
-
-
-$dheader=<<'EOS';
-From: 小飼 弾 <dankogai@dan.co.jp>
-To: dankogai@dan.co.jp (小飼=Kogai, 弾=Dan)
-Subject: 漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか?
-EOS
-
-my $bheader =<<'EOS';
-From: =?UTF-8?B?5bCP6aO8IOW8viA8ZGFua29nYWlAZGFuLmNvLmpwPg==?=
-To: =?UTF-8?B?ZGFua29nYWlAZGFuLmNvLmpwICjlsI/po7w9S29nYWksIOW8vj1EYW4p?=
-Subject:
- =?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?=
- =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=
- =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=
- =?UTF-8?B?77yf?=
-EOS
+use Test::More tests => 130;
+use_ok("Encode::MIME::Header");
-my $qheader=<<'EOS';
-From: =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=20=E5=BC=BE=20=3Cdankogai=40?=
- =?UTF-8?Q?dan=2Eco=2Ejp=3E?=
-To: =?UTF-8?Q?dankogai=40dan=2Eco=2Ejp=20=28?=
- =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=3DKogai=2C=20=E5=BC=BE=3DDan?= =?UTF-8?Q?=29?=
-Subject:
- =?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?=
- =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?=
- =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?=
- =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?=
- =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?=
- =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C?=
- =?UTF-8?Q?=E3=82=8B=E3=81=AE=E3=81=8B=EF=BC=9F?=
-EOS
+my @decode_tests = (
+ # RFC2047 p.5
+ "=?iso-8859-1?q?this=20is=20some=20text?=" => "this is some text",
+ # RFC2047 p.10
+ "=?US-ASCII?Q?Keith_Moore?=" => "Keith Moore",
+ "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen",
+ "=?ISO-8859-1?Q?Andr=E9?= Pirard" => "André Pirard",
+ "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.",
+ "=?ISO-8859-1?Q?Olle_J=E4rnefors?=" => "Olle Järnefors",
+ "=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=" => "Patrik Fältström",
+ # RFC2047 p.11
+ "(=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)" => "(םולש ןב ילטפנ)",
+ "(=?ISO-8859-1?Q?a?=)" => "(a)",
+ "(=?ISO-8859-1?Q?a?= b)" => "(a b)",
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)",
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)",
+ "(=?ISO-8859-1?Q?a?=\r\n\t=?ISO-8859-1?Q?b?=)" => "(ab)",
+ # RFC2047 p.12
+ "(=?ISO-8859-1?Q?a_b?=)" => '(a b)',
+ "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)" => "(a b)",
+ # RFC2231 p.6
+ "=?US-ASCII*EN?Q?Keith_Moore?=" => "Keith Moore",
+ # others
+ "=?US-ASCII*en-US?Q?Keith_Moore?=" => "Keith Moore",
+ "=?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen",
+ "=?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard" => "André Pirard",
+ "=?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.",
+ # RT67569
+ "foo =?us-ascii?q?bar?=" => "foo bar",
+ "foo\r\n =?us-ascii?q?bar?=" => "foo bar",
+ "=?us-ascii?q?foo?= bar" => "foo bar",
+ "=?us-ascii?q?foo?=\r\n bar" => "foo bar",
+ "foo bar" => "foo bar",
+ "foo\r\n bar" => "foo bar",
+ "=?us-ascii?q?foo?= =?us-ascii?q?bar?=" => "foobar",
+ "=?us-ascii?q?foo?=\r\n =?us-ascii?q?bar?=" => "foobar",
+ "=?us-ascii?q?foo bar?=" => "=?us-ascii?q?foo bar?=",
+ "=?us-ascii?q?foo\r\n bar?=" => "=?us-ascii?q?foo bar?=",
+ # RT40027
+ "a: b\r\n c" => "a: b c",
+ # RT104422
+ "=?utf-8?Q?pre?= =?utf-8?B?IGZvbw==?=\r\n =?utf-8?Q?bar?=" => "pre foobar",
+);
+
+my @decode_default_tests = (
+ @decode_tests,
+ '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar',
+ '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"foo <bar@baz.foo> bar"',
+ "=?us-ascii?q?foo?==?us-ascii?q?bar?=" => "foobar",
+ "foo=?us-ascii?q?bar?=" => "foobar",
+ "foo =?us-ascii?q?=20?==?us-ascii?q?bar?=" => "foo bar",
+ # Encode::MIME::Header pre 2.83
+ "[=?UTF-8?B?ZsOzcnVt?=]=?UTF-8?B?IHNwcsOhdmE=?=" => "[fórum] správa",
+ "test:=?UTF-8?B?IHNwcsOhdmE=?=" => "test: správa",
+ "=?UTF-8?B?dMOpc3Q=?=:=?UTF-8?B?IHNwcsOhdmE=?=", "tést: správa",
+);
+
+my @decode_strict_tests = (
+ @decode_tests,
+ '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar',
+ '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="',
+);
+
+my @encode_tests = (
+ "小飼 弾" => "=?UTF-8?B?5bCP6aO8IOW8vg==?=", "=?UTF-8?Q?=E5=B0=8F=E9=A3=BC_=E5=BC=BE?=",
+ "漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか?" => "=?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?=\r\n =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=\r\n =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=\r\n =?UTF-8?B?77yf?=", "=?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?=\r\n =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?=\r\n =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?=\r\n =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?=\r\n =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?=\r\n =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C=E3=82=8B?=\r\n =?UTF-8?Q?=E3=81=AE=E3=81=8B=EF=BC=9F?=",
+ # double encode
+ "What is =?UTF-8?B?w4RwZmVs?= ?" => "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?=", "=?UTF-8?Q?What_is_=3D=3FUTF-8=3FB=3Fw4RwZmVs=3F=3D_=3F?=",
+ # pound 1024
+ "\N{POUND SIGN}1024" => "=?UTF-8?B?wqMxMDI0?=", "=?UTF-8?Q?=C2=A31024?=",
+ # latin1 characters
+ "\x{fc}" => "=?UTF-8?B?w7w=?=", "=?UTF-8?Q?=C3=BC?=",
+ # RT42627
+ Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0") => "=?UTF-8?B?wqN4eHh4eHh4eHh4eHh4eHh4eHh4MA==?=", "=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx0?=",
+ # RT87831
+ "0" => "=?UTF-8?B?MA==?=", "=?UTF-8?Q?0?=",
+ # RT88717
+ "Hey foo\x{2024}bar:whee" => "=?UTF-8?B?SGV5IGZvb+KApGJhcjp3aGVl?=", "=?UTF-8?Q?Hey_foo=E2=80=A4bar=3Awhee?=",
+ # valid q chars
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !*+-/" => "=?UTF-8?B?MDEyMzQ1Njc4OUFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaYWJjZGVmZ2hpams=?=\r\n =?UTF-8?B?bG1ub3BxcnN0dXZ3eHl6ICEqKy0v?=", "=?UTF-8?Q?0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_?=\r\n =?UTF-8?Q?!*+-/?=",
+ # invalid q chars
+ "." => "=?UTF-8?B?Lg==?=", "=?UTF-8?Q?=2E?=",
+ "," => "=?UTF-8?B?LA==?=", "=?UTF-8?Q?=2C?=",
+);
+
+sub info {
+ my ($str) = @_;
+ $str = Encode::encode_utf8($str);
+ $str =~ s/\r/\\r/gs;
+ $str =~ s/\n/\\n/gs;
+ return $str;
+}
-is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B");
-is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q");
-is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B");
-is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q");
+my @splice;
-$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?";
-$bheader = "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?=";
-$qheader = "=?UTF-8?Q?What=20is=20=3D=3FUTF=2D8=3FB=3Fw4R?="
- . "\n " . "=?UTF-8?Q?wZmVs=3F=3D=20=3F?=";
-is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B");
-is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q");
-{
- # From: Dave Evans <dave@rudolf.org.uk>
- # Subject: Bug in Encode::MIME::Header
- # Message-Id: <3F43440B.7060606@rudolf.org.uk>
- use charnames ":full";
- my $pound_1024 = "\N{POUND SIGN}1024";
- is(Encode::encode('MIME-Q' => $pound_1024), '=?UTF-8?Q?=C2=A31024?=',
- 'pound 1024');
+@splice = @encode_tests;
+while (my ($d, $b, $q) = splice @splice, 0, 3) {
+ is Encode::encode('MIME-Header', $d) => $b, info("encode default: $d => $b");
+ is Encode::encode('MIME-B', $d) => $b, info("encode base64: $d => $b");
+ is Encode::encode('MIME-Q', $d) => $q, info("encode qp: $d => $q");
+ is Encode::decode('MIME-B', $b) => $d, info("decode base64: $b => $d");
+ is Encode::decode('MIME-Q', $q) => $d, info("decode qp: $b => $d");
}
-is(Encode::encode('MIME-Q', "\x{fc}"), '=?UTF-8?Q?=C3=BC?=', 'Encode latin1 characters');
+@splice = @decode_default_tests;
+while (my ($e, $d) = splice @splice, 0, 2) {
+ is Encode::decode('MIME-Header', $e) => $d, info("decode default: $e => $d");
+}
-# RT42627
+local $Encode::MIME::Header::STRICT_DECODE = 1;
-my $rt42627 = Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0");
-is(Encode::encode('MIME-Q', $rt42627),
- '=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx?= =?UTF-8?Q?0?=',
- 'MIME-Q encoding does not truncate trailing zeros');
+@splice = @decode_strict_tests;
+while (my ($e, $d) = splice @splice, 0, 2) {
+ is Encode::decode('MIME-Header', $e) => $d, info("decode strict: $e => $d");
+}
-# RT87831
-is(Encode::encode('MIME-Header', '0'), '=?UTF-8?B?MA==?=', 'RT87831');
-__END__;
+__END__