Upgrade Encode from version 2.80 to 2.84

This retains the customizations to Byte/Makefile.PL (not yet assimilated) and encoding.pm (can't be removed without a $VERSION++, which would be a customization again!).
author: Steve Hay <steve.m.hay@googlemail.com> 2016-06-28 08:26:38 +0100
committer: Steve Hay <steve.m.hay@googlemail.com> 2016-06-28 08:26:38 +0100
commit: 0dcb562a7727869dc36989700ab4a3e5da7cad3b (patch)
tree: 2f3b438b3d835b37be707bd16eb24d1795667ce4 /cpan/Encode
parent: 36efb5a67d73cc824ada76aaa2e3b03bda5cdc60 (diff)
download: perl-0dcb562a7727869dc36989700ab4a3e5da7cad3b.tar.gz
6 files changed, 253 insertions, 201 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index 3dade96bf3..ff868a5794 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -1,10 +1,10 @@
 #
-# $Id: Encode.pm,v 2.80 2016/01/25 14:54:01 dankogai Exp $
+# $Id: Encode.pm,v 2.84 2016/04/11 07:16:52 dankogai Exp $
 #
 package Encode;
 use strict;
 use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.80 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.84 $ =~ /(\d+)/g;
 use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
 use XSLoader ();
 XSLoader::load( __PACKAGE__, $VERSION );
diff --git a/cpan/Encode/lib/Encode/MIME/Header.pm b/cpan/Encode/lib/Encode/MIME/Header.pm
index ba6adba475..d74d453b8b 100644
--- a/cpan/Encode/lib/Encode/MIME/Header.pm
+++ b/cpan/Encode/lib/Encode/MIME/Header.pm
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 no warnings 'redefine';
 
-our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 use Encode qw(find_encoding encode_utf8 decode_utf8);
 use MIME::Base64;
 use Carp;
@@ -26,7 +26,7 @@ $Encode::Encoding{'MIME-B'} = bless {
 
 $Encode::Encoding{'MIME-Q'} = bless {
     %seed,
-    decode_q => 1,
+    decode_b => 0,
     encode   => 'Q',
     Name     => 'MIME-Q',
 } => __PACKAGE__;
@@ -36,47 +36,74 @@ use parent qw(Encode::Encoding);
 sub needs_lines { 1 }
 sub perlio_ok   { 0 }
 
+# RFC 2047 and RFC 2231 grammar
+my $re_charset = qr/[-0-9A-Za-z_]+/;
+my $re_language = qr/[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*/;
+my $re_encoding = qr/[QqBb]/;
+my $re_encoded_text = qr/[^\?\s]*/;
+my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
+my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
+
+our $STRICT_DECODE = 0;
+
 sub decode($$;$) {
     use utf8;
     my ( $obj, $str, $chk ) = @_;
-    # zap spaces between encoded words
-    $str =~ s/\?=\s+=\?/\?==\?/gos;
-    
+
     # multi-line header to single line
-    $str =~ s/(?:\r\n|[\r\n])[ \t]//gos;
-
-    1 while ( $str =~
-              s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)([^?]*?)\?=\1([^?]*?\?=)/$1$2$3/ )
-      ;    # Concat consecutive QP encoded mime headers
-           # Fixes breaking inside multi-byte characters
-
-    $str =~ s{
-        =\?              # begin encoded word
-        ([-0-9A-Za-z_]+) # charset (encoding)
-        (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
-        \?([QqBb])\?     # delimiter
-        (.*?)            # Base64-encodede contents
-        \?=              # end encoded word
-    }{
-        if      (uc($2) eq 'B'){
-            $obj->{decode_b} or croak qq(MIME "B" unsupported);
-            decode_b($1, $3, $chk);
-        } elsif (uc($2) eq 'Q'){
-            $obj->{decode_q} or croak qq(MIME "Q" unsupported);
-            decode_q($1, $3, $chk);
-        } else {
-            croak qq(MIME "$2" encoding is nonexistent!);
-        }
-    }egox;
-    $_[1] = $str if $chk;
-    return $str;
+    $str =~ s/(?:\r\n|[\r\n])([ \t])/$1/gos;
+
+    # decode each line separately
+    my @input = split /(\r\n|\r|\n)/o, $str;
+    my $output = substr($str, 0, 0); # to propagate taintedness
+
+    while ( @input ) {
+
+        my $line = shift @input;
+        my $sep = shift @input;
+
+        # in strict mode encoded words must be always separated by spaces or tabs
+        # except in comments when separator between words and comment round brackets can be omitted
+        my $re_word_begin = $STRICT_DECODE ? qr/(?:[ \t\n]|\A)\(?/ : qr//;
+        my $re_word_sep = $STRICT_DECODE ? qr/[ \t]+/ : qr/\s*/;
+        my $re_word_end = $STRICT_DECODE ? qr/\)?(?:[ \t\n]|\z)/ : qr//;
+
+        # concat consecutive encoded mime words with same charset, language and encoding
+        # fixes breaking inside multi-byte characters
+        1 while $line =~ s/($re_word_begin)$re_capture_encoded_word$re_word_sep=\?\2\3\?\4\?($re_encoded_text)\?=(?=$re_word_end)/$1=\?$2$3\?$4\?$5$6\?=/;
+
+        $line =~ s{($re_word_begin)((?:$re_encoded_word$re_word_sep)*$re_encoded_word)(?=$re_word_end)}{
+            my $begin = $1;
+            my $words = $2;
+            $words =~ s{$re_capture_encoded_word$re_word_sep?}{
+                if (uc($3) eq 'B') {
+                    $obj->{decode_b} or croak qq(MIME "B" unsupported);
+                    decode_b($1, $4, $chk);
+                } elsif (uc($3) eq 'Q') {
+                    $obj->{decode_q} or croak qq(MIME "Q" unsupported);
+                    decode_q($1, $4, $chk);
+                } else {
+                    croak qq(MIME "$3" encoding is nonexistent!);
+                }
+            }eg;
+            $begin . $words;
+        }eg;
+
+        $output .= $line;
+        $output .= $sep if defined $sep;
+
+    }
+
+    $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok
+    return $output;
 }
 
 sub decode_b {
-    my $enc  = shift;
-    my $d    = find_encoding($enc) or croak qq(Unknown encoding "$enc");
-    my $db64 = decode_base64(shift);
-    my $chk  = shift;
+    my ( $enc, $b, $chk ) = @_;
+    my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
+    # MIME::Base64::decode_base64 ignores everything after a '=' padding character
+    # split string after each sequence of padding characters and decode each substring
+    my $db64 = join('', map { decode_base64($_) } split /(?<==)(?=[^=])/, $b);
     return $d->name eq 'utf8'
       ? Encode::decode_utf8($db64)
       : $d->decode( $db64, $chk || Encode::FB_PERLQQ );
@@ -92,102 +119,92 @@ sub decode_q {
       : $d->decode( $q, $chk || Encode::FB_PERLQQ );
 }
 
-my $especials =
-  join( '|' => map { quotemeta( chr($_) ) }
-      unpack( "C*", qq{()<>,;:"'/[]?=} ) );
-
-my $re_encoded_word = qr{
-    =\?                # begin encoded word
-    (?:[-0-9A-Za-z_]+) # charset (encoding)
-    (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231)
-    \?(?:[QqBb])\?     # delimiter
-    (?:.*?)            # Base64-encodede contents
-    \?=                # end encoded word
-}xo;
-
-my $re_especials = qr{$re_encoded_word|$especials}xo;
-
-# cf:
-#    https://rt.cpan.org/Ticket/Display.html?id=88717
-#    https://www.ietf.org/rfc/rfc0822.txt
-my $re_linear_white_space = qr{(?:[ \t]|\r\n?)};
-
 sub encode($$;$) {
     my ( $obj, $str, $chk ) = @_;
-    my @line = ();
-    for my $line ( split /\r\n|[\r\n]/o, $str ) {
-        my ( @word, @subline );
-        if ($line =~ /\A([\w\-]+:\s+)(.*)\z/o) {
-            push @word, $1, $obj->_encode($2); # "X-Header-Name: ..."
+    $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok
+    return $obj->_fold_line($obj->_encode_line($str));
+}
+
+sub _fold_line {
+    my ( $obj, $line ) = @_;
+    my $bpl = $obj->{bpl};
+    my $output = substr($line, 0, 0); # to propagate taintedness
+
+    while ( length $line ) {
+        if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
+            $output .= $1;
+            $output .= "\r\n" . $2 if length $line;
+        } elsif ( $line =~ s/(\s)(.*)$// ) {
+            $output .= $line;
+            $line = $2;
+            $output .= "\r\n" . $1 if length $line;
         } else {
-            push @word, $obj->_encode($line);  # anything else
+            $output .= $line;
+            last;
         }
-        my $subline = '';
-        for my $word (@word) {
-            use bytes ();
-            if ( bytes::length($subline) + bytes::length($word) >
-                $obj->{bpl} - 1 )
-            {
-                push @subline, $subline;
-                $subline = '';
-            }
-            $subline .= ' ' if ($subline =~ /\?=$/ and $word =~ /^=\?/);
-            $subline .= $word;
-        }
-        length($subline) and push @subline, $subline;
-        push @line, join( "\n " => grep !/^$/, @subline );
     }
-    $_[1] = '' if $chk;
-    return (substr($str, 0, 0) . join( "\n", @line ));
+
+    return $output;
 }
 
 use constant HEAD   => '=?UTF-8?';
 use constant TAIL   => '?=';
-use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
+use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, B_len => \&_encode_b_len, Q_len => \&_encode_q_len };
 
-sub _encode {
+sub _encode_line {
     my ( $o, $str ) = @_;
     my $enc  = $o->{encode};
+    my $enc_len = $enc . '_len';
     my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
 
-    # to coerce a floating-point arithmetics, the following contains
-    # .0 in numbers -- dankogai
-    $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0;
     my @result = ();
     my $chunk  = '';
     while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
-        use bytes ();
-        if ( bytes::length($chunk) + bytes::length($chr) > $llen ) {
+        if ( SINGLE->{$enc_len}($chunk . $chr) > $llen ) {
             push @result, SINGLE->{$enc}($chunk);
             $chunk = '';
         }
         $chunk .= $chr;
     }
     length($chunk) and push @result, SINGLE->{$enc}($chunk);
-    return @result;
+    return join(' ', @result);
 }
 
 sub _encode_b {
     HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
 }
 
+sub _encode_b_len {
+    my ( $chunk ) = @_;
+    use bytes ();
+    return bytes::length($chunk) * 4 / 3;
+}
+
+my $valid_q_chars = '0-9A-Za-z !*+\-/';
+
 sub _encode_q {
-    my $chunk = shift;
+    my ( $chunk ) = @_;
     $chunk = encode_utf8($chunk);
-    $chunk =~ s{
-	   ([^0-9A-Za-z])
-       }{
-            join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
-       }egox;
+    $chunk =~ s{([^$valid_q_chars])}{
+        join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
+    }egox;
+    $chunk =~ s/ /_/go;
     return HEAD . 'Q?' . $chunk . TAIL;
 }
 
+sub _encode_q_len {
+    my ( $chunk ) = @_;
+    use bytes ();
+    my $valid_count =()= $chunk =~ /[$valid_q_chars]/sgo;
+    return ( bytes::length($chunk) - $valid_count ) * 3 + $valid_count;
+}
+
 1;
 __END__
 
 =head1 NAME
 
-Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
+Encode::MIME::Header -- MIME 'B' and 'Q' encoding for unstructured header
 
 =head1 SYNOPSIS
 
@@ -197,7 +214,8 @@ Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
 
 =head1 ABSTRACT
 
-This module implements RFC 2047 Mime Header Encoding.  There are 3
+This module implements RFC 2047 MIME encoding for unstructured header.
+It cannot be used for structured headers like From or To.  There are 3
 variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>.  The
 difference is described below
 
@@ -222,6 +240,25 @@ line.
 
 =head1 BUGS
 
+Before version 2.83 this module had broken both decoder and encoder.
+Encoder inserted additional spaces, incorrectly encoded input data
+and produced invalid MIME strings. Decoder lot of times discarded
+white space characters, incorrectly interpreted data or decoded
+Base64 string as Quoted-Printable.
+
+As of version 2.83 encoder should be fully compliant of RFC 2047.
+Due to bugs in previous versions of encoder, decoder is by default in
+less strict compatible mode. It should be able to decode strings
+encoded by pre 2.83 version of this module. But this default mode is
+not correct according to RFC 2047.
+
+In default mode decoder try to decode every substring which looks like
+MIME encoded data. So it means that MIME data does not need to be
+separated by white space. To enforce correct strict mode, set package
+variable $Encode::MIME::Header::STRICT_DECODE to 1, e.g. by localizing:
+
+C<require Encode::MIME::Header; local $Encode::MIME::Header::STRICT_DECODE = 1;>
+
 It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
 and =?ISO-8859-1?= but that makes the implementation too complicated.
 These days major mail agents all support =?UTF-8? so I think it is
diff --git a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
index 86955c83f1..86e66c371c 100644
--- a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
+++ b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
@@ -6,7 +6,7 @@ use warnings;
 use parent qw(Encode::MIME::Header);
 
 $Encode::Encoding{'MIME-Header-ISO_2022_JP'} =
-  bless { encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
+  bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
   __PACKAGE__;
 
 use constant HEAD => '=?ISO-2022-JP?B?';
@@ -14,7 +14,7 @@ use constant TAIL => '?=';
 
 use Encode::CJKConstants qw(%RE);
 
-our $VERSION = do { my @r = ( q$Revision: 1.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 
 # I owe the below codes totally to
 #   Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
diff --git a/cpan/Encode/lib/Encode/Supported.pod b/cpan/Encode/lib/Encode/Supported.pod
index c731509a81..b23f6ca83e 100644
--- a/cpan/Encode/lib/Encode/Supported.pod
+++ b/cpan/Encode/lib/Encode/Supported.pod
@@ -367,7 +367,7 @@ Unicode character should belong).
 
 Not very popular.  Needs CNS 11643-1 and -2 which are not available in
 this module.  CNS 11643 is supported (via euc-tw) in Encode::HanExtra.
-Autrijus Tang may add support for this encoding in his module in future.
+Audrey Tang may add support for this encoding in her module in future.
 
 =item Various HP-UX encodings
 
diff --git a/cpan/Encode/t/encoding-locale.t b/cpan/Encode/t/encoding-locale.t
index 1153b8ed0a..7a305a0dcf 100644
--- a/cpan/Encode/t/encoding-locale.t
+++ b/cpan/Encode/t/encoding-locale.t
@@ -14,8 +14,9 @@ use Encode qw<find_encoding>;
 my $locale_encoding = encoding::_get_locale_encoding;
 
 SKIP: {
-    is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value')
-	or skip 'no locale encoding found', 1;
+    defined $locale_encoding or skip 'no locale encoding found', 3;
+
+    is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value');
 
     my $enc = find_encoding($locale_encoding);
     ok(defined $enc, 'encoding returned is supported')
diff --git a/cpan/Encode/t/mime-header.t b/cpan/Encode/t/mime-header.t
index b031aa4010..a9e6086129 100644
--- a/cpan/Encode/t/mime-header.t
+++ b/cpan/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
 #
-# $Id: mime-header.t,v 2.8 2016/01/25 14:54:13 dankogai Exp dankogai $
+# $Id: mime-header.t,v 2.12 2016/04/11 07:17:02 dankogai Exp dankogai $
 # This script is written in utf8
 #
 BEGIN {
@@ -19,114 +19,128 @@ BEGIN {
     $| = 1;
 }
 
-no utf8;
-
 use strict;
-#use Test::More qw(no_plan);
-use Test::More tests => 14;
-use_ok("Encode::MIME::Header");
-
-my $eheader =<<'EOS';
-From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-my $dheader=<<"EOS";
-From: Keith Moore <moore\@cs.utk.edu>
-To: Keld J\xF8rn Simonsen <keld\@dkuug.dk>
-CC: Andr\xE9 Pirard <PIRARD\@vm1.ulg.ac.be>
-Subject: If you can read this you understand the example.
-EOS
-
-is(Encode::decode('MIME-Header', $eheader), $dheader, "decode ASCII (RFC2047)");
 
 use utf8;
+use charnames ":full";
 
-my $uheader =<<'EOS';
-From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-is(Encode::decode('MIME-Header', $uheader), $dheader, "decode UTF-8 (RFC2047)");
-
-my $lheader =<<'EOS';
-From: =?US-ASCII*en-US?Q?Keith_Moore?= <moore@cs.utk.edu>
-To: =?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
-CC: =?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>
-Subject: =?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
- =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
-EOS
-
-is(Encode::decode('MIME-Header', $lheader), $dheader, "decode language tag (RFC2231)");
-
-
-$dheader=<<'EOS';
-From: 小飼 弾 <dankogai@dan.co.jp>
-To: dankogai@dan.co.jp (小飼=Kogai, 弾=Dan)
-Subject: 漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか？
-EOS
-
-my $bheader =<<'EOS';
-From: =?UTF-8?B?5bCP6aO8IOW8viA8ZGFua29nYWlAZGFuLmNvLmpwPg==?=
-To: =?UTF-8?B?ZGFua29nYWlAZGFuLmNvLmpwICjlsI/po7w9S29nYWksIOW8vj1EYW4p?=
-Subject: 
- =?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?=
- =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=
- =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=
- =?UTF-8?B?77yf?=
-EOS
+use Test::More tests => 130;
+use_ok("Encode::MIME::Header");
 
-my $qheader=<<'EOS';
-From: =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=20=E5=BC=BE=20=3Cdankogai=40?=
- =?UTF-8?Q?dan=2Eco=2Ejp=3E?=
-To: =?UTF-8?Q?dankogai=40dan=2Eco=2Ejp=20=28?=
- =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=3DKogai=2C=20=E5=BC=BE=3DDan?= =?UTF-8?Q?=29?=
-Subject: 
- =?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?=
- =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?=
- =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?=
- =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?=
- =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?=
- =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C?=
- =?UTF-8?Q?=E3=82=8B=E3=81=AE=E3=81=8B=EF=BC=9F?=
-EOS
+my @decode_tests = (
+    # RFC2047 p.5
+    "=?iso-8859-1?q?this=20is=20some=20text?=" => "this is some text",
+    # RFC2047 p.10
+    "=?US-ASCII?Q?Keith_Moore?=" => "Keith Moore",
+    "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen",
+    "=?ISO-8859-1?Q?Andr=E9?= Pirard" => "André Pirard",
+    "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.",
+    "=?ISO-8859-1?Q?Olle_J=E4rnefors?=" => "Olle Järnefors",
+    "=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=" => "Patrik Fältström",
+    # RFC2047 p.11
+    "(=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)" => "(םולש ןב ילטפנ)",
+    "(=?ISO-8859-1?Q?a?=)" => "(a)",
+    "(=?ISO-8859-1?Q?a?= b)" => "(a b)",
+    "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)",
+    "(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)" => "(ab)",
+    "(=?ISO-8859-1?Q?a?=\r\n\t=?ISO-8859-1?Q?b?=)" => "(ab)",
+    # RFC2047 p.12
+    "(=?ISO-8859-1?Q?a_b?=)" => '(a b)',
+    "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)" => "(a b)",
+    # RFC2231 p.6
+    "=?US-ASCII*EN?Q?Keith_Moore?=" => "Keith Moore",
+    # others
+    "=?US-ASCII*en-US?Q?Keith_Moore?=" => "Keith Moore",
+    "=?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen",
+    "=?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard" => "André Pirard",
+    "=?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.",
+    # RT67569
+    "foo =?us-ascii?q?bar?=" => "foo bar",
+    "foo\r\n =?us-ascii?q?bar?=" => "foo bar",
+    "=?us-ascii?q?foo?= bar" => "foo bar",
+    "=?us-ascii?q?foo?=\r\n bar" => "foo bar",
+    "foo bar" => "foo bar",
+    "foo\r\n bar" => "foo bar",
+    "=?us-ascii?q?foo?= =?us-ascii?q?bar?=" => "foobar",
+    "=?us-ascii?q?foo?=\r\n =?us-ascii?q?bar?=" => "foobar",
+    "=?us-ascii?q?foo bar?=" => "=?us-ascii?q?foo bar?=",
+    "=?us-ascii?q?foo\r\n bar?=" => "=?us-ascii?q?foo bar?=",
+    # RT40027
+    "a: b\r\n c" => "a: b c",
+    # RT104422
+    "=?utf-8?Q?pre?= =?utf-8?B?IGZvbw==?=\r\n =?utf-8?Q?bar?=" => "pre foobar",
+);
+
+my @decode_default_tests = (
+    @decode_tests,
+    '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar',
+    '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"foo <bar@baz.foo> bar"',
+    "=?us-ascii?q?foo?==?us-ascii?q?bar?=" => "foobar",
+    "foo=?us-ascii?q?bar?=" => "foobar",
+    "foo =?us-ascii?q?=20?==?us-ascii?q?bar?=" => "foo  bar",
+    # Encode::MIME::Header pre 2.83
+    "[=?UTF-8?B?ZsOzcnVt?=]=?UTF-8?B?IHNwcsOhdmE=?=" => "[fórum] správa",
+    "test:=?UTF-8?B?IHNwcsOhdmE=?=" => "test: správa",
+    "=?UTF-8?B?dMOpc3Q=?=:=?UTF-8?B?IHNwcsOhdmE=?=", "tést: správa",
+);
+
+my @decode_strict_tests = (
+    @decode_tests,
+    '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar',
+    '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="',
+);
+
+my @encode_tests = (
+    "小飼 弾" => "=?UTF-8?B?5bCP6aO8IOW8vg==?=", "=?UTF-8?Q?=E5=B0=8F=E9=A3=BC_=E5=BC=BE?=",
+    "漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか？" => "=?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?=\r\n =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=\r\n =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=\r\n =?UTF-8?B?77yf?=", "=?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?=\r\n =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?=\r\n =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?=\r\n =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?=\r\n =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?=\r\n =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C=E3=82=8B?=\r\n =?UTF-8?Q?=E3=81=AE=E3=81=8B=EF=BC=9F?=",
+    # double encode
+    "What is =?UTF-8?B?w4RwZmVs?= ?" => "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?=", "=?UTF-8?Q?What_is_=3D=3FUTF-8=3FB=3Fw4RwZmVs=3F=3D_=3F?=",
+    # pound 1024
+    "\N{POUND SIGN}1024" => "=?UTF-8?B?wqMxMDI0?=", "=?UTF-8?Q?=C2=A31024?=",
+    # latin1 characters
+    "\x{fc}" => "=?UTF-8?B?w7w=?=", "=?UTF-8?Q?=C3=BC?=",
+    # RT42627
+    Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0") => "=?UTF-8?B?wqN4eHh4eHh4eHh4eHh4eHh4eHh4MA==?=", "=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx0?=",
+    # RT87831
+    "0" => "=?UTF-8?B?MA==?=", "=?UTF-8?Q?0?=",
+    # RT88717
+    "Hey foo\x{2024}bar:whee" => "=?UTF-8?B?SGV5IGZvb+KApGJhcjp3aGVl?=", "=?UTF-8?Q?Hey_foo=E2=80=A4bar=3Awhee?=",
+    # valid q chars
+    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !*+-/" => "=?UTF-8?B?MDEyMzQ1Njc4OUFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaYWJjZGVmZ2hpams=?=\r\n =?UTF-8?B?bG1ub3BxcnN0dXZ3eHl6ICEqKy0v?=", "=?UTF-8?Q?0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_?=\r\n =?UTF-8?Q?!*+-/?=",
+    # invalid q chars
+    "." => "=?UTF-8?B?Lg==?=", "=?UTF-8?Q?=2E?=",
+    "," => "=?UTF-8?B?LA==?=", "=?UTF-8?Q?=2C?=",
+);
+
+sub info {
+    my ($str) = @_;
+    $str = Encode::encode_utf8($str);
+    $str =~ s/\r/\\r/gs;
+    $str =~ s/\n/\\n/gs;
+    return $str;
+}
 
-is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B");
-is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q");
-is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B");
-is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q");
+my @splice;
 
-$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?";
-$bheader = "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?=";
-$qheader = "=?UTF-8?Q?What=20is=20=3D=3FUTF=2D8=3FB=3Fw4R?="
-         . "\n " . "=?UTF-8?Q?wZmVs=3F=3D=20=3F?=";
-is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B");
-is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q");
-{
-    # From: Dave Evans <dave@rudolf.org.uk>
-    # Subject: Bug in Encode::MIME::Header
-    # Message-Id: <3F43440B.7060606@rudolf.org.uk>
-    use charnames ":full";
-    my $pound_1024 = "\N{POUND SIGN}1024";
-    is(Encode::encode('MIME-Q' => $pound_1024), '=?UTF-8?Q?=C2=A31024?=',
-       'pound 1024');
+@splice = @encode_tests;
+while (my ($d, $b, $q) = splice @splice, 0, 3) {
+    is Encode::encode('MIME-Header', $d) => $b, info("encode default: $d => $b");
+    is Encode::encode('MIME-B', $d) => $b, info("encode base64: $d => $b");
+    is Encode::encode('MIME-Q', $d) => $q, info("encode qp: $d => $q");
+    is Encode::decode('MIME-B', $b) => $d, info("decode base64: $b => $d");
+    is Encode::decode('MIME-Q', $q) => $d, info("decode qp: $b => $d");
 }
 
-is(Encode::encode('MIME-Q', "\x{fc}"), '=?UTF-8?Q?=C3=BC?=', 'Encode latin1 characters');
+@splice = @decode_default_tests;
+while (my ($e, $d) = splice @splice, 0, 2) {
+    is Encode::decode('MIME-Header', $e) => $d, info("decode default: $e => $d");
+}
 
-# RT42627
+local $Encode::MIME::Header::STRICT_DECODE = 1;
 
-my $rt42627 = Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0");
-is(Encode::encode('MIME-Q', $rt42627), 
-   '=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx?= =?UTF-8?Q?0?=',
-   'MIME-Q encoding does not truncate trailing zeros');
+@splice = @decode_strict_tests;
+while (my ($e, $d) = splice @splice, 0, 2) {
+    is Encode::decode('MIME-Header', $e) => $d, info("decode strict: $e => $d");
+}
 
-# RT87831
-is(Encode::encode('MIME-Header', '0'), '=?UTF-8?B?MA==?=', 'RT87831');
-__END__;
+__END__
author	Steve Hay <steve.m.hay@googlemail.com>	2016-06-28 08:26:38 +0100
committer	Steve Hay <steve.m.hay@googlemail.com>	2016-06-28 08:26:38 +0100
commit	0dcb562a7727869dc36989700ab4a3e5da7cad3b (patch)
tree	2f3b438b3d835b37be707bd16eb24d1795667ce4 /cpan/Encode
parent	36efb5a67d73cc824ada76aaa2e3b03bda5cdc60 (diff)
download	perl-0dcb562a7727869dc36989700ab4a3e5da7cad3b.tar.gz