diff options
author | Steve Hay <steve.m.hay@googlemail.com> | 2016-06-28 08:26:38 +0100 |
---|---|---|
committer | Steve Hay <steve.m.hay@googlemail.com> | 2016-06-28 08:26:38 +0100 |
commit | 0dcb562a7727869dc36989700ab4a3e5da7cad3b (patch) | |
tree | 2f3b438b3d835b37be707bd16eb24d1795667ce4 /cpan/Encode | |
parent | 36efb5a67d73cc824ada76aaa2e3b03bda5cdc60 (diff) | |
download | perl-0dcb562a7727869dc36989700ab4a3e5da7cad3b.tar.gz |
Upgrade Encode from version 2.80 to 2.84
This retains the customizations to Byte/Makefile.PL (not yet assimilated)
and encoding.pm (can't be removed without a $VERSION++, which would be a
customization again!).
Diffstat (limited to 'cpan/Encode')
-rw-r--r-- | cpan/Encode/Encode.pm | 4 | ||||
-rw-r--r-- | cpan/Encode/lib/Encode/MIME/Header.pm | 225 | ||||
-rw-r--r-- | cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm | 4 | ||||
-rw-r--r-- | cpan/Encode/lib/Encode/Supported.pod | 2 | ||||
-rw-r--r-- | cpan/Encode/t/encoding-locale.t | 5 | ||||
-rw-r--r-- | cpan/Encode/t/mime-header.t | 214 |
6 files changed, 253 insertions, 201 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm index 3dade96bf3..ff868a5794 100644 --- a/cpan/Encode/Encode.pm +++ b/cpan/Encode/Encode.pm @@ -1,10 +1,10 @@ # -# $Id: Encode.pm,v 2.80 2016/01/25 14:54:01 dankogai Exp $ +# $Id: Encode.pm,v 2.84 2016/04/11 07:16:52 dankogai Exp $ # package Encode; use strict; use warnings; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.80 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.84 $ =~ /(\d+)/g; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use XSLoader (); XSLoader::load( __PACKAGE__, $VERSION ); diff --git a/cpan/Encode/lib/Encode/MIME/Header.pm b/cpan/Encode/lib/Encode/MIME/Header.pm index ba6adba475..d74d453b8b 100644 --- a/cpan/Encode/lib/Encode/MIME/Header.pm +++ b/cpan/Encode/lib/Encode/MIME/Header.pm @@ -3,7 +3,7 @@ use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; @@ -26,7 +26,7 @@ $Encode::Encoding{'MIME-B'} = bless { $Encode::Encoding{'MIME-Q'} = bless { %seed, - decode_q => 1, + decode_b => 0, encode => 'Q', Name => 'MIME-Q', } => __PACKAGE__; @@ -36,47 +36,74 @@ use parent qw(Encode::Encoding); sub needs_lines { 1 } sub perlio_ok { 0 } +# RFC 2047 and RFC 2231 grammar +my $re_charset = qr/[-0-9A-Za-z_]+/; +my $re_language = qr/[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*/; +my $re_encoding = qr/[QqBb]/; +my $re_encoded_text = qr/[^\?\s]*/; +my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/; +my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/; + +our $STRICT_DECODE = 0; + sub decode($$;$) { use utf8; my ( $obj, $str, $chk ) = @_; - # zap spaces between encoded words - $str =~ s/\?=\s+=\?/\?==\?/gos; - + # multi-line header to single line - $str =~ s/(?:\r\n|[\r\n])[ \t]//gos; - - 1 while ( $str =~ - s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)([^?]*?)\?=\1([^?]*?\?=)/$1$2$3/ ) - ; # Concat consecutive QP encoded mime headers - # Fixes breaking inside multi-byte characters - - $str =~ s{ - =\? # begin encoded word - ([-0-9A-Za-z_]+) # charset (encoding) - (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) - \?([QqBb])\? # delimiter - (.*?) # Base64-encodede contents - \?= # end encoded word - }{ - if (uc($2) eq 'B'){ - $obj->{decode_b} or croak qq(MIME "B" unsupported); - decode_b($1, $3, $chk); - } elsif (uc($2) eq 'Q'){ - $obj->{decode_q} or croak qq(MIME "Q" unsupported); - decode_q($1, $3, $chk); - } else { - croak qq(MIME "$2" encoding is nonexistent!); - } - }egox; - $_[1] = $str if $chk; - return $str; + $str =~ s/(?:\r\n|[\r\n])([ \t])/$1/gos; + + # decode each line separately + my @input = split /(\r\n|\r|\n)/o, $str; + my $output = substr($str, 0, 0); # to propagate taintedness + + while ( @input ) { + + my $line = shift @input; + my $sep = shift @input; + + # in strict mode encoded words must be always separated by spaces or tabs + # except in comments when separator between words and comment round brackets can be omitted + my $re_word_begin = $STRICT_DECODE ? qr/(?:[ \t\n]|\A)\(?/ : qr//; + my $re_word_sep = $STRICT_DECODE ? qr/[ \t]+/ : qr/\s*/; + my $re_word_end = $STRICT_DECODE ? qr/\)?(?:[ \t\n]|\z)/ : qr//; + + # concat consecutive encoded mime words with same charset, language and encoding + # fixes breaking inside multi-byte characters + 1 while $line =~ s/($re_word_begin)$re_capture_encoded_word$re_word_sep=\?\2\3\?\4\?($re_encoded_text)\?=(?=$re_word_end)/$1=\?$2$3\?$4\?$5$6\?=/; + + $line =~ s{($re_word_begin)((?:$re_encoded_word$re_word_sep)*$re_encoded_word)(?=$re_word_end)}{ + my $begin = $1; + my $words = $2; + $words =~ s{$re_capture_encoded_word$re_word_sep?}{ + if (uc($3) eq 'B') { + $obj->{decode_b} or croak qq(MIME "B" unsupported); + decode_b($1, $4, $chk); + } elsif (uc($3) eq 'Q') { + $obj->{decode_q} or croak qq(MIME "Q" unsupported); + decode_q($1, $4, $chk); + } else { + croak qq(MIME "$3" encoding is nonexistent!); + } + }eg; + $begin . $words; + }eg; + + $output .= $line; + $output .= $sep if defined $sep; + + } + + $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok + return $output; } sub decode_b { - my $enc = shift; - my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); - my $db64 = decode_base64(shift); - my $chk = shift; + my ( $enc, $b, $chk ) = @_; + my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); + # MIME::Base64::decode_base64 ignores everything after a '=' padding character + # split string after each sequence of padding characters and decode each substring + my $db64 = join('', map { decode_base64($_) } split /(?<==)(?=[^=])/, $b); return $d->name eq 'utf8' ? Encode::decode_utf8($db64) : $d->decode( $db64, $chk || Encode::FB_PERLQQ ); @@ -92,102 +119,92 @@ sub decode_q { : $d->decode( $q, $chk || Encode::FB_PERLQQ ); } -my $especials = - join( '|' => map { quotemeta( chr($_) ) } - unpack( "C*", qq{()<>,;:"'/[]?=} ) ); - -my $re_encoded_word = qr{ - =\? # begin encoded word - (?:[-0-9A-Za-z_]+) # charset (encoding) - (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) - \?(?:[QqBb])\? # delimiter - (?:.*?) # Base64-encodede contents - \?= # end encoded word -}xo; - -my $re_especials = qr{$re_encoded_word|$especials}xo; - -# cf: -# https://rt.cpan.org/Ticket/Display.html?id=88717 -# https://www.ietf.org/rfc/rfc0822.txt -my $re_linear_white_space = qr{(?:[ \t]|\r\n?)}; - sub encode($$;$) { my ( $obj, $str, $chk ) = @_; - my @line = (); - for my $line ( split /\r\n|[\r\n]/o, $str ) { - my ( @word, @subline ); - if ($line =~ /\A([\w\-]+:\s+)(.*)\z/o) { - push @word, $1, $obj->_encode($2); # "X-Header-Name: ..." + $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok + return $obj->_fold_line($obj->_encode_line($str)); +} + +sub _fold_line { + my ( $obj, $line ) = @_; + my $bpl = $obj->{bpl}; + my $output = substr($line, 0, 0); # to propagate taintedness + + while ( length $line ) { + if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) { + $output .= $1; + $output .= "\r\n" . $2 if length $line; + } elsif ( $line =~ s/(\s)(.*)$// ) { + $output .= $line; + $line = $2; + $output .= "\r\n" . $1 if length $line; } else { - push @word, $obj->_encode($line); # anything else + $output .= $line; + last; } - my $subline = ''; - for my $word (@word) { - use bytes (); - if ( bytes::length($subline) + bytes::length($word) > - $obj->{bpl} - 1 ) - { - push @subline, $subline; - $subline = ''; - } - $subline .= ' ' if ($subline =~ /\?=$/ and $word =~ /^=\?/); - $subline .= $word; - } - length($subline) and push @subline, $subline; - push @line, join( "\n " => grep !/^$/, @subline ); } - $_[1] = '' if $chk; - return (substr($str, 0, 0) . join( "\n", @line )); + + return $output; } use constant HEAD => '=?UTF-8?'; use constant TAIL => '?='; -use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, }; +use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, B_len => \&_encode_b_len, Q_len => \&_encode_q_len }; -sub _encode { +sub _encode_line { my ( $o, $str ) = @_; my $enc = $o->{encode}; + my $enc_len = $enc . '_len'; my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) ); - # to coerce a floating-point arithmetics, the following contains - # .0 in numbers -- dankogai - $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0; my @result = (); my $chunk = ''; while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) { - use bytes (); - if ( bytes::length($chunk) + bytes::length($chr) > $llen ) { + if ( SINGLE->{$enc_len}($chunk . $chr) > $llen ) { push @result, SINGLE->{$enc}($chunk); $chunk = ''; } $chunk .= $chr; } length($chunk) and push @result, SINGLE->{$enc}($chunk); - return @result; + return join(' ', @result); } sub _encode_b { HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL; } +sub _encode_b_len { + my ( $chunk ) = @_; + use bytes (); + return bytes::length($chunk) * 4 / 3; +} + +my $valid_q_chars = '0-9A-Za-z !*+\-/'; + sub _encode_q { - my $chunk = shift; + my ( $chunk ) = @_; $chunk = encode_utf8($chunk); - $chunk =~ s{ - ([^0-9A-Za-z]) - }{ - join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) - }egox; + $chunk =~ s{([^$valid_q_chars])}{ + join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) + }egox; + $chunk =~ s/ /_/go; return HEAD . 'Q?' . $chunk . TAIL; } +sub _encode_q_len { + my ( $chunk ) = @_; + use bytes (); + my $valid_count =()= $chunk =~ /[$valid_q_chars]/sgo; + return ( bytes::length($chunk) - $valid_count ) * 3 + $valid_count; +} + 1; __END__ =head1 NAME -Encode::MIME::Header -- MIME 'B' and 'Q' header encoding +Encode::MIME::Header -- MIME 'B' and 'Q' encoding for unstructured header =head1 SYNOPSIS @@ -197,7 +214,8 @@ Encode::MIME::Header -- MIME 'B' and 'Q' header encoding =head1 ABSTRACT -This module implements RFC 2047 Mime Header Encoding. There are 3 +This module implements RFC 2047 MIME encoding for unstructured header. +It cannot be used for structured headers like From or To. There are 3 variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The difference is described below @@ -222,6 +240,25 @@ line. =head1 BUGS +Before version 2.83 this module had broken both decoder and encoder. +Encoder inserted additional spaces, incorrectly encoded input data +and produced invalid MIME strings. Decoder lot of times discarded +white space characters, incorrectly interpreted data or decoded +Base64 string as Quoted-Printable. + +As of version 2.83 encoder should be fully compliant of RFC 2047. +Due to bugs in previous versions of encoder, decoder is by default in +less strict compatible mode. It should be able to decode strings +encoded by pre 2.83 version of this module. But this default mode is +not correct according to RFC 2047. + +In default mode decoder try to decode every substring which looks like +MIME encoded data. So it means that MIME data does not need to be +separated by white space. To enforce correct strict mode, set package +variable $Encode::MIME::Header::STRICT_DECODE to 1, e.g. by localizing: + +C<require Encode::MIME::Header; local $Encode::MIME::Header::STRICT_DECODE = 1;> + It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP? and =?ISO-8859-1?= but that makes the implementation too complicated. These days major mail agents all support =?UTF-8? so I think it is diff --git a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm index 86955c83f1..86e66c371c 100644 --- a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm +++ b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm @@ -6,7 +6,7 @@ use warnings; use parent qw(Encode::MIME::Header); $Encode::Encoding{'MIME-Header-ISO_2022_JP'} = - bless { encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } => + bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } => __PACKAGE__; use constant HEAD => '=?ISO-2022-JP?B?'; @@ -14,7 +14,7 @@ use constant TAIL => '?='; use Encode::CJKConstants qw(%RE); -our $VERSION = do { my @r = ( q$Revision: 1.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; # I owe the below codes totally to # Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64 diff --git a/cpan/Encode/lib/Encode/Supported.pod b/cpan/Encode/lib/Encode/Supported.pod index c731509a81..b23f6ca83e 100644 --- a/cpan/Encode/lib/Encode/Supported.pod +++ b/cpan/Encode/lib/Encode/Supported.pod @@ -367,7 +367,7 @@ Unicode character should belong). Not very popular. Needs CNS 11643-1 and -2 which are not available in this module. CNS 11643 is supported (via euc-tw) in Encode::HanExtra. -Autrijus Tang may add support for this encoding in his module in future. +Audrey Tang may add support for this encoding in her module in future. =item Various HP-UX encodings diff --git a/cpan/Encode/t/encoding-locale.t b/cpan/Encode/t/encoding-locale.t index 1153b8ed0a..7a305a0dcf 100644 --- a/cpan/Encode/t/encoding-locale.t +++ b/cpan/Encode/t/encoding-locale.t @@ -14,8 +14,9 @@ use Encode qw<find_encoding>; my $locale_encoding = encoding::_get_locale_encoding; SKIP: { - is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value') - or skip 'no locale encoding found', 1; + defined $locale_encoding or skip 'no locale encoding found', 3; + + is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value'); my $enc = find_encoding($locale_encoding); ok(defined $enc, 'encoding returned is supported') diff --git a/cpan/Encode/t/mime-header.t b/cpan/Encode/t/mime-header.t index b031aa4010..a9e6086129 100644 --- a/cpan/Encode/t/mime-header.t +++ b/cpan/Encode/t/mime-header.t @@ -1,5 +1,5 @@ # -# $Id: mime-header.t,v 2.8 2016/01/25 14:54:13 dankogai Exp dankogai $ +# $Id: mime-header.t,v 2.12 2016/04/11 07:17:02 dankogai Exp dankogai $ # This script is written in utf8 # BEGIN { @@ -19,114 +19,128 @@ BEGIN { $| = 1; } -no utf8; - use strict; -#use Test::More qw(no_plan); -use Test::More tests => 14; -use_ok("Encode::MIME::Header"); - -my $eheader =<<'EOS'; -From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu> -To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk> -CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be> -Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= - =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= -EOS - -my $dheader=<<"EOS"; -From: Keith Moore <moore\@cs.utk.edu> -To: Keld J\xF8rn Simonsen <keld\@dkuug.dk> -CC: Andr\xE9 Pirard <PIRARD\@vm1.ulg.ac.be> -Subject: If you can read this you understand the example. -EOS - -is(Encode::decode('MIME-Header', $eheader), $dheader, "decode ASCII (RFC2047)"); use utf8; +use charnames ":full"; -my $uheader =<<'EOS'; -From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu> -To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk> -CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be> -Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= - =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= -EOS - -is(Encode::decode('MIME-Header', $uheader), $dheader, "decode UTF-8 (RFC2047)"); - -my $lheader =<<'EOS'; -From: =?US-ASCII*en-US?Q?Keith_Moore?= <moore@cs.utk.edu> -To: =?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk> -CC: =?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be> -Subject: =?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= - =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= -EOS - -is(Encode::decode('MIME-Header', $lheader), $dheader, "decode language tag (RFC2231)"); - - -$dheader=<<'EOS'; -From: 小飼 弾 <dankogai@dan.co.jp> -To: dankogai@dan.co.jp (小飼=Kogai, 弾=Dan) -Subject: 漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか? -EOS - -my $bheader =<<'EOS'; -From: =?UTF-8?B?5bCP6aO8IOW8viA8ZGFua29nYWlAZGFuLmNvLmpwPg==?= -To: =?UTF-8?B?ZGFua29nYWlAZGFuLmNvLmpwICjlsI/po7w9S29nYWksIOW8vj1EYW4p?= -Subject: - =?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?= - =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?= - =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?= - =?UTF-8?B?77yf?= -EOS +use Test::More tests => 130; +use_ok("Encode::MIME::Header"); -my $qheader=<<'EOS'; -From: =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=20=E5=BC=BE=20=3Cdankogai=40?= - =?UTF-8?Q?dan=2Eco=2Ejp=3E?= -To: =?UTF-8?Q?dankogai=40dan=2Eco=2Ejp=20=28?= - =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=3DKogai=2C=20=E5=BC=BE=3DDan?= =?UTF-8?Q?=29?= -Subject: - =?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?= - =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?= - =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?= - =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?= - =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?= - =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C?= - =?UTF-8?Q?=E3=82=8B=E3=81=AE=E3=81=8B=EF=BC=9F?= -EOS +my @decode_tests = ( + # RFC2047 p.5 + "=?iso-8859-1?q?this=20is=20some=20text?=" => "this is some text", + # RFC2047 p.10 + "=?US-ASCII?Q?Keith_Moore?=" => "Keith Moore", + "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen", + "=?ISO-8859-1?Q?Andr=E9?= Pirard" => "André Pirard", + "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.", + "=?ISO-8859-1?Q?Olle_J=E4rnefors?=" => "Olle Järnefors", + "=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=" => "Patrik Fältström", + # RFC2047 p.11 + "(=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)" => "(םולש ןב ילטפנ)", + "(=?ISO-8859-1?Q?a?=)" => "(a)", + "(=?ISO-8859-1?Q?a?= b)" => "(a b)", + "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)", + "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)", + "(=?ISO-8859-1?Q?a?=\r\n\t=?ISO-8859-1?Q?b?=)" => "(ab)", + # RFC2047 p.12 + "(=?ISO-8859-1?Q?a_b?=)" => '(a b)', + "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)" => "(a b)", + # RFC2231 p.6 + "=?US-ASCII*EN?Q?Keith_Moore?=" => "Keith Moore", + # others + "=?US-ASCII*en-US?Q?Keith_Moore?=" => "Keith Moore", + "=?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen", + "=?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard" => "André Pirard", + "=?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.", + # RT67569 + "foo =?us-ascii?q?bar?=" => "foo bar", + "foo\r\n =?us-ascii?q?bar?=" => "foo bar", + "=?us-ascii?q?foo?= bar" => "foo bar", + "=?us-ascii?q?foo?=\r\n bar" => "foo bar", + "foo bar" => "foo bar", + "foo\r\n bar" => "foo bar", + "=?us-ascii?q?foo?= =?us-ascii?q?bar?=" => "foobar", + "=?us-ascii?q?foo?=\r\n =?us-ascii?q?bar?=" => "foobar", + "=?us-ascii?q?foo bar?=" => "=?us-ascii?q?foo bar?=", + "=?us-ascii?q?foo\r\n bar?=" => "=?us-ascii?q?foo bar?=", + # RT40027 + "a: b\r\n c" => "a: b c", + # RT104422 + "=?utf-8?Q?pre?= =?utf-8?B?IGZvbw==?=\r\n =?utf-8?Q?bar?=" => "pre foobar", +); + +my @decode_default_tests = ( + @decode_tests, + '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar', + '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"foo <bar@baz.foo> bar"', + "=?us-ascii?q?foo?==?us-ascii?q?bar?=" => "foobar", + "foo=?us-ascii?q?bar?=" => "foobar", + "foo =?us-ascii?q?=20?==?us-ascii?q?bar?=" => "foo bar", + # Encode::MIME::Header pre 2.83 + "[=?UTF-8?B?ZsOzcnVt?=]=?UTF-8?B?IHNwcsOhdmE=?=" => "[fórum] správa", + "test:=?UTF-8?B?IHNwcsOhdmE=?=" => "test: správa", + "=?UTF-8?B?dMOpc3Q=?=:=?UTF-8?B?IHNwcsOhdmE=?=", "tést: správa", +); + +my @decode_strict_tests = ( + @decode_tests, + '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar', + '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="', +); + +my @encode_tests = ( + "小飼 弾" => "=?UTF-8?B?5bCP6aO8IOW8vg==?=", "=?UTF-8?Q?=E5=B0=8F=E9=A3=BC_=E5=BC=BE?=", + "漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか?" => "=?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?=\r\n =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=\r\n =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=\r\n =?UTF-8?B?77yf?=", "=?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?=\r\n =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?=\r\n =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?=\r\n =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?=\r\n =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?=\r\n =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C=E3=82=8B?=\r\n =?UTF-8?Q?=E3=81=AE=E3=81=8B=EF=BC=9F?=", + # double encode + "What is =?UTF-8?B?w4RwZmVs?= ?" => "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?=", "=?UTF-8?Q?What_is_=3D=3FUTF-8=3FB=3Fw4RwZmVs=3F=3D_=3F?=", + # pound 1024 + "\N{POUND SIGN}1024" => "=?UTF-8?B?wqMxMDI0?=", "=?UTF-8?Q?=C2=A31024?=", + # latin1 characters + "\x{fc}" => "=?UTF-8?B?w7w=?=", "=?UTF-8?Q?=C3=BC?=", + # RT42627 + Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0") => "=?UTF-8?B?wqN4eHh4eHh4eHh4eHh4eHh4eHh4MA==?=", "=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx0?=", + # RT87831 + "0" => "=?UTF-8?B?MA==?=", "=?UTF-8?Q?0?=", + # RT88717 + "Hey foo\x{2024}bar:whee" => "=?UTF-8?B?SGV5IGZvb+KApGJhcjp3aGVl?=", "=?UTF-8?Q?Hey_foo=E2=80=A4bar=3Awhee?=", + # valid q chars + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !*+-/" => "=?UTF-8?B?MDEyMzQ1Njc4OUFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaYWJjZGVmZ2hpams=?=\r\n =?UTF-8?B?bG1ub3BxcnN0dXZ3eHl6ICEqKy0v?=", "=?UTF-8?Q?0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_?=\r\n =?UTF-8?Q?!*+-/?=", + # invalid q chars + "." => "=?UTF-8?B?Lg==?=", "=?UTF-8?Q?=2E?=", + "," => "=?UTF-8?B?LA==?=", "=?UTF-8?Q?=2C?=", +); + +sub info { + my ($str) = @_; + $str = Encode::encode_utf8($str); + $str =~ s/\r/\\r/gs; + $str =~ s/\n/\\n/gs; + return $str; +} -is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B"); -is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q"); -is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B"); -is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q"); +my @splice; -$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?"; -$bheader = "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?="; -$qheader = "=?UTF-8?Q?What=20is=20=3D=3FUTF=2D8=3FB=3Fw4R?=" - . "\n " . "=?UTF-8?Q?wZmVs=3F=3D=20=3F?="; -is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B"); -is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q"); -{ - # From: Dave Evans <dave@rudolf.org.uk> - # Subject: Bug in Encode::MIME::Header - # Message-Id: <3F43440B.7060606@rudolf.org.uk> - use charnames ":full"; - my $pound_1024 = "\N{POUND SIGN}1024"; - is(Encode::encode('MIME-Q' => $pound_1024), '=?UTF-8?Q?=C2=A31024?=', - 'pound 1024'); +@splice = @encode_tests; +while (my ($d, $b, $q) = splice @splice, 0, 3) { + is Encode::encode('MIME-Header', $d) => $b, info("encode default: $d => $b"); + is Encode::encode('MIME-B', $d) => $b, info("encode base64: $d => $b"); + is Encode::encode('MIME-Q', $d) => $q, info("encode qp: $d => $q"); + is Encode::decode('MIME-B', $b) => $d, info("decode base64: $b => $d"); + is Encode::decode('MIME-Q', $q) => $d, info("decode qp: $b => $d"); } -is(Encode::encode('MIME-Q', "\x{fc}"), '=?UTF-8?Q?=C3=BC?=', 'Encode latin1 characters'); +@splice = @decode_default_tests; +while (my ($e, $d) = splice @splice, 0, 2) { + is Encode::decode('MIME-Header', $e) => $d, info("decode default: $e => $d"); +} -# RT42627 +local $Encode::MIME::Header::STRICT_DECODE = 1; -my $rt42627 = Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0"); -is(Encode::encode('MIME-Q', $rt42627), - '=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx?= =?UTF-8?Q?0?=', - 'MIME-Q encoding does not truncate trailing zeros'); +@splice = @decode_strict_tests; +while (my ($e, $d) = splice @splice, 0, 2) { + is Encode::decode('MIME-Header', $e) => $d, info("decode strict: $e => $d"); +} -# RT87831 -is(Encode::encode('MIME-Header', '0'), '=?UTF-8?B?MA==?=', 'RT87831'); -__END__; +__END__ |