summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron Crane <arc@cpan.org>2017-07-18 18:06:46 +0100
committerAaron Crane <arc@cpan.org>2017-07-18 18:06:46 +0100
commit3f60a9307162888df8e8e13b2361a3b8380c8744 (patch)
treeed7e9ea407d62779e1a440ea7b905da5d32522fd
parent589c97f41d373f2e7205a4ffbcb7a639635b7bda (diff)
downloadperl-3f60a9307162888df8e8e13b2361a3b8380c8744.tar.gz
Import Encode-2.92 from CPAN
This also permits removing the local customisation for the previous version.
-rw-r--r--MANIFEST6
-rwxr-xr-xPorting/Maintainers.pl3
-rw-r--r--cpan/Encode/Encode.pm277
-rw-r--r--cpan/Encode/Encode.xs324
-rw-r--r--cpan/Encode/Makefile.PL100
-rw-r--r--cpan/Encode/Unicode/Unicode.pm14
-rw-r--r--cpan/Encode/Unicode/Unicode.xs6
-rw-r--r--cpan/Encode/bin/enc2xs14
-rw-r--r--cpan/Encode/bin/ucmlint7
-rw-r--r--cpan/Encode/encoding.pm21
-rw-r--r--cpan/Encode/lib/Encode/Alias.pm8
-rw-r--r--cpan/Encode/lib/Encode/CN/HZ.pm4
-rw-r--r--cpan/Encode/lib/Encode/Encoding.pm24
-rw-r--r--cpan/Encode/lib/Encode/GSM0338.pm11
-rw-r--r--cpan/Encode/lib/Encode/Guess.pm5
-rw-r--r--cpan/Encode/lib/Encode/JP/JIS7.pm7
-rw-r--r--cpan/Encode/lib/Encode/KR/2022_KR.pm4
-rw-r--r--cpan/Encode/lib/Encode/MIME/Header.pm23
-rw-r--r--cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm6
-rw-r--r--cpan/Encode/lib/Encode/Unicode/UTF7.pm9
-rw-r--r--cpan/Encode/t/CJKT.t6
-rw-r--r--cpan/Encode/t/enc_data.t2
-rw-r--r--cpan/Encode/t/enc_eucjp.t23
-rw-r--r--cpan/Encode/t/enc_module.t2
-rw-r--r--cpan/Encode/t/enc_utf8.t23
-rw-r--r--cpan/Encode/t/fallback.t44
-rw-r--r--cpan/Encode/t/guess.t6
-rw-r--r--cpan/Encode/t/jperl.t2
-rw-r--r--cpan/Encode/t/mime-header.t8
-rw-r--r--cpan/Encode/t/truncated_utf8.t55
-rw-r--r--cpan/Encode/t/undef.t25
-rw-r--r--cpan/Encode/t/use-Encode-Alias.t8
-rw-r--r--cpan/Encode/t/utf8messages.t33
-rw-r--r--cpan/Encode/t/whatwg-aliases.json455
-rw-r--r--cpan/Encode/t/whatwg-aliases.t66
-rw-r--r--t/porting/customized.dat1
36 files changed, 1210 insertions, 422 deletions
diff --git a/MANIFEST b/MANIFEST
index 01cb8b36a1..1cefc5c6b5 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -984,11 +984,17 @@ cpan/Encode/t/rt76824.t test script
cpan/Encode/t/rt85489.t test script
cpan/Encode/t/rt86327.t test script
cpan/Encode/t/taint.t
+cpan/Encode/t/truncated_utf8.t
+cpan/Encode/t/undef.t
cpan/Encode/t/unibench.pl benchmark script
cpan/Encode/t/Unicode.t test script
+cpan/Encode/t/use-Encode-Alias.t
+cpan/Encode/t/utf8messages.t
cpan/Encode/t/utf8ref.t test script
cpan/Encode/t/utf8strict.t test script
cpan/Encode/t/utf8warnings.t
+cpan/Encode/t/whatwg-aliases.json
+cpan/Encode/t/whatwg-aliases.t
cpan/Encode/TW/Makefile.PL Encode extension
cpan/Encode/TW/TW.pm Encode extension
cpan/Encode/ucm/8859-1.ucm Unicode Character Map
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl
index 7a703b7697..ccde06feab 100755
--- a/Porting/Maintainers.pl
+++ b/Porting/Maintainers.pl
@@ -386,9 +386,8 @@ use File::Glob qw(:case);
},
'Encode' => {
- 'DISTRIBUTION' => 'DANKOGAI/Encode-2.88.tar.gz',
+ 'DISTRIBUTION' => 'DANKOGAI/Encode-2.92.tar.gz',
'FILES' => q[cpan/Encode],
- 'CUSTOMIZED' => [ qw(Unicode/Unicode.pm) ],
},
'encoding::warnings' => {
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index 57b4292279..5a27c5990c 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -1,16 +1,21 @@
#
-# $Id: Encode.pm,v 2.88 2016/11/29 23:30:30 dankogai Exp dankogai $
+# $Id: Encode.pm,v 2.92 2017/07/18 07:15:29 dankogai Exp dankogai $
#
package Encode;
use strict;
use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.88 $ =~ /(\d+)/g;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
-use XSLoader ();
-XSLoader::load( __PACKAGE__, $VERSION );
+our $VERSION;
+BEGIN {
+ $VERSION = sprintf "%d.%02d", q$Revision: 2.92 $ =~ /(\d+)/g;
+ require XSLoader;
+ XSLoader::load( __PACKAGE__, $VERSION );
+}
use Exporter 5.57 'import';
+our @CARP_NOT = qw(Encode::Encoder);
+
# Public, encouraged API is exported by default
our @EXPORT = qw(
@@ -44,7 +49,10 @@ our %EXPORT_TAGS = (
our $ON_EBCDIC = ( ord("A") == 193 );
-use Encode::Alias;
+use Encode::Alias ();
+use Encode::MIME::Name;
+
+use Storable;
# Make a %Encoding package variable to allow a certain amount of cheating
our %Encoding;
@@ -96,6 +104,9 @@ sub define_encoding {
my $alias = shift;
define_alias( $alias, $obj );
}
+ my $class = ref($obj);
+ push @Encode::CARP_NOT, $class unless grep { $_ eq $class } @Encode::CARP_NOT;
+ push @Encode::Encoding::CARP_NOT, $class unless grep { $_ eq $class } @Encode::Encoding::CARP_NOT;
return $obj;
}
@@ -127,6 +138,15 @@ sub getEncoding {
return;
}
+# HACK: These two functions must be defined in Encode and because of
+# cyclic dependency between Encode and Encode::Alias, Exporter does not work
+sub find_alias {
+ goto &Encode::Alias::find_alias;
+}
+sub define_alias {
+ goto &Encode::Alias::define_alias;
+}
+
sub find_encoding($;$) {
my ( $name, $skip_external ) = @_;
return __PACKAGE__->getEncoding( $name, $skip_external );
@@ -134,8 +154,6 @@ sub find_encoding($;$) {
sub find_mime_encoding($;$) {
my ( $mime_name, $skip_external ) = @_;
- eval { require Encode::MIME::Name; };
- $@ and return;
my $name = Encode::MIME::Name::get_encode_name( $mime_name );
return find_encoding( $name, $skip_external );
}
@@ -149,8 +167,6 @@ sub resolve_alias($) {
sub clone_encoding($) {
my $obj = find_encoding(shift);
ref $obj or return;
- eval { require Storable };
- $@ and return;
return Storable::dclone($obj);
}
@@ -182,7 +198,7 @@ sub encode($$;$) {
else {
$octets = $enc->encode( $string, $check );
}
- $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC() );
+ $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC );
return $octets;
}
*str2bytes = \&encode;
@@ -211,7 +227,7 @@ sub decode($$;$) {
else {
$string = $enc->decode( $octets, $check );
}
- $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() );
+ $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC );
return $string;
}
*bytes2str = \&decode;
@@ -278,133 +294,87 @@ sub decode_utf8($;$) {
$check ||= 0;
$utf8enc ||= find_encoding('utf8');
my $string = $utf8enc->decode( $octets, $check );
- $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() );
+ $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC );
return $string;
}
-# sub decode_utf8($;$) {
-# my ( $str, $check ) = @_;
-# return $str if is_utf8($str);
-# if ($check) {
-# return decode( "utf8", $str, $check );
-# }
-# else {
-# return decode( "utf8", $str );
-# return $str;
-# }
-# }
-
-predefine_encodings(1);
-
-#
-# This is to restore %Encoding if really needed;
-#
-
-sub predefine_encodings {
- require Encode::Encoding;
- no warnings 'redefine';
- my $use_xs = shift;
- if ($ON_EBCDIC) {
-
- # was in Encode::UTF_EBCDIC
- package Encode::UTF_EBCDIC;
- push @Encode::UTF_EBCDIC::ISA, 'Encode::Encoding';
- *decode = sub {
- my ( undef, $str, $chk ) = @_;
- my $res = '';
- for ( my $i = 0 ; $i < length($str) ; $i++ ) {
- $res .=
- chr(
- utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) )
- );
- }
- $_[1] = '' if $chk;
- return $res;
- };
- *encode = sub {
- my ( undef, $str, $chk ) = @_;
- my $res = '';
- for ( my $i = 0 ; $i < length($str) ; $i++ ) {
- $res .=
- chr(
- utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) )
- );
- }
- $_[1] = '' if $chk;
- return $res;
- };
- $Encode::Encoding{Unicode} =
- bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC";
+onBOOT;
+
+if ($ON_EBCDIC) {
+ package Encode::UTF_EBCDIC;
+ use parent 'Encode::Encoding';
+ my $obj = bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC";
+ Encode::define_encoding($obj, 'Unicode');
+ sub decode {
+ my ( undef, $str, $chk ) = @_;
+ my $res = '';
+ for ( my $i = 0 ; $i < length($str) ; $i++ ) {
+ $res .=
+ chr(
+ utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) )
+ );
+ }
+ $_[1] = '' if $chk;
+ return $res;
}
- else {
-
- package Encode::Internal;
- push @Encode::Internal::ISA, 'Encode::Encoding';
- *decode = sub {
- my ( undef, $str, $chk ) = @_;
- utf8::upgrade($str);
- $_[1] = '' if $chk;
- return $str;
- };
- *encode = \&decode;
- $Encode::Encoding{Unicode} =
- bless { Name => "Internal" } => "Encode::Internal";
+ sub encode {
+ my ( undef, $str, $chk ) = @_;
+ my $res = '';
+ for ( my $i = 0 ; $i < length($str) ; $i++ ) {
+ $res .=
+ chr(
+ utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) )
+ );
+ }
+ $_[1] = '' if $chk;
+ return $res;
}
- {
- # https://rt.cpan.org/Public/Bug/Display.html?id=103253
- package Encode::XS;
- push @Encode::XS::ISA, 'Encode::Encoding';
+} else {
+ package Encode::Internal;
+ use parent 'Encode::Encoding';
+ my $obj = bless { Name => "Internal" } => "Encode::Internal";
+ Encode::define_encoding($obj, 'Unicode');
+ sub decode {
+ my ( undef, $str, $chk ) = @_;
+ utf8::upgrade($str);
+ $_[1] = '' if $chk;
+ return $str;
}
- {
+ *encode = \&decode;
+}
- # was in Encode::utf8
- package Encode::utf8;
- push @Encode::utf8::ISA, 'Encode::Encoding';
+{
+ # https://rt.cpan.org/Public/Bug/Display.html?id=103253
+ package Encode::XS;
+ use parent 'Encode::Encoding';
+}
- #
- if ($use_xs) {
- Encode::DEBUG and warn __PACKAGE__, " XS on";
- *decode = \&decode_xs;
- *encode = \&encode_xs;
- }
- else {
- Encode::DEBUG and warn __PACKAGE__, " XS off";
- *decode = sub {
- my ( undef, $octets, $chk ) = @_;
- my $str = Encode::decode_utf8($octets);
- if ( defined $str ) {
- $_[1] = '' if $chk;
- return $str;
- }
- return undef;
- };
- *encode = sub {
- my ( undef, $string, $chk ) = @_;
- my $octets = Encode::encode_utf8($string);
- $_[1] = '' if $chk;
- return $octets;
- };
+{
+ package Encode::utf8;
+ use parent 'Encode::Encoding';
+ my %obj = (
+ 'utf8' => { Name => 'utf8' },
+ 'utf-8-strict' => { Name => 'utf-8-strict', strict_utf8 => 1 }
+ );
+ for ( keys %obj ) {
+ bless $obj{$_} => __PACKAGE__;
+ Encode::define_encoding( $obj{$_} => $_ );
+ }
+ sub cat_decode {
+ # ($obj, $dst, $src, $pos, $trm, $chk)
+ # currently ignores $chk
+ my ( undef, undef, undef, $pos, $trm ) = @_;
+ my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
+ use bytes;
+ if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) {
+ $$rdst .=
+ substr( $$rsrc, $pos, $npos - $pos + length($trm) );
+ $$rpos = $npos + length($trm);
+ return 1;
}
- *cat_decode = sub { # ($obj, $dst, $src, $pos, $trm, $chk)
- # currently ignores $chk
- my ( undef, undef, undef, $pos, $trm ) = @_;
- my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ];
- use bytes;
- if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) {
- $$rdst .=
- substr( $$rsrc, $pos, $npos - $pos + length($trm) );
- $$rpos = $npos + length($trm);
- return 1;
- }
- $$rdst .= substr( $$rsrc, $pos );
- $$rpos = length($$rsrc);
- return '';
- };
- $Encode::Encoding{utf8} =
- bless { Name => "utf8" } => "Encode::utf8";
- $Encode::Encoding{"utf-8-strict"} =
- bless { Name => "utf-8-strict", strict_utf8 => 1 }
- => "Encode::utf8";
+ $$rdst .= substr( $$rsrc, $pos );
+ $$rpos = length($$rsrc);
+ return '';
}
}
@@ -516,14 +486,16 @@ ISO-8859-1, also known as Latin1:
$octets = encode("iso-8859-1", $string);
-B<CAVEAT>: When you run C<$octets = encode("utf8", $string)>, then
+B<CAVEAT>: When you run C<$octets = encode("UTF-8", $string)>, then
$octets I<might not be equal to> $string. Though both contain the
same data, the UTF8 flag for $octets is I<always> off. When you
encode anything, the UTF8 flag on the result is always off, even when it
-contains a completely valid utf8 string. See L</"The UTF8 flag"> below.
+contains a completely valid UTF-8 string. See L</"The UTF8 flag"> below.
If the $string is C<undef>, then C<undef> is returned.
+C<str2bytes> may be used as an alias for C<encode>.
+
=head3 decode
$string = decode(ENCODING, OCTETS[, CHECK])
@@ -544,13 +516,15 @@ internal format:
$string = decode("iso-8859-1", $octets);
-B<CAVEAT>: When you run C<$string = decode("utf8", $octets)>, then $string
+B<CAVEAT>: When you run C<$string = decode("UTF-8", $octets)>, then $string
I<might not be equal to> $octets. Though both contain the same data, the
UTF8 flag for $string is on. See L</"The UTF8 flag">
below.
If the $string is C<undef>, then C<undef> is returned.
+C<bytes2str> may be used as an alias for C<decode>.
+
=head3 find_encoding
[$obj =] find_encoding(ENCODING)
@@ -559,11 +533,11 @@ Returns the I<encoding object> corresponding to I<ENCODING>. Returns
C<undef> if no matching I<ENCODING> is find. The returned object is
what does the actual encoding or decoding.
- $utf8 = decode($name, $bytes);
+ $string = decode($name, $bytes);
is in fact
- $utf8 = do {
+ $string = do {
$obj = find_encoding($name);
croak qq(encoding "$name" not found) unless ref $obj;
$obj->decode($bytes);
@@ -575,8 +549,8 @@ You can therefore save time by reusing this object as follows;
my $enc = find_encoding("iso-8859-1");
while(<>) {
- my $utf8 = $enc->decode($_);
- ... # now do something with $utf8;
+ my $string = $enc->decode($_);
+ ... # now do something with $string;
}
Besides L</decode> and L</encode>, other methods are
@@ -624,13 +598,13 @@ and C<undef> on error.
B<CAVEAT>: The following operations may look the same, but are not:
- from_to($data, "iso-8859-1", "utf8"); #1
+ from_to($data, "iso-8859-1", "UTF-8"); #1
$data = decode("iso-8859-1", $data); #2
Both #1 and #2 make $data consist of a completely valid UTF-8 string,
but only #2 turns the UTF8 flag on. #1 is equivalent to:
- $data = encode("utf8", decode("iso-8859-1", $data));
+ $data = encode("UTF-8", decode("iso-8859-1", $data));
See L</"The UTF8 flag"> below.
@@ -655,7 +629,11 @@ followed by C<encode> as follows:
Equivalent to C<$octets = encode("utf8", $string)>. The characters in
$string are encoded in Perl's internal format, and the result is returned
as a sequence of octets. Because all possible characters in Perl have a
-(loose, not strict) UTF-8 representation, this function cannot fail.
+(loose, not strict) utf8 representation, this function cannot fail.
+
+B<WARNING>: do not use this function for data exchange as it can produce
+not strict utf8 $octets! For strictly valid UTF-8 output use
+C<$octets = encode("UTF-8", $string)>.
=head3 decode_utf8
@@ -663,11 +641,15 @@ as a sequence of octets. Because all possible characters in Perl have a
Equivalent to C<$string = decode("utf8", $octets [, CHECK])>.
The sequence of octets represented by $octets is decoded
-from UTF-8 into a sequence of logical characters.
-Because not all sequences of octets are valid UTF-8,
+from (loose, not strict) utf8 into a sequence of logical characters.
+Because not all sequences of octets are valid not strict utf8,
it is quite possible for this function to fail.
For CHECK, see L</"Handling Malformed Data">.
+B<WARNING>: do not use this function for data exchange as it can produce
+$string with not strict utf8 representation! For strictly valid UTF-8
+$string representation use C<$string = decode("UTF-8", $octets [, CHECK])>.
+
B<CAVEAT>: the input I<$octets> might be modified in-place depending on
what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be
left unchanged.
@@ -903,15 +885,14 @@ octets that represent the fallback character. For instance:
Acts like C<FB_PERLQQ> but U+I<XXXX> is used instead of C<\x{I<XXXX>}>.
-Even the fallback for C<decode> must return octets, which are
-then decoded with the character encoding that C<decode> accepts. So for
+Fallback for C<decode> must return decoded string (sequence of characters)
+and takes a list of ordinal values as its arguments. So for
example if you wish to decode octets as UTF-8, and use ISO-8859-15 as
a fallback for bytes that are not valid UTF-8, you could write
$str = decode 'UTF-8', $octets, sub {
- my $tmp = chr shift;
- from_to $tmp, 'ISO-8859-15', 'UTF-8';
- return $tmp;
+ my $tmp = join '', map chr, @_;
+ return decode 'ISO-8859-15', $tmp;
};
=head1 Defining Encodings
@@ -980,9 +961,9 @@ When you I<encode>, the resulting UTF8 flag is always B<off>.
When you I<decode>, the resulting UTF8 flag is B<on>--I<unless> you can
unambiguously represent data. Here is what we mean by "unambiguously".
-After C<$utf8 = decode("foo", $octet)>,
+After C<$str = decode("foo", $octet)>,
- When $octet is... The UTF8 flag in $utf8 is
+ When $octet is... The UTF8 flag in $str is
---------------------------------------------
In ASCII only (or EBCDIC only) OFF
In ISO-8859-1 ON
diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs
index b5160d2516..6c077bec3a 100644
--- a/cpan/Encode/Encode.xs
+++ b/cpan/Encode/Encode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Encode.xs,v 2.39 2016/11/29 23:29:23 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.41 2017/06/10 17:23:50 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
@@ -35,17 +35,6 @@ UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
#define SvIV_nomg SvIV
#endif
-#ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-# define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE
-#else
-# define UTF8_ALLOW_STRICT 0
-#endif
-
-#define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \
- ~(UTF8_ALLOW_CONTINUATION | \
- UTF8_ALLOW_NON_CONTINUATION | \
- UTF8_ALLOW_LONG))
-
static void
Encode_XSEncoding(pTHX_ encode_t * enc)
{
@@ -114,24 +103,52 @@ utf8_safe_upgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify)
#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s"
#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode"
+#define ERR_DECODE_STR_NOMAP "%s \"%s\" does not map to Unicode"
static SV *
do_fallback_cb(pTHX_ UV ch, SV *fallback_cb)
{
dSP;
int argc;
- SV *retval = newSVpv("",0);
+ SV *retval;
ENTER;
SAVETMPS;
PUSHMARK(sp);
- XPUSHs(sv_2mortal(newSVnv((UV)ch)));
+ XPUSHs(sv_2mortal(newSVuv(ch)));
PUTBACK;
argc = call_sv(fallback_cb, G_SCALAR);
SPAGAIN;
if (argc != 1){
croak("fallback sub must return scalar!");
}
- sv_catsv(retval, POPs);
+ retval = POPs;
+ SvREFCNT_inc(retval);
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+ return retval;
+}
+
+static SV *
+do_bytes_fallback_cb(pTHX_ U8 *s, STRLEN slen, SV *fallback_cb)
+{
+ dSP;
+ int argc;
+ STRLEN i;
+ SV *retval;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ for (i=0; i<slen; ++i)
+ XPUSHs(sv_2mortal(newSVuv(s[i])));
+ PUTBACK;
+ argc = call_sv(fallback_cb, G_SCALAR);
+ SPAGAIN;
+ if (argc != 1){
+ croak("fallback sub must return scalar!");
+ }
+ retval = POPs;
+ SvREFCNT_inc(retval);
PUTBACK;
FREETMPS;
LEAVE;
@@ -241,16 +258,22 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 *
goto ENCODE_SET_SRC;
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
+ STRLEN sublen;
+ char *substr;
SV* subchar =
(fallback_cb != &PL_sv_undef)
? do_fallback_cb(aTHX_ ch, fallback_cb)
: newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04" UVxf "}" :
check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
"&#x%" UVxf ";", (UV)ch);
- SvUTF8_off(subchar); /* make sure no decoded string gets in */
+ substr = SvPV(subchar, sublen);
+ if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */
+ SvREFCNT_dec(subchar);
+ croak("Wide character");
+ }
sdone += slen + clen;
- ddone += dlen + SvCUR(subchar);
- sv_catsv(dst, subchar);
+ ddone += dlen + sublen;
+ sv_catpvn(dst, substr, sublen);
SvREFCNT_dec(subchar);
} else {
/* fallback char */
@@ -277,18 +300,21 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 *
}
if (check &
(ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
+ STRLEN sublen;
+ char *substr;
SV* subchar =
(fallback_cb != &PL_sv_undef)
? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb)
: newSVpvf("\\x%02" UVXf, (UV)s[slen]);
+ substr = SvPVutf8(subchar, sublen);
sdone += slen + 1;
- ddone += dlen + SvCUR(subchar);
- sv_catsv(dst, subchar);
+ ddone += dlen + sublen;
+ sv_catpvn(dst, substr, sublen);
SvREFCNT_dec(subchar);
} else {
sdone += slen + 1;
ddone += dlen + strlen(FBCHAR_UTF8);
- sv_catpv(dst, FBCHAR_UTF8);
+ sv_catpvn(dst, FBCHAR_UTF8, strlen(FBCHAR_UTF8));
}
}
/* settle variables when fallback */
@@ -382,7 +408,7 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
U8 *ptr = s;
bool overflowed = 0;
- uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len);
+ uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(UTF8SKIP(s));
len--;
s++;
@@ -401,7 +427,6 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
*rlen = s-ptr;
if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) {
- *rlen = 1;
return 0;
}
@@ -418,6 +443,8 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
int check;
U8 *d;
STRLEN dlen;
+ char esc[UTF8_MAXLEN * 6 + 1];
+ STRLEN i;
if (SvROK(check_sv)) {
/* croak("UTF-8 decoder doesn't support callback CHECK"); */
@@ -441,22 +468,24 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
continue;
}
+ uv = 0;
ulen = 1;
- if (UTF8_IS_START(*s)) {
+ if (! UTF8_IS_CONTINUATION(*s)) {
+ /* Not an invariant nor a continuation; must be a start byte. (We
+ * can't test for UTF8_IS_START as that excludes things like \xC0
+ * which are start bytes, but always lead to overlongs */
+
U8 skip = UTF8SKIP(s);
if ((s + skip) > e) {
- if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) {
- const U8 *p = s + 1;
- for (; p < e; p++) {
- if (!UTF8_IS_CONTINUATION(*p)) {
- ulen = p-s;
- goto malformed_byte;
- }
- }
+ /* just calculate ulen, in pathological cases can be smaller then e-s */
+ if (e-s >= 2)
+ convert_utf8_multi_seq(s, e-s, &ulen);
+ else
+ ulen = 1;
+
+ if ((stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) && ulen == (STRLEN)(e-s))
break;
- }
- ulen = e-s;
goto malformed_byte;
}
@@ -475,44 +504,67 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
}
/* If we get here there is something wrong with alleged UTF-8 */
+ /* uv is used only when encoding */
malformed_byte:
- uv = (UV)*s;
- if (ulen == 0)
+ if (uv == 0)
+ uv = (UV)*s;
+ if (encode || ulen == 0)
ulen = 1;
malformed:
+ if (!encode && (check & (ENCODE_DIE_ON_ERR|ENCODE_WARN_ON_ERR|ENCODE_PERLQQ)))
+ for (i=0; i<ulen; ++i) sprintf(esc+4*i, "\\x%02X", s[i]);
if (check & ENCODE_DIE_ON_ERR){
if (encode)
- Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8");
+ Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8"));
else
- Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv);
+ Perl_croak(aTHX_ ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc);
}
if (check & ENCODE_WARN_ON_ERR){
if (encode)
Perl_warner(aTHX_ packWARN(WARN_UTF8),
- ERR_ENCODE_NOMAP, uv, "utf8");
+ ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8"));
else
Perl_warner(aTHX_ packWARN(WARN_UTF8),
- ERR_DECODE_NOMAP, "utf8", uv);
+ ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc);
}
if (check & ENCODE_RETURN_ON_ERR) {
break;
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* subchar =
- (fallback_cb != &PL_sv_undef)
- ? do_fallback_cb(aTHX_ uv, fallback_cb)
- : newSVpvf(check & ENCODE_PERLQQ
- ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}")
- : check & ENCODE_HTMLCREF ? "&#%" UVuf ";"
- : "&#x%" UVxf ";", uv);
- if (encode){
- SvUTF8_off(subchar); /* make sure no decoded string gets in */
- }
- dlen += SvCUR(subchar) - ulen;
+ STRLEN sublen;
+ char *substr;
+ SV* subchar;
+ if (encode) {
+ subchar =
+ (fallback_cb != &PL_sv_undef)
+ ? do_fallback_cb(aTHX_ uv, fallback_cb)
+ : newSVpvf(check & ENCODE_PERLQQ
+ ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}")
+ : check & ENCODE_HTMLCREF ? "&#%" UVuf ";"
+ : "&#x%" UVxf ";", uv);
+ substr = SvPV(subchar, sublen);
+ if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */
+ SvREFCNT_dec(subchar);
+ croak("Wide character");
+ }
+ } else {
+ if (fallback_cb != &PL_sv_undef) {
+ /* in decode mode we have sequence of wrong bytes */
+ subchar = do_bytes_fallback_cb(aTHX_ s, ulen, fallback_cb);
+ } else {
+ char *ptr = esc;
+ /* ENCODE_PERLQQ is already stored in esc */
+ if (check & (ENCODE_HTMLCREF|ENCODE_XMLCREF))
+ for (i=0; i<ulen; ++i) ptr += sprintf(ptr, ((check & ENCODE_HTMLCREF) ? "&#%u;" : "&#x%02X;"), s[i]);
+ subchar = newSVpvn(esc, strlen(esc));
+ }
+ substr = SvPVutf8(subchar, sublen);
+ }
+ dlen += sublen - ulen;
SvCUR_set(dst, d-(U8 *)SvPVX(dst));
*SvEND(dst) = '\0';
- sv_catsv(dst, subchar);
+ sv_catpvn(dst, substr, sublen);
SvREFCNT_dec(subchar);
d = (U8 *) SvGROW(dst, dlen) + SvCUR(dst);
} else {
@@ -539,7 +591,7 @@ MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_
PROTOTYPES: DISABLE
void
-Method_decode_xs(obj,src,check_sv = &PL_sv_no)
+Method_decode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
SV * check_sv
@@ -551,14 +603,13 @@ PREINIT:
bool renewed = 0;
int check;
bool modify;
+ dSP;
INIT:
SvGETMAGIC(src);
SvGETMAGIC(check_sv);
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
modify = (check && !(check & ENCODE_LEAVE_SRC));
-CODE:
-{
- dSP;
+PPCODE:
if (!SvOK(src))
XSRETURN_UNDEF;
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
@@ -600,10 +651,9 @@ CODE:
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
ST(0) = dst;
XSRETURN(1);
-}
void
-Method_encode_xs(obj,src,check_sv = &PL_sv_no)
+Method_encode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
SV * check_sv
@@ -619,8 +669,7 @@ INIT:
SvGETMAGIC(check_sv);
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
modify = (check && !(check & ENCODE_LEAVE_SRC));
-CODE:
-{
+PPCODE:
if (!SvOK(src))
XSRETURN_UNDEF;
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
@@ -673,20 +722,19 @@ CODE:
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
ST(0) = dst;
XSRETURN(1);
-}
MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_
-PROTOTYPES: ENABLE
+PROTOTYPES: DISABLE
-void
+SV *
Method_renew(obj)
SV * obj
CODE:
-{
PERL_UNUSED_VAR(obj);
- XSRETURN(1);
-}
+ RETVAL = newSVsv(obj);
+OUTPUT:
+ RETVAL
int
Method_renewed(obj)
@@ -697,17 +745,19 @@ CODE:
OUTPUT:
RETVAL
-void
+SV *
Method_name(obj)
SV * obj
+PREINIT:
+ encode_t *enc;
+INIT:
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
CODE:
-{
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0])));
- XSRETURN(1);
-}
+ RETVAL = newSVpvn(enc->name[0], strlen(enc->name[0]));
+OUTPUT:
+ RETVAL
-void
+bool
Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no)
SV * obj
SV * dst
@@ -734,7 +784,6 @@ INIT:
enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
offset = (STRLEN)SvIV(off);
CODE:
-{
if (!SvOK(src))
XSRETURN_NO;
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
@@ -745,13 +794,9 @@ CODE:
sv_catsv(dst, tmp);
SvREFCNT_dec(tmp);
SvIV_set(off, (IV)offset);
- if (code == ENCODE_FOUND_TERM) {
- ST(0) = &PL_sv_yes;
- }else{
- ST(0) = &PL_sv_no;
- }
- XSRETURN(1);
-}
+ RETVAL = (code == ENCODE_FOUND_TERM);
+OUTPUT:
+ RETVAL
SV *
Method_decode(obj,src,check_sv = &PL_sv_no)
@@ -773,7 +818,6 @@ INIT:
modify = (check && !(check & ENCODE_LEAVE_SRC));
enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
CODE:
-{
if (!SvOK(src))
XSRETURN_UNDEF;
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
@@ -782,7 +826,6 @@ CODE:
RETVAL = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check,
NULL, Nullsv, NULL, fallback_cb);
SvUTF8_on(RETVAL);
-}
OUTPUT:
RETVAL
@@ -806,7 +849,6 @@ INIT:
modify = (check && !(check & ENCODE_LEAVE_SRC));
enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
CODE:
-{
if (!SvOK(src))
XSRETURN_UNDEF;
s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
@@ -814,76 +856,51 @@ CODE:
utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify);
RETVAL = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check,
NULL, Nullsv, NULL, fallback_cb);
-}
OUTPUT:
RETVAL
-void
+bool
Method_needs_lines(obj)
SV * obj
CODE:
-{
- /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
PERL_UNUSED_VAR(obj);
- ST(0) = &PL_sv_no;
- XSRETURN(1);
-}
+ RETVAL = FALSE;
+OUTPUT:
+ RETVAL
-void
+bool
Method_perlio_ok(obj)
SV * obj
PREINIT:
SV *sv;
CODE:
-{
- /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
- /* require_pv(PERLIO_FILENAME); */
-
PERL_UNUSED_VAR(obj);
- eval_pv("require PerlIO::encoding", 0);
- SPAGAIN;
-
- sv = get_sv("@", 0);
- if (SvTRUE(sv)) {
- ST(0) = &PL_sv_no;
- }else{
- ST(0) = &PL_sv_yes;
- }
- XSRETURN(1);
-}
+ sv = eval_pv("require PerlIO::encoding", 0);
+ RETVAL = SvTRUE(sv);
+OUTPUT:
+ RETVAL
-void
+SV *
Method_mime_name(obj)
SV * obj
PREINIT:
- SV *sv;
+ encode_t *enc;
+INIT:
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
CODE:
-{
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- SV *retval;
- eval_pv("require Encode::MIME::Name", 0);
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0]))));
+ PUTBACK;
+ call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR);
SPAGAIN;
-
- sv = get_sv("@", 0);
- if (SvTRUE(sv)) {
- ST(0) = &PL_sv_undef;
- }else{
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0]))));
- PUTBACK;
- call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR);
- SPAGAIN;
- retval = newSVsv(POPs);
- PUTBACK;
- FREETMPS;
- LEAVE;
- /* enc->name[0] */
- ST(0) = retval;
- }
- XSRETURN(1);
-}
+ RETVAL = newSVsv(POPs);
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+OUTPUT:
+ RETVAL
MODULE = Encode PACKAGE = Encode
@@ -892,10 +909,11 @@ PROTOTYPES: ENABLE
I32
_bytes_to_utf8(sv, ...)
SV * sv
+PREINIT:
+ SV * encoding;
+INIT:
+ encoding = items == 2 ? ST(1) : Nullsv;
CODE:
-{
- SV * encoding = items == 2 ? ST(1) : Nullsv;
-
if (encoding)
RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
else {
@@ -909,18 +927,19 @@ CODE:
Safefree(converted); /* ... so free it */
RETVAL = len;
}
-}
OUTPUT:
RETVAL
I32
_utf8_to_bytes(sv, ...)
SV * sv
+PREINIT:
+ SV * to;
+ SV * check;
+INIT:
+ to = items > 1 ? ST(1) : Nullsv;
+ check = items > 2 ? ST(2) : Nullsv;
CODE:
-{
- SV * to = items > 1 ? ST(1) : Nullsv;
- SV * check = items > 2 ? ST(2) : Nullsv;
-
if (to) {
RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to));
} else {
@@ -980,7 +999,6 @@ CODE:
RETVAL = (utf8_to_bytes(s, &len) ? len : 0);
}
}
-}
OUTPUT:
RETVAL
@@ -992,13 +1010,11 @@ PREINIT:
char *str;
STRLEN len;
CODE:
-{
SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */
str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */
RETVAL = SvUTF8(sv) ? TRUE : FALSE;
if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len)))
RETVAL = FALSE;
-}
OUTPUT:
RETVAL
@@ -1006,17 +1022,15 @@ SV *
_utf8_on(sv)
SV * sv
CODE:
-{
SvGETMAGIC(sv);
if (!SvTAINTED(sv) && SvPOKp(sv)) {
if (SvTHINKFIRST(sv)) sv_force_normal(sv);
- RETVAL = newSViv(SvUTF8(sv));
+ RETVAL = boolSV(SvUTF8(sv));
SvUTF8_on(sv);
SvSETMAGIC(sv);
} else {
RETVAL = &PL_sv_undef;
}
-}
OUTPUT:
RETVAL
@@ -1024,20 +1038,25 @@ SV *
_utf8_off(sv)
SV * sv
CODE:
-{
SvGETMAGIC(sv);
if (!SvTAINTED(sv) && SvPOKp(sv)) {
if (SvTHINKFIRST(sv)) sv_force_normal(sv);
- RETVAL = newSViv(SvUTF8(sv));
+ RETVAL = boolSV(SvUTF8(sv));
SvUTF8_off(sv);
SvSETMAGIC(sv);
} else {
RETVAL = &PL_sv_undef;
}
-}
OUTPUT:
RETVAL
+void
+onBOOT()
+CODE:
+{
+#include "def_t.exh"
+}
+
BOOT:
{
HV *stash = gv_stashpvn("Encode", strlen("Encode"), GV_ADD);
@@ -1057,6 +1076,3 @@ BOOT:
newCONSTSUB(stash, "FB_HTMLCREF", newSViv(ENCODE_FB_HTMLCREF));
newCONSTSUB(stash, "FB_XMLCREF", newSViv(ENCODE_FB_XMLCREF));
}
-{
-#include "def_t.exh"
-}
diff --git a/cpan/Encode/Makefile.PL b/cpan/Encode/Makefile.PL
index 8203105247..8b801443d8 100644
--- a/cpan/Encode/Makefile.PL
+++ b/cpan/Encode/Makefile.PL
@@ -1,9 +1,10 @@
#
-# $Id: Makefile.PL,v 2.18 2016/11/29 23:29:23 dankogai Exp dankogai $
+# $Id: Makefile.PL,v 2.21 2017/07/18 07:15:29 dankogai Exp dankogai $
#
use 5.007003;
use strict;
use warnings;
+use utf8;
use ExtUtils::MakeMaker;
use File::Spec;
use Config;
@@ -15,9 +16,12 @@ $ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE};
# similar strictness as in core
my $ccflags = $Config{ccflags};
if (!$ENV{PERL_CORE}) {
- if ($Config{gccversion}) {
- $ccflags .= ' -Werror=declaration-after-statement';
- $ccflags .= ' -Wpointer-sign' unless $Config{d_cplusplus};
+ if (my $gccver = $Config{gccversion}) {
+ $gccver =~ s/\.//g; $gccver =~ s/ .*//;
+ $gccver .= "0" while length $gccver < 3;
+ $gccver = 0+$gccver;
+ $ccflags .= ' -Werror=declaration-after-statement' if $gccver > 400;
+ $ccflags .= ' -Wpointer-sign' if !$Config{d_cplusplus} and $gccver > 400;
$ccflags .= ' -fpermissive' if $Config{d_cplusplus};
}
}
@@ -49,6 +53,8 @@ WriteMakefile(
NAME => "Encode",
EXE_FILES => \@exe_files,
VERSION_FROM => 'Encode.pm',
+ ABSTRACT_FROM=> 'Encode.pm',
+ AUTHOR => 'Dan Kogai <dankogai@dan.co.jp>',
OBJECT => '$(O_FILES)',
'dist' => {
COMPRESS => 'gzip -9f',
@@ -61,6 +67,7 @@ WriteMakefile(
PREREQ_PM => {
Exporter => '5.57', # use Exporter 'import';
parent => '0.221', # version bundled with 5.10.1
+ Storable => '0', # bundled with Perl 5.7.3
},
TEST_REQUIRES => {
'Test::More' => '0.81_01',
@@ -71,6 +78,91 @@ WriteMakefile(
resources => {
repository => 'https://github.com/dankogai/p5-encode',
},
+ x_contributors => [
+ 'Alex Davies <alex.davies@talktalk.net>',
+ 'Alex Kapranoff <alex@kapranoff.ru>',
+ 'Alex Vandiver <alex@chmrr.net>',
+ 'Andreas J. Koenig <andreas.koenig@anima.de>',
+ 'Andrew Pennebaker <andrew.pennebaker@networkedinsights.com>',
+ 'Andy Grundman <andyg@activestate.com>',
+ 'Anton Tagunov <tagunov@motor.ru>',
+ 'Autrijus Tang <autrijus@autrijus.org>',
+ 'Benjamin Goldberg <goldbb2@earthlink.net>',
+ 'Bjoern Hoehrmann <derhoermi@gmx.net>',
+ 'Bjoern Jacke <debianbugs@j3e.de>',
+ 'bulk88 <bulk88@hotmail.com>',
+ 'Craig A. Berry <craigberry@mac.com>',
+ 'Curtis Jewell <csjewell@cpan.org>',
+ 'Dan Kogai <dankogai@dan.co.jp>',
+ 'Dave Evans <dave@rudolf.org.uk>',
+ 'David Golden <dagolden@cpan.org>',
+ 'David Steinbrunner <dsteinbrunner@pobox.com>',
+ 'Deng Liu <dengliu@ntu.edu.tw>',
+ 'Dominic Dunlop <domo@computer.org>',
+ 'drry',
+ 'Elizabeth Mattijsen <liz@dijkmat.nl>',
+ 'Flavio Poletti <flavio@polettix.it>',
+ 'Gerrit P. Haase <gp@familiehaase.de>',
+ 'Gisle Aas <gisle@ActiveState.com>',
+ 'Graham Barr <gbarr@pobox.com>',
+ 'Graham Knop <haarg@haarg.org>',
+ 'Graham Ollis <perl@wdlabs.com>',
+ 'Gurusamy Sarathy <gsar@activestate.com>',
+ 'H.Merijn Brand <h.m.brand@xs4all.nl>',
+ 'Hugo van der Sanden <hv@crypt.org>',
+ 'chansen <chansen@cpan.org>',
+ 'Chris Nandor <pudge@pobox.com>',
+ 'Inaba Hiroto <inaba@st.rim.or.jp>',
+ 'Jarkko Hietaniemi <jhi@iki.fi>',
+ 'Jesse Vincent <jesse@fsck.com>',
+ 'Jungshik Shin <jshin@mailaps.org>',
+ 'Karen Etheridge <ether@cpan.org>',
+ 'Karl Williamson <khw@cpan.org>',
+ 'Kenichi Ishigaki <ishigaki@cpan.org>',
+ 'KONNO Hiroharu <hiroharu.konno@bowneglobal.co.jp>',
+ 'Laszlo Molnar <ml1050@freemail.hu>',
+ 'Makamaka <makamaka@donzoko.net>',
+ 'Mark-Jason Dominus <mjd@plover.com>',
+ 'Masahiro Iuchi <masahiro.iuchi@gmail.com>',
+ 'MATSUNO Tokuhiro <tokuhirom+cpan@gmail.com>',
+ 'Mattia Barbon <mbarbon@dsi.unive.it>',
+ 'Michael G Schwern <schwern@pobox.com>',
+ 'Michael LaGrasta <michael@lagrasta.com>',
+ 'Miron Cuperman <miron@hyper.to>',
+ 'Moritz Lenz <moritz@faui2k3.org>',
+ 'MORIYAMA Masayuki <msyk@mtg.biglobe.ne.jp>',
+ 'Nick Ing-Simmons <nick@ing-simmons.net>',
+ 'Nicholas Clark <nick@ccl4.org>',
+ 'Olivier Mengué <dolmen@cpan.org>',
+ 'otsune',
+ 'Pali <pali@cpan.org>',
+ 'Paul Marquess <paul_marquess@yahoo.co.uk>',
+ 'Peter Prymmer <pvhp@best.com>',
+ 'Peter Rabbitson <ribasushi@cpan.org>',
+ 'Philip Newton <pne@cpan.org>',
+ 'Piotr Fusik <pfusik@op.pl>',
+ 'Rafael Garcia-Suarez <rgarciasuarez@mandriva.com>',
+ 'Randy Stauner <randy@magnificent-tears.com>',
+ 'Reini Urban <rurban@cpan.org>',
+ 'Robin Barker <rmb1@cise.npl.co.uk>',
+ 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>',
+ 'Simon Cozens <simon@netthink.co.uk>',
+ 'Slaven Rezic <SREZIC@cpan.org>',
+ 'Spider Boardman <spider@web.zk3.dec.com>',
+ 'Steve Hay <steve.m.hay@googlemail.com>',
+ 'Steve Peters <steve@fisharerojo.org>',
+ 'SUGAWARA Hajime <sugawara@hdt.co.jp>',
+ 'SUZUKI Norio <ZAP00217@nifty.com>',
+ 'szr8 <blz.marcel@gmail.com>',
+ 'Tatsuhiko Miyagawa <miyagawa@bulknews.net>',
+ 'Tels <perl_dummy@bloodgate.com>',
+ 'Tony Cook <tony@develop-help.com>',
+ 'Vadim Konovalov <vkonovalov@peterstar.ru>',
+ 'Victor <victor@vsespb.ru>',
+ 'Ville Skyttä <ville.skytta@iki.fi>',
+ 'Vincent van Dam <vvandam@sandvine.com>',
+ 'Yitzchak Scott-Thoennes <sthoenna@efn.org>',
+ ],
},
);
diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm
index fc1d3d1382..c56745d7b1 100644
--- a/cpan/Encode/Unicode/Unicode.pm
+++ b/cpan/Encode/Unicode/Unicode.pm
@@ -2,9 +2,8 @@ package Encode::Unicode;
use strict;
use warnings;
-no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.15_01 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.16 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
@@ -13,7 +12,7 @@ XSLoader::load( __PACKAGE__, $VERSION );
# Object Generator 8 transcoders all at once!
#
-require Encode;
+use Encode ();
our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32);
@@ -34,12 +33,13 @@ for my $name (
$endian = ( $3 eq 'BE' ) ? 'n' : ( $3 eq 'LE' ) ? 'v' : '';
$size == 4 and $endian = uc($endian);
- $Encode::Encoding{$name} = bless {
+ my $obj = bless {
Name => $name,
size => $size,
endian => $endian,
ucs2 => $ucs2,
} => __PACKAGE__;
+ Encode::define_encoding($obj, $name);
}
use parent qw(Encode::Encoding);
@@ -52,12 +52,6 @@ sub renew {
return $clone;
}
-# There used to be a perl implementation of (en|de)code but with
-# XS version is ripe, perl version is zapped for optimal speed
-
-*decode = \&decode_xs;
-*encode = \&encode_xs;
-
1;
__END__
diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs
index 117e14d83f..b3b1d2fea8 100644
--- a/cpan/Encode/Unicode/Unicode.xs
+++ b/cpan/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Unicode.xs,v 2.15 2016/11/29 23:29:23 dankogai Exp dankogai $
+ $Id: Unicode.xs,v 2.16 2017/06/10 17:23:50 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
@@ -127,7 +127,7 @@ PROTOTYPES: DISABLE
*hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef)
void
-decode_xs(obj, str, check = 0)
+decode(obj, str, check = 0)
SV * obj
SV * str
IV check
@@ -345,7 +345,7 @@ CODE:
}
void
-encode_xs(obj, utf8, check = 0)
+encode(obj, utf8, check = 0)
SV * obj
SV * utf8
IV check
diff --git a/cpan/Encode/bin/enc2xs b/cpan/Encode/bin/enc2xs
index bd39639ae8..619b64b757 100644
--- a/cpan/Encode/bin/enc2xs
+++ b/cpan/Encode/bin/enc2xs
@@ -11,7 +11,7 @@ use warnings;
use Getopt::Std;
use Config;
my @orig_ARGV = @ARGV;
-our $VERSION = do { my @r = (q$Revision: 2.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
# These may get re-ordered.
# RAW is a do_now as inserted by &enter
@@ -1038,8 +1038,7 @@ sub find_e2x{
sub make_makefile_pl
{
- eval { require Encode; };
- $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n";
+ eval { require Encode } or die "You need to install Encode to use enc2xs -M\nerror: $@\n";
# our used for variable expansion
$_Enc2xs = $0;
$_Version = $VERSION;
@@ -1063,8 +1062,7 @@ use vars qw(
);
sub make_configlocal_pm {
- eval { require Encode; };
- $@ and die "Unable to require Encode: $@\n";
+ eval { require Encode } or die "Unable to require Encode: $@\n";
eval { require File::Spec; };
# our used for variable expantion
@@ -1084,8 +1082,7 @@ sub make_configlocal_pm {
$mod =~ s/.*\bEncode\b/Encode/o;
$mod =~ s/\.pm\z//o;
$mod =~ s,/,::,og;
- eval qq{ require $mod; };
- return if $@;
+ eval qq{ require $mod; } or return;
warn qq{ require $mod;\n};
for my $enc ( Encode->encodings() ) {
no warnings;
@@ -1119,8 +1116,7 @@ sub _mkversion{
}
sub _print_expand{
- eval { require File::Basename; };
- $@ and die "File::Basename needed. Are you on miniperl?;\nerror: $@\n";
+ eval { require File::Basename } or die "File::Basename needed. Are you on miniperl?;\nerror: $@\n";
File::Basename->import();
my ($src, $dst, $clobber) = @_;
if (!$clobber and -e $dst){
diff --git a/cpan/Encode/bin/ucmlint b/cpan/Encode/bin/ucmlint
index a240f2c75e..a31a7a28f6 100644
--- a/cpan/Encode/bin/ucmlint
+++ b/cpan/Encode/bin/ucmlint
@@ -1,19 +1,18 @@
#!/usr/local/bin/perl
#
-# $Id: ucmlint,v 2.3 2016/08/04 03:15:58 dankogai Exp $
+# $Id: ucmlint,v 2.4 2017/06/10 17:23:50 dankogai Exp $
#
BEGIN { pop @INC if $INC[-1] eq '.' }
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Getopt::Std;
our %Opt;
getopts("Dehfv", \%Opt);
if ($Opt{e}){
- eval{ require Encode; };
- $@ and die "can't load Encode : $@";
+ eval { require Encode } or die "can't load Encode : $@";
}
$Opt{h} and help();
diff --git a/cpan/Encode/encoding.pm b/cpan/Encode/encoding.pm
index dc342683ee..7cd9eb2949 100644
--- a/cpan/Encode/encoding.pm
+++ b/cpan/Encode/encoding.pm
@@ -1,15 +1,16 @@
-# $Id: encoding.pm,v 2.19 2016/11/01 13:30:38 dankogai Exp $
+# $Id: encoding.pm,v 2.20 2017/06/10 17:23:50 dankogai Exp $
package encoding;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.19 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.20 $ =~ /(\d+)/g;
use Encode;
use strict;
use warnings;
+use Config;
use constant {
DEBUG => !!$ENV{PERL_ENCODE_DEBUG},
HAS_PERLIO => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) },
- PERL_5_21_7 => $^V && $^V ge v5.21.7,
+ PERL_5_21_7 => $^V && $^V ge v5.21.7, # lexically scoped
};
sub _exception {
@@ -115,7 +116,8 @@ sub import {
}
my $deprecate =
- $] >= 5.017 ? "Use of the encoding pragma is deprecated" : 0;
+ ($] >= 5.017 and !$Config{usecperl})
+ ? "Use of the encoding pragma is deprecated" : 0;
my $class = shift;
my $name = shift;
@@ -132,6 +134,7 @@ sub import {
return;
}
$name = _get_locale_encoding() if $name eq ':locale';
+ BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; }
my %arg = @_;
$name = $ENV{PERL_ENCODING} unless defined $name;
my $enc = find_encoding($name);
@@ -141,9 +144,9 @@ sub import {
}
$name = $enc->name; # canonize
unless ( $arg{Filter} ) {
- if ($] >= 5.025003) {
+ if ($] >= 5.025003 and !$Config{usecperl}) {
require Carp;
- Carp::croak("The encoding pragma is no longer supported");
+ Carp::croak("The encoding pragma is no longer supported. Check cperl");
}
warnings::warnif("deprecated",$deprecate) if $deprecate;
@@ -186,8 +189,8 @@ sub import {
$status;
}
);
- };
- $@ eq '' and DEBUG and warn "Filter installed";
+ 1;
+ } and DEBUG and warn "Filter installed";
}
defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
for my $h (qw(STDIN STDOUT)) {
@@ -368,7 +371,7 @@ Note that C<STDERR> WILL NOT be changed, regardless.
Also note that non-STD file handles remain unaffected. Use C<use
open> or C<binmode> to change the layers of those.
-=item C<use encoding I<ENCNAME> Filter=E<gt>1;>
+=item C<use encoding I<ENCNAME>, Filter=E<gt>1;>
This operates as above, but the C<Filter> argument with a non-zero
value causes the entire script, and not just literals, to be translated from
diff --git a/cpan/Encode/lib/Encode/Alias.pm b/cpan/Encode/lib/Encode/Alias.pm
index 0a252560f5..6dcd112a40 100644
--- a/cpan/Encode/lib/Encode/Alias.pm
+++ b/cpan/Encode/lib/Encode/Alias.pm
@@ -1,8 +1,7 @@
package Encode::Alias;
use strict;
use warnings;
-no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.21 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use Exporter 'import';
@@ -19,7 +18,6 @@ our @Alias; # ordered matching list
our %Alias; # cached known aliases
sub find_alias {
- require Encode;
my $class = shift;
my $find = shift;
unless ( exists $Alias{$find} ) {
@@ -109,6 +107,9 @@ sub define_alias {
}
}
+# HACK: Encode must be used after define_alias is declarated as Encode calls define_alias
+use Encode ();
+
# Allow latin-1 style names as well
# 0 1 2 3 4 5 6 7 8 9 10
our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 );
@@ -134,7 +135,6 @@ sub undef_aliases {
}
sub init_aliases {
- require Encode;
undef_aliases();
# Try all-lower-case version should all else fails
diff --git a/cpan/Encode/lib/Encode/CN/HZ.pm b/cpan/Encode/lib/Encode/CN/HZ.pm
index 4510b0b400..a0dc59d153 100644
--- a/cpan/Encode/lib/Encode/CN/HZ.pm
+++ b/cpan/Encode/lib/Encode/CN/HZ.pm
@@ -5,7 +5,7 @@ use warnings;
use utf8 ();
use vars qw($VERSION);
-$VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+$VERSION = do { my @r = ( q$Revision: 2.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -21,6 +21,7 @@ sub needs_lines { 1 }
sub decode ($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $GB = Encode::find_encoding('gb2312-raw');
my $ret = substr($str, 0, 0); # to propagate taintedness
@@ -135,6 +136,7 @@ sub cat_decode {
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $GB = Encode::find_encoding('gb2312-raw');
my $ret = substr($str, 0, 0); # to propagate taintedness;
diff --git a/cpan/Encode/lib/Encode/Encoding.pm b/cpan/Encode/lib/Encode/Encoding.pm
index 39d2e0ab64..815937f455 100644
--- a/cpan/Encode/lib/Encode/Encoding.pm
+++ b/cpan/Encode/lib/Encode/Encoding.pm
@@ -3,11 +3,15 @@ package Encode::Encoding;
# Base class for classes which implement encodings
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
-require Encode;
+our @CARP_NOT = qw(Encode Encode::Encoder);
-sub DEBUG { 0 }
+use Carp ();
+use Encode ();
+use Encode::MIME::Name;
+
+use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
sub Define {
my $obj = shift;
@@ -20,13 +24,10 @@ sub Define {
sub name { return shift->{'Name'} }
-sub mime_name{
- require Encode::MIME::Name;
+sub mime_name {
return Encode::MIME::Name::get_mime_name(shift->name);
}
-# sub renew { return $_[0] }
-
sub renew {
my $self = shift;
my $clone = bless {%$self} => ref($self);
@@ -42,8 +43,7 @@ sub renewed { return $_[0]->{renewed} || 0 }
sub needs_lines { 0 }
sub perlio_ok {
- eval { require PerlIO::encoding };
- return $@ ? 0 : 1;
+ return eval { require PerlIO::encoding } ? 1 : 0;
}
# (Temporary|legacy) methods
@@ -56,14 +56,12 @@ sub fromUnicode { shift->encode(@_) }
#
sub encode {
- require Carp;
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
Carp::croak( $class . "->encode() not defined!" );
}
sub decode {
- require Carp;
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
Carp::croak( $class . "->encode() not defined!" );
@@ -188,7 +186,6 @@ MUST return the string representing the canonical name of the encoding.
Predefined As:
sub mime_name{
- require Encode::MIME::Name;
return Encode::MIME::Name::get_mime_name(shift->name);
}
@@ -226,8 +223,7 @@ unless the value is numeric so return 0 for false.
Predefined As:
sub perlio_ok {
- eval{ require PerlIO::encoding };
- return $@ ? 0 : 1;
+ return eval { require PerlIO::encoding } ? 1 : 0;
}
If your encoding does not support PerlIO for some reasons, just;
diff --git a/cpan/Encode/lib/Encode/GSM0338.pm b/cpan/Encode/lib/Encode/GSM0338.pm
index 20257a1cbd..e87141ebc4 100644
--- a/cpan/Encode/lib/Encode/GSM0338.pm
+++ b/cpan/Encode/lib/Encode/GSM0338.pm
@@ -1,5 +1,5 @@
#
-# $Id: GSM0338.pm,v 2.5 2013/09/14 07:51:59 dankogai Exp $
+# $Id: GSM0338.pm,v 2.7 2017/06/10 17:23:50 dankogai Exp $
#
package Encode::GSM0338;
@@ -8,7 +8,7 @@ use warnings;
use Carp;
use vars qw($VERSION);
-$VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+$VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -171,6 +171,7 @@ our $NBSP = "\x{00A0}";
sub decode ($$;$) {
my ( $obj, $bytes, $chk ) = @_;
+ return undef unless defined $bytes;
my $str = substr($bytes, 0, 0); # to propagate taintedness;
while ( length $bytes ) {
my $c = substr( $bytes, 0, 1, '' );
@@ -216,6 +217,7 @@ sub decode ($$;$) {
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $bytes = substr($str, 0, 0); # to propagate taintedness
while ( length $str ) {
my $u = substr( $str, 0, 1, '' );
@@ -270,10 +272,9 @@ expression with C<eval {}> block as follows;
eval {
$utf8 = decode("gsm0338", $gsm0338, $chk);
- };
- if ($@){
+ } or do {
# handle exception here
- }
+ };
=head1 BUGS
diff --git a/cpan/Encode/lib/Encode/Guess.pm b/cpan/Encode/lib/Encode/Guess.pm
index b44daf59eb..41fc19b799 100644
--- a/cpan/Encode/lib/Encode/Guess.pm
+++ b/cpan/Encode/lib/Encode/Guess.pm
@@ -2,15 +2,16 @@ package Encode::Guess;
use strict;
use warnings;
use Encode qw(:fallbacks find_encoding);
-our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
my $Canon = 'Guess';
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
our %DEF_SUSPECTS = map { $_ => find_encoding($_) } qw(ascii utf8);
-$Encode::Encoding{$Canon} = bless {
+my $obj = bless {
Name => $Canon,
Suspects => {%DEF_SUSPECTS},
} => __PACKAGE__;
+Encode::define_encoding($obj, $Canon);
use parent qw(Encode::Encoding);
sub needs_lines { 1 }
diff --git a/cpan/Encode/lib/Encode/JP/JIS7.pm b/cpan/Encode/lib/Encode/JP/JIS7.pm
index 588389a034..a0629a3690 100644
--- a/cpan/Encode/lib/Encode/JP/JIS7.pm
+++ b/cpan/Encode/lib/Encode/JP/JIS7.pm
@@ -1,7 +1,7 @@
package Encode::JP::JIS7;
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -9,11 +9,12 @@ for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) {
my $h2z = ( $name eq '7bit-jis' ) ? 0 : 1;
my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 0 : 1;
- $Encode::Encoding{$name} = bless {
+ my $obj = bless {
Name => $name,
h2z => $h2z,
jis0212 => $jis0212,
} => __PACKAGE__;
+ Encode::define_encoding($obj, $name);
}
use parent qw(Encode::Encoding);
@@ -29,6 +30,7 @@ use Encode::CJKConstants qw(:all);
sub decode($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $residue = '';
if ($chk) {
$str =~ s/([^\x00-\x7f].*)$//so and $residue = $1;
@@ -45,6 +47,7 @@ sub decode($$;$) {
sub encode($$;$) {
require Encode::JP::H2Z;
my ( $obj, $utf8, $chk ) = @_;
+ return undef unless defined $utf8;
# empty the input string in the stack so perlio is ok
$_[1] = '' if $chk;
diff --git a/cpan/Encode/lib/Encode/KR/2022_KR.pm b/cpan/Encode/lib/Encode/KR/2022_KR.pm
index 44373e5d58..122326403b 100644
--- a/cpan/Encode/lib/Encode/KR/2022_KR.pm
+++ b/cpan/Encode/lib/Encode/KR/2022_KR.pm
@@ -1,7 +1,7 @@
package Encode::KR::2022_KR;
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -16,6 +16,7 @@ sub perlio_ok {
sub decode {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $res = $str;
my $residue = iso_euc( \$res );
@@ -26,6 +27,7 @@ sub decode {
sub encode {
my ( $obj, $utf8, $chk ) = @_;
+ return undef unless defined $utf8;
# empty the input string in the stack so perlio is ok
$_[1] = '' if $chk;
diff --git a/cpan/Encode/lib/Encode/MIME/Header.pm b/cpan/Encode/lib/Encode/MIME/Header.pm
index ad14dba374..e23abffe37 100644
--- a/cpan/Encode/lib/Encode/MIME/Header.pm
+++ b/cpan/Encode/lib/Encode/MIME/Header.pm
@@ -2,7 +2,7 @@ package Encode::MIME::Header;
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.24 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.27 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Carp ();
use Encode ();
@@ -16,24 +16,28 @@ my %seed = (
bpl => 75, # bytes per line
);
-$Encode::Encoding{'MIME-Header'} = bless {
+my @objs;
+
+push @objs, bless {
%seed,
Name => 'MIME-Header',
} => __PACKAGE__;
-$Encode::Encoding{'MIME-B'} = bless {
+push @objs, bless {
%seed,
decode_q => 0,
Name => 'MIME-B',
} => __PACKAGE__;
-$Encode::Encoding{'MIME-Q'} = bless {
+push @objs, bless {
%seed,
decode_b => 0,
encode => 'Q',
Name => 'MIME-Q',
} => __PACKAGE__;
+Encode::define_encoding($_, $_->{Name}) foreach @objs;
+
use parent qw(Encode::Encoding);
sub needs_lines { 1 }
@@ -52,7 +56,7 @@ my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($
my $re_encoding_strict_b = qr/[Bb]/;
my $re_encoding_strict_q = qr/[Qq]/;
my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
-my $re_encoded_text_strict_q = qr/(?:[^\?\s=]|=[0-9A-Fa-f]{2})*/;
+my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB
my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
@@ -74,6 +78,7 @@ our $STRICT_DECODE = 0;
sub decode($$;$) {
my ($obj, $str, $chk) = @_;
+ return undef unless defined $str;
my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
@@ -194,7 +199,6 @@ sub _decode_q {
sub _decode_octets {
my ($enc, $octets, $chk) = @_;
$chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
- local $Carp::CarpLevel = $Carp::CarpLevel + 1; # propagate Carp messages back to caller
my $output = $enc->decode($octets, $chk);
return undef if not ref $chk and $chk and $octets ne '';
return $output;
@@ -202,6 +206,7 @@ sub _decode_octets {
sub encode($$;$) {
my ($obj, $str, $chk) = @_;
+ return undef unless defined $str;
my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
$_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
return $output . substr($str, 0, 0); # to propagate taintedness
@@ -237,11 +242,7 @@ sub _encode_string {
my @result = ();
my $octets = '';
while ( length( my $chr = substr($str, 0, 1, '') ) ) {
- my $seq;
- {
- local $Carp::CarpLevel = $Carp::CarpLevel + 1; # propagate Carp messages back to caller
- $seq = $enc->encode($chr, $enc_chk);
- }
+ my $seq = $enc->encode($chr, $enc_chk);
if ( not length($seq) ) {
substr($str, 0, 0, $chr);
last;
diff --git a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
index 86e66c371c..dc1e4275f0 100644
--- a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
+++ b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm
@@ -5,16 +5,17 @@ use warnings;
use parent qw(Encode::MIME::Header);
-$Encode::Encoding{'MIME-Header-ISO_2022_JP'} =
+my $obj =
bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } =>
__PACKAGE__;
+Encode::define_encoding($obj, 'MIME-Header-ISO_2022_JP');
use constant HEAD => '=?ISO-2022-JP?B?';
use constant TAIL => '?=';
use Encode::CJKConstants qw(%RE);
-our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 1.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
# I owe the below codes totally to
# Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64
@@ -22,6 +23,7 @@ our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%0
sub encode {
my $self = shift;
my $str = shift;
+ return undef unless defined $str;
utf8::encode($str) if ( Encode::is_utf8($str) );
Encode::from_to( $str, 'utf8', 'euc-jp' );
diff --git a/cpan/Encode/lib/Encode/Unicode/UTF7.pm b/cpan/Encode/lib/Encode/Unicode/UTF7.pm
index d5d86e2f90..e68647755f 100644
--- a/cpan/Encode/lib/Encode/Unicode/UTF7.pm
+++ b/cpan/Encode/lib/Encode/Unicode/UTF7.pm
@@ -1,15 +1,14 @@
#
-# $Id: UTF7.pm,v 2.8 2013/09/14 07:51:59 dankogai Exp $
+# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $
#
package Encode::Unicode::UTF7;
use strict;
use warnings;
-no warnings 'redefine';
use parent qw(Encode::Encoding);
__PACKAGE__->Define('UTF-7');
-our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use MIME::Base64;
-use Encode;
+use Encode qw(find_encoding);
#
# Algorithms taken from Unicode::String by Gisle Aas
@@ -30,6 +29,7 @@ sub needs_lines { 1 }
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
+ return undef unless defined $str;
my $len = length($str);
pos($str) = 0;
my $bytes = substr($str, 0, 0); # to propagate taintedness
@@ -61,6 +61,7 @@ sub encode($$;$) {
sub decode($$;$) {
use re 'taint';
my ( $obj, $bytes, $chk ) = @_;
+ return undef unless defined $bytes;
my $len = length($bytes);
my $str = substr($bytes, 0, 0); # to propagate taintedness;
pos($bytes) = 0;
diff --git a/cpan/Encode/t/CJKT.t b/cpan/Encode/t/CJKT.t
index 1648b1e5fd..264daf072f 100644
--- a/cpan/Encode/t/CJKT.t
+++ b/cpan/Encode/t/CJKT.t
@@ -57,8 +57,7 @@ for my $charset (sort keys %Charset){
$txt = join('',<$src>);
close($src);
- eval{ $uni = $transcoder->decode($txt, 1) };
- $@ and print $@;
+ eval { $uni = $transcoder->decode($txt, 1) } or print $@;
ok(defined($uni), "decode $charset"); $seq++;
is(length($txt),0, "decode $charset completely"); $seq++;
@@ -89,8 +88,7 @@ for my $charset (sort keys %Charset){
close $src;
my $unisave = $uni;
- eval{ $txt = $transcoder->encode($uni,1) };
- $@ and print $@;
+ eval { $txt = $transcoder->encode($uni,1) } or print $@;
ok(defined($txt), "encode $charset"); $seq++;
is(length($uni), 0, "encode $charset completely"); $seq++;
$uni = $unisave;
diff --git a/cpan/Encode/t/enc_data.t b/cpan/Encode/t/enc_data.t
index 2ead16ea95..e610b0d10e 100644
--- a/cpan/Encode/t/enc_data.t
+++ b/cpan/Encode/t/enc_data.t
@@ -1,4 +1,4 @@
-# $Id: enc_data.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $
+# $Id: enc_data.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $
BEGIN {
require Config; import Config;
diff --git a/cpan/Encode/t/enc_eucjp.t b/cpan/Encode/t/enc_eucjp.t
index 9b32459792..fc0af3cf33 100644
--- a/cpan/Encode/t/enc_eucjp.t
+++ b/cpan/Encode/t/enc_eucjp.t
@@ -1,4 +1,4 @@
-# $Id: enc_eucjp.t,v 2.3 2016/08/10 18:08:45 dankogai Exp $
+# $Id: enc_eucjp.t,v 2.5 2017/06/10 17:23:50 dankogai Exp $
# This is the twin of enc_utf8.t .
BEGIN {
@@ -19,8 +19,8 @@ BEGIN {
print "1..0 # Skip: Perl 5.8.1 or later required\n";
exit 0;
}
- if ($] >= 5.025003){
- print "1..0 # Skip: Perl 5.25.2 or lower required\n";
+ if ($] >= 5.025003 and !$Config{usecperl}){
+ print "1..0 # Skip: Perl <=5.25.2 or cperl required\n";
exit 0;
}
}
@@ -30,7 +30,7 @@ use encoding 'euc-jp';
my @c = (127, 128, 255, 256);
-print "1.." . (scalar @c + 1) . "\n";
+print "1.." . (scalar @c + 2) . "\n";
my @f;
@@ -65,7 +65,19 @@ binmode(F, ":raw"); # Output raw bytes.
print F chr(128); # Output illegal UTF-8.
close F;
open(F, $f) or die "$0: failed to open '$f' for reading: $!";
-binmode(F, ":encoding(utf-8)");
+binmode(F, ":encoding(UTF-8)");
+{
+ local $^W = 1;
+ local $SIG{__WARN__} = sub { $a = shift };
+ eval { <F> }; # This should get caught.
+}
+close F;
+print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ?
+ "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n";
+$t++;
+
+open(F, $f) or die "$0: failed to open '$f' for reading: $!";
+binmode(F, ":encoding(utf8)");
{
local $^W = 1;
local $SIG{__WARN__} = sub { $a = shift };
@@ -74,6 +86,7 @@ binmode(F, ":encoding(utf-8)");
close F;
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
"ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n";
+$t++;
# On VMS temporary file names like "f0." may be more readable than "f0" since
# "f0" could be a logical name pointing elsewhere.
diff --git a/cpan/Encode/t/enc_module.t b/cpan/Encode/t/enc_module.t
index 7d7382b903..fd6e6dcde6 100644
--- a/cpan/Encode/t/enc_module.t
+++ b/cpan/Encode/t/enc_module.t
@@ -1,4 +1,4 @@
-# $Id: enc_module.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $
+# $Id: enc_module.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $
# This file is in euc-jp
BEGIN {
require Config; import Config;
diff --git a/cpan/Encode/t/enc_utf8.t b/cpan/Encode/t/enc_utf8.t
index b07c573960..be7d487804 100644
--- a/cpan/Encode/t/enc_utf8.t
+++ b/cpan/Encode/t/enc_utf8.t
@@ -1,4 +1,4 @@
-# $Id: enc_utf8.t,v 2.3 2016/08/10 18:08:45 dankogai Exp $
+# $Id: enc_utf8.t,v 2.5 2017/06/10 17:23:50 dankogai Exp $
# This is the twin of enc_eucjp.t .
BEGIN {
@@ -15,8 +15,8 @@ BEGIN {
print "1..0 # encoding pragma does not support EBCDIC platforms\n";
exit(0);
}
- if ($] >= 5.025003){
- print "1..0 # Skip: Perl 5.25.2 or lower required\n";
+ if ($] >= 5.025003 and !$Config{usecperl}){
+ print "1..0 # Skip: Perl <=5.25.2 or cperl required\n";
exit 0;
}
}
@@ -26,7 +26,7 @@ use encoding 'utf8';
my @c = (127, 128, 255, 256);
-print "1.." . (scalar @c + 1) . "\n";
+print "1.." . (scalar @c + 2) . "\n";
my @f;
@@ -59,7 +59,19 @@ binmode(F, ":raw"); # Output raw bytes.
print F chr(128); # Output illegal UTF-8.
close F;
open(F, $f) or die "$0: failed to open '$f' for reading: $!";
-binmode(F, ":encoding(utf-8)");
+binmode(F, ":encoding(UTF-8)");
+{
+ local $^W = 1;
+ local $SIG{__WARN__} = sub { $a = shift };
+ eval { <F> }; # This should get caught.
+}
+close F;
+print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ?
+ "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n";
+$t++;
+
+open(F, $f) or die "$0: failed to open '$f' for reading: $!";
+binmode(F, ":encoding(utf8)");
{
local $^W = 1;
local $SIG{__WARN__} = sub { $a = shift };
@@ -68,6 +80,7 @@ binmode(F, ":encoding(utf-8)");
close F;
print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ?
"ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n";
+$t++;
# On VMS temporary file names like "f0." may be more readable than "f0" since
# "f0" could be a logical name pointing elsewhere.
diff --git a/cpan/Encode/t/fallback.t b/cpan/Encode/t/fallback.t
index 86605ef3b8..011c86dbfc 100644
--- a/cpan/Encode/t/fallback.t
+++ b/cpan/Encode/t/fallback.t
@@ -17,7 +17,7 @@ BEGIN {
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 50;
+use Test::More tests => 58;
use Encode q(:all);
my $uo = '';
@@ -35,7 +35,7 @@ for my $i (0x80..0xff){
$uo .= chr($i);
$residue .= chr($i);
$af .= '?';
- $uf .= "\x{FFFD}" if $i < 0xfd;
+ $uf .= "\x{FFFD}";
$ap .= sprintf("\\x{%04x}", $i);
$up .= sprintf("\\x%02X", $i);
$ah .= sprintf("&#%d;", $i);
@@ -50,6 +50,7 @@ my $ao = $uo;
utf8::upgrade($uo);
my $ascii = find_encoding('ascii');
+my $latin1 = find_encoding('latin1');
my $utf8 = find_encoding('utf8');
my $src = $uo;
@@ -166,19 +167,46 @@ is($src, $ao, "coderef residue decode");
$src = "\x{3000}";
$dst = $ascii->encode($src, sub{ $_[0] });
-is $dst, 0x3000."", qq{$ascii->encode(\$src, sub{ \$_[0] } )};
+is $dst, 0x3000."", q{$ascii->encode($src, sub{ $_[0] } )};
$dst = encode("ascii", "\x{3000}", sub{ $_[0] });
-is $dst, 0x3000."", qq{encode("ascii", "\\x{3000}", sub{ \$_[0] })};
+is $dst, 0x3000."", q{encode("ascii", "\x{3000}", sub{ $_[0] })};
$src = pack "C*", 0xFF;
$dst = $ascii->decode($src, sub{ $_[0] });
-is $dst, 0xFF."", qq{$ascii->encode(\$src, sub{ \$_[0] } )};
+is $dst, 0xFF."", q{$ascii->encode($src, sub{ $_[0] } )};
$dst = decode("ascii", (pack "C*", 0xFF), sub{ $_[0] });
-is $dst, 0xFF."", qq{decode("ascii", (pack "C*", 0xFF), sub{ \$_[0] })};
+is $dst, 0xFF."", q{decode("ascii", (pack "C*", 0xFF), sub{ $_[0] })};
$src = pack "C*", 0x80;
$dst = $utf8->decode($src, sub{ $_[0] });
-is $dst, 0x80."", qq{$utf8->encode(\$src, sub{ \$_[0] } )};
+is $dst, 0x80."", q{$utf8->encode($src, sub{ $_[0] } )};
$dst = decode("utf8", $src, sub{ $_[0] });
-is $dst, 0x80."", qq{decode("utf8", (pack "C*", 0x80), sub{ \$_[0] })};
+is $dst, 0x80."", q{decode("utf8", (pack "C*", 0x80), sub{ $_[0] })};
+
+$src = "\x{3000}";
+$dst = $latin1->encode($src, sub { "\N{U+FF}" });
+is $dst, "\x{ff}", q{$latin1->encode($src, sub { "\N{U+FF}" })};
+$dst = encode("latin1", $src, sub { "\N{U+FF}" });
+is $dst, "\x{ff}", q{encode("latin1", $src, sub { "\N{U+FF}" })};
+
+$src = "\x{3000}";
+$dst = $latin1->encode($src, sub { utf8::upgrade(my $r = "\x{ff}"); $r });
+is $dst, "\x{ff}", q{$latin1->encode($src, sub { utf8::upgrade(my $r = "\x{ff}"); $r })};
+$dst = encode("latin1", $src, sub { utf8::upgrade(my $r = "\x{ff}"); $r });
+is $dst, "\x{ff}", q{encode("latin1", $src, sub { utf8::upgrade(my $r = "\x{ff}"); $r })};
+
+$src = "\x{ff}";
+$dst = $utf8->decode($src, sub { chr($_[0]) });
+is $dst, "\x{ff}", q{$utf8->decode($src, sub { chr($_[0]) })};
+$dst = decode("utf8", $src, sub { chr($_[0]) });
+is $dst, "\x{ff}", q{decode("utf8", $src, sub { chr($_[0]) })};
+
+{
+ use charnames ':full';
+ $src = "\x{ff}";
+ $dst = $utf8->decode($src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r });
+ is $dst, "\N{LATIN SMALL LETTER Y WITH DIAERESIS}", q{$utf8->decode($src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r })};
+ $dst = decode("utf8", $src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r });
+ is $dst, "\N{LATIN SMALL LETTER Y WITH DIAERESIS}", q{decode("utf8", $src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r })};
+}
diff --git a/cpan/Encode/t/guess.t b/cpan/Encode/t/guess.t
index 81ab91b562..896028ba8d 100644
--- a/cpan/Encode/t/guess.t
+++ b/cpan/Encode/t/guess.t
@@ -18,11 +18,7 @@ use Encode qw(decode encode find_encoding _utf8_off);
#use Test::More qw(no_plan);
use Test::More tests => 32;
-use_ok("Encode::Guess");
-{
- no warnings;
- $Encode::Guess::DEBUG = shift || 0;
-}
+BEGIN { use_ok("Encode::Guess") }
my $ascii = join('' => map {chr($_)}(0x21..0x7e));
my $latin1 = join('' => map {chr($_)}(0xa1..0xfe));
diff --git a/cpan/Encode/t/jperl.t b/cpan/Encode/t/jperl.t
index a0e7a379f6..5995a592ba 100644
--- a/cpan/Encode/t/jperl.t
+++ b/cpan/Encode/t/jperl.t
@@ -1,5 +1,5 @@
#
-# $Id: jperl.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $
+# $Id: jperl.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $
#
# This script is written in euc-jp
diff --git a/cpan/Encode/t/mime-header.t b/cpan/Encode/t/mime-header.t
index a997dffb41..7abb0206cb 100644
--- a/cpan/Encode/t/mime-header.t
+++ b/cpan/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
#
-# $Id: mime-header.t,v 2.14 2016/11/29 23:29:23 dankogai Exp dankogai $
+# $Id: mime-header.t,v 2.15 2017/07/18 07:15:29 dankogai Exp dankogai $
# This script is written in utf8
#
BEGIN {
@@ -24,7 +24,7 @@ use strict;
use utf8;
use charnames ":full";
-use Test::More tests => 264;
+use Test::More tests => 266;
BEGIN {
use_ok("Encode::MIME::Header");
@@ -136,6 +136,8 @@ my @decode_default_tests = (
"=?utf8?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
"=?UTF8?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
"=?utf-8-strict?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
+ # allow non-ASCII characters in q word
+ "=?UTF-8?Q?\x{C3}\x{A1}?=" => "á",
);
my @decode_strict_tests = (
@@ -155,6 +157,8 @@ my @decode_strict_tests = (
"=?utf8?Q?=C3=A1?=" => "=?utf8?Q?=C3=A1?=",
"=?UTF8?Q?=C3=A1?=" => "=?UTF8?Q?=C3=A1?=",
"=?utf-8-strict?Q?=C3=A1?=" => "=?utf-8-strict?Q?=C3=A1?=",
+ # do not allow non-ASCII characters in q word
+ "=?UTF-8?Q?\x{C3}\x{A1}?=" => "=?UTF-8?Q?\x{C3}\x{A1}?=",
);
my @encode_tests = (
diff --git a/cpan/Encode/t/truncated_utf8.t b/cpan/Encode/t/truncated_utf8.t
new file mode 100644
index 0000000000..7de8bb9ac1
--- /dev/null
+++ b/cpan/Encode/t/truncated_utf8.t
@@ -0,0 +1,55 @@
+BEGIN {
+ if ($ENV{'PERL_CORE'}) {
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Encode;
+use PerlIO::encoding;
+$PerlIO::encoding::fallback &= ~(Encode::WARN_ON_ERR|Encode::PERLQQ);
+
+use Test::More tests => 9;
+
+binmode Test::More->builder->failure_output, ":utf8";
+binmode Test::More->builder->todo_output, ":utf8";
+
+is(decode("UTF-8", "\xfd\xfe"), "\x{fffd}" x 2);
+is(decode("UTF-8", "\xfd\xfe\xff"), "\x{fffd}" x 3);
+is(decode("UTF-8", "\xfd\xfe\xff\xe0"), "\x{fffd}" x 4);
+is(decode("UTF-8", "\xfd\xfe\xff\xe0\xe1"), "\x{fffd}" x 5);
+is(decode("UTF-8", "\xc1\x9f"), "\x{fffd}");
+is(decode("UTF-8", "\xFF\x80\x90\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"), "\x{fffd}");
+is(decode("UTF-8", "\xF0\x80\x80\x80"), "\x{fffd}");
+
+SKIP: {
+ # infinite loop due to bug: https://rt.perl.org/Public/Bug/Display.html?id=41442
+ skip "Perl Version ($]) is older than v5.8.9", 2 if $] < 5.008009;
+ my $str = ("x" x 1023) . "\xfd\xfe\xffx";
+ open my $fh, '<:encoding(UTF-8)', \$str;
+ my $str2 = <$fh>;
+ close $fh;
+ is($str2, ("x" x 1023) . ("\x{fffd}" x 3) . "x");
+
+ TODO: {
+ local $TODO = "bug in perlio";
+ my $str = ("x" x 1023) . "\xfd\xfe\xff";
+ open my $fh, '<:encoding(UTF-8)', \$str;
+ my $str2 = <$fh>;
+ close $fh;
+ is($str2, ("x" x 1023) . ("\x{fffd}" x 3));
+ }
+}
diff --git a/cpan/Encode/t/undef.t b/cpan/Encode/t/undef.t
new file mode 100644
index 0000000000..de52019b18
--- /dev/null
+++ b/cpan/Encode/t/undef.t
@@ -0,0 +1,25 @@
+use strict;
+use warnings FATAL => 'all';
+
+use Test::More;
+
+use Encode qw(encode decode find_encoding);
+use Encode::Encoder qw(encoder);
+
+local %Encode::ExtModule = %Encode::Config::ExtModule;
+
+my @names = Encode->encodings(':all');
+
+plan tests => 1 + 4 * @names;
+
+my $emptyutf8;
+eval { my $c = encoder($emptyutf8)->utf8; };
+ok(!$@,"crashed encoding undef variable ($@)");
+
+for my $name (@names) {
+ my $enc = find_encoding($name);
+ is($enc->encode(undef), undef, "find_encoding('$name')->encode(undef) returns undef");
+ is($enc->decode(undef), undef, "find_encoding('$name')->decode(undef) returns undef");
+ is(encode($name, undef), undef, "encode('$name', undef) returns undef");
+ is(decode($name, undef), undef, "decode('$name', undef) returns undef");
+}
diff --git a/cpan/Encode/t/use-Encode-Alias.t b/cpan/Encode/t/use-Encode-Alias.t
new file mode 100644
index 0000000000..dab8142cfa
--- /dev/null
+++ b/cpan/Encode/t/use-Encode-Alias.t
@@ -0,0 +1,8 @@
+use strict;
+use warnings;
+
+use Encode::Alias;
+use open ":std", ":locale";
+
+print "1..1\n";
+print "ok 1 - use Encode::Alias works\n";
diff --git a/cpan/Encode/t/utf8messages.t b/cpan/Encode/t/utf8messages.t
new file mode 100644
index 0000000000..8b6b379acb
--- /dev/null
+++ b/cpan/Encode/t/utf8messages.t
@@ -0,0 +1,33 @@
+use strict;
+use warnings;
+BEGIN { 'warnings'->unimport('utf8') if $] < 5.014 }; # turn off 'UTF-16 surrogate 0xd800' warnings
+
+use Test::More;
+use Encode qw(encode decode FB_CROAK LEAVE_SRC);
+
+plan tests => 12;
+
+my @invalid;
+
+ok ! defined eval { encode('UTF-8', "\x{D800}", FB_CROAK | LEAVE_SRC) }, 'Surrogate codepoint \x{D800} is not encoded to strict UTF-8';
+like $@, qr/^"\\x\{d800\}" does not map to UTF-8 /, 'Error message contains strict UTF-8 name';
+@invalid = ();
+encode('UTF-8', "\x{D800}", sub { @invalid = @_; return ""; });
+is_deeply \@invalid, [ 0xD800 ], 'Fallback coderef contains invalid codepoint 0xD800';
+
+ok ! defined eval { decode('UTF-8', "\xed\xa0\x80", FB_CROAK | LEAVE_SRC) }, 'Surrogate UTF-8 byte sequence \xED\xA0\x80 is decoded with strict UTF-8 decoder';
+like $@, qr/^UTF-8 "\\xED\\xA0\\x80" does not map to Unicode /, 'Error message contains strict UTF-8 name and original (not decoded) invalid sequence';
+@invalid = ();
+decode('UTF-8', "\xed\xa0\x80", sub { @invalid = @_; return ""; });
+is_deeply \@invalid, [ 0xED, 0xA0, 0x80 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0, 0x80';
+
+ok ! defined eval { decode('UTF-8', "\xed\xa0", FB_CROAK | LEAVE_SRC) }, 'Invalid byte sequence \xED\xA0 is not decoded with strict UTF-8 decoder';
+like $@, qr/^UTF-8 "\\xED\\xA0" does not map to Unicode /, 'Error message contains strict UTF-8 name and original (not decoded) invalid sequence';
+@invalid = ();
+decode('UTF-8', "\xed\xa0", sub { @invalid = @_; return ""; });
+is_deeply \@invalid, [ 0xED, 0xA0 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0';
+
+ok ! defined eval { decode('utf8', "\xed\xa0", FB_CROAK | LEAVE_SRC) }, 'Invalid byte sequence \xED\xA0 is not decoded with non-strict utf8 decoder';
+like $@, qr/^utf8 "\\xED\\xA0" does not map to Unicode /, 'Error message contains non-strict utf8 name and original (not decoded) invalid sequence';
+decode('utf8', "\xed\xa0", sub { @invalid = @_; return ""; });
+is_deeply \@invalid, [ 0xED, 0xA0 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0';
diff --git a/cpan/Encode/t/whatwg-aliases.json b/cpan/Encode/t/whatwg-aliases.json
new file mode 100644
index 0000000000..4307b0cc48
--- /dev/null
+++ b/cpan/Encode/t/whatwg-aliases.json
@@ -0,0 +1,455 @@
+[
+ {
+ "encodings": [
+ {
+ "labels": [
+ "unicode-1-1-utf-8",
+ "utf-8",
+ "utf8"
+ ],
+ "name": "UTF-8"
+ }
+ ],
+ "heading": "The Encoding"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "866",
+ "cp866",
+ "csibm866",
+ "ibm866"
+ ],
+ "name": "IBM866"
+ },
+ {
+ "labels": [
+ "csisolatin2",
+ "iso-8859-2",
+ "iso-ir-101",
+ "iso8859-2",
+ "iso88592",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "l2",
+ "latin2"
+ ],
+ "name": "ISO-8859-2"
+ },
+ {
+ "labels": [
+ "csisolatin3",
+ "iso-8859-3",
+ "iso-ir-109",
+ "iso8859-3",
+ "iso88593",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "l3",
+ "latin3"
+ ],
+ "name": "ISO-8859-3"
+ },
+ {
+ "labels": [
+ "csisolatin4",
+ "iso-8859-4",
+ "iso-ir-110",
+ "iso8859-4",
+ "iso88594",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "l4",
+ "latin4"
+ ],
+ "name": "ISO-8859-4"
+ },
+ {
+ "labels": [
+ "csisolatincyrillic",
+ "cyrillic",
+ "iso-8859-5",
+ "iso-ir-144",
+ "iso8859-5",
+ "iso88595",
+ "iso_8859-5",
+ "iso_8859-5:1988"
+ ],
+ "name": "ISO-8859-5"
+ },
+ {
+ "labels": [
+ "arabic",
+ "asmo-708",
+ "csiso88596e",
+ "csiso88596i",
+ "csisolatinarabic",
+ "ecma-114",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-ir-127",
+ "iso8859-6",
+ "iso88596",
+ "iso_8859-6",
+ "iso_8859-6:1987"
+ ],
+ "name": "ISO-8859-6"
+ },
+ {
+ "labels": [
+ "csisolatingreek",
+ "ecma-118",
+ "elot_928",
+ "greek",
+ "greek8",
+ "iso-8859-7",
+ "iso-ir-126",
+ "iso8859-7",
+ "iso88597",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "sun_eu_greek"
+ ],
+ "name": "ISO-8859-7"
+ },
+ {
+ "labels": [
+ "csiso88598e",
+ "csisolatinhebrew",
+ "hebrew",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-ir-138",
+ "iso8859-8",
+ "iso88598",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "visual"
+ ],
+ "name": "ISO-8859-8"
+ },
+ {
+ "labels": [
+ "csiso88598i",
+ "iso-8859-8-i",
+ "logical"
+ ],
+ "name": "ISO-8859-8-I"
+ },
+ {
+ "labels": [
+ "csisolatin6",
+ "iso-8859-10",
+ "iso-ir-157",
+ "iso8859-10",
+ "iso885910",
+ "l6",
+ "latin6"
+ ],
+ "name": "ISO-8859-10"
+ },
+ {
+ "labels": [
+ "iso-8859-13",
+ "iso8859-13",
+ "iso885913"
+ ],
+ "name": "ISO-8859-13"
+ },
+ {
+ "labels": [
+ "iso-8859-14",
+ "iso8859-14",
+ "iso885914"
+ ],
+ "name": "ISO-8859-14"
+ },
+ {
+ "labels": [
+ "csisolatin9",
+ "iso-8859-15",
+ "iso8859-15",
+ "iso885915",
+ "iso_8859-15",
+ "l9"
+ ],
+ "name": "ISO-8859-15"
+ },
+ {
+ "labels": [
+ "iso-8859-16"
+ ],
+ "name": "ISO-8859-16"
+ },
+ {
+ "labels": [
+ "cskoi8r",
+ "koi",
+ "koi8",
+ "koi8-r",
+ "koi8_r"
+ ],
+ "name": "KOI8-R"
+ },
+ {
+ "labels": [
+ "koi8-ru",
+ "koi8-u"
+ ],
+ "name": "KOI8-U"
+ },
+ {
+ "labels": [
+ "csmacintosh",
+ "mac",
+ "macintosh",
+ "x-mac-roman"
+ ],
+ "name": "macintosh"
+ },
+ {
+ "labels": [
+ "dos-874",
+ "iso-8859-11",
+ "iso8859-11",
+ "iso885911",
+ "tis-620",
+ "windows-874"
+ ],
+ "name": "windows-874"
+ },
+ {
+ "labels": [
+ "cp1250",
+ "windows-1250",
+ "x-cp1250"
+ ],
+ "name": "windows-1250"
+ },
+ {
+ "labels": [
+ "cp1251",
+ "windows-1251",
+ "x-cp1251"
+ ],
+ "name": "windows-1251"
+ },
+ {
+ "labels": [
+ "ansi_x3.4-1968",
+ "ascii",
+ "cp1252",
+ "cp819",
+ "csisolatin1",
+ "ibm819",
+ "iso-8859-1",
+ "iso-ir-100",
+ "iso8859-1",
+ "iso88591",
+ "iso_8859-1",
+ "iso_8859-1:1987",
+ "l1",
+ "latin1",
+ "us-ascii",
+ "windows-1252",
+ "x-cp1252"
+ ],
+ "name": "windows-1252"
+ },
+ {
+ "labels": [
+ "cp1253",
+ "windows-1253",
+ "x-cp1253"
+ ],
+ "name": "windows-1253"
+ },
+ {
+ "labels": [
+ "cp1254",
+ "csisolatin5",
+ "iso-8859-9",
+ "iso-ir-148",
+ "iso8859-9",
+ "iso88599",
+ "iso_8859-9",
+ "iso_8859-9:1989",
+ "l5",
+ "latin5",
+ "windows-1254",
+ "x-cp1254"
+ ],
+ "name": "windows-1254"
+ },
+ {
+ "labels": [
+ "cp1255",
+ "windows-1255",
+ "x-cp1255"
+ ],
+ "name": "windows-1255"
+ },
+ {
+ "labels": [
+ "cp1256",
+ "windows-1256",
+ "x-cp1256"
+ ],
+ "name": "windows-1256"
+ },
+ {
+ "labels": [
+ "cp1257",
+ "windows-1257",
+ "x-cp1257"
+ ],
+ "name": "windows-1257"
+ },
+ {
+ "labels": [
+ "cp1258",
+ "windows-1258",
+ "x-cp1258"
+ ],
+ "name": "windows-1258"
+ },
+ {
+ "labels": [
+ "x-mac-cyrillic",
+ "x-mac-ukrainian"
+ ],
+ "name": "x-mac-cyrillic"
+ }
+ ],
+ "heading": "Legacy single-byte encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "chinese",
+ "csgb2312",
+ "csiso58gb231280",
+ "gb2312",
+ "gb_2312",
+ "gb_2312-80",
+ "gbk",
+ "iso-ir-58",
+ "x-gbk"
+ ],
+ "name": "GBK"
+ },
+ {
+ "labels": [
+ "gb18030"
+ ],
+ "name": "gb18030"
+ }
+ ],
+ "heading": "Legacy multi-byte Chinese (simplified) encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "big5",
+ "big5-hkscs",
+ "cn-big5",
+ "csbig5",
+ "x-x-big5"
+ ],
+ "name": "Big5"
+ }
+ ],
+ "heading": "Legacy multi-byte Chinese (traditional) encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "cseucpkdfmtjapanese",
+ "euc-jp",
+ "x-euc-jp"
+ ],
+ "name": "EUC-JP"
+ },
+ {
+ "labels": [
+ "csiso2022jp",
+ "iso-2022-jp"
+ ],
+ "name": "ISO-2022-JP"
+ },
+ {
+ "labels": [
+ "csshiftjis",
+ "ms932",
+ "ms_kanji",
+ "shift-jis",
+ "shift_jis",
+ "sjis",
+ "windows-31j",
+ "x-sjis"
+ ],
+ "name": "Shift_JIS"
+ }
+ ],
+ "heading": "Legacy multi-byte Japanese encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "cseuckr",
+ "csksc56011987",
+ "euc-kr",
+ "iso-ir-149",
+ "korean",
+ "ks_c_5601-1987",
+ "ks_c_5601-1989",
+ "ksc5601",
+ "ksc_5601",
+ "windows-949"
+ ],
+ "name": "EUC-KR"
+ }
+ ],
+ "heading": "Legacy multi-byte Korean encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "csiso2022kr",
+ "hz-gb-2312",
+ "iso-2022-cn",
+ "iso-2022-cn-ext",
+ "iso-2022-kr"
+ ],
+ "name": "replacement"
+ },
+ {
+ "labels": [
+ "utf-16be"
+ ],
+ "name": "UTF-16BE"
+ },
+ {
+ "labels": [
+ "utf-16",
+ "utf-16le"
+ ],
+ "name": "UTF-16LE"
+ },
+ {
+ "labels": [
+ "x-user-defined"
+ ],
+ "name": "x-user-defined"
+ }
+ ],
+ "heading": "Legacy miscellaneous encodings"
+ }
+]
diff --git a/cpan/Encode/t/whatwg-aliases.t b/cpan/Encode/t/whatwg-aliases.t
new file mode 100644
index 0000000000..ffc030bb75
--- /dev/null
+++ b/cpan/Encode/t/whatwg-aliases.t
@@ -0,0 +1,66 @@
+# This test checks aliases support based on the list in the
+# WHATWG Encoding Living Standard
+#
+# https://encoding.spec.whatwg.org/
+#
+# The input of this test is the file whatwg-aliases.json downloaded from
+# https://encoding.spec.whatwg.org/encodings.json
+#
+# To run:
+# AUTHOR_TESTING=1 prove -l t/whatwg-aliases.t
+
+
+use Test::More
+ ($ENV{AUTHOR_TESTING} || $ENV{RELEASE_TESTING})
+ ? 'no_plan'
+ : (skip_all => 'For maintainers only');
+use Encode 'find_encoding';
+use JSON::PP 'decode_json';
+use File::Spec;
+use FindBin;
+
+my $encodings = decode_json(do {
+ # https://encoding.spec.whatwg.org/encodings.json
+ open my $f, '<', File::Spec->catdir($FindBin::Bin, 'whatwg-aliases.json');
+ local $/;
+ <$f>
+});
+
+my %IGNORE = map { $_ => '' } qw(
+ replacement
+ utf8
+);
+
+my %TODO = (
+ 'ISO-8859-8-I' => 'Not supported',
+ 'gb18030' => 'Not supported',
+ '866' => 'Not supported',
+ 'x-user-defined' => 'Not supported',
+ # ...
+);
+
+for my $section (@$encodings) {
+ for my $enc (@{$section->{encodings}}) {
+
+ my $name = $enc->{name};
+
+ next if exists $IGNORE{$name};
+
+ local $TODO = $TODO{$name} if exists $TODO{$name};
+
+ my $encoding = find_encoding($name);
+ isa_ok($encoding, 'Encode::Encoding', $name);
+
+ for my $label (@{$enc->{labels}}) {
+ local $TODO = $TODO{$label} if exists $TODO{$label};
+
+ my $e = find_encoding($label);
+ if (isa_ok($e, 'Encode::Encoding', $label)) {
+ next if exists $IGNORE{$label};
+ is($e->name, $encoding->name, "$label ->name is $name")
+ }
+ }
+ }
+}
+
+done_testing;
diff --git a/t/porting/customized.dat b/t/porting/customized.dat
index 26b5673c65..1513c1822a 100644
--- a/t/porting/customized.dat
+++ b/t/porting/customized.dat
@@ -1,5 +1,4 @@
Digest cpan/Digest/Digest.pm 43f7f544cb11842b2f55c73e28930da50774e081
-Encode cpan/Encode/Unicode/Unicode.pm 9749692c67f7d69083034de9184a93f070ab4799
ExtUtils::Constant cpan/ExtUtils-Constant/t/Constant.t a0369c919e216fb02767a637666bb4577ad79b02
Locale::Maketext::Simple cpan/Locale-Maketext-Simple/lib/Locale/Maketext/Simple.pm 57ed38905791a17c150210cd6f42ead22a7707b6
Math::Complex cpan/Math-Complex/lib/Math/Complex.pm 198ea6c6c584f5ea79a0fd7e9d411d0878f3b2af