diff options
author | Aaron Crane <arc@cpan.org> | 2017-07-18 18:06:46 +0100 |
---|---|---|
committer | Aaron Crane <arc@cpan.org> | 2017-07-18 18:06:46 +0100 |
commit | 3f60a9307162888df8e8e13b2361a3b8380c8744 (patch) | |
tree | ed7e9ea407d62779e1a440ea7b905da5d32522fd /cpan | |
parent | 589c97f41d373f2e7205a4ffbcb7a639635b7bda (diff) | |
download | perl-3f60a9307162888df8e8e13b2361a3b8380c8744.tar.gz |
Import Encode-2.92 from CPAN
This also permits removing the local customisation for the previous version.
Diffstat (limited to 'cpan')
33 files changed, 1203 insertions, 419 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm index 57b4292279..5a27c5990c 100644 --- a/cpan/Encode/Encode.pm +++ b/cpan/Encode/Encode.pm @@ -1,16 +1,21 @@ # -# $Id: Encode.pm,v 2.88 2016/11/29 23:30:30 dankogai Exp dankogai $ +# $Id: Encode.pm,v 2.92 2017/07/18 07:15:29 dankogai Exp dankogai $ # package Encode; use strict; use warnings; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.88 $ =~ /(\d+)/g; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; -use XSLoader (); -XSLoader::load( __PACKAGE__, $VERSION ); +our $VERSION; +BEGIN { + $VERSION = sprintf "%d.%02d", q$Revision: 2.92 $ =~ /(\d+)/g; + require XSLoader; + XSLoader::load( __PACKAGE__, $VERSION ); +} use Exporter 5.57 'import'; +our @CARP_NOT = qw(Encode::Encoder); + # Public, encouraged API is exported by default our @EXPORT = qw( @@ -44,7 +49,10 @@ our %EXPORT_TAGS = ( our $ON_EBCDIC = ( ord("A") == 193 ); -use Encode::Alias; +use Encode::Alias (); +use Encode::MIME::Name; + +use Storable; # Make a %Encoding package variable to allow a certain amount of cheating our %Encoding; @@ -96,6 +104,9 @@ sub define_encoding { my $alias = shift; define_alias( $alias, $obj ); } + my $class = ref($obj); + push @Encode::CARP_NOT, $class unless grep { $_ eq $class } @Encode::CARP_NOT; + push @Encode::Encoding::CARP_NOT, $class unless grep { $_ eq $class } @Encode::Encoding::CARP_NOT; return $obj; } @@ -127,6 +138,15 @@ sub getEncoding { return; } +# HACK: These two functions must be defined in Encode and because of +# cyclic dependency between Encode and Encode::Alias, Exporter does not work +sub find_alias { + goto &Encode::Alias::find_alias; +} +sub define_alias { + goto &Encode::Alias::define_alias; +} + sub find_encoding($;$) { my ( $name, $skip_external ) = @_; return __PACKAGE__->getEncoding( $name, $skip_external ); @@ -134,8 +154,6 @@ sub find_encoding($;$) { sub find_mime_encoding($;$) { my ( $mime_name, $skip_external ) = @_; - eval { require Encode::MIME::Name; }; - $@ and return; my $name = Encode::MIME::Name::get_encode_name( $mime_name ); return find_encoding( $name, $skip_external ); } @@ -149,8 +167,6 @@ sub resolve_alias($) { sub clone_encoding($) { my $obj = find_encoding(shift); ref $obj or return; - eval { require Storable }; - $@ and return; return Storable::dclone($obj); } @@ -182,7 +198,7 @@ sub encode($$;$) { else { $octets = $enc->encode( $string, $check ); } - $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC() ); + $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC ); return $octets; } *str2bytes = \&encode; @@ -211,7 +227,7 @@ sub decode($$;$) { else { $string = $enc->decode( $octets, $check ); } - $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() ); + $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC ); return $string; } *bytes2str = \&decode; @@ -278,133 +294,87 @@ sub decode_utf8($;$) { $check ||= 0; $utf8enc ||= find_encoding('utf8'); my $string = $utf8enc->decode( $octets, $check ); - $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() ); + $_[0] = $octets if $check and !ref $check and !( $check & LEAVE_SRC ); return $string; } -# sub decode_utf8($;$) { -# my ( $str, $check ) = @_; -# return $str if is_utf8($str); -# if ($check) { -# return decode( "utf8", $str, $check ); -# } -# else { -# return decode( "utf8", $str ); -# return $str; -# } -# } - -predefine_encodings(1); - -# -# This is to restore %Encoding if really needed; -# - -sub predefine_encodings { - require Encode::Encoding; - no warnings 'redefine'; - my $use_xs = shift; - if ($ON_EBCDIC) { - - # was in Encode::UTF_EBCDIC - package Encode::UTF_EBCDIC; - push @Encode::UTF_EBCDIC::ISA, 'Encode::Encoding'; - *decode = sub { - my ( undef, $str, $chk ) = @_; - my $res = ''; - for ( my $i = 0 ; $i < length($str) ; $i++ ) { - $res .= - chr( - utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) ) - ); - } - $_[1] = '' if $chk; - return $res; - }; - *encode = sub { - my ( undef, $str, $chk ) = @_; - my $res = ''; - for ( my $i = 0 ; $i < length($str) ; $i++ ) { - $res .= - chr( - utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) ) - ); - } - $_[1] = '' if $chk; - return $res; - }; - $Encode::Encoding{Unicode} = - bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC"; +onBOOT; + +if ($ON_EBCDIC) { + package Encode::UTF_EBCDIC; + use parent 'Encode::Encoding'; + my $obj = bless { Name => "UTF_EBCDIC" } => "Encode::UTF_EBCDIC"; + Encode::define_encoding($obj, 'Unicode'); + sub decode { + my ( undef, $str, $chk ) = @_; + my $res = ''; + for ( my $i = 0 ; $i < length($str) ; $i++ ) { + $res .= + chr( + utf8::unicode_to_native( ord( substr( $str, $i, 1 ) ) ) + ); + } + $_[1] = '' if $chk; + return $res; } - else { - - package Encode::Internal; - push @Encode::Internal::ISA, 'Encode::Encoding'; - *decode = sub { - my ( undef, $str, $chk ) = @_; - utf8::upgrade($str); - $_[1] = '' if $chk; - return $str; - }; - *encode = \&decode; - $Encode::Encoding{Unicode} = - bless { Name => "Internal" } => "Encode::Internal"; + sub encode { + my ( undef, $str, $chk ) = @_; + my $res = ''; + for ( my $i = 0 ; $i < length($str) ; $i++ ) { + $res .= + chr( + utf8::native_to_unicode( ord( substr( $str, $i, 1 ) ) ) + ); + } + $_[1] = '' if $chk; + return $res; } - { - # https://rt.cpan.org/Public/Bug/Display.html?id=103253 - package Encode::XS; - push @Encode::XS::ISA, 'Encode::Encoding'; +} else { + package Encode::Internal; + use parent 'Encode::Encoding'; + my $obj = bless { Name => "Internal" } => "Encode::Internal"; + Encode::define_encoding($obj, 'Unicode'); + sub decode { + my ( undef, $str, $chk ) = @_; + utf8::upgrade($str); + $_[1] = '' if $chk; + return $str; } - { + *encode = \&decode; +} - # was in Encode::utf8 - package Encode::utf8; - push @Encode::utf8::ISA, 'Encode::Encoding'; +{ + # https://rt.cpan.org/Public/Bug/Display.html?id=103253 + package Encode::XS; + use parent 'Encode::Encoding'; +} - # - if ($use_xs) { - Encode::DEBUG and warn __PACKAGE__, " XS on"; - *decode = \&decode_xs; - *encode = \&encode_xs; - } - else { - Encode::DEBUG and warn __PACKAGE__, " XS off"; - *decode = sub { - my ( undef, $octets, $chk ) = @_; - my $str = Encode::decode_utf8($octets); - if ( defined $str ) { - $_[1] = '' if $chk; - return $str; - } - return undef; - }; - *encode = sub { - my ( undef, $string, $chk ) = @_; - my $octets = Encode::encode_utf8($string); - $_[1] = '' if $chk; - return $octets; - }; +{ + package Encode::utf8; + use parent 'Encode::Encoding'; + my %obj = ( + 'utf8' => { Name => 'utf8' }, + 'utf-8-strict' => { Name => 'utf-8-strict', strict_utf8 => 1 } + ); + for ( keys %obj ) { + bless $obj{$_} => __PACKAGE__; + Encode::define_encoding( $obj{$_} => $_ ); + } + sub cat_decode { + # ($obj, $dst, $src, $pos, $trm, $chk) + # currently ignores $chk + my ( undef, undef, undef, $pos, $trm ) = @_; + my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ]; + use bytes; + if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) { + $$rdst .= + substr( $$rsrc, $pos, $npos - $pos + length($trm) ); + $$rpos = $npos + length($trm); + return 1; } - *cat_decode = sub { # ($obj, $dst, $src, $pos, $trm, $chk) - # currently ignores $chk - my ( undef, undef, undef, $pos, $trm ) = @_; - my ( $rdst, $rsrc, $rpos ) = \@_[ 1, 2, 3 ]; - use bytes; - if ( ( my $npos = index( $$rsrc, $trm, $pos ) ) >= 0 ) { - $$rdst .= - substr( $$rsrc, $pos, $npos - $pos + length($trm) ); - $$rpos = $npos + length($trm); - return 1; - } - $$rdst .= substr( $$rsrc, $pos ); - $$rpos = length($$rsrc); - return ''; - }; - $Encode::Encoding{utf8} = - bless { Name => "utf8" } => "Encode::utf8"; - $Encode::Encoding{"utf-8-strict"} = - bless { Name => "utf-8-strict", strict_utf8 => 1 } - => "Encode::utf8"; + $$rdst .= substr( $$rsrc, $pos ); + $$rpos = length($$rsrc); + return ''; } } @@ -516,14 +486,16 @@ ISO-8859-1, also known as Latin1: $octets = encode("iso-8859-1", $string); -B<CAVEAT>: When you run C<$octets = encode("utf8", $string)>, then +B<CAVEAT>: When you run C<$octets = encode("UTF-8", $string)>, then $octets I<might not be equal to> $string. Though both contain the same data, the UTF8 flag for $octets is I<always> off. When you encode anything, the UTF8 flag on the result is always off, even when it -contains a completely valid utf8 string. See L</"The UTF8 flag"> below. +contains a completely valid UTF-8 string. See L</"The UTF8 flag"> below. If the $string is C<undef>, then C<undef> is returned. +C<str2bytes> may be used as an alias for C<encode>. + =head3 decode $string = decode(ENCODING, OCTETS[, CHECK]) @@ -544,13 +516,15 @@ internal format: $string = decode("iso-8859-1", $octets); -B<CAVEAT>: When you run C<$string = decode("utf8", $octets)>, then $string +B<CAVEAT>: When you run C<$string = decode("UTF-8", $octets)>, then $string I<might not be equal to> $octets. Though both contain the same data, the UTF8 flag for $string is on. See L</"The UTF8 flag"> below. If the $string is C<undef>, then C<undef> is returned. +C<bytes2str> may be used as an alias for C<decode>. + =head3 find_encoding [$obj =] find_encoding(ENCODING) @@ -559,11 +533,11 @@ Returns the I<encoding object> corresponding to I<ENCODING>. Returns C<undef> if no matching I<ENCODING> is find. The returned object is what does the actual encoding or decoding. - $utf8 = decode($name, $bytes); + $string = decode($name, $bytes); is in fact - $utf8 = do { + $string = do { $obj = find_encoding($name); croak qq(encoding "$name" not found) unless ref $obj; $obj->decode($bytes); @@ -575,8 +549,8 @@ You can therefore save time by reusing this object as follows; my $enc = find_encoding("iso-8859-1"); while(<>) { - my $utf8 = $enc->decode($_); - ... # now do something with $utf8; + my $string = $enc->decode($_); + ... # now do something with $string; } Besides L</decode> and L</encode>, other methods are @@ -624,13 +598,13 @@ and C<undef> on error. B<CAVEAT>: The following operations may look the same, but are not: - from_to($data, "iso-8859-1", "utf8"); #1 + from_to($data, "iso-8859-1", "UTF-8"); #1 $data = decode("iso-8859-1", $data); #2 Both #1 and #2 make $data consist of a completely valid UTF-8 string, but only #2 turns the UTF8 flag on. #1 is equivalent to: - $data = encode("utf8", decode("iso-8859-1", $data)); + $data = encode("UTF-8", decode("iso-8859-1", $data)); See L</"The UTF8 flag"> below. @@ -655,7 +629,11 @@ followed by C<encode> as follows: Equivalent to C<$octets = encode("utf8", $string)>. The characters in $string are encoded in Perl's internal format, and the result is returned as a sequence of octets. Because all possible characters in Perl have a -(loose, not strict) UTF-8 representation, this function cannot fail. +(loose, not strict) utf8 representation, this function cannot fail. + +B<WARNING>: do not use this function for data exchange as it can produce +not strict utf8 $octets! For strictly valid UTF-8 output use +C<$octets = encode("UTF-8", $string)>. =head3 decode_utf8 @@ -663,11 +641,15 @@ as a sequence of octets. Because all possible characters in Perl have a Equivalent to C<$string = decode("utf8", $octets [, CHECK])>. The sequence of octets represented by $octets is decoded -from UTF-8 into a sequence of logical characters. -Because not all sequences of octets are valid UTF-8, +from (loose, not strict) utf8 into a sequence of logical characters. +Because not all sequences of octets are valid not strict utf8, it is quite possible for this function to fail. For CHECK, see L</"Handling Malformed Data">. +B<WARNING>: do not use this function for data exchange as it can produce +$string with not strict utf8 representation! For strictly valid UTF-8 +$string representation use C<$string = decode("UTF-8", $octets [, CHECK])>. + B<CAVEAT>: the input I<$octets> might be modified in-place depending on what is set in CHECK. See L</LEAVE_SRC> if you want your inputs to be left unchanged. @@ -903,15 +885,14 @@ octets that represent the fallback character. For instance: Acts like C<FB_PERLQQ> but U+I<XXXX> is used instead of C<\x{I<XXXX>}>. -Even the fallback for C<decode> must return octets, which are -then decoded with the character encoding that C<decode> accepts. So for +Fallback for C<decode> must return decoded string (sequence of characters) +and takes a list of ordinal values as its arguments. So for example if you wish to decode octets as UTF-8, and use ISO-8859-15 as a fallback for bytes that are not valid UTF-8, you could write $str = decode 'UTF-8', $octets, sub { - my $tmp = chr shift; - from_to $tmp, 'ISO-8859-15', 'UTF-8'; - return $tmp; + my $tmp = join '', map chr, @_; + return decode 'ISO-8859-15', $tmp; }; =head1 Defining Encodings @@ -980,9 +961,9 @@ When you I<encode>, the resulting UTF8 flag is always B<off>. When you I<decode>, the resulting UTF8 flag is B<on>--I<unless> you can unambiguously represent data. Here is what we mean by "unambiguously". -After C<$utf8 = decode("foo", $octet)>, +After C<$str = decode("foo", $octet)>, - When $octet is... The UTF8 flag in $utf8 is + When $octet is... The UTF8 flag in $str is --------------------------------------------- In ASCII only (or EBCDIC only) OFF In ISO-8859-1 ON diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs index b5160d2516..6c077bec3a 100644 --- a/cpan/Encode/Encode.xs +++ b/cpan/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.39 2016/11/29 23:29:23 dankogai Exp dankogai $ + $Id: Encode.xs,v 2.41 2017/06/10 17:23:50 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT @@ -35,17 +35,6 @@ UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) #define SvIV_nomg SvIV #endif -#ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE -# define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE -#else -# define UTF8_ALLOW_STRICT 0 -#endif - -#define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \ - ~(UTF8_ALLOW_CONTINUATION | \ - UTF8_ALLOW_NON_CONTINUATION | \ - UTF8_ALLOW_LONG)) - static void Encode_XSEncoding(pTHX_ encode_t * enc) { @@ -114,24 +103,52 @@ utf8_safe_upgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" +#define ERR_DECODE_STR_NOMAP "%s \"%s\" does not map to Unicode" static SV * do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) { dSP; int argc; - SV *retval = newSVpv("",0); + SV *retval; ENTER; SAVETMPS; PUSHMARK(sp); - XPUSHs(sv_2mortal(newSVnv((UV)ch))); + XPUSHs(sv_2mortal(newSVuv(ch))); PUTBACK; argc = call_sv(fallback_cb, G_SCALAR); SPAGAIN; if (argc != 1){ croak("fallback sub must return scalar!"); } - sv_catsv(retval, POPs); + retval = POPs; + SvREFCNT_inc(retval); + PUTBACK; + FREETMPS; + LEAVE; + return retval; +} + +static SV * +do_bytes_fallback_cb(pTHX_ U8 *s, STRLEN slen, SV *fallback_cb) +{ + dSP; + int argc; + STRLEN i; + SV *retval; + ENTER; + SAVETMPS; + PUSHMARK(sp); + for (i=0; i<slen; ++i) + XPUSHs(sv_2mortal(newSVuv(s[i]))); + PUTBACK; + argc = call_sv(fallback_cb, G_SCALAR); + SPAGAIN; + if (argc != 1){ + croak("fallback sub must return scalar!"); + } + retval = POPs; + SvREFCNT_inc(retval); PUTBACK; FREETMPS; LEAVE; @@ -241,16 +258,22 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * goto ENCODE_SET_SRC; } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + STRLEN sublen; + char *substr; SV* subchar = (fallback_cb != &PL_sv_undef) ? do_fallback_cb(aTHX_ ch, fallback_cb) : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04" UVxf "}" : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" : "&#x%" UVxf ";", (UV)ch); - SvUTF8_off(subchar); /* make sure no decoded string gets in */ + substr = SvPV(subchar, sublen); + if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */ + SvREFCNT_dec(subchar); + croak("Wide character"); + } sdone += slen + clen; - ddone += dlen + SvCUR(subchar); - sv_catsv(dst, subchar); + ddone += dlen + sublen; + sv_catpvn(dst, substr, sublen); SvREFCNT_dec(subchar); } else { /* fallback char */ @@ -277,18 +300,21 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ + STRLEN sublen; + char *substr; SV* subchar = (fallback_cb != &PL_sv_undef) ? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) : newSVpvf("\\x%02" UVXf, (UV)s[slen]); + substr = SvPVutf8(subchar, sublen); sdone += slen + 1; - ddone += dlen + SvCUR(subchar); - sv_catsv(dst, subchar); + ddone += dlen + sublen; + sv_catpvn(dst, substr, sublen); SvREFCNT_dec(subchar); } else { sdone += slen + 1; ddone += dlen + strlen(FBCHAR_UTF8); - sv_catpv(dst, FBCHAR_UTF8); + sv_catpvn(dst, FBCHAR_UTF8, strlen(FBCHAR_UTF8)); } } /* settle variables when fallback */ @@ -382,7 +408,7 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) U8 *ptr = s; bool overflowed = 0; - uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len); + uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(UTF8SKIP(s)); len--; s++; @@ -401,7 +427,6 @@ convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen) *rlen = s-ptr; if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) { - *rlen = 1; return 0; } @@ -418,6 +443,8 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, int check; U8 *d; STRLEN dlen; + char esc[UTF8_MAXLEN * 6 + 1]; + STRLEN i; if (SvROK(check_sv)) { /* croak("UTF-8 decoder doesn't support callback CHECK"); */ @@ -441,22 +468,24 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, continue; } + uv = 0; ulen = 1; - if (UTF8_IS_START(*s)) { + if (! UTF8_IS_CONTINUATION(*s)) { + /* Not an invariant nor a continuation; must be a start byte. (We + * can't test for UTF8_IS_START as that excludes things like \xC0 + * which are start bytes, but always lead to overlongs */ + U8 skip = UTF8SKIP(s); if ((s + skip) > e) { - if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) { - const U8 *p = s + 1; - for (; p < e; p++) { - if (!UTF8_IS_CONTINUATION(*p)) { - ulen = p-s; - goto malformed_byte; - } - } + /* just calculate ulen, in pathological cases can be smaller then e-s */ + if (e-s >= 2) + convert_utf8_multi_seq(s, e-s, &ulen); + else + ulen = 1; + + if ((stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) && ulen == (STRLEN)(e-s)) break; - } - ulen = e-s; goto malformed_byte; } @@ -475,44 +504,67 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, } /* If we get here there is something wrong with alleged UTF-8 */ + /* uv is used only when encoding */ malformed_byte: - uv = (UV)*s; - if (ulen == 0) + if (uv == 0) + uv = (UV)*s; + if (encode || ulen == 0) ulen = 1; malformed: + if (!encode && (check & (ENCODE_DIE_ON_ERR|ENCODE_WARN_ON_ERR|ENCODE_PERLQQ))) + for (i=0; i<ulen; ++i) sprintf(esc+4*i, "\\x%02X", s[i]); if (check & ENCODE_DIE_ON_ERR){ if (encode) - Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8"); + Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8")); else - Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv); + Perl_croak(aTHX_ ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc); } if (check & ENCODE_WARN_ON_ERR){ if (encode) Perl_warner(aTHX_ packWARN(WARN_UTF8), - ERR_ENCODE_NOMAP, uv, "utf8"); + ERR_ENCODE_NOMAP, uv, (strict ? "UTF-8" : "utf8")); else Perl_warner(aTHX_ packWARN(WARN_UTF8), - ERR_DECODE_NOMAP, "utf8", uv); + ERR_DECODE_STR_NOMAP, (strict ? "UTF-8" : "utf8"), esc); } if (check & ENCODE_RETURN_ON_ERR) { break; } if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ - SV* subchar = - (fallback_cb != &PL_sv_undef) - ? do_fallback_cb(aTHX_ uv, fallback_cb) - : newSVpvf(check & ENCODE_PERLQQ - ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") - : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" - : "&#x%" UVxf ";", uv); - if (encode){ - SvUTF8_off(subchar); /* make sure no decoded string gets in */ - } - dlen += SvCUR(subchar) - ulen; + STRLEN sublen; + char *substr; + SV* subchar; + if (encode) { + subchar = + (fallback_cb != &PL_sv_undef) + ? do_fallback_cb(aTHX_ uv, fallback_cb) + : newSVpvf(check & ENCODE_PERLQQ + ? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") + : check & ENCODE_HTMLCREF ? "&#%" UVuf ";" + : "&#x%" UVxf ";", uv); + substr = SvPV(subchar, sublen); + if (SvUTF8(subchar) && sublen && !utf8_to_bytes((U8 *)substr, &sublen)) { /* make sure no decoded string gets in */ + SvREFCNT_dec(subchar); + croak("Wide character"); + } + } else { + if (fallback_cb != &PL_sv_undef) { + /* in decode mode we have sequence of wrong bytes */ + subchar = do_bytes_fallback_cb(aTHX_ s, ulen, fallback_cb); + } else { + char *ptr = esc; + /* ENCODE_PERLQQ is already stored in esc */ + if (check & (ENCODE_HTMLCREF|ENCODE_XMLCREF)) + for (i=0; i<ulen; ++i) ptr += sprintf(ptr, ((check & ENCODE_HTMLCREF) ? "&#%u;" : "&#x%02X;"), s[i]); + subchar = newSVpvn(esc, strlen(esc)); + } + substr = SvPVutf8(subchar, sublen); + } + dlen += sublen - ulen; SvCUR_set(dst, d-(U8 *)SvPVX(dst)); *SvEND(dst) = '\0'; - sv_catsv(dst, subchar); + sv_catpvn(dst, substr, sublen); SvREFCNT_dec(subchar); d = (U8 *) SvGROW(dst, dlen) + SvCUR(dst); } else { @@ -539,7 +591,7 @@ MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ PROTOTYPES: DISABLE void -Method_decode_xs(obj,src,check_sv = &PL_sv_no) +Method_decode(obj,src,check_sv = &PL_sv_no) SV * obj SV * src SV * check_sv @@ -551,14 +603,13 @@ PREINIT: bool renewed = 0; int check; bool modify; + dSP; INIT: SvGETMAGIC(src); SvGETMAGIC(check_sv); check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); modify = (check && !(check & ENCODE_LEAVE_SRC)); -CODE: -{ - dSP; +PPCODE: if (!SvOK(src)) XSRETURN_UNDEF; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -600,10 +651,9 @@ CODE: if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ ST(0) = dst; XSRETURN(1); -} void -Method_encode_xs(obj,src,check_sv = &PL_sv_no) +Method_encode(obj,src,check_sv = &PL_sv_no) SV * obj SV * src SV * check_sv @@ -619,8 +669,7 @@ INIT: SvGETMAGIC(check_sv); check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); modify = (check && !(check & ENCODE_LEAVE_SRC)); -CODE: -{ +PPCODE: if (!SvOK(src)) XSRETURN_UNDEF; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -673,20 +722,19 @@ CODE: if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ ST(0) = dst; XSRETURN(1); -} MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ -PROTOTYPES: ENABLE +PROTOTYPES: DISABLE -void +SV * Method_renew(obj) SV * obj CODE: -{ PERL_UNUSED_VAR(obj); - XSRETURN(1); -} + RETVAL = newSVsv(obj); +OUTPUT: + RETVAL int Method_renewed(obj) @@ -697,17 +745,19 @@ CODE: OUTPUT: RETVAL -void +SV * Method_name(obj) SV * obj +PREINIT: + encode_t *enc; +INIT: + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); CODE: -{ - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); - XSRETURN(1); -} + RETVAL = newSVpvn(enc->name[0], strlen(enc->name[0])); +OUTPUT: + RETVAL -void +bool Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no) SV * obj SV * dst @@ -734,7 +784,6 @@ INIT: enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); offset = (STRLEN)SvIV(off); CODE: -{ if (!SvOK(src)) XSRETURN_NO; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -745,13 +794,9 @@ CODE: sv_catsv(dst, tmp); SvREFCNT_dec(tmp); SvIV_set(off, (IV)offset); - if (code == ENCODE_FOUND_TERM) { - ST(0) = &PL_sv_yes; - }else{ - ST(0) = &PL_sv_no; - } - XSRETURN(1); -} + RETVAL = (code == ENCODE_FOUND_TERM); +OUTPUT: + RETVAL SV * Method_decode(obj,src,check_sv = &PL_sv_no) @@ -773,7 +818,6 @@ INIT: modify = (check && !(check & ENCODE_LEAVE_SRC)); enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); CODE: -{ if (!SvOK(src)) XSRETURN_UNDEF; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -782,7 +826,6 @@ CODE: RETVAL = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, NULL, Nullsv, NULL, fallback_cb); SvUTF8_on(RETVAL); -} OUTPUT: RETVAL @@ -806,7 +849,6 @@ INIT: modify = (check && !(check & ENCODE_LEAVE_SRC)); enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); CODE: -{ if (!SvOK(src)) XSRETURN_UNDEF; s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen); @@ -814,76 +856,51 @@ CODE: utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify); RETVAL = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check, NULL, Nullsv, NULL, fallback_cb); -} OUTPUT: RETVAL -void +bool Method_needs_lines(obj) SV * obj CODE: -{ - /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ PERL_UNUSED_VAR(obj); - ST(0) = &PL_sv_no; - XSRETURN(1); -} + RETVAL = FALSE; +OUTPUT: + RETVAL -void +bool Method_perlio_ok(obj) SV * obj PREINIT: SV *sv; CODE: -{ - /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ - /* require_pv(PERLIO_FILENAME); */ - PERL_UNUSED_VAR(obj); - eval_pv("require PerlIO::encoding", 0); - SPAGAIN; - - sv = get_sv("@", 0); - if (SvTRUE(sv)) { - ST(0) = &PL_sv_no; - }else{ - ST(0) = &PL_sv_yes; - } - XSRETURN(1); -} + sv = eval_pv("require PerlIO::encoding", 0); + RETVAL = SvTRUE(sv); +OUTPUT: + RETVAL -void +SV * Method_mime_name(obj) SV * obj PREINIT: - SV *sv; + encode_t *enc; +INIT: + enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); CODE: -{ - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - SV *retval; - eval_pv("require Encode::MIME::Name", 0); + ENTER; + SAVETMPS; + PUSHMARK(sp); + XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); + PUTBACK; + call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); SPAGAIN; - - sv = get_sv("@", 0); - if (SvTRUE(sv)) { - ST(0) = &PL_sv_undef; - }else{ - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); - PUTBACK; - call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); - SPAGAIN; - retval = newSVsv(POPs); - PUTBACK; - FREETMPS; - LEAVE; - /* enc->name[0] */ - ST(0) = retval; - } - XSRETURN(1); -} + RETVAL = newSVsv(POPs); + PUTBACK; + FREETMPS; + LEAVE; +OUTPUT: + RETVAL MODULE = Encode PACKAGE = Encode @@ -892,10 +909,11 @@ PROTOTYPES: ENABLE I32 _bytes_to_utf8(sv, ...) SV * sv +PREINIT: + SV * encoding; +INIT: + encoding = items == 2 ? ST(1) : Nullsv; CODE: -{ - SV * encoding = items == 2 ? ST(1) : Nullsv; - if (encoding) RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); else { @@ -909,18 +927,19 @@ CODE: Safefree(converted); /* ... so free it */ RETVAL = len; } -} OUTPUT: RETVAL I32 _utf8_to_bytes(sv, ...) SV * sv +PREINIT: + SV * to; + SV * check; +INIT: + to = items > 1 ? ST(1) : Nullsv; + check = items > 2 ? ST(2) : Nullsv; CODE: -{ - SV * to = items > 1 ? ST(1) : Nullsv; - SV * check = items > 2 ? ST(2) : Nullsv; - if (to) { RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); } else { @@ -980,7 +999,6 @@ CODE: RETVAL = (utf8_to_bytes(s, &len) ? len : 0); } } -} OUTPUT: RETVAL @@ -992,13 +1010,11 @@ PREINIT: char *str; STRLEN len; CODE: -{ SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */ str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */ RETVAL = SvUTF8(sv) ? TRUE : FALSE; if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len))) RETVAL = FALSE; -} OUTPUT: RETVAL @@ -1006,17 +1022,15 @@ SV * _utf8_on(sv) SV * sv CODE: -{ SvGETMAGIC(sv); if (!SvTAINTED(sv) && SvPOKp(sv)) { if (SvTHINKFIRST(sv)) sv_force_normal(sv); - RETVAL = newSViv(SvUTF8(sv)); + RETVAL = boolSV(SvUTF8(sv)); SvUTF8_on(sv); SvSETMAGIC(sv); } else { RETVAL = &PL_sv_undef; } -} OUTPUT: RETVAL @@ -1024,20 +1038,25 @@ SV * _utf8_off(sv) SV * sv CODE: -{ SvGETMAGIC(sv); if (!SvTAINTED(sv) && SvPOKp(sv)) { if (SvTHINKFIRST(sv)) sv_force_normal(sv); - RETVAL = newSViv(SvUTF8(sv)); + RETVAL = boolSV(SvUTF8(sv)); SvUTF8_off(sv); SvSETMAGIC(sv); } else { RETVAL = &PL_sv_undef; } -} OUTPUT: RETVAL +void +onBOOT() +CODE: +{ +#include "def_t.exh" +} + BOOT: { HV *stash = gv_stashpvn("Encode", strlen("Encode"), GV_ADD); @@ -1057,6 +1076,3 @@ BOOT: newCONSTSUB(stash, "FB_HTMLCREF", newSViv(ENCODE_FB_HTMLCREF)); newCONSTSUB(stash, "FB_XMLCREF", newSViv(ENCODE_FB_XMLCREF)); } -{ -#include "def_t.exh" -} diff --git a/cpan/Encode/Makefile.PL b/cpan/Encode/Makefile.PL index 8203105247..8b801443d8 100644 --- a/cpan/Encode/Makefile.PL +++ b/cpan/Encode/Makefile.PL @@ -1,9 +1,10 @@ # -# $Id: Makefile.PL,v 2.18 2016/11/29 23:29:23 dankogai Exp dankogai $ +# $Id: Makefile.PL,v 2.21 2017/07/18 07:15:29 dankogai Exp dankogai $ # use 5.007003; use strict; use warnings; +use utf8; use ExtUtils::MakeMaker; use File::Spec; use Config; @@ -15,9 +16,12 @@ $ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE}; # similar strictness as in core my $ccflags = $Config{ccflags}; if (!$ENV{PERL_CORE}) { - if ($Config{gccversion}) { - $ccflags .= ' -Werror=declaration-after-statement'; - $ccflags .= ' -Wpointer-sign' unless $Config{d_cplusplus}; + if (my $gccver = $Config{gccversion}) { + $gccver =~ s/\.//g; $gccver =~ s/ .*//; + $gccver .= "0" while length $gccver < 3; + $gccver = 0+$gccver; + $ccflags .= ' -Werror=declaration-after-statement' if $gccver > 400; + $ccflags .= ' -Wpointer-sign' if !$Config{d_cplusplus} and $gccver > 400; $ccflags .= ' -fpermissive' if $Config{d_cplusplus}; } } @@ -49,6 +53,8 @@ WriteMakefile( NAME => "Encode", EXE_FILES => \@exe_files, VERSION_FROM => 'Encode.pm', + ABSTRACT_FROM=> 'Encode.pm', + AUTHOR => 'Dan Kogai <dankogai@dan.co.jp>', OBJECT => '$(O_FILES)', 'dist' => { COMPRESS => 'gzip -9f', @@ -61,6 +67,7 @@ WriteMakefile( PREREQ_PM => { Exporter => '5.57', # use Exporter 'import'; parent => '0.221', # version bundled with 5.10.1 + Storable => '0', # bundled with Perl 5.7.3 }, TEST_REQUIRES => { 'Test::More' => '0.81_01', @@ -71,6 +78,91 @@ WriteMakefile( resources => { repository => 'https://github.com/dankogai/p5-encode', }, + x_contributors => [ + 'Alex Davies <alex.davies@talktalk.net>', + 'Alex Kapranoff <alex@kapranoff.ru>', + 'Alex Vandiver <alex@chmrr.net>', + 'Andreas J. Koenig <andreas.koenig@anima.de>', + 'Andrew Pennebaker <andrew.pennebaker@networkedinsights.com>', + 'Andy Grundman <andyg@activestate.com>', + 'Anton Tagunov <tagunov@motor.ru>', + 'Autrijus Tang <autrijus@autrijus.org>', + 'Benjamin Goldberg <goldbb2@earthlink.net>', + 'Bjoern Hoehrmann <derhoermi@gmx.net>', + 'Bjoern Jacke <debianbugs@j3e.de>', + 'bulk88 <bulk88@hotmail.com>', + 'Craig A. Berry <craigberry@mac.com>', + 'Curtis Jewell <csjewell@cpan.org>', + 'Dan Kogai <dankogai@dan.co.jp>', + 'Dave Evans <dave@rudolf.org.uk>', + 'David Golden <dagolden@cpan.org>', + 'David Steinbrunner <dsteinbrunner@pobox.com>', + 'Deng Liu <dengliu@ntu.edu.tw>', + 'Dominic Dunlop <domo@computer.org>', + 'drry', + 'Elizabeth Mattijsen <liz@dijkmat.nl>', + 'Flavio Poletti <flavio@polettix.it>', + 'Gerrit P. Haase <gp@familiehaase.de>', + 'Gisle Aas <gisle@ActiveState.com>', + 'Graham Barr <gbarr@pobox.com>', + 'Graham Knop <haarg@haarg.org>', + 'Graham Ollis <perl@wdlabs.com>', + 'Gurusamy Sarathy <gsar@activestate.com>', + 'H.Merijn Brand <h.m.brand@xs4all.nl>', + 'Hugo van der Sanden <hv@crypt.org>', + 'chansen <chansen@cpan.org>', + 'Chris Nandor <pudge@pobox.com>', + 'Inaba Hiroto <inaba@st.rim.or.jp>', + 'Jarkko Hietaniemi <jhi@iki.fi>', + 'Jesse Vincent <jesse@fsck.com>', + 'Jungshik Shin <jshin@mailaps.org>', + 'Karen Etheridge <ether@cpan.org>', + 'Karl Williamson <khw@cpan.org>', + 'Kenichi Ishigaki <ishigaki@cpan.org>', + 'KONNO Hiroharu <hiroharu.konno@bowneglobal.co.jp>', + 'Laszlo Molnar <ml1050@freemail.hu>', + 'Makamaka <makamaka@donzoko.net>', + 'Mark-Jason Dominus <mjd@plover.com>', + 'Masahiro Iuchi <masahiro.iuchi@gmail.com>', + 'MATSUNO Tokuhiro <tokuhirom+cpan@gmail.com>', + 'Mattia Barbon <mbarbon@dsi.unive.it>', + 'Michael G Schwern <schwern@pobox.com>', + 'Michael LaGrasta <michael@lagrasta.com>', + 'Miron Cuperman <miron@hyper.to>', + 'Moritz Lenz <moritz@faui2k3.org>', + 'MORIYAMA Masayuki <msyk@mtg.biglobe.ne.jp>', + 'Nick Ing-Simmons <nick@ing-simmons.net>', + 'Nicholas Clark <nick@ccl4.org>', + 'Olivier Mengué <dolmen@cpan.org>', + 'otsune', + 'Pali <pali@cpan.org>', + 'Paul Marquess <paul_marquess@yahoo.co.uk>', + 'Peter Prymmer <pvhp@best.com>', + 'Peter Rabbitson <ribasushi@cpan.org>', + 'Philip Newton <pne@cpan.org>', + 'Piotr Fusik <pfusik@op.pl>', + 'Rafael Garcia-Suarez <rgarciasuarez@mandriva.com>', + 'Randy Stauner <randy@magnificent-tears.com>', + 'Reini Urban <rurban@cpan.org>', + 'Robin Barker <rmb1@cise.npl.co.uk>', + 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>', + 'Simon Cozens <simon@netthink.co.uk>', + 'Slaven Rezic <SREZIC@cpan.org>', + 'Spider Boardman <spider@web.zk3.dec.com>', + 'Steve Hay <steve.m.hay@googlemail.com>', + 'Steve Peters <steve@fisharerojo.org>', + 'SUGAWARA Hajime <sugawara@hdt.co.jp>', + 'SUZUKI Norio <ZAP00217@nifty.com>', + 'szr8 <blz.marcel@gmail.com>', + 'Tatsuhiko Miyagawa <miyagawa@bulknews.net>', + 'Tels <perl_dummy@bloodgate.com>', + 'Tony Cook <tony@develop-help.com>', + 'Vadim Konovalov <vkonovalov@peterstar.ru>', + 'Victor <victor@vsespb.ru>', + 'Ville Skyttä <ville.skytta@iki.fi>', + 'Vincent van Dam <vvandam@sandvine.com>', + 'Yitzchak Scott-Thoennes <sthoenna@efn.org>', + ], }, ); diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm index fc1d3d1382..c56745d7b1 100644 --- a/cpan/Encode/Unicode/Unicode.pm +++ b/cpan/Encode/Unicode/Unicode.pm @@ -2,9 +2,8 @@ package Encode::Unicode; use strict; use warnings; -no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.15_01 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.16 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); @@ -13,7 +12,7 @@ XSLoader::load( __PACKAGE__, $VERSION ); # Object Generator 8 transcoders all at once! # -require Encode; +use Encode (); our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32); @@ -34,12 +33,13 @@ for my $name ( $endian = ( $3 eq 'BE' ) ? 'n' : ( $3 eq 'LE' ) ? 'v' : ''; $size == 4 and $endian = uc($endian); - $Encode::Encoding{$name} = bless { + my $obj = bless { Name => $name, size => $size, endian => $endian, ucs2 => $ucs2, } => __PACKAGE__; + Encode::define_encoding($obj, $name); } use parent qw(Encode::Encoding); @@ -52,12 +52,6 @@ sub renew { return $clone; } -# There used to be a perl implementation of (en|de)code but with -# XS version is ripe, perl version is zapped for optimal speed - -*decode = \&decode_xs; -*encode = \&encode_xs; - 1; __END__ diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs index 117e14d83f..b3b1d2fea8 100644 --- a/cpan/Encode/Unicode/Unicode.xs +++ b/cpan/Encode/Unicode/Unicode.xs @@ -1,5 +1,5 @@ /* - $Id: Unicode.xs,v 2.15 2016/11/29 23:29:23 dankogai Exp dankogai $ + $Id: Unicode.xs,v 2.16 2017/06/10 17:23:50 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT @@ -127,7 +127,7 @@ PROTOTYPES: DISABLE *hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef) void -decode_xs(obj, str, check = 0) +decode(obj, str, check = 0) SV * obj SV * str IV check @@ -345,7 +345,7 @@ CODE: } void -encode_xs(obj, utf8, check = 0) +encode(obj, utf8, check = 0) SV * obj SV * utf8 IV check diff --git a/cpan/Encode/bin/enc2xs b/cpan/Encode/bin/enc2xs index bd39639ae8..619b64b757 100644 --- a/cpan/Encode/bin/enc2xs +++ b/cpan/Encode/bin/enc2xs @@ -11,7 +11,7 @@ use warnings; use Getopt::Std; use Config; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 2.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. # RAW is a do_now as inserted by &enter @@ -1038,8 +1038,7 @@ sub find_e2x{ sub make_makefile_pl { - eval { require Encode; }; - $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n"; + eval { require Encode } or die "You need to install Encode to use enc2xs -M\nerror: $@\n"; # our used for variable expansion $_Enc2xs = $0; $_Version = $VERSION; @@ -1063,8 +1062,7 @@ use vars qw( ); sub make_configlocal_pm { - eval { require Encode; }; - $@ and die "Unable to require Encode: $@\n"; + eval { require Encode } or die "Unable to require Encode: $@\n"; eval { require File::Spec; }; # our used for variable expantion @@ -1084,8 +1082,7 @@ sub make_configlocal_pm { $mod =~ s/.*\bEncode\b/Encode/o; $mod =~ s/\.pm\z//o; $mod =~ s,/,::,og; - eval qq{ require $mod; }; - return if $@; + eval qq{ require $mod; } or return; warn qq{ require $mod;\n}; for my $enc ( Encode->encodings() ) { no warnings; @@ -1119,8 +1116,7 @@ sub _mkversion{ } sub _print_expand{ - eval { require File::Basename; }; - $@ and die "File::Basename needed. Are you on miniperl?;\nerror: $@\n"; + eval { require File::Basename } or die "File::Basename needed. Are you on miniperl?;\nerror: $@\n"; File::Basename->import(); my ($src, $dst, $clobber) = @_; if (!$clobber and -e $dst){ diff --git a/cpan/Encode/bin/ucmlint b/cpan/Encode/bin/ucmlint index a240f2c75e..a31a7a28f6 100644 --- a/cpan/Encode/bin/ucmlint +++ b/cpan/Encode/bin/ucmlint @@ -1,19 +1,18 @@ #!/usr/local/bin/perl # -# $Id: ucmlint,v 2.3 2016/08/04 03:15:58 dankogai Exp $ +# $Id: ucmlint,v 2.4 2017/06/10 17:23:50 dankogai Exp $ # BEGIN { pop @INC if $INC[-1] eq '.' } use strict; -our $VERSION = do { my @r = (q$Revision: 2.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.4 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Getopt::Std; our %Opt; getopts("Dehfv", \%Opt); if ($Opt{e}){ - eval{ require Encode; }; - $@ and die "can't load Encode : $@"; + eval { require Encode } or die "can't load Encode : $@"; } $Opt{h} and help(); diff --git a/cpan/Encode/encoding.pm b/cpan/Encode/encoding.pm index dc342683ee..7cd9eb2949 100644 --- a/cpan/Encode/encoding.pm +++ b/cpan/Encode/encoding.pm @@ -1,15 +1,16 @@ -# $Id: encoding.pm,v 2.19 2016/11/01 13:30:38 dankogai Exp $ +# $Id: encoding.pm,v 2.20 2017/06/10 17:23:50 dankogai Exp $ package encoding; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.19 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.20 $ =~ /(\d+)/g; use Encode; use strict; use warnings; +use Config; use constant { DEBUG => !!$ENV{PERL_ENCODE_DEBUG}, HAS_PERLIO => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) }, - PERL_5_21_7 => $^V && $^V ge v5.21.7, + PERL_5_21_7 => $^V && $^V ge v5.21.7, # lexically scoped }; sub _exception { @@ -115,7 +116,8 @@ sub import { } my $deprecate = - $] >= 5.017 ? "Use of the encoding pragma is deprecated" : 0; + ($] >= 5.017 and !$Config{usecperl}) + ? "Use of the encoding pragma is deprecated" : 0; my $class = shift; my $name = shift; @@ -132,6 +134,7 @@ sub import { return; } $name = _get_locale_encoding() if $name eq ':locale'; + BEGIN { strict->unimport('hashpairs') if $] >= 5.027 and $^V =~ /c$/; } my %arg = @_; $name = $ENV{PERL_ENCODING} unless defined $name; my $enc = find_encoding($name); @@ -141,9 +144,9 @@ sub import { } $name = $enc->name; # canonize unless ( $arg{Filter} ) { - if ($] >= 5.025003) { + if ($] >= 5.025003 and !$Config{usecperl}) { require Carp; - Carp::croak("The encoding pragma is no longer supported"); + Carp::croak("The encoding pragma is no longer supported. Check cperl"); } warnings::warnif("deprecated",$deprecate) if $deprecate; @@ -186,8 +189,8 @@ sub import { $status; } ); - }; - $@ eq '' and DEBUG and warn "Filter installed"; + 1; + } and DEBUG and warn "Filter installed"; } defined ${^UNICODE} and ${^UNICODE} != 0 and return 1; for my $h (qw(STDIN STDOUT)) { @@ -368,7 +371,7 @@ Note that C<STDERR> WILL NOT be changed, regardless. Also note that non-STD file handles remain unaffected. Use C<use open> or C<binmode> to change the layers of those. -=item C<use encoding I<ENCNAME> Filter=E<gt>1;> +=item C<use encoding I<ENCNAME>, Filter=E<gt>1;> This operates as above, but the C<Filter> argument with a non-zero value causes the entire script, and not just literals, to be translated from diff --git a/cpan/Encode/lib/Encode/Alias.pm b/cpan/Encode/lib/Encode/Alias.pm index 0a252560f5..6dcd112a40 100644 --- a/cpan/Encode/lib/Encode/Alias.pm +++ b/cpan/Encode/lib/Encode/Alias.pm @@ -1,8 +1,7 @@ package Encode::Alias; use strict; use warnings; -no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.21 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use Exporter 'import'; @@ -19,7 +18,6 @@ our @Alias; # ordered matching list our %Alias; # cached known aliases sub find_alias { - require Encode; my $class = shift; my $find = shift; unless ( exists $Alias{$find} ) { @@ -109,6 +107,9 @@ sub define_alias { } } +# HACK: Encode must be used after define_alias is declarated as Encode calls define_alias +use Encode (); + # Allow latin-1 style names as well # 0 1 2 3 4 5 6 7 8 9 10 our @Latin2iso = ( 0, 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 ); @@ -134,7 +135,6 @@ sub undef_aliases { } sub init_aliases { - require Encode; undef_aliases(); # Try all-lower-case version should all else fails diff --git a/cpan/Encode/lib/Encode/CN/HZ.pm b/cpan/Encode/lib/Encode/CN/HZ.pm index 4510b0b400..a0dc59d153 100644 --- a/cpan/Encode/lib/Encode/CN/HZ.pm +++ b/cpan/Encode/lib/Encode/CN/HZ.pm @@ -5,7 +5,7 @@ use warnings; use utf8 (); use vars qw($VERSION); -$VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +$VERSION = do { my @r = ( q$Revision: 2.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -21,6 +21,7 @@ sub needs_lines { 1 } sub decode ($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $GB = Encode::find_encoding('gb2312-raw'); my $ret = substr($str, 0, 0); # to propagate taintedness @@ -135,6 +136,7 @@ sub cat_decode { sub encode($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $GB = Encode::find_encoding('gb2312-raw'); my $ret = substr($str, 0, 0); # to propagate taintedness; diff --git a/cpan/Encode/lib/Encode/Encoding.pm b/cpan/Encode/lib/Encode/Encoding.pm index 39d2e0ab64..815937f455 100644 --- a/cpan/Encode/lib/Encode/Encoding.pm +++ b/cpan/Encode/lib/Encode/Encoding.pm @@ -3,11 +3,15 @@ package Encode::Encoding; # Base class for classes which implement encodings use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; -require Encode; +our @CARP_NOT = qw(Encode Encode::Encoder); -sub DEBUG { 0 } +use Carp (); +use Encode (); +use Encode::MIME::Name; + +use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; sub Define { my $obj = shift; @@ -20,13 +24,10 @@ sub Define { sub name { return shift->{'Name'} } -sub mime_name{ - require Encode::MIME::Name; +sub mime_name { return Encode::MIME::Name::get_mime_name(shift->name); } -# sub renew { return $_[0] } - sub renew { my $self = shift; my $clone = bless {%$self} => ref($self); @@ -42,8 +43,7 @@ sub renewed { return $_[0]->{renewed} || 0 } sub needs_lines { 0 } sub perlio_ok { - eval { require PerlIO::encoding }; - return $@ ? 0 : 1; + return eval { require PerlIO::encoding } ? 1 : 0; } # (Temporary|legacy) methods @@ -56,14 +56,12 @@ sub fromUnicode { shift->encode(@_) } # sub encode { - require Carp; my $obj = shift; my $class = ref($obj) ? ref($obj) : $obj; Carp::croak( $class . "->encode() not defined!" ); } sub decode { - require Carp; my $obj = shift; my $class = ref($obj) ? ref($obj) : $obj; Carp::croak( $class . "->encode() not defined!" ); @@ -188,7 +186,6 @@ MUST return the string representing the canonical name of the encoding. Predefined As: sub mime_name{ - require Encode::MIME::Name; return Encode::MIME::Name::get_mime_name(shift->name); } @@ -226,8 +223,7 @@ unless the value is numeric so return 0 for false. Predefined As: sub perlio_ok { - eval{ require PerlIO::encoding }; - return $@ ? 0 : 1; + return eval { require PerlIO::encoding } ? 1 : 0; } If your encoding does not support PerlIO for some reasons, just; diff --git a/cpan/Encode/lib/Encode/GSM0338.pm b/cpan/Encode/lib/Encode/GSM0338.pm index 20257a1cbd..e87141ebc4 100644 --- a/cpan/Encode/lib/Encode/GSM0338.pm +++ b/cpan/Encode/lib/Encode/GSM0338.pm @@ -1,5 +1,5 @@ # -# $Id: GSM0338.pm,v 2.5 2013/09/14 07:51:59 dankogai Exp $ +# $Id: GSM0338.pm,v 2.7 2017/06/10 17:23:50 dankogai Exp $ # package Encode::GSM0338; @@ -8,7 +8,7 @@ use warnings; use Carp; use vars qw($VERSION); -$VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +$VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -171,6 +171,7 @@ our $NBSP = "\x{00A0}"; sub decode ($$;$) { my ( $obj, $bytes, $chk ) = @_; + return undef unless defined $bytes; my $str = substr($bytes, 0, 0); # to propagate taintedness; while ( length $bytes ) { my $c = substr( $bytes, 0, 1, '' ); @@ -216,6 +217,7 @@ sub decode ($$;$) { sub encode($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $bytes = substr($str, 0, 0); # to propagate taintedness while ( length $str ) { my $u = substr( $str, 0, 1, '' ); @@ -270,10 +272,9 @@ expression with C<eval {}> block as follows; eval { $utf8 = decode("gsm0338", $gsm0338, $chk); - }; - if ($@){ + } or do { # handle exception here - } + }; =head1 BUGS diff --git a/cpan/Encode/lib/Encode/Guess.pm b/cpan/Encode/lib/Encode/Guess.pm index b44daf59eb..41fc19b799 100644 --- a/cpan/Encode/lib/Encode/Guess.pm +++ b/cpan/Encode/lib/Encode/Guess.pm @@ -2,15 +2,16 @@ package Encode::Guess; use strict; use warnings; use Encode qw(:fallbacks find_encoding); -our $VERSION = do { my @r = ( q$Revision: 2.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; my $Canon = 'Guess'; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; our %DEF_SUSPECTS = map { $_ => find_encoding($_) } qw(ascii utf8); -$Encode::Encoding{$Canon} = bless { +my $obj = bless { Name => $Canon, Suspects => {%DEF_SUSPECTS}, } => __PACKAGE__; +Encode::define_encoding($obj, $Canon); use parent qw(Encode::Encoding); sub needs_lines { 1 } diff --git a/cpan/Encode/lib/Encode/JP/JIS7.pm b/cpan/Encode/lib/Encode/JP/JIS7.pm index 588389a034..a0629a3690 100644 --- a/cpan/Encode/lib/Encode/JP/JIS7.pm +++ b/cpan/Encode/lib/Encode/JP/JIS7.pm @@ -1,7 +1,7 @@ package Encode::JP::JIS7; use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.5 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -9,11 +9,12 @@ for my $name ( '7bit-jis', 'iso-2022-jp', 'iso-2022-jp-1' ) { my $h2z = ( $name eq '7bit-jis' ) ? 0 : 1; my $jis0212 = ( $name eq 'iso-2022-jp' ) ? 0 : 1; - $Encode::Encoding{$name} = bless { + my $obj = bless { Name => $name, h2z => $h2z, jis0212 => $jis0212, } => __PACKAGE__; + Encode::define_encoding($obj, $name); } use parent qw(Encode::Encoding); @@ -29,6 +30,7 @@ use Encode::CJKConstants qw(:all); sub decode($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $residue = ''; if ($chk) { $str =~ s/([^\x00-\x7f].*)$//so and $residue = $1; @@ -45,6 +47,7 @@ sub decode($$;$) { sub encode($$;$) { require Encode::JP::H2Z; my ( $obj, $utf8, $chk ) = @_; + return undef unless defined $utf8; # empty the input string in the stack so perlio is ok $_[1] = '' if $chk; diff --git a/cpan/Encode/lib/Encode/KR/2022_KR.pm b/cpan/Encode/lib/Encode/KR/2022_KR.pm index 44373e5d58..122326403b 100644 --- a/cpan/Encode/lib/Encode/KR/2022_KR.pm +++ b/cpan/Encode/lib/Encode/KR/2022_KR.pm @@ -1,7 +1,7 @@ package Encode::KR::2022_KR; use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.4 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -16,6 +16,7 @@ sub perlio_ok { sub decode { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $res = $str; my $residue = iso_euc( \$res ); @@ -26,6 +27,7 @@ sub decode { sub encode { my ( $obj, $utf8, $chk ) = @_; + return undef unless defined $utf8; # empty the input string in the stack so perlio is ok $_[1] = '' if $chk; diff --git a/cpan/Encode/lib/Encode/MIME/Header.pm b/cpan/Encode/lib/Encode/MIME/Header.pm index ad14dba374..e23abffe37 100644 --- a/cpan/Encode/lib/Encode/MIME/Header.pm +++ b/cpan/Encode/lib/Encode/MIME/Header.pm @@ -2,7 +2,7 @@ package Encode::MIME::Header; use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.24 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.27 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Carp (); use Encode (); @@ -16,24 +16,28 @@ my %seed = ( bpl => 75, # bytes per line ); -$Encode::Encoding{'MIME-Header'} = bless { +my @objs; + +push @objs, bless { %seed, Name => 'MIME-Header', } => __PACKAGE__; -$Encode::Encoding{'MIME-B'} = bless { +push @objs, bless { %seed, decode_q => 0, Name => 'MIME-B', } => __PACKAGE__; -$Encode::Encoding{'MIME-Q'} = bless { +push @objs, bless { %seed, decode_b => 0, encode => 'Q', Name => 'MIME-Q', } => __PACKAGE__; +Encode::define_encoding($_, $_->{Name}) foreach @objs; + use parent qw(Encode::Encoding); sub needs_lines { 1 } @@ -52,7 +56,7 @@ my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($ my $re_encoding_strict_b = qr/[Bb]/; my $re_encoding_strict_q = qr/[Qq]/; my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/; -my $re_encoded_text_strict_q = qr/(?:[^\?\s=]|=[0-9A-Fa-f]{2})*/; +my $re_encoded_text_strict_q = qr/(?:[\x21-\x3C\x3E\x40-\x7E]|=[0-9A-Fa-f]{2})*/; # NOTE: first part are printable US-ASCII except ?, =, SPACE and TAB my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/; my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/; @@ -74,6 +78,7 @@ our $STRICT_DECODE = 0; sub decode($$;$) { my ($obj, $str, $chk) = @_; + return undef unless defined $str; my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match; my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture; @@ -194,7 +199,6 @@ sub _decode_q { sub _decode_octets { my ($enc, $octets, $chk) = @_; $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk; - local $Carp::CarpLevel = $Carp::CarpLevel + 1; # propagate Carp messages back to caller my $output = $enc->decode($octets, $chk); return undef if not ref $chk and $chk and $octets ne ''; return $output; @@ -202,6 +206,7 @@ sub _decode_octets { sub encode($$;$) { my ($obj, $str, $chk) = @_; + return undef unless defined $str; my $output = $obj->_fold_line($obj->_encode_string($str, $chk)); $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC); return $output . substr($str, 0, 0); # to propagate taintedness @@ -237,11 +242,7 @@ sub _encode_string { my @result = (); my $octets = ''; while ( length( my $chr = substr($str, 0, 1, '') ) ) { - my $seq; - { - local $Carp::CarpLevel = $Carp::CarpLevel + 1; # propagate Carp messages back to caller - $seq = $enc->encode($chr, $enc_chk); - } + my $seq = $enc->encode($chr, $enc_chk); if ( not length($seq) ) { substr($str, 0, 0, $chr); last; diff --git a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm index 86e66c371c..dc1e4275f0 100644 --- a/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm +++ b/cpan/Encode/lib/Encode/MIME/Header/ISO_2022_JP.pm @@ -5,16 +5,17 @@ use warnings; use parent qw(Encode::MIME::Header); -$Encode::Encoding{'MIME-Header-ISO_2022_JP'} = +my $obj = bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } => __PACKAGE__; +Encode::define_encoding($obj, 'MIME-Header-ISO_2022_JP'); use constant HEAD => '=?ISO-2022-JP?B?'; use constant TAIL => '?='; use Encode::CJKConstants qw(%RE); -our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 1.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; # I owe the below codes totally to # Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64 @@ -22,6 +23,7 @@ our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%0 sub encode { my $self = shift; my $str = shift; + return undef unless defined $str; utf8::encode($str) if ( Encode::is_utf8($str) ); Encode::from_to( $str, 'utf8', 'euc-jp' ); diff --git a/cpan/Encode/lib/Encode/Unicode/UTF7.pm b/cpan/Encode/lib/Encode/Unicode/UTF7.pm index d5d86e2f90..e68647755f 100644 --- a/cpan/Encode/lib/Encode/Unicode/UTF7.pm +++ b/cpan/Encode/lib/Encode/Unicode/UTF7.pm @@ -1,15 +1,14 @@ # -# $Id: UTF7.pm,v 2.8 2013/09/14 07:51:59 dankogai Exp $ +# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $ # package Encode::Unicode::UTF7; use strict; use warnings; -no warnings 'redefine'; use parent qw(Encode::Encoding); __PACKAGE__->Define('UTF-7'); -our $VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use MIME::Base64; -use Encode; +use Encode qw(find_encoding); # # Algorithms taken from Unicode::String by Gisle Aas @@ -30,6 +29,7 @@ sub needs_lines { 1 } sub encode($$;$) { my ( $obj, $str, $chk ) = @_; + return undef unless defined $str; my $len = length($str); pos($str) = 0; my $bytes = substr($str, 0, 0); # to propagate taintedness @@ -61,6 +61,7 @@ sub encode($$;$) { sub decode($$;$) { use re 'taint'; my ( $obj, $bytes, $chk ) = @_; + return undef unless defined $bytes; my $len = length($bytes); my $str = substr($bytes, 0, 0); # to propagate taintedness; pos($bytes) = 0; diff --git a/cpan/Encode/t/CJKT.t b/cpan/Encode/t/CJKT.t index 1648b1e5fd..264daf072f 100644 --- a/cpan/Encode/t/CJKT.t +++ b/cpan/Encode/t/CJKT.t @@ -57,8 +57,7 @@ for my $charset (sort keys %Charset){ $txt = join('',<$src>); close($src); - eval{ $uni = $transcoder->decode($txt, 1) }; - $@ and print $@; + eval { $uni = $transcoder->decode($txt, 1) } or print $@; ok(defined($uni), "decode $charset"); $seq++; is(length($txt),0, "decode $charset completely"); $seq++; @@ -89,8 +88,7 @@ for my $charset (sort keys %Charset){ close $src; my $unisave = $uni; - eval{ $txt = $transcoder->encode($uni,1) }; - $@ and print $@; + eval { $txt = $transcoder->encode($uni,1) } or print $@; ok(defined($txt), "encode $charset"); $seq++; is(length($uni), 0, "encode $charset completely"); $seq++; $uni = $unisave; diff --git a/cpan/Encode/t/enc_data.t b/cpan/Encode/t/enc_data.t index 2ead16ea95..e610b0d10e 100644 --- a/cpan/Encode/t/enc_data.t +++ b/cpan/Encode/t/enc_data.t @@ -1,4 +1,4 @@ -# $Id: enc_data.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $ +# $Id: enc_data.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $ BEGIN { require Config; import Config; diff --git a/cpan/Encode/t/enc_eucjp.t b/cpan/Encode/t/enc_eucjp.t index 9b32459792..fc0af3cf33 100644 --- a/cpan/Encode/t/enc_eucjp.t +++ b/cpan/Encode/t/enc_eucjp.t @@ -1,4 +1,4 @@ -# $Id: enc_eucjp.t,v 2.3 2016/08/10 18:08:45 dankogai Exp $ +# $Id: enc_eucjp.t,v 2.5 2017/06/10 17:23:50 dankogai Exp $ # This is the twin of enc_utf8.t . BEGIN { @@ -19,8 +19,8 @@ BEGIN { print "1..0 # Skip: Perl 5.8.1 or later required\n"; exit 0; } - if ($] >= 5.025003){ - print "1..0 # Skip: Perl 5.25.2 or lower required\n"; + if ($] >= 5.025003 and !$Config{usecperl}){ + print "1..0 # Skip: Perl <=5.25.2 or cperl required\n"; exit 0; } } @@ -30,7 +30,7 @@ use encoding 'euc-jp'; my @c = (127, 128, 255, 256); -print "1.." . (scalar @c + 1) . "\n"; +print "1.." . (scalar @c + 2) . "\n"; my @f; @@ -65,7 +65,19 @@ binmode(F, ":raw"); # Output raw bytes. print F chr(128); # Output illegal UTF-8. close F; open(F, $f) or die "$0: failed to open '$f' for reading: $!"; -binmode(F, ":encoding(utf-8)"); +binmode(F, ":encoding(UTF-8)"); +{ + local $^W = 1; + local $SIG{__WARN__} = sub { $a = shift }; + eval { <F> }; # This should get caught. +} +close F; +print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ? + "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n"; +$t++; + +open(F, $f) or die "$0: failed to open '$f' for reading: $!"; +binmode(F, ":encoding(utf8)"); { local $^W = 1; local $SIG{__WARN__} = sub { $a = shift }; @@ -74,6 +86,7 @@ binmode(F, ":encoding(utf-8)"); close F; print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ? "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n"; +$t++; # On VMS temporary file names like "f0." may be more readable than "f0" since # "f0" could be a logical name pointing elsewhere. diff --git a/cpan/Encode/t/enc_module.t b/cpan/Encode/t/enc_module.t index 7d7382b903..fd6e6dcde6 100644 --- a/cpan/Encode/t/enc_module.t +++ b/cpan/Encode/t/enc_module.t @@ -1,4 +1,4 @@ -# $Id: enc_module.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $ +# $Id: enc_module.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $ # This file is in euc-jp BEGIN { require Config; import Config; diff --git a/cpan/Encode/t/enc_utf8.t b/cpan/Encode/t/enc_utf8.t index b07c573960..be7d487804 100644 --- a/cpan/Encode/t/enc_utf8.t +++ b/cpan/Encode/t/enc_utf8.t @@ -1,4 +1,4 @@ -# $Id: enc_utf8.t,v 2.3 2016/08/10 18:08:45 dankogai Exp $ +# $Id: enc_utf8.t,v 2.5 2017/06/10 17:23:50 dankogai Exp $ # This is the twin of enc_eucjp.t . BEGIN { @@ -15,8 +15,8 @@ BEGIN { print "1..0 # encoding pragma does not support EBCDIC platforms\n"; exit(0); } - if ($] >= 5.025003){ - print "1..0 # Skip: Perl 5.25.2 or lower required\n"; + if ($] >= 5.025003 and !$Config{usecperl}){ + print "1..0 # Skip: Perl <=5.25.2 or cperl required\n"; exit 0; } } @@ -26,7 +26,7 @@ use encoding 'utf8'; my @c = (127, 128, 255, 256); -print "1.." . (scalar @c + 1) . "\n"; +print "1.." . (scalar @c + 2) . "\n"; my @f; @@ -59,7 +59,19 @@ binmode(F, ":raw"); # Output raw bytes. print F chr(128); # Output illegal UTF-8. close F; open(F, $f) or die "$0: failed to open '$f' for reading: $!"; -binmode(F, ":encoding(utf-8)"); +binmode(F, ":encoding(UTF-8)"); +{ + local $^W = 1; + local $SIG{__WARN__} = sub { $a = shift }; + eval { <F> }; # This should get caught. +} +close F; +print $a =~ qr{^UTF-8 "\\x80" does not map to Unicode} ? + "ok $t - illegal UTF-8 input\n" : "not ok $t - illegal UTF-8 input: a = " . unpack("H*", $a) . "\n"; +$t++; + +open(F, $f) or die "$0: failed to open '$f' for reading: $!"; +binmode(F, ":encoding(utf8)"); { local $^W = 1; local $SIG{__WARN__} = sub { $a = shift }; @@ -68,6 +80,7 @@ binmode(F, ":encoding(utf-8)"); close F; print $a =~ qr{^utf8 "\\x80" does not map to Unicode} ? "ok $t - illegal utf8 input\n" : "not ok $t - illegal utf8 input: a = " . unpack("H*", $a) . "\n"; +$t++; # On VMS temporary file names like "f0." may be more readable than "f0" since # "f0" could be a logical name pointing elsewhere. diff --git a/cpan/Encode/t/fallback.t b/cpan/Encode/t/fallback.t index 86605ef3b8..011c86dbfc 100644 --- a/cpan/Encode/t/fallback.t +++ b/cpan/Encode/t/fallback.t @@ -17,7 +17,7 @@ BEGIN { use strict; #use Test::More qw(no_plan); -use Test::More tests => 50; +use Test::More tests => 58; use Encode q(:all); my $uo = ''; @@ -35,7 +35,7 @@ for my $i (0x80..0xff){ $uo .= chr($i); $residue .= chr($i); $af .= '?'; - $uf .= "\x{FFFD}" if $i < 0xfd; + $uf .= "\x{FFFD}"; $ap .= sprintf("\\x{%04x}", $i); $up .= sprintf("\\x%02X", $i); $ah .= sprintf("&#%d;", $i); @@ -50,6 +50,7 @@ my $ao = $uo; utf8::upgrade($uo); my $ascii = find_encoding('ascii'); +my $latin1 = find_encoding('latin1'); my $utf8 = find_encoding('utf8'); my $src = $uo; @@ -166,19 +167,46 @@ is($src, $ao, "coderef residue decode"); $src = "\x{3000}"; $dst = $ascii->encode($src, sub{ $_[0] }); -is $dst, 0x3000."", qq{$ascii->encode(\$src, sub{ \$_[0] } )}; +is $dst, 0x3000."", q{$ascii->encode($src, sub{ $_[0] } )}; $dst = encode("ascii", "\x{3000}", sub{ $_[0] }); -is $dst, 0x3000."", qq{encode("ascii", "\\x{3000}", sub{ \$_[0] })}; +is $dst, 0x3000."", q{encode("ascii", "\x{3000}", sub{ $_[0] })}; $src = pack "C*", 0xFF; $dst = $ascii->decode($src, sub{ $_[0] }); -is $dst, 0xFF."", qq{$ascii->encode(\$src, sub{ \$_[0] } )}; +is $dst, 0xFF."", q{$ascii->encode($src, sub{ $_[0] } )}; $dst = decode("ascii", (pack "C*", 0xFF), sub{ $_[0] }); -is $dst, 0xFF."", qq{decode("ascii", (pack "C*", 0xFF), sub{ \$_[0] })}; +is $dst, 0xFF."", q{decode("ascii", (pack "C*", 0xFF), sub{ $_[0] })}; $src = pack "C*", 0x80; $dst = $utf8->decode($src, sub{ $_[0] }); -is $dst, 0x80."", qq{$utf8->encode(\$src, sub{ \$_[0] } )}; +is $dst, 0x80."", q{$utf8->encode($src, sub{ $_[0] } )}; $dst = decode("utf8", $src, sub{ $_[0] }); -is $dst, 0x80."", qq{decode("utf8", (pack "C*", 0x80), sub{ \$_[0] })}; +is $dst, 0x80."", q{decode("utf8", (pack "C*", 0x80), sub{ $_[0] })}; + +$src = "\x{3000}"; +$dst = $latin1->encode($src, sub { "\N{U+FF}" }); +is $dst, "\x{ff}", q{$latin1->encode($src, sub { "\N{U+FF}" })}; +$dst = encode("latin1", $src, sub { "\N{U+FF}" }); +is $dst, "\x{ff}", q{encode("latin1", $src, sub { "\N{U+FF}" })}; + +$src = "\x{3000}"; +$dst = $latin1->encode($src, sub { utf8::upgrade(my $r = "\x{ff}"); $r }); +is $dst, "\x{ff}", q{$latin1->encode($src, sub { utf8::upgrade(my $r = "\x{ff}"); $r })}; +$dst = encode("latin1", $src, sub { utf8::upgrade(my $r = "\x{ff}"); $r }); +is $dst, "\x{ff}", q{encode("latin1", $src, sub { utf8::upgrade(my $r = "\x{ff}"); $r })}; + +$src = "\x{ff}"; +$dst = $utf8->decode($src, sub { chr($_[0]) }); +is $dst, "\x{ff}", q{$utf8->decode($src, sub { chr($_[0]) })}; +$dst = decode("utf8", $src, sub { chr($_[0]) }); +is $dst, "\x{ff}", q{decode("utf8", $src, sub { chr($_[0]) })}; + +{ + use charnames ':full'; + $src = "\x{ff}"; + $dst = $utf8->decode($src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r }); + is $dst, "\N{LATIN SMALL LETTER Y WITH DIAERESIS}", q{$utf8->decode($src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r })}; + $dst = decode("utf8", $src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r }); + is $dst, "\N{LATIN SMALL LETTER Y WITH DIAERESIS}", q{decode("utf8", $src, sub { utf8::downgrade(my $r = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"); $r })}; +} diff --git a/cpan/Encode/t/guess.t b/cpan/Encode/t/guess.t index 81ab91b562..896028ba8d 100644 --- a/cpan/Encode/t/guess.t +++ b/cpan/Encode/t/guess.t @@ -18,11 +18,7 @@ use Encode qw(decode encode find_encoding _utf8_off); #use Test::More qw(no_plan); use Test::More tests => 32; -use_ok("Encode::Guess"); -{ - no warnings; - $Encode::Guess::DEBUG = shift || 0; -} +BEGIN { use_ok("Encode::Guess") } my $ascii = join('' => map {chr($_)}(0x21..0x7e)); my $latin1 = join('' => map {chr($_)}(0xa1..0xfe)); diff --git a/cpan/Encode/t/jperl.t b/cpan/Encode/t/jperl.t index a0e7a379f6..5995a592ba 100644 --- a/cpan/Encode/t/jperl.t +++ b/cpan/Encode/t/jperl.t @@ -1,5 +1,5 @@ # -# $Id: jperl.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $ +# $Id: jperl.t,v 2.5 2016/11/29 23:29:23 dankogai Exp $ # # This script is written in euc-jp diff --git a/cpan/Encode/t/mime-header.t b/cpan/Encode/t/mime-header.t index a997dffb41..7abb0206cb 100644 --- a/cpan/Encode/t/mime-header.t +++ b/cpan/Encode/t/mime-header.t @@ -1,5 +1,5 @@ # -# $Id: mime-header.t,v 2.14 2016/11/29 23:29:23 dankogai Exp dankogai $ +# $Id: mime-header.t,v 2.15 2017/07/18 07:15:29 dankogai Exp dankogai $ # This script is written in utf8 # BEGIN { @@ -24,7 +24,7 @@ use strict; use utf8; use charnames ":full"; -use Test::More tests => 264; +use Test::More tests => 266; BEGIN { use_ok("Encode::MIME::Header"); @@ -136,6 +136,8 @@ my @decode_default_tests = ( "=?utf8?Q?=C3=A1=f9=80=80=80=80?=" => "á�", "=?UTF8?Q?=C3=A1=f9=80=80=80=80?=" => "á�", "=?utf-8-strict?Q?=C3=A1=f9=80=80=80=80?=" => "á�", + # allow non-ASCII characters in q word + "=?UTF-8?Q?\x{C3}\x{A1}?=" => "á", ); my @decode_strict_tests = ( @@ -155,6 +157,8 @@ my @decode_strict_tests = ( "=?utf8?Q?=C3=A1?=" => "=?utf8?Q?=C3=A1?=", "=?UTF8?Q?=C3=A1?=" => "=?UTF8?Q?=C3=A1?=", "=?utf-8-strict?Q?=C3=A1?=" => "=?utf-8-strict?Q?=C3=A1?=", + # do not allow non-ASCII characters in q word + "=?UTF-8?Q?\x{C3}\x{A1}?=" => "=?UTF-8?Q?\x{C3}\x{A1}?=", ); my @encode_tests = ( diff --git a/cpan/Encode/t/truncated_utf8.t b/cpan/Encode/t/truncated_utf8.t new file mode 100644 index 0000000000..7de8bb9ac1 --- /dev/null +++ b/cpan/Encode/t/truncated_utf8.t @@ -0,0 +1,55 @@ +BEGIN { + if ($ENV{'PERL_CORE'}) { + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use warnings; + +use Encode; +use PerlIO::encoding; +$PerlIO::encoding::fallback &= ~(Encode::WARN_ON_ERR|Encode::PERLQQ); + +use Test::More tests => 9; + +binmode Test::More->builder->failure_output, ":utf8"; +binmode Test::More->builder->todo_output, ":utf8"; + +is(decode("UTF-8", "\xfd\xfe"), "\x{fffd}" x 2); +is(decode("UTF-8", "\xfd\xfe\xff"), "\x{fffd}" x 3); +is(decode("UTF-8", "\xfd\xfe\xff\xe0"), "\x{fffd}" x 4); +is(decode("UTF-8", "\xfd\xfe\xff\xe0\xe1"), "\x{fffd}" x 5); +is(decode("UTF-8", "\xc1\x9f"), "\x{fffd}"); +is(decode("UTF-8", "\xFF\x80\x90\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"), "\x{fffd}"); +is(decode("UTF-8", "\xF0\x80\x80\x80"), "\x{fffd}"); + +SKIP: { + # infinite loop due to bug: https://rt.perl.org/Public/Bug/Display.html?id=41442 + skip "Perl Version ($]) is older than v5.8.9", 2 if $] < 5.008009; + my $str = ("x" x 1023) . "\xfd\xfe\xffx"; + open my $fh, '<:encoding(UTF-8)', \$str; + my $str2 = <$fh>; + close $fh; + is($str2, ("x" x 1023) . ("\x{fffd}" x 3) . "x"); + + TODO: { + local $TODO = "bug in perlio"; + my $str = ("x" x 1023) . "\xfd\xfe\xff"; + open my $fh, '<:encoding(UTF-8)', \$str; + my $str2 = <$fh>; + close $fh; + is($str2, ("x" x 1023) . ("\x{fffd}" x 3)); + } +} diff --git a/cpan/Encode/t/undef.t b/cpan/Encode/t/undef.t new file mode 100644 index 0000000000..de52019b18 --- /dev/null +++ b/cpan/Encode/t/undef.t @@ -0,0 +1,25 @@ +use strict; +use warnings FATAL => 'all'; + +use Test::More; + +use Encode qw(encode decode find_encoding); +use Encode::Encoder qw(encoder); + +local %Encode::ExtModule = %Encode::Config::ExtModule; + +my @names = Encode->encodings(':all'); + +plan tests => 1 + 4 * @names; + +my $emptyutf8; +eval { my $c = encoder($emptyutf8)->utf8; }; +ok(!$@,"crashed encoding undef variable ($@)"); + +for my $name (@names) { + my $enc = find_encoding($name); + is($enc->encode(undef), undef, "find_encoding('$name')->encode(undef) returns undef"); + is($enc->decode(undef), undef, "find_encoding('$name')->decode(undef) returns undef"); + is(encode($name, undef), undef, "encode('$name', undef) returns undef"); + is(decode($name, undef), undef, "decode('$name', undef) returns undef"); +} diff --git a/cpan/Encode/t/use-Encode-Alias.t b/cpan/Encode/t/use-Encode-Alias.t new file mode 100644 index 0000000000..dab8142cfa --- /dev/null +++ b/cpan/Encode/t/use-Encode-Alias.t @@ -0,0 +1,8 @@ +use strict; +use warnings; + +use Encode::Alias; +use open ":std", ":locale"; + +print "1..1\n"; +print "ok 1 - use Encode::Alias works\n"; diff --git a/cpan/Encode/t/utf8messages.t b/cpan/Encode/t/utf8messages.t new file mode 100644 index 0000000000..8b6b379acb --- /dev/null +++ b/cpan/Encode/t/utf8messages.t @@ -0,0 +1,33 @@ +use strict; +use warnings; +BEGIN { 'warnings'->unimport('utf8') if $] < 5.014 }; # turn off 'UTF-16 surrogate 0xd800' warnings + +use Test::More; +use Encode qw(encode decode FB_CROAK LEAVE_SRC); + +plan tests => 12; + +my @invalid; + +ok ! defined eval { encode('UTF-8', "\x{D800}", FB_CROAK | LEAVE_SRC) }, 'Surrogate codepoint \x{D800} is not encoded to strict UTF-8'; +like $@, qr/^"\\x\{d800\}" does not map to UTF-8 /, 'Error message contains strict UTF-8 name'; +@invalid = (); +encode('UTF-8', "\x{D800}", sub { @invalid = @_; return ""; }); +is_deeply \@invalid, [ 0xD800 ], 'Fallback coderef contains invalid codepoint 0xD800'; + +ok ! defined eval { decode('UTF-8', "\xed\xa0\x80", FB_CROAK | LEAVE_SRC) }, 'Surrogate UTF-8 byte sequence \xED\xA0\x80 is decoded with strict UTF-8 decoder'; +like $@, qr/^UTF-8 "\\xED\\xA0\\x80" does not map to Unicode /, 'Error message contains strict UTF-8 name and original (not decoded) invalid sequence'; +@invalid = (); +decode('UTF-8', "\xed\xa0\x80", sub { @invalid = @_; return ""; }); +is_deeply \@invalid, [ 0xED, 0xA0, 0x80 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0, 0x80'; + +ok ! defined eval { decode('UTF-8', "\xed\xa0", FB_CROAK | LEAVE_SRC) }, 'Invalid byte sequence \xED\xA0 is not decoded with strict UTF-8 decoder'; +like $@, qr/^UTF-8 "\\xED\\xA0" does not map to Unicode /, 'Error message contains strict UTF-8 name and original (not decoded) invalid sequence'; +@invalid = (); +decode('UTF-8', "\xed\xa0", sub { @invalid = @_; return ""; }); +is_deeply \@invalid, [ 0xED, 0xA0 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0'; + +ok ! defined eval { decode('utf8', "\xed\xa0", FB_CROAK | LEAVE_SRC) }, 'Invalid byte sequence \xED\xA0 is not decoded with non-strict utf8 decoder'; +like $@, qr/^utf8 "\\xED\\xA0" does not map to Unicode /, 'Error message contains non-strict utf8 name and original (not decoded) invalid sequence'; +decode('utf8', "\xed\xa0", sub { @invalid = @_; return ""; }); +is_deeply \@invalid, [ 0xED, 0xA0 ], 'Fallback coderef contains invalid byte sequence 0xED, 0xA0'; diff --git a/cpan/Encode/t/whatwg-aliases.json b/cpan/Encode/t/whatwg-aliases.json new file mode 100644 index 0000000000..4307b0cc48 --- /dev/null +++ b/cpan/Encode/t/whatwg-aliases.json @@ -0,0 +1,455 @@ +[ + { + "encodings": [ + { + "labels": [ + "unicode-1-1-utf-8", + "utf-8", + "utf8" + ], + "name": "UTF-8" + } + ], + "heading": "The Encoding" + }, + { + "encodings": [ + { + "labels": [ + "866", + "cp866", + "csibm866", + "ibm866" + ], + "name": "IBM866" + }, + { + "labels": [ + "csisolatin2", + "iso-8859-2", + "iso-ir-101", + "iso8859-2", + "iso88592", + "iso_8859-2", + "iso_8859-2:1987", + "l2", + "latin2" + ], + "name": "ISO-8859-2" + }, + { + "labels": [ + "csisolatin3", + "iso-8859-3", + "iso-ir-109", + "iso8859-3", + "iso88593", + "iso_8859-3", + "iso_8859-3:1988", + "l3", + "latin3" + ], + "name": "ISO-8859-3" + }, + { + "labels": [ + "csisolatin4", + "iso-8859-4", + "iso-ir-110", + "iso8859-4", + "iso88594", + "iso_8859-4", + "iso_8859-4:1988", + "l4", + "latin4" + ], + "name": "ISO-8859-4" + }, + { + "labels": [ + "csisolatincyrillic", + "cyrillic", + "iso-8859-5", + "iso-ir-144", + "iso8859-5", + "iso88595", + "iso_8859-5", + "iso_8859-5:1988" + ], + "name": "ISO-8859-5" + }, + { + "labels": [ + "arabic", + "asmo-708", + "csiso88596e", + "csiso88596i", + "csisolatinarabic", + "ecma-114", + "iso-8859-6", + "iso-8859-6-e", + "iso-8859-6-i", + "iso-ir-127", + "iso8859-6", + "iso88596", + "iso_8859-6", + "iso_8859-6:1987" + ], + "name": "ISO-8859-6" + }, + { + "labels": [ + "csisolatingreek", + "ecma-118", + "elot_928", + "greek", + "greek8", + "iso-8859-7", + "iso-ir-126", + "iso8859-7", + "iso88597", + "iso_8859-7", + "iso_8859-7:1987", + "sun_eu_greek" + ], + "name": "ISO-8859-7" + }, + { + "labels": [ + "csiso88598e", + "csisolatinhebrew", + "hebrew", + "iso-8859-8", + "iso-8859-8-e", + "iso-ir-138", + "iso8859-8", + "iso88598", + "iso_8859-8", + "iso_8859-8:1988", + "visual" + ], + "name": "ISO-8859-8" + }, + { + "labels": [ + "csiso88598i", + "iso-8859-8-i", + "logical" + ], + "name": "ISO-8859-8-I" + }, + { + "labels": [ + "csisolatin6", + "iso-8859-10", + "iso-ir-157", + "iso8859-10", + "iso885910", + "l6", + "latin6" + ], + "name": "ISO-8859-10" + }, + { + "labels": [ + "iso-8859-13", + "iso8859-13", + "iso885913" + ], + "name": "ISO-8859-13" + }, + { + "labels": [ + "iso-8859-14", + "iso8859-14", + "iso885914" + ], + "name": "ISO-8859-14" + }, + { + "labels": [ + "csisolatin9", + "iso-8859-15", + "iso8859-15", + "iso885915", + "iso_8859-15", + "l9" + ], + "name": "ISO-8859-15" + }, + { + "labels": [ + "iso-8859-16" + ], + "name": "ISO-8859-16" + }, + { + "labels": [ + "cskoi8r", + "koi", + "koi8", + "koi8-r", + "koi8_r" + ], + "name": "KOI8-R" + }, + { + "labels": [ + "koi8-ru", + "koi8-u" + ], + "name": "KOI8-U" + }, + { + "labels": [ + "csmacintosh", + "mac", + "macintosh", + "x-mac-roman" + ], + "name": "macintosh" + }, + { + "labels": [ + "dos-874", + "iso-8859-11", + "iso8859-11", + "iso885911", + "tis-620", + "windows-874" + ], + "name": "windows-874" + }, + { + "labels": [ + "cp1250", + "windows-1250", + "x-cp1250" + ], + "name": "windows-1250" + }, + { + "labels": [ + "cp1251", + "windows-1251", + "x-cp1251" + ], + "name": "windows-1251" + }, + { + "labels": [ + "ansi_x3.4-1968", + "ascii", + "cp1252", + "cp819", + "csisolatin1", + "ibm819", + "iso-8859-1", + "iso-ir-100", + "iso8859-1", + "iso88591", + "iso_8859-1", + "iso_8859-1:1987", + "l1", + "latin1", + "us-ascii", + "windows-1252", + "x-cp1252" + ], + "name": "windows-1252" + }, + { + "labels": [ + "cp1253", + "windows-1253", + "x-cp1253" + ], + "name": "windows-1253" + }, + { + "labels": [ + "cp1254", + "csisolatin5", + "iso-8859-9", + "iso-ir-148", + "iso8859-9", + "iso88599", + "iso_8859-9", + "iso_8859-9:1989", + "l5", + "latin5", + "windows-1254", + "x-cp1254" + ], + "name": "windows-1254" + }, + { + "labels": [ + "cp1255", + "windows-1255", + "x-cp1255" + ], + "name": "windows-1255" + }, + { + "labels": [ + "cp1256", + "windows-1256", + "x-cp1256" + ], + "name": "windows-1256" + }, + { + "labels": [ + "cp1257", + "windows-1257", + "x-cp1257" + ], + "name": "windows-1257" + }, + { + "labels": [ + "cp1258", + "windows-1258", + "x-cp1258" + ], + "name": "windows-1258" + }, + { + "labels": [ + "x-mac-cyrillic", + "x-mac-ukrainian" + ], + "name": "x-mac-cyrillic" + } + ], + "heading": "Legacy single-byte encodings" + }, + { + "encodings": [ + { + "labels": [ + "chinese", + "csgb2312", + "csiso58gb231280", + "gb2312", + "gb_2312", + "gb_2312-80", + "gbk", + "iso-ir-58", + "x-gbk" + ], + "name": "GBK" + }, + { + "labels": [ + "gb18030" + ], + "name": "gb18030" + } + ], + "heading": "Legacy multi-byte Chinese (simplified) encodings" + }, + { + "encodings": [ + { + "labels": [ + "big5", + "big5-hkscs", + "cn-big5", + "csbig5", + "x-x-big5" + ], + "name": "Big5" + } + ], + "heading": "Legacy multi-byte Chinese (traditional) encodings" + }, + { + "encodings": [ + { + "labels": [ + "cseucpkdfmtjapanese", + "euc-jp", + "x-euc-jp" + ], + "name": "EUC-JP" + }, + { + "labels": [ + "csiso2022jp", + "iso-2022-jp" + ], + "name": "ISO-2022-JP" + }, + { + "labels": [ + "csshiftjis", + "ms932", + "ms_kanji", + "shift-jis", + "shift_jis", + "sjis", + "windows-31j", + "x-sjis" + ], + "name": "Shift_JIS" + } + ], + "heading": "Legacy multi-byte Japanese encodings" + }, + { + "encodings": [ + { + "labels": [ + "cseuckr", + "csksc56011987", + "euc-kr", + "iso-ir-149", + "korean", + "ks_c_5601-1987", + "ks_c_5601-1989", + "ksc5601", + "ksc_5601", + "windows-949" + ], + "name": "EUC-KR" + } + ], + "heading": "Legacy multi-byte Korean encodings" + }, + { + "encodings": [ + { + "labels": [ + "csiso2022kr", + "hz-gb-2312", + "iso-2022-cn", + "iso-2022-cn-ext", + "iso-2022-kr" + ], + "name": "replacement" + }, + { + "labels": [ + "utf-16be" + ], + "name": "UTF-16BE" + }, + { + "labels": [ + "utf-16", + "utf-16le" + ], + "name": "UTF-16LE" + }, + { + "labels": [ + "x-user-defined" + ], + "name": "x-user-defined" + } + ], + "heading": "Legacy miscellaneous encodings" + } +] diff --git a/cpan/Encode/t/whatwg-aliases.t b/cpan/Encode/t/whatwg-aliases.t new file mode 100644 index 0000000000..ffc030bb75 --- /dev/null +++ b/cpan/Encode/t/whatwg-aliases.t @@ -0,0 +1,66 @@ +# This test checks aliases support based on the list in the +# WHATWG Encoding Living Standard +# +# https://encoding.spec.whatwg.org/ +# +# The input of this test is the file whatwg-aliases.json downloaded from +# https://encoding.spec.whatwg.org/encodings.json +# +# To run: +# AUTHOR_TESTING=1 prove -l t/whatwg-aliases.t + + +use Test::More + ($ENV{AUTHOR_TESTING} || $ENV{RELEASE_TESTING}) + ? 'no_plan' + : (skip_all => 'For maintainers only'); +use Encode 'find_encoding'; +use JSON::PP 'decode_json'; +use File::Spec; +use FindBin; + +my $encodings = decode_json(do { + # https://encoding.spec.whatwg.org/encodings.json + open my $f, '<', File::Spec->catdir($FindBin::Bin, 'whatwg-aliases.json'); + local $/; + <$f> +}); + +my %IGNORE = map { $_ => '' } qw( + replacement + utf8 +); + +my %TODO = ( + 'ISO-8859-8-I' => 'Not supported', + 'gb18030' => 'Not supported', + '866' => 'Not supported', + 'x-user-defined' => 'Not supported', + # ... +); + +for my $section (@$encodings) { + for my $enc (@{$section->{encodings}}) { + + my $name = $enc->{name}; + + next if exists $IGNORE{$name}; + + local $TODO = $TODO{$name} if exists $TODO{$name}; + + my $encoding = find_encoding($name); + isa_ok($encoding, 'Encode::Encoding', $name); + + for my $label (@{$enc->{labels}}) { + local $TODO = $TODO{$label} if exists $TODO{$label}; + + my $e = find_encoding($label); + if (isa_ok($e, 'Encode::Encoding', $label)) { + next if exists $IGNORE{$label}; + is($e->name, $encoding->name, "$label ->name is $name") + } + } + } +} + +done_testing; |