diff options
author | Ricardo Signes <rjbs@semiotic.systems> | 2021-08-09 13:15:48 -0400 |
---|---|---|
committer | Ricardo Signes <rjbs@semiotic.systems> | 2021-08-09 13:30:09 -0400 |
commit | 7167e196c729bb679e5f71ce33585f3a6870507b (patch) | |
tree | 6dcb82848fe13bda70fe266636bad4560fedd4e4 /cpan | |
parent | c275db86a94cfa31d8d2877ec92c38efa923f762 (diff) | |
download | perl-7167e196c729bb679e5f71ce33585f3a6870507b.tar.gz |
Upgraded Encode from 3.10_01 to 3.12
Diffstat (limited to 'cpan')
-rw-r--r-- | cpan/Encode/Encode.pm | 5 | ||||
-rw-r--r-- | cpan/Encode/Unicode/Unicode.pm | 4 | ||||
-rw-r--r-- | cpan/Encode/Unicode/Unicode.xs | 6 | ||||
-rw-r--r-- | cpan/Encode/lib/Encode/GSM0338.pm | 52 | ||||
-rw-r--r-- | cpan/Encode/t/Unicode.t | 4 | ||||
-rw-r--r-- | cpan/Encode/t/Unicode_trailing_nul.t | 26 | ||||
-rw-r--r-- | cpan/Encode/t/whatwg-aliases.json | 455 | ||||
-rw-r--r-- | cpan/Encode/t/whatwg-aliases.t | 66 |
8 files changed, 586 insertions, 32 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm index b96a850416..841ec23afb 100644 --- a/cpan/Encode/Encode.pm +++ b/cpan/Encode/Encode.pm @@ -1,5 +1,5 @@ # -# $Id: Encode.pm,v 3.10 2021/05/18 07:42:45 dankogai Exp dankogai $ +# $Id: Encode.pm,v 3.12 2021/08/09 14:17:04 dankogai Exp dankogai $ # package Encode; use strict; @@ -7,8 +7,7 @@ use warnings; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; our $VERSION; BEGIN { - $VERSION = "3.10_01"; - $VERSION = eval $VERSION; + $VERSION = sprintf "%d.%02d", q$Revision: 3.12 $ =~ /(\d+)/g; require XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); } diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm index 540337e94f..eb72c3903f 100644 --- a/cpan/Encode/Unicode/Unicode.pm +++ b/cpan/Encode/Unicode/Unicode.pm @@ -3,7 +3,7 @@ package Encode::Unicode; use strict; use warnings; -our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); @@ -259,7 +259,7 @@ Consider that "division by zero" of Encode :) =head1 SEE ALSO L<Encode>, L<Encode::Unicode::UTF7>, L<http://www.unicode.org/glossary/>, -L<http://www.unicode.org/unicode/faq/utf_bom.html>, +L<http://www.unicode.org/faq/utf_bom.html>, RFC 2781 L<http://www.ietf.org/rfc/rfc2781.txt>, diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs index 4e111e25d7..cc4817e117 100644 --- a/cpan/Encode/Unicode/Unicode.xs +++ b/cpan/Encode/Unicode/Unicode.xs @@ -1,5 +1,5 @@ /* - $Id: Unicode.xs,v 2.19 2019/01/21 03:09:59 dankogai Exp $ + $Id: Unicode.xs,v 2.20 2021/07/23 02:26:54 dankogai Exp $ */ #define IN_UNICODE_XS @@ -361,6 +361,10 @@ CODE: } if (!temp_result) shrink_buffer(result); + + /* Make sure we have a trailing NUL: */ + *SvEND(result) = '\0'; + if (SvTAINTED(str)) SvTAINTED_on(result); /* propagate taintedness */ XSRETURN(1); } diff --git a/cpan/Encode/lib/Encode/GSM0338.pm b/cpan/Encode/lib/Encode/GSM0338.pm index 644d445285..b149c6dc9c 100644 --- a/cpan/Encode/lib/Encode/GSM0338.pm +++ b/cpan/Encode/lib/Encode/GSM0338.pm @@ -1,5 +1,5 @@ # -# $Id: GSM0338.pm,v 2.9 2020/12/02 01:28:17 dankogai Exp dankogai $ +# $Id: GSM0338.pm,v 2.10 2021/05/24 10:56:53 dankogai Exp $ # package Encode::GSM0338; @@ -8,7 +8,7 @@ use warnings; use Carp; use vars qw($VERSION); -$VERSION = do { my @r = ( q$Revision: 2.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(:fallbacks); @@ -159,12 +159,12 @@ our %UNI2GSM = ( "\x{20AC}" => "\x1B\x65", # EURO SIGN ); our %GSM2UNI = reverse %UNI2GSM; -our $ESC = "\x1b"; +our $ESC = "\x1b"; sub decode ($$;$) { my ( $obj, $bytes, $chk ) = @_; return undef unless defined $bytes; - my $str = substr($bytes, 0, 0); # to propagate taintedness; + my $str = substr( $bytes, 0, 0 ); # to propagate taintedness; while ( length $bytes ) { my $seq = ''; my $c; @@ -173,53 +173,57 @@ sub decode ($$;$) { $seq .= $c; } while ( length $bytes and $c eq $ESC ); my $u = - exists $GSM2UNI{$seq} - ? $GSM2UNI{$seq} - : ($chk && ref $chk eq 'CODE') - ? $chk->( unpack 'C*', $seq ) - : "\x{FFFD}"; + exists $GSM2UNI{$seq} ? $GSM2UNI{$seq} + : ( $chk && ref $chk eq 'CODE' ) ? $chk->( unpack 'C*', $seq ) + : "\x{FFFD}"; if ( not exists $GSM2UNI{$seq} and $chk and not ref $chk ) { - if ( substr($seq, 0, 1) eq $ESC and ($chk & Encode::STOP_AT_PARTIAL) ) { + if ( substr( $seq, 0, 1 ) eq $ESC + and ( $chk & Encode::STOP_AT_PARTIAL ) ) + { $bytes .= $seq; last; } - croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::DIE_ON_ERR; - carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::WARN_ON_ERR; - if ($chk & Encode::RETURN_ON_ERR) { + croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) + . ' does not map to Unicode' + if $chk & Encode::DIE_ON_ERR; + carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) + . ' does not map to Unicode' + if $chk & Encode::WARN_ON_ERR; + if ( $chk & Encode::RETURN_ON_ERR ) { $bytes .= $seq; last; } } $str .= $u; } - $_[1] = $bytes if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC); + $_[1] = $bytes if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC ); return $str; } sub encode($$;$) { my ( $obj, $str, $chk ) = @_; return undef unless defined $str; - my $bytes = substr($str, 0, 0); # to propagate taintedness + my $bytes = substr( $str, 0, 0 ); # to propagate taintedness while ( length $str ) { my $u = substr( $str, 0, 1, '' ); my $c; my $seq = - exists $UNI2GSM{$u} - ? $UNI2GSM{$u} - : ($chk && ref $chk eq 'CODE') - ? $chk->( ord($u) ) - : $UNI2GSM{'?'}; + exists $UNI2GSM{$u} ? $UNI2GSM{$u} + : ( $chk && ref $chk eq 'CODE' ) ? $chk->( ord($u) ) + : $UNI2GSM{'?'}; if ( not exists $UNI2GSM{$u} and $chk and not ref $chk ) { - croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::DIE_ON_ERR; - carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::WARN_ON_ERR; - if ($chk & Encode::RETURN_ON_ERR) { + croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) + if $chk & Encode::DIE_ON_ERR; + carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) + if $chk & Encode::WARN_ON_ERR; + if ( $chk & Encode::RETURN_ON_ERR ) { $str .= $u; last; } } $bytes .= $seq; } - $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC); + $_[1] = $str if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC ); return $bytes; } diff --git a/cpan/Encode/t/Unicode.t b/cpan/Encode/t/Unicode.t index 2cc5d54855..61cef422fc 100644 --- a/cpan/Encode/t/Unicode.t +++ b/cpan/Encode/t/Unicode.t @@ -1,5 +1,5 @@ # -# $Id: Unicode.t,v 2.3 2012/08/05 23:08:49 dankogai Exp $ +# $Id: Unicode.t,v 2.4 2021/07/23 02:26:54 dankogai Exp $ # # This script is written entirely in ASCII, even though quoted literals # do include non-BMP unicode characters -- Are you happy, jhi? @@ -25,7 +25,7 @@ use Encode qw(encode decode find_encoding); # # see -# http://www.unicode.org/unicode/reports/tr19/ +# http://www.unicode.org/reports/tr19/ # my $dankogai = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}"; diff --git a/cpan/Encode/t/Unicode_trailing_nul.t b/cpan/Encode/t/Unicode_trailing_nul.t new file mode 100644 index 0000000000..e7fb7340bb --- /dev/null +++ b/cpan/Encode/t/Unicode_trailing_nul.t @@ -0,0 +1,26 @@ +use strict; +use Test::More; + +use Encode; +use File::Temp; +use File::Spec; + +# This test relies on https://github.com/Perl/perl5/issues/10623; +# if that bug is ever fixed then this test may never fail again. + +my $foo = Encode::decode("UTF-16LE", "/\0v\0a\0r\0/\0f\0f\0f\0f\0f\0f\0/\0u\0s\0e\0r\0s\0/\0s\0u\0p\0e\0r\0m\0a\0n\0"); + +my ($fh, $path) = File::Temp::tempfile( CLEANUP => 1 ); + +diag "temp file: $path"; + +# Perl gives the internal PV to exec .. which is buggy/wrong but +# useful here: +system( $^X, '-e', "open my \$fh, '>>', '$path' or die \$!; print {\$fh} \$ARGV[0]", $foo ); +die if $?; + +my $output = do { local $/; <$fh> }; + +is( $output, "/var/ffffff/users/superman", 'UTF-16 decodes with trailing NUL' ); + +done_testing(); diff --git a/cpan/Encode/t/whatwg-aliases.json b/cpan/Encode/t/whatwg-aliases.json new file mode 100644 index 0000000000..4307b0cc48 --- /dev/null +++ b/cpan/Encode/t/whatwg-aliases.json @@ -0,0 +1,455 @@ +[ + { + "encodings": [ + { + "labels": [ + "unicode-1-1-utf-8", + "utf-8", + "utf8" + ], + "name": "UTF-8" + } + ], + "heading": "The Encoding" + }, + { + "encodings": [ + { + "labels": [ + "866", + "cp866", + "csibm866", + "ibm866" + ], + "name": "IBM866" + }, + { + "labels": [ + "csisolatin2", + "iso-8859-2", + "iso-ir-101", + "iso8859-2", + "iso88592", + "iso_8859-2", + "iso_8859-2:1987", + "l2", + "latin2" + ], + "name": "ISO-8859-2" + }, + { + "labels": [ + "csisolatin3", + "iso-8859-3", + "iso-ir-109", + "iso8859-3", + "iso88593", + "iso_8859-3", + "iso_8859-3:1988", + "l3", + "latin3" + ], + "name": "ISO-8859-3" + }, + { + "labels": [ + "csisolatin4", + "iso-8859-4", + "iso-ir-110", + "iso8859-4", + "iso88594", + "iso_8859-4", + "iso_8859-4:1988", + "l4", + "latin4" + ], + "name": "ISO-8859-4" + }, + { + "labels": [ + "csisolatincyrillic", + "cyrillic", + "iso-8859-5", + "iso-ir-144", + "iso8859-5", + "iso88595", + "iso_8859-5", + "iso_8859-5:1988" + ], + "name": "ISO-8859-5" + }, + { + "labels": [ + "arabic", + "asmo-708", + "csiso88596e", + "csiso88596i", + "csisolatinarabic", + "ecma-114", + "iso-8859-6", + "iso-8859-6-e", + "iso-8859-6-i", + "iso-ir-127", + "iso8859-6", + "iso88596", + "iso_8859-6", + "iso_8859-6:1987" + ], + "name": "ISO-8859-6" + }, + { + "labels": [ + "csisolatingreek", + "ecma-118", + "elot_928", + "greek", + "greek8", + "iso-8859-7", + "iso-ir-126", + "iso8859-7", + "iso88597", + "iso_8859-7", + "iso_8859-7:1987", + "sun_eu_greek" + ], + "name": "ISO-8859-7" + }, + { + "labels": [ + "csiso88598e", + "csisolatinhebrew", + "hebrew", + "iso-8859-8", + "iso-8859-8-e", + "iso-ir-138", + "iso8859-8", + "iso88598", + "iso_8859-8", + "iso_8859-8:1988", + "visual" + ], + "name": "ISO-8859-8" + }, + { + "labels": [ + "csiso88598i", + "iso-8859-8-i", + "logical" + ], + "name": "ISO-8859-8-I" + }, + { + "labels": [ + "csisolatin6", + "iso-8859-10", + "iso-ir-157", + "iso8859-10", + "iso885910", + "l6", + "latin6" + ], + "name": "ISO-8859-10" + }, + { + "labels": [ + "iso-8859-13", + "iso8859-13", + "iso885913" + ], + "name": "ISO-8859-13" + }, + { + "labels": [ + "iso-8859-14", + "iso8859-14", + "iso885914" + ], + "name": "ISO-8859-14" + }, + { + "labels": [ + "csisolatin9", + "iso-8859-15", + "iso8859-15", + "iso885915", + "iso_8859-15", + "l9" + ], + "name": "ISO-8859-15" + }, + { + "labels": [ + "iso-8859-16" + ], + "name": "ISO-8859-16" + }, + { + "labels": [ + "cskoi8r", + "koi", + "koi8", + "koi8-r", + "koi8_r" + ], + "name": "KOI8-R" + }, + { + "labels": [ + "koi8-ru", + "koi8-u" + ], + "name": "KOI8-U" + }, + { + "labels": [ + "csmacintosh", + "mac", + "macintosh", + "x-mac-roman" + ], + "name": "macintosh" + }, + { + "labels": [ + "dos-874", + "iso-8859-11", + "iso8859-11", + "iso885911", + "tis-620", + "windows-874" + ], + "name": "windows-874" + }, + { + "labels": [ + "cp1250", + "windows-1250", + "x-cp1250" + ], + "name": "windows-1250" + }, + { + "labels": [ + "cp1251", + "windows-1251", + "x-cp1251" + ], + "name": "windows-1251" + }, + { + "labels": [ + "ansi_x3.4-1968", + "ascii", + "cp1252", + "cp819", + "csisolatin1", + "ibm819", + "iso-8859-1", + "iso-ir-100", + "iso8859-1", + "iso88591", + "iso_8859-1", + "iso_8859-1:1987", + "l1", + "latin1", + "us-ascii", + "windows-1252", + "x-cp1252" + ], + "name": "windows-1252" + }, + { + "labels": [ + "cp1253", + "windows-1253", + "x-cp1253" + ], + "name": "windows-1253" + }, + { + "labels": [ + "cp1254", + "csisolatin5", + "iso-8859-9", + "iso-ir-148", + "iso8859-9", + "iso88599", + "iso_8859-9", + "iso_8859-9:1989", + "l5", + "latin5", + "windows-1254", + "x-cp1254" + ], + "name": "windows-1254" + }, + { + "labels": [ + "cp1255", + "windows-1255", + "x-cp1255" + ], + "name": "windows-1255" + }, + { + "labels": [ + "cp1256", + "windows-1256", + "x-cp1256" + ], + "name": "windows-1256" + }, + { + "labels": [ + "cp1257", + "windows-1257", + "x-cp1257" + ], + "name": "windows-1257" + }, + { + "labels": [ + "cp1258", + "windows-1258", + "x-cp1258" + ], + "name": "windows-1258" + }, + { + "labels": [ + "x-mac-cyrillic", + "x-mac-ukrainian" + ], + "name": "x-mac-cyrillic" + } + ], + "heading": "Legacy single-byte encodings" + }, + { + "encodings": [ + { + "labels": [ + "chinese", + "csgb2312", + "csiso58gb231280", + "gb2312", + "gb_2312", + "gb_2312-80", + "gbk", + "iso-ir-58", + "x-gbk" + ], + "name": "GBK" + }, + { + "labels": [ + "gb18030" + ], + "name": "gb18030" + } + ], + "heading": "Legacy multi-byte Chinese (simplified) encodings" + }, + { + "encodings": [ + { + "labels": [ + "big5", + "big5-hkscs", + "cn-big5", + "csbig5", + "x-x-big5" + ], + "name": "Big5" + } + ], + "heading": "Legacy multi-byte Chinese (traditional) encodings" + }, + { + "encodings": [ + { + "labels": [ + "cseucpkdfmtjapanese", + "euc-jp", + "x-euc-jp" + ], + "name": "EUC-JP" + }, + { + "labels": [ + "csiso2022jp", + "iso-2022-jp" + ], + "name": "ISO-2022-JP" + }, + { + "labels": [ + "csshiftjis", + "ms932", + "ms_kanji", + "shift-jis", + "shift_jis", + "sjis", + "windows-31j", + "x-sjis" + ], + "name": "Shift_JIS" + } + ], + "heading": "Legacy multi-byte Japanese encodings" + }, + { + "encodings": [ + { + "labels": [ + "cseuckr", + "csksc56011987", + "euc-kr", + "iso-ir-149", + "korean", + "ks_c_5601-1987", + "ks_c_5601-1989", + "ksc5601", + "ksc_5601", + "windows-949" + ], + "name": "EUC-KR" + } + ], + "heading": "Legacy multi-byte Korean encodings" + }, + { + "encodings": [ + { + "labels": [ + "csiso2022kr", + "hz-gb-2312", + "iso-2022-cn", + "iso-2022-cn-ext", + "iso-2022-kr" + ], + "name": "replacement" + }, + { + "labels": [ + "utf-16be" + ], + "name": "UTF-16BE" + }, + { + "labels": [ + "utf-16", + "utf-16le" + ], + "name": "UTF-16LE" + }, + { + "labels": [ + "x-user-defined" + ], + "name": "x-user-defined" + } + ], + "heading": "Legacy miscellaneous encodings" + } +] diff --git a/cpan/Encode/t/whatwg-aliases.t b/cpan/Encode/t/whatwg-aliases.t new file mode 100644 index 0000000000..ccb8a16e01 --- /dev/null +++ b/cpan/Encode/t/whatwg-aliases.t @@ -0,0 +1,66 @@ +# This test checks aliases support based on the list in the +# WHATWG Encoding Living Standard +# +# https://encoding.spec.whatwg.org/ +# +# The input of this test is the file whatwg-aliases.json downloaded from +# https://encoding.spec.whatwg.org/encodings.json +# +# To run: +# AUTHOR_TESTING=1 prove -l t/whatwg-aliases.t + + +use Test::More + $ENV{AUTHOR_TESTING} + ? 'no_plan' + : (skip_all => 'For maintainers only'); +use Encode 'find_encoding'; +use JSON::PP 'decode_json'; +use File::Spec; +use FindBin; + +my $encodings = decode_json(do { + # https://encoding.spec.whatwg.org/encodings.json + open my $f, '<', File::Spec->catdir($FindBin::Bin, 'whatwg-aliases.json'); + local $/; + <$f> +}); + +my %IGNORE = map { $_ => '' } qw( + replacement + utf8 +); + +my %TODO = ( + 'ISO-8859-8-I' => 'Not supported', + 'gb18030' => 'Not supported', + '866' => 'Not supported', + 'x-user-defined' => 'Not supported', + # ... +); + +for my $section (@$encodings) { + for my $enc (@{$section->{encodings}}) { + + my $name = $enc->{name}; + + next if exists $IGNORE{$name}; + + local $TODO = $TODO{$name} if exists $TODO{$name}; + + my $encoding = find_encoding($name); + isa_ok($encoding, 'Encode::Encoding', $name); + + for my $label (@{$enc->{labels}}) { + local $TODO = $TODO{$label} if exists $TODO{$label}; + + my $e = find_encoding($label); + if (isa_ok($e, 'Encode::Encoding', $label)) { + next if exists $IGNORE{$label}; + is($e->name, $encoding->name, "$label ->name is $name") + } + } + } +} + +done_testing; |