summary refs log tree commit diff
path: root/cpan
diff options
context:
space:
mode:
author Ricardo Signes <rjbs@semiotic.systems> 2021-08-09 13:15:48 -0400
committer Ricardo Signes <rjbs@semiotic.systems> 2021-08-09 13:30:09 -0400
commit 7167e196c729bb679e5f71ce33585f3a6870507b (patch)
tree 6dcb82848fe13bda70fe266636bad4560fedd4e4 /cpan
parent c275db86a94cfa31d8d2877ec92c38efa923f762 (diff)
download perl-7167e196c729bb679e5f71ce33585f3a6870507b.tar.gz
Upgraded Encode from 3.10_01 to 3.12
Diffstat (limited to 'cpan')
-rw-r--r-- cpan/Encode/Encode.pm 5
-rw-r--r-- cpan/Encode/Unicode/Unicode.pm 4
-rw-r--r-- cpan/Encode/Unicode/Unicode.xs 6
-rw-r--r-- cpan/Encode/lib/Encode/GSM0338.pm 52
-rw-r--r-- cpan/Encode/t/Unicode.t 4
-rw-r--r-- cpan/Encode/t/Unicode_trailing_nul.t 26
-rw-r--r-- cpan/Encode/t/whatwg-aliases.json 455
-rw-r--r-- cpan/Encode/t/whatwg-aliases.t 66
8 files changed, 586 insertions, 32 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index b96a850416..841ec23afb 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -1,5 +1,5 @@
#
-# $Id: Encode.pm,v 3.10 2021/05/18 07:42:45 dankogai Exp dankogai $
+# $Id: Encode.pm,v 3.12 2021/08/09 14:17:04 dankogai Exp dankogai $
#
package Encode;
use strict;
@@ -7,8 +7,7 @@ use warnings;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
our $VERSION;
BEGIN {
- $VERSION = "3.10_01";
- $VERSION = eval $VERSION;
+ $VERSION = sprintf "%d.%02d", q$Revision: 3.12 $ =~ /(\d+)/g;
require XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
}
diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm
index 540337e94f..eb72c3903f 100644
--- a/cpan/Encode/Unicode/Unicode.pm
+++ b/cpan/Encode/Unicode/Unicode.pm
@@ -3,7 +3,7 @@ package Encode::Unicode;
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
@@ -259,7 +259,7 @@ Consider that "division by zero" of Encode :)
=head1 SEE ALSO
L<Encode>, L<Encode::Unicode::UTF7>, L<http://www.unicode.org/glossary/>,
-L<http://www.unicode.org/unicode/faq/utf_bom.html>,
+L<http://www.unicode.org/faq/utf_bom.html>,
RFC 2781 L<http://www.ietf.org/rfc/rfc2781.txt>,
diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs
index 4e111e25d7..cc4817e117 100644
--- a/cpan/Encode/Unicode/Unicode.xs
+++ b/cpan/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Unicode.xs,v 2.19 2019/01/21 03:09:59 dankogai Exp $
+ $Id: Unicode.xs,v 2.20 2021/07/23 02:26:54 dankogai Exp $
*/
#define IN_UNICODE_XS
@@ -361,6 +361,10 @@ CODE:
}
if (!temp_result) shrink_buffer(result);
+
+ /* Make sure we have a trailing NUL: */
+ *SvEND(result) = '\0';
+
if (SvTAINTED(str)) SvTAINTED_on(result); /* propagate taintedness */
XSRETURN(1);
}
diff --git a/cpan/Encode/lib/Encode/GSM0338.pm b/cpan/Encode/lib/Encode/GSM0338.pm
index 644d445285..b149c6dc9c 100644
--- a/cpan/Encode/lib/Encode/GSM0338.pm
+++ b/cpan/Encode/lib/Encode/GSM0338.pm
@@ -1,5 +1,5 @@
#
-# $Id: GSM0338.pm,v 2.9 2020/12/02 01:28:17 dankogai Exp dankogai $
+# $Id: GSM0338.pm,v 2.10 2021/05/24 10:56:53 dankogai Exp $
#
package Encode::GSM0338;
@@ -8,7 +8,7 @@ use warnings;
use Carp;
use vars qw($VERSION);
-$VERSION = do { my @r = ( q$Revision: 2.9 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+$VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -159,12 +159,12 @@ our %UNI2GSM = (
"\x{20AC}" => "\x1B\x65", # EURO SIGN
);
our %GSM2UNI = reverse %UNI2GSM;
-our $ESC = "\x1b";
+our $ESC = "\x1b";
sub decode ($$;$) {
my ( $obj, $bytes, $chk ) = @_;
return undef unless defined $bytes;
- my $str = substr($bytes, 0, 0); # to propagate taintedness;
+ my $str = substr( $bytes, 0, 0 ); # to propagate taintedness;
while ( length $bytes ) {
my $seq = '';
my $c;
@@ -173,53 +173,57 @@ sub decode ($$;$) {
$seq .= $c;
} while ( length $bytes and $c eq $ESC );
my $u =
- exists $GSM2UNI{$seq}
- ? $GSM2UNI{$seq}
- : ($chk && ref $chk eq 'CODE')
- ? $chk->( unpack 'C*', $seq )
- : "\x{FFFD}";
+ exists $GSM2UNI{$seq} ? $GSM2UNI{$seq}
+ : ( $chk && ref $chk eq 'CODE' ) ? $chk->( unpack 'C*', $seq )
+ : "\x{FFFD}";
if ( not exists $GSM2UNI{$seq} and $chk and not ref $chk ) {
- if ( substr($seq, 0, 1) eq $ESC and ($chk & Encode::STOP_AT_PARTIAL) ) {
+ if ( substr( $seq, 0, 1 ) eq $ESC
+ and ( $chk & Encode::STOP_AT_PARTIAL ) )
+ {
$bytes .= $seq;
last;
}
- croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::DIE_ON_ERR;
- carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq ) . ' does not map to Unicode' if $chk & Encode::WARN_ON_ERR;
- if ($chk & Encode::RETURN_ON_ERR) {
+ croak join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq )
+ . ' does not map to Unicode'
+ if $chk & Encode::DIE_ON_ERR;
+ carp join( '', map { sprintf "\\x%02X", $_ } unpack 'C*', $seq )
+ . ' does not map to Unicode'
+ if $chk & Encode::WARN_ON_ERR;
+ if ( $chk & Encode::RETURN_ON_ERR ) {
$bytes .= $seq;
last;
}
}
$str .= $u;
}
- $_[1] = $bytes if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+ $_[1] = $bytes if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC );
return $str;
}
sub encode($$;$) {
my ( $obj, $str, $chk ) = @_;
return undef unless defined $str;
- my $bytes = substr($str, 0, 0); # to propagate taintedness
+ my $bytes = substr( $str, 0, 0 ); # to propagate taintedness
while ( length $str ) {
my $u = substr( $str, 0, 1, '' );
my $c;
my $seq =
- exists $UNI2GSM{$u}
- ? $UNI2GSM{$u}
- : ($chk && ref $chk eq 'CODE')
- ? $chk->( ord($u) )
- : $UNI2GSM{'?'};
+ exists $UNI2GSM{$u} ? $UNI2GSM{$u}
+ : ( $chk && ref $chk eq 'CODE' ) ? $chk->( ord($u) )
+ : $UNI2GSM{'?'};
if ( not exists $UNI2GSM{$u} and $chk and not ref $chk ) {
- croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::DIE_ON_ERR;
- carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name ) if $chk & Encode::WARN_ON_ERR;
- if ($chk & Encode::RETURN_ON_ERR) {
+ croak sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name )
+ if $chk & Encode::DIE_ON_ERR;
+ carp sprintf( "\\x{%04x} does not map to %s", ord($u), $obj->name )
+ if $chk & Encode::WARN_ON_ERR;
+ if ( $chk & Encode::RETURN_ON_ERR ) {
$str .= $u;
last;
}
}
$bytes .= $seq;
}
- $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+ $_[1] = $str if not ref $chk and $chk and !( $chk & Encode::LEAVE_SRC );
return $bytes;
}
diff --git a/cpan/Encode/t/Unicode.t b/cpan/Encode/t/Unicode.t
index 2cc5d54855..61cef422fc 100644
--- a/cpan/Encode/t/Unicode.t
+++ b/cpan/Encode/t/Unicode.t
@@ -1,5 +1,5 @@
#
-# $Id: Unicode.t,v 2.3 2012/08/05 23:08:49 dankogai Exp $
+# $Id: Unicode.t,v 2.4 2021/07/23 02:26:54 dankogai Exp $
#
# This script is written entirely in ASCII, even though quoted literals
# do include non-BMP unicode characters -- Are you happy, jhi?
@@ -25,7 +25,7 @@ use Encode qw(encode decode find_encoding);
#
# see
-# http://www.unicode.org/unicode/reports/tr19/
+# http://www.unicode.org/reports/tr19/
#
my $dankogai = "\x{5c0f}\x{98fc}\x{3000}\x{5f3e}";
diff --git a/cpan/Encode/t/Unicode_trailing_nul.t b/cpan/Encode/t/Unicode_trailing_nul.t
new file mode 100644
index 0000000000..e7fb7340bb
--- /dev/null
+++ b/cpan/Encode/t/Unicode_trailing_nul.t
@@ -0,0 +1,26 @@
+use strict;
+use Test::More;
+
+use Encode;
+use File::Temp;
+use File::Spec;
+
+# This test relies on https://github.com/Perl/perl5/issues/10623;
+# if that bug is ever fixed then this test may never fail again.
+
+my $foo = Encode::decode("UTF-16LE", "/\0v\0a\0r\0/\0f\0f\0f\0f\0f\0f\0/\0u\0s\0e\0r\0s\0/\0s\0u\0p\0e\0r\0m\0a\0n\0");
+
+my ($fh, $path) = File::Temp::tempfile( CLEANUP => 1 );
+
+diag "temp file: $path";
+
+# Perl gives the internal PV to exec .. which is buggy/wrong but useful here.
+# The temp path travels in @ARGV (not spliced into the -e code) so quotes in it cannot break the child script:
+system( $^X, '-e', 'open my $fh, q{>>}, $ARGV[1] or die $!; print {$fh} $ARGV[0]', $foo, $path );
+die "child perl failed (wait status $?)" if $?;
+
+my $output = do { local $/; <$fh> };
+
+is( $output, "/var/ffffff/users/superman", 'UTF-16 decodes with trailing NUL' );
+
+done_testing();
diff --git a/cpan/Encode/t/whatwg-aliases.json b/cpan/Encode/t/whatwg-aliases.json
new file mode 100644
index 0000000000..4307b0cc48
--- /dev/null
+++ b/cpan/Encode/t/whatwg-aliases.json
@@ -0,0 +1,455 @@
+[
+ {
+ "encodings": [
+ {
+ "labels": [
+ "unicode-1-1-utf-8",
+ "utf-8",
+ "utf8"
+ ],
+ "name": "UTF-8"
+ }
+ ],
+ "heading": "The Encoding"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "866",
+ "cp866",
+ "csibm866",
+ "ibm866"
+ ],
+ "name": "IBM866"
+ },
+ {
+ "labels": [
+ "csisolatin2",
+ "iso-8859-2",
+ "iso-ir-101",
+ "iso8859-2",
+ "iso88592",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "l2",
+ "latin2"
+ ],
+ "name": "ISO-8859-2"
+ },
+ {
+ "labels": [
+ "csisolatin3",
+ "iso-8859-3",
+ "iso-ir-109",
+ "iso8859-3",
+ "iso88593",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "l3",
+ "latin3"
+ ],
+ "name": "ISO-8859-3"
+ },
+ {
+ "labels": [
+ "csisolatin4",
+ "iso-8859-4",
+ "iso-ir-110",
+ "iso8859-4",
+ "iso88594",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "l4",
+ "latin4"
+ ],
+ "name": "ISO-8859-4"
+ },
+ {
+ "labels": [
+ "csisolatincyrillic",
+ "cyrillic",
+ "iso-8859-5",
+ "iso-ir-144",
+ "iso8859-5",
+ "iso88595",
+ "iso_8859-5",
+ "iso_8859-5:1988"
+ ],
+ "name": "ISO-8859-5"
+ },
+ {
+ "labels": [
+ "arabic",
+ "asmo-708",
+ "csiso88596e",
+ "csiso88596i",
+ "csisolatinarabic",
+ "ecma-114",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-ir-127",
+ "iso8859-6",
+ "iso88596",
+ "iso_8859-6",
+ "iso_8859-6:1987"
+ ],
+ "name": "ISO-8859-6"
+ },
+ {
+ "labels": [
+ "csisolatingreek",
+ "ecma-118",
+ "elot_928",
+ "greek",
+ "greek8",
+ "iso-8859-7",
+ "iso-ir-126",
+ "iso8859-7",
+ "iso88597",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "sun_eu_greek"
+ ],
+ "name": "ISO-8859-7"
+ },
+ {
+ "labels": [
+ "csiso88598e",
+ "csisolatinhebrew",
+ "hebrew",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-ir-138",
+ "iso8859-8",
+ "iso88598",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "visual"
+ ],
+ "name": "ISO-8859-8"
+ },
+ {
+ "labels": [
+ "csiso88598i",
+ "iso-8859-8-i",
+ "logical"
+ ],
+ "name": "ISO-8859-8-I"
+ },
+ {
+ "labels": [
+ "csisolatin6",
+ "iso-8859-10",
+ "iso-ir-157",
+ "iso8859-10",
+ "iso885910",
+ "l6",
+ "latin6"
+ ],
+ "name": "ISO-8859-10"
+ },
+ {
+ "labels": [
+ "iso-8859-13",
+ "iso8859-13",
+ "iso885913"
+ ],
+ "name": "ISO-8859-13"
+ },
+ {
+ "labels": [
+ "iso-8859-14",
+ "iso8859-14",
+ "iso885914"
+ ],
+ "name": "ISO-8859-14"
+ },
+ {
+ "labels": [
+ "csisolatin9",
+ "iso-8859-15",
+ "iso8859-15",
+ "iso885915",
+ "iso_8859-15",
+ "l9"
+ ],
+ "name": "ISO-8859-15"
+ },
+ {
+ "labels": [
+ "iso-8859-16"
+ ],
+ "name": "ISO-8859-16"
+ },
+ {
+ "labels": [
+ "cskoi8r",
+ "koi",
+ "koi8",
+ "koi8-r",
+ "koi8_r"
+ ],
+ "name": "KOI8-R"
+ },
+ {
+ "labels": [
+ "koi8-ru",
+ "koi8-u"
+ ],
+ "name": "KOI8-U"
+ },
+ {
+ "labels": [
+ "csmacintosh",
+ "mac",
+ "macintosh",
+ "x-mac-roman"
+ ],
+ "name": "macintosh"
+ },
+ {
+ "labels": [
+ "dos-874",
+ "iso-8859-11",
+ "iso8859-11",
+ "iso885911",
+ "tis-620",
+ "windows-874"
+ ],
+ "name": "windows-874"
+ },
+ {
+ "labels": [
+ "cp1250",
+ "windows-1250",
+ "x-cp1250"
+ ],
+ "name": "windows-1250"
+ },
+ {
+ "labels": [
+ "cp1251",
+ "windows-1251",
+ "x-cp1251"
+ ],
+ "name": "windows-1251"
+ },
+ {
+ "labels": [
+ "ansi_x3.4-1968",
+ "ascii",
+ "cp1252",
+ "cp819",
+ "csisolatin1",
+ "ibm819",
+ "iso-8859-1",
+ "iso-ir-100",
+ "iso8859-1",
+ "iso88591",
+ "iso_8859-1",
+ "iso_8859-1:1987",
+ "l1",
+ "latin1",
+ "us-ascii",
+ "windows-1252",
+ "x-cp1252"
+ ],
+ "name": "windows-1252"
+ },
+ {
+ "labels": [
+ "cp1253",
+ "windows-1253",
+ "x-cp1253"
+ ],
+ "name": "windows-1253"
+ },
+ {
+ "labels": [
+ "cp1254",
+ "csisolatin5",
+ "iso-8859-9",
+ "iso-ir-148",
+ "iso8859-9",
+ "iso88599",
+ "iso_8859-9",
+ "iso_8859-9:1989",
+ "l5",
+ "latin5",
+ "windows-1254",
+ "x-cp1254"
+ ],
+ "name": "windows-1254"
+ },
+ {
+ "labels": [
+ "cp1255",
+ "windows-1255",
+ "x-cp1255"
+ ],
+ "name": "windows-1255"
+ },
+ {
+ "labels": [
+ "cp1256",
+ "windows-1256",
+ "x-cp1256"
+ ],
+ "name": "windows-1256"
+ },
+ {
+ "labels": [
+ "cp1257",
+ "windows-1257",
+ "x-cp1257"
+ ],
+ "name": "windows-1257"
+ },
+ {
+ "labels": [
+ "cp1258",
+ "windows-1258",
+ "x-cp1258"
+ ],
+ "name": "windows-1258"
+ },
+ {
+ "labels": [
+ "x-mac-cyrillic",
+ "x-mac-ukrainian"
+ ],
+ "name": "x-mac-cyrillic"
+ }
+ ],
+ "heading": "Legacy single-byte encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "chinese",
+ "csgb2312",
+ "csiso58gb231280",
+ "gb2312",
+ "gb_2312",
+ "gb_2312-80",
+ "gbk",
+ "iso-ir-58",
+ "x-gbk"
+ ],
+ "name": "GBK"
+ },
+ {
+ "labels": [
+ "gb18030"
+ ],
+ "name": "gb18030"
+ }
+ ],
+ "heading": "Legacy multi-byte Chinese (simplified) encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "big5",
+ "big5-hkscs",
+ "cn-big5",
+ "csbig5",
+ "x-x-big5"
+ ],
+ "name": "Big5"
+ }
+ ],
+ "heading": "Legacy multi-byte Chinese (traditional) encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "cseucpkdfmtjapanese",
+ "euc-jp",
+ "x-euc-jp"
+ ],
+ "name": "EUC-JP"
+ },
+ {
+ "labels": [
+ "csiso2022jp",
+ "iso-2022-jp"
+ ],
+ "name": "ISO-2022-JP"
+ },
+ {
+ "labels": [
+ "csshiftjis",
+ "ms932",
+ "ms_kanji",
+ "shift-jis",
+ "shift_jis",
+ "sjis",
+ "windows-31j",
+ "x-sjis"
+ ],
+ "name": "Shift_JIS"
+ }
+ ],
+ "heading": "Legacy multi-byte Japanese encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "cseuckr",
+ "csksc56011987",
+ "euc-kr",
+ "iso-ir-149",
+ "korean",
+ "ks_c_5601-1987",
+ "ks_c_5601-1989",
+ "ksc5601",
+ "ksc_5601",
+ "windows-949"
+ ],
+ "name": "EUC-KR"
+ }
+ ],
+ "heading": "Legacy multi-byte Korean encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "csiso2022kr",
+ "hz-gb-2312",
+ "iso-2022-cn",
+ "iso-2022-cn-ext",
+ "iso-2022-kr"
+ ],
+ "name": "replacement"
+ },
+ {
+ "labels": [
+ "utf-16be"
+ ],
+ "name": "UTF-16BE"
+ },
+ {
+ "labels": [
+ "utf-16",
+ "utf-16le"
+ ],
+ "name": "UTF-16LE"
+ },
+ {
+ "labels": [
+ "x-user-defined"
+ ],
+ "name": "x-user-defined"
+ }
+ ],
+ "heading": "Legacy miscellaneous encodings"
+ }
+]
diff --git a/cpan/Encode/t/whatwg-aliases.t b/cpan/Encode/t/whatwg-aliases.t
new file mode 100644
index 0000000000..ccb8a16e01
--- /dev/null
+++ b/cpan/Encode/t/whatwg-aliases.t
@@ -0,0 +1,66 @@
+# This test checks aliases support based on the list in the
+# WHATWG Encoding Living Standard
+#
+# https://encoding.spec.whatwg.org/
+#
+# The input of this test is the file whatwg-aliases.json downloaded from
+# https://encoding.spec.whatwg.org/encodings.json
+#
+# To run:
+# AUTHOR_TESTING=1 prove -l t/whatwg-aliases.t
+
+
+use Test::More
+ $ENV{AUTHOR_TESTING}
+ ? 'no_plan'
+ : (skip_all => 'For maintainers only');
+use Encode 'find_encoding';
+use JSON::PP 'decode_json';
+use File::Spec;
+use FindBin;
+
+my $encodings = decode_json(do {
+ # Slurp the local copy of https://encoding.spec.whatwg.org/encodings.json; catfile because the last component is a file
+ open my $f, '<', File::Spec->catfile($FindBin::Bin, 'whatwg-aliases.json') or die "Cannot open whatwg-aliases.json: $!";
+ local $/;
+ <$f>
+});
+
+my %IGNORE = map { $_ => '' } qw(
+ replacement
+ utf8
+);
+
+my %TODO = (
+ 'ISO-8859-8-I' => 'Not supported',
+ 'gb18030' => 'Not supported',
+ '866' => 'Not supported',
+ 'x-user-defined' => 'Not supported',
+ # ...
+);
+
+for my $section (@$encodings) {
+ for my $enc (@{$section->{encodings}}) {
+
+ my $name = $enc->{name};
+
+ next if exists $IGNORE{$name};
+
+ local $TODO = $TODO{$name} if exists $TODO{$name};
+
+ my $encoding = find_encoding($name);
+ isa_ok($encoding, 'Encode::Encoding', $name);
+
+ for my $label (@{$enc->{labels}}) {
+ local $TODO = $TODO{$label} if exists $TODO{$label};
+
+ my $e = find_encoding($label);
+ if (isa_ok($e, 'Encode::Encoding', $label)) {
+ next if exists $IGNORE{$label};
+ is($e->name, $encoding->name, "$label ->name is $name")
+ }
+ }
+ }
+}
+
+done_testing;