diff options
author | Ricardo Signes <rjbs@cpan.org> | 2015-09-18 13:29:43 -0400 |
---|---|---|
committer | Ricardo Signes <rjbs@cpan.org> | 2015-09-18 13:47:23 -0400 |
commit | 6916a94cde40f03bd33b3b63bf26ad8d48b399fd (patch) | |
tree | 4f5cf729bd996afb014dfa42d2740562a2e7189d | |
parent | a293d0fd7883038d8dfef01528c7398ba246b5f9 (diff) | |
download | perl-6916a94cde40f03bd33b3b63bf26ad8d48b399fd.tar.gz |
Update Encode to CPAN version 2.77
[DELTA]
$Revision: 2.77 $ $Date: 2015/09/15 13:53:27 $
! Unicode/Unicode.xs Unicode/Unicode.pm
Address RT#107043: If no BOM is found, the routine dies.
When you decode from UTF-(16|32) without a -BE or -LE suffix and without a BOM,
Encode now assumes BE, in accordance with RFC 2781 and the Unicode
Standard version 8.0.
https://rt.cpan.org/Public/Bug/Display.html?id=107043
! Makefile.PL encoding.t
Mend pull/42
! Encode.xs Makefile.PL encoding.pm encoding.t
Pulled: precompile 1252 table as that is now the Pod::Simple default
https://github.com/dankogai/p5-encode/pull/42
-rwxr-xr-x | Porting/Maintainers.pl | 2 | ||||
-rw-r--r-- | cpan/Encode/Encode.pm | 4 | ||||
-rw-r--r-- | cpan/Encode/Encode.xs | 25 | ||||
-rw-r--r-- | cpan/Encode/Makefile.PL | 4 | ||||
-rw-r--r-- | cpan/Encode/Unicode/Unicode.pm | 10 | ||||
-rw-r--r-- | cpan/Encode/Unicode/Unicode.xs | 18 | ||||
-rw-r--r-- | cpan/Encode/encoding.pm | 17 | ||||
-rw-r--r-- | cpan/Encode/t/encoding.t | 13 | ||||
-rw-r--r-- | cpan/Encode/ucm/koi8-u.ucm | 4 |
9 files changed, 64 insertions, 33 deletions
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index d28fef7af3..ce207c4df8 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -385,7 +385,7 @@ use File::Glob qw(:case); }, 'Encode' => { - 'DISTRIBUTION' => 'DANKOGAI/Encode-2.76.tar.gz', + 'DISTRIBUTION' => 'DANKOGAI/Encode-2.77.tar.gz', 'FILES' => q[cpan/Encode], }, diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm index 1fea02b63b..574720ed27 100644 --- a/cpan/Encode/Encode.pm +++ b/cpan/Encode/Encode.pm @@ -1,10 +1,10 @@ # -# $Id: Encode.pm,v 2.76 2015/07/31 02:17:53 dankogai Exp $ +# $Id: Encode.pm,v 2.77 2015/09/15 13:53:11 dankogai Exp $ # package Encode; use strict; use warnings; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.76 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.77 $ =~ /(\d+)/g; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use XSLoader (); XSLoader::load( __PACKAGE__, $VERSION ); diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs index 73f64a8d44..81b5deadb0 100644 --- a/cpan/Encode/Encode.xs +++ b/cpan/Encode/Encode.xs @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.33 2015/01/22 10:17:32 dankogai Exp $ + $Id: Encode.xs,v 2.34 2015/09/15 13:53:27 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -534,20 +534,25 @@ CODE: } } else { - /* Native bytes - can always encode */ - U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ - while (s < e) { - UV uv = NATIVE_TO_UNI((UV) *s); - s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */ + /* Native bytes - can always encode */ + U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ + while (s < e) { +#ifdef append_utf8_from_native_byte + append_utf8_from_native_byte(*s, &d); + s++; +#else + UV uv = NATIVE_TO_UNI((UV) *s); + s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. 
*/ if (UNI_IS_INVARIANT(uv)) - *d++ = (U8)UTF_TO_NATIVE(uv); + *d++ = (U8)UTF_TO_NATIVE(uv); else { - *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); + *d++ = (U8)UTF8_EIGHT_BIT_HI(uv); *d++ = (U8)UTF8_EIGHT_BIT_LO(uv); } - } +#endif + } SvCUR_set(dst, d- (U8 *)SvPVX(dst)); - *SvEND(dst) = '\0'; + *SvEND(dst) = '\0'; } /* Clear out translated part of source unless asked not to */ diff --git a/cpan/Encode/Makefile.PL b/cpan/Encode/Makefile.PL index 0ee181b249..39e557090d 100644 --- a/cpan/Encode/Makefile.PL +++ b/cpan/Encode/Makefile.PL @@ -1,5 +1,5 @@ # -# $Id: Makefile.PL,v 2.14 2015/06/25 00:49:23 dankogai Exp $ +# $Id: Makefile.PL,v 2.15 2015/09/15 13:53:27 dankogai Exp dankogai $ # use 5.007003; use strict; @@ -15,7 +15,7 @@ $ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE}; my %tables = ( def_t => ['ascii.ucm', - '8859-1.ucm', + '8859-1.ucm', # cp1252 is an alias thereof 'null.ucm', 'ctrl.ucm', ] diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm index 3d9fb87891..316768e639 100644 --- a/cpan/Encode/Unicode/Unicode.pm +++ b/cpan/Encode/Unicode/Unicode.pm @@ -4,7 +4,7 @@ use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.13 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.14 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); @@ -176,7 +176,13 @@ simply treated as a normal character (ZERO WIDTH NO-BREAK SPACE). When BE or LE is omitted during decode(), it checks if BOM is at the beginning of the string; if one is found, the endianness is set to -what the BOM says. If no BOM is found, the routine dies. +what the BOM says. + +=item Default Byte Order + +When no BOM is found, Encode 2.76 and blow croaked. 
Since Encode +2.77, it falls back to BE accordingly to RFC2781 and the Unicode +Standard version 8.0 =item * diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs index 5f3bceb262..42f215c713 100644 --- a/cpan/Encode/Unicode/Unicode.xs +++ b/cpan/Encode/Unicode/Unicode.xs @@ -1,5 +1,5 @@ /* - $Id: Unicode.xs,v 2.12 2015/06/25 00:49:23 dankogai Exp $ + $Id: Unicode.xs,v 2.13 2015/09/15 13:53:27 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -166,9 +166,19 @@ CODE: endian = 'V'; } else { - croak("%"SVf":Unrecognised BOM %"UVxf, - *hv_fetch((HV *)SvRV(obj),"Name",4,0), - bom); + /* No BOM found, use big-endian fallback as specified in + * RFC2781 and the Unicode Standard version 8.0: + * + * The UTF-16 encoding scheme may or may not begin with + * a BOM. However, when there is no BOM, and in the + * absence of a higher-level protocol, the byte order + * of the UTF-16 encoding scheme is big-endian. + * + * If the first two octets of the text is not 0xFE + * followed by 0xFF, and is not 0xFF followed by 0xFE, + * then the text SHOULD be interpreted as big-endian. 
+ */ + s -= size; } } #if 1 diff --git a/cpan/Encode/encoding.pm b/cpan/Encode/encoding.pm index ae9512e55e..8450f9ca12 100644 --- a/cpan/Encode/encoding.pm +++ b/cpan/Encode/encoding.pm @@ -1,6 +1,6 @@ -# $Id: encoding.pm,v 2.16 2015/06/30 09:55:44 dankogai Exp $ +# $Id: encoding.pm,v 2.17 2015/09/15 13:53:27 dankogai Exp dankogai $ package encoding; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.16 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.17 $ =~ /(\d+)/g; use Encode; use strict; @@ -12,13 +12,6 @@ use constant { PERL_5_21_7 => $^V && $^V ge v5.21.7, }; -BEGIN { - if ( ord("A") == 193 ) { - require Carp; - Carp::croak("encoding: pragma does not support EBCDIC platforms"); - } -} - sub _exception { my $name = shift; $] > 5.008 and return 0; # 5.8.1 or higher then no @@ -115,6 +108,12 @@ sub _get_locale_encoding { } sub import { + + if ( ord("A") == 193 ) { + require Carp; + Carp::croak("encoding: pragma does not support EBCDIC platforms"); + } + if ($] >= 5.017) { warnings::warnif("deprecated", "Use of the encoding pragma is deprecated") diff --git a/cpan/Encode/t/encoding.t b/cpan/Encode/t/encoding.t index 8c7f253377..21f9e47eb7 100644 --- a/cpan/Encode/t/encoding.t +++ b/cpan/Encode/t/encoding.t @@ -14,7 +14,8 @@ BEGIN { } } -print "1..31\n"; +print "1..33\n"; + no warnings "deprecated"; use encoding "latin1"; # ignored (overwritten by the next line) @@ -202,3 +203,13 @@ print "ok 28\n"; print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n"; print $h2{"\xdf"} == 42 ? 
"ok 31\n" : "not ok 31\n"; } + +# Order of finding the above-Latin1 code point should not matter: both should +# assume Latin1/Unicode encoding +{ + use bytes; + print "not " if "\xDF\x{100}" =~ /\x{3af}\x{100}/; + print "ok 32\n"; + print "not " if "\x{100}\xDF" =~ /\x{100}\x{3af}/; + print "ok 33\n"; +} diff --git a/cpan/Encode/ucm/koi8-u.ucm b/cpan/Encode/ucm/koi8-u.ucm index c955083fd8..95f07c7c78 100644 --- a/cpan/Encode/ucm/koi8-u.ucm +++ b/cpan/Encode/ucm/koi8-u.ucm @@ -1,7 +1,7 @@ # -# $Id: koi8-u.ucm,v 2.2 2015/07/31 02:18:28 dankogai Exp dankogai $ +# $Id: koi8-u.ucm,v 2.2 2015/07/31 02:18:28 dankogai Exp $ # -# Written $Id: koi8-u.ucm,v 2.2 2015/07/31 02:18:28 dankogai Exp dankogai $ +# Written $Id: koi8-u.ucm,v 2.2 2015/07/31 02:18:28 dankogai Exp $ # ./compile -n koi8-u -o Encode/koi8-u.ucm Encode/koi8-u.enc # # Original table can be obtained at |