diff options
author | Ricardo Signes <rjbs@cpan.org> | 2015-09-18 13:29:43 -0400 |
---|---|---|
committer | Ricardo Signes <rjbs@cpan.org> | 2015-09-18 13:47:23 -0400 |
commit | 6916a94cde40f03bd33b3b63bf26ad8d48b399fd (patch) | |
tree | 4f5cf729bd996afb014dfa42d2740562a2e7189d /cpan/Encode/Unicode | |
parent | a293d0fd7883038d8dfef01528c7398ba246b5f9 (diff) | |
download | perl-6916a94cde40f03bd33b3b63bf26ad8d48b399fd.tar.gz |
Update Encode to CPAN version 2.77
[DELTA]
$Revision: 2.77 $ $Date: 2015/09/15 13:53:27 $
! Unicode/Unicode.xs Unicode/Unicode.pm
Address RT#107043: If no BOM is found, the routine dies.
When you decode from UTF-(16|32) without a -BE or -LE suffix and no BOM is
present, Encode now assumes BE, according to RFC2781 and the Unicode
Standard version 8.0.
https://rt.cpan.org/Public/Bug/Display.html?id=107043
! Makefile.PL encoding.t
Mend pull/42
! Encode.xs Makefile.PL encoding.pm encoding.t
Pulled: precompile 1252 table as that is now the Pod::Simple default
https://github.com/dankogai/p5-encode/pull/42
Diffstat (limited to 'cpan/Encode/Unicode')
-rw-r--r-- | cpan/Encode/Unicode/Unicode.pm | 10 | ||||
-rw-r--r-- | cpan/Encode/Unicode/Unicode.xs | 18 |
2 files changed, 22 insertions, 6 deletions
diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm index 3d9fb87891..316768e639 100644 --- a/cpan/Encode/Unicode/Unicode.pm +++ b/cpan/Encode/Unicode/Unicode.pm @@ -4,7 +4,7 @@ use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.13 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.14 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); @@ -176,7 +176,13 @@ simply treated as a normal character (ZERO WIDTH NO-BREAK SPACE). When BE or LE is omitted during decode(), it checks if BOM is at the beginning of the string; if one is found, the endianness is set to -what the BOM says. If no BOM is found, the routine dies. +what the BOM says. + +=item Default Byte Order + +When no BOM is found, Encode 2.76 and blow croaked. Since Encode +2.77, it falls back to BE accordingly to RFC2781 and the Unicode +Standard version 8.0 =item * diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs index 5f3bceb262..42f215c713 100644 --- a/cpan/Encode/Unicode/Unicode.xs +++ b/cpan/Encode/Unicode/Unicode.xs @@ -1,5 +1,5 @@ /* - $Id: Unicode.xs,v 2.12 2015/06/25 00:49:23 dankogai Exp $ + $Id: Unicode.xs,v 2.13 2015/09/15 13:53:27 dankogai Exp dankogai $ */ #define PERL_NO_GET_CONTEXT @@ -166,9 +166,19 @@ CODE: endian = 'V'; } else { - croak("%"SVf":Unrecognised BOM %"UVxf, - *hv_fetch((HV *)SvRV(obj),"Name",4,0), - bom); + /* No BOM found, use big-endian fallback as specified in + * RFC2781 and the Unicode Standard version 8.0: + * + * The UTF-16 encoding scheme may or may not begin with + * a BOM. However, when there is no BOM, and in the + * absence of a higher-level protocol, the byte order + * of the UTF-16 encoding scheme is big-endian. + * + * If the first two octets of the text is not 0xFE + * followed by 0xFF, and is not 0xFF followed by 0xFE, + * then the text SHOULD be interpreted as big-endian. 
+ */ + s -= size; } } #if 1 |