Update Encode to CPAN version 2.77

[DELTA] $Revision: 2.77 $ $Date: 2015/09/15 13:53:27 $ ! Unicode/Unicode.xs Unicode/Unicode.pm Address RT#107043: If no BOM is found, the routine dies. When you decode from UTF-(16|32) without -BE or LE without BOM, Encode now assumes BE accordingly to RFC2781 and the Unicode Standard version 8.0 https://rt.cpan.org/Public/Bug/Display.html?id=107043 ! Makefile.PL encoding.t Mend pull/42 ! Encode.xs Makefile.PL encoding.pm encoding.t Pulled: precompile 1252 table as that is now the Pod::Simple default https://github.com/dankogai/p5-encode/pull/42
author: Ricardo Signes <rjbs@cpan.org> 2015-09-18 13:29:43 -0400
committer: Ricardo Signes <rjbs@cpan.org> 2015-09-18 13:47:23 -0400
commit: 6916a94cde40f03bd33b3b63bf26ad8d48b399fd (patch)
tree: 4f5cf729bd996afb014dfa42d2740562a2e7189d
parent: a293d0fd7883038d8dfef01528c7398ba246b5f9 (diff)
download: perl-6916a94cde40f03bd33b3b63bf26ad8d48b399fd.tar.gz
9 files changed, 64 insertions, 33 deletions
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl
index d28fef7af3..ce207c4df8 100755
--- a/Porting/Maintainers.pl
+++ b/Porting/Maintainers.pl
@@ -385,7 +385,7 @@ use File::Glob qw(:case);
     },
 
     'Encode' => {
-        'DISTRIBUTION' => 'DANKOGAI/Encode-2.76.tar.gz',
+        'DISTRIBUTION' => 'DANKOGAI/Encode-2.77.tar.gz',
         'FILES'        => q[cpan/Encode],
     },
 
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index 1fea02b63b..574720ed27 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -1,10 +1,10 @@
 #
-# $Id: Encode.pm,v 2.76 2015/07/31 02:17:53 dankogai Exp $
+# $Id: Encode.pm,v 2.77 2015/09/15 13:53:11 dankogai Exp $
 #
 package Encode;
 use strict;
 use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.76 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.77 $ =~ /(\d+)/g;
 use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
 use XSLoader ();
 XSLoader::load( __PACKAGE__, $VERSION );
diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs
index 73f64a8d44..81b5deadb0 100644
--- a/cpan/Encode/Encode.xs
+++ b/cpan/Encode/Encode.xs
@@ -1,5 +1,5 @@
 /*
- $Id: Encode.xs,v 2.33 2015/01/22 10:17:32 dankogai Exp $
+ $Id: Encode.xs,v 2.34 2015/09/15 13:53:27 dankogai Exp dankogai $
  */
 
 #define PERL_NO_GET_CONTEXT
@@ -534,20 +534,25 @@ CODE:
         }
     }
     else {
-    	/* Native bytes - can always encode */
-    U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */
-    	while (s < e) {
-    	    UV uv = NATIVE_TO_UNI((UV) *s);
-	    s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */
+        /* Native bytes - can always encode */
+        U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */
+        while (s < e) {
+#ifdef append_utf8_from_native_byte
+            append_utf8_from_native_byte(*s, &d);
+            s++;
+#else
+            UV uv = NATIVE_TO_UNI((UV) *s);
+            s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */
             if (UNI_IS_INVARIANT(uv))
-            	*d++ = (U8)UTF_TO_NATIVE(uv);
+                *d++ = (U8)UTF_TO_NATIVE(uv);
             else {
-    	        *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
+                *d++ = (U8)UTF8_EIGHT_BIT_HI(uv);
                 *d++ = (U8)UTF8_EIGHT_BIT_LO(uv);
             }
-    }
+#endif
+        }
         SvCUR_set(dst, d- (U8 *)SvPVX(dst));
-    	*SvEND(dst) = '\0';
+        *SvEND(dst) = '\0';
     }
 
     /* Clear out translated part of source unless asked not to */
diff --git a/cpan/Encode/Makefile.PL b/cpan/Encode/Makefile.PL
index 0ee181b249..39e557090d 100644
--- a/cpan/Encode/Makefile.PL
+++ b/cpan/Encode/Makefile.PL
@@ -1,5 +1,5 @@
 #
-# $Id: Makefile.PL,v 2.14 2015/06/25 00:49:23 dankogai Exp $
+# $Id: Makefile.PL,v 2.15 2015/09/15 13:53:27 dankogai Exp dankogai $
 #
 use 5.007003;
 use strict;
@@ -15,7 +15,7 @@ $ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE};
 my %tables = 
     (
      def_t => ['ascii.ucm',
-           '8859-1.ucm',
+           '8859-1.ucm', # cp1252 is an alias thereof
            'null.ucm',
            'ctrl.ucm',
            ]
diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm
index 3d9fb87891..316768e639 100644
--- a/cpan/Encode/Unicode/Unicode.pm
+++ b/cpan/Encode/Unicode/Unicode.pm
@@ -4,7 +4,7 @@ use strict;
 use warnings;
 no warnings 'redefine';
 
-our $VERSION = do { my @r = ( q$Revision: 2.13 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.14 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
 
 use XSLoader;
 XSLoader::load( __PACKAGE__, $VERSION );
@@ -176,7 +176,13 @@ simply treated as a normal character (ZERO WIDTH NO-BREAK SPACE).
 
 When BE or LE is omitted during decode(), it checks if BOM is at the
 beginning of the string; if one is found, the endianness is set to
-what the BOM says.  If no BOM is found, the routine dies.
+what the BOM says.  
+
+=item Default Byte Order
+
+When no BOM is found, Encode 2.76 and below croaked.  Since Encode
+2.77, it falls back to BE according to RFC2781 and the Unicode
+Standard version 8.0.
 
 =item *
 
diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs
index 5f3bceb262..42f215c713 100644
--- a/cpan/Encode/Unicode/Unicode.xs
+++ b/cpan/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
 /*
- $Id: Unicode.xs,v 2.12 2015/06/25 00:49:23 dankogai Exp $
+ $Id: Unicode.xs,v 2.13 2015/09/15 13:53:27 dankogai Exp dankogai $
  */
 
 #define PERL_NO_GET_CONTEXT
@@ -166,9 +166,19 @@ CODE:
 		endian = 'V';
 	    }
 	    else {
-		croak("%"SVf":Unrecognised BOM %"UVxf,
-		      *hv_fetch((HV *)SvRV(obj),"Name",4,0),
-		      bom);
+               /* No BOM found, use big-endian fallback as specified in
+                * RFC2781 and the Unicode Standard version 8.0:
+                *
+                *  The UTF-16 encoding scheme may or may not begin with
+                *  a BOM. However, when there is no BOM, and in the
+                *  absence of a higher-level protocol, the byte order
+                *  of the UTF-16 encoding scheme is big-endian.
+                *
+                *  If the first two octets of the text is not 0xFE
+                *  followed by 0xFF, and is not 0xFF followed by 0xFE,
+                *  then the text SHOULD be interpreted as big-endian.
+                */
+                s -= size;
 	    }
 	}
 #if 1
diff --git a/cpan/Encode/encoding.pm b/cpan/Encode/encoding.pm
index ae9512e55e..8450f9ca12 100644
--- a/cpan/Encode/encoding.pm
+++ b/cpan/Encode/encoding.pm
@@ -1,6 +1,6 @@
-# $Id: encoding.pm,v 2.16 2015/06/30 09:55:44 dankogai Exp $
+# $Id: encoding.pm,v 2.17 2015/09/15 13:53:27 dankogai Exp dankogai $
 package encoding;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.16 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.17 $ =~ /(\d+)/g;
 
 use Encode;
 use strict;
@@ -12,13 +12,6 @@ use constant {
     PERL_5_21_7 => $^V && $^V ge v5.21.7,
 };
 
-BEGIN {
-    if ( ord("A") == 193 ) {
-        require Carp;
-        Carp::croak("encoding: pragma does not support EBCDIC platforms");
-    }
-}
-
 sub _exception {
     my $name = shift;
     $] > 5.008 and return 0;    # 5.8.1 or higher then no
@@ -115,6 +108,12 @@ sub _get_locale_encoding {
 }
 
 sub import {
+
+    if ( ord("A") == 193 ) {
+        require Carp;
+        Carp::croak("encoding: pragma does not support EBCDIC platforms");
+    }
+
     if ($] >= 5.017) {
 	warnings::warnif("deprecated",
 			 "Use of the encoding pragma is deprecated")
diff --git a/cpan/Encode/t/encoding.t b/cpan/Encode/t/encoding.t
index 8c7f253377..21f9e47eb7 100644
--- a/cpan/Encode/t/encoding.t
+++ b/cpan/Encode/t/encoding.t
@@ -14,7 +14,8 @@ BEGIN {
     }
 }
 
-print "1..31\n";
+print "1..33\n";
+ 
 
 no warnings "deprecated";
 use encoding "latin1"; # ignored (overwritten by the next line)
@@ -202,3 +203,13 @@ print "ok 28\n";
     print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
     print $h2{"\xdf"}    == 42 ? "ok 31\n" : "not ok 31\n";
 }
+
+# Order of finding the above-Latin1 code point should not matter: both should
+# assume Latin1/Unicode encoding
+{
+    use bytes;
+    print "not " if "\xDF\x{100}" =~ /\x{3af}\x{100}/;
+    print "ok 32\n";
+    print "not " if "\x{100}\xDF" =~ /\x{100}\x{3af}/;
+    print "ok 33\n";
+}
diff --git a/cpan/Encode/ucm/koi8-u.ucm b/cpan/Encode/ucm/koi8-u.ucm
index c955083fd8..95f07c7c78 100644
--- a/cpan/Encode/ucm/koi8-u.ucm
+++ b/cpan/Encode/ucm/koi8-u.ucm
@@ -1,7 +1,7 @@
 #
-# $Id: koi8-u.ucm,v 2.2 2015/07/31 02:18:28 dankogai Exp dankogai $
+# $Id: koi8-u.ucm,v 2.2 2015/07/31 02:18:28 dankogai Exp $
 #
-# Written $Id: koi8-u.ucm,v 2.2 2015/07/31 02:18:28 dankogai Exp dankogai $
+# Written $Id: koi8-u.ucm,v 2.2 2015/07/31 02:18:28 dankogai Exp $
 # ./compile -n koi8-u -o Encode/koi8-u.ucm Encode/koi8-u.enc
 #
 # Original table can be obtained at
author	Ricardo Signes <rjbs@cpan.org>	2015-09-18 13:29:43 -0400
committer	Ricardo Signes <rjbs@cpan.org>	2015-09-18 13:47:23 -0400
commit	6916a94cde40f03bd33b3b63bf26ad8d48b399fd (patch)
tree	4f5cf729bd996afb014dfa42d2740562a2e7189d
parent	a293d0fd7883038d8dfef01528c7398ba246b5f9 (diff)
download	perl-6916a94cde40f03bd33b3b63bf26ad8d48b399fd.tar.gz