Upgrade to Unicode::Normalize 0.21 and Unicode::Collate 0.24,

by SADAHIRO Tomoyuki. p4raw-id: //depot/perl@19144
author: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2003-04-05 11:28:22 +0000
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2003-04-05 11:28:22 +0000
commit: 9f1f04a17d93e8b8afa26e6ca9144732df879671 (patch)
tree: 8b5ebdfc44e09fa908ec7773043f91b8a5675134 /ext/Unicode
parent: 843027b0d05d16cd1217a5e0476a463b117fb188 (diff)
download: perl-9f1f04a17d93e8b8afa26e6ca9144732df879671.tar.gz
7 files changed, 65 insertions, 60 deletions
diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes
index 30f5c4af50..92b944e7e6 100644
--- a/ext/Unicode/Normalize/Changes
+++ b/ext/Unicode/Normalize/Changes
@@ -1,5 +1,8 @@
 Revision history for Perl extension Unicode::Normalize.
 
+0.21  Thu Apr 02 23:12:54 2003
+	- internal tweak: for pack 'U' and unpack 'U'.
+
 0.20  Sun Mar 02 13:29:25 2003
 	- decompose Hangul syllables in a decomposition mapping.
 
diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm
index e0232d30a1..14c121a756 100644
--- a/ext/Unicode/Normalize/Normalize.pm
+++ b/ext/Unicode/Normalize/Normalize.pm
@@ -1,8 +1,8 @@
 package Unicode::Normalize;
 
 BEGIN {
-    if (ord("A") == 193) {
-	die "Unicode::Normalize not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+	die "Unicode::Normalize cannot stringify a Unicode code point\n";
     }
 }
 
@@ -11,7 +11,7 @@ use strict;
 use warnings;
 use Carp;
 
-our $VERSION = '0.20';
+our $VERSION = '0.21';
 our $PACKAGE = __PACKAGE__;
 
 require Exporter;
@@ -35,6 +35,29 @@ our %EXPORT_TAGS = (
 
 bootstrap Unicode::Normalize $VERSION;
 
+use constant UNICODE_FOR_PACK => "A" eq pack('U', 0x41);	# true if pack 'U' turns Unicode 0x41 into "A"
+use constant NATIVE_FOR_PACK  => "A" eq pack('U', ord("A"));	# true if pack 'U' turns the native ord("A") into "A"
+
+use constant UNICODE_FOR_UNPACK => 0x41 == unpack('U', "A");	# true if unpack 'U' of "A" gives Unicode 0x41
+use constant NATIVE_FOR_UNPACK  => ord("A") == unpack('U', "A");	# true if unpack 'U' of "A" gives the native ord("A")
+
+sub pack_U {	# Unicode code points in, packed character string out
+    my @unicode = @_;
+    return pack('U*', @unicode) if UNICODE_FOR_PACK;
+    return pack('U*', map utf8::unicode_to_native($_), @unicode)
+	if NATIVE_FOR_PACK;	# pack 'U' wants native code points: convert first
+    die "$PACKAGE, a Unicode code point cannot be stringified.\n";
+}
+
+sub unpack_U {	# character string in, Unicode code points out
+    my $string = shift;
+    return unpack('U*', $string) if UNICODE_FOR_UNPACK;
+    return map(utf8::native_to_unicode($_), unpack 'U*', $string)
+	if NATIVE_FOR_UNPACK;	# unpack 'U' gave native code points: convert back
+    die "$PACKAGE, a code point returned from unpack U " .
+	"cannot be converted into Unicode.\n";
+}
+
 use constant COMPAT => 1;
 
 sub NFD  ($) { reorder(decompose($_[0])) }
@@ -136,7 +159,7 @@ As C<$form_name>, one of the following names must be given.
 
 =item C<$decomposed_string = decompose($string, $useCompatMapping)>
 
-Decompose the specified string and returns the result.
+Decomposes the specified string and returns the result.
 
 If the second parameter (a boolean) is omitted or false, decomposes it
 using the Canonical Decomposition Mapping.
@@ -150,7 +173,7 @@ Reordering may be required.
 
 =item C<$reordered_string  = reorder($string)>
 
-Reorder the combining characters and the like in the canonical ordering
+Reorders the combining characters and the like in the canonical ordering
 and returns the result.
 
 E.g., when you have a list of NFD/NFKD strings,
diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README
index f1b1754dd4..8447502782 100644
--- a/ext/Unicode/Normalize/README
+++ b/ext/Unicode/Normalize/README
@@ -1,4 +1,4 @@
-Unicode/Normalize version 0.20
+Unicode/Normalize version 0.21
 ===================================
 
 Unicode::Normalize - Unicode Normalization Forms
diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader
index 6cac3905fb..e2c4f1244b 100644
--- a/ext/Unicode/Normalize/mkheader
+++ b/ext/Unicode/Normalize/mkheader
@@ -15,7 +15,11 @@ use warnings;
 use Carp;
 use File::Spec;
 
-our $IsEBCDIC = ord("A") != 0x41;
+BEGIN {
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+	die "Unicode::Normalize cannot stringify a Unicode code point\n";
+    }
+}
 
 our $PACKAGE = 'Unicode::Normalize, mkheader';
 
@@ -197,12 +201,17 @@ foreach my $key (keys %Compat) {
     $Compat{$key} = [ getCompatList($key) ];
 }
 
+sub _pack_U {	# Unicode code points in, packed character string out
+    my @unicode = @_;
+    return pack('U*', @unicode) if "A" eq pack('U', 0x41);
+    return pack('U*', map utf8::unicode_to_native($_), @unicode)
+	if "A" eq pack('U', ord("A"));	# pack 'U' takes native code points here
+    die "$PACKAGE, a Unicode code point cannot be stringified.\n";
+}
+
 sub _U_stringify {
     sprintf '"%s"', join '',
-	map sprintf("\\x%02x", $_), unpack 'C*',
-	    $IsEBCDIC
-		? pack('U*', map utf8::unicode_to_native($_), @_)
-		: pack('U*', @_);
+	map sprintf("\\x%02x", $_), unpack 'C*', _pack_U(@_);
 }
 
 foreach my $hash (\%Canon, \%Compat) {
diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t
index d540d99226..81e092a96c 100644
--- a/ext/Unicode/Normalize/t/func.t
+++ b/ext/Unicode/Normalize/t/func.t
@@ -1,7 +1,8 @@
 
 BEGIN {
-    if (ord("A") == 193) {
-	print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+	print "1..0 # Unicode::Normalize " .
+	    "cannot stringify a Unicode code point\n";
 	exit 0;
     }
 }
@@ -9,7 +10,7 @@ BEGIN {
 BEGIN {
     if ($ENV{PERL_CORE}) {
         chdir('t') if -d 't';
-        @INC = qw(../lib);
+        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
     }
 }
 
@@ -22,19 +23,8 @@ BEGIN { plan tests => 13 };
 use Unicode::Normalize qw(:all);
 ok(1); # If we made it this far, we're ok.
 
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
-    return $IsEBCDIC
-	? pack('U*', map utf8::unicode_to_native($_), @_)
-	: pack('U*', @_);
-}
-
-sub _unpack_U {
-    return $IsEBCDIC
-	? map(utf8::native_to_unicode($_), unpack 'U*', shift)
-	: unpack('U*', shift);
-}
+sub _pack_U   { return Unicode::Normalize::pack_U(@_) }	# forwards to the module's pack_U
+sub _unpack_U { return Unicode::Normalize::unpack_U(@_) }	# forwards to the module's unpack_U
 
 #########################
 
@@ -50,7 +40,7 @@ print ! defined getCanon( 0)
    && getCanon(0x00EF) eq _pack_U(0x0069, 0x0308)
    && getCanon(0x304C) eq _pack_U(0x304B, 0x3099)
    && getCanon(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301)
-   && getCanon(0x1F82) eq "\x{03B1}\x{0313}\x{0300}\x{0345}"
+   && getCanon(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)
    && getCanon(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)
    && getCanon(0xAC00) eq _pack_U(0x1100, 0x1161)
    && getCanon(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF)
diff --git a/ext/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t
index 77ca218d7d..76ee255ec0 100644
--- a/ext/Unicode/Normalize/t/norm.t
+++ b/ext/Unicode/Normalize/t/norm.t
@@ -1,7 +1,8 @@
 
 BEGIN {
-    if (ord("A") == 193) {
-	print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+	print "1..0 # Unicode::Normalize " .
+	    "cannot stringify a Unicode code point\n";
 	exit 0;
     }
 }
@@ -9,7 +10,7 @@ BEGIN {
 BEGIN {
     if ($ENV{PERL_CORE}) {
         chdir('t') if -d 't';
-        @INC = qw(../lib);
+        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
     }
 }
 
@@ -22,19 +23,8 @@ BEGIN { plan tests => 18 };
 use Unicode::Normalize qw(normalize);
 ok(1); # If we made it this far, we're ok.
 
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
-    return $IsEBCDIC
-	? pack('U*', map utf8::unicode_to_native($_), @_)
-	: pack('U*', @_);
-}
-
-sub _unpack_U {
-    return $IsEBCDIC
-	? map(utf8::native_to_unicode($_), unpack 'U*', shift)
-	: unpack('U*', shift);
-}
+sub _pack_U   { return Unicode::Normalize::pack_U(@_) }	# forwards to the module's pack_U
+sub _unpack_U { return Unicode::Normalize::unpack_U(@_) }	# forwards to the module's unpack_U
 
 #########################
 
diff --git a/ext/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t
index db1a53673f..b98a8b83b0 100644
--- a/ext/Unicode/Normalize/t/test.t
+++ b/ext/Unicode/Normalize/t/test.t
@@ -1,7 +1,8 @@
 
 BEGIN {
-    if (ord("A") == 193) {
-	print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+	print "1..0 # Unicode::Normalize " .
+	    "cannot stringify a Unicode code point\n";
 	exit 0;
     }
 }
@@ -9,7 +10,7 @@ BEGIN {
 BEGIN {
     if ($ENV{PERL_CORE}) {
         chdir('t') if -d 't';
-        @INC = qw(../lib);
+        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
     }
 }
 
@@ -22,19 +23,8 @@ BEGIN { plan tests => 20 };
 use Unicode::Normalize;
 ok(1); # If we made it this far, we're ok.
 
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
-    return $IsEBCDIC
-	? pack('U*', map utf8::unicode_to_native($_), @_)
-	: pack('U*', @_);
-}
-
-sub _unpack_U {
-    return $IsEBCDIC
-	? map(utf8::native_to_unicode($_), unpack 'U*', shift)
-	: unpack('U*', shift);
-}
+sub _pack_U   { return Unicode::Normalize::pack_U(@_) }	# forwards to the module's pack_U
+sub _unpack_U { return Unicode::Normalize::unpack_U(@_) }	# forwards to the module's unpack_U
 
 #########################
author	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>	2003-04-05 11:28:22 +0000
committer	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>	2003-04-05 11:28:22 +0000
commit	9f1f04a17d93e8b8afa26e6ca9144732df879671 (patch)
tree	8b5ebdfc44e09fa908ec7773043f91b8a5675134 /ext/Unicode
parent	843027b0d05d16cd1217a5e0476a463b117fb188 (diff)
download	perl-9f1f04a17d93e8b8afa26e6ca9144732df879671.tar.gz