summary | refs | log | tree | commit | diff
path: root/ext/Unicode
diff options
context:
space:
mode:
author: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2003-04-05 11:28:22 +0000
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2003-04-05 11:28:22 +0000
commit: 9f1f04a17d93e8b8afa26e6ca9144732df879671 (patch)
tree: 8b5ebdfc44e09fa908ec7773043f91b8a5675134 /ext/Unicode
parent: 843027b0d05d16cd1217a5e0476a463b117fb188 (diff)
download: perl-9f1f04a17d93e8b8afa26e6ca9144732df879671.tar.gz
Upgrade to Unicode::Normalize 0.21 and Unicode::Collate 0.24,
by SADAHIRO Tomoyuki. p4raw-id: //depot/perl@19144
Diffstat (limited to 'ext/Unicode')
-rw-r--r-- ext/Unicode/Normalize/Changes 3
-rw-r--r-- ext/Unicode/Normalize/Normalize.pm 33
-rw-r--r-- ext/Unicode/Normalize/README 2
-rw-r--r-- ext/Unicode/Normalize/mkheader 19
-rw-r--r-- ext/Unicode/Normalize/t/func.t 24
-rw-r--r-- ext/Unicode/Normalize/t/norm.t 22
-rw-r--r-- ext/Unicode/Normalize/t/test.t 22
7 files changed, 65 insertions, 60 deletions
diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes
index 30f5c4af50..92b944e7e6 100644
--- a/ext/Unicode/Normalize/Changes
+++ b/ext/Unicode/Normalize/Changes
@@ -1,5 +1,8 @@
Revision history for Perl extension Unicode::Normalize.
+0.21 Thu Apr 02 23:12:54 2003
+ - internal tweak: for (?un)pack 'U'.
+
0.20 Sun Mar 02 13:29:25 2003
- decompose Hangul syllables in a decomposition mapping.
diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm
index e0232d30a1..14c121a756 100644
--- a/ext/Unicode/Normalize/Normalize.pm
+++ b/ext/Unicode/Normalize/Normalize.pm
@@ -1,8 +1,8 @@
package Unicode::Normalize;
BEGIN {
- if (ord("A") == 193) {
- die "Unicode::Normalize not ported to EBCDIC\n";
+ unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+ die "Unicode::Normalize cannot stringify a Unicode code point\n";
}
}
@@ -11,7 +11,7 @@ use strict;
use warnings;
use Carp;
-our $VERSION = '0.20';
+our $VERSION = '0.21';
our $PACKAGE = __PACKAGE__;
require Exporter;
@@ -35,6 +35,29 @@ our %EXPORT_TAGS = (
bootstrap Unicode::Normalize $VERSION;
+use constant UNICODE_FOR_PACK => "A" eq pack('U', 0x41);
+use constant NATIVE_FOR_PACK => "A" eq pack('U', ord("A"));
+
+use constant UNICODE_FOR_UNPACK => 0x41 == unpack('U', "A");
+use constant NATIVE_FOR_UNPACK => ord("A") == unpack('U', "A");
+
+sub pack_U {
+ return UNICODE_FOR_PACK
+ ? pack('U*', @_)
+ : NATIVE_FOR_PACK
+ ? pack('U*', map utf8::unicode_to_native($_), @_)
+ : die "$PACKAGE, a Unicode code point cannot be stringified.\n";
+}
+
+sub unpack_U {
+ return UNICODE_FOR_UNPACK
+ ? unpack('U*', shift)
+ : NATIVE_FOR_UNPACK
+ ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
+ : die "$PACKAGE, a code point returned from unpack U " .
+ "cannot be converted into Unicode.\n";
+}
+
use constant COMPAT => 1;
sub NFD ($) { reorder(decompose($_[0])) }
@@ -136,7 +159,7 @@ As C<$form_name>, one of the following names must be given.
=item C<$decomposed_string = decompose($string, $useCompatMapping)>
-Decompose the specified string and returns the result.
+Decomposes the specified string and returns the result.
If the second parameter (a boolean) is omitted or false, decomposes it
using the Canonical Decomposition Mapping.
@@ -150,7 +173,7 @@ Reordering may be required.
=item C<$reordered_string = reorder($string)>
-Reorder the combining characters and the like in the canonical ordering
+Reorders the combining characters and the like in the canonical ordering
and returns the result.
E.g., when you have a list of NFD/NFKD strings,
diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README
index f1b1754dd4..8447502782 100644
--- a/ext/Unicode/Normalize/README
+++ b/ext/Unicode/Normalize/README
@@ -1,4 +1,4 @@
-Unicode/Normalize version 0.20
+Unicode/Normalize version 0.21
===================================
Unicode::Normalize - Unicode Normalization Forms
diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader
index 6cac3905fb..e2c4f1244b 100644
--- a/ext/Unicode/Normalize/mkheader
+++ b/ext/Unicode/Normalize/mkheader
@@ -15,7 +15,11 @@ use warnings;
use Carp;
use File::Spec;
-our $IsEBCDIC = ord("A") != 0x41;
+BEGIN {
+ unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+ die "Unicode::Normalize cannot stringify a Unicode code point\n";
+ }
+}
our $PACKAGE = 'Unicode::Normalize, mkheader';
@@ -197,12 +201,17 @@ foreach my $key (keys %Compat) {
$Compat{$key} = [ getCompatList($key) ];
}
+sub _pack_U {
+ return "A" eq pack('U', 0x41)
+ ? pack('U*', @_)
+ : "A" eq pack('U', ord("A"))
+ ? pack('U*', map utf8::unicode_to_native($_), @_)
+ : die "$PACKAGE, a Unicode code point cannot be stringified.\n";
+}
+
sub _U_stringify {
sprintf '"%s"', join '',
- map sprintf("\\x%02x", $_), unpack 'C*',
- $IsEBCDIC
- ? pack('U*', map utf8::unicode_to_native($_), @_)
- : pack('U*', @_);
+ map sprintf("\\x%02x", $_), unpack 'C*', _pack_U(@_);
}
foreach my $hash (\%Canon, \%Compat) {
diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t
index d540d99226..81e092a96c 100644
--- a/ext/Unicode/Normalize/t/func.t
+++ b/ext/Unicode/Normalize/t/func.t
@@ -1,7 +1,8 @@
BEGIN {
- if (ord("A") == 193) {
- print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+ unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
exit 0;
}
}
@@ -9,7 +10,7 @@ BEGIN {
BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
- @INC = qw(../lib);
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -22,19 +23,8 @@ BEGIN { plan tests => 13 };
use Unicode::Normalize qw(:all);
ok(1); # If we made it this far, we're ok.
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
- return $IsEBCDIC
- ? pack('U*', map utf8::unicode_to_native($_), @_)
- : pack('U*', @_);
-}
-
-sub _unpack_U {
- return $IsEBCDIC
- ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
- : unpack('U*', shift);
-}
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
#########################
@@ -50,7 +40,7 @@ print ! defined getCanon( 0)
&& getCanon(0x00EF) eq _pack_U(0x0069, 0x0308)
&& getCanon(0x304C) eq _pack_U(0x304B, 0x3099)
&& getCanon(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301)
- && getCanon(0x1F82) eq "\x{03B1}\x{0313}\x{0300}\x{0345}"
+ && getCanon(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)
&& getCanon(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)
&& getCanon(0xAC00) eq _pack_U(0x1100, 0x1161)
&& getCanon(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF)
diff --git a/ext/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t
index 77ca218d7d..76ee255ec0 100644
--- a/ext/Unicode/Normalize/t/norm.t
+++ b/ext/Unicode/Normalize/t/norm.t
@@ -1,7 +1,8 @@
BEGIN {
- if (ord("A") == 193) {
- print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+ unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
exit 0;
}
}
@@ -9,7 +10,7 @@ BEGIN {
BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
- @INC = qw(../lib);
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -22,19 +23,8 @@ BEGIN { plan tests => 18 };
use Unicode::Normalize qw(normalize);
ok(1); # If we made it this far, we're ok.
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
- return $IsEBCDIC
- ? pack('U*', map utf8::unicode_to_native($_), @_)
- : pack('U*', @_);
-}
-
-sub _unpack_U {
- return $IsEBCDIC
- ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
- : unpack('U*', shift);
-}
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
#########################
diff --git a/ext/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t
index db1a53673f..b98a8b83b0 100644
--- a/ext/Unicode/Normalize/t/test.t
+++ b/ext/Unicode/Normalize/t/test.t
@@ -1,7 +1,8 @@
BEGIN {
- if (ord("A") == 193) {
- print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+ unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+ print "1..0 # Unicode::Normalize " .
+ "cannot stringify a Unicode code point\n";
exit 0;
}
}
@@ -9,7 +10,7 @@ BEGIN {
BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
- @INC = qw(../lib);
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
}
}
@@ -22,19 +23,8 @@ BEGIN { plan tests => 20 };
use Unicode::Normalize;
ok(1); # If we made it this far, we're ok.
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
- return $IsEBCDIC
- ? pack('U*', map utf8::unicode_to_native($_), @_)
- : pack('U*', @_);
-}
-
-sub _unpack_U {
- return $IsEBCDIC
- ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
- : unpack('U*', shift);
-}
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
#########################