diff options
author | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2003-04-05 11:28:22 +0000 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2003-04-05 11:28:22 +0000 |
commit | 9f1f04a17d93e8b8afa26e6ca9144732df879671 (patch) | |
tree | 8b5ebdfc44e09fa908ec7773043f91b8a5675134 /ext/Unicode | |
parent | 843027b0d05d16cd1217a5e0476a463b117fb188 (diff) | |
download | perl-9f1f04a17d93e8b8afa26e6ca9144732df879671.tar.gz |
Upgrade to Unicode::Normalize 0.21 and Unicode::Collate 0.24,
by SADAHIRO Tomoyuki.
p4raw-id: //depot/perl@19144
Diffstat (limited to 'ext/Unicode')
-rw-r--r-- | ext/Unicode/Normalize/Changes | 3 |
-rw-r--r-- | ext/Unicode/Normalize/Normalize.pm | 33 |
-rw-r--r-- | ext/Unicode/Normalize/README | 2 |
-rw-r--r-- | ext/Unicode/Normalize/mkheader | 19 |
-rw-r--r-- | ext/Unicode/Normalize/t/func.t | 24 |
-rw-r--r-- | ext/Unicode/Normalize/t/norm.t | 22 |
-rw-r--r-- | ext/Unicode/Normalize/t/test.t | 22 |
7 files changed, 65 insertions, 60 deletions
diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes index 30f5c4af50..92b944e7e6 100644 --- a/ext/Unicode/Normalize/Changes +++ b/ext/Unicode/Normalize/Changes @@ -1,5 +1,8 @@ Revision history for Perl extension Unicode::Normalize. +0.21 Thu Apr 02 23:12:54 2003 + - internal tweak: for (?un)pack 'U'. + 0.20 Sun Mar 02 13:29:25 2003 - decompose Hangul syllables in a decomposition mapping. diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm index e0232d30a1..14c121a756 100644 --- a/ext/Unicode/Normalize/Normalize.pm +++ b/ext/Unicode/Normalize/Normalize.pm @@ -1,8 +1,8 @@ package Unicode::Normalize; BEGIN { - if (ord("A") == 193) { - die "Unicode::Normalize not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + die "Unicode::Normalize cannot stringify a Unicode code point\n"; } } @@ -11,7 +11,7 @@ use strict; use warnings; use Carp; -our $VERSION = '0.20'; +our $VERSION = '0.21'; our $PACKAGE = __PACKAGE__; require Exporter; @@ -35,6 +35,29 @@ our %EXPORT_TAGS = ( bootstrap Unicode::Normalize $VERSION; +use constant UNICODE_FOR_PACK => "A" eq pack('U', 0x41); +use constant NATIVE_FOR_PACK => "A" eq pack('U', ord("A")); + +use constant UNICODE_FOR_UNPACK => 0x41 == unpack('U', "A"); +use constant NATIVE_FOR_UNPACK => ord("A") == unpack('U', "A"); + +sub pack_U { + return UNICODE_FOR_PACK + ? pack('U*', @_) + : NATIVE_FOR_PACK + ? pack('U*', map utf8::unicode_to_native($_), @_) + : die "$PACKAGE, a Unicode code point cannot be stringified.\n"; +} + +sub unpack_U { + return UNICODE_FOR_UNPACK + ? unpack('U*', shift) + : NATIVE_FOR_UNPACK + ? map(utf8::native_to_unicode($_), unpack 'U*', shift) + : die "$PACKAGE, a code point returned from unpack U " . + "cannot be converted into Unicode.\n"; +} + use constant COMPAT => 1; sub NFD ($) { reorder(decompose($_[0])) } @@ -136,7 +159,7 @@ As C<$form_name>, one of the following names must be given. 
=item C<$decomposed_string = decompose($string, $useCompatMapping)> -Decompose the specified string and returns the result. +Decomposes the specified string and returns the result. If the second parameter (a boolean) is omitted or false, decomposes it using the Canonical Decomposition Mapping. @@ -150,7 +173,7 @@ Reordering may be required. =item C<$reordered_string = reorder($string)> -Reorder the combining characters and the like in the canonical ordering +Reorders the combining characters and the like in the canonical ordering and returns the result. E.g., when you have a list of NFD/NFKD strings, diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README index f1b1754dd4..8447502782 100644 --- a/ext/Unicode/Normalize/README +++ b/ext/Unicode/Normalize/README @@ -1,4 +1,4 @@ -Unicode/Normalize version 0.20 +Unicode/Normalize version 0.21 =================================== Unicode::Normalize - Unicode Normalization Forms diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader index 6cac3905fb..e2c4f1244b 100644 --- a/ext/Unicode/Normalize/mkheader +++ b/ext/Unicode/Normalize/mkheader @@ -15,7 +15,11 @@ use warnings; use Carp; use File::Spec; -our $IsEBCDIC = ord("A") != 0x41; +BEGIN { + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + die "Unicode::Normalize cannot stringify a Unicode code point\n"; + } +} our $PACKAGE = 'Unicode::Normalize, mkheader'; @@ -197,12 +201,17 @@ foreach my $key (keys %Compat) { $Compat{$key} = [ getCompatList($key) ]; } +sub _pack_U { + return "A" eq pack('U', 0x41) + ? pack('U*', @_) + : "A" eq pack('U', ord("A")) + ? pack('U*', map utf8::unicode_to_native($_), @_) + : die "$PACKAGE, a Unicode code point cannot be stringified.\n"; +} + sub _U_stringify { sprintf '"%s"', join '', - map sprintf("\\x%02x", $_), unpack 'C*', - $IsEBCDIC - ? 
pack('U*', map utf8::unicode_to_native($_), @_) - : pack('U*', @_); + map sprintf("\\x%02x", $_), unpack 'C*', _pack_U(@_); } foreach my $hash (\%Canon, \%Compat) { diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t index d540d99226..81e092a96c 100644 --- a/ext/Unicode/Normalize/t/func.t +++ b/ext/Unicode/Normalize/t/func.t @@ -1,7 +1,8 @@ BEGIN { - if (ord("A") == 193) { - print "1..0 # Unicode::Normalize not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; exit 0; } } @@ -9,7 +10,7 @@ BEGIN { BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; - @INC = qw(../lib); + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -22,19 +23,8 @@ BEGIN { plan tests => 13 }; use Unicode::Normalize qw(:all); ok(1); # If we made it this far, we're ok. -our $IsEBCDIC = ord("A") != 0x41; - -sub _pack_U { - return $IsEBCDIC - ? pack('U*', map utf8::unicode_to_native($_), @_) - : pack('U*', @_); -} - -sub _unpack_U { - return $IsEBCDIC - ? map(utf8::native_to_unicode($_), unpack 'U*', shift) - : unpack('U*', shift); -} +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } ######################### @@ -50,7 +40,7 @@ print ! 
defined getCanon( 0) && getCanon(0x00EF) eq _pack_U(0x0069, 0x0308) && getCanon(0x304C) eq _pack_U(0x304B, 0x3099) && getCanon(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301) - && getCanon(0x1F82) eq "\x{03B1}\x{0313}\x{0300}\x{0345}" + && getCanon(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345) && getCanon(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345) && getCanon(0xAC00) eq _pack_U(0x1100, 0x1161) && getCanon(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF) diff --git a/ext/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t index 77ca218d7d..76ee255ec0 100644 --- a/ext/Unicode/Normalize/t/norm.t +++ b/ext/Unicode/Normalize/t/norm.t @@ -1,7 +1,8 @@ BEGIN { - if (ord("A") == 193) { - print "1..0 # Unicode::Normalize not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; exit 0; } } @@ -9,7 +10,7 @@ BEGIN { BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; - @INC = qw(../lib); + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -22,19 +23,8 @@ BEGIN { plan tests => 18 }; use Unicode::Normalize qw(normalize); ok(1); # If we made it this far, we're ok. -our $IsEBCDIC = ord("A") != 0x41; - -sub _pack_U { - return $IsEBCDIC - ? pack('U*', map utf8::unicode_to_native($_), @_) - : pack('U*', @_); -} - -sub _unpack_U { - return $IsEBCDIC - ? map(utf8::native_to_unicode($_), unpack 'U*', shift) - : unpack('U*', shift); -} +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } ######################### diff --git a/ext/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t index db1a53673f..b98a8b83b0 100644 --- a/ext/Unicode/Normalize/t/test.t +++ b/ext/Unicode/Normalize/t/test.t @@ -1,7 +1,8 @@ BEGIN { - if (ord("A") == 193) { - print "1..0 # Unicode::Normalize not ported to EBCDIC\n"; + unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) { + print "1..0 # Unicode::Normalize " . 
+ "cannot stringify a Unicode code point\n"; exit 0; } } @@ -9,7 +10,7 @@ BEGIN { BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; - @INC = qw(../lib); + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); } } @@ -22,19 +23,8 @@ BEGIN { plan tests => 20 }; use Unicode::Normalize; ok(1); # If we made it this far, we're ok. -our $IsEBCDIC = ord("A") != 0x41; - -sub _pack_U { - return $IsEBCDIC - ? pack('U*', map utf8::unicode_to_native($_), @_) - : pack('U*', @_); -} - -sub _unpack_U { - return $IsEBCDIC - ? map(utf8::native_to_unicode($_), unpack 'U*', shift) - : unpack('U*', shift); -} +sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _unpack_U { Unicode::Normalize::unpack_U(@_) } ######################### |