Update Unicode-Normalize to CPAN release 1.06

[DELTA] 1.06 Thu Feb 11 16:19:54 2010 - Pure Perl/mkheader: fixed the internal _getHexArray() for perl 5.11.3 changes (Bug #53197). 1.05 Mon Sep 28 21:43:17 2009 - normalize_partial() and NFX_partial(). { NFX =~ /^NFK?[CD]\z/ } - added partial1.t for NFX_partial(). - added partial2.t for normalize_partial(). 1.04 Wed Sep 23 22:32:57 2009 - doc: splitOnLastStarter() since 0.24 is now documented. - test: some new tests are added to split.t.
author: Chris 'BinGOs' Williams <chris@bingosnet.co.uk> 2010-06-27 19:51:20 +0100
committer: Chris 'BinGOs' Williams <chris@bingosnet.co.uk> 2010-06-27 19:51:20 +0100
commit: a96160d631b54897af2f2540335c2ca854dcea68 (patch)
tree: 2cf0106edd8be54b706dbc2ba0df8a0da0cd7129
parent: 6ad8f254c95c6d4523948ded91d651dcc490dee5 (diff)
download: perl-a96160d631b54897af2f2540335c2ca854dcea68.tar.gz
10 files changed, 490 insertions, 102 deletions
diff --git a/MANIFEST b/MANIFEST
index bc386928a0..9a7f322000 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -2512,6 +2512,8 @@ cpan/Unicode-Normalize/t/form.t		Unicode::Normalize
 cpan/Unicode-Normalize/t/func.t		Unicode::Normalize
 cpan/Unicode-Normalize/t/illegal.t	Unicode::Normalize
 cpan/Unicode-Normalize/t/norm.t		Unicode::Normalize
+cpan/Unicode-Normalize/t/partial1.t		Unicode::Normalize
+cpan/Unicode-Normalize/t/partial2.t		Unicode::Normalize
 cpan/Unicode-Normalize/t/null.t		Unicode::Normalize
 cpan/Unicode-Normalize/t/proto.t	Unicode::Normalize
 cpan/Unicode-Normalize/t/split.t	Unicode::Normalize
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl
index e8d674d35e..18e6995319 100755
--- a/Porting/Maintainers.pl
+++ b/Porting/Maintainers.pl
@@ -1501,7 +1501,7 @@ use File::Glob qw(:case);
     'Unicode::Normalize' =>
 	{
 	'MAINTAINER'	=> 'sadahiro',
-	'DISTRIBUTION'	=> 'SADAHIRO/Unicode-Normalize-1.03.tar.gz',
+	'DISTRIBUTION'	=> 'SADAHIRO/Unicode-Normalize-1.06-withoutworldwriteables.tar.gz',
 	'FILES'		=> q[cpan/Unicode-Normalize],
 	'EXCLUDED'	=> [ qw{MANIFEST.N Normalize.pmN disableXS enableXS }],
 	'UPSTREAM'	=> 'first-come',
diff --git a/cpan/Unicode-Normalize/Changes b/cpan/Unicode-Normalize/Changes
index e9cb3918a5..433edb4db7 100644
--- a/cpan/Unicode-Normalize/Changes
+++ b/cpan/Unicode-Normalize/Changes
@@ -1,5 +1,18 @@
 Revision history for Perl extension Unicode::Normalize.
 
+1.06  Thu Feb 11 16:19:54 2010
+    - Pure Perl/mkheader: fixed the internal _getHexArray() for perl 5.11.3
+      changes (Bug #53197).
+
+1.05  Mon Sep 28 21:43:17 2009
+    - normalize_partial() and NFX_partial(). { NFX =~ /^NFK?[CD]\z/ }
+    - added partial1.t for NFX_partial().
+    - added partial2.t for normalize_partial().
+
+1.04  Wed Sep 23 22:32:57 2009
+    - doc: splitOnLastStarter() since 0.24 is now documented.
+    - test: some new tests are added to split.t.
+
 1.03  Sun Mar 29 12:56:23 2009
     - mkheader: check if no composition needs growing the string.
     - Makefile.PL: a tweak
@@ -69,7 +82,8 @@ Revision history for Perl extension Unicode::Normalize.
         script files, named "enableXS" and "disableXS".
         (no longer <perl Makefile.PL xs> and <perl Makefile.PL noxs>.)
       * simplified Makefile.PL.
-    - added fcdc.t and split.t.
+    - added fcdc.t for FCD() and FCC().
+    - added split.t for splitOnLastStarter(): an undocumented function.
 
 0.23  Sat Jun 28 20:38:10 2003
     - bug fix: \0-terminate in compose() in XS.
diff --git a/cpan/Unicode-Normalize/Normalize.pm b/cpan/Unicode-Normalize/Normalize.pm
index ad5ff82a83..00be139070 100644
--- a/cpan/Unicode-Normalize/Normalize.pm
+++ b/cpan/Unicode-Normalize/Normalize.pm
@@ -13,13 +13,9 @@ use Carp;
 
 no warnings 'utf8';
 
-our $VERSION = '1.03';
+our $VERSION = '1.06';
 our $PACKAGE = __PACKAGE__;
 
-require Exporter;
-require DynaLoader;
-
-our @ISA = qw(Exporter DynaLoader);
 our @EXPORT = qw( NFC NFD NFKC NFKD );
 our @EXPORT_OK = qw(
     normalize decompose reorder compose
@@ -27,8 +23,8 @@ our @EXPORT_OK = qw(
     getCanon getCompat getComposite getCombinClass
     isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
     isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
-    FCD checkFCD FCC checkFCC composeContiguous
-    splitOnLastStarter
+    FCD checkFCD FCC checkFCC composeContiguous splitOnLastStarter
+    normalize_partial NFC_partial NFD_partial NFKC_partial NFKD_partial
 );
 our %EXPORT_TAGS = (
     all       => [ @EXPORT, @EXPORT_OK ],
@@ -37,12 +33,6 @@ our %EXPORT_TAGS = (
     fast      => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ],
 );
 
-######
-
-bootstrap Unicode::Normalize $VERSION;
-
-######
-
 ##
 ## utilites for tests
 ##
@@ -55,9 +45,18 @@ sub unpack_U {
     return unpack('U*', shift(@_).pack('U*'));
 }
 
+require Exporter;
+
+##### The above part is common to XS and PP #####
+
+our @ISA = qw(Exporter DynaLoader);
+require DynaLoader;
+bootstrap Unicode::Normalize $VERSION;
+
+##### The below part is common to XS and PP #####
 
 ##
-## normalization forms
+## normalize
 ##
 
 sub FCD ($) {
@@ -83,9 +82,27 @@ sub normalize($$)
     croak($PACKAGE."::normalize: invalid form name: $form");
 }
 
+##
+## partial
+##
+
+sub normalize_partial ($$) {
+    if (exists $formNorm{$_[0]}) {
+	my $n = normalize($_[0], $_[1]);
+	my($p, $u) = splitOnLastStarter($n);
+	$_[1] = $u;
+	return $p;
+    }
+    croak($PACKAGE."::normalize_partial: invalid form name: $_[0]");
+}
+
+sub NFD_partial ($) { return normalize_partial('NFD', $_[0]) }
+sub NFC_partial ($) { return normalize_partial('NFC', $_[0]) }
+sub NFKD_partial($) { return normalize_partial('NFKD',$_[0]) }
+sub NFKC_partial($) { return normalize_partial('NFKC',$_[0]) }
 
 ##
-## quick check
+## check
 ##
 
 our %formCheck = (
@@ -239,6 +256,82 @@ you can get its NFC/NFKC string, by saying
     $NFC_string  = compose($NFD_string);
     $NFKC_string = compose($NFKD_string);
 
+=item C<($processed, $unprocessed) = splitOnLastStarter($normalized)>
+
+It returns two strings: the first one, C<$processed>, is a part
+before the last starter, and the second one, C<$unprocessed> is
+another part after the first part. A starter is a character having
+a combining class of zero (see UAX #15).
+
+Note that C<$processed> may be empty (when C<$normalized> contains no
+starter or starts with the last starter), and then C<$unprocessed>
+should be equal to the entire C<$normalized>.
+
+When you have a C<$normalized> string and an C<$unnormalized> string
+following it, a simple concatenation is wrong:
+
+    $concat = $normalized . normalize($form, $unnormalized); # wrong!
+
+Instead of it, do like this:
+
+    ($processed, $unprocessed) = splitOnLastStarter($normalized);
+     $concat = $processed . normalize($form, $unprocessed.$unnormalized);
+
+C<splitOnLastStarter()> should be called with a pre-normalized parameter
+C<$normalized>, that is in the same form as C<$form> you want.
+
+If you have an array of C<@string> that should be concatenated and then
+normalized, you can do like this:
+
+    my $result = "";
+    my $unproc = "";
+    foreach my $str (@string) {
+        $unproc .= $str;
+        my $n = normalize($form, $unproc);
+        my($p, $u) = splitOnLastStarter($n);
+        $result .= $p;
+        $unproc  = $u;
+    }
+    $result .= $unproc;
+    # instead of normalize($form, join('', @string))
+
+=item C<$processed = normalize_partial($form, $unprocessed)>
+
+A wrapper for the combination of C<normalize()> and C<splitOnLastStarter()>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+If you have an array of C<@string> that should be concatenated and then
+normalized, you can do like this:
+
+    my $result = "";
+    my $unproc = "";
+    foreach my $str (@string) {
+        $unproc .= $str;
+        $result .= normalize_partial($form, $unproc);
+    }
+    $result .= $unproc;
+    # instead of normalize($form, join('', @string))
+
+=item C<$processed = NFD_partial($unprocessed)>
+
+It does like C<normalize_partial('NFD', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFC_partial($unprocessed)>
+
+It does like C<normalize_partial('NFC', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFKD_partial($unprocessed)>
+
+It does like C<normalize_partial('NFKD', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFKC_partial($unprocessed)>
+
+It does like C<normalize_partial('NFKC', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
 =back
 
 =head2 Quick Check
@@ -321,15 +414,15 @@ while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC.
 If you want to check exactly, compare the string with its NFC/NFKC/FCC.
 
     if ($string eq NFC($string)) {
-	# $string is exactly normalized in NFC;
+        # $string is exactly normalized in NFC;
     } else {
-	# $string is not normalized in NFC;
+        # $string is not normalized in NFC;
     }
 
     if ($string eq NFKC($string)) {
-	# $string is exactly normalized in NFKC;
+        # $string is exactly normalized in NFKC;
     } else {
-	# $string is not normalized in NFKC;
+        # $string is not normalized in NFKC;
     }
 
 =head2 Character Data
@@ -454,7 +547,7 @@ normalization implemented by this module depends on your perl's version.
      5.8.4-5.8.6          4.0.1 (normalization is same as 4.0.0)
      5.8.7-5.8.8          4.1.0
        5.10.0             5.0.0
-       5.8.9              5.1.0
+     5.8.9, 5.10.1        5.1.0
 
 =item Correction of decomposition mapping
 
@@ -482,7 +575,7 @@ lower than 4.1.0.
 
 SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
 
-Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved.
+Copyright(C) 2001-2010, SADAHIRO Tomoyuki. Japan. All rights reserved.
 
 This module is free software; you can redistribute it
 and/or modify it under the same terms as Perl itself.
diff --git a/cpan/Unicode-Normalize/README b/cpan/Unicode-Normalize/README
index 8a5390cc78..06c9d0c83f 100644
--- a/cpan/Unicode-Normalize/README
+++ b/cpan/Unicode-Normalize/README
@@ -1,4 +1,4 @@
-Unicode/Normalize version 1.03
+Unicode/Normalize version 1.06
 ===================================
 
 Unicode::Normalize - Unicode Normalization Forms
@@ -83,7 +83,7 @@ COPYRIGHT AND LICENSE
 
 SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
 
-Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved.
+Copyright(C) 2001-2010, SADAHIRO Tomoyuki. Japan. All rights reserved.
 
 This module is free software; you can redistribute it and/or
 modify it under the same terms as Perl itself.
diff --git a/cpan/Unicode-Normalize/mkheader b/cpan/Unicode-Normalize/mkheader
index b3e3c3153c..b6d153c552 100644
--- a/cpan/Unicode-Normalize/mkheader
+++ b/cpan/Unicode-Normalize/mkheader
@@ -32,30 +32,37 @@ BEGIN {
 
 our $PACKAGE = 'Unicode::Normalize, mkheader';
 
-our $Combin = do "unicore/CombiningClass.pl"
-    || do "unicode/CombiningClass.pl"
-    || croak "$PACKAGE: CombiningClass.pl not found";
+our $prefix = "UNF_";
+our $structname = "${prefix}complist";
 
-our $Decomp = do "unicore/Decomposition.pl"
-    || do "unicode/Decomposition.pl"
-    || croak "$PACKAGE: Decomposition.pl not found";
+sub pack_U {
+    return pack('U*', @_);
+}
+
+# %Canon and %Compat will be ($codepoint => $hexstring) after _U_stringify()
+our %Comp1st;	# $codepoint => $listname  : may be composed with a next char.
+our %CompList;	# $listname,$2nd  => $codepoint : composite
+
+##### The below part is common to mkheader and PP #####
 
 our %Combin;	# $codepoint => $number    : combination class
 our %Canon;	# $codepoint => \@codepoints : canonical decomp.
 our %Compat;	# $codepoint => \@codepoints : compat. decomp.
-# after _U_stringify(), ($codepoint => $hexstring) for %Canon and %Compat
+our %Compos;	# $1st,$2nd  => $codepoint : composite
 our %Exclus;	# $codepoint => 1          : composition exclusions
 our %Single;	# $codepoint => 1          : singletons
 our %NonStD;	# $codepoint => 1          : non-starter decompositions
-
-our %Comp1st;	# $codepoint => $listname  : may be composed with a next char.
 our %Comp2nd;	# $codepoint => 1          : may be composed with a prev char.
-our %CompList;	# $listname,$2nd  => $codepoint : composite
 
-our $prefix = "UNF_";
-our $structname = "${prefix}complist";
+# from Unicode database
+our $Combin = do "unicore/CombiningClass.pl"
+    || do "unicode/CombiningClass.pl"
+    || croak "$PACKAGE: CombiningClass.pl not found";
+our $Decomp = do "unicore/Decomposition.pl"
+    || do "unicode/Decomposition.pl"
+    || croak "$PACKAGE: Decomposition.pl not found";
 
-########## definition of Hangul constants ##########
+# definition of Hangul constants
 use constant SBase  => 0xAC00;
 use constant SFinal => 0xD7A3; # SBase -1 + SCount
 use constant SCount =>  11172; # LCount * NCount
@@ -71,48 +78,26 @@ use constant TFinal => 0x11C2;
 use constant TCount =>     28;
 
 sub decomposeHangul {
-    my $SIndex = $_[0] - SBase;
-    my $LIndex = int( $SIndex / NCount);
-    my $VIndex = int(($SIndex % NCount) / TCount);
-    my $TIndex =      $SIndex % TCount;
+    my $sindex = $_[0] - SBase;
+    my $lindex = int( $sindex / NCount);
+    my $vindex = int(($sindex % NCount) / TCount);
+    my $tindex =      $sindex % TCount;
     my @ret = (
-       LBase + $LIndex,
-       VBase + $VIndex,
-      $TIndex ? (TBase + $TIndex) : (),
+       LBase + $lindex,
+       VBase + $vindex,
+      $tindex ? (TBase + $tindex) : (),
     );
-    return @ret;
-}
-
-########## length of a character ##########
-
-sub utf8len {
-  my $uv = shift;
-  return $uv < 0x80 ? 1 :
-        $uv < 0x800 ? 2 :
-      $uv < 0x10000 ? 3 :
-     $uv < 0x110000 ? 4 :
-  croak "$PACKAGE: illegal char in the composite. codepoint max is 0x10ffff.";
-}
-
-sub utfelen {
-  my $uv = shift;
-  return $uv < 0xA0 ? 1 :
-        $uv < 0x400 ? 2 :
-       $uv < 0x4000 ? 3 :
-      $uv < 0x40000 ? 4 :
-     $uv < 0x110000 ? 5 :
-  croak "$PACKAGE: illegal char in the composite. codepoint max is 0x10ffff.";
+    return wantarray ? @ret : pack_U(@ret);
 }
 
-my $errExpand = "$PACKAGE: Composition to U+%04X (from U+%04X and U+%04X) " .
-    "needs growing the string in %s! Quit. Please inform the author...";
-
 ########## getting full decomposion ##########
 {
     my($f, $fh);
     foreach my $d (@INC) {
 	$f = File::Spec->catfile($d, "unicore", "CompositionExclusions.txt");
 	last if open($fh, $f);
+	$f = File::Spec->catfile($d, "unicore", "CompExcl.txt");
+	last if open($fh, $f);
 	$f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
 	last if open($fh, $f);
 	$f = undef;
@@ -128,10 +113,9 @@ my $errExpand = "$PACKAGE: Composition to U+%04X (from U+%04X and U+%04X) " .
     close $fh;
 }
 
-##
 ## converts string "hhhh hhhh hhhh" to a numeric list
-##
-sub _getHexArray { map hex, $_[0] =~ /([0-9A-Fa-f]+)/g }
+## (hex digits separated by spaces)
+sub _getHexArray { map hex, $_[0] =~ /\G *([0-9A-Fa-f]+)/g }
 
 while ($Combin =~ /(.+)/g) {
     my @tab = split /\t/, $1;
@@ -147,14 +131,10 @@ while ($Decomp =~ /(.+)/g) {
     my @tab = split /\t/, $1;
     my $compat = $tab[2] =~ s/<[^>]+>//;
     my $dec = [ _getHexArray($tab[2]) ]; # decomposition
-    my $ini = hex($tab[0]);
+    my $ini = hex($tab[0]); # initial decomposable character
     my $end = $tab[1] eq '' ? $ini : hex($tab[1]);
     # ($ini .. $end) is the range of decomposable characters.
 
-    my $listname =
-	@$dec == 2 ? sprintf("${structname}_%06x", $dec->[0]) : 'USELESS';
-		# %04x is bad since it'd place _3046 after _1d157.
-
     foreach my $u ($ini .. $end) {
 	$Compat{$u} = $dec;
 
@@ -162,20 +142,10 @@ while ($Decomp =~ /(.+)/g) {
 	    $Canon{$u} = $dec;
 
 	    if (@$dec == 2) {
-		if (utf8len($dec->[0]) + utf8len($dec->[1]) < utf8len($u)) {
-		    croak sprintf $errExpand, $u, $dec->[0], $dec->[1],
-				  "utf-8";
-		}
-		if (utfelen($dec->[0]) + utfelen($dec->[1]) < utfelen($u)) {
-		    croak sprintf $errExpand, $u, $dec->[0], $dec->[1],
-				  "utf-ebcdic";
-		}
-
 		if ($Combin{ $dec->[0] }) {
 		    $NonStD{$u} = 1;
 		} else {
-		    $CompList{ $listname }{ $dec->[1] } = $u;
-		    $Comp1st{ $dec->[0] } = $listname;
+		    $Compos{ $dec->[0] }{ $dec->[1] } = $u;
 		    $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u};
 		}
 	    } elsif (@$dec == 1) {
@@ -222,8 +192,47 @@ foreach my $key (keys %Compat) {
     $Compat{$key} = [ getCompatList($key) ];
 }
 
-sub _pack_U {
-    return pack('U*', @_);
+##### The above part is common to mkheader and PP #####
+
+sub utf8len {
+  my $uv = shift;
+  return $uv < 0x80 ? 1 :
+        $uv < 0x800 ? 2 :
+      $uv < 0x10000 ? 3 :
+     $uv < 0x110000 ? 4 :
+  croak "$PACKAGE: illegal char in the composite. codepoint max is 0x10ffff.";
+}
+
+sub utfelen {
+  my $uv = shift;
+  return $uv < 0xA0 ? 1 :
+        $uv < 0x400 ? 2 :
+       $uv < 0x4000 ? 3 :
+      $uv < 0x40000 ? 4 :
+     $uv < 0x110000 ? 5 :
+  croak "$PACKAGE: illegal char in the composite. codepoint max is 0x10ffff.";
+}
+
+my $errExpand = "$PACKAGE: Composition to U+%04X (from U+%04X and U+%04X) " .
+    "needs growing the string in %s! Quit. Please inform the author...";
+
+foreach my $comp1st (keys %Compos) {
+    my $listname = sprintf("${structname}_%06x", $comp1st);
+		# %04x is bad since it'd place _3046 after _1d157.
+    $Comp1st{$comp1st} = $listname;
+    my $rh1st = $Compos{$comp1st};
+
+    foreach my $comp2nd (keys %$rh1st) {
+	my $uc = $rh1st->{$comp2nd};
+	$CompList{$listname}{$comp2nd} = $uc;
+
+	if (utf8len($comp1st) + utf8len($comp2nd) < utf8len($uc)) {
+	    croak sprintf $errExpand, $uc, $comp1st, $comp2nd, "utf-8";
+	}
+	if (utfelen($comp1st) + utfelen($comp2nd) < utfelen($uc)) {
+	    croak sprintf $errExpand, $uc, $comp1st, $comp2nd, "utf-ebcdic";
+	}
+    }
 }
 
 sub split_into_char {
@@ -239,7 +248,7 @@ sub split_into_char {
 
 sub _U_stringify {
     sprintf '"%s"', join '',
-	map sprintf("\\x%02x", $_), split_into_char(_pack_U(@_));
+	map sprintf("\\x%02x", $_), split_into_char(pack_U(@_));
 }
 
 foreach my $hash (\%Canon, \%Compat) {
diff --git a/cpan/Unicode-Normalize/t/partial1.t b/cpan/Unicode-Normalize/t/partial1.t
new file mode 100644
index 0000000000..d8158511a9
--- /dev/null
+++ b/cpan/Unicode-Normalize/t/partial1.t
@@ -0,0 +1,104 @@
+
+BEGIN {
+    unless ("A" eq pack('U', 0x41)) {
+	print "1..0 # Unicode::Normalize " .
+	    "cannot stringify a Unicode code point\n";
+	exit 0;
+    }
+}
+
+BEGIN {
+    if ($ENV{PERL_CORE}) {
+        chdir('t') if -d 't';
+        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+    }
+}
+
+BEGIN {
+    unless (5.006001 <= $]) {
+	print "1..0 # skipped: Perl 5.6.1 or later".
+		" needed for this test\n";
+	exit;
+    }
+}
+
+#########################
+
+use Test;
+use strict;
+use warnings;
+BEGIN { plan tests => 26 };
+use Unicode::Normalize qw(:all);
+ok(1); # If we made it this far, we're ok.
+
+sub _pack_U   { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+sub arraynorm {
+    my $form   = shift;
+    my @string = @_;
+    my $result = "";
+    my $unproc = "";
+    foreach my $str (@string) {
+        $unproc .= $str;
+        $result .= $form eq 'NFC'  ? NFC_partial ($unproc) :
+		   $form eq 'NFD'  ? NFD_partial ($unproc) :
+		   $form eq 'NFKC' ? NFKC_partial($unproc) :
+		   $form eq 'NFKD' ? NFKD_partial($unproc) :
+		   undef;
+    }
+    $result .= $unproc;
+    return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX =  "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+####
+
+# must modify the source
+my $sNFD = "\x{FA19}";
+ok(NFD_partial($sNFD), "");
+ok($sNFD, "\x{795E}");
+
+my $sNFC = "\x{FA1B}";
+ok(NFC_partial($sNFC), "");
+ok($sNFC, "\x{798F}");
+
+my $sNFKD = "\x{FA1E}";
+ok(NFKD_partial($sNFKD), "");
+ok($sNFKD, "\x{7FBD}");
+
+my $sNFKC = "\x{FA26}";
+ok(NFKC_partial($sNFKC), "");
+ok($sNFKC, "\x{90FD}");
diff --git a/cpan/Unicode-Normalize/t/partial2.t b/cpan/Unicode-Normalize/t/partial2.t
new file mode 100644
index 0000000000..4dcdb79289
--- /dev/null
+++ b/cpan/Unicode-Normalize/t/partial2.t
@@ -0,0 +1,100 @@
+
+BEGIN {
+    unless ("A" eq pack('U', 0x41)) {
+	print "1..0 # Unicode::Normalize " .
+	    "cannot stringify a Unicode code point\n";
+	exit 0;
+    }
+}
+
+BEGIN {
+    if ($ENV{PERL_CORE}) {
+        chdir('t') if -d 't';
+        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+    }
+}
+
+BEGIN {
+    unless (5.006001 <= $]) {
+	print "1..0 # skipped: Perl 5.6.1 or later".
+		" needed for this test\n";
+	exit;
+    }
+}
+
+#########################
+
+use Test;
+use strict;
+use warnings;
+BEGIN { plan tests => 26 };
+use Unicode::Normalize qw(:all);
+ok(1); # If we made it this far, we're ok.
+
+sub _pack_U   { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+sub arraynorm {
+    my $form   = shift;
+    my @string = @_;
+    my $result = "";
+    my $unproc = "";
+    foreach my $str (@string) {
+        $unproc .= $str;
+        $result .= normalize_partial($form, $unproc);
+    }
+    $result .= $unproc;
+    return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX =  "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+####
+
+# must modify the source
+my $sNFD = "\x{FA19}";
+ok(normalize_partial('NFD', $sNFD), "");
+ok($sNFD, "\x{795E}");
+
+my $sNFC = "\x{FA1B}";
+ok(normalize_partial('NFC', $sNFC), "");
+ok($sNFC, "\x{798F}");
+
+my $sNFKD = "\x{FA1E}";
+ok(normalize_partial('NFKD', $sNFKD), "");
+ok($sNFKD, "\x{7FBD}");
+
+my $sNFKC = "\x{FA26}";
+ok(normalize_partial('NFKC', $sNFKC), "");
+ok($sNFKC, "\x{90FD}");
diff --git a/cpan/Unicode-Normalize/t/split.t b/cpan/Unicode-Normalize/t/split.t
index 6bf7ff66b1..a8275bff88 100644
--- a/cpan/Unicode-Normalize/t/split.t
+++ b/cpan/Unicode-Normalize/t/split.t
@@ -27,7 +27,7 @@ BEGIN {
 use Test;
 use strict;
 use warnings;
-BEGIN { plan tests => 14 };
+BEGIN { plan tests => 34 };
 use Unicode::Normalize qw(:all);
 ok(1); # If we made it this far, we're ok.
 
@@ -40,14 +40,6 @@ our $proc;    # before the last starter
 our $unproc;  # the last starter and after
 # If string has no starter, entire string is set to $unproc.
 
-# When you have $normalized string and $unnormalized string following,
-# a simple concatenation
-#   C<$concat = $normalized . normalize($form, $unnormalized)>
-# is wrong. Instead of it, like this:
-#
-#       ($processed, $unprocessed) = splitOnLastStarter($normalized);
-#       $concat = $processed . normalize($form, $unprocessed.$unnormalized);
-
 ($proc, $unproc) = splitOnLastStarter("");
 ok($proc,   "");
 ok($unproc, "");
@@ -79,3 +71,61 @@ ok(NFC($ka_grave.$dakuten) eq $ga_grave);
 ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave);
 ok($concat eq $ga_grave);
 
+##############
+
+sub arraynorm {
+    my $form   = shift;
+    my @string = @_;
+    my $result = "";
+    my $unproc = "";
+    foreach my $str (@string) {
+        $unproc .= $str;
+        my $n = normalize($form, $unproc);
+        my($p, $u) = splitOnLastStarter($n);
+        $result .= $p;
+        $unproc  = $u;
+    }
+    $result .= $unproc;
+    return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX =  "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+##############
+
+# don't modify the source
+
+my $source = "ABC";
+($proc, $unproc) = splitOnLastStarter($source);
+ok($proc,   "AB");
+ok($unproc, "C");
+ok($source, "ABC");
diff --git a/cpan/Unicode-Normalize/t/test.t b/cpan/Unicode-Normalize/t/test.t
index e48e16f1b9..2dea2a1402 100644
--- a/cpan/Unicode-Normalize/t/test.t
+++ b/cpan/Unicode-Normalize/t/test.t
@@ -19,7 +19,7 @@ BEGIN {
 use Test;
 use strict;
 use warnings;
-BEGIN { plan tests => 58 };
+BEGIN { plan tests => 70 };
 use Unicode::Normalize;
 ok(1); # If we made it this far, we're ok.
 
@@ -123,3 +123,19 @@ my $str21 = _pack_U(0xE0, 0xAC00);
 my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161);
 ok(NFD $str21 eq $str22);
 
+## Bug #53197: NFKC("\x{2000}") produces...
+
+ok(NFKC("\x{2002}") eq ' ');
+ok(NFKD("\x{2002}") eq ' ');
+ok(NFKC("\x{2000}") eq ' ');
+ok(NFKD("\x{2000}") eq ' ');
+
+ok(NFKC("\x{210C}") eq 'H');
+ok(NFKD("\x{210C}") eq 'H');
+ok(NFKC("\x{210D}") eq 'H');
+ok(NFKD("\x{210D}") eq 'H');
+
+ok(NFC("\x{F907}") eq "\x{9F9C}");
+ok(NFD("\x{F907}") eq "\x{9F9C}");
+ok(NFKC("\x{F907}") eq "\x{9F9C}");
+ok(NFKD("\x{F907}") eq "\x{9F9C}");
author	Chris 'BinGOs' Williams <chris@bingosnet.co.uk>	2010-06-27 19:51:20 +0100
committer	Chris 'BinGOs' Williams <chris@bingosnet.co.uk>	2010-06-27 19:51:20 +0100
commit	a96160d631b54897af2f2540335c2ca854dcea68 (patch)
tree	2cf0106edd8be54b706dbc2ba0df8a0da0cd7129
parent	6ad8f254c95c6d4523948ded91d651dcc490dee5 (diff)
download	perl-a96160d631b54897af2f2540335c2ca854dcea68.tar.gz