Update Unicode-Collate to CPAN version 0.70 and enable XS version

[DELTA] 0.70 Sun Jan 16 20:31:07 2011 - Now U::C::Locale->new will use the compiled DUCET via XS. 0.69 Sat Jan 15 19:41:11 2011 - clarified about XSUB. revised INSTALL in README. - xs: flag passed to utf8n_to_uvuni(). - doc and comments: [perl #81876] Fix typos by Peter J. Acklam. A pure-perl version of this module was previously installed, this commit enables the XS version of the module instead.
author: Chris 'BinGOs' Williams <chris@bingosnet.co.uk> 2011-01-16 23:23:03 +0000
committer: Chris 'BinGOs' Williams <chris@bingosnet.co.uk> 2011-01-17 00:22:13 +0000
commit: 211cc5012284f4bd900fcaa630adbcac69ca6112 (patch)
tree: 7f07e1acee7dd8b86e8816730d8a975a4946d477 /cpan
parent: a62b1201c068dc7b099bcb7182e188c4d2fbf34c (diff)
download: perl-211cc5012284f4bd900fcaa630adbcac69ca6112.tar.gz
9 files changed, 1001 insertions, 330 deletions
diff --git a/cpan/Unicode-Collate/.gitignore b/cpan/Unicode-Collate/.gitignore
new file mode 100644
index 0000000000..424c745c12
--- /dev/null
+++ b/cpan/Unicode-Collate/.gitignore
@@ -0,0 +1 @@
+*.h
diff --git a/cpan/Unicode-Collate/Changes b/cpan/Unicode-Collate/Changes
index ca9be54809..c7bba12874 100644
--- a/cpan/Unicode-Collate/Changes
+++ b/cpan/Unicode-Collate/Changes
@@ -1,5 +1,13 @@
 Revision history for Perl module Unicode::Collate.
 
+0.70  Sun Jan 16 20:31:07 2011
+    - Now U::C::Locale->new will use the compiled DUCET via XS.
+
+0.69  Sat Jan 15 19:41:11 2011
+    - clarified about XSUB. revised INSTALL in README.
+    - xs: flag passed to utf8n_to_uvuni().
+    - doc and comments: [perl #81876] Fix typos by Peter J. Acklam.
+
 0.68  Tue Nov 23 20:17:22 2010
     - doc: clarified about (backwards => [ ]) and (backwards => undef).
     - separated t/backwds.t from t/test.t.
@@ -24,7 +32,7 @@ Revision history for Perl module Unicode::Collate.
     - 12 compat. ideographs (e.g. U+FA0E) are treated as unified ideographs.
      (though DUCET also does it, now Unicode::Collate does it without DUCET.)
     - added t/compatui.t.
-    ! Ideographs Ext.B (U+20000..U+2A6D6) can be overrided with UCA_Version 8.
+    ! Ideographs Ext.B (U+20000..U+2A6D6) can be overridden with UCA_Version 8.
       This is a long-standing behavior from Unicode::Collate 0.11 to 0.63.
       A wrong fix at 0.64 should be abandoned.
 
@@ -121,6 +129,8 @@ Revision history for Perl module Unicode::Collate.
     - U+9FC4..U+9FCB and U+2A700..U+2B734 are new CJK unified ideographs.
     - Many hangul jamo are assigned (affecting hangul_terminator).
 
+    ! Now XSUB will be built by default. (XSUB needs a C compiler.)
+      To build pure perl, run disableXS before Makefile.PL.
     ! DUCET will be compiled when XS is used. Explicit saying
       <table => 'allkeys.txt'> (or using another table) will prevent
       this module from using the compiled DUCET.
@@ -174,11 +184,11 @@ Revision history for Perl module Unicode::Collate.
       (Perl 5.7.3 or before)). If perl 5.6.X is used, XSUB may help it
       in place of broken CORE::unpack('U*') in older perl.
     - added illegal.t and illegalp.t in t.
-    - added XSUB (EXPERIMENTAL!) where some functions are implemented
-      in XSUB. Pure Perl is also supported.
+    - added XSUB where some functions are implemented in XSUB.
+      Pure Perl is also supported.
 
 0.30  Mon Oct 13 21:26:37 2003
-    - fix: Completely ignorable in table should be able to be overrided
+    - fix: Completely ignorable in table should be able to be overridden
       by non-ignorable in entry.
     - fix: Maximum length for contraction must not be shortened
       by a shorter contraction following in table and/or entry.
diff --git a/cpan/Unicode-Collate/Collate.pm b/cpan/Unicode-Collate/Collate.pm
index b337b6f24b..05822b2c11 100644
--- a/cpan/Unicode-Collate/Collate.pm
+++ b/cpan/Unicode-Collate/Collate.pm
@@ -14,9 +14,13 @@ use File::Spec;
 
 no warnings 'utf8';
 
-our $VERSION = '0.6801';
+our $VERSION = '0.70';
 our $PACKAGE = __PACKAGE__;
 
+require DynaLoader;
+our @ISA = qw(DynaLoader);
+bootstrap Unicode::Collate $VERSION;
+
 my @Path = qw(Unicode Collate);
 my $KeyFile = "allkeys.txt";
 
@@ -71,49 +75,8 @@ use constant NON_VAR => 0; # Non-Variable character
 use constant VAR     => 1; # Variable character
 
 # specific code points
-use constant Hangul_SBase  => 0xAC00;
 use constant Hangul_SIni   => 0xAC00;
 use constant Hangul_SFin   => 0xD7A3;
-use constant Hangul_NCount => 588;
-use constant Hangul_TCount => 28;
-use constant Hangul_LBase  => 0x1100;
-use constant Hangul_LIni   => 0x1100;
-use constant Hangul_LFin   => 0x1159;
-use constant Hangul_LFill  => 0x115F;
-use constant Hangul_LEnd   => 0x115F; # Unicode 5.2
-use constant Hangul_VBase  => 0x1161;
-use constant Hangul_VIni   => 0x1160; # from Vowel Filler
-use constant Hangul_VFin   => 0x11A2;
-use constant Hangul_VEnd   => 0x11A7; # Unicode 5.2
-use constant Hangul_TBase  => 0x11A7; # from "no-final" codepoint
-use constant Hangul_TIni   => 0x11A8;
-use constant Hangul_TFin   => 0x11F9;
-use constant Hangul_TEnd   => 0x11FF; # Unicode 5.2
-use constant HangulL2Ini   => 0xA960; # Unicode 5.2
-use constant HangulL2Fin   => 0xA97C; # Unicode 5.2
-use constant HangulV2Ini   => 0xD7B0; # Unicode 5.2
-use constant HangulV2Fin   => 0xD7C6; # Unicode 5.2
-use constant HangulT2Ini   => 0xD7CB; # Unicode 5.2
-use constant HangulT2Fin   => 0xD7FB; # Unicode 5.2
-
-use constant CJK_UidIni   =>  0x4E00;
-use constant CJK_UidFin   =>  0x9FA5;
-use constant CJK_UidF41   =>  0x9FBB;
-use constant CJK_UidF51   =>  0x9FC3;
-use constant CJK_UidF52   =>  0x9FCB;
-use constant CJK_ExtAIni  =>  0x3400; # Unicode 3.0
-use constant CJK_ExtAFin  =>  0x4DB5; # Unicode 3.0
-use constant CJK_ExtBIni  => 0x20000; # Unicode 3.1
-use constant CJK_ExtBFin  => 0x2A6D6; # Unicode 3.1
-use constant CJK_ExtCIni  => 0x2A700; # Unicode 5.2
-use constant CJK_ExtCFin  => 0x2B734; # Unicode 5.2
-use constant CJK_ExtDIni  => 0x2B740; # Unicode 6.0
-use constant CJK_ExtDFin  => 0x2B81D; # Unicode 6.0
-
-my %CompatUI = map +($_ => 1), (
-    0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14, 0xFA1F,
-    0xFA21, 0xFA23, 0xFA24, 0xFA27, 0xFA28, 0xFA29,
-);
 
 # Logical_Order_Exception in PropList.txt
 my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ];
@@ -128,10 +91,6 @@ sub pack_U {
     return pack('U*', @_);
 }
 
-sub unpack_U {
-    return unpack('U*', shift(@_).pack('U*'));
-}
-
 ######
 
 my (%VariableOK);
@@ -152,6 +111,7 @@ our @ChangeNG = qw/
     versionTable alternateTable backwardsTable forwardsTable rearrangeTable
     derivCode normCode rearrangeHash backwardsFlag
     suppress suppressHash
+    __useXS
   /;
 # The hash key 'ignored' is deleted at v 0.21.
 # The hash key 'isShift' is deleted at v 0.23.
@@ -285,6 +245,12 @@ sub new
     my $class = shift;
     my $self = bless { @_ }, $class;
 
+    if (! exists $self->{table} &&
+	!defined $self->{undefName} && !defined $self->{ignoreName} &&
+	!defined $self->{undefChar} && !defined $self->{ignoreChar}) {
+	$self->{__useXS} = \&_fetch_simple;
+    } # XS only
+
     # keys of $self->{suppressHash} are $self->{suppress}.
     if ($self->{suppress} && @{ $self->{suppress} }) {
 	@{ $self->{suppressHash} }{ @{ $self->{suppress} } } = ();
@@ -347,6 +313,20 @@ sub parseAtmark {
 sub read_table {
     my $self = shift;
 
+    if ($self->{__useXS}) {
+	my @rest = _fetch_rest(); # complex matter need to parse
+	for my $line (@rest) {
+	    next if $line =~ /^\s*#/;
+
+	    if ($line =~ s/^\s*\@//) {
+		$self->parseAtmark($line);
+	    } else {
+		$self->parseEntry($line);
+	    }
+	}
+	return;
+    }
+
     my($f, $fh);
     foreach my $d (@INC) {
 	$f = File::Spec->catfile($d, @Path, $self->{table});
@@ -445,50 +425,12 @@ sub parseEntry
 }
 
 
-##
-## VCE = _varCE(variable, VCE)
-##
-sub _varCE
-{
-    my $vbl = shift;
-    my $vce = shift;
-    if ($vbl eq 'non-ignorable') {
-	return $vce;
-    }
-    my ($var, @wt) = unpack VCE_TEMPLATE, $vce;
-
-    if ($var) {
-	return pack(VCE_TEMPLATE, $var, 0, 0, 0,
-		$vbl eq 'blanked' ? $wt[3] : $wt[0]);
-    }
-    elsif ($vbl eq 'blanked') {
-	return $vce;
-    }
-    else {
-	return pack(VCE_TEMPLATE, $var, @wt[0..2],
-	    $vbl eq 'shifted' && $wt[0]+$wt[1]+$wt[2] ? Shift4Wt : 0);
-    }
-}
-
 sub viewSortKey
 {
     my $self = shift;
     $self->visualizeSortKey($self->getSortKey(@_));
 }
 
-sub visualizeSortKey
-{
-    my $self = shift;
-    my $view = join " ", map sprintf("%04X", $_), unpack(KEY_TEMPLATE, shift);
-
-    if ($self->{UCA_Version} <= 8) {
-	$view =~ s/ ?0000 ?/|/g;
-    } else {
-	$view =~ s/\b0000\b/|/g;
-    }
-    return "[$view]";
-}
-
 
 ##
 ## arrayref of JCPS   = splitEnt(string to be collated)
@@ -506,6 +448,7 @@ sub splitEnt
     my $reH  = $self->{rearrangeHash};
     my $vers = $self->{UCA_Version};
     my $ver9 = $vers >= 9 && $vers <= 11;
+    my $uXS  = $self->{__useXS};
 
     my ($str, @buf);
 
@@ -544,6 +487,9 @@ sub splitEnt
 	} elsif ($ver9) {
 	    $src[$i] = undef if $map->{ $src[$i] } &&
 			     @{ $map->{ $src[$i] } } == 0;
+	    if ($uXS) {
+		$src[$i] = undef if _ignorable_simple($src[$i]);
+	    }
 	}
     }
 
@@ -623,7 +569,8 @@ sub splitEnt
 	}
 
 	# skip completely ignorable
-	if ($map->{$jcps} && @{ $map->{$jcps} } == 0) {
+	if ($uXS && $jcps =~ /^[0-9]+\z/ && _ignorable_simple($jcps) ||
+	    $map->{$jcps} && @{ $map->{$jcps} } == 0) {
 	    if ($wLen && @buf) {
 		$buf[-1][2] = $i + 1;
 	    }
@@ -662,10 +609,13 @@ sub getWt
     my $vbl  = $self->{variable};
     my $map  = $self->{mapping};
     my $der  = $self->{derivCode};
+    my $uXS  = $self->{__useXS};
 
     return if !defined $u;
     return map(_varCE($vbl, $_), @{ $map->{$u} })
 	if $map->{$u};
+    return map(_varCE($vbl, $_), _fetch_simple($u))
+	if $uXS && _exists_simple($u);
 
     # JCPS must not be a contraction, then it's a code point.
     if (Hangul_SIni <= $u && $u <= Hangul_SFin) {
@@ -692,7 +642,7 @@ sub getWt
 			$map->{$contract} and @decH = ($contract, $decH[2]);
 		    }
 		    # even if V's ignorable, LT contraction is not supported.
-		    # If such a situatution were required, NFD should be used.
+		    # If such a situation were required, NFD should be used.
 		}
 		if (@decH == 3 && $max->{$decH[1]}) {
 		    my $contract = join(CODE_SEP, @decH[1,2]);
@@ -701,7 +651,9 @@ sub getWt
 	    }
 
 	    @hangulCE = map({
-		    $map->{$_} ? @{ $map->{$_} } : $der->($_);
+		    $map->{$_} ? @{ $map->{$_} } :
+		    $uXS && _exists_simple($_) ? _fetch_simple($_) :
+		    $der->($_);
 		} @decH);
 	}
 	return map _varCE($vbl, $_), @hangulCE;
@@ -726,12 +678,10 @@ sub getWt
 sub getSortKey
 {
     my $self = shift;
-    my $lev  = $self->{level};
     my $rEnt = $self->splitEnt(shift); # get an arrayref of JCPS
     my $vers = $self->{UCA_Version};
     my $vbl  = $self->{variable};
     my $term = $self->{hangul_terminator};
-    my $v2i  = $vers >= 9 && $vbl ne 'non-ignorable';
 
     my @buf; # weight arrays
     if ($term) {
@@ -756,53 +706,7 @@ sub getSortKey
 	}
     }
 
-    # make sort key
-    my @ret = ([],[],[],[]);
-    my $last_is_variable;
-
-    foreach my $vwt (@buf) {
-	my($var, @wt) = unpack(VCE_TEMPLATE, $vwt);
-
-	# "Ignorable (L1, L2) after Variable" since track. v. 9
-	if ($v2i) {
-	    if ($var) {
-		$last_is_variable = TRUE;
-	    } elsif (!$wt[0]) { # ignorable
-		next if $last_is_variable;
-	    } else {
-		$last_is_variable = FALSE;
-	    }
-	}
-	foreach my $v (0..$lev-1) {
-	    0 < $wt[$v] and push @{ $ret[$v] }, $wt[$v];
-	}
-    }
-
-    # modification of tertiary weights
-    if ($self->{upper_before_lower}) {
-	foreach my $w (@{ $ret[2] }) {
-	    if    (0x8 <= $w && $w <= 0xC) { $w -= 6 } # lower
-	    elsif (0x2 <= $w && $w <= 0x6) { $w += 6 } # upper
-	    elsif ($w == 0x1C)             { $w += 1 } # square upper
-	    elsif ($w == 0x1D)             { $w -= 1 } # square lower
-	}
-    }
-    if ($self->{katakana_before_hiragana}) {
-	foreach my $w (@{ $ret[2] }) {
-	    if    (0x0F <= $w && $w <= 0x13) { $w -= 2 } # katakana
-	    elsif (0x0D <= $w && $w <= 0x0E) { $w += 5 } # hiragana
-	}
-    }
-
-    if ($self->{backwardsFlag}) {
-	for (my $v = MinLevel; $v <= MaxLevel; $v++) {
-	    if ($self->{backwardsFlag} & (1 << $v)) {
-		@{ $ret[$v-1] } = reverse @{ $ret[$v-1] };
-	    }
-	}
-    }
-
-    join LEVEL_SEP, map pack(KEY_TEMPLATE, @$_), @ret;
+    return $self->mk_SortKey(\@buf);
 }
 
 
@@ -829,174 +733,6 @@ sub sort {
 }
 
 
-sub _derivCE_22 {
-    my $u = shift;
-    my $base = (CJK_UidIni  <= $u && $u <= CJK_UidF52 || $CompatUI{$u})
-		? 0xFB40 : # CJK
-	       (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
-		CJK_ExtBIni <= $u && $u <= CJK_ExtBFin ||
-		CJK_ExtCIni <= $u && $u <= CJK_ExtCFin ||
-		CJK_ExtDIni <= $u && $u <= CJK_ExtDFin)
-		? 0xFB80  # CJK ext.
-		: 0xFBC0; # others
-    my $aaaa = $base + ($u >> 15);
-    my $bbbb = ($u & 0x7FFF) | 0x8000;
-    return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
-	   pack(VCE_TEMPLATE, NON_VAR, $bbbb,      0,      0, $u);
-}
-
-sub _derivCE_20 {
-    my $u = shift;
-    my $base = (CJK_UidIni  <= $u && $u <= CJK_UidF52 || $CompatUI{$u})
-		? 0xFB40 : # CJK
-	       (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
-		CJK_ExtBIni <= $u && $u <= CJK_ExtBFin ||
-		CJK_ExtCIni <= $u && $u <= CJK_ExtCFin)
-		? 0xFB80  # CJK ext.
-		: 0xFBC0; # others
-    my $aaaa = $base + ($u >> 15);
-    my $bbbb = ($u & 0x7FFF) | 0x8000;
-    return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
-	   pack(VCE_TEMPLATE, NON_VAR, $bbbb,      0,      0, $u);
-}
-
-sub _derivCE_18 {
-    my $u = shift;
-    my $base = (CJK_UidIni  <= $u && $u <= CJK_UidF51 || $CompatUI{$u})
-		? 0xFB40 : # CJK
-	       (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
-		CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
-		? 0xFB80  # CJK ext.
-		: 0xFBC0; # others
-    my $aaaa = $base + ($u >> 15);
-    my $bbbb = ($u & 0x7FFF) | 0x8000;
-    return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
-	   pack(VCE_TEMPLATE, NON_VAR, $bbbb,      0,      0, $u);
-}
-
-sub _derivCE_14 {
-    my $u = shift;
-    my $base = (CJK_UidIni  <= $u && $u <= CJK_UidF41 || $CompatUI{$u})
-		? 0xFB40 : # CJK
-	       (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
-		CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
-		? 0xFB80  # CJK ext.
-		: 0xFBC0; # others
-    my $aaaa = $base + ($u >> 15);
-    my $bbbb = ($u & 0x7FFF) | 0x8000;
-    return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
-	   pack(VCE_TEMPLATE, NON_VAR, $bbbb,      0,      0, $u);
-}
-
-sub _derivCE_9 {
-    my $u = shift;
-    my $base = (CJK_UidIni  <= $u && $u <= CJK_UidFin || $CompatUI{$u})
-		? 0xFB40 : # CJK
-	       (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
-		CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
-		? 0xFB80  # CJK ext.
-		: 0xFBC0; # others
-    my $aaaa = $base + ($u >> 15);
-    my $bbbb = ($u & 0x7FFF) | 0x8000;
-    return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
-	   pack(VCE_TEMPLATE, NON_VAR, $bbbb,      0,      0, $u);
-}
-
-sub _derivCE_8 {
-    my $code = shift;
-    my $aaaa =  0xFF80 + ($code >> 15);
-    my $bbbb = ($code & 0x7FFF) | 0x8000;
-    return pack(VCE_TEMPLATE, NON_VAR, $aaaa, 2, 1, $code),
-	   pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $code);
-}
-
-sub _uideoCE_8 {
-    my $u = shift;
-    return pack(VCE_TEMPLATE, NON_VAR, $u, Min2Wt, Min3Wt, $u);
-}
-
-sub _isUIdeo {
-    # $uca_vers = 0 for _uideoCE_8()
-    my ($u, $uca_vers) = @_;
-    return((CJK_UidIni <= $u && (
-	    $uca_vers >= 20 ? ($u <= CJK_UidF52) :
-	    $uca_vers >= 18 ? ($u <= CJK_UidF51) :
-	    $uca_vers >= 14 ? ($u <= CJK_UidF41) :
-			      ($u <= CJK_UidFin))) || $CompatUI{$u}
-		||
-	(CJK_ExtAIni <= $u && $u <= CJK_ExtAFin)
-		||
-	($uca_vers >=  8 && CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
-		||
-	($uca_vers >= 20 && CJK_ExtCIni <= $u && $u <= CJK_ExtCFin)
-		||
-	($uca_vers >= 22 && CJK_ExtDIni <= $u && $u <= CJK_ExtDFin)
-    );
-}
-
-
-##
-## "hhhh hhhh hhhh" to (dddd, dddd, dddd)
-##
-sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
-
-#
-# $code *must* be in Hangul syllable.
-# Check it before you enter here.
-#
-sub _decompHangul {
-    my $code = shift;
-    my $si = $code - Hangul_SBase;
-    my $li = int( $si / Hangul_NCount);
-    my $vi = int(($si % Hangul_NCount) / Hangul_TCount);
-    my $ti =      $si % Hangul_TCount;
-    return (
-	Hangul_LBase + $li,
-	Hangul_VBase + $vi,
-	$ti ? (Hangul_TBase + $ti) : (),
-    );
-}
-
-sub _isIllegal {
-    my $code = shift;
-    return((! defined $code)                    # removed
-	|| ($code < 0 || 0x10FFFF < $code)      # out of range
-    );
-}
-
-sub _isNonchar {
-    my $code = shift;
-    return((($code & 0xFFFE) == 0xFFFE)         # ??FFF[EF] (cf. utf8.c)
-	|| (0xD800 <= $code && $code <= 0xDFFF) # unpaired surrogates
-	|| (0xFDD0 <= $code && $code <= 0xFDEF) # other non-characters
-    );
-}
-
-# Hangul Syllable Type
-sub getHST {
-    my $u = shift;
-    my $vers = shift || 0;
-
-    if (Hangul_SIni <= $u && $u <= Hangul_SFin) {
-	return +($u - Hangul_SBase) % Hangul_TCount ? "LVT" : "LV";
-    }
-
-    if ($vers < 20) {
-	return Hangul_LIni <= $u && $u <= Hangul_LFin ||
-			            $u == Hangul_LFill ? "L" :
-	       Hangul_VIni <= $u && $u <= Hangul_VFin  ? "V" :
-	       Hangul_TIni <= $u && $u <= Hangul_TFin  ? "T" : "";
-    } else {
-	return Hangul_LIni <= $u && $u <= Hangul_LEnd ||
-	       HangulL2Ini <= $u && $u <= HangulL2Fin  ? "L" :
-	       Hangul_VIni <= $u && $u <= Hangul_VEnd ||
-	       HangulV2Ini <= $u && $u <= HangulV2Fin  ? "V" :
-	       Hangul_TIni <= $u && $u <= Hangul_TEnd ||
-	       HangulT2Ini <= $u && $u <= HangulT2Fin  ? "T" : "";
-    }
-}
-
-
 ##
 ## bool _nonIgnorAtLevel(arrayref weights, int level)
 ##
@@ -1023,7 +759,7 @@ sub _eqArray($$$)
     my $lev = shift;
 
     for my $g (0..@$substr-1){
-	# Do the $g'th graphemes have the same number of AV weigths?
+	# Do the $g'th graphemes have the same number of AV weights?
 	return if @{ $source->[$g] } != @{ $substr->[$g] };
 
 	for my $w (0..@{ $substr->[$g] }-1) {
@@ -1321,7 +1057,7 @@ The following tracking versions are supported.  The default is 20.
 
 Note: Recent UTS #10 renames "Tracking Version" to "Revision."
 
-* Noncharacters (e.g. U+FFFF) are not ignored, and can be overrided
+* Noncharacters (e.g. U+FFFF) are not ignored, and can be overridden
 since C<UCA_Version> 22.
 
 * Fully ignorable characters were ignored, and would not interrupt
@@ -1359,7 +1095,7 @@ forwards at all the levels.
 
 If the same character (or a sequence of characters) exists
 in the collation element table through C<table>,
-mapping to collation elements is overrided.
+mapping to collation elements is overridden.
 If it does not exist, the mapping is defined additionally.
 
     entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt)
@@ -1536,7 +1272,7 @@ those in the CJK Unified Ideographs Extension A etc.
     Ext.D (U+2B740..U+2B81D) if UCA_Version is 22 or greater.
 
 Through C<overrideCJK>, ordering of CJK unified ideographs (including
-extensions) can be overrided.
+extensions) can be overridden.
 
 ex. CJK unified ideographs in the JIS code point order.
 
@@ -1579,7 +1315,7 @@ in C<table> or C<entry> is still valid.
 B<Note:> In addition to them, 12 CJK compatibility ideographs (C<U+FA0E>,
 C<U+FA0F>, C<U+FA11>, C<U+FA13>, C<U+FA14>, C<U+FA1F>, C<U+FA21>, C<U+FA23>,
 C<U+FA24>, C<U+FA27>, C<U+FA28>, C<U+FA29>) are also treated as CJK unified
-ideographs. But they can't be overrided via C<overrideCJK> when you use
+ideographs. But they can't be overridden via C<overrideCJK> when you use
 DUCET, as the table includes weights for them. C<table> or C<entry> has
 priority over C<overrideCJK>.
 
@@ -1589,7 +1325,7 @@ priority over C<overrideCJK>.
 
 By default, Hangul syllables are decomposed into Hangul Jamo,
 even if C<(normalization =E<gt> undef)>.
-But the mapping of Hangul syllables may be overrided.
+But the mapping of Hangul syllables may be overridden.
 
 This parameter works like C<overrideCJK>, so see there for examples.
 
@@ -1750,7 +1486,7 @@ this parameter doesn't work validly.
 
 This key allows to variable weighting for variable collation elements,
 which are marked with an ASTERISK in the table
-(NOTE: Many punction marks and symbols are variable in F<allkeys.txt>).
+(NOTE: Many punctuation marks and symbols are variable in F<allkeys.txt>).
 
    variable => 'blanked', 'non-ignorable', 'shifted', or 'shift-trimmed'.
 
@@ -2058,7 +1794,7 @@ B<Unicode::Normalize is required to try The Conformance Test.>
 =head1 AUTHOR, COPYRIGHT AND LICENSE
 
 The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
-<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2010,
+<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2011,
 SADAHIRO Tomoyuki. Japan. All rights reserved.
 
 This module is free software; you can redistribute it and/or
diff --git a/cpan/Unicode-Collate/Collate.xs b/cpan/Unicode-Collate/Collate.xs
new file mode 100644
index 0000000000..d6004bdf25
--- /dev/null
+++ b/cpan/Unicode-Collate/Collate.xs
@@ -0,0 +1,691 @@
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+/* This file is prepared by mkheader */
+#include "ucatbl.h"
+
+/* Perl 5.6.1 ? */
+#ifndef utf8n_to_uvuni
+#define utf8n_to_uvuni  utf8_to_uv
+#endif /* utf8n_to_uvuni */
+
+/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
+#ifndef UTF8_ALLOW_BOM
+#define UTF8_ALLOW_BOM  (0)
+#endif /* UTF8_ALLOW_BOM */
+
+#ifndef UTF8_ALLOW_SURROGATE
+#define UTF8_ALLOW_SURROGATE  (0)
+#endif /* UTF8_ALLOW_SURROGATE */
+
+#ifndef UTF8_ALLOW_FE_FF
+#define UTF8_ALLOW_FE_FF  (0)
+#endif /* UTF8_ALLOW_FE_FF */
+
+#ifndef UTF8_ALLOW_FFFF
+#define UTF8_ALLOW_FFFF  (0)
+#endif /* UTF8_ALLOW_FFFF */
+
+#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
+
+/* if utf8n_to_uvuni() sets retlen to 0 (?) */
+#define ErrRetlenIsZero "panic (Unicode::Collate): zero-length character"
+
+/* At present, char > 0x10ffff are unaffected without complaint, right? */
+#define VALID_UTF_MAX    (0x10ffff)
+#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
+
+static const UV max_div_16 = UV_MAX / 16;
+
+/* Supported Levels */
+#define MinLevel	(1)
+#define MaxLevel	(4)
+
+/* Shifted weight at 4th level */
+#define Shift4Wt	(0xFFFF)
+
+#define VCE_Length	(9)
+
+#define Hangul_SBase  (0xAC00)
+#define Hangul_SIni   (0xAC00)
+#define Hangul_SFin   (0xD7A3)
+#define Hangul_NCount (588)
+#define Hangul_TCount (28)
+#define Hangul_LBase  (0x1100)
+#define Hangul_LIni   (0x1100)
+#define Hangul_LFin   (0x1159)
+#define Hangul_LFill  (0x115F)
+#define Hangul_LEnd   (0x115F) /* Unicode 5.2 */
+#define Hangul_VBase  (0x1161)
+#define Hangul_VIni   (0x1160) /* from Vowel Filler */
+#define Hangul_VFin   (0x11A2)
+#define Hangul_VEnd   (0x11A7) /* Unicode 5.2 */
+#define Hangul_TBase  (0x11A7) /* from "no-final" codepoint */
+#define Hangul_TIni   (0x11A8)
+#define Hangul_TFin   (0x11F9)
+#define Hangul_TEnd   (0x11FF) /* Unicode 5.2 */
+#define HangulL2Ini   (0xA960) /* Unicode 5.2 */
+#define HangulL2Fin   (0xA97C) /* Unicode 5.2 */
+#define HangulV2Ini   (0xD7B0) /* Unicode 5.2 */
+#define HangulV2Fin   (0xD7C6) /* Unicode 5.2 */
+#define HangulT2Ini   (0xD7CB) /* Unicode 5.2 */
+#define HangulT2Fin   (0xD7FB) /* Unicode 5.2 */
+
+#define CJK_UidIni    (0x4E00)
+#define CJK_UidFin    (0x9FA5)
+#define CJK_UidF41    (0x9FBB)
+#define CJK_UidF51    (0x9FC3)
+#define CJK_UidF52    (0x9FCB)
+#define CJK_ExtAIni   (0x3400) /* Unicode 3.0 */
+#define CJK_ExtAFin   (0x4DB5) /* Unicode 3.0 */
+#define CJK_ExtBIni  (0x20000) /* Unicode 3.1 */
+#define CJK_ExtBFin  (0x2A6D6) /* Unicode 3.1 */
+#define CJK_ExtCIni  (0x2A700) /* Unicode 5.2 */
+#define CJK_ExtCFin  (0x2B734) /* Unicode 5.2 */
+#define CJK_ExtDIni  (0x2B740) /* Unicode 6.0 */
+#define CJK_ExtDFin  (0x2B81D) /* Unicode 6.0 */
+
+static STDCHAR UnifiedCompat[] = {
+      1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1
+}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */
+
+#define codeRange(bcode, ecode)	((bcode) <= code && code <= (ecode))
+
+MODULE = Unicode::Collate	PACKAGE = Unicode::Collate
+
+PROTOTYPES: DISABLE
+
+void
+_fetch_rest ()
+  PREINIT:
+    char ** rest;
+  PPCODE:
+    for (rest = UCA_rest; *rest; ++rest) {
+	XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0)));
+    }
+
+
+void
+_fetch_simple (uv)
+    UV uv
+  PREINIT:
+    U8 ***plane, **row;
+    char* result = NULL;
+  PPCODE:
+    if (!OVER_UTF_MAX(uv)){
+	plane = (U8***)UCA_simple[uv >> 16];
+	if (plane) {
+	    row = plane[(uv >> 8) & 0xff];
+	    result = row ? row[uv & 0xff] : NULL;
+	}
+    }
+    if (result) {
+	int i;
+	int num = (int)*result;
+	++result;
+	for (i = 0; i < num; ++i) {
+	    XPUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length)));
+	    result += VCE_Length;
+	}
+    } else {
+	XPUSHs(sv_2mortal(newSViv(0)));
+    }
+
+SV*
+_ignorable_simple (uv)
+    UV uv
+  ALIAS:
+    _exists_simple = 1
+  PREINIT:
+    U8 ***plane, **row;
+    int num = -1;
+    char* result = NULL;
+  CODE:
+    if (!OVER_UTF_MAX(uv)){
+	plane = (U8***)UCA_simple[uv >> 16];
+	if (plane) {
+	    row = plane[(uv >> 8) & 0xff];
+	    result = row ? row[uv & 0xff] : NULL;
+	}
+	if (result)
+	    num = (int)*result; /* assuming 0 <= num < 128 */
+    }
+
+    if (ix)
+	RETVAL = boolSV(num >0);
+    else
+	RETVAL = boolSV(num==0);
+  OUTPUT:
+    RETVAL
+
+
+void
+_getHexArray (src)
+    SV* src
+  PREINIT:
+    char *s, *e;
+    STRLEN byte;
+    UV value;
+    bool overflowed = FALSE;
+    const char *hexdigit;
+  PPCODE:
+    s = SvPV(src,byte);
+    for (e = s + byte; s < e;) {
+	hexdigit = strchr((char *) PL_hexdigit, *s++);
+        if (! hexdigit)
+	    continue;
+	value = (hexdigit - PL_hexdigit) & 0xF;
+	while (*s) {
+	    hexdigit = strchr((char *) PL_hexdigit, *s++);
+	    if (! hexdigit)
+		break;
+	    if (overflowed)
+		continue;
+	    if (value > max_div_16) {
+		overflowed = TRUE;
+		continue;
+	    }
+	    value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF);
+	}
+	XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value)));
+    }
+
+
+SV*
+_isIllegal (sv)
+    SV* sv
+  PREINIT:
+    UV uv;
+  CODE:
+    if (!sv || !SvIOK(sv))
+	XSRETURN_YES;
+    uv = SvUVX(sv);
+    RETVAL = boolSV(
+	   0x10FFFF < uv		   /* out of range */
+    );
+OUTPUT:
+    RETVAL
+
+
+SV*
+_isNonchar (sv)
+    SV* sv
+  PREINIT:
+    UV uv;
+  CODE:
+    /* should be called only if ! _isIllegal(sv). */
+    uv = SvUVX(sv);
+    RETVAL = boolSV(
+	   ((uv & 0xFFFE) == 0xFFFE)       /* ??FFF[EF] (cf. utf8.c) */
+	|| (0xD800 <= uv && uv <= 0xDFFF)  /* unpaired surrogates */
+	|| (0xFDD0 <= uv && uv <= 0xFDEF)  /* other non-characters */
+    );
+OUTPUT:
+    RETVAL
+
+
+void
+_decompHangul (code)
+    UV code
+  PREINIT:
+    UV sindex, lindex, vindex, tindex;
+  PPCODE:
+    /* code *must* be in Hangul syllable.
+     * Check it before you enter here. */
+    sindex =  code - Hangul_SBase;
+    lindex =  sindex / Hangul_NCount;
+    vindex = (sindex % Hangul_NCount) / Hangul_TCount;
+    tindex =  sindex % Hangul_TCount;
+
+    XPUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase)));
+    XPUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase)));
+    if (tindex)
+	XPUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
+
+
+SV*
+getHST (code, uca_vers = 0)
+    UV code;
+    IV uca_vers;
+  PREINIT:
+    char * hangtype;
+    STRLEN typelen;
+  CODE:
+    if (codeRange(Hangul_SIni, Hangul_SFin)) {
+	if ((code - Hangul_SBase) % Hangul_TCount) {
+	    hangtype = "LVT"; typelen = 3;
+	} else {
+	    hangtype = "LV"; typelen = 2;
+	}
+    } else if (uca_vers < 20) {
+	if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) {
+	    hangtype = "L"; typelen = 1;
+	} else if (codeRange(Hangul_VIni, Hangul_VFin)) {
+	    hangtype = "V"; typelen = 1;
+	} else if (codeRange(Hangul_TIni, Hangul_TFin)) {
+	    hangtype = "T"; typelen = 1;
+	} else {
+	    hangtype = ""; typelen = 0;
+	}
+    } else {
+	if        (codeRange(Hangul_LIni, Hangul_LEnd) ||
+		   codeRange(HangulL2Ini, HangulL2Fin)) {
+	    hangtype = "L"; typelen = 1;
+	} else if (codeRange(Hangul_VIni, Hangul_VEnd) ||
+		   codeRange(HangulV2Ini, HangulV2Fin)) {
+	    hangtype = "V"; typelen = 1;
+	} else if (codeRange(Hangul_TIni, Hangul_TEnd) ||
+		   codeRange(HangulT2Ini, HangulT2Fin)) {
+	    hangtype = "T"; typelen = 1;
+	} else {
+	    hangtype = ""; typelen = 0;
+	}
+    }
+
+    RETVAL = newSVpvn(hangtype, typelen);
+OUTPUT:
+    RETVAL
+
+
+void
+_derivCE_9 (code)
+    UV code
+  ALIAS:
+    _derivCE_14 = 1
+    _derivCE_18 = 2
+    _derivCE_20 = 3
+    _derivCE_22 = 4
+  PREINIT:
+    UV base, aaaa, bbbb;
+    U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
+    U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
+    bool basic_unified = 0;
+  PPCODE:
+    if (CJK_UidIni <= code) {
+	if (codeRange(0xFA0E, 0xFA29))
+	    basic_unified = (bool)UnifiedCompat[code - 0xFA0E];
+	else
+	    basic_unified = (ix >= 3 ? (code <= CJK_UidF52) :
+			     ix == 2 ? (code <= CJK_UidF51) :
+			     ix == 1 ? (code <= CJK_UidF41) :
+				       (code <= CJK_UidFin));
+    }
+    base = (basic_unified)
+	    ? 0xFB40 : /* CJK */
+	   ((codeRange(CJK_ExtAIni, CJK_ExtAFin))
+		||
+	    (codeRange(CJK_ExtBIni, CJK_ExtBFin))
+		||
+	    (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
+		||
+	    (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin)))
+	    ? 0xFB80   /* CJK ext. */
+	    : 0xFBC0;  /* others */
+    aaaa =  base + (code >> 15);
+    bbbb = (code & 0x7FFF) | 0x8000;
+    a[1] = (U8)(aaaa >> 8);
+    a[2] = (U8)(aaaa & 0xFF);
+    b[1] = (U8)(bbbb >> 8);
+    b[2] = (U8)(bbbb & 0xFF);
+    a[7] = b[7] = (U8)(code >> 8);
+    a[8] = b[8] = (U8)(code & 0xFF);
+    XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
+    XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
+
+
+void
+_derivCE_8 (code)
+    UV code
+  PREINIT:
+    UV aaaa, bbbb;
+    U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x02\x00\x01\xFF\xFF";
+    U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
+  PPCODE:
+    aaaa =  0xFF80 + (code >> 15);
+    bbbb = (code & 0x7FFF) | 0x8000;
+    a[1] = (U8)(aaaa >> 8);
+    a[2] = (U8)(aaaa & 0xFF);
+    b[1] = (U8)(bbbb >> 8);
+    b[2] = (U8)(bbbb & 0xFF);
+    a[7] = b[7] = (U8)(code >> 8);
+    a[8] = b[8] = (U8)(code & 0xFF);
+    XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
+    XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
+
+
+void
+_uideoCE_8 (code)
+    UV code
+  PREINIT:
+    U8 uice[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
+  PPCODE:
+    uice[1] = uice[7] = (U8)(code >> 8);
+    uice[2] = uice[8] = (U8)(code & 0xFF);
+    XPUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length)));
+
+
+SV*
+_isUIdeo (code, uca_vers)
+    UV code;
+    IV uca_vers;
+    bool basic_unified = 0;
+  CODE:
+    /* uca_vers = 0 for _uideoCE_8() */
+    if (CJK_UidIni <= code) {
+	if (codeRange(0xFA0E, 0xFA29))
+	    basic_unified = (bool)UnifiedCompat[code - 0xFA0E];
+	else
+	    basic_unified = (uca_vers >= 20 ? (code <= CJK_UidF52) :
+			     uca_vers >= 18 ? (code <= CJK_UidF51) :
+			     uca_vers >= 14 ? (code <= CJK_UidF41) :
+					      (code <= CJK_UidFin));
+    }
+    RETVAL = boolSV(
+	(basic_unified)
+		||
+	(codeRange(CJK_ExtAIni, CJK_ExtAFin))
+		||
+	(uca_vers >=  8 && codeRange(CJK_ExtBIni, CJK_ExtBFin))
+		||
+	(uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
+		||
+	(uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
+    );
+OUTPUT:
+    RETVAL
+
+
+SV*
+mk_SortKey (self, buf)
+    SV* self;
+    SV* buf;
+  PREINIT:
+    SV *dst, **svp;
+    STRLEN dlen, vlen;
+    U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel];
+    AV *bufAV;
+    HV *selfHV;
+    UV back_flag;
+    I32 i, buf_len;
+    IV  lv, level, uca_vers;
+    bool upper_lower, kata_hira, v2i, last_is_var;
+  CODE:
+    if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
+	selfHV = (HV*)SvRV(self);
+    else
+	croak("$self is not a HASHREF.");
+
+    svp = hv_fetch(selfHV, "level", 5, FALSE);
+    level = svp ? SvIV(*svp) : MaxLevel;
+
+    if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV)
+	bufAV = (AV*)SvRV(buf);
+    else
+	croak("XSUB, not an ARRAYREF.");
+
+    buf_len = av_len(bufAV);
+
+    if (buf_len < 0) { /* empty: -1 */
+	dlen = 2 * (MaxLevel - 1);
+	dst = newSV(dlen);
+	(void)SvPOK_only(dst);
+	d = SvPVX(dst);
+	while (dlen--)
+	    *d++ = '\0';
+    }
+    else {
+	for (lv = 0; lv < level; lv++) {
+	    New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8);
+	    s[lv] = eachlevel[lv];
+	}
+
+	svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE);
+	upper_lower = svp ? SvTRUE(*svp) : FALSE;
+	svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE);
+	kata_hira = svp ? SvTRUE(*svp) : FALSE;
+	svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
+	uca_vers = SvIV(*svp);
+	svp = hv_fetch(selfHV, "variable", 8, FALSE);
+	v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */
+	    ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13))
+	    : FALSE;
+
+	last_is_var = FALSE;
+	for (i = 0; i <= buf_len; i++) {
+	    svp = av_fetch(bufAV, i, FALSE);
+
+	    if (svp && SvPOK(*svp))
+		v = SvPV(*svp, vlen);
+	    else
+		croak("not a vwt.");
+
+	    if (vlen < VCE_Length) /* ignore short VCE (unexpected) */
+		continue;
+
+	    /* "Ignorable (L1, L2) after Variable" since track. v. 9 */
+	    if (v2i) {
+		if (*v)
+		    last_is_var = TRUE;
+		else if (v[1] || v[2]) /* non zero primary weight */
+		    last_is_var = FALSE;
+		else if (last_is_var) /* zero primary weight; skipped */
+		    continue;
+	    }
+
+	    if (v[5] == 0) { /* tert wt < 256 */
+		if (upper_lower) {
+		    if (0x8 <= v[6] && v[6] <= 0xC) /* lower */
+			v[6] -= 6;
+		    else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */
+			v[6] += 6;
+		    else if (v[6] == 0x1C) /* square upper */
+			v[6]++;
+		    else if (v[6] == 0x1D) /* square lower */
+			v[6]--;
+		}
+		if (kata_hira) {
+		    if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */
+			v[6] -= 2;
+		    else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */
+			v[6] += 5;
+		}
+	    }
+
+	    for (lv = 0; lv < level; lv++) {
+		if (v[2 * lv + 1] || v[2 * lv + 2]) {
+		    *s[lv]++ = v[2 * lv + 1];
+		    *s[lv]++ = v[2 * lv + 2];
+		}
+	    }
+	}
+
+	dlen = 2 * (MaxLevel - 1);
+	for (lv = 0; lv < level; lv++)
+	    dlen += s[lv] - eachlevel[lv];
+
+	dst = newSV(dlen);
+	(void)SvPOK_only(dst);
+	d = SvPVX(dst);
+
+	svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE);
+	back_flag = svp ? SvUV(*svp) : (UV)0;
+
+	for (lv = 0; lv < level; lv++) {
+	    if (back_flag & (1 << (lv + 1))) {
+		p = s[lv];
+		e = eachlevel[lv];
+		for ( ; e < p; p -= 2) {
+		    *d++ = p[-2];
+		    *d++ = p[-1];
+		}
+	    }
+	    else {
+		p = eachlevel[lv];
+		e = s[lv];
+		while (p < e)
+		    *d++ = *p++;
+	    }
+	    if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
+		*d++ = '\0';
+		*d++ = '\0';
+	    }
+	}
+
+	for (lv = level; lv < MaxLevel; lv++) {
+	    if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
+		*d++ = '\0';
+		*d++ = '\0';
+	    }
+	}
+
+	for (lv = 0; lv < level; lv++) {
+	    Safefree(eachlevel[lv]);
+	}
+    }
+    *d = '\0';
+    SvCUR_set(dst, d - (U8*)SvPVX(dst));
+    RETVAL = dst;
+OUTPUT:
+    RETVAL
+
+
+SV*
+_varCE (vbl, vce)
+    SV* vbl
+    SV* vce
+  PREINIT:
+    SV *dst;
+    U8 *a, *v, *d;
+    STRLEN alen, vlen;
+  CODE:
+    a = (U8*)SvPV(vbl, alen);
+    v = (U8*)SvPV(vce, vlen);
+
+    dst = newSV(vlen);
+    d = (U8*)SvPVX(dst);
+    (void)SvPOK_only(dst);
+    Copy(v, d, vlen, U8);
+    SvCUR_set(dst, vlen);
+    d[vlen] = '\0';
+
+    /* variable: checked only the first char and the length,
+       trusting checkCollator() and %VariableOK in Perl ... */
+
+    if (vlen < VCE_Length /* ignore short VCE (unexpected) */
+	||
+	*a == 'n') /* 'non-ignorable' */
+	1;
+    else if (*v) {
+	if (*a == 's') { /* shifted or shift-trimmed */
+	    d[7] = d[1]; /* wt level 1 to 4 */
+	    d[8] = d[2];
+	}
+	d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0';
+    }
+    else if (*a == 'b') /* blanked */
+	1;
+    else if (*a == 's') { /* shifted or shift-trimmed */
+	if (alen == 7 && (d[1] + d[2] + d[3] + d[4] + d[5] + d[6])) {
+	    d[7] = (U8)(Shift4Wt >> 8);
+	    d[8] = (U8)(Shift4Wt & 0xFF);
+	}
+	else {
+	    d[7] = d[8] = 0;
+	}
+    }
+    else
+	croak("unknown variable value '%s'", a);
+    RETVAL = dst;
+OUTPUT:
+    RETVAL
+
+
+
+SV*
+visualizeSortKey (self, key)
+    SV * self
+    SV * key
+  PREINIT:
+    HV *selfHV;
+    SV **svp, *dst;
+    U8 *s, *e, *d;
+    STRLEN klen, dlen;
+    UV uv;
+    IV uca_vers;
+    static char *upperhex = "0123456789ABCDEF";
+  CODE:
+    if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
+	selfHV = (HV*)SvRV(self);
+    else
+	croak("$self is not a HASHREF.");
+
+    svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
+    if (!svp)
+	croak("Panic: no $self->{UCA_Version} in visualizeSortKey");
+    uca_vers = SvIV(*svp);
+
+    s = (U8*)SvPV(key, klen);
+
+   /* slightly *longer* than the need, but I'm afraid of miscounting;
+      exactly: (klen / 2) * 5 + MaxLevel * 2 - 1 (excluding '\0')
+         = (klen / 2) * 5 - 1  # FFFF (16bit) and ' ' between 16bit units
+         + (MaxLevel - 1) * 2  # ' ' and '|' for level boundaries
+         + 2                   # '[' and ']'
+   */
+    dlen = (klen / 2) * 5 + MaxLevel * 2 + 2;
+    dst = newSV(dlen);
+    (void)SvPOK_only(dst);
+    d = (U8*)SvPVX(dst);
+
+    *d++ = '[';
+    for (e = s + klen; s < e; s += 2) {
+	uv = (U16)(*s << 8 | s[1]);
+	if (uv) {
+	    if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|')))
+		*d++ = ' ';
+	    *d++ = upperhex[ (s[0] >> 4) & 0xF ];
+	    *d++ = upperhex[  s[0]       & 0xF ];
+	    *d++ = upperhex[ (s[1] >> 4) & 0xF ];
+	    *d++ = upperhex[  s[1]       & 0xF ];
+	}
+	else {
+	    if ((9 <= uca_vers) && (d[-1] != '['))
+		*d++ = ' ';
+	    *d++ = '|';
+	}
+    }
+    *d++ = ']';
+    *d   = '\0';
+    SvCUR_set(dst, d - (U8*)SvPVX(dst));
+    RETVAL = dst;
+OUTPUT:
+    RETVAL
+
+
+
+void
+unpack_U (src)
+    SV* src
+  PREINIT:
+    STRLEN srclen, retlen;
+    U8 *s, *p, *e;
+    UV uv;
+  PPCODE:
+    s = (U8*)SvPV(src,srclen);
+    if (!SvUTF8(src)) {
+	SV* tmpsv = sv_mortalcopy(src);
+	if (!SvPOK(tmpsv))
+	    (void)sv_pvn_force(tmpsv,&srclen);
+	sv_utf8_upgrade(tmpsv);
+	s = (U8*)SvPV(tmpsv,srclen);
+    }
+    e = s + srclen;
+
+    for (p = s; p < e; p += retlen) {
+	uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
+	if (!retlen)
+	    croak(ErrRetlenIsZero);
+	XPUSHs(sv_2mortal(newSVuv(uv)));
+    }
+
diff --git a/cpan/Unicode-Collate/Collate/Locale.pm b/cpan/Unicode-Collate/Collate/Locale.pm
index 5dddfb82a7..39f04fcab5 100644
--- a/cpan/Unicode-Collate/Collate/Locale.pm
+++ b/cpan/Unicode-Collate/Collate/Locale.pm
@@ -4,12 +4,11 @@ use strict;
 use Carp;
 use base qw(Unicode::Collate);
 
-our $VERSION = '0.68';
+our $VERSION = '0.70';
 
 use File::Spec;
 
 (my $ModPath = $INC{'Unicode/Collate/Locale.pm'}) =~ s/\.pm$//;
-my $KeyPath = File::Spec->catfile('allkeys.txt');
 my $PL_EXT  = '.pl';
 
 my %LocaleFile = map { ($_, $_) } qw(
@@ -71,7 +70,6 @@ sub new {
     if (exists $hash{table}) {
 	croak "your table can't be used with Unicode::Collate::Locale";
     }
-    $hash{table} = $KeyPath;
 
     my $href = _fetchpl($hash{accepted_locale});
     while (my($k,$v) = each %$href) {
@@ -297,7 +295,7 @@ tailored as well as it. For example, even though W is tailored,
 fullwidth W (C<U+FF37>), W with acute (C<U+1E82>), etc. are not
 tailored. The result may depend on whether source strings are
 normalized or not, and whether decomposed or composed.
-Thus C<(normalization =E<gt> undef> is less preferred.
+Thus C<(normalization =E<gt> undef)> is less preferred.
 
 =back
 
@@ -305,7 +303,7 @@ Thus C<(normalization =E<gt> undef> is less preferred.
 
 The Unicode::Collate::Locale module for perl was written
 by SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>.
-This module is Copyright(C) 2004-2010, SADAHIRO Tomoyuki. Japan.
+This module is Copyright(C) 2004-2011, SADAHIRO Tomoyuki. Japan.
 All rights reserved.
 
 This module is free software; you can redistribute it and/or
diff --git a/cpan/Unicode-Collate/Makefile.PL b/cpan/Unicode-Collate/Makefile.PL
new file mode 100644
index 0000000000..30d6fc0aee
--- /dev/null
+++ b/cpan/Unicode-Collate/Makefile.PL
@@ -0,0 +1,28 @@
+require 5.006001;
+use ExtUtils::MakeMaker;
+
+my $clean = {};
+
+if (-f "Collate.xs") {
+    print STDERR "Making header files for XS...\n";
+
+    do 'mkheader' or die $@ || "mkheader: $!";
+
+    $clean = { FILES => 'ucatbl.h' };
+}
+
+WriteMakefile(
+    'INSTALLDIRS'	=> $] >= 5.007002 ? 'perl' : 'site',
+    'NAME'		=> 'Unicode::Collate',
+    'VERSION_FROM'	=> 'Collate.pm', # finds $VERSION
+    'clean'		=> $clean,
+    'PREREQ_PM'	  	=> {
+	Carp		=> 0,
+	constant	=> 0,
+	DynaLoader	=> 0,
+	File::Spec	=> 0,
+	strict		=> 0,
+	Test		=> 0,
+	warnings	=> 0,
+    },
+);
diff --git a/cpan/Unicode-Collate/README b/cpan/Unicode-Collate/README
index 16bf8c4aa7..7142c5fb2c 100644
--- a/cpan/Unicode-Collate/README
+++ b/cpan/Unicode-Collate/README
@@ -1,4 +1,4 @@
-Unicode/Collate version 0.68
+Unicode/Collate version 0.70
 ===============================
 
 NAME
@@ -40,6 +40,7 @@ INSTALL
   gendata/*, and mklocale.
   Tests for Unicode::Collate::Locale are named t/loc_*.t.
 
+Since 0.54, XSUB that requires a C compiler will be built by default.
 To install this module type the following:
 
    perl Makefile.PL
@@ -47,20 +48,20 @@ To install this module type the following:
    make test
    make install
 
-If you have a C compiler and want to use XSUB edition,
-type the following (!! "enableXS" must run before "Makefile.PL" !!):
+Even if a C compiler is not available, pure Perl (i.e. non-XS) edition
+is available; type the following:
 
-   perl enableXS
+   perl disableXS
    perl Makefile.PL
    make
    make test
    make install
 
-If you decide to install pure Perl (i.e. non-XS) edition after trying
-to build XSUB, type the following:
+If you decide to install XSUB edition after trying to build pure Perl,
+type the following:
 
    make clean
-   perl disableXS
+   perl enableXS
    perl Makefile.PL
    make
    make test
@@ -107,7 +108,7 @@ HOW TO CHANGE DUCET (NOT WARRANTED)
 AUTHOR, COPYRIGHT AND LICENSE
 
 The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
-<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2010,
+<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2011,
 SADAHIRO Tomoyuki. Japan. All rights reserved.
 
 This module is free software; you can redistribute it and/or
diff --git a/cpan/Unicode-Collate/mkheader b/cpan/Unicode-Collate/mkheader
new file mode 100644
index 0000000000..dde4ee110c
--- /dev/null
+++ b/cpan/Unicode-Collate/mkheader
@@ -0,0 +1,196 @@
+#!perl
+#
+# This auxiliary script makes five header files
+# used for building XSUB of Unicode::Collate.
+#
+# Usage:
+#    <do 'mkheader'> in perl, or <perl mkheader> in command line
+#
+# Input file:
+#    Collate/allkeys.txt
+#
+# Output file:
+#    ucatbl.h
+#
+use 5.006;
+use strict;
+use warnings;
+use Carp;
+use File::Spec;
+
+BEGIN {
+    unless ("A" eq pack('U', 0x41)) {
+	die "Unicode::Collate cannot stringify a Unicode code point\n";
+    }
+}
+
+use constant TRUE  => 1;
+use constant FALSE => "";
+use constant VCE_TEMPLATE => 'Cn4';
+
+sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
+
+our $PACKAGE = 'Unicode::Collate, mkheader';
+our $prefix  = "UCA_";
+
+our %SimpleEntries;	# $codepoint => $keys
+our @Rest;
+
+{
+    my($f, $fh);
+    foreach my $d ('.') {
+	$f = File::Spec->catfile($d, "Collate", "allkeys.txt");
+	last if open($fh, $f);
+	$f = undef;
+    }
+    croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
+
+    while (my $line = <$fh>) {
+	next if $line =~ /^\s*#/;
+	if ($line =~ /^\s*\@/) {
+	    push @Rest, $line;
+	    next;
+	}
+
+	next if $line !~ /^\s*[0-9A-Fa-f]/;
+
+	$line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
+
+	# gets element
+	my($e, $k) = split /;/, $line;
+
+	croak "Wrong Entry: <charList> must be separated by ';' ".
+	      "from <collElement>" if ! $k;
+
+	my @uv = _getHexArray($e);
+	next if !@uv;
+
+	if (@uv != 1) {
+	    push @Rest, $line;
+	    next;
+	}
+
+	my $is_L3_ignorable = TRUE;
+
+	my @key;
+	foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
+	    my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
+	    my @wt = _getHexArray($arr);
+	    push @key, pack(VCE_TEMPLATE, $var, @wt);
+	    $is_L3_ignorable = FALSE
+		if $wt[0] || $wt[1] || $wt[2];
+	    # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
+	    # is completely ignorable.
+	    # For expansion, an entry $is_L3_ignorable
+	    # if and only if "all" CEs are [.0000.0000.0000].
+	}
+	my $mapping = $is_L3_ignorable ? [] : \@key;
+	my $num = @$mapping;
+	my $str = chr($num).join('', @$mapping);
+	$SimpleEntries{$uv[0]} = stringify($str);
+    }
+}
+
+sub stringify {
+    my $str = shift;
+    return sprintf '"%s"', join '',
+	   map sprintf("\\x%02x", ord $_), split //, $str;
+
+}
+
+########## writing header files ##########
+
+my $init = '';
+{
+    my $type = "char*";
+    my $head = $prefix."rest";
+
+    $init .= "static $type $head [] = {\n";
+    for my $line (@Rest) {
+	$line =~ s/\s*\z//;
+	next if $line eq '';
+	$init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
+	$init .= "($type)".stringify($line).",\n";
+    }
+    $init .= "NULL\n"; # sentinel
+    $init .= "};\n\n";
+}
+
+my @tripletable = (
+    {
+	file => "ucatbl",
+	name => "simple",
+	type => "char*",
+	hash => \%SimpleEntries,
+	null => "NULL",
+	init => $init,
+    },
+);
+
+foreach my $tbl (@tripletable) {
+    my $file = "$tbl->{file}.h";
+    my $head = "${prefix}$tbl->{name}";
+    my $type = $tbl->{type};
+    my $hash = $tbl->{hash};
+    my $null = $tbl->{null};
+    my $init = $tbl->{init};
+
+    open FH, ">$file" or croak "$PACKAGE: $file can't be made";
+    binmode FH; select FH;
+    my %val;
+
+    print FH << 'EOF';
+/*
+ * This file is auto-generated by mkheader.
+ * Any changes here will be lost!
+ */
+EOF
+
+    print $init if defined $init;
+
+    foreach my $uv (keys %$hash) {
+	croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
+	    unless $uv <= 0x10FFFF;
+	my @c = unpack 'CCCC', pack 'N', $uv;
+	$val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
+    }
+
+    foreach my $p (sort { $a <=> $b } keys %val) {
+	next if ! $val{ $p };
+	for (my $r = 0; $r < 256; $r++) {
+	    next if ! $val{ $p }{ $r };
+	    printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;
+	    for (my $c = 0; $c < 256; $c++) {
+		print "\t", defined $val{$p}{$r}{$c}
+		    ? "($type)".$val{$p}{$r}{$c}
+		    : $null;
+		print ','  if $c != 255;
+		print "\n" if $c % 8 == 7;
+	    }
+	    print "};\n\n";
+	}
+    }
+    foreach my $p (sort { $a <=> $b } keys %val) {
+	next if ! $val{ $p };
+	printf "static $type* ${head}_%02x [256] = {\n", $p;
+	for (my $r = 0; $r < 256; $r++) {
+	    print $val{ $p }{ $r }
+		? sprintf("${head}_%02x_%02x", $p, $r)
+		: "NULL";
+	    print ','  if $r != 255;
+	    print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
+	}
+	print "};\n\n";
+    }
+    print "static $type** $head [] = {\n";
+    for (my $p = 0; $p <= 0x10; $p++) {
+	print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
+	print ','  if $p != 0x10;
+	print "\n";
+    }
+    print "};\n\n";
+    close FH;
+}
+
+1;
+__END__
diff --git a/cpan/Unicode-Collate/t/loc_test.t b/cpan/Unicode-Collate/t/loc_test.t
index d1b5b4a1e4..60c9773af3 100644
--- a/cpan/Unicode-Collate/t/loc_test.t
+++ b/cpan/Unicode-Collate/t/loc_test.t
@@ -12,7 +12,7 @@ BEGIN {
 }
 
 use Test;
-BEGIN { plan tests => 116 };
+BEGIN { plan tests => 120 };
 
 use strict;
 use warnings;
@@ -127,3 +127,13 @@ our @sortFr = $objFr->sort(@randFr);
 ok("@sortFr" eq "@listFr");
 
 # 116
+
+{
+    my $keyXS = '__useXS'; # see Unicode::Collate internal
+    my $UseXS = ref Unicode::Collate->new->{$keyXS};
+    ok(ref($Collator->{$keyXS}), $UseXS);
+    ok(ref($objFr   ->{$keyXS}), $UseXS);
+    ok(ref($objEs   ->{$keyXS}), $UseXS);
+    ok(ref($objEsT  ->{$keyXS}), $UseXS);
+}
+# 120
author	Chris 'BinGOs' Williams <chris@bingosnet.co.uk>	2011-01-16 23:23:03 +0000
committer	Chris 'BinGOs' Williams <chris@bingosnet.co.uk>	2011-01-17 00:22:13 +0000
commit	211cc5012284f4bd900fcaa630adbcac69ca6112 (patch)
tree	7f07e1acee7dd8b86e8816730d8a975a4946d477 /cpan
parent	a62b1201c068dc7b099bcb7182e188c4d2fbf34c (diff)
download	perl-211cc5012284f4bd900fcaa630adbcac69ca6112.tar.gz