summaryrefslogtreecommitdiff
path: root/cpan/Unicode-Collate
diff options
context:
space:
mode:
authorChris 'BinGOs' Williams <chris@bingosnet.co.uk>2011-01-19 14:14:46 +0000
committerChris 'BinGOs' Williams <chris@bingosnet.co.uk>2011-01-19 14:14:46 +0000
commit5a449a8e553425ace4bea1ea13c16c03c6c5bf4c (patch)
tree447e673f0abd93ca42b2bb07171008a2949b69c0 /cpan/Unicode-Collate
parentadcc1be12cd3a2e1c8fcc397726766a9b9df0cf0 (diff)
downloadperl-5a449a8e553425ace4bea1ea13c16c03c6c5bf4c.tar.gz
Revert "Update Unicode-Collate to CPAN version 0.70 and enable XS version"
This reverts commit 211cc5012284f4bd900fcaa630adbcac69ca6112.
Diffstat (limited to 'cpan/Unicode-Collate')
-rw-r--r--cpan/Unicode-Collate/.gitignore1
-rw-r--r--cpan/Unicode-Collate/Changes18
-rw-r--r--cpan/Unicode-Collate/Collate.pm360
-rw-r--r--cpan/Unicode-Collate/Collate.xs691
-rw-r--r--cpan/Unicode-Collate/Collate/Locale.pm8
-rw-r--r--cpan/Unicode-Collate/Makefile.PL28
-rw-r--r--cpan/Unicode-Collate/README17
-rw-r--r--cpan/Unicode-Collate/mkheader196
-rw-r--r--cpan/Unicode-Collate/t/loc_test.t12
9 files changed, 330 insertions, 1001 deletions
diff --git a/cpan/Unicode-Collate/.gitignore b/cpan/Unicode-Collate/.gitignore
deleted file mode 100644
index 424c745c12..0000000000
--- a/cpan/Unicode-Collate/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.h
diff --git a/cpan/Unicode-Collate/Changes b/cpan/Unicode-Collate/Changes
index c7bba12874..ca9be54809 100644
--- a/cpan/Unicode-Collate/Changes
+++ b/cpan/Unicode-Collate/Changes
@@ -1,13 +1,5 @@
Revision history for Perl module Unicode::Collate.
-0.70 Sun Jan 16 20:31:07 2011
- - Now U::C::Locale->new will use the compiled DUCET via XS.
-
-0.69 Sat Jan 15 19:41:11 2011
- - clarified about XSUB. revised INSTALL in README.
- - xs: flag passed to utf8n_to_uvuni().
- - doc and comments: [perl #81876] Fix typos by Peter J. Acklam.
-
0.68 Tue Nov 23 20:17:22 2010
- doc: clarified about (backwards => [ ]) and (backwards => undef).
- separated t/backwds.t from t/test.t.
@@ -32,7 +24,7 @@ Revision history for Perl module Unicode::Collate.
- 12 compat. ideographs (e.g. U+FA0E) are treated as unified ideographs.
(though DUCET also does it, now Unicode::Collate does it without DUCET.)
- added t/compatui.t.
- ! Ideographs Ext.B (U+20000..U+2A6D6) can be overridden with UCA_Version 8.
+ ! Ideographs Ext.B (U+20000..U+2A6D6) can be overrided with UCA_Version 8.
This is a long-standing behavior from Unicode::Collate 0.11 to 0.63.
A wrong fix at 0.64 should be abandoned.
@@ -129,8 +121,6 @@ Revision history for Perl module Unicode::Collate.
- U+9FC4..U+9FCB and U+2A700..U+2B734 are new CJK unified ideographs.
- Many hangul jamo are assigned (affecting hangul_terminator).
- ! Now XSUB will be built by default. (XSUB needs a C compiler.)
- To build pure perl, run disableXS before Makefile.PL.
! DUCET will be compiled when XS is used. Explicit saying
<table => 'allkeys.txt'> (or using another table) will prevent
this module from using the compiled DUCET.
@@ -184,11 +174,11 @@ Revision history for Perl module Unicode::Collate.
(Perl 5.7.3 or before)). If perl 5.6.X is used, XSUB may help it
in place of broken CORE::unpack('U*') in older perl.
- added illegal.t and illegalp.t in t.
- - added XSUB where some functions are implemented in XSUB.
- Pure Perl is also supported.
+ - added XSUB (EXPERIMENTAL!) where some functions are implemented
+ in XSUB. Pure Perl is also supported.
0.30 Mon Oct 13 21:26:37 2003
- - fix: Completely ignorable in table should be able to be overridden
+ - fix: Completely ignorable in table should be able to be overrided
by non-ignorable in entry.
- fix: Maximum length for contraction must not be shortened
by a shorter contraction following in table and/or entry.
diff --git a/cpan/Unicode-Collate/Collate.pm b/cpan/Unicode-Collate/Collate.pm
index 05822b2c11..b337b6f24b 100644
--- a/cpan/Unicode-Collate/Collate.pm
+++ b/cpan/Unicode-Collate/Collate.pm
@@ -14,13 +14,9 @@ use File::Spec;
no warnings 'utf8';
-our $VERSION = '0.70';
+our $VERSION = '0.6801';
our $PACKAGE = __PACKAGE__;
-require DynaLoader;
-our @ISA = qw(DynaLoader);
-bootstrap Unicode::Collate $VERSION;
-
my @Path = qw(Unicode Collate);
my $KeyFile = "allkeys.txt";
@@ -75,8 +71,49 @@ use constant NON_VAR => 0; # Non-Variable character
use constant VAR => 1; # Variable character
# specific code points
+use constant Hangul_SBase => 0xAC00;
use constant Hangul_SIni => 0xAC00;
use constant Hangul_SFin => 0xD7A3;
+use constant Hangul_NCount => 588;
+use constant Hangul_TCount => 28;
+use constant Hangul_LBase => 0x1100;
+use constant Hangul_LIni => 0x1100;
+use constant Hangul_LFin => 0x1159;
+use constant Hangul_LFill => 0x115F;
+use constant Hangul_LEnd => 0x115F; # Unicode 5.2
+use constant Hangul_VBase => 0x1161;
+use constant Hangul_VIni => 0x1160; # from Vowel Filler
+use constant Hangul_VFin => 0x11A2;
+use constant Hangul_VEnd => 0x11A7; # Unicode 5.2
+use constant Hangul_TBase => 0x11A7; # from "no-final" codepoint
+use constant Hangul_TIni => 0x11A8;
+use constant Hangul_TFin => 0x11F9;
+use constant Hangul_TEnd => 0x11FF; # Unicode 5.2
+use constant HangulL2Ini => 0xA960; # Unicode 5.2
+use constant HangulL2Fin => 0xA97C; # Unicode 5.2
+use constant HangulV2Ini => 0xD7B0; # Unicode 5.2
+use constant HangulV2Fin => 0xD7C6; # Unicode 5.2
+use constant HangulT2Ini => 0xD7CB; # Unicode 5.2
+use constant HangulT2Fin => 0xD7FB; # Unicode 5.2
+
+use constant CJK_UidIni => 0x4E00;
+use constant CJK_UidFin => 0x9FA5;
+use constant CJK_UidF41 => 0x9FBB;
+use constant CJK_UidF51 => 0x9FC3;
+use constant CJK_UidF52 => 0x9FCB;
+use constant CJK_ExtAIni => 0x3400; # Unicode 3.0
+use constant CJK_ExtAFin => 0x4DB5; # Unicode 3.0
+use constant CJK_ExtBIni => 0x20000; # Unicode 3.1
+use constant CJK_ExtBFin => 0x2A6D6; # Unicode 3.1
+use constant CJK_ExtCIni => 0x2A700; # Unicode 5.2
+use constant CJK_ExtCFin => 0x2B734; # Unicode 5.2
+use constant CJK_ExtDIni => 0x2B740; # Unicode 6.0
+use constant CJK_ExtDFin => 0x2B81D; # Unicode 6.0
+
+my %CompatUI = map +($_ => 1), (
+ 0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14, 0xFA1F,
+ 0xFA21, 0xFA23, 0xFA24, 0xFA27, 0xFA28, 0xFA29,
+);
# Logical_Order_Exception in PropList.txt
my $DefaultRearrange = [ 0x0E40..0x0E44, 0x0EC0..0x0EC4 ];
@@ -91,6 +128,10 @@ sub pack_U {
return pack('U*', @_);
}
+sub unpack_U {
+ return unpack('U*', shift(@_).pack('U*'));
+}
+
######
my (%VariableOK);
@@ -111,7 +152,6 @@ our @ChangeNG = qw/
versionTable alternateTable backwardsTable forwardsTable rearrangeTable
derivCode normCode rearrangeHash backwardsFlag
suppress suppressHash
- __useXS
/;
# The hash key 'ignored' is deleted at v 0.21.
# The hash key 'isShift' is deleted at v 0.23.
@@ -245,12 +285,6 @@ sub new
my $class = shift;
my $self = bless { @_ }, $class;
- if (! exists $self->{table} &&
- !defined $self->{undefName} && !defined $self->{ignoreName} &&
- !defined $self->{undefChar} && !defined $self->{ignoreChar}) {
- $self->{__useXS} = \&_fetch_simple;
- } # XS only
-
# keys of $self->{suppressHash} are $self->{suppress}.
if ($self->{suppress} && @{ $self->{suppress} }) {
@{ $self->{suppressHash} }{ @{ $self->{suppress} } } = ();
@@ -313,20 +347,6 @@ sub parseAtmark {
sub read_table {
my $self = shift;
- if ($self->{__useXS}) {
- my @rest = _fetch_rest(); # complex matter need to parse
- for my $line (@rest) {
- next if $line =~ /^\s*#/;
-
- if ($line =~ s/^\s*\@//) {
- $self->parseAtmark($line);
- } else {
- $self->parseEntry($line);
- }
- }
- return;
- }
-
my($f, $fh);
foreach my $d (@INC) {
$f = File::Spec->catfile($d, @Path, $self->{table});
@@ -425,12 +445,50 @@ sub parseEntry
}
+##
+## VCE = _varCE(variable, VCE)
+##
+sub _varCE
+{
+ my $vbl = shift;
+ my $vce = shift;
+ if ($vbl eq 'non-ignorable') {
+ return $vce;
+ }
+ my ($var, @wt) = unpack VCE_TEMPLATE, $vce;
+
+ if ($var) {
+ return pack(VCE_TEMPLATE, $var, 0, 0, 0,
+ $vbl eq 'blanked' ? $wt[3] : $wt[0]);
+ }
+ elsif ($vbl eq 'blanked') {
+ return $vce;
+ }
+ else {
+ return pack(VCE_TEMPLATE, $var, @wt[0..2],
+ $vbl eq 'shifted' && $wt[0]+$wt[1]+$wt[2] ? Shift4Wt : 0);
+ }
+}
+
sub viewSortKey
{
my $self = shift;
$self->visualizeSortKey($self->getSortKey(@_));
}
+sub visualizeSortKey
+{
+ my $self = shift;
+ my $view = join " ", map sprintf("%04X", $_), unpack(KEY_TEMPLATE, shift);
+
+ if ($self->{UCA_Version} <= 8) {
+ $view =~ s/ ?0000 ?/|/g;
+ } else {
+ $view =~ s/\b0000\b/|/g;
+ }
+ return "[$view]";
+}
+
##
## arrayref of JCPS = splitEnt(string to be collated)
@@ -448,7 +506,6 @@ sub splitEnt
my $reH = $self->{rearrangeHash};
my $vers = $self->{UCA_Version};
my $ver9 = $vers >= 9 && $vers <= 11;
- my $uXS = $self->{__useXS};
my ($str, @buf);
@@ -487,9 +544,6 @@ sub splitEnt
} elsif ($ver9) {
$src[$i] = undef if $map->{ $src[$i] } &&
@{ $map->{ $src[$i] } } == 0;
- if ($uXS) {
- $src[$i] = undef if _ignorable_simple($src[$i]);
- }
}
}
@@ -569,8 +623,7 @@ sub splitEnt
}
# skip completely ignorable
- if ($uXS && $jcps =~ /^[0-9]+\z/ && _ignorable_simple($jcps) ||
- $map->{$jcps} && @{ $map->{$jcps} } == 0) {
+ if ($map->{$jcps} && @{ $map->{$jcps} } == 0) {
if ($wLen && @buf) {
$buf[-1][2] = $i + 1;
}
@@ -609,13 +662,10 @@ sub getWt
my $vbl = $self->{variable};
my $map = $self->{mapping};
my $der = $self->{derivCode};
- my $uXS = $self->{__useXS};
return if !defined $u;
return map(_varCE($vbl, $_), @{ $map->{$u} })
if $map->{$u};
- return map(_varCE($vbl, $_), _fetch_simple($u))
- if $uXS && _exists_simple($u);
# JCPS must not be a contraction, then it's a code point.
if (Hangul_SIni <= $u && $u <= Hangul_SFin) {
@@ -642,7 +692,7 @@ sub getWt
$map->{$contract} and @decH = ($contract, $decH[2]);
}
# even if V's ignorable, LT contraction is not supported.
- # If such a situation were required, NFD should be used.
+ # If such a situatution were required, NFD should be used.
}
if (@decH == 3 && $max->{$decH[1]}) {
my $contract = join(CODE_SEP, @decH[1,2]);
@@ -651,9 +701,7 @@ sub getWt
}
@hangulCE = map({
- $map->{$_} ? @{ $map->{$_} } :
- $uXS && _exists_simple($_) ? _fetch_simple($_) :
- $der->($_);
+ $map->{$_} ? @{ $map->{$_} } : $der->($_);
} @decH);
}
return map _varCE($vbl, $_), @hangulCE;
@@ -678,10 +726,12 @@ sub getWt
sub getSortKey
{
my $self = shift;
+ my $lev = $self->{level};
my $rEnt = $self->splitEnt(shift); # get an arrayref of JCPS
my $vers = $self->{UCA_Version};
my $vbl = $self->{variable};
my $term = $self->{hangul_terminator};
+ my $v2i = $vers >= 9 && $vbl ne 'non-ignorable';
my @buf; # weight arrays
if ($term) {
@@ -706,7 +756,53 @@ sub getSortKey
}
}
- return $self->mk_SortKey(\@buf);
+ # make sort key
+ my @ret = ([],[],[],[]);
+ my $last_is_variable;
+
+ foreach my $vwt (@buf) {
+ my($var, @wt) = unpack(VCE_TEMPLATE, $vwt);
+
+ # "Ignorable (L1, L2) after Variable" since track. v. 9
+ if ($v2i) {
+ if ($var) {
+ $last_is_variable = TRUE;
+ } elsif (!$wt[0]) { # ignorable
+ next if $last_is_variable;
+ } else {
+ $last_is_variable = FALSE;
+ }
+ }
+ foreach my $v (0..$lev-1) {
+ 0 < $wt[$v] and push @{ $ret[$v] }, $wt[$v];
+ }
+ }
+
+ # modification of tertiary weights
+ if ($self->{upper_before_lower}) {
+ foreach my $w (@{ $ret[2] }) {
+ if (0x8 <= $w && $w <= 0xC) { $w -= 6 } # lower
+ elsif (0x2 <= $w && $w <= 0x6) { $w += 6 } # upper
+ elsif ($w == 0x1C) { $w += 1 } # square upper
+ elsif ($w == 0x1D) { $w -= 1 } # square lower
+ }
+ }
+ if ($self->{katakana_before_hiragana}) {
+ foreach my $w (@{ $ret[2] }) {
+ if (0x0F <= $w && $w <= 0x13) { $w -= 2 } # katakana
+ elsif (0x0D <= $w && $w <= 0x0E) { $w += 5 } # hiragana
+ }
+ }
+
+ if ($self->{backwardsFlag}) {
+ for (my $v = MinLevel; $v <= MaxLevel; $v++) {
+ if ($self->{backwardsFlag} & (1 << $v)) {
+ @{ $ret[$v-1] } = reverse @{ $ret[$v-1] };
+ }
+ }
+ }
+
+ join LEVEL_SEP, map pack(KEY_TEMPLATE, @$_), @ret;
}
@@ -733,6 +829,174 @@ sub sort {
}
+sub _derivCE_22 {
+ my $u = shift;
+ my $base = (CJK_UidIni <= $u && $u <= CJK_UidF52 || $CompatUI{$u})
+ ? 0xFB40 : # CJK
+ (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
+ CJK_ExtBIni <= $u && $u <= CJK_ExtBFin ||
+ CJK_ExtCIni <= $u && $u <= CJK_ExtCFin ||
+ CJK_ExtDIni <= $u && $u <= CJK_ExtDFin)
+ ? 0xFB80 # CJK ext.
+ : 0xFBC0; # others
+ my $aaaa = $base + ($u >> 15);
+ my $bbbb = ($u & 0x7FFF) | 0x8000;
+ return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
+ pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u);
+}
+
+sub _derivCE_20 {
+ my $u = shift;
+ my $base = (CJK_UidIni <= $u && $u <= CJK_UidF52 || $CompatUI{$u})
+ ? 0xFB40 : # CJK
+ (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
+ CJK_ExtBIni <= $u && $u <= CJK_ExtBFin ||
+ CJK_ExtCIni <= $u && $u <= CJK_ExtCFin)
+ ? 0xFB80 # CJK ext.
+ : 0xFBC0; # others
+ my $aaaa = $base + ($u >> 15);
+ my $bbbb = ($u & 0x7FFF) | 0x8000;
+ return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
+ pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u);
+}
+
+sub _derivCE_18 {
+ my $u = shift;
+ my $base = (CJK_UidIni <= $u && $u <= CJK_UidF51 || $CompatUI{$u})
+ ? 0xFB40 : # CJK
+ (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
+ CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
+ ? 0xFB80 # CJK ext.
+ : 0xFBC0; # others
+ my $aaaa = $base + ($u >> 15);
+ my $bbbb = ($u & 0x7FFF) | 0x8000;
+ return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
+ pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u);
+}
+
+sub _derivCE_14 {
+ my $u = shift;
+ my $base = (CJK_UidIni <= $u && $u <= CJK_UidF41 || $CompatUI{$u})
+ ? 0xFB40 : # CJK
+ (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
+ CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
+ ? 0xFB80 # CJK ext.
+ : 0xFBC0; # others
+ my $aaaa = $base + ($u >> 15);
+ my $bbbb = ($u & 0x7FFF) | 0x8000;
+ return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
+ pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u);
+}
+
+sub _derivCE_9 {
+ my $u = shift;
+ my $base = (CJK_UidIni <= $u && $u <= CJK_UidFin || $CompatUI{$u})
+ ? 0xFB40 : # CJK
+ (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
+ CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
+ ? 0xFB80 # CJK ext.
+ : 0xFBC0; # others
+ my $aaaa = $base + ($u >> 15);
+ my $bbbb = ($u & 0x7FFF) | 0x8000;
+ return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
+ pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u);
+}
+
+sub _derivCE_8 {
+ my $code = shift;
+ my $aaaa = 0xFF80 + ($code >> 15);
+ my $bbbb = ($code & 0x7FFF) | 0x8000;
+ return pack(VCE_TEMPLATE, NON_VAR, $aaaa, 2, 1, $code),
+ pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $code);
+}
+
+sub _uideoCE_8 {
+ my $u = shift;
+ return pack(VCE_TEMPLATE, NON_VAR, $u, Min2Wt, Min3Wt, $u);
+}
+
+sub _isUIdeo {
+ # $uca_vers = 0 for _uideoCE_8()
+ my ($u, $uca_vers) = @_;
+ return((CJK_UidIni <= $u && (
+ $uca_vers >= 20 ? ($u <= CJK_UidF52) :
+ $uca_vers >= 18 ? ($u <= CJK_UidF51) :
+ $uca_vers >= 14 ? ($u <= CJK_UidF41) :
+ ($u <= CJK_UidFin))) || $CompatUI{$u}
+ ||
+ (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin)
+ ||
+ ($uca_vers >= 8 && CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
+ ||
+ ($uca_vers >= 20 && CJK_ExtCIni <= $u && $u <= CJK_ExtCFin)
+ ||
+ ($uca_vers >= 22 && CJK_ExtDIni <= $u && $u <= CJK_ExtDFin)
+ );
+}
+
+
+##
+## "hhhh hhhh hhhh" to (dddd, dddd, dddd)
+##
+sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
+
+#
+# $code *must* be in Hangul syllable.
+# Check it before you enter here.
+#
+sub _decompHangul {
+ my $code = shift;
+ my $si = $code - Hangul_SBase;
+ my $li = int( $si / Hangul_NCount);
+ my $vi = int(($si % Hangul_NCount) / Hangul_TCount);
+ my $ti = $si % Hangul_TCount;
+ return (
+ Hangul_LBase + $li,
+ Hangul_VBase + $vi,
+ $ti ? (Hangul_TBase + $ti) : (),
+ );
+}
+
+sub _isIllegal {
+ my $code = shift;
+ return((! defined $code) # removed
+ || ($code < 0 || 0x10FFFF < $code) # out of range
+ );
+}
+
+sub _isNonchar {
+ my $code = shift;
+ return((($code & 0xFFFE) == 0xFFFE) # ??FFF[EF] (cf. utf8.c)
+ || (0xD800 <= $code && $code <= 0xDFFF) # unpaired surrogates
+ || (0xFDD0 <= $code && $code <= 0xFDEF) # other non-characters
+ );
+}
+
+# Hangul Syllable Type
+sub getHST {
+ my $u = shift;
+ my $vers = shift || 0;
+
+ if (Hangul_SIni <= $u && $u <= Hangul_SFin) {
+ return +($u - Hangul_SBase) % Hangul_TCount ? "LVT" : "LV";
+ }
+
+ if ($vers < 20) {
+ return Hangul_LIni <= $u && $u <= Hangul_LFin ||
+ $u == Hangul_LFill ? "L" :
+ Hangul_VIni <= $u && $u <= Hangul_VFin ? "V" :
+ Hangul_TIni <= $u && $u <= Hangul_TFin ? "T" : "";
+ } else {
+ return Hangul_LIni <= $u && $u <= Hangul_LEnd ||
+ HangulL2Ini <= $u && $u <= HangulL2Fin ? "L" :
+ Hangul_VIni <= $u && $u <= Hangul_VEnd ||
+ HangulV2Ini <= $u && $u <= HangulV2Fin ? "V" :
+ Hangul_TIni <= $u && $u <= Hangul_TEnd ||
+ HangulT2Ini <= $u && $u <= HangulT2Fin ? "T" : "";
+ }
+}
+
+
##
## bool _nonIgnorAtLevel(arrayref weights, int level)
##
@@ -759,7 +1023,7 @@ sub _eqArray($$$)
my $lev = shift;
for my $g (0..@$substr-1){
- # Do the $g'th graphemes have the same number of AV weights?
+ # Do the $g'th graphemes have the same number of AV weigths?
return if @{ $source->[$g] } != @{ $substr->[$g] };
for my $w (0..@{ $substr->[$g] }-1) {
@@ -1057,7 +1321,7 @@ The following tracking versions are supported. The default is 20.
Note: Recent UTS #10 renames "Tracking Version" to "Revision."
-* Noncharacters (e.g. U+FFFF) are not ignored, and can be overridden
+* Noncharacters (e.g. U+FFFF) are not ignored, and can be overrided
since C<UCA_Version> 22.
* Fully ignorable characters were ignored, and would not interrupt
@@ -1095,7 +1359,7 @@ forwards at all the levels.
If the same character (or a sequence of characters) exists
in the collation element table through C<table>,
-mapping to collation elements is overridden.
+mapping to collation elements is overrided.
If it does not exist, the mapping is defined additionally.
entry => <<'ENTRY', # for DUCET v4.0.0 (allkeys-4.0.0.txt)
@@ -1272,7 +1536,7 @@ those in the CJK Unified Ideographs Extension A etc.
Ext.D (U+2B740..U+2B81D) if UCA_Version is 22 or greater.
Through C<overrideCJK>, ordering of CJK unified ideographs (including
-extensions) can be overridden.
+extensions) can be overrided.
ex. CJK unified ideographs in the JIS code point order.
@@ -1315,7 +1579,7 @@ in C<table> or C<entry> is still valid.
B<Note:> In addition to them, 12 CJK compatibility ideographs (C<U+FA0E>,
C<U+FA0F>, C<U+FA11>, C<U+FA13>, C<U+FA14>, C<U+FA1F>, C<U+FA21>, C<U+FA23>,
C<U+FA24>, C<U+FA27>, C<U+FA28>, C<U+FA29>) are also treated as CJK unified
-ideographs. But they can't be overridden via C<overrideCJK> when you use
+ideographs. But they can't be overrided via C<overrideCJK> when you use
DUCET, as the table includes weights for them. C<table> or C<entry> has
priority over C<overrideCJK>.
@@ -1325,7 +1589,7 @@ priority over C<overrideCJK>.
By default, Hangul syllables are decomposed into Hangul Jamo,
even if C<(normalization =E<gt> undef)>.
-But the mapping of Hangul syllables may be overridden.
+But the mapping of Hangul syllables may be overrided.
This parameter works like C<overrideCJK>, so see there for examples.
@@ -1486,7 +1750,7 @@ this parameter doesn't work validly.
This key allows to variable weighting for variable collation elements,
which are marked with an ASTERISK in the table
-(NOTE: Many punctuation marks and symbols are variable in F<allkeys.txt>).
+(NOTE: Many punction marks and symbols are variable in F<allkeys.txt>).
variable => 'blanked', 'non-ignorable', 'shifted', or 'shift-trimmed'.
@@ -1794,7 +2058,7 @@ B<Unicode::Normalize is required to try The Conformance Test.>
=head1 AUTHOR, COPYRIGHT AND LICENSE
The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
-<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2011,
+<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2010,
SADAHIRO Tomoyuki. Japan. All rights reserved.
This module is free software; you can redistribute it and/or
diff --git a/cpan/Unicode-Collate/Collate.xs b/cpan/Unicode-Collate/Collate.xs
deleted file mode 100644
index d6004bdf25..0000000000
--- a/cpan/Unicode-Collate/Collate.xs
+++ /dev/null
@@ -1,691 +0,0 @@
-#include "EXTERN.h"
-#include "perl.h"
-#include "XSUB.h"
-
-/* This file is prepared by mkheader */
-#include "ucatbl.h"
-
-/* Perl 5.6.1 ? */
-#ifndef utf8n_to_uvuni
-#define utf8n_to_uvuni utf8_to_uv
-#endif /* utf8n_to_uvuni */
-
-/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
-#ifndef UTF8_ALLOW_BOM
-#define UTF8_ALLOW_BOM (0)
-#endif /* UTF8_ALLOW_BOM */
-
-#ifndef UTF8_ALLOW_SURROGATE
-#define UTF8_ALLOW_SURROGATE (0)
-#endif /* UTF8_ALLOW_SURROGATE */
-
-#ifndef UTF8_ALLOW_FE_FF
-#define UTF8_ALLOW_FE_FF (0)
-#endif /* UTF8_ALLOW_FE_FF */
-
-#ifndef UTF8_ALLOW_FFFF
-#define UTF8_ALLOW_FFFF (0)
-#endif /* UTF8_ALLOW_FFFF */
-
-#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
-
-/* if utf8n_to_uvuni() sets retlen to 0 (?) */
-#define ErrRetlenIsZero "panic (Unicode::Collate): zero-length character"
-
-/* At present, char > 0x10ffff are unaffected without complaint, right? */
-#define VALID_UTF_MAX (0x10ffff)
-#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
-
-static const UV max_div_16 = UV_MAX / 16;
-
-/* Supported Levels */
-#define MinLevel (1)
-#define MaxLevel (4)
-
-/* Shifted weight at 4th level */
-#define Shift4Wt (0xFFFF)
-
-#define VCE_Length (9)
-
-#define Hangul_SBase (0xAC00)
-#define Hangul_SIni (0xAC00)
-#define Hangul_SFin (0xD7A3)
-#define Hangul_NCount (588)
-#define Hangul_TCount (28)
-#define Hangul_LBase (0x1100)
-#define Hangul_LIni (0x1100)
-#define Hangul_LFin (0x1159)
-#define Hangul_LFill (0x115F)
-#define Hangul_LEnd (0x115F) /* Unicode 5.2 */
-#define Hangul_VBase (0x1161)
-#define Hangul_VIni (0x1160) /* from Vowel Filler */
-#define Hangul_VFin (0x11A2)
-#define Hangul_VEnd (0x11A7) /* Unicode 5.2 */
-#define Hangul_TBase (0x11A7) /* from "no-final" codepoint */
-#define Hangul_TIni (0x11A8)
-#define Hangul_TFin (0x11F9)
-#define Hangul_TEnd (0x11FF) /* Unicode 5.2 */
-#define HangulL2Ini (0xA960) /* Unicode 5.2 */
-#define HangulL2Fin (0xA97C) /* Unicode 5.2 */
-#define HangulV2Ini (0xD7B0) /* Unicode 5.2 */
-#define HangulV2Fin (0xD7C6) /* Unicode 5.2 */
-#define HangulT2Ini (0xD7CB) /* Unicode 5.2 */
-#define HangulT2Fin (0xD7FB) /* Unicode 5.2 */
-
-#define CJK_UidIni (0x4E00)
-#define CJK_UidFin (0x9FA5)
-#define CJK_UidF41 (0x9FBB)
-#define CJK_UidF51 (0x9FC3)
-#define CJK_UidF52 (0x9FCB)
-#define CJK_ExtAIni (0x3400) /* Unicode 3.0 */
-#define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */
-#define CJK_ExtBIni (0x20000) /* Unicode 3.1 */
-#define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */
-#define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */
-#define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */
-#define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */
-#define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */
-
-static STDCHAR UnifiedCompat[] = {
- 1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1
-}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */
-
-#define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode))
-
-MODULE = Unicode::Collate PACKAGE = Unicode::Collate
-
-PROTOTYPES: DISABLE
-
-void
-_fetch_rest ()
- PREINIT:
- char ** rest;
- PPCODE:
- for (rest = UCA_rest; *rest; ++rest) {
- XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0)));
- }
-
-
-void
-_fetch_simple (uv)
- UV uv
- PREINIT:
- U8 ***plane, **row;
- char* result = NULL;
- PPCODE:
- if (!OVER_UTF_MAX(uv)){
- plane = (U8***)UCA_simple[uv >> 16];
- if (plane) {
- row = plane[(uv >> 8) & 0xff];
- result = row ? row[uv & 0xff] : NULL;
- }
- }
- if (result) {
- int i;
- int num = (int)*result;
- ++result;
- for (i = 0; i < num; ++i) {
- XPUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length)));
- result += VCE_Length;
- }
- } else {
- XPUSHs(sv_2mortal(newSViv(0)));
- }
-
-SV*
-_ignorable_simple (uv)
- UV uv
- ALIAS:
- _exists_simple = 1
- PREINIT:
- U8 ***plane, **row;
- int num = -1;
- char* result = NULL;
- CODE:
- if (!OVER_UTF_MAX(uv)){
- plane = (U8***)UCA_simple[uv >> 16];
- if (plane) {
- row = plane[(uv >> 8) & 0xff];
- result = row ? row[uv & 0xff] : NULL;
- }
- if (result)
- num = (int)*result; /* assuming 0 <= num < 128 */
- }
-
- if (ix)
- RETVAL = boolSV(num >0);
- else
- RETVAL = boolSV(num==0);
- OUTPUT:
- RETVAL
-
-
-void
-_getHexArray (src)
- SV* src
- PREINIT:
- char *s, *e;
- STRLEN byte;
- UV value;
- bool overflowed = FALSE;
- const char *hexdigit;
- PPCODE:
- s = SvPV(src,byte);
- for (e = s + byte; s < e;) {
- hexdigit = strchr((char *) PL_hexdigit, *s++);
- if (! hexdigit)
- continue;
- value = (hexdigit - PL_hexdigit) & 0xF;
- while (*s) {
- hexdigit = strchr((char *) PL_hexdigit, *s++);
- if (! hexdigit)
- break;
- if (overflowed)
- continue;
- if (value > max_div_16) {
- overflowed = TRUE;
- continue;
- }
- value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF);
- }
- XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value)));
- }
-
-
-SV*
-_isIllegal (sv)
- SV* sv
- PREINIT:
- UV uv;
- CODE:
- if (!sv || !SvIOK(sv))
- XSRETURN_YES;
- uv = SvUVX(sv);
- RETVAL = boolSV(
- 0x10FFFF < uv /* out of range */
- );
-OUTPUT:
- RETVAL
-
-
-SV*
-_isNonchar (sv)
- SV* sv
- PREINIT:
- UV uv;
- CODE:
- /* should be called only if ! _isIllegal(sv). */
- uv = SvUVX(sv);
- RETVAL = boolSV(
- ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] (cf. utf8.c) */
- || (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */
- || (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */
- );
-OUTPUT:
- RETVAL
-
-
-void
-_decompHangul (code)
- UV code
- PREINIT:
- UV sindex, lindex, vindex, tindex;
- PPCODE:
- /* code *must* be in Hangul syllable.
- * Check it before you enter here. */
- sindex = code - Hangul_SBase;
- lindex = sindex / Hangul_NCount;
- vindex = (sindex % Hangul_NCount) / Hangul_TCount;
- tindex = sindex % Hangul_TCount;
-
- XPUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase)));
- XPUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase)));
- if (tindex)
- XPUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase)));
-
-
-SV*
-getHST (code, uca_vers = 0)
- UV code;
- IV uca_vers;
- PREINIT:
- char * hangtype;
- STRLEN typelen;
- CODE:
- if (codeRange(Hangul_SIni, Hangul_SFin)) {
- if ((code - Hangul_SBase) % Hangul_TCount) {
- hangtype = "LVT"; typelen = 3;
- } else {
- hangtype = "LV"; typelen = 2;
- }
- } else if (uca_vers < 20) {
- if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) {
- hangtype = "L"; typelen = 1;
- } else if (codeRange(Hangul_VIni, Hangul_VFin)) {
- hangtype = "V"; typelen = 1;
- } else if (codeRange(Hangul_TIni, Hangul_TFin)) {
- hangtype = "T"; typelen = 1;
- } else {
- hangtype = ""; typelen = 0;
- }
- } else {
- if (codeRange(Hangul_LIni, Hangul_LEnd) ||
- codeRange(HangulL2Ini, HangulL2Fin)) {
- hangtype = "L"; typelen = 1;
- } else if (codeRange(Hangul_VIni, Hangul_VEnd) ||
- codeRange(HangulV2Ini, HangulV2Fin)) {
- hangtype = "V"; typelen = 1;
- } else if (codeRange(Hangul_TIni, Hangul_TEnd) ||
- codeRange(HangulT2Ini, HangulT2Fin)) {
- hangtype = "T"; typelen = 1;
- } else {
- hangtype = ""; typelen = 0;
- }
- }
-
- RETVAL = newSVpvn(hangtype, typelen);
-OUTPUT:
- RETVAL
-
-
-void
-_derivCE_9 (code)
- UV code
- ALIAS:
- _derivCE_14 = 1
- _derivCE_18 = 2
- _derivCE_20 = 3
- _derivCE_22 = 4
- PREINIT:
- UV base, aaaa, bbbb;
- U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
- U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
- bool basic_unified = 0;
- PPCODE:
- if (CJK_UidIni <= code) {
- if (codeRange(0xFA0E, 0xFA29))
- basic_unified = (bool)UnifiedCompat[code - 0xFA0E];
- else
- basic_unified = (ix >= 3 ? (code <= CJK_UidF52) :
- ix == 2 ? (code <= CJK_UidF51) :
- ix == 1 ? (code <= CJK_UidF41) :
- (code <= CJK_UidFin));
- }
- base = (basic_unified)
- ? 0xFB40 : /* CJK */
- ((codeRange(CJK_ExtAIni, CJK_ExtAFin))
- ||
- (codeRange(CJK_ExtBIni, CJK_ExtBFin))
- ||
- (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
- ||
- (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin)))
- ? 0xFB80 /* CJK ext. */
- : 0xFBC0; /* others */
- aaaa = base + (code >> 15);
- bbbb = (code & 0x7FFF) | 0x8000;
- a[1] = (U8)(aaaa >> 8);
- a[2] = (U8)(aaaa & 0xFF);
- b[1] = (U8)(bbbb >> 8);
- b[2] = (U8)(bbbb & 0xFF);
- a[7] = b[7] = (U8)(code >> 8);
- a[8] = b[8] = (U8)(code & 0xFF);
- XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
- XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
-
-
-void
-_derivCE_8 (code)
- UV code
- PREINIT:
- UV aaaa, bbbb;
- U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x02\x00\x01\xFF\xFF";
- U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
- PPCODE:
- aaaa = 0xFF80 + (code >> 15);
- bbbb = (code & 0x7FFF) | 0x8000;
- a[1] = (U8)(aaaa >> 8);
- a[2] = (U8)(aaaa & 0xFF);
- b[1] = (U8)(bbbb >> 8);
- b[2] = (U8)(bbbb & 0xFF);
- a[7] = b[7] = (U8)(code >> 8);
- a[8] = b[8] = (U8)(code & 0xFF);
- XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
- XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
-
-
-void
-_uideoCE_8 (code)
- UV code
- PREINIT:
- U8 uice[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
- PPCODE:
- uice[1] = uice[7] = (U8)(code >> 8);
- uice[2] = uice[8] = (U8)(code & 0xFF);
- XPUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length)));
-
-
-SV*
-_isUIdeo (code, uca_vers)
- UV code;
- IV uca_vers;
- bool basic_unified = 0;
- CODE:
- /* uca_vers = 0 for _uideoCE_8() */
- if (CJK_UidIni <= code) {
- if (codeRange(0xFA0E, 0xFA29))
- basic_unified = (bool)UnifiedCompat[code - 0xFA0E];
- else
- basic_unified = (uca_vers >= 20 ? (code <= CJK_UidF52) :
- uca_vers >= 18 ? (code <= CJK_UidF51) :
- uca_vers >= 14 ? (code <= CJK_UidF41) :
- (code <= CJK_UidFin));
- }
- RETVAL = boolSV(
- (basic_unified)
- ||
- (codeRange(CJK_ExtAIni, CJK_ExtAFin))
- ||
- (uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin))
- ||
- (uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
- ||
- (uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
- );
-OUTPUT:
- RETVAL
-
-
-SV*
-mk_SortKey (self, buf)
- SV* self;
- SV* buf;
- PREINIT:
- SV *dst, **svp;
- STRLEN dlen, vlen;
- U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel];
- AV *bufAV;
- HV *selfHV;
- UV back_flag;
- I32 i, buf_len;
- IV lv, level, uca_vers;
- bool upper_lower, kata_hira, v2i, last_is_var;
- CODE:
- if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
- selfHV = (HV*)SvRV(self);
- else
- croak("$self is not a HASHREF.");
-
- svp = hv_fetch(selfHV, "level", 5, FALSE);
- level = svp ? SvIV(*svp) : MaxLevel;
-
- if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV)
- bufAV = (AV*)SvRV(buf);
- else
- croak("XSUB, not an ARRAYREF.");
-
- buf_len = av_len(bufAV);
-
- if (buf_len < 0) { /* empty: -1 */
- dlen = 2 * (MaxLevel - 1);
- dst = newSV(dlen);
- (void)SvPOK_only(dst);
- d = SvPVX(dst);
- while (dlen--)
- *d++ = '\0';
- }
- else {
- for (lv = 0; lv < level; lv++) {
- New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8);
- s[lv] = eachlevel[lv];
- }
-
- svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE);
- upper_lower = svp ? SvTRUE(*svp) : FALSE;
- svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE);
- kata_hira = svp ? SvTRUE(*svp) : FALSE;
- svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
- uca_vers = SvIV(*svp);
- svp = hv_fetch(selfHV, "variable", 8, FALSE);
- v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */
- ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13))
- : FALSE;
-
- last_is_var = FALSE;
- for (i = 0; i <= buf_len; i++) {
- svp = av_fetch(bufAV, i, FALSE);
-
- if (svp && SvPOK(*svp))
- v = SvPV(*svp, vlen);
- else
- croak("not a vwt.");
-
- if (vlen < VCE_Length) /* ignore short VCE (unexpected) */
- continue;
-
- /* "Ignorable (L1, L2) after Variable" since track. v. 9 */
- if (v2i) {
- if (*v)
- last_is_var = TRUE;
- else if (v[1] || v[2]) /* non zero primary weight */
- last_is_var = FALSE;
- else if (last_is_var) /* zero primary weight; skipped */
- continue;
- }
-
- if (v[5] == 0) { /* tert wt < 256 */
- if (upper_lower) {
- if (0x8 <= v[6] && v[6] <= 0xC) /* lower */
- v[6] -= 6;
- else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */
- v[6] += 6;
- else if (v[6] == 0x1C) /* square upper */
- v[6]++;
- else if (v[6] == 0x1D) /* square lower */
- v[6]--;
- }
- if (kata_hira) {
- if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */
- v[6] -= 2;
- else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */
- v[6] += 5;
- }
- }
-
- for (lv = 0; lv < level; lv++) {
- if (v[2 * lv + 1] || v[2 * lv + 2]) {
- *s[lv]++ = v[2 * lv + 1];
- *s[lv]++ = v[2 * lv + 2];
- }
- }
- }
-
- dlen = 2 * (MaxLevel - 1);
- for (lv = 0; lv < level; lv++)
- dlen += s[lv] - eachlevel[lv];
-
- dst = newSV(dlen);
- (void)SvPOK_only(dst);
- d = SvPVX(dst);
-
- svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE);
- back_flag = svp ? SvUV(*svp) : (UV)0;
-
- for (lv = 0; lv < level; lv++) {
- if (back_flag & (1 << (lv + 1))) {
- p = s[lv];
- e = eachlevel[lv];
- for ( ; e < p; p -= 2) {
- *d++ = p[-2];
- *d++ = p[-1];
- }
- }
- else {
- p = eachlevel[lv];
- e = s[lv];
- while (p < e)
- *d++ = *p++;
- }
- if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
- *d++ = '\0';
- *d++ = '\0';
- }
- }
-
- for (lv = level; lv < MaxLevel; lv++) {
- if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
- *d++ = '\0';
- *d++ = '\0';
- }
- }
-
- for (lv = 0; lv < level; lv++) {
- Safefree(eachlevel[lv]);
- }
- }
- *d = '\0';
- SvCUR_set(dst, d - (U8*)SvPVX(dst));
- RETVAL = dst;
-OUTPUT:
- RETVAL
-
-
-SV*
-_varCE (vbl, vce)
- SV* vbl
- SV* vce
- PREINIT:
- SV *dst;
- U8 *a, *v, *d;
- STRLEN alen, vlen;
- CODE:
- a = (U8*)SvPV(vbl, alen);
- v = (U8*)SvPV(vce, vlen);
-
- dst = newSV(vlen);
- d = (U8*)SvPVX(dst);
- (void)SvPOK_only(dst);
- Copy(v, d, vlen, U8);
- SvCUR_set(dst, vlen);
- d[vlen] = '\0';
-
- /* variable: checked only the first char and the length,
- trusting checkCollator() and %VariableOK in Perl ... */
-
- if (vlen < VCE_Length /* ignore short VCE (unexpected) */
- ||
- *a == 'n') /* 'non-ignorable' */
- 1;
- else if (*v) {
- if (*a == 's') { /* shifted or shift-trimmed */
- d[7] = d[1]; /* wt level 1 to 4 */
- d[8] = d[2];
- }
- d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0';
- }
- else if (*a == 'b') /* blanked */
- 1;
- else if (*a == 's') { /* shifted or shift-trimmed */
- if (alen == 7 && (d[1] + d[2] + d[3] + d[4] + d[5] + d[6])) {
- d[7] = (U8)(Shift4Wt >> 8);
- d[8] = (U8)(Shift4Wt & 0xFF);
- }
- else {
- d[7] = d[8] = 0;
- }
- }
- else
- croak("unknown variable value '%s'", a);
- RETVAL = dst;
-OUTPUT:
- RETVAL
-
-
-
-SV*
-visualizeSortKey (self, key)
- SV * self
- SV * key
- PREINIT:
- HV *selfHV;
- SV **svp, *dst;
- U8 *s, *e, *d;
- STRLEN klen, dlen;
- UV uv;
- IV uca_vers;
- static char *upperhex = "0123456789ABCDEF";
- CODE:
- if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
- selfHV = (HV*)SvRV(self);
- else
- croak("$self is not a HASHREF.");
-
- svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
- if (!svp)
- croak("Panic: no $self->{UCA_Version} in visualizeSortKey");
- uca_vers = SvIV(*svp);
-
- s = (U8*)SvPV(key, klen);
-
- /* slightly *longer* than the need, but I'm afraid of miscounting;
- exactly: (klen / 2) * 5 + MaxLevel * 2 - 1 (excluding '\0')
- = (klen / 2) * 5 - 1 # FFFF (16bit) and ' ' between 16bit units
- + (MaxLevel - 1) * 2 # ' ' and '|' for level boundaries
- + 2 # '[' and ']'
- */
- dlen = (klen / 2) * 5 + MaxLevel * 2 + 2;
- dst = newSV(dlen);
- (void)SvPOK_only(dst);
- d = (U8*)SvPVX(dst);
-
- *d++ = '[';
- for (e = s + klen; s < e; s += 2) {
- uv = (U16)(*s << 8 | s[1]);
- if (uv) {
- if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|')))
- *d++ = ' ';
- *d++ = upperhex[ (s[0] >> 4) & 0xF ];
- *d++ = upperhex[ s[0] & 0xF ];
- *d++ = upperhex[ (s[1] >> 4) & 0xF ];
- *d++ = upperhex[ s[1] & 0xF ];
- }
- else {
- if ((9 <= uca_vers) && (d[-1] != '['))
- *d++ = ' ';
- *d++ = '|';
- }
- }
- *d++ = ']';
- *d = '\0';
- SvCUR_set(dst, d - (U8*)SvPVX(dst));
- RETVAL = dst;
-OUTPUT:
- RETVAL
-
-
-
-void
-unpack_U (src)
- SV* src
- PREINIT:
- STRLEN srclen, retlen;
- U8 *s, *p, *e;
- UV uv;
- PPCODE:
- s = (U8*)SvPV(src,srclen);
- if (!SvUTF8(src)) {
- SV* tmpsv = sv_mortalcopy(src);
- if (!SvPOK(tmpsv))
- (void)sv_pvn_force(tmpsv,&srclen);
- sv_utf8_upgrade(tmpsv);
- s = (U8*)SvPV(tmpsv,srclen);
- }
- e = s + srclen;
-
- for (p = s; p < e; p += retlen) {
- uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
- if (!retlen)
- croak(ErrRetlenIsZero);
- XPUSHs(sv_2mortal(newSVuv(uv)));
- }
-
diff --git a/cpan/Unicode-Collate/Collate/Locale.pm b/cpan/Unicode-Collate/Collate/Locale.pm
index 39f04fcab5..5dddfb82a7 100644
--- a/cpan/Unicode-Collate/Collate/Locale.pm
+++ b/cpan/Unicode-Collate/Collate/Locale.pm
@@ -4,11 +4,12 @@ use strict;
use Carp;
use base qw(Unicode::Collate);
-our $VERSION = '0.70';
+our $VERSION = '0.68';
use File::Spec;
(my $ModPath = $INC{'Unicode/Collate/Locale.pm'}) =~ s/\.pm$//;
+my $KeyPath = File::Spec->catfile('allkeys.txt');
my $PL_EXT = '.pl';
my %LocaleFile = map { ($_, $_) } qw(
@@ -70,6 +71,7 @@ sub new {
if (exists $hash{table}) {
croak "your table can't be used with Unicode::Collate::Locale";
}
+ $hash{table} = $KeyPath;
my $href = _fetchpl($hash{accepted_locale});
while (my($k,$v) = each %$href) {
@@ -295,7 +297,7 @@ tailored as well as it. For example, even though W is tailored,
fullwidth W (C<U+FF37>), W with acute (C<U+1E82>), etc. are not
tailored. The result may depend on whether source strings are
normalized or not, and whether decomposed or composed.
-Thus C<(normalization =E<gt> undef)> is less preferred.
+Thus C<(normalization =E<gt> undef> is less preferred.
=back
@@ -303,7 +305,7 @@ Thus C<(normalization =E<gt> undef)> is less preferred.
The Unicode::Collate::Locale module for perl was written
by SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>.
-This module is Copyright(C) 2004-2011, SADAHIRO Tomoyuki. Japan.
+This module is Copyright(C) 2004-2010, SADAHIRO Tomoyuki. Japan.
All rights reserved.
This module is free software; you can redistribute it and/or
diff --git a/cpan/Unicode-Collate/Makefile.PL b/cpan/Unicode-Collate/Makefile.PL
deleted file mode 100644
index 30d6fc0aee..0000000000
--- a/cpan/Unicode-Collate/Makefile.PL
+++ /dev/null
@@ -1,28 +0,0 @@
-require 5.006001;
-use ExtUtils::MakeMaker;
-
-my $clean = {};
-
-if (-f "Collate.xs") {
- print STDERR "Making header files for XS...\n";
-
- do 'mkheader' or die $@ || "mkheader: $!";
-
- $clean = { FILES => 'ucatbl.h' };
-}
-
-WriteMakefile(
- 'INSTALLDIRS' => $] >= 5.007002 ? 'perl' : 'site',
- 'NAME' => 'Unicode::Collate',
- 'VERSION_FROM' => 'Collate.pm', # finds $VERSION
- 'clean' => $clean,
- 'PREREQ_PM' => {
- Carp => 0,
- constant => 0,
- DynaLoader => 0,
- File::Spec => 0,
- strict => 0,
- Test => 0,
- warnings => 0,
- },
-);
diff --git a/cpan/Unicode-Collate/README b/cpan/Unicode-Collate/README
index 7142c5fb2c..16bf8c4aa7 100644
--- a/cpan/Unicode-Collate/README
+++ b/cpan/Unicode-Collate/README
@@ -1,4 +1,4 @@
-Unicode/Collate version 0.70
+Unicode/Collate version 0.68
===============================
NAME
@@ -40,7 +40,6 @@ INSTALL
gendata/*, and mklocale.
Tests for Unicode::Collate::Locale are named t/loc_*.t.
-Since 0.54, XSUB that requires a C compiler will be built by default.
To install this module type the following:
perl Makefile.PL
@@ -48,20 +47,20 @@ To install this module type the following:
make test
make install
-Even if a C compiler is not available, pure Perl (i.e. non-XS) edition
-is available; type the following:
+If you have a C compiler and want to use XSUB edition,
+type the following (!! "enableXS" must run before "Makefile.PL" !!):
- perl disableXS
+ perl enableXS
perl Makefile.PL
make
make test
make install
-If you decide to install XSUB edition after trying to build pure Perl,
-type the following:
+If you decide to install pure Perl (i.e. non-XS) edition after trying
+to build XSUB, type the following:
make clean
- perl enableXS
+ perl disableXS
perl Makefile.PL
make
make test
@@ -108,7 +107,7 @@ HOW TO CHANGE DUCET (NOT WARRANTED)
AUTHOR, COPYRIGHT AND LICENSE
The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
-<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2011,
+<SADAHIRO@cpan.org>. This module is Copyright(C) 2001-2010,
SADAHIRO Tomoyuki. Japan. All rights reserved.
This module is free software; you can redistribute it and/or
diff --git a/cpan/Unicode-Collate/mkheader b/cpan/Unicode-Collate/mkheader
deleted file mode 100644
index dde4ee110c..0000000000
--- a/cpan/Unicode-Collate/mkheader
+++ /dev/null
@@ -1,196 +0,0 @@
-#!perl
-#
-# This auxiliary script makes five header files
-# used for building XSUB of Unicode::Collate.
-#
-# Usage:
-# <do 'mkheader'> in perl, or <perl mkheader> in command line
-#
-# Input file:
-# Collate/allkeys.txt
-#
-# Output file:
-# ucatbl.h
-#
-use 5.006;
-use strict;
-use warnings;
-use Carp;
-use File::Spec;
-
-BEGIN {
- unless ("A" eq pack('U', 0x41)) {
- die "Unicode::Collate cannot stringify a Unicode code point\n";
- }
-}
-
-use constant TRUE => 1;
-use constant FALSE => "";
-use constant VCE_TEMPLATE => 'Cn4';
-
-sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
-
-our $PACKAGE = 'Unicode::Collate, mkheader';
-our $prefix = "UCA_";
-
-our %SimpleEntries; # $codepoint => $keys
-our @Rest;
-
-{
- my($f, $fh);
- foreach my $d ('.') {
- $f = File::Spec->catfile($d, "Collate", "allkeys.txt");
- last if open($fh, $f);
- $f = undef;
- }
- croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
-
- while (my $line = <$fh>) {
- next if $line =~ /^\s*#/;
- if ($line =~ /^\s*\@/) {
- push @Rest, $line;
- next;
- }
-
- next if $line !~ /^\s*[0-9A-Fa-f]/;
-
- $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
-
- # gets element
- my($e, $k) = split /;/, $line;
-
- croak "Wrong Entry: <charList> must be separated by ';' ".
- "from <collElement>" if ! $k;
-
- my @uv = _getHexArray($e);
- next if !@uv;
-
- if (@uv != 1) {
- push @Rest, $line;
- next;
- }
-
- my $is_L3_ignorable = TRUE;
-
- my @key;
- foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
- my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
- my @wt = _getHexArray($arr);
- push @key, pack(VCE_TEMPLATE, $var, @wt);
- $is_L3_ignorable = FALSE
- if $wt[0] || $wt[1] || $wt[2];
- # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
- # is completely ignorable.
- # For expansion, an entry $is_L3_ignorable
- # if and only if "all" CEs are [.0000.0000.0000].
- }
- my $mapping = $is_L3_ignorable ? [] : \@key;
- my $num = @$mapping;
- my $str = chr($num).join('', @$mapping);
- $SimpleEntries{$uv[0]} = stringify($str);
- }
-}
-
-sub stringify {
- my $str = shift;
- return sprintf '"%s"', join '',
- map sprintf("\\x%02x", ord $_), split //, $str;
-
-}
-
-########## writing header files ##########
-
-my $init = '';
-{
- my $type = "char*";
- my $head = $prefix."rest";
-
- $init .= "static $type $head [] = {\n";
- for my $line (@Rest) {
- $line =~ s/\s*\z//;
- next if $line eq '';
- $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
- $init .= "($type)".stringify($line).",\n";
- }
- $init .= "NULL\n"; # sentinel
- $init .= "};\n\n";
-}
-
-my @tripletable = (
- {
- file => "ucatbl",
- name => "simple",
- type => "char*",
- hash => \%SimpleEntries,
- null => "NULL",
- init => $init,
- },
-);
-
-foreach my $tbl (@tripletable) {
- my $file = "$tbl->{file}.h";
- my $head = "${prefix}$tbl->{name}";
- my $type = $tbl->{type};
- my $hash = $tbl->{hash};
- my $null = $tbl->{null};
- my $init = $tbl->{init};
-
- open FH, ">$file" or croak "$PACKAGE: $file can't be made";
- binmode FH; select FH;
- my %val;
-
- print FH << 'EOF';
-/*
- * This file is auto-generated by mkheader.
- * Any changes here will be lost!
- */
-EOF
-
- print $init if defined $init;
-
- foreach my $uv (keys %$hash) {
- croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
- unless $uv <= 0x10FFFF;
- my @c = unpack 'CCCC', pack 'N', $uv;
- $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
- }
-
- foreach my $p (sort { $a <=> $b } keys %val) {
- next if ! $val{ $p };
- for (my $r = 0; $r < 256; $r++) {
- next if ! $val{ $p }{ $r };
- printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;
- for (my $c = 0; $c < 256; $c++) {
- print "\t", defined $val{$p}{$r}{$c}
- ? "($type)".$val{$p}{$r}{$c}
- : $null;
- print ',' if $c != 255;
- print "\n" if $c % 8 == 7;
- }
- print "};\n\n";
- }
- }
- foreach my $p (sort { $a <=> $b } keys %val) {
- next if ! $val{ $p };
- printf "static $type* ${head}_%02x [256] = {\n", $p;
- for (my $r = 0; $r < 256; $r++) {
- print $val{ $p }{ $r }
- ? sprintf("${head}_%02x_%02x", $p, $r)
- : "NULL";
- print ',' if $r != 255;
- print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
- }
- print "};\n\n";
- }
- print "static $type** $head [] = {\n";
- for (my $p = 0; $p <= 0x10; $p++) {
- print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
- print ',' if $p != 0x10;
- print "\n";
- }
- print "};\n\n";
- close FH;
-}
-
-1;
-__END__
diff --git a/cpan/Unicode-Collate/t/loc_test.t b/cpan/Unicode-Collate/t/loc_test.t
index 60c9773af3..d1b5b4a1e4 100644
--- a/cpan/Unicode-Collate/t/loc_test.t
+++ b/cpan/Unicode-Collate/t/loc_test.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 120 };
+BEGIN { plan tests => 116 };
use strict;
use warnings;
@@ -127,13 +127,3 @@ our @sortFr = $objFr->sort(@randFr);
ok("@sortFr" eq "@listFr");
# 116
-
-{
- my $keyXS = '__useXS'; # see Unicode::Collate internal
- my $UseXS = ref Unicode::Collate->new->{$keyXS};
- ok(ref($Collator->{$keyXS}), $UseXS);
- ok(ref($objFr ->{$keyXS}), $UseXS);
- ok(ref($objEs ->{$keyXS}), $UseXS);
- ok(ref($objEsT ->{$keyXS}), $UseXS);
-}
-# 120