summaryrefslogtreecommitdiff
path: root/cpan/Unicode-Normalize
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2016-11-11 14:52:39 +0100
committerKarl Williamson <khw@cpan.org>2016-11-11 17:51:30 +0100
commit3baae3fab5dea1469c2d040f5380bc2009bdeecb (patch)
treef20f6c9da2a60da215c786c01db84d23516914f5 /cpan/Unicode-Normalize
parentaf25b33d388e2824ad52b31c8f5c7bc722f02dd6 (diff)
downloadperl-3baae3fab5dea1469c2d040f5380bc2009bdeecb.tar.gz
Move Unicode-Normalize to dist/
p5p has taken over the maintenance of this module, so it should be in dist/
Diffstat (limited to 'cpan/Unicode-Normalize')
-rw-r--r--cpan/Unicode-Normalize/.gitignore1
-rw-r--r--cpan/Unicode-Normalize/Makefile.PL55
-rw-r--r--cpan/Unicode-Normalize/Normalize.pm635
-rw-r--r--cpan/Unicode-Normalize/Normalize.xs925
-rw-r--r--cpan/Unicode-Normalize/mkheader419
-rw-r--r--cpan/Unicode-Normalize/t/fcdc.t138
-rw-r--r--cpan/Unicode-Normalize/t/form.t84
-rw-r--r--cpan/Unicode-Normalize/t/func.t386
-rw-r--r--cpan/Unicode-Normalize/t/illegal.t85
-rw-r--r--cpan/Unicode-Normalize/t/norm.t145
-rw-r--r--cpan/Unicode-Normalize/t/null.t100
-rw-r--r--cpan/Unicode-Normalize/t/partial1.t120
-rw-r--r--cpan/Unicode-Normalize/t/partial2.t116
-rw-r--r--cpan/Unicode-Normalize/t/proto.t99
-rw-r--r--cpan/Unicode-Normalize/t/split.t147
-rw-r--r--cpan/Unicode-Normalize/t/test.t168
-rw-r--r--cpan/Unicode-Normalize/t/tie.t82
17 files changed, 0 insertions, 3705 deletions
diff --git a/cpan/Unicode-Normalize/.gitignore b/cpan/Unicode-Normalize/.gitignore
deleted file mode 100644
index 424c745c12..0000000000
--- a/cpan/Unicode-Normalize/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.h
diff --git a/cpan/Unicode-Normalize/Makefile.PL b/cpan/Unicode-Normalize/Makefile.PL
deleted file mode 100644
index 44a4b8ded8..0000000000
--- a/cpan/Unicode-Normalize/Makefile.PL
+++ /dev/null
@@ -1,55 +0,0 @@
-require 5.006001;
-use ExtUtils::MakeMaker;
-
-my $clean = {};
-
-my $mm_ver = ExtUtils::MakeMaker->VERSION;
-
-if (-f "Normalize.xs") {
- print STDERR "Making header files for XS...\n";
-
- do 'mkheader' or die $@ || "mkheader: $!";
-
- $clean = { FILES => 'unfcan.h unfcmb.h unfcmp.h unfcpt.h unfexc.h' };
-}
-
-WriteMakefile(
- ($mm_ver < 6.58)
- ? ('AUTHOR' => 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>, Karl Williamson <khw@cpan.org>')
- : ('AUTHOR' => [
- 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>',
- 'Karl Williamson <khw@cpan.org>',
- ]),
- 'ABSTRACT' => 'Unicode Normalization Forms',
- 'INSTALLDIRS' => ($] >= 5.007002 && $] < 5.011) ? 'perl' : 'site',
- # see perl5110delta, @INC reorganization
- 'LICENSE' => 'perl',
- 'NAME' => 'Unicode::Normalize',
- 'VERSION_FROM' => 'Normalize.pm', # finds $VERSION
- 'clean' => $clean,
- 'depend' => { 'Normalize.o' => '$(H_FILES)' },
- 'PREREQ_PM' => {
- Carp => 0,
- constant => 0,
- DynaLoader => 0,
- Exporter => 0,
- File::Spec => 0,
- strict => 0,
- warnings => 0,
- SelectSaver => 0,
- },
- ($mm_ver < 6.48 ? () : MIN_PERL_VERSION => 5.6.0),
- ($mm_ver < 6.46 ? () : (META_MERGE => {
- 'meta-spec' => { version => 2 },
- resources => {
- repository => {
- url => 'https://github.com/khwilliamson/Unicode-Normalize.git',
- web => 'https://github.com/khwilliamson/Unicode-Normalize',
- type => 'git',
- },
- bugtracker => {
- web => 'https://rt.cpan.org/Public/Dist/Display.html?Name=Unicode-Normalize',
- },
- },
- })),
-);
diff --git a/cpan/Unicode-Normalize/Normalize.pm b/cpan/Unicode-Normalize/Normalize.pm
deleted file mode 100644
index ff6c0f0dc4..0000000000
--- a/cpan/Unicode-Normalize/Normalize.pm
+++ /dev/null
@@ -1,635 +0,0 @@
-package Unicode::Normalize;
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- die "Unicode::Normalize cannot stringify a Unicode code point\n";
- }
- unless (0x41 == unpack('U', 'A')) {
- die "Unicode::Normalize cannot get Unicode code point\n";
- }
-}
-
-use 5.006;
-use strict;
-use warnings;
-use Carp;
-
-no warnings 'utf8';
-
-our $VERSION = '1.25';
-our $PACKAGE = __PACKAGE__;
-
-our @EXPORT = qw( NFC NFD NFKC NFKD );
-our @EXPORT_OK = qw(
- normalize decompose reorder compose
- checkNFD checkNFKD checkNFC checkNFKC check
- getCanon getCompat getComposite getCombinClass
- isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
- isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
- FCD checkFCD FCC checkFCC composeContiguous splitOnLastStarter
- normalize_partial NFC_partial NFD_partial NFKC_partial NFKD_partial
-);
-our %EXPORT_TAGS = (
- all => [ @EXPORT, @EXPORT_OK ],
- normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ],
- check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ],
- fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ],
-);
-
-##
-## utilities for tests
-##
-
-sub pack_U {
- return pack('U*', @_);
-}
-
-sub unpack_U {
-
- # The empty pack returns an empty UTF-8 string, so the effect is to force
- # the shifted parameter into being UTF-8. This allows this to work on
- # Perl 5.6, where there is no utf8::upgrade().
- return unpack('U*', shift(@_).pack('U*'));
-}
-
-require Exporter;
-
-##### The above part is common to XS and PP #####
-
-our @ISA = qw(Exporter DynaLoader);
-require DynaLoader;
-bootstrap Unicode::Normalize $VERSION;
-
-##### The below part is common to XS and PP #####
-
-##
-## normalize
-##
-
-sub FCD ($) {
- my $str = shift;
- return checkFCD($str) ? $str : NFD($str);
-}
-
-our %formNorm = (
- NFC => \&NFC, C => \&NFC,
- NFD => \&NFD, D => \&NFD,
- NFKC => \&NFKC, KC => \&NFKC,
- NFKD => \&NFKD, KD => \&NFKD,
- FCD => \&FCD, FCC => \&FCC,
-);
-
-sub normalize($$)
-{
- my $form = shift;
- my $str = shift;
- if (exists $formNorm{$form}) {
- return $formNorm{$form}->($str);
- }
- croak($PACKAGE."::normalize: invalid form name: $form");
-}
-
-##
-## partial
-##
-
-sub normalize_partial ($$) {
- if (exists $formNorm{$_[0]}) {
- my $n = normalize($_[0], $_[1]);
- my($p, $u) = splitOnLastStarter($n);
- $_[1] = $u;
- return $p;
- }
- croak($PACKAGE."::normalize_partial: invalid form name: $_[0]");
-}
-
-sub NFD_partial ($) { return normalize_partial('NFD', $_[0]) }
-sub NFC_partial ($) { return normalize_partial('NFC', $_[0]) }
-sub NFKD_partial($) { return normalize_partial('NFKD',$_[0]) }
-sub NFKC_partial($) { return normalize_partial('NFKC',$_[0]) }
-
-##
-## check
-##
-
-our %formCheck = (
- NFC => \&checkNFC, C => \&checkNFC,
- NFD => \&checkNFD, D => \&checkNFD,
- NFKC => \&checkNFKC, KC => \&checkNFKC,
- NFKD => \&checkNFKD, KD => \&checkNFKD,
- FCD => \&checkFCD, FCC => \&checkFCC,
-);
-
-sub check($$)
-{
- my $form = shift;
- my $str = shift;
- if (exists $formCheck{$form}) {
- return $formCheck{$form}->($str);
- }
- croak($PACKAGE."::check: invalid form name: $form");
-}
-
-1;
-__END__
-
-=head1 NAME
-
-Unicode::Normalize - Unicode Normalization Forms
-
-=head1 SYNOPSIS
-
-(1) using function names exported by default:
-
- use Unicode::Normalize;
-
- $NFD_string = NFD($string); # Normalization Form D
- $NFC_string = NFC($string); # Normalization Form C
- $NFKD_string = NFKD($string); # Normalization Form KD
- $NFKC_string = NFKC($string); # Normalization Form KC
-
-(2) using function names exported on request:
-
- use Unicode::Normalize 'normalize';
-
- $NFD_string = normalize('D', $string); # Normalization Form D
- $NFC_string = normalize('C', $string); # Normalization Form C
- $NFKD_string = normalize('KD', $string); # Normalization Form KD
- $NFKC_string = normalize('KC', $string); # Normalization Form KC
-
-=head1 DESCRIPTION
-
-Parameters:
-
-C<$string> is used as a string under character semantics (see F<perlunicode>).
-
-C<$code_point> should be an unsigned integer representing a Unicode code point.
-
-Note: Between XSUB and pure Perl, there is an incompatibility
-about the interpretation of C<$code_point> as a decimal number.
-XSUB converts C<$code_point> to an unsigned integer, but pure Perl does not.
-Do not use a floating point nor a negative sign in C<$code_point>.
-
-=head2 Normalization Forms
-
-=over 4
-
-=item C<$NFD_string = NFD($string)>
-
-It returns the Normalization Form D (formed by canonical decomposition).
-
-=item C<$NFC_string = NFC($string)>
-
-It returns the Normalization Form C (formed by canonical decomposition
-followed by canonical composition).
-
-=item C<$NFKD_string = NFKD($string)>
-
-It returns the Normalization Form KD (formed by compatibility decomposition).
-
-=item C<$NFKC_string = NFKC($string)>
-
-It returns the Normalization Form KC (formed by compatibility decomposition
-followed by B<canonical> composition).
-
-=item C<$FCD_string = FCD($string)>
-
-If the given string is in FCD ("Fast C or D" form; cf. UTN #5),
-it returns the string without modification; otherwise it returns an FCD string.
-
-Note: FCD is not always unique, then plural forms may be equivalent
-each other. C<FCD()> will return one of these equivalent forms.
-
-=item C<$FCC_string = FCC($string)>
-
-It returns the FCC form ("Fast C Contiguous"; cf. UTN #5).
-
-Note: FCC is unique, as well as four normalization forms (NF*).
-
-=item C<$normalized_string = normalize($form_name, $string)>
-
-It returns the normalization form of C<$form_name>.
-
-As C<$form_name>, one of the following names must be given.
-
- 'C' or 'NFC' for Normalization Form C (UAX #15)
- 'D' or 'NFD' for Normalization Form D (UAX #15)
- 'KC' or 'NFKC' for Normalization Form KC (UAX #15)
- 'KD' or 'NFKD' for Normalization Form KD (UAX #15)
-
- 'FCD' for "Fast C or D" Form (UTN #5)
- 'FCC' for "Fast C Contiguous" (UTN #5)
-
-=back
-
-=head2 Decomposition and Composition
-
-=over 4
-
-=item C<$decomposed_string = decompose($string [, $useCompatMapping])>
-
-It returns the concatenation of the decomposition of each character
-in the string.
-
-If the second parameter (a boolean) is omitted or false,
-the decomposition is canonical decomposition;
-if the second parameter (a boolean) is true,
-the decomposition is compatibility decomposition.
-
-The string returned is not always in NFD/NFKD. Reordering may be required.
-
- $NFD_string = reorder(decompose($string)); # eq. to NFD()
- $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD()
-
-=item C<$reordered_string = reorder($string)>
-
-It returns the result of reordering the combining characters
-according to Canonical Ordering Behavior.
-
-For example, when you have a list of NFD/NFKD strings,
-you can get the concatenated NFD/NFKD string from them, by saying
-
- $concat_NFD = reorder(join '', @NFD_strings);
- $concat_NFKD = reorder(join '', @NFKD_strings);
-
-=item C<$composed_string = compose($string)>
-
-It returns the result of canonical composition
-without applying any decomposition.
-
-For example, when you have a NFD/NFKD string,
-you can get its NFC/NFKC string, by saying
-
- $NFC_string = compose($NFD_string);
- $NFKC_string = compose($NFKD_string);
-
-=item C<($processed, $unprocessed) = splitOnLastStarter($normalized)>
-
-It returns two strings: the first one, C<$processed>, is a part
-before the last starter, and the second one, C<$unprocessed> is
-another part after the first part. A starter is a character having
-a combining class of zero (see UAX #15).
-
-Note that C<$processed> may be empty (when C<$normalized> contains no
-starter or starts with the last starter), and then C<$unprocessed>
-should be equal to the entire C<$normalized>.
-
-When you have a C<$normalized> string and an C<$unnormalized> string
-following it, a simple concatenation is wrong:
-
- $concat = $normalized . normalize($form, $unnormalized); # wrong!
-
-Instead of it, do like this:
-
- ($processed, $unprocessed) = splitOnLastStarter($normalized);
- $concat = $processed . normalize($form, $unprocessed.$unnormalized);
-
-C<splitOnLastStarter()> should be called with a pre-normalized parameter
-C<$normalized>, that is in the same form as C<$form> you want.
-
-If you have an array of C<@string> that should be concatenated and then
-normalized, you can do like this:
-
- my $result = "";
- my $unproc = "";
- foreach my $str (@string) {
- $unproc .= $str;
- my $n = normalize($form, $unproc);
- my($p, $u) = splitOnLastStarter($n);
- $result .= $p;
- $unproc = $u;
- }
- $result .= $unproc;
- # instead of normalize($form, join('', @string))
-
-=item C<$processed = normalize_partial($form, $unprocessed)>
-
-A wrapper for the combination of C<normalize()> and C<splitOnLastStarter()>.
-Note that C<$unprocessed> will be modified as a side-effect.
-
-If you have an array of C<@string> that should be concatenated and then
-normalized, you can do like this:
-
- my $result = "";
- my $unproc = "";
- foreach my $str (@string) {
- $unproc .= $str;
- $result .= normalize_partial($form, $unproc);
- }
- $result .= $unproc;
- # instead of normalize($form, join('', @string))
-
-=item C<$processed = NFD_partial($unprocessed)>
-
-It does like C<normalize_partial('NFD', $unprocessed)>.
-Note that C<$unprocessed> will be modified as a side-effect.
-
-=item C<$processed = NFC_partial($unprocessed)>
-
-It does like C<normalize_partial('NFC', $unprocessed)>.
-Note that C<$unprocessed> will be modified as a side-effect.
-
-=item C<$processed = NFKD_partial($unprocessed)>
-
-It does like C<normalize_partial('NFKD', $unprocessed)>.
-Note that C<$unprocessed> will be modified as a side-effect.
-
-=item C<$processed = NFKC_partial($unprocessed)>
-
-It does like C<normalize_partial('NFKC', $unprocessed)>.
-Note that C<$unprocessed> will be modified as a side-effect.
-
-=back
-
-=head2 Quick Check
-
-(see Annex 8, UAX #15; and F<DerivedNormalizationProps.txt>)
-
-The following functions check whether the string is in that normalization form.
-
-The result returned will be one of the following:
-
- YES The string is in that normalization form.
- NO The string is not in that normalization form.
- MAYBE Dubious. Maybe yes, maybe no.
-
-=over 4
-
-=item C<$result = checkNFD($string)>
-
-It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>.
-
-=item C<$result = checkNFC($string)>
-
-It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>;
-C<undef> if C<MAYBE>.
-
-=item C<$result = checkNFKD($string)>
-
-It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>.
-
-=item C<$result = checkNFKC($string)>
-
-It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>;
-C<undef> if C<MAYBE>.
-
-=item C<$result = checkFCD($string)>
-
-It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>.
-
-=item C<$result = checkFCC($string)>
-
-It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>;
-C<undef> if C<MAYBE>.
-
-Note: If a string is not in FCD, it must not be in FCC.
-So C<checkFCC($not_FCD_string)> should return C<NO>.
-
-=item C<$result = check($form_name, $string)>
-
-It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>;
-C<undef> if C<MAYBE>.
-
-As C<$form_name>, one of the following names must be given.
-
- 'C' or 'NFC' for Normalization Form C (UAX #15)
- 'D' or 'NFD' for Normalization Form D (UAX #15)
- 'KC' or 'NFKC' for Normalization Form KC (UAX #15)
- 'KD' or 'NFKD' for Normalization Form KD (UAX #15)
-
- 'FCD' for "Fast C or D" Form (UTN #5)
- 'FCC' for "Fast C Contiguous" (UTN #5)
-
-=back
-
-B<Note>
-
-In the cases of NFD, NFKD, and FCD, the answer must be
-either C<YES> or C<NO>. The answer C<MAYBE> may be returned
-in the cases of NFC, NFKC, and FCC.
-
-A C<MAYBE> string should contain at least one combining character
-or the like. For example, C<COMBINING ACUTE ACCENT> has
-the MAYBE_NFC/MAYBE_NFKC property.
-
-Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")>
-and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>.
-C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC
-(its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">),
-while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC.
-
-If you want to check exactly, compare the string with its NFC/NFKC/FCC.
-
- if ($string eq NFC($string)) {
- # $string is exactly normalized in NFC;
- } else {
- # $string is not normalized in NFC;
- }
-
- if ($string eq NFKC($string)) {
- # $string is exactly normalized in NFKC;
- } else {
- # $string is not normalized in NFKC;
- }
-
-=head2 Character Data
-
-These functions are interface of character data used internally.
-If you want only to get Unicode normalization forms, you don't need
-call them yourself.
-
-=over 4
-
-=item C<$canonical_decomposition = getCanon($code_point)>
-
-If the character is canonically decomposable (including Hangul Syllables),
-it returns the (full) canonical decomposition as a string.
-Otherwise it returns C<undef>.
-
-B<Note:> According to the Unicode standard, the canonical decomposition
-of the character that is not canonically decomposable is same as
-the character itself.
-
-=item C<$compatibility_decomposition = getCompat($code_point)>
-
-If the character is compatibility decomposable (including Hangul Syllables),
-it returns the (full) compatibility decomposition as a string.
-Otherwise it returns C<undef>.
-
-B<Note:> According to the Unicode standard, the compatibility decomposition
-of the character that is not compatibility decomposable is same as
-the character itself.
-
-=item C<$code_point_composite = getComposite($code_point_here, $code_point_next)>
-
-If two characters here and next (as code points) are composable
-(including Hangul Jamo/Syllables and Composition Exclusions),
-it returns the code point of the composite.
-
-If they are not composable, it returns C<undef>.
-
-=item C<$combining_class = getCombinClass($code_point)>
-
-It returns the combining class (as an integer) of the character.
-
-=item C<$may_be_composed_with_prev_char = isComp2nd($code_point)>
-
-It returns a boolean whether the character of the specified codepoint
-may be composed with the previous one in a certain composition
-(including Hangul Compositions, but excluding
-Composition Exclusions and Non-Starter Decompositions).
-
-=item C<$is_exclusion = isExclusion($code_point)>
-
-It returns a boolean whether the code point is a composition exclusion.
-
-=item C<$is_singleton = isSingleton($code_point)>
-
-It returns a boolean whether the code point is a singleton
-
-=item C<$is_non_starter_decomposition = isNonStDecomp($code_point)>
-
-It returns a boolean whether the code point has Non-Starter Decomposition.
-
-=item C<$is_Full_Composition_Exclusion = isComp_Ex($code_point)>
-
-It returns a boolean of the derived property Comp_Ex
-(Full_Composition_Exclusion). This property is generated from
-Composition Exclusions + Singletons + Non-Starter Decompositions.
-
-=item C<$NFD_is_NO = isNFD_NO($code_point)>
-
-It returns a boolean of the derived property NFD_NO
-(NFD_Quick_Check=No).
-
-=item C<$NFC_is_NO = isNFC_NO($code_point)>
-
-It returns a boolean of the derived property NFC_NO
-(NFC_Quick_Check=No).
-
-=item C<$NFC_is_MAYBE = isNFC_MAYBE($code_point)>
-
-It returns a boolean of the derived property NFC_MAYBE
-(NFC_Quick_Check=Maybe).
-
-=item C<$NFKD_is_NO = isNFKD_NO($code_point)>
-
-It returns a boolean of the derived property NFKD_NO
-(NFKD_Quick_Check=No).
-
-=item C<$NFKC_is_NO = isNFKC_NO($code_point)>
-
-It returns a boolean of the derived property NFKC_NO
-(NFKC_Quick_Check=No).
-
-=item C<$NFKC_is_MAYBE = isNFKC_MAYBE($code_point)>
-
-It returns a boolean of the derived property NFKC_MAYBE
-(NFKC_Quick_Check=Maybe).
-
-=back
-
-=head1 EXPORT
-
-C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.
-
-C<normalize> and other some functions: on request.
-
-=head1 CAVEATS
-
-=over 4
-
-=item Perl's version vs. Unicode version
-
-Since this module refers to perl core's Unicode database in the directory
-F</lib/unicore> (or formerly F</lib/unicode>), the Unicode version of
-normalization implemented by this module depends on what has been
-compiled into your perl. The following table lists the default Unicode
-version that comes with various perl versions. (It is possible to change
-the Unicode version in any perl version to be any earlier Unicode version,
-so one could cause Unicode 3.2 to be used in any perl version starting with
-5.8.0. See C<$Config{privlib}>/F<unicore/README.perl>.
-
- perl's version implemented Unicode version
- 5.6.1 3.0.1
- 5.7.2 3.1.0
- 5.7.3 3.1.1 (normalization is same as 3.1.0)
- 5.8.0 3.2.0
- 5.8.1-5.8.3 4.0.0
- 5.8.4-5.8.6 4.0.1 (normalization is same as 4.0.0)
- 5.8.7-5.8.8 4.1.0
- 5.10.0 5.0.0
- 5.8.9, 5.10.1 5.1.0
- 5.12.x 5.2.0
- 5.14.x 6.0.0
- 5.16.x 6.1.0
- 5.18.x 6.2.0
- 5.20.x 6.3.0
- 5.22.x 7.0.0
-
-=item Correction of decomposition mapping
-
-In older Unicode versions, a small number of characters (all of which are
-CJK compatibility ideographs as far as they have been found) may have
-an erroneous decomposition mapping (see F<NormalizationCorrections.txt>).
-Anyhow, this module will neither refer to F<NormalizationCorrections.txt>
-nor provide any specific version of normalization. Therefore this module
-running on an older perl with an older Unicode database may use
-the erroneous decomposition mapping blindly conforming to the Unicode database.
-
-=item Revised definition of canonical composition
-
-In Unicode 4.1.0, the definition D2 of canonical composition (which
-affects NFC and NFKC) has been changed (see Public Review Issue #29
-and recent UAX #15). This module has used the newer definition
-since the version 0.07 (Oct 31, 2001).
-This module will not support the normalization according to the older
-definition, even if the Unicode version implemented by perl is
-lower than 4.1.0.
-
-=back
-
-=head1 AUTHOR
-
-SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
-
-Currently maintained by <perl5-porters@perl.org>
-
-Copyright(C) 2001-2012, SADAHIRO Tomoyuki. Japan. All rights reserved.
-
-=head1 LICENSE
-
-This module is free software; you can redistribute it
-and/or modify it under the same terms as Perl itself.
-
-=head1 SEE ALSO
-
-=over 4
-
-=item http://www.unicode.org/reports/tr15/
-
-Unicode Normalization Forms - UAX #15
-
-=item http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt
-
-Composition Exclusion Table
-
-=item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt
-
-Derived Normalization Properties
-
-=item http://www.unicode.org/Public/UNIDATA/NormalizationCorrections.txt
-
-Normalization Corrections
-
-=item http://www.unicode.org/review/pr-29.html
-
-Public Review Issue #29: Normalization Issue
-
-=item http://www.unicode.org/notes/tn5/
-
-Canonical Equivalence in Applications - UTN #5
-
-=back
-
-=cut
diff --git a/cpan/Unicode-Normalize/Normalize.xs b/cpan/Unicode-Normalize/Normalize.xs
deleted file mode 100644
index 4acff7fe49..0000000000
--- a/cpan/Unicode-Normalize/Normalize.xs
+++ /dev/null
@@ -1,925 +0,0 @@
-
-#define PERL_NO_GET_CONTEXT /* we want efficiency */
-
-/* private functions which need pTHX_ and aTHX_
- pv_cat_decompHangul
- sv_2pvunicode
- pv_utf8_decompose
- pv_utf8_reorder
- pv_utf8_compose
-*/
-
-#include "EXTERN.h"
-#include "perl.h"
-#include "XSUB.h"
-
-/* These 5 files are prepared by mkheader */
-#include "unfcmb.h"
-#include "unfcan.h"
-#include "unfcpt.h"
-#include "unfcmp.h"
-#include "unfexc.h"
-
-/* The generated normalization tables since v5.20 are in native character set
- * terms. Prior to that, they were in Unicode terms. So we use 'uvchr' for
- * later perls, and redefine that to be 'uvuni' for earlier ones */
-#if PERL_VERSION < 20
-# undef uvchr_to_utf8
-# ifdef uvuni_to_utf8
-# define uvchr_to_utf8 uvuni_to_utf8
-# else /* Perl 5.6.1 */
-# define uvchr_to_utf8 uv_to_utf8
-# endif
-
-# undef utf8n_to_uvchr
-# ifdef utf8n_to_uvuni
-# define utf8n_to_uvchr utf8n_to_uvuni
-# else /* Perl 5.6.1 */
-# define utf8n_to_uvchr utf8_to_uv
-# endif
-#endif
-
-/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
-#ifndef UTF8_ALLOW_BOM
-#define UTF8_ALLOW_BOM (0)
-#endif /* UTF8_ALLOW_BOM */
-
-#ifndef UTF8_ALLOW_SURROGATE
-#define UTF8_ALLOW_SURROGATE (0)
-#endif /* UTF8_ALLOW_SURROGATE */
-
-#ifndef UTF8_ALLOW_FE_FF
-#define UTF8_ALLOW_FE_FF (0)
-#endif /* UTF8_ALLOW_FE_FF */
-
-#ifndef UTF8_ALLOW_FFFF
-#define UTF8_ALLOW_FFFF (0)
-#endif /* UTF8_ALLOW_FFFF */
-
-#ifndef PERL_UNUSED_VAR
-# define PERL_UNUSED_VAR(x) ((void)sizeof(x))
-#endif
-
-#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
-
-/* check if the string buffer is enough before uvchr_to_utf8(). */
-/* dstart, d, and dlen should be defined outside before. */
-#define Renew_d_if_not_enough_to(need) STRLEN curlen = d - dstart; \
- if (dlen < curlen + (need)) { \
- dlen += (need); \
- Renew(dstart, dlen+1, U8); \
- d = dstart + curlen; \
- }
-
-/* if utf8n_to_uvchr() sets retlen to 0 (if broken?) */
-#define ErrRetlenIsZero "panic (Unicode::Normalize %s): zero-length character"
-
-/* utf8_hop() hops back before start. Maybe broken UTF-8 */
-#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start"
-
-/* At present, char > 0x10ffff are unaffected without complaint, right? */
-#define VALID_UTF_MAX (0x10ffff)
-#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
-
-/* size of array for combining characters */
-/* enough as an initial value? */
-#define CC_SEQ_SIZE (10)
-#define CC_SEQ_STEP (5)
-
-/* HANGUL begin */
-#define Hangul_SBase 0xAC00
-#define Hangul_SFinal 0xD7A3
-#define Hangul_SCount 11172
-
-#define Hangul_NCount 588
-
-#define Hangul_LBase 0x1100
-#define Hangul_LFinal 0x1112
-#define Hangul_LCount 19
-
-#define Hangul_VBase 0x1161
-#define Hangul_VFinal 0x1175
-#define Hangul_VCount 21
-
-#define Hangul_TBase 0x11A7
-#define Hangul_TFinal 0x11C2
-#define Hangul_TCount 28
-
-#define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal))
-#define Hangul_IsN(u) (((u) - Hangul_SBase) % Hangul_TCount == 0)
-#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u))
-#define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal))
-#define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal))
-#define Hangul_IsT(u) ((Hangul_TBase < (u)) && ((u) <= Hangul_TFinal))
-/* HANGUL end */
-
-/* this is used for canonical ordering of combining characters (c.c.). */
-typedef struct {
- U8 cc; /* combining class */
- UV uv; /* codepoint */
- STRLEN pos; /* position */
-} UNF_cc;
-
-static int compare_cc(const void *a, const void *b)
-{
- int ret_cc;
- ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc;
- if (ret_cc)
- return ret_cc;
-
- return ( ((UNF_cc*) a)->pos > ((UNF_cc*) b)->pos )
- - ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos );
-}
-
-static U8* dec_canonical(UV uv)
-{
- U8 ***plane, **row;
- if (OVER_UTF_MAX(uv))
- return NULL;
- plane = (U8***)UNF_canon[uv >> 16];
- if (! plane)
- return NULL;
- row = plane[(uv >> 8) & 0xff];
- return row ? row[uv & 0xff] : NULL;
-}
-
-static U8* dec_compat(UV uv)
-{
- U8 ***plane, **row;
- if (OVER_UTF_MAX(uv))
- return NULL;
- plane = (U8***)UNF_compat[uv >> 16];
- if (! plane)
- return NULL;
- row = plane[(uv >> 8) & 0xff];
- return row ? row[uv & 0xff] : NULL;
-}
-
-static UV composite_uv(UV uv, UV uv2)
-{
- UNF_complist ***plane, **row, *cell, *i;
-
- if (!uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2))
- return 0;
-
- if (Hangul_IsL(uv) && Hangul_IsV(uv2)) {
- UV lindex = uv - Hangul_LBase;
- UV vindex = uv2 - Hangul_VBase;
- return(Hangul_SBase + (lindex * Hangul_VCount + vindex) *
- Hangul_TCount);
- }
- if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
- UV tindex = uv2 - Hangul_TBase;
- return(uv + tindex);
- }
- plane = UNF_compos[uv >> 16];
- if (! plane)
- return 0;
- row = plane[(uv >> 8) & 0xff];
- if (! row)
- return 0;
- cell = row[uv & 0xff];
- if (! cell)
- return 0;
- for (i = cell; i->nextchar; i++) {
- if (uv2 == i->nextchar)
- return i->composite;
- }
- return 0;
-}
-
-static U8 getCombinClass(UV uv)
-{
- U8 **plane, *row;
- if (OVER_UTF_MAX(uv))
- return 0;
- plane = (U8**)UNF_combin[uv >> 16];
- if (! plane)
- return 0;
- row = plane[(uv >> 8) & 0xff];
- return row ? row[uv & 0xff] : 0;
-}
-
-static U8* pv_cat_decompHangul(pTHX_ U8* d, UV uv)
-{
- UV sindex = uv - Hangul_SBase;
- UV lindex = sindex / Hangul_NCount;
- UV vindex = (sindex % Hangul_NCount) / Hangul_TCount;
- UV tindex = sindex % Hangul_TCount;
-
- if (! Hangul_IsS(uv))
- return d;
-
- d = uvchr_to_utf8(d, (lindex + Hangul_LBase));
- d = uvchr_to_utf8(d, (vindex + Hangul_VBase));
- if (tindex)
- d = uvchr_to_utf8(d, (tindex + Hangul_TBase));
- return d;
-}
-
-static char* sv_2pvunicode(pTHX_ SV *sv, STRLEN *lp)
-{
- char *s;
- STRLEN len;
- s = SvPV(sv,len);
- if (!SvUTF8(sv)) {
- SV* tmpsv = sv_2mortal(newSVpvn(s, len));
- if (!SvPOK(tmpsv))
- s = SvPV_force(tmpsv,len);
- sv_utf8_upgrade(tmpsv);
- s = SvPV(tmpsv,len);
- }
- if (lp)
- *lp = len;
- return s;
-}
-
-static
-U8* pv_utf8_decompose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat)
-{
- U8* p = s;
- U8* e = s + slen;
- U8* dstart = *dp;
- U8* d = dstart;
-
- while (p < e) {
- STRLEN retlen;
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
- if (!retlen)
- croak(ErrRetlenIsZero, "decompose");
- p += retlen;
-
- if (Hangul_IsS(uv)) {
- Renew_d_if_not_enough_to(UTF8_MAXLEN * 3)
- d = pv_cat_decompHangul(aTHX_ d, uv);
- }
- else {
- U8* r = iscompat ? dec_compat(uv) : dec_canonical(uv);
-
- if (r) {
- STRLEN len = (STRLEN)strlen((char *)r);
- Renew_d_if_not_enough_to(len)
- while (len--)
- *d++ = *r++;
- }
- else {
- Renew_d_if_not_enough_to(UTF8_MAXLEN)
- d = uvchr_to_utf8(d, uv);
- }
- }
- }
- *dp = dstart;
- return d;
-}
-
-static
-U8* pv_utf8_reorder(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen)
-{
- U8* p = s;
- U8* e = s + slen;
- U8* dstart = *dp;
- U8* d = dstart;
-
- UNF_cc seq_ary[CC_SEQ_SIZE];
- UNF_cc* seq_ptr = seq_ary; /* use array at the beginning */
- UNF_cc* seq_ext = NULL; /* extend if need */
- STRLEN seq_max = CC_SEQ_SIZE;
- STRLEN cc_pos = 0;
-
- while (p < e) {
- U8 curCC;
- STRLEN retlen;
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
- if (!retlen)
- croak(ErrRetlenIsZero, "reorder");
- p += retlen;
-
- curCC = getCombinClass(uv);
-
- if (curCC != 0) {
- if (seq_max < cc_pos + 1) { /* extend if need */
- seq_max = cc_pos + CC_SEQ_STEP; /* new size */
- if (CC_SEQ_SIZE == cc_pos) { /* seq_ary full */
- STRLEN i;
- New(0, seq_ext, seq_max, UNF_cc);
- for (i = 0; i < cc_pos; i++)
- seq_ext[i] = seq_ary[i];
- }
- else {
- Renew(seq_ext, seq_max, UNF_cc);
- }
- seq_ptr = seq_ext; /* use seq_ext from now */
- }
-
- seq_ptr[cc_pos].cc = curCC;
- seq_ptr[cc_pos].uv = uv;
- seq_ptr[cc_pos].pos = cc_pos;
- ++cc_pos;
-
- if (p < e)
- continue;
- }
-
- /* output */
- if (cc_pos) {
- STRLEN i;
-
- if (cc_pos > 1) /* reordered if there are two c.c.'s */
- qsort((void*)seq_ptr, cc_pos, sizeof(UNF_cc), compare_cc);
-
- for (i = 0; i < cc_pos; i++) {
- Renew_d_if_not_enough_to(UTF8_MAXLEN)
- d = uvchr_to_utf8(d, seq_ptr[i].uv);
- }
- cc_pos = 0;
- }
-
- if (curCC == 0) {
- Renew_d_if_not_enough_to(UTF8_MAXLEN)
- d = uvchr_to_utf8(d, uv);
- }
- }
- if (seq_ext)
- Safefree(seq_ext);
- *dp = dstart;
- return d;
-}
-
-static
-U8* pv_utf8_compose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscontig)
-{
- U8* p = s;
- U8* e = s + slen;
- U8* dstart = *dp;
- U8* d = dstart;
-
- UV uvS = 0; /* code point of the starter */
- bool valid_uvS = FALSE; /* if FALSE, uvS isn't initialized yet */
- U8 preCC = 0;
-
- UV seq_ary[CC_SEQ_SIZE];
- UV* seq_ptr = seq_ary; /* use array at the beginning */
- UV* seq_ext = NULL; /* extend if need */
- STRLEN seq_max = CC_SEQ_SIZE;
- STRLEN cc_pos = 0;
-
- while (p < e) {
- U8 curCC;
- STRLEN retlen;
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
- if (!retlen)
- croak(ErrRetlenIsZero, "compose");
- p += retlen;
-
- curCC = getCombinClass(uv);
-
- if (!valid_uvS) {
- if (curCC == 0) {
- uvS = uv; /* the first Starter is found */
- valid_uvS = TRUE;
- if (p < e)
- continue;
- }
- else {
- Renew_d_if_not_enough_to(UTF8_MAXLEN)
- d = uvchr_to_utf8(d, uv);
- continue;
- }
- }
- else {
- bool composed;
-
- /* blocked */
- if ((iscontig && cc_pos) || /* discontiguous combination */
- (curCC != 0 && preCC == curCC) || /* blocked by same CC */
- (preCC > curCC)) /* blocked by higher CC: revised D2 */
- composed = FALSE;
-
- /* not blocked:
- iscontig && cc_pos == 0 -- contiguous combination
- curCC == 0 && preCC == 0 -- starter + starter
- curCC != 0 && preCC < curCC -- lower CC */
- else {
- /* try composition */
- UV uvComp = composite_uv(uvS, uv);
-
- if (uvComp && !isExclusion(uvComp)) {
- uvS = uvComp;
- composed = TRUE;
-
- /* preCC should not be changed to curCC */
- /* e.g. 1E14 = 0045 0304 0300 where CC(0304) == CC(0300) */
- if (p < e)
- continue;
- }
- else
- composed = FALSE;
- }
-
- if (!composed) {
- preCC = curCC;
- if (curCC != 0 || !(p < e)) {
- if (seq_max < cc_pos + 1) { /* extend if need */
- seq_max = cc_pos + CC_SEQ_STEP; /* new size */
- if (CC_SEQ_SIZE == cc_pos) { /* seq_ary full */
- New(0, seq_ext, seq_max, UV);
- Copy(seq_ary, seq_ext, cc_pos, UV);
- }
- else {
- Renew(seq_ext, seq_max, UV);
- }
- seq_ptr = seq_ext; /* use seq_ext from now */
- }
- seq_ptr[cc_pos] = uv;
- ++cc_pos;
- }
- if (curCC != 0 && p < e)
- continue;
- }
- }
-
- /* output */
- {
- Renew_d_if_not_enough_to(UTF8_MAXLEN)
- d = uvchr_to_utf8(d, uvS); /* starter (composed or not) */
- }
-
- if (cc_pos) {
- STRLEN i;
-
- for (i = 0; i < cc_pos; i++) {
- Renew_d_if_not_enough_to(UTF8_MAXLEN)
- d = uvchr_to_utf8(d, seq_ptr[i]);
- }
- cc_pos = 0;
- }
-
- uvS = uv;
- }
- if (seq_ext)
- Safefree(seq_ext);
- *dp = dstart;
- return d;
-}
-
-MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize
-
-SV*
-decompose(src, compat = &PL_sv_no)
- SV * src
- SV * compat
- PROTOTYPE: $;$
- PREINIT:
- SV* dst;
- U8 *s, *d, *dend;
- STRLEN slen, dlen;
- CODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
- dst = newSVpvn("", 0);
- dlen = slen;
- New(0, d, dlen+1, U8);
- dend = pv_utf8_decompose(aTHX_ s, slen, &d, dlen, (bool)SvTRUE(compat));
- sv_setpvn(dst, (char *)d, dend - d);
- SvUTF8_on(dst);
- Safefree(d);
- RETVAL = dst;
- OUTPUT:
- RETVAL
-
-
-SV*
-reorder(src)
- SV * src
- PROTOTYPE: $
- PREINIT:
- SV* dst;
- U8 *s, *d, *dend;
- STRLEN slen, dlen;
- CODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
- dst = newSVpvn("", 0);
- dlen = slen;
- New(0, d, dlen+1, U8);
- dend = pv_utf8_reorder(aTHX_ s, slen, &d, dlen);
- sv_setpvn(dst, (char *)d, dend - d);
- SvUTF8_on(dst);
- Safefree(d);
- RETVAL = dst;
- OUTPUT:
- RETVAL
-
-
-SV*
-compose(src)
- SV * src
- PROTOTYPE: $
- ALIAS:
- composeContiguous = 1
- PREINIT:
- SV* dst;
- U8 *s, *d, *dend;
- STRLEN slen, dlen;
- CODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
- dst = newSVpvn("", 0);
- dlen = slen;
- New(0, d, dlen+1, U8);
- dend = pv_utf8_compose(aTHX_ s, slen, &d, dlen, (bool)ix);
- sv_setpvn(dst, (char *)d, dend - d);
- SvUTF8_on(dst);
- Safefree(d);
- RETVAL = dst;
- OUTPUT:
- RETVAL
-
-
-SV*
-NFD(src)
- SV * src
- PROTOTYPE: $
- ALIAS:
- NFKD = 1
- PREINIT:
- SV *dst;
- U8 *s, *t, *tend, *d, *dend;
- STRLEN slen, tlen, dlen;
- CODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
-
- /* decompose */
- tlen = slen;
- New(0, t, tlen+1, U8);
- tend = pv_utf8_decompose(aTHX_ s, slen, &t, tlen, (bool)(ix==1));
- *tend = '\0';
- tlen = tend - t; /* no longer know real size of t */
-
- /* reorder */
- dlen = tlen;
- New(0, d, dlen+1, U8);
- dend = pv_utf8_reorder(aTHX_ t, tlen, &d, dlen);
- *dend = '\0';
- dlen = dend - d; /* no longer know real size of d */
-
- /* return */
- dst = newSVpvn("", 0);
- sv_setpvn(dst, (char *)d, dlen);
- SvUTF8_on(dst);
-
- Safefree(t);
- Safefree(d);
- RETVAL = dst;
- OUTPUT:
- RETVAL
-
-
-SV*
-NFC(src)
- SV * src
- PROTOTYPE: $
- ALIAS:
- NFKC = 1
- FCC = 2
- PREINIT:
- SV *dst;
- U8 *s, *t, *tend, *u, *uend, *d, *dend;
- STRLEN slen, tlen, ulen, dlen;
- CODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
-
- /* decompose */
- tlen = slen;
- New(0, t, tlen+1, U8);
- tend = pv_utf8_decompose(aTHX_ s, slen, &t, tlen, (bool)(ix==1));
- *tend = '\0';
- tlen = tend - t; /* no longer know real size of t */
-
- /* reorder */
- ulen = tlen;
- New(0, u, ulen+1, U8);
- uend = pv_utf8_reorder(aTHX_ t, tlen, &u, ulen);
- *uend = '\0';
- ulen = uend - u; /* no longer know real size of u */
-
- /* compose */
- dlen = ulen;
- New(0, d, dlen+1, U8);
- dend = pv_utf8_compose(aTHX_ u, ulen, &d, dlen, (bool)(ix==2));
- *dend = '\0';
- dlen = dend - d; /* no longer know real size of d */
-
- /* return */
- dst = newSVpvn("", 0);
- sv_setpvn(dst, (char *)d, dlen);
- SvUTF8_on(dst);
-
- Safefree(t);
- Safefree(u);
- Safefree(d);
- RETVAL = dst;
- OUTPUT:
- RETVAL
-
-
-SV*
-checkNFD(src)
- SV * src
- PROTOTYPE: $
- ALIAS:
- checkNFKD = 1
- PREINIT:
- STRLEN srclen, retlen;
- U8 *s, *e, *p, curCC, preCC;
- bool result = TRUE;
- CODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&srclen);
- e = s + srclen;
-
- preCC = 0;
- for (p = s; p < e; p += retlen) {
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
- if (!retlen)
- croak(ErrRetlenIsZero, "checkNFD or -NFKD");
-
- curCC = getCombinClass(uv);
- if (preCC > curCC && curCC != 0) { /* canonical ordering violated */
- result = FALSE;
- break;
- }
- if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) {
- result = FALSE;
- break;
- }
- preCC = curCC;
- }
- RETVAL = boolSV(result);
- OUTPUT:
- RETVAL
-
-
-SV*
-checkNFC(src)
- SV * src
- PROTOTYPE: $
- ALIAS:
- checkNFKC = 1
- PREINIT:
- STRLEN srclen, retlen;
- U8 *s, *e, *p, curCC, preCC;
- bool result = TRUE;
- bool isMAYBE = FALSE;
- CODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&srclen);
- e = s + srclen;
-
- preCC = 0;
- for (p = s; p < e; p += retlen) {
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
- if (!retlen)
- croak(ErrRetlenIsZero, "checkNFC or -NFKC");
-
- curCC = getCombinClass(uv);
- if (preCC > curCC && curCC != 0) { /* canonical ordering violated */
- result = FALSE;
- break;
- }
-
- /* get NFC/NFKC property */
- if (Hangul_IsS(uv)) /* Hangul syllables are canonical composites */
- ; /* YES */
- else if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
- result = FALSE;
- break;
- }
- else if (isComp2nd(uv))
- isMAYBE = TRUE;
- else if (ix) {
- char *canon, *compat;
- /* NFKC_NO when having compatibility mapping. */
- canon = (char *) dec_canonical(uv);
- compat = (char *) dec_compat(uv);
- if (compat && !(canon && strEQ(canon, compat))) {
- result = FALSE;
- break;
- }
- } /* end of get NFC/NFKC property */
-
- preCC = curCC;
- }
- if (isMAYBE && result) /* NO precedes MAYBE */
- XSRETURN_UNDEF;
- RETVAL = boolSV(result);
- OUTPUT:
- RETVAL
-
-
-SV*
-checkFCD(src)
- SV * src
- PROTOTYPE: $
- ALIAS:
- checkFCC = 1
- PREINIT:
- STRLEN srclen, retlen;
- U8 *s, *e, *p, curCC, preCC;
- bool result = TRUE;
- bool isMAYBE = FALSE;
- CODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&srclen);
- e = s + srclen;
- preCC = 0;
- for (p = s; p < e; p += retlen) {
- U8 *sCan;
- UV uvLead;
- STRLEN canlen = 0;
- UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
- if (!retlen)
- croak(ErrRetlenIsZero, "checkFCD or -FCC");
-
- sCan = (U8*) dec_canonical(uv);
-
- if (sCan) {
- STRLEN canret;
- canlen = (STRLEN)strlen((char *) sCan);
- uvLead = utf8n_to_uvchr(sCan, canlen, &canret, AllowAnyUTF);
- if (!canret)
- croak(ErrRetlenIsZero, "checkFCD or -FCC");
- }
- else {
- uvLead = uv;
- }
-
- curCC = getCombinClass(uvLead);
-
- if (curCC != 0 && curCC < preCC) { /* canonical ordering violated */
- result = FALSE;
- break;
- }
-
- if (ix) {
- if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
- result = FALSE;
- break;
- }
- else if (isComp2nd(uv))
- isMAYBE = TRUE;
- }
-
- if (sCan) {
- STRLEN canret;
- UV uvTrail;
- U8* eCan = sCan + canlen;
- U8* pCan = utf8_hop(eCan, -1);
- if (pCan < sCan)
- croak(ErrHopBeforeStart);
- uvTrail = utf8n_to_uvchr(pCan, eCan - pCan, &canret, AllowAnyUTF);
- if (!canret)
- croak(ErrRetlenIsZero, "checkFCD or -FCC");
- preCC = getCombinClass(uvTrail);
- }
- else {
- preCC = curCC;
- }
- }
- if (isMAYBE && result) /* NO precedes MAYBE */
- XSRETURN_UNDEF;
- RETVAL = boolSV(result);
- OUTPUT:
- RETVAL
-
-
-U8
-getCombinClass(uv)
- UV uv
- PROTOTYPE: $
-
-bool
-isExclusion(uv)
- UV uv
- PROTOTYPE: $
-
-bool
-isSingleton(uv)
- UV uv
- PROTOTYPE: $
-
-bool
-isNonStDecomp(uv)
- UV uv
- PROTOTYPE: $
-
-bool
-isComp2nd(uv)
- UV uv
- PROTOTYPE: $
- ALIAS:
- isNFC_MAYBE = 1
- isNFKC_MAYBE = 2
- INIT:
- PERL_UNUSED_VAR(ix);
-
-SV*
-isNFD_NO(uv)
- UV uv
- PROTOTYPE: $
- ALIAS:
- isNFKD_NO = 1
- PREINIT:
- bool result = FALSE;
- CODE:
- if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv)))
- result = TRUE; /* NFD_NO or NFKD_NO */
- RETVAL = boolSV(result);
- OUTPUT:
- RETVAL
-
-
-SV*
-isComp_Ex(uv)
- UV uv
- PROTOTYPE: $
- ALIAS:
- isNFC_NO = 0
- isNFKC_NO = 1
- PREINIT:
- bool result = FALSE;
- CODE:
- if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
- result = TRUE; /* NFC_NO or NFKC_NO */
- else if (ix) {
- char *canon, *compat;
- canon = (char *) dec_canonical(uv);
- compat = (char *) dec_compat(uv);
- if (compat && (!canon || strNE(canon, compat)))
- result = TRUE; /* NFC_NO or NFKC_NO */
- }
- RETVAL = boolSV(result);
- OUTPUT:
- RETVAL
-
-SV*
-getComposite(uv, uv2)
- UV uv
- UV uv2
- PROTOTYPE: $$
- PREINIT:
- UV composite;
- CODE:
- composite = composite_uv(uv, uv2);
- RETVAL = composite ? newSVuv(composite) : &PL_sv_undef;
- OUTPUT:
- RETVAL
-
-
-
-SV*
-getCanon(uv)
- UV uv
- PROTOTYPE: $
- ALIAS:
- getCompat = 1
- CODE:
- if (Hangul_IsS(uv)) {
- U8 tmp[3 * UTF8_MAXLEN + 1];
- U8 *t = tmp;
- U8 *e = pv_cat_decompHangul(aTHX_ t, uv);
- RETVAL = newSVpvn((char *)t, e - t);
- } else {
- U8* rstr = ix ? dec_compat(uv) : dec_canonical(uv);
- if (!rstr)
- XSRETURN_UNDEF;
- RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
- }
- SvUTF8_on(RETVAL);
- OUTPUT:
- RETVAL
-
-
-void
-splitOnLastStarter(src)
- SV * src
- PREINIT:
- SV *svp;
- STRLEN srclen;
- U8 *s, *e, *p;
- PPCODE:
- s = (U8*)sv_2pvunicode(aTHX_ src,&srclen);
- e = s + srclen;
- p = e;
- while (s < p) {
- UV uv;
- p = utf8_hop(p, -1);
- if (p < s)
- croak(ErrHopBeforeStart);
- uv = utf8n_to_uvchr(p, e - p, NULL, AllowAnyUTF);
- if (getCombinClass(uv) == 0) /* Last Starter found */
- break;
- }
-
- svp = sv_2mortal(newSVpvn((char*)s, p - s));
- SvUTF8_on(svp);
- XPUSHs(svp);
-
- svp = sv_2mortal(newSVpvn((char*)p, e - p));
- SvUTF8_on(svp);
- XPUSHs(svp);
-
diff --git a/cpan/Unicode-Normalize/mkheader b/cpan/Unicode-Normalize/mkheader
deleted file mode 100644
index 8d4c1b8e8d..0000000000
--- a/cpan/Unicode-Normalize/mkheader
+++ /dev/null
@@ -1,419 +0,0 @@
-#!perl
-#
-# This auxiliary script makes five header files
-# used for building XSUB of Unicode::Normalize.
-#
-# Usage:
-# <do 'mkheader'> in perl, or <perl mkheader> in command line
-#
-# Input files:
-# unicore/CombiningClass.pl (or unicode/CombiningClass.pl)
-# unicore/Decomposition.pl (or unicode/Decomposition.pl)
-#
-# Output files:
-# unfcan.h
-# unfcpt.h
-# unfcmb.h
-# unfcmp.h
-# unfexc.h
-#
-use 5.006;
-use strict;
-use warnings;
-use Carp;
-use File::Spec;
-use SelectSaver;
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- die "Unicode::Normalize cannot stringify a Unicode code point\n";
- }
- unless (0x41 == unpack('U', 'A')) {
- die "Unicode::Normalize cannot get Unicode code point\n";
- }
-}
-
-our $PACKAGE = 'Unicode::Normalize, mkheader';
-
-our $prefix = "UNF_";
-our $structname = "${prefix}complist";
-
-# Starting in v5.20, the tables in lib/unicore are built using the platform's
-# native character set for code points 0-255.
-*pack_U = ($] ge 5.020)
- ? sub { return pack('W*', @_).pack('U*'); } # The empty pack returns
- # an empty UTF-8 string,
- # so the effect is to
- # force the return into
- # being UTF-8.
- : sub { return pack('U*', @_); };
-
-# %Canon and %Compat will be ($codepoint => $hexstring) after _U_stringify()
-our %Comp1st; # $codepoint => $listname : may be composed with a next char.
-our %CompList; # $listname,$2nd => $codepoint : composite
-
-##### The below part is common to mkheader and PP #####
-
-our %Combin; # $codepoint => $number : combination class
-our %Canon; # $codepoint => \@codepoints : canonical decomp.
-our %Compat; # $codepoint => \@codepoints : compat. decomp.
-our %Compos; # $1st,$2nd => $codepoint : composite
-our %Exclus; # $codepoint => 1 : composition exclusions
-our %Single; # $codepoint => 1 : singletons
-our %NonStD; # $codepoint => 1 : non-starter decompositions
-our %Comp2nd; # $codepoint => 1 : may be composed with a prev char.
-
-# from core Unicode database
-our $Combin = do "unicore/CombiningClass.pl"
- || do "unicode/CombiningClass.pl"
- || croak "$PACKAGE: CombiningClass.pl not found";
-our $Decomp = do "unicore/Decomposition.pl"
- || do "unicode/Decomposition.pl"
- || croak "$PACKAGE: Decomposition.pl not found";
-
-# CompositionExclusions.txt since Unicode 3.2.0. If this ever changes, it
-# would be better to get the values from Unicode::UCD rather than hard-code
-# them here, as that will protect from having to make fixes for future
-# changes.
-our @CompEx = qw(
- 0958 0959 095A 095B 095C 095D 095E 095F 09DC 09DD 09DF 0A33 0A36
- 0A59 0A5A 0A5B 0A5E 0B5C 0B5D 0F43 0F4D 0F52 0F57 0F5C 0F69 0F76
- 0F78 0F93 0F9D 0FA2 0FA7 0FAC 0FB9 FB1D FB1F FB2A FB2B FB2C FB2D
- FB2E FB2F FB30 FB31 FB32 FB33 FB34 FB35 FB36 FB38 FB39 FB3A FB3B
- FB3C FB3E FB40 FB41 FB43 FB44 FB46 FB47 FB48 FB49 FB4A FB4B FB4C
- FB4D FB4E 2ADC 1D15E 1D15F 1D160 1D161 1D162 1D163 1D164 1D1BB
- 1D1BC 1D1BD 1D1BE 1D1BF 1D1C0
-);
-
-# definition of Hangul constants
-use constant SBase => 0xAC00;
-use constant SFinal => 0xD7A3; # SBase -1 + SCount
-use constant SCount => 11172; # LCount * NCount
-use constant NCount => 588; # VCount * TCount
-use constant LBase => 0x1100;
-use constant LFinal => 0x1112;
-use constant LCount => 19;
-use constant VBase => 0x1161;
-use constant VFinal => 0x1175;
-use constant VCount => 21;
-use constant TBase => 0x11A7;
-use constant TFinal => 0x11C2;
-use constant TCount => 28;
-
-sub decomposeHangul {
- my $sindex = $_[0] - SBase;
- my $lindex = int( $sindex / NCount);
- my $vindex = int(($sindex % NCount) / TCount);
- my $tindex = $sindex % TCount;
- my @ret = (
- LBase + $lindex,
- VBase + $vindex,
- $tindex ? (TBase + $tindex) : (),
- );
- return wantarray ? @ret : pack_U(@ret);
-}
-
-########## getting full decomposition ##########
-
-## converts string "hhhh hhhh hhhh" to a numeric list
-## (hex digits separated by spaces)
-sub _getHexArray { map hex, $_[0] =~ /\G *([0-9A-Fa-f]+)/g }
-
-while ($Combin =~ /(.+)/g) {
- my @tab = split /\t/, $1;
- my $ini = hex $tab[0];
- if ($tab[1] eq '') {
- $Combin{$ini} = $tab[2];
- } else {
- $Combin{$_} = $tab[2] foreach $ini .. hex($tab[1]);
- }
-}
-
-while ($Decomp =~ /(.+)/g) {
- my @tab = split /\t/, $1;
- my $compat = $tab[2] =~ s/<[^>]+>//;
- my $dec = [ _getHexArray($tab[2]) ]; # decomposition
- my $ini = hex($tab[0]); # initial decomposable character
- my $end = $tab[1] eq '' ? $ini : hex($tab[1]);
- # ($ini .. $end) is the range of decomposable characters.
-
- foreach my $u ($ini .. $end) {
- $Compat{$u} = $dec;
- $Canon{$u} = $dec if ! $compat;
- }
-}
-
-for my $s (@CompEx) {
- my $u = hex $s;
- next if !$Canon{$u}; # not assigned
- next if $u == 0xFB1D && !$Canon{0x1D15E}; # 3.0.1 before Corrigendum #2
- $Exclus{$u} = 1;
-}
-
-foreach my $u (keys %Canon) {
- my $dec = $Canon{$u};
-
- if (@$dec == 2) {
- if ($Combin{ $dec->[0] }) {
- $NonStD{$u} = 1;
- } else {
- $Compos{ $dec->[0] }{ $dec->[1] } = $u;
- $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u};
- }
- } elsif (@$dec == 1) {
- $Single{$u} = 1;
- } else {
- my $h = sprintf '%04X', $u;
- croak("Weird Canonical Decomposition of U+$h");
- }
-}
-
-# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo
-foreach my $j (0x1161..0x1175, 0x11A8..0x11C2) {
- $Comp2nd{$j} = 1;
-}
-
-sub getCanonList {
- my @src = @_;
- my @dec = map {
- (SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)
- : $Canon{$_} ? @{ $Canon{$_} } : $_
- } @src;
- return join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
- # condition @src == @dec is not ok.
-}
-
-sub getCompatList {
- my @src = @_;
- my @dec = map {
- (SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)
- : $Compat{$_} ? @{ $Compat{$_} } : $_
- } @src;
- return join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
- # condition @src == @dec is not ok.
-}
-
-# exhaustive decomposition
-foreach my $key (keys %Canon) {
- $Canon{$key} = [ getCanonList($key) ];
-}
-
-# exhaustive decomposition
-foreach my $key (keys %Compat) {
- $Compat{$key} = [ getCompatList($key) ];
-}
-
-##### The above part is common to mkheader and PP #####
-
-foreach my $comp1st (keys %Compos) {
- my $listname = sprintf("${structname}_%06x", $comp1st);
- # %04x is bad since it'd place _3046 after _1d157.
- $Comp1st{$comp1st} = $listname;
- my $rh1st = $Compos{$comp1st};
-
- foreach my $comp2nd (keys %$rh1st) {
- my $uc = $rh1st->{$comp2nd};
- $CompList{$listname}{$comp2nd} = $uc;
- }
-}
-
-sub split_into_char {
- use bytes;
- my $uni = shift;
- my $len = length($uni);
- my @ary;
- for(my $i = 0; $i < $len; ++$i) {
- push @ary, ord(substr($uni,$i,1));
- }
- return @ary;
-}
-
-sub _U_stringify {
- sprintf '"%s"', join '',
- map sprintf("\\x%02x", $_), split_into_char(pack_U(@_));
-}
-
-foreach my $hash (\%Canon, \%Compat) {
- foreach my $key (keys %$hash) {
- $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
- }
-}
-
-########## writing header files ##########
-
-my @boolfunc = (
- {
- name => "Exclusion",
- type => "bool",
- hash => \%Exclus,
- },
- {
- name => "Singleton",
- type => "bool",
- hash => \%Single,
- },
- {
- name => "NonStDecomp",
- type => "bool",
- hash => \%NonStD,
- },
- {
- name => "Comp2nd",
- type => "bool",
- hash => \%Comp2nd,
- },
-);
-
-my $orig_fh = SelectSaver->new;
-{
-
-my $file = "unfexc.h";
-open FH, ">$file" or croak "$PACKAGE: $file can't be made";
-binmode FH; select FH;
-
- print << 'EOF';
-/*
- * This file is auto-generated by mkheader.
- * Any changes here will be lost!
- */
-EOF
-
-foreach my $tbl (@boolfunc) {
- my @temp = sort {$a <=> $b} keys %{$tbl->{hash}};
- my $type = $tbl->{type};
- my $name = $tbl->{name};
- print "$type is$name (UV uv)\n{\nreturn\n\t";
-
- while (@temp) {
- my $cur = shift @temp;
- if (@temp && $cur + 1 == $temp[0]) {
- print "($cur <= uv && uv <= ";
- while (@temp && $cur + 1 == $temp[0]) {
- $cur = shift @temp;
- }
- print "$cur)";
- print "\n\t|| " if @temp;
- } else {
- print "uv == $cur";
- print "\n\t|| " if @temp;
- }
- }
- print "\n\t? TRUE : FALSE;\n}\n\n";
-}
-
-close FH;
-
-####################################
-
-my $compinit =
- "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
-
-foreach my $i (sort keys %CompList) {
- $compinit .= "$structname $i [] = {\n";
- $compinit .= join ",\n",
- map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
- sort {$a <=> $b } keys %{ $CompList{$i} };
- $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
-}
-
-my @tripletable = (
- {
- file => "unfcmb",
- name => "combin",
- type => "STDCHAR",
- hash => \%Combin,
- null => 0,
- },
- {
- file => "unfcan",
- name => "canon",
- type => "char*",
- hash => \%Canon,
- null => "NULL",
- },
- {
- file => "unfcpt",
- name => "compat",
- type => "char*",
- hash => \%Compat,
- null => "NULL",
- },
- {
- file => "unfcmp",
- name => "compos",
- type => "$structname *",
- hash => \%Comp1st,
- null => "NULL",
- init => $compinit,
- },
-);
-
-foreach my $tbl (@tripletable) {
- my $file = "$tbl->{file}.h";
- my $head = "${prefix}$tbl->{name}";
- my $type = $tbl->{type};
- my $hash = $tbl->{hash};
- my $null = $tbl->{null};
- my $init = $tbl->{init};
-
- open FH, ">$file" or croak "$PACKAGE: $file can't be made";
- binmode FH; select FH;
- my %val;
-
- print FH << 'EOF';
-/*
- * This file is auto-generated by mkheader.
- * Any changes here will be lost!
- */
-EOF
-
- print $init if defined $init;
-
- foreach my $uv (keys %$hash) {
- croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
- unless $uv <= 0x10FFFF;
- my @c = unpack 'CCCC', pack 'N', $uv;
- $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
- }
-
- foreach my $p (sort { $a <=> $b } keys %val) {
- next if ! $val{ $p };
- for (my $r = 0; $r < 256; $r++) {
- next if ! $val{ $p }{ $r };
- printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;
- for (my $c = 0; $c < 256; $c++) {
- print "\t", defined $val{$p}{$r}{$c}
- ? "($type)".$val{$p}{$r}{$c}
- : $null;
- print ',' if $c != 255;
- print "\n" if $c % 8 == 7;
- }
- print "};\n\n";
- }
- }
- foreach my $p (sort { $a <=> $b } keys %val) {
- next if ! $val{ $p };
- printf "static $type* ${head}_%02x [256] = {\n", $p;
- for (my $r = 0; $r < 256; $r++) {
- print $val{ $p }{ $r }
- ? sprintf("${head}_%02x_%02x", $p, $r)
- : "NULL";
- print ',' if $r != 255;
- print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
- }
- print "};\n\n";
- }
- print "static $type** $head [] = {\n";
- for (my $p = 0; $p <= 0x10; $p++) {
- print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
- print ',' if $p != 0x10;
- print "\n";
- }
- print "};\n\n";
- close FH;
-}
-
-} # End of block for SelectSaver
-
-1;
-__END__
diff --git a/cpan/Unicode-Normalize/t/fcdc.t b/cpan/Unicode-Normalize/t/fcdc.t
deleted file mode 100644
index d2ef28b9e9..0000000000
--- a/cpan/Unicode-Normalize/t/fcdc.t
+++ /dev/null
@@ -1,138 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..70\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize qw(:all);
-
-ok(1);
-
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub hexU { _pack_U map hex, split ' ', shift }
-sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
-
-#########################
-
-ok(FCD(''), "");
-ok(FCC(''), "");
-ok(FCD('A'), "A");
-ok(FCC('A'), "A");
-
-ok(normalize('FCD', ""), "");
-ok(normalize('FCC', ""), "");
-ok(normalize('FCC', "A"), "A");
-ok(normalize('FCD', "A"), "A");
-
-# 9
-
-# if checkFCD is YES, the return value from FCD should be same as the original
-ok(FCD(hexU("00C5")), hexU("00C5")); # A with ring above
-ok(FCD(hexU("0041 030A")), hexU("0041 030A")); # A+ring
-ok(FCD(hexU("0041 0327 030A")), hexU("0041 0327 030A")); # A+cedilla+ring
-ok(FCD(hexU("AC01 1100 1161")), hexU("AC01 1100 1161")); # hangul
-ok(FCD(hexU("212B F900")), hexU("212B F900")); # compat
-
-ok(normalize('FCD', hexU("00C5")), hexU("00C5"));
-ok(normalize('FCD', hexU("0041 030A")), hexU("0041 030A"));
-ok(normalize('FCD', hexU("0041 0327 030A")), hexU("0041 0327 030A"));
-ok(normalize('FCD', hexU("AC01 1100 1161")), hexU("AC01 1100 1161"));
-ok(normalize('FCD', hexU("212B F900")), hexU("212B F900"));
-
-# 19
-
-# if checkFCD is MAYBE or NO, FCD returns NFD (this behavior isn't documented)
-ok(FCD(hexU("00C5 0327")), hexU("0041 0327 030A"));
-ok(FCD(hexU("0041 030A 0327")), hexU("0041 0327 030A"));
-ok(FCD(hexU("00C5 0327")), NFD(hexU("00C5 0327")));
-ok(FCD(hexU("0041 030A 0327")), NFD(hexU("0041 030A 0327")));
-
-ok(normalize('FCD', hexU("00C5 0327")), hexU("0041 0327 030A"));
-ok(normalize('FCD', hexU("0041 030A 0327")), hexU("0041 0327 030A"));
-ok(normalize('FCD', hexU("00C5 0327")), NFD(hexU("00C5 0327")));
-ok(normalize('FCD', hexU("0041 030A 0327")), NFD(hexU("0041 030A 0327")));
-
-# 27
-
-ok(answer(checkFCD('')), 'YES');
-ok(answer(checkFCD('A')), 'YES');
-ok(answer(checkFCD("\x{030A}")), 'YES'); # 030A;COMBINING RING ABOVE
-ok(answer(checkFCD("\x{0327}")), 'YES'); # 0327;COMBINING CEDILLA
-ok(answer(checkFCD(_pack_U(0x00C5))), 'YES'); # A with ring above
-ok(answer(checkFCD(hexU("0041 030A"))), 'YES'); # A+ring
-ok(answer(checkFCD(hexU("0041 0327 030A"))), 'YES'); # A+cedilla+ring
-ok(answer(checkFCD(hexU("0041 030A 0327"))), 'NO'); # A+ring+cedilla
-ok(answer(checkFCD(hexU("00C5 0327"))), 'NO'); # A-ring+cedilla
-ok(answer(checkNFC(hexU("00C5 0327"))), 'MAYBE'); # NFC: A-ring+cedilla
-ok(answer(check("FCD", hexU("00C5 0327"))), 'NO');
-ok(answer(check("NFC", hexU("00C5 0327"))), 'MAYBE');
-ok(answer(checkFCD("\x{AC01}\x{1100}\x{1161}")), 'YES'); # hangul
-ok(answer(checkFCD("\x{212B}\x{F900}")), 'YES'); # compat
-
-ok(answer(checkFCD(hexU("1EA7 05AE 0315 0062"))), "NO");
-ok(answer(checkFCC(hexU("1EA7 05AE 0315 0062"))), "NO");
-ok(answer(check('FCD', hexU("1EA7 05AE 0315 0062"))), "NO");
-ok(answer(check('FCC', hexU("1EA7 05AE 0315 0062"))), "NO");
-
-# 45
-
-ok(FCC(hexU("00C5 0327")), hexU("0041 0327 030A"));
-ok(FCC(hexU("0045 0304 0300")), "\x{1E14}");
-ok(FCC("\x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}"), "\x{AC00}\x{AE00}");
-ok(normalize('FCC', hexU("00C5 0327")), hexU("0041 0327 030A"));
-ok(normalize('FCC', hexU("0045 0304 0300")), "\x{1E14}");
-ok(normalize('FCC', hexU("1100 1161 1100 1173 11AF")), "\x{AC00}\x{AE00}");
-
-ok(FCC("\x{0B47}\x{0300}\x{0B3E}"), "\x{0B47}\x{0300}\x{0B3E}");
-ok(FCC("\x{1100}\x{0300}\x{1161}"), "\x{1100}\x{0300}\x{1161}");
-ok(FCC("\x{0B47}\x{0B3E}\x{0300}"), "\x{0B4B}\x{0300}");
-ok(FCC("\x{1100}\x{1161}\x{0300}"), "\x{AC00}\x{0300}");
-ok(FCC("\x{0B47}\x{300}\x{0B3E}\x{327}"), "\x{0B47}\x{300}\x{0B3E}\x{327}");
-ok(FCC("\x{1100}\x{300}\x{1161}\x{327}"), "\x{1100}\x{300}\x{1161}\x{327}");
-
-# 57
-
-ok(answer(checkFCC('')), 'YES');
-ok(answer(checkFCC('A')), 'YES');
-ok(answer(checkFCC("\x{030A}")), 'MAYBE'); # 030A;COMBINING RING ABOVE
-ok(answer(checkFCC("\x{0327}")), 'MAYBE'); # 0327;COMBINING CEDILLA
-ok(answer(checkFCC(hexU("00C5"))), 'YES'); # A with ring above
-ok(answer(checkFCC(hexU("0041 030A"))), 'MAYBE'); # A+ring
-ok(answer(checkFCC(hexU("0041 0327 030A"))), 'MAYBE'); # A+cedilla+ring
-ok(answer(checkFCC(hexU("0041 030A 0327"))), 'NO'); # A+ring+cedilla
-ok(answer(checkFCC(hexU("00C5 0327"))), 'NO'); # A-ring+cedilla
-ok(answer(checkFCC("\x{AC01}\x{1100}\x{1161}")), 'MAYBE'); # hangul
-ok(answer(checkFCC("\x{212B}\x{F900}")), 'NO'); # compat
-ok(answer(checkFCC("\x{212B}\x{0327}")), 'NO'); # compat
-ok(answer(checkFCC("\x{0327}\x{212B}")), 'NO'); # compat
-
-# 70
-
diff --git a/cpan/Unicode-Normalize/t/form.t b/cpan/Unicode-Normalize/t/form.t
deleted file mode 100644
index 6bbfb082ca..0000000000
--- a/cpan/Unicode-Normalize/t/form.t
+++ /dev/null
@@ -1,84 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..37\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize qw(:all);
-
-ok(1);
-
-sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
-
-#########################
-
-ok(NFD ("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
-ok(NFC ("\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
-ok(NFKD("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
-ok(NFKC("\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
-
-ok(answer(checkNFD ("\x{304C}")), "NO");
-ok(answer(checkNFC ("\x{304C}")), "YES");
-ok(answer(checkNFKD("\x{304C}")), "NO");
-ok(answer(checkNFKC("\x{304C}")), "YES");
-ok(answer(checkNFD ("\x{FF76}")), "YES");
-ok(answer(checkNFC ("\x{FF76}")), "YES");
-ok(answer(checkNFKD("\x{FF76}")), "NO");
-ok(answer(checkNFKC("\x{FF76}")), "NO");
-
-ok(normalize('D', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
-ok(normalize('C', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
-ok(normalize('KD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
-ok(normalize('KC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
-
-ok(answer(check('D', "\x{304C}")), "NO");
-ok(answer(check('C', "\x{304C}")), "YES");
-ok(answer(check('KD',"\x{304C}")), "NO");
-ok(answer(check('KC',"\x{304C}")), "YES");
-ok(answer(check('D' ,"\x{FF76}")), "YES");
-ok(answer(check('C' ,"\x{FF76}")), "YES");
-ok(answer(check('KD',"\x{FF76}")), "NO");
-ok(answer(check('KC',"\x{FF76}")), "NO");
-
-ok(normalize('NFD', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
-ok(normalize('NFC', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
-ok(normalize('NFKD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
-ok(normalize('NFKC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
-
-ok(answer(check('NFD', "\x{304C}")), "NO");
-ok(answer(check('NFC', "\x{304C}")), "YES");
-ok(answer(check('NFKD',"\x{304C}")), "NO");
-ok(answer(check('NFKC',"\x{304C}")), "YES");
-ok(answer(check('NFD' ,"\x{FF76}")), "YES");
-ok(answer(check('NFC' ,"\x{FF76}")), "YES");
-ok(answer(check('NFKD',"\x{FF76}")), "NO");
-ok(answer(check('NFKC',"\x{FF76}")), "NO");
-
diff --git a/cpan/Unicode-Normalize/t/func.t b/cpan/Unicode-Normalize/t/func.t
deleted file mode 100644
index 2bd6e504a3..0000000000
--- a/cpan/Unicode-Normalize/t/func.t
+++ /dev/null
@@ -1,386 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..217\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize qw(:all);
-
-ok(1);
-
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub hexU { _pack_U map hex, split ' ', shift }
-
-# This won't work on EBCDIC platforms prior to v5.8.0, which is when this
-# translation function was defined
-*to_native = (defined &utf8::unicode_to_native)
- ? \&utf8::unicode_to_native
- : sub { return shift };
-
-#########################
-
-ok(getCombinClass( to_native(0)), 0);
-ok(getCombinClass(to_native(41)), 0);
-ok(getCombinClass(to_native(65)), 0);
-ok(getCombinClass( 768), 230);
-ok(getCombinClass(1809), 36);
-
-ok(getCanon(to_native( 0)), undef);
-ok(getCanon(to_native(0x29)), undef);
-ok(getCanon(to_native(0x41)), undef);
-ok(getCanon(to_native(0x00C0)), _pack_U(0x0041, 0x0300));
-ok(getCanon(to_native(0x00EF)), _pack_U(0x0069, 0x0308));
-ok(getCanon(0x304C), _pack_U(0x304B, 0x3099));
-ok(getCanon(0x1EA4), _pack_U(0x0041, 0x0302, 0x0301));
-ok(getCanon(0x1F82), _pack_U(0x03B1, 0x0313, 0x0300, 0x0345));
-ok(getCanon(0x1FAF), _pack_U(0x03A9, 0x0314, 0x0342, 0x0345));
-ok(getCanon(0xAC00), _pack_U(0x1100, 0x1161));
-ok(getCanon(0xAE00), _pack_U(0x1100, 0x1173, 0x11AF));
-ok(getCanon(0x212C), undef);
-ok(getCanon(0x3243), undef);
-ok(getCanon(0xFA2D), _pack_U(0x9DB4));
-
-# 20
-
-ok(getCompat(to_native( 0)), undef);
-ok(getCompat(to_native(0x29)), undef);
-ok(getCompat(to_native(0x41)), undef);
-ok(getCompat(to_native(0x00C0)), _pack_U(0x0041, 0x0300));
-ok(getCompat(to_native(0x00EF)), _pack_U(0x0069, 0x0308));
-ok(getCompat(0x304C), _pack_U(0x304B, 0x3099));
-ok(getCompat(0x1EA4), _pack_U(0x0041, 0x0302, 0x0301));
-ok(getCompat(0x1F82), _pack_U(0x03B1, 0x0313, 0x0300, 0x0345));
-ok(getCompat(0x1FAF), _pack_U(0x03A9, 0x0314, 0x0342, 0x0345));
-ok(getCompat(0x212C), _pack_U(0x0042));
-ok(getCompat(0x3243), _pack_U(0x0028, 0x81F3, 0x0029));
-ok(getCompat(0xAC00), _pack_U(0x1100, 0x1161));
-ok(getCompat(0xAE00), _pack_U(0x1100, 0x1173, 0x11AF));
-ok(getCompat(0xFA2D), _pack_U(0x9DB4));
-
-# 34
-
-ok(getComposite(to_native( 0), to_native( 0)), undef);
-ok(getComposite(to_native( 0), to_native(0x29)), undef);
-ok(getComposite(to_native(0x29), to_native( 0)), undef);
-ok(getComposite(to_native(0x29), to_native(0x29)), undef);
-ok(getComposite(to_native( 0), to_native(0x41)), undef);
-ok(getComposite(to_native(0x41), to_native( 0)), undef);
-ok(getComposite(to_native(0x41), to_native(0x41)), undef);
-ok(getComposite(to_native(12), to_native(0x0300)), undef);
-ok(getComposite(to_native(0x0055), 0xFF00), undef);
-ok(getComposite(to_native(0x0041), 0x0300), to_native(0x00C0));
-ok(getComposite(to_native(0x0055), 0x0300), to_native(0x00D9));
-ok(getComposite(0x0112, 0x0300), 0x1E14);
-ok(getComposite(0x1100, 0x1161), 0xAC00);
-ok(getComposite(0x1100, 0x1173), 0xADF8);
-ok(getComposite(0x1100, 0x11AF), undef);
-ok(getComposite(0x1173, 0x11AF), undef);
-ok(getComposite(0xAC00, 0x11A7), undef);
-ok(getComposite(0xAC00, 0x11A8), 0xAC01);
-ok(getComposite(0xADF8, 0x11AF), 0xAE00);
-
-# 53
-
-sub uprops {
- my $uv = shift;
- my $r = "";
- $r .= isExclusion($uv) ? 'X' : 'x';
- $r .= isSingleton($uv) ? 'S' : 's';
- $r .= isNonStDecomp($uv) ? 'N' : 'n'; # Non-Starter Decomposition
- $r .= isComp_Ex($uv) ? 'F' : 'f'; # Full exclusion (X + S + N)
- $r .= isComp2nd($uv) ? 'B' : 'b'; # B = M = Y
- $r .= isNFD_NO($uv) ? 'D' : 'd';
- $r .= isNFC_MAYBE($uv) ? 'M' : 'm'; # Maybe
- $r .= isNFC_NO($uv) ? 'C' : 'c';
- $r .= isNFKD_NO($uv) ? 'K' : 'k';
- $r .= isNFKC_MAYBE($uv) ? 'Y' : 'y'; # maYbe
- $r .= isNFKC_NO($uv) ? 'G' : 'g';
- return $r;
-}
-
-ok(uprops(to_native(0x0000)), 'xsnfbdmckyg'); # NULL
-ok(uprops(to_native(0x0029)), 'xsnfbdmckyg'); # RIGHT PARENTHESIS
-ok(uprops(to_native(0x0041)), 'xsnfbdmckyg'); # LATIN CAPITAL LETTER A
-ok(uprops(to_native(0x00A0)), 'xsnfbdmcKyG'); # NO-BREAK SPACE
-ok(uprops(to_native(0x00C0)), 'xsnfbDmcKyg'); # LATIN CAPITAL LETTER A WITH GRAVE
-ok(uprops(0x0300), 'xsnfBdMckYg'); # COMBINING GRAVE ACCENT
-ok(uprops(0x0344), 'xsNFbDmCKyG'); # COMBINING GREEK DIALYTIKA TONOS
-ok(uprops(0x0387), 'xSnFbDmCKyG'); # GREEK ANO TELEIA
-ok(uprops(0x0958), 'XsnFbDmCKyG'); # DEVANAGARI LETTER QA
-ok(uprops(0x0F43), 'XsnFbDmCKyG'); # TIBETAN LETTER GHA
-ok(uprops(0x1100), 'xsnfbdmckyg'); # HANGUL CHOSEONG KIYEOK
-ok(uprops(0x1161), 'xsnfBdMckYg'); # HANGUL JUNGSEONG A
-ok(uprops(0x11AF), 'xsnfBdMckYg'); # HANGUL JONGSEONG RIEUL
-ok(uprops(0x212B), 'xSnFbDmCKyG'); # ANGSTROM SIGN
-ok(uprops(0xAC00), 'xsnfbDmcKyg'); # HANGUL SYLLABLE GA
-ok(uprops(0xF900), 'xSnFbDmCKyG'); # CJK COMPATIBILITY IDEOGRAPH-F900
-ok(uprops(0xFB4E), 'XsnFbDmCKyG'); # HEBREW LETTER PE WITH RAFE
-ok(uprops(0xFF71), 'xsnfbdmcKyG'); # HALFWIDTH KATAKANA LETTER A
-
-# 71
-
-ok(decompose(""), "");
-ok(decompose("A"), "A");
-ok(decompose("", 1), "");
-ok(decompose("A", 1), "A");
-
-ok(decompose(hexU("1E14 AC01")), hexU("0045 0304 0300 1100 1161 11A8"));
-ok(decompose(hexU("AC00 AE00")), hexU("1100 1161 1100 1173 11AF"));
-ok(decompose(hexU("304C FF76")), hexU("304B 3099 FF76"));
-
-ok(decompose(hexU("1E14 AC01"), 1), hexU("0045 0304 0300 1100 1161 11A8"));
-ok(decompose(hexU("AC00 AE00"), 1), hexU("1100 1161 1100 1173 11AF"));
-ok(decompose(hexU("304C FF76"), 1), hexU("304B 3099 30AB"));
-
-# don't modify the source
-my $sDec = "\x{FA19}";
-ok(decompose($sDec), "\x{795E}");
-ok($sDec, "\x{FA19}");
-
-# 83
-
-ok(reorder(""), "");
-ok(reorder("A"), "A");
-ok(reorder(hexU("0041 0300 0315 0313 031b 0061")),
- hexU("0041 031b 0300 0313 0315 0061"));
-ok(reorder(hexU("00C1 0300 0315 0313 031b 0061 309A 3099")),
- hexU("00C1 031b 0300 0313 0315 0061 309A 3099"));
-
-# don't modify the source
-my $sReord = "\x{3000}\x{300}\x{31b}";
-ok(reorder($sReord), "\x{3000}\x{31b}\x{300}");
-ok($sReord, "\x{3000}\x{300}\x{31b}");
-
-# 89
-
-ok(compose(""), "");
-ok(compose("A"), "A");
-ok(compose(hexU("0061 0300")), hexU("00E0"));
-ok(compose(hexU("0061 0300 031B")), hexU("00E0 031B"));
-ok(compose(hexU("0061 0300 0315")), hexU("00E0 0315"));
-ok(compose(hexU("0061 0300 0313")), hexU("00E0 0313"));
-ok(compose(hexU("0061 031B 0300")), hexU("00E0 031B"));
-ok(compose(hexU("0061 0315 0300")), hexU("0061 0315 0300"));
-ok(compose(hexU("0061 0313 0300")), hexU("0061 0313 0300"));
-
-# don't modify the source
-my $sCom = "\x{304B}\x{3099}";
-ok(compose($sCom), "\x{304C}");
-ok($sCom, "\x{304B}\x{3099}");
-
-# 100
-
-ok(composeContiguous(""), "");
-ok(composeContiguous("A"), "A");
-ok(composeContiguous(hexU("0061 0300")), hexU("00E0"));
-ok(composeContiguous(hexU("0061 0300 031B")), hexU("00E0 031B"));
-ok(composeContiguous(hexU("0061 0300 0315")), hexU("00E0 0315"));
-ok(composeContiguous(hexU("0061 0300 0313")), hexU("00E0 0313"));
-ok(composeContiguous(hexU("0061 031B 0300")), hexU("0061 031B 0300"));
-ok(composeContiguous(hexU("0061 0315 0300")), hexU("0061 0315 0300"));
-ok(composeContiguous(hexU("0061 0313 0300")), hexU("0061 0313 0300"));
-
-# don't modify the source
-my $sCtg = "\x{30DB}\x{309A}";
-ok(composeContiguous($sCtg), "\x{30DD}");
-ok($sCtg, "\x{30DB}\x{309A}");
-
-# 111
-
-sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
-
-ok(answer(checkNFD("")), "YES");
-ok(answer(checkNFC("")), "YES");
-ok(answer(checkNFKD("")), "YES");
-ok(answer(checkNFKC("")), "YES");
-ok(answer(check("NFD", "")), "YES");
-ok(answer(check("NFC", "")), "YES");
-ok(answer(check("NFKD","")), "YES");
-ok(answer(check("NFKC","")), "YES");
-
-# U+0000 to U+007F are prenormalized in all the normalization forms.
-ok(answer(checkNFD("AZaz\t12!#`")), "YES");
-ok(answer(checkNFC("AZaz\t12!#`")), "YES");
-ok(answer(checkNFKD("AZaz\t12!#`")), "YES");
-ok(answer(checkNFKC("AZaz\t12!#`")), "YES");
-ok(answer(check("D", "AZaz\t12!#`")), "YES");
-ok(answer(check("C", "AZaz\t12!#`")), "YES");
-ok(answer(check("KD","AZaz\t12!#`")), "YES");
-ok(answer(check("KC","AZaz\t12!#`")), "YES");
-
-ok(answer(checkNFD(NFD(_pack_U(0xC1, 0x1100, 0x1173, 0x11AF)))), "YES");
-ok(answer(checkNFD(hexU("20 C1 1100 1173 11AF"))), "NO");
-ok(answer(checkNFC(hexU("20 C1 1173 11AF"))), "MAYBE");
-ok(answer(checkNFC(hexU("20 C1 AE00 1100"))), "YES");
-ok(answer(checkNFC(hexU("20 C1 AE00 1100 0300"))), "MAYBE");
-ok(answer(checkNFC(hexU("212B 1100 0300"))), "NO");
-ok(answer(checkNFC(hexU("1100 0300 212B"))), "NO");
-ok(answer(checkNFC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring
-ok(answer(checkNFC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla
-ok(answer(checkNFC(hexU("20 C1 FF71 2025"))),"YES");
-ok(answer(check("NFC", hexU("20 C1 212B 300"))), "NO");
-ok(answer(checkNFKD(hexU("20 C1 FF71 2025"))), "NO");
-ok(answer(checkNFKC(hexU("20 C1 AE00 2025"))), "NO");
-ok(answer(checkNFKC(hexU("212B 1100 0300"))), "NO");
-ok(answer(checkNFKC(hexU("1100 0300 212B"))), "NO");
-ok(answer(checkNFKC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring
-ok(answer(checkNFKC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla
-ok(answer(check("NFKC", hexU("20 C1 212B 300"))), "NO");
-
-# 145
-
-"012ABC" =~ /(\d+)(\w+)/;
-ok("012" eq NFC $1 && "ABC" eq NFC $2);
-
-ok(normalize('C', $1), "012");
-ok(normalize('C', $2), "ABC");
-
-ok(normalize('NFC', $1), "012");
-ok(normalize('NFC', $2), "ABC");
- # s/^NF// in normalize() must not prevent using $1, $&, etc.
-
-# 150
-
-# a string with initial zero should be treated like a number
-
-# LATIN CAPITAL LETTER A WITH GRAVE
-ok(getCombinClass(sprintf("0%d", to_native(192))), 0);
-ok(getCanon (sprintf("0%d", to_native(192))), _pack_U(0x41, 0x300));
-ok(getCompat(sprintf("0%d", to_native(192))), _pack_U(0x41, 0x300));
-my $lead_zero = sprintf "0%d", to_native(65);
-ok(getComposite($lead_zero, "0768"), to_native(192));
-ok(isNFD_NO (sprintf("0%d", to_native(192))));
-ok(isNFKD_NO(sprintf("0%d", to_native(192))));
-
-# DEVANAGARI LETTER QA
-ok(isExclusion("02392"));
-ok(isComp_Ex ("02392"));
-ok(isNFC_NO ("02392"));
-ok(isNFKC_NO ("02392"));
-ok(isNFD_NO ("02392"));
-ok(isNFKD_NO ("02392"));
-
-# ANGSTROM SIGN
-ok(isSingleton("08491"));
-ok(isComp_Ex ("08491"));
-ok(isNFC_NO ("08491"));
-ok(isNFKC_NO ("08491"));
-ok(isNFD_NO ("08491"));
-ok(isNFKD_NO ("08491"));
-
-# COMBINING GREEK DIALYTIKA TONOS
-ok(isNonStDecomp("0836"));
-ok(isComp_Ex ("0836"));
-ok(isNFC_NO ("0836"));
-ok(isNFKC_NO ("0836"));
-ok(isNFD_NO ("0836"));
-ok(isNFKD_NO ("0836"));
-
-# COMBINING GRAVE ACCENT
-ok(getCombinClass("0768"), 230);
-ok(isComp2nd ("0768"));
-ok(isNFC_MAYBE ("0768"));
-ok(isNFKC_MAYBE("0768"));
-
-# HANGUL SYLLABLE GA
-ok(getCombinClass("044032"), 0);
-ok(getCanon("044032"), _pack_U(0x1100, 0x1161));
-ok(getCompat("044032"), _pack_U(0x1100, 0x1161));
-ok(getComposite("04352", "04449"), 0xAC00);
-
-# 182
-
-# string with 22 combining characters: (0x300..0x315)
-my $str_cc22 = _pack_U(0x3041, 0x300..0x315, 0x3042);
-ok(decompose($str_cc22), $str_cc22);
-ok(reorder($str_cc22), $str_cc22);
-ok(compose($str_cc22), $str_cc22);
-ok(composeContiguous($str_cc22), $str_cc22);
-ok(NFD($str_cc22), $str_cc22);
-ok(NFC($str_cc22), $str_cc22);
-ok(NFKD($str_cc22), $str_cc22);
-ok(NFKC($str_cc22), $str_cc22);
-ok(FCD($str_cc22), $str_cc22);
-ok(FCC($str_cc22), $str_cc22);
-
-# 192
-
-# string with 40 combining characters of the same class: (0x300..0x313)x2
-my $str_cc40 = _pack_U(0x3041, 0x300..0x313, 0x300..0x313, 0x3042);
-ok(decompose($str_cc40), $str_cc40);
-ok(reorder($str_cc40), $str_cc40);
-ok(compose($str_cc40), $str_cc40);
-ok(composeContiguous($str_cc40), $str_cc40);
-ok(NFD($str_cc40), $str_cc40);
-ok(NFC($str_cc40), $str_cc40);
-ok(NFKD($str_cc40), $str_cc40);
-ok(NFKC($str_cc40), $str_cc40);
-ok(FCD($str_cc40), $str_cc40);
-ok(FCC($str_cc40), $str_cc40);
-
-# 202
-
-my $precomp = hexU("304C 304E 3050 3052 3054");
-my $combseq = hexU("304B 3099 304D 3099 304F 3099 3051 3099 3053 3099");
-ok(decompose($precomp x 5), $combseq x 5);
-ok(decompose($precomp x 10), $combseq x 10);
-ok(decompose($precomp x 20), $combseq x 20);
-
-my $hangsyl = hexU("AC00 B098 B2E4 B77C B9C8");
-my $jamoseq = hexU("1100 1161 1102 1161 1103 1161 1105 1161 1106 1161");
-ok(decompose($hangsyl x 5), $jamoseq x 5);
-ok(decompose($hangsyl x 10), $jamoseq x 10);
-ok(decompose($hangsyl x 20), $jamoseq x 20);
-
-my $notcomp = hexU("304B 304D 304F 3051 3053");
-ok(decompose($precomp . $notcomp), $combseq . $notcomp);
-ok(decompose($precomp . $notcomp x 5), $combseq . $notcomp x 5);
-ok(decompose($precomp . $notcomp x10), $combseq . $notcomp x10);
-
-# 211
-
-my $preUnicode3_1 = !defined getCanon(0x1D15E);
-my $preUnicode3_2 = !defined getCanon(0x2ADC);
-
-# HEBREW LETTER YOD WITH HIRIQ
-ok($preUnicode3_1 xor isExclusion(0xFB1D));
-ok($preUnicode3_1 xor isComp_Ex (0xFB1D));
-
-# MUSICAL SYMBOL HALF NOTE
-ok($preUnicode3_1 xor isExclusion(0x1D15E));
-ok($preUnicode3_1 xor isComp_Ex (0x1D15E));
-
-# FORKING
-ok($preUnicode3_2 xor isExclusion(0x2ADC));
-ok($preUnicode3_2 xor isComp_Ex (0x2ADC));
-
-# 217
-
diff --git a/cpan/Unicode-Normalize/t/illegal.t b/cpan/Unicode-Normalize/t/illegal.t
deleted file mode 100644
index ccf2b4aae6..0000000000
--- a/cpan/Unicode-Normalize/t/illegal.t
+++ /dev/null
@@ -1,85 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-BEGIN {
- unless (5.006001 <= $]) {
- print "1..0 # skipped: Perl 5.6.1 or later".
- " needed for this test\n";
- exit;
- }
-}
-
-#########################
-
-BEGIN {
- use Unicode::Normalize qw(:all);
-
- unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) {
- print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
- " needed for this test\n";
- print $@;
- exit;
- }
-}
-
-use strict;
-use warnings;
-
-BEGIN { $| = 1; print "1..113\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-ok(1);
-
-#########################
-
-no warnings qw(utf8);
-
-for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF,
- 0x1FFFF, 0x10FFFF, 0x110000, 0x3FFFFFFF)
-{
- my $c = chr $u;
- ok($c eq NFD($c)); # 1
- ok($c eq NFC($c)); # 2
- ok($c eq NFKD($c)); # 3
- ok($c eq NFKC($c)); # 4
- ok($c eq FCD($c)); # 5
- ok($c eq FCC($c)); # 6
- ok($c eq decompose($c)); # 7
- ok($c eq decompose($c,1)); # 8
- ok($c eq reorder($c)); # 9
- ok($c eq compose($c)); # 10
-}
-
-our $proc; # before the last starter
-our $unproc; # the last starter and after
-
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-
-($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0xFFFF));
-ok($proc eq _pack_U(0x41, 0x300, 0x327));
-ok($unproc eq "\x{FFFF}");
-
diff --git a/cpan/Unicode-Normalize/t/norm.t b/cpan/Unicode-Normalize/t/norm.t
deleted file mode 100644
index d3cec3aea1..0000000000
--- a/cpan/Unicode-Normalize/t/norm.t
+++ /dev/null
@@ -1,145 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..64\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize qw(normalize);
-
-ok(1);
-
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
-
-#########################
-
-ok(normalize('D', ""), "");
-ok(normalize('C', ""), "");
-ok(normalize('KD',""), "");
-ok(normalize('KC',""), "");
-
-ok(normalize('D', "A"), "A");
-ok(normalize('C', "A"), "A");
-ok(normalize('KD',"A"), "A");
-ok(normalize('KC',"A"), "A");
-
-ok(normalize('NFD', ""), "");
-ok(normalize('NFC', ""), "");
-ok(normalize('NFKD',""), "");
-ok(normalize('NFKC',""), "");
-
-ok(normalize('NFD', "A"), "A");
-ok(normalize('NFC', "A"), "A");
-ok(normalize('NFKD',"A"), "A");
-ok(normalize('NFKC',"A"), "A");
-
-# 17
-
-# don't modify the source
-my $sNFD = "\x{FA19}";
-ok(normalize('NFD', $sNFD), "\x{795E}");
-ok($sNFD, "\x{FA19}");
-
-my $sNFC = "\x{FA1B}";
-ok(normalize('NFC', $sNFC), "\x{798F}");
-ok($sNFC, "\x{FA1B}");
-
-my $sNFKD = "\x{FA1E}";
-ok(normalize('NFKD', $sNFKD), "\x{7FBD}");
-ok($sNFKD, "\x{FA1E}");
-
-my $sNFKC = "\x{FA26}";
-ok(normalize('NFKC', $sNFKC), "\x{90FD}");
-ok($sNFKC, "\x{FA26}");
-
-# 25
-
-sub hexNFC {
- join " ", map sprintf("%04X", $_),
- _unpack_U normalize 'C', _pack_U map hex, split ' ', shift;
-}
-sub hexNFD {
- join " ", map sprintf("%04X", $_),
- _unpack_U normalize 'D', _pack_U map hex, split ' ', shift;
-}
-
-ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8");
-ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF");
-
-ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
-ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
-ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
-ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
-ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
-ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");
-
-ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
-ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
-ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
-ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
-ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
-ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
-ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
-ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
-ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");
-
-ok(hexNFC("AC00 11A7"), "AC00 11A7");
-ok(hexNFC("AC00 11A8"), "AC01");
-ok(hexNFC("AC00 11A9"), "AC02");
-ok(hexNFC("AC00 11C2"), "AC1B");
-ok(hexNFC("AC00 11C3"), "AC00 11C3");
-
-# 47
-
-# Test Cases from Public Review Issue #29: Normalization Issue
-# cf. http://www.unicode.org/review/pr-29.html
-ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
-ok(hexNFC("1100 0300 1161"), "1100 0300 1161");
-ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
-ok(hexNFC("1100 1161 0300"), "AC00 0300");
-ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
-ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");
-
-ok(hexNFC("0300 0041"), "0300 0041");
-ok(hexNFC("0300 0301 0041"), "0300 0301 0041");
-ok(hexNFC("0301 0300 0041"), "0301 0300 0041");
-ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301");
-ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300");
-
-ok(hexNFC("0327 0061 0300"), "0327 00E0");
-ok(hexNFC("0301 0061 0300"), "0301 00E0");
-ok(hexNFC("0315 0061 0300"), "0315 00E0");
-ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0");
-ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0");
-ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0");
-
-# 64
-
diff --git a/cpan/Unicode-Normalize/t/null.t b/cpan/Unicode-Normalize/t/null.t
deleted file mode 100644
index 9a0008708e..0000000000
--- a/cpan/Unicode-Normalize/t/null.t
+++ /dev/null
@@ -1,100 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-
-use Unicode::Normalize qw(:all);
-print "1..24\n";
-
-print "ok 1\n";
-
-# if $_ is not NULL-terminated, test may fail.
-
-$_ = compose('abc');
-print /c$/ ? "ok" : "not ok", " 2\n";
-
-$_ = decompose('abc');
-print /c$/ ? "ok" : "not ok", " 3\n";
-
-$_ = reorder('abc');
-print /c$/ ? "ok" : "not ok", " 4\n";
-
-$_ = NFD('abc');
-print /c$/ ? "ok" : "not ok", " 5\n";
-
-$_ = NFC('abc');
-print /c$/ ? "ok" : "not ok", " 6\n";
-
-$_ = NFKD('abc');
-print /c$/ ? "ok" : "not ok", " 7\n";
-
-$_ = NFKC('abc');
-print /c$/ ? "ok" : "not ok", " 8\n";
-
-$_ = FCC('abc');
-print /c$/ ? "ok" : "not ok", " 9\n";
-
-$_ = decompose("\x{304C}abc");
-print /c$/ ? "ok" : "not ok", " 10\n";
-
-$_ = decompose("\x{304B}\x{3099}abc");
-print /c$/ ? "ok" : "not ok", " 11\n";
-
-$_ = reorder("\x{304C}abc");
-print /c$/ ? "ok" : "not ok", " 12\n";
-
-$_ = reorder("\x{304B}\x{3099}abc");
-print /c$/ ? "ok" : "not ok", " 13\n";
-
-$_ = compose("\x{304C}abc");
-print /c$/ ? "ok" : "not ok", " 14\n";
-
-$_ = compose("\x{304B}\x{3099}abc");
-print /c$/ ? "ok" : "not ok", " 15\n";
-
-$_ = NFD("\x{304C}abc");
-print /c$/ ? "ok" : "not ok", " 16\n";
-
-$_ = NFC("\x{304C}abc");
-print /c$/ ? "ok" : "not ok", " 17\n";
-
-$_ = NFKD("\x{304C}abc");
-print /c$/ ? "ok" : "not ok", " 18\n";
-
-$_ = NFKC("\x{304C}abc");
-print /c$/ ? "ok" : "not ok", " 19\n";
-
-$_ = FCC("\x{304C}abc");
-print /c$/ ? "ok" : "not ok", " 20\n";
-
-$_ = getCanon(0x100);
-print s/.$// ? "ok" : "not ok", " 21\n";
-
-$_ = getCompat(0x100);
-print s/.$// ? "ok" : "not ok", " 22\n";
-
-$_ = getCanon(0xAC00);
-print s/.$// ? "ok" : "not ok", " 23\n";
-
-$_ = getCompat(0xAC00);
-print s/.$// ? "ok" : "not ok", " 24\n";
-
diff --git a/cpan/Unicode-Normalize/t/partial1.t b/cpan/Unicode-Normalize/t/partial1.t
deleted file mode 100644
index 3e44a63dc0..0000000000
--- a/cpan/Unicode-Normalize/t/partial1.t
+++ /dev/null
@@ -1,120 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-BEGIN {
- unless (5.006001 <= $]) {
- print "1..0 # skipped: Perl 5.6.1 or later".
- " needed for this test\n";
- exit;
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..26\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize qw(:all);
-
-ok(1);
-
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
-
-#########################
-
-sub arraynorm {
- my $form = shift;
- my @string = @_;
- my $result = "";
- my $unproc = "";
- foreach my $str (@string) {
- $unproc .= $str;
- $result .= $form eq 'NFC' ? NFC_partial ($unproc) :
- $form eq 'NFD' ? NFD_partial ($unproc) :
- $form eq 'NFKC' ? NFKC_partial($unproc) :
- $form eq 'NFKD' ? NFKD_partial($unproc) :
- undef;
- }
- $result .= $unproc;
- return $result;
-}
-
-my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
-my $strC = "\x{3CE}\x{AC01}\x{AC03}";
-my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
-my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
-ok($strC eq NFC($strD));
-ok($strD eq join('', @str1));
-ok($strC eq arraynorm('NFC', @str1));
-ok($strD eq join('', @str2));
-ok($strC eq arraynorm('NFC', @str2));
-
-my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
-my $strX = "\x{300}\x{AC01}";
-ok($strX eq NFC(join('', @strX)));
-ok($strX eq arraynorm('NFC', @strX));
-ok($strX eq NFKC(join('', @strX)));
-ok($strX eq arraynorm('NFKC', @strX));
-
-my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
-my $strY = ("\x{304C}\x{0323}\x{0308}");
-ok($strY eq NFC(join('', @strY)));
-ok($strY eq arraynorm('NFC', @strY));
-ok($strY eq NFKC(join('', @strY)));
-ok($strY eq arraynorm('NFKC', @strY));
-
-my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
-my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
-ok($strZ eq NFD(join('', @strZ)));
-ok($strZ eq arraynorm('NFD', @strZ));
-ok($strZ eq NFKD(join('', @strZ)));
-ok($strZ eq arraynorm('NFKD', @strZ));
-
-# 18
-
-# must modify the source
-my $sNFD = "\x{FA19}";
-ok(NFD_partial($sNFD), "");
-ok($sNFD, "\x{795E}");
-
-my $sNFC = "\x{FA1B}";
-ok(NFC_partial($sNFC), "");
-ok($sNFC, "\x{798F}");
-
-my $sNFKD = "\x{FA1E}";
-ok(NFKD_partial($sNFKD), "");
-ok($sNFKD, "\x{7FBD}");
-
-my $sNFKC = "\x{FA26}";
-ok(NFKC_partial($sNFKC), "");
-ok($sNFKC, "\x{90FD}");
-
-# 26
-
diff --git a/cpan/Unicode-Normalize/t/partial2.t b/cpan/Unicode-Normalize/t/partial2.t
deleted file mode 100644
index 7f19e9365b..0000000000
--- a/cpan/Unicode-Normalize/t/partial2.t
+++ /dev/null
@@ -1,116 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-BEGIN {
- unless (5.006001 <= $]) {
- print "1..0 # skipped: Perl 5.6.1 or later".
- " needed for this test\n";
- exit;
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..26\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize qw(:all);
-
-ok(1);
-
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
-
-#########################
-
-sub arraynorm {
- my $form = shift;
- my @string = @_;
- my $result = "";
- my $unproc = "";
- foreach my $str (@string) {
- $unproc .= $str;
- $result .= normalize_partial($form, $unproc);
- }
- $result .= $unproc;
- return $result;
-}
-
-my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
-my $strC = "\x{3CE}\x{AC01}\x{AC03}";
-my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
-my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
-ok($strC eq NFC($strD));
-ok($strD eq join('', @str1));
-ok($strC eq arraynorm('NFC', @str1));
-ok($strD eq join('', @str2));
-ok($strC eq arraynorm('NFC', @str2));
-
-my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
-my $strX = "\x{300}\x{AC01}";
-ok($strX eq NFC(join('', @strX)));
-ok($strX eq arraynorm('NFC', @strX));
-ok($strX eq NFKC(join('', @strX)));
-ok($strX eq arraynorm('NFKC', @strX));
-
-my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
-my $strY = ("\x{304C}\x{0323}\x{0308}");
-ok($strY eq NFC(join('', @strY)));
-ok($strY eq arraynorm('NFC', @strY));
-ok($strY eq NFKC(join('', @strY)));
-ok($strY eq arraynorm('NFKC', @strY));
-
-my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
-my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
-ok($strZ eq NFD(join('', @strZ)));
-ok($strZ eq arraynorm('NFD', @strZ));
-ok($strZ eq NFKD(join('', @strZ)));
-ok($strZ eq arraynorm('NFKD', @strZ));
-
-# 18
-
-# must modify the source
-my $sNFD = "\x{FA19}";
-ok(normalize_partial('NFD', $sNFD), "");
-ok($sNFD, "\x{795E}");
-
-my $sNFC = "\x{FA1B}";
-ok(normalize_partial('NFC', $sNFC), "");
-ok($sNFC, "\x{798F}");
-
-my $sNFKD = "\x{FA1E}";
-ok(normalize_partial('NFKD', $sNFKD), "");
-ok($sNFKD, "\x{7FBD}");
-
-my $sNFKC = "\x{FA26}";
-ok(normalize_partial('NFKC', $sNFKC), "");
-ok($sNFKC, "\x{90FD}");
-
-# 26
-
diff --git a/cpan/Unicode-Normalize/t/proto.t b/cpan/Unicode-Normalize/t/proto.t
deleted file mode 100644
index 38c6985759..0000000000
--- a/cpan/Unicode-Normalize/t/proto.t
+++ /dev/null
@@ -1,99 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..48\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize qw(:all);
-
-ok(1);
-
-#########################
-
-# unary op. RING-CEDILLA
-ok( "\x{30A}\x{327}" ne "\x{327}\x{30A}");
-ok(NFD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
-ok(NFC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
-ok(NFKD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
-ok(NFKC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
-ok(FCD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
-ok(FCC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
-ok(reorder "\x{30A}\x{327}" eq "\x{327}\x{30A}");
-
-# 9
-
-ok(prototype \&normalize,'$$');
-ok(prototype \&NFD, '$');
-ok(prototype \&NFC, '$');
-ok(prototype \&NFKD, '$');
-ok(prototype \&NFKC, '$');
-ok(prototype \&FCD, '$');
-ok(prototype \&FCC, '$');
-
-ok(prototype \&check, '$$');
-ok(prototype \&checkNFD, '$');
-ok(prototype \&checkNFC, '$');
-ok(prototype \&checkNFKD,'$');
-ok(prototype \&checkNFKC,'$');
-ok(prototype \&checkFCD, '$');
-ok(prototype \&checkFCC, '$');
-
-ok(prototype \&decompose, '$;$');
-ok(prototype \&reorder, '$');
-ok(prototype \&compose, '$');
-ok(prototype \&composeContiguous, '$');
-
-# 27
-
-ok(prototype \&getCanon, '$');
-ok(prototype \&getCompat, '$');
-ok(prototype \&getComposite, '$$');
-ok(prototype \&getCombinClass,'$');
-ok(prototype \&isExclusion, '$');
-ok(prototype \&isSingleton, '$');
-ok(prototype \&isNonStDecomp, '$');
-ok(prototype \&isComp2nd, '$');
-ok(prototype \&isComp_Ex, '$');
-ok(prototype \&isNFD_NO, '$');
-ok(prototype \&isNFC_NO, '$');
-ok(prototype \&isNFC_MAYBE, '$');
-ok(prototype \&isNFKD_NO, '$');
-ok(prototype \&isNFKC_NO, '$');
-ok(prototype \&isNFKC_MAYBE, '$');
-ok(prototype \&splitOnLastStarter, undef);
-ok(prototype \&normalize_partial, '$$');
-ok(prototype \&NFD_partial, '$');
-ok(prototype \&NFC_partial, '$');
-ok(prototype \&NFKD_partial, '$');
-ok(prototype \&NFKC_partial, '$');
-
-# 48
-
diff --git a/cpan/Unicode-Normalize/t/split.t b/cpan/Unicode-Normalize/t/split.t
deleted file mode 100644
index a92957c208..0000000000
--- a/cpan/Unicode-Normalize/t/split.t
+++ /dev/null
@@ -1,147 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-BEGIN {
- unless (5.006001 <= $]) {
- print "1..0 # skipped: Perl 5.6.1 or later".
- " needed for this test\n";
- exit;
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..34\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize qw(:all);
-
-ok(1);
-
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
-
-#########################
-
-our $proc; # before the last starter
-our $unproc; # the last starter and after
-# If string has no starter, entire string is set to $unproc.
-
-($proc, $unproc) = splitOnLastStarter("");
-ok($proc, "");
-ok($unproc, "");
-
-($proc, $unproc) = splitOnLastStarter("A");
-ok($proc, "");
-ok($unproc, "A");
-
-($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0x42));
-ok($proc, _pack_U(0x41, 0x300, 0x327));
-ok($unproc, "B");
-
-($proc, $unproc) = splitOnLastStarter(_pack_U(0x4E00, 0x41, 0x301));
-ok($proc, _pack_U(0x4E00));
-ok($unproc, _pack_U(0x41, 0x301));
-
-($proc, $unproc) = splitOnLastStarter(_pack_U(0x302, 0x301, 0x300));
-ok($proc, "");
-ok($unproc, _pack_U(0x302, 0x301, 0x300));
-
-our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300);
-our $dakuten = _pack_U(0x3099);
-our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300);
-
-our ($p, $u) = splitOnLastStarter($ka_grave);
-our $concat = $p . NFC($u.$dakuten);
-
-ok(NFC($ka_grave.$dakuten) eq $ga_grave);
-ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave);
-ok($concat eq $ga_grave);
-
-# 14
-
-sub arraynorm {
- my $form = shift;
- my @string = @_;
- my $result = "";
- my $unproc = "";
- foreach my $str (@string) {
- $unproc .= $str;
- my $n = normalize($form, $unproc);
- my($p, $u) = splitOnLastStarter($n);
- $result .= $p;
- $unproc = $u;
- }
- $result .= $unproc;
- return $result;
-}
-
-my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
-my $strC = "\x{3CE}\x{AC01}\x{AC03}";
-my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
-my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
-ok($strC eq NFC($strD));
-ok($strD eq join('', @str1));
-ok($strC eq arraynorm('NFC', @str1));
-ok($strD eq join('', @str2));
-ok($strC eq arraynorm('NFC', @str2));
-
-my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
-my $strX = "\x{300}\x{AC01}";
-ok($strX eq NFC(join('', @strX)));
-ok($strX eq arraynorm('NFC', @strX));
-ok($strX eq NFKC(join('', @strX)));
-ok($strX eq arraynorm('NFKC', @strX));
-
-my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
-my $strY = ("\x{304C}\x{0323}\x{0308}");
-ok($strY eq NFC(join('', @strY)));
-ok($strY eq arraynorm('NFC', @strY));
-ok($strY eq NFKC(join('', @strY)));
-ok($strY eq arraynorm('NFKC', @strY));
-
-my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
-my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
-ok($strZ eq NFD(join('', @strZ)));
-ok($strZ eq arraynorm('NFD', @strZ));
-ok($strZ eq NFKD(join('', @strZ)));
-ok($strZ eq arraynorm('NFKD', @strZ));
-
-# 31
-
-# don't modify the source
-
-my $source = "ABC";
-($proc, $unproc) = splitOnLastStarter($source);
-ok($proc, "AB");
-ok($unproc, "C");
-ok($source, "ABC");
-
-# 34
-
diff --git a/cpan/Unicode-Normalize/t/test.t b/cpan/Unicode-Normalize/t/test.t
deleted file mode 100644
index cb4b6ea637..0000000000
--- a/cpan/Unicode-Normalize/t/test.t
+++ /dev/null
@@ -1,168 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-#########################
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..72\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-use Unicode::Normalize;
-
-ok(1);
-
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
-
-#########################
-
-ok(NFD(""), "");
-ok(NFC(""), "");
-ok(NFKD(""), "");
-ok(NFKC(""), "");
-
-ok(NFD("A"), "A");
-ok(NFC("A"), "A");
-ok(NFKD("A"), "A");
-ok(NFKC("A"), "A");
-
-# 9
-
-# don't modify the source
-my $sNFD = "\x{FA19}";
-ok(NFD($sNFD), "\x{795E}");
-ok($sNFD, "\x{FA19}");
-
-my $sNFC = "\x{FA1B}";
-ok(NFC($sNFC), "\x{798F}");
-ok($sNFC, "\x{FA1B}");
-
-my $sNFKD = "\x{FA1E}";
-ok(NFKD($sNFKD), "\x{7FBD}");
-ok($sNFKD, "\x{FA1E}");
-
-my $sNFKC = "\x{FA26}";
-ok(NFKC($sNFKC), "\x{90FD}");
-ok($sNFKC, "\x{FA26}");
-
-# 17
-
-sub hexNFC {
- join " ", map sprintf("%04X", $_),
- _unpack_U NFC _pack_U map hex, split ' ', shift;
-}
-sub hexNFD {
- join " ", map sprintf("%04X", $_),
- _unpack_U NFD _pack_U map hex, split ' ', shift;
-}
-
-ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8");
-ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF");
-
-ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
-ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
-ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
-ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
-ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
-ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");
-
-ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
-ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
-ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
-ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
-ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
-ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
-ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
-ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
-ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");
-
-ok(hexNFC("AC00 11A7"), "AC00 11A7");
-ok(hexNFC("AC00 11A8"), "AC01");
-ok(hexNFC("AC00 11A9"), "AC02");
-ok(hexNFC("AC00 11C2"), "AC1B");
-ok(hexNFC("AC00 11C3"), "AC00 11C3");
-
-# 39
-
-# Test Cases from Public Review Issue #29: Normalization Issue
-# cf. http://www.unicode.org/review/pr-29.html
-ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
-ok(hexNFC("1100 0300 1161"), "1100 0300 1161");
-ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
-ok(hexNFC("1100 1161 0300"), "AC00 0300");
-ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
-ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");
-
-ok(hexNFC("0300 0041"), "0300 0041");
-ok(hexNFC("0300 0301 0041"), "0300 0301 0041");
-ok(hexNFC("0301 0300 0041"), "0301 0300 0041");
-ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301");
-ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300");
-
-ok(hexNFC("0327 0061 0300"), "0327 00E0");
-ok(hexNFC("0301 0061 0300"), "0301 00E0");
-ok(hexNFC("0315 0061 0300"), "0315 00E0");
-ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0");
-ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0");
-ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0");
-
-# 56
-
-# NFC() and NFKC() should be unary.
-my $str11 = _pack_U(0x41, 0x0302, 0x0301, 0x62);
-my $str12 = _pack_U(0x1EA4, 0x62);
-ok(NFC $str11 eq $str12);
-ok(NFKC $str11 eq $str12);
-
-# NFD() and NFKD() should be unary.
-my $str21 = _pack_U(0xE0, 0xAC00);
-my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161);
-ok(NFD $str21 eq $str22);
-ok(NFKD $str21 eq $str22);
-
-# 60
-
-## Bug #53197: NFKC("\x{2000}") produces...
-
-ok(NFKC("\x{2002}") eq ' ');
-ok(NFKD("\x{2002}") eq ' ');
-ok(NFKC("\x{2000}") eq ' ');
-ok(NFKD("\x{2000}") eq ' ');
-
-ok(NFKC("\x{210C}") eq 'H');
-ok(NFKD("\x{210C}") eq 'H');
-ok(NFKC("\x{210D}") eq 'H');
-ok(NFKD("\x{210D}") eq 'H');
-
-ok(NFC("\x{F907}") eq "\x{9F9C}");
-ok(NFD("\x{F907}") eq "\x{9F9C}");
-ok(NFKC("\x{F907}") eq "\x{9F9C}");
-ok(NFKD("\x{F907}") eq "\x{9F9C}");
-
-# 72
-
diff --git a/cpan/Unicode-Normalize/t/tie.t b/cpan/Unicode-Normalize/t/tie.t
deleted file mode 100644
index 4fdd121e07..0000000000
--- a/cpan/Unicode-Normalize/t/tie.t
+++ /dev/null
@@ -1,82 +0,0 @@
-
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
- if ($ENV{PERL_CORE}) {
- chdir('t') if -d 't';
- @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
- }
-}
-
-#########################
-
-BEGIN {
- use Unicode::Normalize qw(:all);
-
- unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) {
- print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
- " needed for this test\n";
- print $@;
- exit;
- }
-}
-
-use strict;
-use warnings;
-BEGIN { $| = 1; print "1..17\n"; }
-my $count = 0;
-sub ok ($;$) {
- my $p = my $r = shift;
- if (@_) {
- my $x = shift;
- $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
- }
- print $p ? "ok" : "not ok", ' ', ++$count, "\n";
-}
-
-ok(1);
-
-package tiescalar;
-sub TIESCALAR {
- my ($class, $instance) = @_;
- return bless \$instance => $class;
-}
-sub FETCH { return ${$_[0]}++ }
-sub STORE { return ${$_[0]} = $_[1] }
-sub DESTROY { undef ${$_[0]} }
-
-#########################
-
-package main;
-
-tie my $tie1, 'tiescalar', "123";
-ok(NFD($tie1), 123);
-ok(NFC($tie1), 124);
-ok(NFKD($tie1), 125);
-ok(NFKC($tie1), 126);
-ok(FCD($tie1), 127);
-ok(FCC($tie1), 128);
-
-tie my $tie2, 'tiescalar', "256";
-ok(normalize('NFD', $tie2), 256);
-ok(normalize('NFC', $tie2), 257);
-ok(normalize('NFKD', $tie2), 258);
-ok(normalize('NFKC', $tie2), 259);
-ok(normalize('FCD', $tie2), 260);
-ok(normalize('FCC', $tie2), 261);
-
-tie my $tie3, 'tiescalar', "315";
-ok(decompose($tie3), 315);
-ok(reorder($tie3), 316);
-ok(compose($tie3), 317);
-ok(composeContiguous($tie3), 318);
-