summaryrefslogtreecommitdiff
path: root/dist/Unicode-Normalize
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2016-11-11 14:52:39 +0100
committerKarl Williamson <khw@cpan.org>2016-11-11 17:51:30 +0100
commit3baae3fab5dea1469c2d040f5380bc2009bdeecb (patch)
treef20f6c9da2a60da215c786c01db84d23516914f5 /dist/Unicode-Normalize
parentaf25b33d388e2824ad52b31c8f5c7bc722f02dd6 (diff)
downloadperl-3baae3fab5dea1469c2d040f5380bc2009bdeecb.tar.gz
Move Unicode-Normalize to dist/
p5p has taken over the maintenance of this module, so it should be in dist/
Diffstat (limited to 'dist/Unicode-Normalize')
-rw-r--r--dist/Unicode-Normalize/.gitignore1
-rw-r--r--dist/Unicode-Normalize/Makefile.PL55
-rw-r--r--dist/Unicode-Normalize/Normalize.pm635
-rw-r--r--dist/Unicode-Normalize/Normalize.xs925
-rw-r--r--dist/Unicode-Normalize/mkheader419
-rw-r--r--dist/Unicode-Normalize/t/fcdc.t138
-rw-r--r--dist/Unicode-Normalize/t/form.t84
-rw-r--r--dist/Unicode-Normalize/t/func.t386
-rw-r--r--dist/Unicode-Normalize/t/illegal.t85
-rw-r--r--dist/Unicode-Normalize/t/norm.t145
-rw-r--r--dist/Unicode-Normalize/t/null.t100
-rw-r--r--dist/Unicode-Normalize/t/partial1.t120
-rw-r--r--dist/Unicode-Normalize/t/partial2.t116
-rw-r--r--dist/Unicode-Normalize/t/proto.t99
-rw-r--r--dist/Unicode-Normalize/t/split.t147
-rw-r--r--dist/Unicode-Normalize/t/test.t168
-rw-r--r--dist/Unicode-Normalize/t/tie.t82
17 files changed, 3705 insertions, 0 deletions
diff --git a/dist/Unicode-Normalize/.gitignore b/dist/Unicode-Normalize/.gitignore
new file mode 100644
index 0000000000..424c745c12
--- /dev/null
+++ b/dist/Unicode-Normalize/.gitignore
@@ -0,0 +1 @@
+*.h
diff --git a/dist/Unicode-Normalize/Makefile.PL b/dist/Unicode-Normalize/Makefile.PL
new file mode 100644
index 0000000000..44a4b8ded8
--- /dev/null
+++ b/dist/Unicode-Normalize/Makefile.PL
@@ -0,0 +1,55 @@
+require 5.006001;
+use ExtUtils::MakeMaker;
+
+my $clean = {};
+
+my $mm_ver = ExtUtils::MakeMaker->VERSION;
+
+if (-f "Normalize.xs") {
+ print STDERR "Making header files for XS...\n";
+
+ do 'mkheader' or die $@ || "mkheader: $!";
+
+ $clean = { FILES => 'unfcan.h unfcmb.h unfcmp.h unfcpt.h unfexc.h' };
+}
+
+WriteMakefile(
+ ($mm_ver < 6.58)
+ ? ('AUTHOR' => 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>, Karl Williamson <khw@cpan.org>')
+ : ('AUTHOR' => [
+ 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>',
+ 'Karl Williamson <khw@cpan.org>',
+ ]),
+ 'ABSTRACT' => 'Unicode Normalization Forms',
+ 'INSTALLDIRS' => ($] >= 5.007002 && $] < 5.011) ? 'perl' : 'site',
+ # see perl5110delta, @INC reorganization
+ 'LICENSE' => 'perl',
+ 'NAME' => 'Unicode::Normalize',
+ 'VERSION_FROM' => 'Normalize.pm', # finds $VERSION
+ 'clean' => $clean,
+ 'depend' => { 'Normalize.o' => '$(H_FILES)' },
+ 'PREREQ_PM' => {
+ Carp => 0,
+ constant => 0,
+ DynaLoader => 0,
+ Exporter => 0,
+ File::Spec => 0,
+ strict => 0,
+ warnings => 0,
+ SelectSaver => 0,
+ },
+ ($mm_ver < 6.48 ? () : MIN_PERL_VERSION => 5.6.0),
+ ($mm_ver < 6.46 ? () : (META_MERGE => {
+ 'meta-spec' => { version => 2 },
+ resources => {
+ repository => {
+ url => 'https://github.com/khwilliamson/Unicode-Normalize.git',
+ web => 'https://github.com/khwilliamson/Unicode-Normalize',
+ type => 'git',
+ },
+ bugtracker => {
+ web => 'https://rt.cpan.org/Public/Dist/Display.html?Name=Unicode-Normalize',
+ },
+ },
+ })),
+);
diff --git a/dist/Unicode-Normalize/Normalize.pm b/dist/Unicode-Normalize/Normalize.pm
new file mode 100644
index 0000000000..b53f5c728b
--- /dev/null
+++ b/dist/Unicode-Normalize/Normalize.pm
@@ -0,0 +1,635 @@
+package Unicode::Normalize;
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ die "Unicode::Normalize cannot stringify a Unicode code point\n";
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ die "Unicode::Normalize cannot get Unicode code point\n";
+ }
+}
+
+use 5.006;
+use strict;
+use warnings;
+use Carp;
+
+no warnings 'utf8';
+
+our $VERSION = '1.25';
+our $PACKAGE = __PACKAGE__;
+
+our @EXPORT = qw( NFC NFD NFKC NFKD );
+our @EXPORT_OK = qw(
+ normalize decompose reorder compose
+ checkNFD checkNFKD checkNFC checkNFKC check
+ getCanon getCompat getComposite getCombinClass
+ isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex
+ isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE
+ FCD checkFCD FCC checkFCC composeContiguous splitOnLastStarter
+ normalize_partial NFC_partial NFD_partial NFKC_partial NFKD_partial
+);
+our %EXPORT_TAGS = (
+ all => [ @EXPORT, @EXPORT_OK ],
+ normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ],
+ check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ],
+ fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ],
+);
+
+##
+## utilities for tests
+##
+
+sub pack_U {
+ return pack('U*', @_);
+}
+
+sub unpack_U {
+
+ # The empty pack returns an empty UTF-8 string, so the effect is to force
+ # the shifted parameter into being UTF-8. This allows this to work on
+ # Perl 5.6, where there is no utf8::upgrade().
+ return unpack('U*', shift(@_).pack('U*'));
+}
+
+require Exporter;
+
+##### The above part is common to XS and PP #####
+
+our @ISA = qw(Exporter DynaLoader);
+require DynaLoader;
+bootstrap Unicode::Normalize $VERSION;
+
+##### The below part is common to XS and PP #####
+
+##
+## normalize
+##
+
+sub FCD ($) {
+ my $str = shift;
+ return checkFCD($str) ? $str : NFD($str);
+}
+
+our %formNorm = (
+ NFC => \&NFC, C => \&NFC,
+ NFD => \&NFD, D => \&NFD,
+ NFKC => \&NFKC, KC => \&NFKC,
+ NFKD => \&NFKD, KD => \&NFKD,
+ FCD => \&FCD, FCC => \&FCC,
+);
+
+sub normalize($$)
+{
+ my $form = shift;
+ my $str = shift;
+ if (exists $formNorm{$form}) {
+ return $formNorm{$form}->($str);
+ }
+ croak($PACKAGE."::normalize: invalid form name: $form");
+}
+
+##
+## partial
+##
+
+sub normalize_partial ($$) {
+ if (exists $formNorm{$_[0]}) {
+ my $n = normalize($_[0], $_[1]);
+ my($p, $u) = splitOnLastStarter($n);
+ $_[1] = $u;
+ return $p;
+ }
+ croak($PACKAGE."::normalize_partial: invalid form name: $_[0]");
+}
+
+sub NFD_partial ($) { return normalize_partial('NFD', $_[0]) }
+sub NFC_partial ($) { return normalize_partial('NFC', $_[0]) }
+sub NFKD_partial($) { return normalize_partial('NFKD',$_[0]) }
+sub NFKC_partial($) { return normalize_partial('NFKC',$_[0]) }
+
+##
+## check
+##
+
+our %formCheck = (
+ NFC => \&checkNFC, C => \&checkNFC,
+ NFD => \&checkNFD, D => \&checkNFD,
+ NFKC => \&checkNFKC, KC => \&checkNFKC,
+ NFKD => \&checkNFKD, KD => \&checkNFKD,
+ FCD => \&checkFCD, FCC => \&checkFCC,
+);
+
+sub check($$)
+{
+ my $form = shift;
+ my $str = shift;
+ if (exists $formCheck{$form}) {
+ return $formCheck{$form}->($str);
+ }
+ croak($PACKAGE."::check: invalid form name: $form");
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Unicode::Normalize - Unicode Normalization Forms
+
+=head1 SYNOPSIS
+
+(1) using function names exported by default:
+
+ use Unicode::Normalize;
+
+ $NFD_string = NFD($string); # Normalization Form D
+ $NFC_string = NFC($string); # Normalization Form C
+ $NFKD_string = NFKD($string); # Normalization Form KD
+ $NFKC_string = NFKC($string); # Normalization Form KC
+
+(2) using function names exported on request:
+
+ use Unicode::Normalize 'normalize';
+
+ $NFD_string = normalize('D', $string); # Normalization Form D
+ $NFC_string = normalize('C', $string); # Normalization Form C
+ $NFKD_string = normalize('KD', $string); # Normalization Form KD
+ $NFKC_string = normalize('KC', $string); # Normalization Form KC
+
+=head1 DESCRIPTION
+
+Parameters:
+
+C<$string> is used as a string under character semantics (see L<perlunicode>).
+
+C<$code_point> should be an unsigned integer representing a Unicode code point.
+
+Note: Between XSUB and pure Perl, there is an incompatibility
+about the interpretation of C<$code_point> as a decimal number.
+XSUB converts C<$code_point> to an unsigned integer, but pure Perl does not.
+Do not use a floating point nor a negative sign in C<$code_point>.
+
+=head2 Normalization Forms
+
+=over 4
+
+=item C<$NFD_string = NFD($string)>
+
+It returns the Normalization Form D (formed by canonical decomposition).
+
+=item C<$NFC_string = NFC($string)>
+
+It returns the Normalization Form C (formed by canonical decomposition
+followed by canonical composition).
+
+=item C<$NFKD_string = NFKD($string)>
+
+It returns the Normalization Form KD (formed by compatibility decomposition).
+
+=item C<$NFKC_string = NFKC($string)>
+
+It returns the Normalization Form KC (formed by compatibility decomposition
+followed by B<canonical> composition).
+
+=item C<$FCD_string = FCD($string)>
+
+If the given string is in FCD ("Fast C or D" form; cf. UTN #5),
+it returns the string without modification; otherwise it returns an FCD string.
+
+Note: FCD is not always unique, then plural forms may be equivalent
+each other. C<FCD()> will return one of these equivalent forms.
+
+=item C<$FCC_string = FCC($string)>
+
+It returns the FCC form ("Fast C Contiguous"; cf. UTN #5).
+
+Note: FCC is unique, as well as four normalization forms (NF*).
+
+=item C<$normalized_string = normalize($form_name, $string)>
+
+It returns the normalization form of C<$form_name>.
+
+As C<$form_name>, one of the following names must be given.
+
+ 'C' or 'NFC' for Normalization Form C (UAX #15)
+ 'D' or 'NFD' for Normalization Form D (UAX #15)
+ 'KC' or 'NFKC' for Normalization Form KC (UAX #15)
+ 'KD' or 'NFKD' for Normalization Form KD (UAX #15)
+
+ 'FCD' for "Fast C or D" Form (UTN #5)
+ 'FCC' for "Fast C Contiguous" (UTN #5)
+
+=back
+
+=head2 Decomposition and Composition
+
+=over 4
+
+=item C<$decomposed_string = decompose($string [, $useCompatMapping])>
+
+It returns the concatenation of the decomposition of each character
+in the string.
+
+If the second parameter (a boolean) is omitted or false,
+the decomposition is canonical decomposition;
+if the second parameter (a boolean) is true,
+the decomposition is compatibility decomposition.
+
+The string returned is not always in NFD/NFKD. Reordering may be required.
+
+ $NFD_string = reorder(decompose($string)); # eq. to NFD()
+ $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD()
+
+=item C<$reordered_string = reorder($string)>
+
+It returns the result of reordering the combining characters
+according to Canonical Ordering Behavior.
+
+For example, when you have a list of NFD/NFKD strings,
+you can get the concatenated NFD/NFKD string from them, by saying
+
+ $concat_NFD = reorder(join '', @NFD_strings);
+ $concat_NFKD = reorder(join '', @NFKD_strings);
+
+=item C<$composed_string = compose($string)>
+
+It returns the result of canonical composition
+without applying any decomposition.
+
+For example, when you have a NFD/NFKD string,
+you can get its NFC/NFKC string, by saying
+
+ $NFC_string = compose($NFD_string);
+ $NFKC_string = compose($NFKD_string);
+
+=item C<($processed, $unprocessed) = splitOnLastStarter($normalized)>
+
+It returns two strings: the first one, C<$processed>, is a part
+before the last starter, and the second one, C<$unprocessed> is
+another part after the first part. A starter is a character having
+a combining class of zero (see UAX #15).
+
+Note that C<$processed> may be empty (when C<$normalized> contains no
+starter or starts with the last starter), and then C<$unprocessed>
+should be equal to the entire C<$normalized>.
+
+When you have a C<$normalized> string and an C<$unnormalized> string
+following it, a simple concatenation is wrong:
+
+ $concat = $normalized . normalize($form, $unnormalized); # wrong!
+
+Instead of it, do like this:
+
+ ($processed, $unprocessed) = splitOnLastStarter($normalized);
+ $concat = $processed . normalize($form,$unprocessed.$unnormalized);
+
+C<splitOnLastStarter()> should be called with a pre-normalized parameter
+C<$normalized>, that is in the same form as C<$form> you want.
+
+If you have an array of C<@string> that should be concatenated and then
+normalized, you can do like this:
+
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ my $n = normalize($form, $unproc);
+ my($p, $u) = splitOnLastStarter($n);
+ $result .= $p;
+ $unproc = $u;
+ }
+ $result .= $unproc;
+ # instead of normalize($form, join('', @string))
+
+=item C<$processed = normalize_partial($form, $unprocessed)>
+
+A wrapper for the combination of C<normalize()> and C<splitOnLastStarter()>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+If you have an array of C<@string> that should be concatenated and then
+normalized, you can do like this:
+
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ $result .= normalize_partial($form, $unproc);
+ }
+ $result .= $unproc;
+ # instead of normalize($form, join('', @string))
+
+=item C<$processed = NFD_partial($unprocessed)>
+
+It does like C<normalize_partial('NFD', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFC_partial($unprocessed)>
+
+It does like C<normalize_partial('NFC', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFKD_partial($unprocessed)>
+
+It does like C<normalize_partial('NFKD', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=item C<$processed = NFKC_partial($unprocessed)>
+
+It does like C<normalize_partial('NFKC', $unprocessed)>.
+Note that C<$unprocessed> will be modified as a side-effect.
+
+=back
+
+=head2 Quick Check
+
+(see Annex 8, UAX #15; and F<DerivedNormalizationProps.txt>)
+
+The following functions check whether the string is in that normalization form.
+
+The result returned will be one of the following:
+
+ YES The string is in that normalization form.
+ NO The string is not in that normalization form.
+ MAYBE Dubious. Maybe yes, maybe no.
+
+=over 4
+
+=item C<$result = checkNFD($string)>
+
+It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>.
+
+=item C<$result = checkNFC($string)>
+
+It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>;
+C<undef> if C<MAYBE>.
+
+=item C<$result = checkNFKD($string)>
+
+It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>.
+
+=item C<$result = checkNFKC($string)>
+
+It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>;
+C<undef> if C<MAYBE>.
+
+=item C<$result = checkFCD($string)>
+
+It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>.
+
+=item C<$result = checkFCC($string)>
+
+It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>;
+C<undef> if C<MAYBE>.
+
+Note: If a string is not in FCD, it must not be in FCC.
+So C<checkFCC($not_FCD_string)> should return C<NO>.
+
+=item C<$result = check($form_name, $string)>
+
+It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>;
+C<undef> if C<MAYBE>.
+
+As C<$form_name>, one of the following names must be given.
+
+ 'C' or 'NFC' for Normalization Form C (UAX #15)
+ 'D' or 'NFD' for Normalization Form D (UAX #15)
+ 'KC' or 'NFKC' for Normalization Form KC (UAX #15)
+ 'KD' or 'NFKD' for Normalization Form KD (UAX #15)
+
+ 'FCD' for "Fast C or D" Form (UTN #5)
+ 'FCC' for "Fast C Contiguous" (UTN #5)
+
+=back
+
+B<Note>
+
+In the cases of NFD, NFKD, and FCD, the answer must be
+either C<YES> or C<NO>. The answer C<MAYBE> may be returned
+in the cases of NFC, NFKC, and FCC.
+
+A C<MAYBE> string should contain at least one combining character
+or the like. For example, C<COMBINING ACUTE ACCENT> has
+the MAYBE_NFC/MAYBE_NFKC property.
+
+Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")>
+and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>.
+C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC
+(its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">),
+while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC.
+
+If you want to check exactly, compare the string with its NFC/NFKC/FCC.
+
+ if ($string eq NFC($string)) {
+ # $string is exactly normalized in NFC;
+ } else {
+ # $string is not normalized in NFC;
+ }
+
+ if ($string eq NFKC($string)) {
+ # $string is exactly normalized in NFKC;
+ } else {
+ # $string is not normalized in NFKC;
+ }
+
+=head2 Character Data
+
+These functions are interface of character data used internally.
+If you want only to get Unicode normalization forms, you don't need
+call them yourself.
+
+=over 4
+
+=item C<$canonical_decomposition = getCanon($code_point)>
+
+If the character is canonically decomposable (including Hangul Syllables),
+it returns the (full) canonical decomposition as a string.
+Otherwise it returns C<undef>.
+
+B<Note:> According to the Unicode standard, the canonical decomposition
+of the character that is not canonically decomposable is same as
+the character itself.
+
+=item C<$compatibility_decomposition = getCompat($code_point)>
+
+If the character is compatibility decomposable (including Hangul Syllables),
+it returns the (full) compatibility decomposition as a string.
+Otherwise it returns C<undef>.
+
+B<Note:> According to the Unicode standard, the compatibility decomposition
+of the character that is not compatibility decomposable is same as
+the character itself.
+
+=item C<$code_point_composite = getComposite($code_point_here, $code_point_next)>
+
+If two characters here and next (as code points) are composable
+(including Hangul Jamo/Syllables and Composition Exclusions),
+it returns the code point of the composite.
+
+If they are not composable, it returns C<undef>.
+
+=item C<$combining_class = getCombinClass($code_point)>
+
+It returns the combining class (as an integer) of the character.
+
+=item C<$may_be_composed_with_prev_char = isComp2nd($code_point)>
+
+It returns a boolean whether the character of the specified codepoint
+may be composed with the previous one in a certain composition
+(including Hangul Compositions, but excluding
+Composition Exclusions and Non-Starter Decompositions).
+
+=item C<$is_exclusion = isExclusion($code_point)>
+
+It returns a boolean whether the code point is a composition exclusion.
+
+=item C<$is_singleton = isSingleton($code_point)>
+
+It returns a boolean whether the code point is a singleton
+
+=item C<$is_non_starter_decomposition = isNonStDecomp($code_point)>
+
+It returns a boolean whether the code point has Non-Starter Decomposition.
+
+=item C<$is_Full_Composition_Exclusion = isComp_Ex($code_point)>
+
+It returns a boolean of the derived property Comp_Ex
+(Full_Composition_Exclusion). This property is generated from
+Composition Exclusions + Singletons + Non-Starter Decompositions.
+
+=item C<$NFD_is_NO = isNFD_NO($code_point)>
+
+It returns a boolean of the derived property NFD_NO
+(NFD_Quick_Check=No).
+
+=item C<$NFC_is_NO = isNFC_NO($code_point)>
+
+It returns a boolean of the derived property NFC_NO
+(NFC_Quick_Check=No).
+
+=item C<$NFC_is_MAYBE = isNFC_MAYBE($code_point)>
+
+It returns a boolean of the derived property NFC_MAYBE
+(NFC_Quick_Check=Maybe).
+
+=item C<$NFKD_is_NO = isNFKD_NO($code_point)>
+
+It returns a boolean of the derived property NFKD_NO
+(NFKD_Quick_Check=No).
+
+=item C<$NFKC_is_NO = isNFKC_NO($code_point)>
+
+It returns a boolean of the derived property NFKC_NO
+(NFKC_Quick_Check=No).
+
+=item C<$NFKC_is_MAYBE = isNFKC_MAYBE($code_point)>
+
+It returns a boolean of the derived property NFKC_MAYBE
+(NFKC_Quick_Check=Maybe).
+
+=back
+
+=head1 EXPORT
+
+C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.
+
+C<normalize> and other some functions: on request.
+
+=head1 CAVEATS
+
+=over 4
+
+=item Perl's version vs. Unicode version
+
+Since this module refers to perl core's Unicode database in the directory
+F</lib/unicore> (or formerly F</lib/unicode>), the Unicode version of
+normalization implemented by this module depends on what has been
+compiled into your perl. The following table lists the default Unicode
+version that comes with various perl versions. (It is possible to change
+the Unicode version in any perl version to be any earlier Unicode version,
+so one could cause Unicode 3.2 to be used in any perl version starting with
+5.8.0. Read F<C<$Config{privlib}>/unicore/README.perl> for details.
+
+ perl's version implemented Unicode version
+ 5.6.1 3.0.1
+ 5.7.2 3.1.0
+ 5.7.3 3.1.1 (normalization is same as 3.1.0)
+ 5.8.0 3.2.0
+ 5.8.1-5.8.3 4.0.0
+ 5.8.4-5.8.6 4.0.1 (normalization is same as 4.0.0)
+ 5.8.7-5.8.8 4.1.0
+ 5.10.0 5.0.0
+ 5.8.9, 5.10.1 5.1.0
+ 5.12.x 5.2.0
+ 5.14.x 6.0.0
+ 5.16.x 6.1.0
+ 5.18.x 6.2.0
+ 5.20.x 6.3.0
+ 5.22.x 7.0.0
+
+=item Correction of decomposition mapping
+
+In older Unicode versions, a small number of characters (all of which are
+CJK compatibility ideographs as far as they have been found) may have
+an erroneous decomposition mapping (see F<NormalizationCorrections.txt>).
+Anyhow, this module will neither refer to F<NormalizationCorrections.txt>
+nor provide any specific version of normalization. Therefore this module
+running on an older perl with an older Unicode database may use
+the erroneous decomposition mapping blindly conforming to the Unicode database.
+
+=item Revised definition of canonical composition
+
+In Unicode 4.1.0, the definition D2 of canonical composition (which
+affects NFC and NFKC) has been changed (see Public Review Issue #29
+and recent UAX #15). This module has used the newer definition
+since the version 0.07 (Oct 31, 2001).
+This module will not support the normalization according to the older
+definition, even if the Unicode version implemented by perl is
+lower than 4.1.0.
+
+=back
+
+=head1 AUTHOR
+
+SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
+
+Currently maintained by <perl5-porters@perl.org>
+
+Copyright(C) 2001-2012, SADAHIRO Tomoyuki. Japan. All rights reserved.
+
+=head1 LICENSE
+
+This module is free software; you can redistribute it
+and/or modify it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+=over 4
+
+=item http://www.unicode.org/reports/tr15/
+
+Unicode Normalization Forms - UAX #15
+
+=item http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt
+
+Composition Exclusion Table
+
+=item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt
+
+Derived Normalization Properties
+
+=item http://www.unicode.org/Public/UNIDATA/NormalizationCorrections.txt
+
+Normalization Corrections
+
+=item http://www.unicode.org/review/pr-29.html
+
+Public Review Issue #29: Normalization Issue
+
+=item http://www.unicode.org/notes/tn5/
+
+Canonical Equivalence in Applications - UTN #5
+
+=back
+
+=cut
diff --git a/dist/Unicode-Normalize/Normalize.xs b/dist/Unicode-Normalize/Normalize.xs
new file mode 100644
index 0000000000..4acff7fe49
--- /dev/null
+++ b/dist/Unicode-Normalize/Normalize.xs
@@ -0,0 +1,925 @@
+
+#define PERL_NO_GET_CONTEXT /* we want efficiency */
+
+/* private functions which need pTHX_ and aTHX_
+ pv_cat_decompHangul
+ sv_2pvunicode
+ pv_utf8_decompose
+ pv_utf8_reorder
+ pv_utf8_compose
+*/
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+/* These 5 files are prepared by mkheader */
+#include "unfcmb.h"
+#include "unfcan.h"
+#include "unfcpt.h"
+#include "unfcmp.h"
+#include "unfexc.h"
+
+/* The generated normalization tables since v5.20 are in native character set
+ * terms. Prior to that, they were in Unicode terms. So we use 'uvchr' for
+ * later perls, and redefine that to be 'uvuni' for earlier ones */
+#if PERL_VERSION < 20
+# undef uvchr_to_utf8
+# ifdef uvuni_to_utf8
+# define uvchr_to_utf8 uvuni_to_utf8
+# else /* Perl 5.6.1 */
+# define uvchr_to_utf8 uv_to_utf8
+# endif
+
+# undef utf8n_to_uvchr
+# ifdef utf8n_to_uvuni
+# define utf8n_to_uvchr utf8n_to_uvuni
+# else /* Perl 5.6.1 */
+# define utf8n_to_uvchr utf8_to_uv
+# endif
+#endif
+
+/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */
+#ifndef UTF8_ALLOW_BOM
+#define UTF8_ALLOW_BOM (0)
+#endif /* UTF8_ALLOW_BOM */
+
+#ifndef UTF8_ALLOW_SURROGATE
+#define UTF8_ALLOW_SURROGATE (0)
+#endif /* UTF8_ALLOW_SURROGATE */
+
+#ifndef UTF8_ALLOW_FE_FF
+#define UTF8_ALLOW_FE_FF (0)
+#endif /* UTF8_ALLOW_FE_FF */
+
+#ifndef UTF8_ALLOW_FFFF
+#define UTF8_ALLOW_FFFF (0)
+#endif /* UTF8_ALLOW_FFFF */
+
+#ifndef PERL_UNUSED_VAR
+# define PERL_UNUSED_VAR(x) ((void)sizeof(x))
+#endif
+
+#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF)
+
+/* check if the string buffer is enough before uvchr_to_utf8(). */
+/* dstart, d, and dlen should be defined outside before. */
+#define Renew_d_if_not_enough_to(need) STRLEN curlen = d - dstart; \
+ if (dlen < curlen + (need)) { \
+ dlen += (need); \
+ Renew(dstart, dlen+1, U8); \
+ d = dstart + curlen; \
+ }
+
+/* if utf8n_to_uvchr() sets retlen to 0 (if broken?) */
+#define ErrRetlenIsZero "panic (Unicode::Normalize %s): zero-length character"
+
+/* utf8_hop() hops back before start. Maybe broken UTF-8 */
+#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start"
+
+/* At present, char > 0x10ffff are unaffected without complaint, right? */
+#define VALID_UTF_MAX (0x10ffff)
+#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
+
+/* size of array for combining characters */
+/* enough as an initial value? */
+#define CC_SEQ_SIZE (10)
+#define CC_SEQ_STEP (5)
+
+/* HANGUL begin */
+#define Hangul_SBase 0xAC00
+#define Hangul_SFinal 0xD7A3
+#define Hangul_SCount 11172
+
+#define Hangul_NCount 588
+
+#define Hangul_LBase 0x1100
+#define Hangul_LFinal 0x1112
+#define Hangul_LCount 19
+
+#define Hangul_VBase 0x1161
+#define Hangul_VFinal 0x1175
+#define Hangul_VCount 21
+
+#define Hangul_TBase 0x11A7
+#define Hangul_TFinal 0x11C2
+#define Hangul_TCount 28
+
+#define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal))
+#define Hangul_IsN(u) (((u) - Hangul_SBase) % Hangul_TCount == 0)
+#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u))
+#define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal))
+#define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal))
+#define Hangul_IsT(u) ((Hangul_TBase < (u)) && ((u) <= Hangul_TFinal))
+/* HANGUL end */
+
+/* this is used for canonical ordering of combining characters (c.c.). */
+typedef struct {
+ U8 cc; /* combining class */
+ UV uv; /* codepoint */
+ STRLEN pos; /* position */
+} UNF_cc;
+
+static int compare_cc(const void *a, const void *b)
+{
+ int ret_cc;
+ ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc;
+ if (ret_cc)
+ return ret_cc;
+
+ return ( ((UNF_cc*) a)->pos > ((UNF_cc*) b)->pos )
+ - ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos );
+}
+
+static U8* dec_canonical(UV uv)
+{
+ U8 ***plane, **row;
+ if (OVER_UTF_MAX(uv))
+ return NULL;
+ plane = (U8***)UNF_canon[uv >> 16];
+ if (! plane)
+ return NULL;
+ row = plane[(uv >> 8) & 0xff];
+ return row ? row[uv & 0xff] : NULL;
+}
+
+static U8* dec_compat(UV uv)
+{
+ U8 ***plane, **row;
+ if (OVER_UTF_MAX(uv))
+ return NULL;
+ plane = (U8***)UNF_compat[uv >> 16];
+ if (! plane)
+ return NULL;
+ row = plane[(uv >> 8) & 0xff];
+ return row ? row[uv & 0xff] : NULL;
+}
+
+static UV composite_uv(UV uv, UV uv2)
+{
+ UNF_complist ***plane, **row, *cell, *i;
+
+ if (!uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2))
+ return 0;
+
+ if (Hangul_IsL(uv) && Hangul_IsV(uv2)) {
+ UV lindex = uv - Hangul_LBase;
+ UV vindex = uv2 - Hangul_VBase;
+ return(Hangul_SBase + (lindex * Hangul_VCount + vindex) *
+ Hangul_TCount);
+ }
+ if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
+ UV tindex = uv2 - Hangul_TBase;
+ return(uv + tindex);
+ }
+ plane = UNF_compos[uv >> 16];
+ if (! plane)
+ return 0;
+ row = plane[(uv >> 8) & 0xff];
+ if (! row)
+ return 0;
+ cell = row[uv & 0xff];
+ if (! cell)
+ return 0;
+ for (i = cell; i->nextchar; i++) {
+ if (uv2 == i->nextchar)
+ return i->composite;
+ }
+ return 0;
+}
+
+static U8 getCombinClass(UV uv)
+{
+ U8 **plane, *row;
+ if (OVER_UTF_MAX(uv))
+ return 0;
+ plane = (U8**)UNF_combin[uv >> 16];
+ if (! plane)
+ return 0;
+ row = plane[(uv >> 8) & 0xff];
+ return row ? row[uv & 0xff] : 0;
+}
+
+static U8* pv_cat_decompHangul(pTHX_ U8* d, UV uv)
+{
+ UV sindex = uv - Hangul_SBase;
+ UV lindex = sindex / Hangul_NCount;
+ UV vindex = (sindex % Hangul_NCount) / Hangul_TCount;
+ UV tindex = sindex % Hangul_TCount;
+
+ if (! Hangul_IsS(uv))
+ return d;
+
+ d = uvchr_to_utf8(d, (lindex + Hangul_LBase));
+ d = uvchr_to_utf8(d, (vindex + Hangul_VBase));
+ if (tindex)
+ d = uvchr_to_utf8(d, (tindex + Hangul_TBase));
+ return d;
+}
+
+static char* sv_2pvunicode(pTHX_ SV *sv, STRLEN *lp)
+{
+ char *s;
+ STRLEN len;
+ s = SvPV(sv,len);
+ if (!SvUTF8(sv)) {
+ SV* tmpsv = sv_2mortal(newSVpvn(s, len));
+ if (!SvPOK(tmpsv))
+ s = SvPV_force(tmpsv,len);
+ sv_utf8_upgrade(tmpsv);
+ s = SvPV(tmpsv,len);
+ }
+ if (lp)
+ *lp = len;
+ return s;
+}
+
+static
+U8* pv_utf8_decompose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat)
+{
+ U8* p = s;
+ U8* e = s + slen;
+ U8* dstart = *dp;
+ U8* d = dstart;
+
+ while (p < e) {
+ STRLEN retlen;
+ UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ if (!retlen)
+ croak(ErrRetlenIsZero, "decompose");
+ p += retlen;
+
+ if (Hangul_IsS(uv)) {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN * 3)
+ d = pv_cat_decompHangul(aTHX_ d, uv);
+ }
+ else {
+ U8* r = iscompat ? dec_compat(uv) : dec_canonical(uv);
+
+ if (r) {
+ STRLEN len = (STRLEN)strlen((char *)r);
+ Renew_d_if_not_enough_to(len)
+ while (len--)
+ *d++ = *r++;
+ }
+ else {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
+ d = uvchr_to_utf8(d, uv);
+ }
+ }
+ }
+ *dp = dstart;
+ return d;
+}
+
+static
+U8* pv_utf8_reorder(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen)
+{
+ U8* p = s;
+ U8* e = s + slen;
+ U8* dstart = *dp;
+ U8* d = dstart;
+
+ UNF_cc seq_ary[CC_SEQ_SIZE];
+ UNF_cc* seq_ptr = seq_ary; /* use array at the beginning */
+ UNF_cc* seq_ext = NULL; /* extend if need */
+ STRLEN seq_max = CC_SEQ_SIZE;
+ STRLEN cc_pos = 0;
+
+ while (p < e) {
+ U8 curCC;
+ STRLEN retlen;
+ UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ if (!retlen)
+ croak(ErrRetlenIsZero, "reorder");
+ p += retlen;
+
+ curCC = getCombinClass(uv);
+
+ if (curCC != 0) {
+ if (seq_max < cc_pos + 1) { /* extend if need */
+ seq_max = cc_pos + CC_SEQ_STEP; /* new size */
+ if (CC_SEQ_SIZE == cc_pos) { /* seq_ary full */
+ STRLEN i;
+ New(0, seq_ext, seq_max, UNF_cc);
+ for (i = 0; i < cc_pos; i++)
+ seq_ext[i] = seq_ary[i];
+ }
+ else {
+ Renew(seq_ext, seq_max, UNF_cc);
+ }
+ seq_ptr = seq_ext; /* use seq_ext from now */
+ }
+
+ seq_ptr[cc_pos].cc = curCC;
+ seq_ptr[cc_pos].uv = uv;
+ seq_ptr[cc_pos].pos = cc_pos;
+ ++cc_pos;
+
+ if (p < e)
+ continue;
+ }
+
+ /* output */
+ if (cc_pos) {
+ STRLEN i;
+
+ if (cc_pos > 1) /* reordered if there are two c.c.'s */
+ qsort((void*)seq_ptr, cc_pos, sizeof(UNF_cc), compare_cc);
+
+ for (i = 0; i < cc_pos; i++) {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
+ d = uvchr_to_utf8(d, seq_ptr[i].uv);
+ }
+ cc_pos = 0;
+ }
+
+ if (curCC == 0) {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
+ d = uvchr_to_utf8(d, uv);
+ }
+ }
+ if (seq_ext)
+ Safefree(seq_ext);
+ *dp = dstart;
+ return d;
+}
+
+static
+U8* pv_utf8_compose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscontig)
+{
+ U8* p = s;
+ U8* e = s + slen;
+ U8* dstart = *dp;
+ U8* d = dstart;
+
+ UV uvS = 0; /* code point of the starter */
+ bool valid_uvS = FALSE; /* if FALSE, uvS isn't initialized yet */
+ U8 preCC = 0;
+
+ UV seq_ary[CC_SEQ_SIZE];
+ UV* seq_ptr = seq_ary; /* use array at the beginning */
+ UV* seq_ext = NULL; /* extend if need */
+ STRLEN seq_max = CC_SEQ_SIZE;
+ STRLEN cc_pos = 0;
+
+ while (p < e) {
+ U8 curCC;
+ STRLEN retlen;
+ UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ if (!retlen)
+ croak(ErrRetlenIsZero, "compose");
+ p += retlen;
+
+ curCC = getCombinClass(uv);
+
+ if (!valid_uvS) {
+ if (curCC == 0) {
+ uvS = uv; /* the first Starter is found */
+ valid_uvS = TRUE;
+ if (p < e)
+ continue;
+ }
+ else {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
+ d = uvchr_to_utf8(d, uv);
+ continue;
+ }
+ }
+ else {
+ bool composed;
+
+ /* blocked */
+ if ((iscontig && cc_pos) || /* discontiguous combination */
+ (curCC != 0 && preCC == curCC) || /* blocked by same CC */
+ (preCC > curCC)) /* blocked by higher CC: revised D2 */
+ composed = FALSE;
+
+ /* not blocked:
+ iscontig && cc_pos == 0 -- contiguous combination
+ curCC == 0 && preCC == 0 -- starter + starter
+ curCC != 0 && preCC < curCC -- lower CC */
+ else {
+ /* try composition */
+ UV uvComp = composite_uv(uvS, uv);
+
+ if (uvComp && !isExclusion(uvComp)) {
+ uvS = uvComp;
+ composed = TRUE;
+
+ /* preCC should not be changed to curCC */
+ /* e.g. 1E14 = 0045 0304 0300 where CC(0304) == CC(0300) */
+ if (p < e)
+ continue;
+ }
+ else
+ composed = FALSE;
+ }
+
+ if (!composed) {
+ preCC = curCC;
+ if (curCC != 0 || !(p < e)) {
+ if (seq_max < cc_pos + 1) { /* extend if need */
+ seq_max = cc_pos + CC_SEQ_STEP; /* new size */
+ if (CC_SEQ_SIZE == cc_pos) { /* seq_ary full */
+ New(0, seq_ext, seq_max, UV);
+ Copy(seq_ary, seq_ext, cc_pos, UV);
+ }
+ else {
+ Renew(seq_ext, seq_max, UV);
+ }
+ seq_ptr = seq_ext; /* use seq_ext from now */
+ }
+ seq_ptr[cc_pos] = uv;
+ ++cc_pos;
+ }
+ if (curCC != 0 && p < e)
+ continue;
+ }
+ }
+
+ /* output */
+ {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
+ d = uvchr_to_utf8(d, uvS); /* starter (composed or not) */
+ }
+
+ if (cc_pos) {
+ STRLEN i;
+
+ for (i = 0; i < cc_pos; i++) {
+ Renew_d_if_not_enough_to(UTF8_MAXLEN)
+ d = uvchr_to_utf8(d, seq_ptr[i]);
+ }
+ cc_pos = 0;
+ }
+
+ uvS = uv;
+ }
+ if (seq_ext)
+ Safefree(seq_ext);
+ *dp = dstart;
+ return d;
+}
+
+MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize
+
+SV*
+decompose(src, compat = &PL_sv_no)
+ SV * src
+ SV * compat
+ PROTOTYPE: $;$
+ PREINIT:
+ SV* dst;
+ U8 *s, *d, *dend;
+ STRLEN slen, dlen;
+ CODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
+ dst = newSVpvn("", 0);
+ dlen = slen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_decompose(aTHX_ s, slen, &d, dlen, (bool)SvTRUE(compat));
+ sv_setpvn(dst, (char *)d, dend - d);
+ SvUTF8_on(dst);
+ Safefree(d);
+ RETVAL = dst;
+ OUTPUT:
+ RETVAL
+
+
+SV*
+reorder(src)
+ SV * src
+ PROTOTYPE: $
+ PREINIT:
+ SV* dst;
+ U8 *s, *d, *dend;
+ STRLEN slen, dlen;
+ CODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
+ dst = newSVpvn("", 0);
+ dlen = slen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_reorder(aTHX_ s, slen, &d, dlen);
+ sv_setpvn(dst, (char *)d, dend - d);
+ SvUTF8_on(dst);
+ Safefree(d);
+ RETVAL = dst;
+ OUTPUT:
+ RETVAL
+
+
+SV*
+compose(src)
+ SV * src
+ PROTOTYPE: $
+ ALIAS:
+ composeContiguous = 1
+ PREINIT:
+ SV* dst;
+ U8 *s, *d, *dend;
+ STRLEN slen, dlen;
+ CODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
+ dst = newSVpvn("", 0);
+ dlen = slen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_compose(aTHX_ s, slen, &d, dlen, (bool)ix);
+ sv_setpvn(dst, (char *)d, dend - d);
+ SvUTF8_on(dst);
+ Safefree(d);
+ RETVAL = dst;
+ OUTPUT:
+ RETVAL
+
+
+SV*
+NFD(src)
+ SV * src
+ PROTOTYPE: $
+ ALIAS:
+ NFKD = 1
+ PREINIT:
+ SV *dst;
+ U8 *s, *t, *tend, *d, *dend;
+ STRLEN slen, tlen, dlen;
+ CODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
+
+ /* decompose */
+ tlen = slen;
+ New(0, t, tlen+1, U8);
+ tend = pv_utf8_decompose(aTHX_ s, slen, &t, tlen, (bool)(ix==1));
+ *tend = '\0';
+ tlen = tend - t; /* no longer know real size of t */
+
+ /* reorder */
+ dlen = tlen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_reorder(aTHX_ t, tlen, &d, dlen);
+ *dend = '\0';
+ dlen = dend - d; /* no longer know real size of d */
+
+ /* return */
+ dst = newSVpvn("", 0);
+ sv_setpvn(dst, (char *)d, dlen);
+ SvUTF8_on(dst);
+
+ Safefree(t);
+ Safefree(d);
+ RETVAL = dst;
+ OUTPUT:
+ RETVAL
+
+
+SV*
+NFC(src)
+ SV * src
+ PROTOTYPE: $
+ ALIAS:
+ NFKC = 1
+ FCC = 2
+ PREINIT:
+ SV *dst;
+ U8 *s, *t, *tend, *u, *uend, *d, *dend;
+ STRLEN slen, tlen, ulen, dlen;
+ CODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&slen);
+
+ /* decompose */
+ tlen = slen;
+ New(0, t, tlen+1, U8);
+ tend = pv_utf8_decompose(aTHX_ s, slen, &t, tlen, (bool)(ix==1));
+ *tend = '\0';
+ tlen = tend - t; /* no longer know real size of t */
+
+ /* reorder */
+ ulen = tlen;
+ New(0, u, ulen+1, U8);
+ uend = pv_utf8_reorder(aTHX_ t, tlen, &u, ulen);
+ *uend = '\0';
+ ulen = uend - u; /* no longer know real size of u */
+
+ /* compose */
+ dlen = ulen;
+ New(0, d, dlen+1, U8);
+ dend = pv_utf8_compose(aTHX_ u, ulen, &d, dlen, (bool)(ix==2));
+ *dend = '\0';
+ dlen = dend - d; /* no longer know real size of d */
+
+ /* return */
+ dst = newSVpvn("", 0);
+ sv_setpvn(dst, (char *)d, dlen);
+ SvUTF8_on(dst);
+
+ Safefree(t);
+ Safefree(u);
+ Safefree(d);
+ RETVAL = dst;
+ OUTPUT:
+ RETVAL
+
+
+SV*
+checkNFD(src)
+ SV * src
+ PROTOTYPE: $
+ ALIAS:
+ checkNFKD = 1
+ PREINIT:
+ STRLEN srclen, retlen;
+ U8 *s, *e, *p, curCC, preCC;
+ bool result = TRUE;
+ CODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&srclen);
+ e = s + srclen;
+
+ preCC = 0;
+ for (p = s; p < e; p += retlen) {
+ UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ if (!retlen)
+ croak(ErrRetlenIsZero, "checkNFD or -NFKD");
+
+ curCC = getCombinClass(uv);
+ if (preCC > curCC && curCC != 0) { /* canonical ordering violated */
+ result = FALSE;
+ break;
+ }
+ if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) {
+ result = FALSE;
+ break;
+ }
+ preCC = curCC;
+ }
+ RETVAL = boolSV(result);
+ OUTPUT:
+ RETVAL
+
+
+SV*
+checkNFC(src)
+ SV * src
+ PROTOTYPE: $
+ ALIAS:
+ checkNFKC = 1
+ PREINIT:
+ STRLEN srclen, retlen;
+ U8 *s, *e, *p, curCC, preCC;
+ bool result = TRUE;
+ bool isMAYBE = FALSE;
+ CODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&srclen);
+ e = s + srclen;
+
+ preCC = 0;
+ for (p = s; p < e; p += retlen) {
+ UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ if (!retlen)
+ croak(ErrRetlenIsZero, "checkNFC or -NFKC");
+
+ curCC = getCombinClass(uv);
+ if (preCC > curCC && curCC != 0) { /* canonical ordering violated */
+ result = FALSE;
+ break;
+ }
+
+ /* get NFC/NFKC property */
+ if (Hangul_IsS(uv)) /* Hangul syllables are canonical composites */
+ ; /* YES */
+ else if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
+ result = FALSE;
+ break;
+ }
+ else if (isComp2nd(uv))
+ isMAYBE = TRUE;
+ else if (ix) {
+ char *canon, *compat;
+ /* NFKC_NO when having compatibility mapping. */
+ canon = (char *) dec_canonical(uv);
+ compat = (char *) dec_compat(uv);
+ if (compat && !(canon && strEQ(canon, compat))) {
+ result = FALSE;
+ break;
+ }
+ } /* end of get NFC/NFKC property */
+
+ preCC = curCC;
+ }
+ if (isMAYBE && result) /* NO precedes MAYBE */
+ XSRETURN_UNDEF;
+ RETVAL = boolSV(result);
+ OUTPUT:
+ RETVAL
+
+
+SV*
+checkFCD(src)
+ SV * src
+ PROTOTYPE: $
+ ALIAS:
+ checkFCC = 1
+ PREINIT:
+ STRLEN srclen, retlen;
+ U8 *s, *e, *p, curCC, preCC;
+ bool result = TRUE;
+ bool isMAYBE = FALSE;
+ CODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&srclen);
+ e = s + srclen;
+ preCC = 0;
+ for (p = s; p < e; p += retlen) {
+ U8 *sCan;
+ UV uvLead;
+ STRLEN canlen = 0;
+ UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF);
+ if (!retlen)
+ croak(ErrRetlenIsZero, "checkFCD or -FCC");
+
+ sCan = (U8*) dec_canonical(uv);
+
+ if (sCan) {
+ STRLEN canret;
+ canlen = (STRLEN)strlen((char *) sCan);
+ uvLead = utf8n_to_uvchr(sCan, canlen, &canret, AllowAnyUTF);
+ if (!canret)
+ croak(ErrRetlenIsZero, "checkFCD or -FCC");
+ }
+ else {
+ uvLead = uv;
+ }
+
+ curCC = getCombinClass(uvLead);
+
+ if (curCC != 0 && curCC < preCC) { /* canonical ordering violated */
+ result = FALSE;
+ break;
+ }
+
+ if (ix) {
+ if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
+ result = FALSE;
+ break;
+ }
+ else if (isComp2nd(uv))
+ isMAYBE = TRUE;
+ }
+
+ if (sCan) {
+ STRLEN canret;
+ UV uvTrail;
+ U8* eCan = sCan + canlen;
+ U8* pCan = utf8_hop(eCan, -1);
+ if (pCan < sCan)
+ croak(ErrHopBeforeStart);
+ uvTrail = utf8n_to_uvchr(pCan, eCan - pCan, &canret, AllowAnyUTF);
+ if (!canret)
+ croak(ErrRetlenIsZero, "checkFCD or -FCC");
+ preCC = getCombinClass(uvTrail);
+ }
+ else {
+ preCC = curCC;
+ }
+ }
+ if (isMAYBE && result) /* NO precedes MAYBE */
+ XSRETURN_UNDEF;
+ RETVAL = boolSV(result);
+ OUTPUT:
+ RETVAL
+
+
+U8
+getCombinClass(uv)
+ UV uv
+ PROTOTYPE: $
+
+bool
+isExclusion(uv)
+ UV uv
+ PROTOTYPE: $
+
+bool
+isSingleton(uv)
+ UV uv
+ PROTOTYPE: $
+
+bool
+isNonStDecomp(uv)
+ UV uv
+ PROTOTYPE: $
+
+bool
+isComp2nd(uv)
+ UV uv
+ PROTOTYPE: $
+ ALIAS:
+ isNFC_MAYBE = 1
+ isNFKC_MAYBE = 2
+ INIT:
+ PERL_UNUSED_VAR(ix);
+
+SV*
+isNFD_NO(uv)
+ UV uv
+ PROTOTYPE: $
+ ALIAS:
+ isNFKD_NO = 1
+ PREINIT:
+ bool result = FALSE;
+ CODE:
+ if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv)))
+ result = TRUE; /* NFD_NO or NFKD_NO */
+ RETVAL = boolSV(result);
+ OUTPUT:
+ RETVAL
+
+
+SV*
+isComp_Ex(uv)
+ UV uv
+ PROTOTYPE: $
+ ALIAS:
+ isNFC_NO = 0
+ isNFKC_NO = 1
+ PREINIT:
+ bool result = FALSE;
+ CODE:
+ if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
+ result = TRUE; /* NFC_NO or NFKC_NO */
+ else if (ix) {
+ char *canon, *compat;
+ canon = (char *) dec_canonical(uv);
+ compat = (char *) dec_compat(uv);
+ if (compat && (!canon || strNE(canon, compat)))
+ result = TRUE; /* NFC_NO or NFKC_NO */
+ }
+ RETVAL = boolSV(result);
+ OUTPUT:
+ RETVAL
+
+SV*
+getComposite(uv, uv2)
+ UV uv
+ UV uv2
+ PROTOTYPE: $$
+ PREINIT:
+ UV composite;
+ CODE:
+ composite = composite_uv(uv, uv2);
+ RETVAL = composite ? newSVuv(composite) : &PL_sv_undef;
+ OUTPUT:
+ RETVAL
+
+
+
+SV*
+getCanon(uv)
+ UV uv
+ PROTOTYPE: $
+ ALIAS:
+ getCompat = 1
+ CODE:
+ if (Hangul_IsS(uv)) {
+ U8 tmp[3 * UTF8_MAXLEN + 1];
+ U8 *t = tmp;
+ U8 *e = pv_cat_decompHangul(aTHX_ t, uv);
+ RETVAL = newSVpvn((char *)t, e - t);
+ } else {
+ U8* rstr = ix ? dec_compat(uv) : dec_canonical(uv);
+ if (!rstr)
+ XSRETURN_UNDEF;
+ RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
+ }
+ SvUTF8_on(RETVAL);
+ OUTPUT:
+ RETVAL
+
+
+void
+splitOnLastStarter(src)
+ SV * src
+ PREINIT:
+ SV *svp;
+ STRLEN srclen;
+ U8 *s, *e, *p;
+ PPCODE:
+ s = (U8*)sv_2pvunicode(aTHX_ src,&srclen);
+ e = s + srclen;
+ p = e;
+ while (s < p) {
+ UV uv;
+ p = utf8_hop(p, -1);
+ if (p < s)
+ croak(ErrHopBeforeStart);
+ uv = utf8n_to_uvchr(p, e - p, NULL, AllowAnyUTF);
+ if (getCombinClass(uv) == 0) /* Last Starter found */
+ break;
+ }
+
+ svp = sv_2mortal(newSVpvn((char*)s, p - s));
+ SvUTF8_on(svp);
+ XPUSHs(svp);
+
+ svp = sv_2mortal(newSVpvn((char*)p, e - p));
+ SvUTF8_on(svp);
+ XPUSHs(svp);
+
diff --git a/dist/Unicode-Normalize/mkheader b/dist/Unicode-Normalize/mkheader
new file mode 100644
index 0000000000..8d4c1b8e8d
--- /dev/null
+++ b/dist/Unicode-Normalize/mkheader
@@ -0,0 +1,419 @@
+#!perl
+#
+# This auxiliary script makes five header files
+# used for building XSUB of Unicode::Normalize.
+#
+# Usage:
+# <do 'mkheader'> in perl, or <perl mkheader> in command line
+#
+# Input files:
+# unicore/CombiningClass.pl (or unicode/CombiningClass.pl)
+# unicore/Decomposition.pl (or unicode/Decomposition.pl)
+#
+# Output files:
+# unfcan.h
+# unfcpt.h
+# unfcmb.h
+# unfcmp.h
+# unfexc.h
+#
+use 5.006;
+use strict;
+use warnings;
+use Carp;
+use File::Spec;
+use SelectSaver;
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ die "Unicode::Normalize cannot stringify a Unicode code point\n";
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ die "Unicode::Normalize cannot get Unicode code point\n";
+ }
+}
+
+our $PACKAGE = 'Unicode::Normalize, mkheader';
+
+our $prefix = "UNF_";
+our $structname = "${prefix}complist";
+
+# Starting in v5.20, the tables in lib/unicore are built using the platform's
+# native character set for code points 0-255.
+*pack_U = ($] ge 5.020)
+ ? sub { return pack('W*', @_).pack('U*'); } # The empty pack returns
+ # an empty UTF-8 string,
+ # so the effect is to
+ # force the return into
+ # being UTF-8.
+ : sub { return pack('U*', @_); };
+
+# %Canon and %Compat will be ($codepoint => $hexstring) after _U_stringify()
+our %Comp1st; # $codepoint => $listname : may be composed with a next char.
+our %CompList; # $listname,$2nd => $codepoint : composite
+
+##### The below part is common to mkheader and PP #####
+
+our %Combin; # $codepoint => $number : combination class
+our %Canon; # $codepoint => \@codepoints : canonical decomp.
+our %Compat; # $codepoint => \@codepoints : compat. decomp.
+our %Compos; # $1st,$2nd => $codepoint : composite
+our %Exclus; # $codepoint => 1 : composition exclusions
+our %Single; # $codepoint => 1 : singletons
+our %NonStD; # $codepoint => 1 : non-starter decompositions
+our %Comp2nd; # $codepoint => 1 : may be composed with a prev char.
+
+# from core Unicode database
+our $Combin = do "unicore/CombiningClass.pl"
+ || do "unicode/CombiningClass.pl"
+ || croak "$PACKAGE: CombiningClass.pl not found";
+our $Decomp = do "unicore/Decomposition.pl"
+ || do "unicode/Decomposition.pl"
+ || croak "$PACKAGE: Decomposition.pl not found";
+
+# CompositionExclusions.txt since Unicode 3.2.0. If this ever changes, it
+# would be better to get the values from Unicode::UCD rather than hard-code
+# them here, as that will protect from having to make fixes for future
+# changes.
+our @CompEx = qw(
+ 0958 0959 095A 095B 095C 095D 095E 095F 09DC 09DD 09DF 0A33 0A36
+ 0A59 0A5A 0A5B 0A5E 0B5C 0B5D 0F43 0F4D 0F52 0F57 0F5C 0F69 0F76
+ 0F78 0F93 0F9D 0FA2 0FA7 0FAC 0FB9 FB1D FB1F FB2A FB2B FB2C FB2D
+ FB2E FB2F FB30 FB31 FB32 FB33 FB34 FB35 FB36 FB38 FB39 FB3A FB3B
+ FB3C FB3E FB40 FB41 FB43 FB44 FB46 FB47 FB48 FB49 FB4A FB4B FB4C
+ FB4D FB4E 2ADC 1D15E 1D15F 1D160 1D161 1D162 1D163 1D164 1D1BB
+ 1D1BC 1D1BD 1D1BE 1D1BF 1D1C0
+);
+
+# definition of Hangul constants
+use constant SBase => 0xAC00;
+use constant SFinal => 0xD7A3; # SBase -1 + SCount
+use constant SCount => 11172; # LCount * NCount
+use constant NCount => 588; # VCount * TCount
+use constant LBase => 0x1100;
+use constant LFinal => 0x1112;
+use constant LCount => 19;
+use constant VBase => 0x1161;
+use constant VFinal => 0x1175;
+use constant VCount => 21;
+use constant TBase => 0x11A7;
+use constant TFinal => 0x11C2;
+use constant TCount => 28;
+
+sub decomposeHangul {
+ my $sindex = $_[0] - SBase;
+ my $lindex = int( $sindex / NCount);
+ my $vindex = int(($sindex % NCount) / TCount);
+ my $tindex = $sindex % TCount;
+ my @ret = (
+ LBase + $lindex,
+ VBase + $vindex,
+ $tindex ? (TBase + $tindex) : (),
+ );
+ return wantarray ? @ret : pack_U(@ret);
+}
+
+########## getting full decomposition ##########
+
+## converts string "hhhh hhhh hhhh" to a numeric list
+## (hex digits separated by spaces)
+sub _getHexArray { map hex, $_[0] =~ /\G *([0-9A-Fa-f]+)/g }
+
+while ($Combin =~ /(.+)/g) {
+ my @tab = split /\t/, $1;
+ my $ini = hex $tab[0];
+ if ($tab[1] eq '') {
+ $Combin{$ini} = $tab[2];
+ } else {
+ $Combin{$_} = $tab[2] foreach $ini .. hex($tab[1]);
+ }
+}
+
+while ($Decomp =~ /(.+)/g) {
+ my @tab = split /\t/, $1;
+ my $compat = $tab[2] =~ s/<[^>]+>//;
+ my $dec = [ _getHexArray($tab[2]) ]; # decomposition
+ my $ini = hex($tab[0]); # initial decomposable character
+ my $end = $tab[1] eq '' ? $ini : hex($tab[1]);
+ # ($ini .. $end) is the range of decomposable characters.
+
+ foreach my $u ($ini .. $end) {
+ $Compat{$u} = $dec;
+ $Canon{$u} = $dec if ! $compat;
+ }
+}
+
+for my $s (@CompEx) {
+ my $u = hex $s;
+ next if !$Canon{$u}; # not assigned
+ next if $u == 0xFB1D && !$Canon{0x1D15E}; # 3.0.1 before Corrigendum #2
+ $Exclus{$u} = 1;
+}
+
+foreach my $u (keys %Canon) {
+ my $dec = $Canon{$u};
+
+ if (@$dec == 2) {
+ if ($Combin{ $dec->[0] }) {
+ $NonStD{$u} = 1;
+ } else {
+ $Compos{ $dec->[0] }{ $dec->[1] } = $u;
+ $Comp2nd{ $dec->[1] } = 1 if ! $Exclus{$u};
+ }
+ } elsif (@$dec == 1) {
+ $Single{$u} = 1;
+ } else {
+ my $h = sprintf '%04X', $u;
+ croak("Weird Canonical Decomposition of U+$h");
+ }
+}
+
+# modern HANGUL JUNGSEONG and HANGUL JONGSEONG jamo
+foreach my $j (0x1161..0x1175, 0x11A8..0x11C2) {
+ $Comp2nd{$j} = 1;
+}
+
+sub getCanonList {
+ my @src = @_;
+ my @dec = map {
+ (SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)
+ : $Canon{$_} ? @{ $Canon{$_} } : $_
+ } @src;
+ return join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
+ # condition @src == @dec is not ok.
+}
+
+sub getCompatList {
+ my @src = @_;
+ my @dec = map {
+ (SBase <= $_ && $_ <= SFinal) ? decomposeHangul($_)
+ : $Compat{$_} ? @{ $Compat{$_} } : $_
+ } @src;
+ return join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
+ # condition @src == @dec is not ok.
+}
+
+# exhaustive decomposition
+foreach my $key (keys %Canon) {
+ $Canon{$key} = [ getCanonList($key) ];
+}
+
+# exhaustive decomposition
+foreach my $key (keys %Compat) {
+ $Compat{$key} = [ getCompatList($key) ];
+}
+
+##### The above part is common to mkheader and PP #####
+
+foreach my $comp1st (keys %Compos) {
+ my $listname = sprintf("${structname}_%06x", $comp1st);
+ # %04x is bad since it'd place _3046 after _1d157.
+ $Comp1st{$comp1st} = $listname;
+ my $rh1st = $Compos{$comp1st};
+
+ foreach my $comp2nd (keys %$rh1st) {
+ my $uc = $rh1st->{$comp2nd};
+ $CompList{$listname}{$comp2nd} = $uc;
+ }
+}
+
+sub split_into_char {
+ use bytes;
+ my $uni = shift;
+ my $len = length($uni);
+ my @ary;
+ for(my $i = 0; $i < $len; ++$i) {
+ push @ary, ord(substr($uni,$i,1));
+ }
+ return @ary;
+}
+
+sub _U_stringify {
+ sprintf '"%s"', join '',
+ map sprintf("\\x%02x", $_), split_into_char(pack_U(@_));
+}
+
+foreach my $hash (\%Canon, \%Compat) {
+ foreach my $key (keys %$hash) {
+ $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
+ }
+}
+
+########## writing header files ##########
+
+my @boolfunc = (
+ {
+ name => "Exclusion",
+ type => "bool",
+ hash => \%Exclus,
+ },
+ {
+ name => "Singleton",
+ type => "bool",
+ hash => \%Single,
+ },
+ {
+ name => "NonStDecomp",
+ type => "bool",
+ hash => \%NonStD,
+ },
+ {
+ name => "Comp2nd",
+ type => "bool",
+ hash => \%Comp2nd,
+ },
+);
+
+my $orig_fh = SelectSaver->new;
+{
+
+my $file = "unfexc.h";
+open FH, ">$file" or croak "$PACKAGE: $file can't be made";
+binmode FH; select FH;
+
+ print << 'EOF';
+/*
+ * This file is auto-generated by mkheader.
+ * Any changes here will be lost!
+ */
+EOF
+
+foreach my $tbl (@boolfunc) {
+ my @temp = sort {$a <=> $b} keys %{$tbl->{hash}};
+ my $type = $tbl->{type};
+ my $name = $tbl->{name};
+ print "$type is$name (UV uv)\n{\nreturn\n\t";
+
+ while (@temp) {
+ my $cur = shift @temp;
+ if (@temp && $cur + 1 == $temp[0]) {
+ print "($cur <= uv && uv <= ";
+ while (@temp && $cur + 1 == $temp[0]) {
+ $cur = shift @temp;
+ }
+ print "$cur)";
+ print "\n\t|| " if @temp;
+ } else {
+ print "uv == $cur";
+ print "\n\t|| " if @temp;
+ }
+ }
+ print "\n\t? TRUE : FALSE;\n}\n\n";
+}
+
+close FH;
+
+####################################
+
+my $compinit =
+ "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
+
+foreach my $i (sort keys %CompList) {
+ $compinit .= "$structname $i [] = {\n";
+ $compinit .= join ",\n",
+ map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
+ sort {$a <=> $b } keys %{ $CompList{$i} };
+ $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
+}
+
+my @tripletable = (
+ {
+ file => "unfcmb",
+ name => "combin",
+ type => "STDCHAR",
+ hash => \%Combin,
+ null => 0,
+ },
+ {
+ file => "unfcan",
+ name => "canon",
+ type => "char*",
+ hash => \%Canon,
+ null => "NULL",
+ },
+ {
+ file => "unfcpt",
+ name => "compat",
+ type => "char*",
+ hash => \%Compat,
+ null => "NULL",
+ },
+ {
+ file => "unfcmp",
+ name => "compos",
+ type => "$structname *",
+ hash => \%Comp1st,
+ null => "NULL",
+ init => $compinit,
+ },
+);
+
+foreach my $tbl (@tripletable) {
+ my $file = "$tbl->{file}.h";
+ my $head = "${prefix}$tbl->{name}";
+ my $type = $tbl->{type};
+ my $hash = $tbl->{hash};
+ my $null = $tbl->{null};
+ my $init = $tbl->{init};
+
+ open FH, ">$file" or croak "$PACKAGE: $file can't be made";
+ binmode FH; select FH;
+ my %val;
+
+ print FH << 'EOF';
+/*
+ * This file is auto-generated by mkheader.
+ * Any changes here will be lost!
+ */
+EOF
+
+ print $init if defined $init;
+
+ foreach my $uv (keys %$hash) {
+ croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
+ unless $uv <= 0x10FFFF;
+ my @c = unpack 'CCCC', pack 'N', $uv;
+ $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
+ }
+
+ foreach my $p (sort { $a <=> $b } keys %val) {
+ next if ! $val{ $p };
+ for (my $r = 0; $r < 256; $r++) {
+ next if ! $val{ $p }{ $r };
+ printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;
+ for (my $c = 0; $c < 256; $c++) {
+ print "\t", defined $val{$p}{$r}{$c}
+ ? "($type)".$val{$p}{$r}{$c}
+ : $null;
+ print ',' if $c != 255;
+ print "\n" if $c % 8 == 7;
+ }
+ print "};\n\n";
+ }
+ }
+ foreach my $p (sort { $a <=> $b } keys %val) {
+ next if ! $val{ $p };
+ printf "static $type* ${head}_%02x [256] = {\n", $p;
+ for (my $r = 0; $r < 256; $r++) {
+ print $val{ $p }{ $r }
+ ? sprintf("${head}_%02x_%02x", $p, $r)
+ : "NULL";
+ print ',' if $r != 255;
+ print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
+ }
+ print "};\n\n";
+ }
+ print "static $type** $head [] = {\n";
+ for (my $p = 0; $p <= 0x10; $p++) {
+ print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
+ print ',' if $p != 0x10;
+ print "\n";
+ }
+ print "};\n\n";
+ close FH;
+}
+
+} # End of block for SelectSaver
+
+1;
+__END__
diff --git a/dist/Unicode-Normalize/t/fcdc.t b/dist/Unicode-Normalize/t/fcdc.t
new file mode 100644
index 0000000000..d2ef28b9e9
--- /dev/null
+++ b/dist/Unicode-Normalize/t/fcdc.t
@@ -0,0 +1,138 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..70\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub hexU { _pack_U map hex, split ' ', shift }
+sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
+
+#########################
+
+ok(FCD(''), "");
+ok(FCC(''), "");
+ok(FCD('A'), "A");
+ok(FCC('A'), "A");
+
+ok(normalize('FCD', ""), "");
+ok(normalize('FCC', ""), "");
+ok(normalize('FCC', "A"), "A");
+ok(normalize('FCD', "A"), "A");
+
+# 9
+
+# if checkFCD is YES, the return value from FCD should be same as the original
+ok(FCD(hexU("00C5")), hexU("00C5")); # A with ring above
+ok(FCD(hexU("0041 030A")), hexU("0041 030A")); # A+ring
+ok(FCD(hexU("0041 0327 030A")), hexU("0041 0327 030A")); # A+cedilla+ring
+ok(FCD(hexU("AC01 1100 1161")), hexU("AC01 1100 1161")); # hangul
+ok(FCD(hexU("212B F900")), hexU("212B F900")); # compat
+
+ok(normalize('FCD', hexU("00C5")), hexU("00C5"));
+ok(normalize('FCD', hexU("0041 030A")), hexU("0041 030A"));
+ok(normalize('FCD', hexU("0041 0327 030A")), hexU("0041 0327 030A"));
+ok(normalize('FCD', hexU("AC01 1100 1161")), hexU("AC01 1100 1161"));
+ok(normalize('FCD', hexU("212B F900")), hexU("212B F900"));
+
+# 19
+
+# if checkFCD is MAYBE or NO, FCD returns NFD (this behavior isn't documented)
+ok(FCD(hexU("00C5 0327")), hexU("0041 0327 030A"));
+ok(FCD(hexU("0041 030A 0327")), hexU("0041 0327 030A"));
+ok(FCD(hexU("00C5 0327")), NFD(hexU("00C5 0327")));
+ok(FCD(hexU("0041 030A 0327")), NFD(hexU("0041 030A 0327")));
+
+ok(normalize('FCD', hexU("00C5 0327")), hexU("0041 0327 030A"));
+ok(normalize('FCD', hexU("0041 030A 0327")), hexU("0041 0327 030A"));
+ok(normalize('FCD', hexU("00C5 0327")), NFD(hexU("00C5 0327")));
+ok(normalize('FCD', hexU("0041 030A 0327")), NFD(hexU("0041 030A 0327")));
+
+# 27
+
+ok(answer(checkFCD('')), 'YES');
+ok(answer(checkFCD('A')), 'YES');
+ok(answer(checkFCD("\x{030A}")), 'YES'); # 030A;COMBINING RING ABOVE
+ok(answer(checkFCD("\x{0327}")), 'YES'); # 0327;COMBINING CEDILLA
+ok(answer(checkFCD(_pack_U(0x00C5))), 'YES'); # A with ring above
+ok(answer(checkFCD(hexU("0041 030A"))), 'YES'); # A+ring
+ok(answer(checkFCD(hexU("0041 0327 030A"))), 'YES'); # A+cedilla+ring
+ok(answer(checkFCD(hexU("0041 030A 0327"))), 'NO'); # A+ring+cedilla
+ok(answer(checkFCD(hexU("00C5 0327"))), 'NO'); # A-ring+cedilla
+ok(answer(checkNFC(hexU("00C5 0327"))), 'MAYBE'); # NFC: A-ring+cedilla
+ok(answer(check("FCD", hexU("00C5 0327"))), 'NO');
+ok(answer(check("NFC", hexU("00C5 0327"))), 'MAYBE');
+ok(answer(checkFCD("\x{AC01}\x{1100}\x{1161}")), 'YES'); # hangul
+ok(answer(checkFCD("\x{212B}\x{F900}")), 'YES'); # compat
+
+ok(answer(checkFCD(hexU("1EA7 05AE 0315 0062"))), "NO");
+ok(answer(checkFCC(hexU("1EA7 05AE 0315 0062"))), "NO");
+ok(answer(check('FCD', hexU("1EA7 05AE 0315 0062"))), "NO");
+ok(answer(check('FCC', hexU("1EA7 05AE 0315 0062"))), "NO");
+
+# 45
+
+ok(FCC(hexU("00C5 0327")), hexU("0041 0327 030A"));
+ok(FCC(hexU("0045 0304 0300")), "\x{1E14}");
+ok(FCC("\x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}"), "\x{AC00}\x{AE00}");
+ok(normalize('FCC', hexU("00C5 0327")), hexU("0041 0327 030A"));
+ok(normalize('FCC', hexU("0045 0304 0300")), "\x{1E14}");
+ok(normalize('FCC', hexU("1100 1161 1100 1173 11AF")), "\x{AC00}\x{AE00}");
+
+ok(FCC("\x{0B47}\x{0300}\x{0B3E}"), "\x{0B47}\x{0300}\x{0B3E}");
+ok(FCC("\x{1100}\x{0300}\x{1161}"), "\x{1100}\x{0300}\x{1161}");
+ok(FCC("\x{0B47}\x{0B3E}\x{0300}"), "\x{0B4B}\x{0300}");
+ok(FCC("\x{1100}\x{1161}\x{0300}"), "\x{AC00}\x{0300}");
+ok(FCC("\x{0B47}\x{300}\x{0B3E}\x{327}"), "\x{0B47}\x{300}\x{0B3E}\x{327}");
+ok(FCC("\x{1100}\x{300}\x{1161}\x{327}"), "\x{1100}\x{300}\x{1161}\x{327}");
+
+# 57
+
+ok(answer(checkFCC('')), 'YES');
+ok(answer(checkFCC('A')), 'YES');
+ok(answer(checkFCC("\x{030A}")), 'MAYBE'); # 030A;COMBINING RING ABOVE
+ok(answer(checkFCC("\x{0327}")), 'MAYBE'); # 0327;COMBINING CEDILLA
+ok(answer(checkFCC(hexU("00C5"))), 'YES'); # A with ring above
+ok(answer(checkFCC(hexU("0041 030A"))), 'MAYBE'); # A+ring
+ok(answer(checkFCC(hexU("0041 0327 030A"))), 'MAYBE'); # A+cedilla+ring
+ok(answer(checkFCC(hexU("0041 030A 0327"))), 'NO'); # A+ring+cedilla
+ok(answer(checkFCC(hexU("00C5 0327"))), 'NO'); # A-ring+cedilla
+ok(answer(checkFCC("\x{AC01}\x{1100}\x{1161}")), 'MAYBE'); # hangul
+ok(answer(checkFCC("\x{212B}\x{F900}")), 'NO'); # compat
+ok(answer(checkFCC("\x{212B}\x{0327}")), 'NO'); # compat
+ok(answer(checkFCC("\x{0327}\x{212B}")), 'NO'); # compat
+
+# 70
+
diff --git a/dist/Unicode-Normalize/t/form.t b/dist/Unicode-Normalize/t/form.t
new file mode 100644
index 0000000000..6bbfb082ca
--- /dev/null
+++ b/dist/Unicode-Normalize/t/form.t
@@ -0,0 +1,84 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..37\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
+
+#########################
+
+ok(NFD ("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
+ok(NFC ("\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
+ok(NFKD("\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
+ok(NFKC("\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
+
+ok(answer(checkNFD ("\x{304C}")), "NO");
+ok(answer(checkNFC ("\x{304C}")), "YES");
+ok(answer(checkNFKD("\x{304C}")), "NO");
+ok(answer(checkNFKC("\x{304C}")), "YES");
+ok(answer(checkNFD ("\x{FF76}")), "YES");
+ok(answer(checkNFC ("\x{FF76}")), "YES");
+ok(answer(checkNFKD("\x{FF76}")), "NO");
+ok(answer(checkNFKC("\x{FF76}")), "NO");
+
+ok(normalize('D', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
+ok(normalize('C', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
+ok(normalize('KD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
+ok(normalize('KC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
+
+ok(answer(check('D', "\x{304C}")), "NO");
+ok(answer(check('C', "\x{304C}")), "YES");
+ok(answer(check('KD',"\x{304C}")), "NO");
+ok(answer(check('KC',"\x{304C}")), "YES");
+ok(answer(check('D' ,"\x{FF76}")), "YES");
+ok(answer(check('C' ,"\x{FF76}")), "YES");
+ok(answer(check('KD',"\x{FF76}")), "NO");
+ok(answer(check('KC',"\x{FF76}")), "NO");
+
+ok(normalize('NFD', "\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{FF76}");
+ok(normalize('NFC', "\x{304C}\x{FF76}"), "\x{304C}\x{FF76}");
+ok(normalize('NFKD',"\x{304C}\x{FF76}"), "\x{304B}\x{3099}\x{30AB}");
+ok(normalize('NFKC',"\x{304C}\x{FF76}"), "\x{304C}\x{30AB}");
+
+ok(answer(check('NFD', "\x{304C}")), "NO");
+ok(answer(check('NFC', "\x{304C}")), "YES");
+ok(answer(check('NFKD',"\x{304C}")), "NO");
+ok(answer(check('NFKC',"\x{304C}")), "YES");
+ok(answer(check('NFD' ,"\x{FF76}")), "YES");
+ok(answer(check('NFC' ,"\x{FF76}")), "YES");
+ok(answer(check('NFKD',"\x{FF76}")), "NO");
+ok(answer(check('NFKC',"\x{FF76}")), "NO");
+
diff --git a/dist/Unicode-Normalize/t/func.t b/dist/Unicode-Normalize/t/func.t
new file mode 100644
index 0000000000..2bd6e504a3
--- /dev/null
+++ b/dist/Unicode-Normalize/t/func.t
@@ -0,0 +1,386 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..217\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub hexU { _pack_U map hex, split ' ', shift }
+
+# This won't work on EBCDIC platforms prior to v5.8.0, which is when this
+# translation function was defined
+*to_native = (defined &utf8::unicode_to_native)
+ ? \&utf8::unicode_to_native
+ : sub { return shift };
+
+#########################
+
+ok(getCombinClass( to_native(0)), 0);
+ok(getCombinClass(to_native(41)), 0);
+ok(getCombinClass(to_native(65)), 0);
+ok(getCombinClass( 768), 230);
+ok(getCombinClass(1809), 36);
+
+ok(getCanon(to_native( 0)), undef);
+ok(getCanon(to_native(0x29)), undef);
+ok(getCanon(to_native(0x41)), undef);
+ok(getCanon(to_native(0x00C0)), _pack_U(0x0041, 0x0300));
+ok(getCanon(to_native(0x00EF)), _pack_U(0x0069, 0x0308));
+ok(getCanon(0x304C), _pack_U(0x304B, 0x3099));
+ok(getCanon(0x1EA4), _pack_U(0x0041, 0x0302, 0x0301));
+ok(getCanon(0x1F82), _pack_U(0x03B1, 0x0313, 0x0300, 0x0345));
+ok(getCanon(0x1FAF), _pack_U(0x03A9, 0x0314, 0x0342, 0x0345));
+ok(getCanon(0xAC00), _pack_U(0x1100, 0x1161));
+ok(getCanon(0xAE00), _pack_U(0x1100, 0x1173, 0x11AF));
+ok(getCanon(0x212C), undef);
+ok(getCanon(0x3243), undef);
+ok(getCanon(0xFA2D), _pack_U(0x9DB4));
+
+# 20
+
+ok(getCompat(to_native( 0)), undef);
+ok(getCompat(to_native(0x29)), undef);
+ok(getCompat(to_native(0x41)), undef);
+ok(getCompat(to_native(0x00C0)), _pack_U(0x0041, 0x0300));
+ok(getCompat(to_native(0x00EF)), _pack_U(0x0069, 0x0308));
+ok(getCompat(0x304C), _pack_U(0x304B, 0x3099));
+ok(getCompat(0x1EA4), _pack_U(0x0041, 0x0302, 0x0301));
+ok(getCompat(0x1F82), _pack_U(0x03B1, 0x0313, 0x0300, 0x0345));
+ok(getCompat(0x1FAF), _pack_U(0x03A9, 0x0314, 0x0342, 0x0345));
+ok(getCompat(0x212C), _pack_U(0x0042));
+ok(getCompat(0x3243), _pack_U(0x0028, 0x81F3, 0x0029));
+ok(getCompat(0xAC00), _pack_U(0x1100, 0x1161));
+ok(getCompat(0xAE00), _pack_U(0x1100, 0x1173, 0x11AF));
+ok(getCompat(0xFA2D), _pack_U(0x9DB4));
+
+# 34
+
+ok(getComposite(to_native( 0), to_native( 0)), undef);
+ok(getComposite(to_native( 0), to_native(0x29)), undef);
+ok(getComposite(to_native(0x29), to_native( 0)), undef);
+ok(getComposite(to_native(0x29), to_native(0x29)), undef);
+ok(getComposite(to_native( 0), to_native(0x41)), undef);
+ok(getComposite(to_native(0x41), to_native( 0)), undef);
+ok(getComposite(to_native(0x41), to_native(0x41)), undef);
+ok(getComposite(to_native(12), to_native(0x0300)), undef);
+ok(getComposite(to_native(0x0055), 0xFF00), undef);
+ok(getComposite(to_native(0x0041), 0x0300), to_native(0x00C0));
+ok(getComposite(to_native(0x0055), 0x0300), to_native(0x00D9));
+ok(getComposite(0x0112, 0x0300), 0x1E14);
+ok(getComposite(0x1100, 0x1161), 0xAC00);
+ok(getComposite(0x1100, 0x1173), 0xADF8);
+ok(getComposite(0x1100, 0x11AF), undef);
+ok(getComposite(0x1173, 0x11AF), undef);
+ok(getComposite(0xAC00, 0x11A7), undef);
+ok(getComposite(0xAC00, 0x11A8), 0xAC01);
+ok(getComposite(0xADF8, 0x11AF), 0xAE00);
+
+# 53
+
+sub uprops {
+ my $uv = shift;
+ my $r = "";
+ $r .= isExclusion($uv) ? 'X' : 'x';
+ $r .= isSingleton($uv) ? 'S' : 's';
+ $r .= isNonStDecomp($uv) ? 'N' : 'n'; # Non-Starter Decomposition
+ $r .= isComp_Ex($uv) ? 'F' : 'f'; # Full exclusion (X + S + N)
+ $r .= isComp2nd($uv) ? 'B' : 'b'; # B = M = Y
+ $r .= isNFD_NO($uv) ? 'D' : 'd';
+ $r .= isNFC_MAYBE($uv) ? 'M' : 'm'; # Maybe
+ $r .= isNFC_NO($uv) ? 'C' : 'c';
+ $r .= isNFKD_NO($uv) ? 'K' : 'k';
+ $r .= isNFKC_MAYBE($uv) ? 'Y' : 'y'; # maYbe
+ $r .= isNFKC_NO($uv) ? 'G' : 'g';
+ return $r;
+}
+
+ok(uprops(to_native(0x0000)), 'xsnfbdmckyg'); # NULL
+ok(uprops(to_native(0x0029)), 'xsnfbdmckyg'); # RIGHT PARENTHESIS
+ok(uprops(to_native(0x0041)), 'xsnfbdmckyg'); # LATIN CAPITAL LETTER A
+ok(uprops(to_native(0x00A0)), 'xsnfbdmcKyG'); # NO-BREAK SPACE
+ok(uprops(to_native(0x00C0)), 'xsnfbDmcKyg'); # LATIN CAPITAL LETTER A WITH GRAVE
+ok(uprops(0x0300), 'xsnfBdMckYg'); # COMBINING GRAVE ACCENT
+ok(uprops(0x0344), 'xsNFbDmCKyG'); # COMBINING GREEK DIALYTIKA TONOS
+ok(uprops(0x0387), 'xSnFbDmCKyG'); # GREEK ANO TELEIA
+ok(uprops(0x0958), 'XsnFbDmCKyG'); # DEVANAGARI LETTER QA
+ok(uprops(0x0F43), 'XsnFbDmCKyG'); # TIBETAN LETTER GHA
+ok(uprops(0x1100), 'xsnfbdmckyg'); # HANGUL CHOSEONG KIYEOK
+ok(uprops(0x1161), 'xsnfBdMckYg'); # HANGUL JUNGSEONG A
+ok(uprops(0x11AF), 'xsnfBdMckYg'); # HANGUL JONGSEONG RIEUL
+ok(uprops(0x212B), 'xSnFbDmCKyG'); # ANGSTROM SIGN
+ok(uprops(0xAC00), 'xsnfbDmcKyg'); # HANGUL SYLLABLE GA
+ok(uprops(0xF900), 'xSnFbDmCKyG'); # CJK COMPATIBILITY IDEOGRAPH-F900
+ok(uprops(0xFB4E), 'XsnFbDmCKyG'); # HEBREW LETTER PE WITH RAFE
+ok(uprops(0xFF71), 'xsnfbdmcKyG'); # HALFWIDTH KATAKANA LETTER A
+
+# 71
+
+ok(decompose(""), "");
+ok(decompose("A"), "A");
+ok(decompose("", 1), "");
+ok(decompose("A", 1), "A");
+
+ok(decompose(hexU("1E14 AC01")), hexU("0045 0304 0300 1100 1161 11A8"));
+ok(decompose(hexU("AC00 AE00")), hexU("1100 1161 1100 1173 11AF"));
+ok(decompose(hexU("304C FF76")), hexU("304B 3099 FF76"));
+
+ok(decompose(hexU("1E14 AC01"), 1), hexU("0045 0304 0300 1100 1161 11A8"));
+ok(decompose(hexU("AC00 AE00"), 1), hexU("1100 1161 1100 1173 11AF"));
+ok(decompose(hexU("304C FF76"), 1), hexU("304B 3099 30AB"));
+
+# don't modify the source
+my $sDec = "\x{FA19}";
+ok(decompose($sDec), "\x{795E}");
+ok($sDec, "\x{FA19}");
+
+# 83
+
+ok(reorder(""), "");
+ok(reorder("A"), "A");
+ok(reorder(hexU("0041 0300 0315 0313 031b 0061")),
+ hexU("0041 031b 0300 0313 0315 0061"));
+ok(reorder(hexU("00C1 0300 0315 0313 031b 0061 309A 3099")),
+ hexU("00C1 031b 0300 0313 0315 0061 309A 3099"));
+
+# don't modify the source
+my $sReord = "\x{3000}\x{300}\x{31b}";
+ok(reorder($sReord), "\x{3000}\x{31b}\x{300}");
+ok($sReord, "\x{3000}\x{300}\x{31b}");
+
+# 89
+
+ok(compose(""), "");
+ok(compose("A"), "A");
+ok(compose(hexU("0061 0300")), hexU("00E0"));
+ok(compose(hexU("0061 0300 031B")), hexU("00E0 031B"));
+ok(compose(hexU("0061 0300 0315")), hexU("00E0 0315"));
+ok(compose(hexU("0061 0300 0313")), hexU("00E0 0313"));
+ok(compose(hexU("0061 031B 0300")), hexU("00E0 031B"));
+ok(compose(hexU("0061 0315 0300")), hexU("0061 0315 0300"));
+ok(compose(hexU("0061 0313 0300")), hexU("0061 0313 0300"));
+
+# don't modify the source
+my $sCom = "\x{304B}\x{3099}";
+ok(compose($sCom), "\x{304C}");
+ok($sCom, "\x{304B}\x{3099}");
+
+# 100
+
+ok(composeContiguous(""), "");
+ok(composeContiguous("A"), "A");
+ok(composeContiguous(hexU("0061 0300")), hexU("00E0"));
+ok(composeContiguous(hexU("0061 0300 031B")), hexU("00E0 031B"));
+ok(composeContiguous(hexU("0061 0300 0315")), hexU("00E0 0315"));
+ok(composeContiguous(hexU("0061 0300 0313")), hexU("00E0 0313"));
+ok(composeContiguous(hexU("0061 031B 0300")), hexU("0061 031B 0300"));
+ok(composeContiguous(hexU("0061 0315 0300")), hexU("0061 0315 0300"));
+ok(composeContiguous(hexU("0061 0313 0300")), hexU("0061 0313 0300"));
+
+# don't modify the source
+my $sCtg = "\x{30DB}\x{309A}";
+ok(composeContiguous($sCtg), "\x{30DD}");
+ok($sCtg, "\x{30DB}\x{309A}");
+
+# 111
+
+sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
+
+ok(answer(checkNFD("")), "YES");
+ok(answer(checkNFC("")), "YES");
+ok(answer(checkNFKD("")), "YES");
+ok(answer(checkNFKC("")), "YES");
+ok(answer(check("NFD", "")), "YES");
+ok(answer(check("NFC", "")), "YES");
+ok(answer(check("NFKD","")), "YES");
+ok(answer(check("NFKC","")), "YES");
+
+# U+0000 to U+007F are prenormalized in all the normalization forms.
+ok(answer(checkNFD("AZaz\t12!#`")), "YES");
+ok(answer(checkNFC("AZaz\t12!#`")), "YES");
+ok(answer(checkNFKD("AZaz\t12!#`")), "YES");
+ok(answer(checkNFKC("AZaz\t12!#`")), "YES");
+ok(answer(check("D", "AZaz\t12!#`")), "YES");
+ok(answer(check("C", "AZaz\t12!#`")), "YES");
+ok(answer(check("KD","AZaz\t12!#`")), "YES");
+ok(answer(check("KC","AZaz\t12!#`")), "YES");
+
+ok(answer(checkNFD(NFD(_pack_U(0xC1, 0x1100, 0x1173, 0x11AF)))), "YES");
+ok(answer(checkNFD(hexU("20 C1 1100 1173 11AF"))), "NO");
+ok(answer(checkNFC(hexU("20 C1 1173 11AF"))), "MAYBE");
+ok(answer(checkNFC(hexU("20 C1 AE00 1100"))), "YES");
+ok(answer(checkNFC(hexU("20 C1 AE00 1100 0300"))), "MAYBE");
+ok(answer(checkNFC(hexU("212B 1100 0300"))), "NO");
+ok(answer(checkNFC(hexU("1100 0300 212B"))), "NO");
+ok(answer(checkNFC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring
+ok(answer(checkNFC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla
+ok(answer(checkNFC(hexU("20 C1 FF71 2025"))),"YES");
+ok(answer(check("NFC", hexU("20 C1 212B 300"))), "NO");
+ok(answer(checkNFKD(hexU("20 C1 FF71 2025"))), "NO");
+ok(answer(checkNFKC(hexU("20 C1 AE00 2025"))), "NO");
+ok(answer(checkNFKC(hexU("212B 1100 0300"))), "NO");
+ok(answer(checkNFKC(hexU("1100 0300 212B"))), "NO");
+ok(answer(checkNFKC(hexU("0041 0327 030A"))), "MAYBE"); # A+cedilla+ring
+ok(answer(checkNFKC(hexU("0041 030A 0327"))), "NO"); # A+ring+cedilla
+ok(answer(check("NFKC", hexU("20 C1 212B 300"))), "NO");
+
+# 145
+
+"012ABC" =~ /(\d+)(\w+)/;
+ok("012" eq NFC $1 && "ABC" eq NFC $2);
+
+ok(normalize('C', $1), "012");
+ok(normalize('C', $2), "ABC");
+
+ok(normalize('NFC', $1), "012");
+ok(normalize('NFC', $2), "ABC");
+ # s/^NF// in normalize() must not prevent using $1, $&, etc.
+
+# 150
+
+# a string with initial zero should be treated like a number
+
+# LATIN CAPITAL LETTER A WITH GRAVE
+ok(getCombinClass(sprintf("0%d", to_native(192))), 0);
+ok(getCanon (sprintf("0%d", to_native(192))), _pack_U(0x41, 0x300));
+ok(getCompat(sprintf("0%d", to_native(192))), _pack_U(0x41, 0x300));
+my $lead_zero = sprintf "0%d", to_native(65);
+ok(getComposite($lead_zero, "0768"), to_native(192));
+ok(isNFD_NO (sprintf("0%d", to_native(192))));
+ok(isNFKD_NO(sprintf("0%d", to_native(192))));
+
+# DEVANAGARI LETTER QA
+ok(isExclusion("02392"));
+ok(isComp_Ex ("02392"));
+ok(isNFC_NO ("02392"));
+ok(isNFKC_NO ("02392"));
+ok(isNFD_NO ("02392"));
+ok(isNFKD_NO ("02392"));
+
+# ANGSTROM SIGN
+ok(isSingleton("08491"));
+ok(isComp_Ex ("08491"));
+ok(isNFC_NO ("08491"));
+ok(isNFKC_NO ("08491"));
+ok(isNFD_NO ("08491"));
+ok(isNFKD_NO ("08491"));
+
+# COMBINING GREEK DIALYTIKA TONOS
+ok(isNonStDecomp("0836"));
+ok(isComp_Ex ("0836"));
+ok(isNFC_NO ("0836"));
+ok(isNFKC_NO ("0836"));
+ok(isNFD_NO ("0836"));
+ok(isNFKD_NO ("0836"));
+
+# COMBINING GRAVE ACCENT
+ok(getCombinClass("0768"), 230);
+ok(isComp2nd ("0768"));
+ok(isNFC_MAYBE ("0768"));
+ok(isNFKC_MAYBE("0768"));
+
+# HANGUL SYLLABLE GA
+ok(getCombinClass("044032"), 0);
+ok(getCanon("044032"), _pack_U(0x1100, 0x1161));
+ok(getCompat("044032"), _pack_U(0x1100, 0x1161));
+ok(getComposite("04352", "04449"), 0xAC00);
+
+# 182
+
+# string with 22 combining characters: (0x300..0x315)
+my $str_cc22 = _pack_U(0x3041, 0x300..0x315, 0x3042);
+ok(decompose($str_cc22), $str_cc22);
+ok(reorder($str_cc22), $str_cc22);
+ok(compose($str_cc22), $str_cc22);
+ok(composeContiguous($str_cc22), $str_cc22);
+ok(NFD($str_cc22), $str_cc22);
+ok(NFC($str_cc22), $str_cc22);
+ok(NFKD($str_cc22), $str_cc22);
+ok(NFKC($str_cc22), $str_cc22);
+ok(FCD($str_cc22), $str_cc22);
+ok(FCC($str_cc22), $str_cc22);
+
+# 192
+
+# string with 40 combining characters of the same class: (0x300..0x313)x2
+my $str_cc40 = _pack_U(0x3041, 0x300..0x313, 0x300..0x313, 0x3042);
+ok(decompose($str_cc40), $str_cc40);
+ok(reorder($str_cc40), $str_cc40);
+ok(compose($str_cc40), $str_cc40);
+ok(composeContiguous($str_cc40), $str_cc40);
+ok(NFD($str_cc40), $str_cc40);
+ok(NFC($str_cc40), $str_cc40);
+ok(NFKD($str_cc40), $str_cc40);
+ok(NFKC($str_cc40), $str_cc40);
+ok(FCD($str_cc40), $str_cc40);
+ok(FCC($str_cc40), $str_cc40);
+
+# 202
+
+my $precomp = hexU("304C 304E 3050 3052 3054");
+my $combseq = hexU("304B 3099 304D 3099 304F 3099 3051 3099 3053 3099");
+ok(decompose($precomp x 5), $combseq x 5);
+ok(decompose($precomp x 10), $combseq x 10);
+ok(decompose($precomp x 20), $combseq x 20);
+
+my $hangsyl = hexU("AC00 B098 B2E4 B77C B9C8");
+my $jamoseq = hexU("1100 1161 1102 1161 1103 1161 1105 1161 1106 1161");
+ok(decompose($hangsyl x 5), $jamoseq x 5);
+ok(decompose($hangsyl x 10), $jamoseq x 10);
+ok(decompose($hangsyl x 20), $jamoseq x 20);
+
+my $notcomp = hexU("304B 304D 304F 3051 3053");
+ok(decompose($precomp . $notcomp), $combseq . $notcomp);
+ok(decompose($precomp . $notcomp x 5), $combseq . $notcomp x 5);
+ok(decompose($precomp . $notcomp x10), $combseq . $notcomp x10);
+
+# 211
+
+my $preUnicode3_1 = !defined getCanon(0x1D15E);
+my $preUnicode3_2 = !defined getCanon(0x2ADC);
+
+# HEBREW LETTER YOD WITH HIRIQ
+ok($preUnicode3_1 xor isExclusion(0xFB1D));
+ok($preUnicode3_1 xor isComp_Ex (0xFB1D));
+
+# MUSICAL SYMBOL HALF NOTE
+ok($preUnicode3_1 xor isExclusion(0x1D15E));
+ok($preUnicode3_1 xor isComp_Ex (0x1D15E));
+
+# FORKING
+ok($preUnicode3_2 xor isExclusion(0x2ADC));
+ok($preUnicode3_2 xor isComp_Ex (0x2ADC));
+
+# 217
+
diff --git a/dist/Unicode-Normalize/t/illegal.t b/dist/Unicode-Normalize/t/illegal.t
new file mode 100644
index 0000000000..ccf2b4aae6
--- /dev/null
+++ b/dist/Unicode-Normalize/t/illegal.t
@@ -0,0 +1,85 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
+#########################
+
+BEGIN {
+ use Unicode::Normalize qw(:all);
+
+ unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) {
+ print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
+ " needed for this test\n";
+ print $@;
+ exit;
+ }
+}
+
+use strict;
+use warnings;
+
+BEGIN { $| = 1; print "1..113\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+ok(1);
+
+#########################
+
+no warnings qw(utf8);
+
+for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF,
+ 0x1FFFF, 0x10FFFF, 0x110000, 0x3FFFFFFF)
+{
+ my $c = chr $u;
+ ok($c eq NFD($c)); # 1
+ ok($c eq NFC($c)); # 2
+ ok($c eq NFKD($c)); # 3
+ ok($c eq NFKC($c)); # 4
+ ok($c eq FCD($c)); # 5
+ ok($c eq FCC($c)); # 6
+ ok($c eq decompose($c)); # 7
+ ok($c eq decompose($c,1)); # 8
+ ok($c eq reorder($c)); # 9
+ ok($c eq compose($c)); # 10
+}
+
+our $proc; # before the last starter
+our $unproc; # the last starter and after
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+
+($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0xFFFF));
+ok($proc eq _pack_U(0x41, 0x300, 0x327));
+ok($unproc eq "\x{FFFF}");
+
diff --git a/dist/Unicode-Normalize/t/norm.t b/dist/Unicode-Normalize/t/norm.t
new file mode 100644
index 0000000000..d3cec3aea1
--- /dev/null
+++ b/dist/Unicode-Normalize/t/norm.t
@@ -0,0 +1,145 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..64\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(normalize);
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+ok(normalize('D', ""), "");
+ok(normalize('C', ""), "");
+ok(normalize('KD',""), "");
+ok(normalize('KC',""), "");
+
+ok(normalize('D', "A"), "A");
+ok(normalize('C', "A"), "A");
+ok(normalize('KD',"A"), "A");
+ok(normalize('KC',"A"), "A");
+
+ok(normalize('NFD', ""), "");
+ok(normalize('NFC', ""), "");
+ok(normalize('NFKD',""), "");
+ok(normalize('NFKC',""), "");
+
+ok(normalize('NFD', "A"), "A");
+ok(normalize('NFC', "A"), "A");
+ok(normalize('NFKD',"A"), "A");
+ok(normalize('NFKC',"A"), "A");
+
+# 17
+
+# don't modify the source
+my $sNFD = "\x{FA19}";
+ok(normalize('NFD', $sNFD), "\x{795E}");
+ok($sNFD, "\x{FA19}");
+
+my $sNFC = "\x{FA1B}";
+ok(normalize('NFC', $sNFC), "\x{798F}");
+ok($sNFC, "\x{FA1B}");
+
+my $sNFKD = "\x{FA1E}";
+ok(normalize('NFKD', $sNFKD), "\x{7FBD}");
+ok($sNFKD, "\x{FA1E}");
+
+my $sNFKC = "\x{FA26}";
+ok(normalize('NFKC', $sNFKC), "\x{90FD}");
+ok($sNFKC, "\x{FA26}");
+
+# 25
+
+sub hexNFC {
+ join " ", map sprintf("%04X", $_),
+ _unpack_U normalize 'C', _pack_U map hex, split ' ', shift;
+}
+sub hexNFD {
+ join " ", map sprintf("%04X", $_),
+ _unpack_U normalize 'D', _pack_U map hex, split ' ', shift;
+}
+
+ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8");
+ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF");
+
+ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
+ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
+ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
+ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
+ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
+ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");
+
+ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
+ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
+ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
+ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
+ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
+ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
+ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
+ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
+ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");
+
+ok(hexNFC("AC00 11A7"), "AC00 11A7");
+ok(hexNFC("AC00 11A8"), "AC01");
+ok(hexNFC("AC00 11A9"), "AC02");
+ok(hexNFC("AC00 11C2"), "AC1B");
+ok(hexNFC("AC00 11C3"), "AC00 11C3");
+
+# 47
+
+# Test Cases from Public Review Issue #29: Normalization Issue
+# cf. http://www.unicode.org/review/pr-29.html
+ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
+ok(hexNFC("1100 0300 1161"), "1100 0300 1161");
+ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
+ok(hexNFC("1100 1161 0300"), "AC00 0300");
+ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
+ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");
+
+ok(hexNFC("0300 0041"), "0300 0041");
+ok(hexNFC("0300 0301 0041"), "0300 0301 0041");
+ok(hexNFC("0301 0300 0041"), "0301 0300 0041");
+ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301");
+ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300");
+
+ok(hexNFC("0327 0061 0300"), "0327 00E0");
+ok(hexNFC("0301 0061 0300"), "0301 00E0");
+ok(hexNFC("0315 0061 0300"), "0315 00E0");
+ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0");
+ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0");
+ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0");
+
+# 64
+
diff --git a/dist/Unicode-Normalize/t/null.t b/dist/Unicode-Normalize/t/null.t
new file mode 100644
index 0000000000..9a0008708e
--- /dev/null
+++ b/dist/Unicode-Normalize/t/null.t
@@ -0,0 +1,100 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+
+use Unicode::Normalize qw(:all);
+print "1..24\n";
+
+print "ok 1\n";
+
+# if $_ is not NULL-terminated, test may fail.
+
+$_ = compose('abc');
+print /c$/ ? "ok" : "not ok", " 2\n";
+
+$_ = decompose('abc');
+print /c$/ ? "ok" : "not ok", " 3\n";
+
+$_ = reorder('abc');
+print /c$/ ? "ok" : "not ok", " 4\n";
+
+$_ = NFD('abc');
+print /c$/ ? "ok" : "not ok", " 5\n";
+
+$_ = NFC('abc');
+print /c$/ ? "ok" : "not ok", " 6\n";
+
+$_ = NFKD('abc');
+print /c$/ ? "ok" : "not ok", " 7\n";
+
+$_ = NFKC('abc');
+print /c$/ ? "ok" : "not ok", " 8\n";
+
+$_ = FCC('abc');
+print /c$/ ? "ok" : "not ok", " 9\n";
+
+$_ = decompose("\x{304C}abc");
+print /c$/ ? "ok" : "not ok", " 10\n";
+
+$_ = decompose("\x{304B}\x{3099}abc");
+print /c$/ ? "ok" : "not ok", " 11\n";
+
+$_ = reorder("\x{304C}abc");
+print /c$/ ? "ok" : "not ok", " 12\n";
+
+$_ = reorder("\x{304B}\x{3099}abc");
+print /c$/ ? "ok" : "not ok", " 13\n";
+
+$_ = compose("\x{304C}abc");
+print /c$/ ? "ok" : "not ok", " 14\n";
+
+$_ = compose("\x{304B}\x{3099}abc");
+print /c$/ ? "ok" : "not ok", " 15\n";
+
+$_ = NFD("\x{304C}abc");
+print /c$/ ? "ok" : "not ok", " 16\n";
+
+$_ = NFC("\x{304C}abc");
+print /c$/ ? "ok" : "not ok", " 17\n";
+
+$_ = NFKD("\x{304C}abc");
+print /c$/ ? "ok" : "not ok", " 18\n";
+
+$_ = NFKC("\x{304C}abc");
+print /c$/ ? "ok" : "not ok", " 19\n";
+
+$_ = FCC("\x{304C}abc");
+print /c$/ ? "ok" : "not ok", " 20\n";
+
+$_ = getCanon(0x100);
+print s/.$// ? "ok" : "not ok", " 21\n";
+
+$_ = getCompat(0x100);
+print s/.$// ? "ok" : "not ok", " 22\n";
+
+$_ = getCanon(0xAC00);
+print s/.$// ? "ok" : "not ok", " 23\n";
+
+$_ = getCompat(0xAC00);
+print s/.$// ? "ok" : "not ok", " 24\n";
+
diff --git a/dist/Unicode-Normalize/t/partial1.t b/dist/Unicode-Normalize/t/partial1.t
new file mode 100644
index 0000000000..3e44a63dc0
--- /dev/null
+++ b/dist/Unicode-Normalize/t/partial1.t
@@ -0,0 +1,120 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..26\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+sub arraynorm {
+ my $form = shift;
+ my @string = @_;
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ $result .= $form eq 'NFC' ? NFC_partial ($unproc) :
+ $form eq 'NFD' ? NFD_partial ($unproc) :
+ $form eq 'NFKC' ? NFKC_partial($unproc) :
+ $form eq 'NFKD' ? NFKD_partial($unproc) :
+ undef;
+ }
+ $result .= $unproc;
+ return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX = "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+# 18
+
+# must modify the source
+my $sNFD = "\x{FA19}";
+ok(NFD_partial($sNFD), "");
+ok($sNFD, "\x{795E}");
+
+my $sNFC = "\x{FA1B}";
+ok(NFC_partial($sNFC), "");
+ok($sNFC, "\x{798F}");
+
+my $sNFKD = "\x{FA1E}";
+ok(NFKD_partial($sNFKD), "");
+ok($sNFKD, "\x{7FBD}");
+
+my $sNFKC = "\x{FA26}";
+ok(NFKC_partial($sNFKC), "");
+ok($sNFKC, "\x{90FD}");
+
+# 26
+
diff --git a/dist/Unicode-Normalize/t/partial2.t b/dist/Unicode-Normalize/t/partial2.t
new file mode 100644
index 0000000000..7f19e9365b
--- /dev/null
+++ b/dist/Unicode-Normalize/t/partial2.t
@@ -0,0 +1,116 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..26\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+sub arraynorm {
+ my $form = shift;
+ my @string = @_;
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ $result .= normalize_partial($form, $unproc);
+ }
+ $result .= $unproc;
+ return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX = "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+# 18
+
+# must modify the source
+my $sNFD = "\x{FA19}";
+ok(normalize_partial('NFD', $sNFD), "");
+ok($sNFD, "\x{795E}");
+
+my $sNFC = "\x{FA1B}";
+ok(normalize_partial('NFC', $sNFC), "");
+ok($sNFC, "\x{798F}");
+
+my $sNFKD = "\x{FA1E}";
+ok(normalize_partial('NFKD', $sNFKD), "");
+ok($sNFKD, "\x{7FBD}");
+
+my $sNFKC = "\x{FA26}";
+ok(normalize_partial('NFKC', $sNFKC), "");
+ok($sNFKC, "\x{90FD}");
+
+# 26
+
diff --git a/dist/Unicode-Normalize/t/proto.t b/dist/Unicode-Normalize/t/proto.t
new file mode 100644
index 0000000000..38c6985759
--- /dev/null
+++ b/dist/Unicode-Normalize/t/proto.t
@@ -0,0 +1,99 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..48\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+#########################
+
+# unary op. RING-CEDILLA
+ok( "\x{30A}\x{327}" ne "\x{327}\x{30A}");
+ok(NFD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(NFC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(NFKD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(NFKC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(FCD "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(FCC "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+ok(reorder "\x{30A}\x{327}" eq "\x{327}\x{30A}");
+
+# 9
+
+ok(prototype \&normalize,'$$');
+ok(prototype \&NFD, '$');
+ok(prototype \&NFC, '$');
+ok(prototype \&NFKD, '$');
+ok(prototype \&NFKC, '$');
+ok(prototype \&FCD, '$');
+ok(prototype \&FCC, '$');
+
+ok(prototype \&check, '$$');
+ok(prototype \&checkNFD, '$');
+ok(prototype \&checkNFC, '$');
+ok(prototype \&checkNFKD,'$');
+ok(prototype \&checkNFKC,'$');
+ok(prototype \&checkFCD, '$');
+ok(prototype \&checkFCC, '$');
+
+ok(prototype \&decompose, '$;$');
+ok(prototype \&reorder, '$');
+ok(prototype \&compose, '$');
+ok(prototype \&composeContiguous, '$');
+
+# 27
+
+ok(prototype \&getCanon, '$');
+ok(prototype \&getCompat, '$');
+ok(prototype \&getComposite, '$$');
+ok(prototype \&getCombinClass,'$');
+ok(prototype \&isExclusion, '$');
+ok(prototype \&isSingleton, '$');
+ok(prototype \&isNonStDecomp, '$');
+ok(prototype \&isComp2nd, '$');
+ok(prototype \&isComp_Ex, '$');
+ok(prototype \&isNFD_NO, '$');
+ok(prototype \&isNFC_NO, '$');
+ok(prototype \&isNFC_MAYBE, '$');
+ok(prototype \&isNFKD_NO, '$');
+ok(prototype \&isNFKC_NO, '$');
+ok(prototype \&isNFKC_MAYBE, '$');
+ok(prototype \&splitOnLastStarter, undef);
+ok(prototype \&normalize_partial, '$$');
+ok(prototype \&NFD_partial, '$');
+ok(prototype \&NFC_partial, '$');
+ok(prototype \&NFKD_partial, '$');
+ok(prototype \&NFKC_partial, '$');
+
+# 48
+
diff --git a/dist/Unicode-Normalize/t/split.t b/dist/Unicode-Normalize/t/split.t
new file mode 100644
index 0000000000..a92957c208
--- /dev/null
+++ b/dist/Unicode-Normalize/t/split.t
@@ -0,0 +1,147 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..34\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize qw(:all);
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+our $proc; # before the last starter
+our $unproc; # the last starter and after
+# If string has no starter, entire string is set to $unproc.
+
+($proc, $unproc) = splitOnLastStarter("");
+ok($proc, "");
+ok($unproc, "");
+
+($proc, $unproc) = splitOnLastStarter("A");
+ok($proc, "");
+ok($unproc, "A");
+
+($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0x42));
+ok($proc, _pack_U(0x41, 0x300, 0x327));
+ok($unproc, "B");
+
+($proc, $unproc) = splitOnLastStarter(_pack_U(0x4E00, 0x41, 0x301));
+ok($proc, _pack_U(0x4E00));
+ok($unproc, _pack_U(0x41, 0x301));
+
+($proc, $unproc) = splitOnLastStarter(_pack_U(0x302, 0x301, 0x300));
+ok($proc, "");
+ok($unproc, _pack_U(0x302, 0x301, 0x300));
+
+our $ka_grave = _pack_U(0x41, 0, 0x42, 0x304B, 0x300);
+our $dakuten = _pack_U(0x3099);
+our $ga_grave = _pack_U(0x41, 0, 0x42, 0x304C, 0x300);
+
+our ($p, $u) = splitOnLastStarter($ka_grave);
+our $concat = $p . NFC($u.$dakuten);
+
+ok(NFC($ka_grave.$dakuten) eq $ga_grave);
+ok(NFC($ka_grave).NFC($dakuten) ne $ga_grave);
+ok($concat eq $ga_grave);
+
+# 14
+
+sub arraynorm {
+ my $form = shift;
+ my @string = @_;
+ my $result = "";
+ my $unproc = "";
+ foreach my $str (@string) {
+ $unproc .= $str;
+ my $n = normalize($form, $unproc);
+ my($p, $u) = splitOnLastStarter($n);
+ $result .= $p;
+ $unproc = $u;
+ }
+ $result .= $unproc;
+ return $result;
+}
+
+my $strD = "\x{3C9}\x{301}\x{1100}\x{1161}\x{11A8}\x{1100}\x{1161}\x{11AA}";
+my $strC = "\x{3CE}\x{AC01}\x{AC03}";
+my @str1 = (substr($strD,0,3), substr($strD,3,4), substr($strD,7));
+my @str2 = (substr($strD,0,1), substr($strD,1,3), substr($strD,4));
+ok($strC eq NFC($strD));
+ok($strD eq join('', @str1));
+ok($strC eq arraynorm('NFC', @str1));
+ok($strD eq join('', @str2));
+ok($strC eq arraynorm('NFC', @str2));
+
+my @strX = ("\x{300}\x{AC00}", "\x{11A8}");
+my $strX = "\x{300}\x{AC01}";
+ok($strX eq NFC(join('', @strX)));
+ok($strX eq arraynorm('NFC', @strX));
+ok($strX eq NFKC(join('', @strX)));
+ok($strX eq arraynorm('NFKC', @strX));
+
+my @strY = ("\x{304B}\x{0308}", "\x{0323}\x{3099}");
+my $strY = ("\x{304C}\x{0323}\x{0308}");
+ok($strY eq NFC(join('', @strY)));
+ok($strY eq arraynorm('NFC', @strY));
+ok($strY eq NFKC(join('', @strY)));
+ok($strY eq arraynorm('NFKC', @strY));
+
+my @strZ = ("\x{304B}\x{0308}", "\x{0323}", "\x{3099}");
+my $strZ = ("\x{304B}\x{3099}\x{0323}\x{0308}");
+ok($strZ eq NFD(join('', @strZ)));
+ok($strZ eq arraynorm('NFD', @strZ));
+ok($strZ eq NFKD(join('', @strZ)));
+ok($strZ eq arraynorm('NFKD', @strZ));
+
+# 31
+
+# don't modify the source
+
+my $source = "ABC";
+($proc, $unproc) = splitOnLastStarter($source);
+ok($proc, "AB");
+ok($unproc, "C");
+ok($source, "ABC");
+
+# 34
+
diff --git a/dist/Unicode-Normalize/t/test.t b/dist/Unicode-Normalize/t/test.t
new file mode 100644
index 0000000000..cb4b6ea637
--- /dev/null
+++ b/dist/Unicode-Normalize/t/test.t
@@ -0,0 +1,168 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..72\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+use Unicode::Normalize;
+
+ok(1);
+
+sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+
+#########################
+
+ok(NFD(""), "");
+ok(NFC(""), "");
+ok(NFKD(""), "");
+ok(NFKC(""), "");
+
+ok(NFD("A"), "A");
+ok(NFC("A"), "A");
+ok(NFKD("A"), "A");
+ok(NFKC("A"), "A");
+
+# 9
+
+# don't modify the source
+my $sNFD = "\x{FA19}";
+ok(NFD($sNFD), "\x{795E}");
+ok($sNFD, "\x{FA19}");
+
+my $sNFC = "\x{FA1B}";
+ok(NFC($sNFC), "\x{798F}");
+ok($sNFC, "\x{FA1B}");
+
+my $sNFKD = "\x{FA1E}";
+ok(NFKD($sNFKD), "\x{7FBD}");
+ok($sNFKD, "\x{FA1E}");
+
+my $sNFKC = "\x{FA26}";
+ok(NFKC($sNFKC), "\x{90FD}");
+ok($sNFKC, "\x{FA26}");
+
+# 17
+
+sub hexNFC {
+ join " ", map sprintf("%04X", $_),
+ _unpack_U NFC _pack_U map hex, split ' ', shift;
+}
+sub hexNFD {
+ join " ", map sprintf("%04X", $_),
+ _unpack_U NFD _pack_U map hex, split ' ', shift;
+}
+
+ok(hexNFD("1E14 AC01"), "0045 0304 0300 1100 1161 11A8");
+ok(hexNFD("AC00 AE00"), "1100 1161 1100 1173 11AF");
+
+ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
+ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
+ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
+ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
+ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
+ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");
+
+ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
+ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
+ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
+ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
+ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
+ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
+ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
+ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
+ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");
+
+ok(hexNFC("AC00 11A7"), "AC00 11A7");
+ok(hexNFC("AC00 11A8"), "AC01");
+ok(hexNFC("AC00 11A9"), "AC02");
+ok(hexNFC("AC00 11C2"), "AC1B");
+ok(hexNFC("AC00 11C3"), "AC00 11C3");
+
+# 39
+
+# Test Cases from Public Review Issue #29: Normalization Issue
+# cf. http://www.unicode.org/review/pr-29.html
+ok(hexNFC("0B47 0300 0B3E"), "0B47 0300 0B3E");
+ok(hexNFC("1100 0300 1161"), "1100 0300 1161");
+ok(hexNFC("0B47 0B3E 0300"), "0B4B 0300");
+ok(hexNFC("1100 1161 0300"), "AC00 0300");
+ok(hexNFC("0B47 0300 0B3E 0327"), "0B47 0300 0B3E 0327");
+ok(hexNFC("1100 0300 1161 0327"), "1100 0300 1161 0327");
+
+ok(hexNFC("0300 0041"), "0300 0041");
+ok(hexNFC("0300 0301 0041"), "0300 0301 0041");
+ok(hexNFC("0301 0300 0041"), "0301 0300 0041");
+ok(hexNFC("0000 0300 0000 0301"), "0000 0300 0000 0301");
+ok(hexNFC("0000 0301 0000 0300"), "0000 0301 0000 0300");
+
+ok(hexNFC("0327 0061 0300"), "0327 00E0");
+ok(hexNFC("0301 0061 0300"), "0301 00E0");
+ok(hexNFC("0315 0061 0300"), "0315 00E0");
+ok(hexNFC("0000 0327 0061 0300"), "0000 0327 00E0");
+ok(hexNFC("0000 0301 0061 0300"), "0000 0301 00E0");
+ok(hexNFC("0000 0315 0061 0300"), "0000 0315 00E0");
+
+# 56
+
+# NFC() and NFKC() should be unary.
+my $str11 = _pack_U(0x41, 0x0302, 0x0301, 0x62);
+my $str12 = _pack_U(0x1EA4, 0x62);
+ok(NFC $str11 eq $str12);
+ok(NFKC $str11 eq $str12);
+
+# NFD() and NFKD() should be unary.
+my $str21 = _pack_U(0xE0, 0xAC00);
+my $str22 = _pack_U(0x61, 0x0300, 0x1100, 0x1161);
+ok(NFD $str21 eq $str22);
+ok(NFKD $str21 eq $str22);
+
+# 60
+
+## Bug #53197: NFKC("\x{2000}") produces...
+
+ok(NFKC("\x{2002}") eq ' ');
+ok(NFKD("\x{2002}") eq ' ');
+ok(NFKC("\x{2000}") eq ' ');
+ok(NFKD("\x{2000}") eq ' ');
+
+ok(NFKC("\x{210C}") eq 'H');
+ok(NFKD("\x{210C}") eq 'H');
+ok(NFKC("\x{210D}") eq 'H');
+ok(NFKD("\x{210D}") eq 'H');
+
+ok(NFC("\x{F907}") eq "\x{9F9C}");
+ok(NFD("\x{F907}") eq "\x{9F9C}");
+ok(NFKC("\x{F907}") eq "\x{9F9C}");
+ok(NFKD("\x{F907}") eq "\x{9F9C}");
+
+# 72
+
diff --git a/dist/Unicode-Normalize/t/tie.t b/dist/Unicode-Normalize/t/tie.t
new file mode 100644
index 0000000000..4fdd121e07
--- /dev/null
+++ b/dist/Unicode-Normalize/t/tie.t
@@ -0,0 +1,82 @@
+
+BEGIN {
+ unless ('A' eq pack('U', 0x41)) {
+ print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
+ exit 0;
+ }
+ unless (0x41 == unpack('U', 'A')) {
+ print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
+ exit 0;
+ }
+}
+
+BEGIN {
+ if ($ENV{PERL_CORE}) {
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+#########################
+
+BEGIN {
+ use Unicode::Normalize qw(:all);
+
+ unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) {
+ print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
+ " needed for this test\n";
+ print $@;
+ exit;
+ }
+}
+
+use strict;
+use warnings;
+BEGIN { $| = 1; print "1..17\n"; }
+my $count = 0;
+sub ok ($;$) {
+ my $p = my $r = shift;
+ if (@_) {
+ my $x = shift;
+ $p = !defined $x ? !defined $r : !defined $r ? 0 : $r eq $x;
+ }
+ print $p ? "ok" : "not ok", ' ', ++$count, "\n";
+}
+
+ok(1);
+
+package tiescalar;
+sub TIESCALAR {
+ my ($class, $instance) = @_;
+ return bless \$instance => $class;
+}
+sub FETCH { return ${$_[0]}++ }
+sub STORE { return ${$_[0]} = $_[1] }
+sub DESTROY { undef ${$_[0]} }
+
+#########################
+
+package main;
+
+tie my $tie1, 'tiescalar', "123";
+ok(NFD($tie1), 123);
+ok(NFC($tie1), 124);
+ok(NFKD($tie1), 125);
+ok(NFKC($tie1), 126);
+ok(FCD($tie1), 127);
+ok(FCC($tie1), 128);
+
+tie my $tie2, 'tiescalar', "256";
+ok(normalize('NFD', $tie2), 256);
+ok(normalize('NFC', $tie2), 257);
+ok(normalize('NFKD', $tie2), 258);
+ok(normalize('NFKC', $tie2), 259);
+ok(normalize('FCD', $tie2), 260);
+ok(normalize('FCC', $tie2), 261);
+
+tie my $tie3, 'tiescalar', "315";
+ok(decompose($tie3), 315);
+ok(reorder($tie3), 316);
+ok(compose($tie3), 317);
+ok(composeContiguous($tie3), 318);
+