summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-11-09 00:23:40 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-11-09 00:23:40 +0000
commitac5ea53171baa7dab1a92df1eacf8d2fe19cbdbb (patch)
tree5ce2221e6a7335594861f38233d4d665313a7c82
parent61a515a61510e728f2014674d12cb94cb5a90834 (diff)
downloadperl-ac5ea53171baa7dab1a92df1eacf8d2fe19cbdbb.tar.gz
Upgrade to Unicode::Normalize 0.10, now in XS.
The CPAN distribution has both pm and XS implementations, and for performance reasons we choose the XS. Another reason to choose the XS is that it doesn't require Lingua::KO::Hangul::Util, which means that we can delete that module -- which in turn means that Unicode::UCD can no longer expect it to be installed: it still supports it, but doesn't expect it. Ditto Unicode::Collate. Note that Unicode::Normalize Makefile.PL and Normalize.xs have been modified from the CPAN 0.10 versions: the first one to be simpler (no pm) and clean up the generated unf*.h files, the second one to quench compiler grumblings. Must notify Sadahiro about these changes. p4raw-id: //depot/perl@12909
-rw-r--r--MANIFEST19
-rw-r--r--NetWare/Makefile90
-rw-r--r--djgpp/config.over1
-rw-r--r--epoc/config.sh2
-rw-r--r--ext/Unicode/Normalize/Changes38
-rw-r--r--ext/Unicode/Normalize/Makefile.PL15
-rw-r--r--ext/Unicode/Normalize/Normalize.pm45
-rw-r--r--ext/Unicode/Normalize/Normalize.pod89
-rw-r--r--ext/Unicode/Normalize/Normalize.xs378
-rw-r--r--ext/Unicode/Normalize/README (renamed from lib/Unicode/Normalize/README)23
-rw-r--r--ext/Unicode/Normalize/mkheader284
-rw-r--r--ext/Unicode/Normalize/t/func.t69
-rw-r--r--ext/Unicode/Normalize/t/norm.t (renamed from lib/Unicode/Normalize/t/norm.t)7
-rw-r--r--ext/Unicode/Normalize/t/test.t (renamed from lib/Unicode/Normalize/t/test.t)7
-rw-r--r--hints/uwin.sh2
-rw-r--r--hints/vmesa.sh1
-rw-r--r--lib/Lingua/KO/Hangul/Util.pm278
-rw-r--r--lib/Lingua/KO/Hangul/Util/Changes11
-rw-r--r--lib/Lingua/KO/Hangul/Util/README44
-rw-r--r--lib/Lingua/KO/Hangul/Util/t/test.t55
-rw-r--r--lib/Unicode/Collate.pm15
-rw-r--r--lib/Unicode/Normalize/Changes16
-rw-r--r--lib/Unicode/UCD.pm26
-rw-r--r--lib/Unicode/UCD.t8
-rw-r--r--win32/Makefile123
-rw-r--r--win32/makefile.mk4
26 files changed, 1109 insertions, 541 deletions
diff --git a/MANIFEST b/MANIFEST
index 627c8f05ca..63b0257fb9 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -596,6 +596,16 @@ ext/Time/HiRes/HiRes.pm Time::HiRes extension
ext/Time/HiRes/HiRes.t Test for Time::HiRes
ext/Time/HiRes/HiRes.xs Time::HiRes extension
ext/Time/HiRes/Makefile.PL Time::HiRes extension
+ext/Unicode/Normalize/Changes Unicode::Normalize
+ext/Unicode/Normalize/Makefile.PL Unicode::Normalize
+ext/Unicode/Normalize/mkheader Unicode::Normalize
+ext/Unicode/Normalize/Normalize.pm Unicode::Normalize
+ext/Unicode/Normalize/Normalize.pod Unicode::Normalize
+ext/Unicode/Normalize/Normalize.xs Unicode::Normalize
+ext/Unicode/Normalize/README Unicode::Normalize
+ext/Unicode/Normalize/t/func.t Unicode::Normalize
+ext/Unicode/Normalize/t/norm.t Unicode::Normalize
+ext/Unicode/Normalize/t/test.t Unicode::Normalize
ext/util/make_ext Used by Makefile to execute extension Makefiles
ext/XS/Typemap/Makefile.PL XS::Typemap extension
ext/XS/Typemap/README XS::Typemap extension
@@ -1005,10 +1015,6 @@ lib/IPC/SysV.t See if IPC::SysV works
lib/less.pm For "use less"
lib/less.t See if less support works
lib/lib_pm.PL For "use lib", produces lib/lib.pm
-lib/Lingua/KO/Hangul/Util.pm Lingua::KO::Hangul::Util
-lib/Lingua/KO/Hangul/Util/Changes Lingua::KO::Hangul::Util
-lib/Lingua/KO/Hangul/Util/README Lingua::KO::Hangul::Util
-lib/Lingua/KO/Hangul/Util/t/test.t Lingua::KO::Hangul::Util
lib/locale.pm For "use locale"
lib/locale.t See if locale support works
lib/Locale/Codes/t/all.t See if Locale::Codes work
@@ -1277,11 +1283,6 @@ lib/Unicode/Collate/Changes Unicode::Collate
lib/Unicode/Collate/keys.txt Unicode::Collate
lib/Unicode/Collate/README Unicode::Collate
lib/Unicode/Collate/t/test.t Unicode::Collate
-lib/Unicode/Normalize.pm Unicode::Normalize
-lib/Unicode/Normalize/Changes Unicode::Normalize
-lib/Unicode/Normalize/README Unicode::Normalize
-lib/Unicode/Normalize/t/norm.t Unicode::Normalize
-lib/Unicode/Normalize/t/test.t Unicode::Normalize
lib/Unicode/README Explanation what happened to lib/unicode.
lib/Unicode/UCD.pm Unicode character database
lib/Unicode/UCD.t See if Unicode character database works
diff --git a/NetWare/Makefile b/NetWare/Makefile
index ec06f7c395..ff879e9b93 100644
--- a/NetWare/Makefile
+++ b/NetWare/Makefile
@@ -258,26 +258,27 @@ NW_CFG_VARS = \
NW_CFGSH_TMPL = config.wc
NW_CFGH_TMPL = config_H.wc
-SOCKET_NLP = $(AUTODIR)\Socket\Socket.nlp
-FCNTL_NLP = $(AUTODIR)\Fcntl\Fcntl.nlp
-IO_NLP = $(AUTODIR)\IO\IO.nlp
-OPCODE_NLP = $(AUTODIR)\Opcode\Opcode.nlp
-SDBM_FILE_NLP = $(AUTODIR)\SDBM_File\SDBM_File.nlp
-POSIX_NLP = $(AUTODIR)\POSIX\POSIX.nlp
-ATTRS_NLP = $(AUTODIR)\attrs\attrs.nlp
-THREAD_NLP = $(AUTODIR)\Thread\Thread.nlp
-B_NLP = $(AUTODIR)\B\B.nlp
-DUMPER_NLP = $(AUTODIR)\Data\Dumper\Dumper.nlp
-PEEK_NLP = $(AUTODIR)\Devel\Peek\Peek.nlp
-RE_NLP = $(AUTODIR)\re\re.nlp
-BYTELOADER_NLP = $(AUTODIR)\ByteLoader\ByteLoader.nlp
-DPROF_NLP = $(AUTODIR)\Devel\DProf\DProf.nlp
-GLOB_NLP = $(AUTODIR)\File\Glob\Glob.nlp
-CWD_NLP = $(AUTODIR)\Cwd\Cwd.nlp
-STORABLE_NLP = $(AUTODIR)\Storable\Storable.nlp
-LISTUTIL_NLP = $(AUTODIR)\List\Util\Util.nlp
-MIMEBASE64_NLP = $(AUTODIR)\MIME\Base64\Base64.nlp
-XSTYPEMAP_NLP = $(AUTODIR)\XS\Typemap\Typemap.nlp
+SOCKET_NLP = $(AUTODIR)\Socket\Socket.nlp
+FCNTL_NLP = $(AUTODIR)\Fcntl\Fcntl.nlp
+IO_NLP = $(AUTODIR)\IO\IO.nlp
+OPCODE_NLP = $(AUTODIR)\Opcode\Opcode.nlp
+SDBM_FILE_NLP = $(AUTODIR)\SDBM_File\SDBM_File.nlp
+POSIX_NLP = $(AUTODIR)\POSIX\POSIX.nlp
+ATTRS_NLP = $(AUTODIR)\attrs\attrs.nlp
+THREAD_NLP = $(AUTODIR)\Thread\Thread.nlp
+B_NLP = $(AUTODIR)\B\B.nlp
+DUMPER_NLP = $(AUTODIR)\Data\Dumper\Dumper.nlp
+PEEK_NLP = $(AUTODIR)\Devel\Peek\Peek.nlp
+RE_NLP = $(AUTODIR)\re\re.nlp
+BYTELOADER_NLP = $(AUTODIR)\ByteLoader\ByteLoader.nlp
+DPROF_NLP = $(AUTODIR)\Devel\DProf\DProf.nlp
+GLOB_NLP = $(AUTODIR)\File\Glob\Glob.nlp
+CWD_NLP = $(AUTODIR)\Cwd\Cwd.nlp
+STORABLE_NLP = $(AUTODIR)\Storable\Storable.nlp
+LISTUTIL_NLP = $(AUTODIR)\List\Util\Util.nlp
+MIMEBASE64_NLP = $(AUTODIR)\MIME\Base64\Base64.nlp
+XSTYPEMAP_NLP = $(AUTODIR)\XS\Typemap\Typemap.nlp
+UNICODENORMALIZE_NLP = $(AUTODIR)\Unicode\Normalize\Normalize.nlp
EXTENSION_NLP = \
$(FCNTL_NLP) \
@@ -299,6 +300,7 @@ EXTENSION_NLP = \
$(LISTUTIL_NLP) \
$(MIMEBASE64_NLP) \
$(XSTYPEMAP_NLP) \
+ $(UNICODENORMALIZE_NLP) \
# $(CWD_NLP) \
# cwd.pm needs to be modifed for NetWare.
@@ -764,33 +766,35 @@ X2P_OBJ = $(X2P_SRC:.c=.obj)
DYNAMIC_EXT = Socket IO Fcntl Opcode SDBM_File POSIX attrs Thread B re \
Data/Dumper Devel/Peek ByteLoader Devel/DProf File/Glob \
- Storable/Storable List/Util MIME/Base64/Base64 XS/Typemap/Typemap
+ Storable/Storable List/Util MIME/Base64/Base64 \
+ XS/Typemap/Typemap Unicode/Normalize/Normalize
STATIC_EXT = DynaLoader
NONXS_EXT = Errno
-DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader
-SOCKET = $(EXTDIR)\Socket\Socket
-FCNTL = $(EXTDIR)\Fcntl\Fcntl
-OPCODE = $(EXTDIR)\Opcode\Opcode
-SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File
+DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader
+SOCKET = $(EXTDIR)\Socket\Socket
+FCNTL = $(EXTDIR)\Fcntl\Fcntl
+OPCODE = $(EXTDIR)\Opcode\Opcode
+SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File
IO = $(EXTDIR)\IO\IO
-POSIX = $(EXTDIR)\POSIX\POSIX
-ATTRS = $(EXTDIR)\attrs\attrs
-THREAD = $(EXTDIR)\Thread\Thread
+POSIX = $(EXTDIR)\POSIX\POSIX
+ATTRS = $(EXTDIR)\attrs\attrs
+THREAD = $(EXTDIR)\Thread\Thread
B = $(EXTDIR)\B\B
RE = $(EXTDIR)\re\re
-DUMPER = $(EXTDIR)\Data\Dumper\Dumper
-ERRNO = $(EXTDIR)\Errno\Errno
-PEEK = $(EXTDIR)\Devel\Peek\Peek
-BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader
-DPROF = $(EXTDIR)\Devel\DProf\DProf
-GLOB = $(EXTDIR)\File\Glob\Glob
+DUMPER = $(EXTDIR)\Data\Dumper\Dumper
+ERRNO = $(EXTDIR)\Errno\Errno
+PEEK = $(EXTDIR)\Devel\Peek\Peek
+BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader
+DPROF = $(EXTDIR)\Devel\DProf\DProf
+GLOB = $(EXTDIR)\File\Glob\Glob
CWD = $(EXTDIR)\Cwd\Cwd
-STORABLE = $(EXTDIR)\Storable\Storable
-LISTUTIL = $(EXTDIR)\List\Util
-MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64
-XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap
+STORABLE = $(EXTDIR)\Storable\Storable
+LISTUTIL = $(EXTDIR)\List\Util
+MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64
+XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap
+UNICODENORMALIZE = $(EXTDIR)\Unicode\Normalize\Normalize
EXTENSION_C = \
$(SOCKET).c \
@@ -813,6 +817,7 @@ EXTENSION_C = \
$(LISTUTIL).c \
$(MIMEBASE64).c \
$(XSTYPEMAP).c \
+ $(UNICODENORMALIZE).c \
POD2HTML = $(PODDIR)\pod2html
POD2MAN = $(PODDIR)\pod2man
@@ -1285,6 +1290,12 @@ $(XSTYPEMAP_NLP):
$(MAKE)
cd ..\..\..\netware
+$(UNICODENORMALIZE_NLP):
+ cd $(EXTDIR)\Unicode\$(*B)
+ ..\..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl
+ $(MAKE)
+ cd ..\..\..\netware
+
$(ERRNO_PM_NW):
cd $(EXTDIR)\$(*B)
..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl
@@ -1425,6 +1436,7 @@ distclean: clean nwclean
-del /f $(LIBDIR)\Data\Dumper.pm $(LIBDIR)\ByteLoader.pm
-del /f $(LIBDIR)\Devel\Peek.pm $(LIBDIR)\Devel\DProf.pm
-del /f $(LIBDIR)\File\Glob.pm
+ -del /f $(LIBDIR)\Unicode\Normalize.pm
-rmdir /s /q $(LIBDIR)\IO || rmdir /s $(LIBDIR)\IO
-rmdir /s /q $(LIBDIR)\Thread || rmdir /s $(LIBDIR)\Thread
-rmdir /s /q $(LIBDIR)\B || rmdir /s $(LIBDIR)\B
diff --git a/djgpp/config.over b/djgpp/config.over
index 5f58ba70b6..55eef9b9d5 100644
--- a/djgpp/config.over
+++ b/djgpp/config.over
@@ -46,6 +46,7 @@ repair()
-e 's=cwd=Cwd=' \
-e 's=perlio/via=PerlIO/Via=' \
-e 's=xs/typemap=XS/Typemap=' \
+ -e 's=unicode/normalize=Unicode/Normalize=' \
-e 's=i18n/langinfo=I18N/Langinfo='
}
static_ext=$(repair "$static_ext")
diff --git a/epoc/config.sh b/epoc/config.sh
index 42ada756ca..2cafe19167 100644
--- a/epoc/config.sh
+++ b/epoc/config.sh
@@ -426,7 +426,7 @@ emacs=''
eunicefix=':'
exe_ext=''
expr='expr'
-extensions='Data/Dumper Digest/MD5 Errno Fcntl File/Glob Filter/Util/Call IO List/Util MIME/Base64 Opcode PerlIO/Scalar Socket Storable Sys/Hostname attrs re'
+extensions='Data/Dumper Digest/MD5 Errno Fcntl File/Glob Filter/Util/Call IO List/Util MIME/Base64 Opcode PerlIO/Scalar Socket Storable Sys/Hostname Unicode/Normalize attrs re'
fflushNULL='undef'
fflushall='define'
find=''
diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes
new file mode 100644
index 0000000000..bf17449ab2
--- /dev/null
+++ b/ext/Unicode/Normalize/Changes
@@ -0,0 +1,38 @@
+Revision history for Perl extension Unicode::Normalize.
+
+0.10 Sat Nov 03 16:30:20 2001
+ - The XS version is now independent of Lingua::KO::Hangul::Util.
+ (though the Non-XS version still requires that.)
+
+0.09 Fri Nov 02 22:39:30 2001
+ - remove pTHX_.
+
+0.08 Thu Nov 01 23:20:42 2001
+ - use Lingua::KO::Hangul::Util 0.06 and remove "hangul.h".
+
+0.07 Wed Oct 31 22:06:42 2001
+ - modify internal. decompose() - reorder() - compose().
+
+0.06 Sun Oct 28 14:28:46 2001
+ - an XS version.
+ (but the Non-XS version is also supported.)
+
+0.05 Wed Oct 10 22:02:15 2001 (not released)
+ - %Compos contains unnecessary singletons
+ (though it did not cause any bug, only useless).
+ They will not be stored.
+
+0.04 Wed Aug 15 19:02:41 2001
+ - fix: NFD("") and NFKD("") must return "", not undef.
+
+0.03 Fri Aug 10 22:44:18 2001
+ - rename the module name to Unicode::Normalize.
+ - normalize takes two arguments.
+
+0.02 Thu Aug 9 22:56:36 2001
+ - add function normalize
+
+0.01 Mon Aug 6 21:45:11 2001
+ - original version; created by h2xs 1.21 with options
+ -A -X -n Text::Unicode::Normalize
+
diff --git a/ext/Unicode/Normalize/Makefile.PL b/ext/Unicode/Normalize/Makefile.PL
new file mode 100644
index 0000000000..88ab9b7b63
--- /dev/null
+++ b/ext/Unicode/Normalize/Makefile.PL
@@ -0,0 +1,15 @@
+use ExtUtils::MakeMaker;
+
+# This is not the CPAN Unicode::Normalize makefile
+# that can handle XS-NOXS installing. We do just XS.
+
+do "mkheader";  # writes the unf*.h headers that Normalize.xs #includes
+die "Unicode::Normalize: mkheader failed: $@" if $@;  # stop here rather than at compile time
+WriteMakefile(
+    'NAME' => 'Unicode::Normalize',
+    'VERSION_FROM' => 'Normalize.pm', # finds $VERSION
+    ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+      (ABSTRACT_FROM => 'Normalize.pod', # retrieve abstract from module
+       AUTHOR => 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>') : ()),
+    clean => {FILES=> 'unfcan.h unfcmb.h unfcmp.h unfcpt.h unfexc.h'}, # generated by mkheader
+);
diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm
new file mode 100644
index 0000000000..a583425a3b
--- /dev/null
+++ b/ext/Unicode/Normalize/Normalize.pm
@@ -0,0 +1,45 @@
+package Unicode::Normalize;
+
+use 5.006;
+use strict;
+use warnings;
+use Carp;
+
+our $VERSION = '0.10';
+our $PACKAGE = __PACKAGE__;
+
+require Exporter;
+require DynaLoader;
+require AutoLoader;
+
+our @ISA = qw(Exporter DynaLoader);
+our @EXPORT = qw( NFC NFD NFKC NFKD );
+our @EXPORT_OK = qw( normalize decompose reorder compose
+ getCanon getCompat getComposite getCombinClass getExclusion);
+our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );
+
+bootstrap Unicode::Normalize $VERSION;
+
+use constant CANON => 0;
+use constant COMPAT => 1;
+
+sub NFD ($) { reorder(decompose($_[0], CANON)) }
+
+sub NFKD ($) { reorder(decompose($_[0], COMPAT)) }
+
+sub NFC ($) { compose(reorder(decompose($_[0], CANON))) }
+
+sub NFKC ($) { compose(reorder(decompose($_[0], COMPAT))) }
+
+sub normalize($$)
+{
+ my $form = shift;
+ $form eq 'D' || $form eq 'NFD' ? NFD ($_[0]) :
+ $form eq 'C' || $form eq 'NFC' ? NFC ($_[0]) :
+ $form eq 'KD' || $form eq 'NFKD' ? NFKD($_[0]) :
+ $form eq 'KC' || $form eq 'NFKC' ? NFKC($_[0]) :
+ croak $PACKAGE."::normalize: invalid form name: $form";
+}
+
+1;
+__END__
diff --git a/ext/Unicode/Normalize/Normalize.pod b/ext/Unicode/Normalize/Normalize.pod
new file mode 100644
index 0000000000..4ac8966a83
--- /dev/null
+++ b/ext/Unicode/Normalize/Normalize.pod
@@ -0,0 +1,89 @@
+
+=head1 NAME
+
+Unicode::Normalize - normalized forms of Unicode text
+
+=head1 SYNOPSIS
+
+ use Unicode::Normalize;
+
+ $string_NFD = NFD($raw_string); # Normalization Form D
+ $string_NFC = NFC($raw_string); # Normalization Form C
+ $string_NFKD = NFKD($raw_string); # Normalization Form KD
+ $string_NFKC = NFKC($raw_string); # Normalization Form KC
+
+ or
+
+ use Unicode::Normalize 'normalize';
+
+ $string_NFD = normalize('D', $raw_string); # Normalization Form D
+ $string_NFC = normalize('C', $raw_string); # Normalization Form C
+ $string_NFKD = normalize('KD', $raw_string); # Normalization Form KD
+ $string_NFKC = normalize('KC', $raw_string); # Normalization Form KC
+
+=head1 DESCRIPTION
+
+=over 4
+
+=item C<$string_NFD = NFD($raw_string)>
+
+returns the Normalization Form D (formed by canonical decomposition).
+
+
+=item C<$string_NFC = NFC($raw_string)>
+
+returns the Normalization Form C (formed by canonical decomposition
+followed by canonical composition).
+
+=item C<$string_NFKD = NFKD($raw_string)>
+
+returns the Normalization Form KD (formed by compatibility decomposition).
+
+=item C<$string_NFKC = NFKC($raw_string)>
+
+returns the Normalization Form KC (formed by compatibility decomposition
+followed by B<canonical> composition).
+
+=item C<$normalized_string = normalize($form_name, $raw_string)>
+
+As C<$form_name>, one of the following names must be given.
+
+ 'C' or 'NFC' for Normalization Form C
+ 'D' or 'NFD' for Normalization Form D
+ 'KC' or 'NFKC' for Normalization Form KC
+ 'KD' or 'NFKD' for Normalization Form KD
+
+=back
+
+=head2 EXPORT
+
+C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default.
+
+C<normalize>: on request.
+
+=head1 AUTHOR
+
+SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt>
+
+ http://homepage1.nifty.com/nomenclator/perl/
+
+ Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+=over 4
+
+=item L<Lingua::KO::Hangul::Util>
+
+utility functions for Hangul Syllables
+
+=item http://www.unicode.org/unicode/reports/tr15/
+
+Unicode Normalization Forms - UAX #15
+
+=back
+
+=cut
diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs
new file mode 100644
index 0000000000..aca08538fb
--- /dev/null
+++ b/ext/Unicode/Normalize/Normalize.xs
@@ -0,0 +1,378 @@
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+/* These 5 files are prepared by mkheader */
+#include "unfcmb.h"
+#include "unfcan.h"
+#include "unfcpt.h"
+#include "unfcmp.h"
+#include "unfexc.h"
+
+/* Perl 5.6.1 ? */
+#ifndef uvuni_to_utf8
+#define uvuni_to_utf8 uv_to_utf8
+#endif /* uvuni_to_utf8 */
+
+/* Perl 5.6.1 ? */
+#ifndef utf8n_to_uvchr
+#define utf8n_to_uvchr utf8_to_uv
+#endif /* utf8n_to_uvchr */
+
+/* At present, char > 0x10ffff are unaffected without complaint, right? */
+#define VALID_UTF_MAX (0x10ffff)
+#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
+
+/* HANGUL_H */
+#define Hangul_SBase 0xAC00
+#define Hangul_SFinal 0xD7A3
+#define Hangul_SCount 11172
+
+#define Hangul_NCount 588
+
+#define Hangul_LBase 0x1100
+#define Hangul_LFinal 0x1112
+#define Hangul_LCount 19
+
+#define Hangul_VBase 0x1161
+#define Hangul_VFinal 0x1175
+#define Hangul_VCount 21
+
+#define Hangul_TBase 0x11A7
+#define Hangul_TFinal 0x11C2
+#define Hangul_TCount 28
+
+#define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal))
+#define Hangul_IsN(u) (! (((u) - Hangul_SBase) % Hangul_TCount))
+#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u))
+#define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal))
+#define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal))
+#define Hangul_IsT(u) ((Hangul_TBase < (u)) && ((u) <= Hangul_TFinal))
+/* HANGUL_H */
+
+/* this is used for canonical ordering of combining characters (c.c.). */
+typedef struct {
+ U8 cc; /* combining class */
+ UV uv; /* codepoint */
+ STRLEN pos; /* position */
+} UNF_cc;
+
+int compare_cc(const void *a, const void *b)
+{
+    const UNF_cc *x = (const UNF_cc *)a, *y = (const UNF_cc *)b;
+    /* qsort comparator: order by combining class, then by original position */
+    if(x->cc != y->cc) return (int)x->cc - (int)y->cc;
+    return (x->pos > y->pos) - (x->pos < y->pos); /* avoids unsigned STRLEN subtraction */
+}
+
+U8* dec_canonical (UV uv)
+{
+ U8 ***plane, **row;
+ if(OVER_UTF_MAX(uv)) return NULL;
+ plane = (U8***)UNF_canon[uv >> 16];
+ if(! plane) return NULL;
+ row = plane[(uv >> 8) & 0xff];
+ return row ? row[uv & 0xff] : NULL;
+}
+
+U8* dec_compat (UV uv)
+{
+ U8 ***plane, **row;
+ if(OVER_UTF_MAX(uv)) return NULL;
+ plane = (U8***)UNF_compat[uv >> 16];
+ if(! plane) return NULL;
+ row = plane[(uv >> 8) & 0xff];
+ return row ? row[uv & 0xff] : NULL;
+}
+
+UV getComposite (UV uv, UV uv2)
+{
+ UNF_complist ***plane, **row, *cell, *i;
+
+ if(! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2)) return 0;
+
+ if(Hangul_IsL(uv) && Hangul_IsV(uv2)) {
+ uv -= Hangul_LBase; /* lindex */
+ uv2 -= Hangul_VBase; /* vindex */
+ return(Hangul_SBase + (uv * Hangul_VCount + uv2) * Hangul_TCount);
+ }
+ if(Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
+ uv2 -= Hangul_TBase; /* tindex */
+ return (uv + uv2);
+ }
+ plane = UNF_compos[uv >> 16];
+ if(! plane) return 0;
+ row = plane[(uv >> 8) & 0xff];
+ if(! row) return 0;
+ cell = row[uv & 0xff];
+ if(! cell) return 0;
+ for(i = cell; i->nextchar; i++) {
+ if(uv2 == i->nextchar) return i->composite;
+ }
+ return 0;
+}
+
+U8 getCombinClass (UV uv)
+{
+ U8 **plane, *row;
+ if(OVER_UTF_MAX(uv)) return 0;
+ plane = (U8**)UNF_combin[uv >> 16];
+ if(! plane) return 0;
+ row = plane[(uv >> 8) & 0xff];
+ return row ? row[uv & 0xff] : 0;
+}
+
+void sv_cat_decompHangul (SV* sv, UV uv)
+{
+ UV sindex, lindex, vindex, tindex;
+ U8 *t, temp[3 * UTF8_MAXLEN + 1];
+
+ if(! Hangul_IsS(uv)) return;
+
+ sindex = uv - Hangul_SBase;
+ lindex = sindex / Hangul_NCount;
+ vindex = (sindex % Hangul_NCount) / Hangul_TCount;
+ tindex = sindex % Hangul_TCount;
+
+ t = temp;
+ t = uvuni_to_utf8(t, (lindex + Hangul_LBase));
+ t = uvuni_to_utf8(t, (vindex + Hangul_VBase));
+ if (tindex) t = uvuni_to_utf8(t, (tindex + Hangul_TBase));
+ *t = '\0';
+ sv_catpvn(sv, (char *)temp, strlen((char *)temp));
+}
+
+MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize
+
+
+SV*
+decompose(arg, compat)
+    SV * arg
+    SV * compat
+  PROTOTYPE: $$
+  PREINIT:
+    SV *src, *dst;
+    STRLEN srclen, retlen;
+    U8 *s, *e, *p, *r;
+    UV uv;
+    bool iscompat;
+  CODE:
+    if(SvUTF8(arg)) {
+        src = arg;
+    } else {
+        src = sv_mortalcopy(arg); /* upgrade a copy, not the caller's SV */
+        sv_utf8_upgrade(src);
+    }
+    /* compat (2nd arg, hence the $$ prototype): true selects dec_compat, false dec_canonical */
+    iscompat = SvTRUE(compat);
+    /* dst collects the decomposed text and is returned flagged as UTF-8 */
+    dst = newSV(1);
+    (void)SvPOK_only(dst);
+    SvUTF8_on(dst);
+    /* walk src one code point at a time */
+    s = (U8*)SvPV(src,srclen);
+    e = s + srclen;
+    for(p = s; p < e;){
+        uv = utf8n_to_uvchr(p, e - p, &retlen, 0);
+        p += retlen;
+        if(Hangul_IsS(uv)) sv_cat_decompHangul(dst, uv); /* Hangul syllable: computed, not table-driven */
+        else {
+            r = iscompat ? dec_compat(uv) : dec_canonical(uv);
+            if(r) sv_catpv(dst, (char *)r); /* table hit: append the stored mapping */
+            else sv_catpvn(dst, (char *)p - retlen, retlen); /* no mapping: copy the char as-is */
+        }
+    }
+    RETVAL = dst;
+  OUTPUT:
+    RETVAL
+
+
+
+SV*
+reorder(arg)
+ SV * arg
+ PROTOTYPE: $
+ PREINIT:
+ SV *src;
+ STRLEN srclen, retlen, stk_cc_max;
+ U8 *s, *e, *p, curCC;
+ UV uv;
+ UNF_cc * stk_cc;
+ CODE:
+ src = newSVsv(arg);
+ if(! SvUTF8(arg)) sv_utf8_upgrade(src);
+
+ stk_cc_max = 10; /* enough as an initial value? */
+ New(0, stk_cc, stk_cc_max, UNF_cc);
+
+ s = (U8*)SvPV(src,srclen);
+ e = s + srclen;
+ for(p = s; p < e;){
+ U8 *cc_in;
+ STRLEN cc_len, cc_iter, cc_pos;
+
+ uv = utf8n_to_uvchr(p, e - p, &retlen, 0);
+ p += retlen;
+ cc_pos = 0;
+ curCC = getCombinClass(uv);
+ if(! (curCC && p < e)) continue; else cc_in = p - retlen;
+
+ stk_cc[cc_pos].cc = curCC;
+ stk_cc[cc_pos].uv = uv;
+ stk_cc[cc_pos].pos = cc_pos;
+
+ while(p < e) {
+ uv = utf8n_to_uvchr(p, e - p, &retlen, 0);
+ curCC = getCombinClass(uv);
+ if(!curCC) break;
+ p += retlen;
+ cc_pos++;
+ if(stk_cc_max <= cc_pos) { /* extend if need */
+ stk_cc_max = cc_pos + 1;
+ Renew(stk_cc, stk_cc_max, UNF_cc);
+ }
+ stk_cc[cc_pos].cc = curCC;
+ stk_cc[cc_pos].uv = uv;
+ stk_cc[cc_pos].pos = cc_pos;
+ }
+
+ /* only one c.c. in cc_len from cc_in, no need of reordering */
+ if(!cc_pos) continue;
+
+ qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc);
+
+ cc_len = p - cc_in;
+ p = cc_in;
+ for(cc_iter = 0; cc_iter <= cc_pos; cc_iter++) {
+ p = uvuni_to_utf8(p, stk_cc[cc_iter].uv);
+ }
+ }
+ Safefree(stk_cc);
+ RETVAL = src;
+ OUTPUT:
+ RETVAL
+
+
+
+void
+compose(arg)
+ SV * arg
+ PROTOTYPE: $
+ PREINIT:
+ SV *src, *dst, *tmp;
+ U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
+ UV uv, uvS, uvComp;
+ STRLEN srclen, dstlen, tmplen, dstcur, retlen;
+ bool beginning = TRUE;
+ PPCODE:
+ if(SvUTF8(arg)) {
+ src = arg;
+ } else {
+ src = sv_mortalcopy(arg);
+ sv_utf8_upgrade(src);
+ }
+ s = (U8*)SvPV(src, srclen);
+ e = s + srclen;
+ dstlen = srclen + 1; /* equal or shorter, XXX */
+ dst = sv_2mortal(newSV(dstlen));
+ (void)SvPOK_only(dst);
+ SvUTF8_on(dst);
+ d = (U8*)SvPVX(dst);
+
+ /* for uncomposed combining char */
+ tmp = sv_2mortal(newSV(dstlen));
+ (void)SvPOK_only(tmp);
+ SvUTF8_on(tmp);
+
+ for(p = s; p < e;){
+ if(beginning) {
+ uvS = utf8n_to_uvchr(p, e - p, &retlen, 0);
+ p += retlen;
+
+ if (getCombinClass(uvS)){ /* no Starter found yet */
+ d = uvuni_to_utf8(d, uvS);
+ continue;
+ }
+ beginning = FALSE;
+ }
+
+ /* Starter */
+ t = tmp_start = (U8*)SvPVX(tmp);
+ preCC = 0;
+
+ /* to the next Starter */
+ while(p < e) {
+ uv = utf8n_to_uvchr(p, e - p, &retlen, 0);
+ p += retlen;
+ curCC = getCombinClass(uv);
+
+ if(preCC && preCC == curCC) {
+ preCC = curCC;
+ t = uvuni_to_utf8(t, uv);
+ } else {
+ uvComp = getComposite(uvS, uv);
+
+ /* S + C + S => S-S + C would be also blocked. */
+ if( uvComp && ! getExclusion(uvComp) && preCC <= curCC)
+ {
+ /* preCC not changed to curCC */
+ uvS = uvComp;
+ } else if (! curCC && p < e) { /* blocked */
+ break;
+ } else {
+ preCC = curCC;
+ t = uvuni_to_utf8(t, uv);
+ }
+ }
+ }
+ d = uvuni_to_utf8(d, uvS); /* composed char */
+ if(tmplen = t - tmp_start) { /* uncomposed combining char */
+ t = (U8*)SvPVX(tmp);
+ while(tmplen--) *d++ = *t++;
+ }
+ uvS = uv;
+ } /* for */
+ dstcur = d - (U8*)SvPVX(dst);
+ SvCUR_set(dst, dstcur);
+ XPUSHs(dst);
+
+
+
+U8
+getCombinClass(uv)
+ UV uv
+
+bool
+getExclusion(uv)
+ UV uv
+
+UV
+getComposite(uv, uv2)
+ UV uv
+ UV uv2
+
+SV*
+getCanon(uv)
+ UV uv
+ PROTOTYPE: $
+ ALIAS:
+ getCompat = 1
+ PREINIT:
+ U8 * rstr;
+ CODE:
+ if(Hangul_IsS(uv)) {
+ SV * dst;
+ dst = newSV(1);
+ (void)SvPOK_only(dst);
+ sv_cat_decompHangul(dst, uv);
+ RETVAL = dst;
+ } else {
+ rstr = ix ? dec_compat(uv) : dec_canonical(uv);
+ if(!rstr) XSRETURN_UNDEF;
+ RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
+ }
+ SvUTF8_on(RETVAL);
+ OUTPUT:
+ RETVAL
+
diff --git a/lib/Unicode/Normalize/README b/ext/Unicode/Normalize/README
index e1f9e962eb..3f0c4240fe 100644
--- a/lib/Unicode/Normalize/README
+++ b/ext/Unicode/Normalize/README
@@ -1,4 +1,4 @@
-Unicode/Normalize version 0.04
+Unicode/Normalize version 0.10
===================================
Unicode::Normalize - normalized forms of Unicode text
@@ -23,6 +23,8 @@ SYNOPSIS
INSTALLATION
+Perl 5.006 or later
+
To install this module type the following:
perl Makefile.PL
@@ -30,19 +32,26 @@ To install this module type the following:
make test
make install
+If you have a C compiler and want to use the XS version,
+type the following:
+
+ perl Makefile.PL xs
+ make
+ make test
+ make install
+
DEPENDENCIES
This module requires these other modules and libraries:
Carp
Exporter
+File::Copy
File::Spec
-Lingua::KO::Hangul::Util
-$unidir/CombiningClass.pl
-$unidir/Decomposition.pl
-$unidir/CompExcl.txt
-
-# $unidir is $LIB/unicore or $LIB/unicode
+Lingua::KO::Hangul::Util 0.06
+unicore/CombiningClass.pl or unicode/CombiningClass.pl
+unicore/Decomposition.pl or unicode/Decomposition.pl
+unicore/CompExcl.txt or unicode/CompExcl.txt
COPYRIGHT AND LICENCE
diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader
new file mode 100644
index 0000000000..85d2b90e62
--- /dev/null
+++ b/ext/Unicode/Normalize/mkheader
@@ -0,0 +1,284 @@
+#!perl
+#
+# This script generates "unfcan.h", "unfcpt.h", "unfcmb.h",
+# "unfcmp.h", and "unfexc.h"
+# from CombiningClass.pl, Decomposition.pl, CompExcl.txt
+# in lib/unicore or unicode directory
+# for Unicode::Normalize.xs. (cf. Makefile.PL)
+#
+use 5.006;
+use strict;
+use warnings;
+use Carp;
+
+our $PACKAGE = 'Unicode::Normalize, mkheader';
+
+our $Combin = do "unicore/CombiningClass.pl"
+ || do "unicode/CombiningClass.pl"
+ || croak "$PACKAGE: CombiningClass.pl not found";
+
+our $Decomp = do "unicore/Decomposition.pl"
+ || do "unicode/Decomposition.pl"
+ || croak "$PACKAGE: Decomposition.pl not found";
+
+our %Combin; # $codepoint => $number : combination class
+our %Canon; # $codepoint => $hexstring : canonical decomp.
+our %Compat; # $codepoint => $hexstring : compat. decomp.
+our %Compos; # $string => $codepoint : composite
+
+our %Exclus; # $codepoint => 1 : composition exclusions
+
+{
+ my($f, $fh);
+ foreach my $d (@INC) {
+ use File::Spec;
+ $f = File::Spec->catfile($d, "unicore", "CompExcl.txt");
+ last if open($fh, $f);
+ $f = File::Spec->catfile($d, "unicode", "CompExcl.txt");
+ last if open($fh, $f);
+ $f = undef;
+ }
+ croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f;
+ while(<$fh>) {
+ next if /^#/ or /^$/;
+ s/#.*//;
+ $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/;
+ }
+ close $fh;
+}
+
+while($Combin =~ /(.+)/g) {
+ my @tab = split /\t/, $1;
+ my $ini = hex $tab[0];
+ if($tab[1] eq '') {
+ $Combin{ $ini } = $tab[2];
+ } else {
+ $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]);
+ }
+}
+
+while($Decomp =~ /(.+)/g) {
+ my @tab = split /\t/, $1;
+ my $compat = $tab[2] =~ s/<[^>]+>//;
+ my $dec = [ _getHexArray($tab[2]) ]; # decomposition
+ my $com = pack('U*', @$dec); # composable sequence
+ my $ini = hex($tab[0]);
+ if($tab[1] eq '') {
+ $Compat{ $ini } = $dec;
+ if(! $compat) {
+ $Canon{ $ini } = $dec;
+ $Compos{ $com } = $ini if @$dec > 1;
+ }
+ } else {
+ foreach my $u ($ini .. hex($tab[1])){
+ $Compat{ $u } = $dec;
+ if(! $compat){
+ $Canon{ $u } = $dec;
+ $Compos{ $com } = $ini if @$dec > 1;
+ }
+ }
+ }
+}
+
+# exhaustive decomposition
+foreach my $key (keys %Canon) {
+ $Canon{$key} = [ getCanonList($key) ];
+}
+
+# exhaustive decomposition
+foreach my $key (keys %Compat) {
+ $Compat{$key} = [ getCompatList($key) ];
+}
+
+sub getCanonList {
+ my @src = @_;
+ my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src;
+ join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec);
+ # condition @src == @dec is not ok.
+}
+
+sub getCompatList {
+ my @src = @_;
+ my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src;
+ join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec);
+ # condition @src == @dec is not ok.
+}
+
+sub _getHexArray {
+ my $str = shift;
+ map hex(), $str =~ /([0-9A-Fa-f]+)/g;
+}
+
+sub _U_stringify {
+    sprintf '"%s"', join '',  # double-quoted C string literal built from \xNN escapes
+        map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_;  # %02x: zero-pad so values < 0x10 stay valid escapes
+}
+
+foreach my $hash (\%Canon, \%Compat) {
+ foreach my $key (keys %$hash) {
+ $hash->{$key} = _U_stringify( @{ $hash->{$key} } );
+ }
+}
+
+sub utf8len {
+ my $uv = shift;
+ return $uv < 0x80 ? 1 :
+ $uv < 0x800 ? 2 :
+ $uv < 0x10000 ? 3 :
+ $uv < 0x110000 ? 4 :
+ croak "$PACKAGE: illegal char in the composite. utf-8 max is 0x10ffff.";
+}
+
+my $prefix = "UNF_";
+
+my $structname = "${prefix}complist";
+
+our (%Comp1st, %CompList);
+
+foreach(sort keys %Compos) {
+ my @a = unpack('U*', $_);
+ my $val = $Compos{$_};
+ my $name = sprintf "${structname}_%06x", $a[0];
+ $Comp1st{ $a[0] } = $name;
+ $CompList{ $name }{ $a[1] } = $val;
+
+ if( utf8len($a[0]) + utf8len($a[1]) < utf8len($val) ) {
+ croak "$PACKAGE: "
+ . "composable pair is longer than the composite in bytes!\n"
+ . sprintf("%d + %d => %d", $a[0], $a[1], $val);
+ }
+}
+
+my $compinit =
+ "typedef struct { UV nextchar; UV composite; } $structname;\n\n";
+
+foreach my $i (sort keys %CompList) {
+ $compinit .= "$structname $i [] = {\n";
+ $compinit .= join ",\n",
+ map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}),
+ sort {$a <=> $b } keys %{ $CompList{$i} };
+ $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel
+}
+
+####################################
+
+my @Exclus = sort {$a <=> $b} keys %Exclus;
+
+my $file = "unfexc.h";
+open FH, ">$file" or croak "$PACKAGE: $file can't be made";
+binmode FH; select FH;
+
+print "bool getExclusion (UV uv) \n{\nreturn\n\t";
+
+while(@Exclus) {
+ my $cur = shift @Exclus;
+ if(@Exclus && $cur + 1 == $Exclus[0]) {
+ print "$cur <= uv && uv <= ";
+ while(@Exclus && $cur + 1 == $Exclus[0]) {
+ $cur = shift @Exclus;
+ }
+ print $cur;
+ print "\n\t|| " if @Exclus;
+ } else {
+ print "uv == $cur";
+ print "\n\t|| " if @Exclus;
+ }
+}
+
+print "\n\t? TRUE : FALSE;\n}\n\n";
+close FH;
+
+####################################
+
+my @tripletable = (
+ {
+ file => "unfcmb",
+ name => "combin",
+ type => "char",
+ hash => \%Combin,
+ null => 0,
+ },
+ {
+ file => "unfcan",
+ name => "canon",
+ type => "char*",
+ hash => \%Canon,
+ null => "NULL",
+ },
+ {
+ file => "unfcpt",
+ name => "compat",
+ type => "char*",
+ hash => \%Compat,
+ null => "NULL",
+ },
+ {
+ file => "unfcmp",
+ name => "compos",
+ type => "$structname *",
+ hash => \%Comp1st,
+ null => "NULL",
+ init => $compinit,
+ },
+);
+
+foreach my $tbl (@tripletable) {
+ my $file = "$tbl->{file}.h";
+ my $head = "${prefix}$tbl->{name}";
+ my $type = $tbl->{type};
+ my $hash = $tbl->{hash};
+ my $null = $tbl->{null};
+ my $init = $tbl->{init};
+
+ open FH, ">$file" or croak "$PACKAGE: $file can't be made";
+ binmode FH; select FH;
+ my %val;
+
+ print FH << 'EOF';
+/*
+ * This file is auto-generated by mkheader.
+ * Any changes here will be lost!
+ */
+EOF
+
+ print $init if defined $init;
+
+ foreach my $uv (keys %$hash) {
+ my @c = unpack 'CCCC', pack 'N', $uv;
+ $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
+ }
+
+ foreach my $p (sort { $a <=> $b } keys %val) {
+ next if ! $val{ $p };
+ for(my $r = 0; $r < 256; $r++){
+ next if ! $val{ $p }{ $r };
+ printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r;
+ for(my $c = 0; $c < 256; $c++){
+ print "\t", defined $val{$p}{$r}{$c} ? $val{$p}{$r}{$c} : $null;
+ print ',' if $c != 255;
+ print "\n" if $c % 8 == 7;
+ }
+ print "};\n\n";
+ }
+ }
+ foreach my $p (sort { $a <=> $b } keys %val) {
+ next if ! $val{ $p };
+ printf "$type* ${head}_%02x [256] = {\n", $p;
+ for(my $r = 0; $r < 256; $r++){
+ print $val{ $p }{ $r } ? sprintf("${head}_%02x_%02x", $p, $r) : "NULL";
+ print ',' if $r != 255;
+ print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
+ }
+ print "};\n\n";
+ }
+ print "$type** $head [] = {\n";
+ for(my $p = 0; $p <= 0x10; $p++){
+ print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
+ print ',' if $p != 0x10;
+ print "\n";
+ }
+ print "};\n\n";
+ close FH;
+}
+
+__END__
diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t
new file mode 100644
index 0000000000..8907634c47
--- /dev/null
+++ b/ext/Unicode/Normalize/t/func.t
@@ -0,0 +1,69 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.pl'
+
+#########################
+
+use Test;
+use strict;
+use warnings;
+# Six tests in total: the load check right below (via ok) plus five groups
+# of lookup checks.  Tests 2-6 print their own "ok N" / "not ok N" line, so
+# a group passes only when every condition chained with && holds.
+BEGIN { plan tests => 6 };
+use Unicode::Normalize qw(:all);
+ok(1); # If we made it this far, we're ok.
+
+#########################
+
+# Test 2: getCombinClass for code points 0, 768 and 1809
+# (the check for 119143 is currently disabled).
+print getCombinClass( 0) == 0
+ && getCombinClass( 768) == 230
+ && getCombinClass(1809) == 36
+# && getCombinClass(119143) == 1
+ ? "ok" : "not ok", " 2\n";
+
+# Test 3: getCanon returns undef for 0 and 41, and also for 0x212C and
+# 0x3243 (which do have a getCompat value, see test 4); every other result
+# is compared with the expected sequence built by pack('U*', ...).
+# 0xAC00 and 0xAE00 are Hangul syllables.
+print ! defined getCanon( 0)
+ && ! defined getCanon(41)
+ && getCanon(0x00C0) eq pack('U*', 0x0041, 0x0300)
+ && getCanon(0x00EF) eq pack('U*', 0x0069, 0x0308)
+ && getCanon(0x304C) eq pack('U*', 0x304B, 0x3099)
+ && getCanon(0x1EA4) eq pack('U*', 0x0041, 0x0302, 0x0301)
+ && getCanon(0x1FAF) eq pack('U*', 0x03A9, 0x0314, 0x0342, 0x0345)
+ && getCanon(0xAC00) eq pack('U*', 0x1100, 0x1161)
+ && getCanon(0xAE00) eq pack('U*', 0x1100, 0x1173, 0x11AF)
+ && ! defined getCanon(0x212C)
+ && ! defined getCanon(0x3243)
+ && getCanon(0xFA2D) eq pack('U*', 0x9DB4)
+ ? "ok" : "not ok", " 3\n";
+
+# Test 4: getCompat for the same code points as test 3; here 0x212C and
+# 0x3243 are expected to yield a sequence instead of undef.
+print ! defined getCompat( 0)
+ && ! defined getCompat(41)
+ && getCompat(0x00C0) eq pack('U*', 0x0041, 0x0300)
+ && getCompat(0x00EF) eq pack('U*', 0x0069, 0x0308)
+ && getCompat(0x304C) eq pack('U*', 0x304B, 0x3099)
+ && getCompat(0x1EA4) eq pack('U*', 0x0041, 0x0302, 0x0301)
+ && getCompat(0x1FAF) eq pack('U*', 0x03A9, 0x0314, 0x0342, 0x0345)
+ && getCompat(0x212C) eq pack('U*', 0x0042)
+ && getCompat(0x3243) eq pack('U*', 0x0028, 0x81F3, 0x0029)
+ && getCompat(0xAC00) eq pack('U*', 0x1100, 0x1161)
+ && getCompat(0xAE00) eq pack('U*', 0x1100, 0x1173, 0x11AF)
+ && getCompat(0xFA2D) eq pack('U*', 0x9DB4)
+ ? "ok" : "not ok", " 4\n";
+
+# Test 5: getComposite(first, second) must be false for pairs that are not
+# expected to compose and must equal the composed code point otherwise.
+# The 0x11xx / 0xADF8 / 0xAE00 cases cover Hangul jamo and syllables.
+print ! getComposite( 0, 0)
+ && ! getComposite( 0, 41)
+ && ! getComposite(41, 0)
+ && ! getComposite(41, 41)
+ && ! getComposite(12, 0x0300)
+ && ! getComposite(0x0055, 0xFF00)
+ && 0x00D9 == getComposite(0x0055, 0x0300)
+ && 0x1E14 == getComposite(0x0112, 0x0300)
+ && 0xAC00 == getComposite(0x1100, 0x1161)
+ && 0xADF8 == getComposite(0x1100, 0x1173)
+ && ! getComposite(0x1100, 0x11AF)
+ && ! getComposite(0x1173, 0x11AF)
+ && 0xAE00 == getComposite(0xADF8, 0x11AF)
+ ? "ok" : "not ok", " 5\n";
+
+# Test 6: getExclusion must be false for 0 and 41 and true for
+# 2392, 3907 and 64334.
+print ! getExclusion( 0)
+ && ! getExclusion(41)
+ && getExclusion(2392)
+ && getExclusion(3907)
+ && getExclusion(64334)
+ ? "ok" : "not ok", " 6\n";
diff --git a/lib/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t
index 88e4e7d441..1de2e7fcb8 100644
--- a/lib/Unicode/Normalize/t/norm.t
+++ b/ext/Unicode/Normalize/t/norm.t
@@ -6,7 +6,7 @@
use Test;
use strict;
use warnings;
-BEGIN { plan tests => 15 };
+BEGIN { plan tests => 18 };
use Unicode::Normalize qw(normalize);
ok(1); # If we made it this far, we're ok.
@@ -27,16 +27,17 @@ sub hexNFD {
ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
+ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
+ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
+ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");
ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
-
ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
-
ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");
diff --git a/lib/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t
index 499f3aec8f..5544a3b13b 100644
--- a/lib/Unicode/Normalize/t/test.t
+++ b/ext/Unicode/Normalize/t/test.t
@@ -6,7 +6,7 @@
use Test;
use strict;
use warnings;
-BEGIN { plan tests => 15 };
+BEGIN { plan tests => 18 };
use Unicode::Normalize;
ok(1); # If we made it this far, we're ok.
@@ -27,16 +27,17 @@ sub hexNFD {
ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062");
+ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01");
+ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00");
+ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF");
ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062");
-
ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062");
ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062");
-
ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000");
ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000");
diff --git a/hints/uwin.sh b/hints/uwin.sh
index b8dd26cc32..e5a09a698f 100644
--- a/hints/uwin.sh
+++ b/hints/uwin.sh
@@ -24,7 +24,7 @@ i_utime=undef
# compile/link flags
ldflags=-g
optimize=-g
-static_ext="B Data/Dumper Digest/MD5 Errno Fcntl Filter::Util::Call IO IPC/SysV MIME::Base64 Opcode PerlIO::Scalar POSIX SDBM_File Socket Storable attrs re"
+static_ext="B Data/Dumper Digest/MD5 Errno Fcntl Filter::Util::Call IO IPC/SysV MIME::Base64 Opcode PerlIO::Scalar POSIX SDBM_File Socket Storable Unicode::Normalize attrs re"
#static_ext=none
# dynamic loading needs work
usedl=undef
diff --git a/hints/vmesa.sh b/hints/vmesa.sh
index 20502c1e6c..f0c0232e06 100644
--- a/hints/vmesa.sh
+++ b/hints/vmesa.sh
@@ -218,7 +218,6 @@ dynamic_ext=''
eagain='EAGAIN'
ebcdic='define'
exe_ext=''
-extensions='Data/Dumper Digest/MD5 Errno Fcntl Filter/Util/Call GDBM_File IO IPC/SysV List/Util MIME/Base64 NDBM_File Opcode PerlIO/Scalar POSIX Socket Storable Time/HiRes Thread attrs re'
fpostype='fpos_t'
freetype='void'
groupstype='gid_t'
diff --git a/lib/Lingua/KO/Hangul/Util.pm b/lib/Lingua/KO/Hangul/Util.pm
deleted file mode 100644
index 3848592903..0000000000
--- a/lib/Lingua/KO/Hangul/Util.pm
+++ /dev/null
@@ -1,278 +0,0 @@
-package Lingua::KO::Hangul::Util;
-
-use 5.006;
-use strict;
-use warnings;
-
-require Exporter;
-
-our @ISA = qw(Exporter);
-our %EXPORT_TAGS = ();
-our @EXPORT_OK = ();
-our @EXPORT = qw(
- decomposeHangul
- composeHangul
- getHangulName
- parseHangulName
-);
-our $VERSION = '0.02';
-
-our @JamoL = ( # Initial (HANGUL CHOSEONG)
- "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
- "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H",
- );
-
-our @JamoV = ( # Medial (HANGUL JUNGSEONG)
- "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
- "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
- "YU", "EU", "YI", "I",
- );
-
-our @JamoT = ( # Final (HANGUL JONGSEONG)
- "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
- "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
- "S", "SS", "NG", "J", "C", "K", "T", "P", "H",
- );
-
-our $BlockName = "HANGUL SYLLABLE ";
-
-use constant SBase => 0xAC00;
-use constant LBase => 0x1100;
-use constant VBase => 0x1161;
-use constant TBase => 0x11A7;
-use constant LCount => 19; # scalar @JamoL
-use constant VCount => 21; # scalar @JamoV
-use constant TCount => 28; # scalar @JamoT
-use constant NCount => 588; # VCount * TCount
-use constant SCount => 11172; # LCount * NCount
-use constant SFinal => 0xD7A3; # SBase -1 + SCount
-
-our(%CodeL, %CodeV, %CodeT);
-@CodeL{@JamoL} = 0 .. LCount-1;
-@CodeV{@JamoV} = 0 .. VCount-1;
-@CodeT{@JamoT} = 0 .. TCount-1;
-
-sub getHangulName {
- my $code = shift;
- return undef unless SBase <= $code && $code <= SFinal;
- my $SIndex = $code - SBase;
- my $LIndex = int( $SIndex / NCount);
- my $VIndex = int(($SIndex % NCount) / TCount);
- my $TIndex = $SIndex % TCount;
- "$BlockName$JamoL[$LIndex]$JamoV[$VIndex]$JamoT[$TIndex]";
-}
-
-sub parseHangulName {
- my $arg = shift;
- return undef unless $arg =~ s/$BlockName//o;
- return undef unless $arg =~ /^([^AEIOUWY]*)([AEIOUWY]+)([^AEIOUWY]*)$/;
- return undef unless exists $CodeL{$1}
- && exists $CodeV{$2}
- && exists $CodeT{$3};
- SBase + $CodeL{$1} * NCount + $CodeV{$2} * TCount + $CodeT{$3};
-}
-
-sub decomposeHangul {
- my $code = shift;
- return unless SBase <= $code && $code <= SFinal;
- my $SIndex = $code - SBase;
- my $LIndex = int( $SIndex / NCount);
- my $VIndex = int(($SIndex % NCount) / TCount);
- my $TIndex = $SIndex % TCount;
- my @ret = (
- LBase + $LIndex,
- VBase + $VIndex,
- $TIndex ? (TBase + $TIndex) : (),
- );
- wantarray ? @ret : pack('U*', @ret);
-}
-
-#
-# To Do:
-# s/(\p{JamoL}\p{JamoV})/toHangLV($1)/ge;
-# s/(\p{HangLV}\p{JamoT})/toHangLVT($1)/ge;
-#
-sub composeHangul {
- my $str = shift;
- return $str unless length $str;
- my(@ret);
-
- foreach my $ch (unpack('U*', $str)) # Makes list! The string be short!
- {
- push(@ret, $ch) and next unless @ret;
-
- # 1. check to see if $ret[-1] is L and $ch is V.
- my $LIndex = $ret[-1] - LBase;
- if(0 <= $LIndex && $LIndex < LCount)
- {
- my $VIndex = $ch - VBase;
- if(0 <= $VIndex && $VIndex < VCount)
- {
- $ret[-1] = SBase + ($LIndex * VCount + $VIndex) * TCount;
- next; # discard $ch
- }
- }
-
- # 2. check to see if $ret[-1] is LV and $ch is T.
- my $SIndex = $ret[-1] - SBase;
- if(0 <= $SIndex && $SIndex < SCount && $SIndex % TCount == 0)
- {
- my $TIndex = $ch - TBase;
- if(0 <= $TIndex && $TIndex < TCount)
- {
- $ret[-1] += $TIndex;
- next; # discard $ch
- }
- }
-
- # 3. just append $ch
- push(@ret, $ch);
- }
- wantarray ? @ret : pack('U*', @ret);
-}
-
-1;
-__END__
-
-=head1 NAME
-
-Lingua::KO::Hangul::Util - utility functions for Hangul Syllables
-
-=head1 SYNOPSIS
-
- use Lingua::KO::Hangul::Util;
-
- decomposeHangul(0xAC00);
- # (0x1100,0x1161) or "\x{1100}\x{1161}"
-
- composeHangul("\x{1100}\x{1161}");
- # "\x{AC00}"
-
- getHangulName(0xAC00);
- # "HANGUL SYLLABLE GA"
-
- parseHangulName("HANGUL SYLLABLE GA");
- # 0xAC00
-
-=head1 DESCRIPTION
-
-A Hangul syllable consists of Hangul Jamo.
-
-Hangul Jamo are classified into three classes:
-
- CHOSEONG (the initial sound) as a leading consonant (L),
- JUNGSEONG (the medial sound) as a vowel (V),
- JONGSEONG (the final sound) as a trailing consonant (T).
-
-Any Hangul syllable is a composition of
-
- i) CHOSEONG + JUNGSEONG (L + V)
-
- or
-
- ii) CHOSEONG + JUNGSEONG + JONGSEONG (L + V + T).
-
-Names of Hangul Syllables have a format of C<"HANGUL SYLLABLE %s">.
-
-=head2 Composition and Decomposition
-
-=over 4
-
-=item C<$string_decomposed = decomposeHangul($codepoint)>
-
-=item C<@codepoints = decomposeHangul($codepoint)>
-
-Accepts unicode codepoint integer.
-
-If the specified codepoint is of a Hangul syllable,
-returns a list of codepoints (in a list context)
-or a UTF-8 string (in a scalar context)
-of its decomposition.
-
- decomposeHangul(0xAC00) # U+AC00 is HANGUL SYLLABLE GA.
- returns "\x{1100}\x{1161}" or (0x1100, 0x1161);
-
- decomposeHangul(0xAE00) # U+AE00 is HANGUL SYLLABLE GEUL.
- returns "\x{1100}\x{1173}\x{11AF}" or (0x1100, 0x1173, 0x11AF);
-
-Otherwise, returns false (empty string or empty list).
-
- decomposeHangul(0x0041) # outside Hangul Syllables
- returns empty string or empty list.
-
-=item C<$string_composed = composeHangul($src_string)>
-
-=item C<@codepoints_composed = composeHangul($src_string)>
-
-Any sequence of an initial Jamo C<L> and a medial Jamo C<V>
-is composed into a syllable C<LV>;
-then any sequence of a syllable C<LV> and a final Jamo C<T>
-is composed into a syllable C<LVT>.
-
-Any characters other than Hangul Jamo and Hangul Syllables
-are unaffected.
-
- composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}.")
- returns "Hangul \x{AC00}\x{AE00}." or
- (0x48,0x61,0x6E,0x67,0x75,0x6C,0x20,0xAC00,0xAE00,0x2E);
-
-=back
-
-=head2 Hangul Syllable Name
-
-=over 4
-
-=item C<$name = getHangulName($codepoint)>
-
-If the specified codepoint is of a Hangul syllable,
-returns its name; otherwise returns undef.
-
- getHangulName(0xAC00) returns "HANGUL SYLLABLE GA";
- getHangulName(0x0041) returns undef.
-
-=item C<$codepoint = parseHangulName($name)>
-
-If the specified name is of a Hangul syllable,
-returns its codepoint; otherwise returns undef.
-
- parseHangulName("HANGUL SYLLABLE GEUL") returns 0xAE00;
-
- parseHangulName("LATIN SMALL LETTER A") returns undef;
-
- parseHangulName("HANGUL SYLLABLE PERL") returns undef;
- # Regrettably, HANGUL SYLLABLE PERL does not exist :-)
-
-=back
-
-=head2 EXPORT
-
-By default,
-
- decomposeHangul
- composeHangul
- getHangulName
- parseHangulName
-
-=head1 AUTHOR
-
-SADAHIRO Tomoyuki
-
- bqw10602@nifty.com
- http://homepage1.nifty.com/nomenclator/perl/
-
- Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved.
-
- This program is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
-
-=head1 SEE ALSO
-
-=over 4
-
-=item http://www.unicode.org/unicode/reports/tr15
-
-Annex 10: Hangul, in Unicode Normalization Forms (UAX #15).
-
-=back
-
-=cut
diff --git a/lib/Lingua/KO/Hangul/Util/Changes b/lib/Lingua/KO/Hangul/Util/Changes
deleted file mode 100644
index 2e43817169..0000000000
--- a/lib/Lingua/KO/Hangul/Util/Changes
+++ /dev/null
@@ -1,11 +0,0 @@
-Revision history for Perl extension Lingua::KO::Hangul::Util.
-
-0.02 Sat Aug 11 00:16:02 2001
- - fix SEE ALSO (the Unicode Normalization Forms is UAX #15)
- - getHangulName and parseHangulName return
- a list (undef) of one element in list context.
-
-0.01 Fri Aug 3 21:25:11 2001
- - original version; created by h2xs 1.21 with options
- -A -X -n Lingua::KO::Hangul::Util
-
diff --git a/lib/Lingua/KO/Hangul/Util/README b/lib/Lingua/KO/Hangul/Util/README
deleted file mode 100644
index 9fc04d81cc..0000000000
--- a/lib/Lingua/KO/Hangul/Util/README
+++ /dev/null
@@ -1,44 +0,0 @@
-Lingua/KO/Hangul/Util version 0.02
-==================================
-
-SYNOPSIS
-
- use Lingua::KO::Hangul::Util;
-
- decomposeHangul(0xAC00);
- # (0x1100,0x1161) or "\x{1100}\x{1161}"
-
- composeHangul("\x{1100}\x{1161}");
- # "\x{AC00}"
-
- getHangulName(0xAC00);
- # "HANGUL SYLLABLE GA"
-
- parseHangulName("HANGUL SYLLABLE GA");
- # 0xAC00
-
-INSTALLATION
-
-To install this module type the following:
-
- perl Makefile.PL
- make
- make test
- make install
-
-DEPENDENCIES
-
-Perl 5.006 or later
-
-COPYRIGHT AND LICENCE
-
-SADAHIRO Tomoyuki
-
- bqw10602@nifty.com
-
- http://homepage1.nifty.com/nomenclator/perl/
-
- Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved.
-
- This program is free software; you can redistribute it and/or
- modify it under the same terms as Perl itself.
diff --git a/lib/Lingua/KO/Hangul/Util/t/test.t b/lib/Lingua/KO/Hangul/Util/t/test.t
deleted file mode 100644
index d4a5df5bb8..0000000000
--- a/lib/Lingua/KO/Hangul/Util/t/test.t
+++ /dev/null
@@ -1,55 +0,0 @@
-# Before `make install' is performed this script should be runnable with
-# `make test'. After `make install' it should work as `perl test.pl'
-
-#########################
-
-use Test;
-use strict;
-BEGIN { plan tests => 22 };
-use Lingua::KO::Hangul::Util;
-ok(1); # If we made it this far, we're ok.
-
-#########################
-
-sub unpk {
- join ':', map sprintf("%04X", $_),
- @_ == 1 ? unpack('U*', shift) : @_;
-}
-
-ok(getHangulName(0xAC00), "HANGUL SYLLABLE GA");
-ok(getHangulName(0xAE00), "HANGUL SYLLABLE GEUL");
-ok(getHangulName(0xC544), "HANGUL SYLLABLE A");
-ok(getHangulName(0xD7A3), "HANGUL SYLLABLE HIH");
-ok(getHangulName(0x11A3), undef);
-ok(getHangulName(0x0000), undef);
-
-ok(unpk(decomposeHangul(0xAC00)), "1100:1161");
-ok(unpk(decomposeHangul(0xAE00)), "1100:1173:11AF");
-ok(unpk(scalar decomposeHangul(0xAC00)), "1100:1161");
-ok(unpk(scalar decomposeHangul(0xAE00)), "1100:1173:11AF");
-ok(scalar decomposeHangul(0x0041), undef);
-ok(scalar decomposeHangul(0x0000), undef);
-
-ok(composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}."),
- "Hangul \x{AC00}\x{AE00}.");
-
-ok(parseHangulName("HANGUL SYLLABLE GA"), 0xAC00);
-ok(parseHangulName("HANGUL SYLLABLE GEUL"), 0xAE00);
-ok(parseHangulName("HANGUL SYLLABLE A"), 0xC544);
-ok(parseHangulName("HANGUL SYLLABLE HIH"), 0xD7A3);
-ok(parseHangulName("HANGUL SYLLABLE PERL"), undef);
-ok(parseHangulName("LATIN LETTER SMALL A"), undef);
-
-my $ng;
-
-$ng = 0;
-foreach my $i (0xAC00..0xD7A3){
- $ng ++ if $i != parseHangulName(getHangulName($i));
-}
-ok($ng, 0);
-
-$ng = 0;
-foreach my $i (0xAC00..0xD7A3){
- $ng ++ if $i != (composeHangul scalar decomposeHangul($i))[0];
-}
-ok($ng, 0);
diff --git a/lib/Unicode/Collate.pm b/lib/Unicode/Collate.pm
index 113613e18f..2ffda37faa 100644
--- a/lib/Unicode/Collate.pm
+++ b/lib/Unicode/Collate.pm
@@ -4,7 +4,6 @@ use 5.006;
use strict;
use warnings;
use Carp;
-use Lingua::KO::Hangul::Util;
require Exporter;
our $VERSION = '0.08';
@@ -19,6 +18,15 @@ our @EXPORT = ();
(our $Path = $INC{'Unicode/Collate.pm'}) =~ s/\.pm$//;
our $KeyFile = "allkeys.txt";
+# Lingua::KO::Hangul::Util is not part of the standard distribution,
+# but it will be used if it is available.
+
+eval { require Lingua::KO::Hangul::Util };
+my $hasHangulUtil = ! $@;
+if ($hasHangulUtil) {
+ Lingua::KO::Hangul::Util->import();
+}
+
our %Combin; # combining class from Unicode::Normalize
use constant Min2 => 0x20; # minimum weight at level 2
@@ -256,7 +264,10 @@ sub getWt
_isHangul($u)
? $hang
? &$hang($u)
- : map(@{ $ent->{pack('U', $_)} }, decomposeHangul($u))
+ : ($hasHangulUtil ?
+ map(@{ $ent->{pack('U', $_)} }, decomposeHangul($u)) :
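+ # Hangul::Util could not be loaded: retry the load and print the
+ # resulting error ($@).  NOTE(review): print's return value becomes
+ # part of the list produced here; croak may be what was intended.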
+ (eval 'use Lingua::KO::Hangul::Util', print $@))
: _isCJK($u)
? $cjk ? &$cjk($u) : map($self->altCE(0,@$_), _CJK($u))
: map($self->altCE(0,@$_), _derivCE($u));
diff --git a/lib/Unicode/Normalize/Changes b/lib/Unicode/Normalize/Changes
deleted file mode 100644
index 910016cb23..0000000000
--- a/lib/Unicode/Normalize/Changes
+++ /dev/null
@@ -1,16 +0,0 @@
-Revision history for Perl extension Unicode::Normalize.
-
-0.04 Wed Aug 15 19:02:41 2001
- - fix: NFD("") and NFKD("") must return "", not but undef.
-
-0.03 Fri Aug 10 22:44:18 2001
- - rename the module name to Unicode::Normalize.
- - normalize takes two arguments.
-
-0.02 Thu Aug 9 22:56:36 2001
- - add function normalize
-
-0.01 Mon Aug 6 21:45:11 2001
- - original version; created by h2xs 1.21 with options
- -A -X -n Text::Unicode::Normalize
-
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index 2cc0ece98e..0aaccd0c23 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -135,14 +135,26 @@ sub _getcode {
return;
}
-use Lingua::KO::Hangul::Util;
+# Lingua::KO::Hangul::Util is not part of the standard distribution,
+# but it will be used if it is available.
+
+eval { require Lingua::KO::Hangul::Util };
+my $hasHangulUtil = ! $@;
+if ($hasHangulUtil) {
+ Lingua::KO::Hangul::Util->import();
+}
sub hangul_decomp { # internal: called from charinfo
- my @tmp = decomposeHangul(shift);
- return
- @tmp == 2 ? sprintf("%04X %04X", @tmp) :
- @tmp == 3 ? sprintf("%04X %04X %04X", @tmp) :
- undef;
+ # The decomposition comes from decomposeHangul, which is only imported
+ # when Lingua::KO::Hangul::Util could be loaded ($hasHangulUtil).
+ # Returns the two or three code points as space-separated 4-digit hex.
+ if ($hasHangulUtil) {
+ my @tmp = decomposeHangul(shift);
+ return sprintf("%04X %04X", @tmp) if @tmp == 2;
+ return sprintf("%04X %04X %04X", @tmp) if @tmp == 3;
+ }
+ # Module unavailable, or the result was not 2 or 3 code points:
+ # bare return, i.e. no decomposition is reported.
+ return;
+}
+
+# Builds a placeholder name from the code point in 4-digit upper-case hex.
+# @CharinfoRanges uses it for the Hangul Syllables range in place of
+# getHangulName when $hasHangulUtil is false.
+sub hangul_charname { # internal: called from charinfo
+ return sprintf("HANGUL SYLLABLE-%04X", shift);
}
sub han_charname { # internal: called from charinfo
@@ -157,7 +169,7 @@ my @CharinfoRanges = (
# CJK Ideographs
[ 0x4E00, 0x9FA5, \&han_charname, undef ],
# Hangul Syllables
- [ 0xAC00, 0xD7A3, \&getHangulName, \&hangul_decomp ],
+ [ 0xAC00, 0xD7A3, $hasHangulUtil ? \&getHangulName : \&hangul_charname, \&hangul_decomp ],
# Non-Private Use High Surrogates
[ 0xD800, 0xDB7F, undef, undef ],
# Private Use High Surrogates
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index 0434eb92d4..e70e104874 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -106,11 +106,11 @@ ok($charinfo->{script}, 'Hebrew');
$charinfo = charinfo(0xAC00);
ok($charinfo->{code}, 'AC00');
-ok($charinfo->{name}, 'HANGUL SYLLABLE GA');
+ok($charinfo->{name}, 'HANGUL SYLLABLE-AC00');
ok($charinfo->{category}, 'Lo');
ok($charinfo->{combining}, '0');
ok($charinfo->{bidi}, 'L');
-ok($charinfo->{decomposition}, '1100 1161');
+ok($charinfo->{decomposition}, undef);
ok($charinfo->{decimal}, '');
ok($charinfo->{digit}, '');
ok($charinfo->{numeric}, '');
@@ -128,11 +128,11 @@ ok($charinfo->{script}, 'Hangul');
$charinfo = charinfo(0xAE00);
ok($charinfo->{code}, 'AE00');
-ok($charinfo->{name}, 'HANGUL SYLLABLE GEUL');
+ok($charinfo->{name}, 'HANGUL SYLLABLE-AE00');
ok($charinfo->{category}, 'Lo');
ok($charinfo->{combining}, '0');
ok($charinfo->{bidi}, 'L');
-ok($charinfo->{decomposition}, '1100 1173 11AF');
+ok($charinfo->{decomposition}, undef);
ok($charinfo->{decimal}, '');
ok($charinfo->{digit}, '');
ok($charinfo->{numeric}, '');
diff --git a/win32/Makefile b/win32/Makefile
index 5ae84c7d0f..c43b8c8992 100644
--- a/win32/Makefile
+++ b/win32/Makefile
@@ -592,63 +592,65 @@ PERLDLL_OBJ = $(PERLDLL_OBJ) $(WIN32_OBJ) $(DLL_OBJ)
SETARGV_OBJ = setargv$(o)
!ENDIF
-DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader
-SOCKET = $(EXTDIR)\Socket\Socket
-FCNTL = $(EXTDIR)\Fcntl\Fcntl
-OPCODE = $(EXTDIR)\Opcode\Opcode
-SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File
-IO = $(EXTDIR)\IO\IO
-POSIX = $(EXTDIR)\POSIX\POSIX
-ATTRS = $(EXTDIR)\attrs\attrs
-THREAD = $(EXTDIR)\Thread\Thread
-B = $(EXTDIR)\B\B
-RE = $(EXTDIR)\re\re
-DUMPER = $(EXTDIR)\Data\Dumper\Dumper
-ERRNO = $(EXTDIR)\Errno\Errno
-PEEK = $(EXTDIR)\Devel\Peek\Peek
-BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader
-DPROF = $(EXTDIR)\Devel\DProf\DProf
-GLOB = $(EXTDIR)\File\Glob\Glob
-HOSTNAME = $(EXTDIR)\Sys\Hostname\Hostname
-STORABLE = $(EXTDIR)\Storable\Storable
-FILTER = $(EXTDIR)\Filter\Util\Call\Call
-ENCODE = $(EXTDIR)\Encode\Encode
-MD5 = $(EXTDIR)\Digest\MD5\MD5
-PERLIOSCALAR = $(EXTDIR)\PerlIO\Scalar\Scalar
-MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64
-TIMEHIRES = $(EXTDIR)\Time\HiRes\HiRes
-CWD = $(EXTDIR)\Cwd\Cwd
-LISTUTIL = $(EXTDIR)\List\Util\Util
-PERLIOVIA = $(EXTDIR)\PerlIO\Via\Via
-XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap
-
-SOCKET_DLL = $(AUTODIR)\Socket\Socket.dll
-FCNTL_DLL = $(AUTODIR)\Fcntl\Fcntl.dll
-OPCODE_DLL = $(AUTODIR)\Opcode\Opcode.dll
-SDBM_FILE_DLL = $(AUTODIR)\SDBM_File\SDBM_File.dll
-IO_DLL = $(AUTODIR)\IO\IO.dll
-POSIX_DLL = $(AUTODIR)\POSIX\POSIX.dll
-ATTRS_DLL = $(AUTODIR)\attrs\attrs.dll
-THREAD_DLL = $(AUTODIR)\Thread\Thread.dll
-B_DLL = $(AUTODIR)\B\B.dll
-DUMPER_DLL = $(AUTODIR)\Data\Dumper\Dumper.dll
-PEEK_DLL = $(AUTODIR)\Devel\Peek\Peek.dll
-RE_DLL = $(AUTODIR)\re\re.dll
-BYTELOADER_DLL = $(AUTODIR)\ByteLoader\ByteLoader.dll
-DPROF_DLL = $(AUTODIR)\Devel\DProf\DProf.dll
-GLOB_DLL = $(AUTODIR)\File\Glob\Glob.dll
-HOSTNAME_DLL = $(AUTODIR)\Sys\Hostname\Hostname.dll
-STORABLE_DLL = $(AUTODIR)\Storable\Storable.dll
-FILTER_DLL = $(AUTODIR)\Filter\Util\Call\Call.dll
-ENCODE_DLL = $(AUTODIR)\Encode\Encode.dll
-MD5_DLL = $(AUTODIR)\Digest\MD5\MD5.dll
-PERLIOSCALAR_DLL= $(AUTODIR)\PerlIO\Scalar\Scalar.dll
-MIMEBASE64_DLL = $(AUTODIR)\MIME\Base64\Base64.dll
-TIMEHIRES_DLL = $(AUTODIR)\Time\HiRes\HiRes.dll
-CWD_DLL = $(AUTODIR)\Cwd\Cwd.dll
-LISTUTIL_DLL = $(AUTODIR)\List\Util\Util.dll
-PERLIOVIA_DLL = $(AUTODIR)\PerlIO\Via\Via.dll
-XSTYPEMAP_DLL = $(AUTODIR)\XS\Typemap\Typemap.dll
+DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader
+SOCKET = $(EXTDIR)\Socket\Socket
+FCNTL = $(EXTDIR)\Fcntl\Fcntl
+OPCODE = $(EXTDIR)\Opcode\Opcode
+SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File
+IO = $(EXTDIR)\IO\IO
+POSIX = $(EXTDIR)\POSIX\POSIX
+ATTRS = $(EXTDIR)\attrs\attrs
+THREAD = $(EXTDIR)\Thread\Thread
+B = $(EXTDIR)\B\B
+RE = $(EXTDIR)\re\re
+DUMPER = $(EXTDIR)\Data\Dumper\Dumper
+ERRNO = $(EXTDIR)\Errno\Errno
+PEEK = $(EXTDIR)\Devel\Peek\Peek
+BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader
+DPROF = $(EXTDIR)\Devel\DProf\DProf
+GLOB = $(EXTDIR)\File\Glob\Glob
+HOSTNAME = $(EXTDIR)\Sys\Hostname\Hostname
+STORABLE = $(EXTDIR)\Storable\Storable
+FILTER = $(EXTDIR)\Filter\Util\Call\Call
+ENCODE = $(EXTDIR)\Encode\Encode
+MD5 = $(EXTDIR)\Digest\MD5\MD5
+PERLIOSCALAR = $(EXTDIR)\PerlIO\Scalar\Scalar
+MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64
+TIMEHIRES = $(EXTDIR)\Time\HiRes\HiRes
+CWD = $(EXTDIR)\Cwd\Cwd
+LISTUTIL = $(EXTDIR)\List\Util\Util
+PERLIOVIA = $(EXTDIR)\PerlIO\Via\Via
+XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap
+UNICODENORMALIZE = $(EXTDIR)\Unicode\Normalize\Normalize
+
+SOCKET_DLL = $(AUTODIR)\Socket\Socket.dll
+FCNTL_DLL = $(AUTODIR)\Fcntl\Fcntl.dll
+OPCODE_DLL = $(AUTODIR)\Opcode\Opcode.dll
+SDBM_FILE_DLL = $(AUTODIR)\SDBM_File\SDBM_File.dll
+IO_DLL = $(AUTODIR)\IO\IO.dll
+POSIX_DLL = $(AUTODIR)\POSIX\POSIX.dll
+ATTRS_DLL = $(AUTODIR)\attrs\attrs.dll
+THREAD_DLL = $(AUTODIR)\Thread\Thread.dll
+B_DLL = $(AUTODIR)\B\B.dll
+DUMPER_DLL = $(AUTODIR)\Data\Dumper\Dumper.dll
+PEEK_DLL = $(AUTODIR)\Devel\Peek\Peek.dll
+RE_DLL = $(AUTODIR)\re\re.dll
+BYTELOADER_DLL = $(AUTODIR)\ByteLoader\ByteLoader.dll
+DPROF_DLL = $(AUTODIR)\Devel\DProf\DProf.dll
+GLOB_DLL = $(AUTODIR)\File\Glob\Glob.dll
+HOSTNAME_DLL = $(AUTODIR)\Sys\Hostname\Hostname.dll
+STORABLE_DLL = $(AUTODIR)\Storable\Storable.dll
+FILTER_DLL = $(AUTODIR)\Filter\Util\Call\Call.dll
+ENCODE_DLL = $(AUTODIR)\Encode\Encode.dll
+MD5_DLL = $(AUTODIR)\Digest\MD5\MD5.dll
+PERLIOSCALAR_DLL = $(AUTODIR)\PerlIO\Scalar\Scalar.dll
+MIMEBASE64_DLL = $(AUTODIR)\MIME\Base64\Base64.dll
+TIMEHIRES_DLL = $(AUTODIR)\Time\HiRes\HiRes.dll
+CWD_DLL = $(AUTODIR)\Cwd\Cwd.dll
+LISTUTIL_DLL = $(AUTODIR)\List\Util\Util.dll
+PERLIOVIA_DLL = $(AUTODIR)\PerlIO\Via\Via.dll
+XSTYPEMAP_DLL = $(AUTODIR)\XS\Typemap\Typemap.dll
+UNICODENORMALIZE_DLL = $(AUTODIR)\Unicode\Normalize\Normalize.dll
EXTENSION_C = \
$(SOCKET).c \
@@ -677,7 +679,8 @@ EXTENSION_C = \
$(CWD).c \
$(LISTUTIL).c \
$(PERLIOVIA).c \
- $(XSTYPEMAP).c
+ $(XSTYPEMAP).c \
+ $(UNICODENORMALIZE).c
EXTENSION_DLL = \
$(SOCKET_DLL) \
@@ -706,7 +709,8 @@ EXTENSION_DLL = \
$(CWD_DLL) \
$(LISTUTIL_DLL) \
$(PERLIOVIA_DLL) \
- $(XSTYPEMAP_DLL)
+ $(XSTYPEMAP_DLL) \
+ $(UNICODENORMALIZE_DLL)
POD2HTML = $(PODDIR)\pod2html
POD2MAN = $(PODDIR)\pod2man
@@ -968,6 +972,7 @@ distclean: clean
-del /f $(LIBDIR)\Scalar\Util.pm
-del /f $(LIBDIR)\Time\HiRes.pm
-del /f $(LIBDIR)\XS\Typemap.pm
+ -del /f $(LIBDIR)\Unicode\Normalize.pm
-if exist $(LIBDIR)\IO rmdir /s /q $(LIBDIR)\IO
-rmdir /s $(LIBDIR)\IO
-if exist $(LIBDIR)\Thread rmdir /s /q $(LIBDIR)\Thread
diff --git a/win32/makefile.mk b/win32/makefile.mk
index bff42335d6..fd2b5ffde3 100644
--- a/win32/makefile.mk
+++ b/win32/makefile.mk
@@ -748,7 +748,8 @@ SETARGV_OBJ = setargv$(o)
DYNAMIC_EXT = Socket IO Fcntl Opcode SDBM_File POSIX attrs Thread B re \
Data/Dumper Devel/Peek ByteLoader Devel/DProf File/Glob \
Sys/Hostname Storable Filter/Util/Call Encode \
- Digest/MD5 PerlIO/Scalar MIME/Base64 Time/HiRes
+ Digest/MD5 PerlIO/Scalar MIME/Base64 Time/HiRes \
+ Unicode/Normalize
STATIC_EXT = DynaLoader
NONXS_EXT = Errno
@@ -1123,6 +1124,7 @@ distclean: clean
-del /f $(LIBDIR)\Time\HiRes.pm
-del /f $(LIBDIR)\List\Util.pm
-del /f $(LIBDIR)\Scalar\Util.pm
+ -del /f $(LIBDIR)\Unicode\Normalize.pm
-if exist $(LIBDIR)\IO rmdir /s /q $(LIBDIR)\IO || rmdir /s $(LIBDIR)\IO
-if exist $(LIBDIR)\Thread rmdir /s /q $(LIBDIR)\Thread || rmdir /s $(LIBDIR)\Thread
-if exist $(LIBDIR)\B rmdir /s /q $(LIBDIR)\B || rmdir /s $(LIBDIR)\B