diff options
26 files changed, 1109 insertions, 541 deletions
@@ -596,6 +596,16 @@ ext/Time/HiRes/HiRes.pm Time::HiRes extension ext/Time/HiRes/HiRes.t Test for Time::HiRes ext/Time/HiRes/HiRes.xs Time::HiRes extension ext/Time/HiRes/Makefile.PL Time::HiRes extension +ext/Unicode/Normalize/Changes Unicode::Normalize +ext/Unicode/Normalize/Makefile.PL Unicode::Normalize +ext/Unicode/Normalize/mkheader Unicode::Normalize +ext/Unicode/Normalize/Normalize.pm Unicode::Normalize +ext/Unicode/Normalize/Normalize.pod Unicode::Normalize +ext/Unicode/Normalize/Normalize.xs Unicode::Normalize +ext/Unicode/Normalize/README Unicode::Normalize +ext/Unicode/Normalize/t/func.t Unicode::Normalize +ext/Unicode/Normalize/t/norm.t Unicode::Normalize +ext/Unicode/Normalize/t/test.t Unicode::Normalize ext/util/make_ext Used by Makefile to execute extension Makefiles ext/XS/Typemap/Makefile.PL XS::Typemap extension ext/XS/Typemap/README XS::Typemap extension @@ -1005,10 +1015,6 @@ lib/IPC/SysV.t See if IPC::SysV works lib/less.pm For "use less" lib/less.t See if less support works lib/lib_pm.PL For "use lib", produces lib/lib.pm -lib/Lingua/KO/Hangul/Util.pm Lingua::KO::Hangul::Util -lib/Lingua/KO/Hangul/Util/Changes Lingua::KO::Hangul::Util -lib/Lingua/KO/Hangul/Util/README Lingua::KO::Hangul::Util -lib/Lingua/KO/Hangul/Util/t/test.t Lingua::KO::Hangul::Util lib/locale.pm For "use locale" lib/locale.t See if locale support works lib/Locale/Codes/t/all.t See if Locale::Codes work @@ -1277,11 +1283,6 @@ lib/Unicode/Collate/Changes Unicode::Collate lib/Unicode/Collate/keys.txt Unicode::Collate lib/Unicode/Collate/README Unicode::Collate lib/Unicode/Collate/t/test.t Unicode::Collate -lib/Unicode/Normalize.pm Unicode::Normalize -lib/Unicode/Normalize/Changes Unicode::Normalize -lib/Unicode/Normalize/README Unicode::Normalize -lib/Unicode/Normalize/t/norm.t Unicode::Normalize -lib/Unicode/Normalize/t/test.t Unicode::Normalize lib/Unicode/README Explanation what happened to lib/unicode. 
lib/Unicode/UCD.pm Unicode character database lib/Unicode/UCD.t See if Unicode character database works diff --git a/NetWare/Makefile b/NetWare/Makefile index ec06f7c395..ff879e9b93 100644 --- a/NetWare/Makefile +++ b/NetWare/Makefile @@ -258,26 +258,27 @@ NW_CFG_VARS = \ NW_CFGSH_TMPL = config.wc NW_CFGH_TMPL = config_H.wc -SOCKET_NLP = $(AUTODIR)\Socket\Socket.nlp -FCNTL_NLP = $(AUTODIR)\Fcntl\Fcntl.nlp -IO_NLP = $(AUTODIR)\IO\IO.nlp -OPCODE_NLP = $(AUTODIR)\Opcode\Opcode.nlp -SDBM_FILE_NLP = $(AUTODIR)\SDBM_File\SDBM_File.nlp -POSIX_NLP = $(AUTODIR)\POSIX\POSIX.nlp -ATTRS_NLP = $(AUTODIR)\attrs\attrs.nlp -THREAD_NLP = $(AUTODIR)\Thread\Thread.nlp -B_NLP = $(AUTODIR)\B\B.nlp -DUMPER_NLP = $(AUTODIR)\Data\Dumper\Dumper.nlp -PEEK_NLP = $(AUTODIR)\Devel\Peek\Peek.nlp -RE_NLP = $(AUTODIR)\re\re.nlp -BYTELOADER_NLP = $(AUTODIR)\ByteLoader\ByteLoader.nlp -DPROF_NLP = $(AUTODIR)\Devel\DProf\DProf.nlp -GLOB_NLP = $(AUTODIR)\File\Glob\Glob.nlp -CWD_NLP = $(AUTODIR)\Cwd\Cwd.nlp -STORABLE_NLP = $(AUTODIR)\Storable\Storable.nlp -LISTUTIL_NLP = $(AUTODIR)\List\Util\Util.nlp -MIMEBASE64_NLP = $(AUTODIR)\MIME\Base64\Base64.nlp -XSTYPEMAP_NLP = $(AUTODIR)\XS\Typemap\Typemap.nlp +SOCKET_NLP = $(AUTODIR)\Socket\Socket.nlp +FCNTL_NLP = $(AUTODIR)\Fcntl\Fcntl.nlp +IO_NLP = $(AUTODIR)\IO\IO.nlp +OPCODE_NLP = $(AUTODIR)\Opcode\Opcode.nlp +SDBM_FILE_NLP = $(AUTODIR)\SDBM_File\SDBM_File.nlp +POSIX_NLP = $(AUTODIR)\POSIX\POSIX.nlp +ATTRS_NLP = $(AUTODIR)\attrs\attrs.nlp +THREAD_NLP = $(AUTODIR)\Thread\Thread.nlp +B_NLP = $(AUTODIR)\B\B.nlp +DUMPER_NLP = $(AUTODIR)\Data\Dumper\Dumper.nlp +PEEK_NLP = $(AUTODIR)\Devel\Peek\Peek.nlp +RE_NLP = $(AUTODIR)\re\re.nlp +BYTELOADER_NLP = $(AUTODIR)\ByteLoader\ByteLoader.nlp +DPROF_NLP = $(AUTODIR)\Devel\DProf\DProf.nlp +GLOB_NLP = $(AUTODIR)\File\Glob\Glob.nlp +CWD_NLP = $(AUTODIR)\Cwd\Cwd.nlp +STORABLE_NLP = $(AUTODIR)\Storable\Storable.nlp +LISTUTIL_NLP = $(AUTODIR)\List\Util\Util.nlp +MIMEBASE64_NLP = $(AUTODIR)\MIME\Base64\Base64.nlp 
+XSTYPEMAP_NLP = $(AUTODIR)\XS\Typemap\Typemap.nlp +UNICODENORMALIZE_NLP = $(AUTODIR)\Unicode\Normalize\Normalize.nlp EXTENSION_NLP = \ $(FCNTL_NLP) \ @@ -299,6 +300,7 @@ EXTENSION_NLP = \ $(LISTUTIL_NLP) \ $(MIMEBASE64_NLP) \ $(XSTYPEMAP_NLP) \ + $(UNICODENORMALIZE_NLP) \ # $(CWD_NLP) \ # cwd.pm needs to be modifed for NetWare. @@ -764,33 +766,35 @@ X2P_OBJ = $(X2P_SRC:.c=.obj) DYNAMIC_EXT = Socket IO Fcntl Opcode SDBM_File POSIX attrs Thread B re \ Data/Dumper Devel/Peek ByteLoader Devel/DProf File/Glob \ - Storable/Storable List/Util MIME/Base64/Base64 XS/Typemap/Typemap + Storable/Storable List/Util MIME/Base64/Base64 \ + XS/Typemap/Typemap Unicode/Normalize/Normalize STATIC_EXT = DynaLoader NONXS_EXT = Errno -DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader -SOCKET = $(EXTDIR)\Socket\Socket -FCNTL = $(EXTDIR)\Fcntl\Fcntl -OPCODE = $(EXTDIR)\Opcode\Opcode -SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File +DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader +SOCKET = $(EXTDIR)\Socket\Socket +FCNTL = $(EXTDIR)\Fcntl\Fcntl +OPCODE = $(EXTDIR)\Opcode\Opcode +SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File IO = $(EXTDIR)\IO\IO -POSIX = $(EXTDIR)\POSIX\POSIX -ATTRS = $(EXTDIR)\attrs\attrs -THREAD = $(EXTDIR)\Thread\Thread +POSIX = $(EXTDIR)\POSIX\POSIX +ATTRS = $(EXTDIR)\attrs\attrs +THREAD = $(EXTDIR)\Thread\Thread B = $(EXTDIR)\B\B RE = $(EXTDIR)\re\re -DUMPER = $(EXTDIR)\Data\Dumper\Dumper -ERRNO = $(EXTDIR)\Errno\Errno -PEEK = $(EXTDIR)\Devel\Peek\Peek -BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader -DPROF = $(EXTDIR)\Devel\DProf\DProf -GLOB = $(EXTDIR)\File\Glob\Glob +DUMPER = $(EXTDIR)\Data\Dumper\Dumper +ERRNO = $(EXTDIR)\Errno\Errno +PEEK = $(EXTDIR)\Devel\Peek\Peek +BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader +DPROF = $(EXTDIR)\Devel\DProf\DProf +GLOB = $(EXTDIR)\File\Glob\Glob CWD = $(EXTDIR)\Cwd\Cwd -STORABLE = $(EXTDIR)\Storable\Storable -LISTUTIL = $(EXTDIR)\List\Util -MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64 -XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap +STORABLE = 
$(EXTDIR)\Storable\Storable +LISTUTIL = $(EXTDIR)\List\Util +MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64 +XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap +UNICODENORMALIZE = $(EXTDIR)\Unicode\Normalize\Normalize EXTENSION_C = \ $(SOCKET).c \ @@ -813,6 +817,7 @@ EXTENSION_C = \ $(LISTUTIL).c \ $(MIMEBASE64).c \ $(XSTYPEMAP).c \ + $(UNICODENORMALIZE).c \ POD2HTML = $(PODDIR)\pod2html POD2MAN = $(PODDIR)\pod2man @@ -1285,6 +1290,12 @@ $(XSTYPEMAP_NLP): $(MAKE) cd ..\..\..\netware +$(UNICODENORMALIZE_NLP): + cd $(EXTDIR)\Unicode\$(*B) + ..\..\..\miniperl -I..\..\..\lib Makefile.PL INSTALLDIRS=perl + $(MAKE) + cd ..\..\..\netware + $(ERRNO_PM_NW): cd $(EXTDIR)\$(*B) ..\..\miniperl -I..\..\lib Makefile.PL INSTALLDIRS=perl @@ -1425,6 +1436,7 @@ distclean: clean nwclean -del /f $(LIBDIR)\Data\Dumper.pm $(LIBDIR)\ByteLoader.pm -del /f $(LIBDIR)\Devel\Peek.pm $(LIBDIR)\Devel\DProf.pm -del /f $(LIBDIR)\File\Glob.pm + -del /f $(LIBDIR)\Unicode\Normalize.pm -rmdir /s /q $(LIBDIR)\IO || rmdir /s $(LIBDIR)\IO -rmdir /s /q $(LIBDIR)\Thread || rmdir /s $(LIBDIR)\Thread -rmdir /s /q $(LIBDIR)\B || rmdir /s $(LIBDIR)\B diff --git a/djgpp/config.over b/djgpp/config.over index 5f58ba70b6..55eef9b9d5 100644 --- a/djgpp/config.over +++ b/djgpp/config.over @@ -46,6 +46,7 @@ repair() -e 's=cwd=Cwd=' \ -e 's=perlio/via=PerlIO/Via=' \ -e 's=xs/typemap=XS/Typemap=' \ + -e 's=unicode/normalize=Unicode/Normalize=' \ -e 's=i18n/langinfo=I18N/Langinfo=' } static_ext=$(repair "$static_ext") diff --git a/epoc/config.sh b/epoc/config.sh index 42ada756ca..2cafe19167 100644 --- a/epoc/config.sh +++ b/epoc/config.sh @@ -426,7 +426,7 @@ emacs='' eunicefix=':' exe_ext='' expr='expr' -extensions='Data/Dumper Digest/MD5 Errno Fcntl File/Glob Filter/Util/Call IO List/Util MIME/Base64 Opcode PerlIO/Scalar Socket Storable Sys/Hostname attrs re' +extensions='Data/Dumper Digest/MD5 Errno Fcntl File/Glob Filter/Util/Call IO List/Util MIME/Base64 Opcode PerlIO/Scalar Socket Storable Sys/Hostname Unicode/Normalize attrs re' 
fflushNULL='undef' fflushall='define' find='' diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes new file mode 100644 index 0000000000..bf17449ab2 --- /dev/null +++ b/ext/Unicode/Normalize/Changes @@ -0,0 +1,38 @@ +Revision history for Perl extension Unicode::Normalize. + +0.10 Sat Nov 03 16:30:20 2001 + - The XS version is now independent of Lingua::KO::Hangul::Util. + (though the Non-XS version still requires that.) + +0.09 Fri Nov 02 22:39:30 2001 + - remove pTHX_. + +0.08 Thu Nov 01 23:20:42 2001 + - use Lingua::KO::Hangul::Util 0.06 and remove "hangul.h". + +0.07 Wed Oct 31 22:06:42 2001 + - modify internal. decompose() - reorder() - compose(). + +0.06 Sun Oct 28 14:28:46 2001 + - an XS version. + (but the Non-XS version is also supported.) + +0.05 Wed Oct 10 22:02:15 2001 (not released) + - %Compos contains unnecessary singletons + (though it did not cause any bug, only useless). + They will not be stored. + +0.04 Wed Aug 15 19:02:41 2001 + - fix: NFD("") and NFKD("") must return "", not but undef. + +0.03 Fri Aug 10 22:44:18 2001 + - rename the module name to Unicode::Normalize. + - normalize takes two arguments. + +0.02 Thu Aug 9 22:56:36 2001 + - add function normalize + +0.01 Mon Aug 6 21:45:11 2001 + - original version; created by h2xs 1.21 with options + -A -X -n Text::Unicode::Normalize + diff --git a/ext/Unicode/Normalize/Makefile.PL b/ext/Unicode/Normalize/Makefile.PL new file mode 100644 index 0000000000..88ab9b7b63 --- /dev/null +++ b/ext/Unicode/Normalize/Makefile.PL @@ -0,0 +1,15 @@ +use ExtUtils::MakeMaker; + +# This is not the CPAN Unicode::Normalize makefile +# that can handle XS-NOXS installing. We do just XS. + +do "mkheader"; + +WriteMakefile( + 'NAME' => 'Unicode::Normalize', + 'VERSION_FROM' => 'Normalize.pm', # finds $VERSION + ($] >= 5.005 ? 
## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'Normalize.pod', # retrieve abstract from module + AUTHOR => 'SADAHIRO Tomoyuki <SADAHIRO@cpan.org>') : ()), + clean => {FILES=> 'unfcan.h unfcmb.h unfcmp.h unfcpt.h unfexc.h'}, +); diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm new file mode 100644 index 0000000000..a583425a3b --- /dev/null +++ b/ext/Unicode/Normalize/Normalize.pm @@ -0,0 +1,45 @@ +package Unicode::Normalize; + +use 5.006; +use strict; +use warnings; +use Carp; + +our $VERSION = '0.10'; +our $PACKAGE = __PACKAGE__; + +require Exporter; +require DynaLoader; +require AutoLoader; + +our @ISA = qw(Exporter DynaLoader); +our @EXPORT = qw( NFC NFD NFKC NFKD ); +our @EXPORT_OK = qw( normalize decompose reorder compose + getCanon getCompat getComposite getCombinClass getExclusion); +our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] ); + +bootstrap Unicode::Normalize $VERSION; + +use constant CANON => 0; +use constant COMPAT => 1; + +sub NFD ($) { reorder(decompose($_[0], CANON)) } + +sub NFKD ($) { reorder(decompose($_[0], COMPAT)) } + +sub NFC ($) { compose(reorder(decompose($_[0], CANON))) } + +sub NFKC ($) { compose(reorder(decompose($_[0], COMPAT))) } + +sub normalize($$) +{ + my $form = shift; + $form eq 'D' || $form eq 'NFD' ? NFD ($_[0]) : + $form eq 'C' || $form eq 'NFC' ? NFC ($_[0]) : + $form eq 'KD' || $form eq 'NFKD' ? NFKD($_[0]) : + $form eq 'KC' || $form eq 'NFKC' ? 
NFKC($_[0]) : + croak $PACKAGE."::normalize: invalid form name: $form"; +} + +1; +__END__ diff --git a/ext/Unicode/Normalize/Normalize.pod b/ext/Unicode/Normalize/Normalize.pod new file mode 100644 index 0000000000..4ac8966a83 --- /dev/null +++ b/ext/Unicode/Normalize/Normalize.pod @@ -0,0 +1,89 @@ + +=head1 NAME + +Unicode::Normalize - normalized forms of Unicode text + +=head1 SYNOPSIS + + use Unicode::Normalize; + + $string_NFD = NFD($raw_string); # Normalization Form D + $string_NFC = NFC($raw_string); # Normalization Form C + $string_NFKD = NFKD($raw_string); # Normalization Form KD + $string_NFKC = NFKC($raw_string); # Normalization Form KC + + or + + use Unicode::Normalize 'normalize'; + + $string_NFD = normalize('D', $raw_string); # Normalization Form D + $string_NFC = normalize('C', $raw_string); # Normalization Form C + $string_NFKD = normalize('KD', $raw_string); # Normalization Form KD + $string_NFKC = normalize('KC', $raw_string); # Normalization Form KC + +=head1 DESCRIPTION + +=over 4 + +=item C<$string_NFD = NFD($raw_string)> + +returns the Normalization Form D (formed by canonical decomposition). + + +=item C<$string_NFC = NFC($raw_string)> + +returns the Normalization Form C (formed by canonical decomposition +followed by canonical composition). + +=item C<$string_NFKD = NFKD($raw_string)> + +returns the Normalization Form KD (formed by compatibility decomposition). + +=item C<$string_NFKC = NFKC($raw_string)> + +returns the Normalization Form KC (formed by compatibility decomposition +followed by B<canonical> composition). + +=item C<$normalized_string = normalize($form_name, $raw_string)> + +As C<$form_name>, one of the following names must be given. + + 'C' or 'NFC' for Normalization Form C + 'D' or 'NFD' for Normalization Form D + 'KC' or 'NFKC' for Normalization Form KC + 'KD' or 'NFKD' for Normalization Form KD + +=back + +=head2 EXPORT + +C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default. + +C<normalize>: on request. 
+ +=head1 AUTHOR + +SADAHIRO Tomoyuki, E<lt>SADAHIRO@cpan.orgE<gt> + + http://homepage1.nifty.com/nomenclator/perl/ + + Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. + + This program is free software; you can redistribute it and/or + modify it under the same terms as Perl itself. + +=head1 SEE ALSO + +=over 4 + +=item L<Lingua::KO::Hangul::Util> + +utility functions for Hangul Syllables + +=item http://www.unicode.org/unicode/reports/tr15/ + +Unicode Normalization Forms - UAX #15 + +=back + +=cut diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs new file mode 100644 index 0000000000..aca08538fb --- /dev/null +++ b/ext/Unicode/Normalize/Normalize.xs @@ -0,0 +1,378 @@ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + +/* These 5 files are prepared by mkheader */ +#include "unfcmb.h" +#include "unfcan.h" +#include "unfcpt.h" +#include "unfcmp.h" +#include "unfexc.h" + +/* Perl 5.6.1 ? */ +#ifndef uvuni_to_utf8 +#define uvuni_to_utf8 uv_to_utf8 +#endif /* uvuni_to_utf8 */ + +/* Perl 5.6.1 ? */ +#ifndef utf8n_to_uvchr +#define utf8n_to_uvchr utf8_to_uv +#endif /* utf8n_to_uvchr */ + +/* At present, char > 0x10ffff are unaffected without complaint, right? */ +#define VALID_UTF_MAX (0x10ffff) +#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) + +/* HANGUL_H */ +#define Hangul_SBase 0xAC00 +#define Hangul_SFinal 0xD7A3 +#define Hangul_SCount 11172 + +#define Hangul_NCount 588 + +#define Hangul_LBase 0x1100 +#define Hangul_LFinal 0x1112 +#define Hangul_LCount 19 + +#define Hangul_VBase 0x1161 +#define Hangul_VFinal 0x1175 +#define Hangul_VCount 21 + +#define Hangul_TBase 0x11A7 +#define Hangul_TFinal 0x11C2 +#define Hangul_TCount 28 + +#define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal)) +#define Hangul_IsN(u) (! 
(((u) - Hangul_SBase) % Hangul_TCount)) +#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u)) +#define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal)) +#define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal)) +#define Hangul_IsT(u) ((Hangul_TBase < (u)) && ((u) <= Hangul_TFinal)) +/* HANGUL_H */ + +/* this is used for canonical ordering of combining characters (c.c.). */ +typedef struct { + U8 cc; /* combining class */ + UV uv; /* codepoint */ + STRLEN pos; /* position */ +} UNF_cc; + +int compare_cc(const void *a, const void *b) +{ + int ret_cc; + ret_cc = (*(UNF_cc*)a).cc - (*(UNF_cc*)b).cc; + if(ret_cc) return ret_cc; + return (*(UNF_cc*)a).pos - (*(UNF_cc*)b).pos; +} + +U8* dec_canonical (UV uv) +{ + U8 ***plane, **row; + if(OVER_UTF_MAX(uv)) return NULL; + plane = (U8***)UNF_canon[uv >> 16]; + if(! plane) return NULL; + row = plane[(uv >> 8) & 0xff]; + return row ? row[uv & 0xff] : NULL; +} + +U8* dec_compat (UV uv) +{ + U8 ***plane, **row; + if(OVER_UTF_MAX(uv)) return NULL; + plane = (U8***)UNF_compat[uv >> 16]; + if(! plane) return NULL; + row = plane[(uv >> 8) & 0xff]; + return row ? row[uv & 0xff] : NULL; +} + +UV getComposite (UV uv, UV uv2) +{ + UNF_complist ***plane, **row, *cell, *i; + + if(! uv2 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2)) return 0; + + if(Hangul_IsL(uv) && Hangul_IsV(uv2)) { + uv -= Hangul_LBase; /* lindex */ + uv2 -= Hangul_VBase; /* vindex */ + return(Hangul_SBase + (uv * Hangul_VCount + uv2) * Hangul_TCount); + } + if(Hangul_IsLV(uv) && Hangul_IsT(uv2)) { + uv2 -= Hangul_TBase; /* tindex */ + return (uv + uv2); + } + plane = UNF_compos[uv >> 16]; + if(! plane) return 0; + row = plane[(uv >> 8) & 0xff]; + if(! row) return 0; + cell = row[uv & 0xff]; + if(! cell) return 0; + for(i = cell; i->nextchar; i++) { + if(uv2 == i->nextchar) return i->composite; + } + return 0; +} + +U8 getCombinClass (UV uv) +{ + U8 **plane, *row; + if(OVER_UTF_MAX(uv)) return 0; + plane = (U8**)UNF_combin[uv >> 16]; + if(! 
plane) return 0; + row = plane[(uv >> 8) & 0xff]; + return row ? row[uv & 0xff] : 0; +} + +void sv_cat_decompHangul (SV* sv, UV uv) +{ + UV sindex, lindex, vindex, tindex; + U8 *t, temp[3 * UTF8_MAXLEN + 1]; + + if(! Hangul_IsS(uv)) return; + + sindex = uv - Hangul_SBase; + lindex = sindex / Hangul_NCount; + vindex = (sindex % Hangul_NCount) / Hangul_TCount; + tindex = sindex % Hangul_TCount; + + t = temp; + t = uvuni_to_utf8(t, (lindex + Hangul_LBase)); + t = uvuni_to_utf8(t, (vindex + Hangul_VBase)); + if (tindex) t = uvuni_to_utf8(t, (tindex + Hangul_TBase)); + *t = '\0'; + sv_catpvn(sv, (char *)temp, strlen((char *)temp)); +} + +MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize + + +SV* +decompose(arg, compat) + SV * arg + SV * compat + PROTOTYPE: $$ + PREINIT: + SV *src, *dst; + STRLEN srclen, dstlen, retlen; + U8 *s, *e, *p, *d, *r; + UV uv; + bool iscompat; + CODE: + if(SvUTF8(arg)) { + src = arg; + } else { + src = sv_mortalcopy(arg); + sv_utf8_upgrade(src); + } + + iscompat = SvTRUE(compat); + + dst = newSV(1); + (void)SvPOK_only(dst); + SvUTF8_on(dst); + + s = (U8*)SvPV(src,srclen); + e = s + srclen; + for(p = s; p < e;){ + uv = utf8n_to_uvchr(p, e - p, &retlen, 0); + p += retlen; + if(Hangul_IsS(uv)) sv_cat_decompHangul(dst, uv); + else { + r = iscompat ? dec_compat(uv) : dec_canonical(uv); + if(r) sv_catpv(dst, (char *)r); + else sv_catpvn(dst, (char *)p - retlen, retlen); + } + } + RETVAL = dst; + OUTPUT: + RETVAL + + + +SV* +reorder(arg) + SV * arg + PROTOTYPE: $ + PREINIT: + SV *src; + STRLEN srclen, retlen, stk_cc_max; + U8 *s, *e, *p, curCC; + UV uv; + UNF_cc * stk_cc; + CODE: + src = newSVsv(arg); + if(! SvUTF8(arg)) sv_utf8_upgrade(src); + + stk_cc_max = 10; /* enough as an initial value? 
*/ + New(0, stk_cc, stk_cc_max, UNF_cc); + + s = (U8*)SvPV(src,srclen); + e = s + srclen; + for(p = s; p < e;){ + U8 *cc_in; + STRLEN cc_len, cc_iter, cc_pos; + + uv = utf8n_to_uvchr(p, e - p, &retlen, 0); + p += retlen; + cc_pos = 0; + curCC = getCombinClass(uv); + if(! (curCC && p < e)) continue; else cc_in = p - retlen; + + stk_cc[cc_pos].cc = curCC; + stk_cc[cc_pos].uv = uv; + stk_cc[cc_pos].pos = cc_pos; + + while(p < e) { + uv = utf8n_to_uvchr(p, e - p, &retlen, 0); + curCC = getCombinClass(uv); + if(!curCC) break; + p += retlen; + cc_pos++; + if(stk_cc_max <= cc_pos) { /* extend if need */ + stk_cc_max = cc_pos + 1; + Renew(stk_cc, stk_cc_max, UNF_cc); + } + stk_cc[cc_pos].cc = curCC; + stk_cc[cc_pos].uv = uv; + stk_cc[cc_pos].pos = cc_pos; + } + + /* only one c.c. in cc_len from cc_in, no need of reordering */ + if(!cc_pos) continue; + + qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc); + + cc_len = p - cc_in; + p = cc_in; + for(cc_iter = 0; cc_iter <= cc_pos; cc_iter++) { + p = uvuni_to_utf8(p, stk_cc[cc_iter].uv); + } + } + Safefree(stk_cc); + RETVAL = src; + OUTPUT: + RETVAL + + + +void +compose(arg) + SV * arg + PROTOTYPE: $ + PREINIT: + SV *src, *dst, *tmp; + U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC; + UV uv, uvS, uvComp; + STRLEN srclen, dstlen, tmplen, dstcur, retlen; + bool beginning = TRUE; + PPCODE: + if(SvUTF8(arg)) { + src = arg; + } else { + src = sv_mortalcopy(arg); + sv_utf8_upgrade(src); + } + s = (U8*)SvPV(src, srclen); + e = s + srclen; + dstlen = srclen + 1; /* equal or shorter, XXX */ + dst = sv_2mortal(newSV(dstlen)); + (void)SvPOK_only(dst); + SvUTF8_on(dst); + d = (U8*)SvPVX(dst); + + /* for uncomposed combining char */ + tmp = sv_2mortal(newSV(dstlen)); + (void)SvPOK_only(tmp); + SvUTF8_on(tmp); + + for(p = s; p < e;){ + if(beginning) { + uvS = utf8n_to_uvchr(p, e - p, &retlen, 0); + p += retlen; + + if (getCombinClass(uvS)){ /* no Starter found yet */ + d = uvuni_to_utf8(d, uvS); + continue; + } + beginning = 
FALSE; + } + + /* Starter */ + t = tmp_start = (U8*)SvPVX(tmp); + preCC = 0; + + /* to the next Starter */ + while(p < e) { + uv = utf8n_to_uvchr(p, e - p, &retlen, 0); + p += retlen; + curCC = getCombinClass(uv); + + if(preCC && preCC == curCC) { + preCC = curCC; + t = uvuni_to_utf8(t, uv); + } else { + uvComp = getComposite(uvS, uv); + + /* S + C + S => S-S + C would be also blocked. */ + if( uvComp && ! getExclusion(uvComp) && preCC <= curCC) + { + /* preCC not changed to curCC */ + uvS = uvComp; + } else if (! curCC && p < e) { /* blocked */ + break; + } else { + preCC = curCC; + t = uvuni_to_utf8(t, uv); + } + } + } + d = uvuni_to_utf8(d, uvS); /* composed char */ + if(tmplen = t - tmp_start) { /* uncomposed combining char */ + t = (U8*)SvPVX(tmp); + while(tmplen--) *d++ = *t++; + } + uvS = uv; + } /* for */ + dstcur = d - (U8*)SvPVX(dst); + SvCUR_set(dst, dstcur); + XPUSHs(dst); + + + +U8 +getCombinClass(uv) + UV uv + +bool +getExclusion(uv) + UV uv + +UV +getComposite(uv, uv2) + UV uv + UV uv2 + +SV* +getCanon(uv) + UV uv + PROTOTYPE: $ + ALIAS: + getCompat = 1 + PREINIT: + U8 * rstr; + CODE: + if(Hangul_IsS(uv)) { + SV * dst; + dst = newSV(1); + (void)SvPOK_only(dst); + sv_cat_decompHangul(dst, uv); + RETVAL = dst; + } else { + rstr = ix ? 
dec_compat(uv) : dec_canonical(uv); + if(!rstr) XSRETURN_UNDEF; + RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr)); + } + SvUTF8_on(RETVAL); + OUTPUT: + RETVAL + diff --git a/lib/Unicode/Normalize/README b/ext/Unicode/Normalize/README index e1f9e962eb..3f0c4240fe 100644 --- a/lib/Unicode/Normalize/README +++ b/ext/Unicode/Normalize/README @@ -1,4 +1,4 @@ -Unicode/Normalize version 0.04 +Unicode/Normalize version 0.10 =================================== Unicode::Normalize - normalized forms of Unicode text @@ -23,6 +23,8 @@ SYNOPSIS INSTALLATION +Perl 5.006 or later + To install this module type the following: perl Makefile.PL @@ -30,19 +32,26 @@ To install this module type the following: make test make install +If you have a C compiler and want to use the XS version, +type the following: + + perl Makefile.PL xs + make + make test + make install + DEPENDENCIES This module requires these other modules and libraries: Carp Exporter +File::Copy File::Spec -Lingua::KO::Hangul::Util -$unidir/CombiningClass.pl -$unidir/Decomposition.pl -$unidir/CompExcl.txt - -# $unidir is $LIB/unicore or $LIB/unicode +Lingua::KO::Hangul::Util 0.06 +unicore/CombiningClass.pl or unicode/CombiningClass.pl +unicore/Decomposition.pl or unicode/Decomposition.pl +unicore/CompExcl.txt or unicode/CompExcl.txt COPYRIGHT AND LICENCE diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader new file mode 100644 index 0000000000..85d2b90e62 --- /dev/null +++ b/ext/Unicode/Normalize/mkheader @@ -0,0 +1,284 @@ +#!perl +# +# This script generates "unfcan.h", "unfcpt.h", "unfcmb.h", +# "unfcmp.h", and "unfexc.h" +# from CombiningClass.pl, Decomposition.pl, CompExcl.txt +# in lib/unicore or unicode directory +# for Unicode::Normalize.xs. (cf. 
Makefile.PL) +# +use 5.006; +use strict; +use warnings; +use Carp; + +our $PACKAGE = 'Unicode::Normalize, mkheader'; + +our $Combin = do "unicore/CombiningClass.pl" + || do "unicode/CombiningClass.pl" + || croak "$PACKAGE: CombiningClass.pl not found"; + +our $Decomp = do "unicore/Decomposition.pl" + || do "unicode/Decomposition.pl" + || croak "$PACKAGE: Decomposition.pl not found"; + +our %Combin; # $codepoint => $number : combination class +our %Canon; # $codepoint => $hexstring : canonical decomp. +our %Compat; # $codepoint => $hexstring : compat. decomp. +our %Compos; # $string => $codepoint : composite + +our %Exclus; # $codepoint => 1 : composition exclusions + +{ + my($f, $fh); + foreach my $d (@INC) { + use File::Spec; + $f = File::Spec->catfile($d, "unicore", "CompExcl.txt"); + last if open($fh, $f); + $f = File::Spec->catfile($d, "unicode", "CompExcl.txt"); + last if open($fh, $f); + $f = undef; + } + croak "$PACKAGE: CompExcl.txt not found in @INC" unless defined $f; + while(<$fh>) { + next if /^#/ or /^$/; + s/#.*//; + $Exclus{ hex($1) } =1 if /([0-9A-Fa-f]+)/; + } + close $fh; +} + +while($Combin =~ /(.+)/g) { + my @tab = split /\t/, $1; + my $ini = hex $tab[0]; + if($tab[1] eq '') { + $Combin{ $ini } = $tab[2]; + } else { + $Combin{ $_ } = $tab[2] foreach $ini .. hex($tab[1]); + } +} + +while($Decomp =~ /(.+)/g) { + my @tab = split /\t/, $1; + my $compat = $tab[2] =~ s/<[^>]+>//; + my $dec = [ _getHexArray($tab[2]) ]; # decomposition + my $com = pack('U*', @$dec); # composable sequence + my $ini = hex($tab[0]); + if($tab[1] eq '') { + $Compat{ $ini } = $dec; + if(! $compat) { + $Canon{ $ini } = $dec; + $Compos{ $com } = $ini if @$dec > 1; + } + } else { + foreach my $u ($ini .. hex($tab[1])){ + $Compat{ $u } = $dec; + if(! 
$compat){ + $Canon{ $u } = $dec; + $Compos{ $com } = $ini if @$dec > 1; + } + } + } +} + +# exhaustive decomposition +foreach my $key (keys %Canon) { + $Canon{$key} = [ getCanonList($key) ]; +} + +# exhaustive decomposition +foreach my $key (keys %Compat) { + $Compat{$key} = [ getCompatList($key) ]; +} + +sub getCanonList { + my @src = @_; + my @dec = map $Canon{$_} ? @{ $Canon{$_} } : $_, @src; + join(" ",@src) eq join(" ",@dec) ? @dec : getCanonList(@dec); + # condition @src == @dec is not ok. +} + +sub getCompatList { + my @src = @_; + my @dec = map $Compat{$_} ? @{ $Compat{$_} } : $_, @src; + join(" ",@src) eq join(" ",@dec) ? @dec : getCompatList(@dec); + # condition @src == @dec is not ok. +} + +sub _getHexArray { + my $str = shift; + map hex(), $str =~ /([0-9A-Fa-f]+)/g; +} + +sub _U_stringify { + sprintf '"%s"', join '', + map sprintf("\\x%02x", $_), unpack 'C*', pack 'U*', @_; +} + +foreach my $hash (\%Canon, \%Compat) { + foreach my $key (keys %$hash) { + $hash->{$key} = _U_stringify( @{ $hash->{$key} } ); + } +} + +sub utf8len { + my $uv = shift; + return $uv < 0x80 ? 1 : + $uv < 0x800 ? 2 : + $uv < 0x10000 ? 3 : + $uv < 0x110000 ? 4 : + croak "$PACKAGE: illegal char in the composite. utf-8 max is 0x10ffff."; +} + +my $prefix = "UNF_"; + +my $structname = "${prefix}complist"; + +our (%Comp1st, %CompList); + +foreach(sort keys %Compos) { + my @a = unpack('U*', $_); + my $val = $Compos{$_}; + my $name = sprintf "${structname}_%06x", $a[0]; + $Comp1st{ $a[0] } = $name; + $CompList{ $name }{ $a[1] } = $val; + + if( utf8len($a[0]) + utf8len($a[1]) < utf8len($val) ) { + croak "$PACKAGE: " + . "composable pair is shorter than the composite in bytes!\n" + . 
sprintf("%d + %d => %d", $a[0], $a[1], $val); + } +} + +my $compinit = + "typedef struct { UV nextchar; UV composite; } $structname;\n\n"; + +foreach my $i (sort keys %CompList) { + $compinit .= "$structname $i [] = {\n"; + $compinit .= join ",\n", + map sprintf("\t{ %d, %d }", $_, $CompList{$i}{$_}), + sort {$a <=> $b } keys %{ $CompList{$i} }; + $compinit .= ",\n{0,0}\n};\n\n"; # with sentinel +} + +#################################### + +my @Exclus = sort {$a <=> $b} keys %Exclus; + +my $file = "unfexc.h"; +open FH, ">$file" or croak "$PACKAGE: $file can't be made"; +binmode FH; select FH; + +print "bool getExclusion (UV uv) \n{\nreturn\n\t"; + +while(@Exclus) { + my $cur = shift @Exclus; + if(@Exclus && $cur + 1 == $Exclus[0]) { + print "$cur <= uv && uv <= "; + while(@Exclus && $cur + 1 == $Exclus[0]) { + $cur = shift @Exclus; + } + print $cur; + print "\n\t|| " if @Exclus; + } else { + print "uv == $cur"; + print "\n\t|| " if @Exclus; + } +} + +print "\n\t? TRUE : FALSE;\n}\n\n"; +close FH; + +#################################### + +my @tripletable = ( + { + file => "unfcmb", + name => "combin", + type => "char", + hash => \%Combin, + null => 0, + }, + { + file => "unfcan", + name => "canon", + type => "char*", + hash => \%Canon, + null => "NULL", + }, + { + file => "unfcpt", + name => "compat", + type => "char*", + hash => \%Compat, + null => "NULL", + }, + { + file => "unfcmp", + name => "compos", + type => "$structname *", + hash => \%Comp1st, + null => "NULL", + init => $compinit, + }, +); + +foreach my $tbl (@tripletable) { + my $file = "$tbl->{file}.h"; + my $head = "${prefix}$tbl->{name}"; + my $type = $tbl->{type}; + my $hash = $tbl->{hash}; + my $null = $tbl->{null}; + my $init = $tbl->{init}; + + open FH, ">$file" or croak "$PACKAGE: $file can't be made"; + binmode FH; select FH; + my %val; + + print FH << 'EOF'; +/* + * This file is auto-generated by mkheader. + * Any changes here will be lost! 
+ */ +EOF + + print $init if defined $init; + + foreach my $uv (keys %$hash) { + my @c = unpack 'CCCC', pack 'N', $uv; + $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; + } + + foreach my $p (sort { $a <=> $b } keys %val) { + next if ! $val{ $p }; + for(my $r = 0; $r < 256; $r++){ + next if ! $val{ $p }{ $r }; + printf "$type ${head}_%02x_%02x [256] = {\n", $p, $r; + for(my $c = 0; $c < 256; $c++){ + print "\t", defined $val{$p}{$r}{$c} ? $val{$p}{$r}{$c} : $null; + print ',' if $c != 255; + print "\n" if $c % 8 == 7; + } + print "};\n\n"; + } + } + foreach my $p (sort { $a <=> $b } keys %val) { + next if ! $val{ $p }; + printf "$type* ${head}_%02x [256] = {\n", $p; + for(my $r = 0; $r < 256; $r++){ + print $val{ $p }{ $r } ? sprintf("${head}_%02x_%02x", $p, $r) : "NULL"; + print ',' if $r != 255; + print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; + } + print "};\n\n"; + } + print "$type** $head [] = {\n"; + for(my $p = 0; $p <= 0x10; $p++){ + print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; + print ',' if $p != 0x10; + print "\n"; + } + print "};\n\n"; + close FH; +} + +__END__ diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t new file mode 100644 index 0000000000..8907634c47 --- /dev/null +++ b/ext/Unicode/Normalize/t/func.t @@ -0,0 +1,69 @@ +# Before `make install' is performed this script should be runnable with +# `make test'. After `make install' it should work as `perl test.pl' + +######################### + +use Test; +use strict; +use warnings; +BEGIN { plan tests => 6 }; +use Unicode::Normalize qw(:all); +ok(1); # If we made it this far, we're ok. + +######################### + +print getCombinClass( 0) == 0 + && getCombinClass( 768) == 230 + && getCombinClass(1809) == 36 +# && getCombinClass(119143) == 1 + ? "ok" : "not ok", " 2\n"; + +print ! defined getCanon( 0) + && ! 
defined getCanon(41) + && getCanon(0x00C0) eq pack('U*', 0x0041, 0x0300) + && getCanon(0x00EF) eq pack('U*', 0x0069, 0x0308) + && getCanon(0x304C) eq pack('U*', 0x304B, 0x3099) + && getCanon(0x1EA4) eq pack('U*', 0x0041, 0x0302, 0x0301) + && getCanon(0x1FAF) eq pack('U*', 0x03A9, 0x0314, 0x0342, 0x0345) + && getCanon(0xAC00) eq pack('U*', 0x1100, 0x1161) + && getCanon(0xAE00) eq pack('U*', 0x1100, 0x1173, 0x11AF) + && ! defined getCanon(0x212C) + && ! defined getCanon(0x3243) + && getCanon(0xFA2D) eq pack('U*', 0x9DB4) + ? "ok" : "not ok", " 3\n"; + +print ! defined getCompat( 0) + && ! defined getCompat(41) + && getCompat(0x00C0) eq pack('U*', 0x0041, 0x0300) + && getCompat(0x00EF) eq pack('U*', 0x0069, 0x0308) + && getCompat(0x304C) eq pack('U*', 0x304B, 0x3099) + && getCompat(0x1EA4) eq pack('U*', 0x0041, 0x0302, 0x0301) + && getCompat(0x1FAF) eq pack('U*', 0x03A9, 0x0314, 0x0342, 0x0345) + && getCompat(0x212C) eq pack('U*', 0x0042) + && getCompat(0x3243) eq pack('U*', 0x0028, 0x81F3, 0x0029) + && getCompat(0xAC00) eq pack('U*', 0x1100, 0x1161) + && getCompat(0xAE00) eq pack('U*', 0x1100, 0x1173, 0x11AF) + && getCompat(0xFA2D) eq pack('U*', 0x9DB4) + ? "ok" : "not ok", " 4\n"; + +print ! getComposite( 0, 0) + && ! getComposite( 0, 41) + && ! getComposite(41, 0) + && ! getComposite(41, 41) + && ! getComposite(12, 0x0300) + && ! getComposite(0x0055, 0xFF00) + && 0x00D9 == getComposite(0x0055, 0x0300) + && 0x1E14 == getComposite(0x0112, 0x0300) + && 0xAC00 == getComposite(0x1100, 0x1161) + && 0xADF8 == getComposite(0x1100, 0x1173) + && ! getComposite(0x1100, 0x11AF) + && ! getComposite(0x1173, 0x11AF) + && 0xAE00 == getComposite(0xADF8, 0x11AF) + ? "ok" : "not ok", " 5\n"; + +print ! getExclusion( 0) + && ! getExclusion(41) + && getExclusion(2392) + && getExclusion(3907) + && getExclusion(64334) + ? 
"ok" : "not ok", " 6\n"; diff --git a/lib/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t index 88e4e7d441..1de2e7fcb8 100644 --- a/lib/Unicode/Normalize/t/norm.t +++ b/ext/Unicode/Normalize/t/norm.t @@ -6,7 +6,7 @@ use Test; use strict; use warnings; -BEGIN { plan tests => 15 }; +BEGIN { plan tests => 18 }; use Unicode::Normalize qw(normalize); ok(1); # If we made it this far, we're ok. @@ -27,16 +27,17 @@ sub hexNFD { ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); +ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); +ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); - ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); - ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); diff --git a/lib/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t index 499f3aec8f..5544a3b13b 100644 --- a/lib/Unicode/Normalize/t/test.t +++ b/ext/Unicode/Normalize/t/test.t @@ -6,7 +6,7 @@ use Test; use strict; use warnings; -BEGIN { plan tests => 15 }; +BEGIN { plan tests => 18 }; use Unicode::Normalize; ok(1); # If we made it this far, we're ok. 
@@ -27,16 +27,17 @@ sub hexNFD { ok(hexNFC("0061 0315 0300 05AE 05C4 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("00E0 05AE 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); ok(hexNFC("0061 05AE 0300 05C4 0315 0062"), "00E0 05AE 05C4 0315 0062"); +ok(hexNFC("0045 0304 0300 AC00 11A8"), "1E14 AC01"); +ok(hexNFC("1100 1161 1100 1173 11AF"), "AC00 AE00"); +ok(hexNFC("1100 0300 1161 1173 11AF"), "1100 0300 1161 1173 11AF"); ok(hexNFD("0061 0315 0300 05AE 05C4 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("00E0 05AE 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); ok(hexNFD("0061 05AE 0300 05C4 0315 0062"), "0061 05AE 0300 05C4 0315 0062"); - ok(hexNFC("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFC("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05C4 0315 0300 05AE 0062"), "0061 05AE 05C4 0300 0315 0062"); ok(hexNFD("0061 05AE 05C4 0300 0315 0062"), "0061 05AE 05C4 0300 0315 0062"); - ok(hexNFC("0000 0041 0000 0000"), "0000 0041 0000 0000"); ok(hexNFD("0000 0041 0000 0000"), "0000 0041 0000 0000"); diff --git a/hints/uwin.sh b/hints/uwin.sh index b8dd26cc32..e5a09a698f 100644 --- a/hints/uwin.sh +++ b/hints/uwin.sh @@ -24,7 +24,7 @@ i_utime=undef # compile/link flags ldflags=-g optimize=-g -static_ext="B Data/Dumper Digest/MD5 Errno Fcntl Filter::Util::Call IO IPC/SysV MIME::Base64 Opcode PerlIO::Scalar POSIX SDBM_File Socket Storable attrs re" +static_ext="B Data/Dumper Digest/MD5 Errno Fcntl Filter::Util::Call IO IPC/SysV MIME::Base64 Opcode PerlIO::Scalar POSIX SDBM_File Socket Storable Unicode::Normalize attrs re" #static_ext=none # dynamic loading needs work usedl=undef diff --git a/hints/vmesa.sh b/hints/vmesa.sh index 20502c1e6c..f0c0232e06 100644 --- a/hints/vmesa.sh +++ b/hints/vmesa.sh @@ -218,7 +218,6 @@ dynamic_ext='' eagain='EAGAIN' ebcdic='define' exe_ext='' -extensions='Data/Dumper Digest/MD5 Errno Fcntl Filter/Util/Call GDBM_File IO IPC/SysV List/Util MIME/Base64 NDBM_File Opcode 
PerlIO/Scalar POSIX Socket Storable Time/HiRes Thread attrs re' fpostype='fpos_t' freetype='void' groupstype='gid_t' diff --git a/lib/Lingua/KO/Hangul/Util.pm b/lib/Lingua/KO/Hangul/Util.pm deleted file mode 100644 index 3848592903..0000000000 --- a/lib/Lingua/KO/Hangul/Util.pm +++ /dev/null @@ -1,278 +0,0 @@ -package Lingua::KO::Hangul::Util; - -use 5.006; -use strict; -use warnings; - -require Exporter; - -our @ISA = qw(Exporter); -our %EXPORT_TAGS = (); -our @EXPORT_OK = (); -our @EXPORT = qw( - decomposeHangul - composeHangul - getHangulName - parseHangulName -); -our $VERSION = '0.02'; - -our @JamoL = ( # Initial (HANGUL CHOSEONG) - "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", - "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H", - ); - -our @JamoV = ( # Medial (HANGUL JUNGSEONG) - "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", - "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI", - "YU", "EU", "YI", "I", - ); - -our @JamoT = ( # Final (HANGUL JONGSEONG) - "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", - "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", - "S", "SS", "NG", "J", "C", "K", "T", "P", "H", - ); - -our $BlockName = "HANGUL SYLLABLE "; - -use constant SBase => 0xAC00; -use constant LBase => 0x1100; -use constant VBase => 0x1161; -use constant TBase => 0x11A7; -use constant LCount => 19; # scalar @JamoL -use constant VCount => 21; # scalar @JamoV -use constant TCount => 28; # scalar @JamoT -use constant NCount => 588; # VCount * TCount -use constant SCount => 11172; # LCount * NCount -use constant SFinal => 0xD7A3; # SBase -1 + SCount - -our(%CodeL, %CodeV, %CodeT); -@CodeL{@JamoL} = 0 .. LCount-1; -@CodeV{@JamoV} = 0 .. VCount-1; -@CodeT{@JamoT} = 0 .. 
TCount-1; - -sub getHangulName { - my $code = shift; - return undef unless SBase <= $code && $code <= SFinal; - my $SIndex = $code - SBase; - my $LIndex = int( $SIndex / NCount); - my $VIndex = int(($SIndex % NCount) / TCount); - my $TIndex = $SIndex % TCount; - "$BlockName$JamoL[$LIndex]$JamoV[$VIndex]$JamoT[$TIndex]"; -} - -sub parseHangulName { - my $arg = shift; - return undef unless $arg =~ s/$BlockName//o; - return undef unless $arg =~ /^([^AEIOUWY]*)([AEIOUWY]+)([^AEIOUWY]*)$/; - return undef unless exists $CodeL{$1} - && exists $CodeV{$2} - && exists $CodeT{$3}; - SBase + $CodeL{$1} * NCount + $CodeV{$2} * TCount + $CodeT{$3}; -} - -sub decomposeHangul { - my $code = shift; - return unless SBase <= $code && $code <= SFinal; - my $SIndex = $code - SBase; - my $LIndex = int( $SIndex / NCount); - my $VIndex = int(($SIndex % NCount) / TCount); - my $TIndex = $SIndex % TCount; - my @ret = ( - LBase + $LIndex, - VBase + $VIndex, - $TIndex ? (TBase + $TIndex) : (), - ); - wantarray ? @ret : pack('U*', @ret); -} - -# -# To Do: -# s/(\p{JamoL}\p{JamoV})/toHangLV($1)/ge; -# s/(\p{HangLV}\p{JamoT})/toHangLVT($1)/ge; -# -sub composeHangul { - my $str = shift; - return $str unless length $str; - my(@ret); - - foreach my $ch (unpack('U*', $str)) # Makes list! The string be short! - { - push(@ret, $ch) and next unless @ret; - - # 1. check to see if $ret[-1] is L and $ch is V. - my $LIndex = $ret[-1] - LBase; - if(0 <= $LIndex && $LIndex < LCount) - { - my $VIndex = $ch - VBase; - if(0 <= $VIndex && $VIndex < VCount) - { - $ret[-1] = SBase + ($LIndex * VCount + $VIndex) * TCount; - next; # discard $ch - } - } - - # 2. check to see if $ret[-1] is LV and $ch is T. - my $SIndex = $ret[-1] - SBase; - if(0 <= $SIndex && $SIndex < SCount && $SIndex % TCount == 0) - { - my $TIndex = $ch - TBase; - if(0 <= $TIndex && $TIndex < TCount) - { - $ret[-1] += $TIndex; - next; # discard $ch - } - } - - # 3. just append $ch - push(@ret, $ch); - } - wantarray ? 
@ret : pack('U*', @ret); -} - -1; -__END__ - -=head1 NAME - -Lingua::KO::Hangul::Util - utility functions for Hangul Syllables - -=head1 SYNOPSIS - - use Lingua::KO::Hangul::Util; - - decomposeHangul(0xAC00); - # (0x1100,0x1161) or "\x{1100}\x{1161}" - - composeHangul("\x{1100}\x{1161}"); - # "\x{AC00}" - - getHangulName(0xAC00); - # "HANGUL SYLLABLE GA" - - parseHangulName("HANGUL SYLLABLE GA"); - # 0xAC00 - -=head1 DESCRIPTION - -A Hangul syllable consists of Hangul Jamo. - -Hangul Jamo are classified into three classes: - - CHOSEONG (the initial sound) as a leading consonant (L), - JUNGSEONG (the medial sound) as a vowel (V), - JONGSEONG (the final sound) as a trailing consonant (T). - -Any Hangul syllable is a composition of - - i) CHOSEONG + JUNGSEONG (L + V) - - or - - ii) CHOSEONG + JUNGSEONG + JONGSEONG (L + V + T). - -Names of Hangul Syllables have a format of C<"HANGUL SYLLABLE %s">. - -=head2 Composition and Decomposition - -=over 4 - -=item C<$string_decomposed = decomposeHangul($codepoint)> - -=item C<@codepoints = decomposeHangul($codepoint)> - -Accepts unicode codepoint integer. - -If the specified codepoint is of a Hangul syllable, -returns a list of codepoints (in a list context) -or a UTF-8 string (in a scalar context) -of its decomposition. - - decomposeHangul(0xAC00) # U+AC00 is HANGUL SYLLABLE GA. - returns "\x{1100}\x{1161}" or (0x1100, 0x1161); - - decomposeHangul(0xAE00) # U+AE00 is HANGUL SYLLABLE GEUL. - returns "\x{1100}\x{1173}\x{11AF}" or (0x1100, 0x1173, 0x11AF); - -Otherwise, returns false (empty string or empty list). - - decomposeHangul(0x0041) # outside Hangul Syllables - returns empty string or empty list. - -=item C<$string_composed = composeHangul($src_string)> - -=item C<@codepoints_composed = composeHangul($src_string)> - -Any sequence of an initial Jamo C<L> and a medial Jamo C<V> -is composed into a syllable C<LV>; -then any sequence of a syllable C<LV> and a final Jamo C<T> -is composed into a syllable C<LVT>. 
- -Any characters other than Hangul Jamo and Hangul Syllables -are unaffected. - - composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}.") - returns "Hangul \x{AC00}\x{AE00}." or - (0x48,0x61,0x6E,0x67,0x75,0x6C,0x20,0xAC00,0xAE00,0x2E); - -=back - -=head2 Hangul Syllable Name - -=over 4 - -=item C<$name = getHangulName($codepoint)> - -If the specified codepoint is of a Hangul syllable, -returns its name; otherwise returns undef. - - getHangulName(0xAC00) returns "HANGUL SYLLABLE GA"; - getHangulName(0x0041) returns undef. - -=item C<$codepoint = parseHangulName($name)> - -If the specified name is of a Hangul syllable, -returns its codepoint; otherwise returns undef. - - parseHangulName("HANGUL SYLLABLE GEUL") returns 0xAE00; - - parseHangulName("LATIN SMALL LETTER A") returns undef; - - parseHangulName("HANGUL SYLLABLE PERL") returns undef; - # Regrettably, HANGUL SYLLABLE PERL does not exist :-) - -=back - -=head2 EXPORT - -By default, - - decomposeHangul - composeHangul - getHangulName - parseHangulName - -=head1 AUTHOR - -SADAHIRO Tomoyuki - - bqw10602@nifty.com - http://homepage1.nifty.com/nomenclator/perl/ - - Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. - - This program is free software; you can redistribute it and/or - modify it under the same terms as Perl itself. - -=head1 SEE ALSO - -=over 4 - -=item http://www.unicode.org/unicode/reports/tr15 - -Annex 10: Hangul, in Unicode Normalization Forms (UAX #15). - -=back - -=cut diff --git a/lib/Lingua/KO/Hangul/Util/Changes b/lib/Lingua/KO/Hangul/Util/Changes deleted file mode 100644 index 2e43817169..0000000000 --- a/lib/Lingua/KO/Hangul/Util/Changes +++ /dev/null @@ -1,11 +0,0 @@ -Revision history for Perl extension Lingua::KO::Hangul::Util. - -0.02 Sat Aug 11 00:16:02 2001 - - fix SEE ALSO (the Unicode Normalization Forms is UAX #15) - - getHangulName and parseHangulName return - a list (undef) of one element in list context. 
- -0.01 Fri Aug 3 21:25:11 2001 - - original version; created by h2xs 1.21 with options - -A -X -n Lingua::KO::Hangul::Util - diff --git a/lib/Lingua/KO/Hangul/Util/README b/lib/Lingua/KO/Hangul/Util/README deleted file mode 100644 index 9fc04d81cc..0000000000 --- a/lib/Lingua/KO/Hangul/Util/README +++ /dev/null @@ -1,44 +0,0 @@ -Lingua/KO/Hangul/Util version 0.02 -================================== - -SYNOPSIS - - use Lingua::KO::Hangul::Util; - - decomposeHangul(0xAC00); - # (0x1100,0x1161) or "\x{1100}\x{1161}" - - composeHangul("\x{1100}\x{1161}"); - # "\x{AC00}" - - getHangulName(0xAC00); - # "HANGUL SYLLABLE GA" - - parseHangulName("HANGUL SYLLABLE GA"); - # 0xAC00 - -INSTALLATION - -To install this module type the following: - - perl Makefile.PL - make - make test - make install - -DEPENDENCIES - -Perl 5.006 or later - -COPYRIGHT AND LICENCE - -SADAHIRO Tomoyuki - - bqw10602@nifty.com - - http://homepage1.nifty.com/nomenclator/perl/ - - Copyright(C) 2001, SADAHIRO Tomoyuki. Japan. All rights reserved. - - This program is free software; you can redistribute it and/or - modify it under the same terms as Perl itself. diff --git a/lib/Lingua/KO/Hangul/Util/t/test.t b/lib/Lingua/KO/Hangul/Util/t/test.t deleted file mode 100644 index d4a5df5bb8..0000000000 --- a/lib/Lingua/KO/Hangul/Util/t/test.t +++ /dev/null @@ -1,55 +0,0 @@ -# Before `make install' is performed this script should be runnable with -# `make test'. After `make install' it should work as `perl test.pl' - -######################### - -use Test; -use strict; -BEGIN { plan tests => 22 }; -use Lingua::KO::Hangul::Util; -ok(1); # If we made it this far, we're ok. - -######################### - -sub unpk { - join ':', map sprintf("%04X", $_), - @_ == 1 ? 
unpack('U*', shift) : @_; -} - -ok(getHangulName(0xAC00), "HANGUL SYLLABLE GA"); -ok(getHangulName(0xAE00), "HANGUL SYLLABLE GEUL"); -ok(getHangulName(0xC544), "HANGUL SYLLABLE A"); -ok(getHangulName(0xD7A3), "HANGUL SYLLABLE HIH"); -ok(getHangulName(0x11A3), undef); -ok(getHangulName(0x0000), undef); - -ok(unpk(decomposeHangul(0xAC00)), "1100:1161"); -ok(unpk(decomposeHangul(0xAE00)), "1100:1173:11AF"); -ok(unpk(scalar decomposeHangul(0xAC00)), "1100:1161"); -ok(unpk(scalar decomposeHangul(0xAE00)), "1100:1173:11AF"); -ok(scalar decomposeHangul(0x0041), undef); -ok(scalar decomposeHangul(0x0000), undef); - -ok(composeHangul("Hangul \x{1100}\x{1161}\x{1100}\x{1173}\x{11AF}."), - "Hangul \x{AC00}\x{AE00}."); - -ok(parseHangulName("HANGUL SYLLABLE GA"), 0xAC00); -ok(parseHangulName("HANGUL SYLLABLE GEUL"), 0xAE00); -ok(parseHangulName("HANGUL SYLLABLE A"), 0xC544); -ok(parseHangulName("HANGUL SYLLABLE HIH"), 0xD7A3); -ok(parseHangulName("HANGUL SYLLABLE PERL"), undef); -ok(parseHangulName("LATIN LETTER SMALL A"), undef); - -my $ng; - -$ng = 0; -foreach my $i (0xAC00..0xD7A3){ - $ng ++ if $i != parseHangulName(getHangulName($i)); -} -ok($ng, 0); - -$ng = 0; -foreach my $i (0xAC00..0xD7A3){ - $ng ++ if $i != (composeHangul scalar decomposeHangul($i))[0]; -} -ok($ng, 0); diff --git a/lib/Unicode/Collate.pm b/lib/Unicode/Collate.pm index 113613e18f..2ffda37faa 100644 --- a/lib/Unicode/Collate.pm +++ b/lib/Unicode/Collate.pm @@ -4,7 +4,6 @@ use 5.006; use strict; use warnings; use Carp; -use Lingua::KO::Hangul::Util; require Exporter; our $VERSION = '0.08'; @@ -19,6 +18,15 @@ our @EXPORT = (); (our $Path = $INC{'Unicode/Collate.pm'}) =~ s/\.pm$//; our $KeyFile = "allkeys.txt"; +# Lingua::KO::Hangul::Util not part of the standard distribution +# but it will be used if available. + +eval { require Lingua::KO::Hangul::Util }; +my $hasHangulUtil = ! 
$@; +if ($hasHangulUtil) { + Lingua::KO::Hangul::Util->import(); +} + our %Combin; # combining class from Unicode::Normalize use constant Min2 => 0x20; # minimum weight at level 2 @@ -256,7 +264,10 @@ sub getWt _isHangul($u) ? $hang ? &$hang($u) - : map(@{ $ent->{pack('U', $_)} }, decomposeHangul($u)) + : ($hasHangulUtil ? + map(@{ $ent->{pack('U', $_)} }, decomposeHangul($u)) : + # runtime compile error... + (eval 'use Lingua::KO::Hangul::Util', print $@)) : _isCJK($u) ? $cjk ? &$cjk($u) : map($self->altCE(0,@$_), _CJK($u)) : map($self->altCE(0,@$_), _derivCE($u)); diff --git a/lib/Unicode/Normalize/Changes b/lib/Unicode/Normalize/Changes deleted file mode 100644 index 910016cb23..0000000000 --- a/lib/Unicode/Normalize/Changes +++ /dev/null @@ -1,16 +0,0 @@ -Revision history for Perl extension Unicode::Normalize. - -0.04 Wed Aug 15 19:02:41 2001 - - fix: NFD("") and NFKD("") must return "", not but undef. - -0.03 Fri Aug 10 22:44:18 2001 - - rename the module name to Unicode::Normalize. - - normalize takes two arguments. - -0.02 Thu Aug 9 22:56:36 2001 - - add function normalize - -0.01 Mon Aug 6 21:45:11 2001 - - original version; created by h2xs 1.21 with options - -A -X -n Text::Unicode::Normalize - diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 2cc0ece98e..0aaccd0c23 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -135,14 +135,26 @@ sub _getcode { return; } -use Lingua::KO::Hangul::Util; +# Lingua::KO::Hangul::Util not part of the standard distribution +# but it will be used if available. + +eval { require Lingua::KO::Hangul::Util }; +my $hasHangulUtil = ! $@; +if ($hasHangulUtil) { + Lingua::KO::Hangul::Util->import(); +} sub hangul_decomp { # internal: called from charinfo - my @tmp = decomposeHangul(shift); - return - @tmp == 2 ? sprintf("%04X %04X", @tmp) : - @tmp == 3 ? 
sprintf("%04X %04X %04X", @tmp) : - undef; + if ($hasHangulUtil) { + my @tmp = decomposeHangul(shift); + return sprintf("%04X %04X", @tmp) if @tmp == 2; + return sprintf("%04X %04X %04X", @tmp) if @tmp == 3; + } + return; +} + +sub hangul_charname { # internal: called from charinfo + return sprintf("HANGUL SYLLABLE-%04X", shift); } sub han_charname { # internal: called from charinfo @@ -157,7 +169,7 @@ my @CharinfoRanges = ( # CJK Ideographs [ 0x4E00, 0x9FA5, \&han_charname, undef ], # Hangul Syllables - [ 0xAC00, 0xD7A3, \&getHangulName, \&hangul_decomp ], + [ 0xAC00, 0xD7A3, $hasHangulUtil ? \&getHangulName : \&hangul_charname, \&hangul_decomp ], # Non-Private Use High Surrogates [ 0xD800, 0xDB7F, undef, undef ], # Private Use High Surrogates diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 0434eb92d4..e70e104874 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -106,11 +106,11 @@ ok($charinfo->{script}, 'Hebrew'); $charinfo = charinfo(0xAC00); ok($charinfo->{code}, 'AC00'); -ok($charinfo->{name}, 'HANGUL SYLLABLE GA'); +ok($charinfo->{name}, 'HANGUL SYLLABLE-AC00'); ok($charinfo->{category}, 'Lo'); ok($charinfo->{combining}, '0'); ok($charinfo->{bidi}, 'L'); -ok($charinfo->{decomposition}, '1100 1161'); +ok($charinfo->{decomposition}, undef); ok($charinfo->{decimal}, ''); ok($charinfo->{digit}, ''); ok($charinfo->{numeric}, ''); @@ -128,11 +128,11 @@ ok($charinfo->{script}, 'Hangul'); $charinfo = charinfo(0xAE00); ok($charinfo->{code}, 'AE00'); -ok($charinfo->{name}, 'HANGUL SYLLABLE GEUL'); +ok($charinfo->{name}, 'HANGUL SYLLABLE-AE00'); ok($charinfo->{category}, 'Lo'); ok($charinfo->{combining}, '0'); ok($charinfo->{bidi}, 'L'); -ok($charinfo->{decomposition}, '1100 1173 11AF'); +ok($charinfo->{decomposition}, undef); ok($charinfo->{decimal}, ''); ok($charinfo->{digit}, ''); ok($charinfo->{numeric}, ''); diff --git a/win32/Makefile b/win32/Makefile index 5ae84c7d0f..c43b8c8992 100644 --- a/win32/Makefile +++ b/win32/Makefile @@ -592,63 +592,65 
@@ PERLDLL_OBJ = $(PERLDLL_OBJ) $(WIN32_OBJ) $(DLL_OBJ) SETARGV_OBJ = setargv$(o) !ENDIF -DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader -SOCKET = $(EXTDIR)\Socket\Socket -FCNTL = $(EXTDIR)\Fcntl\Fcntl -OPCODE = $(EXTDIR)\Opcode\Opcode -SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File -IO = $(EXTDIR)\IO\IO -POSIX = $(EXTDIR)\POSIX\POSIX -ATTRS = $(EXTDIR)\attrs\attrs -THREAD = $(EXTDIR)\Thread\Thread -B = $(EXTDIR)\B\B -RE = $(EXTDIR)\re\re -DUMPER = $(EXTDIR)\Data\Dumper\Dumper -ERRNO = $(EXTDIR)\Errno\Errno -PEEK = $(EXTDIR)\Devel\Peek\Peek -BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader -DPROF = $(EXTDIR)\Devel\DProf\DProf -GLOB = $(EXTDIR)\File\Glob\Glob -HOSTNAME = $(EXTDIR)\Sys\Hostname\Hostname -STORABLE = $(EXTDIR)\Storable\Storable -FILTER = $(EXTDIR)\Filter\Util\Call\Call -ENCODE = $(EXTDIR)\Encode\Encode -MD5 = $(EXTDIR)\Digest\MD5\MD5 -PERLIOSCALAR = $(EXTDIR)\PerlIO\Scalar\Scalar -MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64 -TIMEHIRES = $(EXTDIR)\Time\HiRes\HiRes -CWD = $(EXTDIR)\Cwd\Cwd -LISTUTIL = $(EXTDIR)\List\Util\Util -PERLIOVIA = $(EXTDIR)\PerlIO\Via\Via -XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap - -SOCKET_DLL = $(AUTODIR)\Socket\Socket.dll -FCNTL_DLL = $(AUTODIR)\Fcntl\Fcntl.dll -OPCODE_DLL = $(AUTODIR)\Opcode\Opcode.dll -SDBM_FILE_DLL = $(AUTODIR)\SDBM_File\SDBM_File.dll -IO_DLL = $(AUTODIR)\IO\IO.dll -POSIX_DLL = $(AUTODIR)\POSIX\POSIX.dll -ATTRS_DLL = $(AUTODIR)\attrs\attrs.dll -THREAD_DLL = $(AUTODIR)\Thread\Thread.dll -B_DLL = $(AUTODIR)\B\B.dll -DUMPER_DLL = $(AUTODIR)\Data\Dumper\Dumper.dll -PEEK_DLL = $(AUTODIR)\Devel\Peek\Peek.dll -RE_DLL = $(AUTODIR)\re\re.dll -BYTELOADER_DLL = $(AUTODIR)\ByteLoader\ByteLoader.dll -DPROF_DLL = $(AUTODIR)\Devel\DProf\DProf.dll -GLOB_DLL = $(AUTODIR)\File\Glob\Glob.dll -HOSTNAME_DLL = $(AUTODIR)\Sys\Hostname\Hostname.dll -STORABLE_DLL = $(AUTODIR)\Storable\Storable.dll -FILTER_DLL = $(AUTODIR)\Filter\Util\Call\Call.dll -ENCODE_DLL = $(AUTODIR)\Encode\Encode.dll -MD5_DLL = $(AUTODIR)\Digest\MD5\MD5.dll 
-PERLIOSCALAR_DLL= $(AUTODIR)\PerlIO\Scalar\Scalar.dll -MIMEBASE64_DLL = $(AUTODIR)\MIME\Base64\Base64.dll -TIMEHIRES_DLL = $(AUTODIR)\Time\HiRes\HiRes.dll -CWD_DLL = $(AUTODIR)\Cwd\Cwd.dll -LISTUTIL_DLL = $(AUTODIR)\List\Util\Util.dll -PERLIOVIA_DLL = $(AUTODIR)\PerlIO\Via\Via.dll -XSTYPEMAP_DLL = $(AUTODIR)\XS\Typemap\Typemap.dll +DYNALOADER = $(EXTDIR)\DynaLoader\DynaLoader +SOCKET = $(EXTDIR)\Socket\Socket +FCNTL = $(EXTDIR)\Fcntl\Fcntl +OPCODE = $(EXTDIR)\Opcode\Opcode +SDBM_FILE = $(EXTDIR)\SDBM_File\SDBM_File +IO = $(EXTDIR)\IO\IO +POSIX = $(EXTDIR)\POSIX\POSIX +ATTRS = $(EXTDIR)\attrs\attrs +THREAD = $(EXTDIR)\Thread\Thread +B = $(EXTDIR)\B\B +RE = $(EXTDIR)\re\re +DUMPER = $(EXTDIR)\Data\Dumper\Dumper +ERRNO = $(EXTDIR)\Errno\Errno +PEEK = $(EXTDIR)\Devel\Peek\Peek +BYTELOADER = $(EXTDIR)\ByteLoader\ByteLoader +DPROF = $(EXTDIR)\Devel\DProf\DProf +GLOB = $(EXTDIR)\File\Glob\Glob +HOSTNAME = $(EXTDIR)\Sys\Hostname\Hostname +STORABLE = $(EXTDIR)\Storable\Storable +FILTER = $(EXTDIR)\Filter\Util\Call\Call +ENCODE = $(EXTDIR)\Encode\Encode +MD5 = $(EXTDIR)\Digest\MD5\MD5 +PERLIOSCALAR = $(EXTDIR)\PerlIO\Scalar\Scalar +MIMEBASE64 = $(EXTDIR)\MIME\Base64\Base64 +TIMEHIRES = $(EXTDIR)\Time\HiRes\HiRes +CWD = $(EXTDIR)\Cwd\Cwd +LISTUTIL = $(EXTDIR)\List\Util\Util +PERLIOVIA = $(EXTDIR)\PerlIO\Via\Via +XSTYPEMAP = $(EXTDIR)\XS\Typemap\Typemap +UNICODENORMALIZE = $(EXTDIR)\Unicode\Normalize\Normalize + +SOCKET_DLL = $(AUTODIR)\Socket\Socket.dll +FCNTL_DLL = $(AUTODIR)\Fcntl\Fcntl.dll +OPCODE_DLL = $(AUTODIR)\Opcode\Opcode.dll +SDBM_FILE_DLL = $(AUTODIR)\SDBM_File\SDBM_File.dll +IO_DLL = $(AUTODIR)\IO\IO.dll +POSIX_DLL = $(AUTODIR)\POSIX\POSIX.dll +ATTRS_DLL = $(AUTODIR)\attrs\attrs.dll +THREAD_DLL = $(AUTODIR)\Thread\Thread.dll +B_DLL = $(AUTODIR)\B\B.dll +DUMPER_DLL = $(AUTODIR)\Data\Dumper\Dumper.dll +PEEK_DLL = $(AUTODIR)\Devel\Peek\Peek.dll +RE_DLL = $(AUTODIR)\re\re.dll +BYTELOADER_DLL = $(AUTODIR)\ByteLoader\ByteLoader.dll +DPROF_DLL = 
$(AUTODIR)\Devel\DProf\DProf.dll +GLOB_DLL = $(AUTODIR)\File\Glob\Glob.dll +HOSTNAME_DLL = $(AUTODIR)\Sys\Hostname\Hostname.dll +STORABLE_DLL = $(AUTODIR)\Storable\Storable.dll +FILTER_DLL = $(AUTODIR)\Filter\Util\Call\Call.dll +ENCODE_DLL = $(AUTODIR)\Encode\Encode.dll +MD5_DLL = $(AUTODIR)\Digest\MD5\MD5.dll +PERLIOSCALAR_DLL = $(AUTODIR)\PerlIO\Scalar\Scalar.dll +MIMEBASE64_DLL = $(AUTODIR)\MIME\Base64\Base64.dll +TIMEHIRES_DLL = $(AUTODIR)\Time\HiRes\HiRes.dll +CWD_DLL = $(AUTODIR)\Cwd\Cwd.dll +LISTUTIL_DLL = $(AUTODIR)\List\Util\Util.dll +PERLIOVIA_DLL = $(AUTODIR)\PerlIO\Via\Via.dll +XSTYPEMAP_DLL = $(AUTODIR)\XS\Typemap\Typemap.dll +UNICODENORMALIZE_DLL = $(AUTODIR)\Unicode\Normalize\Normalize.dll EXTENSION_C = \ $(SOCKET).c \ @@ -677,7 +679,8 @@ EXTENSION_C = \ $(CWD).c \ $(LISTUTIL).c \ $(PERLIOVIA).c \ - $(XSTYPEMAP).c + $(XSTYPEMAP).c \ + $(UNICODENORMALIZE).c EXTENSION_DLL = \ $(SOCKET_DLL) \ @@ -706,7 +709,8 @@ EXTENSION_DLL = \ $(CWD_DLL) \ $(LISTUTIL_DLL) \ $(PERLIOVIA_DLL) \ - $(XSTYPEMAP_DLL) + $(XSTYPEMAP_DLL) \ + $(UNICODENORMALIZE_DLL) POD2HTML = $(PODDIR)\pod2html POD2MAN = $(PODDIR)\pod2man @@ -968,6 +972,7 @@ distclean: clean -del /f $(LIBDIR)\Scalar\Util.pm -del /f $(LIBDIR)\Time\HiRes.pm -del /f $(LIBDIR)\XS\Typemap.pm + -del /f $(LIBDIR)\Unicode\Normalize.pm -if exist $(LIBDIR)\IO rmdir /s /q $(LIBDIR)\IO -rmdir /s $(LIBDIR)\IO -if exist $(LIBDIR)\Thread rmdir /s /q $(LIBDIR)\Thread diff --git a/win32/makefile.mk b/win32/makefile.mk index bff42335d6..fd2b5ffde3 100644 --- a/win32/makefile.mk +++ b/win32/makefile.mk @@ -748,7 +748,8 @@ SETARGV_OBJ = setargv$(o) DYNAMIC_EXT = Socket IO Fcntl Opcode SDBM_File POSIX attrs Thread B re \ Data/Dumper Devel/Peek ByteLoader Devel/DProf File/Glob \ Sys/Hostname Storable Filter/Util/Call Encode \ - Digest/MD5 PerlIO/Scalar MIME/Base64 Time/HiRes + Digest/MD5 PerlIO/Scalar MIME/Base64 Time/HiRes \ + Unicode/Normalize STATIC_EXT = DynaLoader NONXS_EXT = Errno @@ -1123,6 +1124,7 @@ distclean: clean -del 
/f $(LIBDIR)\Time\HiRes.pm -del /f $(LIBDIR)\List\Util.pm -del /f $(LIBDIR)\Scalar\Util.pm + -del /f $(LIBDIR)\Unicode\Normalize.pm -if exist $(LIBDIR)\IO rmdir /s /q $(LIBDIR)\IO || rmdir /s $(LIBDIR)\IO -if exist $(LIBDIR)\Thread rmdir /s /q $(LIBDIR)\Thread || rmdir /s $(LIBDIR)\Thread -if exist $(LIBDIR)\B rmdir /s /q $(LIBDIR)\B || rmdir /s $(LIBDIR)\B |