diff options
author | Nicholas Clark <nick@ccl4.org> | 2003-11-29 17:55:32 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2003-11-29 17:55:32 +0000 |
commit | e524f5b2f1cd60b17ac79893058830214d3840a4 (patch) | |
tree | c6bdbed985cbb289670b99046be33d0d901cdb9c | |
parent | 72fcecc52baa353d60255cacfa57be92e76f59d2 (diff) | |
download | perl-e524f5b2f1cd60b17ac79893058830214d3840a4.tar.gz |
Update Unicode::Normalize to 0.28
p4raw-id: //depot/perl@21812
-rw-r--r-- | MANIFEST | 3 |
-rw-r--r-- | ext/Unicode/Normalize/Changes | 140 |
-rw-r--r-- | ext/Unicode/Normalize/Normalize.pm | 10 |
-rw-r--r-- | ext/Unicode/Normalize/Normalize.xs | 109 |
-rw-r--r-- | ext/Unicode/Normalize/README | 9 |
-rw-r--r-- | ext/Unicode/Normalize/t/illegal.t | 66 |
-rw-r--r-- | ext/Unicode/Normalize/t/null.t | 49 |
-rw-r--r-- | ext/Unicode/Normalize/t/short.t | 55 |
8 files changed, 325 insertions, 116 deletions
@@ -762,8 +762,11 @@ ext/Unicode/Normalize/README Unicode::Normalize ext/Unicode/Normalize/t/fcdc.t Unicode::Normalize ext/Unicode/Normalize/t/form.t Unicode::Normalize ext/Unicode/Normalize/t/func.t Unicode::Normalize +ext/Unicode/Normalize/t/illegal.t Unicode::Normalize ext/Unicode/Normalize/t/norm.t Unicode::Normalize +ext/Unicode/Normalize/t/null.t Unicode::Normalize ext/Unicode/Normalize/t/proto.t Unicode::Normalize +ext/Unicode/Normalize/t/short.t Unicode::Normalize ext/Unicode/Normalize/t/split.t Unicode::Normalize ext/Unicode/Normalize/t/test.t Unicode::Normalize ext/util/make_ext Used by Makefile to execute extension Makefiles diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes index 74b87e71e1..e63656c351 100644 --- a/ext/Unicode/Normalize/Changes +++ b/ext/Unicode/Normalize/Changes @@ -1,112 +1,128 @@ Revision history for Perl extension Unicode::Normalize. +0.28 Sat Nov 22 23:46:24 2003 + - XSUB: even if string contains a malformed, "short" Unicode character, + decompose() and reorder() will be safe. Garbage will be no longer added. + - added null.t and short.t. + - now truely added illegal.t (in 0.27, forgot to change MANIFEST). + +0.27 Sun Nov 16 13:16:21 2003 + - Illegal code points (surrogate and noncharacter) will be allowed + (keep your code with <no warnings 'utf8';>); + but porting is not successful in the case of ((Pure Perl) and + (Perl 5.7.3 or before)). + - added illegal.t. + +0.26 Sat Nov 15 21:52:30 2003 + - doc fix: s/FCD(?= is unique)/FCC/; + 0.25 Mon Oct 6 22:26:03 2003 - - added form.t and proto.t. + - added form.t and proto.t. 0.24 Sat Oct 4 17:57:10 2003 - - supports FCD and FCC (UTN #5): - FCD(), normalize('FCD'), checkFCD(), check('FCD'); - FCC(), normalize('FCC'), checkFCC(), check('FCC'). - - changed INSTALLATION (cf. README). - * Initial state of the distribution is changed to XSUB. To build - pure Perl, type <perl disableXS> before <perl Makefile.PL>. 
- * The purePerl-XSUB converter is now provided as two perl - script files, named "enableXS" and "disableXS". - (no longer <perl Makefile.PL xs> and <perl Makefile.PL noxs>.) - * simplified Makefile.PL. - - added fcdc.t and split.t. + - supports FCD and FCC (UTN #5): + FCD(), normalize('FCD'), checkFCD(), check('FCD'); + FCC(), normalize('FCC'), checkFCC(), check('FCC'). + - changed INSTALLATION (cf. README). + * Initial state of the distribution is changed to XSUB. To build + pure Perl, type <perl disableXS> before <perl Makefile.PL>. + * The purePerl-XSUB converter is now provided as two perl + script files, named "enableXS" and "disableXS". + (no longer <perl Makefile.PL xs> and <perl Makefile.PL noxs>.) + * simplified Makefile.PL. + - added fcdc.t and split.t. 0.23 Sat Jun 28 20:38:10 2003 - - bug fix: \0-terminate in compose() in XS. - - tweak in pure perl: forced $codepoint to numeric (i.e. "+0065" to 65) - - tweak of POD and README. + - bug fix: \0-terminate in compose() in XS. + - tweak in pure perl: forced $codepoint to numeric (i.e. "+0065" to 65) + - tweak of POD and README. 0.22 Mon Jun 09 22:23:10 2003 - - internal tweak (again): pack_U() and unpack_U(). + - internal tweak (again): pack_U() and unpack_U(). 0.21 Thu Apr 02 23:12:54 2003 - - internal tweak: for (?un)pack 'U'. + - internal tweak: for (?un)pack 'U'. 0.20 Sun Mar 02 13:29:25 2003 - - decompose Hangul syllables in a decomposition mapping. + - decompose Hangul syllables in a decomposition mapping. 0.18 ... unreleased - - synchronization with bleadperl. - - Change 16262: by me + - synchronization with bleadperl. + - Change 16262: by me 0.17 Sun Apr 28 23:13:32 2002 - - now normalize('NFC',$1) should work. - - Some croak()'s are added in mkheader. - - synchronization with bleadperl. - - Change 15596: by me - - Change 16136: by pudge + - now normalize('NFC',$1) should work. + - Some croak()'s are added in mkheader. + - synchronization with bleadperl. 
+ - Change 15596: by me + - Change 16136: by pudge 0.16 Thu Mar 21 13:36:14 2002 - - synchronization with bleadperl. - - Change 15318: by jhi - - Change 15319: by jhi + - synchronization with bleadperl. + - Change 15318: by jhi + - Change 15319: by jhi 0.15 Tue Mar 19 22:04:07 2002 - - Quick check is implemented. - - decompose(), reorder(), and compose() are documented. - - The Non-XS version is also independent of Lingua::KO::Hangul::Util. + - Quick check is implemented. + - decompose(), reorder(), and compose() are documented. + - The Non-XS version is also independent of Lingua::KO::Hangul::Util. 0.14 Sat Feb 02 20:40:14 2002 - - synchronization with bleadperl. - - Change 14128: by Arthur - - Change 14129: by jhi - - Change 14156: - - Change 14199: by Nikola Knezevic - - Change 14308: by Benjamin Goldberg - - Change 14370: by jhi + - synchronization with bleadperl. + - Change 14128: by Arthur + - Change 14129: by jhi + - Change 14156: + - Change 14199: by Nikola Knezevic + - Change 14308: by Benjamin Goldberg + - Change 14370: by jhi 0.13 Sat Dec 01 11:42:43 2001 - - modify Makefile.PL to enable rebuild. - (This problem is pointed out by David Dyck.) - - Change 13388: by Jarkko Hietaniemi. + - modify Makefile.PL to enable rebuild. + (This problem is pointed out by David Dyck.) + - Change 13388: by Jarkko Hietaniemi. 0.12 Wed Nov 29 22:49:02 2001 - - documentation in .pod is appended to .pm and the .pod is removed. - (only POD in NON-XS refers to Lingua::KO::Hangul::Util.) + - documentation in .pod is appended to .pm and the .pod is removed. + (only POD in NON-XS refers to Lingua::KO::Hangul::Util.) 0.11 Sat Nov 24 10:18:38 2001 - - documentation of some functions for character data. - - Change 12909: by Jarkko Hietaniemi. - - Change 13228: by Peter Prymmer. + - documentation of some functions for character data. + - Change 12909: by Jarkko Hietaniemi. + - Change 13228: by Peter Prymmer. 
0.10 Sat Nov 03 16:30:20 2001 - - The XS version is now independent of Lingua::KO::Hangul::Util. - (though the Non-XS version still requires that.) + - The XS version is now independent of Lingua::KO::Hangul::Util. + (though the Non-XS version still requires that.) 0.09 Fri Nov 02 22:39:30 2001 - - remove pTHX_. + - remove pTHX_. 0.08 Thu Nov 01 23:20:42 2001 - - use Lingua::KO::Hangul::Util 0.06 and remove "hangul.h". + - use Lingua::KO::Hangul::Util 0.06 and remove "hangul.h". 0.07 Wed Oct 31 22:06:42 2001 - - modify internal. decompose() - reorder() - compose(). + - modify internal. decompose() - reorder() - compose(). 0.06 Sun Oct 28 14:28:46 2001 - - an XS version. - (but the Non-XS version is also supported.) + - an XS version. + (but the Non-XS version is also supported.) 0.05 Wed Oct 10 22:02:15 2001 (not released) - - %Compos contains unnecessary singletons - (though it did not cause any bug, only useless). - They will not be stored. + - %Compos contains unnecessary singletons + (though it did not cause any bug, only useless). + They will not be stored. 0.04 Wed Aug 15 19:02:41 2001 - - fix: NFD("") and NFKD("") must return "", not but undef. + - fix: NFD("") and NFKD("") must return "", not but undef. 0.03 Fri Aug 10 22:44:18 2001 - - rename the module name to Unicode::Normalize. - - normalize takes two arguments. + - rename the module name to Unicode::Normalize. + - normalize takes two arguments. 
0.02 Thu Aug 9 22:56:36 2001 - - add function normalize + - add function normalize 0.01 Mon Aug 6 21:45:11 2001 - - original version; created by h2xs 1.21 with options - -A -X -n Text::Unicode::Normalize + - original version; created by h2xs 1.21 with options + -A -X -n Text::Unicode::Normalize diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm index 704303dec7..ef8ec68ef9 100644 --- a/ext/Unicode/Normalize/Normalize.pm +++ b/ext/Unicode/Normalize/Normalize.pm @@ -11,7 +11,9 @@ use strict; use warnings; use Carp; -our $VERSION = '0.25'; +no warnings 'utf8'; + +our $VERSION = '0.28'; our $PACKAGE = __PACKAGE__; require Exporter; @@ -180,7 +182,7 @@ each other. C<FCD()> will return one of these equivalent forms. returns the FCC form ("Fast C Contiguous"; cf. UTN #5). -Note: FCD is unique, as well as four normalization forms (NF*). +Note: FCC is unique, as well as four normalization forms (NF*). =item C<$normalized_string = normalize($form_name, $string)> @@ -277,7 +279,7 @@ returns C<YES> (C<1>) or C<NO> (C<empty string>). returns C<YES> (C<1>), C<NO> (C<empty string>), or C<MAYBE> (C<undef>). -If a string is not in C<FCD>, it must not be in <FCC>. +If a string is not in FCD, it must not be in FCC. So C<checkFCC($not_FCD_string)> should return C<NO>. 
=item C<$result = check($form_name, $string)> @@ -393,7 +395,7 @@ SADAHIRO Tomoyuki, <SADAHIRO@cpan.org> =over 4 -=item http://www.unicode.org/unicode/reports/tr15/ +=item http://www.unicode.org/reports/tr15/ Unicode Normalization Forms - UAX #15 diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs index 04d02563a0..2ce7cbcc98 100644 --- a/ext/Unicode/Normalize/Normalize.xs +++ b/ext/Unicode/Normalize/Normalize.xs @@ -20,7 +20,14 @@ #define utf8n_to_uvuni utf8_to_uv #endif /* utf8n_to_uvuni */ -/* if utf8n_to_uvuni() sets retlen to 0 when flags = 0 */ +/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */ +#ifdef UTF8_ALLOW_BOM +#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FFFF) +#else +#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF) +#endif + +/* if utf8n_to_uvuni() sets retlen to 0 (?) */ #define ErrRetlenIsZero "panic (Unicode::Normalize): zero-length character" /* utf8_hop() hops back before start. Maybe broken UTF-8 */ @@ -64,7 +71,7 @@ typedef struct { STRLEN pos; /* position */ } UNF_cc; -int compare_cc (const void *a, const void *b) +static int compare_cc (const void *a, const void *b) { int ret_cc; ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc; @@ -75,7 +82,7 @@ int compare_cc (const void *a, const void *b) - ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos ); } -U8* dec_canonical (UV uv) +static U8* dec_canonical (UV uv) { U8 ***plane, **row; if (OVER_UTF_MAX(uv)) @@ -87,7 +94,7 @@ U8* dec_canonical (UV uv) return row ? row[uv & 0xff] : NULL; } -U8* dec_compat (UV uv) +static U8* dec_compat (UV uv) { U8 ***plane, **row; if (OVER_UTF_MAX(uv)) @@ -99,7 +106,7 @@ U8* dec_compat (UV uv) return row ? 
row[uv & 0xff] : NULL; } -UV composite_uv (UV uv, UV uv2) +static UV composite_uv (UV uv, UV uv2) { UNF_complist ***plane, **row, *cell, *i; @@ -131,7 +138,7 @@ UV composite_uv (UV uv, UV uv2) return 0; } -U8 getCombinClass (UV uv) +static U8 getCombinClass (UV uv) { U8 **plane, *row; if (OVER_UTF_MAX(uv)) @@ -143,7 +150,7 @@ U8 getCombinClass (UV uv) return row ? row[uv & 0xff] : 0; } -void sv_cat_decompHangul (SV* sv, UV uv) +static void sv_cat_decompHangul (SV* sv, UV uv) { UV sindex, lindex, vindex, tindex; U8 *t, tmp[3 * UTF8_MAXLEN + 1]; @@ -162,7 +169,17 @@ void sv_cat_decompHangul (SV* sv, UV uv) if (tindex) t = uvuni_to_utf8(t, (tindex + Hangul_TBase)); *t = '\0'; - sv_catpvn(sv, (char *)tmp, strlen((char *)tmp)); + sv_catpvn(sv, (char *)tmp, t - tmp); +} + +static void sv_cat_uvuni (SV* sv, UV uv) +{ + U8 *t, tmp[UTF8_MAXLEN + 1]; + + t = tmp; + t = uvuni_to_utf8(t, uv); + *t = '\0'; + sv_catpvn(sv, (char *)tmp, t - tmp); } MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize @@ -194,7 +211,7 @@ decompose(arg, compat = &PL_sv_no) s = (U8*)SvPV(src,srclen); e = s + srclen; for (p = s; p < e; p += retlen) { - uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero); @@ -205,7 +222,7 @@ decompose(arg, compat = &PL_sv_no) if (r) sv_catpv(dst, (char *)r); else - sv_catpvn(dst, (char *)p, retlen); + sv_cat_uvuni(dst, uv); } } RETVAL = dst; @@ -222,8 +239,10 @@ reorder(arg) SV *src, *dst; STRLEN srclen, dstlen, retlen, stk_cc_max; U8 *s, *e, *p, *d, curCC; - UV uv; + UV uv, uvlast; UNF_cc * stk_cc; + STRLEN i, cc_pos; + bool valid_uvlast; CODE: if (SvUTF8(arg)) { src = arg; @@ -233,49 +252,46 @@ reorder(arg) } s = (U8*)SvPV(src, srclen); - + e = s + srclen; dstlen = srclen + 1; dst = newSV(dstlen); - sv_setpvn(dst,(const char*)s,srclen); + (void)SvPOK_only(dst); SvUTF8_on(dst); + d = (U8*)SvPVX(dst); stk_cc_max = 10; /* enough as an initial value? 
*/ New(0, stk_cc, stk_cc_max, UNF_cc); - d = (U8*)SvPV(dst,dstlen); - e = d + dstlen; - - for (p = d; p < e;) { - U8 *cc_in; - STRLEN cc_len, cc_iter, cc_pos; - - uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + for (p = s; p < e;) { + uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero); p += retlen; - - curCC = getCombinClass(uv); - if (! (curCC && p < e)) + if (curCC == 0) { + d = uvuni_to_utf8(d, uv); continue; - else - cc_in = p - retlen; + } cc_pos = 0; stk_cc[cc_pos].cc = curCC; stk_cc[cc_pos].uv = uv; stk_cc[cc_pos].pos = cc_pos; + valid_uvlast = FALSE; while (p < e) { - uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero); p += retlen; curCC = getCombinClass(uv); - if (!curCC) + if (curCC == 0) { + uvlast = uv; + valid_uvlast = TRUE; break; + } cc_pos++; if (stk_cc_max <= cc_pos) { /* extend if need */ @@ -287,18 +303,21 @@ reorder(arg) stk_cc[cc_pos].pos = cc_pos; } - /* only one c.c. 
in cc_len from cc_in, no need of reordering */ - if (!cc_pos) - continue; - - qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc); + /* reordered if there are two c.c.'s */ + if (cc_pos) { + qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc); + } - cc_len = p - cc_in; - p = cc_in; - for (cc_iter = 0; cc_iter <= cc_pos; cc_iter++) { - p = uvuni_to_utf8(p, stk_cc[cc_iter].uv); + for (i = 0; i <= cc_pos; i++) { + d = uvuni_to_utf8(d, stk_cc[i].uv); + } + if (valid_uvlast) + { + d = uvuni_to_utf8(d, uvlast); } } + *d = '\0'; + SvCUR_set(dst, d - (U8*)SvPVX(dst)); Safefree(stk_cc); RETVAL = dst; OUTPUT: @@ -341,7 +360,7 @@ compose(arg) for (p = s; p < e;) { if (beginning) { - uvS = utf8n_to_uvuni(p, e - p, &retlen, 0); + uvS = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero); p += retlen; @@ -359,7 +378,7 @@ compose(arg) /* to the next Starter */ while (p < e) { - uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero); p += retlen; @@ -433,7 +452,7 @@ checkNFD(arg) preCC = 0; for (p = s; p < e; p += retlen) { - uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero); @@ -474,7 +493,7 @@ checkNFC(arg) preCC = 0; isMAYBE = FALSE; for (p = s; p < e; p += retlen) { - uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero); @@ -535,7 +554,7 @@ checkFCD(arg) preCC = 0; isMAYBE = FALSE; for (p = s; p < e; p += retlen) { - uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (!retlen) croak(ErrRetlenIsZero); @@ -543,7 +562,7 @@ checkFCD(arg) if (sCan) { canlen = (STRLEN)strlen((char *) sCan); - uvLead = utf8n_to_uvuni(sCan, canlen, &canret, 0); + uvLead = utf8n_to_uvuni(sCan, canlen, &canret, AllowAnyUTF); } else { uvLead = uv; @@ -566,7 +585,7 @@ 
checkFCD(arg) pCan = utf8_hop(eCan, -1); if (pCan < sCan) croak(ErrHopBeforeStart); - uvTrail = utf8n_to_uvuni(pCan, eCan - pCan, &canret, 0); + uvTrail = utf8n_to_uvuni(pCan, eCan - pCan, &canret, AllowAnyUTF); preCC = getCombinClass(uvTrail); } else { @@ -712,7 +731,7 @@ splitOnLastStarter(arg) p = utf8_hop(p, -1); if (p < s) croak(ErrHopBeforeStart); - uv = utf8n_to_uvuni(p, e - p, &retlen, 0); + uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF); if (getCombinClass(uv) == 0) /* Last Starter found */ break; } diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README index 4fbdfe19d5..34e24e3219 100644 --- a/ext/Unicode/Normalize/README +++ b/ext/Unicode/Normalize/README @@ -1,4 +1,4 @@ -Unicode/Normalize version 0.25 +Unicode/Normalize version 0.28 =================================== Unicode::Normalize - Unicode Normalization Forms @@ -24,7 +24,8 @@ SYNOPSIS INSTALLATION -Perl 5.6.1 or later (Caution: Perl 5.6.0 is not recommended.) +Perl 5.6.1 or later. +(Perl 5.8.0 or later is recommended.) To install this module (XSUB: needs a C compiler), type the following: @@ -85,9 +86,7 @@ CAVEAT COPYRIGHT AND LICENCE - SADAHIRO Tomoyuki - - SADAHIRO@cpan.org + SADAHIRO Tomoyuki <SADAHIRO@cpan.org> http://homepage1.nifty.com/nomenclator/perl/ diff --git a/ext/Unicode/Normalize/t/illegal.t b/ext/Unicode/Normalize/t/illegal.t new file mode 100644 index 0000000000..76cd83302b --- /dev/null +++ b/ext/Unicode/Normalize/t/illegal.t @@ -0,0 +1,66 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use 5.006001; +use Test; +use strict; +use warnings; + +BEGIN { + use Unicode::Normalize qw(:all); + + unless (exists &Unicode::Normalize::bootstrap or 5.008 <= $]) { + print "1..0 # skipped: XSUB, or Perl 5.8.0 or later". 
+ " needed for this test\n"; + print $@; + exit; + } +} + +BEGIN { plan tests => 112 }; + +######################### + +no warnings qw(utf8); +# To avoid warning in Test.pm, EXPR in ok(EXPR) must be boolean. + +for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF, + 0x1FFFF, 0x10FFFF, 0x110000, 0x7FFFFFFF) +{ + my $c = chr $u; + ok($c eq NFD($c)); # 1 + ok($c eq NFC($c)); # 2 + ok($c eq NFKD($c)); # 3 + ok($c eq NFKC($c)); # 4 + ok($c eq FCD($c)); # 5 + ok($c eq FCC($c)); # 6 + ok($c eq decompose($c)); # 7 + ok($c eq decompose($c,1)); # 8 + ok($c eq reorder($c)); # 9 + ok($c eq compose($c)); # 10 +} + +our $proc; # before the last starter +our $unproc; # the last starter and after + +sub _pack_U { Unicode::Normalize::pack_U(@_) } + +($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0xFFFF)); +ok($proc eq _pack_U(0x41, 0x300, 0x327)); +ok($unproc eq "\x{FFFF}"); + diff --git a/ext/Unicode/Normalize/t/null.t b/ext/Unicode/Normalize/t/null.t new file mode 100644 index 0000000000..ae75752480 --- /dev/null +++ b/ext/Unicode/Normalize/t/null.t @@ -0,0 +1,49 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use strict; +use warnings; + +use Unicode::Normalize qw(:all); +print "1..8\n"; + +print "ok 1\n"; + +# if $_ is not NULL-terminated, test may fail. + +$_ = compose('abc'); +print /c$/ ? "ok" : "not ok", " 2\n"; + +$_ = decompose('abc'); +print /c$/ ? "ok" : "not ok", " 3\n"; + +$_ = reorder('abc'); +print /c$/ ? "ok" : "not ok", " 4\n"; + +$_ = NFD('abc'); +print /c$/ ? "ok" : "not ok", " 5\n"; + +$_ = NFC('abc'); +print /c$/ ? "ok" : "not ok", " 6\n"; + +$_ = NFKD('abc'); +print /c$/ ? "ok" : "not ok", " 7\n"; + +$_ = NFKC('abc'); +print /c$/ ? 
"ok" : "not ok", " 8\n"; + diff --git a/ext/Unicode/Normalize/t/short.t b/ext/Unicode/Normalize/t/short.t new file mode 100644 index 0000000000..1f185acb18 --- /dev/null +++ b/ext/Unicode/Normalize/t/short.t @@ -0,0 +1,55 @@ + +BEGIN { + unless ("A" eq pack('U', 0x41)) { + print "1..0 # Unicode::Normalize " . + "cannot stringify a Unicode code point\n"; + exit 0; + } +} + +BEGIN { + if ($ENV{PERL_CORE}) { + chdir('t') if -d 't'; + @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); + } +} + +######################### + +use strict; +use Unicode::Normalize qw(:all); + +print "1..8\n"; +print "ok 1\n"; + +######################### + +no warnings qw(utf8); + +our $a = "\x{3042}"; # 3-byte length (in UTF-8/UTF-EBCDIC) +{ + use bytes; + substr($a,1,length($a), ''); # remove trailing octets +} + +print NFD($a) eq "\0" + ? "ok" : "not ok", " 2\n"; + +print NFKD($a) eq "\0" + ? "ok" : "not ok", " 3\n"; + +print NFC($a) eq "\0" + ? "ok" : "not ok", " 4\n"; + +print NFKC($a) eq "\0" + ? "ok" : "not ok", " 5\n"; + +print decompose($a) eq "\0" + ? "ok" : "not ok", " 6\n"; + +print reorder($a) eq "\0" + ? "ok" : "not ok", " 7\n"; + +print compose($a) eq "\0" + ? "ok" : "not ok", " 8\n"; + |