summaryrefslogtreecommitdiff
path: root/ext/Unicode
diff options
context:
space:
mode:
authorRafael Garcia-Suarez <rgarciasuarez@gmail.com>2004-06-08 18:42:29 +0000
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2004-06-08 18:42:29 +0000
commita092bcfd0f738c1ea0511ee4fa058ee96e488fcb (patch)
tree76e8d72a7cebd201550e3be7279a0ca2156d9b67 /ext/Unicode
parent7687bb236c3854f2aa4d6ffb59b02bfa3d417c57 (diff)
downloadperl-a092bcfd0f738c1ea0511ee4fa058ee96e488fcb.tar.gz
Upgrade to Unicode::Normalize 0.30.
p4raw-id: //depot/perl@22911
Diffstat (limited to 'ext/Unicode')
-rw-r--r--ext/Unicode/Normalize/Changes4
-rw-r--r--ext/Unicode/Normalize/Normalize.pm10
-rw-r--r--ext/Unicode/Normalize/Normalize.xs131
-rw-r--r--ext/Unicode/Normalize/t/illegal.t9
-rw-r--r--ext/Unicode/Normalize/t/short.t15
-rw-r--r--ext/Unicode/Normalize/t/split.t8
6 files changed, 85 insertions, 92 deletions
diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes
index e63656c351..bb1b6930e0 100644
--- a/ext/Unicode/Normalize/Changes
+++ b/ext/Unicode/Normalize/Changes
@@ -1,5 +1,9 @@
Revision history for Perl extension Unicode::Normalize.
+0.30 Sun May 2 14:35:00 2004
+ - XSUB: (perl 5.8.1 or later) improved utf8 upgrade of non-POK (private POK)
+ values like tied scalars, overloaded objects, etc.
+
0.28 Sat Nov 22 23:46:24 2003
- XSUB: even if string contains a malformed, "short" Unicode character,
decompose() and reorder() will be safe. Garbage will be no longer added.
diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm
index ef8ec68ef9..09ef371cc8 100644
--- a/ext/Unicode/Normalize/Normalize.pm
+++ b/ext/Unicode/Normalize/Normalize.pm
@@ -13,7 +13,7 @@ use Carp;
no warnings 'utf8';
-our $VERSION = '0.28';
+our $VERSION = '0.30';
our $PACKAGE = __PACKAGE__;
require Exporter;
@@ -117,6 +117,8 @@ Unicode::Normalize - Unicode Normalization Forms
=head1 SYNOPSIS
+(1) using function names exported by default:
+
use Unicode::Normalize;
$NFD_string = NFD($string); # Normalization Form D
@@ -124,7 +126,7 @@ Unicode::Normalize - Unicode Normalization Forms
$NFKD_string = NFKD($string); # Normalization Form KD
$NFKC_string = NFKC($string); # Normalization Form KC
- or
+(2) using function names exported on request:
use Unicode::Normalize 'normalize';
@@ -382,11 +384,11 @@ C<normalize> and other some functions: on request.
=head1 AUTHOR
-SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>
+SADAHIRO Tomoyuki <SADAHIRO@cpan.org>
http://homepage1.nifty.com/nomenclator/perl/
- Copyright(C) 2001-2003, SADAHIRO Tomoyuki. Japan. All rights reserved.
+ Copyright(C) 2001-2004, SADAHIRO Tomoyuki. Japan. All rights reserved.
This module is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
diff --git a/ext/Unicode/Normalize/Normalize.xs b/ext/Unicode/Normalize/Normalize.xs
index 2ce7cbcc98..13544c9240 100644
--- a/ext/Unicode/Normalize/Normalize.xs
+++ b/ext/Unicode/Normalize/Normalize.xs
@@ -182,34 +182,44 @@ static void sv_cat_uvuni (SV* sv, UV uv)
sv_catpvn(sv, (char *)tmp, t - tmp);
}
+static char * sv_2pvunicode(SV *sv, STRLEN *lp)
+{
+ char *s;
+ STRLEN len;
+ s = (char*)SvPV(sv,len);
+ if (!SvUTF8(sv)) {
+ SV* tmpsv = sv_mortalcopy(sv);
+ if (!SvPOK(tmpsv))
+ (void)sv_pvn_force(tmpsv,&len);
+ sv_utf8_upgrade(tmpsv);
+ s = (char*)SvPV(tmpsv,len);
+ }
+ *lp = len;
+ return s;
+}
+
MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize
SV*
-decompose(arg, compat = &PL_sv_no)
- SV * arg
+decompose(src, compat = &PL_sv_no)
+ SV * src
SV * compat
PROTOTYPE: $;$
PREINIT:
- UV uv;
- SV *src, *dst;
+ SV *dst;
STRLEN srclen, retlen;
U8 *s, *e, *p, *r;
+ UV uv;
bool iscompat;
CODE:
- if (SvUTF8(arg)) {
- src = arg;
- } else {
- src = sv_mortalcopy(arg);
- sv_utf8_upgrade(src);
- }
iscompat = SvTRUE(compat);
+ s = (U8*)sv_2pvunicode(src,&srclen);
+ e = s + srclen;
dst = newSV(1);
(void)SvPOK_only(dst);
SvUTF8_on(dst);
- s = (U8*)SvPV(src,srclen);
- e = s + srclen;
for (p = s; p < e; p += retlen) {
uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
if (!retlen)
@@ -232,11 +242,11 @@ decompose(arg, compat = &PL_sv_no)
SV*
-reorder(arg)
- SV * arg
+reorder(src)
+ SV * src
PROTOTYPE: $
PREINIT:
- SV *src, *dst;
+ SV *dst;
STRLEN srclen, dstlen, retlen, stk_cc_max;
U8 *s, *e, *p, *d, curCC;
UV uv, uvlast;
@@ -244,15 +254,9 @@ reorder(arg)
STRLEN i, cc_pos;
bool valid_uvlast;
CODE:
- if (SvUTF8(arg)) {
- src = arg;
- } else {
- src = sv_mortalcopy(arg);
- sv_utf8_upgrade(src);
- }
-
- s = (U8*)SvPV(src, srclen);
+ s = (U8*)sv_2pvunicode(src,&srclen);
e = s + srclen;
+
dstlen = srclen + 1;
dst = newSV(dstlen);
(void)SvPOK_only(dst);
@@ -326,27 +330,21 @@ reorder(arg)
SV*
-compose(arg)
- SV * arg
+compose(src)
+ SV * src
PROTOTYPE: $
ALIAS:
composeContiguous = 1
PREINIT:
- SV *src, *dst, *tmp;
+ SV *dst, *tmp;
U8 *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
UV uv, uvS, uvComp;
STRLEN srclen, dstlen, tmplen, retlen;
bool beginning = TRUE;
CODE:
- if (SvUTF8(arg)) {
- src = arg;
- } else {
- src = sv_mortalcopy(arg);
- sv_utf8_upgrade(src);
- }
-
- s = (U8*)SvPV(src, srclen);
+ s = (U8*)sv_2pvunicode(src,&srclen);
e = s + srclen;
+
dstlen = srclen + 1;
dst = newSV(dstlen);
(void)SvPOK_only(dst);
@@ -429,25 +427,17 @@ compose(arg)
void
-checkNFD(arg)
- SV * arg
+checkNFD(src)
+ SV * src
PROTOTYPE: $
ALIAS:
checkNFKD = 1
PREINIT:
- UV uv;
- SV *src;
STRLEN srclen, retlen;
U8 *s, *e, *p, curCC, preCC;
+ UV uv;
CODE:
- if (SvUTF8(arg)) {
- src = arg;
- } else {
- src = sv_mortalcopy(arg);
- sv_utf8_upgrade(src);
- }
-
- s = (U8*)SvPV(src,srclen);
+ s = (U8*)sv_2pvunicode(src,&srclen);
e = s + srclen;
preCC = 0;
@@ -468,26 +458,18 @@ checkNFD(arg)
void
-checkNFC(arg)
- SV * arg
+checkNFC(src)
+ SV * src
PROTOTYPE: $
ALIAS:
checkNFKC = 1
PREINIT:
- UV uv;
- SV *src;
STRLEN srclen, retlen;
U8 *s, *e, *p, curCC, preCC;
+ UV uv;
bool isMAYBE;
CODE:
- if (SvUTF8(arg)) {
- src = arg;
- } else {
- src = sv_mortalcopy(arg);
- sv_utf8_upgrade(src);
- }
-
- s = (U8*)SvPV(src,srclen);
+ s = (U8*)sv_2pvunicode(src,&srclen);
e = s + srclen;
preCC = 0;
@@ -528,27 +510,19 @@ checkNFC(arg)
void
-checkFCD(arg)
- SV * arg
+checkFCD(src)
+ SV * src
PROTOTYPE: $
ALIAS:
checkFCC = 1
PREINIT:
- UV uv, uvLead, uvTrail;
- SV *src;
STRLEN srclen, retlen, canlen, canret;
U8 *s, *e, *p, curCC, preCC;
+ UV uv, uvLead, uvTrail;
U8 *sCan, *pCan, *eCan;
bool isMAYBE;
CODE:
- if (SvUTF8(arg)) {
- src = arg;
- } else {
- src = sv_mortalcopy(arg);
- sv_utf8_upgrade(src);
- }
-
- s = (U8*)SvPV(src,srclen);
+ s = (U8*)sv_2pvunicode(src,&srclen);
e = s + srclen;
preCC = 0;
@@ -709,22 +683,15 @@ getCanon(uv)
void
-splitOnLastStarter(arg)
- SV * arg
+splitOnLastStarter(src)
+ SV * src
PREINIT:
- UV uv;
- SV *src, *svp;
+ SV *svp;
STRLEN srclen, retlen;
U8 *s, *e, *p;
+ UV uv;
PPCODE:
- if (SvUTF8(arg)) {
- src = arg;
- } else {
- src = sv_mortalcopy(arg);
- sv_utf8_upgrade(src);
- }
-
- s = (U8*)SvPV(src,srclen);
+ s = (U8*)sv_2pvunicode(src,&srclen);
e = s + srclen;
for (p = e; s < p; ) {
diff --git a/ext/Unicode/Normalize/t/illegal.t b/ext/Unicode/Normalize/t/illegal.t
index 76cd83302b..9d18aadbb6 100644
--- a/ext/Unicode/Normalize/t/illegal.t
+++ b/ext/Unicode/Normalize/t/illegal.t
@@ -14,9 +14,16 @@ BEGIN {
}
}
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
#########################
-use 5.006001;
use Test;
use strict;
use warnings;
diff --git a/ext/Unicode/Normalize/t/short.t b/ext/Unicode/Normalize/t/short.t
index 1f185acb18..d799f4a096 100644
--- a/ext/Unicode/Normalize/t/short.t
+++ b/ext/Unicode/Normalize/t/short.t
@@ -14,6 +14,14 @@ BEGIN {
}
}
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
#########################
use strict;
@@ -26,11 +34,8 @@ print "ok 1\n";
no warnings qw(utf8);
-our $a = "\x{3042}"; # 3-byte length (in UTF-8/UTF-EBCDIC)
-{
- use bytes;
- substr($a,1,length($a), ''); # remove trailing octets
-}
+# U+3042 is 3-byte length (in UTF-8/UTF-EBCDIC)
+our $a = pack 'U0C', unpack 'C', "\x{3042}";
print NFD($a) eq "\0"
? "ok" : "not ok", " 2\n";
diff --git a/ext/Unicode/Normalize/t/split.t b/ext/Unicode/Normalize/t/split.t
index 03b599e123..6bf7ff66b1 100644
--- a/ext/Unicode/Normalize/t/split.t
+++ b/ext/Unicode/Normalize/t/split.t
@@ -14,6 +14,14 @@ BEGIN {
}
}
+BEGIN {
+ unless (5.006001 <= $]) {
+ print "1..0 # skipped: Perl 5.6.1 or later".
+ " needed for this test\n";
+ exit;
+ }
+}
+
#########################
use Test;