diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-11-02 05:18:45 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-11-02 05:18:45 +0000 |
commit | b4e400f9f0f3998e635cdce0c2d2e790cbe42caa (patch) | |
tree | b638c44345271b6f0f9020e6ccc262fb9c21b4c5 | |
parent | 18e548ce1d77b525add551f80ef335f852cf9389 (diff) | |
download | perl-b4e400f9f0f3998e635cdce0c2d2e790cbe42caa.tar.gz |
Unicode: add ToFold mapping. Not used yet; but basically
a more useful mapping for caseless aka case-ignoring than
doing either lc($a) eq lc($b) or uc($a) eq uc($b); the full
algorithm for creating the foldings uses equivalence classes,
see http://www.unicode.org/unicode/reports/tr21/
Hopefully this feature will be used in //i.
(The folding tables were introduced by #12689.)
p4raw-id: //depot/perl@12807
-rw-r--r-- | embed.h | 2 | ||||
-rwxr-xr-x | embed.pl | 1 | ||||
-rw-r--r-- | embedvar.h | 3 | ||||
-rw-r--r-- | intrpvar.h | 1 | ||||
-rw-r--r-- | perl.c | 2 | ||||
-rw-r--r-- | perlapi.h | 2 | ||||
-rw-r--r-- | sv.c | 1 | ||||
-rw-r--r-- | utf8.c | 13 |
8 files changed, 22 insertions, 3 deletions
@@ -725,6 +725,7 @@ #define to_utf8_lower Perl_to_utf8_lower #define to_utf8_upper Perl_to_utf8_upper #define to_utf8_title Perl_to_utf8_title +#define to_utf8_fold Perl_to_utf8_fold #if defined(UNLINK_ALL_VERSIONS) #define unlnk Perl_unlnk #endif @@ -2238,6 +2239,7 @@ #define to_utf8_lower(a,b,c) Perl_to_utf8_lower(aTHX_ a,b,c) #define to_utf8_upper(a,b,c) Perl_to_utf8_upper(aTHX_ a,b,c) #define to_utf8_title(a,b,c) Perl_to_utf8_title(aTHX_ a,b,c) +#define to_utf8_fold(a,b,c) Perl_to_utf8_fold(aTHX_ a,b,c) #if defined(UNLINK_ALL_VERSIONS) #define unlnk(a) Perl_unlnk(aTHX_ a) #endif @@ -1824,6 +1824,7 @@ Ap |UV |to_utf8_case |U8 *p|U8* ustrp|STRLEN *lenp \ Ap |UV |to_utf8_lower |U8 *p|U8* ustrp|STRLEN *lenp Ap |UV |to_utf8_upper |U8 *p|U8* ustrp|STRLEN *lenp Ap |UV |to_utf8_title |U8 *p|U8* ustrp|STRLEN *lenp +Ap |UV |to_utf8_fold |U8 *p|U8* ustrp|STRLEN *lenp #if defined(UNLINK_ALL_VERSIONS) Ap |I32 |unlnk |char* f #endif diff --git a/embedvar.h b/embedvar.h index 95550e6d60..7748218c6d 100644 --- a/embedvar.h +++ b/embedvar.h @@ -418,6 +418,7 @@ #define PL_utf8_print (PERL_GET_INTERP->Iutf8_print) #define PL_utf8_punct (PERL_GET_INTERP->Iutf8_punct) #define PL_utf8_space (PERL_GET_INTERP->Iutf8_space) +#define PL_utf8_tofold (PERL_GET_INTERP->Iutf8_tofold) #define PL_utf8_tolower (PERL_GET_INTERP->Iutf8_tolower) #define PL_utf8_totitle (PERL_GET_INTERP->Iutf8_totitle) #define PL_utf8_toupper (PERL_GET_INTERP->Iutf8_toupper) @@ -710,6 +711,7 @@ #define PL_utf8_print (vTHX->Iutf8_print) #define PL_utf8_punct (vTHX->Iutf8_punct) #define PL_utf8_space (vTHX->Iutf8_space) +#define PL_utf8_tofold (vTHX->Iutf8_tofold) #define PL_utf8_tolower (vTHX->Iutf8_tolower) #define PL_utf8_totitle (vTHX->Iutf8_totitle) #define PL_utf8_toupper (vTHX->Iutf8_toupper) @@ -1005,6 +1007,7 @@ #define PL_Iutf8_print PL_utf8_print #define PL_Iutf8_punct PL_utf8_punct #define PL_Iutf8_space PL_utf8_space +#define PL_Iutf8_tofold PL_utf8_tofold #define PL_Iutf8_tolower PL_utf8_tolower #define PL_Iutf8_totitle PL_utf8_totitle #define PL_Iutf8_toupper PL_utf8_toupper diff --git a/intrpvar.h b/intrpvar.h index 63c9397c55..2d47c8bcd1 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -385,6 +385,7 @@ PERLVAR(Iutf8_mark, SV *) PERLVAR(Iutf8_toupper, SV *) PERLVAR(Iutf8_totitle, SV *) PERLVAR(Iutf8_tolower, SV *) +PERLVAR(Iutf8_tofold, SV *) PERLVAR(Ilast_swash_hv, HV *) PERLVAR(Ilast_swash_klen, U32) PERLVARA(Ilast_swash_key,10, U8) @@ -666,6 +666,7 @@ perl_destruct(pTHXx) SvREFCNT_dec(PL_utf8_toupper); SvREFCNT_dec(PL_utf8_totitle); SvREFCNT_dec(PL_utf8_tolower); + SvREFCNT_dec(PL_utf8_tofold); PL_utf8_alnum = Nullsv; PL_utf8_alnumc = Nullsv; PL_utf8_ascii = Nullsv; @@ -683,6 +684,7 @@ perl_destruct(pTHXx) PL_utf8_toupper = Nullsv; PL_utf8_totitle = Nullsv; PL_utf8_tolower = Nullsv; + PL_utf8_tofold = Nullsv; if (!specialWARN(PL_compiling.cop_warnings)) SvREFCNT_dec(PL_compiling.cop_warnings); @@ -573,6 +573,8 @@ END_EXTERN_C #define PL_utf8_punct (*Perl_Iutf8_punct_ptr(aTHX)) #undef PL_utf8_space #define PL_utf8_space (*Perl_Iutf8_space_ptr(aTHX)) +#undef PL_utf8_tofold +#define PL_utf8_tofold (*Perl_Iutf8_tofold_ptr(aTHX)) #undef PL_utf8_tolower #define PL_utf8_tolower (*Perl_Iutf8_tolower_ptr(aTHX)) #undef PL_utf8_totitle @@ -10096,6 +10096,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_utf8_toupper = sv_dup_inc(proto_perl->Iutf8_toupper, param); PL_utf8_totitle = sv_dup_inc(proto_perl->Iutf8_totitle, param); PL_utf8_tolower = sv_dup_inc(proto_perl->Iutf8_tolower, param); + PL_utf8_tofold = sv_dup_inc(proto_perl->Iutf8_tofold, param); /* swatch cache */ PL_last_swash_hv = Nullhv; /* reinits on demand */ @@ -1223,21 +1223,28 @@ UV Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper"); + &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper"); } UV Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle"); + &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle"); } UV Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower"); + &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower"); +} + +UV +Perl_to_utf8_fold(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) +{ + return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, + &PL_utf8_tofold, "ToFold", "utf8::ToSpecFold"); } /* a "swash" is a swatch hash */ |