diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-05-03 10:12:00 -0600 |
---|---|---|
committer | Jesse Vincent <jesse@bestpractical.com> | 2011-05-03 17:14:06 -0400 |
commit | 36bb2ab64fa2ef022d7870082c0dcc6db902c86e (patch) | |
tree | bfee85e71c3f52ac7076a8148880d5a3a62d693c | |
parent | 7bee57f6d73fe07d73dd9b4a538e4ffd60c7eed8 (diff) | |
download | perl-36bb2ab64fa2ef022d7870082c0dcc6db902c86e.tar.gz |
utf8.c: Add _flags version of to_utf8_fold()
And also to_uni_fold().
The flag allows retrieving either simple or full folds.
The interface is subject to change, so these are marked experimental
and their names begin with underscore. The old versions are turned
into macros calling the new versions with the correct extra parameter.
-rw-r--r-- | embed.fnc | 6 | ||||
-rw-r--r-- | embed.h | 4 | ||||
-rw-r--r-- | global.sym | 4 | ||||
-rw-r--r-- | proto.h | 24 | ||||
-rw-r--r-- | utf8.c | 19 | ||||
-rw-r--r-- | utf8.h | 3 |
6 files changed, 39 insertions, 21 deletions
@@ -562,7 +562,8 @@ ApPR |bool |is_uni_xdigit |UV c Ap |UV |to_uni_upper |UV c|NN U8 *p|NN STRLEN *lenp Ap |UV |to_uni_title |UV c|NN U8 *p|NN STRLEN *lenp Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp -Ap |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp +Amp |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp +AMp |UV |_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|U8 flags ApPR |bool |is_uni_alnum_lc|UV c ApPR |bool |is_uni_idfirst_lc|UV c ApPR |bool |is_uni_alpha_lc|UV c @@ -1322,7 +1323,8 @@ Apd |UV |to_utf8_case |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp \ Apd |UV |to_utf8_lower |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp Apd |UV |to_utf8_upper |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp Apd |UV |to_utf8_title |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp -Apd |UV |to_utf8_fold |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp +Ampd |UV |to_utf8_fold |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp +AMp |UV |_to_utf8_fold_flags|NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp|U8 flags #if defined(UNLINK_ALL_VERSIONS) Ap |I32 |unlnk |NN const char* f #endif @@ -27,6 +27,8 @@ /* Hide global symbols */ #define Gv_AMupdate(a,b) Perl_Gv_AMupdate(aTHX_ a,b) +#define _to_uni_fold_flags(a,b,c,d) Perl__to_uni_fold_flags(aTHX_ a,b,c,d) +#define _to_utf8_fold_flags(a,b,c,d) Perl__to_utf8_fold_flags(aTHX_ a,b,c,d) #define amagic_call(a,b,c,d) Perl_amagic_call(aTHX_ a,b,c,d) #define amagic_deref_call(a,b) Perl_amagic_deref_call(aTHX_ a,b) #define apply_attrs_string(a,b,c,d) Perl_apply_attrs_string(aTHX_ a,b,c,d) @@ -623,7 +625,6 @@ #define taint_env() Perl_taint_env(aTHX) #define taint_proper(a,b) Perl_taint_proper(aTHX_ a,b) #define tmps_grow(a) Perl_tmps_grow(aTHX_ a) -#define to_uni_fold(a,b,c) Perl_to_uni_fold(aTHX_ a,b,c) #define to_uni_lower(a,b,c) Perl_to_uni_lower(aTHX_ a,b,c) #define to_uni_lower_lc(a) Perl_to_uni_lower_lc(aTHX_ a) #define to_uni_title(a,b,c) Perl_to_uni_title(aTHX_ a,b,c) @@ -631,7 +632,6 @@ #define to_uni_upper(a,b,c) Perl_to_uni_upper(aTHX_ a,b,c) #define to_uni_upper_lc(a) Perl_to_uni_upper_lc(aTHX_ a) #define to_utf8_case(a,b,c,d,e,f) Perl_to_utf8_case(aTHX_ a,b,c,d,e,f) -#define to_utf8_fold(a,b,c) Perl_to_utf8_fold(aTHX_ a,b,c) #define to_utf8_lower(a,b,c) Perl_to_utf8_lower(aTHX_ a,b,c) #define to_utf8_title(a,b,c) Perl_to_utf8_title(aTHX_ a,b,c) #define to_utf8_upper(a,b,c) Perl_to_utf8_upper(aTHX_ a,b,c) diff --git a/global.sym b/global.sym index dde11d4072..89fb825745 100644 --- a/global.sym +++ b/global.sym @@ -21,6 +21,8 @@ Perl__append_range_to_invlist Perl__new_invlist Perl__swash_inversion_hash Perl__swash_to_invlist +Perl__to_uni_fold_flags +Perl__to_utf8_fold_flags Perl_amagic_call Perl_amagic_deref_call Perl_apply_attrs_string @@ -732,7 +734,6 @@ Perl_sys_term Perl_taint_env Perl_taint_proper Perl_tmps_grow -Perl_to_uni_fold Perl_to_uni_lower Perl_to_uni_lower_lc Perl_to_uni_title @@ -740,7 +741,6 @@ Perl_to_uni_title_lc Perl_to_uni_upper Perl_to_uni_upper_lc Perl_to_utf8_case -Perl_to_utf8_fold Perl_to_utf8_lower Perl_to_utf8_title Perl_to_utf8_upper @@ -43,6 +43,18 @@ PERL_CALLCONV HV* Perl__swash_to_invlist(pTHX_ SV* const swash) #define PERL_ARGS_ASSERT__SWASH_TO_INVLIST \ assert(swash) +PERL_CALLCONV UV Perl__to_uni_fold_flags(pTHX_ UV c, U8 *p, STRLEN *lenp, U8 flags) + __attribute__nonnull__(pTHX_2) + __attribute__nonnull__(pTHX_3); +#define PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS \ + assert(p); assert(lenp) + +PERL_CALLCONV UV Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); +#define PERL_ARGS_ASSERT__TO_UTF8_FOLD_FLAGS \ + assert(p); assert(ustrp) + PERL_CALLCONV PADOFFSET Perl_allocmy(pTHX_ const char *const name, const STRLEN len, const U32 flags) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_ALLOCMY \ @@ -4213,11 +4225,9 @@ PERL_CALLCONV OP * Perl_tied_method(pTHX_ const char *const methname, SV **sp, S assert(methname); assert(sp); assert(sv); assert(mg) PERL_CALLCONV void Perl_tmps_grow(pTHX_ I32 n); -PERL_CALLCONV UV Perl_to_uni_fold(pTHX_ UV c, U8 *p, STRLEN *lenp) +/* PERL_CALLCONV UV Perl_to_uni_fold(pTHX_ UV c, U8 *p, STRLEN *lenp) __attribute__nonnull__(pTHX_2) - __attribute__nonnull__(pTHX_3); -#define PERL_ARGS_ASSERT_TO_UNI_FOLD \ - assert(p); assert(lenp) + __attribute__nonnull__(pTHX_3); */ PERL_CALLCONV UV Perl_to_uni_lower(pTHX_ UV c, U8 *p, STRLEN *lenp) __attribute__nonnull__(pTHX_2) @@ -4257,11 +4267,9 @@ PERL_CALLCONV UV Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, S #define PERL_ARGS_ASSERT_TO_UTF8_CASE \ assert(p); assert(ustrp); assert(swashp); assert(normal) -PERL_CALLCONV UV Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) +/* PERL_CALLCONV UV Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) __attribute__nonnull__(pTHX_1) - __attribute__nonnull__(pTHX_2); -#define PERL_ARGS_ASSERT_TO_UTF8_FOLD \ - assert(p); assert(ustrp) + __attribute__nonnull__(pTHX_2); */ PERL_CALLCONV UV Perl_to_utf8_lower(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) __attribute__nonnull__(pTHX_1) @@ -1341,12 +1341,12 @@ Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp) } UV -Perl_to_uni_fold(pTHX_ UV c, U8* p, STRLEN *lenp) +Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags) { - PERL_ARGS_ASSERT_TO_UNI_FOLD; + PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS; uvchr_to_utf8(p, c); - return to_utf8_fold(p, p, lenp); + return _to_utf8_fold_flags(p, p, lenp, flags); } /* for now these all assume no locale info available for Unicode > 255 */ @@ -1799,7 +1799,7 @@ of the result. The "swashp" is a pointer to the swash to use. -Both the special and normal mappings are stored lib/unicore/To/Foo.pl, +Both the special and normal mappings are stored in lib/unicore/To/Foo.pl, and loaded by SWASHNEW, using lib/utf8_heavy.pl. The special (usually, but not always, a multicharacter mapping), is tried first. @@ -2026,15 +2026,20 @@ The first character of the foldcased version is returned =cut */ +/* Not currently externally documented is 'flags', which currently is non-zero + * if full case folds are to be used; otherwise simple folds */ + UV -Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp) +Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags) { + const char *specials = (flags) ? "utf8::ToSpecFold" : NULL; + dVAR; - PERL_ARGS_ASSERT_TO_UTF8_FOLD; + PERL_ARGS_ASSERT__TO_UTF8_FOLD_FLAGS; return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, - &PL_utf8_tofold, "ToFold", "utf8::ToSpecFold"); + &PL_utf8_tofold, "ToFold", specials); } /* Note: @@ -16,6 +16,9 @@ # define USE_UTF8_IN_NAMES (PL_hints & HINT_UTF8) #endif +#define to_uni_fold(c, p, lenp) _to_uni_fold_flags(c, p, lenp, 1) +#define to_utf8_fold(c, p, lenp) _to_utf8_fold_flags(c, p, lenp, 1) + /* Source backward compatibility. */ #define uvuni_to_utf8(d, uv) uvuni_to_utf8_flags(d, uv, 0) #define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0) |