summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-05-03 10:12:00 -0600
committerJesse Vincent <jesse@bestpractical.com>2011-05-03 17:14:06 -0400
commit36bb2ab64fa2ef022d7870082c0dcc6db902c86e (patch)
treebfee85e71c3f52ac7076a8148880d5a3a62d693c
parent7bee57f6d73fe07d73dd9b4a538e4ffd60c7eed8 (diff)
downloadperl-36bb2ab64fa2ef022d7870082c0dcc6db902c86e.tar.gz
utf8.c: Add _flags version of to_utf8_fold()
And also to_uni_fold(). The flag allows retrieving either simple or full folds. The interface is subject to change, so these are marked experimental and their names begin with underscore. The old versions are turned into macros calling the new versions with the correct extra parameter.
-rw-r--r--embed.fnc6
-rw-r--r--embed.h4
-rw-r--r--global.sym4
-rw-r--r--proto.h24
-rw-r--r--utf8.c19
-rw-r--r--utf8.h3
6 files changed, 39 insertions, 21 deletions
diff --git a/embed.fnc b/embed.fnc
index 288dacdfca..65116ad681 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -562,7 +562,8 @@ ApPR |bool |is_uni_xdigit |UV c
Ap |UV |to_uni_upper |UV c|NN U8 *p|NN STRLEN *lenp
Ap |UV |to_uni_title |UV c|NN U8 *p|NN STRLEN *lenp
Ap |UV |to_uni_lower |UV c|NN U8 *p|NN STRLEN *lenp
-Ap |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp
+Amp |UV |to_uni_fold |UV c|NN U8 *p|NN STRLEN *lenp
+AMp |UV |_to_uni_fold_flags|UV c|NN U8 *p|NN STRLEN *lenp|U8 flags
ApPR |bool |is_uni_alnum_lc|UV c
ApPR |bool |is_uni_idfirst_lc|UV c
ApPR |bool |is_uni_alpha_lc|UV c
@@ -1322,7 +1323,8 @@ Apd |UV |to_utf8_case |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp \
Apd |UV |to_utf8_lower |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
Apd |UV |to_utf8_upper |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
Apd |UV |to_utf8_title |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
-Apd |UV |to_utf8_fold |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
+Ampd |UV |to_utf8_fold |NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp
+AMp |UV |_to_utf8_fold_flags|NN const U8 *p|NN U8* ustrp|NULLOK STRLEN *lenp|U8 flags
#if defined(UNLINK_ALL_VERSIONS)
Ap |I32 |unlnk |NN const char* f
#endif
diff --git a/embed.h b/embed.h
index 89c4fa81be..9ff64404f6 100644
--- a/embed.h
+++ b/embed.h
@@ -27,6 +27,8 @@
/* Hide global symbols */
#define Gv_AMupdate(a,b) Perl_Gv_AMupdate(aTHX_ a,b)
+#define _to_uni_fold_flags(a,b,c,d) Perl__to_uni_fold_flags(aTHX_ a,b,c,d)
+#define _to_utf8_fold_flags(a,b,c,d) Perl__to_utf8_fold_flags(aTHX_ a,b,c,d)
#define amagic_call(a,b,c,d) Perl_amagic_call(aTHX_ a,b,c,d)
#define amagic_deref_call(a,b) Perl_amagic_deref_call(aTHX_ a,b)
#define apply_attrs_string(a,b,c,d) Perl_apply_attrs_string(aTHX_ a,b,c,d)
@@ -623,7 +625,6 @@
#define taint_env() Perl_taint_env(aTHX)
#define taint_proper(a,b) Perl_taint_proper(aTHX_ a,b)
#define tmps_grow(a) Perl_tmps_grow(aTHX_ a)
-#define to_uni_fold(a,b,c) Perl_to_uni_fold(aTHX_ a,b,c)
#define to_uni_lower(a,b,c) Perl_to_uni_lower(aTHX_ a,b,c)
#define to_uni_lower_lc(a) Perl_to_uni_lower_lc(aTHX_ a)
#define to_uni_title(a,b,c) Perl_to_uni_title(aTHX_ a,b,c)
@@ -631,7 +632,6 @@
#define to_uni_upper(a,b,c) Perl_to_uni_upper(aTHX_ a,b,c)
#define to_uni_upper_lc(a) Perl_to_uni_upper_lc(aTHX_ a)
#define to_utf8_case(a,b,c,d,e,f) Perl_to_utf8_case(aTHX_ a,b,c,d,e,f)
-#define to_utf8_fold(a,b,c) Perl_to_utf8_fold(aTHX_ a,b,c)
#define to_utf8_lower(a,b,c) Perl_to_utf8_lower(aTHX_ a,b,c)
#define to_utf8_title(a,b,c) Perl_to_utf8_title(aTHX_ a,b,c)
#define to_utf8_upper(a,b,c) Perl_to_utf8_upper(aTHX_ a,b,c)
diff --git a/global.sym b/global.sym
index dde11d4072..89fb825745 100644
--- a/global.sym
+++ b/global.sym
@@ -21,6 +21,8 @@ Perl__append_range_to_invlist
Perl__new_invlist
Perl__swash_inversion_hash
Perl__swash_to_invlist
+Perl__to_uni_fold_flags
+Perl__to_utf8_fold_flags
Perl_amagic_call
Perl_amagic_deref_call
Perl_apply_attrs_string
@@ -732,7 +734,6 @@ Perl_sys_term
Perl_taint_env
Perl_taint_proper
Perl_tmps_grow
-Perl_to_uni_fold
Perl_to_uni_lower
Perl_to_uni_lower_lc
Perl_to_uni_title
@@ -740,7 +741,6 @@ Perl_to_uni_title_lc
Perl_to_uni_upper
Perl_to_uni_upper_lc
Perl_to_utf8_case
-Perl_to_utf8_fold
Perl_to_utf8_lower
Perl_to_utf8_title
Perl_to_utf8_upper
diff --git a/proto.h b/proto.h
index 0553531229..c83fd12fc5 100644
--- a/proto.h
+++ b/proto.h
@@ -43,6 +43,18 @@ PERL_CALLCONV HV* Perl__swash_to_invlist(pTHX_ SV* const swash)
#define PERL_ARGS_ASSERT__SWASH_TO_INVLIST \
assert(swash)
+PERL_CALLCONV UV Perl__to_uni_fold_flags(pTHX_ UV c, U8 *p, STRLEN *lenp, U8 flags)
+ __attribute__nonnull__(pTHX_2)
+ __attribute__nonnull__(pTHX_3);
+#define PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS \
+ assert(p); assert(lenp)
+
+PERL_CALLCONV UV Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT__TO_UTF8_FOLD_FLAGS \
+ assert(p); assert(ustrp)
+
PERL_CALLCONV PADOFFSET Perl_allocmy(pTHX_ const char *const name, const STRLEN len, const U32 flags)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_ALLOCMY \
@@ -4213,11 +4225,9 @@ PERL_CALLCONV OP * Perl_tied_method(pTHX_ const char *const methname, SV **sp, S
assert(methname); assert(sp); assert(sv); assert(mg)
PERL_CALLCONV void Perl_tmps_grow(pTHX_ I32 n);
-PERL_CALLCONV UV Perl_to_uni_fold(pTHX_ UV c, U8 *p, STRLEN *lenp)
+/* PERL_CALLCONV UV Perl_to_uni_fold(pTHX_ UV c, U8 *p, STRLEN *lenp)
__attribute__nonnull__(pTHX_2)
- __attribute__nonnull__(pTHX_3);
-#define PERL_ARGS_ASSERT_TO_UNI_FOLD \
- assert(p); assert(lenp)
+ __attribute__nonnull__(pTHX_3); */
PERL_CALLCONV UV Perl_to_uni_lower(pTHX_ UV c, U8 *p, STRLEN *lenp)
__attribute__nonnull__(pTHX_2)
@@ -4257,11 +4267,9 @@ PERL_CALLCONV UV Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, S
#define PERL_ARGS_ASSERT_TO_UTF8_CASE \
assert(p); assert(ustrp); assert(swashp); assert(normal)
-PERL_CALLCONV UV Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
+/* PERL_CALLCONV UV Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
__attribute__nonnull__(pTHX_1)
- __attribute__nonnull__(pTHX_2);
-#define PERL_ARGS_ASSERT_TO_UTF8_FOLD \
- assert(p); assert(ustrp)
+ __attribute__nonnull__(pTHX_2); */
PERL_CALLCONV UV Perl_to_utf8_lower(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
__attribute__nonnull__(pTHX_1)
diff --git a/utf8.c b/utf8.c
index 9c2061ddae..11c2fa40bd 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1341,12 +1341,12 @@ Perl_to_uni_lower(pTHX_ UV c, U8* p, STRLEN *lenp)
}
UV
-Perl_to_uni_fold(pTHX_ UV c, U8* p, STRLEN *lenp)
+Perl__to_uni_fold_flags(pTHX_ UV c, U8* p, STRLEN *lenp, U8 flags)
{
- PERL_ARGS_ASSERT_TO_UNI_FOLD;
+ PERL_ARGS_ASSERT__TO_UNI_FOLD_FLAGS;
uvchr_to_utf8(p, c);
- return to_utf8_fold(p, p, lenp);
+ return _to_utf8_fold_flags(p, p, lenp, flags);
}
/* for now these all assume no locale info available for Unicode > 255 */
@@ -1799,7 +1799,7 @@ of the result.
The "swashp" is a pointer to the swash to use.
-Both the special and normal mappings are stored lib/unicore/To/Foo.pl,
+Both the special and normal mappings are stored in lib/unicore/To/Foo.pl,
and loaded by SWASHNEW, using lib/utf8_heavy.pl. The special (usually,
but not always, a multicharacter mapping), is tried first.
@@ -2026,15 +2026,20 @@ The first character of the foldcased version is returned
=cut */
+/* Not currently externally documented is 'flags', which currently is non-zero
+ * if full case folds are to be used; otherwise simple folds */
+
UV
-Perl_to_utf8_fold(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp)
+Perl__to_utf8_fold_flags(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp, U8 flags)
{
+ const char *specials = (flags) ? "utf8::ToSpecFold" : NULL;
+
dVAR;
- PERL_ARGS_ASSERT_TO_UTF8_FOLD;
+ PERL_ARGS_ASSERT__TO_UTF8_FOLD_FLAGS;
return Perl_to_utf8_case(aTHX_ p, ustrp, lenp,
- &PL_utf8_tofold, "ToFold", "utf8::ToSpecFold");
+ &PL_utf8_tofold, "ToFold", specials);
}
/* Note:
diff --git a/utf8.h b/utf8.h
index a08ba04b60..c40fb5883e 100644
--- a/utf8.h
+++ b/utf8.h
@@ -16,6 +16,9 @@
# define USE_UTF8_IN_NAMES (PL_hints & HINT_UTF8)
#endif
+#define to_uni_fold(c, p, lenp) _to_uni_fold_flags(c, p, lenp, 1)
+#define to_utf8_fold(c, p, lenp) _to_utf8_fold_flags(c, p, lenp, 1)
+
/* Source backward compatibility. */
#define uvuni_to_utf8(d, uv) uvuni_to_utf8_flags(d, uv, 0)
#define is_utf8_string_loc(s, len, ep) is_utf8_string_loclen(s, len, ep, 0)