summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2016-09-11 22:18:57 -0600
committer: Karl Williamson <khw@cpan.org> 2016-09-17 21:10:50 -0600
commit: f1c999a79ad93bb81cbb7b1bec96e06c33773b81 (patch)
tree: 40cd551dc7ef325bc99caf71ce437d9232f5e3f7
parent: 6cbb924831d50981620d4c51f8b12da5f269e569 (diff)
download: perl-f1c999a79ad93bb81cbb7b1bec96e06c33773b81.tar.gz
Add is_utf8_valid_partial_char_flags()
This is a generalization of is_utf8_valid_partial_char to allow the caller to automatically exclude things such as surrogates.
-rw-r--r-- embed.fnc 5
-rw-r--r-- embed.h 2
-rw-r--r-- inline.h 31
-rw-r--r-- proto.h 7
4 files changed, 38 insertions, 7 deletions
diff --git a/embed.fnc b/embed.fnc
index 450a4868d7..3bdc4266b7 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -744,7 +744,10 @@ Abmnpd |STRLEN |is_utf8_char_buf|NN const U8 *buf|NN const U8 *buf_end
AnipdP |bool |is_utf8_string |NN const U8 *s|STRLEN len
Anpdmb |bool |is_utf8_string_loc|NN const U8 *s|STRLEN len|NN const U8 **ep
Anipd |bool |is_utf8_string_loclen|NN const U8 *s|STRLEN len|NULLOK const U8 **ep|NULLOK STRLEN *el
-AnidP |bool |is_utf8_valid_partial_char|NN const U8 * const s|NN const U8 * const e
+AmndP |bool |is_utf8_valid_partial_char \
+ |NN const U8 * const s|NN const U8 * const e
+AnidP |bool |is_utf8_valid_partial_char_flags \
+ |NN const U8 * const s|NN const U8 * const e|const U32 flags
AMpR |bool |_is_uni_FOO|const U8 classnum|const UV c
AMpR |bool |_is_utf8_FOO|const U8 classnum|NN const U8 *p
ADMpR |bool |is_utf8_alnum |NN const U8 *p
diff --git a/embed.h b/embed.h
index 8ff1c93794..50a19a41ba 100644
--- a/embed.h
+++ b/embed.h
@@ -296,7 +296,7 @@
#define is_utf8_string Perl_is_utf8_string
#define is_utf8_string_loclen Perl_is_utf8_string_loclen
#define is_utf8_upper(a) Perl_is_utf8_upper(aTHX_ a)
-#define is_utf8_valid_partial_char S_is_utf8_valid_partial_char
+#define is_utf8_valid_partial_char_flags S_is_utf8_valid_partial_char_flags
#define is_utf8_xdigit(a) Perl_is_utf8_xdigit(aTHX_ a)
#define is_utf8_xidcont(a) Perl_is_utf8_xidcont(aTHX_ a)
#define is_utf8_xidfirst(a) Perl_is_utf8_xidfirst(aTHX_ a)
diff --git a/inline.h b/inline.h
index 41d0a9cdce..44fb484d48 100644
--- a/inline.h
+++ b/inline.h
@@ -526,17 +526,42 @@ failure can be signalled without having to wait for the next read.
=cut
*/
+#define is_utf8_valid_partial_char(s, e) is_utf8_valid_partial_char_flags(s, e, 0)
+
+/*
+
+=for apidoc is_utf8_valid_partial_char_flags
+
+Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether
+or not the input is a valid UTF-8 encoded partial character, but it takes an
+extra parameter, C<flags>, which can further restrict which code points are
+considered valid.
+
+If C<flags> is 0, this behaves identically to
+C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination
+of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If
+there is any sequence of bytes that can complete the input partial character in
+such a way that a non-prohibited character is formed, the function returns
+TRUE; otherwise FALSE. Non characters cannot be determined based on partial
+character input. But many of the other possible excluded types can be
+determined from just the first one or two bytes.
+
+=cut
+ */
+
PERL_STATIC_INLINE bool
-S_is_utf8_valid_partial_char(const U8 * const s, const U8 * const e)
+S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
{
+ PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
- PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR;
+ assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
+ |UTF8_DISALLOW_ABOVE_31_BIT)));
if (s >= e || s + UTF8SKIP(s) <= e) {
return FALSE;
}
- return cBOOL(_is_utf8_char_helper(s, e, 0));
+ return cBOOL(_is_utf8_char_helper(s, e, flags));
}
/* ------------------------------- perl.h ----------------------------- */
diff --git a/proto.h b/proto.h
index c20986b627..b13b42e1e1 100644
--- a/proto.h
+++ b/proto.h
@@ -1628,9 +1628,12 @@ PERL_CALLCONV bool Perl_is_utf8_upper(pTHX_ const U8 *p)
#define PERL_ARGS_ASSERT_IS_UTF8_UPPER \
assert(p)
-PERL_STATIC_INLINE bool S_is_utf8_valid_partial_char(const U8 * const s, const U8 * const e)
+/* PERL_CALLCONV bool is_utf8_valid_partial_char(const U8 * const s, const U8 * const e)
+ __attribute__pure__; */
+
+PERL_STATIC_INLINE bool S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
__attribute__pure__;
-#define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR \
+#define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \
assert(s); assert(e)
PERL_CALLCONV bool Perl_is_utf8_xdigit(pTHX_ const U8 *p)