diff options
author | Karl Williamson <khw@cpan.org> | 2016-09-11 22:18:57 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-09-17 21:10:50 -0600 |
commit | f1c999a79ad93bb81cbb7b1bec96e06c33773b81 (patch) | |
tree | 40cd551dc7ef325bc99caf71ce437d9232f5e3f7 | |
parent | 6cbb924831d50981620d4c51f8b12da5f269e569 (diff) | |
download | perl-f1c999a79ad93bb81cbb7b1bec96e06c33773b81.tar.gz |
Add is_utf8_valid_partial_char_flags()
This is a generalization of is_utf8_valid_partial_char to allow the
caller to automatically exclude things such as surrogates.
-rw-r--r-- | embed.fnc | 5 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | inline.h | 31 | ||||
-rw-r--r-- | proto.h | 7 |
4 files changed, 38 insertions, 7 deletions
@@ -744,7 +744,10 @@ Abmnpd |STRLEN |is_utf8_char_buf|NN const U8 *buf|NN const U8 *buf_end AnipdP |bool |is_utf8_string |NN const U8 *s|STRLEN len Anpdmb |bool |is_utf8_string_loc|NN const U8 *s|STRLEN len|NN const U8 **ep Anipd |bool |is_utf8_string_loclen|NN const U8 *s|STRLEN len|NULLOK const U8 **ep|NULLOK STRLEN *el -AnidP |bool |is_utf8_valid_partial_char|NN const U8 * const s|NN const U8 * const e +AmndP |bool |is_utf8_valid_partial_char \ + |NN const U8 * const s|NN const U8 * const e +AnidP |bool |is_utf8_valid_partial_char_flags \ + |NN const U8 * const s|NN const U8 * const e|const U32 flags AMpR |bool |_is_uni_FOO|const U8 classnum|const UV c AMpR |bool |_is_utf8_FOO|const U8 classnum|NN const U8 *p ADMpR |bool |is_utf8_alnum |NN const U8 *p @@ -296,7 +296,7 @@ #define is_utf8_string Perl_is_utf8_string #define is_utf8_string_loclen Perl_is_utf8_string_loclen #define is_utf8_upper(a) Perl_is_utf8_upper(aTHX_ a) -#define is_utf8_valid_partial_char S_is_utf8_valid_partial_char +#define is_utf8_valid_partial_char_flags S_is_utf8_valid_partial_char_flags #define is_utf8_xdigit(a) Perl_is_utf8_xdigit(aTHX_ a) #define is_utf8_xidcont(a) Perl_is_utf8_xidcont(aTHX_ a) #define is_utf8_xidfirst(a) Perl_is_utf8_xidfirst(aTHX_ a) @@ -526,17 +526,42 @@ failure can be signalled without having to wait for the next read. =cut */ +#define is_utf8_valid_partial_char(s, e) is_utf8_valid_partial_char_flags(s, e, 0) + +/* + +=for apidoc is_utf8_valid_partial_char_flags + +Like C<L</is_utf8_valid_partial_char>>, it returns a boolean giving whether +or not the input is a valid UTF-8 encoded partial character, but it takes an +extra parameter, C<flags>, which can further restrict which code points are +considered valid. + +If C<flags> is 0, this behaves identically to +C<L</is_utf8_valid_partial_char>>. Otherwise C<flags> can be any combination +of the C<UTF8_DISALLOW_I<foo>> flags accepted by C<L</utf8n_to_uvchr>>. If +there is any sequence of bytes that can complete the input partial character in +such a way that a non-prohibited character is formed, the function returns +TRUE; otherwise FALSE. Non characters cannot be determined based on partial +character input. But many of the other possible excluded types can be +determined from just the first one or two bytes. + +=cut + */ + PERL_STATIC_INLINE bool -S_is_utf8_valid_partial_char(const U8 * const s, const U8 * const e) +S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags) { + PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS; - PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR; + assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE + |UTF8_DISALLOW_ABOVE_31_BIT))); if (s >= e || s + UTF8SKIP(s) <= e) { return FALSE; } - return cBOOL(_is_utf8_char_helper(s, e, 0)); + return cBOOL(_is_utf8_char_helper(s, e, flags)); } /* ------------------------------- perl.h ----------------------------- */ @@ -1628,9 +1628,12 @@ PERL_CALLCONV bool Perl_is_utf8_upper(pTHX_ const U8 *p) #define PERL_ARGS_ASSERT_IS_UTF8_UPPER \ assert(p) -PERL_STATIC_INLINE bool S_is_utf8_valid_partial_char(const U8 * const s, const U8 * const e) +/* PERL_CALLCONV bool is_utf8_valid_partial_char(const U8 * const s, const U8 * const e) + __attribute__pure__; */ + +PERL_STATIC_INLINE bool S_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags) __attribute__pure__; -#define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR \ +#define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \ assert(s); assert(e) PERL_CALLCONV bool Perl_is_utf8_xdigit(pTHX_ const U8 *p) |