From 25e3a4e08a8b645de44458470ff4f139baf56682 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 14 Sep 2016 18:54:23 -0600 Subject: Add isUTF8_CHAR_flags() macro This is like the previous 2 commits, but the macro takes a flags parameter so any combination of the disallowed flags may be used. The others, along with the original isUTF8_CHAR(), are the most commonly desired strictures, and use an implementation of a, hopefully, inlined trie for speed. This is for generality and the major portion of its implementation isn't inlined. --- utf8.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'utf8.h') diff --git a/utf8.h b/utf8.h index 4278ebbf2a..392a86a560 100644 --- a/utf8.h +++ b/utf8.h @@ -1056,6 +1056,40 @@ L. ? 0 \ : is_C9_STRICT_UTF8_CHAR_utf8_no_length_checks(s)) +/* + +=for apidoc Am|STRLEN|isUTF8_CHAR_flags|const U8 *s|const U8 *e| const U32 flags + +Evaluates to non-zero if the first few bytes of the string starting at C<s> and +looking no further than S<C<e - 1>> are well-formed UTF-8, as extended by Perl, +that represents some code point, subject to the restrictions given by C<flags>; +otherwise it evaluates to 0. If non-zero, the value gives how many bytes +starting at C<s> comprise the code point's representation. + +If C<flags> is 0, this gives the same results as C<L</isUTF8_CHAR>>; +if C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results +as C<L</isSTRICT_UTF8_CHAR>>; +and if C<flags> is C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives +the same results as C<L</isC9_STRICT_UTF8_CHAR>>. +Otherwise C<flags> may be any combination of the C<UTF8_DISALLOW_I<foo>> flags +understood by C<L</utf8n_to_uvchr>>, with the same meanings. + +The three alternative macros are for the most commonly needed validations; they +are likely to run somewhat faster than this more general one, as they can be +inlined into your code. + +=cut +*/ + +#define isUTF8_CHAR_flags(s, e, flags) \ + (UNLIKELY((e) <= (s)) \ + ? 0 \ + : (UTF8_IS_INVARIANT(*s)) \ + ? 1 \ + : UNLIKELY(((e) - (s)) < UTF8SKIP(s)) \ + ? 0 \ + : _is_utf8_char_helper(s, e, flags)) + /* Do not use; should be deprecated. 
Use isUTF8_CHAR() instead; this is * retained solely for backwards compatibility */ #define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == n) -- cgit v1.2.1