diff options
author | Karl Williamson <khw@cpan.org> | 2021-06-25 13:09:08 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-08-14 06:47:43 -0600 |
commit | 22f363ffd253b5142b1138438c30f34da9494d4a (patch) | |
tree | a02ca691b6818aafce1300ab4cdd3706ff7c4087 /utf8.h | |
parent | 22afef87083fc7ad1b066588f5c20637fd387805 (diff) | |
download | perl-22f363ffd253b5142b1138438c30f34da9494d4a.tar.gz |
Make macro isUTF8_CHAR_flags an inline fcn
This makes it use the fast DFA for this functionality.
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 39 |
1 files changed, 0 insertions, 39 deletions
```diff
@@ -1219,45 +1219,6 @@ point's representation.
 #define bytes_from_utf8(s, lenp, is_utf8p) \
     bytes_from_utf8_loc(s, lenp, is_utf8p, 0)
 
-/*
-
-=for apidoc Am|STRLEN|isUTF8_CHAR_flags|const U8 *s|const U8 *e| const U32 flags
-
-Evaluates to non-zero if the first few bytes of the string starting at C<s> and
-looking no further than S<C<e - 1>> are well-formed UTF-8, as extended by Perl,
-that represents some code point, subject to the restrictions given by C<flags>;
-otherwise it evaluates to 0.  If non-zero, the value gives how many bytes
-starting at C<s> comprise the code point's representation.  Any bytes remaining
-before C<e>, but beyond the ones needed to form the first code point in C<s>,
-are not examined.
-
-If C<flags> is 0, this gives the same results as C<L</isUTF8_CHAR>>;
-if C<flags> is C<UTF8_DISALLOW_ILLEGAL_INTERCHANGE>, this gives the same results
-as C<L</isSTRICT_UTF8_CHAR>>;
-and if C<flags> is C<UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE>, this gives
-the same results as C<L</isC9_STRICT_UTF8_CHAR>>.
-Otherwise C<flags> may be any combination of the C<UTF8_DISALLOW_I<foo>> flags
-understood by C<L</utf8n_to_uvchr>>, with the same meanings.
-
-The three alternative macros are for the most commonly needed validations; they
-are likely to run somewhat faster than this more general one, as they can be
-inlined into your code.
-
-Use L</is_utf8_string_flags>, L</is_utf8_string_loc_flags>, and
-L</is_utf8_string_loclen_flags> to check entire strings.
-
-=cut
-*/
-
-#define isUTF8_CHAR_flags(s, e, flags) \
-    (UNLIKELY((e) <= (s)) \
-    ? 0 \
-    : (UTF8_IS_INVARIANT(*s)) \
-      ? 1 \
-      : UNLIKELY(((e) - (s)) < UTF8SKIP(s)) \
-        ? 0 \
-        : is_utf8_char_helper(s, e, flags))
-
 /* Do not use; should be deprecated.  Use isUTF8_CHAR() instead; this is
  * retained solely for backwards compatibility */
 #define IS_UTF8_CHAR(p, n)      (isUTF8_CHAR(p, (p) + (n)) == n)
```