summaryrefslogtreecommitdiff
path: root/utf8.h
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2021-06-25 12:47:29 -0600
committerKarl Williamson <khw@cpan.org>2021-08-07 05:14:44 -0600
commit39fafb79d3ca9909dce0911477aa8340d9402f98 (patch)
tree48f7e79c031e20f8134ea5a26cd4b9fbdfc9ba61 /utf8.h
parent6110285c6da30a8505d3b73bbbd0cbf6e0fdecac (diff)
downloadperl-39fafb79d3ca9909dce0911477aa8340d9402f98.tar.gz
utf8.h: Document some #defines
The reorganization in the previous commit revealed some undocumented public macros.
Diffstat (limited to 'utf8.h')
-rw-r--r--utf8.h37
1 files changed, 37 insertions, 0 deletions
diff --git a/utf8.h b/utf8.h
index ac32f3bde0..43cb68db30 100644
--- a/utf8.h
+++ b/utf8.h
@@ -855,6 +855,11 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
#define UNICODE_SURROGATE_LAST 0xDFFF
/*
+=for apidoc Am|bool|UNICODE_IS_SURROGATE|const UV uv
+
+Returns a boolean as to whether or not C<uv> is one of the Unicode surrogate
+code points.
+
=for apidoc Am|bool|UTF8_IS_SURROGATE|const U8 *s|const U8 *e
Evaluates to non-zero if the first few bytes of the string starting at C<s> and
@@ -877,6 +882,19 @@ point's representation.
Evaluates to 0xFFFD, the code point of the Unicode REPLACEMENT CHARACTER
+=for apidoc Am|bool|UNICODE_IS_REPLACEMENT|const UV uv
+
+Returns a boolean as to whether or not C<uv> is the Unicode REPLACEMENT
+CHARACTER.
+
+=for apidoc Am|bool|UTF8_IS_REPLACEMENT|const U8 *s|const U8 *e
+
+Evaluates to non-zero if the first few bytes of the string starting at C<s> and
+looking no further than S<C<e - 1>> are well-formed UTF-8 that represents the
+Unicode REPLACEMENT CHARACTER; otherwise it evaluates to 0. If non-zero, the
+value gives how many bytes starting at C<s> comprise the code point's
+representation.
+
=cut
*/
#define UNICODE_REPLACEMENT 0xFFFD
@@ -887,6 +905,16 @@ Evaluates to 0xFFFD, the code point of the Unicode REPLACEMENT CHARACTER
* let's be conservative and do as Unicode says. */
#define PERL_UNICODE_MAX 0x10FFFF
+/*
+
+=for apidoc Am|bool|UNICODE_IS_SUPER|const UV uv
+
+Returns a boolean as to whether or not C<uv> is above the maximum legal Unicode
+code point of U+10FFFF.
+
+=cut
+*/
+
/* Tests whether 'uv', once cast to UV, lies beyond PERL_UNICODE_MAX.  The
 * comparison result is passed through UNLIKELY(). */
#define UNICODE_IS_SUPER(uv)                                                \
                                UNLIKELY(PERL_UNICODE_MAX < (UV) (uv))
/*
@@ -933,6 +961,15 @@ fit in an IV on the current machine.
? is_utf8_char_helper(s, s + UTF8SKIP(s), 0) : 0)
#endif
+/*
+=for apidoc Am|bool|UNICODE_IS_NONCHAR|const UV uv
+
+Returns a boolean as to whether or not C<uv> is one of the Unicode
+non-character code points.
+
+=cut
+*/
+
/* Does 'uv' fall within 0xFDD0..0xFDEF, the contiguous run of 32
 * noncharacter code points?  The range test is delegated to inRANGE() and
 * its result is passed through UNLIKELY(). */
#define UNICODE_IS_32_CONTIGUOUS_NONCHARS(uv)                               \
                                    UNLIKELY(inRANGE(uv, 0xFDD0, 0xFDEF))