summaryrefslogtreecommitdiff
path: root/utf8.h
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2019-03-13 11:41:09 -0600
committerKarl Williamson <khw@cpan.org>2019-03-13 15:42:49 -0600
commit85fcc8f2234ce65ebd31480efc38dc4a3ec8ad13 (patch)
treecc30524e804b1e8d9cf3e1014229a6c0401b0f29 /utf8.h
parent823c3b2daca3409863f10ec5e1c6d416d2614a5a (diff)
downloadperl-85fcc8f2234ce65ebd31480efc38dc4a3ec8ad13.tar.gz
Add UTF8_SAFE_SKIP API macro
This version of UTF8SKIP refuses to advance beyond the end pointer
Diffstat (limited to 'utf8.h')
-rw-r--r--utf8.h11
1 files changed, 11 insertions, 0 deletions
diff --git a/utf8.h b/utf8.h
index 99e795d3a4..7773007e49 100644
--- a/utf8.h
+++ b/utf8.h
@@ -498,6 +498,17 @@ only) byte is pointed to by C<s>.
#define UTF8SKIP(s) PL_utf8skip[*(const U8*)(s)]
#define UTF8_SKIP(s) UTF8SKIP(s)
+/*
+
+=for apidoc Am|STRLEN|UTF8_SAFE_SKIP|char* s|char* e
+returns the number of bytes in the UTF-8 encoded character whose first (perhaps
+only) byte is pointed to by C<s>, capped at C<e> - C<s>. C<s> must precede C<e>.
+
+=cut
+ */
+#define UTF8_SAFE_SKIP(s, e) (__ASSERT_((e) > (s)) \
+ MIN(((e) - (s)), UTF8_SKIP(s)))
+
/* Most code that says 'UNI_' really means the native value for code points up
* through 255 */
#define UNI_IS_INVARIANT(cp) UVCHR_IS_INVARIANT(cp)