summaryrefslogtreecommitdiff
path: root/inline.h
diff options
context:
space:
mode:
authorTony Cook <tony@develop-help.com>2016-10-31 14:28:34 +1100
committerTony Cook <tony@develop-help.com>2016-11-09 13:29:46 +1100
commit65df57a84b55413fcde1e64b86e3d740485536d3 (patch)
tree848c157d38e8a30b2ccf0bf95d4865174daf28ca /inline.h
parenta7ea90b1451006596c4574b1e65894f0bda1bafc (diff)
downloadperl-65df57a84b55413fcde1e64b86e3d740485536d3.tar.gz
(perl #129000) create a safer utf8_hop()
Unlike utf8_hop(), utf8_hop_safe() won't navigate before the beginning or after the end of the supplied buffer. The original version of this put all of the logic into utf8_hop_safe(), but in many cases a caller specifically needs to go forward or backward, and supplying the other limit made the function less usable, so I split the function into forward and backward cases. This split may also make inlining these functions more efficient or more likely.
Diffstat (limited to 'inline.h')
-rw-r--r--inline.h111
1 files changed, 111 insertions, 0 deletions
diff --git a/inline.h b/inline.h
index 66ba348714..adcd85d24b 100644
--- a/inline.h
+++ b/inline.h
@@ -920,6 +920,117 @@ Perl_utf8_hop(const U8 *s, SSize_t off)
}
/*
+=for apidoc utf8_hop_forward
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+forward.
+
+C<off> must be non-negative.
+
+C<s> must be before or equal to C<end>.
+
+When moving forward it will not move beyond C<end>.
+
+Will not exceed this limit even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end)
+{
+    PERL_ARGS_ASSERT_UTF8_HOP_FORWARD;
+
+    /* Note: cannot use UTF8_IS_...() too eagerly here since e.g.
+     * the bitops (especially ~) can create illegal UTF-8.
+     * In other words: in Perl UTF-8 is not just for Unicode. */
+
+    assert(s <= end);
+    assert(off >= 0);
+
+    /* Advance one character per iteration.  The 's < end' test stops us
+     * from dereferencing 'end' itself (one past the supplied buffer) when
+     * called with s == end and a non-zero 'off'; the result is 'end' in
+     * that case either way.  Once inside the loop, 's' is only ever
+     * advanced to a position strictly before 'end'. */
+    while (off-- && s < end) {
+        /* Byte length of the character at 's', as reported by UTF8SKIP() */
+        STRLEN skip = UTF8SKIP(s);
+
+        /* Clamp to 'end' if this character reaches or would run past it */
+        if ((STRLEN)(end - s) <= skip)
+            return (U8 *)end;
+        s += skip;
+    }
+
+    return (U8 *)s;
+}
+
+/*
+=for apidoc utf8_hop_back
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+backward.
+
+C<off> must be non-positive.
+
+C<s> must be after or equal to C<start>.
+
+When moving backward it will not move before C<start>.
+
+Will not exceed this limit even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_back(const U8 *s, SSize_t off, const U8 *start)
+{
+    PERL_ARGS_ASSERT_UTF8_HOP_BACK;
+
+    /* Be wary of leaning on the UTF8_IS_...() macros here: Perl's bitwise
+     * operators (~ in particular) can produce byte sequences that are not
+     * legal UTF-8, so the input cannot be assumed to be well formed. */
+
+    assert(start <= s);
+    assert(off <= 0);
+
+    /* 'off' counts up towards zero, one step per character hopped back;
+     * stop early if the start of the buffer is reached first. */
+    for (; off != 0 && s > start; off++) {
+        /* Step back one byte, then keep stepping while sitting on a
+         * continuation byte, never going below 'start'. */
+        do {
+            s--;
+        } while (UTF8_IS_CONTINUATION(*s) && s > start);
+    }
+
+    return (U8 *)s;
+}
+
+/*
+=for apidoc utf8_hop_safe
+
+Return the UTF-8 pointer C<s> displaced by up to C<off> characters,
+either forward or backward.
+
+When moving backward it will not move before C<start>.
+
+When moving forward it will not move beyond C<end>.
+
+Will not exceed those limits even if the string is not valid "UTF-8".
+
+=cut
+*/
+
+PERL_STATIC_INLINE U8 *
+Perl_utf8_hop_safe(const U8 *s, SSize_t off, const U8 *start, const U8 *end)
+{
+    PERL_ARGS_ASSERT_UTF8_HOP_SAFE;
+
+    /* Be wary of leaning on the UTF8_IS_...() macros here: Perl's bitwise
+     * operators (~ in particular) can produce byte sequences that are not
+     * legal UTF-8, so the input cannot be assumed to be well formed. */
+
+    assert(start <= s && s <= end);
+
+    /* Dispatch on the sign of 'off': negative hops go backward and are
+     * limited by 'start'; zero and positive hops go forward and are
+     * limited by 'end'. */
+    return (off < 0)
+        ? utf8_hop_back(s, off, start)
+        : utf8_hop_forward(s, off, end);
+}
+
+/*
=for apidoc is_utf8_valid_partial_char