diff options
author | Karl Williamson <khw@cpan.org> | 2016-08-26 15:03:52 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-08-31 20:32:36 -0600 |
commit | 7c93d8f091c4849e8de126f1da68d0465b916766 (patch) | |
tree | d3e0d8c601fb116adde656a1bc129dc1a52bb8ef /inline.h | |
parent | 5962d97e94001652b6627a579918c2a74637ccd0 (diff) | |
download | perl-7c93d8f091c4849e8de126f1da68d0465b916766.tar.gz |
Inline utf8_distance(), utf8_hop()
Diffstat (limited to 'inline.h')
-rw-r--r-- | inline.h | 56 |
1 files changed, 56 insertions, 0 deletions
@@ -387,6 +387,62 @@ S_is_utf8_invariant_string(const U8* const s, const STRLEN len) return TRUE; } +/* +=for apidoc utf8_distance + +Returns the number of UTF-8 characters between the UTF-8 pointers C<a> +and C<b>. The count is negated when C<a> precedes C<b>. + +WARNING: use only if you *know* that the pointers point inside the +same UTF-8 buffer. + +=cut +*/ + +PERL_STATIC_INLINE IV +Perl_utf8_distance(pTHX_ const U8 *a, const U8 *b) +{ + PERL_ARGS_ASSERT_UTF8_DISTANCE; + + return (a < b) ? -1 * (IV) utf8_length(a, b) : (IV) utf8_length(b, a); +} + +/* +=for apidoc utf8_hop + +Return the UTF-8 pointer C<s> displaced by C<off> characters, either +forward (C<off> non-negative) or backward (C<off> negative). The +function itself does no bounds checking. + +WARNING: do not use the following unless you *know* C<off> is within +the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned +on the first byte of a character or just after the last byte of a character. + +=cut +*/ + +PERL_STATIC_INLINE U8 * +Perl_utf8_hop(const U8 *s, SSize_t off) +{ + PERL_ARGS_ASSERT_UTF8_HOP; + + /* Note: cannot use UTF8_IS_...() too eagerly here since, e.g., + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + + if (off >= 0) { + while (off--) + s += UTF8SKIP(s); + } + else { + while (off++) { + s--; + while (UTF8_IS_CONTINUATION(*s)) /* back up past continuation bytes */ + s--; + } + } + return (U8 *)s; /* cast drops the const of the parameter */ +} + /* ------------------------------- perl.h ----------------------------- */ /* |