summaryrefslogtreecommitdiff
path: root/inline.h
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2014-05-05 19:37:58 -0600
committerKarl Williamson <khw@cpan.org>2014-05-31 11:37:24 -0600
commite123187a1c7666bc2bf1eab209057d9d3fe83f66 (patch)
treecda33288db40b7a4d67d3717588294e2b6bdd465 /inline.h
parent91e83b736b8e78a0acc723337e6e9a57ac548ef5 (diff)
downloadperl-e123187a1c7666bc2bf1eab209057d9d3fe83f66.tar.gz
utf8.c: Move a static function to inline.h
This is in preparation for it being called from outside utf8.c. It is renamed with a leading underscore to emphasize its private nature.
Diffstat (limited to 'inline.h')
-rw-r--r--inline.h31
1 files changed, 31 insertions, 0 deletions
diff --git a/inline.h b/inline.h
index 615c2e39aa..34d9b3b866 100644
--- a/inline.h
+++ b/inline.h
@@ -238,6 +238,37 @@ S_isALNUM_lazy(pTHX_ const char* p)
return isALNUM_lazy_if(p,1);
}
+/*
+Tests if the first C<len> bytes of string C<s> form a valid UTF-8
+character. Note that an INVARIANT (i.e. ASCII on non-EBCDIC) character is a
+valid UTF-8 character. The number of bytes in the UTF-8 character
+will be returned if it is valid, otherwise 0.
+
+This is the "slow" version as opposed to the "fast" version which is
+the "unrolled" IS_UTF8_CHAR(). E.g. for t/uni/class.t the speed
+difference is a factor of 2 to 3. For lengths (UTF8SKIP(s)) of four
+or less you should use IS_UTF8_CHAR(); for lengths of five or more
+you should use this _slow() version. In practice this means that _slow()
+will be used very rarely, since the maximum Unicode code point (as of
+Unicode 4.1) is U+10FFFF, which encodes in UTF-8 to four bytes. Only
+the "Perl extended UTF-8" (e.g., the infamous 'v-strings') will encode into
+five bytes or more.
+
+=cut */
+PERL_STATIC_INLINE STRLEN
+S__is_utf8_char_slow(const U8 *s, const STRLEN len)
+{
+    dTHX;   /* utf8n_to_uvchr(), called below, needs the thread context */
+    STRLEN char_len;    /* set by utf8n_to_uvchr() through its third argument */
+
+    PERL_ARGS_ASSERT__IS_UTF8_CHAR_SLOW;
+
+    /* Only char_len is of interest here; the call's return value is unused */
+    utf8n_to_uvchr(s, len, &char_len, UTF8_CHECK_ONLY);
+    if (char_len == (STRLEN) -1)
+        return 0;       /* (STRLEN) -1 is taken to mean "not a valid character" */
+    return char_len;
+}
+
/* ------------------------------- perl.h ----------------------------- */
/*