diff options
author | Karl Williamson <khw@cpan.org> | 2014-05-05 19:37:58 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-05-31 11:37:24 -0600 |
commit | e123187a1c7666bc2bf1eab209057d9d3fe83f66 (patch) | |
tree | cda33288db40b7a4d67d3717588294e2b6bdd465 /inline.h | |
parent | 91e83b736b8e78a0acc723337e6e9a57ac548ef5 (diff) | |
download | perl-e123187a1c7666bc2bf1eab209057d9d3fe83f66.tar.gz |
utf8.c: Move a static function to inline.h
This is in preparation for it being called from outside utf8.c. It is
renamed to have a leading underscore to emphasize its private nature.
Diffstat (limited to 'inline.h')
-rw-r--r-- | inline.h | 31 |
1 files changed, 31 insertions, 0 deletions
```diff
@@ -238,6 +238,37 @@ S_isALNUM_lazy(pTHX_ const char* p)
     return isALNUM_lazy_if(p,1);
 }
 
+/*
+Tests if the first C<len> bytes of string C<s> form a valid UTF-8
+character. Note that an INVARIANT (i.e. ASCII on non-EBCDIC) character is a
+valid UTF-8 character. The number of bytes in the UTF-8 character
+will be returned if it is valid, otherwise 0.
+
+This is the "slow" version as opposed to the "fast" version which is
+the "unrolled" IS_UTF8_CHAR(). E.g. for t/uni/class.t the speed
+difference is a factor of 2 to 3. For lengths (UTF8SKIP(s)) of four
+or less you should use the IS_UTF8_CHAR(), for lengths of five or more
+you should use the _slow(). In practice this means that the _slow()
+will be used very rarely, since the maximum Unicode code point (as of
+Unicode 4.1) is U+10FFFF, which encodes in UTF-8 to four bytes. Only
+the "Perl extended UTF-8" (e.g, the infamous 'v-strings') will encode into
+five bytes or more.
+
+=cut */
+PERL_STATIC_INLINE STRLEN
+S__is_utf8_char_slow(const U8 *s, const STRLEN len)
+{
+    dTHX; /* The function called below requires thread context */
+
+    STRLEN actual_len;
+
+    PERL_ARGS_ASSERT__IS_UTF8_CHAR_SLOW;
+
+    utf8n_to_uvchr(s, len, &actual_len, UTF8_CHECK_ONLY);
+
+    return (actual_len == (STRLEN) -1) ? 0 : actual_len;
+}
+
 /* ------------------------------- perl.h ----------------------------- */
 
 /*
```