diff options
-rw-r--r-- | inline.h | 64 |
1 files changed, 41 insertions, 23 deletions
@@ -2457,8 +2457,8 @@ Perl_utf8n_to_uvchr_msgs(const U8 *s, const U8 * const s0 = s; const U8 * send = s0 + curlen; - UV uv = 0; /* The 0 silences some stupid compilers */ - UV state = 0; + UV type; + UV uv; PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS; @@ -2467,34 +2467,52 @@ Perl_utf8n_to_uvchr_msgs(const U8 *s, * Otherwise we call a helper function to figure out the more complicated * cases. */ - while (s < send && LIKELY(state != 1)) { - UV type = PL_strict_utf8_dfa_tab[*s]; + /* No calls from core pass in an empty string; non-core need a check */ + PERL_NON_CORE_CHECK_EMPTY(s, send); - uv = (state == 0) - ? ((0xff >> type) & NATIVE_UTF8_TO_I8(*s)) - : UTF8_ACCUMULATE(uv, *s); - state = PL_strict_utf8_dfa_tab[256 + state + type]; + type = PL_strict_utf8_dfa_tab[*s]; - if (state != 0) { - s++; - continue; - } + /* The table is structured so that 'type' is 0 iff the input byte is + * represented identically regardless of the UTF-8ness of the string */ + if (type == 0) { /* UTF-8 invariants are returned unchanged */ + uv = *s; + } + else { + UV state = PL_strict_utf8_dfa_tab[256 + type]; + uv = (0xff >> type) & NATIVE_UTF8_TO_I8(*s); - if (retlen) { - *retlen = s - s0 + 1; - } - if (errors) { - *errors = 0; - } - if (msgs) { - *msgs = NULL; + while (++s < send) { + type = PL_strict_utf8_dfa_tab[*s]; + state = PL_strict_utf8_dfa_tab[256 + state + type]; + + uv = UTF8_ACCUMULATE(uv, *s); + + if (state == 0) { + goto success; + } + + if (UNLIKELY(state == 1)) { + break; + } } - return UNI_TO_NATIVE(uv); + /* Here is potentially problematic. Use the full mechanism */ + return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, + errors, msgs); + } + + success: + if (retlen) { + *retlen = s - s0 + 1; + } + if (errors) { + *errors = 0; + } + if (msgs) { + *msgs = NULL; } - /* Here is potentially problematic. Use the full mechanism */ - return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, errors, msgs); + return UNI_TO_NATIVE(uv); } PERL_STATIC_INLINE UV |