summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--inline.h64
1 files changed, 41 insertions, 23 deletions
diff --git a/inline.h b/inline.h
index b920cdf0f8..7ff7d799a2 100644
--- a/inline.h
+++ b/inline.h
@@ -2457,8 +2457,8 @@ Perl_utf8n_to_uvchr_msgs(const U8 *s,
const U8 * const s0 = s;
const U8 * send = s0 + curlen;
- UV uv = 0; /* The 0 silences some stupid compilers */
- UV state = 0;
+ UV type;
+ UV uv;
PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS;
@@ -2467,34 +2467,52 @@ Perl_utf8n_to_uvchr_msgs(const U8 *s,
* Otherwise we call a helper function to figure out the more complicated
* cases. */
- while (s < send && LIKELY(state != 1)) {
- UV type = PL_strict_utf8_dfa_tab[*s];
+ /* No calls from core pass in an empty string; non-core need a check */
+ PERL_NON_CORE_CHECK_EMPTY(s, send);
- uv = (state == 0)
- ? ((0xff >> type) & NATIVE_UTF8_TO_I8(*s))
- : UTF8_ACCUMULATE(uv, *s);
- state = PL_strict_utf8_dfa_tab[256 + state + type];
+ type = PL_strict_utf8_dfa_tab[*s];
- if (state != 0) {
- s++;
- continue;
- }
+ /* The table is structured so that 'type' is 0 iff the input byte is
+ * represented identically regardless of the UTF-8ness of the string */
+ if (type == 0) { /* UTF-8 invariants are returned unchanged */
+ uv = *s;
+ }
+ else {
+ UV state = PL_strict_utf8_dfa_tab[256 + type];
+ uv = (0xff >> type) & NATIVE_UTF8_TO_I8(*s);
- if (retlen) {
- *retlen = s - s0 + 1;
- }
- if (errors) {
- *errors = 0;
- }
- if (msgs) {
- *msgs = NULL;
+ while (++s < send) {
+ type = PL_strict_utf8_dfa_tab[*s];
+ state = PL_strict_utf8_dfa_tab[256 + state + type];
+
+ uv = UTF8_ACCUMULATE(uv, *s);
+
+ if (state == 0) {
+ goto success;
+ }
+
+ if (UNLIKELY(state == 1)) {
+ break;
+ }
}
- return UNI_TO_NATIVE(uv);
+ /* Here is potentially problematic. Use the full mechanism */
+ return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags,
+ errors, msgs);
+ }
+
+ success:
+ if (retlen) {
+ *retlen = s - s0 + 1;
+ }
+ if (errors) {
+ *errors = 0;
+ }
+ if (msgs) {
+ *msgs = NULL;
}
- /* Here is potentially problematic. Use the full mechanism */
- return _utf8n_to_uvchr_msgs_helper(s0, curlen, retlen, flags, errors, msgs);
+ return UNI_TO_NATIVE(uv);
}
PERL_STATIC_INLINE UV