summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2021-07-01 18:47:45 -0600
committer Karl Williamson <khw@cpan.org> 2021-08-14 06:47:43 -0600
commit 22afef87083fc7ad1b066588f5c20637fd387805 (patch)
tree 74f540e111eb7fbc989150361b1b375861af608a
parent 2e5a4e5adeeaafbc8743371293762d71bf27ea15 (diff)
download perl-22afef87083fc7ad1b066588f5c20637fd387805.tar.gz
is_utf8_valid_partial_char_flags: Use DFA
The DFA macro for determining if a sequence is valid UTF-8 was deliberately made general enough to accommodate this use-case, in which only a partial character is acceptable. Change the code to use the DFA. The helper function's name is changed to indicate it is private.
-rw-r--r-- embed.fnc 4
-rw-r--r-- inline.h 39
-rw-r--r-- proto.h 6
3 files changed, 39 insertions, 10 deletions
diff --git a/embed.fnc b/embed.fnc
index 093de67e06..b4fe575f6b 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1203,9 +1203,9 @@ ATidp |bool |is_utf8_fixed_width_buf_loclen_flags \
|NN const U8 * const s|STRLEN len \
|NULLOK const U8 **ep|NULLOK STRLEN *el|const U32 flags
AmTdP |bool |is_utf8_valid_partial_char \
- |NN const U8 * const s|NN const U8 * const e
+ |NN const U8 * const s0|NN const U8 * const e
ATidRp |bool |is_utf8_valid_partial_char_flags \
- |NN const U8 * const s|NN const U8 * const e|const U32 flags
+ |NN const U8 * const s0|NN const U8 * const e|const U32 flags
CpR |bool |_is_uni_FOO|const U8 classnum|const UV c
CpR |bool |_is_utf8_FOO|const U8 classnum|NN const U8 *p \
|NN const U8 * const e
diff --git a/inline.h b/inline.h
index 7b13be02b6..6c5b1bda46 100644
--- a/inline.h
+++ b/inline.h
@@ -2217,18 +2217,47 @@ determined from just the first one or two bytes.
*/
PERL_STATIC_INLINE bool
-Perl_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
+Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags)
{
PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS;
-
assert(0 == (flags & ~(UTF8_DISALLOW_ILLEGAL_INTERCHANGE
|UTF8_DISALLOW_PERL_EXTENDED)));
- if (s >= e || s + UTF8SKIP(s) <= e) {
- return FALSE;
+ PERL_IS_UTF8_CHAR_DFA(s0, e, PL_extended_utf8_dfa_tab,
+ DFA_RETURN_FAILURE_,
+ DFA_TEASE_APART_FF_,
+ NOOP);
+
+ /* The NOOP above causes the DFA to drop down here iff the input was a
+ * partial character. flags=0 => can return TRUE immediately; otherwise we
+ * need to check (not inline) if the partial character is the beginning of
+ * a disallowed one */
+ if (flags == 0) {
+ return TRUE;
+ }
+
+ return cBOOL(is_utf8_char_helper(s0, e, flags));
+
+#ifdef HAS_EXTRA_LONG_UTF8
+
+ tease_apart_FF:
+
+ /* Getting here means the input is either malformed, or, in the case of
+ * PL_extended_utf8_dfa_tab, was for the largest possible start byte. The
+ * latter case has to be extended UTF-8, so can fail immediately if that is
+ * forbidden */
+
+ if ( *s0 != I8_TO_NATIVE_UTF8(0xFF)
+ || (flags & (UTF8_DISALLOW_SUPER|UTF8_DISALLOW_PERL_EXTENDED)))
+ {
+ return 0;
}
- return cBOOL(is_utf8_char_helper(s, e, flags));
+ return is_utf8_FF_helper_(s0, e,
+ TRUE /* Require to be a partial character */
+ );
+#endif
+
}
/*
diff --git a/proto.h b/proto.h
index ee194e7abc..4dd2c192e0 100644
--- a/proto.h
+++ b/proto.h
@@ -1780,16 +1780,16 @@ PERL_STATIC_INLINE bool Perl_is_utf8_string_loclen_flags(const U8 *s, STRLEN len
#define PERL_ARGS_ASSERT_IS_UTF8_STRING_LOCLEN_FLAGS \
assert(s)
#endif
-/* PERL_CALLCONV bool is_utf8_valid_partial_char(const U8 * const s, const U8 * const e)
+/* PERL_CALLCONV bool is_utf8_valid_partial_char(const U8 * const s0, const U8 * const e)
__attribute__warn_unused_result__
__attribute__pure__; */
#define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR
#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE bool Perl_is_utf8_valid_partial_char_flags(const U8 * const s, const U8 * const e, const U32 flags)
+PERL_STATIC_INLINE bool Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \
- assert(s); assert(e)
+ assert(s0); assert(e)
#endif
PERL_CALLCONV bool Perl_isinfnan(NV nv)