summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2021-06-30 13:45:12 -0600
committer: Karl Williamson <khw@cpan.org> 2021-08-14 06:47:43 -0600
commit: d4bf6b07402c770d61a5f8692f24fe944655d99f (patch)
tree: 3f3bfdf3f054385543622303c3dde9873688e3f8
parent: 8c87dbc398d8be8921f2a003819c770a791d3432 (diff)
download: perl-d4bf6b07402c770d61a5f8692f24fe944655d99f.tar.gz
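utf8.c: Remove an EBCDIC dependency
The surrogate check (is_SURROGATE_utf8) is now generated by regen/regcharclass.pl, replacing the hand-written, platform-specific IS_UTF8_2_BYTE_SURROGATE macros.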
-rw-r--r--  regcharclass.h         2
-rwxr-xr-x  regen/regcharclass.pl  2
-rw-r--r--  utf8.c                11
3 files changed, 5 insertions, 10 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 4d2b3eaf3c..fa72120616 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -3765,6 +3765,6 @@
* d0fe91c50f5e6ca796f9a7dbe273c95e44ddd50d8e840364e925baf255f9bb00 lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
- * 3fb6bafb4c830dd501868e34f550cdad3bf8d2c9eed44756488f36c484969417 regen/regcharclass.pl
+ * 1aa94679c695efd507b7e4491629dba1021b74c21a5324dfd3a582a5d654bd32 regen/regcharclass.pl
* b2f896452d2b30da3e04800f478c60c1fd0b03d6b668689b020f1e3cf1f1cdd9 regen/regcharclass_multi_char_folds.pl
* ex: set ro: */
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 890dd23208..32460b72b9 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1805,6 +1805,8 @@ LARGER_NON_CHARS: # 5 bytes
0xFFFFE - 0xFFFFF
0x10FFFE - 0x10FFFF
+# Note that code in utf8.c is counting on the 'fast' version to look at no
+# more than two bytes
SURROGATE: Surrogate code points
=> UTF8 :safe fast
\p{_Perl_Surrogate}
diff --git a/utf8.c b/utf8.c
index 12568591a4..6df64810d5 100644
--- a/utf8.c
+++ b/utf8.c
@@ -863,13 +863,9 @@ Perl_is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
# define FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER 0xFA
# define IS_UTF8_2_BYTE_SUPER(s0, s1) ((s0) == 0xF9 && (s1) >= 0xA2)
-# define IS_UTF8_2_BYTE_SURROGATE(s0, s1) ((s0) == 0xF1 \
- /* B6 and B7 */ \
- && ((s1) & 0xFE ) == 0xB6)
#else
# define FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER 0xF5
# define IS_UTF8_2_BYTE_SUPER(s0, s1) ((s0) == 0xF4 && (s1) >= 0x90)
-# define IS_UTF8_2_BYTE_SURROGATE(s0, s1) ((s0) == 0xED && (s1) >= 0xA0)
#endif
if ( (flags & UTF8_DISALLOW_SUPER)
@@ -894,7 +890,7 @@ Perl_is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags)
}
if ( (flags & UTF8_DISALLOW_SURROGATE)
- && UNLIKELY(IS_UTF8_2_BYTE_SURROGATE(s0, s1)))
+ && UNLIKELY(is_SURROGATE_utf8(s)))
{
return 0; /* Surrogate */
}
@@ -1748,10 +1744,7 @@ Perl__utf8n_to_uvchr_msgs_helper(const U8 *s,
{
possible_problems |= UTF8_GOT_SUPER;
}
- else if (UNLIKELY(IS_UTF8_2_BYTE_SURROGATE(
- NATIVE_UTF8_TO_I8(*adjusted_s0),
- NATIVE_UTF8_TO_I8(*(adjusted_s0 + 1)))))
- {
+ else if (UNLIKELY(is_SURROGATE_utf8(adjusted_s0))) {
possible_problems |= UTF8_GOT_SURROGATE;
}
}