diff options
author | Karl Williamson <khw@cpan.org> | 2021-06-30 13:45:12 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-08-14 06:47:43 -0600 |
commit | d4bf6b07402c770d61a5f8692f24fe944655d99f (patch) | |
tree | 3f3bfdf3f054385543622303c3dde9873688e3f8 | |
parent | 8c87dbc398d8be8921f2a003819c770a791d3432 (diff) | |
download | perl-d4bf6b07402c770d61a5f8692f24fe944655d99f.tar.gz |
utf8.c: Remove an EBCDIC dependency
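The surrogate check (is_SURROGATE_utf8) is now generated by regen/regcharclass.pl, so the hand-written, platform-specific IS_UTF8_2_BYTE_SURROGATE macros in utf8.c are no longer needed.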
-rw-r--r-- | regcharclass.h | 2 | ||||
-rwxr-xr-x | regen/regcharclass.pl | 2 | ||||
-rw-r--r-- | utf8.c | 11 |
3 files changed, 5 insertions, 10 deletions
diff --git a/regcharclass.h b/regcharclass.h index 4d2b3eaf3c..fa72120616 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -3765,6 +3765,6 @@ * d0fe91c50f5e6ca796f9a7dbe273c95e44ddd50d8e840364e925baf255f9bb00 lib/unicore/mktables * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl - * 3fb6bafb4c830dd501868e34f550cdad3bf8d2c9eed44756488f36c484969417 regen/regcharclass.pl + * 1aa94679c695efd507b7e4491629dba1021b74c21a5324dfd3a582a5d654bd32 regen/regcharclass.pl * b2f896452d2b30da3e04800f478c60c1fd0b03d6b668689b020f1e3cf1f1cdd9 regen/regcharclass_multi_char_folds.pl * ex: set ro: */ diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index 890dd23208..32460b72b9 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -1805,6 +1805,8 @@ LARGER_NON_CHARS: # 5 bytes 0xFFFFE - 0xFFFFF 0x10FFFE - 0x10FFFF +# Note that code in utf8.c is counting on the 'fast' version to look at no +# more than two bytes SURROGATE: Surrogate code points => UTF8 :safe fast \p{_Perl_Surrogate} @@ -863,13 +863,9 @@ Perl_is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags) # define FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER 0xFA # define IS_UTF8_2_BYTE_SUPER(s0, s1) ((s0) == 0xF9 && (s1) >= 0xA2) -# define IS_UTF8_2_BYTE_SURROGATE(s0, s1) ((s0) == 0xF1 \ - /* B6 and B7 */ \ - && ((s1) & 0xFE ) == 0xB6) #else # define FIRST_START_BYTE_THAT_IS_DEFINITELY_SUPER 0xF5 # define IS_UTF8_2_BYTE_SUPER(s0, s1) ((s0) == 0xF4 && (s1) >= 0x90) -# define IS_UTF8_2_BYTE_SURROGATE(s0, s1) ((s0) == 0xED && (s1) >= 0xA0) #endif if ( (flags & UTF8_DISALLOW_SUPER) @@ -894,7 +890,7 @@ Perl_is_utf8_char_helper(const U8 * const s, const U8 * e, const U32 flags) } if ( (flags & UTF8_DISALLOW_SURROGATE) - && UNLIKELY(IS_UTF8_2_BYTE_SURROGATE(s0, s1))) + && UNLIKELY(is_SURROGATE_utf8(s))) { return 0; /* Surrogate */ } @@ -1748,10 +1744,7 @@ 
Perl__utf8n_to_uvchr_msgs_helper(const U8 *s, { possible_problems |= UTF8_GOT_SUPER; } - else if (UNLIKELY(IS_UTF8_2_BYTE_SURROGATE( - NATIVE_UTF8_TO_I8(*adjusted_s0), - NATIVE_UTF8_TO_I8(*(adjusted_s0 + 1))))) - { + else if (UNLIKELY(is_SURROGATE_utf8(adjusted_s0))) { possible_problems |= UTF8_GOT_SURROGATE; } } |