diff options
author | Karl Williamson <khw@cpan.org> | 2021-06-30 13:01:49 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-08-07 05:59:15 -0600 |
commit | e1a9b7adc32d702958dc07123a3e3ee55af05ad1 (patch) | |
tree | 1667773e9b957193a642d30b6fa46712061b9107 /regcharclass.h | |
parent | 42b360b2e07dd1c42764c476a72cc282a4400ce9 (diff) | |
download | perl-e1a9b7adc32d702958dc07123a3e3ee55af05ad1.tar.gz |
regcharclass.pl: Add fast surrogate UTF-8 trie
This will be used in the next commit. It requires only the first two
bytes to determine if a UTF-8 or UTF-EBCDIC sequence is for a surrogate.
Diffstat (limited to 'regcharclass.h')
-rw-r--r-- | regcharclass.h | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/regcharclass.h b/regcharclass.h
index df0a654b4a..ce290b7975 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -220,6 +220,10 @@
 \p{_Perl_Surrogate}
 */
 /*** GENERATED CODE ***/
+#define is_SURROGATE_utf8(s) \
+( ( ( 0xED == ((const U8*)s)[0] ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0xA0, 0xBF) ) ) ? 3 : 0 )
+
+/*** GENERATED CODE ***/
 #define is_SURROGATE_utf8_safe(s,e) \
 ( ( ( ( ( ((e) - (s)) >= 3 ) && ( 0xED == ((const U8*)s)[0] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0xA0, 0xBF) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x80, 0xBF) ) ) ? 3 : 0 )
 
@@ -1451,6 +1455,10 @@
 \p{_Perl_Surrogate}
 */
 /*** GENERATED CODE ***/
+#define is_SURROGATE_utf8(s) \
+( ( ( 0xDD == ((const U8*)s)[0] ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x65, 0x66) ) ) ? 4 : 0 )
+
+/*** GENERATED CODE ***/
 #define is_SURROGATE_utf8_safe(s,e) \
 ( ( ( ( ( ( ((e) - (s)) >= 4 ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x65, 0x66) ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xBF) ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xBF) ) ) ? 4 : 0 )
 
@@ -2676,6 +2684,10 @@
 \p{_Perl_Surrogate}
 */
 /*** GENERATED CODE ***/
+#define is_SURROGATE_utf8(s) \
+( ( ( 0xDD == ((const U8*)s)[0] ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x64, 0x65) ) ) ? 4 : 0 )
+
+/*** GENERATED CODE ***/
 #define is_SURROGATE_utf8_safe(s,e) \
 ( ( ( ( ( ( ((e) - (s)) >= 4 ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x64, 0x65) ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xBF) ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xBF) ) ) ? 4 : 0 )
 
@@ -3753,6 +3765,6 @@
  * 696e706fddd3ce8cd48c7ea91caf4c9edf5c296432d320aa7b78631f69aa9eac lib/unicore/mktables
  * 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
  * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
- * ca1cae2ae68045dcfa7761a0b8d27399269f3dc395da5735ec4efbf4077c4dd0 regen/regcharclass.pl
+ * 3fb6bafb4c830dd501868e34f550cdad3bf8d2c9eed44756488f36c484969417 regen/regcharclass.pl
  * b2f896452d2b30da3e04800f478c60c1fd0b03d6b668689b020f1e3cf1f1cdd9 regen/regcharclass_multi_char_folds.pl
  * ex: set ro: */