diff options
author | Karl Williamson <khw@cpan.org> | 2016-09-12 13:38:22 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-09-17 21:10:50 -0600 |
commit | e23e8bc1957a5981b8a507b62471ae38ec06c661 (patch) | |
tree | 6003733df6d2678d9722de250a532285604b5701 | |
parent | 2c6ed66c0652679e56178882f052322c3fe69a8f (diff) | |
download | perl-e23e8bc1957a5981b8a507b62471ae38ec06c661.tar.gz |
Add macro for determining if UTF-8 is Unicode-strict
-rw-r--r-- | ext/XS-APItest/APItest.xs | 7 | ||||
-rw-r--r-- | ext/XS-APItest/t/utf8.t | 54 | ||||
-rw-r--r-- | regcharclass.h | 2 | ||||
-rwxr-xr-x | regen/regcharclass.pl | 43 | ||||
-rw-r--r-- | utf8.h | 81 | ||||
-rw-r--r-- | utfebcdic.h | 148 |
6 files changed, 323 insertions, 12 deletions
diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index b20206e5d6..c226defc85 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -5327,6 +5327,13 @@ test_isUTF8_CHAR(char *s, STRLEN len) OUTPUT: RETVAL +STRLEN +test_isSTRICT_UTF8_CHAR(char *s, STRLEN len) + CODE: + RETVAL = isSTRICT_UTF8_CHAR((U8 *) s, (U8 *) s + len); + OUTPUT: + RETVAL + IV test_is_utf8_valid_partial_char_flags(char *s, STRLEN len, U32 flags) CODE: diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index c909ebbfe9..acba778067 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -421,11 +421,15 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } $this_utf8_flags &= ~($UTF8_DISALLOW_ABOVE_31_BIT|$UTF8_WARN_ABOVE_31_BIT); } + + my $valid_under_strict = 1; if ($n > 0x10FFFF) { $this_utf8_flags &= ~($UTF8_DISALLOW_SUPER|$UTF8_WARN_SUPER); + $valid_under_strict = 0; } elsif (($n & 0xFFFE) == 0xFFFE) { $this_utf8_flags &= ~($UTF8_DISALLOW_NONCHAR|$UTF8_WARN_NONCHAR); + $valid_under_strict = 0; } undef @warnings; @@ -466,6 +470,29 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } undef @warnings; + $ret = test_isSTRICT_UTF8_CHAR($bytes, $len); + my $expected_len = ($valid_under_strict) ? $len : 0; + is($ret, $expected_len, "Verify isSTRICT_UTF8_CHAR($display_bytes) returns expected length: $expected_len"); + + unless (is(scalar @warnings, 0, + "Verify isSTRICT_UTF8_CHAR() for $hex_n generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + + undef @warnings; + + $ret = test_isSTRICT_UTF8_CHAR($bytes, $len - 1); + is($ret, 0, "Verify isSTRICT_UTF8_CHAR() with too short length parameter returns 0"); + + unless (is(scalar @warnings, 0, + "Verify isSTRICT_UTF8_CHAR() generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + + undef @warnings; + $ret_ref = test_valid_utf8_to_uvchr($bytes); is($ret_ref->[0], $n, "Verify valid_utf8_to_uvchr($display_bytes) returns $hex_n"); is($ret_ref->[1], $len, "Verify valid_utf8_to_uvchr() for $hex_n returns expected length: $len"); @@ -734,6 +761,14 @@ foreach my $test (@malformations) { diag "The warnings were: " . join(", ", @warnings); } + $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); + is($ret, 0, "$testname: isSTRICT_UTF8_CHAR returns 0"); + unless (is(scalar @warnings, 0, + "$testname: isSTRICT_UTF8_CHAR() generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + for my $j (1 .. $length - 1) { my $partial = substr($bytes, 0, $j); @@ -1240,6 +1275,25 @@ foreach my $test (@tests) { diag "The warnings were: " . join(", ", @warnings); } + undef @warnings; + $ret = test_isSTRICT_UTF8_CHAR($bytes, $length); + if ($will_overflow) { + is($ret, 0, "isSTRICT_UTF8_CHAR() $testname: returns 0"); + } + else { + my $expected_ret = ( $testname =~ /surrogate|non-character/ + || $allowed_uv > 0x10FFFF) + ? 0 + : $length; + is($ret, $expected_ret, + "isSTRICT_UTF8_CHAR() $testname: returns expected length: $expected_ret"); + } + unless (is(scalar @warnings, 0, + "isSTRICT_UTF8_CHAR() $testname: generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + # Test partial character handling, for each byte not a full character for my $j (1.. $length - 1) { diff --git a/regcharclass.h b/regcharclass.h index e0a021aa93..2daedcd3ac 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -1876,6 +1876,6 @@ * 5c7eb94310e2aaa15702fd6bed24ff0e7ab5448f9a8231d8c49ca96c9e941089 lib/unicore/mktables * cdecb300baad839a6f62791229f551a4fa33f3cbdca08e378dc976466354e778 lib/unicore/version * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl - * 1876ece914e2c14ed38c8a589adaa3d8193532c3a5bbe9ea5c3279bc9d29b279 regen/regcharclass.pl + * e3dc81163da3e92f7be01e9b953f6edb548eba93f1abb3d334e3b0469573c46d regen/regcharclass.pl * 393f8d882713a3ba227351ad0f00ea4839fda74fcf77dcd1cdf31519925adba5 regen/regcharclass_multi_char_folds.pl * ex: set ro: */ diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index bd677acd15..abc4942354 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -1660,6 +1660,49 @@ SURROGATE: Surrogate code points #=> UTF8 :no_length_checks only_ebcdic_platform #0xA0 - 0x1FFFFF +#STRICT_UTF8_CHAR: Matches legal Unicode UTF-8 variant code points, no surrrogates nor non-character code points +#=> UTF8 :no_length_checks only_ascii_platform +#0x0080 - 0xD7FF +#0xE000 - 0xFDCF +#0xFDF0 - 0xFFFD +#0x10000 - 0x1FFFD +#0x20000 - 0x2FFFD +#0x30000 - 0x3FFFD +#0x40000 - 0x4FFFD +#0x50000 - 0x5FFFD +#0x60000 - 0x6FFFD +#0x70000 - 0x7FFFD +#0x80000 - 0x8FFFD +#0x90000 - 0x9FFFD +#0xA0000 - 0xAFFFD +#0xB0000 - 0xBFFFD +#0xC0000 - 0xCFFFD +#0xD0000 - 0xDFFFD +#0xE0000 - 0xEFFFD +#0xF0000 - 0xFFFFD +#0x100000 - 0x10FFFD +# +#STRICT_UTF8_CHAR: Matches legal Unicode UTF-8 variant code points, no surrrogates nor non-character code points +#=> UTF8 :no_length_checks only_ebcdic_platform +#0x00A0 - 0xD7FF +#0xE000 - 0xFDCF +#0xFDF0 - 0xFFFD +#0x10000 - 0x1FFFD +#0x20000 - 0x2FFFD +#0x30000 - 0x3FFFD +#0x40000 - 0x4FFFD +#0x50000 - 0x5FFFD +#0x60000 - 0x6FFFD +#0x70000 - 0x7FFFD +#0x80000 - 0x8FFFD +#0x90000 - 0x9FFFD +#0xA0000 - 0xAFFFD +#0xB0000 - 0xBFFFD +#0xC0000 - 0xCFFFD +#0xD0000 - 0xDFFFD +#0xE0000 - 0xEFFFD +#0xF0000 - 0xFFFFD +#0x100000 - 0x10FFFD QUOTEMETA: Meta-characters that \Q should quote => high :fast @@ -334,6 +334,55 @@ C<cp> is Unicode if above 255; otherwise is platform-native. /* The above macro handles UTF-8 that has this start byte as the maximum */ #define _IS_UTF8_CHAR_HIGHEST_START_BYTE 0xF7 +/* A helper macro for isSTRICT_UTF8_CHAR, so use that one instead of this. + * Like is_UTF8_CHAR_utf8_no_length_checks(), this was moved here and LIKELYs + * added manually. + * + STRICT_UTF8_CHAR: Matches legal Unicode UTF-8 variant code points, no + surrrogates nor non-character code points +*/ +/*** GENERATED CODE ***/ +#define is_STRICT_UTF8_CHAR_utf8_no_length_checks(s) \ +( ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) ? \ + ( LIKELY( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ? 2 : 0 ) \ +: ( 0xE0 == ((U8*)s)[0] ) ? \ + ( LIKELY( ( ( ((U8*)s)[1] & 0xE0 ) == 0xA0 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ +: ( ( 0xE1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xEC ) || 0xEE == ((U8*)s)[0] ) ?\ + ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ +: ( 0xED == ((U8*)s)[0] ) ? \ + ( LIKELY( ( ( ((U8*)s)[1] & 0xE0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ +: ( 0xEF == ((U8*)s)[0] ) ? \ + ( ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xB6 ) || ( 0xB8 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBE ) ) ?\ + ( LIKELY( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ? 3 : 0 ) \ + : ( 0xB7 == ((U8*)s)[1] ) ? \ + ( LIKELY( ( ((U8*)s)[2] & 0xF0 ) == 0x80 || ( ((U8*)s)[2] & 0xF0 ) == 0xB0 ) ? 3 : 0 )\ + : ( ( 0xBF == ((U8*)s)[1] ) && ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBD ) ) ? 3 : 0 )\ +: ( 0xF0 == ((U8*)s)[0] ) ? \ + ( ( ( 0x90 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9E ) || ( 0xA0 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xAE ) || ( 0xB0 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBE ) ) ?\ + ( LIKELY( ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ + : ( ((U8*)s)[1] == 0x9F || ( ( ((U8*)s)[1] & 0xEF ) == 0xAF ) ) ? \ + ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBE ) ? \ + ( LIKELY( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ? 4 : 0 ) \ + : LIKELY( ( 0xBF == ((U8*)s)[2] ) && ( 0x80 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBD ) ) ? 4 : 0 )\ + : 0 ) \ +: ( 0xF1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xF3 ) ? \ + ( ( ( ( ((U8*)s)[1] & 0xC8 ) == 0x80 ) || ( ( ((U8*)s)[1] & 0xCC ) == 0x88 ) || ( ( ((U8*)s)[1] & 0xCE ) == 0x8C ) || ( ( ((U8*)s)[1] & 0xCF ) == 0x8E ) ) ?\ + ( LIKELY( ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ + : ( ( ((U8*)s)[1] & 0xCF ) == 0x8F ) ? \ + ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBE ) ? \ + ( LIKELY( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ? 4 : 0 ) \ + : LIKELY( ( 0xBF == ((U8*)s)[2] ) && ( 0x80 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBD ) ) ? 4 : 0 )\ + : 0 ) \ +: ( 0xF4 == ((U8*)s)[0] ) ? \ + ( ( 0x80 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x8E ) ? \ + ( LIKELY( ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ + : ( 0x8F == ((U8*)s)[1] ) ? \ + ( ( 0x80 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xBE ) ? \ + ( LIKELY( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ? 4 : 0 ) \ + : LIKELY( ( 0xBF == ((U8*)s)[2] ) && ( 0x80 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0xBD ) ) ? 4 : 0 )\ + : 0 ) \ +: 0 ) + #endif /* EBCDIC vs ASCII */ /* 2**UTF_ACCUMULATION_SHIFT - 1 */ @@ -885,9 +934,6 @@ point's representation. #define SHARP_S_SKIP 2 -/* If you want to exclude surrogates, and beyond legal Unicode, see the blame - * log for earlier versions which gave details for these */ - /* =for apidoc Am|STRLEN|isUTF8_CHAR|const U8 *s|const U8 *e @@ -928,6 +974,35 @@ is a valid UTF-8 character. #define is_utf8_char_buf(buf, buf_end) isUTF8_CHAR(buf, buf_end) +/* + +=for apidoc Am|STRLEN|isSTRICT_UTF8_CHAR|const U8 *s|const U8 *e + +Evaluates to non-zero if the first few bytes of the string starting at C<s> and +looking no further than S<C<e - 1>> are well-formed UTF-8 that represents some +Unicode code point completely acceptable for open interchange between all +applications; otherwise it evaluates to 0. If non-zero, the value gives how +many bytes starting at C<s> comprise the code point's representation. + +The largest acceptable code point is the Unicode maximum 0x10FFFF, and must not +be a surrogate nor a non-character code point. Thus this excludes any code +point from Perl's extended UTF-8. + +This is used to efficiently decide if the next few bytes in C<s> is +legal Unicode-acceptable UTF-8 for a single character. + +=cut +*/ + +#define isSTRICT_UTF8_CHAR(s, e) \ + (UNLIKELY((e) <= (s)) \ + ? 0 \ + : (UTF8_IS_INVARIANT(*s)) \ + ? 1 \ + : UNLIKELY(((e) - (s)) < UTF8SKIP(s)) \ + ? 0 \ + : is_STRICT_UTF8_CHAR_utf8_no_length_checks(s)) + /* Do not use; should be deprecated. Use isUTF8_CHAR() instead; this is * retained solely for backwards compatibility */ #define IS_UTF8_CHAR(p, n) (isUTF8_CHAR(p, (p) + (n)) == n) diff --git a/utfebcdic.h b/utfebcdic.h index 7d37fbccdb..b0e62ca11a 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -275,19 +275,21 @@ explicitly forbidden, and the shortest possible encoding should always be used # define HIGHEST_REPRESENTABLE_UTF8 "\xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA3\xBF\xBF\xBF\xBF\xBF\xBF" #endif -/* A helper macro for isUTF8_CHAR, so use that one instead of this. This was - * generated by regen/regcharclass.pl, and then moved here. Then it was +/* Helper macros for isUTF8_CHAR_foo, so use those instead of this. These were + * generated by regen/regcharclass.pl, and then moved here. Then they were * hand-edited to add some LIKELY() calls, presuming that malformations are * unlikely. The lines that generated it were then commented out. This was * done because it takes on the order of 10 minutes to generate, and is never - * going to change, unless the generated code is improved, and figuring out - * the LIKELYs there would be hard. + * going to change, unless the generated code is improved, and figuring out the + * LIKELYs there would be hard. * - UTF8_CHAR: Matches legal UTF-EBCDIC variant code points up through 0x1FFFFFF + */ + +#if '^' == 95 /* CP 1047 */ +/* UTF8_CHAR: Matches legal UTF-EBCDIC variant code points up through 0x1FFFFFF 0xA0 - 0x1FFFFF */ -#if '^' == 95 /* CP 1047 */ /*** GENERATED CODE ***/ #define is_UTF8_CHAR_utf8_no_length_checks(s) \ @@ -303,6 +305,72 @@ explicitly forbidden, and the shortest possible encoding should always be used ( LIKELY( ( ( ( ( 0x49 == ((U8*)s)[1] || 0x4A == ((U8*)s)[1] ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ : ( ( ( ( ( 0xEE == ((U8*)s)[0] ) && LIKELY( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFC ) == 0x70 ) ) && LIKELY( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) ) && LIKELY( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ) && LIKELY( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 ) +/* UTF8_CHAR_STRICT: Matches legal Unicode UTF-8 variant code points, no + surrrogates nor non-character code points */ +/*** GENERATED CODE ***/ +#define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) \ +( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ + : ( 0x73 == ((U8*)s)[1] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ? 4 : 0 )\ + : LIKELY( ( 0x73 == ((U8*)s)[2] ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFE ) == 0x70 ) ) ? 4 : 0 )\ + : 0 ) + + +/*** GENERATED CODE ***/ +#define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) \ +( ( 0xED == ((U8*)s)[0] ) ? \ + ( ( ( ( ((U8*)s)[1] & 0xEF ) == 0x49 ) || ( ( ((U8*)s)[1] & 0xF9 ) == 0x51 ) || ((U8*)s)[1] == 0x63 || ( ( ((U8*)s)[1] & 0xFD ) == 0x65 ) || ((U8*)s)[1] == 0x69 || ( ( ((U8*)s)[1] & 0xFD ) == 0x70 ) ) ?\ + ( LIKELY( ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ + : ( ((U8*)s)[1] == 0x4A || ((U8*)s)[1] == 0x52 || ( ( ((U8*)s)[1] & 0xFD ) == 0x54 ) || ((U8*)s)[1] == 0x58 || ((U8*)s)[1] == 0x62 || ( ( ((U8*)s)[1] & 0xFD ) == 0x64 ) || ( ( ((U8*)s)[1] & 0xFD ) == 0x68 ) || ( ( ((U8*)s)[1] & 0xFD ) == 0x71 ) ) ?\ + ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ + : ( 0x73 == ((U8*)s)[2] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFC ) == 0x70 ) ? 5 : 0 )\ + : LIKELY( ( 0x73 == ((U8*)s)[3] ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFE ) == 0x70 ) ) ? 5 : 0 )\ + : 0 ) \ + : 0 ) \ +: ( 0xEE == ((U8*)s)[0] ) ? \ + ( ( 0x41 == ((U8*)s)[1] ) ? \ + ( LIKELY( ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ + : ( 0x42 == ((U8*)s)[1] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFC ) == 0x70 ) ) ? 5 : 0 )\ + : ( 0x73 == ((U8*)s)[2] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFC ) == 0x70 ) ? 5 : 0 )\ + : LIKELY( ( 0x73 == ((U8*)s)[3] ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( ((U8*)s)[4] & 0xFE ) == 0x70 ) ) ? 5 : 0 )\ + : 0 ) \ + : 0 ) \ +: 0 ) + + +/*** GENERATED CODE ***/ +#define is_STRICT_UTF8_CHAR_utf8_no_length_checks(s) \ +( ( 0x80 == ((U8*)s)[0] || ( 0x8A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xAC ) || ( 0xAE <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xB6 ) ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFC ) == 0x70 ) ? 2 : 0 )\ +: ( ( ( ((U8*)s)[0] & 0xFC ) == 0xB8 ) || ((U8*)s)[0] == 0xBC || ( ( ((U8*)s)[0] & 0xFE ) == 0xBE ) || ( ( ((U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((U8*)s)[0] & 0xFC ) == 0xCC ) ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) ) ? 3 : 0 )\ +: ( 0xDC == ((U8*)s)[0] ) ? \ + ( LIKELY( ( ( ( 0x57 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ +: ( 0xDD == ((U8*)s)[0] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x64 ) || ( 0x67 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ + : ( 0x73 == ((U8*)s)[1] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x54 ) || ( 0x57 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ? 4 : 0 )\ + : ( 0x55 == ((U8*)s)[2] ) ? \ + ( LIKELY( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x56 ) ) ? 4 : 0 )\ + : ( 0x56 == ((U8*)s)[2] ) ? \ + ( LIKELY( ( 0x57 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ? 4 : 0 )\ + : LIKELY( ( 0x73 == ((U8*)s)[2] ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFE ) == 0x70 ) ) ? 4 : 0 )\ + : 0 ) \ +: ( 0xDE == ((U8*)s)[0] || 0xE1 == ((U8*)s)[0] || 0xEB == ((U8*)s)[0] ) ? \ + ( LIKELY( ( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFC ) == 0x70 ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFC ) == 0x70 ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFC ) == 0x70 ) ) ? 4 : 0 )\ +: ( 0xDF == ((U8*)s)[0] || 0xEA == ((U8*)s)[0] || 0xEC == ((U8*)s)[0] ) ? is_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) : is_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) ) + #endif #if '^' == 176 /* CP 037 */ @@ -321,10 +389,74 @@ explicitly forbidden, and the shortest possible encoding should always be used ( LIKELY( ( ( ( ( 0x49 == ((U8*)s)[1] || 0x4A == ((U8*)s)[1] ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ : ( ( ( ( ( 0xEE == ((U8*)s)[0] ) && LIKELY( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) ) && LIKELY( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ) && LIKELY( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ) && LIKELY( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 ) +/*** GENERATED CODE ***/ +#define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) \ +( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFE ) == 0x70 ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ + : ( 0x72 == ((U8*)s)[1] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFE ) == 0x70 ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ? 4 : 0 )\ + : LIKELY( ( 0x72 == ((U8*)s)[2] ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || 0x70 == ((U8*)s)[3] ) ) ? 4 : 0 )\ + : 0 ) + + +/*** GENERATED CODE ***/ +#define is_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) \ +( ( 0xED == ((U8*)s)[0] ) ? \ + ( ( ( ( ((U8*)s)[1] & 0xEF ) == 0x49 ) || ( ( ((U8*)s)[1] & 0xF9 ) == 0x51 ) || ((U8*)s)[1] == 0x62 || ( ( ((U8*)s)[1] & 0xFD ) == 0x64 ) || ( ( ((U8*)s)[1] & 0xFD ) == 0x68 ) || ((U8*)s)[1] == 0x71 ) ?\ + ( LIKELY( ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ + : ( ((U8*)s)[1] == 0x4A || ((U8*)s)[1] == 0x52 || ( ( ((U8*)s)[1] & 0xFD ) == 0x54 ) || ((U8*)s)[1] == 0x58 || ((U8*)s)[1] == 0x5F || ((U8*)s)[1] == 0x63 || ( ( ((U8*)s)[1] & 0xFD ) == 0x65 ) || ((U8*)s)[1] == 0x69 || ( ( ((U8*)s)[1] & 0xFD ) == 0x70 ) ) ?\ + ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFE ) == 0x70 ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ + : ( 0x72 == ((U8*)s)[2] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFE ) == 0x70 ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x72 ) ) ? 5 : 0 )\ + : LIKELY( ( 0x72 == ((U8*)s)[3] ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || 0x70 == ((U8*)s)[4] ) ) ? 5 : 0 )\ + : 0 ) \ + : 0 ) \ +: ( 0xEE == ((U8*)s)[0] ) ? \ + ( ( 0x41 == ((U8*)s)[1] ) ? \ + ( LIKELY( ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ + : ( 0x42 == ((U8*)s)[1] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFE ) == 0x70 ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x72 ) ) ) ? 5 : 0 )\ + : ( 0x72 == ((U8*)s)[2] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( ((U8*)s)[3] & 0xFE ) == 0x70 ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || ( 0x70 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x72 ) ) ? 5 : 0 )\ + : LIKELY( ( 0x72 == ((U8*)s)[3] ) && ( ( 0x41 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x4A ) || ( 0x51 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x59 ) || 0x5F == ((U8*)s)[4] || ( 0x62 <= ((U8*)s)[4] && ((U8*)s)[4] <= 0x6A ) || 0x70 == ((U8*)s)[4] ) ) ? 5 : 0 )\ + : 0 ) \ + : 0 ) \ +: 0 ) + + +/*** GENERATED CODE ***/ +#define is_STRICT_UTF8_CHAR_utf8_no_length_checks(s) \ +( ( 0x78 == ((U8*)s)[0] || 0x80 == ((U8*)s)[0] || ( 0x8A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x90 ) || ( 0x9A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xA0 ) || ( 0xAA <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xAF ) || ( 0xB1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xB5 ) ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) ? 2 : 0 )\ +: ( ((U8*)s)[0] == 0xB7 || ( ( ((U8*)s)[0] & 0xFE ) == 0xB8 ) || ( ( ((U8*)s)[0] & 0xFC ) == 0xBC ) || ( ( ((U8*)s)[0] & 0xEE ) == 0xCA ) || ( ( ((U8*)s)[0] & 0xFC ) == 0xCC ) ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ) ? 3 : 0 )\ +: ( 0xDC == ((U8*)s)[0] ) ? \ + ( LIKELY( ( ( ( 0x57 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ +: ( 0xDD == ((U8*)s)[0] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( ((U8*)s)[1] & 0xFE ) == 0x62 || ( 0x66 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( ((U8*)s)[1] & 0xFE ) == 0x70 ) ?\ + ( LIKELY( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ + : ( 0x72 == ((U8*)s)[1] ) ? \ + ( ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x54 ) || ( 0x57 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( ((U8*)s)[2] & 0xFE ) == 0x70 ) ?\ + ( LIKELY( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ? 4 : 0 )\ + : ( 0x55 == ((U8*)s)[2] ) ? \ + ( LIKELY( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x56 ) ) ? 4 : 0 )\ + : ( 0x56 == ((U8*)s)[2] ) ? \ + ( LIKELY( ( 0x57 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ? 4 : 0 )\ + : LIKELY( ( 0x72 == ((U8*)s)[2] ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || 0x70 == ((U8*)s)[3] ) ) ? 4 : 0 )\ + : 0 ) \ +: ( 0xDE == ((U8*)s)[0] || 0xE1 == ((U8*)s)[0] || 0xEB == ((U8*)s)[0] ) ? \ + ( LIKELY( ( ( ( 0x41 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x4A ) || ( 0x51 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x59 ) || 0x5F == ((U8*)s)[1] || ( 0x62 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x6A ) || ( 0x70 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x72 ) ) && ( ( 0x41 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x4A ) || ( 0x51 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x59 ) || 0x5F == ((U8*)s)[2] || ( 0x62 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x6A ) || ( 0x70 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x72 ) ) ) && ( ( 0x41 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x4A ) || ( 0x51 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x59 ) || 0x5F == ((U8*)s)[3] || ( 0x62 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x6A ) || ( 0x70 <= ((U8*)s)[3] && ((U8*)s)[3] <= 0x72 ) ) ) ? 4 : 0 )\ +: ( 0xDF == ((U8*)s)[0] || 0xEA == ((U8*)s)[0] || 0xEC == ((U8*)s)[0] ) ? is_STRICT_UTF8_CHAR_utf8_no_length_checks_part0(s) : is_STRICT_UTF8_CHAR_utf8_no_length_checks_part1(s) ) + #endif -/* The above macro in both code pages handles UTF-8 that has this start byte - * (expressed in I8) as the maximum */ +/* is_UTF8_CHAR_utf8_no_length_checks() in both code pages handles UTF-8 that + * has this start byte (expressed in I8) as the maximum */ #define _IS_UTF8_CHAR_HIGHEST_START_BYTE 0xF9 /* |