summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- regcharclass.h 290
-rwxr-xr-x regen/regcharclass.pl 133
2 files changed, 240 insertions, 183 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 4a138f7a56..0bcf455f97 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -1313,7 +1313,7 @@
( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
: ( 0xCA == ((const U8*)s)[0] ) ? \
( ( 0x41 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xAA) ) ? 3 : 0 )\
: ( 0x42 == ((const U8*)s)[1] ) ? \
( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \
: ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
@@ -1347,7 +1347,7 @@
/*** GENERATED CODE ***/
#define is_XDIGIT_high(s) \
( ( ( 0xDD == ((const U8*)s)[0] ) && ( 0x73 == ((const U8*)s)[1] ) ) ? ( ( 0x67 == ((const U8*)s)[2] ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x68) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB0, 0xB9) ) ? 4 : 0 )\
: ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x68, 0x69) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x42, 0x47) ) ) ? 4 : 0 ) : 0 )
/*** GENERATED CODE ***/
@@ -1366,7 +1366,7 @@
( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
: ( 0xCA == ((const U8*)s)[0] ) ? \
( ( 0x41 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xAA) ) ? 3 : 0 )\
: ( 0x42 == ((const U8*)s)[1] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x49, 0x4A) || 0x56 == ((const U8*)s)[2] ) ? 3 : 0 )\
: ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
@@ -1390,15 +1390,15 @@
( ( ( LIKELY((e) > (s)) ) && ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ) ? ( ( 0xDD == ((const U8*)s)[0] ) ?\
( ( 0x73 == ((const U8*)s)[1] ) ? \
( ( 0x55 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x73) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB0, 0xBF) ) ? 4 : 0 )\
: ( 0x56 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x56) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xAF) ) ? 4 : 0 )\
: ( ( 0x73 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x72, 0x73) ) ) ? 4 : 0 )\
: 0 ) \
- : ( 0xDF == ((const U8*)s)[0] || 0xEA == ((const U8*)s)[0] || 0xEC == ((const U8*)s)[0] ) ?\
+ : ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) == 0xF3 || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0xFD ) == 0xF5 ) ) ?\
( ( ( ( 0x73 == ((const U8*)s)[1] ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x72, 0x73) ) ) ? 4 : 0 )\
: ( 0xED == ((const U8*)s)[0] ) ? \
- ( ( ( ( ( ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x52 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x54 ) || ((const U8*)s)[1] == 0x58 || ((const U8*)s)[1] == 0x62 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x64 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x68 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x71 ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 )\
+ ( ( ( ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF9 ) == 0xA9 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF1 ) == 0xB1 ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 )\
: ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x42 == ((const U8*)s)[1] ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 ) : 0 )
/*
@@ -1415,12 +1415,12 @@
( ( 0xDD == ((const U8*)s)[0] ) ? \
( ( 0x73 == ((const U8*)s)[1] ) ? \
( ( 0x55 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x73) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB0, 0xBF) ) ? 4 : 0 )\
: ( 0x56 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x56) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xAF) ) ? 4 : 0 )\
: ( ( 0x73 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x72, 0x73) ) ) ? 4 : 0 )\
: 0 ) \
-: ( ( ( ( 0xDF == ((const U8*)s)[0] || 0xEA == ((const U8*)s)[0] || 0xEC == ((const U8*)s)[0] ) && ( 0x73 == ((const U8*)s)[1] ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x72, 0x73) ) ) ? 4 : 0 )
+: ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) == 0xF3 || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0xFD ) == 0xF5 ) ) && ( 0x73 == ((const U8*)s)[1] ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x72, 0x73) ) ) ? 4 : 0 )
/*
LARGER_NON_CHARS: # 5 bytes
@@ -1442,7 +1442,7 @@
/*** GENERATED CODE ***/
#define is_LARGER_NON_CHARS_utf8(s) \
( ( 0xED == ((const U8*)s)[0] ) ? \
- ( ( ( ( ( ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x52 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x54 ) || ((const U8*)s)[1] == 0x58 || ((const U8*)s)[1] == 0x62 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x64 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x68 ) || ( ( ((const U8*)s)[1] & 0xFD ) == 0x71 ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 )\
+ ( ( ( ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF9 ) == 0xA9 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF1 ) == 0xB1 ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 )\
: ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x42 == ((const U8*)s)[1] ) ) && ( 0x73 == ((const U8*)s)[2] ) ) && ( 0x73 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x72, 0x73) ) ) ? 5 : 0 )
/*
@@ -1452,7 +1452,7 @@
*/
/*** GENERATED CODE ***/
#define is_SURROGATE_utf8_safe(s,e) \
-( ( ( ( ( ( ((e) - (s)) >= 4 ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x65, 0x66) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x73) ) ) ? 4 : 0 )
+( ( ( ( ( ( ((e) - (s)) >= 4 ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x65, 0x66) ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xBF) ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xBF) ) ) ? 4 : 0 )
/*
QUOTEMETA: Meta-characters that \Q should quote
@@ -1460,74 +1460,68 @@
\p{_Perl_Quotemeta}
*/
/*** GENERATED CODE ***/
-#define is_QUOTEMETA_high_part0_(s) \
-( ( 0x63 == ((const U8*)s)[1] ) ? \
+#define is_QUOTEMETA_high(s) \
+( ( 0xB1 == ((const U8*)s)[0] ) ? \
+ ( ( 0x56 == ((const U8*)s)[1] ) ? 2 : 0 ) \
+: ( 0xB8 == ((const U8*)s)[0] ) ? \
+ ( ( ( 0x57 == ((const U8*)s)[1] ) && ( 0x70 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+: ( 0xBB == ((const U8*)s)[0] ) ? \
+ ( ( 0x51 == ((const U8*)s)[1] ) ? \
+ ( ( 0x73 == ((const U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0x52 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+: ( 0xBC == ((const U8*)s)[0] ) ? \
+ ( ( 0x63 == ((const U8*)s)[1] ) ? \
( ( 0x41 == ((const U8*)s)[2] ) ? 3 : 0 ) \
- : ( ( 0x71 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x63, 0x64) ) ) ? 3 : 0 )
-
-
-/*** GENERATED CODE ***/
-#define is_QUOTEMETA_high_part1_(s) \
-( ( 0xBE == ((const U8*)s)[0] ) ? \
+ : ( ( 0x71 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x63, 0x64) ) ) ? 3 : 0 )\
+: ( 0xBE == ((const U8*)s)[0] ) ? \
( ( ( 0x41 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x52, 0x55) ) ) ? 3 : 0 )\
: ( 0xCA == ((const U8*)s)[0] ) ? \
- ( ( 0x41 == ((const U8*)s)[1] || inRANGE_helper_(U8, ((const U8*)s)[1], 0x54, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x73) ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ? 3 : 0 )\
+ ( ( 0xA0 == NATIVE_UTF8_TO_I8(((const U8*)s)[1]) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xAD, 0xBF) ) ?\
+ 3 \
: ( 0x42 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xBE) ) ? 3 : 0 )\
: ( 0x43 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x42, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x62 == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x64, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA1, 0xB3) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xB5, 0xBF) ) ? 3 : 0 )\
: ( 0x44 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x56) ) ? 3 : 0 )\
- : ( ( 0x53 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x57, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xAF) ) ? 3 : 0 )\
+ : ( ( 0x53 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xB0, 0xBF) ) ) ? 3 : 0 )\
: ( 0xCB == ((const U8*)s)[0] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x41, 0x43) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x49, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x69) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x71, 0x73) ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xA0, 0xA2) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xA8, 0xBA) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xBD, 0xBF) ) ?\
+ 3 \
: ( 0x6A == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x64) ) ? 3 : 0 )\
- : ( ( 0x70 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x63, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xB5) ) ? 3 : 0 )\
+ : ( ( 0x70 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xB4, 0xBF) ) ) ? 3 : 0 )\
: ( 0xCC == ((const U8*)s)[0] ) ? \
- ( ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x73) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ) ? 3 : 0 )\
+ 3 \
: ( 0xCD == ((const U8*)s)[0] ) ? \
- ( ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x57, 0x59) || 0x62 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xB0, 0xB3) ) ?\
+ 3 \
+ : 0 ) \
: ( 0xCE == ((const U8*)s)[0] ) ? \
( ( 0x41 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x44) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x49, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xA3) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA8, 0xBF) ) ? 3 : 0 )\
: ( 0x42 == ((const U8*)s)[1] ) ? \
- ( ( 0x41 == ((const U8*)s)[2] || 0x57 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xEF ) == 0xA0 ) ? 3 : 0 )\
: ( ( 0x52 == ((const U8*)s)[1] ) && ( 0x45 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
: ( 0xDD == ((const U8*)s)[0] ) ? \
( ( 0x73 == ((const U8*)s)[1] ) ? \
( ( 0x4A == ((const U8*)s)[2] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x72, 0x73) ) ? 4 : 0 )\
: ( 0x57 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x56) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xAF) ) ? 4 : 0 )\
: ( 0x59 == ((const U8*)s)[2] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x46, 0x47) ) ? 4 : 0 )\
: ( 0x66 == ((const U8*)s)[2] ) ? \
( ( 0x73 == ((const U8*)s)[3] ) ? 4 : 0 ) \
: ( 0x71 == ((const U8*)s)[2] ) ? \
( ( 0x41 == ((const U8*)s)[3] ) ? 4 : 0 ) \
- : ( ( 0x73 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x67) ) ) ? 4 : 0 )\
+ : ( ( 0x73 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB0, 0xB8) ) ) ? 4 : 0 )\
: 0 ) \
: ( 0xDF == ((const U8*)s)[0] ) ? \
( ( 0x56 == ((const U8*)s)[1] ) ? \
( ( ( 0x46 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x44) ) ) ? 4 : 0 )\
: ( ( ( 0x63 == ((const U8*)s)[1] ) && ( 0x52 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x69) ) ) ? 4 : 0 )\
-: ( ( ( ( ( 0xED == ((const U8*)s)[0] ) && ( 0x70 == ((const U8*)s)[1] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x44) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x73) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[4], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[4], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[4], 0x70, 0x73) ) ) ? 5 : 0 )
-
-
-/*** GENERATED CODE ***/
-#define is_QUOTEMETA_high(s) \
-( ( 0xB1 == ((const U8*)s)[0] ) ? \
- ( ( 0x56 == ((const U8*)s)[1] ) ? 2 : 0 ) \
-: ( 0xB8 == ((const U8*)s)[0] ) ? \
- ( ( ( 0x57 == ((const U8*)s)[1] ) && ( 0x70 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
-: ( 0xBB == ((const U8*)s)[0] ) ? \
- ( ( 0x51 == ((const U8*)s)[1] ) ? \
- ( ( 0x73 == ((const U8*)s)[2] ) ? 3 : 0 ) \
- : ( ( 0x52 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
-: ( 0xBC == ((const U8*)s)[0] ) ? is_QUOTEMETA_high_part0_(s) : is_QUOTEMETA_high_part1_(s) )
+: ( ( ( 0xED == ((const U8*)s)[0] ) && ( 0x70 == ((const U8*)s)[1] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x44) ) ) ? 5 : 0 )
/*
MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character
@@ -1560,7 +1554,7 @@
: ( 0xAB == ((const U8*)s)[0] ) ? \
( ( ( 0x70 == ((const U8*)s)[1] ) && ( ( ((const U8*)s)[2] & 0xBF ) == 'n' ) ) ? 3 : 0 )\
: ( 0xB4 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] ) ? \
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) ? \
( ( ( 0xB4 == ((const U8*)s)[2] ) && ( 0x68 == ((const U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x58 == ((const U8*)s)[1] || 0x66 == ((const U8*)s)[1] ) ? \
( ( 0xB1 == ((const U8*)s)[2] ) ? \
@@ -1604,13 +1598,13 @@
( ( 0x46 == ((const U8*)s)[2] ) ? \
( ( ( ( 0xB8 == ((const U8*)s)[3] ) && ( 0x53 == ((const U8*)s)[4] ) ) && ( 0x43 == ((const U8*)s)[5] ) ) ? 6 : 0 )\
: ( 0x63 == ((const U8*)s)[2] ) ? \
- ( ( ( ( 0xB8 == ((const U8*)s)[3] ) && ( 0x52 == ((const U8*)s)[4] ) ) && ( 0x46 == ((const U8*)s)[5] || 0x52 == ((const U8*)s)[5] || 0x54 == ((const U8*)s)[5] || 0x65 == ((const U8*)s)[5] ) ) ? 6 : 0 )\
+ ( ( ( ( 0xB8 == ((const U8*)s)[3] ) && ( 0x52 == ((const U8*)s)[4] ) ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[5]) & 0xF7 ) == 0xA5 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[5]) == 0xAB || NATIVE_UTF8_TO_I8(((const U8*)s)[5]) == 0xB6 ) ) ? 6 : 0 )\
: ( ( ( ( 0x72 == ((const U8*)s)[2] ) && ( 0xB8 == ((const U8*)s)[3] ) ) && ( 0x52 == ((const U8*)s)[4] ) ) && ( 0x65 == ((const U8*)s)[5] ) ) ? 6 : 0 )\
: 0 ) \
: ( 0xBF == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x67, 0x68) ) ? \
( ( ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) ) && ( 0xB4 == ((const U8*)s)[3] ) ) && ( 0x68 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
- : ( ( ( ( 0x6A == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) || 0x57 == ((const U8*)s)[2] || 0x63 == ((const U8*)s)[2] || 0x70 == ((const U8*)s)[2] ) ) && ( 0xB4 == ((const U8*)s)[3] ) ) && ( 0x68 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
+ : ( ( ( ( 0x6A == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xBC ) ) && ( 0xB4 == ((const U8*)s)[3] ) ) && ( 0x68 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
: 0 )
@@ -1645,7 +1639,7 @@
: ( 0xAB == ((const U8*)s)[0] ) ? \
( ( ( 0x70 == ((const U8*)s)[1] ) && ( ( ((const U8*)s)[2] & 0xBF ) == 'n' ) ) ? 3 : 0 )\
: ( 0xB4 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] ) ? \
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) ? \
( ( ( 0xB4 == ((const U8*)s)[2] ) && ( 0x68 == ((const U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x58 == ((const U8*)s)[1] || 0x66 == ((const U8*)s)[1] ) ? \
( ( 0xB1 == ((const U8*)s)[2] ) ? \
@@ -1667,7 +1661,7 @@
: ( 0xBF == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x67, 0x68) ) ? \
( ( ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) ) && ( 0xB4 == ((const U8*)s)[3] ) ) && ( 0x68 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
- : ( ( ( ( 0x6A == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) || 0x57 == ((const U8*)s)[2] || 0x63 == ((const U8*)s)[2] || 0x70 == ((const U8*)s)[2] ) ) && ( 0xB4 == ((const U8*)s)[3] ) ) && ( 0x68 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
+ : ( ( ( ( 0x6A == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xBC ) ) && ( 0xB4 == ((const U8*)s)[3] ) ) && ( 0x68 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
: 0 )
@@ -1703,7 +1697,7 @@
: ( 0xAB == ((const U8*)s)[0] ) ? \
( ( ( 0x70 == ((const U8*)s)[1] ) && ( ( ((const U8*)s)[2] & 0xBF ) == 'n' ) ) ? 3 : 0 )\
: ( 0xB4 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] ) ? \
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) ? \
( ( ( 0xB4 == ((const U8*)s)[2] ) && ( 0x68 == ((const U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x58 == ((const U8*)s)[1] || 0x66 == ((const U8*)s)[1] ) ? \
( ( 0xB1 == ((const U8*)s)[2] ) ? \
@@ -2272,7 +2266,7 @@
: ( 0xAB == ((const U8*)s)[0] ) ? \
( ( 0x70 == ((const U8*)s)[1] ) ? 2 : 0 ) \
: ( 0xB4 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] ) ? 2 \
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) ? 2 \
: ( 0x58 == ((const U8*)s)[1] || 0x66 == ((const U8*)s)[1] ) ? \
( ( ( 0xB1 == ((const U8*)s)[2] ) && ( 0x43 == ((const U8*)s)[3] ) ) ? 4 : 2 )\
: ( 0x68 == ((const U8*)s)[1] ) ? \
@@ -2290,7 +2284,7 @@
: ( 0xBF == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x67, 0x68) ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) ) ? 3 : 0 )\
- : ( ( 0x6A == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) || 0x57 == ((const U8*)s)[2] || 0x63 == ((const U8*)s)[2] || 0x70 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+ : ( ( 0x6A == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xBC ) ) ? 3 : 0 )\
: 0 ) \
: ((e)-(s) > 2) ? \
( ( ( ( ((const U8*)s)[0] & 0xAF ) == 'a' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 'h' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 's' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'w' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'y' ) ) ? 1\
@@ -2301,7 +2295,7 @@
: ( 0xAB == ((const U8*)s)[0] ) ? \
( ( 0x70 == ((const U8*)s)[1] ) ? 2 : 0 ) \
: ( 0xB4 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] || 0x58 == ((const U8*)s)[1] || 0x66 == ((const U8*)s)[1] || 0x68 == ((const U8*)s)[1] ) ? 2 : 0 )\
+ ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF7 ) == 0xB1 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[1]) == 0xB7 ) ? 2 : 0 )\
: ( 0xB5 == ((const U8*)s)[0] ) ? \
( ( ( ( ((const U8*)s)[1] & 0xFB ) == 0x42 ) || ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x55 ) ? 2 : 0 )\
: ( 0xB8 == ((const U8*)s)[0] ) ? \
@@ -2309,7 +2303,7 @@
: ( 0xBF == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x67, 0x68) ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) ) ? 3 : 0 )\
- : ( ( 0x6A == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) || 0x57 == ((const U8*)s)[2] || 0x63 == ((const U8*)s)[2] || 0x70 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+ : ( ( 0x6A == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xBC ) ) ? 3 : 0 )\
: 0 ) \
: ((e)-(s) > 1) ? \
( ( ( ( ((const U8*)s)[0] & 0xAF ) == 'a' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 'h' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 's' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'w' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'y' ) ) ? 1\
@@ -2320,7 +2314,7 @@
: ( 0xAB == ((const U8*)s)[0] ) ? \
( ( 0x70 == ((const U8*)s)[1] ) ? 2 : 0 ) \
: ( 0xB4 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] || 0x58 == ((const U8*)s)[1] || 0x66 == ((const U8*)s)[1] || 0x68 == ((const U8*)s)[1] ) ? 2 : 0 )\
+ ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF7 ) == 0xB1 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[1]) == 0xB7 ) ? 2 : 0 )\
: ( ( 0xB5 == ((const U8*)s)[0] ) && ( ( ( ((const U8*)s)[1] & 0xFB ) == 0x42 ) || ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x55 ) ) ? 2 : 0 )\
: ((e)-(s) > 0) ? \
( ( ( ((const U8*)s)[0] & 0xAF ) == 'a' ) || ( ( ((const U8*)s)[0] & 0x9F ) == 'f' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 'h' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 's' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'y' ) )\
@@ -2361,14 +2355,14 @@
( ( 0x63 == ((const U8*)s)[1] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x65, 0x69) || 0x72 == ((const U8*)s)[2] ) ? 3 : 0 )\
: ( 0x69 == ((const U8*)s)[1] ) ? \
- ( ( 0x57 == ((const U8*)s)[2] || 0x59 == ((const U8*)s)[2] || 0x63 == ((const U8*)s)[2] || 0x65 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF9 ) == 0xB0 ) ? 3 : 0 )\
: ( 0x70 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x73) ) ? 3 : 0 )\
+ 3 \
: ( 0x71 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x56) || 0x59 == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x63) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x65, 0x66) || 0x70 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF0 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFA ) == 0xB2 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF7 ) == 0xB4 ) ) ? 3 : 0 )\
: ( 0x72 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x43, 0x45) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x47, 0x48) || 0x53 == ((const U8*)s)[2] || 0x59 == ((const U8*)s)[2] || 0x62 == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x65, 0x66) ) ? 3 : 0 )\
- : ( ( 0x73 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x43, 0x45) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x47, 0x48) || 0x59 == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x63) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x65, 0x66) || 0x70 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+ ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xEA ) == 0xA2 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF7 ) == 0xA4 ) ) ? 3 : 0 )\
+ : ( ( 0x73 == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xEA ) == 0xA2 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xA4 || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF7 ) == 0xB4 ) ) ) ? 3 : 0 )\
: ( ( ( ( 0xDD == ((const U8*)s)[0] ) && ( 0x72 == ((const U8*)s)[1] ) ) && ( 0x67 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x47) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x66) ) ) ? 4 : 0 )
/*
@@ -2378,9 +2372,9 @@
*/
/*** GENERATED CODE ***/
#define is_PROBLEMATIC_LOCALE_FOLD_utf8(s) \
-( ( ( ((const U8*)s)[0] <= ' ' ) || inRANGE_helper_(U8, ((const U8*)s)[0], '.', '&') || inRANGE_helper_(U8, ((const U8*)s)[0], '!', '/') || inRANGE_helper_(U8, ((const U8*)s)[0], ',', '?') || inRANGE_helper_(U8, ((const U8*)s)[0], '`', '"') || inRANGE_helper_(U8, ((const U8*)s)[0], 'a', 'i') || inRANGE_helper_(U8, ((const U8*)s)[0], 'j', 'r') || inRANGE_helper_(U8, ((const U8*)s)[0], '~', 'z') || '[' == ((const U8*)s)[0] || ']' == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], '{', 'I') || inRANGE_helper_(U8, ((const U8*)s)[0], '}', 'R') || '\\' == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], 'S', 'Z') || inRANGE_helper_(U8, ((const U8*)s)[0], '0', '9') || 0xFF == ((const U8*)s)[0] ) ? 1\
-: ( 0x80 == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], 0x8A, 0x8B) ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x73) ) ? 2 : 0 )\
+( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0x80 ) == 0x00 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0xE0 ) == 0x80 ) ) ? 1\
+: ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[0]), 0xC5, 0xC7) ) ?\
+ 2 \
: ( 0x8D == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x57, 0x58) ) ? 2 : 0 ) \
: ( 0x8E == ((const U8*)s)[0] ) ? \
@@ -2421,9 +2415,9 @@
*/
/*** GENERATED CODE ***/
#define is_PROBLEMATIC_LOCALE_FOLDEDS_START_utf8(s) \
-( ( ( ((const U8*)s)[0] <= ' ' ) || inRANGE_helper_(U8, ((const U8*)s)[0], '.', '&') || inRANGE_helper_(U8, ((const U8*)s)[0], '!', '/') || inRANGE_helper_(U8, ((const U8*)s)[0], ',', '?') || inRANGE_helper_(U8, ((const U8*)s)[0], '`', '"') || inRANGE_helper_(U8, ((const U8*)s)[0], 'a', 'i') || inRANGE_helper_(U8, ((const U8*)s)[0], 'j', 'r') || inRANGE_helper_(U8, ((const U8*)s)[0], '~', 'z') || '[' == ((const U8*)s)[0] || ']' == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], '{', 'I') || inRANGE_helper_(U8, ((const U8*)s)[0], '}', 'R') || '\\' == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], 'S', 'Z') || inRANGE_helper_(U8, ((const U8*)s)[0], '0', '9') || 0xFF == ((const U8*)s)[0] ) ? 1\
-: ( 0x80 == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], 0x8A, 0x8B) ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x51, 0x59) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x73) ) ? 2 : 0 )\
+( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0x80 ) == 0x00 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0xE0 ) == 0x80 ) ) ? 1\
+: ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[0]), 0xC5, 0xC7) ) ?\
+ 2 \
: ( 0x8D == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x57, 0x58) ) ? 2 : 0 ) \
: ( 0x8E == ((const U8*)s)[0] ) ? \
@@ -2544,7 +2538,7 @@
( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
: ( 0xCA == ((const U8*)s)[0] ) ? \
( ( 0x41 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xAA) ) ? 3 : 0 )\
: ( 0x42 == ((const U8*)s)[1] ) ? \
( ( 0x56 == ((const U8*)s)[2] ) ? 3 : 0 ) \
: ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
@@ -2578,7 +2572,7 @@
/*** GENERATED CODE ***/
#define is_XDIGIT_high(s) \
( ( ( 0xDD == ((const U8*)s)[0] ) && ( 0x72 == ((const U8*)s)[1] ) ) ? ( ( 0x66 == ((const U8*)s)[2] ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x67) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB0, 0xB9) ) ? 4 : 0 )\
: ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x67, 0x68) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x42, 0x47) ) ) ? 4 : 0 ) : 0 )
/*** GENERATED CODE ***/
@@ -2597,7 +2591,7 @@
( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
: ( 0xCA == ((const U8*)s)[0] ) ? \
( ( 0x41 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || 0x51 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xAA) ) ? 3 : 0 )\
: ( 0x42 == ((const U8*)s)[1] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x49, 0x4A) || 0x56 == ((const U8*)s)[2] ) ? 3 : 0 )\
: ( ( 0x43 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
@@ -2621,15 +2615,15 @@
( ( ( LIKELY((e) > (s)) ) && ( LIKELY(((e) - (s)) >= UTF8SKIP(s)) ) ) ? ( ( 0xDD == ((const U8*)s)[0] ) ?\
( ( 0x72 == ((const U8*)s)[1] ) ? \
( ( 0x55 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x72) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB0, 0xBF) ) ? 4 : 0 )\
: ( 0x56 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x56) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xAF) ) ? 4 : 0 )\
: ( ( 0x72 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x71, 0x72) ) ) ? 4 : 0 )\
: 0 ) \
- : ( 0xDF == ((const U8*)s)[0] || 0xEA == ((const U8*)s)[0] || 0xEC == ((const U8*)s)[0] ) ?\
+ : ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) == 0xF3 || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0xFD ) == 0xF5 ) ) ?\
( ( ( ( 0x72 == ((const U8*)s)[1] ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x71, 0x72) ) ) ? 4 : 0 )\
: ( 0xED == ((const U8*)s)[0] ) ? \
- ( ( ( ( ( ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x52 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x54 ) || ((const U8*)s)[1] == 0x58 || ((const U8*)s)[1] == 0x5F || ((const U8*)s)[1] == 0x63 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x65 ) || ((const U8*)s)[1] == 0x69 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x70 ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 )\
+ ( ( ( ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF9 ) == 0xA9 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF1 ) == 0xB1 ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 )\
: ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x42 == ((const U8*)s)[1] ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 ) : 0 )
/*
@@ -2646,12 +2640,12 @@
( ( 0xDD == ((const U8*)s)[0] ) ? \
( ( 0x72 == ((const U8*)s)[1] ) ? \
( ( 0x55 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x72) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB0, 0xBF) ) ? 4 : 0 )\
: ( 0x56 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x56) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xAF) ) ? 4 : 0 )\
: ( ( 0x72 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x71, 0x72) ) ) ? 4 : 0 )\
: 0 ) \
-: ( ( ( ( 0xDF == ((const U8*)s)[0] || 0xEA == ((const U8*)s)[0] || 0xEC == ((const U8*)s)[0] ) && ( 0x72 == ((const U8*)s)[1] ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x71, 0x72) ) ) ? 4 : 0 )
+: ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) == 0xF3 || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0xFD ) == 0xF5 ) ) && ( 0x72 == ((const U8*)s)[1] ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x71, 0x72) ) ) ? 4 : 0 )
/*
LARGER_NON_CHARS: # 5 bytes
@@ -2673,7 +2667,7 @@
/*** GENERATED CODE ***/
#define is_LARGER_NON_CHARS_utf8(s) \
( ( 0xED == ((const U8*)s)[0] ) ? \
- ( ( ( ( ( ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x52 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x54 ) || ((const U8*)s)[1] == 0x58 || ((const U8*)s)[1] == 0x5F || ((const U8*)s)[1] == 0x63 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x65 ) || ((const U8*)s)[1] == 0x69 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x70 ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 )\
+ ( ( ( ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF9 ) == 0xA9 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF1 ) == 0xB1 ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 )\
: ( ( ( ( ( 0xEE == ((const U8*)s)[0] ) && ( 0x42 == ((const U8*)s)[1] ) ) && ( 0x72 == ((const U8*)s)[2] ) ) && ( 0x72 == ((const U8*)s)[3] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x71, 0x72) ) ) ? 5 : 0 )
/*
@@ -2683,7 +2677,7 @@
*/
/*** GENERATED CODE ***/
#define is_SURROGATE_utf8_safe(s,e) \
-( ( ( ( ( ( ((e) - (s)) >= 4 ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x64, 0x65) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x59) || 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x72) ) ) ? 4 : 0 )
+( ( ( ( ( ( ((e) - (s)) >= 4 ) && ( 0xDD == ((const U8*)s)[0] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x64, 0x65) ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xBF) ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xBF) ) ) ? 4 : 0 )
/*
QUOTEMETA: Meta-characters that \Q should quote
@@ -2691,74 +2685,68 @@
\p{_Perl_Quotemeta}
*/
/*** GENERATED CODE ***/
-#define is_QUOTEMETA_high_part0_(s) \
-( ( 0x41 == ((const U8*)s)[1] || inRANGE_helper_(U8, ((const U8*)s)[1], 0x54, 0x59) || 0x5F == ((const U8*)s)[1] || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x72) ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ? 3 : 0 )\
+#define is_QUOTEMETA_high(s) \
+( ( 0xAF == ((const U8*)s)[0] ) ? \
+ ( ( 0x56 == ((const U8*)s)[1] ) ? 2 : 0 ) \
+: ( 0xB7 == ((const U8*)s)[0] ) ? \
+ ( ( ( 0x57 == ((const U8*)s)[1] ) && ( 0x6A == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+: ( 0xBC == ((const U8*)s)[0] ) ? \
+ ( ( 0x51 == ((const U8*)s)[1] ) ? \
+ ( ( 0x72 == ((const U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0x52 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+: ( 0xBD == ((const U8*)s)[0] ) ? \
+ ( ( 0x62 == ((const U8*)s)[1] ) ? \
+ ( ( 0x41 == ((const U8*)s)[2] ) ? 3 : 0 ) \
+ : ( ( 0x70 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x63) ) ) ? 3 : 0 )\
+: ( 0xBE == ((const U8*)s)[0] ) ? \
+ ( ( ( 0x41 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x52, 0x55) ) ) ? 3 : 0 )\
+: ( 0xCA == ((const U8*)s)[0] ) ? \
+ ( ( 0xA0 == NATIVE_UTF8_TO_I8(((const U8*)s)[1]) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xAD, 0xBF) ) ?\
+ 3 \
: ( 0x42 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x71) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xBE) ) ? 3 : 0 )\
: ( 0x43 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x42, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x63, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA1, 0xB3) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xB5, 0xBF) ) ? 3 : 0 )\
: ( 0x44 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x56) ) ? 3 : 0 )\
- : ( ( 0x53 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x57, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ) ? 3 : 0 )
-
-
-/*** GENERATED CODE ***/
-#define is_QUOTEMETA_high_part1_(s) \
-( ( 0xCB == ((const U8*)s)[0] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x41, 0x43) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x49, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x51, 0x59) || 0x5F == ((const U8*)s)[1] || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x68) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x72) ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xAF) ) ? 3 : 0 )\
+ : ( ( 0x53 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xB0, 0xBF) ) ) ? 3 : 0 )\
+: ( 0xCB == ((const U8*)s)[0] ) ? \
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xA0, 0xA2) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xA8, 0xBA) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xBD, 0xBF) ) ?\
+ 3 \
: ( 0x69 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x63) ) ? 3 : 0 )\
- : ( ( 0x6A == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xB5) ) ? 3 : 0 )\
+ : ( ( 0x6A == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xB4, 0xBF) ) ) ? 3 : 0 )\
: ( 0xCC == ((const U8*)s)[0] ) ? \
- ( ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x51, 0x59) || 0x5F == ((const U8*)s)[1] || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x72) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ) ? 3 : 0 )\
+ 3 \
: ( 0xCD == ((const U8*)s)[0] ) ? \
- ( ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x57, 0x59) || 0x5F == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[1]), 0xB0, 0xB3) ) ?\
+ 3 \
+ : 0 ) \
: ( 0xCE == ((const U8*)s)[0] ) ? \
( ( 0x41 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x44) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x49, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ? 3 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA0, 0xA3) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[2]), 0xA8, 0xBF) ) ? 3 : 0 )\
: ( 0x42 == ((const U8*)s)[1] ) ? \
- ( ( 0x41 == ((const U8*)s)[2] || 0x57 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xEF ) == 0xA0 ) ? 3 : 0 )\
: ( ( 0x52 == ((const U8*)s)[1] ) && ( 0x45 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
: ( 0xDD == ((const U8*)s)[0] ) ? \
( ( 0x72 == ((const U8*)s)[1] ) ? \
( ( 0x4A == ((const U8*)s)[2] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x71, 0x72) ) ? 4 : 0 )\
: ( 0x57 == ((const U8*)s)[2] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x56) ) ? 4 : 0 )\
+ ( ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xAF) ) ? 4 : 0 )\
: ( 0x59 == ((const U8*)s)[2] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x46, 0x47) ) ? 4 : 0 )\
: ( 0x65 == ((const U8*)s)[2] ) ? \
( ( 0x72 == ((const U8*)s)[3] ) ? 4 : 0 ) \
: ( 0x70 == ((const U8*)s)[2] ) ? \
( ( 0x41 == ((const U8*)s)[3] ) ? 4 : 0 ) \
- : ( ( 0x72 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x57, 0x59) || 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x66) ) ) ? 4 : 0 )\
+ : ( ( 0x72 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB0, 0xB8) ) ) ? 4 : 0 )\
: 0 ) \
: ( 0xDF == ((const U8*)s)[0] ) ? \
( ( 0x56 == ((const U8*)s)[1] ) ? \
( ( ( 0x46 == ((const U8*)s)[2] ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x44) ) ) ? 4 : 0 )\
- : ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x52 == ((const U8*)s)[2] ) ) && ( 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x68) ) ) ? 4 : 0 )\
-: ( ( ( ( ( 0xED == ((const U8*)s)[0] ) && ( 0x6A == ((const U8*)s)[1] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x44) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x51, 0x59) || 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[3], 0x70, 0x72) ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[4], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[4], 0x51, 0x59) || 0x5F == ((const U8*)s)[4] || inRANGE_helper_(U8, ((const U8*)s)[4], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[4], 0x70, 0x72) ) ) ? 5 : 0 )
-
-
-/*** GENERATED CODE ***/
-#define is_QUOTEMETA_high(s) \
-( ( 0xAF == ((const U8*)s)[0] ) ? \
- ( ( 0x56 == ((const U8*)s)[1] ) ? 2 : 0 ) \
-: ( 0xB7 == ((const U8*)s)[0] ) ? \
- ( ( ( 0x57 == ((const U8*)s)[1] ) && ( 0x6A == ((const U8*)s)[2] ) ) ? 3 : 0 )\
-: ( 0xBC == ((const U8*)s)[0] ) ? \
- ( ( 0x51 == ((const U8*)s)[1] ) ? \
- ( ( 0x72 == ((const U8*)s)[2] ) ? 3 : 0 ) \
- : ( ( 0x52 == ((const U8*)s)[1] ) && ( 0x41 == ((const U8*)s)[2] ) ) ? 3 : 0 )\
-: ( 0xBD == ((const U8*)s)[0] ) ? \
- ( ( 0x62 == ((const U8*)s)[1] ) ? \
- ( ( 0x41 == ((const U8*)s)[2] ) ? 3 : 0 ) \
- : ( ( 0x70 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x63) ) ) ? 3 : 0 )\
-: ( 0xBE == ((const U8*)s)[0] ) ? \
- ( ( ( 0x41 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x52, 0x55) ) ) ? 3 : 0 )\
-: ( 0xCA == ((const U8*)s)[0] ) ? is_QUOTEMETA_high_part0_(s) : is_QUOTEMETA_high_part1_(s) )
+ : ( ( ( 0x62 == ((const U8*)s)[1] ) && ( 0x52 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB3, 0xBA) ) ) ? 4 : 0 )\
+: ( ( ( 0xED == ((const U8*)s)[0] ) && ( 0x6A == ((const U8*)s)[1] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x44) ) ) ? 5 : 0 )
/*
MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character
@@ -2791,7 +2779,7 @@
: ( 0xAA == ((const U8*)s)[0] ) ? \
( ( ( 0x6A == ((const U8*)s)[1] ) && ( ( ((const U8*)s)[2] & 0xBF ) == 'n' ) ) ? 3 : 0 )\
: ( 0xB3 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] ) ? \
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) ? \
( ( ( 0xB3 == ((const U8*)s)[2] ) && ( 0x67 == ((const U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x58 == ((const U8*)s)[1] || 0x65 == ((const U8*)s)[1] ) ? \
( ( 0xAF == ((const U8*)s)[2] ) ? \
@@ -2835,13 +2823,13 @@
( ( 0x46 == ((const U8*)s)[2] ) ? \
( ( ( ( 0xB7 == ((const U8*)s)[3] ) && ( 0x53 == ((const U8*)s)[4] ) ) && ( 0x43 == ((const U8*)s)[5] ) ) ? 6 : 0 )\
: ( 0x62 == ((const U8*)s)[2] ) ? \
- ( ( ( ( 0xB7 == ((const U8*)s)[3] ) && ( 0x52 == ((const U8*)s)[4] ) ) && ( 0x46 == ((const U8*)s)[5] || 0x52 == ((const U8*)s)[5] || 0x54 == ((const U8*)s)[5] || 0x64 == ((const U8*)s)[5] ) ) ? 6 : 0 )\
+ ( ( ( ( 0xB7 == ((const U8*)s)[3] ) && ( 0x52 == ((const U8*)s)[4] ) ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[5]) & 0xF7 ) == 0xA5 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[5]) == 0xAB || NATIVE_UTF8_TO_I8(((const U8*)s)[5]) == 0xB6 ) ) ? 6 : 0 )\
: ( ( ( ( 0x71 == ((const U8*)s)[2] ) && ( 0xB7 == ((const U8*)s)[3] ) ) && ( 0x52 == ((const U8*)s)[4] ) ) && ( 0x64 == ((const U8*)s)[5] ) ) ? 6 : 0 )\
: 0 ) \
: ( 0xBF == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x66, 0x67) ) ? \
( ( ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) ) && ( 0xB3 == ((const U8*)s)[3] ) ) && ( 0x67 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
- : ( ( ( ( 0x69 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) || 0x57 == ((const U8*)s)[2] || 0x62 == ((const U8*)s)[2] || 0x6A == ((const U8*)s)[2] ) ) && ( 0xB3 == ((const U8*)s)[3] ) ) && ( 0x67 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
+ : ( ( ( ( 0x69 == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xBC ) ) && ( 0xB3 == ((const U8*)s)[3] ) ) && ( 0x67 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
: 0 )
@@ -2876,7 +2864,7 @@
: ( 0xAA == ((const U8*)s)[0] ) ? \
( ( ( 0x6A == ((const U8*)s)[1] ) && ( ( ((const U8*)s)[2] & 0xBF ) == 'n' ) ) ? 3 : 0 )\
: ( 0xB3 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] ) ? \
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) ? \
( ( ( 0xB3 == ((const U8*)s)[2] ) && ( 0x67 == ((const U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x58 == ((const U8*)s)[1] || 0x65 == ((const U8*)s)[1] ) ? \
( ( 0xAF == ((const U8*)s)[2] ) ? \
@@ -2898,7 +2886,7 @@
: ( 0xBF == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x66, 0x67) ) ? \
( ( ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) ) && ( 0xB3 == ((const U8*)s)[3] ) ) && ( 0x67 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
- : ( ( ( ( 0x69 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) || 0x57 == ((const U8*)s)[2] || 0x62 == ((const U8*)s)[2] || 0x6A == ((const U8*)s)[2] ) ) && ( 0xB3 == ((const U8*)s)[3] ) ) && ( 0x67 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
+ : ( ( ( ( 0x69 == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xBC ) ) && ( 0xB3 == ((const U8*)s)[3] ) ) && ( 0x67 == ((const U8*)s)[4] ) ) ? 5 : 0 )\
: 0 )
@@ -2934,7 +2922,7 @@
: ( 0xAA == ((const U8*)s)[0] ) ? \
( ( ( 0x6A == ((const U8*)s)[1] ) && ( ( ((const U8*)s)[2] & 0xBF ) == 'n' ) ) ? 3 : 0 )\
: ( 0xB3 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] ) ? \
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) ? \
( ( ( 0xB3 == ((const U8*)s)[2] ) && ( 0x67 == ((const U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x58 == ((const U8*)s)[1] || 0x65 == ((const U8*)s)[1] ) ? \
( ( 0xAF == ((const U8*)s)[2] ) ? \
@@ -3503,7 +3491,7 @@
: ( 0xAA == ((const U8*)s)[0] ) ? \
( ( 0x6A == ((const U8*)s)[1] ) ? 2 : 0 ) \
: ( 0xB3 == ((const U8*)s)[0] ) ? \
- ( ( 0x53 == ((const U8*)s)[1] || 0x55 == ((const U8*)s)[1] ) ? 2 \
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) ? 2 \
: ( 0x58 == ((const U8*)s)[1] || 0x65 == ((const U8*)s)[1] ) ? \
( ( ( 0xAF == ((const U8*)s)[2] ) && ( 0x43 == ((const U8*)s)[3] ) ) ? 4 : 2 )\
: ( 0x67 == ((const U8*)s)[1] ) ? \
@@ -3521,7 +3509,7 @@
: ( 0xBF == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x66, 0x67) ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) ) ? 3 : 0 )\
- : ( ( 0x69 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) || 0x57 == ((const U8*)s)[2] || 0x62 == ((const U8*)s)[2] || 0x6A == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+ : ( ( 0x69 == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xBC ) ) ? 3 : 0 )\
: 0 ) \
: ((e)-(s) > 2) ? \
( ( ( ( ((const U8*)s)[0] & 0xAF ) == 'a' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 'h' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 's' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'w' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'y' ) ) ? 1\
@@ -3532,7 +3520,7 @@
: ( 0xAA == ((const U8*)s)[0] ) ? \
( ( 0x6A == ((const U8*)s)[1] ) ? 2 : 0 ) \
: ( 0xB3 == ((const U8*)s)[0] ) ? \
- ( ( ((const U8*)s)[1] == 0x53 || ((const U8*)s)[1] == 0x55 || ((const U8*)s)[1] == 0x58 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x65 ) ) ? 2 : 0 )\
+ ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF7 ) == 0xB1 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[1]) == 0xB7 ) ? 2 : 0 )\
: ( 0xB4 == ((const U8*)s)[0] ) ? \
( ( ( ( ((const U8*)s)[1] & 0xFB ) == 0x42 ) || ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x55 ) ? 2 : 0 )\
: ( 0xB7 == ((const U8*)s)[0] ) ? \
@@ -3540,7 +3528,7 @@
: ( 0xBF == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x66, 0x67) ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) ) ? 3 : 0 )\
- : ( ( 0x69 == ((const U8*)s)[1] ) && ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x48) || 0x57 == ((const U8*)s)[2] || 0x62 == ((const U8*)s)[2] || 0x6A == ((const U8*)s)[2] ) ) ? 3 : 0 )\
+ : ( ( 0x69 == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF8 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFB ) == 0xB0 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xBC ) ) ? 3 : 0 )\
: 0 ) \
: ((e)-(s) > 1) ? \
( ( ( ( ((const U8*)s)[0] & 0xAF ) == 'a' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 'h' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 's' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'w' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'y' ) ) ? 1\
@@ -3551,7 +3539,7 @@
: ( 0xAA == ((const U8*)s)[0] ) ? \
( ( 0x6A == ((const U8*)s)[1] ) ? 2 : 0 ) \
: ( 0xB3 == ((const U8*)s)[0] ) ? \
- ( ( ((const U8*)s)[1] == 0x53 || ((const U8*)s)[1] == 0x55 || ((const U8*)s)[1] == 0x58 || ( ( ((const U8*)s)[1] & 0xFD ) == 0x65 ) ) ? 2 : 0 )\
+ ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xFD ) == 0xAC ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[1]) & 0xF7 ) == 0xB1 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[1]) == 0xB7 ) ? 2 : 0 )\
: ( ( 0xB4 == ((const U8*)s)[0] ) && ( ( ( ((const U8*)s)[1] & 0xFB ) == 0x42 ) || ((const U8*)s)[1] == 0x4A || ((const U8*)s)[1] == 0x55 ) ) ? 2 : 0 )\
: ((e)-(s) > 0) ? \
( ( ( ((const U8*)s)[0] & 0xAF ) == 'a' ) || ( ( ((const U8*)s)[0] & 0x9F ) == 'f' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 'h' ) || ( ( ((const U8*)s)[0] & 0xBE ) == 's' ) || ( ( ((const U8*)s)[0] & 0xBF ) == 'y' ) )\
@@ -3592,15 +3580,15 @@
( ( 0x62 == ((const U8*)s)[1] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x64, 0x68) || 0x71 == ((const U8*)s)[2] ) ? 3 : 0 )\
: ( 0x68 == ((const U8*)s)[1] ) ? \
- ( ( 0x57 == ((const U8*)s)[2] || 0x59 == ((const U8*)s)[2] || 0x62 == ((const U8*)s)[2] || 0x64 == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF9 ) == 0xB0 ) ? 3 : 0 )\
: ( 0x6A == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x59) || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x70, 0x72) ) ? 3 : 0 )\
+ 3 \
: ( 0x70 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x51, 0x56) || 0x59 == ((const U8*)s)[2] || 0x5F == ((const U8*)s)[2] || 0x62 == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x64, 0x65) || 0x6A == ((const U8*)s)[2] ) ? 3 : 0 )\
+ ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF0 ) == 0xA0 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xFA ) == 0xB2 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF7 ) == 0xB4 ) ) ? 3 : 0 )\
: ( 0x71 == ((const U8*)s)[1] ) ? \
- ( ( inRANGE_helper_(U8, ((const U8*)s)[2], 0x43, 0x45) || inRANGE_helper_(U8, ((const U8*)s)[2], 0x47, 0x48) || 0x53 == ((const U8*)s)[2] || 0x59 == ((const U8*)s)[2] || 0x5F == ((const U8*)s)[2] || inRANGE_helper_(U8, ((const U8*)s)[2], 0x64, 0x65) ) ? 3 : 0 )\
- : ( ( 0x72 == ((const U8*)s)[1] ) && ( ( ( ((const U8*)s)[2] & 0xFB ) == 0x43 ) || ( ( ((const U8*)s)[2] & 0xDE ) == 0x44 ) || ((const U8*)s)[2] == 0x48 || ((const U8*)s)[2] == 0x59 || ((const U8*)s)[2] == 0x5F || ( ( ((const U8*)s)[2] & '7' ) == 0x62 ) ) ) ? 3 : 0 )\
-: ( ( ( ( 0xDD == ((const U8*)s)[0] ) && ( 0x71 == ((const U8*)s)[1] ) ) && ( 0x66 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, ((const U8*)s)[3], 0x41, 0x47) || 0x5F == ((const U8*)s)[3] || inRANGE_helper_(U8, ((const U8*)s)[3], 0x62, 0x65) ) ) ? 4 : 0 )
+ ( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xEA ) == 0xA2 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF7 ) == 0xA4 ) ) ? 3 : 0 )\
+ : ( ( 0x72 == ((const U8*)s)[1] ) && ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xEA ) == 0xA2 ) || NATIVE_UTF8_TO_I8(((const U8*)s)[2]) == 0xA4 || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[2]) & 0xF7 ) == 0xB4 ) ) ) ? 3 : 0 )\
+: ( ( ( ( 0xDD == ((const U8*)s)[0] ) && ( 0x71 == ((const U8*)s)[1] ) ) && ( 0x66 == ((const U8*)s)[2] ) ) && ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xA0, 0xA6) || inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[3]), 0xB3, 0xB7) ) ) ? 4 : 0 )
/*
PROBLEMATIC_LOCALE_FOLD: characters whose fold is problematic under locale
@@ -3609,9 +3597,9 @@
*/
/*** GENERATED CODE ***/
#define is_PROBLEMATIC_LOCALE_FOLD_utf8(s) \
-( ( ( ((const U8*)s)[0] <= ' ' ) || inRANGE_helper_(U8, ((const U8*)s)[0], '.', '&') || inRANGE_helper_(U8, ((const U8*)s)[0], '!', ';') || inRANGE_helper_(U8, ((const U8*)s)[0], '-', '/') || inRANGE_helper_(U8, ((const U8*)s)[0], ',', '?') || inRANGE_helper_(U8, ((const U8*)s)[0], '`', '"') || inRANGE_helper_(U8, ((const U8*)s)[0], 'a', 'i') || inRANGE_helper_(U8, ((const U8*)s)[0], 'j', 'r') || inRANGE_helper_(U8, ((const U8*)s)[0], '~', 'z') || '^' == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], '[', ']') || inRANGE_helper_(U8, ((const U8*)s)[0], '{', 'I') || inRANGE_helper_(U8, ((const U8*)s)[0], '}', 'R') || '\\' == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], 'S', 'Z') || inRANGE_helper_(U8, ((const U8*)s)[0], '0', '9') || 0xFF == ((const U8*)s)[0] ) ? 1\
-: ( 0x78 == ((const U8*)s)[0] || 0x80 == ((const U8*)s)[0] || 0x8A == ((const U8*)s)[0] ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x51, 0x59) || 0x5F == ((const U8*)s)[1] || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x72) ) ? 2 : 0 )\
+( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0x80 ) == 0x00 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0xE0 ) == 0x80 ) ) ? 1\
+: ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[0]), 0xC5, 0xC7) ) ?\
+ 2 \
: ( 0x8C == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x57, 0x58) ) ? 2 : 0 ) \
: ( 0x8D == ((const U8*)s)[0] ) ? \
@@ -3652,9 +3640,9 @@
*/
/*** GENERATED CODE ***/
#define is_PROBLEMATIC_LOCALE_FOLDEDS_START_utf8(s) \
-( ( ( ((const U8*)s)[0] <= ' ' ) || inRANGE_helper_(U8, ((const U8*)s)[0], '.', '&') || inRANGE_helper_(U8, ((const U8*)s)[0], '!', ';') || inRANGE_helper_(U8, ((const U8*)s)[0], '-', '/') || inRANGE_helper_(U8, ((const U8*)s)[0], ',', '?') || inRANGE_helper_(U8, ((const U8*)s)[0], '`', '"') || inRANGE_helper_(U8, ((const U8*)s)[0], 'a', 'i') || inRANGE_helper_(U8, ((const U8*)s)[0], 'j', 'r') || inRANGE_helper_(U8, ((const U8*)s)[0], '~', 'z') || '^' == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], '[', ']') || inRANGE_helper_(U8, ((const U8*)s)[0], '{', 'I') || inRANGE_helper_(U8, ((const U8*)s)[0], '}', 'R') || '\\' == ((const U8*)s)[0] || inRANGE_helper_(U8, ((const U8*)s)[0], 'S', 'Z') || inRANGE_helper_(U8, ((const U8*)s)[0], '0', '9') || 0xFF == ((const U8*)s)[0] ) ? 1\
-: ( 0x78 == ((const U8*)s)[0] || 0x80 == ((const U8*)s)[0] || 0x8A == ((const U8*)s)[0] ) ?\
- ( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x41, 0x4A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x51, 0x59) || 0x5F == ((const U8*)s)[1] || inRANGE_helper_(U8, ((const U8*)s)[1], 0x62, 0x6A) || inRANGE_helper_(U8, ((const U8*)s)[1], 0x70, 0x72) ) ? 2 : 0 )\
+( ( ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0x80 ) == 0x00 ) || ( ( NATIVE_UTF8_TO_I8(((const U8*)s)[0]) & 0xE0 ) == 0x80 ) ) ? 1\
+: ( inRANGE_helper_(U8, NATIVE_UTF8_TO_I8(((const U8*)s)[0]), 0xC5, 0xC7) ) ?\
+ 2 \
: ( 0x8C == ((const U8*)s)[0] ) ? \
( ( inRANGE_helper_(U8, ((const U8*)s)[1], 0x57, 0x58) ) ? 2 : 0 ) \
: ( 0x8D == ((const U8*)s)[0] ) ? \
@@ -3765,6 +3753,6 @@
* 696e706fddd3ce8cd48c7ea91caf4c9edf5c296432d320aa7b78631f69aa9eac lib/unicore/mktables
* 50b85a67451145545a65cea370dab8d3444fbfe07e9c34cef560c5b7da9d3eef lib/unicore/version
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
- * 9bc06e27bad117d6372cf4ce914756a733988b049f03f150549ffac271f68693 regen/regcharclass.pl
+ * 3c9ee595e6731a94516e947d5919722eaf6728c8ce324fdd8a92de33decc3e54 regen/regcharclass.pl
* b2f896452d2b30da3e04800f478c60c1fd0b03d6b668689b020f1e3cf1f1cdd9 regen/regcharclass_multi_char_folds.pl
* ex: set ro: */
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 63423e18d3..1fec6f92c6 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1072,14 +1072,17 @@ sub _cond_as_str {
@cond = $op->{vals}->@* if defined $op->{vals};
my $test= $op->{test};
my $is_cp_ret = $opts_ref->{ret_type} eq "cp";
+ my $charset = $opts_ref->{charset};
return "( $test )" unless @cond;
+ my (@ranges, @native_ranges);
+ my @native_conds;
+
# rangify the list. As we encounter a new value, it is placed in a new
# subarray by itself. If the next value is adjacent to it, the end point
# of the subarray is merely incremented; and so on. When the next value
# that isn't adjacent to the previous one is encountered, Update() is
# called to hoist any single-element subarray to be a scalar.
- my @ranges;
my $Update= sub {
# We skip this if there are optimizations that
# we can apply (below) to the individual ranges
@@ -1088,6 +1091,51 @@ sub _cond_as_str {
}
};
+ # Parse things twice, using different approaches for representing things,
+ # afterwards choosing the alternative with the fewest branches
+ for my $i (0, 1) {
+
+ # Should we avoid using mnemonics for code points?
+ my $always_hex = 0;
+
+ if ($i) { # 2nd pass
+ # The second pass is only for non-ascii character sets, to see if
+ # a transform to Unicode/ASCII saves anything.
+ last if $charset =~ /ascii/i;
+
+ # If the first pass came up with a single range, we won't be able
+ # to do better than that, so don't try.
+ last if @ranges == 1;
+
+ # We calculated the native values the first iteration
+ @native_ranges = @ranges;
+ @native_conds = @cond;
+
+ # Start fresh
+ undef @ranges;
+ undef @cond;
+
+ # Determine the translation function, to/from UTF-8 or Latin1, and
+ # the corresponding transform of the condition to match
+ my $lookup;
+ if ($opts_ref->{type} =~ / ^ (?: utf8 | high ) $ /xi) {
+ $lookup = $utf_2_I8{$charset};
+ $test = "NATIVE_UTF8_TO_I8($test)";
+ }
+ else {
+ $lookup = $n2a{$charset};
+ $test = "NATIVE_TO_LATIN1($test)";
+ }
+
+ # Translate the native conditions (bytes) into the Unicode ones
+ for my $condition (@native_conds) {
+ push @cond, $lookup->[$condition];
+ }
+
+ # 'f' won't be the expected 'f' on this box
+ $always_hex = 1;
+ }
+
# Go through the code points (@cond) and collapse them as much as
# possible into ranges
for my $condition ( @cond ) {
@@ -1102,24 +1150,31 @@ sub _cond_as_str {
}
$Update->();
- # _combine is used for cp type matching.
+ # _combine is used for cp type matching. Because we return here, no
+ # second pass is done. It could conceivably be restructured to have a
+ # second pass, but no current uses of this script would actually gain any
+ # advantage by doing so, so the work hasn't been further considered.
return $self->_combine( $test, @ranges ) if $combine;
# If the input set has certain characteristics, we can optimize tests
# for it.
- # Return if all bytes match, hence is trivially true
+ # If all bytes match, the condition is trivially true; we don't need a 2nd pass
return 1 if @cond == 256;
# If this is a single UTF-8 range which includes all possible
# continuation bytes, and we aren't checking for well-formedness, this
# is trivially true.
+ #
+ # (In EBCDIC, this won't happen until the 2nd pass transforms the
+ # disjoint continuation byte ranges into a single I8 one.)
if ( @ranges == 1
&& ! $opts_ref->{safe}
&& ! $opts_ref->{no_length_checks}
&& $opts_ref->{type} =~ / ^ (?: utf8 | high ) $ /xi
&& $ranges[0]->[1] == 0xBF
- && $ranges[0]->[0] == 0x80)
+ && $ranges[0]->[0] == (($charset =~ /ascii/i)
+ ? 0x80 : 0xA0))
{
return 1;
}
@@ -1134,11 +1189,11 @@ sub _cond_as_str {
if @ranges > 1;
return 1;
}
- # this case
+
# Here, the first range starts at 0, but doesn't match everything.
# But the condition doesn't have to worry about being < 0
$ranges[0] = "( $test <= "
- . $self->val_fmt($ranges[0]->[1]) . " )";
+ . $self->val_fmt($ranges[0]->[1], $always_hex) . " )";
$loop_start++;
}
@@ -1151,34 +1206,41 @@ sub _cond_as_str {
# If the final range consists of more than one byte ending with
# the highest possible one, the condition doesn't have to worry
# about being > FF
- $ranges[-1] = "( $test >= " . $self->val_fmt($ranges[-1]->[0]) . " )";
+ $ranges[-1] = "( $test >= "
+ . $self->val_fmt($ranges[-1]->[0], $always_hex) . " )";
$loop_end--;
}
- # Look at each range to see if there any optimizations.
+ # Look at each range to see if there are any optimizations. The
+ # formatting may be thrown away, so might be wasted effort; and khw
+ # supposes this could be restructured to delay that until the final
+ # method is chosen. But that would be more coding work than
+ # warranted, as this is executed not that many times during a
+ # development cycle.
for (my $i = $loop_start; $i < $loop_end; $i++) {
if (! ref $ranges[$i]) { # Trivial case: no range
- $ranges[$i] = $self->val_fmt($ranges[$i]) . " == $test";
+ $ranges[$i] = $self->val_fmt($ranges[$i], $always_hex)
+ . " == $test";
}
elsif ($ranges[$i]->[0] == $ranges[$i]->[1]) {
$ranges[$i] = # Trivial case: single element range
- $self->val_fmt($ranges[$i]->[0]) . " == $test";
+ $self->val_fmt($ranges[$i]->[0], $always_hex)
+ . " == $test";
}
else {
$ranges[$i] = "inRANGE_helper_(U8, $test, "
- . $self->val_fmt($ranges[$i]->[0]) .", "
- . $self->val_fmt($ranges[$i]->[1]) . ")";
+ . $self->val_fmt($ranges[$i]->[0], $always_hex) .", "
+ . $self->val_fmt($ranges[$i]->[1], $always_hex) . ")";
}
}
- my @masks;
- if (@ranges > 1) {
+ # Here, have collapsed the matched code points into ranges. This code
+ # also sees if some of those different ranges have bit patterns which
+ # cause them to be combinable by ANDing with a mask. There's no need
+ # to do this if we are already down to a single range.
+ next unless @ranges > 1;
- # See if the entire set shares optimizable characteristics, and if so,
- # return the optimization. There is no need to do this on sets with
- # just a single range, as that can be expressed with a single
- # conditional.
- @masks = calculate_mask(@cond);
+ my @masks = calculate_mask(@cond);
# Stringify the output of calculate_mask()
if (@masks) {
@@ -1186,11 +1248,12 @@ sub _cond_as_str {
foreach my $mask_ref (@masks) {
if (defined $mask_ref->[1]) {
push @masked, "( ( $test & "
- . $self->val_fmt($mask_ref->[1]) . " ) == "
- . $self->val_fmt($mask_ref->[0]) . " )";
+ . $self->val_fmt($mask_ref->[1], $always_hex) . " ) == "
+ . $self->val_fmt($mask_ref->[0], $always_hex) . " )";
}
else { # An undefined mask means to use the value as-is
- push @masked, "$test == " . $self->val_fmt($mask_ref->[0]);
+ push @masked, "$test == "
+ . $self->val_fmt($mask_ref->[0], $always_hex);
}
}
@@ -1199,22 +1262,28 @@ sub _cond_as_str {
# results, there is no sense trying something that is bound to be
# worse.
if (@masked < @ranges) {
- return "( " . join( " || ", @masked ) . " )";
+ @ranges = @masked;
+ next;
}
@masks = @masked;
}
+
+ # If we found some mask possibilities, and they have fewer
+ # conditionals in them than the plain range method, convert to use the
+ # masks.
+ @ranges = @masks if @masks && @masks < @ranges;
+ } # End of both passes
+
+ # If the two passes came up with two sets, use the one with the fewest
+ # conditionals (the number of ranges is a proxy for that). If both have
+ # the same number, prefer the native, as that omits transformations.
+ if (@native_ranges && @native_ranges <= @ranges) {
+ @ranges = @native_ranges;
+ @cond = @native_conds;
}
- # We have generated the list of bytes in two ways; one trying to use masks
- # to cut the number of branches down, and the other to look at individual
- # ranges (some of which could be cut down by using a mask for just it).
- # We return whichever method uses the fewest branches.
- return "( "
- . join( " || ", (@masks && @masks < @ranges)
- ? @masks
- : @ranges)
- . " )";
+ return "( " . join( " || ", @ranges) . " )";
}
# _combine