diff options
-rw-r--r-- | regcharclass.h | 134 | ||||
-rwxr-xr-x | regen/regcharclass.pl | 34 | ||||
-rw-r--r-- | utf8.h | 17 |
3 files changed, 102 insertions, 83 deletions
diff --git a/regcharclass.h b/regcharclass.h index 64e4453e58..1d335e6b21 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -624,9 +624,8 @@ ®charclass_multi_char_folds::multi_char_folds(1) */ /*** GENERATED CODE ***/ -#define is_MULTI_CHAR_FOLD_utf8_safe(s,e) \ -( ((e)-(s) > 5) ? \ - ( ( 0x61 == ((U8*)s)[0] ) ? \ +#define is_MULTI_CHAR_FOLD_utf8_safe_part0(s,e) \ +( ( 0x61 == ((U8*)s)[0] ) ? \ ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ : ( 0x66 == ((U8*)s)[0] ) ? \ ( ( 0x66 == ((U8*)s)[1] ) ? \ @@ -651,39 +650,19 @@ ( ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xB1 == ((U8*)s)[1] || 0xB7 == ((U8*)s)[1] ) ? \ ( ( 0xCD == ((U8*)s)[2] ) ? \ - ( ( 0x82 == ((U8*)s)[3] ) ? \ - ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\ - : 0 ) \ + ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \ : ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ - : ( 0xB9 == ((U8*)s)[1] ) ? \ - ( ( 0xCC == ((U8*)s)[2] ) ? \ - ( ( 0x88 == ((U8*)s)[3] ) ? \ - ( ( 0xCC == ((U8*)s)[4] ) ? \ - ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \ - : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\ - : 0 ) \ - : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ - : 0 ) \ + : ( ( ( 0xB9 == ((U8*)s)[1] ) && ( 0xCD == ((U8*)s)[2] ) ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xCF == ((U8*)s)[0] ) ? \ ( ( 0x81 == ((U8*)s)[1] ) ? \ ( ( ( 0xCC == ((U8*)s)[2] ) && ( 0x93 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0x85 == ((U8*)s)[1] ) ? \ ( ( 0xCC == ((U8*)s)[2] ) ? \ - ( ( 0x88 == ((U8*)s)[3] ) ? \ - ( ( 0xCC == ((U8*)s)[4] ) ? \ - ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \ - : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\ - : ( 0x93 == ((U8*)s)[3] ) ? \ - ( ( 0xCC == ((U8*)s)[4] ) ? \ - ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 4 ) \ - : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 4 )\ - : 0 ) \ + ( ( 0x93 == ((U8*)s)[3] ) ? 4 : 0 ) \ : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0x89 == ((U8*)s)[1] ) ? \ ( ( 0xCD == ((U8*)s)[2] ) ? \ - ( ( 0x82 == ((U8*)s)[3] ) ? \ - ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\ - : 0 ) \ + ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \ : ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( ( ( 0x8E == ((U8*)s)[1] ) && ( 0xCE == ((U8*)s)[2] ) ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xD5 == ((U8*)s)[0] ) ? \ @@ -696,8 +675,12 @@ ( ( 0xBC == ((U8*)s)[1] ) ? \ ( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ : ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ - : 0 ) \ -: ((e)-(s) > 4) ? \ + : 0 ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe_part1(s,e) \ +( ((e)-(s) > 3) ? \ ( ( 0x61 == ((U8*)s)[0] ) ? \ ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ : ( 0x66 == ((U8*)s)[0] ) ? \ @@ -744,12 +727,37 @@ : ( 0xB4 == ((U8*)s)[1] ) ? \ ( ( ( 0xD5 == ((U8*)s)[2] ) && ( ( ( ((U8*)s)[3] & 0xF7 ) == 0xA5 ) || ((U8*)s)[3] == 0xAB || ((U8*)s)[3] == 0xB6 ) ) ? 4 : 0 )\ : ( ( ( 0xBE == ((U8*)s)[1] ) && ( 0xD5 == ((U8*)s)[2] ) ) && ( 0xB6 == ((U8*)s)[3] ) ) ? 4 : 0 )\ - : ( 0xE1 == ((U8*)s)[0] ) ? \ - ( ( 0xBC == ((U8*)s)[1] ) ? \ - ( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ - : ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ : 0 ) \ -: ((e)-(s) > 3) ? \ +: ((e)-(s) > 2) ? \ + ( ( 0x61 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x66 == ((U8*)s)[0] ) ? \ + ( ( 0x66 == ((U8*)s)[1] ) ? \ + ( ( 0x69 == ((U8*)s)[2] || 0x6C == ((U8*)s)[2] ) ? 3 : 2 ) \ + : ( 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 ) \ + : ( 0x68 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0xB1 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x69 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x87 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x6A == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8C == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x73 == ((U8*)s)[0] ) ? \ + ( ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ? 2 : 0 ) \ + : ( 0x74 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x88 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( 0x77 == ((U8*)s)[0] || 0x79 == ((U8*)s)[0] ) ? \ + ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8A == ((U8*)s)[2] ) ) ? 3 : 0 ) \ + : ( ( ( 0xCA == ((U8*)s)[0] ) && ( 0xBC == ((U8*)s)[1] ) ) && ( 0x6E == ((U8*)s)[2] ) ) ? 3 : 0 )\ +: ((e)-(s) > 1) ? \ + ( ( 0x66 == ((U8*)s)[0] ) ? \ + ( ( 0x66 == ((U8*)s)[1] || 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 )\ + : ( ( 0x73 == ((U8*)s)[0] ) && ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ) ? 2 : 0 )\ +: 0 ) + + +/*** GENERATED CODE ***/ +#define is_MULTI_CHAR_FOLD_utf8_safe(s,e) \ +( ((e)-(s) > 5) ? \ ( ( 0x61 == ((U8*)s)[0] ) ? \ ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ : ( 0x66 == ((U8*)s)[0] ) ? \ @@ -775,19 +783,39 @@ ( ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xB1 == ((U8*)s)[1] || 0xB7 == ((U8*)s)[1] ) ? \ ( ( 0xCD == ((U8*)s)[2] ) ? \ - ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \ + ( ( 0x82 == ((U8*)s)[3] ) ? \ + ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\ + : 0 ) \ : ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ - : ( ( ( 0xB9 == ((U8*)s)[1] ) && ( 0xCD == ((U8*)s)[2] ) ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ + : ( 0xB9 == ((U8*)s)[1] ) ? \ + ( ( 0xCC == ((U8*)s)[2] ) ? \ + ( ( 0x88 == ((U8*)s)[3] ) ? \ + ( ( 0xCC == ((U8*)s)[4] ) ? \ + ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \ + : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\ + : 0 ) \ + : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ + : 0 ) \ : ( 0xCF == ((U8*)s)[0] ) ? \ ( ( 0x81 == ((U8*)s)[1] ) ? \ ( ( ( 0xCC == ((U8*)s)[2] ) && ( 0x93 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0x85 == ((U8*)s)[1] ) ? \ ( ( 0xCC == ((U8*)s)[2] ) ? \ - ( ( 0x93 == ((U8*)s)[3] ) ? 4 : 0 ) \ + ( ( 0x88 == ((U8*)s)[3] ) ? \ + ( ( 0xCC == ((U8*)s)[4] ) ? \ + ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \ + : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\ + : ( 0x93 == ((U8*)s)[3] ) ? \ + ( ( 0xCC == ((U8*)s)[4] ) ? \ + ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 4 ) \ + : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 4 )\ + : 0 ) \ : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0x89 == ((U8*)s)[1] ) ? \ ( ( 0xCD == ((U8*)s)[2] ) ? \ - ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \ + ( ( 0x82 == ((U8*)s)[3] ) ? \ + ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\ + : 0 ) \ : ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( ( ( 0x8E == ((U8*)s)[1] ) && ( 0xCE == ((U8*)s)[2] ) ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\ : ( 0xD5 == ((U8*)s)[0] ) ? \ @@ -796,32 +824,12 @@ : ( 0xB4 == ((U8*)s)[1] ) ? \ ( ( ( 0xD5 == ((U8*)s)[2] ) && ( ( ( ((U8*)s)[3] & 0xF7 ) == 0xA5 ) || ((U8*)s)[3] == 0xAB || ((U8*)s)[3] == 0xB6 ) ) ? 4 : 0 )\ : ( ( ( 0xBE == ((U8*)s)[1] ) && ( 0xD5 == ((U8*)s)[2] ) ) && ( 0xB6 == ((U8*)s)[3] ) ) ? 4 : 0 )\ + : ( 0xE1 == ((U8*)s)[0] ) ? \ + ( ( 0xBC == ((U8*)s)[1] ) ? \ + ( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ + : ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\ : 0 ) \ -: ((e)-(s) > 2) ? \ - ( ( 0x61 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x66 == ((U8*)s)[0] ) ? \ - ( ( 0x66 == ((U8*)s)[1] ) ? \ - ( ( 0x69 == ((U8*)s)[2] || 0x6C == ((U8*)s)[2] ) ? 3 : 2 ) \ - : ( 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 ) \ - : ( 0x68 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0xB1 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x69 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x87 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x6A == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8C == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x73 == ((U8*)s)[0] ) ? \ - ( ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ? 2 : 0 ) \ - : ( 0x74 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x88 == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( 0x77 == ((U8*)s)[0] || 0x79 == ((U8*)s)[0] ) ? \ - ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8A == ((U8*)s)[2] ) ) ? 3 : 0 ) \ - : ( ( ( 0xCA == ((U8*)s)[0] ) && ( 0xBC == ((U8*)s)[1] ) ) && ( 0x6E == ((U8*)s)[2] ) ) ? 3 : 0 )\ -: ((e)-(s) > 1) ? \ - ( ( 0x66 == ((U8*)s)[0] ) ? \ - ( ( 0x66 == ((U8*)s)[1] || 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 )\ - : ( ( 0x73 == ((U8*)s)[0] ) && ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ) ? 2 : 0 )\ -: 0 ) +: ((e)-(s) > 4) ? is_MULTI_CHAR_FOLD_utf8_safe_part0(s,e) : is_MULTI_CHAR_FOLD_utf8_safe_part1(s,e) ) /* MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index 0bab57086a..5d37e8520e 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -237,6 +237,10 @@ sub __clean { : \s* \5 \s* ([()]) /$1 ( $2 && $3 ) ? $4 : $5 $6/gx; + #$expr=~s/\(\(U8\*\)s\)\[(\d+)\]/S$1/g if length $expr > 8000; + #$expr=~s/\s+//g if length $expr > 8000; + + die "Expression too long" if length $expr > 8000; return $expr; } @@ -1093,7 +1097,7 @@ sub _combine { # _render() # recursively convert an optree to text with reasonably neat formatting sub _render { - my ( $self, $op, $combine, $brace, $opts_ref )= @_; + my ( $self, $op, $combine, $brace, $opts_ref, $def, $submacros )= @_; return 0 if ! defined $op; # The set is empty if ( !ref $op ) { return $op; @@ -1101,10 +1105,10 @@ sub _render { my $cond= $self->_cond_as_str( $op, $combine, $opts_ref ); #no warnings 'recursion'; # This would allow really really inefficient # code to be generated. See pod - my $yes= $self->_render( $op->{yes}, $combine, 1, $opts_ref ); + my $yes= $self->_render( $op->{yes}, $combine, 1, $opts_ref, $def, $submacros ); return $yes if $cond eq '1'; - my $no= $self->_render( $op->{no}, $combine, 0, $opts_ref ); + my $no= $self->_render( $op->{no}, $combine, 0, $opts_ref, $def, $submacros ); return "( $cond )" if $yes eq '1' and $no eq '0'; my ( $lb, $rb )= $brace ? ( "( ", " )" ) : ( "", "" ); return "$lb$cond ? $yes : $no$rb" @@ -1118,7 +1122,13 @@ sub _render { $yes= " " . $yes; } - return "$lb$cond ?$yes$ind: $no$rb"; + my $str= "$lb$cond ?$yes$ind: $no$rb"; + if (length $str > 6000) { + push @$submacros, sprintf "#define $def\n( %s )", "_part" . (my $yes_idx= 0+@$submacros), $yes; + push @$submacros, sprintf "#define $def\n( %s )", "_part" . (my $no_idx= 0+@$submacros), $no; + return sprintf "%s%s ? $def : $def%s", $lb, $cond, "_part$yes_idx", "_part$no_idx", $rb; + } + return $str; } # $expr=render($op,$combine) @@ -1129,9 +1139,12 @@ sub _render { # longer lists such as that resulting from type 'cp' output. # Currently only used for type 'cp' macros. sub render { - my ( $self, $op, $combine, $opts_ref )= @_; - my $str= "( " . $self->_render( $op, $combine, 0, $opts_ref ) . " )"; - return __clean( $str ); + my ( $self, $op, $combine, $opts_ref, $def_fmt )= @_; + + my @submacros; + my $macro= sprintf "#define $def_fmt\n( %s )", "", $self->_render( $op, $combine, 0, $opts_ref, $def_fmt, \@submacros ); + + return join "\n\n", map { "/*** GENERATED CODE ***/\n" . __macro( __clean( $_ ) ) } @submacros, $macro; } # make_macro @@ -1168,8 +1181,6 @@ sub make_macro { } else { $method= 'optree'; } - my $optree= $self->$method( %opts, type => $type, ret_type => $ret_type ); - my $text= $self->render( $optree, ($type =~ /^cp/) ? 1 : 0, \%opts ); my @args= $type =~ /^cp/ ? 'cp' : 's'; push @args, "e" if $opts{safe}; push @args, "is_utf8" if $type eq 'generic'; @@ -1179,8 +1190,9 @@ sub make_macro { my $ext= $type eq 'generic' ? '' : '_' . lc( $type ); $ext .= "_safe" if $opts{safe}; my $argstr= join ",", @args; - return "/*** GENERATED CODE ***/\n" - . __macro( "#define $pfx$self->{op}$ext($argstr)\n$text" ); + my $def_fmt="$pfx$self->{op}$ext%s($argstr)"; + my $optree= $self->$method( %opts, type => $type, ret_type => $ret_type ); + return $self->render( $optree, ($type =~ /^cp/) ? 1 : 0, \%opts, $def_fmt ); } # if we arent being used as a module (highly likely) then process @@ -476,6 +476,11 @@ Perl's extended UTF-8 means we can have start bytes up to FF. * takes on the order of 10 minutes to generate, and is never going to change. * The EBCDIC equivalent hasn't been commented out in regcharclass.pl, so it * should generate and run the correct stuff */ +/* + UTF8_CHAR: Matches utf8 from 1 to 4 bytes + + 0x0 - 0x1FFFFF +*/ /*** GENERATED CODE ***/ #define is_UTF8_CHAR_utf8_safe(s,e) \ ( ((e)-(s) > 3) ? \ @@ -488,23 +493,17 @@ Perl's extended UTF-8 means we can have start bytes up to FF. ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ : ( 0xF0 == ((U8*)s)[0] ) ? \ ( ( ( ( 0x90 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ - : ( 0xF1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xF7 ) ? \ - ( ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ - : 0 ) \ + : ( ( ( ( 0xF1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xF7 ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\ : ((e)-(s) > 2) ? \ ( ( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) ? 1 \ : ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) ? \ ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ? 2 : 0 ) \ : ( 0xE0 == ((U8*)s)[0] ) ? \ ( ( ( ( ((U8*)s)[1] & 0xE0 ) == 0xA0 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ - : ( 0xE1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xEF ) ? \ - ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ - : 0 ) \ + : ( ( ( 0xE1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xEF ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\ : ((e)-(s) > 1) ? \ ( ( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) ? 1 \ - : ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) ? \ - ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ? 2 : 0 ) \ - : 0 ) \ + : ( ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) ? 2 : 0 )\ : ((e)-(s) > 0) ? \ ( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) \ : 0 ) |