summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- regcharclass.h 134
-rwxr-xr-x regen/regcharclass.pl 34
-rw-r--r-- utf8.h 17
3 files changed, 102 insertions, 83 deletions
diff --git a/regcharclass.h b/regcharclass.h
index 64e4453e58..1d335e6b21 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -624,9 +624,8 @@
&regcharclass_multi_char_folds::multi_char_folds(1)
*/
/*** GENERATED CODE ***/
-#define is_MULTI_CHAR_FOLD_utf8_safe(s,e) \
-( ((e)-(s) > 5) ? \
- ( ( 0x61 == ((U8*)s)[0] ) ? \
+#define is_MULTI_CHAR_FOLD_utf8_safe_part0(s,e) \
+( ( 0x61 == ((U8*)s)[0] ) ? \
( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \
: ( 0x66 == ((U8*)s)[0] ) ? \
( ( 0x66 == ((U8*)s)[1] ) ? \
@@ -651,39 +650,19 @@
( ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0xB1 == ((U8*)s)[1] || 0xB7 == ((U8*)s)[1] ) ? \
( ( 0xCD == ((U8*)s)[2] ) ? \
- ( ( 0x82 == ((U8*)s)[3] ) ? \
- ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\
- : 0 ) \
+ ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \
: ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\
- : ( 0xB9 == ((U8*)s)[1] ) ? \
- ( ( 0xCC == ((U8*)s)[2] ) ? \
- ( ( 0x88 == ((U8*)s)[3] ) ? \
- ( ( 0xCC == ((U8*)s)[4] ) ? \
- ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \
- : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\
- : 0 ) \
- : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\
- : 0 ) \
+ : ( ( ( 0xB9 == ((U8*)s)[1] ) && ( 0xCD == ((U8*)s)[2] ) ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0xCF == ((U8*)s)[0] ) ? \
( ( 0x81 == ((U8*)s)[1] ) ? \
( ( ( 0xCC == ((U8*)s)[2] ) && ( 0x93 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x85 == ((U8*)s)[1] ) ? \
( ( 0xCC == ((U8*)s)[2] ) ? \
- ( ( 0x88 == ((U8*)s)[3] ) ? \
- ( ( 0xCC == ((U8*)s)[4] ) ? \
- ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \
- : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\
- : ( 0x93 == ((U8*)s)[3] ) ? \
- ( ( 0xCC == ((U8*)s)[4] ) ? \
- ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 4 ) \
- : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 4 )\
- : 0 ) \
+ ( ( 0x93 == ((U8*)s)[3] ) ? 4 : 0 ) \
: ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x89 == ((U8*)s)[1] ) ? \
( ( 0xCD == ((U8*)s)[2] ) ? \
- ( ( 0x82 == ((U8*)s)[3] ) ? \
- ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\
- : 0 ) \
+ ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \
: ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( ( ( 0x8E == ((U8*)s)[1] ) && ( 0xCE == ((U8*)s)[2] ) ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0xD5 == ((U8*)s)[0] ) ? \
@@ -696,8 +675,12 @@
( ( 0xBC == ((U8*)s)[1] ) ? \
( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\
: ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\
- : 0 ) \
-: ((e)-(s) > 4) ? \
+ : 0 )
+
+
+/*** GENERATED CODE ***/
+#define is_MULTI_CHAR_FOLD_utf8_safe_part1(s,e) \
+( ((e)-(s) > 3) ? \
( ( 0x61 == ((U8*)s)[0] ) ? \
( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \
: ( 0x66 == ((U8*)s)[0] ) ? \
@@ -744,12 +727,37 @@
: ( 0xB4 == ((U8*)s)[1] ) ? \
( ( ( 0xD5 == ((U8*)s)[2] ) && ( ( ( ((U8*)s)[3] & 0xF7 ) == 0xA5 ) || ((U8*)s)[3] == 0xAB || ((U8*)s)[3] == 0xB6 ) ) ? 4 : 0 )\
: ( ( ( 0xBE == ((U8*)s)[1] ) && ( 0xD5 == ((U8*)s)[2] ) ) && ( 0xB6 == ((U8*)s)[3] ) ) ? 4 : 0 )\
- : ( 0xE1 == ((U8*)s)[0] ) ? \
- ( ( 0xBC == ((U8*)s)[1] ) ? \
- ( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\
- : ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\
: 0 ) \
-: ((e)-(s) > 3) ? \
+: ((e)-(s) > 2) ? \
+ ( ( 0x61 == ((U8*)s)[0] ) ? \
+ ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( 0x66 == ((U8*)s)[0] ) ? \
+ ( ( 0x66 == ((U8*)s)[1] ) ? \
+ ( ( 0x69 == ((U8*)s)[2] || 0x6C == ((U8*)s)[2] ) ? 3 : 2 ) \
+ : ( 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 ) \
+ : ( 0x68 == ((U8*)s)[0] ) ? \
+ ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0xB1 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( 0x69 == ((U8*)s)[0] ) ? \
+ ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x87 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( 0x6A == ((U8*)s)[0] ) ? \
+ ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8C == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( 0x73 == ((U8*)s)[0] ) ? \
+ ( ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ? 2 : 0 ) \
+ : ( 0x74 == ((U8*)s)[0] ) ? \
+ ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x88 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( 0x77 == ((U8*)s)[0] || 0x79 == ((U8*)s)[0] ) ? \
+ ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8A == ((U8*)s)[2] ) ) ? 3 : 0 ) \
+ : ( ( ( 0xCA == ((U8*)s)[0] ) && ( 0xBC == ((U8*)s)[1] ) ) && ( 0x6E == ((U8*)s)[2] ) ) ? 3 : 0 )\
+: ((e)-(s) > 1) ? \
+ ( ( 0x66 == ((U8*)s)[0] ) ? \
+ ( ( 0x66 == ((U8*)s)[1] || 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 )\
+ : ( ( 0x73 == ((U8*)s)[0] ) && ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ) ? 2 : 0 )\
+: 0 )
+
+
+/*** GENERATED CODE ***/
+#define is_MULTI_CHAR_FOLD_utf8_safe(s,e) \
+( ((e)-(s) > 5) ? \
( ( 0x61 == ((U8*)s)[0] ) ? \
( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \
: ( 0x66 == ((U8*)s)[0] ) ? \
@@ -775,19 +783,39 @@
( ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0xB1 == ((U8*)s)[1] || 0xB7 == ((U8*)s)[1] ) ? \
( ( 0xCD == ((U8*)s)[2] ) ? \
- ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \
+ ( ( 0x82 == ((U8*)s)[3] ) ? \
+ ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\
+ : 0 ) \
: ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\
- : ( ( ( 0xB9 == ((U8*)s)[1] ) && ( 0xCD == ((U8*)s)[2] ) ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\
+ : ( 0xB9 == ((U8*)s)[1] ) ? \
+ ( ( 0xCC == ((U8*)s)[2] ) ? \
+ ( ( 0x88 == ((U8*)s)[3] ) ? \
+ ( ( 0xCC == ((U8*)s)[4] ) ? \
+ ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \
+ : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\
+ : 0 ) \
+ : ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\
+ : 0 ) \
: ( 0xCF == ((U8*)s)[0] ) ? \
( ( 0x81 == ((U8*)s)[1] ) ? \
( ( ( 0xCC == ((U8*)s)[2] ) && ( 0x93 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x85 == ((U8*)s)[1] ) ? \
( ( 0xCC == ((U8*)s)[2] ) ? \
- ( ( 0x93 == ((U8*)s)[3] ) ? 4 : 0 ) \
+ ( ( 0x88 == ((U8*)s)[3] ) ? \
+ ( ( 0xCC == ((U8*)s)[4] ) ? \
+ ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 0 ) \
+ : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 0 )\
+ : ( 0x93 == ((U8*)s)[3] ) ? \
+ ( ( 0xCC == ((U8*)s)[4] ) ? \
+ ( ( ( ((U8*)s)[5] & 0xFE ) == 0x80 ) ? 6 : 4 ) \
+ : ( ( 0xCD == ((U8*)s)[4] ) && ( 0x82 == ((U8*)s)[5] ) ) ? 6 : 4 )\
+ : 0 ) \
: ( ( 0xCD == ((U8*)s)[2] ) && ( 0x82 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0x89 == ((U8*)s)[1] ) ? \
( ( 0xCD == ((U8*)s)[2] ) ? \
- ( ( 0x82 == ((U8*)s)[3] ) ? 4 : 0 ) \
+ ( ( 0x82 == ((U8*)s)[3] ) ? \
+ ( ( ( 0xCE == ((U8*)s)[4] ) && ( 0xB9 == ((U8*)s)[5] ) ) ? 6 : 4 )\
+ : 0 ) \
: ( ( 0xCE == ((U8*)s)[2] ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( ( ( 0x8E == ((U8*)s)[1] ) && ( 0xCE == ((U8*)s)[2] ) ) && ( 0xB9 == ((U8*)s)[3] ) ) ? 4 : 0 )\
: ( 0xD5 == ((U8*)s)[0] ) ? \
@@ -796,32 +824,12 @@
: ( 0xB4 == ((U8*)s)[1] ) ? \
( ( ( 0xD5 == ((U8*)s)[2] ) && ( ( ( ((U8*)s)[3] & 0xF7 ) == 0xA5 ) || ((U8*)s)[3] == 0xAB || ((U8*)s)[3] == 0xB6 ) ) ? 4 : 0 )\
: ( ( ( 0xBE == ((U8*)s)[1] ) && ( 0xD5 == ((U8*)s)[2] ) ) && ( 0xB6 == ((U8*)s)[3] ) ) ? 4 : 0 )\
+ : ( 0xE1 == ((U8*)s)[0] ) ? \
+ ( ( 0xBC == ((U8*)s)[1] ) ? \
+ ( ( ( ( ( ((U8*)s)[2] & 0xD8 ) == 0x80 ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\
+ : ( ( ( ( 0xBD == ((U8*)s)[1] ) && ( ( ( ((U8*)s)[2] & 0xF8 ) == 0xA0 ) || ( ( ((U8*)s)[2] & 0xFB ) == 0xB0 ) || ((U8*)s)[2] == 0xBC ) ) && ( 0xCE == ((U8*)s)[3] ) ) && ( 0xB9 == ((U8*)s)[4] ) ) ? 5 : 0 )\
: 0 ) \
-: ((e)-(s) > 2) ? \
- ( ( 0x61 == ((U8*)s)[0] ) ? \
- ( ( ( 0xCA == ((U8*)s)[1] ) && ( 0xBE == ((U8*)s)[2] ) ) ? 3 : 0 ) \
- : ( 0x66 == ((U8*)s)[0] ) ? \
- ( ( 0x66 == ((U8*)s)[1] ) ? \
- ( ( 0x69 == ((U8*)s)[2] || 0x6C == ((U8*)s)[2] ) ? 3 : 2 ) \
- : ( 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 ) \
- : ( 0x68 == ((U8*)s)[0] ) ? \
- ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0xB1 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
- : ( 0x69 == ((U8*)s)[0] ) ? \
- ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x87 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
- : ( 0x6A == ((U8*)s)[0] ) ? \
- ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8C == ((U8*)s)[2] ) ) ? 3 : 0 ) \
- : ( 0x73 == ((U8*)s)[0] ) ? \
- ( ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ? 2 : 0 ) \
- : ( 0x74 == ((U8*)s)[0] ) ? \
- ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x88 == ((U8*)s)[2] ) ) ? 3 : 0 ) \
- : ( 0x77 == ((U8*)s)[0] || 0x79 == ((U8*)s)[0] ) ? \
- ( ( ( 0xCC == ((U8*)s)[1] ) && ( 0x8A == ((U8*)s)[2] ) ) ? 3 : 0 ) \
- : ( ( ( 0xCA == ((U8*)s)[0] ) && ( 0xBC == ((U8*)s)[1] ) ) && ( 0x6E == ((U8*)s)[2] ) ) ? 3 : 0 )\
-: ((e)-(s) > 1) ? \
- ( ( 0x66 == ((U8*)s)[0] ) ? \
- ( ( 0x66 == ((U8*)s)[1] || 0x69 == ((U8*)s)[1] || 0x6C == ((U8*)s)[1] ) ? 2 : 0 )\
- : ( ( 0x73 == ((U8*)s)[0] ) && ( 0x73 == ((U8*)s)[1] || 0x74 == ((U8*)s)[1] ) ) ? 2 : 0 )\
-: 0 )
+: ((e)-(s) > 4) ? is_MULTI_CHAR_FOLD_utf8_safe_part0(s,e) : is_MULTI_CHAR_FOLD_utf8_safe_part1(s,e) )
/*
MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index 0bab57086a..5d37e8520e 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -237,6 +237,10 @@ sub __clean {
: \s* \5 \s*
([()])
/$1 ( $2 && $3 ) ? $4 : $5 $6/gx;
+ #$expr=~s/\(\(U8\*\)s\)\[(\d+)\]/S$1/g if length $expr > 8000;
+ #$expr=~s/\s+//g if length $expr > 8000;
+
+ die "Expression too long" if length $expr > 8000;
return $expr;
}
@@ -1093,7 +1097,7 @@ sub _combine {
# _render()
# recursively convert an optree to text with reasonably neat formatting
sub _render {
- my ( $self, $op, $combine, $brace, $opts_ref )= @_;
+ my ( $self, $op, $combine, $brace, $opts_ref, $def, $submacros )= @_;
return 0 if ! defined $op; # The set is empty
if ( !ref $op ) {
return $op;
@@ -1101,10 +1105,10 @@ sub _render {
my $cond= $self->_cond_as_str( $op, $combine, $opts_ref );
#no warnings 'recursion'; # This would allow really really inefficient
# code to be generated. See pod
- my $yes= $self->_render( $op->{yes}, $combine, 1, $opts_ref );
+ my $yes= $self->_render( $op->{yes}, $combine, 1, $opts_ref, $def, $submacros );
return $yes if $cond eq '1';
- my $no= $self->_render( $op->{no}, $combine, 0, $opts_ref );
+ my $no= $self->_render( $op->{no}, $combine, 0, $opts_ref, $def, $submacros );
return "( $cond )" if $yes eq '1' and $no eq '0';
my ( $lb, $rb )= $brace ? ( "( ", " )" ) : ( "", "" );
return "$lb$cond ? $yes : $no$rb"
@@ -1118,7 +1122,13 @@ sub _render {
$yes= " " . $yes;
}
- return "$lb$cond ?$yes$ind: $no$rb";
+ my $str= "$lb$cond ?$yes$ind: $no$rb";
+ if (length $str > 6000) {
+ push @$submacros, sprintf "#define $def\n( %s )", "_part" . (my $yes_idx= 0+@$submacros), $yes;
+ push @$submacros, sprintf "#define $def\n( %s )", "_part" . (my $no_idx= 0+@$submacros), $no;
+ return sprintf "%s%s ? $def : $def%s", $lb, $cond, "_part$yes_idx", "_part$no_idx", $rb;
+ }
+ return $str;
}
# $expr=render($op,$combine)
@@ -1129,9 +1139,12 @@ sub _render {
# longer lists such as that resulting from type 'cp' output.
# Currently only used for type 'cp' macros.
sub render {
- my ( $self, $op, $combine, $opts_ref )= @_;
- my $str= "( " . $self->_render( $op, $combine, 0, $opts_ref ) . " )";
- return __clean( $str );
+ my ( $self, $op, $combine, $opts_ref, $def_fmt )= @_;
+
+ my @submacros;
+ my $macro= sprintf "#define $def_fmt\n( %s )", "", $self->_render( $op, $combine, 0, $opts_ref, $def_fmt, \@submacros );
+
+ return join "\n\n", map { "/*** GENERATED CODE ***/\n" . __macro( __clean( $_ ) ) } @submacros, $macro;
}
# make_macro
@@ -1168,8 +1181,6 @@ sub make_macro {
} else {
$method= 'optree';
}
- my $optree= $self->$method( %opts, type => $type, ret_type => $ret_type );
- my $text= $self->render( $optree, ($type =~ /^cp/) ? 1 : 0, \%opts );
my @args= $type =~ /^cp/ ? 'cp' : 's';
push @args, "e" if $opts{safe};
push @args, "is_utf8" if $type eq 'generic';
@@ -1179,8 +1190,9 @@ sub make_macro {
my $ext= $type eq 'generic' ? '' : '_' . lc( $type );
$ext .= "_safe" if $opts{safe};
my $argstr= join ",", @args;
- return "/*** GENERATED CODE ***/\n"
- . __macro( "#define $pfx$self->{op}$ext($argstr)\n$text" );
+ my $def_fmt="$pfx$self->{op}$ext%s($argstr)";
+ my $optree= $self->$method( %opts, type => $type, ret_type => $ret_type );
+ return $self->render( $optree, ($type =~ /^cp/) ? 1 : 0, \%opts, $def_fmt );
}
# if we arent being used as a module (highly likely) then process
diff --git a/utf8.h b/utf8.h
index 3fbc28a206..1df972bba6 100644
--- a/utf8.h
+++ b/utf8.h
@@ -476,6 +476,11 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
* takes on the order of 10 minutes to generate, and is never going to change.
* The EBCDIC equivalent hasn't been commented out in regcharclass.pl, so it
* should generate and run the correct stuff */
+/*
+ UTF8_CHAR: Matches utf8 from 1 to 4 bytes
+
+ 0x0 - 0x1FFFFF
+*/
/*** GENERATED CODE ***/
#define is_UTF8_CHAR_utf8_safe(s,e) \
( ((e)-(s) > 3) ? \
@@ -488,23 +493,17 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\
: ( 0xF0 == ((U8*)s)[0] ) ? \
( ( ( ( 0x90 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xBF ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\
- : ( 0xF1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xF7 ) ? \
- ( ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\
- : 0 ) \
+ : ( ( ( ( 0xF1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xF7 ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[3] & 0xC0 ) == 0x80 ) ) ? 4 : 0 )\
: ((e)-(s) > 2) ? \
( ( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) ? 1 \
: ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) ? \
( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ? 2 : 0 ) \
: ( 0xE0 == ((U8*)s)[0] ) ? \
( ( ( ( ((U8*)s)[1] & 0xE0 ) == 0xA0 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\
- : ( 0xE1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xEF ) ? \
- ( ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\
- : 0 ) \
+ : ( ( ( 0xE1 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xEF ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) && ( ( ((U8*)s)[2] & 0xC0 ) == 0x80 ) ) ? 3 : 0 )\
: ((e)-(s) > 1) ? \
( ( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) ? 1 \
- : ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) ? \
- ( ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ? 2 : 0 ) \
- : 0 ) \
+ : ( ( 0xC2 <= ((U8*)s)[0] && ((U8*)s)[0] <= 0xDF ) && ( ( ((U8*)s)[1] & 0xC0 ) == 0x80 ) ) ? 2 : 0 )\
: ((e)-(s) > 0) ? \
( ( ((U8*)s)[0] & 0x80 ) == 0x00 ) \
: 0 )