diff options
-rw-r--r-- | regcharclass.h | 18 | ||||
-rwxr-xr-x | regen/regcharclass.pl | 46 |
2 files changed, 32 insertions, 32 deletions
diff --git a/regcharclass.h b/regcharclass.h index 4326bb5064..a34b339e56 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -87,18 +87,18 @@ /*** GENERATED CODE ***/ #define is_LNBREAK_latin1(s) \ -( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \ +( ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) || 0x85 == ((U8*)s)[0] ) ? 1\ : ( 0x0D == ((U8*)s)[0] ) ? \ ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \ -: ( 0x85 == ((U8*)s)[0] ) ) +: 0 ) /*** GENERATED CODE ***/ #define is_LNBREAK_latin1_safe(s,e) \ ( ((e)-(s) > 1) ? \ - ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) ? 1 \ + ( ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0C ) || 0x85 == ((U8*)s)[0] ) ? 1\ : ( 0x0D == ((U8*)s)[0] ) ? \ ( ( 0x0A == ((U8*)s)[1] ) ? 2 : 1 ) \ - : ( 0x85 == ((U8*)s)[0] ) ) \ + : 0 ) \ : ((e)-(s) > 0) ? \ ( ( 0x0A <= ((U8*)s)[0] && ((U8*)s)[0] <= 0x0D ) || 0x85 == ((U8*)s)[0] )\ : 0 ) @@ -460,19 +460,13 @@ ( ( ( 0x81 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0x93 ) || ( 0x95 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xAF ) ) ? 3 : 0 )\ : ( 0x86 == ((U8*)s)[1] ) ? \ ( ( ((U8*)s)[2] >= 0x90 ) ? 3 : 0 ) \ - : ( 0x87 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x90 ) ? \ + : ( ( 0x87 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x90 ) || ( 0x94 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9C ) || ( 0x9F <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xAF ) || ( ((U8*)s)[1] & 0xFE ) == 0xB8 ) ?\ 3 \ : ( 0x91 == ((U8*)s)[1] ) ? \ ( ( ((U8*)s)[2] <= 0x9F ) ? 3 : 0 ) \ - : ( 0x94 <= ((U8*)s)[1] && ((U8*)s)[1] <= 0x9C ) ? \ - 3 \ : ( 0x9D == ((U8*)s)[1] ) ? \ ( ( ((U8*)s)[2] <= 0xB5 ) ? 3 : 0 ) \ - : ( 0x9E == ((U8*)s)[1] ) ? \ - ( ( ((U8*)s)[2] >= 0x94 ) ? 3 : 0 ) \ - : ( ( 0x9F <= ((U8*)s)[1] && ((U8*)s)[1] <= 0xAF ) || ( ((U8*)s)[1] & 0xFE ) == 0xB8 ) ?\ - 3 \ - : 0 ) \ + : ( ( 0x9E == ((U8*)s)[1] ) && ( ((U8*)s)[2] >= 0x94 ) ) ? 3 : 0 ) \ : ( 0xE3 == ((U8*)s)[0] ) ? \ ( ( 0x80 == ((U8*)s)[1] ) ? \ ( ( ( ((U8*)s)[2] <= 0x83 ) || ( 0x88 <= ((U8*)s)[2] && ((U8*)s)[2] <= 0xA0 ) || 0xB0 == ((U8*)s)[2] ) ? 3 : 0 )\ diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index e7f0b44da2..fb64adee4b 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -202,6 +202,8 @@ sub __uni_latin1 { sub __clean { my ( $expr )= @_; + #return $expr; + our $parens; $parens= qr/ (?> \( (?> (?: (?> [^()]+ ) | (??{ $parens }) )* ) \) ) /x; @@ -477,17 +479,11 @@ sub _optree { return $else if !@conds; - my %root; - my $node= \%root; - my ( $yes_res, $as_code, @vals ); my $test= $test_type eq 'cp' ? "cp" : "((U8*)s)[$depth]"; - my $Update= sub { - $node->{vals}= [@vals]; - $node->{test}= $test; - $node->{yes}= $yes_res; - $node->{depth}= $depth; - return $node->{no}= shift; - }; + # first we loop over the possible keys/conditions and find out what they look like + # we group conditions with the same optree together. + my %dmp_res; + my @res_order; local $Data::Dumper::Sortkeys=1; foreach my $cond ( @conds ) { @@ -496,19 +492,29 @@ sub _optree { # convert it to a string with Dumper my $res_code= Dumper( $res ); - # either merge in this optree or merge in this value into the current op. - if ( !$yes_res || $res_code ne $as_code ) { - # initialize/merge in the - if ( $yes_res ) { - $node= $Update->( {} ); - } - ( $yes_res, $as_code )= ( $res, $res_code ); - @vals= ( $cond ); + push @{$dmp_res{$res_code}{vals}}, $cond; + if (!$dmp_res{$res_code}{optree}) { + $dmp_res{$res_code}{optree}= $res; + push @res_order, $res_code; + } + } + + # now that we have deduped the optrees we construct a new optree containing the merged + # results. + my %root; + my $node= \%root; + foreach my $res_code_idx (0 .. $#res_order) { + my $res_code= $res_order[$res_code_idx]; + $node->{vals}= $dmp_res{$res_code}{vals}; + $node->{test}= $test; + $node->{yes}= $dmp_res{$res_code}{optree}; + $node->{depth}= $depth; + if ($res_code_idx < $#res_order) { + $node= $node->{no}= {}; } else { - push @vals, $cond; + $node->{no}= $else; } } - $Update->( $else ); # finalize the optree's else with the value passed in # return the optree. return \%root; |