diff options
author | Yves Orton <demerphq@gmail.com> | 2022-08-03 16:19:38 +0200 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2022-08-06 11:32:34 +0200 |
commit | 12d173c94b050c244dbb4e2162e29834a9ac3aff (patch) | |
tree | 7da7cb9fd9869b779d4b8f8ffaf5fa4402a75bc1 /regen | |
parent | cbf5c5ba5f44cf0fe11b64213facfc8ae7c33e4e (diff) | |
download | perl-12d173c94b050c244dbb4e2162e29834a9ac3aff.tar.gz |
regex engine - replace many attribute arrays with one
This replaces PL_regnode_arg_len, PL_regnode_arg_len_varies,
PL_regnode_off_by_arg and PL_regnode_kind with a single PL_regnode_info
array, which is an array of struct regnode_meta, which contains the same
data but as a struct. Since PL_regnode_name is only used in debugging
builds of the regex engine we keep it separate. If we add more debug
properties it might be good to create a PL_regnode_debug_info[] to hold
that data instead.
This means that when we add new properties we do not need to modify any
secondary sources to add new properties, just the struct definition
and regen/regcomp.pl.
Diffstat (limited to 'regen')
-rw-r--r-- | regen/regcomp.pl | 132 |
1 files changed, 37 insertions, 95 deletions
diff --git a/regen/regcomp.pl b/regen/regcomp.pl index 3ab868a600..17f2cc03e0 100644 --- a/regen/regcomp.pl +++ b/regen/regcomp.pl @@ -51,8 +51,8 @@ use strict; # id Both integer value for this opcode/state # optype Both Either 'op' or 'state' # line_num Both line_num number of the input file for this item. -# type Op Type of node (aka regkind) -# code Op Apparently not used +# type Op Type of node (aka regnode_kind) +# code Op Meta about the node, used to detect variable length nodes # suffix Op which regnode struct this uses, so if this is '1', it # uses 'struct regnode_1' # flags Op S for simple; V for varies @@ -272,10 +272,10 @@ sub print_process_EXACTish { print $out <<EOP, /* Is 'op', known to be of type EXACT, folding? */ -#define isEXACTFish(op) (__ASSERT_(PL_regnode_kind[op] == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT)))) +#define isEXACTFish(op) (__ASSERT_(REGNODE_TYPE(op) == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT)))) /* Do only UTF-8 target strings match 'op', known to be of type EXACT? */ -#define isEXACT_REQ8(op) (__ASSERT_(PL_regnode_kind[op] == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT)))) +#define isEXACT_REQ8(op) (__ASSERT_(REGNODE_TYPE(op) == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT)))) #ifndef DOINIT EXTCONST U32 PL_EXACTFish_bitmask; @@ -456,32 +456,6 @@ sub print_state_def_line print $fh "\n"; # Blank line separates groups for clarity } -sub print_regkind { - my ($out)= @_; - print $out <<EOP; - -/* PL_regnode_kind[] What type of regop or state is this. 
*/ - -#ifndef DOINIT -EXTCONST U8 PL_regnode_kind[]; -#else -EXTCONST U8 PL_regnode_kind[] = { -EOP - use Data::Dumper; - foreach my $node (@all) { - print Dumper($node) if !defined $node->{type} or !defined( $node->{name} ); - printf $out "\t%*s\t/* %*s */\n", - -1 - $twidth, "$node->{type},", -$base_name_width, $node->{name}; - print $out "\t/* ------------ States ------------- */\n" - if $node->{id} == $#ops and $node->{id} != $#all; - } - - print $out <<EOP; -}; -#endif -EOP -} - sub print_typedefs { my ($out)= @_; print $out <<EOP; @@ -515,51 +489,44 @@ EOP } -sub print_regarglen { - my ($out)= @_; - print $out <<EOP; - -/* PL_regnode_arg_len[] - How large is the argument part of the node (in regnodes) */ - -#ifndef DOINIT -EXTCONST U8 PL_regnode_arg_len[]; -#else -EXTCONST U8 PL_regnode_arg_len[] = { -EOP - - foreach my $node (@ops) { - my $size= 0; - $size= "EXTRA_SIZE($node->{typedef})" if $node->{suffix}; - printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name}; - } - print $out <<EOP; -}; -#endif /* DOINIT */ -EOP -} - -sub print_regargvaries { +sub print_regnode_info { my ($out)= @_; print $out <<EOP; -/* PL_regnode_arg_len_varies[] - Is the size of the node determined by STR_SZ() macros? - Currently this is a boolean, but in the future it might turn into something - that uses more bits of the value to indicate that a different macro would be - used. 
*/ +/* PL_regnode_info[] - Opcode/state names in string form, for debugging */ #ifndef DOINIT -EXTCONST U8 PL_regnode_arg_len_varies[]; +EXTCONST struct regnode_meta PL_regnode_info[]; #else -EXTCONST U8 PL_regnode_arg_len_varies[] = { +EXTCONST struct regnode_meta const PL_regnode_info[] = { EOP - foreach my $node (@ops) { - my $varies= 0; - $varies= 1 if $node->{code}=~"str"; + my @fields= qw(type arg_len arg_len_varies off_by_arg); + foreach my $node_idx (0..$#all) { + my $node= $all[$node_idx]; + { + my $size= 0; + $size= "EXTRA_SIZE($node->{typedef})" if $node->{suffix}; + $node->{arg_len}= $size; - printf $out "\t%*s\t/* %*s */\n", -37, "$varies,", -$rwidth, $node->{name}; + } + { + my $varies= 0; + $varies= 1 if $node->{code} and $node->{code}=~"str"; + $node->{arg_len_varies}= $varies; + } + $node->{off_by_arg}= $node->{longj} || 0; + print $out " {\n"; + print $out " /* #$node_idx $node->{optype} $node->{name} */\n"; + foreach my $f_idx (0..$#fields) { + my $field= $fields[$f_idx]; + printf $out " .%s = %s", $field, $node->{$field} // 0; + printf $out $f_idx == $#fields ? "\n" : ",\n"; + } + print $out " }"; + print $out $node_idx==$#all ? 
"\n" : ",\n"; } print $out <<EOP; @@ -569,31 +536,8 @@ EOP EOP } -sub print_reg_off_by_arg { - my ($out)= @_; - print $out <<EOP; - -/* PL_regnode_off_by_arg[] - Which argument holds the offset to the next node */ - -#ifndef DOINIT -EXTCONST U8 PL_regnode_off_by_arg[]; -#else -EXTCONST U8 PL_regnode_off_by_arg[] = { -EOP - - foreach my $node (@ops) { - my $size= $node->{longj} || 0; - - printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name}; - } - - print $out <<EOP; -}; -#endif -EOP -} -sub print_reg_name { +sub print_regnode_name { my ($out)= @_; print $out <<EOP; @@ -608,8 +552,6 @@ EOP my $ofs= 0; my $sym= ""; foreach my $node (@all) { - my $size= $node->{longj} || 0; - printf $out "\t%*s\t/* $sym%#04x */\n", -3 - $base_name_width, qq("$node->{name}",), $node->{id} - $ofs; if ( $node->{id} == $#ops and @ops != @all ) { @@ -867,11 +809,11 @@ my $out= open_new( 'regnodes.h', '>', print $out "#if $confine_to_core\n\n"; print_typedefs($out); print_state_defs($out); -print_regkind($out); -print_regarglen($out); -print_regargvaries($out); -print_reg_off_by_arg($out); -print_reg_name($out); + +print_regnode_name($out); +print_regnode_info($out); + + print_reg_extflags_name($out); print_reg_intflags_name($out); print_process_flags($out); |