summaryrefslogtreecommitdiff
path: root/regen
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2022-08-03 16:19:38 +0200
committerYves Orton <demerphq@gmail.com>2022-08-06 11:32:34 +0200
commit12d173c94b050c244dbb4e2162e29834a9ac3aff (patch)
tree7da7cb9fd9869b779d4b8f8ffaf5fa4402a75bc1 /regen
parentcbf5c5ba5f44cf0fe11b64213facfc8ae7c33e4e (diff)
downloadperl-12d173c94b050c244dbb4e2162e29834a9ac3aff.tar.gz
regex engine - replace many attribute arrays with one
This replaces PL_regnode_arg_len, PL_regnode_arg_len_varies, PL_regnode_off_by_arg and PL_regnode_kind with a single PL_regnode_info array, an array of struct regnode_meta that holds the same data in struct form. Since PL_regnode_name is only used in debugging builds of the regex engine, we keep it separate. If we add more debug properties it might be good to create a PL_regnode_debug_info[] to hold that data instead. This means that when we add new properties we do not need to modify any secondary sources, just the struct definition and regen/regcomp.pl.
Diffstat (limited to 'regen')
-rw-r--r--regen/regcomp.pl132
1 files changed, 37 insertions, 95 deletions
diff --git a/regen/regcomp.pl b/regen/regcomp.pl
index 3ab868a600..17f2cc03e0 100644
--- a/regen/regcomp.pl
+++ b/regen/regcomp.pl
@@ -51,8 +51,8 @@ use strict;
# id Both integer value for this opcode/state
# optype Both Either 'op' or 'state'
# line_num Both line_num number of the input file for this item.
-# type Op Type of node (aka regkind)
-# code Op Apparently not used
+# type Op Type of node (aka regnode_kind)
+# code Op Meta about the node, used to detect variable length nodes
# suffix Op which regnode struct this uses, so if this is '1', it
# uses 'struct regnode_1'
# flags Op S for simple; V for varies
@@ -272,10 +272,10 @@ sub print_process_EXACTish {
print $out <<EOP,
/* Is 'op', known to be of type EXACT, folding? */
-#define isEXACTFish(op) (__ASSERT_(PL_regnode_kind[op] == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT))))
+#define isEXACTFish(op) (__ASSERT_(REGNODE_TYPE(op) == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT))))
/* Do only UTF-8 target strings match 'op', known to be of type EXACT? */
-#define isEXACT_REQ8(op) (__ASSERT_(PL_regnode_kind[op] == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT))))
+#define isEXACT_REQ8(op) (__ASSERT_(REGNODE_TYPE(op) == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT))))
#ifndef DOINIT
EXTCONST U32 PL_EXACTFish_bitmask;
@@ -456,32 +456,6 @@ sub print_state_def_line
print $fh "\n"; # Blank line separates groups for clarity
}
-sub print_regkind {
- my ($out)= @_;
- print $out <<EOP;
-
-/* PL_regnode_kind[] What type of regop or state is this. */
-
-#ifndef DOINIT
-EXTCONST U8 PL_regnode_kind[];
-#else
-EXTCONST U8 PL_regnode_kind[] = {
-EOP
- use Data::Dumper;
- foreach my $node (@all) {
- print Dumper($node) if !defined $node->{type} or !defined( $node->{name} );
- printf $out "\t%*s\t/* %*s */\n",
- -1 - $twidth, "$node->{type},", -$base_name_width, $node->{name};
- print $out "\t/* ------------ States ------------- */\n"
- if $node->{id} == $#ops and $node->{id} != $#all;
- }
-
- print $out <<EOP;
-};
-#endif
-EOP
-}
-
sub print_typedefs {
my ($out)= @_;
print $out <<EOP;
@@ -515,51 +489,44 @@ EOP
}
-sub print_regarglen {
- my ($out)= @_;
- print $out <<EOP;
-
-/* PL_regnode_arg_len[] - How large is the argument part of the node (in regnodes) */
-
-#ifndef DOINIT
-EXTCONST U8 PL_regnode_arg_len[];
-#else
-EXTCONST U8 PL_regnode_arg_len[] = {
-EOP
-
- foreach my $node (@ops) {
- my $size= 0;
- $size= "EXTRA_SIZE($node->{typedef})" if $node->{suffix};
- printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name};
- }
- print $out <<EOP;
-};
-#endif /* DOINIT */
-EOP
-}
-
-sub print_regargvaries {
+sub print_regnode_info {
my ($out)= @_;
print $out <<EOP;
-/* PL_regnode_arg_len_varies[] - Is the size of the node determined by STR_SZ() macros?
- Currently this is a boolean, but in the future it might turn into something
- that uses more bits of the value to indicate that a different macro would be
- used. */
+/* PL_regnode_info[] - Opcode/state names in string form, for debugging */
#ifndef DOINIT
-EXTCONST U8 PL_regnode_arg_len_varies[];
+EXTCONST struct regnode_meta PL_regnode_info[];
#else
-EXTCONST U8 PL_regnode_arg_len_varies[] = {
+EXTCONST struct regnode_meta const PL_regnode_info[] = {
EOP
- foreach my $node (@ops) {
- my $varies= 0;
- $varies= 1 if $node->{code}=~"str";
+ my @fields= qw(type arg_len arg_len_varies off_by_arg);
+ foreach my $node_idx (0..$#all) {
+ my $node= $all[$node_idx];
+ {
+ my $size= 0;
+ $size= "EXTRA_SIZE($node->{typedef})" if $node->{suffix};
+ $node->{arg_len}= $size;
- printf $out "\t%*s\t/* %*s */\n", -37, "$varies,", -$rwidth, $node->{name};
+ }
+ {
+ my $varies= 0;
+ $varies= 1 if $node->{code} and $node->{code}=~"str";
+ $node->{arg_len_varies}= $varies;
+ }
+ $node->{off_by_arg}= $node->{longj} || 0;
+ print $out " {\n";
+ print $out " /* #$node_idx $node->{optype} $node->{name} */\n";
+ foreach my $f_idx (0..$#fields) {
+ my $field= $fields[$f_idx];
+ printf $out " .%s = %s", $field, $node->{$field} // 0;
+ printf $out $f_idx == $#fields ? "\n" : ",\n";
+ }
+ print $out " }";
+ print $out $node_idx==$#all ? "\n" : ",\n";
}
print $out <<EOP;
@@ -569,31 +536,8 @@ EOP
EOP
}
-sub print_reg_off_by_arg {
- my ($out)= @_;
- print $out <<EOP;
-
-/* PL_regnode_off_by_arg[] - Which argument holds the offset to the next node */
-
-#ifndef DOINIT
-EXTCONST U8 PL_regnode_off_by_arg[];
-#else
-EXTCONST U8 PL_regnode_off_by_arg[] = {
-EOP
-
- foreach my $node (@ops) {
- my $size= $node->{longj} || 0;
-
- printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name};
- }
-
- print $out <<EOP;
-};
-#endif
-EOP
-}
-sub print_reg_name {
+sub print_regnode_name {
my ($out)= @_;
print $out <<EOP;
@@ -608,8 +552,6 @@ EOP
my $ofs= 0;
my $sym= "";
foreach my $node (@all) {
- my $size= $node->{longj} || 0;
-
printf $out "\t%*s\t/* $sym%#04x */\n",
-3 - $base_name_width, qq("$node->{name}",), $node->{id} - $ofs;
if ( $node->{id} == $#ops and @ops != @all ) {
@@ -867,11 +809,11 @@ my $out= open_new( 'regnodes.h', '>',
print $out "#if $confine_to_core\n\n";
print_typedefs($out);
print_state_defs($out);
-print_regkind($out);
-print_regarglen($out);
-print_regargvaries($out);
-print_reg_off_by_arg($out);
-print_reg_name($out);
+
+print_regnode_name($out);
+print_regnode_info($out);
+
+
print_reg_extflags_name($out);
print_reg_intflags_name($out);
print_process_flags($out);