diff options
-rw-r--r-- | regen/regcomp.pl | 760 | ||||
-rw-r--r-- | regnodes.h | 4 |
2 files changed, 481 insertions, 283 deletions
diff --git a/regen/regcomp.pl b/regen/regcomp.pl index b90efc7595..9890a1aa69 100644 --- a/regen/regcomp.pl +++ b/regen/regcomp.pl @@ -1,5 +1,6 @@ #!/usr/bin/perl -w -# +# +# # Regenerate (overwriting only if changed): # # pod/perldebguts.pod @@ -23,113 +24,204 @@ BEGIN { } use strict; -open DESC, 'regcomp.sym'; - -my $ind = 0; -my (@name,@rest,@type,@code,@args,@flags,@longj,@cmnt); -my ($longest_name_length,$desc,$lastregop) = 0; -my (%seen_op, %type_alias); -while (<DESC>) { - # Special pod comments - if (/^#\* ?/) { $cmnt[$ind] .= "# $'"; } - # Truly blank lines possibly surrounding pod comments - elsif (/^\s*$/) { $cmnt[$ind] .= "\n" } - - next if /^(?:#|\s*$)/; - chomp; # No \z in 5.004 - s/\s*$//; - if (/^-+\s*$/) { - $lastregop= $ind; - next; +# NOTE I don't think anyone actually knows what all of these properties mean, +# and I suspect some of them are outright unused. This is a first attempt to +# clean up the generation so maybe one day we can move to something more self +# documenting. (One might argue that an array of hashes of properties would +# be easier to use.) +# +# Why we use the term regnode and nodes, and not say, opcodes, I am not sure. + +# General thoughts: +# 1. We use a single continuum to represent both opcodes and states, +# and in regexec.c we switch on the combined set. +# 2. Opcodes have more information associated to them, states are simpler, +# basically just an identifier/number that can be used to switch within +# the state machine. +# 3. Some opcode are order dependent. +# 4. Output files often use "tricks" to reduce diff effects. Some of what +# we do below is more clumsy looking than it could be because of this. + +# Op/state properties: +# +# Property In Descr +# ---------------------------------------------------------------------------- +# name Both Name of op/state +# id Both integer value for this opcode/state +# optype Both Either 'op' or 'state' +# line_num Both line_num number of the input file for this item. +# type Op Type of node (aka regkind) +# code Op what code is associated with this node (???) +# args Op what type of args the node has (which regnode struct) +# flags Op (???) +# longj Op Whether this node is a longjump +# comment Both Comment about node, if any +# pod_comment Both Special comments for pod output (preceding lines in def) + +# Global State +my @all; # all opcodes/state +my %all; # hash of all opcode/state names + +my @ops; # array of just opcodes +my @states; # array of just states + +my $longest_name_length= 0; # track lengths of names for nicer reports +my (%type_alias); # map the type (??) + +# register a newly constructed node into our state tables. +# ensures that we have no name collisions (on name anyway), +# and issues the "id" for the node. +sub register_node { + my ($node)= @_; + + if ( $all{ $node->{name} } ) { + die "Duplicate item '$node->{name}' in regcomp.sym line $node->{line_num} " + . "previously defined on line $all{ $node->{name} }{line_num}\n"; + } elsif (!$node->{optype}) { + die "must have an optype in node ", Dumper($node); + } elsif ($node->{optype} eq "op") { + push @ops, $node; + } elsif ($node->{optype} eq "state") { + push @states, $node; + } else { + die "Uknown optype '$node->{optype}' in ", Dumper($node); } - unless ($lastregop) { - ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/; + $node->{id}= 0 + @all; + push @all, $node; + $all{ $node->{name} }= $node; +} - if (defined $seen_op{$name[$ind]}) { - die "Duplicate regop $name[$ind] in regcomp.sym line $. previously defined on line $seen_op{$name[$ind]}\n"; - } else { - $seen_op{$name[$ind]}= $.; - } +# Parse and add an opcode definition to the global state. +# An opcode definition looks like this: +# +# +- args +# | +- flags +# | | +- longjmp +# Name Type code | | | ; comment +# -------------------------------------------------------------------------- +# IFMATCH BRANCHJ, off 1 . 2 ; Succeeds if the following matches. +# UNLESSM BRANCHJ, off 1 . 2 ; Fails if the following matches. +# SUSPEND BRANCHJ, off 1 V 1 ; "Independent" sub-RE. +# IFTHEN BRANCHJ, off 1 V 1 ; Switch, should be preceded by switcher. +# GROUPP GROUPP, num 1 ; Whether the group matched. +# +# Not every opcode definition has all of these. We should maybe make this +# nicer/easier to read in the future. Also note that the above is tab +# sensitive. + +sub parse_opcode_def { + my ( $text, $line_num, $pod_comment )= @_; + my $node= { + line_num => $line_num, + pod_comment => $pod_comment, + optype => "op", + }; - ($type[$ind], $code[$ind], $args[$ind], $flags[$ind], $longj[$ind]) - = split /[,\s]\s*/, $desc; + # first split the line into three, the initial NAME, a middle part + # that we call "desc" which contains various (not well documented) things, + # and a comment section. + @{$node}{qw(name desc comment)}= /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/ + or die "Failed to match $_"; - if (!defined $seen_op{$type[$ind]} and !defined $type_alias{$type[$ind]}) { - #warn "Regop type '$type[$ind]' from regcomp.sym line $. is not an existing regop, and will be aliased to $name[$ind]\n" - # if -t STDERR; - $type_alias{$type[$ind]}= $name[$ind]; - } + # the content of the "desc" field from the first step is extracted here: + @{$node}{qw(type code args flags longj)}= split /[,\s]\s*/, $node->{desc}; - $longest_name_length = length $name[$ind] - if length $name[$ind] > $longest_name_length; - ++$ind; - } else { - my ($type,@lists)=split /\s+/, $_; - die "No list? $type" if !@lists; - foreach my $list (@lists) { - my ($names,$special)=split /:/, $list , 2; - $special ||= ""; - foreach my $name (split /,/,$names) { - my $real= $name eq 'resume' - ? "resume_$type" - : "${type}_$name"; - my @suffix; - if (!$special) { - @suffix=(""); - } elsif ($special=~/\d/) { - @suffix=(1..$special); - } elsif ($special eq 'FAIL') { - @suffix=("","_fail"); - } else { - die "unknown :type ':$special'"; - } - foreach my $suffix (@suffix) { - $name[$ind]="$real$suffix"; - $type[$ind]=$type; - $rest[$ind]="state for $type"; - ++$ind; - } + $node->{$_} //= "" for qw(type code args flags longj); + + register_node($node); # has to be before the type_alias code below + + if ( !$all{ $node->{type} } and !$type_alias{ $node->{type} } ) { + + #warn "Regop type '$node->{type}' from regcomp.sym line $line_num" + # ." is not an existing regop, and will be aliased to $node->{name}\n" + # if -t STDERR; + $type_alias{ $node->{type} }= $node->{name}; + } + + $longest_name_length= length $node->{name} + if length $node->{name} > $longest_name_length; +} + +# parse out a state definition and add the resulting data +# into the global state. may create multiple new states from +# a single definition (this is part of the point). +# Format for states: +# REGOP \t typelist [ \t typelist] +# typelist= namelist +# = namelist:FAIL +# = name:count +# Eg: +# WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL +# BRANCH next:FAIL +# CURLYM A,B:FAIL +# +# The CURLYM definition would create the states: +# CURLYM_A, CURLYM_A_fail, CURLYM_B, CURLYM_B_fail +sub parse_state_def { + my ( $text, $line_num, $pod_comment )= @_; + my ( $type, @lists )= split /\s+/, $text; + die "No list? $type" if !@lists; + foreach my $list (@lists) { + my ( $names, $special )= split /:/, $list, 2; + $special ||= ""; + foreach my $name ( split /,/, $names ) { + my $real= + $name eq 'resume' + ? "resume_$type" + : "${type}_$name"; + my @suffix; + if ( !$special ) { + @suffix= (""); + } + elsif ( $special =~ /\d/ ) { + @suffix= ( 1 .. $special ); + } + elsif ( $special eq 'FAIL' ) { + @suffix= ( "", "_fail" ); + } + else { + die "unknown :type ':$special'"; + } + foreach my $suffix (@suffix) { + my $node= { + name => "$real$suffix", + optype => "state", + type => $type || "", + comment => "state for $type", + line_num => $line_num, + }; + register_node($node); } } - } } -# use fixed width to keep the diffs between regcomp.pl recompiles -# as small as possible. -my ($width,$rwidth,$twidth)=(22,12,9); -$lastregop ||= $ind; -my $tot = $ind; -close DESC; -die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!" - if $lastregop>256; sub process_flags { - my ($flag, $varname, $comment) = @_; - $comment = '' unless defined $comment; - - $ind = 0; - my @selected; - my $bitmap = ''; - do { - my $set = $flags[$ind] && $flags[$ind] eq $flag ? 1 : 0; - # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic - # ops in the C code. - my $current = do { - local $^W; - ord do { - substr $bitmap, ($ind >> 3); - } - }; - substr($bitmap, ($ind >> 3), 1) = chr($current | ($set << ($ind & 7))); - - push @selected, $name[$ind] if $set; - } while (++$ind < $lastregop); - my $out_string = join ', ', @selected, 0; - $out_string =~ s/(.{1,70},) /$1\n /g; + my ( $flag, $varname, $comment )= @_; + $comment= '' unless defined $comment; + + my @selected; + my $bitmap= ''; + for my $node (@ops) { + my $set= $node->{flags} && $node->{flags} eq $flag ? 1 : 0; + + # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic + # ops in the C code. + my $current= do { + no warnings; + ord substr $bitmap, ( $node->{id} >> 3 ); + }; + substr( $bitmap, ( $node->{id} >> 3 ), 1 )= + chr( $current | ( $set << ( $node->{id} & 7 ) ) ); + + push @selected, $node->{name} if $set; + } + my $out_string= join ', ', @selected, 0; + $out_string =~ s/(.{1,70},) /$1\n /g; - my $out_mask = join ', ', map {sprintf "0x%02X", ord $_} split '', $bitmap; + my $out_mask= join ', ', map { sprintf "0x%02X", ord $_ } split '', $bitmap; - return $comment . <<"EOP"; + return $comment . <<"EOP"; #define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7))) #ifndef DOINIT @@ -150,36 +242,78 @@ EXTCONST U8 PL_${varname}_bitmask[] = { EOP } -my $out = open_new('regnodes.h', '>', - { by => 'regen/regcomp.pl', from => 'regcomp.sym' }); -printf $out <<EOP, +sub read_definition { + my ( $file )= @_; + my ( $seen_sep, $pod_comment )= ""; + open my $in_fh, "<", $file + or die "Failed to open '$file' for reading: $!"; + while (<$in_fh>) { + + # Special pod comments + if (/^#\* ?/) { $pod_comment .= "# $'"; } + + # Truly blank lines possibly surrounding pod comments + elsif (/^\s*$/) { $pod_comment .= "\n" } + + next if /\A\s*#/ || /\A\s*\z/; + + s/\s*\z//; + if (/^-+\s*$/) { + $seen_sep= 1; + next; + } + + if ($seen_sep) { + parse_state_def( $_, $., $pod_comment ); + } + else { + parse_opcode_def( $_, $., $pod_comment ); + } + $pod_comment= ""; + } + close $in_fh; + die "Too many regexp/state opcodes! Maximum is 256, but there are ", 0 + @all, + " in file!" + if @all > 256; +} + +# use fixed width to keep the diffs between regcomp.pl recompiles +# as small as possible. +my ( $width, $rwidth, $twidth )= ( 22, 12, 9 ); + +sub print_state_defs { + my ($out)= @_; + printf $out <<EOP, /* Regops and State definitions */ #define %*s\t%d #define %*s\t%d EOP - -$width, REGNODE_MAX => $lastregop - 1, - -$width, REGMATCH_STATE_MAX => $tot - 1 -; - -my %rev_type_alias= reverse %type_alias; -for ($ind=0; $ind < $lastregop ; ++$ind) { - printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", - -$width, $name[$ind], $ind, $ind, $rest[$ind]; - if (defined(my $alias= $rev_type_alias{$name[$ind]})) { - printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", - -$width, $alias, $ind, $ind, "type alias"; - } + -$width, + REGNODE_MAX => $#ops, + -$width, REGMATCH_STATE_MAX => $#all; + + my %rev_type_alias= reverse %type_alias; + for my $node (@ops) { + printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", + -$width, $node->{name}, $node->{id}, $node->{id}, $node->{comment}; + if ( defined( my $alias= $rev_type_alias{ $node->{name} } ) ) { + printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", + -$width, $alias, $node->{id}, $node->{id}, "type alias"; + } + } -} -print $out "\t/* ------------ States ------------- */\n"; -for ( ; $ind < $tot ; $ind++) { - printf $out "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n", - -$width, $name[$ind], $ind - $lastregop + 1, $rest[$ind]; + print $out "\t/* ------------ States ------------- */\n"; + for my $node (@states) { + printf $out "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n", + -$width, $node->{name}, $node->{id} - $#ops, $node->{comment}; + } } -print $out <<EOP; +sub print_regkind { + my ($out)= @_; + print $out <<EOP; /* PL_regkind[] What type of regop or state is this. */ @@ -188,54 +322,80 @@ EXTCONST U8 PL_regkind[]; #else EXTCONST U8 PL_regkind[] = { EOP + use Data::Dumper; + foreach my $node (@all) { + print Dumper($node) if !defined $node->{type} or !defined( $node->{name} ); + printf $out "\t%*s\t/* %*s */\n", + -1 - $twidth, "$node->{type},", -$width, $node->{name}; + print $out "\t/* ------------ States ------------- */\n" + if $node->{id} == $#ops and $node->{id} != $#all; + } -$ind = 0; -do { - printf $out "\t%*s\t/* %*s */\n", - -1-$twidth, "$type[$ind],", -$width, $name[$ind]; - print $out "\t/* ------------ States ------------- */\n" - if $ind + 1 == $lastregop and $lastregop != $tot; -} while (++$ind < $tot); - -print $out <<EOP; + print $out <<EOP; }; #endif +EOP +} + +sub wrap_ifdef_print { + my $out= shift; + my $token= shift; + print $out <<EOP; + +#ifdef $token +EOP + $_->($out) for @_; + print $out <<EOP; +#endif /* $token */ + +EOP +} + +sub print_regarglen { + my ($out)= @_; + print $out <<EOP; /* regarglen[] - How large is the argument part of the node (in regnodes) */ -#ifdef REG_COMP_C static const U8 regarglen[] = { EOP -$ind = 0; -do { - my $size = 0; - $size = "EXTRA_SIZE(struct regnode_$args[$ind])" if $args[$ind]; - - printf $out "\t%*s\t/* %*s */\n", - -37, "$size,",-$rwidth,$name[$ind]; -} while (++$ind < $lastregop); + foreach my $node (@ops) { + my $size= 0; + $size= "EXTRA_SIZE(struct regnode_$node->{args})" if $node->{args}; -print $out <<EOP; + printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name}; + } + + print $out <<EOP; }; +EOP +} + +sub print_reg_off_by_arg { + my ($out)= @_; + print $out <<EOP; /* reg_off_by_arg[] - Which argument holds the offset to the next node */ static const char reg_off_by_arg[] = { EOP -$ind = 0; -do { - my $size = $longj[$ind] || 0; + foreach my $node (@ops) { + my $size= $node->{longj} || 0; - printf $out "\t%d,\t/* %*s */\n", - $size, -$rwidth, $name[$ind] -} while (++$ind < $lastregop); + printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name}; + } -print $out <<EOP; + print $out <<EOP; }; -#endif /* REG_COMP_C */ +EOP +} + +sub print_reg_name { + my ($out)= @_; + print $out <<EOP; /* reg_name[] - Opcode/state names in string form, for debugging */ @@ -245,30 +405,30 @@ EXTCONST char * PL_reg_name[]; EXTCONST char * const PL_reg_name[] = { EOP -$ind = 0; -my $ofs = 0; -my $sym = ""; -do { - my $size = $longj[$ind] || 0; - - printf $out "\t%*s\t/* $sym%#04x */\n", - -3-$width,qq("$name[$ind]",), $ind - $ofs; - if ($ind + 1 == $lastregop and $lastregop != $tot) { - print $out "\t/* ------------ States ------------- */\n"; - $ofs = $lastregop - 1; - $sym = 'REGNODE_MAX +'; - } - -} while (++$ind < $tot); + my $ofs= 0; + my $sym= ""; + foreach my $node (@all) { + my $size= $node->{longj} || 0; + + printf $out "\t%*s\t/* $sym%#04x */\n", + -3 - $width, qq("$node->{name}",), $node->{id} - $ofs; + if ( $node->{id} == $#ops and @ops != @all ) { + print $out "\t/* ------------ States ------------- */\n"; + $ofs= $#ops; + $sym= 'REGNODE_MAX +'; + } + } -print $out <<EOP; + print $out <<EOP; }; #endif /* DOINIT */ EOP +} -{ -print $out <<EOP; +sub print_reg_extflags_name { + my ($out)= @_; + print $out <<EOP; /* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */ #ifndef DOINIT @@ -277,99 +437,103 @@ EXTCONST char * PL_reg_extflags_name[]; EXTCONST char * const PL_reg_extflags_name[] = { EOP -my %rxfv; -my %definitions; # Remember what the symbol definitions are -my $val = 0; -my %reverse; -my $REG_EXTFLAGS_NAME_SIZE = 0; -foreach my $file ("op_reg_common.h", "regexp.h") { - open FH,"<$file" or die "Can't read $file: $!"; - while (<FH>) { - - # optional leading '_'. Return symbol in $1, and strip it from - # rest of line - if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) { - chomp; - my $define = $1; - my $orig= $_; - s{ /\* .*? \*/ }{ }x; # Replace comments by a blank - - # Replace any prior defined symbols by their values - foreach my $key (keys %definitions) { - s/\b$key\b/$definitions{$key}/g; - } + my %rxfv; + my %definitions; # Remember what the symbol definitions are + my $val= 0; + my %reverse; + my $REG_EXTFLAGS_NAME_SIZE= 0; + foreach my $file ( "op_reg_common.h", "regexp.h" ) { + open my $in_fh, "<", $file or die "Can't read '$file': $!"; + while (<$in_fh>) { + + # optional leading '_'. Return symbol in $1, and strip it from + # comment of line + if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) { + chomp; + my $define= $1; + my $orig= $_; + s{ /\* .*? \*/ }{ }x; # Replace comments by a blank + + # Replace any prior defined symbols by their values + foreach my $key ( keys %definitions ) { + s/\b$key\b/$definitions{$key}/g; + } - # Remove the U suffix from unsigned int literals - s/\b([0-9]+)U\b/$1/g; + # Remove the U suffix from unsigned int literals + s/\b([0-9]+)U\b/$1/g; - my $newval = eval $_; # Get numeric definition + my $newval= eval $_; # Get numeric definition - $definitions{$define} = $newval; + $definitions{$define}= $newval; - next unless $_ =~ /<</; # Bit defines use left shift - if($val & $newval) { - my @names=($define, $reverse{$newval}); - s/PMf_// for @names; - if ($names[0] ne $names[1]) { - die sprintf "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", $newval, $orig, $_; + next unless $_ =~ /<</; # Bit defines use left shift + if ( $val & $newval ) { + my @names= ( $define, $reverse{$newval} ); + s/PMf_// for @names; + if ( $names[0] ne $names[1] ) { + die sprintf + "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", + $newval, $orig, $_; + } + next; } - next; + $val |= $newval; + $rxfv{$define}= $newval; + $reverse{$newval}= $define; } - $val|=$newval; - $rxfv{$define}= $newval; - $reverse{$newval} = $define; } } -} -my %vrxf=reverse %rxfv; -printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', $val; -my %multibits; -for (0..31) { - my $power_of_2 = 2**$_; - my $n=$vrxf{$power_of_2}; - my $extra = ""; - if (! $n) { - - # Here, there was no name that matched exactly the bit. It could be - # either that it is unused, or the name matches multiple bits. - if (! ($val & $power_of_2)) { - $n = "UNUSED_BIT_$_"; - } - else { - - # Here, must be because it matches multiple bits. Look through - # all possibilities until find one that matches this one. Use - # that name, and all the bits it matches - foreach my $name (keys %rxfv) { - if ($rxfv{$name} & $power_of_2) { - $n = $name . ( $multibits{$name}++ ); - $extra= sprintf qq{ : "%s" - 0x%08x}, $name, $rxfv{$name} - if $power_of_2 != $rxfv{$name}; - last; + my %vrxf= reverse %rxfv; + printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', + $val; + my %multibits; + for ( 0 .. 31 ) { + my $power_of_2= 2**$_; + my $n= $vrxf{$power_of_2}; + my $extra= ""; + if ( !$n ) { + + # Here, there was no name that matched exactly the bit. It could be + # either that it is unused, or the name matches multiple bits. + if ( !( $val & $power_of_2 ) ) { + $n= "UNUSED_BIT_$_"; + } + else { + + # Here, must be because it matches multiple bits. Look through + # all possibilities until find one that matches this one. Use + # that name, and all the bits it matches + foreach my $name ( keys %rxfv ) { + if ( $rxfv{$name} & $power_of_2 ) { + $n= $name . ( $multibits{$name}++ ); + $extra= sprintf qq{ : "%s" - 0x%08x}, $name, + $rxfv{$name} + if $power_of_2 != $rxfv{$name}; + last; + } } } } + s/\bRXf_(PMf_)?// for $n, $extra; + printf $out qq(\t%-20s/* 0x%08x%s */\n), qq("$n",), $power_of_2, $extra; + $REG_EXTFLAGS_NAME_SIZE++; } - s/\bRXf_(PMf_)?// for $n, $extra; - printf $out qq(\t%-20s/* 0x%08x%s */\n), - qq("$n",),$power_of_2, $extra; - $REG_EXTFLAGS_NAME_SIZE++; -} - -print $out <<EOP; + + print $out <<EOP; }; #endif /* DOINIT */ -EOP -print $out <<EOQ #ifdef DEBUGGING # define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE #endif +EOP -EOQ } -{ -print $out <<EOP; + +sub print_reg_intflags_name { + my ($out)= @_; + print $out <<EOP; + /* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */ #ifndef DOINIT @@ -378,37 +542,42 @@ EXTCONST char * PL_reg_intflags_name[]; EXTCONST char * const PL_reg_intflags_name[] = { EOP -my %rxfv; -my %definitions; # Remember what the symbol definitions are -my $val = 0; -my %reverse; -my $REG_INTFLAGS_NAME_SIZE = 0; -foreach my $file ("regcomp.h") { - open my $fh, "<", $file or die "Can't read $file: $!"; - while (<$fh>) { - # optional leading '_'. Return symbol in $1, and strip it from - # rest of line - if (m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi) { - chomp; - my $define = $1; - my $abbr= $2; - my $hex= $3; - my $comment= $4; - my $val= hex($hex); - $comment= $comment ? " - $comment" : ""; - - printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), $val, $define, $comment; - $REG_INTFLAGS_NAME_SIZE++; + my %rxfv; + my %definitions; # Remember what the symbol definitions are + my $val= 0; + my %reverse; + my $REG_INTFLAGS_NAME_SIZE= 0; + foreach my $file ("regcomp.h") { + open my $fh, "<", $file or die "Can't read $file: $!"; + while (<$fh>) { + + # optional leading '_'. Return symbol in $1, and strip it from + # comment of line + if ( + m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi + ) + { + chomp; + my $define= $1; + my $abbr= $2; + my $hex= $3; + my $comment= $4; + my $val= hex($hex); + $comment= $comment ? " - $comment" : ""; + + printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), + $val, $define, $comment; + $REG_INTFLAGS_NAME_SIZE++; + } } } -} -print $out <<EOP; + print $out <<EOP; }; #endif /* DOINIT */ EOP -print $out <<EOQ; + print $out <<EOQ; #ifdef DEBUGGING # define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE #endif @@ -416,62 +585,89 @@ print $out <<EOQ; EOQ } -print $out process_flags('V', 'varies', <<'EOC'); +sub print_process_flags { + my ($out)= @_; + + print $out process_flags( 'V', 'varies', <<'EOC'); /* The following have no fixed length. U8 so we can do strchr() on it. */ EOC -print $out process_flags('S', 'simple', <<'EOC'); + print $out process_flags( 'S', 'simple', <<'EOC'); /* The following always have a length of 1. U8 we can do strchr() on it. */ /* (Note that length 1 means "one character" under UTF8, not "one octet".) */ EOC -read_only_bottom_close_and_rename($out); +} -my $guts = open_new('pod/perldebguts.pod', '>'); +sub do_perldebguts { + my $guts= open_new( 'pod/perldebguts.pod', '>' ); -my $code; -my $name_fmt = '<' x ($longest_name_length-1); -my $descr_fmt = '<' x (58-$longest_name_length); -eval <<EOD; + my $node; + my $code; + my $name_fmt= '<' x ( $longest_name_length - 1 ); + my $descr_fmt= '<' x ( 58 - $longest_name_length ); + eval <<EOD or die $@; format GuTS = ^*~~ - \$cmnt[\$_] + \$node->{pod_comment} ^$name_fmt ^<<<<<<<<< ^$descr_fmt~~ - \$name[\$_], \$code, \$rest[\$_] + \$node->{name}, \$code, \$node->{comment}//'' . +1; EOD - -select +(select($guts), do { - $~ = "GuTS"; + + my $old_fh= select($guts); + $~= "GuTS"; open my $oldguts, "pod/perldebguts.pod" or die "$0 cannot open pod/perldebguts.pod for reading: $!"; - while(<$oldguts>) { + while (<$oldguts>) { print; last if /=for regcomp.pl begin/; } - print <<'end'; + print <<'END_OF_DESCR'; # TYPE arg-description [num-args] [longjump-len] DESCRIPTION -end - for (0..$lastregop-1) { - $code = "$code[$_] ".($args[$_]||""); - $code .= " $longj[$_]" if $longj[$_]; - if ($cmnt[$_] ||= "") { +END_OF_DESCR + for my $n (@ops) { + $node= $n; + $code= "$node->{code} " . ( $node->{args} || "" ); + $code .= " $node->{longj}" if $node->{longj}; + if ( $node->{pod_comment} ||= "" ) { + # Trim multiple blanks - $cmnt[$_] =~ s/^\n\n+/\n/; $cmnt[$_] =~ s/\n\n+$/\n\n/ + $node->{pod_comment} =~ s/^\n\n+/\n/; + $node->{pod_comment} =~ s/\n\n+$/\n\n/; } write; } print "\n"; - while(<$oldguts>) { + while (<$oldguts>) { last if /=for regcomp.pl end/; } do { print } while <$oldguts>; + select $old_fh; + close_and_rename($guts); +} -})[0]; +read_definition("regcomp.sym"); +my $out= open_new( 'regnodes.h', '>', + { by => 'regen/regcomp.pl', from => 'regcomp.sym' } ); +print_state_defs($out); +print_regkind($out); +wrap_ifdef_print( + $out, + "REG_COMP_C", + \&print_regarglen, + \&print_reg_off_by_arg +); +print_reg_name($out); +print_reg_extflags_name($out); +print_reg_intflags_name($out); +print_process_flags($out); +read_only_bottom_close_and_rename($out); -close_and_rename($guts); +do_perldebguts(); diff --git a/regnodes.h b/regnodes.h index c05003d1e0..f27abe0c7c 100644 --- a/regnodes.h +++ b/regnodes.h @@ -291,9 +291,10 @@ EXTCONST U8 PL_regkind[] = { }; #endif +#ifdef REG_COMP_C + /* regarglen[] - How large is the argument part of the node (in regnodes) */ -#ifdef REG_COMP_C static const U8 regarglen[] = { 0, /* END */ 0, /* SUCCEED */ @@ -492,6 +493,7 @@ static const char reg_off_by_arg[] = { #endif /* REG_COMP_C */ + /* reg_name[] - Opcode/state names in string form, for debugging */ #ifndef DOINIT |