#!/usr/bin/perl -w
#
# Regenerate (overwriting only if changed):
#
#    pod/perldebguts.pod
#    regnodes.h
#
# from information stored in
#
#    regcomp.sym
#    regexp.h
#
# pod/perldebguts.pod is not completely regenerated. Only the table of
# regexp nodes is replaced; other parts remain unchanged.
#
# Accepts the standard regen_lib -q and -v args.
#
# This script is normally invoked from regen.pl.
#
# NOTE(review): this copy of the script appears to be damaged.  Its original
# line breaks and indentation were lost, and at several places a '<' is
# followed directly by text that cannot belong to it (each such place is
# marked NOTE(review) below).  Code tokens are reproduced exactly as found;
# nothing has been reconstructed.  Line breaks and indentation inside
# here-documents and string literals are a best guess.  Compare against the
# upstream file before running or patching this copy.

BEGIN {
    # Get function prototypes
    require 'regen/regen_lib.pl';
}
use strict;

# NOTE(review): the result of this open is not checked.
open DESC, 'regcomp.sym';

# Next free slot in the parallel arrays below.
my $ind = 0;

# Parallel arrays, one entry per regop/state, indexed by $ind:
#   @name  - symbol name
#   @rest  - description text (text after the ';' for regops,
#            "state for TYPE" for states)
#   @type  - type name
#   @code, @args, @flags, @longj - remaining fields of a regop description
#   @cmnt  - accumulated "#*" pod comments and blank lines seen before the entry
my (@name,@rest,@type,@code,@args,@flags,@longj,@cmnt);

# Only $longest_name_length receives the 0; $desc and $lastregop start
# out undefined.
my ($longest_name_length,$desc,$lastregop) = 0;

# %seen_op:    regop name => input line number on which it was defined.
# %type_alias: type name that is not itself a regop name (when first seen)
#              => name of the first regop that used it.
my (%seen_op, %type_alias);

# NOTE(review): the loop condition is empty in this copy.  The body works on
# $_ and $., and DESC is opened above and closed below, so this was
# presumably a line-by-line read of DESC -- confirm against upstream.
while () {
    # Special pod comments: lines starting with "#*" are appended to the
    # comment for the current slot as "# " plus the rest of the line ($').
    if (/^#\* ?/) { $cmnt[$ind] .= "# $'"; }
    # Truly blank lines possibly surrounding pod comments
    elsif (/^\s*$/) { $cmnt[$ind] .= "\n" }

    # Comment lines and blank lines carry no further data.
    next if /^(?:#|\s*$)/;
    chomp; # No \z in 5.004
    s/\s*$//;

    # A line consisting of dashes ends the regop section; the index of the
    # first state is remembered in $lastregop.
    if (/^-+\s*$/) {
        $lastregop= $ind;
        next;
    }

    # $lastregop is false until the dashed line has been seen (it also
    # stays false if that line is seen while $ind is still 0).
    unless ($lastregop) {
        # Regop line: NAME, then a description running up to ';', then the
        # remaining text.
        ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/;

        # Each regop name may be defined only once.
        if (defined $seen_op{$name[$ind]}) {
            die "Duplicate regop $name[$ind] in regcomp.sym line $. previously defined on line $seen_op{$name[$ind]}\n";
        } else {
            $seen_op{$name[$ind]}= $.;
        }

        # The description is a comma/whitespace separated list of up to
        # five fields.
        ($type[$ind], $code[$ind], $args[$ind], $flags[$ind], $longj[$ind]) = split /[,\s]\s*/, $desc;

        # A type that is neither an already-seen regop nor an already
        # recorded alias becomes an alias for this regop.
        if (!defined $seen_op{$type[$ind]} and !defined $type_alias{$type[$ind]}) {
            #warn "Regop type '$type[$ind]' from regcomp.sym line $. is not an existing regop, and will be aliased to $name[$ind]\n"
            # if -t STDERR;
            $type_alias{$type[$ind]}= $name[$ind];
        }

        $longest_name_length = length $name[$ind] if length $name[$ind] > $longest_name_length;
        ++$ind;
    } else {
        # State line: TYPE followed by one or more lists of the form
        # "name,name,...[:special]".
        my ($type,@lists)=split /\s+/, $_;
        die "No list? $type" if !@lists;
        foreach my $list (@lists) {
            my ($names,$special)=split /:/, $list , 2;
            $special ||= "";
            foreach my $name (split /,/,$names) {
                # 'resume' yields "resume_TYPE"; every other name yields
                # "TYPE_name".
                my $real= $name eq 'resume' ? "resume_$type" : "${type}_$name";

                # The ":special" part selects the suffixes to generate:
                #   absent        - the bare name only
                #   has a digit   - suffixes 1 .. $special
                #   'FAIL'        - the bare name and name_fail
                my @suffix;
                if (!$special) {
                    @suffix=("");
                } elsif ($special=~/\d/) {
                    @suffix=(1..$special);
                } elsif ($special eq 'FAIL') {
                    @suffix=("","_fail");
                } else {
                    die "unknown :type ':$special'";
                }
                foreach my $suffix (@suffix) {
                    $name[$ind]="$real$suffix";
                    $type[$ind]=$type;
                    $rest[$ind]="state for $type";
                    ++$ind;
                }
            }
        }
    }
}

# use fixed width to keep the diffs between regcomp.pl recompiles
# as small as possible.
my ($width,$rwidth,$twidth)=(22,12,9);

# With no dashed separator in the input every entry counts as a regop.
$lastregop ||= $ind;
# Total number of entries (regops plus states).
my $tot = $ind;
close DESC;
die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!" if $lastregop>256;

# process_flags($flag, $varname, $comment)
#
# Build the C source text describing which regops carry a given flag.
#
#   $flag    - value compared (with eq) against each regop's $flags[] entry
#   $varname - used to form the generated identifiers PL_<varname> and
#              PL_<varname>_bitmask, and (upper-cased) the macro
#              REGNODE_<VARNAME>
#   $comment - optional text placed in front of the generated code;
#              defaults to the empty string
#
# Returns one string: $comment followed by the macro definition, the list
# of selected regop names terminated by 0, and a byte-wise bitmap with one
# bit per regop (byte index >> 3, bit index & 7).
#
# Side effect: resets and advances the file-level $ind.  The do/while body
# runs at least once, so index 0 is always examined.
#
# NOTE(review): line breaks and indentation inside the here-document could
# not be recovered from this copy; the layout below is a best guess.
sub process_flags {
    my ($flag, $varname, $comment) = @_;
    $comment = '' unless defined $comment;

    $ind = 0;
    my @selected;
    my $bitmap = '';
    do {
        my $set = $flags[$ind] && $flags[$ind] eq $flag ? 1 : 0;
        # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic
        # ops in the C code.
        # Fetch the current value of the byte holding this regop's bit,
        # with warnings switched off for the duration (presumably because
        # that byte may not exist yet).
        my $current = do {
            local $^W;
            ord do {
                substr $bitmap, ($ind >> 3);
            }
        };
        substr($bitmap, ($ind >> 3), 1) = chr($current | ($set << ($ind & 7)));

        push @selected, $name[$ind] if $set;
    } while (++$ind < $lastregop);

    # Comma separated name list with a trailing 0, broken into lines after
    # a comma once up to 70 characters have accumulated.
    # NOTE(review): whitespace in this copy appears collapsed, so the
    # continuation indent after \n may originally have been wider -- confirm.
    my $out_string = join ', ', @selected, 0;
    $out_string =~ s/(.{1,70},) /$1\n /g;

    # Each bitmap byte rendered as a two digit hex literal.
    my $out_mask = join ', ', map {sprintf "0x%02X", ord $_} split '', $bitmap;

    return $comment . <<"EOP";
#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7)))
#ifndef DOINIT
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__;
#else
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = {
    $out_string
};
#endif /* DOINIT */
#ifndef DOINIT
EXTCONST U8 PL_${varname}_bitmask[];
#else
EXTCONST U8 PL_${varname}_bitmask[] = {
    $out_mask
};
#endif /* DOINIT */
EOP
}

# Output handle for regnodes.h.  open_new is not defined in the visible
# text; presumably it comes from regen/regen_lib.pl -- confirm.
my $out = open_new('regnodes.h', '>', { by => 'regen/regcomp.pl', from => 'regcomp.sym' });

# NOTE(review): text seems to be missing between '<' and '$lastregop - 1'
# in the next statement; no format string is visible.
printf $out < $lastregop - 1, -$width, REGMATCH_STATE_MAX => $tot - 1 ;

# Regop name => type alias that was recorded for it.
my %rev_type_alias= reverse %type_alias;

# One #define per regop giving its index, with the index in hex and the
# description in a trailing C comment; a regop that has a type alias gets
# a second #define under the alias name with the same index.
for ($ind=0; $ind < $lastregop ; ++$ind) {
    printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", -$width, $name[$ind], $ind, $ind, $rest[$ind];
    if (defined(my $alias= $rev_type_alias{$name[$ind]})) {
        printf $out "#define\t%*s\t%d\t/* %#04x %s */\n", -$width, $alias, $ind, $ind, "type alias";
    }
}

# States are defined relative to REGNODE_MAX; $ind carries on from where
# the loop above stopped.
print $out "\t/* ------------ States ------------- */\n";
for ( ; $ind < $tot ; $ind++) {
    printf $out "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n", -$width, $name[$ind], $ind - $lastregop + 1, $rest[$ind];
}

# NOTE(review): text is missing after the '<' in the next statement.  What
# follows is the body of a loop whose header is not visible; it works on $_
# and uses %definitions, whose declaration is not visible either.
print $out <) {
        # optional leading '_'. Return symbol in $1, and strip it from
        # rest of line
        if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
            chomp;
            my $define = $1;
            # Copy of the remaining line before it is edited further
            # (not referenced again in the visible text).
            my $orig= $_;
            s{ /\* .*? \*/ }{ }x; # Replace comments by a blank

            # Replace any prior defined symbols by their values
            foreach my $key (keys %definitions) {
                s/\b$key\b/$definitions{$key}/g;
            }

            # Remove the U suffix from unsigned int literals
            s/\b([0-9]+)U\b/$1/g;

            # What is left of the line is evaluated as Perl source.
            my $newval = eval $_; # Get numeric definition
            $definitions{$define} = $newval;

            # NOTE(review): text is missing after '/<' in the next
            # statement: the pattern is unterminated, and the code after it
            # belongs to a different loop whose header is not visible.
            next unless $_ =~ /<) {
        # Match "#define PREGf_<NAME> 0x<hex>", optionally followed by a
        # /* ... */ comment.  $1 is the whole symbol, $2 the part after
        # "PREGf_", $3 the hex digits and $4 the comment text (may be undef).
        # Nothing is stripped from the line here.
        if (m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi) {
            chomp;
            my $define = $1;
            my $abbr= $2;
            my $hex= $3;
            my $comment= $4;
            my $val= hex($hex);
            $comment= $comment ? " - $comment" : "";

            # Emit the quoted short name followed by a C comment holding
            # the value, the full symbol and any comment text.
            printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), $val, $define, $comment;
            # Entry counter; its declaration is not in the visible text.
            $REG_INTFLAGS_NAME_SIZE++;
        }
    }
}

# NOTE(review): text is missing after the '<' in the next statement.
print $out <');
my $code;

# Runs of '<' sized from the longest regop name; presumably used as field
# pictures in the format text that is missing below -- confirm.
my $name_fmt = '<' x ($longest_name_length-1);
my $descr_fmt = '<' x (58-$longest_name_length);

# NOTE(review): text is missing after the '<' in the next statement.  The
# block that follows prints every line it reads up to and including the
# one matching "=for regcomp.pl begin"; the handle being read is not
# visible.
eval <) {
        print;
        last if /=for regcomp.pl begin/;
    }

    # Header line of the regenerated table.
    print <<'end';
 # TYPE arg-description [num-args] [longjump-len] DESCRIPTION
end

    # One formatted record per regop.  $_ is the regop index; $code is the
    # code field followed by the args field (if any) and the longjump field
    # (if set).
    for (0..$lastregop-1) {
        $code = "$code[$_] ".($args[$_]||"");
        $code .= " $longj[$_]" if $longj[$_];
        if ($cmnt[$_] ||= "") {
            # Trim multiple blanks
            $cmnt[$_] =~ s/^\n\n+/\n/;
            $cmnt[$_] =~ s/\n\n+$/\n\n/
        }
        write;
    }
    print "\n";

    # Skip the old table: read up to the line matching
    # "=for regcomp.pl end" ...
    while(<$oldguts>) {
        last if /=for regcomp.pl end/;
    }
    # ... then print that line and everything after it.
    do { print } while <$oldguts>;

# NOTE(review): the next line closes a brace and a parenthesis whose
# openings are not in the visible text.
})[0];

# $guts is not declared in the visible text, and close_and_rename is not
# defined in it (presumably from regen/regen_lib.pl -- confirm).
close_and_rename($guts);