diff options
author | Yves Orton <demerphq@gmail.com> | 2006-09-25 22:09:07 +0200 |
---|---|---|
committer | Dave Mitchell <davem@fdisolutions.com> | 2006-09-25 22:01:36 +0000 |
commit | 03363afd3c2c023b9096c021741ecc63bc0de7dd (patch) | |
tree | f27d40ad2d3d91b064c59e82fc9799526b80d6c0 /regcomp.pl | |
parent | 84da74a7adb7db2354917b83df794f4983438fcd (diff) | |
download | perl-03363afd3c2c023b9096c021741ecc63bc0de7dd.tar.gz |
Automate generation of the regmatch() state constants
Subject: Re: Problem with EVAL handling in bleads iterative regex code.
Message-Id: <9b18b3110609251109t4cb1d443y87d7a7dc94fcfc24@mail.gmail.com>
p4raw-id: //depot/perl@28892
Diffstat (limited to 'regcomp.pl')
-rw-r--r-- | regcomp.pl | 133 |
1 files changed, 93 insertions, 40 deletions
diff --git a/regcomp.pl b/regcomp.pl index febd550552..bfea6e25cd 100644 --- a/regcomp.pl +++ b/regcomp.pl @@ -3,49 +3,102 @@ BEGIN { require 'regen_lib.pl'; } #use Fatal qw(open close rename chmod unlink); +use strict; +use warnings; + open DESC, 'regcomp.sym'; -$ind = 0; +my $ind = 0; +my (@name,@rest,@type,@code,@args,@longj); +my ($desc,$lastregop); while (<DESC>) { - next if /^\s*($|\#)/; - $ind++; - chomp; - ($name[$ind], $desc, $rest[$ind]) = split /\t+/, $_, 3; - ($type[$ind], $code[$ind], $args[$ind], $longj[$ind]) - = split /[,\s]\s*/, $desc, 4; + s/#.*$//; + next if /^\s*$/; + s/\s*\z//; + if (/^-+\s*$/) { + $lastregop= $ind; + next; + } + unless ($lastregop) { + $ind++; + ($name[$ind], $desc, $rest[$ind]) = split /\t+/, $_, 3; + ($type[$ind], $code[$ind], $args[$ind], $longj[$ind]) + = split /[,\s]\s*/, $desc, 4; + } else { + my ($type,@lists)=split /\s*\t+\s*/, $_; + die "No list? $type" if !@lists; + foreach my $list (@lists) { + my ($names,$special)=split /:/, $list , 2; + $special ||= ""; + foreach my $name (split /,/,$names) { + my $real= $name eq 'resume' + ? "resume_$type" + : "${type}_$name"; + my @suffix; + if (!$special) { + @suffix=(""); + } elsif ($special=~/\d/) { + @suffix=(1..$special); + } elsif ($special eq 'FAIL') { + @suffix=("","_fail"); + } else { + die "unknown :type ':$special'"; + } + foreach my $suffix (@suffix) { + $ind++; + $name[$ind]="$real$suffix"; + $type[$ind]=$type; + $rest[$ind]="Regmatch state for $type"; + } + } + } + + } +} +my ($width,$rwidth,$twidth)=(0,0,0); +for (1..@name) { + $width=length($name[$_]) if $name[$_] and $width<length($name[$_]); + $twidth=length($type[$_]) if $type[$_] and $twidth<length($type[$_]); + $rwidth=$width if $_ == $lastregop; } +$lastregop ||= $ind; +my $tot = $ind; close DESC; -$tot = $ind; -die "Too many regexp opcodes! Maximum is 256, but there are $tot in file!" - if $tot>256; +die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!" + if $lastregop>256; -$tmp_h = 'tmp_reg.h'; +my $tmp_h = 'tmp_reg.h'; unlink $tmp_h if -f $tmp_h; open OUT, ">$tmp_h"; +#*OUT=\*STDOUT; binmode OUT; -print OUT <<EOP; +printf OUT <<EOP, /* -*- buffer-read-only: t -*- !!!!!!! DO NOT EDIT THIS FILE !!!!!!! This file is built by regcomp.pl from regcomp.sym. Any changes made here will be lost! */ +#define %*s\t%d +#define %*s\t%d + EOP +-$width,REGNODE_MAX=>$lastregop-1,-$width,REGMATCH_STATE_MAX=>$tot-1; $ind = 0; while (++$ind <= $tot) { - $oind = $ind - 1; - $hind = sprintf "%#4x", $oind; - print OUT <<EOP; -#define $name[$ind] $oind /* $hind $rest[$ind] */ -EOP + my $oind = $ind - 1; + printf OUT "#define\t%*s\t%d\t/*%#04x %s*/\n", + -$width, $name[$ind], $ind-1, $ind-1, $rest[$ind]; + print OUT "\n\t/* ------------ States ------------- */\n\n" + if $ind == $lastregop and $lastregop != $tot; } print OUT <<EOP; -#define REGNODE_MAX $oind + #ifndef DOINIT EXTCONST U8 PL_regkind[]; @@ -55,9 +108,10 @@ EOP $ind = 0; while (++$ind <= $tot) { - print OUT <<EOP; - $type[$ind], /* $name[$ind] */ -EOP + printf OUT "\t%*s\t/* %*s */\n", + -1-$twidth, "$type[$ind],", -$width, $name[$ind]; + print OUT "\t/* ------------ States ------------- */\n" + if $ind == $lastregop and $lastregop != $tot; } print OUT <<EOP; @@ -70,13 +124,12 @@ static const U8 regarglen[] = { EOP $ind = 0; -while (++$ind <= $tot) { - $size = 0; +while (++$ind <= $lastregop) { + my $size = 0; $size = "EXTRA_SIZE(struct regnode_$args[$ind])" if $args[$ind]; - print OUT <<EOP; - $size, /* $name[$ind] */ -EOP + printf OUT "\t%*s\t/* %*s */\n", + -37, "$size,",-$rwidth,$name[$ind]; } print OUT <<EOP; @@ -86,37 +139,37 @@ static const char reg_off_by_arg[] = { EOP $ind = 0; -while (++$ind <= $tot) { - $size = $longj[$ind] || 0; +while (++$ind <= $lastregop) { + my $size = $longj[$ind] || 0; - print OUT <<EOP; - $size, /* $name[$ind] */ -EOP + printf OUT "\t%d,\t/* %*s */\n", + $size, -$rwidth, $name[$ind] } print OUT <<EOP; }; #ifdef DEBUGGING -static const char * const reg_name[] = { +extern const char * const reg_name[] = { EOP $ind = 0; while (++$ind <= $tot) { - $hind = sprintf "%#4x", $ind-1; - $size = $longj[$ind] || 0; + my $size = $longj[$ind] || 0; - print OUT <<EOP; - "$name[$ind]", /* $hind */ -EOP + printf OUT "\t%*s\t/* %#04x */\n", + -3-$width,qq("$name[$ind]",),$ind-1; + print OUT "\t/* ------------ States ------------- */\n" + if $ind == $lastregop and $lastregop != $tot; } print OUT <<EOP; }; - -static const int reg_num = $tot; - #endif /* DEBUGGING */ +#else +#ifdef DEBUGGING +extern const char * const reg_name[]; +#endif #endif /* REG_COMP_C */ /* ex: set ro: */ |