diff options
author | Yves Orton <demerphq@gmail.com> | 2006-09-25 22:09:07 +0200 |
---|---|---|
committer | Dave Mitchell <davem@fdisolutions.com> | 2006-09-25 22:01:36 +0000 |
commit | 03363afd3c2c023b9096c021741ecc63bc0de7dd (patch) | |
tree | f27d40ad2d3d91b064c59e82fc9799526b80d6c0 | |
parent | 84da74a7adb7db2354917b83df794f4983438fcd (diff) | |
download | perl-03363afd3c2c023b9096c021741ecc63bc0de7dd.tar.gz |
Automate generation of the regmatch() state constants
Subject: Re: Problem with EVAL handling in blead's iterative regex code.
Message-Id: <9b18b3110609251109t4cb1d443y87d7a7dc94fcfc24@mail.gmail.com>
p4raw-id: //depot/perl@28892
-rw-r--r-- | regcomp.c | 2 | ||||
-rw-r--r-- | regcomp.pl | 133 | ||||
-rw-r--r-- | regcomp.sym | 45 | ||||
-rw-r--r-- | regexec.c | 55 | ||||
-rw-r--r-- | regnodes.h | 764 |
5 files changed, 556 insertions, 443 deletions
@@ -7258,7 +7258,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) GET_RE_DEBUG_FLAGS_DECL; sv_setpvn(sv, "", 0); - if (OP(o) >= reg_num) /* regnode.type is unsigned */ + if (OP(o) > REGNODE_MAX) /* regnode.type is unsigned */ /* It would be nice to FAIL() here, but this may be called from regexec.c, and it would be hard to supply pRExC_state. */ Perl_croak(aTHX_ "Corrupted regexp opcode"); diff --git a/regcomp.pl b/regcomp.pl index febd550552..bfea6e25cd 100644 --- a/regcomp.pl +++ b/regcomp.pl @@ -3,49 +3,102 @@ BEGIN { require 'regen_lib.pl'; } #use Fatal qw(open close rename chmod unlink); +use strict; +use warnings; + open DESC, 'regcomp.sym'; -$ind = 0; +my $ind = 0; +my (@name,@rest,@type,@code,@args,@longj); +my ($desc,$lastregop); while (<DESC>) { - next if /^\s*($|\#)/; - $ind++; - chomp; - ($name[$ind], $desc, $rest[$ind]) = split /\t+/, $_, 3; - ($type[$ind], $code[$ind], $args[$ind], $longj[$ind]) - = split /[,\s]\s*/, $desc, 4; + s/#.*$//; + next if /^\s*$/; + s/\s*\z//; + if (/^-+\s*$/) { + $lastregop= $ind; + next; + } + unless ($lastregop) { + $ind++; + ($name[$ind], $desc, $rest[$ind]) = split /\t+/, $_, 3; + ($type[$ind], $code[$ind], $args[$ind], $longj[$ind]) + = split /[,\s]\s*/, $desc, 4; + } else { + my ($type,@lists)=split /\s*\t+\s*/, $_; + die "No list? $type" if !@lists; + foreach my $list (@lists) { + my ($names,$special)=split /:/, $list , 2; + $special ||= ""; + foreach my $name (split /,/,$names) { + my $real= $name eq 'resume' + ? 
"resume_$type" + : "${type}_$name"; + my @suffix; + if (!$special) { + @suffix=(""); + } elsif ($special=~/\d/) { + @suffix=(1..$special); + } elsif ($special eq 'FAIL') { + @suffix=("","_fail"); + } else { + die "unknown :type ':$special'"; + } + foreach my $suffix (@suffix) { + $ind++; + $name[$ind]="$real$suffix"; + $type[$ind]=$type; + $rest[$ind]="Regmatch state for $type"; + } + } + } + + } +} +my ($width,$rwidth,$twidth)=(0,0,0); +for (1..@name) { + $width=length($name[$_]) if $name[$_] and $width<length($name[$_]); + $twidth=length($type[$_]) if $type[$_] and $twidth<length($type[$_]); + $rwidth=$width if $_ == $lastregop; } +$lastregop ||= $ind; +my $tot = $ind; close DESC; -$tot = $ind; -die "Too many regexp opcodes! Maximum is 256, but there are $tot in file!" - if $tot>256; +die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!" + if $lastregop>256; -$tmp_h = 'tmp_reg.h'; +my $tmp_h = 'tmp_reg.h'; unlink $tmp_h if -f $tmp_h; open OUT, ">$tmp_h"; +#*OUT=\*STDOUT; binmode OUT; -print OUT <<EOP; +printf OUT <<EOP, /* -*- buffer-read-only: t -*- !!!!!!! DO NOT EDIT THIS FILE !!!!!!! This file is built by regcomp.pl from regcomp.sym. Any changes made here will be lost! 
*/ +#define %*s\t%d +#define %*s\t%d + EOP +-$width,REGNODE_MAX=>$lastregop-1,-$width,REGMATCH_STATE_MAX=>$tot-1; $ind = 0; while (++$ind <= $tot) { - $oind = $ind - 1; - $hind = sprintf "%#4x", $oind; - print OUT <<EOP; -#define $name[$ind] $oind /* $hind $rest[$ind] */ -EOP + my $oind = $ind - 1; + printf OUT "#define\t%*s\t%d\t/*%#04x %s*/\n", + -$width, $name[$ind], $ind-1, $ind-1, $rest[$ind]; + print OUT "\n\t/* ------------ States ------------- */\n\n" + if $ind == $lastregop and $lastregop != $tot; } print OUT <<EOP; -#define REGNODE_MAX $oind + #ifndef DOINIT EXTCONST U8 PL_regkind[]; @@ -55,9 +108,10 @@ EOP $ind = 0; while (++$ind <= $tot) { - print OUT <<EOP; - $type[$ind], /* $name[$ind] */ -EOP + printf OUT "\t%*s\t/* %*s */\n", + -1-$twidth, "$type[$ind],", -$width, $name[$ind]; + print OUT "\t/* ------------ States ------------- */\n" + if $ind == $lastregop and $lastregop != $tot; } print OUT <<EOP; @@ -70,13 +124,12 @@ static const U8 regarglen[] = { EOP $ind = 0; -while (++$ind <= $tot) { - $size = 0; +while (++$ind <= $lastregop) { + my $size = 0; $size = "EXTRA_SIZE(struct regnode_$args[$ind])" if $args[$ind]; - print OUT <<EOP; - $size, /* $name[$ind] */ -EOP + printf OUT "\t%*s\t/* %*s */\n", + -37, "$size,",-$rwidth,$name[$ind]; } print OUT <<EOP; @@ -86,37 +139,37 @@ static const char reg_off_by_arg[] = { EOP $ind = 0; -while (++$ind <= $tot) { - $size = $longj[$ind] || 0; +while (++$ind <= $lastregop) { + my $size = $longj[$ind] || 0; - print OUT <<EOP; - $size, /* $name[$ind] */ -EOP + printf OUT "\t%d,\t/* %*s */\n", + $size, -$rwidth, $name[$ind] } print OUT <<EOP; }; #ifdef DEBUGGING -static const char * const reg_name[] = { +extern const char * const reg_name[] = { EOP $ind = 0; while (++$ind <= $tot) { - $hind = sprintf "%#4x", $ind-1; - $size = $longj[$ind] || 0; + my $size = $longj[$ind] || 0; - print OUT <<EOP; - "$name[$ind]", /* $hind */ -EOP + printf OUT "\t%*s\t/* %#04x */\n", + -3-$width,qq("$name[$ind]",),$ind-1; + print OUT 
"\t/* ------------ States ------------- */\n" + if $ind == $lastregop and $lastregop != $tot; } print OUT <<EOP; }; - -static const int reg_num = $tot; - #endif /* DEBUGGING */ +#else +#ifdef DEBUGGING +extern const char * const reg_name[]; +#endif #endif /* REG_COMP_C */ /* ex: set ro: */ diff --git a/regcomp.sym b/regcomp.sym index 25da9f7397..bc6f8e3164 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -1,13 +1,20 @@ -# Format: -# NAME \t TYPE, arg-description [num-args] [longjump-len] \t DESCRIPTION - -# Empty rows and #-comment rows are ignored. - +# regcomp.sym +# +# File has two sections, divided by a line of dashes '-'. +# +# Empty rows after #-comment are removed from input are ignored +# +# First section is for regops, second sectionis for regmatch-states +# # Note that the order in this file is important. # -# Add new regops to the end, and do not re-order the existing ops. +# Format for first section: +# NAME \t TYPE, arg-description [num-args] [longjump-len] \t DESCRIPTION +# # + + #* Exit points (0,1) END END, no End of program. @@ -146,9 +153,13 @@ TRIEC TRIE, trie charclass Same as TRIE, but with embedded charclass data AHOCORASICK TRIE, trie 1 Aho Corasick stclass. flags==type AHOCORASICKC TRIE, trie charclass Same as AHOCORASICK, but with embedded charclass data + + # NEW STUFF ABOVE THIS LINE -- Please update counts below. -#*Special Nodes (65, 66) +################################################################################ + +#*SPECIAL REGOPS (65, 66) # This is not really a node, but an optimized away piece of a "long" node. # To simplify debugging output, we mark it as if it were a node @@ -161,4 +172,22 @@ OPTIMIZED NOTHING,off Placeholder for dump. # "not seen anything to optimize yet". PSEUDO PSEUDO,off Pseudo opcode for internal use. -# NOTHING BELOW HERE
\ No newline at end of file +------------------------------------------------------------------------------- +# Format for second section: +# REGOP \t typelist [ \t typelist] [# Comment] +# typelist= namelist +# = namelist:FAIL +# = name:count + +# Anything below is a state +# +# +TRIE next:FAIL +EVAL AB:FAIL +CURLYX resume +WHILEM resume:6 +BRANCH next:FAIL +CURLYM A,B:FAIL +IFMATCH A:FAIL +CURLY B_min_known,B_min,B_max:FAIL + @@ -2376,32 +2376,6 @@ S_push_slab(pTHX) * allocated since entry are freed. */ -/* *** every FOO_fail should = FOO+1 */ -#define TRIE_next (REGNODE_MAX+1) -#define TRIE_next_fail (REGNODE_MAX+2) -#define EVAL_AB (REGNODE_MAX+3) -#define EVAL_AB_fail (REGNODE_MAX+4) -#define resume_CURLYX (REGNODE_MAX+5) -#define resume_WHILEM1 (REGNODE_MAX+6) -#define resume_WHILEM2 (REGNODE_MAX+7) -#define resume_WHILEM3 (REGNODE_MAX+8) -#define resume_WHILEM4 (REGNODE_MAX+9) -#define resume_WHILEM5 (REGNODE_MAX+10) -#define resume_WHILEM6 (REGNODE_MAX+11) -#define BRANCH_next (REGNODE_MAX+12) -#define BRANCH_next_fail (REGNODE_MAX+13) -#define CURLYM_A (REGNODE_MAX+14) -#define CURLYM_A_fail (REGNODE_MAX+15) -#define CURLYM_B (REGNODE_MAX+16) -#define CURLYM_B_fail (REGNODE_MAX+17) -#define IFMATCH_A (REGNODE_MAX+18) -#define IFMATCH_A_fail (REGNODE_MAX+19) -#define CURLY_B_min_known (REGNODE_MAX+20) -#define CURLY_B_min_known_fail (REGNODE_MAX+21) -#define CURLY_B_min (REGNODE_MAX+22) -#define CURLY_B_min_fail (REGNODE_MAX+23) -#define CURLY_B_max (REGNODE_MAX+24) -#define CURLY_B_max_fail (REGNODE_MAX+25) #define DEBUG_STATE_pp(pp) \ DEBUG_STATE_r({ \ @@ -2409,40 +2383,13 @@ S_push_slab(pTHX) PerlIO_printf(Perl_debug_log, \ " %*s"pp" %s\n", \ depth*2, "", \ - state_names[st->resume_state-REGNODE_MAX-1] ); \ + reg_name[st->resume_state] ); \ }); #define REG_NODE_NUM(x) ((x) ? 
(int)((x)-prog) : -1) #ifdef DEBUGGING -static const char * const state_names[] = { - "TRIE_next", - "TRIE_next_fail", - "EVAL_AB", - "EVAL_AB_fail", - "resume_CURLYX", - "resume_WHILEM1", - "resume_WHILEM2", - "resume_WHILEM3", - "resume_WHILEM4", - "resume_WHILEM5", - "resume_WHILEM6", - "BRANCH_next", - "BRANCH_next_fail", - "CURLYM_A", - "CURLYM_A_fail", - "CURLYM_B", - "CURLYM_B_fail", - "IFMATCH_A", - "IFMATCH_A_fail", - "CURLY_B_min_known", - "CURLY_B_min_known_fail", - "CURLY_B_min", - "CURLY_B_min_fail", - "CURLY_B_max", - "CURLY_B_max_fail" -}; STATIC void S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8, diff --git a/regnodes.h b/regnodes.h index 01985f5a73..b967287a95 100644 --- a/regnodes.h +++ b/regnodes.h @@ -4,365 +4,449 @@ Any changes made here will be lost! */ -#define END 0 /* 0 End of program. */ -#define SUCCEED 1 /* 0x1 Return from a subroutine, basically. */ -#define BOL 2 /* 0x2 Match "" at beginning of line. */ -#define MBOL 3 /* 0x3 Same, assuming multiline. */ -#define SBOL 4 /* 0x4 Same, assuming singleline. */ -#define EOS 5 /* 0x5 Match "" at end of string. */ -#define EOL 6 /* 0x6 Match "" at end of line. */ -#define MEOL 7 /* 0x7 Same, assuming multiline. */ -#define SEOL 8 /* 0x8 Same, assuming singleline. */ -#define BOUND 9 /* 0x9 Match "" at any word boundary */ -#define BOUNDL 10 /* 0xa Match "" at any word boundary */ -#define NBOUND 11 /* 0xb Match "" at any word non-boundary */ -#define NBOUNDL 12 /* 0xc Match "" at any word non-boundary */ -#define GPOS 13 /* 0xd Matches where last m//g left off. */ -#define REG_ANY 14 /* 0xe Match any one character (except newline). */ -#define SANY 15 /* 0xf Match any one character. */ -#define CANY 16 /* 0x10 Match any one byte. */ -#define ANYOF 17 /* 0x11 Match character in (or not in) this class. 
*/ -#define ALNUM 18 /* 0x12 Match any alphanumeric character */ -#define ALNUML 19 /* 0x13 Match any alphanumeric char in locale */ -#define NALNUM 20 /* 0x14 Match any non-alphanumeric character */ -#define NALNUML 21 /* 0x15 Match any non-alphanumeric char in locale */ -#define SPACE 22 /* 0x16 Match any whitespace character */ -#define SPACEL 23 /* 0x17 Match any whitespace char in locale */ -#define NSPACE 24 /* 0x18 Match any non-whitespace character */ -#define NSPACEL 25 /* 0x19 Match any non-whitespace char in locale */ -#define DIGIT 26 /* 0x1a Match any numeric character */ -#define DIGITL 27 /* 0x1b Match any numeric character in locale */ -#define NDIGIT 28 /* 0x1c Match any non-numeric character */ -#define NDIGITL 29 /* 0x1d Match any non-numeric character in locale */ -#define CLUMP 30 /* 0x1e Match any combining character sequence */ -#define BRANCH 31 /* 0x1f Match this alternative, or the next... */ -#define BACK 32 /* 0x20 Match "", "next" ptr points backward. */ -#define EXACT 33 /* 0x21 Match this string (preceded by length). */ -#define EXACTF 34 /* 0x22 Match this string, folded (prec. by length). */ -#define EXACTFL 35 /* 0x23 Match this string, folded in locale (w/len). */ -#define NOTHING 36 /* 0x24 Match empty string. */ -#define TAIL 37 /* 0x25 Match empty string. Can jump here from outside. */ -#define STAR 38 /* 0x26 Match this (simple) thing 0 or more times. */ -#define PLUS 39 /* 0x27 Match this (simple) thing 1 or more times. */ -#define CURLY 40 /* 0x28 Match this simple thing {n,m} times. */ -#define CURLYN 41 /* 0x29 Match next-after-this simple thing */ -#define CURLYM 42 /* 0x2a Match this medium-complex thing {n,m} times. */ -#define CURLYX 43 /* 0x2b Match this complex thing {n,m} times. */ -#define WHILEM 44 /* 0x2c Do curly processing and see if rest matches. */ -#define OPEN 45 /* 0x2d Mark this point in input as start of #n. */ -#define CLOSE 46 /* 0x2e Analogous to OPEN. 
*/ -#define REF 47 /* 0x2f Match some already matched string */ -#define REFF 48 /* 0x30 Match already matched string, folded */ -#define REFFL 49 /* 0x31 Match already matched string, folded in loc. */ -#define IFMATCH 50 /* 0x32 Succeeds if the following matches. */ -#define UNLESSM 51 /* 0x33 Fails if the following matches. */ -#define SUSPEND 52 /* 0x34 "Independent" sub-RE. */ -#define IFTHEN 53 /* 0x35 Switch, should be preceeded by switcher . */ -#define GROUPP 54 /* 0x36 Whether the group matched. */ -#define LONGJMP 55 /* 0x37 Jump far away. */ -#define BRANCHJ 56 /* 0x38 BRANCH with long offset. */ -#define EVAL 57 /* 0x39 Execute some Perl code. */ -#define MINMOD 58 /* 0x3a Next operator is not greedy. */ -#define LOGICAL 59 /* 0x3b Next opcode should set the flag only. */ -#define RENUM 60 /* 0x3c Group with independently numbered parens. */ -#define TRIE 61 /* 0x3d Match many EXACT(FL?)? at once. flags==type */ -#define TRIEC 62 /* 0x3e Same as TRIE, but with embedded charclass data */ -#define AHOCORASICK 63 /* 0x3f Aho Corasick stclass. flags==type */ -#define AHOCORASICKC 64 /* 0x40 Same as AHOCORASICK, but with embedded charclass data */ -#define OPTIMIZED 65 /* 0x41 Placeholder for dump. */ -#define PSEUDO 66 /* 0x42 Pseudo opcode for internal use. 
*/ -#define REGNODE_MAX 66 +#define REGNODE_MAX 66 +#define REGMATCH_STATE_MAX 91 + +#define END 0 /*0000 End of program.*/ +#define SUCCEED 1 /*0x01 Return from a subroutine, basically.*/ +#define BOL 2 /*0x02 Match "" at beginning of line.*/ +#define MBOL 3 /*0x03 Same, assuming multiline.*/ +#define SBOL 4 /*0x04 Same, assuming singleline.*/ +#define EOS 5 /*0x05 Match "" at end of string.*/ +#define EOL 6 /*0x06 Match "" at end of line.*/ +#define MEOL 7 /*0x07 Same, assuming multiline.*/ +#define SEOL 8 /*0x08 Same, assuming singleline.*/ +#define BOUND 9 /*0x09 Match "" at any word boundary*/ +#define BOUNDL 10 /*0x0a Match "" at any word boundary*/ +#define NBOUND 11 /*0x0b Match "" at any word non-boundary*/ +#define NBOUNDL 12 /*0x0c Match "" at any word non-boundary*/ +#define GPOS 13 /*0x0d Matches where last m//g left off.*/ +#define REG_ANY 14 /*0x0e Match any one character (except newline).*/ +#define SANY 15 /*0x0f Match any one character.*/ +#define CANY 16 /*0x10 Match any one byte.*/ +#define ANYOF 17 /*0x11 Match character in (or not in) this class.*/ +#define ALNUM 18 /*0x12 Match any alphanumeric character*/ +#define ALNUML 19 /*0x13 Match any alphanumeric char in locale*/ +#define NALNUM 20 /*0x14 Match any non-alphanumeric character*/ +#define NALNUML 21 /*0x15 Match any non-alphanumeric char in locale*/ +#define SPACE 22 /*0x16 Match any whitespace character*/ +#define SPACEL 23 /*0x17 Match any whitespace char in locale*/ +#define NSPACE 24 /*0x18 Match any non-whitespace character*/ +#define NSPACEL 25 /*0x19 Match any non-whitespace char in locale*/ +#define DIGIT 26 /*0x1a Match any numeric character*/ +#define DIGITL 27 /*0x1b Match any numeric character in locale*/ +#define NDIGIT 28 /*0x1c Match any non-numeric character*/ +#define NDIGITL 29 /*0x1d Match any non-numeric character in locale*/ +#define CLUMP 30 /*0x1e Match any combining character sequence*/ +#define BRANCH 31 /*0x1f Match this alternative, or the next...*/ +#define 
BACK 32 /*0x20 Match "", "next" ptr points backward.*/ +#define EXACT 33 /*0x21 Match this string (preceded by length).*/ +#define EXACTF 34 /*0x22 Match this string, folded (prec. by length).*/ +#define EXACTFL 35 /*0x23 Match this string, folded in locale (w/len).*/ +#define NOTHING 36 /*0x24 Match empty string.*/ +#define TAIL 37 /*0x25 Match empty string. Can jump here from outside.*/ +#define STAR 38 /*0x26 Match this (simple) thing 0 or more times.*/ +#define PLUS 39 /*0x27 Match this (simple) thing 1 or more times.*/ +#define CURLY 40 /*0x28 Match this simple thing {n,m} times.*/ +#define CURLYN 41 /*0x29 Match next-after-this simple thing*/ +#define CURLYM 42 /*0x2a Match this medium-complex thing {n,m} times.*/ +#define CURLYX 43 /*0x2b Match this complex thing {n,m} times.*/ +#define WHILEM 44 /*0x2c Do curly processing and see if rest matches.*/ +#define OPEN 45 /*0x2d Mark this point in input as start of*/ +#define CLOSE 46 /*0x2e Analogous to OPEN.*/ +#define REF 47 /*0x2f Match some already matched string*/ +#define REFF 48 /*0x30 Match already matched string, folded*/ +#define REFFL 49 /*0x31 Match already matched string, folded in loc.*/ +#define IFMATCH 50 /*0x32 Succeeds if the following matches.*/ +#define UNLESSM 51 /*0x33 Fails if the following matches.*/ +#define SUSPEND 52 /*0x34 "Independent" sub-RE.*/ +#define IFTHEN 53 /*0x35 Switch, should be preceeded by switcher .*/ +#define GROUPP 54 /*0x36 Whether the group matched.*/ +#define LONGJMP 55 /*0x37 Jump far away.*/ +#define BRANCHJ 56 /*0x38 BRANCH with long offset.*/ +#define EVAL 57 /*0x39 Execute some Perl code.*/ +#define MINMOD 58 /*0x3a Next operator is not greedy.*/ +#define LOGICAL 59 /*0x3b Next opcode should set the flag only.*/ +#define RENUM 60 /*0x3c Group with independently numbered parens.*/ +#define TRIE 61 /*0x3d Match many EXACT(FL?)? at once. 
flags==type*/ +#define TRIEC 62 /*0x3e Same as TRIE, but with embedded charclass data*/ +#define AHOCORASICK 63 /*0x3f Aho Corasick stclass. flags==type*/ +#define AHOCORASICKC 64 /*0x40 Same as AHOCORASICK, but with embedded charclass data*/ +#define OPTIMIZED 65 /*0x41 Placeholder for dump.*/ +#define PSEUDO 66 /*0x42 Pseudo opcode for internal use.*/ + + /* ------------ States ------------- */ + +#define TRIE_next 67 /*0x43 Regmatch state for TRIE*/ +#define TRIE_next_fail 68 /*0x44 Regmatch state for TRIE*/ +#define EVAL_AB 69 /*0x45 Regmatch state for EVAL*/ +#define EVAL_AB_fail 70 /*0x46 Regmatch state for EVAL*/ +#define resume_CURLYX 71 /*0x47 Regmatch state for CURLYX*/ +#define resume_WHILEM1 72 /*0x48 Regmatch state for WHILEM*/ +#define resume_WHILEM2 73 /*0x49 Regmatch state for WHILEM*/ +#define resume_WHILEM3 74 /*0x4a Regmatch state for WHILEM*/ +#define resume_WHILEM4 75 /*0x4b Regmatch state for WHILEM*/ +#define resume_WHILEM5 76 /*0x4c Regmatch state for WHILEM*/ +#define resume_WHILEM6 77 /*0x4d Regmatch state for WHILEM*/ +#define BRANCH_next 78 /*0x4e Regmatch state for BRANCH*/ +#define BRANCH_next_fail 79 /*0x4f Regmatch state for BRANCH*/ +#define CURLYM_A 80 /*0x50 Regmatch state for CURLYM*/ +#define CURLYM_A_fail 81 /*0x51 Regmatch state for CURLYM*/ +#define CURLYM_B 82 /*0x52 Regmatch state for CURLYM*/ +#define CURLYM_B_fail 83 /*0x53 Regmatch state for CURLYM*/ +#define IFMATCH_A 84 /*0x54 Regmatch state for IFMATCH*/ +#define IFMATCH_A_fail 85 /*0x55 Regmatch state for IFMATCH*/ +#define CURLY_B_min_known 86 /*0x56 Regmatch state for CURLY*/ +#define CURLY_B_min_known_fail 87 /*0x57 Regmatch state for CURLY*/ +#define CURLY_B_min 88 /*0x58 Regmatch state for CURLY*/ +#define CURLY_B_min_fail 89 /*0x59 Regmatch state for CURLY*/ +#define CURLY_B_max 90 /*0x5a Regmatch state for CURLY*/ +#define CURLY_B_max_fail 91 /*0x5b Regmatch state for CURLY*/ + #ifndef DOINIT EXTCONST U8 PL_regkind[]; #else EXTCONST U8 PL_regkind[] = { - END, 
/* END */ - END, /* SUCCEED */ - BOL, /* BOL */ - BOL, /* MBOL */ - BOL, /* SBOL */ - EOL, /* EOS */ - EOL, /* EOL */ - EOL, /* MEOL */ - EOL, /* SEOL */ - BOUND, /* BOUND */ - BOUND, /* BOUNDL */ - NBOUND, /* NBOUND */ - NBOUND, /* NBOUNDL */ - GPOS, /* GPOS */ - REG_ANY, /* REG_ANY */ - REG_ANY, /* SANY */ - REG_ANY, /* CANY */ - ANYOF, /* ANYOF */ - ALNUM, /* ALNUM */ - ALNUM, /* ALNUML */ - NALNUM, /* NALNUM */ - NALNUM, /* NALNUML */ - SPACE, /* SPACE */ - SPACE, /* SPACEL */ - NSPACE, /* NSPACE */ - NSPACE, /* NSPACEL */ - DIGIT, /* DIGIT */ - DIGIT, /* DIGITL */ - NDIGIT, /* NDIGIT */ - NDIGIT, /* NDIGITL */ - CLUMP, /* CLUMP */ - BRANCH, /* BRANCH */ - BACK, /* BACK */ - EXACT, /* EXACT */ - EXACT, /* EXACTF */ - EXACT, /* EXACTFL */ - NOTHING, /* NOTHING */ - NOTHING, /* TAIL */ - STAR, /* STAR */ - PLUS, /* PLUS */ - CURLY, /* CURLY */ - CURLY, /* CURLYN */ - CURLY, /* CURLYM */ - CURLY, /* CURLYX */ - WHILEM, /* WHILEM */ - OPEN, /* OPEN */ - CLOSE, /* CLOSE */ - REF, /* REF */ - REF, /* REFF */ - REF, /* REFFL */ - BRANCHJ, /* IFMATCH */ - BRANCHJ, /* UNLESSM */ - BRANCHJ, /* SUSPEND */ - BRANCHJ, /* IFTHEN */ - GROUPP, /* GROUPP */ - LONGJMP, /* LONGJMP */ - BRANCHJ, /* BRANCHJ */ - EVAL, /* EVAL */ - MINMOD, /* MINMOD */ - LOGICAL, /* LOGICAL */ - BRANCHJ, /* RENUM */ - TRIE, /* TRIE */ - TRIE, /* TRIEC */ - TRIE, /* AHOCORASICK */ - TRIE, /* AHOCORASICKC */ - NOTHING, /* OPTIMIZED */ - PSEUDO, /* PSEUDO */ + END, /* END */ + END, /* SUCCEED */ + BOL, /* BOL */ + BOL, /* MBOL */ + BOL, /* SBOL */ + EOL, /* EOS */ + EOL, /* EOL */ + EOL, /* MEOL */ + EOL, /* SEOL */ + BOUND, /* BOUND */ + BOUND, /* BOUNDL */ + NBOUND, /* NBOUND */ + NBOUND, /* NBOUNDL */ + GPOS, /* GPOS */ + REG_ANY, /* REG_ANY */ + REG_ANY, /* SANY */ + REG_ANY, /* CANY */ + ANYOF, /* ANYOF */ + ALNUM, /* ALNUM */ + ALNUM, /* ALNUML */ + NALNUM, /* NALNUM */ + NALNUM, /* NALNUML */ + SPACE, /* SPACE */ + SPACE, /* SPACEL */ + NSPACE, /* NSPACE */ + NSPACE, /* NSPACEL */ + DIGIT, /* 
DIGIT */ + DIGIT, /* DIGITL */ + NDIGIT, /* NDIGIT */ + NDIGIT, /* NDIGITL */ + CLUMP, /* CLUMP */ + BRANCH, /* BRANCH */ + BACK, /* BACK */ + EXACT, /* EXACT */ + EXACT, /* EXACTF */ + EXACT, /* EXACTFL */ + NOTHING, /* NOTHING */ + NOTHING, /* TAIL */ + STAR, /* STAR */ + PLUS, /* PLUS */ + CURLY, /* CURLY */ + CURLY, /* CURLYN */ + CURLY, /* CURLYM */ + CURLY, /* CURLYX */ + WHILEM, /* WHILEM */ + OPEN, /* OPEN */ + CLOSE, /* CLOSE */ + REF, /* REF */ + REF, /* REFF */ + REF, /* REFFL */ + BRANCHJ, /* IFMATCH */ + BRANCHJ, /* UNLESSM */ + BRANCHJ, /* SUSPEND */ + BRANCHJ, /* IFTHEN */ + GROUPP, /* GROUPP */ + LONGJMP, /* LONGJMP */ + BRANCHJ, /* BRANCHJ */ + EVAL, /* EVAL */ + MINMOD, /* MINMOD */ + LOGICAL, /* LOGICAL */ + BRANCHJ, /* RENUM */ + TRIE, /* TRIE */ + TRIE, /* TRIEC */ + TRIE, /* AHOCORASICK */ + TRIE, /* AHOCORASICKC */ + NOTHING, /* OPTIMIZED */ + PSEUDO, /* PSEUDO */ + /* ------------ States ------------- */ + TRIE, /* TRIE_next */ + TRIE, /* TRIE_next_fail */ + EVAL, /* EVAL_AB */ + EVAL, /* EVAL_AB_fail */ + CURLYX, /* resume_CURLYX */ + WHILEM, /* resume_WHILEM1 */ + WHILEM, /* resume_WHILEM2 */ + WHILEM, /* resume_WHILEM3 */ + WHILEM, /* resume_WHILEM4 */ + WHILEM, /* resume_WHILEM5 */ + WHILEM, /* resume_WHILEM6 */ + BRANCH, /* BRANCH_next */ + BRANCH, /* BRANCH_next_fail */ + CURLYM, /* CURLYM_A */ + CURLYM, /* CURLYM_A_fail */ + CURLYM, /* CURLYM_B */ + CURLYM, /* CURLYM_B_fail */ + IFMATCH, /* IFMATCH_A */ + IFMATCH, /* IFMATCH_A_fail */ + CURLY, /* CURLY_B_min_known */ + CURLY, /* CURLY_B_min_known_fail */ + CURLY, /* CURLY_B_min */ + CURLY, /* CURLY_B_min_fail */ + CURLY, /* CURLY_B_max */ + CURLY, /* CURLY_B_max_fail */ }; #endif #ifdef REG_COMP_C static const U8 regarglen[] = { - 0, /* END */ - 0, /* SUCCEED */ - 0, /* BOL */ - 0, /* MBOL */ - 0, /* SBOL */ - 0, /* EOS */ - 0, /* EOL */ - 0, /* MEOL */ - 0, /* SEOL */ - 0, /* BOUND */ - 0, /* BOUNDL */ - 0, /* NBOUND */ - 0, /* NBOUNDL */ - 0, /* GPOS */ - 0, /* REG_ANY */ - 0, /* 
SANY */ - 0, /* CANY */ - 0, /* ANYOF */ - 0, /* ALNUM */ - 0, /* ALNUML */ - 0, /* NALNUM */ - 0, /* NALNUML */ - 0, /* SPACE */ - 0, /* SPACEL */ - 0, /* NSPACE */ - 0, /* NSPACEL */ - 0, /* DIGIT */ - 0, /* DIGITL */ - 0, /* NDIGIT */ - 0, /* NDIGITL */ - 0, /* CLUMP */ - 0, /* BRANCH */ - 0, /* BACK */ - 0, /* EXACT */ - 0, /* EXACTF */ - 0, /* EXACTFL */ - 0, /* NOTHING */ - 0, /* TAIL */ - 0, /* STAR */ - 0, /* PLUS */ - EXTRA_SIZE(struct regnode_2), /* CURLY */ - EXTRA_SIZE(struct regnode_2), /* CURLYN */ - EXTRA_SIZE(struct regnode_2), /* CURLYM */ - EXTRA_SIZE(struct regnode_2), /* CURLYX */ - 0, /* WHILEM */ - EXTRA_SIZE(struct regnode_1), /* OPEN */ - EXTRA_SIZE(struct regnode_1), /* CLOSE */ - EXTRA_SIZE(struct regnode_1), /* REF */ - EXTRA_SIZE(struct regnode_1), /* REFF */ - EXTRA_SIZE(struct regnode_1), /* REFFL */ - EXTRA_SIZE(struct regnode_1), /* IFMATCH */ - EXTRA_SIZE(struct regnode_1), /* UNLESSM */ - EXTRA_SIZE(struct regnode_1), /* SUSPEND */ - EXTRA_SIZE(struct regnode_1), /* IFTHEN */ - EXTRA_SIZE(struct regnode_1), /* GROUPP */ - EXTRA_SIZE(struct regnode_1), /* LONGJMP */ - EXTRA_SIZE(struct regnode_1), /* BRANCHJ */ - EXTRA_SIZE(struct regnode_1), /* EVAL */ - 0, /* MINMOD */ - 0, /* LOGICAL */ - EXTRA_SIZE(struct regnode_1), /* RENUM */ - EXTRA_SIZE(struct regnode_1), /* TRIE */ - EXTRA_SIZE(struct regnode_charclass), /* TRIEC */ - EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */ - EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */ - 0, /* OPTIMIZED */ - 0, /* PSEUDO */ + 0, /* END */ + 0, /* SUCCEED */ + 0, /* BOL */ + 0, /* MBOL */ + 0, /* SBOL */ + 0, /* EOS */ + 0, /* EOL */ + 0, /* MEOL */ + 0, /* SEOL */ + 0, /* BOUND */ + 0, /* BOUNDL */ + 0, /* NBOUND */ + 0, /* NBOUNDL */ + 0, /* GPOS */ + 0, /* REG_ANY */ + 0, /* SANY */ + 0, /* CANY */ + 0, /* ANYOF */ + 0, /* ALNUM */ + 0, /* ALNUML */ + 0, /* NALNUM */ + 0, /* NALNUML */ + 0, /* SPACE */ + 0, /* SPACEL */ + 0, /* NSPACE */ + 0, /* NSPACEL */ + 0, /* DIGIT */ + 0, /* 
DIGITL */ + 0, /* NDIGIT */ + 0, /* NDIGITL */ + 0, /* CLUMP */ + 0, /* BRANCH */ + 0, /* BACK */ + 0, /* EXACT */ + 0, /* EXACTF */ + 0, /* EXACTFL */ + 0, /* NOTHING */ + 0, /* TAIL */ + 0, /* STAR */ + 0, /* PLUS */ + EXTRA_SIZE(struct regnode_2), /* CURLY */ + EXTRA_SIZE(struct regnode_2), /* CURLYN */ + EXTRA_SIZE(struct regnode_2), /* CURLYM */ + EXTRA_SIZE(struct regnode_2), /* CURLYX */ + 0, /* WHILEM */ + EXTRA_SIZE(struct regnode_1), /* OPEN */ + EXTRA_SIZE(struct regnode_1), /* CLOSE */ + EXTRA_SIZE(struct regnode_1), /* REF */ + EXTRA_SIZE(struct regnode_1), /* REFF */ + EXTRA_SIZE(struct regnode_1), /* REFFL */ + EXTRA_SIZE(struct regnode_1), /* IFMATCH */ + EXTRA_SIZE(struct regnode_1), /* UNLESSM */ + EXTRA_SIZE(struct regnode_1), /* SUSPEND */ + EXTRA_SIZE(struct regnode_1), /* IFTHEN */ + EXTRA_SIZE(struct regnode_1), /* GROUPP */ + EXTRA_SIZE(struct regnode_1), /* LONGJMP */ + EXTRA_SIZE(struct regnode_1), /* BRANCHJ */ + EXTRA_SIZE(struct regnode_1), /* EVAL */ + 0, /* MINMOD */ + 0, /* LOGICAL */ + EXTRA_SIZE(struct regnode_1), /* RENUM */ + EXTRA_SIZE(struct regnode_1), /* TRIE */ + EXTRA_SIZE(struct regnode_charclass), /* TRIEC */ + EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */ + EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */ + 0, /* OPTIMIZED */ + 0, /* PSEUDO */ }; static const char reg_off_by_arg[] = { - 0, /* END */ - 0, /* SUCCEED */ - 0, /* BOL */ - 0, /* MBOL */ - 0, /* SBOL */ - 0, /* EOS */ - 0, /* EOL */ - 0, /* MEOL */ - 0, /* SEOL */ - 0, /* BOUND */ - 0, /* BOUNDL */ - 0, /* NBOUND */ - 0, /* NBOUNDL */ - 0, /* GPOS */ - 0, /* REG_ANY */ - 0, /* SANY */ - 0, /* CANY */ - 0, /* ANYOF */ - 0, /* ALNUM */ - 0, /* ALNUML */ - 0, /* NALNUM */ - 0, /* NALNUML */ - 0, /* SPACE */ - 0, /* SPACEL */ - 0, /* NSPACE */ - 0, /* NSPACEL */ - 0, /* DIGIT */ - 0, /* DIGITL */ - 0, /* NDIGIT */ - 0, /* NDIGITL */ - 0, /* CLUMP */ - 0, /* BRANCH */ - 0, /* BACK */ - 0, /* EXACT */ - 0, /* EXACTF */ - 0, /* EXACTFL */ - 0, /* NOTHING */ 
- 0, /* TAIL */ - 0, /* STAR */ - 0, /* PLUS */ - 0, /* CURLY */ - 0, /* CURLYN */ - 0, /* CURLYM */ - 0, /* CURLYX */ - 0, /* WHILEM */ - 0, /* OPEN */ - 0, /* CLOSE */ - 0, /* REF */ - 0, /* REFF */ - 0, /* REFFL */ - 2, /* IFMATCH */ - 2, /* UNLESSM */ - 1, /* SUSPEND */ - 1, /* IFTHEN */ - 0, /* GROUPP */ - 1, /* LONGJMP */ - 1, /* BRANCHJ */ - 0, /* EVAL */ - 0, /* MINMOD */ - 0, /* LOGICAL */ - 1, /* RENUM */ - 0, /* TRIE */ - 0, /* TRIEC */ - 0, /* AHOCORASICK */ - 0, /* AHOCORASICKC */ - 0, /* OPTIMIZED */ - 0, /* PSEUDO */ + 0, /* END */ + 0, /* SUCCEED */ + 0, /* BOL */ + 0, /* MBOL */ + 0, /* SBOL */ + 0, /* EOS */ + 0, /* EOL */ + 0, /* MEOL */ + 0, /* SEOL */ + 0, /* BOUND */ + 0, /* BOUNDL */ + 0, /* NBOUND */ + 0, /* NBOUNDL */ + 0, /* GPOS */ + 0, /* REG_ANY */ + 0, /* SANY */ + 0, /* CANY */ + 0, /* ANYOF */ + 0, /* ALNUM */ + 0, /* ALNUML */ + 0, /* NALNUM */ + 0, /* NALNUML */ + 0, /* SPACE */ + 0, /* SPACEL */ + 0, /* NSPACE */ + 0, /* NSPACEL */ + 0, /* DIGIT */ + 0, /* DIGITL */ + 0, /* NDIGIT */ + 0, /* NDIGITL */ + 0, /* CLUMP */ + 0, /* BRANCH */ + 0, /* BACK */ + 0, /* EXACT */ + 0, /* EXACTF */ + 0, /* EXACTFL */ + 0, /* NOTHING */ + 0, /* TAIL */ + 0, /* STAR */ + 0, /* PLUS */ + 0, /* CURLY */ + 0, /* CURLYN */ + 0, /* CURLYM */ + 0, /* CURLYX */ + 0, /* WHILEM */ + 0, /* OPEN */ + 0, /* CLOSE */ + 0, /* REF */ + 0, /* REFF */ + 0, /* REFFL */ + 2, /* IFMATCH */ + 2, /* UNLESSM */ + 1, /* SUSPEND */ + 1, /* IFTHEN */ + 0, /* GROUPP */ + 1, /* LONGJMP */ + 1, /* BRANCHJ */ + 0, /* EVAL */ + 0, /* MINMOD */ + 0, /* LOGICAL */ + 1, /* RENUM */ + 0, /* TRIE */ + 0, /* TRIEC */ + 0, /* AHOCORASICK */ + 0, /* AHOCORASICKC */ + 0, /* OPTIMIZED */ + 0, /* PSEUDO */ }; #ifdef DEBUGGING -static const char * const reg_name[] = { - "END", /* 0 */ - "SUCCEED", /* 0x1 */ - "BOL", /* 0x2 */ - "MBOL", /* 0x3 */ - "SBOL", /* 0x4 */ - "EOS", /* 0x5 */ - "EOL", /* 0x6 */ - "MEOL", /* 0x7 */ - "SEOL", /* 0x8 */ - "BOUND", /* 0x9 */ - "BOUNDL", /* 0xa */ - 
"NBOUND", /* 0xb */ - "NBOUNDL", /* 0xc */ - "GPOS", /* 0xd */ - "REG_ANY", /* 0xe */ - "SANY", /* 0xf */ - "CANY", /* 0x10 */ - "ANYOF", /* 0x11 */ - "ALNUM", /* 0x12 */ - "ALNUML", /* 0x13 */ - "NALNUM", /* 0x14 */ - "NALNUML", /* 0x15 */ - "SPACE", /* 0x16 */ - "SPACEL", /* 0x17 */ - "NSPACE", /* 0x18 */ - "NSPACEL", /* 0x19 */ - "DIGIT", /* 0x1a */ - "DIGITL", /* 0x1b */ - "NDIGIT", /* 0x1c */ - "NDIGITL", /* 0x1d */ - "CLUMP", /* 0x1e */ - "BRANCH", /* 0x1f */ - "BACK", /* 0x20 */ - "EXACT", /* 0x21 */ - "EXACTF", /* 0x22 */ - "EXACTFL", /* 0x23 */ - "NOTHING", /* 0x24 */ - "TAIL", /* 0x25 */ - "STAR", /* 0x26 */ - "PLUS", /* 0x27 */ - "CURLY", /* 0x28 */ - "CURLYN", /* 0x29 */ - "CURLYM", /* 0x2a */ - "CURLYX", /* 0x2b */ - "WHILEM", /* 0x2c */ - "OPEN", /* 0x2d */ - "CLOSE", /* 0x2e */ - "REF", /* 0x2f */ - "REFF", /* 0x30 */ - "REFFL", /* 0x31 */ - "IFMATCH", /* 0x32 */ - "UNLESSM", /* 0x33 */ - "SUSPEND", /* 0x34 */ - "IFTHEN", /* 0x35 */ - "GROUPP", /* 0x36 */ - "LONGJMP", /* 0x37 */ - "BRANCHJ", /* 0x38 */ - "EVAL", /* 0x39 */ - "MINMOD", /* 0x3a */ - "LOGICAL", /* 0x3b */ - "RENUM", /* 0x3c */ - "TRIE", /* 0x3d */ - "TRIEC", /* 0x3e */ - "AHOCORASICK", /* 0x3f */ - "AHOCORASICKC", /* 0x40 */ - "OPTIMIZED", /* 0x41 */ - "PSEUDO", /* 0x42 */ +extern const char * const reg_name[] = { + "END", /* 0000 */ + "SUCCEED", /* 0x01 */ + "BOL", /* 0x02 */ + "MBOL", /* 0x03 */ + "SBOL", /* 0x04 */ + "EOS", /* 0x05 */ + "EOL", /* 0x06 */ + "MEOL", /* 0x07 */ + "SEOL", /* 0x08 */ + "BOUND", /* 0x09 */ + "BOUNDL", /* 0x0a */ + "NBOUND", /* 0x0b */ + "NBOUNDL", /* 0x0c */ + "GPOS", /* 0x0d */ + "REG_ANY", /* 0x0e */ + "SANY", /* 0x0f */ + "CANY", /* 0x10 */ + "ANYOF", /* 0x11 */ + "ALNUM", /* 0x12 */ + "ALNUML", /* 0x13 */ + "NALNUM", /* 0x14 */ + "NALNUML", /* 0x15 */ + "SPACE", /* 0x16 */ + "SPACEL", /* 0x17 */ + "NSPACE", /* 0x18 */ + "NSPACEL", /* 0x19 */ + "DIGIT", /* 0x1a */ + "DIGITL", /* 0x1b */ + "NDIGIT", /* 0x1c */ + "NDIGITL", /* 0x1d */ + "CLUMP", /* 0x1e 
*/ + "BRANCH", /* 0x1f */ + "BACK", /* 0x20 */ + "EXACT", /* 0x21 */ + "EXACTF", /* 0x22 */ + "EXACTFL", /* 0x23 */ + "NOTHING", /* 0x24 */ + "TAIL", /* 0x25 */ + "STAR", /* 0x26 */ + "PLUS", /* 0x27 */ + "CURLY", /* 0x28 */ + "CURLYN", /* 0x29 */ + "CURLYM", /* 0x2a */ + "CURLYX", /* 0x2b */ + "WHILEM", /* 0x2c */ + "OPEN", /* 0x2d */ + "CLOSE", /* 0x2e */ + "REF", /* 0x2f */ + "REFF", /* 0x30 */ + "REFFL", /* 0x31 */ + "IFMATCH", /* 0x32 */ + "UNLESSM", /* 0x33 */ + "SUSPEND", /* 0x34 */ + "IFTHEN", /* 0x35 */ + "GROUPP", /* 0x36 */ + "LONGJMP", /* 0x37 */ + "BRANCHJ", /* 0x38 */ + "EVAL", /* 0x39 */ + "MINMOD", /* 0x3a */ + "LOGICAL", /* 0x3b */ + "RENUM", /* 0x3c */ + "TRIE", /* 0x3d */ + "TRIEC", /* 0x3e */ + "AHOCORASICK", /* 0x3f */ + "AHOCORASICKC", /* 0x40 */ + "OPTIMIZED", /* 0x41 */ + "PSEUDO", /* 0x42 */ + /* ------------ States ------------- */ + "TRIE_next", /* 0x43 */ + "TRIE_next_fail", /* 0x44 */ + "EVAL_AB", /* 0x45 */ + "EVAL_AB_fail", /* 0x46 */ + "resume_CURLYX", /* 0x47 */ + "resume_WHILEM1", /* 0x48 */ + "resume_WHILEM2", /* 0x49 */ + "resume_WHILEM3", /* 0x4a */ + "resume_WHILEM4", /* 0x4b */ + "resume_WHILEM5", /* 0x4c */ + "resume_WHILEM6", /* 0x4d */ + "BRANCH_next", /* 0x4e */ + "BRANCH_next_fail", /* 0x4f */ + "CURLYM_A", /* 0x50 */ + "CURLYM_A_fail", /* 0x51 */ + "CURLYM_B", /* 0x52 */ + "CURLYM_B_fail", /* 0x53 */ + "IFMATCH_A", /* 0x54 */ + "IFMATCH_A_fail", /* 0x55 */ + "CURLY_B_min_known", /* 0x56 */ + "CURLY_B_min_known_fail", /* 0x57 */ + "CURLY_B_min", /* 0x58 */ + "CURLY_B_min_fail", /* 0x59 */ + "CURLY_B_max", /* 0x5a */ + "CURLY_B_max_fail", /* 0x5b */ }; - -static const int reg_num = 67; - #endif /* DEBUGGING */ +#else +#ifdef DEBUGGING +extern const char * const reg_name[]; +#endif #endif /* REG_COMP_C */ /* ex: set ro: */ |