diff options
-rw-r--r-- | regcomp.pl | 4 | ||||
-rw-r--r-- | regcomp.sym | 238 |
2 files changed, 121 insertions, 121 deletions
diff --git a/regcomp.pl b/regcomp.pl index 7fdbd1303f..2fbe6c68f9 100644 --- a/regcomp.pl +++ b/regcomp.pl @@ -36,11 +36,11 @@ while (<DESC>) { } unless ($lastregop) { $ind++; - ($name[$ind], $desc, $rest[$ind]) = split /\t+/, $_, 3; + ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+)\s*;\s*(.*)/; ($type[$ind], $code[$ind], $args[$ind], $longj[$ind]) = split /[,\s]\s*/, $desc, 4; } else { - my ($type,@lists)=split /\s*\t+\s*/, $_; + my ($type,@lists)=split /\s+/, $_; die "No list? $type" if !@lists; foreach my $list (@lists) { my ($names,$special)=split /:/, $list , 2; diff --git a/regcomp.sym b/regcomp.sym index a1f59a9e1e..32935bf9d3 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -9,7 +9,7 @@ # Note that the order in this file is important. # # Format for first section: -# NAME \t TYPE, arg-description [num-args] [longjump-len] \t DESCRIPTION +# NAME \s+ TYPE, arg-description [num-args] [longjump-len] ; DESCRIPTION # # # run perl regen.pl after editing this file @@ -18,127 +18,127 @@ #* Exit points (0,1) -END END, no End of program. -SUCCEED END, no Return from a subroutine, basically. +END END, no ; End of program. +SUCCEED END, no ; Return from a subroutine, basically. #* Anchors: (2..13) -BOL BOL, no Match "" at beginning of line. -MBOL BOL, no Same, assuming multiline. -SBOL BOL, no Same, assuming singleline. -EOS EOL, no Match "" at end of string. -EOL EOL, no Match "" at end of line. -MEOL EOL, no Same, assuming multiline. -SEOL EOL, no Same, assuming singleline. -BOUND BOUND, no Match "" at any word boundary -BOUNDL BOUND, no Match "" at any word boundary -NBOUND NBOUND, no Match "" at any word non-boundary -NBOUNDL NBOUND, no Match "" at any word non-boundary -GPOS GPOS, no Matches where last m//g left off. +BOL BOL, no ; Match "" at beginning of line. +MBOL BOL, no ; Same, assuming multiline. +SBOL BOL, no ; Same, assuming singleline. +EOS EOL, no ; Match "" at end of string. +EOL EOL, no ; Match "" at end of line. 
+MEOL EOL, no ; Same, assuming multiline. +SEOL EOL, no ; Same, assuming singleline. +BOUND BOUND, no ; Match "" at any word boundary +BOUNDL BOUND, no ; Match "" at any word boundary +NBOUND NBOUND, no ; Match "" at any word non-boundary +NBOUNDL NBOUND, no ; Match "" at any word non-boundary +GPOS GPOS, no ; Matches where last m//g left off. #* [Special] alternatives: (14..30) -REG_ANY REG_ANY, no Match any one character (except newline). -SANY REG_ANY, no Match any one character. -CANY REG_ANY, no Match any one byte. -ANYOF ANYOF, sv Match character in (or not in) this class. -ALNUM ALNUM, no Match any alphanumeric character -ALNUML ALNUM, no Match any alphanumeric char in locale -NALNUM NALNUM, no Match any non-alphanumeric character -NALNUML NALNUM, no Match any non-alphanumeric char in locale -SPACE SPACE, no Match any whitespace character -SPACEL SPACE, no Match any whitespace char in locale -NSPACE NSPACE, no Match any non-whitespace character -NSPACEL NSPACE, no Match any non-whitespace char in locale -DIGIT DIGIT, no Match any numeric character -DIGITL DIGIT, no Match any numeric character in locale -NDIGIT NDIGIT, no Match any non-numeric character -NDIGITL NDIGIT, no Match any non-numeric character in locale -CLUMP CLUMP, no Match any combining character sequence +REG_ANY REG_ANY, no ; Match any one character (except newline). +SANY REG_ANY, no ; Match any one character. +CANY REG_ANY, no ; Match any one byte. +ANYOF ANYOF, sv ; Match character in (or not in) this class. 
+ALNUM ALNUM, no ; Match any alphanumeric character +ALNUML ALNUM, no ; Match any alphanumeric char in locale +NALNUM NALNUM, no ; Match any non-alphanumeric character +NALNUML NALNUM, no ; Match any non-alphanumeric char in locale +SPACE SPACE, no ; Match any whitespace character +SPACEL SPACE, no ; Match any whitespace char in locale +NSPACE NSPACE, no ; Match any non-whitespace character +NSPACEL NSPACE, no ; Match any non-whitespace char in locale +DIGIT DIGIT, no ; Match any numeric character +DIGITL DIGIT, no ; Match any numeric character in locale +NDIGIT NDIGIT, no ; Match any non-numeric character +NDIGITL NDIGIT, no ; Match any non-numeric character in locale +CLUMP CLUMP, no ; Match any combining character sequence #* Alternation (31) -# BRANCH The set of branches constituting a single choice are hooked -# together with their "next" pointers, since precedence prevents -# anything being concatenated to any individual branch. The -# "next" pointer of the last BRANCH in a choice points to the -# thing following the whole choice. This is also where the -# final "next" pointer of each individual branch points; each -# branch starts with the operand node of a BRANCH node. +# BRANCH The set of branches constituting a single choice are hooked +# together with their "next" pointers, since precedence prevents +# anything being concatenated to any individual branch. The +# "next" pointer of the last BRANCH in a choice points to the +# thing following the whole choice. This is also where the +# final "next" pointer of each individual branch points; each +# branch starts with the operand node of a BRANCH node. # -BRANCH BRANCH, node Match this alternative, or the next... +BRANCH BRANCH, node ; Match this alternative, or the next... #*Back pointer (32) -# BACK Normal "next" pointers all implicitly point forward; BACK -# exists to make loop structures possible. +# BACK Normal "next" pointers all implicitly point forward; BACK +# exists to make loop structures possible. 
# not used -BACK BACK, no Match "", "next" ptr points backward. +BACK BACK, no ; Match "", "next" ptr points backward. #*Literals (33..35) -EXACT EXACT, str Match this string (preceded by length). -EXACTF EXACT, str Match this string, folded (prec. by length). -EXACTFL EXACT, str Match this string, folded in locale (w/len). +EXACT EXACT, str ; Match this string (preceded by length). +EXACTF EXACT, str ; Match this string, folded (prec. by length). +EXACTFL EXACT, str ; Match this string, folded in locale (w/len). #*Do nothing types (36..37) -NOTHING NOTHING,no Match empty string. +NOTHING NOTHING, no ; Match empty string. # A variant of above which delimits a group, thus stops optimizations -TAIL NOTHING,no Match empty string. Can jump here from outside. +TAIL NOTHING, no ; Match empty string. Can jump here from outside. #*Loops (38..44) -# STAR,PLUS '?', and complex '*' and '+', are implemented as circular -# BRANCH structures using BACK. Simple cases (one character -# per match) are implemented with STAR and PLUS for speed -# and to minimize recursive plunges. +# STAR,PLUS '?', and complex '*' and '+', are implemented as circular +# BRANCH structures using BACK. Simple cases (one character +# per match) are implemented with STAR and PLUS for speed +# and to minimize recursive plunges. # -STAR STAR, node Match this (simple) thing 0 or more times. -PLUS PLUS, node Match this (simple) thing 1 or more times. +STAR STAR, node ; Match this (simple) thing 0 or more times. +PLUS PLUS, node ; Match this (simple) thing 1 or more times. -CURLY CURLY, sv 2 Match this simple thing {n,m} times. -CURLYN CURLY, no 2 Capture next-after-this simple thing -CURLYM CURLY, no 2 Capture this medium-complex thing {n,m} times. -CURLYX CURLY, sv 2 Match this complex thing {n,m} times. +CURLY CURLY, sv 2 ; Match this simple thing {n,m} times. +CURLYN CURLY, no 2 ; Capture next-after-this simple thing +CURLYM CURLY, no 2 ; Capture this medium-complex thing {n,m} times. 
+CURLYX CURLY, sv 2 ; Match this complex thing {n,m} times. # This terminator creates a loop structure for CURLYX -WHILEM WHILEM, no Do curly processing and see if rest matches. +WHILEM WHILEM, no ; Do curly processing and see if rest matches. #*Buffer related (45..49) -# OPEN,CLOSE,GROUPP ...are numbered at compile time. -OPEN OPEN, num 1 Mark this point in input as start of #n. -CLOSE CLOSE, num 1 Analogous to OPEN. +# OPEN,CLOSE,GROUPP ...are numbered at compile time. +OPEN OPEN, num 1 ; Mark this point in input as start of #n. +CLOSE CLOSE, num 1 ; Analogous to OPEN. -REF REF, num 1 Match some already matched string -REFF REF, num 1 Match already matched string, folded -REFFL REF, num 1 Match already matched string, folded in loc. +REF REF, num 1 ; Match some already matched string +REFF REF, num 1 ; Match already matched string, folded +REFFL REF, num 1 ; Match already matched string, folded in loc. #*Grouping assertions (50..54) -IFMATCH BRANCHJ,off 1 2 Succeeds if the following matches. -UNLESSM BRANCHJ,off 1 2 Fails if the following matches. -SUSPEND BRANCHJ,off 1 1 "Independent" sub-RE. -IFTHEN BRANCHJ,off 1 1 Switch, should be preceeded by switcher . -GROUPP GROUPP, num 1 Whether the group matched. +IFMATCH BRANCHJ, off 1 2 ; Succeeds if the following matches. +UNLESSM BRANCHJ, off 1 2 ; Fails if the following matches. +SUSPEND BRANCHJ, off 1 1 ; "Independent" sub-RE. +IFTHEN BRANCHJ, off 1 1 ; Switch, should be preceeded by switcher . +GROUPP GROUPP, num 1 ; Whether the group matched. #*Support for long RE (55..56) -LONGJMP LONGJMP,off 1 1 Jump far away. -BRANCHJ BRANCHJ,off 1 1 BRANCH with long offset. +LONGJMP LONGJMP, off 1 1 ; Jump far away. +BRANCHJ BRANCHJ, off 1 1 ; BRANCH with long offset. #*The heavy worker (57) -EVAL EVAL, evl 1 Execute some Perl code. +EVAL EVAL, evl 1 ; Execute some Perl code. #*Modifiers (58..59) -MINMOD MINMOD, no Next operator is not greedy. -LOGICAL LOGICAL,no Next opcode should set the flag only. 
+MINMOD MINMOD, no ; Next operator is not greedy. +LOGICAL LOGICAL, no ; Next opcode should set the flag only. # This is not used yet (60) -RENUM BRANCHJ,off 1 1 Group with independently numbered parens. +RENUM BRANCHJ, off 1 1 ; Group with independently numbered parens. #*Trie Related (61..62) @@ -146,53 +146,53 @@ RENUM BRANCHJ,off 1 1 Group with independently numbered parens. # inline charclass data (ascii only), the 'C' store it in the structure. # NOTE: the relative order of the TRIE-like regops is significant -TRIE TRIE, trie 1 Match many EXACT(FL?)? at once. flags==type -TRIEC TRIE,trie charclass Same as TRIE, but with embedded charclass data +TRIE TRIE, trie 1 ; Match many EXACT(FL?)? at once. flags==type +TRIEC TRIE,trie charclass ; Same as TRIE, but with embedded charclass data # For start classes, contains an added fail table. -AHOCORASICK TRIE, trie 1 Aho Corasick stclass. flags==type -AHOCORASICKC TRIE,trie charclass Same as AHOCORASICK, but with embedded charclass data +AHOCORASICK TRIE, trie 1 ; Aho Corasick stclass. flags==type +AHOCORASICKC TRIE,trie charclass ; Same as AHOCORASICK, but with embedded charclass data #*Regex Subroutines (65..66) -GOSUB GOSUB, num/ofs 2L recurse to paren arg1 at (signed) ofs arg2 -GOSTART GOSTART, no recurse to start of pattern +GOSUB GOSUB, num/ofs 2L ; recurse to paren arg1 at (signed) ofs arg2 +GOSTART GOSTART, no ; recurse to start of pattern #*Named references (67..69) -NREF REF, no-sv 1 Match some already matched string -NREFF REF, no-sv 1 Match already matched string, folded -NREFFL REF, no-sv 1 Match already matched string, folded in loc. +NREF REF, no-sv 1 ; Match some already matched string +NREFF REF, no-sv 1 ; Match already matched string, folded +NREFFL REF, no-sv 1 ; Match already matched string, folded in loc. #*Special conditionals (70..72) -NGROUPP NGROUPP, no-sv 1 Whether the group matched. -INSUBP INSUBP, num 1 Whether we are in a specific recurse. -DEFINEP DEFINEP, none 1 Never execute directly. 
+NGROUPP NGROUPP, no-sv 1 ; Whether the group matched. +INSUBP INSUBP, num 1 ; Whether we are in a specific recurse. +DEFINEP DEFINEP, none 1 ; Never execute directly. #*Backtracking Verbs -ENDLIKE ENDLIKE, none Used only for the type field of verbs -OPFAIL ENDLIKE, none Same as (?!) -ACCEPT ENDLIKE, parno 1 Accepts the current matched string. +ENDLIKE ENDLIKE, none ; Used only for the type field of verbs +OPFAIL ENDLIKE, none ; Same as (?!) +ACCEPT ENDLIKE, parno 1 ; Accepts the current matched string. #*Verbs With Arguments -VERB VERB, no-sv 1 Used only for the type field of verbs -PRUNE VERB, no-sv 1 Pattern fails at this startpoint if no-backtracking through this -MARKPOINT VERB, no-sv 1 Push the current location for rollback by cut. -SKIP VERB, no-sv 1 On failure skip forward (to the mark) before retrying -COMMIT VERB, no-sv 1 Pattern fails outright if backtracking through this -CUTGROUP VERB, no-sv 1 On failure go to the next alternation in the group +VERB VERB, no-sv 1 ; Used only for the type field of verbs +PRUNE VERB, no-sv 1 ; Pattern fails at this startpoint if no-backtracking through this +MARKPOINT VERB, no-sv 1 ; Push the current location for rollback by cut. +SKIP VERB, no-sv 1 ; On failure skip forward (to the mark) before retrying +COMMIT VERB, no-sv 1 ; Pattern fails outright if backtracking through this +CUTGROUP VERB, no-sv 1 ; On failure go to the next alternation in the group #*Control what to keep in $&. -KEEPS KEEPS, no $& begins here. +KEEPS KEEPS, no ; $& begins here. 
#*New charclass like patterns -LNBREAK LNBREAK, none generic newline pattern -VERTWS VERTWS, none vertical whitespace (Perl 6) -NVERTWS NVERTWS, none not vertical whitespace (Perl 6) -HORIZWS HORIZWS, none horizontal whitespace (Perl 6) -NHORIZWS NHORIZWS, none not horizontal whitespace (Perl 6) +LNBREAK LNBREAK, none ; generic newline pattern +VERTWS VERTWS, none ; vertical whitespace (Perl 6) +NVERTWS NVERTWS, none ; not vertical whitespace (Perl 6) +HORIZWS HORIZWS, none ; horizontal whitespace (Perl 6) +NHORIZWS NHORIZWS, none ; not horizontal whitespace (Perl 6) -FOLDCHAR FOLDCHAR, codepoint 1 codepoint with tricky case folding properties. +FOLDCHAR FOLDCHAR, codepoint 1 ; codepoint with tricky case folding properties. # NEW STUFF ABOVE THIS LINE @@ -202,14 +202,14 @@ FOLDCHAR FOLDCHAR, codepoint 1 codepoint with tricky case folding properties. # This is not really a node, but an optimized away piece of a "long" node. # To simplify debugging output, we mark it as if it were a node -OPTIMIZED NOTHING,off Placeholder for dump. +OPTIMIZED NOTHING, off ; Placeholder for dump. # Special opcode with the property that no opcode in a compiled program # will ever be of this type. Thus it can be used as a flag value that # no other opcode has been seen. END is used similarly, in that an END # node can't be optimized. So END implies "unoptimizable" and PSEUDO means # "not seen anything to optimize yet". -PSEUDO PSEUDO,off Pseudo opcode for internal use. +PSEUDO PSEUDO, off ; Pseudo opcode for internal use. ------------------------------------------------------------------------------- # Format for second section: @@ -221,16 +221,16 @@ PSEUDO PSEUDO,off Pseudo opcode for internal use. 
# Anything below is a state # # -TRIE next:FAIL -EVAL AB:FAIL -CURLYX end:FAIL -WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL -BRANCH next:FAIL -CURLYM A,B:FAIL -IFMATCH A:FAIL -CURLY B_min_known,B_min,B_max:FAIL -COMMIT next:FAIL -MARKPOINT next:FAIL -SKIP next:FAIL -CUTGROUP next:FAIL -KEEPS next:FAIL +TRIE next:FAIL +EVAL AB:FAIL +CURLYX end:FAIL +WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL +BRANCH next:FAIL +CURLYM A,B:FAIL +IFMATCH A:FAIL +CURLY B_min_known,B_min,B_max:FAIL +COMMIT next:FAIL +MARKPOINT next:FAIL +SKIP next:FAIL +CUTGROUP next:FAIL +KEEPS next:FAIL |