summaryrefslogtreecommitdiff
path: root/libgo/go/exp/regexp/syntax/prog.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/exp/regexp/syntax/prog.go')
-rw-r--r--libgo/go/exp/regexp/syntax/prog.go237
1 files changed, 0 insertions, 237 deletions
diff --git a/libgo/go/exp/regexp/syntax/prog.go b/libgo/go/exp/regexp/syntax/prog.go
deleted file mode 100644
index bf85b720d02..00000000000
--- a/libgo/go/exp/regexp/syntax/prog.go
+++ /dev/null
@@ -1,237 +0,0 @@
-package syntax
-
-import (
- "bytes"
- "strconv"
-)
-
-// Compiled program.
-// May not belong in this package, but convenient for now.
-
-// A Prog is a compiled regular expression program.
-type Prog struct {
- Inst []Inst
- Start int // index of start instruction
- NumCap int // number of InstCapture insts in re
-}
-
-// An InstOp is an instruction opcode.
-type InstOp uint8
-
-const (
- InstAlt InstOp = iota
- InstAltMatch
- InstCapture
- InstEmptyWidth
- InstMatch
- InstFail
- InstNop
- InstRune
-)
-
-// An EmptyOp specifies a kind or mixture of zero-width assertions.
-type EmptyOp uint8
-
-const (
- EmptyBeginLine EmptyOp = 1 << iota
- EmptyEndLine
- EmptyBeginText
- EmptyEndText
- EmptyWordBoundary
- EmptyNoWordBoundary
-)
-
-// An Inst is a single instruction in a regular expression program.
-type Inst struct {
- Op InstOp
- Out uint32 // all but InstMatch, InstFail
- Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
- Rune []int
-}
-
-func (p *Prog) String() string {
- var b bytes.Buffer
- dumpProg(&b, p)
- return b.String()
-}
-
-// skipNop follows any no-op or capturing instructions
-// and returns the resulting pc.
-func (p *Prog) skipNop(pc uint32) *Inst {
- i := &p.Inst[pc]
- for i.Op == InstNop || i.Op == InstCapture {
- pc = i.Out
- i = &p.Inst[pc]
- }
- return i
-}
-
-// Prefix returns a literal string that all matches for the
-// regexp must start with. Complete is true if the prefix
-// is the entire match.
-func (p *Prog) Prefix() (prefix string, complete bool) {
- i := p.skipNop(uint32(p.Start))
-
- // Avoid allocation of buffer if prefix is empty.
- if i.Op != InstRune || len(i.Rune) != 1 {
- return "", i.Op == InstMatch
- }
-
- // Have prefix; gather characters.
- var buf bytes.Buffer
- for i.Op == InstRune && len(i.Rune) == 1 {
- buf.WriteRune(i.Rune[0])
- i = p.skipNop(i.Out)
- }
- return buf.String(), i.Op == InstMatch
-}
-
-// StartCond returns the leading empty-width conditions that must
-// be true in any match. It returns ^EmptyOp(0) if no matches are possible.
-func (p *Prog) StartCond() EmptyOp {
- var flag EmptyOp
- pc := uint32(p.Start)
- i := &p.Inst[pc]
-Loop:
- for {
- switch i.Op {
- case InstEmptyWidth:
- flag |= EmptyOp(i.Arg)
- case InstFail:
- return ^EmptyOp(0)
- case InstCapture, InstNop:
- // skip
- default:
- break Loop
- }
- pc = i.Out
- i = &p.Inst[pc]
- }
- return flag
-}
-
-// MatchRune returns true if the instruction matches (and consumes) r.
-// It should only be called when i.Op == InstRune.
-func (i *Inst) MatchRune(r int) bool {
- rune := i.Rune
-
- // Special case: single-rune slice is from literal string, not char class.
- // TODO: Case folding.
- if len(rune) == 1 {
- return r == rune[0]
- }
-
- // Peek at the first few pairs.
- // Should handle ASCII well.
- for j := 0; j < len(rune) && j <= 8; j += 2 {
- if r < rune[j] {
- return false
- }
- if r <= rune[j+1] {
- return true
- }
- }
-
- // Otherwise binary search.
- lo := 0
- hi := len(rune) / 2
- for lo < hi {
- m := lo + (hi-lo)/2
- if c := rune[2*m]; c <= r {
- if r <= rune[2*m+1] {
- return true
- }
- lo = m + 1
- } else {
- hi = m
- }
- }
- return false
-}
-
-// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.
-// Since we act on runes, it would be easy to support Unicode here.
-func wordRune(rune int) bool {
- return rune == '_' ||
- ('A' <= rune && rune <= 'Z') ||
- ('a' <= rune && rune <= 'z') ||
- ('0' <= rune && rune <= '9')
-}
-
-// MatchEmptyWidth returns true if the instruction matches
-// an empty string between the runes before and after.
-// It should only be called when i.Op == InstEmptyWidth.
-func (i *Inst) MatchEmptyWidth(before int, after int) bool {
- switch EmptyOp(i.Arg) {
- case EmptyBeginLine:
- return before == '\n' || before == -1
- case EmptyEndLine:
- return after == '\n' || after == -1
- case EmptyBeginText:
- return before == -1
- case EmptyEndText:
- return after == -1
- case EmptyWordBoundary:
- return wordRune(before) != wordRune(after)
- case EmptyNoWordBoundary:
- return wordRune(before) == wordRune(after)
- }
- panic("unknown empty width arg")
-}
-
-func (i *Inst) String() string {
- var b bytes.Buffer
- dumpInst(&b, i)
- return b.String()
-}
-
-func bw(b *bytes.Buffer, args ...string) {
- for _, s := range args {
- b.WriteString(s)
- }
-}
-
-func dumpProg(b *bytes.Buffer, p *Prog) {
- for j := range p.Inst {
- i := &p.Inst[j]
- pc := strconv.Itoa(j)
- if len(pc) < 3 {
- b.WriteString(" "[len(pc):])
- }
- if j == p.Start {
- pc += "*"
- }
- bw(b, pc, "\t")
- dumpInst(b, i)
- bw(b, "\n")
- }
-}
-
-func u32(i uint32) string {
- return strconv.Uitoa64(uint64(i))
-}
-
-func dumpInst(b *bytes.Buffer, i *Inst) {
- switch i.Op {
- case InstAlt:
- bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
- case InstAltMatch:
- bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
- case InstCapture:
- bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
- case InstEmptyWidth:
- bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
- case InstMatch:
- bw(b, "match")
- case InstFail:
- bw(b, "fail")
- case InstNop:
- bw(b, "nop -> ", u32(i.Out))
- case InstRune:
- if i.Rune == nil {
- // shouldn't happen
- bw(b, "rune <nil>")
- }
- bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
- }
-}