summary refs log tree commit diff
path: root/libgo/go/regexp/regexp.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/regexp/regexp.go')
-rw-r--r-- libgo/go/regexp/regexp.go 132
1 files changed, 81 insertions, 51 deletions
diff --git a/libgo/go/regexp/regexp.go b/libgo/go/regexp/regexp.go
index 2c041cd773c..d274ccdf5a5 100644
--- a/libgo/go/regexp/regexp.go
+++ b/libgo/go/regexp/regexp.go
@@ -283,6 +283,24 @@ func escape(c int) int {
return -1
}
+// checkBackslash returns the character at the current parse position,
+// decoding it first if it begins a backslash escape. It does not step past
+// the character it returns: both call sites in this change (charClass and
+// term) call p.nextc() themselves immediately afterwards.
+//
+// A character other than '\\' is returned unchanged. After a backslash:
+//   - endOfFile is reported as ErrExtraneousBackslash;
+//   - a character accepted by ispunct is returned as written;
+//   - a character for which escape(c) >= 0 is replaced by escaped[escape(c)];
+//   - anything else is reported as ErrBadBackslash.
+//
+// NOTE(review): p.error is defined outside this hunk; it presumably aborts
+// the parse instead of returning -- confirm.
+func (p *parser) checkBackslash() int {
+ c := p.c()
+ if c == '\\' {
+ c = p.nextc() // c is now the character that follows the backslash
+ switch {
+ case c == endOfFile:
+ p.error(ErrExtraneousBackslash)
+ case ispunct(c):
+ // c is as delivered
+ case escape(c) >= 0:
+ c = int(escaped[escape(c)]) // escape(c) is used as an index into escaped
+ default:
+ p.error(ErrBadBackslash)
+ }
+ }
+ return c
+}
+
func (p *parser) charClass() *instr {
i := newCharClass()
cc := i.cclass
@@ -314,20 +332,8 @@ func (p *parser) charClass() *instr {
return i
case '-': // do this before backslash processing
p.error(ErrBadRange)
- case '\\':
- c = p.nextc()
- switch {
- case c == endOfFile:
- p.error(ErrExtraneousBackslash)
- case ispunct(c):
- // c is as delivered
- case escape(c) >= 0:
- c = int(escaped[escape(c)])
- default:
- p.error(ErrBadBackslash)
- }
- fallthrough
default:
+ c = p.checkBackslash()
p.nextc()
switch {
case left < 0: // first of pair
@@ -345,14 +351,14 @@ func (p *parser) charClass() *instr {
}
}
}
- return nil
+ panic("unreachable")
}
func (p *parser) term() (start, end *instr) {
switch c := p.c(); c {
case '|', endOfFile:
return nil, nil
- case '*', '+':
+ case '*', '+', '?':
p.error(ErrBareClosure)
case ')':
if p.nlpar == 0 {
@@ -407,20 +413,8 @@ func (p *parser) term() (start, end *instr) {
}
bra.next = start
return bra, ebra
- case '\\':
- c = p.nextc()
- switch {
- case c == endOfFile:
- p.error(ErrExtraneousBackslash)
- case ispunct(c):
- // c is as delivered
- case escape(c) >= 0:
- c = int(escaped[escape(c)])
- default:
- p.error(ErrBadBackslash)
- }
- fallthrough
default:
+ c = p.checkBackslash()
p.nextc()
start = &instr{kind: iChar, char: c}
p.re.add(start)
@@ -571,15 +565,20 @@ func (re *Regexp) doParse() {
}
}
-// Extract regular text from the beginning of the pattern.
+// Extract regular text from the beginning of the pattern,
+// possibly after a leading iBOT.
// That text can be used by doExecute to speed up matching.
func (re *Regexp) setPrefix() {
var b []byte
var utf = make([]byte, utf8.UTFMax)
var inst *instr
- // First instruction is start; skip that.
+ // First instruction is start; skip that. Also skip any initial iBOT.
+ inst = re.inst[0].next
+ for inst.kind == iBOT {
+ inst = inst.next
+ }
Loop:
- for inst = re.inst[0].next; inst.kind != iEnd; inst = inst.next {
+ for ; inst.kind != iEnd; inst = inst.next {
// stop if this is not a char
if inst.kind != iChar {
break
@@ -590,7 +589,7 @@ Loop:
case iBOT, iEOT, iAlt:
break Loop
}
- n := utf8.EncodeRune(inst.char, utf)
+ n := utf8.EncodeRune(utf, inst.char)
b = append(b, utf[0:n]...)
}
// point prefixStart instruction to first non-CHAR after prefix
@@ -599,6 +598,11 @@ Loop:
re.prefix = string(b)
}
+// String returns the source text used to compile the regular expression.
+// The value is read from re.expr and returned unmodified.
+func (re *Regexp) String() string {
+ return re.expr
+}
+
// Compile parses a regular expression and returns, if successful, a Regexp
// object that can be used to match against text.
func Compile(str string) (regexp *Regexp, error os.Error) {
@@ -743,34 +747,46 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
if bytestr != nil {
end = len(bytestr)
}
+ anchored := re.inst[0].next.kind == iBOT
+ if anchored && pos > 0 {
+ return nil
+ }
// fast check for initial plain substring
- prefixed := false // has this iteration begun by skipping a prefix?
if re.prefix != "" {
- var advance int
- if bytestr == nil {
- advance = strings.Index(str[pos:], re.prefix)
+ advance := 0
+ if anchored {
+ if bytestr == nil {
+ if !strings.HasPrefix(str, re.prefix) {
+ return nil
+ }
+ } else {
+ if !bytes.HasPrefix(bytestr, re.prefixBytes) {
+ return nil
+ }
+ }
} else {
- advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+ if bytestr == nil {
+ advance = strings.Index(str[pos:], re.prefix)
+ } else {
+ advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+ }
}
if advance == -1 {
return nil
}
- pos += advance + len(re.prefix)
- prefixed = true
+ pos += advance
}
arena := &matchArena{nil, 2 * (re.nbra + 1)}
- for pos <= end {
- if !found {
+ for startPos := pos; pos <= end; {
+ if !found && (pos == startPos || !anchored) {
// prime the pump if we haven't seen a match yet
match := arena.noMatch()
match.m[0] = pos
- if prefixed {
- s[out] = arena.addState(s[out], re.prefixStart, true, match, pos, end)
- prefixed = false // next iteration should start at beginning of machine.
- } else {
- s[out] = arena.addState(s[out], re.start.next, false, match, pos, end)
- }
+ s[out] = arena.addState(s[out], re.start.next, false, match, pos, end)
arena.free(match) // if addState saved it, ref was incremented
+ } else if len(s[out]) == 0 {
+ // machine has completed
+ break
}
in, out = out, in // old out state is new in state
// clear out old state
@@ -779,10 +795,6 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
arena.free(state.match)
}
s[out] = old[0:0] // truncate state vector
- if found && len(s[in]) == 0 {
- // machine has completed
- break
- }
charwidth := 1
c := endOfFile
if pos < end {
@@ -844,6 +856,24 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
return final.match.m
}
+// LiteralPrefix returns a literal string that must begin any match
+// of the regular expression re. It returns the boolean true if the
+// literal string comprises the entire regular expression.
+//
+// The prefix is gathered by following the next links from re.inst[0] and
+// collecting inst.char for as long as the instructions are iChar. Reaching
+// iEnd yields complete == true; the first instruction of any other kind ends
+// the walk with complete == false.
+//
+// NOTE(review): unlike setPrefix in this same change, this walk does not skip
+// a leading iBOT, so a pattern whose first instruction is iBOT would appear to
+// yield ("", false) -- confirm that this is intended.
+func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
+ c := make([]int, len(re.inst)-2) // minus start and end.
+ // First instruction is start; skip that.
+ i := 0 // number of characters collected into c so far
+ for inst := re.inst[0].next; inst.kind != iEnd; inst = inst.next {
+ // stop if this is not a char
+ if inst.kind != iChar {
+ return string(c[:i]), false
+ }
+ c[i] = inst.char
+ i++
+ }
+ return string(c[:i]), true // reached iEnd: every instruction walked was an iChar
+}
+
// MatchString returns whether the Regexp matches the string s.
// The return value is a boolean: true for match, false for no match.
// It calls doExecute on s from position 0 with a nil byte slice and reports
// a match when the slice that comes back is non-empty.
func (re *Regexp) MatchString(s string) bool { return len(re.doExecute(s, nil, 0)) > 0 }